King Eke
King Eke

Reputation: 21

Format AWS Transcribe Audio Identification

I searched for a way to do this but couldn't find anything reasonably close to what AWS currently shows in its UI, so I came up with my own solution in Laravel and am posting it here to help anyone else looking for the same thing.

Upvotes: 0

Views: 176

Answers (1)

King Eke
King Eke

Reputation: 21

/**
 * Turn an AWS Transcribe result with speaker labels into a list of speaker turns.
 *
 * Consecutive segments that carry the same speaker label are merged into one
 * turn, and the words of each turn are joined in start-time order. Items that
 * have no start time (punctuation in Transcribe output) are attached directly
 * to the timed item that precedes them, so they are not separated by a space.
 *
 * @param array $response Decoded Transcribe JSON; reads
 *                        results.items and results.speaker_labels.segments.
 *                        Missing keys are treated as empty lists.
 *
 * @return array List of ['speaker' => string, 'text' => string] entries.
 *               Each text ends with a single trailing space (kept for
 *               compatibility with existing consumers of this output).
 */
public function convertTextToSpeakers($response)
    {
        $segments = $response['results']['speaker_labels']['segments'] ?? [];
        $items    = $response['results']['items'] ?? [];
        $result   = [];

        // "1.50" and "1.5" must address the same word, so numeric timings are
        // normalised before being used as lookup keys.
        $normaliseTime = static function ($time) {
            return is_numeric($time) ? (string) (float) $time : (string) $time;
        };

        $timingKey = static function ($entry) use ($normaliseTime) {
            return $normaliseTime($entry['start_time'] ?? '')
                . '|'
                . $normaliseTime($entry['end_time'] ?? '');
        };

        $byStartTime = static function ($left, $right) {
            return (float) ($left['start_time'] ?? 0) <=> (float) ($right['start_time'] ?? 0);
        };

        // Single pass over the items: build "start|end" => text, appending
        // untimed items (punctuation) to the most recent timed item.
        $contentByTiming = [];
        $currentKey      = null;

        foreach ($items as $item) {
            if (isset($item['start_time'])) {
                $currentKey = $timingKey($item);
            }

            if ($currentKey === null) {
                // Untimed item before any timed one: nothing to attach it to.
                continue;
            }

            $contentByTiming[$currentKey] = ($contentByTiming[$currentKey] ?? '')
                . ($item['alternatives'][0]['content'] ?? '');
        }

        // Collapse runs of adjacent segments spoken by the same speaker.
        $turns = [];

        foreach ($segments as $segment) {
            $segment['items'] = $segment['items'] ?? [];
            $speaker          = $segment['speaker_label'] ?? null;
            $last             = count($turns) - 1;

            if ($last >= 0 && ($turns[$last]['speaker_label'] ?? null) === $speaker) {
                $turns[$last]['items'] = array_merge($turns[$last]['items'], $segment['items']);

                continue;
            }

            $turns[] = $segment;
        }

        usort($turns, $byStartTime);

        foreach ($turns as $turn) {
            $turnItems = $turn['items'];
            usort($turnItems, $byStartTime);

            $text = '';

            foreach ($turnItems as $turnItem) {
                $text .= ($contentByTiming[$timingKey($turnItem)] ?? '') . ' ';
            }

            $result[] = [
                'speaker' => $turn['speaker_label'] ?? null,
                'text'    => $text,
            ];
        }

        return $result;
    }

Upvotes: 0

Related Questions