File size: 1,819 Bytes
7b18d60
 
502159a
eb134bd
0659665
7b18d60
68a9c43
 
 
eb134bd
68a9c43
 
 
a4f93b2
68a9c43
 
a4f93b2
7b18d60
0659665
 
 
 
 
 
 
 
 
 
 
 
 
 
ebd3d99
65129d9
0659665
ebd3d99
6aaee7d
0659665
ebd3d99
eb134bd
542278b
eb134bd
ebd3d99
 
 
 
eb134bd
972bbda
 
 
eb134bd
 
92d60f9
7b18d60
 
266358f
65129d9
0659665
 
7b18d60
 
0659665
266358f
7b18d60
77f489c
92d60f9
 
ad0f8bc
92d60f9
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import logging
import os
import time

import ffmpeg  # Make sure it's ffmpeg-python
import gradio as gr
import torch
from transformers import pipeline

# Detect CUDA once; the result decides whether a device index is passed below.
use_gpu = torch.cuda.is_available()


# Build the speech-recognition pipeline a single time at import.
# `device=0` is supplied only when CUDA is available; otherwise no device
# argument is passed and the pipeline picks its own default.
_device_kwargs = {"device": 0} if use_gpu else {}
p = pipeline(
    "automatic-speech-recognition",
    model="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h",
    **_device_kwargs,
)


def extract_audio_from_m3u8(url):
    """Fetch the stream at *url* with ffmpeg and write its audio to a local file.

    Only ``http://`` and ``https://`` URLs are accepted. The value originates
    from a free-text field, and ffmpeg would otherwise also open local paths
    and other input protocols.

    Args:
        url: Address of the stream (for example an ``.m3u8`` playlist).

    Returns:
        The path of the written audio file on success, or a message starting
        with ``"An error occurred: "`` on failure. Exceptions raised while
        running ffmpeg are reported through that message instead of being
        propagated.
    """
    cleaned = url.strip() if isinstance(url, str) else ""
    if not cleaned.lower().startswith(("http://", "https://")):
        return "An error occurred: only http:// and https:// URLs are supported"

    try:
        # NOTE(review): the output name is fixed and overwritten on every
        # call, so concurrent calls would clobber each other's file.
        output_file = "output_audio.aac"
        ffmpeg.input(cleaned).output(output_file).run(overwrite_output=True)
        return output_file
    except Exception as e:
        return f"An error occurred: {e}"


def transcribe(audio, state="", uploaded_audio=None, m3u8_url=""):
    """Transcribe one audio input and append the text to the running transcript.

    The audio source is chosen in this order: ``uploaded_audio`` when it is
    not None, otherwise audio extracted from ``m3u8_url`` when that is
    non-empty, otherwise ``audio``.

    Args:
        audio: File path of the microphone recording, or None/empty.
        state: Transcript accumulated by earlier calls; None is treated as "".
        uploaded_audio: File path of an uploaded audio file, or None.
        m3u8_url: URL of a stream whose audio should be transcribed.

    Returns:
        A ``(text, state)`` tuple: the text to display and the transcript to
        carry over to the next call. When there is nothing to transcribe the
        transcript is returned unchanged; on failure ``text`` is an error
        message and ``state`` is left as it was.
    """
    state = state or ""

    if uploaded_audio is not None:
        # An upload wins over the URL, so skip the stream download entirely.
        audio = uploaded_audio
    elif m3u8_url:
        audio = extract_audio_from_m3u8(m3u8_url)
        if not os.path.isfile(audio):
            # The extractor signals failure by returning a message instead of
            # a path; show it rather than handing it to the model as a file.
            return audio, state

    if not audio:
        # Nothing to transcribe: leave the transcript as it is.
        return state, state

    try:
        # NOTE(review): fixed 3 s pause kept from the original; its purpose is
        # not evident from this file -- confirm whether it is still needed.
        time.sleep(3)
        text = p(audio, chunk_length_s=50)["text"]
    except Exception:
        # Keep the UI responsive, but record the cause instead of dropping it.
        logging.getLogger(__name__).exception("Transcription failed")
        return "An error occurred during transcription.", state

    state += text + "\n"
    return state, state


def reset(state):
    """Return an empty transcript, ignoring the current *state*."""
    return ""


# Input widgets, listed in the order `transcribe` receives its arguments.
_microphone = gr.components.Audio(source="microphone", type="filepath")
_upload = gr.components.Audio(label="Upload Audio File", type="filepath", source="upload")
_stream_url = gr.components.Textbox(label="m3u8 URL | E.g.: from kvf.fo or logting.fo")

# Output widget for the transcript text; the "state" entry is added below.
_transcript = gr.components.Textbox(type="text")

# "state" appears in both lists so the transcript returned by one call is
# passed back in as the `state` argument of the next.
demo = gr.Interface(
    fn=transcribe,
    inputs=[_microphone, "state", _upload, _stream_url],
    outputs=[_transcript, "state"],
    live=True,
)


demo.launch()