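"""Gradio demo for comparing recent Tarteel ASR models.

Model checkpoints are pulled from Weights & Biases artifacts and
lazy-loaded on first use.
"""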
from datetime import datetime, timedelta
import os

import gradio as gr
import nemo.collections.asr as nemo_asr
import wandb

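# Configuration: how far back to list models, and which W&B entity/project
# to query (both overridable via environment variables).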
MODEL_HISTORY_DAYS = 180
WANDB_ENTITY = os.environ.get("WANDB_ENTITY", "tarteel")
WANDB_PROJECT_NAME = os.environ.get("WANDB_PROJECT_NAME", "nemo-experiments")

wandb_api = wandb.Api(overrides={"entity": WANDB_ENTITY})

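# One iterator of artifact versions per model collection in the project.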
all_artifacts_versions = [
    collection.versions()
    for collection in wandb_api.artifact_type(
        type_name="model", project=WANDB_PROJECT_NAME
    ).collections()
]

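# Flatten the version iterators, keeping only recent, non-deleted artifacts.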
latest_artifacts = [
    artifact
    for artifact_versions in all_artifacts_versions
    for artifact in artifact_versions
    if (
        datetime.fromisoformat(artifact.created_at)
        > datetime.now() - timedelta(days=MODEL_HISTORY_DAYS)  # last 180 days
        and artifact.state != "DELETED"
    )
]
latest_artifacts.sort(key=lambda a: a.created_at, reverse=True)

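# Cache of loaded models, keyed by artifact name; None until first use.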
models = {artifact.name: None for artifact in latest_artifacts}


def lazy_load_models(models_names):
    """Download and load any selected models that are not cached yet."""
    for model_name in models_names:
        if models[model_name] is None:
            artifact = next(a for a in latest_artifacts if a.name == model_name)
            models[model_name] = nemo_asr.models.EncDecCTCModelBPE.restore_from(
                artifact.file()
            )
            models[model_name].eval()


def transcribe(audio_mic, audio_file, models_names):
    """Transcribe the microphone and file recordings with each selected model."""
    lazy_load_models(models_names)
    # Transcribe audio_mic and audio_file separately,
    # because transcribe() fails if the path is empty.
    transcription_mic = "\n".join(
        [
            f"{model_name} => {models[model_name].transcribe([audio_mic])[0]}"
            for model_name in models_names
        ]
        if audio_mic
        else []
    )
    transcription_file = "\n".join(
        [
            f"{model_name} => {models[model_name].transcribe([audio_file])[0]}"
            for model_name in models_names
        ]
        if audio_file
        else []
    )
    return transcription_mic, transcription_file


model_selection = list(models.keys())

demo = gr.Blocks()

with demo:
    gr.Markdown(
        """
        # ﷽
        These are the latest* Tarteel models.

        Please note that the models are lazy-loaded:
        the first time you use a model, it may take some time
        to download and load for inference.

        *: created within the last 180 days at the time the space was launched.
        To refresh the list, restart the space.
        """
    )
    with gr.Row():
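        # Note: source= is the Gradio 3.x Audio API; Gradio 4 renamed it to
        # sources=[...], so this file assumes a Gradio 3 environment.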
        audio_mic = gr.Audio(source="microphone", type="filepath", label="Microphone")
        audio_file = gr.Audio(source="upload", type="filepath", label="File")

    with gr.Row():
        output_mic = gr.TextArea(label="Microphone Transcription")
        output_file = gr.TextArea(label="Audio Transcription")

    models_names = gr.CheckboxGroup(model_selection, label="Select Models to Use")

    b1 = gr.Button("Transcribe")

    b1.click(
        transcribe,
        inputs=[audio_mic, audio_file, models_names],
        outputs=[output_mic, output_file],
    )

demo.launch()
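# launch() with no arguments is sufficient on Hugging Face Spaces; for local
# debugging you could pass demo.launch(debug=True) to keep the process
# attached and print errors to the console.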