Spaces:

zinoubm
/

Voice_Commands

Runtime error

File size: 1,970 Bytes

0013d95
 
bb7af57
4f8f8b7
bb7af57
0013d95
bb7af57
 
0013d95
bb7af57
1a81701
 
 
bb7af57
 
 
 
 
 
 
 
4f8f8b7
1a81701
 
 
 
 
4f8f8b7
 
 
 
1a81701
 
 
 
0013d95
 
 
 
1a81701
0013d95
bb7af57
4f8f8b7
1a81701
4f8f8b7
0013d95
 
bb7af57
 
 
4f8f8b7
 
 
 
 
 
 
 
 
 
 
 
bb7af57

import gradio as gr
import librosa
import openai
from constants import *


# Configure the OpenAI client with the API key. OPENAI_API_KEY is not defined
# in this file; presumably it comes from the star import of `constants`.
openai.api_key = OPENAI_API_KEY


def get_command(command, model, id2label):
    """
    Classify a command by asking an OpenAI completion model for a label id.

    The prompt sent is ``f"{command}->"``; a single token is requested at
    temperature 0 and that token is interpreted as an integer label id.

    Args:
        command: Text of the command to classify.
        model: Name of the OpenAI completion model to query.
        id2label: Mapping from integer label id to label name.

    Returns:
        The label mapped to the returned id, or "unknown" when the model's
        output is not an integer or the id is not a key of ``id2label``.
    """
    completion = openai.Completion.create(
        model=model, prompt=f"{command}->", max_tokens=1, temperature=0
    )
    raw_id = completion["choices"][0]["text"].strip()

    # The completion is free-form text: for out-of-domain input the single
    # returned token may be empty or non-numeric. Fall back to "unknown"
    # rather than letting int() raise ValueError and abort the request.
    try:
        label_id = int(raw_id)
    except ValueError:
        return "unknown"

    return id2label.get(label_id, "unknown")


def transcribe(audio, text):
    """
    Classify a user command given either as typed text or as recorded audio.

    If ``text`` is provided it is classified directly. Otherwise ``audio`` is
    loaded, transcribed, and the transcription is classified.

    Args:
        audio: Path to the recorded audio file; only used when ``text`` is
            empty.
        text: Command typed by the user; a falsy value selects the audio path.

    Returns:
        A 3-tuple ``(transcription, text_response, audio_response)``.
        In the text path the first item is the fixed notice
        "Text provided by the user" and ``audio_response`` is ``None``.
        In the audio path ``audio_response`` is whatever the handler
        registered for the label returns, or ``None`` if there is no handler.
    """

    if text:
        result = get_command(text, MODEL, id2label)
        return "Text provided by the user", text_respnses[result], None

    # Load the recording, resampled to 16000 Hz.
    waveform, rate = librosa.load(audio, sr=16000)

    # Getting the text transcription.
    inputs = processor(waveform, sampling_rate=rate, return_tensors="pt")
    generated_ids = model.generate(
        inputs["input_features"], attention_mask=inputs["attention_mask"]
    )

    # NOTE(review): assumes `processor` is a Hugging Face processor whose
    # batch_decode returns a list with one string per batch item - confirm
    # against `constants`. Only one recording is processed, so use the first
    # string; passing the list itself would put "['...']->" in the prompt.
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    transcription = decoded[0] if decoded else ""
    result = get_command(transcription, MODEL, id2label)

    # `.get` returns None when no audio handler exists for this label (for
    # example the "unknown" fallback of get_command); do not call None.
    make_audio = resoponses.get(result)
    audio_res = make_audio() if make_audio is not None else None

    return transcription, text_respnses[result], audio_res


if __name__ == "__main__":
    # Input widgets: a microphone recording (passed to `transcribe` as a file
    # path) and an optional typed command.
    microphone_input = gr.Audio(label="", source="microphone", type="filepath")
    typed_input = gr.Textbox(
        label="If you prefer type your command (more accurate)"
    )

    # Output widgets, in the order of the tuple returned by `transcribe`.
    transcription_output = gr.Textbox(
        label="Input Transcription (Please check that this matches what you've said)"
    )
    text_response_output = gr.Textbox(label="Machine Response (Text Version)")
    audio_response_output = gr.Audio(label="Machine Response (Audio Version)")

    demo = gr.Interface(
        fn=transcribe,
        inputs=[microphone_input, typed_input],
        outputs=[
            transcription_output,
            text_response_output,
            audio_response_output,
        ],
        allow_flagging="auto",
    )
    demo.launch()