zinoubm commited on
Commit
1a81701
·
1 Parent(s): 4f8f8b7

adding comments and cleaning app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -3
app.py CHANGED
@@ -8,6 +8,9 @@ openai.api_key = OPENAI_API_KEY
8
 
9
 
10
  def get_command(command, model, id2label):
 
 
 
11
  completion = openai.Completion.create(
12
  model=model, prompt=f"{command}->", max_tokens=1, temperature=0
13
  )
@@ -17,20 +20,28 @@ def get_command(command, model, id2label):
17
 
18
 
19
def transcribe(audio, text):
    """
    Classify user input, transcribing audio first when no text is given.

    If `text` is provided it is classified directly and no audio response is
    produced. Otherwise `audio` is transcribed with the speech model and the
    transcription is classified.

    Returns a (transcription, text_response, audio_response) tuple.
    """
    if text:
        result = get_command(text, MODEL, id2label)
        return "Text provided by the user", text_respnses[result], None

    # Downsample the original recording to the 16000 Hz rate the model expects.
    # NOTE(review): renamed `input` -> `speech` to stop shadowing the builtin.
    speech, rate = librosa.load(audio, sr=16000)

    # Run the speech model to get a text transcription.
    inputs = processor(speech, sampling_rate=rate, return_tensors="pt")
    generated_ids = model.generate(
        inputs["input_features"], attention_mask=inputs["attention_mask"]
    )
    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)

    # NOTE(review): batch_decode returns a list of strings, which get_command
    # interpolates into the prompt as-is (e.g. "['...']->"); confirm this
    # matches the fine-tuned prompt format, otherwise pass transcription[0].
    result = get_command(transcription, MODEL, id2label)
    audio_res = resoponses.get(result)()

    return transcription, text_respnses[result], audio_res
35
 
36
 
 
8
 
9
 
10
  def get_command(command, model, id2label):
11
+ """
12
+ This function gets the classification output from the OpenAI API
13
+ """
14
  completion = openai.Completion.create(
15
  model=model, prompt=f"{command}->", max_tokens=1, temperature=0
16
  )
 
20
 
21
 
22
def transcribe(audio, text):
    """
    Classify user input, transcribing audio first when no text is given.

    If `text` is provided it is classified directly and no audio response is
    produced. Otherwise `audio` is transcribed with the speech model and the
    transcription is classified.

    Returns a (transcription, text_response, audio_response) tuple.
    """
    if text:
        result = get_command(text, MODEL, id2label)
        return "Text provided by the user", text_respnses[result], None

    # Downsample the original recording to the 16000 Hz rate the model expects.
    # NOTE(review): renamed `input` -> `speech` to stop shadowing the builtin.
    speech, rate = librosa.load(audio, sr=16000)

    # Run the speech model to get a text transcription.
    inputs = processor(speech, sampling_rate=rate, return_tensors="pt")
    generated_ids = model.generate(
        inputs["input_features"], attention_mask=inputs["attention_mask"]
    )
    transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)

    # NOTE(review): batch_decode returns a list of strings, which get_command
    # interpolates into the prompt as-is (e.g. "['...']->"); confirm this
    # matches the fine-tuned prompt format, otherwise pass transcription[0].
    result = get_command(transcription, MODEL, id2label)
    audio_res = resoponses.get(result)()

    return transcription, text_respnses[result], audio_res
46
 
47