finished setup

Browse files

Files changed (17) hide show

__pycache__/run_command_responses.cpython-38.pyc +0 -0
app.py +40 -4
assets/command_responses/cooled_activated.wav +0 -0
assets/command_responses/cooled_deactivated.wav +0 -0
assets/command_responses/heated_activated.wav +0 -0
assets/command_responses/heated_deactivated.wav +0 -0
assets/command_responses/massage_activated.wav +0 -0
assets/command_responses/massage_deactivated.wav +0 -0
command_responses/cooled_seats_activated.m4a +0 -0
command_responses/cooled_seats_deactivated.m4a +0 -0
command_responses/heated_seats_activated.m4a +0 -0
command_responses/heated_seats_deactivated.m4a +0 -0
command_responses/massage_seats_activated.m4a +0 -0
command_responses/massage_seats_deactivated.m4a +0 -0
requirements.txt +2 -1
run_command_responses.py +21 -0
script.py +12 -0

__pycache__/run_command_responses.cpython-38.pyc ADDED Viewed

Binary file (1.64 kB). View file

app.py CHANGED Viewed

@@ -5,6 +5,26 @@ import librosa
 import torch
 from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
 from dotenv import load_dotenv
 load_dotenv()
@@ -13,12 +33,23 @@ os.environ["PATH"] += ".\env\Lib\site-packages\ffprobe"
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 MODEL = os.getenv("MODEL")
 model = Speech2TextForConditionalGeneration.from_pretrained(
     "facebook/s2t-small-librispeech-asr"
 )
 processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
 def transcribe(audio):
     input, rate = librosa.load(
         audio, sr=16000
@@ -28,9 +59,14 @@ def transcribe(audio):
         inputs["input_features"], attention_mask=inputs["attention_mask"]
     )
     transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)
-    return transcription
-gr.Interface(
-    fn=transcribe, inputs=gr.Audio(source="microphone", type="filepath"), outputs="text"
-).launch()

 import torch
 from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
 from dotenv import load_dotenv
+import openai
+from run_command_responses import ResponseManager as rs
+resoponses = {
+    "heated_seats_on": rs.activate_heated_seats,
+    "heated_seats_off": rs.deactivate_heated_seats,
+    "cooled_seats_on": rs.activate_cooled_seats,
+    "cooled_seats_off": rs.deactivate_cooled_seats,
+    "massage_seats_on": rs.activate_massage_seats,
+    "massage_seats_off": rs.deactivate_massage_seats,
+}
+id2label = {
+    1: "massage_seats_on",
+    2: "massage_seats_off",
+    3: "heated_seats_on",
+    4: "heated_seats_off",
+    5: "cooled_seats_on",
+    6: "cooled_seats_off",
+}
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
 MODEL = os.getenv("MODEL")
+openai.api_key = OPENAI_API_KEY
 model = Speech2TextForConditionalGeneration.from_pretrained(
     "facebook/s2t-small-librispeech-asr"
 )
 processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
+def get_command(command, model, id2label):
+    completion = openai.Completion.create(
+        model=model, prompt=f"{command}->", max_tokens=1, temperature=0
+    )
+    id = int(completion["choices"][0]["text"].strip())
+    result = id2label[id] if id in id2label else "unknown"
+    return result
 def transcribe(audio):
     input, rate = librosa.load(
         audio, sr=16000
         inputs["input_features"], attention_mask=inputs["attention_mask"]
     )
     transcription = processor.batch_decode(generated_ids, skip_special_tokens=True)
+    result = get_command(transcription, MODEL, id2label)
+    resoponses.get(result)()
+    return result
+if __name__ == "__main__":
+    gr.Interface(
+        fn=transcribe,
+        inputs=gr.Audio(source="microphone", type="filepath"),
+        outputs="text",
+    ).launch()

assets/command_responses/cooled_activated.wav ADDED Viewed

Binary file (389 kB). View file

assets/command_responses/cooled_deactivated.wav ADDED Viewed

Binary file (451 kB). View file

assets/command_responses/heated_activated.wav ADDED Viewed

Binary file (578 kB). View file

assets/command_responses/heated_deactivated.wav ADDED Viewed

Binary file (451 kB). View file

assets/command_responses/massage_activated.wav ADDED Viewed

Binary file (557 kB). View file

assets/command_responses/massage_deactivated.wav ADDED Viewed

Binary file (549 kB). View file

command_responses/cooled_seats_activated.m4a ADDED Viewed

Binary file (50.6 kB). View file

command_responses/cooled_seats_deactivated.m4a ADDED Viewed

Binary file (58.3 kB). View file

command_responses/heated_seats_activated.m4a ADDED Viewed

Binary file (73.1 kB). View file

command_responses/heated_seats_deactivated.m4a ADDED Viewed

Binary file (58.6 kB). View file

command_responses/massage_seats_activated.m4a ADDED Viewed

Binary file (71.1 kB). View file

command_responses/massage_seats_deactivated.m4a ADDED Viewed

Binary file (70.3 kB). View file

requirements.txt CHANGED Viewed

@@ -7,4 +7,5 @@ sounddevice
 librosa
 python-dotenv
 ffmpeg
-ffprobe

 librosa
 python-dotenv
 ffmpeg
+ffprobe
+playsound==1.2.2

run_command_responses.py ADDED Viewed

	@@ -0,0 +1,21 @@

+from playsound import playsound
+class ResponseManager:
+    def activate_heated_seats():
+        playsound("assets\command_responses\heated_activated.wav")
+    def deactivate_heated_seats():
+        playsound("assets\command_responses\heated_deactivated.wav")
+    def activate_cooled_seats():
+        playsound("assets\command_responses\cooled_activated.wav")
+    def deactivate_cooled_seats():
+        playsound("assets\command_responses\cooled_deactivated.wav")
+    def activate_massage_seats():
+        playsound("assets\command_responses\massage_activated.wav")
+    def deactivate_massage_seats():
+        playsound("assets\command_responses\massage_deactivated.wav")

script.py ADDED Viewed

	@@ -0,0 +1,12 @@

+import os
+from pydub import AudioSegment
+path = "command_responses"
+file_names = os.listdir(path)
+res_path = "assets\command_responses"
+for file_name in file_names:
+    m4a_file = file_name
+    wav_filename = os.path.splitext(file_name)[0] + ".wav"
+    track = AudioSegment.from_file(os.path.join(path, m4a_file), format="m4a")
+    file_handle = track.export(os.path.join(res_path, wav_filename), format="wav")