"""Gradio demo: control car-seat features (heat, cool, massage) by voice.

A recording is transcribed with OpenAI Whisper, the transcription is mapped to
one of the seat commands in ``id2label`` by a completion model, and every word
of the transcription is tagged as ``unit`` / ``value`` / ``none`` for display.
"""
import logging
import string

import gradio as gr
import openai

# Supplies OPENAI_API_KEY and OPENAI_ORGANIZATION (kept as a wildcard import so
# any other name the constants module exports stays available).
from constants import *  # noqa: F401,F403

logger = logging.getLogger(__name__)

openai.api_key = OPENAI_API_KEY
openai.organization = OPENAI_ORGANIZATION

title = "Car Seats Voice Commands"

description = """
This is a demo for controlling car seats with Voice Commands, On the left there's the inputs section
and on the right you'll find your outputs. For the inputs you have two choices **Voice** and **Text**,
Use **Voice** If you want a closer experience to the final product, Or use **Text** if you just want to test the command model.
for the outputs you have the **transcription**(Please check that it's accurate), **command**(to know which command the system detected)
and you have the robot voice (again use this if you want a more real experience).

**Features** : You can either activate of deactivate the following features

- Heated Seats
- Cooled Seats
- Massage Seats

Examples:

- **Direct Commands** : Try to say something like "Activate heated seats" or "Turn Off massage seats"
- **Indirect Commands** : Try "My back is cold" , "No heating is needed anymore" or "I'm stressed today"
"""

# NOTE(review): `article` is not passed to gr.Interface below, and its text
# describes a fine-tuned ada model rather than the prompt-based classifier used
# here. Update the text before wiring it into the UI.
article = """
This demo processes commands in two steps, the first step is the transcription phase and the second is the Command
Classification phase. For Transcription I used The OpenAi whisper model, and for the classification I Fine-Tuned the OpenAi **ada** model
on Car Seats Command.
"""

# Label returned whenever the model's answer cannot be mapped to a command.
UNKNOWN_COMMAND = "unknown"

# Built once: deleting punctuation is a single C-level pass with this table.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)

# Characters the model tends to wrap around its label array.
_LABEL_NOISE_TABLE = str.maketrans("", "", "[]'\"")


def remove_punctuation(input_string):
    """Return ``input_string`` with every ASCII punctuation character removed."""
    return input_string.translate(_PUNCTUATION_TABLE)


id2label = {
    1: "massage_seats_on",
    2: "massage_seats_off",
    3: "heated_seats_on",
    4: "heated_seats_off",
    5: "cooled_seats_on",
    6: "cooled_seats_off",
}


def _build_command_prompt(command, id2label):
    """Render the classification prompt, listing the codes found in ``id2label``."""
    # The mapping is generated from ``id2label`` so the codes offered to the
    # model can never drift from the codes used to decode its answer.
    mapping = "\n".join(f'{code}: "{label}"' for code, label in id2label.items())
    return (
        "We want to control the seats of a car which has features to cool, heat, "
        "or massage a seat.\n"
        f'The user said "{command}", Which feature we should use to ensure user comfort? '
        "Give just the number of the feature without any punctuation.\n"
        "Mapping:\n"
        f"{mapping}\n"
        "Command_Code:"
    )


def _label_from_code(raw_code, id2label):
    """Translate the model's raw answer into a label, or ``UNKNOWN_COMMAND``."""
    cleaned = remove_punctuation(raw_code).strip()
    try:
        code = int(cleaned)
    except ValueError:
        # Empty or non-numeric completion: report it as unknown, do not crash.
        return UNKNOWN_COMMAND
    return id2label.get(code, UNKNOWN_COMMAND)


def get_command(command, id2label, model="text-davinci-003"):
    """Classify a spoken sentence into one of the seat commands.

    Args:
        command: The user's sentence (normally the Whisper transcription).
        id2label: Mapping from integer command code to command label. It is
            used both to build the prompt and to decode the model's answer.
        model: Name of the OpenAI completion model to query.
            NOTE(review): ``text-davinci-003`` appears on OpenAI's list of
            retired models - confirm it is still served or pick a replacement.

    Returns:
        The matching label from ``id2label``, or ``"unknown"`` when the model's
        answer is not a known code.
    """
    completion = openai.Completion.create(
        model=model,
        prompt=_build_command_prompt(command, id2label),
        max_tokens=2,  # the expected answer is a single small integer
        temperature=0,
    )
    raw_code = completion["choices"][0]["text"]
    logger.info("command code returned by the model: %r", raw_code.strip())
    return _label_from_code(raw_code, id2label)


def _pair_words_with_labels(words, raw_labels):
    """Zip ``words`` with the labels parsed from the model's array-like answer."""
    labels = [
        label.strip() or None
        for label in raw_labels.translate(_LABEL_NOISE_TABLE).split(",")
    ]
    # Force exactly one label per word: drop surplus labels and pad missing
    # ones with None so the sentence is never truncated.
    labels = labels[: len(words)]
    labels += [None] * (len(words) - len(labels))
    return list(zip(words, labels))


def command_tokens(command, model="text-davinci-003"):
    """Tag every word of ``command`` as ``unit``, ``value`` or ``none``.

    Args:
        command: The sentence to tag; it is split on whitespace.
        model: Name of the OpenAI completion model to query.

    Returns:
        A list of ``(word, label)`` tuples, one per word. ``label`` is ``None``
        when the model supplied no label for that word. An empty or
        whitespace-only ``command`` yields ``[]`` without calling the API.
    """
    words = command.split()
    if not words:
        return []

    prompt = (
        "Give an array of the same length of the input, for every element of the "
        "returned array use one of the labels in the label-list\n"
        "label-list :\n"
        "- unit if belongs to the International System of Units\n"
        "- value\n"
        "- none if none of the above\n"
        f"input : [{','.join(words)}]\n"
        "output :"
    )
    completion = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=128,
        temperature=0,
    )
    return _pair_words_with_labels(words, completion["choices"][0]["text"])


def transcribe(audio):
    """Transcribe a recording and classify the spoken command.

    Args:
        audio: Path of the recorded audio file. A falsy value (nothing was
            recorded) is accepted.

    Returns:
        A ``(command, tokens)`` tuple: the label produced by ``get_command``
        and the ``(word, label)`` list produced by ``command_tokens``. With no
        recording the result is ``("unknown", [])``.
    """
    if not audio:
        return UNKNOWN_COMMAND, []

    # The context manager closes the file even when the API call raises.
    with open(audio, "rb") as audio_file:
        response = openai.Audio.transcribe("whisper-1", audio_file, language="en")
    transcription = response["text"]

    result = get_command(transcription, id2label)
    tokens = command_tokens(transcription)
    logger.info("result %s", result)
    logger.info("tokens %s", tokens)
    return result, tokens


def main():
    """Configure logging and launch the Gradio interface."""
    logging.basicConfig(level=logging.INFO)
    gr.Interface(
        fn=transcribe,
        inputs=gr.Audio(source="microphone", type="filepath"),
        outputs=["text", "highlight"],
        title=title,
        description=description,
    ).launch()


if __name__ == "__main__":
    main()