import os
import gradio as gr
import openai
from gtts import gTTS # Google Text To Speech
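# Note: this file targets the pre-1.0 openai-python SDK
# (openai.Audio / openai.ChatCompletion) and Gradio 3.x
# (gr.Audio(source=...)); newer releases of both libraries
# renamed these interfaces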
# load the api key
openai.api_key = os.environ["OPEN_AI_KEY"]
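# On Hugging Face Spaces the key is typically supplied as a
# repository secret named OPEN_AI_KEY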
# Takes an audio file recorded by the microphone and
# submits the raw audio to OpenAI's Whisper endpoint
# for speech-to-text transcription
# input from Microphone Component
# output to User Input - Textbox Component
def transcribe(audio):
    # Open the recorded file in binary mode and call the
    # transcription endpoint with the file-like object
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]
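
# A quick way to sanity-check transcribe() outside the UI
# ("sample.wav" is a hypothetical local recording):
#   print(transcribe("sample.wav"))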
# Create a Gradio App using Blocks
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Welcome to the Virtual Therapist Chat Bot!
        Tell the therapist your problems by recording your query.
        Submit your query, then follow the chat or listen to the therapist's advice.
        When you are ready to respond, clear your last recording and resubmit.
        """)
    # The first message gives OpenAI its instructions.
    # A State object holds a unique chat history for
    # each user and for each page reload
    messages = gr.State(value=[{"role": "system", "content": "You are a therapist. Respond in less than 5 sentences."}])
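    # Each turn appends a {"role": "user", ...} dict and a
    # {"role": "assistant", ...} dict to this list, following
    # the OpenAI chat-message format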
    # Takes the user's transcribed audio as a string
    # and the messages list as a reference, then sends
    # the ongoing chat log to OpenAI
    # input from User Input - Textbox Component
    # output to Chat Log - Textbox Component
    def botResponse(user_input, messages):
        # Add the user input to the ongoing chat log
        # and submit the log to OpenAI
        messages.append({"role": "user", "content": user_input})
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo-0301",
            messages=messages
        )
        # Parse the response from OpenAI and store
        # it in the chat log
        system_message = response["choices"][0]["message"]["content"]
        messages.append({"role": "assistant", "content": system_message})
        # Flatten the messages list into a single string for
        # display, excluding the system prompt
        chat_transcript = ""
        for message in messages:
            if message["role"] != "system":
                chat_transcript += message["role"] + ": " + message["content"] + "\n\n"
        return chat_transcript
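
    # The returned transcript looks like (illustrative only):
    #   user: I feel anxious.
    #
    #   assistant: That sounds difficult. ...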
    # Takes the last message in the chat log and uses
    # gTTS to convert it to speech, saving the result
    # as an mp3 file. Returns the path to that file.
    # input from messages as a reference
    # output to GPT Voice - Audio Component
    def giveVoice(messages):
        bot_message = messages[-1]
        tts = gTTS(text=bot_message["content"])
        tts.save("temp.mp3")
        # Build an absolute path so the Audio component can load the file
        cwd = os.getcwd()
        new_path = os.path.join(cwd, "temp.mp3")
        return new_path
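
    # Note: every call overwrites the same temp.mp3 in the working
    # directory, so concurrent sessions share a single audio file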
    # Creates the Gradio interface objects.
    # The submit button triggers a cascade of
    # events that each engage a different
    # component as input/output
    with gr.Row():
        with gr.Column(scale=1):
            user_audio = gr.Audio(source="microphone", type="filepath", label="Input Phrase")
            submit_btn = gr.Button(value="Transcribe")
            gpt_voice = gr.Audio(label="Voice Response")
        with gr.Column(scale=2):
            user_transcript = gr.Text(label="User Transcript")
            gpt_transcript = gr.Text(label="Chat Transcript")
    with gr.Row():
        with gr.Accordion("Open for Instructions"):
            gr.Markdown("Instructions go here")
    submit_btn.click(transcribe, user_audio, user_transcript)
    user_transcript.change(botResponse, [user_transcript, messages], gpt_transcript)
    gpt_transcript.change(giveVoice, messages, gpt_voice)
# Start a local web server; share=True would instead
# create a temporary public link through Gradio's
# share service
demo.launch(share=False)