Spaces:

adnaan05
/

VoiceToVoice_V2

Sleeping

App Files Files Community

adnaan05 committed on Nov 24, 2024

Commit

b633ce6

verified ·

1 Parent(s): 20036d6

created app

Browse files

Files changed (1) hide show

app.py +87 -0

app.py ADDED Viewed

	@@ -0,0 +1,87 @@

+# Install necessary libraries
+!pip uninstall -y whisper
+!pip install git+https://github.com/openai/whisper.git
+!pip install gradio gtts groq ffmpeg-python
+# Import required libraries
+import os
+import gradio as gr
+import whisper
+from gtts import gTTS
+import io
+from groq import Groq
+# Set your GROQ_API_KEY
+os.environ["GROQ_API_KEY"] = "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"
+# Initialize Groq client and Whisper model
+client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+model = whisper.load_model("base", device="cpu")
+# Function to process audio
+def process_audio(file_path):
+    try:
+        # Ensure the file exists
+        if not os.path.isfile(file_path):
+            raise FileNotFoundError(f"The file {file_path} does not exist.")
+        print(f"Processing file: {file_path}")
+        # Load and process the audio with Whisper
+        audio = whisper.load_audio(file_path)
+        print("Audio loaded successfully.")
+        # Transcribe the audio
+        result = model.transcribe(audio)
+        text = result["text"]
+        print("Transcription:", text)
+        # Generate a response using Groq API
+        chat_completion = client.chat.completions.create(
+            messages=[{"role": "user", "content": text}],
+            model="llama3-8b-8192",
+        )
+        response_message = chat_completion.choices[0].message.content.strip()
+        print("Chatbot response:", response_message)
+        # Convert the response to audio
+        tts = gTTS(response_message)
+        response_audio_io = io.BytesIO()
+        tts.write_to_fp(response_audio_io)
+        response_audio_io.seek(0)
+        # Save the response audio to a file
+        response_audio_path = "response.mp3"
+        with open(response_audio_path, "wb") as audio_file:
+            audio_file.write(response_audio_io.getvalue())
+        return response_message, response_audio_path
+    except FileNotFoundError as e:
+        return f"File not found: {e}", None
+    except UnicodeDecodeError as e:
+        return f"Invalid audio file encoding: {e}", None
+    except Exception as e:
+        return f"An unexpected error occurred: {e}", None
+# Define Gradio interface
+title = "Voice-to-Voice Chatbot Application"
+description = "Run a voice-to-voice chatbot with transcription and audio response."
+article = "### Instructions\n1. Upload an audio file.\n2. Wait for transcription and chatbot's response.\n3. Listen to the response audio."
+iface = gr.Interface(
+    fn=process_audio,
+    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
+    outputs=[
+        gr.Textbox(label="Response Text"),
+        gr.Audio(label="Response Audio")
+    ],
+    live=True,
+    title=title,
+    description=description,
+    theme="dark",
+    article=article
+)
+# Launch Gradio interface
+iface.launch()