adnaan05 committed on
Commit
b633ce6
·
verified ·
1 Parent(s): 20036d6

created app

Browse files
Files changed (1) hide show
  1. app.py +87 -0
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Dependencies
# ------------
# The "!pip ..." lines that used to live here are IPython/Colab shell magics;
# in a plain Python script such as app.py they are a SyntaxError. Declare the
# packages in requirements.txt next to this file instead:
#
#     git+https://github.com/openai/whisper.git
#     gradio
#     gtts
#     groq
#     ffmpeg-python
#
# NOTE(review): the original also ran "pip uninstall -y whisper" before
# installing from GitHub -- presumably to remove a different PyPI package that
# shares the "whisper" import name; confirm it is not listed in
# requirements.txt.
5
+
6
+ # Import required libraries
7
+ import os
8
+ import gradio as gr
9
+ import whisper
10
+ from gtts import gTTS
11
+ import io
12
+ from groq import Groq
13
+
14
# The Groq API key is read from the environment (for example a Hugging Face
# Space secret named GROQ_API_KEY). Secrets must never be committed to source
# control: the key that was previously hard-coded on this line is exposed in
# the repository history and should be revoked and replaced.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Fail fast with an actionable message instead of a later, less obvious
    # authentication error on the first request.
    raise RuntimeError(
        "GROQ_API_KEY is not set. Configure it as an environment variable "
        "or Space secret before starting the app."
    )

# Initialize Groq client and Whisper model
client = Groq(api_key=GROQ_API_KEY)
model = whisper.load_model("base", device="cpu")
20
+
21
+ # Function to process audio
22
def process_audio(file_path):
    """Transcribe an audio file, ask the chatbot for a reply, and voice it.

    Pipeline: Whisper transcription -> Groq chat completion -> gTTS speech
    synthesis written to an MP3 file.

    Args:
        file_path: Path of the uploaded audio file, or ``None`` when no audio
            was supplied (the interface is live, so the callback can fire
            with an empty input).

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the chatbot
        reply and ``audio_path`` is the path of the generated MP3. On any
        failure ``text`` is a human-readable message and ``audio_path`` is
        ``None``; exceptions are reported this way rather than raised.
    """
    import tempfile

    try:
        # No input at all is not an error worth a stack-trace style message.
        if file_path is None:
            return "No audio file was provided.", None

        # Ensure the file exists
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"The file {file_path} does not exist.")

        print(f"Processing file: {file_path}")

        # Load and process the audio with Whisper
        audio = whisper.load_audio(file_path)
        print("Audio loaded successfully.")

        # Transcribe the audio
        result = model.transcribe(audio)
        text = result["text"].strip()
        print("Transcription:", text)

        # Silence or noise yields an empty transcript; there is nothing to
        # send to the chatbot in that case.
        if not text:
            return "No speech was detected in the audio.", None

        # Generate a response using Groq API
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",
        )
        response_message = (chat_completion.choices[0].message.content or "").strip()
        print("Chatbot response:", response_message)

        # gTTS cannot synthesize empty text, so stop before calling it.
        if not response_message:
            return "The chatbot returned an empty response.", None

        # Write the speech to a unique temporary file. A fixed name such as
        # "response.mp3" would be shared by every request, letting concurrent
        # users overwrite each other's audio.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
            gTTS(response_message).write_to_fp(audio_file)
            response_audio_path = audio_file.name

        return response_message, response_audio_path

    except FileNotFoundError as e:
        return f"File not found: {e}", None
    except UnicodeDecodeError as e:
        return f"Invalid audio file encoding: {e}", None
    except Exception as e:
        # Top-level UI boundary: surface the problem to the user as text
        # instead of crashing the Gradio callback.
        return f"An unexpected error occurred: {e}", None
66
+
67
# Define Gradio interface
title = "Voice-to-Voice Chatbot Application"
description = "Run a voice-to-voice chatbot with transcription and audio response."
article = "### Instructions\n1. Upload an audio file.\n2. Wait for transcription and chatbot's response.\n3. Listen to the response audio."

# One audio input (passed to process_audio as a file path) and two outputs
# matching process_audio's (text, audio_path) return tuple.
# NOTE(review): "dark" may not be a built-in theme name in the installed
# Gradio version -- confirm it resolves, or drop the argument.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
    outputs=[
        gr.Textbox(label="Response Text"),
        gr.Audio(label="Response Audio"),
    ],
    live=True,
    title=title,
    description=description,
    theme="dark",
    article=article,
)

# Launch only when executed as a script, so importing this module (tests,
# tooling) does not start a web server as a side effect.
if __name__ == "__main__":
    iface.launch()