puzan789 commited on
Commit
0c39cc3
·
0 Parent(s):
Files changed (8) hide show
  1. Dockerfile +25 -0
  2. README.md +6 -0
  3. README_gradio.md +15 -0
  4. __init__.py +0 -0
  5. app.py +184 -0
  6. index.html +623 -0
  7. index2.html +496 -0
  8. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: slim Debian variant with Python 3.11.
FROM python:3.11-slim

# All following instructions (and the running app) work from /app.
WORKDIR /app

# System packages: compiler toolchain, PortAudio, libsndfile and ffmpeg.
# The apt cache and package lists are removed in the same layer to keep it small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    libportaudio2 \
    libsndfile1 \
    ffmpeg \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Copy only the requirements first so the dependency layer is reused
# when just the application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application sources.
COPY . .

# Port the application listens on.
EXPOSE 7860

# Start the application.
CMD ["python", "app.py"]
README.md ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ title: Asjirtnj
2
+ emoji: 🏃
3
+ colorFrom: pink
4
+ colorTo: purple
5
+ sdk: static
6
+ pinned: false
README_gradio.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Talk to Gemini (Gradio UI)
3
+ emoji: ♊️
4
+ colorFrom: purple
5
+ colorTo: red
6
+ sdk: gradio
7
+ sdk_version: 5.16.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ short_description: Talk to Gemini (Gradio UI)
12
+ tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|GEMINI_API_KEY]
13
+ ---
14
+
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
__init__.py ADDED
File without changes
app.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import json
4
+ import os
5
+ import pathlib
6
+ from typing import AsyncGenerator, Literal
7
+
8
+ import gradio as gr
9
+ import numpy as np
10
+ from dotenv import load_dotenv
11
+ from fastapi import FastAPI
12
+ from fastapi.responses import HTMLResponse
13
+ from fastrtc import (
14
+ AsyncStreamHandler,
15
+ Stream,
16
+ get_twilio_turn_credentials,
17
+ wait_for_item,
18
+ )
19
+ from google import genai
20
+ from google.genai.types import (
21
+ LiveConnectConfig,
22
+ PrebuiltVoiceConfig,
23
+ SpeechConfig,
24
+ VoiceConfig,
25
+ )
26
+ from gradio.utils import get_space
27
+ from pydantic import BaseModel
28
# Directory containing this module; index.html is looked up relative to it.
current_dir = pathlib.Path(__file__).parent

# Load variables from a .env file (if present) before reading the API key.
load_dotenv()

# Fail at import time when the key is missing or empty.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    raise ValueError("GEMINI_API_KEY environment variable is not set")
33
+
34
+
35
+
36
def encode_audio(data: np.ndarray) -> str:
    """Return the raw bytes of ``data`` as a base64-encoded text string."""
    raw_bytes = data.tobytes()
    encoded = base64.b64encode(raw_bytes)
    return str(encoded, "UTF-8")
39
+
40
+
41
class GeminiHandler(AsyncStreamHandler):
    """Stream handler that relays audio between a caller and a Gemini live session.

    Incoming frames are base64-encoded and queued for the session; audio
    returned by the session is queued as ``(sample_rate, int16 array)``
    tuples for ``emit`` to hand back.
    """

    def __init__(
        self,
        expected_layout: Literal["mono"] = "mono",
        output_sample_rate: int = 24000,
        output_frame_size: int = 480,
    ) -> None:
        """Configure the base handler (16 kHz input) and create the queues."""
        super().__init__(
            expected_layout,
            output_sample_rate,
            output_frame_size,
            input_sample_rate=16000,
        )
        # Set by shutdown(); makes the stream() generator stop.
        self.quit: asyncio.Event = asyncio.Event()
        # Encoded microphone audio waiting to be sent to the session.
        self.input_queue: asyncio.Queue = asyncio.Queue()
        # (sample_rate, samples) tuples received from the session.
        self.output_queue: asyncio.Queue = asyncio.Queue()

    def copy(self) -> "GeminiHandler":
        """Return a fresh handler with this handler's output settings."""
        settings = {
            "expected_layout": "mono",
            "output_sample_rate": self.output_sample_rate,
            "output_frame_size": self.output_frame_size,
        }
        return GeminiHandler(**settings)

    async def start_up(self):
        """Open a Gemini live session and queue every audio chunk it returns."""
        # "Puck" is used in phone mode and whenever no voice argument arrived.
        voice_name = "Puck"
        if not self.phone_mode:
            await self.wait_for_args()
            if len(self.latest_args) > 1:
                # The voice is taken from position 1 of the latest arguments.
                voice_name = self.latest_args[1]

        speech_config = SpeechConfig(
            voice_config=VoiceConfig(
                prebuilt_voice_config=PrebuiltVoiceConfig(
                    voice_name=voice_name,
                )
            )
        )
        config = LiveConnectConfig(
            response_modalities=["AUDIO"],  # type: ignore
            speech_config=speech_config,
        )
        client = genai.Client(
            api_key=api_key,
            http_options={"api_version": "v1alpha"},
        )

        connection = client.aio.live.connect(
            model="gemini-2.0-flash-exp", config=config
        )
        async with connection as session:
            responses = session.start_stream(
                stream=self.stream(), mime_type="audio/pcm"
            )
            async for audio in responses:
                if not audio.data:
                    continue
                samples = np.frombuffer(audio.data, dtype=np.int16)
                self.output_queue.put_nowait((self.output_sample_rate, samples))

    async def stream(self) -> AsyncGenerator[bytes, None]:
        """Yield queued input audio until ``shutdown`` sets the quit event."""
        while not self.quit.is_set():
            try:
                # Short timeout so the quit flag is re-checked regularly.
                chunk = await asyncio.wait_for(self.input_queue.get(), 0.1)
                yield chunk
            except (asyncio.TimeoutError, TimeoutError):
                continue

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Encode one incoming ``(sample_rate, samples)`` frame and queue it."""
        _rate, samples = frame
        self.input_queue.put_nowait(encode_audio(samples.squeeze()))

    async def emit(self) -> tuple[int, np.ndarray] | None:
        """Return the result of ``wait_for_item`` on the output queue."""
        return await wait_for_item(self.output_queue)

    def shutdown(self) -> None:
        """Signal the input stream generator to stop."""
        self.quit.set()
119
+
120
+
121
# Shared stream object: bidirectional audio handled by GeminiHandler.
stream = Stream(
    modality="audio",
    mode="send-receive",
    handler=GeminiHandler(),
    # The next three settings are only supplied when get_space() is truthy.
    rtc_configuration=None if not get_space() else get_twilio_turn_credentials(),
    concurrency_limit=None if not get_space() else 5,
    time_limit=None if not get_space() else 90,
    additional_inputs=[
        # Voice selector shown next to the audio component.
        gr.Dropdown(
            label="Voice",
            choices=["Puck", "Charon", "Kore", "Fenrir", "Aoede"],
            value="Puck",
        ),
    ],
)
143
+
144
+
145
class InputData(BaseModel):
    """Request body accepted by the ``/input_hook`` endpoint."""

    # Identifier of the WebRTC connection the input belongs to.
    webrtc_id: str
    # Voice name selected by the client.
    voice_name: str
148
+
149
+
150
+
151
# FastAPI application that serves the page and the input hook.
app = FastAPI()

# Attach the stream to the application.
stream.mount(app)
154
+
155
+
156
@app.post("/input_hook")
async def _(body: InputData):
    """Pass the voice chosen by the client to the stream for its connection."""
    webrtc_id, voice_name = body.webrtc_id, body.voice_name
    stream.set_input(webrtc_id, voice_name)
    return dict(status="ok")
160
+
161
+
162
@app.get("/")
async def index():
    """Serve ``index.html`` with the RTC configuration placeholder filled in.

    Returns:
        HTMLResponse: the page, with every ``__RTC_CONFIGURATION__`` token
        replaced by the JSON-encoded RTC configuration (``null`` when
        ``get_space()`` is falsy).
    """
    rtc_config = get_twilio_turn_credentials() if get_space() else None
    # Decode explicitly as UTF-8 so the page is read the same way whatever
    # the host's locale default encoding is.
    html_content = (current_dir / "index.html").read_text(encoding="utf-8")
    html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
    return HTMLResponse(content=html_content)
168
+
169
+
170
if __name__ == "__main__":
    import uvicorn

    if os.environ.get("SPACE_ID"):
        # SPACE_ID is set: listen on all interfaces, on PORT if given.
        uvicorn.run(app, host="0.0.0.0", port=int(os.environ.get("PORT", 7860)))
    else:
        # Otherwise the MODE environment variable picks the front end.
        mode = os.getenv("MODE")
        if mode == "UI":
            stream.ui.launch(server_port=7860)
        elif mode == "PHONE":
            stream.fastphone(host="localhost", port=7860)
        else:
            uvicorn.run(app, host="localhost", port=7860)
index.html ADDED
@@ -0,0 +1,623 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Voice Assistant</title>
8
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
9
+ <style>
10
+ :root {
11
+ --color-primary: #7c3aed;
12
+ --color-primary-dark: #6d28d9;
13
+ --color-primary-light: #a78bfa;
14
+ --color-background: #111827;
15
+ --color-surface: #1f2937;
16
+ --color-surface-light: #374151;
17
+ --color-text: #f9fafb;
18
+ --color-text-secondary: #d1d5db;
19
+ --gradient: linear-gradient(135deg, #4f46e5 0%, #7c3aed 50%, #9333ea 100%);
20
+ --boxSize: 8px;
21
+ --gutter: 4px;
22
+ --box-shadow: 0 10px 30px -10px rgba(124, 58, 237, 0.5);
23
+ }
24
+
25
+ * {
26
+ box-sizing: border-box;
27
+ margin: 0;
28
+ padding: 0;
29
+ }
30
+
31
+ body {
32
+ margin: 0;
33
+ padding: 0;
34
+ background-color: var(--color-background);
35
+ color: var(--color-text);
36
+ font-family: 'Inter', system-ui, -apple-system, sans-serif;
37
+ min-height: 100vh;
38
+ display: flex;
39
+ flex-direction: column;
40
+ align-items: center;
41
+ justify-content: center;
42
+ background-image:
43
+ radial-gradient(circle at 100% 10%, rgba(124, 58, 237, 0.12) 0%, transparent 30%),
44
+ radial-gradient(circle at 10% 90%, rgba(79, 70, 229, 0.12) 0%, transparent 30%);
45
+ background-attachment: fixed;
46
+ }
47
+
48
+ .header {
49
+ text-align: center;
50
+ margin-bottom: 2rem;
51
+ max-width: 600px;
52
+ padding: 0 1rem;
53
+ }
54
+
55
+ .header h1 {
56
+ font-size: 2.5rem;
57
+ margin-bottom: 0.5rem;
58
+ background: var(--gradient);
59
+ -webkit-background-clip: text;
60
+ background-clip: text;
61
+ -webkit-text-fill-color: transparent;
62
+ font-weight: 800;
63
+ letter-spacing: -0.025em;
64
+ }
65
+
66
+ .header p {
67
+ color: var(--color-text-secondary);
68
+ font-size: 1.125rem;
69
+ margin-bottom: 0.5rem;
70
+ line-height: 1.5;
71
+ }
72
+
73
+ .container {
74
+ width: 90%;
75
+ max-width: 800px;
76
+ background-color: var(--color-surface);
77
+ padding: 2.5rem;
78
+ border-radius: 1.5rem;
79
+ box-shadow: var(--box-shadow);
80
+ backdrop-filter: blur(10px);
81
+ border: 1px solid rgba(255, 255, 255, 0.05);
82
+ }
83
+
84
+ .wave-container {
85
+ position: relative;
86
+ display: flex;
87
+ min-height: 100px;
88
+ max-height: 128px;
89
+ justify-content: center;
90
+ align-items: center;
91
+ margin: 2rem 0;
92
+ background-color: var(--color-surface-light);
93
+ border-radius: 1rem;
94
+ padding: 1rem;
95
+ border: 1px solid rgba(255, 255, 255, 0.05);
96
+ }
97
+
98
+ .box-container {
99
+ display: flex;
100
+ justify-content: space-between;
101
+ height: 64px;
102
+ width: 100%;
103
+ gap: 4px;
104
+ }
105
+
106
+ .box {
107
+ height: 100%;
108
+ width: var(--boxSize);
109
+ background: var(--gradient);
110
+ border-radius: 8px;
111
+ transition: transform 0.05s ease;
112
+ }
113
+
114
+ .controls {
115
+ display: grid;
116
+ grid-template-columns: 1fr 1fr;
117
+ gap: 1.5rem;
118
+ margin-bottom: 2rem;
119
+ }
120
+
121
+ @media (max-width: 768px) {
122
+ .controls {
123
+ grid-template-columns: 1fr;
124
+ }
125
+ }
126
+
127
+ .input-group {
128
+ display: flex;
129
+ flex-direction: column;
130
+ gap: 0.5rem;
131
+ }
132
+
133
+ label {
134
+ font-size: 0.875rem;
135
+ font-weight: 600;
136
+ color: var(--color-text-secondary);
137
+ display: flex;
138
+ align-items: center;
139
+ gap: 0.5rem;
140
+ }
141
+
142
+ label i {
143
+ color: var(--color-primary-light);
144
+ }
145
+
146
+ input,
147
+ select {
148
+ padding: 0.75rem;
149
+ border-radius: 0.75rem;
150
+ border: 1px solid rgba(255, 255, 255, 0.1);
151
+ background-color: var(--color-background);
152
+ color: var(--color-text);
153
+ font-size: 1rem;
154
+ transition: all 0.2s ease;
155
+ }
156
+
157
+ input:focus,
158
+ select:focus {
159
+ outline: none;
160
+ border-color: var(--color-primary);
161
+ box-shadow: 0 0 0 2px rgba(124, 58, 237, 0.3);
162
+ }
163
+
164
+ button {
165
+ padding: 1.25rem;
166
+ border-radius: 1rem;
167
+ border: none;
168
+ background: var(--gradient);
169
+ color: white;
170
+ font-weight: 600;
171
+ font-size: 1.125rem;
172
+ cursor: pointer;
173
+ transition: all 0.3s ease;
174
+ box-shadow: 0 4px 10px rgba(0, 0, 0, 0.1);
175
+ position: relative;
176
+ overflow: hidden;
177
+ }
178
+
179
+ button::before {
180
+ content: '';
181
+ position: absolute;
182
+ top: 0;
183
+ left: -100%;
184
+ width: 100%;
185
+ height: 100%;
186
+ background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
187
+ transition: all 0.6s ease;
188
+ }
189
+
190
+ button:hover::before {
191
+ left: 100%;
192
+ }
193
+
194
+ button:hover {
195
+ transform: translateY(-2px);
196
+ box-shadow: 0 6px 20px rgba(124, 58, 237, 0.4);
197
+ }
198
+
199
+ button:active {
200
+ transform: translateY(1px);
201
+ }
202
+
203
+ .icon-with-spinner {
204
+ display: flex;
205
+ align-items: center;
206
+ justify-content: center;
207
+ gap: 12px;
208
+ min-width: 180px;
209
+ }
210
+
211
+ .spinner {
212
+ width: 20px;
213
+ height: 20px;
214
+ border: 2px solid white;
215
+ border-top-color: transparent;
216
+ border-radius: 50%;
217
+ animation: spin 1s linear infinite;
218
+ flex-shrink: 0;
219
+ }
220
+
221
+ @keyframes spin {
222
+ to {
223
+ transform: rotate(360deg);
224
+ }
225
+ }
226
+
227
+ .pulse-container {
228
+ display: flex;
229
+ align-items: center;
230
+ justify-content: center;
231
+ gap: 12px;
232
+ min-width: 180px;
233
+ }
234
+
235
+ .pulse-circle {
236
+ width: 20px;
237
+ height: 20px;
238
+ border-radius: 50%;
239
+ background: linear-gradient(135deg, #4f46e5, #7c3aed);
240
+ opacity: 0.8;
241
+ flex-shrink: 0;
242
+ transform: translateX(-0%) scale(var(--audio-level, 1));
243
+ transition: transform 0.1s ease;
244
+ box-shadow: 0 0 10px rgba(124, 58, 237, 0.7);
245
+ }
246
+
247
+ /* Toast notifications */
248
+ .toast {
249
+ position: fixed;
250
+ top: 20px;
251
+ left: 50%;
252
+ transform: translateX(-50%);
253
+ padding: 1rem 1.5rem;
254
+ border-radius: 0.75rem;
255
+ font-size: 0.875rem;
256
+ z-index: 1000;
257
+ display: none;
258
+ box-shadow: 0 10px 30px rgba(0, 0, 0, 0.2);
259
+ animation: slideIn 0.3s ease;
260
+ }
261
+
262
+ @keyframes slideIn {
263
+ from {
264
+ transform: translate(-50%, -20px);
265
+ opacity: 0;
266
+ }
267
+ to {
268
+ transform: translate(-50%, 0);
269
+ opacity: 1;
270
+ }
271
+ }
272
+
273
+ .toast.error {
274
+ background-color: #ef4444;
275
+ color: white;
276
+ border-left: 4px solid #b91c1c;
277
+ }
278
+
279
+ .toast.warning {
280
+ background-color: #f59e0b;
281
+ color: white;
282
+ border-left: 4px solid #d97706;
283
+ }
284
+
285
+ /* Footer styles */
286
+ .footer {
287
+ margin-top: 2rem;
288
+ text-align: center;
289
+ font-size: 0.875rem;
290
+ color: var(--color-text-secondary);
291
+ }
292
+
293
+ .footer a {
294
+ color: var(--color-primary-light);
295
+ text-decoration: none;
296
+ }
297
+
298
+ .footer a:hover {
299
+ text-decoration: underline;
300
+ }
301
+ </style>
302
+ </head>
303
+
304
+ <body>
305
+ <div id="error-toast" class="toast"></div>
306
+
307
+ <div class="header">
308
+ <h1>Voice Assistant</h1>
309
+ <p>Real-time voice conversations with advanced AI technology</p>
310
+ </div>
311
+
312
+ <div class="container">
313
+ <div class="controls">
314
+ <!-- <div class="input-group">
315
+ <label for="api-key"><i class="fas fa-key"></i> API Key</label>
316
+ <input type="password" id="api-key" placeholder="Enter your API key">
317
+ </div> -->
318
+ <div class="input-group">
319
+ <label for="voice"><i class="fas fa-microphone-alt"></i> Voice</label>
320
+ <select id="voice">
321
+ <option value="Puck">Puck (Casual)</option>
322
+ <option value="Charon">Charon (Deep)</option>
323
+ <option value="Kore">Kore (Professional)</option>
324
+ <option value="Fenrir">Fenrir (Mature)</option>
325
+ <option value="Aoede">Aoede (Melodic)</option>
326
+ </select>
327
+ </div>
328
+ </div>
329
+
330
+ <div class="wave-container">
331
+ <div class="box-container">
332
+ <!-- Boxes will be dynamically added here -->
333
+ </div>
334
+ </div>
335
+
336
+ <button id="start-button">Start Conversation</button>
337
+ </div>
338
+
339
+ <div class="footer">
340
+ <p>Powered by advanced AI technology</p>
341
+ </div>
342
+
343
+ <audio id="audio-output"></audio>
344
+
345
+ <script>
346
+ // Client-side state and WebRTC logic for the voice assistant page.
347
// Connection and audio-analysis state shared by the functions below.
let peerConnection, audioContext, dataChannel;
let isRecording = false;
let webrtc_id, animationId;
let analyser, analyser_input;
let dataArray, dataArray_input;

// Page elements used throughout the script.
const startButton = document.getElementById('start-button');
const voiceSelect = document.getElementById('voice');
const audioOutput = document.getElementById('audio-output');
const boxContainer = document.querySelector('.box-container');

// Populate the visualizer with its bars.
const numBars = 32;
Array.from({ length: numBars }).forEach(() => {
    const bar = document.createElement('div');
    bar.className = 'box';
    boxContainer.appendChild(bar);
});
370
+
371
// Render the start button according to the current connection state.
function updateButtonState() {
    const state = peerConnection ? peerConnection.connectionState : null;

    if (state === 'connecting' || state === 'new') {
        startButton.innerHTML = `
            <div class="icon-with-spinner">
                <div class="spinner"></div>
                <span>Connecting...</span>
            </div>
        `;
        return;
    }

    if (state === 'connected') {
        startButton.innerHTML = `
            <div class="pulse-container">
                <div class="pulse-circle"></div>
                <span>End Conversation</span>
            </div>
        `;
        return;
    }

    // No connection, or any other state: offer to start.
    startButton.innerHTML = 'Start Conversation';
}
390
+
391
// Show `message` in the error toast and hide it again after 5 seconds.
function showError(message) {
    const toast = document.getElementById('error-toast');
    toast.textContent = message;
    toast.className = 'toast error';
    toast.style.display = 'block';

    // Cancel the hide timer of a previous error; otherwise it would fire
    // early and hide this newer message before its own 5 seconds are up.
    clearTimeout(showError.hideTimer);
    showError.hideTimer = setTimeout(() => {
        toast.style.display = 'none';
    }, 5000);
}
402
+
403
// Open a WebRTC session: capture the microphone, negotiate with the server
// via /webrtc/offer and wire up audio playback and visualization.
async function setupWebRTC() {
    // The server's index route substitutes the identifier tested on the next
    // line with a JSON RTC configuration (or null) before sending the page.
    // When the page is served untouched the identifier is undeclared, and the
    // typeof test keeps that from throwing. Fall back to a public STUN server.
    const injectedConfig = typeof __RTC_CONFIGURATION__ !== 'undefined' ? __RTC_CONFIGURATION__ : null;
    const config = injectedConfig || {
        iceServers: [
            { urls: 'stun:stun.l.google.com:19302' }
        ]
    };

    // Keep a local reference: stopWebRTC() nulls the global, and a later
    // session may replace it while this one is still awaiting something.
    const pc = new RTCPeerConnection(config);
    peerConnection = pc;
    webrtc_id = Math.random().toString(36).substring(7);

    // True once this session was stopped or replaced by a newer one.
    const superseded = () => peerConnection !== pc;

    const timeoutId = setTimeout(() => {
        if (superseded()) return;
        const toast = document.getElementById('error-toast');
        toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
        toast.className = 'toast warning';
        toast.style.display = 'block';

        // Hide warning after 5 seconds
        setTimeout(() => {
            toast.style.display = 'none';
        }, 5000);
    }, 5000);

    try {
        // Check if mediaDevices is supported
        if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
            const errorMsg = 'Your browser does not support audio recording. Please use Chrome, Firefox, or Edge.';
            showError(errorMsg);
            throw new Error(errorMsg);
        }

        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        if (superseded()) {
            // Stopped while the permission prompt was open: the tracks were
            // never attached to the connection, so release the mic here.
            stream.getTracks().forEach(track => track.stop());
            clearTimeout(timeoutId);
            return;
        }
        stream.getTracks().forEach(track => pc.addTrack(track, stream));

        // One AudioContext serves both the input and the output analyser, so
        // stopWebRTC() closes everything this session created.
        audioContext = new AudioContext();
        const inputAnalyser = audioContext.createAnalyser();
        audioContext.createMediaStreamSource(stream).connect(inputAnalyser);
        inputAnalyser.fftSize = 64;
        const inputLevels = new Uint8Array(inputAnalyser.frequencyBinCount);
        analyser_input = inputAnalyser;
        dataArray_input = inputLevels;

        // Drives the pulse circle from the microphone level. The loop ends by
        // itself when the session is over and does not touch animationId,
        // which belongs to the output visualization loop.
        function updateAudioLevel() {
            if (superseded()) return;

            inputAnalyser.getByteFrequencyData(inputLevels);
            const average = Array.from(inputLevels).reduce((a, b) => a + b, 0) / inputLevels.length;
            const audioLevel = average / 255;

            const pulseCircle = document.querySelector('.pulse-circle');
            if (pulseCircle) {
                pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
            }

            requestAnimationFrame(updateAudioLevel);
        }
        updateAudioLevel();

        // Add connection state change listener
        pc.addEventListener('connectionstatechange', () => {
            console.log('connectionstatechange', pc.connectionState);
            if (pc.connectionState === 'connected') {
                clearTimeout(timeoutId);
                const toast = document.getElementById('error-toast');
                toast.style.display = 'none';
            }
            updateButtonState();
        });

        // Handle incoming audio
        pc.addEventListener('track', (evt) => {
            if (superseded()) return;
            if (audioOutput && audioOutput.srcObject !== evt.streams[0]) {
                audioOutput.srcObject = evt.streams[0];
                const playback = audioOutput.play();
                if (playback && playback.catch) {
                    playback.catch(err => console.error('Audio playback failed:', err));
                }

                // Set up audio visualization on the output stream, reusing
                // the session's AudioContext instead of replacing it.
                if (!audioContext) {
                    audioContext = new AudioContext();
                }
                analyser = audioContext.createAnalyser();
                const source = audioContext.createMediaStreamSource(evt.streams[0]);
                source.connect(analyser);
                analyser.fftSize = 2048;
                dataArray = new Uint8Array(analyser.frequencyBinCount);
                updateVisualization();
            }
        });

        // Create data channel for messages
        dataChannel = pc.createDataChannel('text');
        dataChannel.onmessage = (event) => {
            const eventJson = JSON.parse(event.data);
            if (eventJson.type === "error") {
                showError(eventJson.message);
            } else if (eventJson.type === "send_input") {
                // The server asks for the extra inputs: send the chosen voice.
                fetch('/input_hook', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({
                        webrtc_id: webrtc_id,
                        voice_name: voiceSelect.value
                    })
                }).catch(err => console.error('Failed to send input:', err));
            }
        };

        // Create and send offer
        const offer = await pc.createOffer();
        await pc.setLocalDescription(offer);

        // Wait until ICE gathering has finished so the offer carries all candidates.
        await new Promise((resolve) => {
            if (pc.iceGatheringState === "complete") {
                resolve();
            } else {
                const checkState = () => {
                    if (pc.iceGatheringState === "complete") {
                        pc.removeEventListener("icegatheringstatechange", checkState);
                        resolve();
                    }
                };
                pc.addEventListener("icegatheringstatechange", checkState);
            }
        });

        const response = await fetch('/webrtc/offer', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                sdp: pc.localDescription.sdp,
                type: pc.localDescription.type,
                webrtc_id: webrtc_id,
            })
        });

        const serverResponse = await response.json();
        if (superseded()) {
            clearTimeout(timeoutId);
            return;
        }

        if (serverResponse.status === 'failed') {
            // The attempt is over: do not show the "taking longer" warning.
            clearTimeout(timeoutId);
            showError(serverResponse.meta.error === 'concurrency_limit_reached'
                ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
                : serverResponse.meta.error);
            stopWebRTC();
            startButton.textContent = 'Start Conversation';
            return;
        }

        await pc.setRemoteDescription(serverResponse);
    } catch (err) {
        clearTimeout(timeoutId);
        console.error('Error setting up WebRTC:', err);
        // A stopped or replaced session must not raise an error toast or tear
        // down whatever session is current now.
        if (superseded()) return;
        showError('Failed to establish connection. Please try again.');
        stopWebRTC();
        startButton.textContent = 'Start Conversation';
    }
}
555
+
556
// Scale each bar from the output analyser's frequency data, once per frame.
function updateVisualization() {
    if (!analyser) {
        return;
    }

    analyser.getByteFrequencyData(dataArray);

    document.querySelectorAll('.box').forEach((bar, index) => {
        // Map the 0-255 bin value onto a 0-2 scale factor, floored at 0.1.
        const level = (dataArray[index] / 255) * 2;
        bar.style.transform = `scaleY(${Math.max(0.1, level)})`;
    });

    animationId = requestAnimationFrame(updateVisualization);
}
569
+
570
// Tear down the connection, the animation frame and the audio context, then
// refresh the button.
function stopWebRTC() {
    const connection = peerConnection;
    if (connection) {
        // Stop every transceiver and every outgoing track before closing.
        if (connection.getTransceivers) {
            for (const transceiver of connection.getTransceivers()) {
                if (transceiver.stop) {
                    transceiver.stop();
                }
            }
        }

        if (connection.getSenders) {
            for (const sender of connection.getSenders()) {
                const track = sender.track;
                if (track && track.stop) {
                    track.stop();
                }
            }
        }

        connection.close();
        peerConnection = null;
    }

    if (animationId) {
        cancelAnimationFrame(animationId);
        animationId = null;
    }

    if (audioContext) {
        audioContext.close();
        audioContext = null;
    }

    updateButtonState();
}
603
+
604
// Start a conversation, or end the running one, when the button is clicked.
startButton.addEventListener('click', () => {
    // First check for browser support
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
        showError('Your browser does not support audio recording. Please use Chrome, Firefox, or Edge.');
        return;
    }

    // Decide from peerConnection rather than from a blindly toggled flag:
    // setupWebRTC() assigns it before its first await and stopWebRTC() nulls
    // it, including when setup fails. With the flag alone, a failed setup
    // left it "on" and the next click stopped instead of retrying.
    if (peerConnection) {
        stopWebRTC();
    } else {
        setupWebRTC();
    }

    isRecording = Boolean(peerConnection);
    startButton.classList.toggle('recording', isRecording);
});
620
+ </script>
621
+ </body>
622
+
623
+ </html>
index2.html ADDED
@@ -0,0 +1,496 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Gemini Voice Chat</title>
8
+ <style>
9
+ :root {
10
+ --color-accent: #6366f1;
11
+ --color-background: #0f172a;
12
+ --color-surface: #1e293b;
13
+ --color-text: #e2e8f0;
14
+ --boxSize: 8px;
15
+ --gutter: 4px;
16
+ }
17
+
18
+ body {
19
+ margin: 0;
20
+ padding: 0;
21
+ background-color: var(--color-background);
22
+ color: var(--color-text);
23
+ font-family: system-ui, -apple-system, sans-serif;
24
+ min-height: 100vh;
25
+ display: flex;
26
+ flex-direction: column;
27
+ align-items: center;
28
+ justify-content: center;
29
+ }
30
+
31
+ .container {
32
+ width: 90%;
33
+ max-width: 800px;
34
+ background-color: var(--color-surface);
35
+ padding: 2rem;
36
+ border-radius: 1rem;
37
+ box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
38
+ }
39
+
40
+ .wave-container {
41
+ position: relative;
42
+ display: flex;
43
+ min-height: 100px;
44
+ max-height: 128px;
45
+ justify-content: center;
46
+ align-items: center;
47
+ margin: 2rem 0;
48
+ }
49
+
50
+ .box-container {
51
+ display: flex;
52
+ justify-content: space-between;
53
+ height: 64px;
54
+ width: 100%;
55
+ }
56
+
57
+ .box {
58
+ height: 100%;
59
+ width: var(--boxSize);
60
+ background: var(--color-accent);
61
+ border-radius: 8px;
62
+ transition: transform 0.05s ease;
63
+ }
64
+
65
+ .controls {
66
+ display: grid;
67
+ gap: 1rem;
68
+ margin-bottom: 2rem;
69
+ }
70
+
71
+ .input-group {
72
+ display: flex;
73
+ flex-direction: column;
74
+ gap: 0.5rem;
75
+ }
76
+
77
+ label {
78
+ font-size: 0.875rem;
79
+ font-weight: 500;
80
+ }
81
+
82
+ input,
83
+ select {
84
+ padding: 0.75rem;
85
+ border-radius: 0.5rem;
86
+ border: 1px solid rgba(255, 255, 255, 0.1);
87
+ background-color: var(--color-background);
88
+ color: var(--color-text);
89
+ font-size: 1rem;
90
+ }
91
+
92
+ button {
93
+ padding: 1rem 2rem;
94
+ border-radius: 0.5rem;
95
+ border: none;
96
+ background-color: var(--color-accent);
97
+ color: white;
98
+ font-weight: 600;
99
+ cursor: pointer;
100
+ transition: all 0.2s ease;
101
+ }
102
+
103
+ button:hover {
104
+ opacity: 0.9;
105
+ transform: translateY(-1px);
106
+ }
107
+
108
+ .icon-with-spinner {
109
+ display: flex;
110
+ align-items: center;
111
+ justify-content: center;
112
+ gap: 12px;
113
+ min-width: 180px;
114
+ }
115
+
116
+ .spinner {
117
+ width: 20px;
118
+ height: 20px;
119
+ border: 2px solid white;
120
+ border-top-color: transparent;
121
+ border-radius: 50%;
122
+ animation: spin 1s linear infinite;
123
+ flex-shrink: 0;
124
+ }
125
+
126
+ @keyframes spin {
127
+ to {
128
+ transform: rotate(360deg);
129
+ }
130
+ }
131
+
132
+ .pulse-container {
133
+ display: flex;
134
+ align-items: center;
135
+ justify-content: center;
136
+ gap: 12px;
137
+ min-width: 180px;
138
+ }
139
+
140
+ .pulse-circle {
141
+ width: 20px;
142
+ height: 20px;
143
+ border-radius: 50%;
144
+ background-color: white;
145
+ opacity: 0.2;
146
+ flex-shrink: 0;
147
+ transform: translateX(-0%) scale(var(--audio-level, 1));
148
+ transition: transform 0.1s ease;
149
+ }
150
+
151
+ /* Add styles for toast notifications */
152
+ .toast {
153
+ position: fixed;
154
+ top: 20px;
155
+ left: 50%;
156
+ transform: translateX(-50%);
157
+ padding: 16px 24px;
158
+ border-radius: 4px;
159
+ font-size: 14px;
160
+ z-index: 1000;
161
+ display: none;
162
+ box-shadow: 0 2px 5px rgba(0, 0, 0, 0.2);
163
+ }
164
+
165
+ .toast.error {
166
+ background-color: #f44336;
167
+ color: white;
168
+ }
169
+
170
+ .toast.warning {
171
+ background-color: #ffd700;
172
+ color: black;
173
+ }
174
+ </style>
175
+ </head>
176
+
177
+
178
+ <body>
179
<!-- Toast used for error and warning messages; role="alert" lets assistive
     technology announce the text when the script fills it in. -->
<div id="error-toast" class="toast" role="alert"></div>
<div style="text-align: center">
    <h1>Gemini Voice Chat</h1>
    <p>Speak with Gemini using real-time audio streaming</p>
    <p>
        Get a Gemini API key
        <a href="https://ai.google.dev/gemini-api/docs/api-key">here</a>
    </p>
</div>
<div class="container">
    <div class="controls">
        <div class="input-group">
            <label for="api-key">API Key</label>
            <input type="password" id="api-key" placeholder="Enter your API key" autocomplete="off">
        </div>
        <div class="input-group">
            <label for="voice">Voice</label>
            <select id="voice">
                <option value="Puck">Puck</option>
                <option value="Charon">Charon</option>
                <option value="Kore">Kore</option>
                <option value="Fenrir">Fenrir</option>
                <option value="Aoede">Aoede</option>
            </select>
        </div>
    </div>

    <div class="wave-container">
        <div class="box-container">
            <!-- Boxes will be dynamically added here -->
        </div>
    </div>

    <button id="start-button" type="button">Start Recording</button>
</div>

<!-- Plays the remote audio stream attached by the script. -->
<audio id="audio-output"></audio>
217
+
218
+ <script>
219
// ---- Shared session state (assigned by the functions below) ----
let peerConnection;          // current RTCPeerConnection; null once stopped
let audioContext;            // AudioContext backing the analysers
let dataChannel;             // data channel created on the peer connection
let isRecording = false;     // toggled by the start button
let webrtc_id;               // random id sent with the offer and /input_hook requests
let animationId;             // handle of the most recently requested animation frame
let analyser;                // analyser fed by the incoming (remote) stream
let analyser_input;          // analyser fed by the microphone stream
let dataArray;               // frequency data buffer for `analyser`
let dataArray_input;         // frequency data buffer for `analyser_input`

// ---- Elements the script works with ----
const startButton = document.getElementById('start-button');
const apiKeyInput = document.getElementById('api-key');
const voiceSelect = document.getElementById('voice');
const audioOutput = document.getElementById('audio-output');
const boxContainer = document.querySelector('.box-container');

// Populate the visualiser with one `.box` element per bar.
const numBars = 32;
Array.from({ length: numBars }).forEach(() => {
    const bar = document.createElement('div');
    bar.className = 'box';
    boxContainer.appendChild(bar);
});
242
+
243
/**
 * Re-render the start button from the peer connection's state:
 * a spinner while the connection is "new"/"connecting", a pulsing
 * "Stop Recording" label while "connected", and the plain
 * "Start Recording" label in every other case (including no connection).
 */
function updateButtonState() {
    const state = peerConnection ? peerConnection.connectionState : null;

    if (state === 'connecting' || state === 'new') {
        startButton.innerHTML = `
            <div class="icon-with-spinner">
                <div class="spinner"></div>
                <span>Connecting...</span>
            </div>
        `;
        return;
    }

    if (state === 'connected') {
        startButton.innerHTML = `
            <div class="pulse-container">
                <div class="pulse-circle"></div>
                <span>Stop Recording</span>
            </div>
        `;
        return;
    }

    startButton.innerHTML = 'Start Recording';
}
262
+
263
/**
 * Show `message` in the error toast and hide it again after 5 seconds.
 *
 * The pending hide timer is kept on the function itself so that a second
 * call restarts the 5-second window; otherwise the timer of an earlier
 * toast would hide the newer message prematurely.
 *
 * @param {string} message Text to display.
 */
function showError(message) {
    const toast = document.getElementById('error-toast');
    toast.textContent = message;
    toast.className = 'toast error';
    toast.style.display = 'block';

    // Cancel the hide scheduled by a previous call, if any.
    clearTimeout(showError.hideTimer);

    // Hide toast after 5 seconds
    showError.hideTimer = setTimeout(() => {
        toast.style.display = 'none';
    }, 5000);
}
274
+
275
/**
 * Start a voice session: capture the microphone, create the peer connection
 * and data channel, wait for ICE gathering, POST the offer to /webrtc/offer
 * and apply the server's answer.
 *
 * A local handle (`pc`) to the connection is kept throughout. stopWebRTC()
 * nulls the global `peerConnection`, so comparing the two tells every
 * callback and every resumed `await` whether this session is still current.
 *
 * On failure the session is torn down, an error toast is shown, and the
 * recording flag/class on the start button are reset so the next click
 * starts a fresh session.
 *
 * @returns {Promise<void>}
 */
async function setupWebRTC() {
    // Check support up front so the specific message is not replaced by the
    // generic "failed to establish connection" toast.
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
        showError('Your browser does not support audio recording. Please use Chrome, Firefox, or Edge.');
        return;
    }

    const config = {
        iceServers: [
            { urls: 'stun:stun.l.google.com:19302' }
        ]
    };
    const pc = new RTCPeerConnection(config);
    peerConnection = pc;
    webrtc_id = Math.random().toString(36).substring(7);

    const toast = document.getElementById('error-toast');
    const isCurrent = () => peerConnection === pc;

    // Warn if the connection has not come up after 5 seconds.
    let warningHideId;
    const timeoutId = setTimeout(() => {
        if (!isCurrent()) return; // session already stopped; nothing to warn about
        toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
        toast.className = 'toast warning';
        toast.style.display = 'block';

        // Hide warning after 5 seconds
        warningHideId = setTimeout(() => {
            toast.style.display = 'none';
        }, 5000);
    }, 5000);

    const cancelSlowWarning = () => {
        clearTimeout(timeoutId);
        clearTimeout(warningHideId);
    };

    // Tear down this session and put the UI back in its idle state.
    const abort = (message) => {
        cancelSlowWarning();
        showError(message);
        if (isCurrent()) {
            stopWebRTC();
            isRecording = false;
            startButton.classList.remove('recording');
            startButton.textContent = 'Start Recording';
        }
    };

    try {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        if (!isCurrent()) {
            // Stopped while the permission prompt was open: release the microphone.
            stream.getTracks().forEach(track => track.stop());
            cancelSlowWarning();
            return;
        }
        stream.getTracks().forEach(track => pc.addTrack(track, stream));

        // Microphone level meter driving the pulse circle.
        audioContext = new AudioContext();
        analyser_input = audioContext.createAnalyser();
        const micSource = audioContext.createMediaStreamSource(stream);
        micSource.connect(analyser_input);
        analyser_input.fftSize = 64;
        dataArray_input = new Uint8Array(analyser_input.frequencyBinCount);

        const micAnalyser = analyser_input;
        const micLevels = dataArray_input;
        function updateAudioLevel() {
            // Ends itself when the session stops. It deliberately does not
            // write `animationId`, which belongs to updateVisualization().
            if (!isCurrent()) return;

            micAnalyser.getByteFrequencyData(micLevels);
            let total = 0;
            for (const level of micLevels) total += level;
            const audioLevel = (total / micLevels.length) / 255;

            const pulseCircle = document.querySelector('.pulse-circle');
            if (pulseCircle) {
                pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
            }

            requestAnimationFrame(updateAudioLevel);
        }
        updateAudioLevel();

        // Keep the button in sync with the connection state.
        pc.addEventListener('connectionstatechange', () => {
            console.log('connectionstatechange', pc.connectionState);
            if (!isCurrent()) return;

            if (pc.connectionState === 'connected') {
                cancelSlowWarning();
                toast.style.display = 'none';
            } else if (pc.connectionState === 'failed') {
                abort('Failed to establish connection. Please try again.');
                return;
            }
            updateButtonState();
        });

        // Handle incoming audio
        pc.addEventListener('track', (evt) => {
            const remoteStream = evt.streams[0];
            if (!isCurrent() || !remoteStream || !audioOutput) return;
            if (audioOutput.srcObject === remoteStream) return;

            audioOutput.srcObject = remoteStream;
            const playback = audioOutput.play();
            if (playback && playback.catch) {
                // play() returns a promise that can reject.
                playback.catch(err => console.error('Audio playback failed:', err));
            }

            // Visualise the output stream on the existing AudioContext.
            // Creating a second context here would orphan the first one,
            // and stopWebRTC() would never close it.
            analyser = audioContext.createAnalyser();
            const remoteSource = audioContext.createMediaStreamSource(remoteStream);
            remoteSource.connect(analyser);
            analyser.fftSize = 2048;
            dataArray = new Uint8Array(analyser.frequencyBinCount);

            // Avoid two visualiser loops if a second stream arrives.
            if (animationId) {
                cancelAnimationFrame(animationId);
            }
            updateVisualization();
        });

        // Create data channel for messages
        dataChannel = pc.createDataChannel('text');
        dataChannel.onmessage = (event) => {
            let eventJson;
            try {
                eventJson = JSON.parse(event.data);
            } catch (parseErr) {
                console.error('Ignoring malformed data channel message:', parseErr);
                return;
            }
            if (!eventJson) return;

            if (eventJson.type === "error") {
                showError(eventJson.message);
            } else if (eventJson.type === "send_input") {
                fetch('/input_hook', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json',
                    },
                    body: JSON.stringify({
                        webrtc_id: webrtc_id,
                        api_key: apiKeyInput.value,
                        voice_name: voiceSelect.value
                    })
                }).catch(err => console.error('Failed to send input settings:', err));
            }
        };

        // Create and send offer
        const offer = await pc.createOffer();
        await pc.setLocalDescription(offer);

        // Wait until ICE gathering has completed before posting the offer.
        await new Promise((resolve) => {
            if (pc.iceGatheringState === "complete") {
                resolve();
                return;
            }
            const checkState = () => {
                if (pc.iceGatheringState === "complete") {
                    pc.removeEventListener("icegatheringstatechange", checkState);
                    resolve();
                }
            };
            pc.addEventListener("icegatheringstatechange", checkState);
        });

        const response = await fetch('/webrtc/offer', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                sdp: pc.localDescription.sdp,
                type: pc.localDescription.type,
                webrtc_id: webrtc_id,
            })
        });

        const serverResponse = await response.json();
        if (!isCurrent()) {
            cancelSlowWarning();
            return;
        }

        if (serverResponse.status === 'failed') {
            const meta = serverResponse.meta || {};
            abort(meta.error === 'concurrency_limit_reached'
                ? `Too many connections. Maximum limit is ${meta.limit}`
                : (meta.error || 'Failed to establish connection. Please try again.'));
            return;
        }

        await pc.setRemoteDescription(serverResponse);
    } catch (err) {
        if (!isCurrent()) {
            // The user stopped the session mid-setup; not an error worth a toast.
            cancelSlowWarning();
            return;
        }
        console.error('Error setting up WebRTC:', err);
        abort('Failed to establish connection. Please try again.');
    }
}
428
+
429
/**
 * Draw one frame of the output-audio visualiser and schedule the next one.
 *
 * Reads the current frequency data from `analyser` and scales each `.box`
 * element vertically (minimum 0.1, maximum 2). The loop ends on its own
 * when there is no analyser or the peer connection has been torn down, so
 * it cannot keep running after a session has stopped.
 */
function updateVisualization() {
    if (!analyser || !peerConnection) return;

    analyser.getByteFrequencyData(dataArray);
    const bars = document.querySelectorAll('.box');

    for (let i = 0; i < bars.length; i++) {
        // `|| 0` covers more bars than frequency bins, which would
        // otherwise produce scaleY(NaN).
        const barHeight = ((dataArray[i] || 0) / 255) * 2;
        bars[i].style.transform = `scaleY(${Math.max(0.1, barHeight)})`;
    }

    animationId = requestAnimationFrame(updateVisualization);
}
442
+
443
/**
 * Tear down the current session and refresh the start button.
 *
 * Stops transceivers and outgoing tracks (which releases the microphone),
 * closes the peer connection, cancels the pending animation frame, detaches
 * the remote stream from the audio element and closes the AudioContext.
 * Safe to call when no session is active.
 */
function stopWebRTC() {
    if (peerConnection) {
        // Clean up all transceivers and tracks
        if (peerConnection.getTransceivers) {
            peerConnection.getTransceivers().forEach(transceiver => {
                if (transceiver.stop) {
                    transceiver.stop();
                }
            });
        }

        if (peerConnection.getSenders) {
            peerConnection.getSenders().forEach(sender => {
                if (sender.track && sender.track.stop) sender.track.stop();
            });
        }

        peerConnection.close();
        peerConnection = null;
    }

    if (animationId) {
        cancelAnimationFrame(animationId);
        animationId = null;
    }

    // Clear the output analyser so updateVisualization()'s `!analyser`
    // guard ends the loop even if its frame was not the one cancelled above.
    analyser = null;

    // Drop the remote stream so the audio element does not keep it alive.
    if (audioOutput && audioOutput.srcObject) {
        audioOutput.pause();
        audioOutput.srcObject = null;
    }

    if (audioContext) {
        const closing = audioContext.close();
        if (closing && closing.catch) {
            // close() returns a promise; don't leave a rejection unhandled.
            closing.catch(err => console.warn('Error closing AudioContext:', err));
        }
        audioContext = null;
    }

    updateButtonState();
}
476
+
477
// Start/stop toggle for the session.
//
// The recording flag is set explicitly per branch rather than blindly
// toggled. setupWebRTC() is asynchronous and may fail after this handler
// has returned; once it settles, the flag and the button's `recording`
// class are reset if no connection is left, so the next click starts a new
// session instead of "stopping" one that never existed.
startButton.addEventListener('click', () => {
    // First check for browser support
    if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) {
        showError('Your browser does not support audio recording. Please use Chrome, Firefox, or Edge.');
        return;
    }

    if (isRecording) {
        stopWebRTC();
        isRecording = false;
        startButton.classList.remove('recording');
        return;
    }

    isRecording = true;
    startButton.classList.add('recording');
    setupWebRTC()
        .catch(err => console.error('Error setting up WebRTC:', err))
        .finally(() => {
            // stopWebRTC() nulls peerConnection, so a null here means setup failed.
            if (!peerConnection) {
                isRecording = false;
                startButton.classList.remove('recording');
            }
        });
});
493
+ </script>
494
+ </body>
495
+
496
+ </html>
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastrtc
2
+ python-dotenv
3
+ google-genai
4
+ twilio