atlury committed on
Commit
cf77c1f
·
verified ·
1 Parent(s): 645df8b

Create index.html

Browse files
Files changed (1) hide show
  1. index.html +390 -0
index.html ADDED
@@ -0,0 +1,390 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Voice Chat Bot with Echo Cancellation</title>
7
+ <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
8
+ <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
9
+ <script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
10
+ <style>
11
+ body {
12
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
13
+ margin: 0;
14
+ padding: 20px;
15
+ background-color: #1a1a1a;
16
+ color: #f0f0f0;
17
+ }
18
+ .container {
19
+ max-width: 800px;
20
+ margin: 0 auto;
21
+ }
22
+ h1 {
23
+ color: #ffd700;
24
+ text-align: center;
25
+ margin-bottom: 10px;
26
+ }
27
+ .subtitle {
28
+ text-align: center;
29
+ color: #ffd700;
30
+ margin-bottom: 20px;
31
+ }
32
+ #chat-container {
33
+ display: flex;
34
+ flex-direction: column;
35
+ height: 70vh;
36
+ }
37
+ #conversation {
38
+ flex-grow: 1;
39
+ border: 1px solid #444;
40
+ padding: 10px;
41
+ overflow-y: scroll;
42
+ background-color: #2a2a2a;
43
+ border-radius: 5px;
44
+ margin-bottom: 20px;
45
+ }
46
+ #controls {
47
+ display: flex;
48
+ justify-content: center;
49
+ margin-bottom: 20px;
50
+ }
51
+ button {
52
+ font-size: 18px;
53
+ padding: 10px 20px;
54
+ background-color: #ffd700;
55
+ color: #1a1a1a;
56
+ border: none;
57
+ border-radius: 5px;
58
+ cursor: pointer;
59
+ transition: background-color 0.3s;
60
+ }
61
+ button:hover {
62
+ background-color: #ffec8b;
63
+ }
64
+ button:disabled {
65
+ background-color: #666;
66
+ cursor: not-allowed;
67
+ }
68
+ #visualizer {
69
+ width: 100%;
70
+ height: 100px;
71
+ background-color: #2a2a2a;
72
+ border-radius: 5px;
73
+ overflow: hidden;
74
+ margin-bottom: 20px;
75
+ }
76
+ .bar {
77
+ width: 5px;
78
+ height: 100%;
79
+ background-color: #ffd700;
80
+ display: inline-block;
81
+ margin-right: 1px;
82
+ }
83
+ #loading {
84
+ position: fixed;
85
+ top: 0;
86
+ left: 0;
87
+ width: 100%;
88
+ height: 100%;
89
+ background-color: rgba(0, 0, 0, 0.8);
90
+ display: flex;
91
+ justify-content: center;
92
+ align-items: center;
93
+ z-index: 1000;
94
+ }
95
+ .spinner {
96
+ width: 50px;
97
+ height: 50px;
98
+ border: 5px solid #f3f3f3;
99
+ border-top: 5px solid #ffd700;
100
+ border-radius: 50%;
101
+ animation: spin 1s linear infinite;
102
+ }
103
+ @keyframes spin {
104
+ 0% { transform: rotate(0deg); }
105
+ 100% { transform: rotate(360deg); }
106
+ }
107
+ #configuration {
108
+ margin-bottom: 20px;
109
+ }
110
+ select {
111
+ width: 100%;
112
+ padding: 10px;
113
+ font-size: 16px;
114
+ background-color: #2a2a2a;
115
+ color: #f0f0f0;
116
+ border: 1px solid #444;
117
+ border-radius: 5px;
118
+ }
119
+ #model-info {
120
+ margin-top: 10px;
121
+ font-size: 14px;
122
+ color: #aaa;
123
+ }
124
+ #logs {
125
+ background-color: #2a2a2a;
126
+ border: 1px solid #444;
127
+ border-radius: 5px;
128
+ padding: 10px;
129
+ height: 200px;
130
+ overflow-y: scroll;
131
+ font-family: monospace;
132
+ font-size: 14px;
133
+ }
134
+ #clear-logs {
135
+ margin-top: 10px;
136
+ font-size: 14px;
137
+ padding: 5px 10px;
138
+ }
139
+ #localVideo {
140
+ display: none;
141
+ }
142
+ </style>
143
+ </head>
144
+ <body>
145
+ <div id="loading">
146
+ <div class="spinner"></div>
147
+ </div>
148
+ <div class="container">
149
+ <h1>Voice Chat Bot Demo</h1>
150
+ <p class="subtitle">For best results, use headphones.</p>
151
+ <div id="chat-container">
152
+ <div id="controls">
153
+ <button id="startButton" disabled>Begin Call</button>
154
+ </div>
155
+ <div id="configuration">
156
+ <select id="configSelect">
157
+ <option value="fastest">Fastest</option>
158
+ <option value="balanced">Balanced</option>
159
+ <option value="quality">Highest Quality</option>
160
+ </select>
161
+ <div id="model-info">
162
+ TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Placeholder
163
+ </div>
164
+ </div>
165
+ <div id="visualizer"></div>
166
+ <div id="conversation"></div>
167
+ </div>
168
+ <h2>Logs</h2>
169
+ <div id="logs"></div>
170
+ <button id="clear-logs">Clear</button>
171
+ </div>
172
+ <video id="localVideo" autoplay muted></video>
173
+
174
+ <script type="module">
175
+ import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
176
+
177
// Point the Transformers.js environment's local model path at ./models.
env.localModelPath = './models';

// ---- DOM handles (looked up once at module load) ----
const conversationDiv = document.querySelector('#conversation');
const startButton = document.querySelector('#startButton');
const visualizer = document.querySelector('#visualizer');
const loadingDiv = document.querySelector('#loading');
const logsDiv = document.querySelector('#logs');
const clearLogsButton = document.querySelector('#clear-logs');
const localVideo = document.querySelector('#localVideo');

// ---- Speech components, assigned once loading / the call starts ----
let myvad; // voice-activity detector instance for the active call
let sttPipeline; // speech-to-text pipeline
let ttsPipeline; // text-to-speech pipeline

// ---- Web Audio graph and visualizer state ----
let audioContext;
let analyser;
let dataArray; // byte buffer the analyser writes frequency data into
let bars; // collection of .bar elements inside the visualizer
let animationId; // id of the pending requestAnimationFrame callback

// ---- Call state ----
let isListening = false; // true while a call is active
let microphoneStream; // MediaStream returned by getUserMedia
let isSpeaking = false; // true while the bot reply is being produced/played
let currentAudioSource = null; // buffer source currently playing, if any
199
+
200
/**
 * Fills the visualizer container with 64 `<div class="bar">` elements and
 * stores the resulting element collection in the shared `bars` variable.
 */
function createVisualizer() {
  const barCount = 64;
  Array.from({ length: barCount }).forEach(() => {
    const barElement = document.createElement('div');
    barElement.className = 'bar';
    visualizer.appendChild(barElement);
  });
  bars = visualizer.getElementsByClassName('bar');
}
209
+
210
/**
 * Draws one visualizer frame from the analyser's current frequency data and
 * schedules the next frame.
 *
 * Safe to call while a loop is already running: the frame that is still
 * pending is cancelled first, so there is never more than one loop and
 * `animationId` always refers to it. (When invoked by requestAnimationFrame
 * itself, `animationId` is the frame currently executing, so the cancel is a
 * no-op.)
 */
function updateVisualizer() {
  cancelAnimationFrame(animationId);

  // Nothing to draw until the audio graph has been created.
  if (!analyser || !dataArray) {
    return;
  }

  analyser.getByteFrequencyData(dataArray);

  // Never read past the analyser buffer; that would produce "NaNpx".
  const drawableBars = Math.min(bars.length, dataArray.length);
  for (let i = 0; i < drawableBars; i++) {
    // Byte magnitudes are halved to get the bar height in pixels.
    bars[i].style.height = `${dataArray[i] / 2}px`;
  }

  animationId = requestAnimationFrame(updateVisualizer);
}
218
+
219
/**
 * Loads the speech-to-text pipeline and then the text-to-speech pipeline
 * into the shared `sttPipeline` / `ttsPipeline` variables.
 *
 * On success the start button is enabled and a ready message is logged; on
 * failure the error is reported to the console and the log panel. The
 * loading overlay is hidden in both cases.
 *
 * @returns {Promise<void>}
 */
async function initializePipelines() {
  const hideLoadingOverlay = () => {
    loadingDiv.style.display = 'none';
  };
  const ttsOptions = { quantized: false };

  try {
    sttPipeline = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
    ttsPipeline = await pipeline('text-to-speech', 'Xenova/mms-tts-eng', ttsOptions);

    addLog('System: Voice Chat Bot initialized. Click "Begin Call" to start.');
    startButton.disabled = false;
    hideLoadingOverlay();
  } catch (error) {
    console.error('Error initializing pipelines:', error);
    addLog('System: Error initializing Voice Chat Bot. Please check the console for details.');
    hideLoadingOverlay();
  }
}
234
+
235
/**
 * Handles one captured utterance: transcribes it, logs the user text and
 * the bot's echo reply, synthesizes the reply and plays it.
 *
 * `isSpeaking` is true only while the reply is being synthesized or played.
 * It is reset in a `finally` block so that a failed synthesis or playback
 * cannot leave the flag stuck on, which would make every later speech start
 * look like an interruption.
 *
 * Errors are reported to the console and the log panel; they are never
 * rethrown to the caller.
 *
 * @param {Float32Array} audio - Samples handed over by the VAD's
 *   speech-end callback and passed straight to the speech-to-text pipeline.
 * @returns {Promise<void>}
 */
async function processSpeech(audio) {
  try {
    if (!sttPipeline || !ttsPipeline) {
      throw new Error('Pipelines not initialized');
    }

    const { text } = await sttPipeline(audio);
    addLog(`User: ${text}`);

    const botResponse = `I heard you say: "${text}".`;
    addLog(`Bot: ${botResponse}`);

    isSpeaking = true;
    try {
      const speechOutput = await ttsPipeline(botResponse);
      // Forward the model's own sampling rate rather than assuming one.
      await playAudio(speechOutput.audio, speechOutput.sampling_rate);
    } finally {
      isSpeaking = false;
    }
  } catch (error) {
    console.error('Error processing speech:', error);
    addLog('System: Error processing speech. Please try again.');
  }
}
256
+
257
/**
 * Appends a time-stamped line to the log panel and scrolls the panel to the
 * bottom so the newest entry is visible.
 *
 * The text is assigned through `textContent`, so it is never parsed as HTML.
 *
 * @param {string} message - Text to display after the timestamp.
 */
function addLog(message) {
  const timestamp = new Date().toLocaleTimeString();

  const entry = document.createElement('div');
  entry.textContent = `[${timestamp}] ${message}`;

  logsDiv.appendChild(entry);
  logsDiv.scrollTop = logsDiv.scrollHeight;
}
266
+
267
/**
 * Plays mono PCM samples through the shared AudioContext and resolves when
 * playback ends, whether it finished naturally or was stopped.
 *
 * The buffer source is connected to the speakers directly and to the
 * analyser only as a tap for the visualizer. The analyser itself is never
 * connected to the destination: the microphone source feeds the same
 * analyser, so wiring analyser -> destination would play the microphone
 * back through the speakers.
 *
 * @param {Float32Array} audioArray - Samples to play.
 * @param {number} [sampleRate=16000] - Sample rate of `audioArray` in Hz.
 * @returns {Promise<void>} Resolves when playback ends; resolves immediately
 *   for empty input. Rejects if the audio graph is unavailable.
 */
function playAudio(audioArray, sampleRate = 16000) {
  return new Promise((resolve) => {
    // createBuffer rejects zero-length buffers, and there is nothing to play.
    if (!audioArray || audioArray.length === 0) {
      resolve();
      return;
    }

    const audioBuffer = audioContext.createBuffer(1, audioArray.length, sampleRate);
    audioBuffer.getChannelData(0).set(audioArray);

    const source = audioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(analyser);
    source.connect(audioContext.destination);

    source.onended = () => {
      // Only clear the shared handle if a newer playback has not replaced it.
      if (currentAudioSource === source) {
        currentAudioSource = null;
      }
      resolve();
    };

    currentAudioSource = source;
    source.start();
  });
}
285
+
286
/**
 * Stops the buffer source that is currently playing, if there is one, and
 * clears the shared `currentAudioSource` handle.
 */
function stopCurrentAudio() {
  if (!currentAudioSource) {
    return;
  }
  currentAudioSource.stop();
  currentAudioSource = null;
}
292
+
293
/**
 * Click handler for the call button: ends the call when one is active,
 * otherwise starts one.
 *
 * @returns {Promise<void>} Settles once the chosen start/stop routine has finished.
 */
async function toggleListening() {
  const action = isListening ? stopListening : startListening;
  await action();
}
300
+
301
/**
 * Starts a call: builds the audio graph, acquires the microphone (plus a
 * minimal video track), attaches the stream to the hidden video element,
 * and starts voice-activity detection.
 *
 * If any step fails, everything acquired so far (VAD, media tracks, video
 * element source, AudioContext) is released again, so a failed start
 * neither keeps the capture devices open nor leaks an AudioContext on each
 * retry. Failures are reported to the console and the log panel; they are
 * not rethrown.
 *
 * @returns {Promise<void>}
 */
async function startListening() {
  try {
    audioContext = new (window.AudioContext || window.webkitAudioContext)();
    analyser = audioContext.createAnalyser();
    // fftSize 128 yields 64 frequency bins, one per visualizer bar.
    analyser.fftSize = 128;
    dataArray = new Uint8Array(analyser.frequencyBinCount);

    // The local preview must stay silent.
    localVideo.muted = true;
    localVideo.volume = 0;

    // Request both audio and video streams.
    microphoneStream = await navigator.mediaDevices.getUserMedia({
      audio: true,
      video: { width: 1, height: 1 }, // Minimal video for echo cancellation
    });

    localVideo.srcObject = microphoneStream;
    await localVideo.play();

    const [audioTrack] = microphoneStream.getAudioTracks();
    console.log('Active constraints:', audioTrack.getConstraints());
    console.log('Microphone stream settings:', audioTrack.getSettings());

    // Feed the microphone into the analyser for the visualizer.
    const source = audioContext.createMediaStreamSource(microphoneStream);
    source.connect(analyser);

    myvad = await vad.MicVAD.new({
      onSpeechStart: () => {
        addLog('--- vad: speech start');
        updateVisualizer();
        // The user talking over the bot cuts the bot's reply short.
        if (isSpeaking) {
          addLog('User interrupted. Stopping bot speech.');
          stopCurrentAudio();
          isSpeaking = false;
        }
      },
      onSpeechEnd: (audio) => {
        addLog('--- vad: speech end');
        cancelAnimationFrame(animationId);
        // processSpeech handles and reports its own errors.
        processSpeech(audio);
      },
    });

    await myvad.start();
    startButton.textContent = 'End Call';
    isListening = true;
    addLog('System: Listening...');
  } catch (error) {
    console.error('Error starting VAD:', error);
    addLog('System: Error starting voice detection. Please check your microphone and try again.');

    // Roll back whatever was acquired before the failure.
    if (myvad) {
      try {
        await myvad.destroy();
      } catch (destroyError) {
        console.error('Error stopping VAD:', destroyError);
      }
      myvad = null;
    }
    if (microphoneStream) {
      microphoneStream.getTracks().forEach((track) => track.stop());
      microphoneStream = null;
    }
    if (localVideo) {
      localVideo.srcObject = null;
    }
    if (audioContext) {
      try {
        await audioContext.close();
      } catch (closeError) {
        console.error('Error closing audio context:', closeError);
      }
      audioContext = null;
    }
  }
}
352
+
353
/**
 * Ends the call and releases every resource it holds: bot playback, the
 * visualizer loop, the VAD, the captured media tracks, the AudioContext and
 * the hidden video element's stream.
 *
 * Each release step is isolated, so one failing step (for example closing
 * an already-closed AudioContext) cannot stop the remaining steps or leave
 * the UI stuck in the "End Call" state.
 *
 * @returns {Promise<void>}
 */
async function stopListening() {
  // Silence the bot first, while its audio graph is still alive.
  try {
    stopCurrentAudio();
  } catch (error) {
    console.error('Error stopping playback:', error);
  }
  // Otherwise the next call's first speech start could be logged as an interruption.
  isSpeaking = false;
  cancelAnimationFrame(animationId);

  if (myvad) {
    try {
      await myvad.destroy();
    } catch (error) {
      console.error('Error stopping VAD:', error);
    }
    myvad = null;
  }

  if (microphoneStream) {
    microphoneStream.getTracks().forEach((track) => track.stop());
    microphoneStream = null;
  }

  if (audioContext) {
    try {
      await audioContext.close();
    } catch (error) {
      console.error('Error closing audio context:', error);
    }
    audioContext = null;
  }

  if (localVideo) {
    localVideo.srcObject = null;
  }

  startButton.textContent = 'Begin Call';
  isListening = false;
  addLog('System: Stopped listening.');
  addLog('System: Microphone closed');
}
380
+
381
// ---- UI wiring ----
// The call button toggles between starting and ending a call.
startButton.addEventListener('click', toggleListening);

// The "Clear" button empties the log panel.
clearLogsButton.addEventListener('click', () => {
  logsDiv.textContent = '';
});

// ---- Start-up ----
// Build the visualizer bars, then begin loading the speech pipelines
// (initializePipelines reports its own errors, so the promise is not awaited).
createVisualizer();
initializePipelines();
388
+ </script>
389
+ </body>
390
+ </html>