atlury committed on
Commit
536e020
·
verified ·
1 Parent(s): eacac69

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +76 -149
index.html CHANGED
@@ -7,139 +7,23 @@
7
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
8
  <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
9
  <script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
10
-
11
 
12
  <style>
13
- body {
14
- font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
15
- margin: 0;
16
- padding: 20px;
17
- background-color: #1a1a1a;
18
- color: #f0f0f0;
19
- }
20
- .container {
21
- max-width: 800px;
22
- margin: 0 auto;
23
- }
24
- h1 {
25
- color: #ffd700;
26
- text-align: center;
27
- margin-bottom: 10px;
28
- }
29
- .subtitle {
30
- text-align: center;
31
- color: #ffd700;
32
- margin-bottom: 20px;
33
- }
34
- #chat-container {
35
- display: flex;
36
- flex-direction: column;
37
- height: 70vh;
38
- }
39
- #conversation {
40
- flex-grow: 1;
41
- border: 1px solid #444;
42
- padding: 10px;
43
- overflow-y: scroll;
44
- background-color: #2a2a2a;
45
- border-radius: 5px;
46
- margin-bottom: 20px;
47
- }
48
- #controls {
49
- display: flex;
50
- justify-content: center;
51
- margin-bottom: 20px;
52
- }
53
- button {
54
- font-size: 18px;
55
- padding: 10px 20px;
56
- background-color: #ffd700;
57
- color: #1a1a1a;
58
- border: none;
59
- border-radius: 5px;
60
- cursor: pointer;
61
- transition: background-color 0.3s;
62
- }
63
- button:hover {
64
- background-color: #ffec8b;
65
- }
66
- button:disabled {
67
- background-color: #666;
68
- cursor: not-allowed;
69
- }
70
- #visualizer {
71
  width: 100%;
72
- height: 100px;
73
- background-color: #2a2a2a;
74
  border-radius: 5px;
 
75
  overflow: hidden;
76
- margin-bottom: 20px;
77
  }
78
- .bar {
79
- width: 5px;
80
- height: 100%;
81
  background-color: #ffd700;
82
- display: inline-block;
83
- margin-right: 1px;
84
- }
85
- #loading {
86
- position: fixed;
87
- top: 0;
88
- left: 0;
89
- width: 100%;
90
- height: 100%;
91
- background-color: rgba(0, 0, 0, 0.8);
92
- display: flex;
93
- justify-content: center;
94
- align-items: center;
95
- z-index: 1000;
96
- }
97
- .spinner {
98
- width: 50px;
99
- height: 50px;
100
- border: 5px solid #f3f3f3;
101
- border-top: 5px solid #ffd700;
102
- border-radius: 50%;
103
- animation: spin 1s linear infinite;
104
- }
105
- @keyframes spin {
106
- 0% { transform: rotate(0deg); }
107
- 100% { transform: rotate(360deg); }
108
- }
109
- #configuration {
110
- margin-bottom: 20px;
111
- }
112
- select {
113
- width: 100%;
114
- padding: 10px;
115
- font-size: 16px;
116
- background-color: #2a2a2a;
117
- color: #f0f0f0;
118
- border: 1px solid #444;
119
- border-radius: 5px;
120
- }
121
- #model-info {
122
- margin-top: 10px;
123
- font-size: 14px;
124
- color: #aaa;
125
- }
126
- #logs {
127
- background-color: #2a2a2a;
128
- border: 1px solid #444;
129
- border-radius: 5px;
130
- padding: 10px;
131
- height: 200px;
132
- overflow-y: scroll;
133
- font-family: monospace;
134
- font-size: 14px;
135
- }
136
- #clear-logs {
137
- margin-top: 10px;
138
- font-size: 14px;
139
- padding: 5px 10px;
140
- }
141
- #localVideo, #remoteVideo {
142
- display: none;
143
  }
144
  </style>
145
  </head>
@@ -163,6 +47,9 @@
163
  <div id="model-info">
164
  TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/tiny-llm
165
  </div>
 
 
 
166
  </div>
167
  <div id="visualizer"></div>
168
  <div id="conversation"></div>
@@ -178,8 +65,6 @@
178
  import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
179
 
180
  env.localModelPath = './models';
181
-
182
- // Configure environment before initializing pipelines
183
  env.backends = ['wasm'];
184
  env.wasm = env.wasm || {};
185
  env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
@@ -194,6 +79,7 @@
194
  const clearLogsButton = document.getElementById('clear-logs');
195
  const localVideo = document.getElementById('localVideo');
196
  const remoteVideo = document.getElementById('remoteVideo');
 
197
 
198
  let myvad;
199
  let sttPipeline;
@@ -233,31 +119,64 @@
233
 
234
  async function initializePipelines() {
235
  try {
236
- [sttPipeline, ttsPipeline, llmPipeline] = await Promise.all([
237
- pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
238
- pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
239
- pipeline('text-generation', 'Xenova/tiny-llm')
240
- ]);
241
-
242
- addLog('System: Digital Human Voice Chat initialized with TinyLLM. Click "Begin Call" to start.');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  startButton.disabled = false;
244
  loadingDiv.style.display = 'none';
245
  } catch (error) {
246
  console.error('Error initializing pipelines:', error);
247
- addLog('System: Error initializing Digital Human Voice Chat. Please check the console for details.');
248
  loadingDiv.style.display = 'none';
249
  }
250
  }
251
 
 
 
 
 
 
252
  async function processSpeech(audio) {
253
  try {
254
  if (!sttPipeline || !ttsPipeline || !llmPipeline) {
255
  throw new Error('Pipelines not initialized');
256
  }
257
 
 
258
  const transcription = await sttPipeline(audio);
259
  addLog(`User: ${transcription.text}`);
260
 
 
261
  const llmResponse = await llmPipeline(transcription.text, {
262
  max_new_tokens: 50,
263
  temperature: 0.7
@@ -265,13 +184,15 @@
265
  const botResponse = llmResponse[0].generated_text;
266
  addLog(`Bot: ${botResponse}`);
267
 
 
268
  isSpeaking = true;
269
  const speechOutput = await ttsPipeline(botResponse);
270
  await playAudio(speechOutput.audio);
271
  isSpeaking = false;
 
272
  } catch (error) {
273
  console.error('Error processing speech:', error);
274
- addLog('System: Error processing speech. Please try again.');
275
  }
276
  }
277
 
@@ -283,6 +204,7 @@
283
  messageElement.textContent = logMessage;
284
  logsDiv.appendChild(messageElement);
285
  logsDiv.scrollTop = logsDiv.scrollHeight;
 
286
  }
287
 
288
  function playAudio(audioArray) {
@@ -321,6 +243,7 @@
321
 
322
  async function startListening() {
323
  try {
 
324
  audioContext = new (window.AudioContext || window.webkitAudioContext)();
325
  analyser = audioContext.createAnalyser();
326
  analyser.fftSize = 128;
@@ -328,12 +251,10 @@
328
 
329
  localVideo.volume = 0;
330
  localVideo.muted = true;
331
- document.getElementById('localVideo').volume = 0;
332
-
333
  remoteVideo.volume = 0;
334
  remoteVideo.muted = true;
335
- document.getElementById('remoteVideo').volume = 0;
336
 
 
337
  microphoneStream = await navigator.mediaDevices.getUserMedia({
338
  audio: true,
339
  video: { width: 1, height: 1 }
@@ -342,9 +263,7 @@
342
  localVideo.srcObject = microphoneStream;
343
  await localVideo.play();
344
 
345
- console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
346
- console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());
347
-
348
  const offerOptions = {
349
  offerToReceiveAudio: true,
350
  offerToReceiveVideo: false,
@@ -370,20 +289,21 @@
370
  const source = audioContext.createMediaStreamSource(loopbackStream);
371
  source.connect(analyser);
372
 
 
373
  myvad = await vad.MicVAD.new({
374
  noiseSuppression: true,
375
  aggressiveness: 3,
376
  onSpeechStart: () => {
377
- addLog('--- Voice activity: speech start');
378
  updateVisualizer();
379
  if (isSpeaking) {
380
- addLog('User interrupted. Stopping bot speech.');
381
  stopCurrentAudio();
382
  isSpeaking = false;
383
  }
384
  },
385
  onSpeechEnd: (audio) => {
386
- addLog('--- Voice activity: speech end');
387
  cancelAnimationFrame(animationId);
388
  processSpeech(audio);
389
  }
@@ -392,29 +312,34 @@
392
  await myvad.start();
393
  startButton.textContent = 'End Call';
394
  isListening = true;
395
- addLog('System: Listening...');
396
  } catch (error) {
397
  console.error('Error starting voice activity:', error);
398
- addLog('System: Error starting voice detection. Please check your microphone and try again.');
399
  }
400
  }
401
 
402
  async function stopListening() {
 
403
  if (myvad) {
404
  try {
405
  await myvad.destroy();
 
406
  } catch (error) {
407
  console.error('Error stopping voice activity:', error);
 
408
  }
409
  myvad = null;
410
  }
411
  if (microphoneStream) {
412
  microphoneStream.getTracks().forEach(track => track.stop());
413
  microphoneStream = null;
 
414
  }
415
  if (audioContext) {
416
  await audioContext.close();
417
  audioContext = null;
 
418
  }
419
  if (localVideo) {
420
  localVideo.srcObject = null;
@@ -425,10 +350,12 @@
425
  if (rtcConnection) {
426
  rtcConnection.close();
427
  rtcConnection = null;
 
428
  }
429
  if (rtcLoopbackConnection) {
430
  rtcLoopbackConnection.close();
431
  rtcLoopbackConnection = null;
 
432
  }
433
  loopbackStream = new MediaStream();
434
  stopCurrentAudio();
@@ -436,12 +363,12 @@
436
  isListening = false;
437
  addLog('System: Stopped listening.');
438
  cancelAnimationFrame(animationId);
439
- addLog('System: Microphone closed');
440
  }
441
 
442
  startButton.addEventListener('click', toggleListening);
443
  clearLogsButton.addEventListener('click', () => {
444
  logsDiv.innerHTML = '';
 
445
  });
446
 
447
  createVisualizer();
 
7
  <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
8
  <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
9
  <script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
 
10
 
11
  <style>
12
+ /* ... (previous styles remain unchanged) ... */
13
+ #model-progress {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  width: 100%;
15
+ background-color: #444;
 
16
  border-radius: 5px;
17
+ margin-top: 10px;
18
  overflow: hidden;
 
19
  }
20
+ #model-progress-bar {
21
+ width: 0;
22
+ height: 20px;
23
  background-color: #ffd700;
24
+ text-align: center;
25
+ line-height: 20px;
26
+ color: #1a1a1a;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
28
  </style>
29
  </head>
 
47
  <div id="model-info">
48
  TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/tiny-llm
49
  </div>
50
+ <div id="model-progress">
51
+ <div id="model-progress-bar"></div>
52
+ </div>
53
  </div>
54
  <div id="visualizer"></div>
55
  <div id="conversation"></div>
 
65
  import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
66
 
67
  env.localModelPath = './models';
 
 
68
  env.backends = ['wasm'];
69
  env.wasm = env.wasm || {};
70
  env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
 
79
  const clearLogsButton = document.getElementById('clear-logs');
80
  const localVideo = document.getElementById('localVideo');
81
  const remoteVideo = document.getElementById('remoteVideo');
82
+ const modelProgressBar = document.getElementById('model-progress-bar');
83
 
84
  let myvad;
85
  let sttPipeline;
 
119
 
120
  async function initializePipelines() {
121
  try {
122
+ addLog('System: Initializing pipelines...');
123
+ const tasks = [
124
+ { name: 'STT', task: 'automatic-speech-recognition', model: 'Xenova/whisper-tiny.en' },
125
+ { name: 'TTS', task: 'text-to-speech', model: 'Xenova/mms-tts-eng' },
126
+ { name: 'LLM', task: 'text-generation', model: 'Xenova/tiny-llm' }
127
+ ];
128
+
129
+ for (const [index, task] of tasks.entries()) {
130
+ addLog(`System: Loading ${task.name} model...`);
131
+ updateProgressBar((index / tasks.length) * 100);
132
+ const pipelineInstance = await pipeline(task.task, task.model, {
133
+ quantized: true,
134
+ progress_callback: (progress) => {
135
+ updateProgressBar(((index + progress) / tasks.length) * 100);
136
+ }
137
+ });
138
+ addLog(`System: ${task.name} model loaded successfully.`);
139
+
140
+ switch (task.name) {
141
+ case 'STT':
142
+ sttPipeline = pipelineInstance;
143
+ break;
144
+ case 'TTS':
145
+ ttsPipeline = pipelineInstance;
146
+ break;
147
+ case 'LLM':
148
+ llmPipeline = pipelineInstance;
149
+ break;
150
+ }
151
+ }
152
+
153
+ updateProgressBar(100);
154
+ addLog('System: All pipelines initialized successfully.');
155
  startButton.disabled = false;
156
  loadingDiv.style.display = 'none';
157
  } catch (error) {
158
  console.error('Error initializing pipelines:', error);
159
+ addLog(`System: Error initializing pipelines: ${error.message}`);
160
  loadingDiv.style.display = 'none';
161
  }
162
  }
163
 
164
/**
 * Renders a completion percentage on the model progress bar: sets the bar's
 * width and shows the rounded value as its label.
 *
 * Values are clamped to the 0-100 range. Values that are not finite numbers
 * are ignored so the bar never displays "NaN%" or overflows its track.
 *
 * @param {number} percentage - Completion percentage, expected in 0-100.
 */
function updateProgressBar(percentage) {
  const value = Number(percentage);
  if (!Number.isFinite(value)) {
    return;
  }
  const clamped = Math.min(Math.max(value, 0), 100);
  modelProgressBar.style.width = `${clamped}%`;
  modelProgressBar.textContent = `${Math.round(clamped)}%`;
}
168
+
169
  async function processSpeech(audio) {
170
  try {
171
  if (!sttPipeline || !ttsPipeline || !llmPipeline) {
172
  throw new Error('Pipelines not initialized');
173
  }
174
 
175
+ addLog('System: Processing speech...');
176
  const transcription = await sttPipeline(audio);
177
  addLog(`User: ${transcription.text}`);
178
 
179
+ addLog('System: Generating LLM response...');
180
  const llmResponse = await llmPipeline(transcription.text, {
181
  max_new_tokens: 50,
182
  temperature: 0.7
 
184
  const botResponse = llmResponse[0].generated_text;
185
  addLog(`Bot: ${botResponse}`);
186
 
187
+ addLog('System: Generating speech from response...');
188
  isSpeaking = true;
189
  const speechOutput = await ttsPipeline(botResponse);
190
  await playAudio(speechOutput.audio);
191
  isSpeaking = false;
192
+ addLog('System: Speech playback complete.');
193
  } catch (error) {
194
  console.error('Error processing speech:', error);
195
+ addLog(`System: Error processing speech: ${error.message}`);
196
  }
197
  }
198
 
 
204
  messageElement.textContent = logMessage;
205
  logsDiv.appendChild(messageElement);
206
  logsDiv.scrollTop = logsDiv.scrollHeight;
207
+ console.log(logMessage);
208
  }
209
 
210
  function playAudio(audioArray) {
 
243
 
244
  async function startListening() {
245
  try {
246
+ addLog('System: Initializing audio context and stream...');
247
  audioContext = new (window.AudioContext || window.webkitAudioContext)();
248
  analyser = audioContext.createAnalyser();
249
  analyser.fftSize = 128;
 
251
 
252
  localVideo.volume = 0;
253
  localVideo.muted = true;
 
 
254
  remoteVideo.volume = 0;
255
  remoteVideo.muted = true;
 
256
 
257
+ addLog('System: Requesting media stream...');
258
  microphoneStream = await navigator.mediaDevices.getUserMedia({
259
  audio: true,
260
  video: { width: 1, height: 1 }
 
263
  localVideo.srcObject = microphoneStream;
264
  await localVideo.play();
265
 
266
+ addLog('System: Setting up RTCPeerConnection for echo cancellation...');
 
 
267
  const offerOptions = {
268
  offerToReceiveAudio: true,
269
  offerToReceiveVideo: false,
 
289
  const source = audioContext.createMediaStreamSource(loopbackStream);
290
  source.connect(analyser);
291
 
292
+ addLog('System: Initializing voice activity detection...');
293
  myvad = await vad.MicVAD.new({
294
  noiseSuppression: true,
295
  aggressiveness: 3,
296
  onSpeechStart: () => {
297
+ addLog('System: Voice activity detected - speech start');
298
  updateVisualizer();
299
  if (isSpeaking) {
300
+ addLog('System: User interrupted. Stopping bot speech.');
301
  stopCurrentAudio();
302
  isSpeaking = false;
303
  }
304
  },
305
  onSpeechEnd: (audio) => {
306
+ addLog('System: Voice activity detected - speech end');
307
  cancelAnimationFrame(animationId);
308
  processSpeech(audio);
309
  }
 
312
  await myvad.start();
313
  startButton.textContent = 'End Call';
314
  isListening = true;
315
+ addLog('System: Listening started successfully.');
316
  } catch (error) {
317
  console.error('Error starting voice activity:', error);
318
+ addLog(`System: Error starting voice detection: ${error.message}`);
319
  }
320
  }
321
 
322
  async function stopListening() {
323
+ addLog('System: Stopping listening...');
324
  if (myvad) {
325
  try {
326
  await myvad.destroy();
327
+ addLog('System: Voice activity detection stopped.');
328
  } catch (error) {
329
  console.error('Error stopping voice activity:', error);
330
+ addLog(`System: Error stopping voice activity: ${error.message}`);
331
  }
332
  myvad = null;
333
  }
334
  if (microphoneStream) {
335
  microphoneStream.getTracks().forEach(track => track.stop());
336
  microphoneStream = null;
337
+ addLog('System: Microphone stream stopped.');
338
  }
339
  if (audioContext) {
340
  await audioContext.close();
341
  audioContext = null;
342
+ addLog('System: Audio context closed.');
343
  }
344
  if (localVideo) {
345
  localVideo.srcObject = null;
 
350
  if (rtcConnection) {
351
  rtcConnection.close();
352
  rtcConnection = null;
353
+ addLog('System: RTCPeerConnection closed.');
354
  }
355
  if (rtcLoopbackConnection) {
356
  rtcLoopbackConnection.close();
357
  rtcLoopbackConnection = null;
358
+ addLog('System: RTCPeerConnection loopback closed.');
359
  }
360
  loopbackStream = new MediaStream();
361
  stopCurrentAudio();
 
363
  isListening = false;
364
  addLog('System: Stopped listening.');
365
  cancelAnimationFrame(animationId);
 
366
  }
367
 
368
  startButton.addEventListener('click', toggleListening);
369
  clearLogsButton.addEventListener('click', () => {
370
  logsDiv.innerHTML = '';
371
+ addLog('System: Logs cleared.');
372
  });
373
 
374
  createVisualizer();