Spaces:

atlury
/

digitalhuman

Running

App Files Files Community

atlury committed on Sep 16, 2024

Commit

536e020

verified ·

1 Parent(s): eacac69

Update index.html

Browse files

Files changed (1) hide show

index.html +76 -149

index.html CHANGED Viewed

@@ -7,139 +7,23 @@
     <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
     <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
     <script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
     <style>
-        body {
-            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
-            margin: 0;
-            padding: 20px;
-            background-color: #1a1a1a;
-            color: #f0f0f0;
-        }
-        .container {
-            max-width: 800px;
-            margin: 0 auto;
-        }
-        h1 {
-            color: #ffd700;
-            text-align: center;
-            margin-bottom: 10px;
-        }
-        .subtitle {
-            text-align: center;
-            color: #ffd700;
-            margin-bottom: 20px;
-        }
-        #chat-container {
-            display: flex;
-            flex-direction: column;
-            height: 70vh;
-        }
-        #conversation {
-            flex-grow: 1;
-            border: 1px solid #444;
-            padding: 10px;
-            overflow-y: scroll;
-            background-color: #2a2a2a;
-            border-radius: 5px;
-            margin-bottom: 20px;
-        }
-        #controls {
-            display: flex;
-            justify-content: center;
-            margin-bottom: 20px;
-        }
-        button {
-            font-size: 18px;
-            padding: 10px 20px;
-            background-color: #ffd700;
-            color: #1a1a1a;
-            border: none;
-            border-radius: 5px;
-            cursor: pointer;
-            transition: background-color 0.3s;
-        }
-        button:hover {
-            background-color: #ffec8b;
-        }
-        button:disabled {
-            background-color: #666;
-            cursor: not-allowed;
-        }
-        #visualizer {
             width: 100%;
-            height: 100px;
-            background-color: #2a2a2a;
             border-radius: 5px;
             overflow: hidden;
-            margin-bottom: 20px;
         }
-        .bar {
-            width: 5px;
-            height: 100%;
             background-color: #ffd700;
-            display: inline-block;
-            margin-right: 1px;
-        }
-        #loading {
-            position: fixed;
-            top: 0;
-            left: 0;
-            width: 100%;
-            height: 100%;
-            background-color: rgba(0, 0, 0, 0.8);
-            display: flex;
-            justify-content: center;
-            align-items: center;
-            z-index: 1000;
-        }
-        .spinner {
-            width: 50px;
-            height: 50px;
-            border: 5px solid #f3f3f3;
-            border-top: 5px solid #ffd700;
-            border-radius: 50%;
-            animation: spin 1s linear infinite;
-        }
-        @keyframes spin {
-            0% { transform: rotate(0deg); }
-            100% { transform: rotate(360deg); }
-        }
-        #configuration {
-            margin-bottom: 20px;
-        }
-        select {
-            width: 100%;
-            padding: 10px;
-            font-size: 16px;
-            background-color: #2a2a2a;
-            color: #f0f0f0;
-            border: 1px solid #444;
-            border-radius: 5px;
-        }
-        #model-info {
-            margin-top: 10px;
-            font-size: 14px;
-            color: #aaa;
-        }
-        #logs {
-            background-color: #2a2a2a;
-            border: 1px solid #444;
-            border-radius: 5px;
-            padding: 10px;
-            height: 200px;
-            overflow-y: scroll;
-            font-family: monospace;
-            font-size: 14px;
-        }
-        #clear-logs {
-            margin-top: 10px;
-            font-size: 14px;
-            padding: 5px 10px;
-        }
-        #localVideo, #remoteVideo {
-            display: none;
         }
     </style>
 </head>
@@ -163,6 +47,9 @@
                 <div id="model-info">
                     TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/tiny-llm
                 </div>
             </div>
             <div id="visualizer"></div>
             <div id="conversation"></div>
@@ -178,8 +65,6 @@
         import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
         env.localModelPath = './models';
-        // Configure environment before initializing pipelines
         env.backends = ['wasm'];
         env.wasm = env.wasm || {};
         env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
@@ -194,6 +79,7 @@
         const clearLogsButton = document.getElementById('clear-logs');
         const localVideo = document.getElementById('localVideo');
         const remoteVideo = document.getElementById('remoteVideo');
         let myvad;
         let sttPipeline;
@@ -233,31 +119,64 @@
         async function initializePipelines() {
             try {
-                [sttPipeline, ttsPipeline, llmPipeline] = await Promise.all([
-                    pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en', { quantized: true }),
-                    pipeline('text-to-speech', 'Xenova/mms-tts-eng', { quantized: true }),
-                    pipeline('text-generation', 'Xenova/tiny-llm')
-                ]);
-                addLog('System: Digital Human Voice Chat initialized with TinyLLM. Click "Begin Call" to start.');
                 startButton.disabled = false;
                 loadingDiv.style.display = 'none';
             } catch (error) {
                 console.error('Error initializing pipelines:', error);
-                addLog('System: Error initializing Digital Human Voice Chat. Please check the console for details.');
                 loadingDiv.style.display = 'none';
             }
         }
         async function processSpeech(audio) {
             try {
                 if (!sttPipeline || !ttsPipeline || !llmPipeline) {
                     throw new Error('Pipelines not initialized');
                 }
                 const transcription = await sttPipeline(audio);
                 addLog(`User: ${transcription.text}`);
                 const llmResponse = await llmPipeline(transcription.text, {
                     max_new_tokens: 50,
                     temperature: 0.7
@@ -265,13 +184,15 @@
                 const botResponse = llmResponse[0].generated_text;
                 addLog(`Bot: ${botResponse}`);
                 isSpeaking = true;
                 const speechOutput = await ttsPipeline(botResponse);
                 await playAudio(speechOutput.audio);
                 isSpeaking = false;
             } catch (error) {
                 console.error('Error processing speech:', error);
-                addLog('System: Error processing speech. Please try again.');
             }
         }
@@ -283,6 +204,7 @@
             messageElement.textContent = logMessage;
             logsDiv.appendChild(messageElement);
             logsDiv.scrollTop = logsDiv.scrollHeight;
         }
         function playAudio(audioArray) {
@@ -321,6 +243,7 @@
         async function startListening() {
             try {
                 audioContext = new (window.AudioContext || window.webkitAudioContext)();
                 analyser = audioContext.createAnalyser();
                 analyser.fftSize = 128;
@@ -328,12 +251,10 @@
                 localVideo.volume = 0;
                 localVideo.muted = true;
-                document.getElementById('localVideo').volume = 0;
                 remoteVideo.volume = 0;
                 remoteVideo.muted = true;
-                document.getElementById('remoteVideo').volume = 0;
                 microphoneStream = await navigator.mediaDevices.getUserMedia({
                     audio: true,
                     video: { width: 1, height: 1 }
@@ -342,9 +263,7 @@
                 localVideo.srcObject = microphoneStream;
                 await localVideo.play();
-                console.log('Active constraints:', microphoneStream.getAudioTracks()[0].getConstraints());
-                console.log('Microphone stream settings:', microphoneStream.getAudioTracks()[0].getSettings());
                 const offerOptions = {
                     offerToReceiveAudio: true,
                     offerToReceiveVideo: false,
@@ -370,20 +289,21 @@
                 const source = audioContext.createMediaStreamSource(loopbackStream);
                 source.connect(analyser);
                 myvad = await vad.MicVAD.new({
                     noiseSuppression: true,
                     aggressiveness: 3,
                     onSpeechStart: () => {
-                        addLog('--- Voice activity: speech start');
                         updateVisualizer();
                         if (isSpeaking) {
-                            addLog('User interrupted. Stopping bot speech.');
                             stopCurrentAudio();
                             isSpeaking = false;
                         }
                     },
                     onSpeechEnd: (audio) => {
-                        addLog('--- Voice activity: speech end');
                         cancelAnimationFrame(animationId);
                         processSpeech(audio);
                     }
@@ -392,29 +312,34 @@
                 await myvad.start();
                 startButton.textContent = 'End Call';
                 isListening = true;
-                addLog('System: Listening...');
             } catch (error) {
                 console.error('Error starting voice activity:', error);
-                addLog('System: Error starting voice detection. Please check your microphone and try again.');
             }
         }
         async function stopListening() {
             if (myvad) {
                 try {
                     await myvad.destroy();
                 } catch (error) {
                     console.error('Error stopping voice activity:', error);
                 }
                 myvad = null;
             }
             if (microphoneStream) {
                 microphoneStream.getTracks().forEach(track => track.stop());
                 microphoneStream = null;
             }
             if (audioContext) {
                 await audioContext.close();
                 audioContext = null;
             }
             if (localVideo) {
                 localVideo.srcObject = null;
@@ -425,10 +350,12 @@
             if (rtcConnection) {
                 rtcConnection.close();
                 rtcConnection = null;
             }
             if (rtcLoopbackConnection) {
                 rtcLoopbackConnection.close();
                 rtcLoopbackConnection = null;
             }
             loopbackStream = new MediaStream();
             stopCurrentAudio();
@@ -436,12 +363,12 @@
             isListening = false;
             addLog('System: Stopped listening.');
             cancelAnimationFrame(animationId);
-            addLog('System: Microphone closed');
         }
         startButton.addEventListener('click', toggleListening);
         clearLogsButton.addEventListener('click', () => {
             logsDiv.innerHTML = '';
         });
         createVisualizer();

     <script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.js"></script>
     <script src="https://cdn.jsdelivr.net/npm/@ricky0123/[email protected]/dist/bundle.min.js"></script>
     <script src="https://cdn.jsdelivr.net/npm/@xenova/[email protected]"></script>
     <style>
+        /* ... (previous styles remain unchanged) ... */
+        #model-progress {
             width: 100%;
+            background-color: #444;
             border-radius: 5px;
+            margin-top: 10px;
             overflow: hidden;
         }
+        #model-progress-bar {
+            width: 0;
+            height: 20px;
             background-color: #ffd700;
+            text-align: center;
+            line-height: 20px;
+            color: #1a1a1a;
         }
     </style>
 </head>
                 <div id="model-info">
                     TTS: Xenova/mms-tts-eng / STT: Xenova/whisper-tiny.en / LLM: Xenova/tiny-llm
                 </div>
+                <div id="model-progress">
+                    <div id="model-progress-bar"></div>
+                </div>
             </div>
             <div id="visualizer"></div>
             <div id="conversation"></div>
         import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]';
         env.localModelPath = './models';
         env.backends = ['wasm'];
         env.wasm = env.wasm || {};
         env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/@xenova/[email protected]/';
         const clearLogsButton = document.getElementById('clear-logs');
         const localVideo = document.getElementById('localVideo');
         const remoteVideo = document.getElementById('remoteVideo');
+        const modelProgressBar = document.getElementById('model-progress-bar');
         let myvad;
         let sttPipeline;
         /**
          * Loads the speech-to-text, text-to-speech and language-model pipelines
          * one at a time, reporting each step in the log and on the progress bar.
          *
          * On success the module-level `sttPipeline`, `ttsPipeline` and
          * `llmPipeline` variables are populated and the start button is enabled.
          * On failure the error is logged and the start button is left as it was.
          * The loading overlay is hidden in both cases.
          *
          * @returns {Promise<void>}
          */
         async function initializePipelines() {
             const tasks = [
                 { name: 'STT', task: 'automatic-speech-recognition', model: 'Xenova/whisper-tiny.en' },
                 { name: 'TTS', task: 'text-to-speech', model: 'Xenova/mms-tts-eng' },
                 { name: 'LLM', task: 'text-generation', model: 'Xenova/tiny-llm' }
             ];
             try {
                 addLog('System: Initializing pipelines...');
                 for (const [index, task] of tasks.entries()) {
                     addLog(`System: Loading ${task.name} model...`);
                     // Each model owns an equal slice of the bar; start at the slice's left edge.
                     updateProgressBar((index / tasks.length) * 100);
                     const pipelineInstance = await pipeline(task.task, task.model, {
                         quantized: true,
                         progress_callback: (event) => {
                             // The callback receives an event object, not a number. Only some
                             // events carry a numeric `progress` (a 0-100 percentage); adding
                             // the object itself to `index` would yield NaN.
                             const percent = event && typeof event === 'object' ? event.progress : undefined;
                             if (typeof percent !== 'number' || !Number.isFinite(percent)) {
                                 return;
                             }
                             const fraction = Math.min(1, Math.max(0, percent / 100));
                             updateProgressBar(((index + fraction) / tasks.length) * 100);
                         }
                     });
                     addLog(`System: ${task.name} model loaded successfully.`);
                     switch (task.name) {
                         case 'STT':
                             sttPipeline = pipelineInstance;
                             break;
                         case 'TTS':
                             ttsPipeline = pipelineInstance;
                             break;
                         case 'LLM':
                             llmPipeline = pipelineInstance;
                             break;
                     }
                 }
                 updateProgressBar(100);
                 addLog('System: All pipelines initialized successfully.');
                 startButton.disabled = false;
             } catch (error) {
                 console.error('Error initializing pipelines:', error);
                 // Rejections are not guaranteed to be Error instances.
                 const reason = error instanceof Error ? error.message : String(error);
                 addLog(`System: Error initializing pipelines: ${reason}`);
             } finally {
                 // Hide the loading overlay whether loading succeeded or failed.
                 loadingDiv.style.display = 'none';
             }
         }
+        function updateProgressBar(percentage) {
+            modelProgressBar.style.width = `${percentage}%`;
+            modelProgressBar.textContent = `${Math.round(percentage)}%`;
+        }
         async function processSpeech(audio) {
             try {
                 if (!sttPipeline || !ttsPipeline || !llmPipeline) {
                     throw new Error('Pipelines not initialized');
                 }
+                addLog('System: Processing speech...');
                 const transcription = await sttPipeline(audio);
                 addLog(`User: ${transcription.text}`);
+                addLog('System: Generating LLM response...');
                 const llmResponse = await llmPipeline(transcription.text, {
                     max_new_tokens: 50,
                     temperature: 0.7
                 const botResponse = llmResponse[0].generated_text;
                 addLog(`Bot: ${botResponse}`);
+                addLog('System: Generating speech from response...');
                 isSpeaking = true;
                 const speechOutput = await ttsPipeline(botResponse);
                 await playAudio(speechOutput.audio);
                 isSpeaking = false;
+                addLog('System: Speech playback complete.');
             } catch (error) {
                 console.error('Error processing speech:', error);
+                addLog(`System: Error processing speech: ${error.message}`);
             }
         }
             messageElement.textContent = logMessage;
             logsDiv.appendChild(messageElement);
             logsDiv.scrollTop = logsDiv.scrollHeight;
+            console.log(logMessage);
         }
         function playAudio(audioArray) {
         async function startListening() {
             try {
+                addLog('System: Initializing audio context and stream...');
                 audioContext = new (window.AudioContext || window.webkitAudioContext)();
                 analyser = audioContext.createAnalyser();
                 analyser.fftSize = 128;
                 localVideo.volume = 0;
                 localVideo.muted = true;
                 remoteVideo.volume = 0;
                 remoteVideo.muted = true;
+                addLog('System: Requesting media stream...');
                 microphoneStream = await navigator.mediaDevices.getUserMedia({
                     audio: true,
                     video: { width: 1, height: 1 }
                 localVideo.srcObject = microphoneStream;
                 await localVideo.play();
+                addLog('System: Setting up RTCPeerConnection for echo cancellation...');
                 const offerOptions = {
                     offerToReceiveAudio: true,
                     offerToReceiveVideo: false,
                 const source = audioContext.createMediaStreamSource(loopbackStream);
                 source.connect(analyser);
+                addLog('System: Initializing voice activity detection...');
                 myvad = await vad.MicVAD.new({
                     noiseSuppression: true,
                     aggressiveness: 3,
                     onSpeechStart: () => {
+                        addLog('System: Voice activity detected - speech start');
                         updateVisualizer();
                         if (isSpeaking) {
+                            addLog('System: User interrupted. Stopping bot speech.');
                             stopCurrentAudio();
                             isSpeaking = false;
                         }
                     },
                     onSpeechEnd: (audio) => {
+                        addLog('System: Voice activity detected - speech end');
                         cancelAnimationFrame(animationId);
                         processSpeech(audio);
                     }
                 await myvad.start();
                 startButton.textContent = 'End Call';
                 isListening = true;
+                addLog('System: Listening started successfully.');
             } catch (error) {
                 console.error('Error starting voice activity:', error);
+                addLog(`System: Error starting voice detection: ${error.message}`);
             }
         }
         async function stopListening() {
+            addLog('System: Stopping listening...');
             if (myvad) {
                 try {
                     await myvad.destroy();
+                    addLog('System: Voice activity detection stopped.');
                 } catch (error) {
                     console.error('Error stopping voice activity:', error);
+                    addLog(`System: Error stopping voice activity: ${error.message}`);
                 }
                 myvad = null;
             }
             if (microphoneStream) {
                 microphoneStream.getTracks().forEach(track => track.stop());
                 microphoneStream = null;
+                addLog('System: Microphone stream stopped.');
             }
             if (audioContext) {
                 await audioContext.close();
                 audioContext = null;
+                addLog('System: Audio context closed.');
             }
             if (localVideo) {
                 localVideo.srcObject = null;
             if (rtcConnection) {
                 rtcConnection.close();
                 rtcConnection = null;
+                addLog('System: RTCPeerConnection closed.');
             }
             if (rtcLoopbackConnection) {
                 rtcLoopbackConnection.close();
                 rtcLoopbackConnection = null;
+                addLog('System: RTCPeerConnection loopback closed.');
             }
             loopbackStream = new MediaStream();
             stopCurrentAudio();
             isListening = false;
             addLog('System: Stopped listening.');
             cancelAnimationFrame(animationId);
         }
         startButton.addEventListener('click', toggleListening);
         clearLogsButton.addEventListener('click', () => {
             logsDiv.innerHTML = '';
+            addLog('System: Logs cleared.');
         });
         createVisualizer();