digitalhuman / digital.human.audio.js
atlury's picture
Upload 13 files
076b6ec verified
raw
history blame
23.1 kB
// digital.human.audio.js
import * as webllm from "https://esm.run/@mlc-ai/web-llm";
// Ensure the script runs after the DOM is fully loaded
document.addEventListener("DOMContentLoaded", () => {
// Initialize the Digital Human Voice section
// Conversation history sent to the model on every request.
// Seeded with the system prompt; user/assistant turns are pushed on later.
const voiceMessages = [
{
content: "You are Aged Guru, an intelligent assistant skilled in digital human voice interactions. Provide insightful and comprehensive answers using human-like voice responses.",
role: "system"
}
];
// All model ids from WebLLM's prebuilt app config; used to populate the
// model-selection dropdown further below.
const voiceAvailableModels = webllm.prebuiltAppConfig.model_list.map(
(m) => m.model_id
);
let voiceSelectedModel = "gemma-2-2b-it-q4f16_1-MLC-1k"; // Default model
// Progress callback wired into the MLC engine: mirrors model-initialization
// progress to the console and the on-page log panel.
function voiceUpdateEngineInitProgressCallback(report) {
  const { progress, text } = report;
  console.log("Digital Human Voice Initialize", progress);
  logMessage(`Model Initialization Progress: ${text}`, "system");
}
// Engine instance that downloads and runs the selected model in-browser;
// initialization progress is routed through the callback above.
const voiceEngine = new webllm.MLCEngine();
voiceEngine.setInitProgressCallback(voiceUpdateEngineInitProgressCallback);
let voiceIsGenerating = false; // Flag to prevent multiple generations
// Loads the currently selected model into the WebLLM engine and, on success,
// unlocks the UI controls that require a live model.
async function voiceInitializeWebLLMEngine() {
  const byId = (id) => document.getElementById(id);
  logMessage("Model initialization started.", "system");
  byId("voice-loading-spinner").classList.remove("hidden"); // Show spinner
  voiceSelectedModel = byId("voice-model-selection").value;
  // Generation settings: slightly cooled temperature for precise answers.
  const config = {
    temperature: 0.7,
    top_p: 0.9
  };
  try {
    await voiceEngine.reload(voiceSelectedModel, config);
    byId("voice-selected-model").textContent = voiceSelectedModel;
    // Everything below needs a loaded model behind it.
    byId("voice-start_button").disabled = false;
    byId("voice-text-input").disabled = false;
    byId("voice-submit-button").disabled = false;
    byId("voice-speech-controls").disabled = false;
    byId("voice-configuration").classList.remove("hidden");
    logMessage("Model initialized successfully.", "system");
  } catch (error) {
    console.error("Error initializing the model:", error);
    alert("Failed to initialize the model. Please try again.");
    logMessage("Failed to initialize the model.", "error");
  } finally {
    byId("voice-loading-spinner").classList.add("hidden"); // Hide spinner
  }
}
/**
 * Streams a chat completion from the engine.
 * @param {Array<{role: string, content: string}>} messages - Full conversation history.
 * @param {(partial: string) => void} onUpdate - Called with the accumulated text as new chunks arrive.
 * @param {(final: string) => void} onFinish - Called once with the engine's final message.
 * @param {(err: Error) => void} onError - Called when generation fails.
 */
async function voiceStreamingGenerating(messages, onUpdate, onFinish, onError) {
  if (voiceIsGenerating) {
    console.warn("Voice Generation already in progress.");
    return;
  }
  voiceIsGenerating = true;
  try {
    let curMessage = "";
    const completion = await voiceEngine.chat.completions.create({
      stream: true,
      messages
    });
    for await (const chunk of completion) {
      // Guard: some chunks (e.g. the final stop chunk) may carry no delta;
      // the old unguarded access could throw here mid-stream.
      const curDelta = chunk.choices[0]?.delta?.content;
      if (curDelta) {
        curMessage += curDelta;
        onUpdate(curMessage); // only notify when there is actually new text
      }
    }
    const finalMessage = await voiceEngine.getMessage();
    console.log(`Voice Generated final message: ${finalMessage}`); // Debugging
    onFinish(finalMessage);
    logMessage("Response generated successfully.", "system");
  } catch (err) {
    console.error(err);
    onError(err);
    logMessage("An error occurred during response generation.", "error");
  } finally {
    voiceIsGenerating = false;
  }
}
// Flag to track the last input method
// (true → last user turn came from speech; gates whether replies are read aloud)
let voiceLastInputWasVoice = false;
/**
 * Appends a chat bubble to the voice chat box and scrolls it into view.
 * Duplicate consecutive assistant messages are not re-appended, but are still
 * spoken aloud when the last user input came from the microphone.
 * @param {{role: string, content: string}} message - The turn to render.
 */
function voiceAppendMessage(message) {
  console.log(`Voice Appending message: ${message.content} (Role: ${message.role})`); // Debugging
  const voiceChatBox = document.getElementById("voice-chat-box");
  const isAssistant = message.role === "assistant";
  // Speak assistant output only for real content (not the placeholder) and
  // only when the user asked by voice — computed once instead of the old
  // duplicated condition that also redundantly re-checked the role.
  const shouldSpeak = isAssistant && message.content !== "typing..." && voiceLastInputWasVoice;
  if (isAssistant) {
    const existingMessages = voiceChatBox.querySelectorAll(".message");
    const lastMessage = existingMessages[existingMessages.length - 1];
    if (lastMessage && lastMessage.textContent === message.content) {
      console.warn("Duplicate assistant message detected in Voice section, skipping append.");
      if (shouldSpeak) {
        voiceSpeak(message.content);
      }
      return; // Exit to avoid appending the same message twice
    }
  }
  const container = document.createElement("div");
  container.classList.add("message-container");
  container.classList.add(message.role === "user" ? "user" : "assistant");
  const newMessage = document.createElement("div");
  newMessage.classList.add("message");
  newMessage.textContent = message.content;
  container.appendChild(newMessage);
  voiceChatBox.appendChild(container);
  voiceChatBox.scrollTop = voiceChatBox.scrollHeight;
  if (shouldSpeak) {
    voiceSpeak(message.content);
  }
}
/**
 * Replaces the text of the most recent chat bubble (used while streaming).
 * @param {string} content - The accumulated text so far.
 */
function voiceUpdateLastMessage(content) {
  const messageDoms = document.getElementById("voice-chat-box").querySelectorAll(".message");
  const lastMessageDom = messageDoms[messageDoms.length - 1];
  // Guard: before any message has been appended there is nothing to update
  // (the old code threw on an empty chat box).
  if (!lastMessageDom) {
    return;
  }
  lastMessageDom.textContent = content;
}
/**
 * Handles a finalized speech-recognition transcript: records the user turn,
 * shows a "typing..." placeholder, and streams the assistant reply.
 * @param {string} transcript - Raw transcript from speech recognition.
 */
function voiceOnSpeechRecognized(transcript) {
  const input = transcript.trim();
  // Guard first: the old code built the message object before checking for
  // an empty transcript.
  if (input.length === 0) {
    return;
  }
  const message = {
    content: input,
    role: "user"
  };
  voiceLastInputWasVoice = true; // Set flag as voice input
  console.log(`Voice input received: ${input}`); // Debugging
  // Lock input controls while a response is being generated.
  document.getElementById("voice-start_button").disabled = true;
  document.getElementById("voice-submit-button").disabled = true;
  voiceMessages.push(message);
  voiceAppendMessage(message);
  logMessage(`User (Voice): ${input}`, "user");
  // Append "typing..." placeholder
  const aiPlaceholder = {
    content: "typing...",
    role: "assistant"
  };
  voiceAppendMessage(aiPlaceholder);
  logMessage("VoiceBot is typing...", "system");
  const onFinishGenerating = (finalMessage) => {
    console.log(`Voice Finishing generation with message: ${finalMessage}`); // Debugging
    // Remove the "typing..." placeholder before appending the real reply.
    const voiceChatBox = document.getElementById("voice-chat-box");
    const lastMessageContainer = voiceChatBox.lastElementChild;
    if (lastMessageContainer && lastMessageContainer.querySelector(".message").textContent === "typing...") {
      voiceChatBox.removeChild(lastMessageContainer);
    }
    const aiMessage = {
      content: finalMessage,
      role: "assistant"
    };
    voiceAppendMessage(aiMessage);
    logMessage(`VoiceBot: ${finalMessage}`, "assistant");
    // Re-enable controls now that generation has finished.
    document.getElementById("voice-start_button").disabled = false;
    document.getElementById("voice-submit-button").disabled = false;
    voiceEngine.runtimeStatsText().then((statsText) => {
      document.getElementById("voice-chat-stats").classList.remove("hidden");
      document.getElementById("voice-chat-stats").textContent = statsText;
      logMessage(`Runtime Stats: ${statsText}`, "system");
    });
  };
  voiceStreamingGenerating(
    voiceMessages,
    voiceUpdateLastMessage,
    onFinishGenerating,
    (err) => {
      console.error(err);
      alert("An error occurred while generating the response. Please try again.");
      logMessage("Error during response generation.", "error");
      document.getElementById("voice-start_button").disabled = false;
      document.getElementById("voice-submit-button").disabled = false;
    }
  );
}
// Speech Recognition Code for Voice
let voiceRecognizing = false; // true while a recognition session is active
let voiceIgnore_onend; // set when an error handler already dealt with session end
let voiceFinal_transcript = ''; // accumulates finalized recognition results
let voiceRecognition; // assigned below when the Web Speech API is available
// Toggles speech recognition: a click while listening stops the session;
// otherwise a fresh en-US recognition run is started.
function voiceStartButton(event) {
  if (voiceRecognizing) {
    voiceRecognition.stop();
    return;
  }
  voiceFinal_transcript = '';
  voiceRecognition.lang = 'en-US';
  voiceRecognition.start();
  voiceIgnore_onend = false;
  document.getElementById("voice-start_button").classList.add("mic-animate");
  logMessage("Voice input started.", "system");
}
// Wire up browser speech recognition. Prefer the standard constructor and
// fall back to the WebKit-prefixed one (Chromium still ships the prefix);
// the old code only checked `webkitSpeechRecognition`.
const SpeechRecognitionCtor = window.SpeechRecognition || window.webkitSpeechRecognition;
if (!SpeechRecognitionCtor) {
  alert("Web Speech API is not supported by this browser.");
  logMessage("Web Speech API is not supported by this browser.", "error");
} else {
  voiceRecognition = new SpeechRecognitionCtor();
  voiceRecognition.continuous = false; // Non-continuous recognition
  voiceRecognition.interimResults = false; // Get only final results
  voiceRecognition.onstart = function () {
    voiceRecognizing = true;
    logMessage("Speech recognition started.", "system");
  };
  // Map recognizer errors to user-visible alerts; strict switch replaces the
  // old chain of loose `==` comparisons.
  voiceRecognition.onerror = function (event) {
    switch (event.error) {
      case 'no-speech':
        document.getElementById("voice-start_button").classList.remove("mic-animate");
        alert('No speech was detected in Voice section.');
        logMessage("No speech detected.", "error");
        voiceIgnore_onend = true;
        break;
      case 'audio-capture':
        document.getElementById("voice-start_button").classList.remove("mic-animate");
        alert('No microphone was found in Voice section.');
        logMessage("No microphone found.", "error");
        voiceIgnore_onend = true;
        break;
      case 'not-allowed':
        alert('Permission to use microphone was denied in Voice section.');
        logMessage("Microphone permission denied.", "error");
        voiceIgnore_onend = true;
        break;
    }
  };
  voiceRecognition.onend = function () {
    voiceRecognizing = false;
    document.getElementById("voice-start_button").classList.remove("mic-animate");
    logMessage("Speech recognition ended.", "system");
    // An error handler already reported the problem — stay quiet.
    if (voiceIgnore_onend) {
      return;
    }
    if (!voiceFinal_transcript) {
      logMessage("No transcript captured.", "error");
      return;
    }
    // Process the final transcript
    voiceOnSpeechRecognized(voiceFinal_transcript);
  };
  voiceRecognition.onresult = function (event) {
    // Concatenate finalized results only (interim results are disabled above).
    for (let i = event.resultIndex; i < event.results.length; ++i) {
      if (event.results[i].isFinal) {
        voiceFinal_transcript += event.results[i][0].transcript;
      }
    }
    voiceFinal_transcript = voiceFinal_transcript.trim();
    logMessage(`Recognized Speech: ${voiceFinal_transcript}`, "user");
  };
}
// Microphone button toggles listening.
document.getElementById("voice-start_button").addEventListener("click", voiceStartButton);
// Initialize Model Selection: one <option> per prebuilt model id.
const voiceModelSelect = document.getElementById("voice-model-selection");
for (const modelId of voiceAvailableModels) {
  const option = document.createElement("option");
  option.value = modelId;
  option.textContent = modelId;
  voiceModelSelect.appendChild(option);
}
voiceModelSelect.value = voiceSelectedModel;
// Model list is ready, so the Download Model button becomes usable.
document.getElementById("voice-download").disabled = false;
document.getElementById("voice-download").addEventListener("click", () => {
  voiceInitializeWebLLMEngine().then(() => {
    document.getElementById("voice-start_button").disabled = false;
    // Rate/pitch sliders only make sense once a model is live.
    document.getElementById("voice-speech-rate").disabled = false;
    document.getElementById("voice-speech-pitch").disabled = false;
    logMessage("Model download initiated.", "system");
  });
});
// Clear the on-page log panel.
document.getElementById("voice-clear-logs").addEventListener("click", () => {
  document.getElementById("voice-logs").innerHTML = '';
  logMessage("Logs cleared.", "system");
});
// ===== TTS Integration =====
// Initialize Speech Synthesis
// Shared utterance object: stores the user's chosen voice/rate/pitch;
// voiceSpeak() copies these settings onto per-sentence utterances.
let voiceSpeech = new SpeechSynthesisUtterance();
voiceSpeech.lang = "en";
let voiceVoices = []; // filled asynchronously once the browser reports voices
// Use addEventListener instead of directly assigning to onvoiceschanged
window.speechSynthesis.addEventListener("voiceschanged", () => {
voiceVoices = window.speechSynthesis.getVoices();
voicePopulateVoices();
});
// Rebuilds the voice <select> from the current voice list, restoring the
// voice saved in localStorage when it is still available; otherwise the
// first reported voice becomes the default.
function voicePopulateVoices() {
  const voiceSelect = document.getElementById("voice-tools");
  voiceSelect.innerHTML = ''; // Clear existing options
  voiceVoices.forEach((voice, index) => {
    voiceSelect.appendChild(new Option(voice.name, index));
  });
  if (voiceVoices.length === 0) {
    return;
  }
  const savedVoice = localStorage.getItem("voiceSelectedVoice");
  if (savedVoice !== null && voiceVoices[savedVoice]) {
    voiceSpeech.voice = voiceVoices[savedVoice];
    voiceSelect.value = savedVoice;
  } else {
    voiceSpeech.voice = voiceVoices[0];
  }
}
// Voice Selection Event Listener: apply the chosen voice immediately and
// remember it across sessions.
document.getElementById("voice-tools").addEventListener("change", (e) => {
  const selectedVoiceIndex = e.target.value;
  voiceSpeech.voice = voiceVoices[selectedVoiceIndex];
  localStorage.setItem("voiceSelectedVoice", selectedVoiceIndex);
  logMessage(`Voice changed to: ${voiceVoices[selectedVoiceIndex].name}`, "system");
});
// Function to Speak Text with Voice Selection and Handling Large Texts
/**
 * Speaks `text` via the Web Speech synthesis API using the selected voice
 * and the rate/pitch sliders. The text is split into sentences so each
 * utterance stays short.
 * @param {string} text - Assistant reply to vocalize.
 */
function voiceSpeak(text) {
  if (!window.speechSynthesis) {
    console.warn("Speech Synthesis not supported in this browser for Voice section.");
    logMessage("Speech Synthesis not supported in this browser.", "error");
    return;
  }
  // Show spinner and enable Stop button
  document.getElementById("voice-loading-spinner").classList.remove("hidden");
  document.getElementById("voice-stop_button").disabled = false;
  logMessage("TTS started.", "system");
  // Retrieve the currently selected voice
  const selectedVoice = voiceSpeech.voice;
  // Split into sentences. The trailing `[.!?]*` keeps a final fragment that
  // lacks terminal punctuation — the old `[.!?]+` pattern silently dropped it.
  const sentences = text.match(/[^.!?]+[.!?]*/g) || [text];
  let utterancesCount = sentences.length;
  // Read slider values once — they cannot change during this synchronous loop.
  const rate = parseFloat(document.getElementById("voice-speech-rate").value);
  const pitch = parseFloat(document.getElementById("voice-speech-pitch").value);
  sentences.forEach(sentence => {
    const utterance = new SpeechSynthesisUtterance(sentence.trim());
    // Assign the selected voice to the utterance
    if (selectedVoice) {
      utterance.voice = selectedVoice;
    }
    utterance.rate = rate; // Adjust the speaking rate (0.1 to 10)
    utterance.pitch = pitch; // Adjust the pitch (0 to 2)
    utterance.onstart = () => {
      console.log("Speech started:", sentence);
      logMessage(`TTS started: ${sentence.trim()}`, "system");
    };
    utterance.onend = () => {
      console.log("Speech ended:", sentence);
      logMessage(`TTS ended: ${sentence.trim()}`, "system");
      utterancesCount--;
      if (utterancesCount === 0) {
        // Hide spinner and disable Stop button when all utterances are done.
        document.getElementById("voice-loading-spinner").classList.add("hidden");
        document.getElementById("voice-stop_button").disabled = true;
        logMessage("All TTS messages have been spoken.", "system");
      }
    };
    utterance.onerror = (e) => {
      console.error("Speech Synthesis Error:", e);
      alert("An error occurred during speech synthesis. Please try again.");
      logMessage("Speech synthesis encountered an error.", "error");
      utterancesCount = 0; // stop the countdown and reset the UI immediately
      document.getElementById("voice-loading-spinner").classList.add("hidden");
      document.getElementById("voice-stop_button").disabled = true;
    };
    window.speechSynthesis.speak(utterance);
  });
}
// ===== New: Stop Speech Functionality =====
/**
 * Stops any ongoing speech synthesis and restores the idle UI state.
 */
function voiceStopSpeech() {
  if (!window.speechSynthesis.speaking) {
    return; // nothing playing — leave the UI untouched
  }
  window.speechSynthesis.cancel();
  document.getElementById("voice-loading-spinner").classList.add("hidden");
  document.getElementById("voice-stop_button").disabled = true;
  logMessage("Speech synthesis stopped by user.", "system");
}
// Event Listener for Stop Button
// Lets the user cut off in-flight TTS playback immediately.
document.getElementById("voice-stop_button").addEventListener("click", function () {
voiceStopSpeech();
});
// ===== New: Text Input Handling =====
/**
 * Handles a typed chat submission: records the user turn, shows a
 * "typing..." placeholder, and streams the assistant reply into the chat box.
 */
function voiceHandleTextSubmit() {
  const textInput = document.getElementById("voice-text-input");
  const input = textInput.value.trim();
  if (input.length === 0) {
    return;
  }
  textInput.value = ''; // Clear the input field
  const message = {
    content: input,
    role: "user"
  };
  console.log(`Voice Text input received: ${input}`); // Debugging
  logMessage(`User: ${input}`, "user");
  voiceLastInputWasVoice = false; // Typed input: replies stay silent
  document.getElementById("voice-submit-button").disabled = true; // Prevent double submits
  voiceMessages.push(message);
  voiceAppendMessage(message);
  // Append "typing..." placeholder
  const aiPlaceholder = {
    content: "typing...",
    role: "assistant"
  };
  voiceAppendMessage(aiPlaceholder);
  logMessage("VoiceBot is typing...", "system");
  const onFinishGenerating = (finalMessage) => {
    console.log(`Voice Finishing generation with message: ${finalMessage}`); // Debugging
    // Remove the "typing..." placeholder
    const voiceChatBox = document.getElementById("voice-chat-box");
    const lastMessageContainer = voiceChatBox.lastElementChild;
    if (lastMessageContainer && lastMessageContainer.querySelector(".message").textContent === "typing...") {
      voiceChatBox.removeChild(lastMessageContainer);
    }
    const aiMessage = {
      content: finalMessage,
      role: "assistant"
    };
    // voiceAppendMessage already triggers TTS when appropriate. The old extra
    // `if (voiceLastInputWasVoice) voiceSpeak(finalMessage)` here was dead in
    // the normal flow (flag set false above) and could double-speak the reply
    // if a voice turn raced with this typed one — removed.
    voiceAppendMessage(aiMessage);
    logMessage(`VoiceBot: ${finalMessage}`, "assistant");
    document.getElementById("voice-submit-button").disabled = false; // Re-enable after processing
    voiceEngine.runtimeStatsText().then((statsText) => {
      document.getElementById("voice-chat-stats").classList.remove("hidden");
      document.getElementById("voice-chat-stats").textContent = statsText;
      logMessage(`Runtime Stats: ${statsText}`, "system");
    });
  };
  voiceStreamingGenerating(
    voiceMessages,
    voiceUpdateLastMessage,
    onFinishGenerating,
    (err) => {
      console.error(err);
      alert("An error occurred while generating the response. Please try again.");
      logMessage("Error during response generation.", "error");
      document.getElementById("voice-submit-button").disabled = false;
    }
  );
}
// Event Listener for Submit Button
document.getElementById("voice-submit-button").addEventListener("click", () => {
  voiceHandleTextSubmit();
});
// Submit on Enter. `keydown` replaces the deprecated `keypress` event; the
// isComposing guard avoids submitting mid-IME composition.
document.getElementById("voice-text-input").addEventListener("keydown", (e) => {
  if (e.key === 'Enter' && !e.isComposing) {
    voiceHandleTextSubmit();
  }
});
// ===== Persisting User Preferences =====
// Restore saved voice/rate/pitch once the page has fully loaded.
window.addEventListener("load", () => {
  const savedVoice = localStorage.getItem("voiceSelectedVoice");
  if (savedVoice !== null && voiceVoices[savedVoice]) {
    document.getElementById("voice-tools").value = savedVoice;
    voiceSpeech.voice = voiceVoices[savedVoice];
    logMessage(`Loaded saved voice: ${voiceVoices[savedVoice].name}`, "system");
  }
  // Rate and pitch follow the same restore pattern.
  const restoreSlider = (storageKey, elementId, prop, label) => {
    const saved = localStorage.getItem(storageKey);
    if (saved === null) {
      return;
    }
    document.getElementById(elementId).value = saved;
    voiceSpeech[prop] = parseFloat(saved);
    logMessage(`Loaded saved speech ${label}: ${saved}`, "system");
  };
  restoreSlider("voiceSpeechRate", "voice-speech-rate", "rate", "rate");
  restoreSlider("voiceSpeechPitch", "voice-speech-pitch", "pitch", "pitch");
});
// Persist slider movements (rate and pitch share identical wiring).
const voiceBindSlider = (elementId, prop, storageKey, label) => {
  document.getElementById(elementId).addEventListener("input", (e) => {
    const value = e.target.value;
    voiceSpeech[prop] = parseFloat(value);
    localStorage.setItem(storageKey, value);
    logMessage(`Speech ${label} changed to: ${value}`, "system");
  });
};
voiceBindSlider("voice-speech-rate", "rate", "voiceSpeechRate", "rate");
voiceBindSlider("voice-speech-pitch", "pitch", "voiceSpeechPitch", "pitch");
// ===== Logging Function =====
/**
 * Logs messages to the #voice-logs container.
 * @param {string} message - The message to log.
 * @param {string} type - The type of message: 'user', 'assistant', 'system', 'error'.
 */
function logMessage(message, type) {
  // Per-type text colors; unknown types fall back to black.
  const LOG_COLORS = {
    user: "#00796B",
    assistant: "#004D40",
    system: "#555555",
    error: "#E53935"
  };
  const voiceLogs = document.getElementById("voice-logs");
  const logEntry = document.createElement("div");
  logEntry.classList.add("log-entry");
  logEntry.textContent = `[${type.toUpperCase()}] ${message}`;
  logEntry.style.color = LOG_COLORS[type] ?? "#000000";
  voiceLogs.appendChild(logEntry);
  // Keep the newest entry scrolled into view.
  voiceLogs.scrollTop = voiceLogs.scrollHeight;
}
// ===== TTS Integration Continued =====
// Optional: Global Listener to Detect When All Speech Has Finished
// NOTE(review): per the Web Speech API, 'end' events fire on individual
// SpeechSynthesisUtterance objects, not on the SpeechSynthesis instance, so
// this listener likely never fires. Completion is already handled by the
// per-utterance onend counter inside voiceSpeak() — confirm and consider
// removing this block.
window.speechSynthesis.addEventListener('end', () => {
console.log("All voice speech has been spoken.");
logMessage("All TTS messages have been spoken.", "system");
// Ensure Stop button is disabled after speech ends
document.getElementById("voice-stop_button").disabled = true;
});
});