<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Audio Transcription and Similarity Checker</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css">
<style>
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background-color: #f4f4f4;
padding: 20px;
}
.container {
max-width: 800px;
margin: 0 auto;
background: #fff;
padding: 30px;
border-radius: 10px;
box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
}
h1 {
text-align: center;
margin-bottom: 30px;
}
.audio-section {
text-align: center;
margin-bottom: 20px;
}
input[type="file"] {
display: none;
}
.upload-btn {
background-color: #007bff;
color: white;
padding: 10px 20px;
cursor: pointer;
border-radius: 5px;
margin: 10px;
border: none;
display: inline-block;
}
.button {
background-color: #28a745;
color: white;
padding: 10px 20px;
cursor: pointer;
border: none;
border-radius: 5px;
margin-top: 20px;
display: block;
width: 100%;
font-size: 16px;
}
.button:hover {
background-color: #218838;
}
#progress-bar {
width: 0;
height: 20px;
background-color: #4caf50;
text-align: center;
line-height: 20px;
color: white;
border-radius: 5px;
display: none;
}
#progress-container {
width: 100%;
background-color: #ddd;
border-radius: 5px;
margin-top: 20px;
}
.result {
margin-top: 20px;
}
.recorder {
cursor: pointer;
background-color: #dc3545;
color: white;
padding: 10px 20px;
border-radius: 50%;
font-size: 24px;
display: inline-block;
margin-top: 20px;
}
.stop-button {
background-color: #ff5733;
color: white;
padding: 10px 20px;
cursor: pointer;
border-radius: 5px;
display: none;
margin: 10px;
}
</style>
</head>
<body>
<div class="container">
<h1>Audio Transcription and Similarity Checker</h1>
<div id="original-audio" class="audio-section">
<h2>Upload or Record Original Audio</h2>
<label class="upload-btn" for="originalFile">Choose Audio File</label>
<input type="file" id="originalFile" accept="audio/*">
<div id="originalRecorder" class="recorder">
<i class="fas fa-microphone"></i> Record
</div>
<button id="stopOriginalRecording" class="stop-button">Stop Recording</button>
</div>
<div id="user-audio" class="audio-section">
<h2>Upload or Record User Audio</h2>
<label class="upload-btn" for="userFile">Choose Audio File</label>
<input type="file" id="userFile" accept="audio/*">
<div id="userRecorder" class="recorder">
<i class="fas fa-microphone"></i> Record
</div>
<button id="stopUserRecording" class="stop-button">Stop Recording</button>
</div>
<button id="transcribeButton" class="button">Perform Transcription and Testing</button>
<div id="progress-container">
<div id="progress-bar">0%</div>
</div>
<div id="result" class="result"></div>
</div>
<script src="https://cdn.jsdelivr.net/npm/@huggingface/transformers"></script>
<script>
const MODEL_ID = "facebook/wav2vec2-large-960h"; // Sample model, change if necessary | |
let processor, model; | |
let originalAudioBlob = null; | |
let userAudioBlob = null; | |
// Load model and processor | |
async function loadModel() { | |
processor = await transformers.AutoProcessor.from_pretrained(MODEL_ID); | |
model = await transformers.Wav2Vec2ForCTC.from_pretrained(MODEL_ID); | |
} | |
// Simulate progress bar loading | |
function updateProgressBar(percentComplete) { | |
const progressBar = document.getElementById("progress-bar"); | |
progressBar.style.width = percentComplete + "%"; | |
progressBar.innerHTML = percentComplete + "%"; | |
if (percentComplete === 100) { | |
setTimeout(() => { | |
progressBar.style.display = "none"; | |
progressBar.style.width = "0%"; | |
}, 500); | |
} else { | |
progressBar.style.display = "block"; | |
} | |
} | |
async function transcribe(audioBlob) { | |
const arrayBuffer = await audioBlob.arrayBuffer(); | |
const audioData = new Float32Array(arrayBuffer); | |
const inputValues = processor(audioData, {return_tensors: "pt", padding: true}).input_values; | |
const logits = await model(inputValues).logits; | |
const predicted_ids = logits.argmax(-1); | |
const transcription = processor.decode(predicted_ids, {skip_special_tokens: true}); | |
return transcription; | |
} | |
// Transcribe both clips and report a Levenshtein-based similarity score.
document.getElementById("transcribeButton").addEventListener("click", async () => {
  const originalFile = document.getElementById("originalFile").files[0];
  const userFile = document.getElementById("userFile").files[0];
  if (!originalAudioBlob && !originalFile) {
    alert("Please upload or record original audio.");
    return;
  }
  if (!userAudioBlob && !userFile) {
    alert("Please upload or record user audio.");
    return;
  }
  // Fall back to the uploaded files when nothing was recorded.
  if (!originalAudioBlob) originalAudioBlob = originalFile;
  if (!userAudioBlob) userAudioBlob = userFile;

  // Simulated progress: transcription duration is unknown up front, so the
  // bar advances on a timer and holds just below 100% until work finishes.
  updateProgressBar(0);
  let percentComplete = 0;
  const progressInterval = setInterval(() => {
    percentComplete = Math.min(percentComplete + 10, 99);
    updateProgressBar(percentComplete);
  }, 200);

  // Classic dynamic-programming edit distance between two strings.
  const levenshteinDistance = (a, b) => {
    const dp = Array.from({ length: a.length + 1 }, () => new Array(b.length + 1).fill(0));
    for (let i = 0; i <= a.length; i++) dp[i][0] = i;
    for (let j = 0; j <= b.length; j++) dp[0][j] = j;
    for (let i = 1; i <= a.length; i++) {
      for (let j = 1; j <= b.length; j++) {
        dp[i][j] =
          a[i - 1] === b[j - 1]
            ? dp[i - 1][j - 1]
            : Math.min(dp[i - 1][j], dp[i][j - 1], dp[i - 1][j - 1]) + 1;
      }
    }
    return dp[a.length][b.length];
  };

  // Escape model output before interpolating into innerHTML.
  const escapeHtml = (s) =>
    String(s).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");

  try {
    const transcriptionOriginal = await transcribe(originalAudioBlob);
    const transcriptionUser = await transcribe(userAudioBlob);
    // Guard the division: two empty transcriptions are identical (score 1);
    // the original produced NaN here.
    const maxLen = Math.max(transcriptionOriginal.length, transcriptionUser.length);
    const similarityScore =
      maxLen === 0
        ? 1
        : 1 - levenshteinDistance(transcriptionOriginal, transcriptionUser) / maxLen;
    document.getElementById("result").innerHTML = `
      <h2>Transcription Results</h2>
      <p><strong>Original Transcription:</strong> ${escapeHtml(transcriptionOriginal)}</p>
      <p><strong>User Transcription:</strong> ${escapeHtml(transcriptionUser)}</p>
      <p><strong>Levenshtein Similarity Score:</strong> ${similarityScore.toFixed(2)}</p>
    `;
  } catch (err) {
    document.getElementById("result").textContent = `Transcription failed: ${err.message}`;
  } finally {
    // The original leaked the interval (and left the bar spinning) whenever
    // transcribe() threw; always stop it.
    clearInterval(progressInterval);
    updateProgressBar(100);
  }
});
// Start the model download immediately so the first transcription is fast.
// Handle the promise explicitly instead of leaving a floating promise whose
// rejection would surface as an unhandled-rejection error.
loadModel().catch((err) => console.error("Failed to load model:", err));
// ---- Voice recording (MediaRecorder API) ----
//
// The original flow dead-locked: recordAudio() resolved only when the
// recorder fired "stop", but the Stop button was revealed only after that
// await returned, so it could never be clicked. It also resolved with a
// Blob that the stop-handlers then treated as a MediaRecorder (calling
// `.stop()` and `.addEventListener()` on it) and rebuilt with
// `new Blob(recorderInstance)`, which is invalid. The microphone stream was
// also never released.

/**
 * Ask for microphone access and start recording immediately.
 * @returns {Promise<MediaRecorder>} the live recorder; its `blobPromise`
 *   property resolves with the recorded audio Blob once `stop()` is called.
 */
const recordAudio = async () => {
  const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  const mediaRecorder = new MediaRecorder(stream);
  const audioChunks = [];
  mediaRecorder.addEventListener("dataavailable", (event) => {
    audioChunks.push(event.data);
  });
  // Expose the eventual recording as a promise so callers keep a handle on
  // the recorder (to stop it) AND on the resulting Blob.
  mediaRecorder.blobPromise = new Promise((resolve) => {
    mediaRecorder.addEventListener("stop", () => {
      stream.getTracks().forEach((track) => track.stop()); // release the mic
      resolve(new Blob(audioChunks, { type: mediaRecorder.mimeType }));
    });
  });
  mediaRecorder.start();
  return mediaRecorder;
};

let originalRecorderInstance = null;
let userRecorderInstance = null;

document.getElementById("originalRecorder").addEventListener("click", async () => {
  originalRecorderInstance = await recordAudio();
  document.getElementById("stopOriginalRecording").style.display = "inline-block";
  document.getElementById("originalRecorder").style.display = "none";
});

document.getElementById("stopOriginalRecording").addEventListener("click", async () => {
  document.getElementById("stopOriginalRecording").style.display = "none";
  document.getElementById("originalRecorder").style.display = "inline-block";
  originalRecorderInstance.stop();
  originalAudioBlob = await originalRecorderInstance.blobPromise;
  alert("Original audio recorded!");
});

document.getElementById("userRecorder").addEventListener("click", async () => {
  userRecorderInstance = await recordAudio();
  document.getElementById("stopUserRecording").style.display = "inline-block";
  document.getElementById("userRecorder").style.display = "none";
});

document.getElementById("stopUserRecording").addEventListener("click", async () => {
  document.getElementById("stopUserRecording").style.display = "none";
  document.getElementById("userRecorder").style.display = "inline-block";
  userRecorderInstance.stop();
  userAudioBlob = await userRecorderInstance.blobPromise;
  alert("User audio recorded!");
});
</script>
</body>
</html>