Spaces:

EnDevSols
/

ASR-Arabic-JS

Running

File size: 10,798 Bytes

590b25a
 
 
9997b92
 
 
9a9b4ca
9997b92
 
9a9b4ca
9997b92
 
 
 
9a9b4ca
9997b92
 
9a9b4ca
 
 
9997b92
 
 
9a9b4ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9997b92
 
9a9b4ca
9997b92
 
 
 
9a9b4ca
 
9997b92
 
9a9b4ca
9997b92
9a9b4ca
 
 
 
 
 
 
9997b92
9a9b4ca
 
 
 
 
 
 
 
 
 
9997b92
 
 
 
9a9b4ca
 
 
 
 
 
 
 
 
 
7b855af
 
 
 
 
 
 
 
 
9997b92
590b25a
 
9997b92
 
9a9b4ca
 
7b855af
9a9b4ca
9997b92
9a9b4ca
7b855af
9a9b4ca
7b855af
9997b92
 
9a9b4ca
7b855af
9a9b4ca
9997b92
9a9b4ca
7b855af
9a9b4ca
7b855af
9997b92
 
 
 
9a9b4ca
 
 
 
9997b92
 
 
 
 
 
 
9a9b4ca
 
590b25a
9997b92
 
 
 
 
 
9a9b4ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9997b92
9a9b4ca
9997b92
 
 
 
 
 
 
 
7b855af
 
 
 
 
 
9a9b4ca
 
 
 
 
 
 
 
 
 
 
7b855af
 
 
 
 
 
 
9a9b4ca
 
 
 
 
 
 
 
 
 
 
 
 
9997b92
9a9b4ca
 
 
9997b92
9a9b4ca
9997b92
9a9b4ca
 
 
 
 
 
9997b92
 
9a9b4ca
9997b92
9a9b4ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7b855af
9a9b4ca
 
 
7b855af
 
9a9b4ca
7b855af
 
 
 
 
 
 
 
 
 
 
 
 
9a9b4ca
 
 
7b855af
 
 
 
 
 
 
 
 
 
 
 
 
9a9b4ca
9997b92

<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Audio Transcription and Similarity Checker</title>
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css">
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            background-color: #f4f4f4;
            padding: 20px;
        }
        .container {
            max-width: 800px;
            margin: 0 auto;
            background: #fff;
            padding: 30px;
            border-radius: 10px;
            box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
        }
        h1 {
            text-align: center;
            margin-bottom: 30px;
        }
        .audio-section {
            text-align: center;
            margin-bottom: 20px;
        }
        input[type="file"] {
            display: none;
        }
        .upload-btn {
            background-color: #007bff;
            color: white;
            padding: 10px 20px;
            cursor: pointer;
            border-radius: 5px;
            margin: 10px;
            border: none;
            display: inline-block;
        }
        .button {
            background-color: #28a745;
            color: white;
            padding: 10px 20px;
            cursor: pointer;
            border: none;
            border-radius: 5px;
            margin-top: 20px;
            display: block;
            width: 100%;
            font-size: 16px;
        }
        .button:hover {
            background-color: #218838;
        }
        #progress-bar {
            width: 0;
            height: 20px;
            background-color: #4caf50;
            text-align: center;
            line-height: 20px;
            color: white;
            border-radius: 5px;
            display: none;
        }
        #progress-container {
            width: 100%;
            background-color: #ddd;
            border-radius: 5px;
            margin-top: 20px;
        }
        .result {
            margin-top: 20px;
        }
        .recorder {
            cursor: pointer;
            background-color: #dc3545;
            color: white;
            padding: 10px 20px;
            border-radius: 50%;
            font-size: 24px;
            display: inline-block;
            margin-top: 20px;
        }
        .stop-button {
            background-color: #ff5733;
            color: white;
            padding: 10px 20px;
            cursor: pointer;
            border-radius: 5px;
            display: none;
            margin: 10px;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Audio Transcription and Similarity Checker</h1>

        <div id="original-audio" class="audio-section">
            <h2>Upload or Record Original Audio</h2>
            <label class="upload-btn" for="originalFile">Choose Audio File</label>
            <input type="file" id="originalFile" accept="audio/*">
            <div id="originalRecorder" class="recorder">
                <i class="fas fa-microphone"></i> Record
            </div>
            <button id="stopOriginalRecording" class="stop-button">Stop Recording</button>
        </div>

        <div id="user-audio" class="audio-section">
            <h2>Upload or Record User Audio</h2>
            <label class="upload-btn" for="userFile">Choose Audio File</label>
            <input type="file" id="userFile" accept="audio/*">
            <div id="userRecorder" class="recorder">
                <i class="fas fa-microphone"></i> Record
            </div>
            <button id="stopUserRecording" class="stop-button">Stop Recording</button>
        </div>

        <button id="transcribeButton" class="button">Perform Transcription and Testing</button>

        <div id="progress-container">
            <div id="progress-bar">0%</div>
        </div>

        <div id="result" class="result"></div>
    </div>

    <script src="https://cdn.jsdelivr.net/npm/@huggingface/transformers"></script>
    <script>
        // Hugging Face model identifier used for transcription (sample model, change if necessary).
        const MODEL_ID = "facebook/wav2vec2-large-960h";

        // Assigned by loadModel(); both stay undefined until loading has finished.
        let processor;
        let model;

        // Audio captured through the recorder buttons; null until a recording exists.
        let originalAudioBlob = null;
        let userAudioBlob = null;

        /**
         * Load the processor and CTC model for MODEL_ID into the shared
         * `processor` and `model` variables.
         *
         * NOTE(review): the package's default CDN build appears to be an ES module,
         * which does not create a `transformers` global when loaded through a classic
         * <script> tag. If the global is missing, the library is fetched with a
         * dynamic import() instead; an existing global is still preferred.
         *
         * @returns {Promise<void>} Resolves once both objects have been assigned.
         * @throws Rejects if the library or the model files cannot be loaded.
         */
        async function loadModel() {
            const lib = typeof transformers !== "undefined"
                ? transformers
                : await import("https://cdn.jsdelivr.net/npm/@huggingface/transformers");

            processor = await lib.AutoProcessor.from_pretrained(MODEL_ID);
            model = await lib.Wav2Vec2ForCTC.from_pretrained(MODEL_ID);
        }

        /**
         * Render the given completion percentage in the #progress-bar element.
         *
         * Any value other than exactly 100 makes the bar visible. At exactly 100 the
         * bar is left as it is and, half a second later, hidden and reset to 0% width.
         *
         * @param {number} percentComplete - Percentage to show as width and label.
         */
        function updateProgressBar(percentComplete) {
            const bar = document.getElementById("progress-bar");
            const label = percentComplete + "%";

            bar.style.width = label;
            bar.innerHTML = label;

            if (percentComplete !== 100) {
                bar.style.display = "block";
                return;
            }

            // Finished: keep the full bar on screen briefly before hiding it.
            setTimeout(function hideAndReset() {
                bar.style.display = "none";
                bar.style.width = "0%";
            }, 500);
        }

        /**
         * Transcribe an audio Blob/File with the loaded `processor` and `model`.
         *
         * The blob holds an encoded container (WAV, MP3, WebM, ...), so its bytes are
         * decoded to PCM with the Web Audio API rather than reinterpreted as floats.
         *
         * @param {Blob} audioBlob - Encoded audio to transcribe.
         * @returns {Promise<string>} The decoded transcription.
         * @throws Rejects if the audio cannot be decoded or inference fails
         *         (including when the model has not finished loading).
         */
        async function transcribe(audioBlob) {
            // NOTE(review): wav2vec2 checkpoints are typically configured for 16 kHz
            // mono input; confirm against the checkpoint's preprocessor config.
            const targetSampleRate = 16000;

            const encodedBytes = await audioBlob.arrayBuffer();

            // Decoding in a context created at the target rate also resamples the audio.
            const audioContext = new AudioContext({ sampleRate: targetSampleRate });
            let audioData;
            try {
                const decoded = await audioContext.decodeAudioData(encodedBytes);

                // Average all channels into a single mono track.
                audioData = new Float32Array(decoded.length);
                for (let channel = 0; channel < decoded.numberOfChannels; channel++) {
                    const samples = decoded.getChannelData(channel);
                    for (let i = 0; i < samples.length; i++) {
                        audioData[i] += samples[i] / decoded.numberOfChannels;
                    }
                }
            } finally {
                await audioContext.close();
            }

            // The processor call is asynchronous in Transformers.js and returns the
            // model inputs object; Python-only options such as return_tensors: "pt"
            // are not part of the JS API.
            const inputs = await processor(audioData);
            const { logits } = await model(inputs);

            // Greedy CTC: pick the highest-scoring token for every frame.
            // Assumes logits has dims [1, frames, vocab] (a single clip) -- TODO confirm.
            const [, frameCount, vocabSize] = logits.dims;
            const scores = logits.data;
            const predicted_ids = [];
            for (let frame = 0; frame < frameCount; frame++) {
                const offset = frame * vocabSize;
                let best = 0;
                for (let token = 1; token < vocabSize; token++) {
                    if (scores[offset + token] > scores[offset + best]) {
                        best = token;
                    }
                }
                predicted_ids.push(best);
            }

            return processor.decode(predicted_ids, { skip_special_tokens: true });
        }

        /**
         * Levenshtein (edit) distance between two strings, counted in UTF-16 code units.
         *
         * @param {string} a
         * @param {string} b
         * @returns {number} Minimum number of insertions, deletions and substitutions.
         */
        function levenshteinDistance(a, b) {
            // Only the previous row of the DP table is needed at any time.
            let previousRow = Array.from({ length: b.length + 1 }, (_, j) => j);
            for (let i = 1; i <= a.length; i++) {
                const currentRow = [i];
                for (let j = 1; j <= b.length; j++) {
                    currentRow[j] = a[i - 1] === b[j - 1]
                        ? previousRow[j - 1]
                        : Math.min(previousRow[j], currentRow[j - 1], previousRow[j - 1]) + 1;
                }
                previousRow = currentRow;
            }
            return previousRow[b.length];
        }

        /**
         * Similarity in [0, 1] derived from the Levenshtein distance.
         * Two empty strings are identical, so they score 1 instead of 0/0 = NaN.
         *
         * @param {string} a
         * @param {string} b
         * @returns {number}
         */
        function levenshteinSimilarity(a, b) {
            const longest = Math.max(a.length, b.length);
            return longest === 0 ? 1 : 1 - levenshteinDistance(a, b) / longest;
        }

        /**
         * Escape text for safe interpolation into an HTML template.
         *
         * @param {string} text
         * @returns {string}
         */
        function escapeHtml(text) {
            const replacements = { "&": "&amp;", "<": "&lt;", ">": "&gt;", '"': "&quot;", "'": "&#39;" };
            return String(text).replace(/[&<>"']/g, (ch) => replacements[ch]);
        }

        // Transcribe both clips, compare the transcripts and render the result.
        document.getElementById("transcribeButton").addEventListener("click", async (event) => {
            const button = event.currentTarget;

            // A recording takes precedence over a chosen file. The file is read on
            // every click (not cached) so that a newly selected file is picked up.
            const originalAudio = originalAudioBlob || document.getElementById("originalFile").files[0];
            if (!originalAudio) {
                alert("Please upload or record original audio.");
                return;
            }
            const userAudio = userAudioBlob || document.getElementById("userFile").files[0];
            if (!userAudio) {
                alert("Please upload or record user audio.");
                return;
            }

            const resultElement = document.getElementById("result");
            button.disabled = true; // prevent overlapping runs
            updateProgressBar(0);

            // Simulated progress: advance to 90% and hold there until the work is
            // really done, so the bar is not hidden while transcription still runs.
            let percentComplete = 0;
            const progressInterval = setInterval(() => {
                if (percentComplete < 90) {
                    percentComplete += 10;
                    updateProgressBar(percentComplete);
                }
            }, 200);

            try {
                const transcriptionOriginal = await transcribe(originalAudio);
                const transcriptionUser = await transcribe(userAudio);

                clearInterval(progressInterval);
                updateProgressBar(100);

                const similarityScore = levenshteinSimilarity(transcriptionOriginal, transcriptionUser);

                resultElement.innerHTML = `
                <h2>Transcription Results</h2>
                <p><strong>Original Transcription:</strong> ${escapeHtml(transcriptionOriginal)}</p>
                <p><strong>User Transcription:</strong> ${escapeHtml(transcriptionUser)}</p>
                <p><strong>Levenshtein Similarity Score:</strong> ${similarityScore.toFixed(2)}</p>
            `;
            } catch (error) {
                console.error("Transcription failed:", error);
                // Hide the bar directly; reporting 100% would suggest success.
                document.getElementById("progress-bar").style.display = "none";
                resultElement.textContent = `Transcription failed: ${error && error.message ? error.message : error}`;
            } finally {
                clearInterval(progressInterval);
                button.disabled = false;
            }
        });

        // Initialize model. The load runs in the background; report a failure
        // instead of leaving the rejection unhandled.
        loadModel().catch((error) => {
            console.error("Failed to load the speech model:", error);
            document.getElementById("result").textContent =
                `Failed to load the speech model: ${error && error.message ? error.message : error}`;
        });

        // Handle voice recording (using browser APIs)

        /**
         * Ask for microphone access and start recording immediately.
         *
         * The returned promise settles as soon as recording has STARTED, so the
         * caller can show its stop control. The audio itself is delivered by the
         * controller's stop() method.
         *
         * @returns {Promise<{stop: () => Promise<Blob>}>} Controller whose stop()
         *          ends the recording and resolves with the recorded audio.
         * @throws Rejects if microphone access is denied or unavailable.
         */
        const recordAudio = async () => {
            const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
            const mediaRecorder = new MediaRecorder(stream);
            const audioChunks = [];

            mediaRecorder.addEventListener("dataavailable", (event) => {
                audioChunks.push(event.data);
            });

            // Register the stop listener before starting so the event cannot be missed.
            const recordingFinished = new Promise((resolve) => {
                mediaRecorder.addEventListener("stop", () => {
                    // Release the microphone so the browser's recording indicator turns off.
                    stream.getTracks().forEach((track) => track.stop());
                    resolve(new Blob(audioChunks, { type: mediaRecorder.mimeType }));
                });
            });

            mediaRecorder.start();

            return {
                stop() {
                    if (mediaRecorder.state !== "inactive") {
                        mediaRecorder.stop();
                    }
                    return recordingFinished;
                },
            };
        };

        /**
         * Wire one record/stop button pair to recordAudio().
         *
         * @param {object} options
         * @param {string} options.recordButtonId - Id of the element that starts recording.
         * @param {string} options.stopButtonId - Id of the button that stops recording.
         * @param {(blob: Blob) => void} options.onRecorded - Receives the finished recording.
         * @param {string} options.doneMessage - Alert text shown once the recording is stored.
         */
        const setUpRecorderControls = ({ recordButtonId, stopButtonId, onRecorded, doneMessage }) => {
            const recordButton = document.getElementById(recordButtonId);
            const stopButton = document.getElementById(stopButtonId);
            let activeRecording = null;
            let isStarting = false;

            recordButton.addEventListener("click", async () => {
                // Ignore clicks while a recording is running or the permission prompt is open.
                if (activeRecording || isStarting) {
                    return;
                }
                isStarting = true;
                try {
                    activeRecording = await recordAudio();
                } catch (error) {
                    console.error("Could not start recording:", error);
                    alert(`Could not start recording: ${error && error.message ? error.message : error}`);
                    return;
                } finally {
                    isStarting = false;
                }
                stopButton.style.display = "inline-block";
                recordButton.style.display = "none";
            });

            stopButton.addEventListener("click", async () => {
                if (!activeRecording) {
                    return;
                }
                const recording = activeRecording;
                activeRecording = null;

                stopButton.style.display = "none";
                recordButton.style.display = "inline-block";

                onRecorded(await recording.stop());
                alert(doneMessage);
            });
        };

        setUpRecorderControls({
            recordButtonId: "originalRecorder",
            stopButtonId: "stopOriginalRecording",
            onRecorded: (blob) => {
                originalAudioBlob = blob;
            },
            doneMessage: "Original audio recorded!",
        });

        setUpRecorderControls({
            recordButtonId: "userRecorder",
            stopButtonId: "stopUserRecording",
            onRecorded: (blob) => {
                userAudioBlob = blob;
            },
            doneMessage: "User audio recorded!",
        });
    </script>
</body>
</html>