File size: 4,736 Bytes
d420b85
 
 
0145c6e
 
2da09a8
 
 
 
 
 
 
 
0145c6e
 
2da09a8
0145c6e
2da09a8
 
0145c6e
2da09a8
 
 
0145c6e
2da09a8
2b113b3
9dc594c
 
 
 
 
 
 
 
 
 
0145c6e
d420b85
 
2da09a8
9dc594c
2da09a8
 
d420b85
0145c6e
2da09a8
0145c6e
 
2da09a8
 
 
 
 
 
2b113b3
0145c6e
9dc594c
 
 
 
 
 
 
 
 
 
 
 
0145c6e
9dc594c
 
 
 
 
 
2da09a8
9dc594c
 
 
0145c6e
9dc594c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2da09a8
9dc594c
 
 
0145c6e
9dc594c
 
 
 
 
 
 
 
 
0145c6e
9dc594c
 
 
 
0145c6e
9dc594c
 
 
 
0145c6e
 
d420b85
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Simple ASR Client</title>
    <script type="importmap">
    {
        "imports": {
            "@xenova/transformers": "https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2/+esm"
        }
    }
    </script>
    <style>
        body {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 20px auto;
            padding: 20px;
        }
        #output {
            margin-top: 20px;
            padding: 10px;
            border: 1px solid #ccc;
            min-height: 100px;
        }
        .ready {
            color: green;
            font-weight: bold;
        }
        .loading {
            color: orange;
        }
        .error {
            color: red;
        }
    </style>
</head>
<body>
    <h1>Simple Speech Recognition</h1>
    <div id="status" class="loading">Loading model...</div>
    <button id="startBtn" disabled>Start Recording</button>
    <div id="output"></div>

    <script type="module">
        import { pipeline } from '@xenova/transformers';

        // Shared recording state, read and written by the start/stop button handler.
        // True between a successful recorder start and the next stop click.
        let isRecording = false;
        // The MediaRecorder of the current/most recent recording; null until the first one starts.
        let mediaRecorder = null;
        // Audio data pieces delivered by the recorder's `dataavailable` events.
        let audioChunks = [];
        
        // DOM handles: status banner, record toggle button, and transcript area.
        const statusElement = document.getElementById('status');
        const startBtn = document.getElementById('startBtn');
        const output = document.getElementById('output');

        /**
         * Load the Whisper speech-recognition pipeline and, once it is ready,
         * enable the record button and wire it up as a start/stop toggle.
         *
         * Loading failures are reported in the status banner and logged; the
         * returned promise never rejects.
         *
         * @returns {Promise<void>}
         */
        async function initModel() {
            try {
                console.log('Starting model loading...');
                setStatus('Loading model...', 'loading');

                const model = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny');

                console.log('Model loaded successfully!');
                setStatus('MODEL LOADED SUCCESSFULLY! Ready to record.', 'ready');
                startBtn.disabled = false;

                startBtn.onclick = async () => {
                    if (isRecording) {
                        stopRecording();
                    } else {
                        await startRecording(model);
                    }
                };
            } catch (e) {
                console.error('Model loading error:', e);
                setStatus(`Error loading model: ${e.message}`, 'error');
            }
        }

        /**
         * Show a message in the status banner and switch its CSS state class.
         *
         * @param {string} text - Message shown to the user.
         * @param {string} state - Class applied to the banner ('loading', 'ready' or 'error').
         */
        function setStatus(text, state) {
            statusElement.textContent = text;
            statusElement.className = state;
        }

        /**
         * Ask for microphone access and start a new recording whose audio is
         * transcribed with `model` once the recorder stops.
         *
         * @param {Function} model - The loaded speech-recognition pipeline.
         * @returns {Promise<void>}
         */
        async function startRecording(model) {
            // Block further clicks while the permission prompt is pending so a
            // double click cannot start two recorders.
            startBtn.disabled = true;
            let stream = null;
            try {
                stream = await navigator.mediaDevices.getUserMedia({ audio: true });
                const recorder = new MediaRecorder(stream);
                // Chunks are kept per recording so a later recording can never
                // clobber (or be clobbered by) one that is still being transcribed.
                const chunks = [];

                recorder.ondataavailable = (event) => {
                    if (event.data.size > 0) {
                        chunks.push(event.data);
                    }
                };

                recorder.onstop = async () => {
                    // Label the blob with the container the recorder really
                    // produced (typically webm/ogg), not a hard-coded WAV type.
                    const audioBlob = new Blob(chunks, { type: recorder.mimeType });
                    try {
                        await transcribeRecording(model, audioBlob);
                    } finally {
                        // Only reset the shared buffer if it still belongs to this recording.
                        if (audioChunks === chunks) {
                            audioChunks = [];
                        }
                    }
                };

                recorder.start(1000);
                mediaRecorder = recorder;
                audioChunks = chunks;
                isRecording = true;
                startBtn.textContent = 'Stop Recording';
                setStatus('Recording...', 'loading');
            } catch (e) {
                // Release the microphone if it was acquired before the failure.
                if (stream) {
                    stream.getTracks().forEach((track) => track.stop());
                }
                console.error('Recording error:', e);
                setStatus('Error accessing microphone', 'error');
            } finally {
                startBtn.disabled = false;
            }
        }

        /**
         * Stop the active recording and release the microphone. Transcription
         * is triggered by the recorder's `stop` event.
         */
        function stopRecording() {
            mediaRecorder.stop();
            mediaRecorder.stream.getTracks().forEach((track) => track.stop());
            isRecording = false;
            startBtn.textContent = 'Start Recording';
            setStatus('Processing...', 'loading');
        }

        /**
         * Transcribe one recording and append the recognized text to the output area.
         *
         * The pipeline does not accept a Blob directly; it takes a URL string or
         * raw Float32 samples. A temporary blob URL lets the library fetch and
         * decode the compressed recording itself.
         *
         * @param {Function} model - The loaded speech-recognition pipeline.
         * @param {Blob} audioBlob - The recorded audio.
         * @returns {Promise<void>}
         */
        async function transcribeRecording(model, audioBlob) {
            if (audioBlob.size === 0) {
                setStatus('No audio captured', 'error');
                return;
            }

            const audioUrl = URL.createObjectURL(audioBlob);
            try {
                const result = await model(audioUrl);
                output.textContent += `${result.text} `;
                // Leave the banner alone if the user has already started a new recording.
                if (!isRecording) {
                    setStatus('Ready to record.', 'ready');
                }
            } catch (e) {
                console.error('Transcription error:', e);
                setStatus('Error during transcription', 'error');
            } finally {
                URL.revokeObjectURL(audioUrl);
            }
        }

        // Start loading the model as soon as the module runs. initModel wraps its
        // whole body in try/catch, so the returned promise is deliberately not awaited.
        initModel();
    </script>
</body>
</html>