File size: 9,726 Bytes
d8d37b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <title>Automatic Speech Recognition - Hugging Face Transformers.js</title>

    <script type="module">
        // Import the Transformers.js `pipeline` factory from the jsDelivr CDN.
        // NOTE(review): the version pin in the extracted source was destroyed by
        // e-mail obfuscation ("[email protected]"); 2.17.2 is assumed here - confirm
        // against the originally deployed page.
        import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';

        // Module bindings are not visible to classic scripts, so expose the factory
        // on `window` for the non-module <script> further down the page.
        window.pipeline = pipeline;
    </script>

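    <!-- NOTE(review): version pin restored after e-mail obfuscation garbled the URL; 5.3.3 is assumed - confirm. -->
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.3/dist/css/bootstrap.min.css" rel="stylesheet">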
    
    <link rel="stylesheet" href="css/styles.css">
</head>

<body>
    <div class="container-main">
        <!-- Page Header -->
        <div class="header">
            <div class="header-logo">
                <img src="images/logo.png" alt="logo">
            </div>
            <div class="header-main-text">
                <h1>Hugging Face Transformers.js</h1>
            </div>
            <div class="header-sub-text">
                <h3>Free AI Models for JavaScript Web Development</h3>
            </div>
        </div>
        <hr> <!-- Separator -->

        <!-- Back to Home button -->
        <div class="row mt-5">
            <div class="col-md-12 text-center">
                <a href="index.html" class="btn btn-outline-secondary"
                    style="color: #3c650b; border-color: #3c650b;">Back to Main Page</a>
            </div>
        </div>

        <!-- Content -->
        <div class="container mt-5">
            <!-- Centered Titles -->
            <div class="text-center">
                <h2>Audio</h2>
                <h4>Automatic Speech Recognition - English</h4>
            </div>

            <!-- Actual Content of this page -->
            <div id="transcribe-english-container" class="container mt-4">
                <h5>Transcribe English:</h5>
                <div class="d-flex align-items-center">
                    <label for="transcribeEnglishURLText" class="mb-0 text-nowrap" style="margin-right: 15px;">Enter
                        audio URL:</label>
                    <input type="text" class="form-control flex-grow-1" id="transcribeEnglishURLText"
                        value="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav"
                        placeholder="Enter audio" style="margin-right: 15px; margin-left: 15px;">
                    <button id="TranscribeEnglishButton" class="btn btn-primary"
                        onclick="transcribeEnglish()">Transcribe</button>
                </div>
                <div class="mt-4">
                    <h4>Output:</h4>
                    <pre id="outputArea"></pre>
                </div>
            </div>

            <hr> <!-- Line Separator -->

            <div id="transcribe-english-local-container" class="container mt-4">
                <h5>Transcribe a Local English Audio File:</h5>
                <div class="d-flex align-items-center">
                    <label for="transcribeEnglishLocalFile" class="mb-0 text-nowrap" style="margin-right: 15px;">Select
                        Local Audio:</label>
                    <input type="file" id="transcribeEnglishLocalFile" accept="audio/*" />
                    <button id="TranscribeEnglishButtonLocal" class="btn btn-primary"
                        onclick="transcribeEnglishLocal()">Transcribe</button>
                </div>
                <div class="mt-4">
                    <h4>Output:</h4>
                    <pre id="outputAreaLocal"></pre>
                </div>
            </div>

            <hr> <!-- Line Separator -->

            <div id="transcribe-english-timestamps-container" class="container mt-4">
                <h5>Transcribe English with Timestamps:</h5>
                <div class="d-flex align-items-center">
                    <label for="transcribeEnglishTimestampsURLText" class="mb-0 text-nowrap"
                        style="margin-right: 15px;">Enter
                        audio URL:</label>
                    <input type="text" class="form-control flex-grow-1" id="transcribeEnglishTimestampsURLText"
                        value="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav"
                        placeholder="Enter audio" style="margin-right: 15px; margin-left: 15px;">
                    <button id="TranscribeEnglishTimestampsButton" class="btn btn-primary"
                        onclick="transcribeEnglishTimestamps()">Transcribe</button>
                </div>
                <div class="mt-4">
                    <h4>Output:</h4>
                    <pre id="outputAreaTimestamps"></pre>
                </div>
            </div>

            <hr> <!-- Line Separator -->

            <div id="transcribe-english-word-level-timestamps-container" class="container mt-4">
                <h5>Transcribe English with Word-level Timestamps:</h5>
                <div class="d-flex align-items-center">
                    <label for="transcribeEnglishWordlevelTimestampsURLText" class="mb-0 text-nowrap"
                        style="margin-right: 15px;">Enter
                        audio URL:</label>
                    <input type="text" class="form-control flex-grow-1" id="transcribeEnglishWordlevelTimestampsURLText"
                        value="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/jfk.wav"
                        placeholder="Enter audio" style="margin-right: 15px; margin-left: 15px;">
                    <button id="TranscribeEnglishWord-levelTimestampsButton" class="btn btn-primary"
                        onclick="transcribeEnglishWordlevelTimestamps()">Transcribe</button>
                </div>
                <div class="mt-4">
                    <h4>Output:</h4>
                    <pre id="outputAreaWordlevelTimestamps"></pre>
                </div>
            </div>

            <hr> <!-- Line Separator -->

            <div id="transcribe-english-30-container" class="container mt-4">
                <h5>Transcribe/Translate Audio Longer Than 30 Seconds:</h5>
                <div class="d-flex align-items-center">
                    <label for="transcribeEnglish30URLText" class="mb-0 text-nowrap" style="margin-right: 15px;">Enter
                        audio URL:</label>
                    <input type="text" class="form-control flex-grow-1" id="transcribeEnglish30URLText"
                        value="https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/ted_60.wav"
                        placeholder="Enter audio" style="margin-right: 15px; margin-left: 15px;">
                    <button id="TranscribeEnglish30Button" class="btn btn-primary"
                        onclick="transcribeEnglish30()">Transcribe</button>
                </div>
                <div class="mt-4">
                    <h4>Output:</h4>
                    <pre id="outputArea30"></pre>
                </div>
            </div>

            <!-- Back to Home button -->
            <div class="row mt-5">
                <div class="col-md-12 text-center">
                    <a href="index.html" class="btn btn-outline-secondary"
                        style="color: #3c650b; border-color: #3c650b;">Back to Main Page</a>
                </div>
            </div>
        </div>
    </div>

    <script>

        // Speech-recognition pipeline used by the transcribe* click handlers.
        // It stays undefined until initializeModel() has finished loading.
        let transcriber;

        /**
         * Load the automatic-speech-recognition pipeline ('Xenova/whisper-tiny.en')
         * and store it in `transcriber`.
         *
         * This function is registered as an event listener, so nothing awaits its
         * promise; a load failure is therefore logged here instead of being left
         * as an unhandled rejection. On failure `transcriber` is left unchanged.
         *
         * @returns {Promise<void>} Settles once loading has succeeded or failed.
         */
        async function initializeModel() {
            try {
                transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny.en');
            } catch (err) {
                console.error('Failed to load the speech recognition model:', err);
            }
        }

        /**
         * Transcribe the audio at the URL entered in #transcribeEnglishURLText and
         * show the result, pretty-printed as JSON, in #outputArea.
         *
         * Called from the button's onclick attribute, so problems are reported in
         * the output area rather than thrown.
         *
         * @returns {Promise<void>} Settles once the output area has been updated.
         */
        async function transcribeEnglish() {
            const outputArea = document.getElementById("outputArea");
            const audioUrl = document.getElementById("transcribeEnglishURLText").value.trim();

            if (audioUrl === "") {
                outputArea.innerText = "Please enter an audio URL first.";
                return;
            }

            // `transcriber` is assigned asynchronously after page load, so a click
            // can arrive before the model is ready.
            if (typeof transcriber === "undefined") {
                outputArea.innerText = "The model is still loading. Please try again in a moment.";
                return;
            }

            try {
                const result = await transcriber(audioUrl);
                outputArea.innerText = JSON.stringify(result, null, 2);
            } catch (err) {
                console.error(err);
                const reason = err instanceof Error ? err.message : String(err);
                outputArea.innerText = `Transcription failed: ${reason}`;
            }
        }

        /**
         * Transcribe the audio file chosen in #transcribeEnglishLocalFile and show
         * the result, pretty-printed as JSON, in #outputAreaLocal.
         *
         * The file is handed to the model through a temporary Blob URL, which is
         * always revoked afterwards so the browser can release the file data.
         *
         * @returns {Promise<void>} Settles once the output area has been updated.
         */
        async function transcribeEnglishLocal() {
            const file = document.getElementById("transcribeEnglishLocalFile").files[0];

            if (!file) {
                alert('Please select an audio file first.');
                return;
            }

            const outputArea = document.getElementById("outputAreaLocal");

            // `transcriber` is assigned asynchronously after page load, so a click
            // can arrive before the model is ready.
            if (typeof transcriber === "undefined") {
                outputArea.innerText = "The model is still loading. Please try again in a moment.";
                return;
            }

            // Create a Blob URL from the file
            const objectUrl = URL.createObjectURL(file);

            try {
                const result = await transcriber(objectUrl);
                outputArea.innerText = JSON.stringify(result, null, 2);
            } catch (err) {
                console.error(err);
                const reason = err instanceof Error ? err.message : String(err);
                outputArea.innerText = `Transcription failed: ${reason}`;
            } finally {
                // Release the Blob URL whether or not transcription succeeded.
                URL.revokeObjectURL(objectUrl);
            }
        }

        /**
         * Transcribe the audio at the URL entered in
         * #transcribeEnglishTimestampsURLText with `return_timestamps: true` and
         * show the result, pretty-printed as JSON, in #outputAreaTimestamps.
         *
         * Called from the button's onclick attribute, so problems are reported in
         * the output area rather than thrown.
         *
         * @returns {Promise<void>} Settles once the output area has been updated.
         */
        async function transcribeEnglishTimestamps() {
            const outputArea = document.getElementById("outputAreaTimestamps");
            const audioUrl = document.getElementById("transcribeEnglishTimestampsURLText").value.trim();

            if (audioUrl === "") {
                outputArea.innerText = "Please enter an audio URL first.";
                return;
            }

            // `transcriber` is assigned asynchronously after page load, so a click
            // can arrive before the model is ready.
            if (typeof transcriber === "undefined") {
                outputArea.innerText = "The model is still loading. Please try again in a moment.";
                return;
            }

            try {
                const result = await transcriber(audioUrl, { return_timestamps: true });
                outputArea.innerText = JSON.stringify(result, null, 2);
            } catch (err) {
                console.error(err);
                const reason = err instanceof Error ? err.message : String(err);
                outputArea.innerText = `Transcription failed: ${reason}`;
            }
        }

        /**
         * Transcribe the audio at the URL entered in
         * #transcribeEnglishWordlevelTimestampsURLText with
         * `return_timestamps: 'word'` and show the result, pretty-printed as JSON,
         * in #outputAreaWordlevelTimestamps.
         *
         * Called from the button's onclick attribute, so problems are reported in
         * the output area rather than thrown.
         *
         * @returns {Promise<void>} Settles once the output area has been updated.
         */
        async function transcribeEnglishWordlevelTimestamps() {
            const outputArea = document.getElementById("outputAreaWordlevelTimestamps");
            const audioUrl = document.getElementById("transcribeEnglishWordlevelTimestampsURLText").value.trim();

            if (audioUrl === "") {
                outputArea.innerText = "Please enter an audio URL first.";
                return;
            }

            // `transcriber` is assigned asynchronously after page load, so a click
            // can arrive before the model is ready.
            if (typeof transcriber === "undefined") {
                outputArea.innerText = "The model is still loading. Please try again in a moment.";
                return;
            }

            try {
                const result = await transcriber(audioUrl, { return_timestamps: 'word' });
                outputArea.innerText = JSON.stringify(result, null, 2);
            } catch (err) {
                console.error(err);
                const reason = err instanceof Error ? err.message : String(err);
                outputArea.innerText = `Transcription failed: ${reason}`;
            }
        }


        /**
         * Transcribe the audio at the URL entered in #transcribeEnglish30URLText,
         * passing `chunk_length_s: 30` and `stride_length_s: 5` to the pipeline,
         * and show the result, pretty-printed as JSON, in #outputArea30.
         *
         * Called from the button's onclick attribute, so problems are reported in
         * the output area rather than thrown.
         *
         * @returns {Promise<void>} Settles once the output area has been updated.
         */
        async function transcribeEnglish30() {
            const outputArea = document.getElementById("outputArea30");
            const audioUrl = document.getElementById("transcribeEnglish30URLText").value.trim();

            if (audioUrl === "") {
                outputArea.innerText = "Please enter an audio URL first.";
                return;
            }

            // `transcriber` is assigned asynchronously after page load, so a click
            // can arrive before the model is ready.
            if (typeof transcriber === "undefined") {
                outputArea.innerText = "The model is still loading. Please try again in a moment.";
                return;
            }

            try {
                const result = await transcriber(audioUrl, { chunk_length_s: 30, stride_length_s: 5 });
                outputArea.innerText = JSON.stringify(result, null, 2);
            } catch (err) {
                console.error(err);
                const reason = err instanceof Error ? err.message : String(err);
                outputArea.innerText = `Transcription failed: ${reason}`;
            }
        }

        // Start loading the model once the DOM has been parsed: initializeModel()
        // assigns `transcriber`, which the transcribe* click handlers above call.
        window.addEventListener("DOMContentLoaded", initializeModel);
    </script>
</body>

</html>