Spaces:
Running
on
T4
Running
on
T4
Fedir Zadniprovskyi
committed on
Commit
•
12ab49b
1
Parent(s):
487d997
docs: add js example
Browse files- examples/javascript/index.js +159 -0
examples/javascript/index.js
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
/**
|
2 |
+
* Example provided by https://github.com/Gan-Xing in https://github.com/fedirz/faster-whisper-server/issues/26
|
3 |
+
*/
|
4 |
+
import fs from 'fs';
|
5 |
+
import WebSocket from 'ws';
|
6 |
+
import fetch from 'node-fetch';
|
7 |
+
import FormData from 'form-data';
|
8 |
+
import path from 'path';
|
9 |
+
import ffmpeg from 'fluent-ffmpeg';
|
10 |
+
import dotenv from 'dotenv';
|
11 |
+
|
dotenv.config(); // pull values from a local .env into process.env before reading config

// Location of the ffmpeg binary; overridable through the FFMPEG_PATH env var.
const ffmpegBinaryPath = process.env.FFMPEG_PATH || '/usr/bin/ffmpeg';
ffmpeg.setFfmpegPath(ffmpegBinaryPath);
16 |
+
|
/**
 * Transcribe an audio file using the HTTP `/v1/audio/transcriptions` endpoint.
 * Supported file types include wav, mp3, webm, and other types supported by the OpenAI API.
 * I have tested with these three types.
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} language - Language code
 * @param {string} responseFormat - Response format ('json', 'verbose_json', 'text', 'srt', 'vtt')
 * @param {string} temperature - Temperature setting
 * @returns {Promise<object|string>} Parsed JSON for JSON formats, raw text otherwise
 * @throws {Error} If the server responds with a non-2xx status
 */
async function transcribeFile(filePath, model, language, responseFormat, temperature) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(filePath));
  formData.append('model', model);
  formData.append('language', language);
  formData.append('response_format', responseFormat);
  formData.append('temperature', temperature);

  const response = await fetch(`${process.env.TRANSCRIPTION_API_BASE_URL}/v1/audio/transcriptions`, {
    method: 'POST',
    body: formData,
  });

  // Fail loudly on HTTP errors instead of trying to parse an error page as a result.
  if (!response.ok) {
    throw new Error(`Transcription request failed: ${response.status} ${response.statusText}`);
  }

  // Only the JSON formats return a JSON body; 'text'/'srt'/'vtt' are plain text
  // and would make response.json() throw.
  const transcription =
    responseFormat === 'json' || responseFormat === 'verbose_json'
      ? await response.json()
      : await response.text();
  console.log('Transcription Response:', transcription);
  return transcription;
}
44 |
+
|
/**
 * Translate an audio file using the HTTP `/v1/audio/translations` endpoint.
 * Only English is supported for translation.
 * Currently, I am using GLM-4-9b-int8 to translate various voices.
 * I am not sure if the author can add an endpoint for custom API+Key translation.
 * I plan to package my frontend, fast-whisper-server, and vllm+glm-4-9b-int8 into one Docker container for unified deployment.
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} responseFormat - Response format ('json', 'verbose_json', 'text', 'srt', 'vtt')
 * @param {string} temperature - Temperature setting
 * @returns {Promise<object|string>} Parsed JSON for JSON formats, raw text otherwise
 * @throws {Error} If the server responds with a non-2xx status
 */
async function translateFile(filePath, model, responseFormat, temperature) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(filePath));
  formData.append('model', model);
  formData.append('response_format', responseFormat);
  formData.append('temperature', temperature);

  const response = await fetch(`${process.env.TRANSLATION_API_BASE_URL}/v1/audio/translations`, {
    method: 'POST',
    body: formData,
  });

  // Surface HTTP-level failures instead of attempting to parse an error page.
  if (!response.ok) {
    throw new Error(`Translation request failed: ${response.status} ${response.statusText}`);
  }

  // Only the JSON formats return a JSON body; the text-based formats would
  // make response.json() throw.
  const translation =
    responseFormat === 'json' || responseFormat === 'verbose_json'
      ? await response.json()
      : await response.text();
  console.log('Translation Response:', translation);
  return translation;
}
72 |
+
|
/**
 * Send audio data over WebSocket for transcription.
 * Currently, the supported file type for transcription is PCM.
 * I am not sure if other types are supported.
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} language - Language code
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<void>} Resolves when the socket closes, rejects on socket error
 */
async function sendAudioOverWebSocket(filePath, model, language, responseFormat, temperature) {
  // Build the query string safely instead of hand-concatenating encoded parts.
  const params = new URLSearchParams({
    model,
    language,
    response_format: responseFormat,
    temperature,
  });
  // Derive the host from the environment (consistent with the HTTP helpers)
  // rather than a hard-coded private IP address.
  const baseUrl = process.env.TRANSCRIPTION_API_BASE_URL || 'http://localhost:8000';
  const wsUrl = `${baseUrl.replace(/^http/, 'ws')}/v1/audio/transcriptions?${params}`;
  const ws = new WebSocket(wsUrl);

  // Wrap the socket lifecycle in a promise so callers that `await` this
  // function actually wait for the session to finish (the original resolved
  // immediately after opening the connection).
  return new Promise((resolve, reject) => {
    ws.on('open', () => {
      const audioBuffer = fs.readFileSync(filePath);
      ws.send(audioBuffer);
    });

    ws.on('message', (message) => {
      const response = JSON.parse(message);
      console.log('WebSocket Response:', response);
    });

    ws.on('close', () => {
      console.log('WebSocket connection closed');
      resolve();
    });

    ws.on('error', (error) => {
      console.error('WebSocket error:', error);
      reject(error);
    });
  });
}
106 |
+
|
/**
 * Convert audio file to PCM format (mono, 16 kHz, signed 16-bit little-endian).
 *
 * @param {string} filePath - Path to the audio file
 * @returns {Promise<string>} - Path to the converted PCM file
 * @throws {Error} If ffmpeg fails to convert the file
 */
async function convertToPcm(filePath) {
  // Swap the extension by slicing it off the end of the path. The previous
  // String.replace approach rewrote the FIRST occurrence of the extension
  // substring anywhere in the path (e.g. a directory named "clips.webm") and
  // prepended ".pcm" when the file had no extension at all.
  const ext = path.extname(filePath);
  const pcmFilePath = ext ? `${filePath.slice(0, -ext.length)}.pcm` : `${filePath}.pcm`;

  await new Promise((resolve, reject) => {
    ffmpeg(filePath)
      .audioChannels(1)        // mono
      .audioFrequency(16000)   // 16 kHz sample rate
      .audioCodec('pcm_s16le') // signed 16-bit little-endian samples
      .toFormat('s16le')       // raw (headerless) PCM output
      .on('end', () => {
        console.log(`Audio file successfully converted to PCM: ${pcmFilePath}`);
        resolve(pcmFilePath);
      })
      .on('error', (error) => {
        console.error(`Error converting audio to PCM: ${error.message}`);
        reject(error);
      })
      .save(pcmFilePath);
  });

  return pcmFilePath;
}
135 |
+
|
/**
 * Demo driver: convert a sample file to PCM, then exercise the HTTP
 * transcription, HTTP translation, and WebSocket transcription endpoints.
 */
async function main() {
  const settings = {
    model: 'Systran/faster-whisper-large-v3',
    language: 'en',
    responseFormat: 'json',
    temperature: '0',
  };
  const filePath = './path/to/your/audio.webm'; // Replace with the actual file path

  // The WebSocket endpoint only accepts raw PCM, so convert up front.
  const pcmFilePath = await convertToPcm(filePath);

  // HTTP transcription, HTTP translation, then WebSocket transcription.
  await transcribeFile(pcmFilePath, settings.model, settings.language, settings.responseFormat, settings.temperature);
  await translateFile(pcmFilePath, settings.model, settings.responseFormat, settings.temperature);
  await sendAudioOverWebSocket(pcmFilePath, settings.model, settings.language, settings.responseFormat, settings.temperature);
}
155 |
+
|
// Make sure to use ffmpeg version 7 or above. The default apt-get install only
// installs version 4.x; Ubuntu 22.04 or above is required to support 7.x.
main().catch((error) => console.error(error));

// Project URL: https://github.com/Gan-Xing/whisper