Fedir Zadniprovskyi committed on
Commit
12ab49b
1 Parent(s): 487d997

docs: add js example

Browse files
Files changed (1) hide show
  1. examples/javascript/index.js +159 -0
examples/javascript/index.js ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Example provided by https://github.com/Gan-Xing in https://github.com/fedirz/faster-whisper-server/issues/26
 */
import fs from 'fs';
import WebSocket from 'ws';
import fetch from 'node-fetch';
import FormData from 'form-data';
import path from 'path';
import ffmpeg from 'fluent-ffmpeg';
import dotenv from 'dotenv';

// Load environment variables from .env (TRANSCRIPTION_API_BASE_URL,
// TRANSLATION_API_BASE_URL, FFMPEG_PATH are read below).
dotenv.config();

// Point fluent-ffmpeg at the ffmpeg binary; defaults to the usual Linux install path.
const ffmpegPath = process.env.FFMPEG_PATH || '/usr/bin/ffmpeg';
ffmpeg.setFfmpegPath(ffmpegPath);
16
+
17
/**
 * Transcribe an audio file using the HTTP endpoint.
 * Supported file types include wav, mp3, webm, and other types supported by the OpenAI API.
 * I have tested with these three types.
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} language - Language code
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<object>} Parsed transcription response body
 * @throws {Error} When the server responds with a non-2xx status
 */
async function transcribeFile(filePath, model, language, responseFormat, temperature) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(filePath));
  formData.append('model', model);
  formData.append('language', language);
  formData.append('response_format', responseFormat);
  formData.append('temperature', temperature);

  const response = await fetch(`${process.env.TRANSCRIPTION_API_BASE_URL}/v1/audio/transcriptions`, {
    method: 'POST',
    body: formData,
  });

  // fetch resolves on any HTTP status; surface server-side failures explicitly
  // instead of attempting to JSON-parse an error page.
  if (!response.ok) {
    throw new Error(`Transcription request failed: ${response.status} ${response.statusText}`);
  }

  const transcription = await response.json();
  console.log('Transcription Response:', transcription);
  return transcription;
}
44
+
45
/**
 * Translate an audio file using the HTTP endpoint.
 * Only English is supported for translation.
 * Currently, I am using GLM-4-9b-int8 to translate various voices.
 * I am not sure if the author can add an endpoint for custom API+Key translation.
 * I plan to package my frontend, fast-whisper-server, and vllm+glm-4-9b-int8 into one Docker container for unified deployment.
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<object>} Parsed translation response body
 * @throws {Error} When the server responds with a non-2xx status
 */
async function translateFile(filePath, model, responseFormat, temperature) {
  const formData = new FormData();
  formData.append('file', fs.createReadStream(filePath));
  formData.append('model', model);
  formData.append('response_format', responseFormat);
  formData.append('temperature', temperature);

  const response = await fetch(`${process.env.TRANSLATION_API_BASE_URL}/v1/audio/translations`, {
    method: 'POST',
    body: formData,
  });

  // fetch resolves on any HTTP status; surface server-side failures explicitly
  // instead of attempting to JSON-parse an error page.
  if (!response.ok) {
    throw new Error(`Translation request failed: ${response.status} ${response.statusText}`);
  }

  const translation = await response.json();
  console.log('Translation Response:', translation);
  return translation;
}
72
+
73
/**
 * Send audio data over WebSocket for transcription.
 * Currently, the supported file type for transcription is PCM.
 * I am not sure if other types are supported.
 *
 * The returned promise settles only when the socket closes (or errors), so
 * `await`-ing this function actually waits for the session to finish — the
 * previous version resolved immediately, before any response arrived, and
 * connection errors were invisible to the caller.
 *
 * @param {string} filePath - Path to the audio file
 * @param {string} model - Model name
 * @param {string} language - Language code
 * @param {string} responseFormat - Response format
 * @param {string} temperature - Temperature setting
 * @returns {Promise<void>} Resolves when the connection closes; rejects on socket error
 */
async function sendAudioOverWebSocket(filePath, model, language, responseFormat, temperature) {
  // Allow the endpoint to be configured; fall back to the original example host.
  const baseUrl = process.env.TRANSCRIPTION_WS_BASE_URL || 'ws://100.105.162.69:8000';
  const wsUrl = `${baseUrl}/v1/audio/transcriptions?model=${encodeURIComponent(model)}&language=${encodeURIComponent(language)}&response_format=${encodeURIComponent(responseFormat)}&temperature=${encodeURIComponent(temperature)}`;

  return new Promise((resolve, reject) => {
    const ws = new WebSocket(wsUrl);

    ws.on('open', () => {
      // Send the whole file as a single binary message.
      ws.send(fs.readFileSync(filePath));
    });

    ws.on('message', (message) => {
      try {
        console.log('WebSocket Response:', JSON.parse(message));
      } catch (err) {
        // Guard against non-JSON frames instead of crashing the process.
        console.error('Failed to parse WebSocket message:', err);
      }
    });

    ws.on('close', () => {
      console.log('WebSocket connection closed');
      resolve();
    });

    ws.on('error', (error) => {
      console.error('WebSocket error:', error);
      reject(error);
    });
  });
}
106
+
107
/**
 * Convert an audio file to raw PCM (mono, 16 kHz, signed 16-bit little-endian).
 *
 * @param {string} filePath - Path to the source audio file
 * @returns {Promise<string>} Path to the converted .pcm file
 * @throws {Error} When ffmpeg fails to convert the file
 */
async function convertToPcm(filePath) {
  // Build "<dir>/<name>.pcm" with path utilities. The previous
  // String.replace(path.extname(...), '.pcm') approach was wrong for
  // extensionless inputs (extname returns '', and replacing '' PREPENDS
  // '.pcm') and replaced the first occurrence of the extension string
  // anywhere in the path, not just the trailing one.
  const { dir, name } = path.parse(filePath);
  const pcmFilePath = path.format({ dir, name, ext: '.pcm' });

  await new Promise((resolve, reject) => {
    ffmpeg(filePath)
      .audioChannels(1)          // mono
      .audioFrequency(16000)     // 16 kHz sample rate
      .audioCodec('pcm_s16le')   // signed 16-bit little-endian samples
      .toFormat('s16le')         // raw, headerless PCM output
      .on('end', () => {
        console.log(`Audio file successfully converted to PCM: ${pcmFilePath}`);
        resolve(pcmFilePath);
      })
      .on('error', (error) => {
        console.error(`Error converting audio to PCM: ${error.message}`);
        reject(error);
      })
      .save(pcmFilePath);
  });

  return pcmFilePath;
}
135
+
136
/**
 * Demo driver: convert a local audio file to PCM, then exercise the HTTP
 * transcription, HTTP translation, and WebSocket transcription endpoints
 * with it, one after another.
 */
async function main() {
  const settings = {
    model: 'Systran/faster-whisper-large-v3',
    language: 'en',
    responseFormat: 'json',
    temperature: '0',
  };
  const filePath = './path/to/your/audio.webm'; // Replace with the actual file path

  // Convert the source audio to raw PCM before hitting any endpoint.
  const pcmFilePath = await convertToPcm(filePath);

  // HTTP transcription endpoint.
  await transcribeFile(pcmFilePath, settings.model, settings.language, settings.responseFormat, settings.temperature);

  // HTTP translation endpoint.
  await translateFile(pcmFilePath, settings.model, settings.responseFormat, settings.temperature);

  // WebSocket transcription endpoint.
  await sendAudioOverWebSocket(pcmFilePath, settings.model, settings.language, settings.responseFormat, settings.temperature);
}

// Make sure to use ffmpeg version 7 or above. The default apt-get install only installs version 4.x. Also, Ubuntu 22.04 or above is required to support version 7.x.
main().catch(console.error);

// Project URL: https://github.com/Gan-Xing/whisper