poemsforaphrodite
committed on
Create app.py
app.py
ADDED
@@ -0,0 +1,70 @@
import os
import json
import torch
from tqdm import tqdm  # Progress bar
import whisper


def transcribe_audio(audio_path, model):
    """
    Transcribe a single audio file using OpenAI's Whisper model locally.

    Args:
        audio_path (str): Path to the audio file.
        model (whisper.Whisper): Loaded Whisper model.

    Returns:
        str: Continuous string of transcribed text.
    """
    # Perform transcription
    result = model.transcribe(audio_path)

    # Extract the transcribed text
    transcription = result["text"].strip()

    return transcription


def transcribe_all_audios(directory, output_json, model_size="large"):
    """
    Transcribe all audio files in a directory and save the transcriptions to a JSON file.

    Args:
        directory (str): Directory containing audio files.
        output_json (str): Path to the output JSON file.
        model_size (str): Size of the Whisper model to load. Options: tiny, base, small, medium, large.
    """
    transcriptions = {}

    # Check if CUDA is available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # Load the Whisper model
    print(f"Loading Whisper model '{model_size}'...")
    model = whisper.load_model(model_size, device=device)
    print("Model loaded successfully.")

    # Walk through the directory to find all audio files, including subdirectories
    audio_files = [
        os.path.join(root, file)
        for root, dirs, files in os.walk(directory)
        for file in files
        if file.lower().endswith((".wav", ".mp3", ".m4a", ".flac", ".aac"))
    ]

    for file_path in tqdm(audio_files, desc="Transcribing audio files"):
        file_name = os.path.basename(file_path)
        print(f"Transcribing: {file_path}")
        transcription = transcribe_audio(file_path, model)
        transcriptions[file_name] = transcription

    # Save the transcriptions to a JSON file
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(transcriptions, f, ensure_ascii=False, indent=4)

    print(f"Transcriptions saved to {output_json}")


if __name__ == "__main__":
    directory = "wav"  # Input directory containing the audio files
    output_json = "transcriptions.json"
    model_size = "large"
    transcribe_all_audios(directory, output_json, model_size)
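For context, a minimal usage sketch (not part of the committed file): the functions above can also be imported from app.py and called directly instead of running the script's __main__ block. It assumes the openai-whisper, torch, and tqdm packages are installed and that FFmpeg is available for audio decoding; the audio file names shown in the comments are hypothetical.

# usage_example.py -- illustrative sketch only; file names below are hypothetical.
from app import transcribe_all_audios

# Transcribe everything under "wav/" with a smaller model for a quick test run
# and write the results to transcriptions.json.
transcribe_all_audios("wav", "transcriptions.json", model_size="base")

# transcriptions.json then maps each audio file's basename to its transcript, e.g.:
# {
#     "interview1.wav": "Thanks for joining me today...",
#     "memo.m4a": "Remember to send the report by Friday."
# }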