poemsforaphrodite committed on
Commit
3589cd1
·
verified ·
1 Parent(s): 46634a7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -0
app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import torch
4
+ from tqdm import tqdm # Progress bar
5
+ import whisper
6
+
7
def transcribe_audio(audio_path, model):
    """
    Run an already-loaded Whisper model on one audio file and return its text.

    Args:
        audio_path (str): Location of the audio file to transcribe.
        model (whisper.Whisper): Whisper model that has already been loaded.

    Returns:
        str: The transcribed text with surrounding whitespace removed.
    """
    # Only the "text" entry of the transcription result is used; any other
    # entries in the result are ignored.
    return model.transcribe(audio_path)["text"].strip()
25
+
26
def transcribe_all_audios(directory, output_json, model_size="large"):
    """
    Transcribe all audio files in a directory and save the transcriptions to a JSON file.

    The directory is searched recursively and files are processed in sorted
    path order. Each transcription is stored under the file's base name. When
    the same base name occurs more than once (for example in two different
    subdirectories), those files are stored under their path relative to
    ``directory`` instead, so that no result overwrites another.

    The Whisper model is only loaded when at least one audio file was found;
    the output JSON file is written in either case (as an empty object when
    there was nothing to transcribe).

    Args:
        directory (str): Directory containing audio files.
        output_json (str): Path to the output JSON file.
        model_size (str): Size of the Whisper model to load. Options: tiny, base, small, medium, large.
    """
    transcriptions = {}

    # Discover the work first: loading the model is by far the most expensive
    # step and is pointless when there is nothing to transcribe.
    audio_files = _find_audio_files(directory)

    if not audio_files:
        print(f"No audio files found in '{directory}'.")
    else:
        # Check if CUDA is available
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")

        # Load the Whisper model
        print(f"Loading Whisper model '{model_size}'...")
        model = whisper.load_model(model_size, device=device)
        print("Model loaded successfully.")

        result_keys = _transcription_keys(audio_files, directory)

        for file_path in tqdm(audio_files, desc="Transcribing Audio files"):
            # tqdm.write prints the message without breaking the progress bar.
            tqdm.write(f"Transcribing: {file_path}")
            transcriptions[result_keys[file_path]] = transcribe_audio(file_path, model)

    # Save the transcriptions to a JSON file
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(transcriptions, f, ensure_ascii=False, indent=4)

    print(f"Transcriptions saved to {output_json}")


def _find_audio_files(directory):
    """Return the sorted paths of all audio files below ``directory``, including subdirectories."""
    audio_extensions = (".wav", ".mp3", ".m4a", ".flac", ".aac")
    return sorted(
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
        if name.lower().endswith(audio_extensions)
    )


def _transcription_keys(audio_files, directory):
    """Map each audio path to a unique result key: its base name, or its relative path on a name clash."""
    base_name_counts = {}
    for path in audio_files:
        base_name = os.path.basename(path)
        base_name_counts[base_name] = base_name_counts.get(base_name, 0) + 1

    keys = {}
    for path in audio_files:
        base_name = os.path.basename(path)
        if base_name_counts[base_name] == 1:
            keys[path] = base_name
        else:
            # Same file name in several folders: fall back to the path
            # relative to the search root, which is unique per file.
            keys[path] = os.path.relpath(path, directory)
    return keys
65
+
66
if __name__ == "__main__":
    # Script entry point: transcribe everything under the "wav" directory
    # with the "large" model and write the results to transcriptions.json.
    transcribe_all_audios(
        directory="wav",  # Ensure the input directory is "wav"
        output_json="transcriptions.json",
        model_size="large",
    )