transcribe / app.py
poemsforaphrodite's picture
Create app.py
3589cd1 verified
raw
history blame
2.34 kB
import os
import json
import torch
from tqdm import tqdm # Progress bar
import whisper
def transcribe_audio(audio_path, model):
    """
    Run a loaded Whisper model on one audio file and return the recognised text.

    Args:
        audio_path (str): Location of the audio file to transcribe.
        model (whisper.Whisper): An already-loaded Whisper model.

    Returns:
        str: The "text" entry of the model's result, with leading and
            trailing whitespace removed.
    """
    # The result is a mapping; only its "text" entry is used here.
    return model.transcribe(audio_path)["text"].strip()
# File extensions (lower-case) that are treated as audio input.
_AUDIO_EXTENSIONS = (".wav", ".mp3", ".m4a", ".flac", ".aac")


def _find_audio_files(directory):
    """Return the sorted paths of all audio files under ``directory``, recursively."""
    # Sorting makes the processing order independent of the platform-specific
    # order in which os.walk happens to list directory entries.
    return sorted(
        os.path.join(root, name)
        for root, _dirs, names in os.walk(directory)
        for name in names
        if name.lower().endswith(_AUDIO_EXTENSIONS)
    )


def _build_result_keys(audio_files, directory):
    """Map each audio path to a unique key for the output JSON."""
    name_counts = {}
    for path in audio_files:
        name = os.path.basename(path)
        name_counts[name] = name_counts.get(name, 0) + 1

    # A bare file name is kept whenever it is unique (same keys as before for
    # flat directories). Files sharing a name across subdirectories are keyed
    # by their path relative to ``directory`` so none overwrites another.
    return {
        path: (
            os.path.basename(path)
            if name_counts[os.path.basename(path)] == 1
            else os.path.relpath(path, directory)
        )
        for path in audio_files
    }


def transcribe_all_audios(directory, output_json, model_size="large"):
    """
    Transcribe all audio files in a directory and save the transcriptions to a JSON file.

    The directory is searched recursively for .wav, .mp3, .m4a, .flac and .aac
    files (case-insensitive). The Whisper model is only loaded when at least
    one audio file was found; otherwise an empty JSON object is written.

    Args:
        directory (str): Directory containing audio files.
        output_json (str): Path to the output JSON file.
        model_size (str): Size of the Whisper model to load. Options: tiny, base, small, medium, large.

    Returns:
        None. The result is written to ``output_json`` as a JSON object that
        maps a file key to its transcribed text. The key is the file name, or
        the path relative to ``directory`` when several files share a name.
    """
    transcriptions = {}

    # Discover the work first: loading the model is the expensive step and is
    # pointless when there is nothing to transcribe.
    audio_files = _find_audio_files(directory)

    if audio_files:
        # Check if CUDA is available
        device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Using device: {device}")

        # Load the Whisper model
        print(f"Loading Whisper model '{model_size}'...")
        model = whisper.load_model(model_size, device=device)
        print("Model loaded successfully.")

        result_keys = _build_result_keys(audio_files, directory)
        for file_path in tqdm(audio_files, desc="Transcribing Audio files"):
            # tqdm.write prints without breaking the active progress bar.
            tqdm.write(f"Transcribing: {file_path}")
            transcriptions[result_keys[file_path]] = transcribe_audio(file_path, model)
    else:
        print(f"No audio files found in '{directory}'; skipping model load.")

    # Save the transcriptions to a JSON file
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(transcriptions, f, ensure_ascii=False, indent=4)
    print(f"Transcriptions saved to {output_json}")
if __name__ == "__main__":
    # Script entry point: transcribe everything under the "wav" directory with
    # the "large" model and write the results to "transcriptions.json".
    transcribe_all_audios("wav", "transcriptions.json", "large")