# Spaces: Sleeping (hosting-page status captured with the file; not part of the program)
import os | |
import json | |
import torch | |
from tqdm import tqdm # Progress bar | |
import whisper | |
def transcribe_audio(audio_path, model):
    """
    Transcribe a single audio file with an already-loaded Whisper model.

    Args:
        audio_path (str): Path to the audio file.
        model (whisper.Whisper): Loaded Whisper model; only its
            ``transcribe`` method is called.

    Returns:
        str: The transcribed text as one continuous string, with leading
            and trailing whitespace removed.
    """
    # The result is indexed like a dict; only its "text" entry is used here.
    result = model.transcribe(audio_path)
    return result["text"].strip()
def transcribe_all_audios(directory, output_json, model_size="large"):
    """
    Transcribe all audio files in a directory and save the transcriptions to a JSON file.

    The directory is searched recursively. Each transcription is stored under
    the audio file's base name; if that name is already taken by a file from
    another subdirectory, the path relative to ``directory`` is used instead
    so that no result is silently overwritten.

    Args:
        directory (str): Directory containing audio files.
        output_json (str): Path to the output JSON file.
        model_size (str): Size of the Whisper model to load.
            Options: tiny, base, small, medium, large.
    """
    audio_extensions = (".wav", ".mp3", ".m4a", ".flac", ".aac")
    transcriptions = {}

    # Check if CUDA is available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # Load the Whisper model
    print(f"Loading Whisper model '{model_size}'...")
    model = whisper.load_model(model_size, device=device)
    print("Model loaded successfully.")

    # Walk through the directory to find all audio files, including subdirectories.
    audio_files = []
    for root, dirs, files in os.walk(directory):
        # Sorting ``dirs`` in place steers os.walk's descent, so the traversal
        # (and therefore the collision fallback below) is deterministic.
        dirs.sort()
        audio_files.extend(
            os.path.join(root, name)
            for name in sorted(files)
            if name.lower().endswith(audio_extensions)
        )

    for file_path in tqdm(audio_files, desc="Transcribing Audio files"):
        print(f"Transcribing: {file_path}")
        key = os.path.basename(file_path)
        if key in transcriptions:
            # Same file name seen earlier in another folder. os.walk is
            # top-down, so files directly in ``directory`` always claim their
            # base name first; a relative path from a subfolder contains a
            # separator and therefore cannot clash with any base-name key.
            key = os.path.relpath(file_path, directory)
        transcriptions[key] = transcribe_audio(file_path, model)

    # Save the transcriptions to a JSON file
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(transcriptions, f, ensure_ascii=False, indent=4)
    print(f"Transcriptions saved to {output_json}")
# Script entry point: transcribe everything under ./wav with the "large"
# Whisper model and write the results to transcriptions.json.
if __name__ == "__main__":
    directory = "wav"  # Ensure the input directory is "wav"
    output_json = "transcriptions.json"
    model_size = "large"
    transcribe_all_audios(directory, output_json, model_size)