"""Batch-transcribe audio files with OpenAI's Whisper model and save results as JSON."""

import json
import os

import torch
from tqdm import tqdm  # Progress bar
import whisper


def transcribe_audio(audio_path, model):
    """
    Transcribe a single audio file using a loaded Whisper model.

    Args:
        audio_path (str): Path to the audio file.
        model (whisper.Whisper): Loaded Whisper model.

    Returns:
        str: Transcribed text with surrounding whitespace stripped.
    """
    result = model.transcribe(audio_path)
    # Whisper returns a dict; "text" holds the full transcript.
    return result["text"].strip()


def transcribe_all_audios(directory, output_json, model_size="large"):
    """
    Transcribe every audio file under *directory* (recursively) and write
    a {relative_path: transcript} mapping to *output_json*.

    Args:
        directory (str): Directory containing audio files.
        output_json (str): Path to the output JSON file.
        model_size (str): Size of the Whisper model to load.
            Options: tiny, base, small, medium, large.
    """
    transcriptions = {}

    # Prefer GPU when available; the large Whisper model is very slow on CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # Load the Whisper model once and reuse it for every file.
    print(f"Loading Whisper model '{model_size}'...")
    model = whisper.load_model(model_size, device=device)
    print("Model loaded successfully.")

    # Collect all audio files, including those in subdirectories.
    # sorted() makes the processing order (and log output) deterministic.
    audio_extensions = (".wav", ".mp3", ".m4a", ".flac", ".aac")
    audio_files = sorted(
        os.path.join(root, file)
        for root, _dirs, files in os.walk(directory)
        for file in files
        if file.lower().endswith(audio_extensions)
    )

    for file_path in tqdm(audio_files, desc="Transcribing Audio files"):
        # Key by the path relative to the input directory. Keying by bare
        # basename (the previous behavior) silently overwrote entries when
        # files in different subdirectories shared a name. For files directly
        # inside *directory* the key is unchanged (relpath == basename).
        rel_path = os.path.relpath(file_path, directory)
        # tqdm.write keeps the progress bar intact (a bare print corrupts it).
        tqdm.write(f"Transcribing: {file_path}")
        try:
            transcriptions[rel_path] = transcribe_audio(file_path, model)
        except Exception as exc:
            # One corrupt or unreadable file must not abort the whole batch;
            # log it and keep going.
            tqdm.write(f"Failed to transcribe {file_path}: {exc}")

    # ensure_ascii=False keeps non-ASCII transcripts human-readable in the JSON.
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(transcriptions, f, ensure_ascii=False, indent=4)

    print(f"Transcriptions saved to {output_json}")


if __name__ == "__main__":
    directory = "wav"  # Ensure the input directory is "wav"
    output_json = "transcriptions.json"
    model_size = "large"
    transcribe_all_audios(directory, output_json, model_size)