poemsforaphrodite
committed on
Create app.py
app.py
ADDED
@@ -0,0 +1,70 @@
import os
import json
import torch
from tqdm import tqdm  # Progress bar
import whisper


def transcribe_audio(audio_path, model):
    """
    Transcribe a single audio file using OpenAI's Whisper model locally.

    Args:
        audio_path (str): Path to the audio file.
        model (whisper.Whisper): Loaded Whisper model.

    Returns:
        str: Continuous string of transcribed text.
    """
    # Perform transcription
    result = model.transcribe(audio_path)

    # Extract the transcribed text
    transcription = result["text"].strip()

    return transcription


def transcribe_all_audios(directory, output_json, model_size="large"):
    """
    Transcribe all audio files in a directory and save the transcriptions to a JSON file.

    Args:
        directory (str): Directory containing audio files.
        output_json (str): Path to the output JSON file.
        model_size (str): Size of the Whisper model to load. Options: tiny, base, small, medium, large.
    """
    transcriptions = {}

    # Check if CUDA is available
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # Load the Whisper model
    print(f"Loading Whisper model '{model_size}'...")
    model = whisper.load_model(model_size, device=device)
    print("Model loaded successfully.")

    # Walk through the directory to find all audio files, including subdirectories
    audio_files = [
        os.path.join(root, file)
        for root, dirs, files in os.walk(directory)
        for file in files
        if file.lower().endswith((".wav", ".mp3", ".m4a", ".flac", ".aac"))
    ]

    for file_path in tqdm(audio_files, desc="Transcribing audio files"):
        file_name = os.path.basename(file_path)
        print(f"Transcribing: {file_path}")
        transcription = transcribe_audio(file_path, model)
        transcriptions[file_name] = transcription

    # Save the transcriptions to a JSON file
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(transcriptions, f, ensure_ascii=False, indent=4)

    print(f"Transcriptions saved to {output_json}")


if __name__ == "__main__":
    directory = "wav"  # Input directory containing the audio files
    output_json = "transcriptions.json"
    model_size = "large"
    transcribe_all_audios(directory, output_json, model_size)
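For context, a minimal usage sketch (not part of the committed file): the functions above can also be imported from app.py and called directly instead of running the script's __main__ block. It assumes the openai-whisper, torch, and tqdm packages are installed and that FFmpeg is available for audio decoding; the audio file names shown in the comments are hypothetical.

# usage_example.py -- illustrative sketch only; file names below are hypothetical.
from app import transcribe_all_audios

# Transcribe everything under "wav/" with a smaller model for a quick test run
# and write the results to transcriptions.json.
transcribe_all_audios("wav", "transcriptions.json", model_size="base")

# transcriptions.json then maps each audio file's basename to its transcript, e.g.:
# {
#     "interview1.wav": "Thanks for joining me today...",
#     "memo.m4a": "Remember to send the report by Friday."
# }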