Spaces:

poemsforaphrodite
/

transcribe

Sleeping

App Files Files Community

transcribe / app.py

poemsforaphrodite

Create app.py

3589cd1 verified 4 months ago

raw

history blame

2.34 kB

	import os
	import json
	import torch
	from tqdm import tqdm # Progress bar
	import whisper

	def transcribe_audio(audio_path, model):
	    """
	    Run a loaded Whisper model on one audio file and return its text.

	    Args:
	        audio_path (str): Location of the audio file to transcribe.
	        model (whisper.Whisper): An already-loaded Whisper model.

	    Returns:
	        str: The transcribed text with leading/trailing whitespace removed.
	    """
	    # The transcription result is indexed by key; only "text" is used here.
	    raw_text = model.transcribe(audio_path)["text"]
	    return raw_text.strip()

	def transcribe_all_audios(directory, output_json, model_size="large"):
	    """
	    Transcribe all audio files in a directory and save the transcriptions to a JSON file.

	    The directory is searched recursively for files ending in .wav, .mp3,
	    .m4a, .flac or .aac (case-insensitive). Files are processed in sorted
	    path order so the output is deterministic.

	    Each transcription is stored under the file's base name. When several
	    files in different subdirectories share a base name, those files are
	    keyed by their path relative to ``directory`` instead, so no result
	    overwrites another.

	    If no audio files are found, the Whisper model is not loaded and an
	    empty JSON object is written.

	    Args:
	        directory (str): Directory containing audio files.
	        output_json (str): Path to the output JSON file.
	        model_size (str): Size of the Whisper model to load. Options: tiny, base, small, medium, large.
	    """
	    audio_extensions = (".wav", ".mp3", ".m4a", ".flac", ".aac")

	    # Discover the work before loading the model, so an empty or missing
	    # input directory does not pay for a model load.
	    audio_files = sorted(
	        os.path.join(root, file)
	        for root, _dirs, files in os.walk(directory)
	        for file in files
	        if file.lower().endswith(audio_extensions)
	    )

	    # Count base names so collisions across subdirectories can be detected.
	    name_counts = {}
	    for file_path in audio_files:
	        file_name = os.path.basename(file_path)
	        name_counts[file_name] = name_counts.get(file_name, 0) + 1

	    transcriptions = {}

	    if audio_files:
	        # Check if CUDA is available
	        device = "cuda" if torch.cuda.is_available() else "cpu"
	        print(f"Using device: {device}")

	        # Load the Whisper model
	        print(f"Loading Whisper model '{model_size}'...")
	        model = whisper.load_model(model_size, device=device)
	        print("Model loaded successfully.")

	        for file_path in tqdm(audio_files, desc="Transcribing Audio files"):
	            file_name = os.path.basename(file_path)
	            if name_counts[file_name] == 1:
	                key = file_name
	            else:
	                # Same base name exists elsewhere in the tree: use the
	                # relative path so entries do not overwrite each other.
	                key = os.path.relpath(file_path, directory)

	            # tqdm.write prints without breaking the progress bar.
	            tqdm.write(f"Transcribing: {file_path}")
	            transcriptions[key] = transcribe_audio(file_path, model)
	    else:
	        print(f"No audio files found in '{directory}'; skipping model load.")

	    # Save the transcriptions to a JSON file
	    with open(output_json, "w", encoding="utf-8") as f:
	        json.dump(transcriptions, f, ensure_ascii=False, indent=4)

	    print(f"Transcriptions saved to {output_json}")

	if __name__ == "__main__":
	    # Script entry point: transcribe everything under the "wav" folder with
	    # the "large" model and write the results to transcriptions.json.
	    model_size = "large"
	    output_json = "transcriptions.json"
	    directory = "wav"  # The input directory is expected to be "wav"
	    transcribe_all_audios(
	        directory=directory,
	        output_json=output_json,
	        model_size=model_size,
	    )