Spaces:
Sleeping
Sleeping
import os
import tempfile

import whisper
from pydub import AudioSegment
from pydub.utils import make_chunks
# Load the Whisper "base" checkpoint once, at import time; transcribe_segment reuses this instance.
model = whisper.load_model("base")
def transcribe(audio_path):
    """Stream a growing transcript of the audio file at ``audio_path``.

    The audio is processed in 30000 ms chunks by
    ``transcribe_audio_in_chunks``. After each chunk is transcribed, the
    texts gathered so far are joined with single spaces and yielded, so
    every yielded string extends the previous one.

    Yields:
        The space-joined transcript of all chunks processed so far.
    """
    # NOTE(review): assumes the yield belongs inside the loop (progressive
    # output) -- confirm against the original indentation.
    texts_so_far = []
    chunk_texts = transcribe_audio_in_chunks(audio_path, chunk_length_ms=30000)
    for chunk_text in chunk_texts:
        texts_so_far.append(chunk_text)
        running_transcript = " ".join(texts_so_far)
        yield running_transcript
def transcribe_segment(segment, segment_number):
    """Transcribe one audio segment and return its text.

    The segment is exported to a temporary WAV file, that file is passed to
    the module-level Whisper ``model``, and the file is deleted afterwards.

    Args:
        segment: Object exposing ``export(path, format=...)``; in this file
            these are the chunks produced by ``make_chunks``.
        segment_number: Index of the segment; only used to make the
            temporary file name recognisable.

    Returns:
        The ``"text"`` entry of the result returned by ``model.transcribe``.

    Raises:
        Whatever ``segment.export`` or ``model.transcribe`` raises; the
        temporary file is removed in that case as well.
    """
    # mkstemp gives a unique path, so concurrent calls that share the same
    # segment_number cannot overwrite each other's audio.
    fd, temp_filename = tempfile.mkstemp(
        prefix=f"temp_segment_{segment_number}_", suffix=".wav"
    )
    # Only the path is needed; close the descriptor so export can reopen the
    # file (required on platforms that forbid a second open handle).
    os.close(fd)
    try:
        segment.export(temp_filename, format="wav")
        result = model.transcribe(temp_filename)
    finally:
        # Always clean up, even when export or transcription fails.
        os.remove(temp_filename)
    return result["text"]
def transcribe_audio_in_chunks(audio_path, chunk_length_ms):
    """Lazily transcribe an audio file piece by piece.

    The file at ``audio_path`` is loaded with ``AudioSegment.from_file``,
    split by ``make_chunks`` using ``chunk_length_ms``, and each resulting
    piece is handed to ``transcribe_segment`` together with its zero-based
    index.

    Args:
        audio_path: Location of the audio file to load.
        chunk_length_ms: Chunk length passed to ``make_chunks``
            (presumably milliseconds, per the parameter name).

    Yields:
        The transcription of each piece, in order.
    """
    pieces = make_chunks(AudioSegment.from_file(audio_path), chunk_length_ms)
    for index, piece in enumerate(pieces):
        yield transcribe_segment(piece, index)