Spaces:

Merlintxu
/

audio-transcription-app

Sleeping

File size: 2,568 Bytes

3c1ea6f
 
0bab575
 
209a96c
3c1ea6f
 
92f6629
 
209a96c
 
 
 
 
 
 
 
 
 
 
0bab575
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209a96c
f88c1d0
3c1ea6f
209a96c
f88c1d0
 
 
 
209a96c
f88c1d0
209a96c
0bab575
 
 
 
 
 
 
 
 
 
3c1ea6f
 
209a96c
3c1ea6f
209a96c
3c1ea6f
 
209a96c
92f6629
209a96c
 
3c1ea6f
92f6629
209a96c
3c1ea6f
 
209a96c

import gradio as gr
from transformers import pipeline
import librosa
import soundfile as sf
import requests
import os

# Speech-recognition pipeline built once at import time and shared by every
# request; the checkpoint is the one chosen for multilingual transcription.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-large",
)

def download_from_url(url):
    """Download the resource at ``url`` into a temporary file.

    The file is created with ``tempfile.mkstemp`` instead of being named
    after the last URL path component inside the working directory. A
    user-supplied URL can therefore no longer overwrite project files,
    produce an empty filename (URL ending in ``/``), or leak its query
    string into the filename. Only a short alphanumeric extension is kept
    from the URL so that downstream audio loaders can still use it as a
    format hint.

    Args:
        url: HTTP(S) address of the audio/video file to fetch.

    Returns:
        Path of the downloaded file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-success HTTP status (via ``raise_for_status``).
        OSError: If the temporary file cannot be created or written.
    """
    import tempfile
    from urllib.parse import urlparse

    # Keep the extension only when it looks like a plain one (".mp3",
    # ".wav", ...); anything else is dropped rather than trusted.
    extension = os.path.splitext(urlparse(url).path)[1]
    if not (1 < len(extension) <= 10 and extension[1:].isalnum()):
        extension = ""

    # Without a timeout a stalled server would block this call forever.
    with requests.get(url, stream=True, timeout=30) as r:
        r.raise_for_status()
        # Create the file only after the status check so that a failed
        # request leaves nothing behind on disk.
        fd, local_filename = tempfile.mkstemp(prefix="download_", suffix=extension)
        try:
            with os.fdopen(fd, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)
        except BaseException:
            # Do not leave a truncated download around; re-raise unchanged.
            os.remove(local_filename)
            raise
    return local_filename

def split_audio(file_path, segment_duration=30):
    """Split an audio file into consecutive WAV segments of bounded length.

    The file is loaded with librosa at its native sampling rate and cut on
    sample boundaries, so the whole signal is covered: a trailing part
    shorter than one second is kept in the last segment, and a clip shorter
    than one second still yields one segment.

    Args:
        file_path: Path of the audio file to split.
        segment_duration: Maximum segment length in whole seconds. Must be
            positive.

    Returns:
        List of paths of the written segment files, in playback order. Each
        file is named ``{file_path}_segment_{start}-{end}.wav`` where
        ``start`` and ``end`` are in seconds (``end`` is rounded up for a
        final partial segment). The list is empty when the file holds no
        samples.

    Raises:
        ValueError: If ``segment_duration`` is not positive.
    """
    if segment_duration <= 0:
        raise ValueError("segment_duration must be positive")

    y, sr = librosa.load(file_path, sr=None)
    total_samples = len(y)
    samples_per_segment = segment_duration * sr
    segments = []

    # Step through the signal in samples rather than truncated seconds so
    # the fractional tail of the recording is not lost.
    for start_sample in range(0, total_samples, samples_per_segment):
        end_sample = min(start_sample + samples_per_segment, total_samples)
        start = start_sample // sr
        end = -(-end_sample // sr)  # ceiling division: label covers the tail
        segment_path = f"{file_path}_segment_{start}-{end}.wav"
        sf.write(segment_path, y[start_sample:end_sample], sr)
        segments.append(segment_path)

    return segments

def transcribe_audio(file=None, url=None):
    """Transcribe an uploaded audio file or the audio found at a URL.

    When both inputs are given the URL takes precedence. The audio is split
    into segments with ``split_audio``, each segment is run through
    ``asr_pipeline``, and the segment texts are joined with single spaces.
    Segment files, and the downloaded file when a URL was used, are deleted
    before returning so repeated requests do not fill the disk.

    Args:
        file: Path of a local audio/video file, or ``None``.
        url: Address of a remote audio/video file, or ``None``. A blank or
            whitespace-only string counts as "no URL".

    Returns:
        The transcription text, or a user-facing message (in Spanish) when
        no input was supplied or when any step failed. Exceptions are not
        propagated because the result is shown directly in the UI.
    """
    if isinstance(url, str):
        # A text field may deliver stray whitespace; treat it as empty so
        # the uploaded file is still used.
        url = url.strip()

    downloaded_path = None
    segments = []
    try:
        if url:
            file_path = download_from_url(url)
            downloaded_path = file_path
        elif file:
            file_path = file
        else:
            return "No se ha proporcionado un archivo ni un enlace."

        segments = split_audio(file_path)

        transcriptions = [
            asr_pipeline(segment, return_timestamps=True)['text']
            for segment in segments
        ]
        return " ".join(transcriptions)

    except Exception as e:
        return f"Error durante la transcripción: {str(e)}"

    finally:
        # Best-effort cleanup of the files this call created. The uploaded
        # file is left alone because this function does not own it.
        for temp_path in [*segments, downloaded_path]:
            if temp_path is None:
                continue
            try:
                os.remove(temp_path)
            except OSError:
                # Already gone or not removable; nothing useful to report.
                pass

# Gradio UI: an audio upload plus an optional URL box, both routed to
# transcribe_audio, with the transcription shown as plain text.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(
            type="filepath",
            label="Sube un archivo de audio o vídeo (mp3, mp4, wav, etc.)",
        ),
        gr.Textbox(
            lines=1,
            placeholder="O pega un enlace de audio/vídeo aquí",
            label="URL de audio o vídeo (opcional)",
        ),
    ],
    outputs="text",
    title="Transcriptor de Audio y Vídeo",
    description="Sube un archivo de audio o vídeo o proporciona un enlace. Los archivos pueden estar en español o inglés.",
)

# Start the web server for the interface.
iface.launch()