Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
|
|
|
|
3 |
import requests
|
4 |
import os
|
5 |
|
@@ -16,6 +18,21 @@ def download_from_url(url):
|
|
16 |
f.write(chunk)
|
17 |
return local_filename
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
# Función para procesar el archivo o la URL
|
20 |
def transcribe_audio(file=None, url=None):
|
21 |
try:
|
@@ -27,10 +44,16 @@ def transcribe_audio(file=None, url=None):
|
|
27 |
else:
|
28 |
return "No se ha proporcionado un archivo ni un enlace."
|
29 |
|
30 |
-
#
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
except Exception as e:
|
36 |
return f"Error durante la transcripción: {str(e)}"
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import pipeline
|
3 |
+
import librosa
|
4 |
+
import soundfile as sf
|
5 |
import requests
|
6 |
import os
|
7 |
|
|
|
18 |
f.write(chunk)
|
19 |
return local_filename
|
20 |
|
21 |
+
# Función para dividir un archivo de audio en fragmentos de tamaño manejable
|
22 |
+
def split_audio(file_path, segment_duration=30):
    """Split an audio file into consecutive WAV segments on disk.

    The file is decoded with ``librosa.load`` at its native sampling rate
    (``sr=None``) and cut into pieces of ``segment_duration`` seconds. Each
    piece is written with ``soundfile.write`` next to the source file as
    ``"{file_path}_segment_{start}-{end}.wav"``, where ``start`` and ``end``
    are whole-second offsets.

    Args:
        file_path: Path of the audio file to split.
        segment_duration: Length of each segment in whole seconds; must be
            a positive integer.

    Returns:
        A list with the paths of the segment files, in playback order. The
        list is empty when the audio contains no samples.

    Raises:
        ValueError: If ``segment_duration`` is not positive.
    """
    import math

    if segment_duration <= 0:
        raise ValueError("segment_duration must be a positive number of seconds")

    y, sr = librosa.load(file_path, sr=None)
    total_duration = librosa.get_duration(y=y, sr=sr)

    # Round the duration UP: truncating it would silently drop the trailing
    # fraction of a second, and would produce no segment at all for clips
    # shorter than one second.
    total_seconds = math.ceil(total_duration)

    segments = []
    for start in range(0, total_seconds, segment_duration):
        end = min(start + segment_duration, total_seconds)
        # Slicing clamps at the end of the array, so the final segment simply
        # contains whatever samples remain.
        segment = y[start * sr: end * sr]
        segment_path = f"{file_path}_segment_{start}-{end}.wav"
        sf.write(segment_path, segment, sr)
        segments.append(segment_path)

    return segments
|
35 |
+
|
36 |
# Función para procesar el archivo o la URL
|
37 |
def transcribe_audio(file=None, url=None):
|
38 |
try:
|
|
|
44 |
else:
|
45 |
return "No se ha proporcionado un archivo ni un enlace."
|
46 |
|
47 |
+
# Dividir el archivo en segmentos de 30 segundos
|
48 |
+
segments = split_audio(file_path)
|
49 |
+
|
50 |
+
# Transcribir cada segmento y concatenar los resultados
|
51 |
+
transcriptions = []
|
52 |
+
for segment in segments:
|
53 |
+
result = asr_pipeline(segment, return_timestamps=True)
|
54 |
+
transcriptions.append(result['text'])
|
55 |
+
|
56 |
+
return " ".join(transcriptions)
|
57 |
|
58 |
except Exception as e:
|
59 |
return f"Error durante la transcripción: {str(e)}"
|