danielwm994 committed on
Commit
244af64
·
verified ·
1 Parent(s): beeb55d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -47
app.py CHANGED
@@ -29,53 +29,18 @@ def transcribe(inputs, task):
29
  if inputs is None:
30
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
31
 
32
- result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
33
- text = result["text"]
34
- timestamps = result["chunks"]
35
-
36
- timestamp_str = "\n".join([f"[{chunk['timestamp']}] {chunk['text']}" for chunk in timestamps])
37
-
38
- return text, timestamp_str
39
-
40
-
41
- def _return_yt_html_embed(yt_url):
42
- video_id = yt_url.split("?v=")[-1]
43
- HTML_str = (
44
- f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
45
- " </center>"
46
- )
47
- return HTML_str
48
-
49
- def download_yt_audio(yt_url, filename):
50
- info_loader = youtube_dl.YoutubeDL()
51
 
52
- try:
53
- info = info_loader.extract_info(yt_url, download=False)
54
- except youtube_dl.utils.DownloadError as err:
55
- raise gr.Error(str(err))
56
-
57
- file_length = info["duration_string"]
58
- file_h_m_s = file_length.split(":")
59
- file_h_m_s = [int(sub_length) for sub_length in file_h_m_s]
60
-
61
- if len(file_h_m_s) == 1:
62
- file_h_m_s.insert(0, 0)
63
- if len(file_h_m_s) == 2:
64
- file_h_m_s.insert(0, 0)
65
- file_length_s = file_h_m_s[0] * 3600 + file_h_m_s[1] * 60 + file_h_m_s[2]
66
-
67
- if file_length_s > YT_LENGTH_LIMIT_S:
68
- yt_length_limit_hms = time.strftime("%HH:%MM:%SS", time.gmtime(YT_LENGTH_LIMIT_S))
69
- file_length_hms = time.strftime("%HH:%MM:%SS", time.gmtime(file_length_s))
70
- raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")
71
 
72
- ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
 
 
 
73
 
74
- with youtube_dl.YoutubeDL(ydl_opts) as ydl:
75
- try:
76
- ydl.download([yt_url])
77
- except youtube_dl.utils.ExtractorError as err:
78
- raise gr.Error(str(err))
79
 
80
  @spaces.GPU
81
  def yt_transcribe(yt_url, task, max_filesize=75.0):
@@ -90,13 +55,18 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
90
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
91
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
92
 
93
- result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)
 
94
  text = result["text"]
95
  timestamps = result["chunks"]
96
 
97
- timestamp_str = "\n".join([f"[{chunk['timestamp']}] {chunk['text']}" for chunk in timestamps])
 
 
 
98
 
99
- return html_embed_str, text, timestamp_str
 
100
 
101
 
102
  demo = gr.Blocks()
 
29
  if inputs is None:
30
  raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
31
 
32
+ result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps="word")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
+ text = result["text"]
35
+ timestamps = result["chunks"] # each chunk contains the word and its timestamps
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
+ word_timestamps = []
38
+ for chunk in timestamps:
39
+ for word_info in chunk["words"]:
40
+ word_timestamps.append(f"{word_info['word']} [{word_info['start']}-{word_info['end']}]")
41
 
42
+ return "\n".join(word_timestamps)
43
+
 
 
 
44
 
45
  @spaces.GPU
46
  def yt_transcribe(yt_url, task, max_filesize=75.0):
 
55
  inputs = ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
56
  inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
57
 
58
+ result = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps="word")
59
+
60
  text = result["text"]
61
  timestamps = result["chunks"]
62
 
63
+ word_timestamps = []
64
+ for chunk in timestamps:
65
+ for word_info in chunk["words"]:
66
+ word_timestamps.append(f"{word_info['word']} [{word_info['start']}-{word_info['end']}]")
67
 
68
+ return html_embed_str, "\n".join(word_timestamps)
69
+
70
 
71
 
72
  demo = gr.Blocks()