younes21000 committed on
Commit
32f4242
·
verified ·
1 Parent(s): c1510f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -114
app.py CHANGED
@@ -1,6 +1,6 @@
 
1
  import gradio as gr
2
  import whisper
3
- import os
4
  from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
5
  from docx import Document
6
  from reportlab.pdfgen import canvas
@@ -13,6 +13,13 @@ from pptx import Presentation
13
  import subprocess
14
  import shlex
15
  import yt_dlp
 
 
 
 
 
 
 
16
 
17
  # Load the Whisper model (smaller model for faster transcription)
18
  model = whisper.load_model("tiny")
@@ -26,12 +33,6 @@ def load_translation_model(target_language):
26
  "de": "de", # German
27
  "it": "it", # Italian
28
  "pt": "pt", # Portuguese
29
- "ar": "ar", # Arabic
30
- "zh": "zh", # Chinese
31
- "hi": "hi", # Hindi
32
- "ja": "ja", # Japanese
33
- "ko": "ko", # Korean
34
- "ru": "ru", # Russian
35
  }
36
  target_lang_code = lang_codes.get(target_language)
37
  if not target_lang_code:
@@ -62,7 +63,6 @@ def format_timestamp(seconds):
62
  seconds = seconds % 60
63
  return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
64
 
65
- # Corrected write_srt function
66
  def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
67
  with open(output_file, "w") as f:
68
  for i, segment in enumerate(transcription['segments']):
@@ -80,7 +80,6 @@ def write_srt(transcription, output_file, tokenizer=None, translation_model=None
80
  f.write(f"{start_time} --> {end_time}\n")
81
  f.write(f"{text.strip()}\n\n")
82
 
83
- # Embedding subtitles into video (hardsub)
84
  def embed_hardsub_in_video(video_file, srt_file, output_video):
85
  command = f'ffmpeg -i "{video_file}" -vf "subtitles=\'{srt_file}\'" -c:v libx264 -crf 23 -preset medium "{output_video}"'
86
  try:
@@ -92,7 +91,6 @@ def embed_hardsub_in_video(video_file, srt_file, output_video):
92
  except Exception as e:
93
  raise RuntimeError(f"Error running ffmpeg: {e}")
94
 
95
- # Helper function to write Word documents
96
  def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
97
  doc = Document()
98
  rtl = target_language == "fa"
@@ -105,22 +103,15 @@ def write_word(transcription, output_file, tokenizer=None, translation_model=Non
105
  para.paragraph_format.right_to_left = True
106
  doc.save(output_file)
107
 
108
- # Helper function to reverse text for RTL
109
  def reverse_text_for_rtl(text):
110
  return ' '.join([word[::-1] for word in text.split()])
111
 
112
- # Helper function to write PDF documents
113
  def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
114
- # Create PDF with A4 page size
115
  c = canvas.Canvas(output_file, pagesize=A4)
116
- # Get the directory where app.py is located
117
  app_dir = os.path.dirname(os.path.abspath(__file__))
118
-
119
- # Construct the full path to the font files
120
  nazanin_font_path = os.path.join(app_dir, 'B-NAZANIN.TTF')
121
  arial_font_path = os.path.join(app_dir, 'Arial.ttf')
122
 
123
- # Register B-Nazanin font
124
  if os.path.exists(nazanin_font_path):
125
  try:
126
  pdfmetrics.registerFont(TTFont('B-Nazanin', nazanin_font_path))
@@ -129,7 +120,6 @@ def write_pdf(transcription, output_file, tokenizer=None, translation_model=None
129
  else:
130
  raise FileNotFoundError(f"B-Nazanin font file not found at {nazanin_font_path}. Please ensure it is available.")
131
 
132
- # Register Arial font
133
  if os.path.exists(arial_font_path):
134
  try:
135
  pdfmetrics.registerFont(TTFont('Arial', arial_font_path))
@@ -138,170 +128,163 @@ def write_pdf(transcription, output_file, tokenizer=None, translation_model=None
138
  else:
139
  raise FileNotFoundError(f"Arial font file not found at {arial_font_path}. Please ensure it is available.")
140
 
141
- # Initialize y position from top of page
142
- y_position = A4[1] - 50 # Start 50 points from top
143
  line_height = 20
144
 
145
- # Process each segment
146
  for i, segment in enumerate(transcription['segments']):
147
  text = segment['text']
148
 
149
- # Translate if translation model is provided
150
  if translation_model:
151
  text = translate_text(text, tokenizer, translation_model)
152
 
153
- # Format the line with segment number
154
  line = f"{i + 1}. {text.strip()}"
155
 
156
- # Determine target language for font and text direction
157
  target_language = None
158
  if translation_model:
159
- # Assuming target language can be inferred from the tokenizer
160
  target_language = tokenizer.tgt_lang
161
 
162
- # Reshape and reorder the text for correct RTL display if necessary
163
  if target_language in ['fa', 'ar']:
164
  reshaped_text = arabic_reshaper.reshape(line)
165
  bidi_text = get_display(reshaped_text)
166
- # Set font for RTL languages
167
  c.setFont('B-Nazanin', 12)
168
- # Draw the text right-aligned
169
- c.drawRightString(A4[0] - 50, y_position, bidi_text) # 50 points margin from right
170
  else:
171
- c.setFont('Arial', 12) # Use Arial for other languages
172
- c.drawString(50, y_position, line) # Left aligned
173
 
174
- # Add new page if needed
175
- if y_position < 50: # Leave 50 points margin at bottom
176
  c.showPage()
177
- y_position = A4[1] - 50 # Reset y position for new page
178
 
179
- # Update y position for next line
180
  y_position -= line_height
181
 
182
- # Save the PDF
183
  c.save()
184
  return output_file
185
 
186
-
187
-
188
-
189
- # Helper function to write PowerPoint slides
190
  def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
191
  ppt = Presentation()
192
- slide = ppt.slides.add_slide(ppt.slide_layouts[5]) # Create the first slide
193
- text_buffer = "" # Initialize an empty buffer to accumulate text
194
- max_chars_per_slide = 400 # Set a character limit for each slide
195
 
196
  for i, segment in enumerate(transcription['segments']):
197
  text = segment['text']
198
 
199
- # Translate if translation model is provided
200
  if translation_model:
201
  text = translate_text(text, tokenizer, translation_model)
202
 
203
- # Format the line with segment number
204
  line = f"{i + 1}. {text.strip()}\n"
205
 
206
- # Check if adding this line exceeds the character limit
207
  if len(text_buffer) + len(line) > max_chars_per_slide:
208
- # If so, add the accumulated text to the current slide
209
- slide.shapes.title.text = "Transcription" # Set the title for the slide
210
  textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
211
  textbox.text = text_buffer.strip()
212
 
213
- # Create a new slide and reset the buffer
214
  slide = ppt.slides.add_slide(ppt.slide_layouts[5])
215
- text_buffer = line # Start the new slide with the current line
216
  else:
217
- # Otherwise, keep accumulating text
218
  text_buffer += line
219
 
220
- # Add any remaining text in the buffer to the last slide
221
  if text_buffer:
222
- slide.shapes.title.text = "" # Set the title for the last slide
223
  textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
224
  textbox.text = text_buffer.strip()
225
 
226
  ppt.save(output_file)
227
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
- # Function to download YouTube video
230
  def download_youtube_video(url):
231
- ydl_opts = {
232
- 'format': 'mp4',
233
- 'outtmpl': 'downloaded_video.mp4',
234
- 'nocheckcertificate': True, # Disable certificate check
235
- }
236
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
237
- ydl.download([url])
238
- return 'downloaded_video.mp4'
239
-
240
 
241
- # Transcribing video and generating output
242
  def transcribe_video(video_file, video_url, language, target_language, output_format):
243
  if video_url:
244
  video_file_path = download_youtube_video(video_url)
245
  else:
246
- video_file_path = video_file.name
 
 
247
 
248
- result = model.transcribe(video_file_path, language=language)
249
- video_name = os.path.splitext(video_file_path)[0]
250
  if target_language != "en":
251
- try:
252
- tokenizer, translation_model = load_translation_model(target_language)
253
- except Exception as e:
254
- raise RuntimeError(f"Error loading translation model: {e}")
255
  else:
256
  tokenizer, translation_model = None, None
257
 
258
- srt_file = f"{video_name}.srt"
259
- write_srt(result, srt_file, tokenizer, translation_model)
260
 
261
  if output_format == "SRT":
262
- return srt_file
263
- elif output_format == "Video with Hardsub":
264
- output_video = f"{video_name}_with_subtitles.mp4"
265
- try:
266
- embed_hardsub_in_video(video_file_path, srt_file, output_video)
267
- return output_video
268
- except Exception as e:
269
- raise RuntimeError(f"Error embedding subtitles in video: {e}")
270
  elif output_format == "Word":
271
- word_file = f"{video_name}.docx"
272
- write_word(result, word_file, tokenizer, translation_model, target_language)
273
- return word_file
274
  elif output_format == "PDF":
275
- pdf_file = f"{video_name}.pdf"
276
- write_pdf(result, pdf_file, tokenizer, translation_model)
277
- return pdf_file
278
- elif output_format == "PowerPoint":
279
- ppt_file = f"{video_name}.pptx"
280
- write_ppt(result, ppt_file, tokenizer, translation_model)
281
- return ppt_file
282
-
283
- # Gradio interface with YouTube URL
284
- iface = gr.Interface(
285
- fn=transcribe_video,
286
- inputs=[
287
- gr.File(label="Upload Video File (or leave empty for YouTube link)"), # Removed 'optional=True'
288
- gr.Textbox(label="YouTube Video URL (optional)", placeholder="https://www.youtube.com/watch?v=..."),
289
- gr.Dropdown(label="Select Original Video Language", choices=["en", "es", "fr", "de", "it", "pt"], value="en"),
290
- gr.Dropdown(label="Select Subtitle Translation Language", choices=["en", "fa", "es", "de", "fr", "it", "pt"], value="fa"),
291
- gr.Radio(label="Choose Output Format", choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"], value="Video with Hardsub")
292
- ],
293
- outputs=gr.File(label="Download File"),
294
- title="Video Subtitle Generator with Translation & Multi-Format Output (Supports YouTube)",
295
- description=(
296
- "This tool allows you to generate subtitles from a video file or YouTube link using Whisper, "
297
- "translate the subtitles into multiple languages using M2M100, and export them "
298
- "in various formats including SRT, hardcoded subtitles in video, Word, PDF, or PowerPoint."
299
- ),
300
- theme="compact",
301
- live=False
302
- )
303
 
304
- if __name__ == "__main__":
305
- iface.launch()
306
 
 
 
 
307
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
  import gradio as gr
3
  import whisper
 
4
  from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
5
  from docx import Document
6
  from reportlab.pdfgen import canvas
 
13
  import subprocess
14
  import shlex
15
  import yt_dlp
16
+ from selenium import webdriver
17
+ from selenium.webdriver.common.by import By
18
+ from selenium.webdriver.chrome.service import Service as ChromeService
19
+ from webdriver_manager.chrome import ChromeDriverManager
20
+ from selenium.webdriver.support.ui import WebDriverWait
21
+ from selenium.webdriver.support import expected_conditions as EC
22
+ import time
23
 
24
  # Load the Whisper model (smaller model for faster transcription)
25
  model = whisper.load_model("tiny")
 
33
  "de": "de", # German
34
  "it": "it", # Italian
35
  "pt": "pt", # Portuguese
 
 
 
 
 
 
36
  }
37
  target_lang_code = lang_codes.get(target_language)
38
  if not target_lang_code:
 
63
  seconds = seconds % 60
64
  return f"{hours:02}:{minutes:02}:{seconds:02},{milliseconds:03}"
65
 
 
66
  def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
67
  with open(output_file, "w") as f:
68
  for i, segment in enumerate(transcription['segments']):
 
80
  f.write(f"{start_time} --> {end_time}\n")
81
  f.write(f"{text.strip()}\n\n")
82
 
 
83
  def embed_hardsub_in_video(video_file, srt_file, output_video):
84
  command = f'ffmpeg -i "{video_file}" -vf "subtitles=\'{srt_file}\'" -c:v libx264 -crf 23 -preset medium "{output_video}"'
85
  try:
 
91
  except Exception as e:
92
  raise RuntimeError(f"Error running ffmpeg: {e}")
93
 
 
94
  def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
95
  doc = Document()
96
  rtl = target_language == "fa"
 
103
  para.paragraph_format.right_to_left = True
104
  doc.save(output_file)
105
 
 
106
def reverse_text_for_rtl(text):
    """Return *text* with the characters of every word reversed.

    Words are the whitespace-delimited tokens of ``text``. Their order is
    kept, and they are re-joined with single spaces, so any run of
    whitespace in the input collapses to one space.
    """
    flipped_words = ("".join(reversed(token)) for token in text.split())
    return " ".join(flipped_words)
108
 
 
109
  def write_pdf(transcription, output_file, tokenizer=None, translation_model=None):
 
110
  c = canvas.Canvas(output_file, pagesize=A4)
 
111
  app_dir = os.path.dirname(os.path.abspath(__file__))
 
 
112
  nazanin_font_path = os.path.join(app_dir, 'B-NAZANIN.TTF')
113
  arial_font_path = os.path.join(app_dir, 'Arial.ttf')
114
 
 
115
  if os.path.exists(nazanin_font_path):
116
  try:
117
  pdfmetrics.registerFont(TTFont('B-Nazanin', nazanin_font_path))
 
120
  else:
121
  raise FileNotFoundError(f"B-Nazanin font file not found at {nazanin_font_path}. Please ensure it is available.")
122
 
 
123
  if os.path.exists(arial_font_path):
124
  try:
125
  pdfmetrics.registerFont(TTFont('Arial', arial_font_path))
 
128
  else:
129
  raise FileNotFoundError(f"Arial font file not found at {arial_font_path}. Please ensure it is available.")
130
 
131
+ y_position = A4[1] - 50
 
132
  line_height = 20
133
 
 
134
  for i, segment in enumerate(transcription['segments']):
135
  text = segment['text']
136
 
 
137
  if translation_model:
138
  text = translate_text(text, tokenizer, translation_model)
139
 
 
140
  line = f"{i + 1}. {text.strip()}"
141
 
 
142
  target_language = None
143
  if translation_model:
 
144
  target_language = tokenizer.tgt_lang
145
 
 
146
  if target_language in ['fa', 'ar']:
147
  reshaped_text = arabic_reshaper.reshape(line)
148
  bidi_text = get_display(reshaped_text)
 
149
  c.setFont('B-Nazanin', 12)
150
+ c.drawRightString(A4[0] - 50, y_position, bidi_text)
 
151
  else:
152
+ c.setFont('Arial', 12)
153
+ c.drawString(50, y_position, line)
154
 
155
+ if y_position < 50:
 
156
  c.showPage()
157
+ y_position = A4[1] - 50
158
 
 
159
  y_position -= line_height
160
 
 
161
  c.save()
162
  return output_file
163
 
 
 
 
 
164
def _fill_ppt_slide(ppt, slide, title, body):
    """Set the slide title and add one full-slide textbox containing *body*."""
    slide.shapes.title.text = title
    textbox = slide.shapes.add_textbox(left=0, top=0, width=ppt.slide_width, height=ppt.slide_height)
    textbox.text = body.strip()


def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription segments to a PowerPoint file.

    Segments are numbered from 1 and packed into slides of roughly
    ``max_chars_per_slide`` characters each. A segment is never split, so a
    single segment longer than the limit gets a slide of its own.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment is a
            mapping with a ``'text'`` entry.
        output_file: Path the presentation is saved to.
        tokenizer: Passed through to ``translate_text`` when translating.
        translation_model: When truthy, every segment text is translated with
            ``translate_text`` before being written.
    """
    ppt = Presentation()
    # The chosen layout must provide a title placeholder, because the title
    # text is set on every slide that receives content.
    slide = ppt.slides.add_slide(ppt.slide_layouts[5])
    text_buffer = ""
    max_chars_per_slide = 400

    for i, segment in enumerate(transcription['segments']):
        text = segment['text']

        if translation_model:
            text = translate_text(text, tokenizer, translation_model)

        line = f"{i + 1}. {text.strip()}\n"

        # Only start a new slide when there is buffered text to flush.
        # Without the emptiness check, an over-long line arriving on an empty
        # buffer would emit a blank slide first.
        if text_buffer and len(text_buffer) + len(line) > max_chars_per_slide:
            _fill_ppt_slide(ppt, slide, "Transcription", text_buffer)
            slide = ppt.slides.add_slide(ppt.slide_layouts[5])
            text_buffer = line
        else:
            text_buffer += line

    if text_buffer:
        # NOTE(review): the last slide gets an empty title while earlier
        # slides are titled "Transcription"; kept as-is, confirm it is
        # intentional.
        _fill_ppt_slide(ppt, slide, "", text_buffer)

    ppt.save(output_file)
194
 
195
# Download a YouTube video through the ssyoutube page using Selenium.
def _wait_for_download(directory, ignore, timeout, poll_interval=0.5):
    """Return the path of the first finished new file in *directory*.

    Files named in *ignore* (present before the download started) are
    skipped. A download counts as finished once at least one new file exists
    and no new file carries a partial-download suffix.

    Raises:
        TimeoutError: If no finished file shows up within *timeout* seconds.
    """
    # presumably Chrome marks in-progress downloads with these suffixes --
    # TODO confirm for the Chrome version in use
    partial_suffixes = (".crdownload", ".tmp")
    deadline = time.monotonic() + timeout
    while True:
        new_files = sorted(name for name in os.listdir(directory) if name not in ignore)
        if new_files and not any(name.endswith(partial_suffixes) for name in new_files):
            return os.path.join(directory, new_files[0])
        if time.monotonic() >= deadline:
            raise TimeoutError(f"No completed download appeared in {directory} within {timeout} seconds")
        time.sleep(poll_interval)


def download_from_ssyoutube(modified_url, download_dir=None, wait_seconds=20, download_timeout=120):
    """Download a video via the ssyoutube page and return the saved file path.

    Opens *modified_url* in Chrome, clicks the link whose text contains
    "Low quality", then the link whose text contains "Download", and waits
    until the downloaded file is complete.

    Args:
        modified_url: The ssyoutube URL to open.
        download_dir: Directory Chrome should save into. A fresh temporary
            directory is created when omitted.
        wait_seconds: Maximum seconds to wait for each link to be clickable.
        download_timeout: Maximum seconds to wait for the download to finish.

    Returns:
        Path of the downloaded file. The caller passes this value to Whisper
        as the media path, so a status message would not be usable there.

    Raises:
        RuntimeError: If navigation, clicking or the download fails.
    """
    import tempfile

    if download_dir is None:
        download_dir = tempfile.mkdtemp(prefix="ssyoutube_")
    else:
        os.makedirs(download_dir, exist_ok=True)
    download_dir = os.path.abspath(download_dir)
    already_present = set(os.listdir(download_dir))

    options = webdriver.ChromeOptions()
    # Send downloads to a known directory so the resulting file can be found.
    options.add_experimental_option(
        "prefs",
        {
            "download.default_directory": download_dir,
            "download.prompt_for_download": False,
        },
    )
    driver = webdriver.Chrome(
        service=ChromeService(ChromeDriverManager().install()),
        options=options,
    )

    try:
        driver.get(modified_url)

        WebDriverWait(driver, wait_seconds).until(
            EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "Low quality"))
        ).click()

        WebDriverWait(driver, wait_seconds).until(
            EC.element_to_be_clickable((By.PARTIAL_LINK_TEXT, "Download"))
        ).click()

        return _wait_for_download(download_dir, already_present, download_timeout)
    except Exception as e:
        raise RuntimeError(f"Failed to download video: {e}") from e
    finally:
        # Always close the browser, including when navigation itself fails.
        driver.quit()
216
+
217
def modify_youtube_url(url):
    """Rewrite a YouTube URL so it points at the ssyoutube host.

    Everything from the first occurrence of ``"youtube"`` onwards is kept
    and prefixed with ``"https://ss"``. Short ``youtu.be/<id>`` links are
    converted to the ``watch?v=<id>`` form on the same host.

    Args:
        url: The YouTube URL entered by the user.

    Returns:
        The rewritten URL as a string.

    Raises:
        ValueError: If *url* is not a string or contains neither
            ``"youtube"`` nor a usable ``youtu.be/`` video id.
    """
    if not isinstance(url, str):
        raise ValueError("Invalid YouTube URL.")
    url = url.strip()

    short_marker = "youtu.be/"
    short_pos = url.find(short_marker)
    youtube_pos = url.find("youtube")

    # Treat the URL as a short link only when "youtu.be/" comes before any
    # "youtube" text; otherwise a query such as "?feature=youtube" would be
    # mistaken for the host.
    if short_pos != -1 and (youtube_pos == -1 or short_pos < youtube_pos):
        video_id = url[short_pos + len(short_marker):]
        for separator in "?&#/":
            video_id = video_id.partition(separator)[0]
        if not video_id:
            raise ValueError("Invalid YouTube URL.")
        return f"https://ssyoutube.com/watch?v={video_id}"

    if youtube_pos == -1:
        raise ValueError("Invalid YouTube URL.")

    return "https://ss" + url[youtube_pos:]
224
 
 
225
def download_youtube_video(url):
    """Download the video behind a YouTube URL.

    The URL is rewritten with ``modify_youtube_url`` and then handed to
    ``download_from_ssyoutube``.

    Args:
        url: The YouTube URL entered by the user.

    Returns:
        Whatever ``download_from_ssyoutube`` returns.
        NOTE(review): ``transcribe_video`` passes this value to Whisper as
        the media path, so it is expected to be a file path; verify against
        ``download_from_ssyoutube``.

    Raises:
        RuntimeError: If the URL is rejected or the download fails. The
            original exception is attached as ``__cause__``.
    """
    try:
        modified_url = modify_youtube_url(url)
        return download_from_ssyoutube(modified_url)
    except Exception as e:
        # Chain the cause so the underlying failure stays in the traceback.
        raise RuntimeError(f"Error downloading YouTube video: {e}") from e
 
 
 
 
231
 
 
232
def transcribe_video(video_file, video_url, language, target_language, output_format):
    """Transcribe a video, optionally translate it, and write an output file.

    Args:
        video_file: Path of an uploaded video; used when *video_url* is empty.
        video_url: YouTube URL; when non-empty the video is downloaded with
            ``download_youtube_video`` and *video_file* is ignored.
        language: Spoken language of the video, forwarded to Whisper. A falsy
            value is forwarded as ``None``.
        target_language: Language of the output. Any value other than
            ``"en"`` loads a translation model via ``load_translation_model``.
        output_format: One of ``"SRT"``, ``"Word"``, ``"PDF"`` or ``"PPT"``.

    Returns:
        Name of the generated file (``output.srt``, ``output.docx``,
        ``output.pdf`` or ``output.pptx``).

    Raises:
        ValueError: If *output_format* is not supported, or if neither a URL
            nor a file was supplied.
    """
    output_names = {
        "SRT": "output.srt",
        "Word": "output.docx",
        "PDF": "output.pdf",
        "PPT": "output.pptx",
    }
    # Validate before the expensive download/transcription work starts.
    if output_format not in output_names:
        raise ValueError(f"Unsupported output format: {output_format!r}")

    if isinstance(video_url, str):
        video_url = video_url.strip()
    if not video_url and not video_file:
        raise ValueError("Provide either a YouTube URL or a video file.")

    if video_url:
        video_file_path = download_youtube_video(video_url)
    else:
        video_file_path = video_file

    # Forward the selected language; it was previously accepted but ignored.
    transcription = model.transcribe(video_file_path, language=language or None)

    if target_language != "en":
        tokenizer, translation_model = load_translation_model(target_language)
    else:
        tokenizer, translation_model = None, None

    output_file = output_names[output_format]

    if output_format == "SRT":
        write_srt(transcription, output_file, tokenizer, translation_model)
    elif output_format == "Word":
        write_word(transcription, output_file, tokenizer, translation_model, target_language)
    elif output_format == "PDF":
        write_pdf(transcription, output_file, tokenizer, translation_model)
    else:  # "PPT" -- the only remaining validated format
        write_ppt(transcription, output_file, tokenizer, translation_model)

    return output_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
 
 
262
 
263
def main():
    """Build the Gradio Blocks interface and launch the app.

    The interface collects a YouTube URL or an uploaded video file, the
    video language, the target language and the output format, and wires a
    button to ``transcribe_video``. The resulting file is offered for
    download.
    """
    with gr.Blocks() as app:
        gr.Markdown("# Transcribe, Translate and Format YouTube Video Content")

        video_url_input = gr.Textbox(label="YouTube Video URL (or leave blank for video file upload)")
        video_file_input = gr.File(label="Upload Video File (leave blank for YouTube URL)")
        language_input = gr.Dropdown(choices=["en"], label="Video Language", value="en")
        target_language_input = gr.Dropdown(choices=["en", "fa", "es", "fr", "de", "it", "pt"], label="Target Language", value="en")
        output_format_input = gr.Dropdown(choices=["SRT", "Word", "PDF", "PPT"], label="Output Format", value="SRT")

        output_file = gr.File(label="Download Transcription", interactive=False)

        transcribe_button = gr.Button("Transcribe & Translate")

        def transcribe_and_translate(video_file, video_url, language, target_language, output_format):
            # The upload may arrive as an object exposing ``.name`` or as a
            # plain path string; accept both instead of assuming ``.name``.
            video_path = getattr(video_file, "name", video_file) if video_file else None
            return transcribe_video(video_path, video_url, language, target_language, output_format)

        transcribe_button.click(
            transcribe_and_translate,
            inputs=[video_file_input, video_url_input, language_input, target_language_input, output_format_input],
            outputs=output_file
        )

    app.launch()


if __name__ == "__main__":
    main()