younes21000 committed on
Commit
1a0463d
1 Parent(s): 905707d

Upload 6 files

Browse files
Files changed (6) hide show
  1. B-NAZANIN.TTF +0 -0
  2. Dirooz.ttf +0 -0
  3. README.md +58 -0
  4. app.py +210 -0
  5. gitattributes +35 -0
  6. requirements.txt +10 -0
B-NAZANIN.TTF ADDED
Binary file (60.8 kB). View file
 
Dirooz.ttf ADDED
Binary file (84 kB). View file
 
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: DAI_Project
3
+ emoji: 🎬
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 4.44.1
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ ---
12
+
13
+ # Video Subtitle Generator with Translation
14
+
15
+ This application allows you to generate subtitles from video files or YouTube links, translate them into multiple languages, and export them in various formats.
16
+
17
+ ## Features
18
+
19
+ - Video transcription using OpenAI's Whisper
20
+ - Support for multiple languages
21
+ - Translation capabilities using M2M100
22
+ - Multiple output formats:
23
+   - SRT subtitles
24
+   - Hardsubbed videos
25
+   - Word documents
26
+   - PDF files
27
+   - PowerPoint presentations
28
+ - YouTube video support
29
+
30
+ ## Usage
31
+
32
+ 1. Upload a video file or provide a YouTube URL
33
+ 2. Select the original video language
34
+ 3. Choose your desired translation language
35
+ 4. Pick an output format
36
+ 5. Click "Submit" and wait for processing
37
+
38
+ ## Supported Languages
39
+
40
+ - English (en)
41
+ - Persian/Farsi (fa)
42
+ - Spanish (es)
43
+ - German (de)
44
+ - French (fr)
45
+ - Italian (it)
46
+ - Portuguese (pt)
47
+ - Arabic (ar)
48
+ - Chinese (zh)
49
+ - Hindi (hi)
50
+ - Japanese (ja)
51
+ - Korean (ko)
52
+ - Russian (ru)
53
+
54
+ ## Notes
55
+
56
+ - Maximum video size: 1GB
57
+ - Processing time depends on video length and selected options
58
+ - Internet connection required for YouTube videos and translations
app.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import whisper
3
+ import os
4
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
5
+ from docx import Document
6
+ from fpdf import FPDF
7
+ from pptx import Presentation
8
+ import subprocess
9
+ import shlex
10
+ import yt_dlp
11
+
12
# Load the Whisper model once at import time so that transcribe_video can
# reuse it on every call (the "tiny" checkpoint is used for faster
# transcription).
model = whisper.load_model("tiny")
14
+
15
+ # Load M2M100 translation model for different languages
16
def load_translation_model(target_language, source_language="en"):
    """Load the M2M100 tokenizer and model configured for one language pair.

    Args:
        target_language: Language code to translate into. Must be one of the
            supported codes listed below.
        source_language: Language code of the text that will be translated.
            Defaults to "en", which matches the previous hard-coded behaviour.

    Returns:
        A ``(tokenizer, translation_model)`` tuple. The tokenizer has its
        ``src_lang`` and ``tgt_lang`` attributes already set.

    Raises:
        ValueError: If ``target_language`` is not a supported target.
    """
    supported_targets = (
        "fa",  # Persian (Farsi)
        "es",  # Spanish
        "fr",  # French
        "de",  # German
        "it",  # Italian
        "pt",  # Portuguese
        "ar",  # Arabic
        "zh",  # Chinese
        "hi",  # Hindi
        "ja",  # Japanese
        "ko",  # Korean
        "ru",  # Russian
    )
    # Validate before touching the (large) pretrained weights.
    if target_language not in supported_targets:
        raise ValueError(f"Translation model for {target_language} not supported")

    tokenizer = M2M100Tokenizer.from_pretrained("facebook/m2m100_418M")
    translation_model = M2M100ForConditionalGeneration.from_pretrained("facebook/m2m100_418M")

    # The source language used to be fixed to English even though the UI lets
    # the user pick a different spoken language.
    tokenizer.src_lang = source_language
    tokenizer.tgt_lang = target_language

    return tokenizer, translation_model
42
+
43
def translate_text(text, tokenizer, model):
    """Translate one piece of text with the given tokenizer/model pair.

    Args:
        text: The text to translate.
        tokenizer: Tokenizer whose ``tgt_lang`` selects the output language.
        model: Generation model paired with ``tokenizer``.

    Returns:
        The decoded translation of the first generated sequence.

    Raises:
        RuntimeError: If tokenization, generation or decoding fails. The
            original exception is attached as ``__cause__``.
    """
    try:
        inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        # Force the first generated token to be the target-language tag.
        translated = model.generate(
            **inputs,
            forced_bos_token_id=tokenizer.get_lang_id(tokenizer.tgt_lang),
        )
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        # Chain the cause so the real failure stays visible in tracebacks.
        raise RuntimeError(f"Error during translation: {e}") from e
50
+
51
+ # Helper function to format timestamps in SRT format
52
def format_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp.

    Args:
        seconds: Offset from the start of the media, in seconds.

    Returns:
        A string of the form ``HH:MM:SS,mmm``.
    """
    # Work in whole milliseconds. Rounding (instead of truncating the
    # fractional part) avoids float error such as 1.001 s becoming ",000".
    # Negative offsets are clamped to zero.
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"
59
+
60
+ # Corrected write_srt function
61
def write_srt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription segments to ``output_file`` in SRT format.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'start'``, ``'end'`` and ``'text'``.
        output_file: Path of the SRT file to create (overwritten if present).
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When truthy, each segment's text is translated
            before being written.
    """
    # Subtitles may contain Persian, Arabic, etc., so the encoding must not
    # depend on the platform's default locale.
    with open(output_file, "w", encoding="utf-8") as f:
        for index, segment in enumerate(transcription['segments'], start=1):
            text = segment['text']
            if translation_model:
                text = translate_text(text, tokenizer, translation_model)

            start_time = format_timestamp(segment['start'])
            end_time = format_timestamp(segment['end'])

            # One SRT cue: running number, time range, text, blank line.
            f.write(f"{index}\n")
            f.write(f"{start_time} --> {end_time}\n")
            f.write(f"{text.strip()}\n\n")
77
+
78
+ # Embedding subtitles into video (hardsub)
79
def embed_hardsub_in_video(video_file, srt_file, output_video, timeout=300):
    """Burn the subtitles from ``srt_file`` into ``video_file`` with ffmpeg.

    Args:
        video_file: Path of the input video.
        srt_file: Path of the SRT subtitle file to render onto the frames.
        output_video: Path of the re-encoded video to create.
        timeout: Maximum number of seconds ffmpeg may run (default 300).

    Raises:
        RuntimeError: If ffmpeg cannot be started, times out, or exits with a
            non-zero status (its stderr is included in the message).
    """
    # Pass the arguments as a list: building a shell string and re-splitting
    # it breaks on paths that contain quotes or other shell metacharacters.
    command = [
        "ffmpeg",
        "-y",  # overwrite a stale output instead of failing on the prompt
        "-i", video_file,
        "-vf", f"subtitles='{srt_file}'",
        "-c:v", "libx264",
        "-crf", "23",
        "-preset", "medium",
        output_video,
    ]
    try:
        process = subprocess.run(command, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired as e:
        raise RuntimeError("ffmpeg process timed out.") from e
    except OSError as e:
        # e.g. the ffmpeg executable is not installed / not on PATH.
        raise RuntimeError(f"Error running ffmpeg: {e}") from e

    # Checked outside the try block so this error is not caught and
    # re-wrapped by the handlers above.
    if process.returncode != 0:
        raise RuntimeError(f"ffmpeg error: {process.stderr}")
89
+
90
+ # Helper function to write Word documents
91
def write_word(transcription, output_file, tokenizer=None, translation_model=None, target_language=None):
    """Write the transcription as a numbered list of paragraphs in a .docx file.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'text'``.
        output_file: Path of the Word document to create.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When truthy, each segment's text is translated
            before being written.
        target_language: Language code of the written text; right-to-left
            paragraph formatting is requested for "fa" and "ar".
    """
    doc = Document()
    # Persian and Arabic are both written right-to-left; previously only
    # Persian was handled.
    rtl = target_language in ("fa", "ar")
    for index, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)
        para = doc.add_paragraph(f"{index}. {text.strip()}")
        if rtl:
            # NOTE(review): confirm that the installed python-docx honours
            # this attribute; kept exactly as in the original code.
            para.paragraph_format.right_to_left = True
    doc.save(output_file)
102
+
103
+ # Helper function to reverse text for RTL
104
def reverse_text_for_rtl(text):
    """Reverse the characters of each right-to-left word in ``text``.

    Words are split on whitespace and re-joined with single spaces, keeping
    their order. Only words that contain at least one right-to-left character
    (Unicode bidirectional class "R" or "AL", e.g. Hebrew or Arabic/Persian
    letters) are reversed, so numbers and Latin-script words such as "2024"
    or names stay readable.

    Args:
        text: The text to process.

    Returns:
        The processed text; an empty string if ``text`` has no words.
    """
    import unicodedata

    def _has_rtl_char(word):
        return any(unicodedata.bidirectional(ch) in ("R", "AL") for ch in word)

    return ' '.join(
        word[::-1] if _has_rtl_char(word) else word
        for word in text.split()
    )
106
+
107
+ # Helper function to write PDF documents
108
def write_pdf(transcription, output_file, tokenizer=None, translation_model=None, font_path=None):
    """Write the transcription as a numbered list in a PDF file.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'text'``.
        output_file: Path of the PDF to create.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When truthy, each segment's text is translated
            before being written.
        font_path: Path of the TrueType font to embed. Defaults to
            ``B-NAZANIN.TTF`` located next to this module.
    """
    if font_path is None:
        # Resolve the bundled font relative to this file instead of assuming
        # the app is always deployed under /home/user/app.
        font_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "B-NAZANIN.TTF")

    pdf = FPDF()
    pdf.add_page()
    pdf.add_font('B-NAZANIN', '', font_path, uni=True)
    pdf.set_font('B-NAZANIN', size=12)
    for index, segment in enumerate(transcription['segments'], start=1):
        text = segment['text']
        if translation_model:
            text = translate_text(text, tokenizer, translation_model)
        # Text is passed through the RTL helper before it is laid out.
        reversed_text = reverse_text_for_rtl(text)
        pdf.multi_cell(0, 10, f"{index}. {reversed_text.strip()}", align='L')
    pdf.output(output_file)
121
+
122
+ # Helper function to write PowerPoint slides
123
def write_ppt(transcription, output_file, tokenizer=None, translation_model=None):
    """Write the transcription to a PowerPoint file, one slide per segment.

    Args:
        transcription: Mapping with a ``'segments'`` list; each segment
            provides ``'text'``.
        output_file: Path of the .pptx file to create.
        tokenizer: Tokenizer passed to ``translate_text`` when translating.
        translation_model: When truthy, each segment's text is translated
            before it is placed on its slide.
    """
    deck = Presentation()
    for number, segment in enumerate(transcription['segments'], start=1):
        caption = segment['text']
        if translation_model:
            caption = translate_text(caption, tokenizer, translation_model)
        # Each segment gets its own slide (layout index 5), with the numbered
        # text placed in the slide's title placeholder.
        new_slide = deck.slides.add_slide(deck.slide_layouts[5])
        new_slide.shapes.title.text = f"{number}. {caption.strip()}"
    deck.save(output_file)
133
+
134
+ # Function to download YouTube video
135
def download_youtube_video(url):
    """Download the video at ``url`` as MP4 and return the local file path.

    Each call downloads into its own freshly created temporary directory.
    With a single fixed file name in the working directory, concurrent
    requests would overwrite each other and a file left over from an earlier
    request could be reused for a different URL.

    Args:
        url: URL of the video to download.

    Returns:
        Path of the downloaded ``downloaded_video.mp4`` file. The containing
        temporary directory is not removed by this function.
    """
    import tempfile

    download_dir = tempfile.mkdtemp(prefix="yt_download_")
    output_path = os.path.join(download_dir, 'downloaded_video.mp4')
    ydl_opts = {
        'format': 'mp4',
        'outtmpl': output_path,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return output_path
143
+
144
+ # Transcribing video and generating output
145
def transcribe_video(video_file, video_url, language, target_language, output_format):
    """Transcribe a video, optionally translate it, and export the result.

    Args:
        video_file: Uploaded file (an object with a ``name`` attribute, or a
            plain path). Ignored when ``video_url`` is given.
        video_url: URL of a video to download; takes precedence over
            ``video_file``.
        language: Language code spoken in the video.
        target_language: Language code for the generated subtitles.
        output_format: One of "SRT", "Video with Hardsub", "Word", "PDF" or
            "PowerPoint".

    Returns:
        Path of the generated output file.

    Raises:
        ValueError: If neither a file nor a URL is supplied, or if
            ``output_format`` is not one of the supported values.
        RuntimeError: If the translation model cannot be loaded, translation
            fails, or the subtitles cannot be embedded in the video.
    """
    supported_formats = ("SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint")
    if output_format not in supported_formats:
        # Fail before the expensive work instead of silently returning None.
        raise ValueError(f"Unsupported output format: {output_format}")

    video_url = (video_url or "").strip()
    if video_url:
        video_file_path = download_youtube_video(video_url)
    elif video_file is not None:
        # Accept both file-like objects exposing ``.name`` and plain paths.
        video_file_path = getattr(video_file, "name", video_file)
    else:
        raise ValueError("Please upload a video file or provide a YouTube URL.")

    # Whisper can translate speech into English itself; use that when English
    # subtitles are requested for a non-English video (previously the
    # subtitles were left in the spoken language).
    to_english = target_language == "en" and language != "en"
    result = model.transcribe(
        video_file_path,
        language=language,
        task="translate" if to_english else "transcribe",
    )
    video_name = os.path.splitext(video_file_path)[0]

    if target_language not in ("en", language):
        try:
            tokenizer, translation_model = load_translation_model(target_language)
        except Exception as e:
            raise RuntimeError(f"Error loading translation model: {e}") from e
        if language:
            # The transcript is in the spoken language, not necessarily English.
            tokenizer.src_lang = language
        # Translate every segment exactly once; the writers below then receive
        # already-translated text and need no model of their own.
        result = dict(
            result,
            segments=[
                dict(segment, text=translate_text(segment['text'], tokenizer, translation_model))
                for segment in result['segments']
            ],
        )

    if output_format in ("SRT", "Video with Hardsub"):
        srt_file = f"{video_name}.srt"
        write_srt(result, srt_file)
        if output_format == "SRT":
            return srt_file
        output_video = f"{video_name}_with_subtitles.mp4"
        try:
            embed_hardsub_in_video(video_file_path, srt_file, output_video)
        except Exception as e:
            raise RuntimeError(f"Error embedding subtitles in video: {e}") from e
        return output_video

    if output_format == "Word":
        word_file = f"{video_name}.docx"
        write_word(result, word_file, target_language=target_language)
        return word_file

    if output_format == "PDF":
        pdf_file = f"{video_name}.pdf"
        write_pdf(result, pdf_file)
        return pdf_file

    # Only "PowerPoint" remains after the validation at the top.
    ppt_file = f"{video_name}.pptx"
    write_ppt(result, ppt_file)
    return ppt_file
185
+
186
+ # Gradio interface with YouTube URL
187
iface = gr.Interface(
    title="Video Subtitle Generator with Translation & Multi-Format Output (Supports YouTube)",
    description=(
        "This tool allows you to generate subtitles from a video file or YouTube link using Whisper, "
        "translate the subtitles into multiple languages using M2M100, and export them "
        "in various formats including SRT, hardcoded subtitles in video, Word, PDF, or PowerPoint."
    ),
    fn=transcribe_video,
    # The inputs are passed positionally to transcribe_video, in this order:
    # video_file, video_url, language, target_language, output_format.
    inputs=[
        gr.File(label="Upload Video File (or leave empty for YouTube link)"),
        gr.Textbox(
            label="YouTube Video URL (optional)",
            placeholder="https://www.youtube.com/watch?v=...",
        ),
        gr.Dropdown(
            label="Select Original Video Language",
            choices=["en", "es", "fr", "de", "it", "pt"],
            value="en",
        ),
        gr.Dropdown(
            label="Select Subtitle Translation Language",
            choices=["en", "fa", "es", "de", "fr", "it", "pt"],
            value="fa",
        ),
        gr.Radio(
            label="Choose Output Format",
            choices=["SRT", "Video with Hardsub", "Word", "PDF", "PowerPoint"],
            value="Video with Hardsub",
        ),
    ],
    outputs=gr.File(label="Download File"),
    # NOTE(review): confirm "compact" is a valid theme name for the Gradio
    # version this app is deployed with.
    theme="compact",
    live=False,
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()
209
+
210
+
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ transformers>=4.30.0
2
+ gradio>=3.16.0
3
+ ffmpeg-python
4
+ python-docx
5
+ fpdf
6
+ python-pptx
7
+ sentencepiece # Required for M2M100 and MarianMT translation models
8
+ librosa # Required for audio processing
9
+ git+https://github.com/openai/whisper.git
10
+ yt-dlp