Salman11223 committed
Commit e5c07ce · verified · 1 Parent(s): 3228eea

Update app.py

Files changed (1)
  1. app.py +123 -10
app.py CHANGED
@@ -5,7 +5,9 @@ import moviepy.editor as mp
 from TTS.api import TTS
 import torch
 import assemblyai as aai
+
 os.environ["COQUI_TOS_AGREED"] = "1"
+
 # Download necessary models if not already present
 model_files = {
     "wav2lip.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth",
@@ -15,14 +17,12 @@ model_files = {
     "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
 }
 
-
-
 device = "cpu"
 
+# Initialize TTS model
 tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
 
-
-
+# Download models
 for filename, url in model_files.items():
     file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
     if not os.path.exists(file_path):
@@ -31,8 +31,6 @@ for filename, url in model_files.items():
         with open(file_path, 'wb') as f:
             f.write(r.content)
 
-
-
 # Translation class
 class translation:
     def __init__(self, video_path, original_language, target_language):
@@ -87,11 +85,11 @@ class translation:
         translated_text = self.translate_text(transcript_text)
         translated_audio_path = self.generate_audio(translated_text)
 
-        # Run Wav2Lip inference
-        os.system(f"python inference.py --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face {self.video_path} --audio {translated_audio_path} --outfile 'output_video.mp4'")
+        # Run Wav2Lip inference (update the path to inference.py)
+        inference_script_path = "inference.py"  # Update this to the actual location of inference.py
+        os.system(f"python {inference_script_path} --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face {self.video_path} --audio {translated_audio_path} --outfile 'output_video.mp4'")
         return 'output_video.mp4'
 
-
 # Gradio Interface
 def app(video_path, original_language, target_language):
     translator = translation(video_path, original_language, target_language)
@@ -108,4 +106,119 @@ interface = gr.Interface(
     outputs=gr.Video(label="Translated Video")
 )
 
-interface.launch()
+interface.launch()
+
+
+
+
+# import os
+# import requests
+# import gradio as gr
+# import moviepy.editor as mp
+# from TTS.api import TTS
+# import torch
+# import assemblyai as aai
+# os.environ["COQUI_TOS_AGREED"] = "1"
+# # Download necessary models if not already present
+# model_files = {
+#     "wav2lip.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip.pth",
+#     "wav2lip_gan.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/wav2lip_gan.pth",
+#     "resnet50.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/resnet50.pth",
+#     "mobilenet.pth": "https://github.com/justinjohn0306/Wav2Lip/releases/download/models/mobilenet.pth",
+#     "s3fd.pth": "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth"
+# }
+
+
+
+# device = "cpu"
+
+# tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+
+
+
+# for filename, url in model_files.items():
+#     file_path = os.path.join("checkpoints" if "pth" in filename else "face_detection", filename)
+#     if not os.path.exists(file_path):
+#         print(f"Downloading {filename}...")
+#         r = requests.get(url)
+#         with open(file_path, 'wb') as f:
+#             f.write(r.content)
+
+
+
+# # Translation class
+# class translation:
+#     def __init__(self, video_path, original_language, target_language):
+#         self.video_path = video_path
+#         self.original_language = original_language
+#         self.target_language = target_language
+
+#     def org_language_parameters(self, original_language):
+#         language_codes = {'English': 'en', 'German': 'de', 'Italian': 'it', 'Spanish': 'es'}
+#         self.lan_code = language_codes.get(original_language, '')
+
+#     def target_language_parameters(self, target_language):
+#         language_codes = {'English': 'en', 'German': 'de', 'Italian': 'it', 'Spanish': 'es'}
+#         self.tran_code = language_codes.get(target_language, '')
+
+#     def extract_audio(self):
+#         video = mp.VideoFileClip(self.video_path)
+#         audio = video.audio
+#         audio_path = "output_audio.wav"
+#         audio.write_audiofile(audio_path)
+#         return audio_path
+
+#     def transcribe_audio(self, audio_path):
+#         aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
+#         config = aai.TranscriptionConfig(language_code=self.lan_code)
+#         transcriber = aai.Transcriber(config=config)
+#         transcript = transcriber.transcribe(audio_path)
+#         return transcript.text
+
+#     def translate_text(self, transcript_text):
+#         base_url = "https://api.cognitive.microsofttranslator.com/translate"
+#         headers = {
+#             "Ocp-Apim-Subscription-Key": os.getenv("MICROSOFT_TRANSLATOR_API_KEY"),
+#             "Content-Type": "application/json",
+#             "Ocp-Apim-Subscription-Region": "southeastasia"
+#         }
+#         params = {"api-version": "3.0", "from": self.lan_code, "to": self.tran_code}
+#         body = [{"text": transcript_text}]
+#         response = requests.post(base_url, headers=headers, params=params, json=body)
+#         translation = response.json()[0]["translations"][0]["text"]
+#         return translation
+
+#     def generate_audio(self, translated_text):
+#         tts.tts_to_file(text=translated_text, speaker_wav='output_audio.wav', file_path="output_synth.wav", language=self.tran_code)
+#         return "output_synth.wav"
+
+#     def translate_video(self):
+#         audio_path = self.extract_audio()
+#         self.org_language_parameters(self.original_language)
+#         self.target_language_parameters(self.target_language)
+#         transcript_text = self.transcribe_audio(audio_path)
+#         translated_text = self.translate_text(transcript_text)
+#         translated_audio_path = self.generate_audio(translated_text)
+
+#         # Run Wav2Lip inference
+#         os.system(f"python inference.py --checkpoint_path 'checkpoints/wav2lip_gan.pth' --face {self.video_path} --audio {translated_audio_path} --outfile 'output_video.mp4'")
+#         return 'output_video.mp4'
+
+
+# # Gradio Interface
+# def app(video_path, original_language, target_language):
+#     translator = translation(video_path, original_language, target_language)
+#     video_file = translator.translate_video()
+#     return video_file
+
+# interface = gr.Interface(
+#     fn=app,
+#     inputs=[
+#         gr.Video(label="Video Path"),
+#         gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Original Language"),
+#         gr.Dropdown(["English", "German", "Italian", "Spanish"], label="Targeted Language"),
+#     ],
+#     outputs=gr.Video(label="Translated Video")
+# )
+
+# interface.launch()
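
A note on the download loop kept by this commit: every key in model_files contains "pth", so the face_detection branch of the os.path.join is never taken, and the loop assumes the checkpoints/ directory already exists. A minimal defensive sketch under those assumptions (download_model is a hypothetical helper, not code from this commit):

import os
import requests

def download_model(filename, url, dest_dir="checkpoints"):
    # Hypothetical helper, not part of the commit: same download as the
    # loop in app.py, with the target folder created up front and the
    # response streamed so large .pth files are not held fully in memory.
    os.makedirs(dest_dir, exist_ok=True)
    file_path = os.path.join(dest_dir, filename)
    if not os.path.exists(file_path):
        with requests.get(url, stream=True, timeout=60) as r:
            r.raise_for_status()  # fail loudly on an HTTP error
            with open(file_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
    return file_path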
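The new inference_script_path still feeds an f-string to os.system, which breaks if the face or audio path contains spaces or shell metacharacters, as Gradio's temporary upload paths sometimes do. A minimal sketch of the same Wav2Lip invocation via subprocess.run, where the list form quotes each argument safely (run_wav2lip and its defaults are illustrative, not part of the commit):

import subprocess

def run_wav2lip(face_path, audio_path, outfile="output_video.mp4",
                script="inference.py", checkpoint="checkpoints/wav2lip_gan.pth"):
    # Each argument is its own list element, so no shell is involved and
    # paths with spaces need no manual quoting.
    subprocess.run(
        ["python", script,
         "--checkpoint_path", checkpoint,
         "--face", face_path,
         "--audio", audio_path,
         "--outfile", outfile],
        check=True,  # raise CalledProcessError instead of failing silently
    )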