artificialguybr committed on
Commit
23be978
·
verified ·
1 Parent(s): a47bd89

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -84
app.py CHANGED
@@ -53,97 +53,102 @@ def check_for_faces(video_path):
53
 
54
  @spaces.GPU
55
  def process_video(radio, video, target_language, has_closeup_face):
56
- if target_language is None:
57
- return gr.Error("Please select a Target Language for Dubbing.")
 
58
 
59
- run_uuid = uuid.uuid4().hex[:6]
60
- output_filename = f"{run_uuid}_resized_video.mp4"
61
- ffmpeg.input(video).output(output_filename, vf='scale=-2:720').run()
62
-
63
- video_path = output_filename
64
-
65
- if not os.path.exists(video_path):
66
- return f"Error: {video_path} does not exist."
67
-
68
- video_info = ffmpeg.probe(video_path)
69
- video_duration = float(video_info['streams'][0]['duration'])
70
-
71
- if video_duration > 60:
72
- os.remove(video_path)
73
- return gr.Error("Video duration exceeds 1 minute. Please upload a shorter video.")
74
 
75
- ffmpeg.input(video_path).output(f"{run_uuid}_output_audio.wav", acodec='pcm_s24le', ar=48000, map='a').run()
76
-
77
- shell_command = f"ffmpeg -y -i {run_uuid}_output_audio.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav".split(" ")
78
- subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
79
-
80
- print("Attempting to transcribe with Whisper...")
81
- try:
82
- segments, info = model.transcribe(f"{run_uuid}_output_audio_final.wav", beam_size=5)
83
- whisper_text = " ".join(segment.text for segment in segments)
84
- whisper_language = info.language
85
- print(f"Transcription successful: {whisper_text}")
86
- except RuntimeError as e:
87
- print(f"RuntimeError encountered: {str(e)}")
88
- if "CUDA failed with error device-side assert triggered" in str(e):
89
- gr.Warning("Error. Space need to restart. Please retry in a minute")
90
- api.restart_space(repo_id=repo_id)
91
-
92
- language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
93
- target_language_code = language_mapping[target_language]
94
- translator = Translator()
95
- translated_text = translator.translate(whisper_text, src=whisper_language, dest=target_language_code).text
96
- print(translated_text)
97
-
98
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
99
- tts.to('cuda')
100
- tts.tts_to_file(translated_text, speaker_wav=f"{run_uuid}_output_audio_final.wav", file_path=f"{run_uuid}_output_synth.wav", language=target_language_code)
101
-
102
- pad_top = 0
103
- pad_bottom = 15
104
- pad_left = 0
105
- pad_right = 0
106
- rescaleFactor = 1
107
-
108
- video_path_fix = video_path
109
-
110
- if has_closeup_face:
111
- has_face = True
112
- else:
113
- has_face = check_for_faces(video_path)
114
 
115
- if has_closeup_face:
116
- try:
117
- cmd = f"python Wav2Lip/inference.py --checkpoint_path 'Wav2Lip/checkpoints/wav2lip_gan.pth' --face {shlex.quote(video_path)} --audio '{run_uuid}_output_synth.wav' --pads {pad_top} {pad_bottom} {pad_left} {pad_right} --resize_factor {rescaleFactor} --nosmooth --outfile '{run_uuid}_output_video.mp4'"
118
- subprocess.run(cmd, shell=True, check=True)
119
- except subprocess.CalledProcessError as e:
120
- if "Face not detected! Ensure the video contains a face in all the frames." in str(e.stderr):
121
- gr.Warning("Wav2lip didn't detect a face. Please try again with the option disabled.")
122
- cmd = f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4"
123
- subprocess.run(cmd, shell=True)
124
- else:
125
- cmd = f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4"
126
- subprocess.run(cmd, shell=True)
127
 
128
- if not os.path.exists(f"{run_uuid}_output_video.mp4"):
129
- raise FileNotFoundError(f"Error: {run_uuid}_output_video.mp4 was not generated.")
 
130
 
131
- output_video_path = f"{run_uuid}_output_video.mp4"
132
 
133
- files_to_delete = [
134
- f"{run_uuid}_resized_video.mp4",
135
- f"{run_uuid}_output_audio.wav",
136
- f"{run_uuid}_output_audio_final.wav",
137
- f"{run_uuid}_output_synth.wav"
138
- ]
139
- for file in files_to_delete:
140
  try:
141
- os.remove(file)
142
- except FileNotFoundError:
143
- print(f"File {file} not found for deletion.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
- return output_video_path
146
-
147
  def swap(radio):
148
  if(radio == "Upload"):
149
  return gr.update(source="upload")
@@ -163,11 +168,12 @@ iface = gr.Interface(
163
  value=False,
164
  info="Say if video have close-up face. For Wav2lip. Will not work if checked wrongly.")
165
  ],
166
- outputs=gr.Video(),
167
  live=False,
168
  title="AI Video Dubbing",
169
  description="""This tool was developed by [@artificialguybr](https://twitter.com/artificialguybr) using entirely open-source tools. Special thanks to Hugging Face for the GPU support. Thanks [@yeswondwer](https://twitter.com/@yeswondwerr) for original code. Test the [Video Transcription and Translate](https://huggingface.co/spaces/artificialguybr/VIDEO-TRANSLATION-TRANSCRIPTION) space!""",
170
  allow_flagging=False
 
171
  )
172
  with gr.Blocks() as demo:
173
  iface.render()
 
53
 
54
  @spaces.GPU
55
  def process_video(radio, video, target_language, has_closeup_face):
56
+ try:
57
+ if target_language is None:
58
+ raise ValueError("Please select a Target Language for Dubbing.")
59
 
60
+ run_uuid = uuid.uuid4().hex[:6]
61
+ output_filename = f"{run_uuid}_resized_video.mp4"
62
+ ffmpeg.input(video).output(output_filename, vf='scale=-2:720').run()
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
+ video_path = output_filename
65
+
66
+ if not os.path.exists(video_path):
67
+ raise FileNotFoundError(f"Error: {video_path} does not exist.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
+ video_info = ffmpeg.probe(video_path)
70
+ video_duration = float(video_info['streams'][0]['duration'])
 
 
 
 
 
 
 
 
 
 
71
 
72
+ if video_duration > 60:
73
+ os.remove(video_path)
74
+ raise ValueError("Video duration exceeds 1 minute. Please upload a shorter video.")
75
 
76
+ ffmpeg.input(video_path).output(f"{run_uuid}_output_audio.wav", acodec='pcm_s24le', ar=48000, map='a').run()
77
 
78
+ shell_command = f"ffmpeg -y -i {run_uuid}_output_audio.wav -af lowpass=3000,highpass=100 {run_uuid}_output_audio_final.wav".split(" ")
79
+ subprocess.run([item for item in shell_command], capture_output=False, text=True, check=True)
80
+
81
+ print("Attempting to transcribe with Whisper...")
 
 
 
82
  try:
83
+ segments, info = model.transcribe(f"{run_uuid}_output_audio_final.wav", beam_size=5)
84
+ whisper_text = " ".join(segment.text for segment in segments)
85
+ whisper_language = info.language
86
+ print(f"Transcription successful: {whisper_text}")
87
+ except RuntimeError as e:
88
+ print(f"RuntimeError encountered: {str(e)}")
89
+ if "CUDA failed with error device-side assert triggered" in str(e):
90
+ gr.Warning("Error. Space need to restart. Please retry in a minute")
91
+ api.restart_space(repo_id=repo_id)
92
+
93
+ language_mapping = {'English': 'en', 'Spanish': 'es', 'French': 'fr', 'German': 'de', 'Italian': 'it', 'Portuguese': 'pt', 'Polish': 'pl', 'Turkish': 'tr', 'Russian': 'ru', 'Dutch': 'nl', 'Czech': 'cs', 'Arabic': 'ar', 'Chinese (Simplified)': 'zh-cn'}
94
+ target_language_code = language_mapping[target_language]
95
+ translator = Translator()
96
+ translated_text = translator.translate(whisper_text, src=whisper_language, dest=target_language_code).text
97
+ print(translated_text)
98
+
99
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
100
+ tts.to('cuda')
101
+ tts.tts_to_file(translated_text, speaker_wav=f"{run_uuid}_output_audio_final.wav", file_path=f"{run_uuid}_output_synth.wav", language=target_language_code)
102
+
103
+ pad_top = 0
104
+ pad_bottom = 15
105
+ pad_left = 0
106
+ pad_right = 0
107
+ rescaleFactor = 1
108
+
109
+ video_path_fix = video_path
110
+
111
+ if has_closeup_face:
112
+ has_face = True
113
+ else:
114
+ has_face = check_for_faces(video_path)
115
+
116
+ if has_closeup_face:
117
+ try:
118
+ cmd = f"python Wav2Lip/inference.py --checkpoint_path 'Wav2Lip/checkpoints/wav2lip_gan.pth' --face {shlex.quote(video_path)} --audio '{run_uuid}_output_synth.wav' --pads {pad_top} {pad_bottom} {pad_left} {pad_right} --resize_factor {rescaleFactor} --nosmooth --outfile '{run_uuid}_output_video.mp4'"
119
+ subprocess.run(cmd, shell=True, check=True)
120
+ except subprocess.CalledProcessError as e:
121
+ if "Face not detected! Ensure the video contains a face in all the frames." in str(e.stderr):
122
+ gr.Warning("Wav2lip didn't detect a face. Please try again with the option disabled.")
123
+ cmd = f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4"
124
+ subprocess.run(cmd, shell=True)
125
+ else:
126
+ cmd = f"ffmpeg -i {video_path} -i {run_uuid}_output_synth.wav -c:v copy -c:a aac -strict experimental -map 0:v:0 -map 1:a:0 {run_uuid}_output_video.mp4"
127
+ subprocess.run(cmd, shell=True)
128
+
129
+ if not os.path.exists(f"{run_uuid}_output_video.mp4"):
130
+ raise FileNotFoundError(f"Error: {run_uuid}_output_video.mp4 was not generated.")
131
+
132
+ output_video_path = f"{run_uuid}_output_video.mp4"
133
+
134
+ files_to_delete = [
135
+ f"{run_uuid}_resized_video.mp4",
136
+ f"{run_uuid}_output_audio.wav",
137
+ f"{run_uuid}_output_audio_final.wav",
138
+ f"{run_uuid}_output_synth.wav"
139
+ ]
140
+ for file in files_to_delete:
141
+ try:
142
+ os.remove(file)
143
+ except FileNotFoundError:
144
+ print(f"File {file} not found for deletion.")
145
+
146
+ return output_video_path
147
+
148
+ except Exception as e:
149
+ print(f"Error in process_video: {str(e)}")
150
+ return gr.update(value=None, visible=True), f"Error: {str(e)}"
151
 
 
 
152
  def swap(radio):
153
  if(radio == "Upload"):
154
  return gr.update(source="upload")
 
168
  value=False,
169
  info="Say if video have close-up face. For Wav2lip. Will not work if checked wrongly.")
170
  ],
171
+ outputs=[gr.Video(), gr.Textbox(label="Error Message")],
172
  live=False,
173
  title="AI Video Dubbing",
174
  description="""This tool was developed by [@artificialguybr](https://twitter.com/artificialguybr) using entirely open-source tools. Special thanks to Hugging Face for the GPU support. Thanks [@yeswondwer](https://twitter.com/@yeswondwerr) for original code. Test the [Video Transcription and Translate](https://huggingface.co/spaces/artificialguybr/VIDEO-TRANSLATION-TRANSCRIPTION) space!""",
175
  allow_flagging=False
176
+
177
  )
178
  with gr.Blocks() as demo:
179
  iface.render()