Salman11223 committed on
Commit
ed2fbbc
·
1 Parent(s): 284e8b8

Create translate.py

Browse files
Files changed (1) hide show
  1. translate.py +219 -0
translate.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import moviepy.editor as mp
3
+ import assemblyai as aai
4
+ import requests
5
+ import azure.cognitiveservices.speech as speechsdk
6
+ from moviepy.editor import AudioFileClip
7
+ from gradio_client import Client
8
+
9
+
10
class Translate:
    """Dub a video into another language.

    The pipeline implemented by :meth:`transcribe_and_translate` is:
    extract the audio track, transcribe it to SRT with AssemblyAI, detect the
    speaker gender through a Gradio client, translate every subtitle with the
    Microsoft Translator REST API, synthesize each translated subtitle with
    Azure Speech, concatenate the synthesized clips and mux them onto the
    original video as ``translated_video.mp4``.

    All intermediate files are written to the current working directory under
    fixed names, so two pipelines must not run in the same directory at once.

    NOTE: the synthesized clips are concatenated back-to-back; the subtitle
    timestamps are carried into the translated SRT but are not used to
    position the audio.
    """

    def __init__(self, video_path, target_language, original_language, speaking_rate):
        """Store the job parameters and service credentials.

        Args:
            video_path: Path of the video file to dub.
            target_language: Language name handled by
                :meth:`set_language_parameters` (e.g. ``'German'``).
            original_language: Language name handled by
                :meth:`org_language_parameters` (e.g. ``'English'``).
            speaking_rate: Value placed in the SSML ``<prosody rate="...">``
                attribute.
        """
        self.video_path = video_path
        self.target_language = target_language
        self.original_language = original_language
        # NOTE(review): credentials should not live in source control. Each key
        # can now be supplied through an environment variable; the literals are
        # kept only as a backward-compatible fallback and should be rotated
        # and removed.
        self.aai_api_key = os.environ.get(
            "ASSEMBLYAI_API_KEY", "c29eb650444a4ae4be6a787ebb15d5e2"
        )
        self.txtospech_key = os.environ.get(
            "AZURE_TTS_KEY", "358c77527e48454cbf5bf2bd54f03161"
        )
        self.translation_api_key = os.environ.get(
            "AZURE_TRANSLATOR_KEY", "394833878dd54214886cd81a35ac35dc"
        )
        self.spechtotxt_key = os.environ.get(
            "AZURE_STT_KEY", "07ac642da789462d87ad47a790ec6d5f"
        )
        self.speaking_rate = speaking_rate

    def extract_audio(self):
        """Write the video's audio track to ``audio.wav`` and return that path.

        Raises:
            ValueError: If the video has no audio track.
        """
        aai.settings.api_key = self.aai_api_key
        audio_path = "audio.wav"
        video = mp.VideoFileClip(self.video_path)
        try:
            audio = video.audio
            if audio is None:
                raise ValueError(f"No audio track found in {self.video_path!r}")
            audio.write_audiofile(audio_path)
        finally:
            # Release the underlying reader so the files can be reopened or
            # deleted later.
            video.close()
        print("Audio extracted successfully!")
        return audio_path

    def gender_detection(self, audio_path="audio.wav"):
        """Send the audio file to the gender-detection endpoint.

        Args:
            audio_path: Audio file to classify; defaults to the file written
                by :meth:`extract_audio`.

        Returns:
            Whatever the ``/predict`` endpoint returns (compared against
            ``'male'`` in :meth:`set_language_parameters`).
        """
        # NOTE(review): this URL pins a specific "--replicas" path; confirm it
        # is still the right address for the hosted model.
        gender_model_url = "https://salman11223-gender-detection.hf.space/--replicas/wml9f/"
        gender_client = Client(gender_model_url)
        gender = gender_client.predict(audio_path, api_name="/predict")
        print(gender)
        return gender

    def org_language_parameters(self, original_language):
        """Set ``self.lan_code`` to the transcription code of the source language.

        Unsupported language names result in an empty string.
        """
        codes = {
            'English': 'en',
            'German': 'de',
            'French': 'fr',
            'Spanish': 'es',
        }
        self.lan_code = codes.get(original_language, '')

    def set_language_parameters(self, target_language, detected_gender):
        """Select locale, translation code and voice for the target language.

        Sets ``self.language_code``, ``self.trans_code`` and
        ``self.voice_names``. The male voice is used when ``detected_gender``
        equals ``'male'``; any other value selects the female voice.
        """
        # target language -> (locale, translator code, male voice, female voice)
        table = {
            'English': ('en-US', 'en', 'en-US-GuyNeural', 'en-US-AriaNeural'),
            'German': ('de-DE', 'de', 'de-DE-ConradNeural', 'de-DE-KatjaNeural'),
            'French': ('fr-CA', 'fr', 'fr-CA-JeanNeural', 'fr-CA-SylvieNeural'),
            'Spanish': ('es-ES', 'es', 'es-ES-AlvaroNeural', 'es-ES-ElviraNeural'),
            'Urdu': ('ur-PK', 'ur', 'ur-PK-AsadNeural', 'ur-PK-UzmaNeural'),
        }
        entry = table.get(target_language)
        if entry is None:
            # Unsupported language: keep the original placeholder values.
            self.voice_names = []
            self.language_code = ''
            self.trans_code = ''
        else:
            locale, trans_code, male_voice, female_voice = entry
            self.language_code = locale
            self.trans_code = trans_code
            self.voice_names = male_voice if detected_gender == 'male' else female_voice

        print("Target Language:", target_language)
        print("Trans Code:", self.trans_code)

    def get_voice_names(self):
        """Return the voice selected by :meth:`set_language_parameters`."""
        return self.voice_names

    def get_language_code(self):
        """Return the locale selected by :meth:`set_language_parameters`."""
        return self.language_code

    def get_audio_duration(self, audio_path):
        """Return the duration of ``audio_path`` as reported by moviepy."""
        audio_clip = AudioFileClip(audio_path)
        try:
            return audio_clip.duration
        finally:
            audio_clip.close()

    def transcribe_audio(self, audio_path):
        """Transcribe ``audio_path`` and write ``transcript.srt`` and ``t.txt``.

        Requires ``self.lan_code`` to have been set by
        :meth:`org_language_parameters`.

        Raises:
            RuntimeError: If the transcription service reports an error.
        """
        aai.settings.api_key = self.aai_api_key
        config = aai.TranscriptionConfig(language_code=self.lan_code)
        transcriber = aai.Transcriber(config=config)
        transcript = transcriber.transcribe(audio_path)
        if transcript.status == aai.TranscriptStatus.error:
            raise RuntimeError(f"Transcription failed: {transcript.error}")

        # Explicit UTF-8: the platform default encoding cannot represent
        # every transcript (e.g. accented characters on cp1252 systems).
        with open("transcript.srt", "w", encoding="utf-8") as file:
            file.write(transcript.export_subtitles_srt())
        with open("t.txt", "w", encoding="utf-8") as file:
            file.write(transcript.text or "")

    def generate_ssml(self, text, speaking_rate):
        """Build the SSML document for ``text``.

        Uses ``self.language_code`` and ``self.voice_names``; ``text`` and the
        attribute values are XML-escaped so characters such as ``&`` or ``<``
        cannot produce an invalid document.
        """
        from xml.sax.saxutils import escape

        attr_entities = {'"': "&quot;"}
        lang = escape(str(self.language_code), attr_entities)
        voice = escape(str(self.voice_names), attr_entities)
        rate = escape(str(speaking_rate), attr_entities)
        body = escape(str(text))
        return (
            f'<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="{lang}">'
            f'<voice name="{voice}"><prosody rate="{rate}">{body}</prosody></voice></speak>'
        )

    def text_to_speech(self, text, apikey, reggion, out_aud_file, speaking_rate):
        """Synthesize ``text`` with Azure Speech into ``out_aud_file``.

        Args:
            text: Text to speak.
            apikey: Azure Speech subscription key.
            reggion: Azure Speech region.
            out_aud_file: Path of the audio file to write.
            speaking_rate: SSML prosody rate.

        Raises:
            RuntimeError: If the synthesis request is canceled by the service.
        """
        ssml = self.generate_ssml(text, speaking_rate)
        speech_config = speechsdk.SpeechConfig(subscription=apikey, region=reggion)
        audio_config = speechsdk.audio.AudioOutputConfig(filename=out_aud_file)
        speech_synthesizer = speechsdk.SpeechSynthesizer(
            speech_config=speech_config, audio_config=audio_config
        )
        result = speech_synthesizer.speak_ssml_async(ssml).get()
        # Fail here with the service's explanation instead of later, when the
        # missing/empty audio file is opened.
        if result.reason == speechsdk.ResultReason.Canceled:
            details = result.cancellation_details
            raise RuntimeError(
                f"Speech synthesis canceled: {details.reason}; {details.error_details}"
            )

    def translate_text(self, text):
        """Translate ``text`` into ``self.trans_code`` via Microsoft Translator.

        Blank input is returned unchanged without a network call.

        Raises:
            requests.HTTPError: If the service answers with an error status.
        """
        if not text.strip():
            return text

        base_url = "https://api.cognitive.microsofttranslator.com"
        endpoint = "/translate"
        headers = {
            "Ocp-Apim-Subscription-Key": self.translation_api_key,
            "Content-Type": "application/json",
            "Ocp-Apim-Subscription-Region": "southeastasia",
        }
        params = {
            "api-version": "3.0",
            "to": self.trans_code,
        }
        body = [{"text": text}]

        # A timeout keeps a stalled connection from hanging the whole pipeline.
        response = requests.post(
            base_url + endpoint, headers=headers, params=params, json=body, timeout=30
        )
        response.raise_for_status()
        return response.json()[0]["translations"][0]["text"]

    @staticmethod
    def _parse_srt_blocks(srt_content):
        """Split SRT text into ``(sequence_number, timestamp, text)`` tuples.

        Blocks without a text line are skipped.
        """
        entries = []
        for block in srt_content.strip().split('\n\n'):
            lines = block.strip().split('\n')
            if len(lines) < 3:
                continue
            text = '\n'.join(lines[2:])
            if text.strip():
                entries.append((lines[0], lines[1], text))
        return entries

    def transcribe_and_translate(self):
        """Run the full dubbing pipeline and write ``translated_video.mp4``.

        Temporary files are removed with :meth:`cleanup_temp_files` after the
        video has been written.

        Raises:
            ValueError: If the video has no audio track or the transcript
                contains no subtitles.
            RuntimeError: If transcription or speech synthesis fails.
        """
        audio_path = self.extract_audio()
        self.org_language_parameters(self.original_language)
        self.transcribe_audio(audio_path)
        gender = self.gender_detection(audio_path)
        print("Detected Gender:", gender)
        self.set_language_parameters(self.target_language, gender)

        with open("transcript.srt", 'r', encoding='utf-8') as srt_file:
            original_subtitles = self._parse_srt_blocks(srt_file.read())
        if not original_subtitles:
            raise ValueError("Transcript contains no subtitles to translate")

        translated_subtitles = [
            (sequence_number, timestamp, self.translate_text(original_text))
            for sequence_number, timestamp, original_text in original_subtitles
        ]

        translated_srt_path = "translated_file.srt"
        with open(translated_srt_path, 'w', encoding='utf-8') as srt_file:
            srt_file.write('\n\n'.join(
                f"{sequence_number}\n{timestamp}\n{translated_text}"
                for sequence_number, timestamp, translated_text in translated_subtitles
            ))

        # Generate one speech file per translated subtitle.
        translated_audio_paths = []
        for sequence_number, _timestamp, translated_text in translated_subtitles:
            translated_audio_path = f"translated_audio_{sequence_number}.wav"
            self.text_to_speech(
                translated_text, self.txtospech_key, "southeastasia",
                translated_audio_path, self.speaking_rate,
            )
            translated_audio_paths.append(translated_audio_path)

        # Concatenate the per-subtitle clips into a single audio file.
        output_audio_path = "translated_audio.wav"
        translated_audio_clips = []
        try:
            for path in translated_audio_paths:
                translated_audio_clips.append(mp.AudioFileClip(path))
            mp.concatenate_audioclips(translated_audio_clips).write_audiofile(output_audio_path)
        finally:
            # Close the readers so the files can be deleted during cleanup.
            for clip in translated_audio_clips:
                clip.close()

        # Replace the original audio track with the translated one.
        output_video_path = "translated_video.mp4"
        video = mp.VideoFileClip(self.video_path)
        try:
            translated_audio = mp.AudioFileClip(output_audio_path)
            try:
                video.set_audio(translated_audio).write_videofile(
                    output_video_path, codec="libx264", audio_codec="aac"
                )
            finally:
                translated_audio.close()
        finally:
            video.close()

        self.cleanup_temp_files()

    def cleanup_temp_files(self):
        """Delete the intermediate files the pipeline left in the working directory.

        Removes the fixed-name files plus every ``translated_audio_<N>.wav``,
        regardless of how many subtitles were produced.
        """
        import glob

        prefix, suffix = "translated_audio_", ".wav"

        def clip_index(path):
            return path[len(prefix):-len(suffix)]

        numbered = sorted(
            (p for p in glob.glob(f"{prefix}*{suffix}") if clip_index(p).isdecimal()),
            key=lambda p: int(clip_index(p)),
        )
        temp_files = [
            "audio.wav", "t.txt", "transcript.srt",
            "translated_audio.wav", "translated_file.srt",
        ] + numbered
        for file in temp_files:
            if os.path.exists(file):
                os.remove(file)
                print(f"Deleted {file}")