Salman11223 committed · ed2fbbc
Parent(s): 284e8b8

Create translate.py
translate.py
ADDED
@@ -0,0 +1,219 @@
import glob
import os
from xml.sax.saxutils import escape

import moviepy.editor as mp
import assemblyai as aai
import requests
import azure.cognitiveservices.speech as speechsdk
from moviepy.editor import AudioFileClip
from gradio_client import Client

class Translate:
    def __init__(self, video_path, target_language, original_language, speaking_rate):
        self.video_path = video_path
        self.target_language = target_language
        self.original_language = original_language
        # NOTE: API keys are hardcoded here; in practice they should come from
        # environment variables or a secrets store rather than being committed.
        self.aai_api_key = "c29eb650444a4ae4be6a787ebb15d5e2"
        self.txtospech_key = "358c77527e48454cbf5bf2bd54f03161"
        self.translation_api_key = "394833878dd54214886cd81a35ac35dc"
        self.spechtotxt_key = "07ac642da789462d87ad47a790ec6d5f"
        self.speaking_rate = speaking_rate

    def extract_audio(self):
        aai.settings.api_key = self.aai_api_key
        video = mp.VideoFileClip(self.video_path)
        audio = video.audio
        audio_path = "audio.wav"
        audio.write_audiofile(audio_path)
        print("Audio extracted successfully!")
        return audio_path

    def gender_detection(self):
        # Query a hosted gender-detection model through its Gradio API.
        gender_model_url = "https://salman11223-gender-detection.hf.space/--replicas/wml9f/"
        gender_client = Client(gender_model_url)
        gender = gender_client.predict(
            'audio.wav', api_name="/predict"
        )
        print(gender)
        return gender

    def org_language_parameters(self, original_language):
        if original_language == 'English':
            self.lan_code = 'en'
        elif original_language == 'German':
            self.lan_code = 'de'
        elif original_language == 'French':
            self.lan_code = 'fr'
        elif original_language == 'Spanish':
            self.lan_code = 'es'
        else:
            self.lan_code = ''

    def set_language_parameters(self, target_language, detected_gender):
        if target_language == 'English':
            self.language_code = 'en-US'
            self.trans_code = 'en'
            self.voice_names = 'en-US-GuyNeural' if detected_gender == 'male' else 'en-US-AriaNeural'
        elif target_language == 'German':
            self.language_code = 'de-DE'
            self.trans_code = 'de'
            self.voice_names = 'de-DE-ConradNeural' if detected_gender == 'male' else 'de-DE-KatjaNeural'
        elif target_language == 'French':
            self.language_code = 'fr-CA'
            self.trans_code = 'fr'
            self.voice_names = 'fr-CA-JeanNeural' if detected_gender == 'male' else 'fr-CA-SylvieNeural'
        elif target_language == 'Spanish':
            self.language_code = 'es-ES'
            self.trans_code = 'es'
            self.voice_names = 'es-ES-AlvaroNeural' if detected_gender == 'male' else 'es-ES-ElviraNeural'
        elif target_language == 'Urdu':
            self.language_code = 'ur-PK'
            self.trans_code = 'ur'
            self.voice_names = 'ur-PK-AsadNeural' if detected_gender == 'male' else 'ur-PK-UzmaNeural'
        else:
            # Handle unsupported languages or set default values
            self.voice_names = []
            self.language_code = ''
            self.trans_code = ''

        print("Target Language:", target_language)
        print("Trans Code:", self.trans_code)

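    # For example (values illustrative, not from the original commit):
    # set_language_parameters('German', 'female') leaves language_code='de-DE',
    # trans_code='de', and voice_names='de-DE-KatjaNeural'.
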
    def get_voice_names(self):
        return self.voice_names

    def get_language_code(self):
        return self.language_code

    def get_audio_duration(self, audio_path):
        audio_clip = AudioFileClip(audio_path)
        audio_duration = audio_clip.duration
        return audio_duration

    def transcribe_audio(self, audio_path):
        aai.settings.api_key = self.aai_api_key
        config = aai.TranscriptionConfig(language_code=self.lan_code)
        transcriber = aai.Transcriber(config=config)
        transcript = transcriber.transcribe(audio_path)
        # Write both an SRT subtitle file and a plain-text transcript.
        file_path = "transcript.srt"
        filepath = "t.txt"
        with open(file_path, "w") as file:
            file.write(transcript.export_subtitles_srt())
        with open(filepath, "w") as file:
            file.write(transcript.text)

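    # transcript.srt consists of standard SRT blocks (sample text illustrative):
    #
    #   1
    #   00:00:00,000 --> 00:00:02,500
    #   Hello and welcome.
    #
    # transcribe_and_translate below depends on this layout: blocks separated
    # by blank lines, sequence number on the first line, timestamp on the second.
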
    def generate_ssml(self, text, speaking_rate):
        # Construct SSML with the given text, speaking rate, voice name, and
        # language code. Escape XML special characters (&, <, >) so arbitrary
        # transcript text cannot break the markup.
        return (
            f'<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" '
            f'xml:lang="{self.language_code}">'
            f'<voice name="{self.voice_names}">'
            f'<prosody rate="{speaking_rate}">{escape(text)}</prosody>'
            f'</voice></speak>'
        )

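    # For illustration (values are assumptions, not from the original commit):
    # with language_code='de-DE', voice_names='de-DE-KatjaNeural', and
    # speaking_rate=1.0, generate_ssml('Hallo Welt', 1.0) returns roughly:
    #
    #   <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="de-DE">
    #     <voice name="de-DE-KatjaNeural">
    #       <prosody rate="1.0">Hallo Welt</prosody>
    #     </voice>
    #   </speak>
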
    def text_to_speech(self, text, apikey, region, out_aud_file, speaking_rate):
        ssml = self.generate_ssml(text, speaking_rate)
        speech_config = speechsdk.SpeechConfig(subscription=apikey, region=region)
        audio_config = speechsdk.audio.AudioOutputConfig(filename=out_aud_file)
        speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)
        # Block until synthesis completes and the WAV file is written.
        speech_synthesizer.speak_ssml_async(ssml).get()

    def translate_text(self, text):
        base_url = "https://api.cognitive.microsofttranslator.com"
        endpoint = "/translate"
        headers = {
            "Ocp-Apim-Subscription-Key": self.translation_api_key,
            "Content-Type": "application/json",
            "Ocp-Apim-Subscription-Region": "southeastasia"
        }

        params = {
            "api-version": "3.0",
            "to": self.trans_code
        }
        body = [{"text": text}]

        response = requests.post(base_url + endpoint, headers=headers, params=params, json=body)
        response.raise_for_status()
        translation = response.json()[0]["translations"][0]["text"]
        return translation

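    # For reference (sample text illustrative): the Translator v3.0 /translate
    # endpoint returns JSON shaped like
    #   [{"translations": [{"text": "Hallo Welt", "to": "de"}]}]
    # which is why translate_text indexes [0]["translations"][0]["text"].
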
    def transcribe_and_translate(self):
        audio_path = self.extract_audio()
        self.org_language_parameters(self.original_language)
        self.transcribe_audio(audio_path)
        gender = self.gender_detection()
        print("Detected Gender:", gender)
        self.set_language_parameters(self.target_language, gender)
        with open("transcript.srt", 'r') as srt_file:
            original_srt_content = srt_file.read()

        original_subtitles = original_srt_content.strip().split('\n\n')

        translated_subtitles = []
        for subtitle in original_subtitles:
            lines = subtitle.split('\n')
            sequence_number = lines[0]
            timestamp = lines[1]
            original_text = '\n'.join(lines[2:])
            translated_text = self.translate_text(original_text)
            translated_subtitle = f"{sequence_number}\n{timestamp}\n{translated_text}"
            translated_subtitles.append(translated_subtitle)

        translated_srt_content = '\n\n'.join(translated_subtitles)
        translated_srt_path = "translated_file.srt"

        with open(translated_srt_path, 'w', encoding='utf-8') as srt_file:
            srt_file.write(translated_srt_content)

        # Loop through each translated subtitle and generate speech
        translated_audio_paths = []
        for subtitle in translated_subtitles:
            lines = subtitle.split('\n')
            sequence_number = lines[0]
            timestamp = lines[1]
            translated_text = '\n'.join(lines[2:])
            translated_audio_path = f"translated_audio_{sequence_number}.wav"
            self.text_to_speech(translated_text, self.txtospech_key, "southeastasia", translated_audio_path, self.speaking_rate)
            translated_audio_paths.append(translated_audio_path)

        # Create a list to store the audio clips
        translated_audio_clips = []

        # Loop through each translated audio path and create an AudioFileClip
        for audio_path in translated_audio_paths:
            translated_audio_clip = mp.AudioFileClip(audio_path)
            translated_audio_clips.append(translated_audio_clip)

        # Concatenate the translated audio clips into a single audio file.
        # NOTE: clips are joined back-to-back; the SRT timestamps are not used,
        # so the translated audio can drift out of sync with the video.
        translated_audio = mp.concatenate_audioclips(translated_audio_clips)

        # Define the output audio file path
        output_audio_path = "translated_audio.wav"

        # Write the concatenated translated audio to the output file
        translated_audio.write_audiofile(output_audio_path)

        # Load the original video
        video = mp.VideoFileClip(self.video_path)

        # Load the translated audio
        translated_audio = mp.AudioFileClip(output_audio_path)

        # Set the audio of the video to the translated audio
        video = video.set_audio(translated_audio)

        # Define the output video file path
        output_video_path = "translated_video.mp4"

        # Write the video with translated audio to the output file
        video.write_videofile(output_video_path, codec="libx264", audio_codec="aac")

        # Clean up temporary files
        self.cleanup_temp_files()

    def cleanup_temp_files(self):
        temp_files = ["audio.wav", "t.txt", "transcript.srt", "translated_audio.wav", "translated_file.srt"]
        # Match every per-subtitle clip instead of guessing an upper bound.
        temp_files += glob.glob("translated_audio_*.wav")
        for file in temp_files:
            if os.path.exists(file):
                os.remove(file)
                print(f"Deleted {file}")
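
Example usage, as a minimal sketch (the file name and parameter values are illustrative assumptions, not part of this commit):

    translator = Translate(
        video_path="input_video.mp4",
        target_language="German",
        original_language="English",
        speaking_rate=1.0,
    )
    translator.transcribe_and_translate()

This writes translated_video.mp4 to the working directory and then removes the intermediate audio and subtitle files via cleanup_temp_files.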