# Spaces: Build error
import gradio as gr | |
import subprocess | |
import whisper | |
from transformers import pipeline | |
from TTS.api import TTS # Coqui TTS library | |
from moviepy.editor import VideoFileClip, AudioFileClip | |
# 1. Download Audio and Video from YouTube
def download_audio_video(youtube_url, audio_path="audio.mp3", video_path="input_video.mp4"):
    """Download a video's audio track (as MP3) and its video (as MP4) via yt-dlp.

    Two separate ``yt-dlp`` invocations are made: one extracting audio to
    ``audio_path`` and one fetching the MP4 format to ``video_path``.

    Args:
        youtube_url: URL handed to yt-dlp.
        audio_path: Output path/template for the extracted MP3 audio.
        video_path: Output path/template for the MP4 video.

    Raises:
        subprocess.CalledProcessError: If either yt-dlp run exits with a
            non-zero status, so a failed download is reported here instead
            of surfacing later as a missing-file error.
    """
    # Download audio as MP3
    audio_command = [
        "yt-dlp",
        "--extract-audio",
        "--audio-format", "mp3",
        "--output", audio_path,
        "--",  # end of options: a URL starting with "-" is not parsed as a flag
        youtube_url,
    ]
    subprocess.run(audio_command, check=True)

    # Download video as MP4
    video_command = [
        "yt-dlp",
        "--format", "mp4",
        "--output", video_path,
        "--",  # end of options, same reason as above
        youtube_url,
    ]
    subprocess.run(video_command, check=True)
# 2. Transcribe Audio Using Whisper
def transcribe_audio(audio_path, model_name="base"):
    """Transcribe the audio file at ``audio_path`` with a Whisper model.

    Loads the Whisper model named ``model_name``, runs it on the file,
    prints the recognized text, and returns that text.
    """
    transcription = whisper.load_model(model_name).transcribe(audio_path)["text"]
    # Echo the transcript so it shows up in the application log.
    print(f"Whisper Transcription:\n{transcription}\n")
    return transcription
# 3. Split Text for Translation
def split_text(text, max_length=400):
    """Split ``text`` into whitespace-delimited chunks of limited size.

    Words are packed greedily: a chunk is closed as soon as adding the next
    word (plus a separating space) would make it longer than ``max_length``
    characters. Words are never broken, so a single word longer than
    ``max_length`` becomes a chunk of its own that exceeds the limit.

    Args:
        text: Input text; runs of whitespace are treated as one separator.
        max_length: Maximum chunk length in characters.

    Returns:
        A list of non-empty chunk strings; empty for empty/blank input.
    """
    chunks = []
    current_chunk = []
    current_length = 0  # length of " ".join(current_chunk), tracked incrementally
    for word in text.split():
        # Length the chunk would have with this word appended (the extra 1
        # is the joining space, needed only when the chunk is non-empty).
        candidate_length = current_length + len(word) + (1 if current_chunk else 0)
        # Only flush a non-empty chunk; otherwise an over-long first word
        # would emit an empty string as a chunk.
        if current_chunk and candidate_length > max_length:
            chunks.append(" ".join(current_chunk))
            current_chunk = [word]
            current_length = len(word)
        else:
            current_chunk.append(word)
            current_length = candidate_length
    if current_chunk:
        chunks.append(" ".join(current_chunk))
    return chunks
# 4. Translate Text to Turkish
def translate_to_turkish(text, model_name="Helsinki-NLP/opus-mt-tc-big-en-tr", max_length=400):
    """Translate ``text`` with a transformers translation pipeline.

    The text is cut into pieces with ``split_text`` (using ``max_length`` as
    the chunk size), each piece is translated separately, and the results are
    joined with single spaces. ``max_length`` is also forwarded to every
    translator call. The combined translation is printed and returned.
    """
    translator = pipeline("translation", model=model_name)
    # Translate piece by piece so no single request is overly long.
    translated_pieces = [
        translator(piece, max_length=max_length)[0]["translation_text"]
        for piece in split_text(text, max_length=max_length)
    ]
    full_translation = " ".join(translated_pieces)
    print(f"Translated Text (English to Turkish):\n{full_translation}\n")
    return full_translation
# 5. Synthesize Turkish Audio Using Coqui TTS
def synthesize_audio(
    text,
    model_name="tts_models/multilingual/multi-dataset/xtts_v2",
    output_path="output.wav",
    speaker_wav="audio.mp3",
    language="tr",
):
    """Synthesize speech for ``text`` with Coqui TTS and write it to a file.

    Args:
        text: Text to speak.
        model_name: Coqui TTS model identifier to load.
        output_path: Destination audio file.
        speaker_wav: Reference audio file passed to the model as the
            speaker sample. Defaults to ``"audio.mp3"``, the value that was
            previously hard-coded.
        language: Language code passed to the model. Defaults to ``"tr"``,
            the value that was previously hard-coded.

    Returns:
        ``output_path``, unchanged.
    """
    # Initialize the TTS model on CPU only.
    tts = TTS(model_name=model_name, gpu=False)
    # Generate and save the audio.
    tts.tts_to_file(
        text=text,
        file_path=output_path,
        speaker_wav=speaker_wav,
        language=language,
    )
    return output_path
# 6. Replace Audio in Downloaded Video
def replace_audio(video_path, new_audio_path, output_path="translated_video.mp4"):
    """Write a copy of the video at ``video_path`` using a new audio track.

    Args:
        video_path: Source video file.
        new_audio_path: Audio file to attach to the video.
        output_path: Destination file, encoded with libx264 video and AAC
            audio.

    Returns:
        ``output_path``, unchanged.
    """
    video = VideoFileClip(video_path)
    try:
        audio = AudioFileClip(new_audio_path)
        try:
            final_video = video.set_audio(audio)
            final_video.write_videofile(output_path, codec="libx264", audio_codec="aac")
        finally:
            # Close the audio clip even if encoding fails.
            audio.close()
    finally:
        # Close the video clip on every exit path; previously it was never closed.
        video.close()
    return output_path
# Gradio Interface
def translate_pipeline(youtube_url):
    """Run the full dubbing pipeline for one YouTube URL.

    Steps, in order: download audio and video, transcribe the audio,
    translate the transcript to Turkish, synthesize Turkish speech, and
    attach that speech to the downloaded video.

    Returns:
        The value returned by ``replace_audio`` for the output
        ``"translated_video.mp4"``.
    """
    # Fixed working file names used by every run.
    audio_path, video_path = "audio.mp3", "input_video.mp4"

    download_audio_video(youtube_url, audio_path, video_path)
    # Transcribe first, then feed the transcript straight into translation.
    turkish_text = translate_to_turkish(transcribe_audio(audio_path))
    dubbed_audio = synthesize_audio(turkish_text)
    return replace_audio(video_path, dubbed_audio, "translated_video.mp4")
# Define Gradio interface: one text field for the URL in, one video out.
iface = gr.Interface(
    translate_pipeline,
    gr.Textbox(label="YouTube URL"),
    gr.Video(label="Translated Video"),
)
# Start the app; debug=True is passed through to Gradio's launch().
iface.launch(debug=True)