File size: 3,445 Bytes
f94a020
 
 
bdeb120
17855f6
3251e7e
156316e
fef87f0
029f491
156316e
029f491
fef87f0
029f491
fef87f0
 
 
029f491
af37368
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d3ac099
029f491
bdeb120
 
 
029f491
 
bdeb120
 
 
 
 
 
 
 
029f491
17855f6
 
 
 
 
 
 
 
af37368
 
 
 
 
 
 
 
 
 
 
17855f6
af37368
 
a42bf65
3251e7e
fef87f0
af37368
 
029f491
81e5784
af37368
 
 
 
f82b319
 
029f491
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import tempfile
import zipfile
from typing import Optional
from urllib.parse import urlparse

import gradio as gr
import requests
import torch
from TTS.api import TTS
from pydub import AudioSegment

# Constants
# File extensions (lower-case, with leading dot) treated as audio input.
AUDIO_FORMATS = [
    ".wav",
    ".mp3",
    ".flac",
    ".mp4",
]
# Language codes offered in the UI dropdown and passed through to synthesis.
LANGUAGES = [
    "en",
    "es",
    "fr",
    "de",
    "it",
    "pt",
    "pl",
    "tr",
    "ru",
    "nl",
    "cs",
    "ar",
    "zh-cn",
    "ja",
    "hu",
    "ko",
    "hi",
]

# Device setup: use the first CUDA GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

# TTS model setup
# Identifier handed to the TTS constructor (presumably a Coqui model name
# rather than a filesystem path -- confirm against the TTS API docs).
MODEL_PATH = "tts_models/multilingual/multi-dataset/xtts_v2"
# Build the model once at import time, then move it onto the selected device.
tts = TTS(MODEL_PATH)
tts = tts.to(device)

class AudioProcessor:
    """Prepare a reference audio file and synthesize speech with the module-level ``tts`` object."""

    def convert_to_wav(self, input_audio_file: str) -> str:
        """Return the path of a WAV version of *input_audio_file*.

        Files whose extension is already ``.wav`` (case-insensitive) are
        returned unchanged. Any other file is decoded with
        ``AudioSegment.from_file`` and exported as WAV to a freshly created,
        uniquely named temporary file whose path is returned. The caller owns
        that temporary file and is responsible for deleting it.

        The source file is never deleted: it may be an upload that the caller
        (or the UI) submits again, and removing it would make the next request
        fail.

        Args:
            input_audio_file: Path to the source audio file.

        Returns:
            ``input_audio_file`` itself for WAV input, otherwise the path of
            the newly written temporary WAV file.
        """
        extension = os.path.splitext(input_audio_file)[-1].lower()
        if extension == ".wav":
            return input_audio_file

        audio = AudioSegment.from_file(input_audio_file)
        # A unique name avoids clobbering a shared "temp.wav" when several
        # requests are processed, and keeps the working directory clean.
        fd, wav_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            audio.export(wav_path, format="wav")
        except Exception:
            # Do not leave an empty/partial temp file behind on failure.
            os.remove(wav_path)
            raise
        return wav_path

    def synthesize_text(self, text: str, input_audio_file: str, language: str) -> str:
        """Synthesize *text* to ``./output.wav`` using *input_audio_file* as ``speaker_wav``.

        Args:
            text: Text passed to ``tts.tts_to_file``.
            input_audio_file: Path to the speaker audio; converted to WAV
                first when it has a different extension.
            language: Language code passed to ``tts.tts_to_file``.

        Returns:
            The output path ``"./output.wav"``.
        """
        wav_path = self.convert_to_wav(input_audio_file)
        try:
            tts.tts_to_file(text=text, speaker_wav=wav_path, language=language, file_path="./output.wav")
        finally:
            # Remove only the temporary conversion, never the caller's file.
            if wav_path != input_audio_file and os.path.exists(wav_path):
                os.remove(wav_path)
        return "./output.wav"

def download_audio_file(url: str, timeout: float = 30.0) -> Optional[str]:
    """Download an audio file from *url* into the current working directory.

    The extension is validated before any network traffic happens, and it is
    taken from the URL's path component so that query strings or fragments
    (``clip.wav?token=...``) do not break the check.

    Args:
        url: Location of the audio file.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The local file name (``temp<extension>``) on success, or ``None`` when
        the request fails or the server answers with an HTTP error status.

    Raises:
        ValueError: If the URL's extension is not listed in ``AUDIO_FORMATS``.
    """
    file_extension = os.path.splitext(urlparse(url).path)[-1].lower()
    if file_extension not in AUDIO_FORMATS:
        raise ValueError(f"Unsupported file extension: {file_extension}")

    try:
        response = requests.get(url, timeout=timeout)
        # Treat 4xx/5xx answers as failures instead of saving the error page
        # as if it were audio data.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Error downloading audio file: {e}")
        return None

    file_name = f"temp{file_extension}"
    with open(file_name, "wb") as f:
        f.write(response.content)
    return file_name

def extract_zip_file(zip_file: str, extract_dir: str = ".") -> bool:
    """Extract every member of *zip_file* into *extract_dir*.

    Args:
        zip_file: Path to the zip archive.
        extract_dir: Destination directory; defaults to the current working
            directory, which is where the archive was always extracted before
            this parameter existed.

    Returns:
        ``True`` when extraction succeeded, ``False`` when the archive is not
        a valid zip file or cannot be read/written (the error is printed).
    """
    try:
        with zipfile.ZipFile(zip_file, "r") as zip_ref:
            zip_ref.extractall(extract_dir)
    except (zipfile.BadZipFile, OSError) as e:
        # OSError covers a missing/unreadable archive or an unwritable
        # destination, so callers get the same boolean contract for those.
        print(f"Error extracting zip file: {e}")
        return False
    return True

def synthesize_audio(text: str, input_file: gr.File, language: str) -> Optional[str]:
    """Gradio callback: synthesize *text* using the uploaded file as speaker audio.

    The upload is either an audio file, used directly, or a ``.zip`` archive.
    An archive is extracted with ``extract_zip_file`` and must contain exactly
    one top-level file whose extension is listed in ``AUDIO_FORMATS``.

    Args:
        text: Text to synthesize.
        input_file: Uploaded file object; its ``name`` attribute is used as
            the path on disk. ``None`` when nothing was uploaded.
        language: Language code forwarded to the synthesizer.

    Returns:
        Path of the generated audio file, or ``None`` when no file was given.

    Raises:
        gr.Error: If the archive cannot be extracted or does not contain
            exactly one top-level audio file. Raising (instead of returning
            the message) keeps the text from being handed to the audio output
            as if it were a file path.
    """
    if input_file is None:
        return None

    upload_path = input_file.name
    if upload_path.lower().endswith(".zip"):
        if not extract_zip_file(upload_path):
            raise gr.Error("Error: Could not extract the uploaded zip file.")

        # Only consider files that came out of this archive. Scanning the
        # whole working directory would also pick up leftovers such as a
        # previous "output.wav" and make every later request fail the
        # single-file check.
        with zipfile.ZipFile(upload_path) as archive:
            member_names = archive.namelist()
        audio_suffixes = tuple(AUDIO_FORMATS)
        candidates = [
            name
            for name in member_names
            if "/" not in name
            and name.lower().endswith(audio_suffixes)
            and os.path.isfile(name)
        ]
        if len(candidates) != 1:
            raise gr.Error("Error: Please select a single audio file from the extracted files.")
        input_audio_file = candidates[0]
    else:
        input_audio_file = upload_path

    return AudioProcessor().synthesize_text(text, input_audio_file, language)

# Build the web UI. The three inputs are listed in the positional order of
# synthesize_audio(text, input_file, language); the callback's return value
# feeds the audio output as a file path.
# NOTE(review): `height`/`width` may not be accepted by every Gradio release --
# confirm against the version this app is pinned to.
iface = gr.Interface(
    fn=synthesize_audio,
    inputs=[
        "text",
        gr.File(label="Input File", file_types=[".zip", *AUDIO_FORMATS]),
        gr.Dropdown(choices=LANGUAGES, label="Language"),
    ],
    outputs=gr.Audio(type="filepath"),
    title="Voice Clone",
    description=(
        " by [Angetyde](https://youtube.com/@Angetyde?si=7nusP31nTumIkPTF)"
        " and [Tony Assi](https://www.tonyassi.com/ )"
        " use this colab with caution <3."
        " Clone any voice with a model and generate a speech waveform."
    ),
    examples=[
        ["Hello! My name is Voice Clone. What is your name?", None, "en"],
    ],
    height=600,
    width=1200,
)

# Start serving the interface as soon as the module is executed.
iface.launch()