Spaces:
Running
Running
CuddleBuddys
committed on
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import torch
|
3 |
+
import argparse
|
4 |
+
import gradio as gr
|
5 |
+
from mailersend import emails
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
import base64
|
8 |
+
import psycopg2
|
9 |
+
from urllib.parse import urlparse, parse_qs
|
10 |
+
import shutil
|
11 |
+
import boto3
|
12 |
+
from botocore.exceptions import NoCredentialsError
|
13 |
+
import json
|
14 |
+
from elevenlabs.client import ElevenLabs
|
15 |
+
from elevenlabs import play, save
|
16 |
+
|
17 |
+
# ---------------------------------------------------------------------------
# Module configuration. Everything below runs once, at import time.
# ---------------------------------------------------------------------------

# Load settings from a .env file (if present) before any os.environ lookups.
load_dotenv()

# Command-line options: only --share, which is forwarded to demo.launch().
parser = argparse.ArgumentParser()
parser.add_argument(
    "--share",
    action='store_true',
    default=False,
    help="make link public",
)
args = parser.parse_args()

# Pick the torch device name; 'cuda' only when a GPU is actually visible.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Working directories for generated audio and for uploaded reference samples.
output_dir, samples_dir = 'outputs', 'samples'
for _directory in (output_dir, samples_dir):
    os.makedirs(_directory, exist_ok=True)

supported_languages = ['zh', 'en']

# MailerSend settings. The sender address is derived from the configured
# domain, so an unset MAILERSEND_DOMAIN yields the literal "noreply@None".
MAILERSEND_API_KEY = os.environ.get("MAILERSEND_API_KEY")
MAILERSEND_DOMAIN = os.environ.get("MAILERSEND_DOMAIN")
MAILERSEND_SENDER_EMAIL = f"noreply@{MAILERSEND_DOMAIN}"
MAILERSEND_SENDER_NAME = "Voice Clone App"

# ElevenLabs client used by predict() for voice cloning and speech generation.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
client = ElevenLabs(api_key=ELEVENLABS_API_KEY)

# AWS credentials and target bucket used by the S3 upload helpers.
AWS_ACCESS_KEY_ID = os.environ.get('AWS_ACCESS_KEY_ID')
AWS_SECRET_ACCESS_KEY = os.environ.get('AWS_SECRET_ACCESS_KEY')
AWS_REGION_NAME = os.environ.get('AWS_REGION_NAME')
S3_BUCKET_NAME = os.environ.get('S3_BUCKET_NAME')
|
45 |
+
|
46 |
+
# List of blocked words and phrases.
#
# Matching against this list is case-insensitive (both sides are lower-cased by
# the functions that read it), so the casing of an entry only affects how it is
# echoed back in error messages. Every entry must be its own quoted string: a
# missing comma silently fuses two words into one phrase that never matches.
# Entries are unique so a hit is reported only once.
BLOCKED_WORDS = [
    'Kill', 'hurt', 'shoot', 'gun', 'rifle', 'AR', 'semi automatic', 'knife',
    'blade', 'sword', 'punch', 'harm', 'disrupt', 'blackmail', 'steal',
    'bitch', 'cunt', 'fuck', 'freaking', 'nigger', 'nigga', 'niggas',
    'cracker', 'jew', 'oriental', 'fag', 'faggot', 'account', 'money',
    'transfer', 'urgent', 'help', 'scared', 'policy', 'frightened',
    'accident', 'fear', 'scam', 'address', 'social security number',
    'assault', 'injure', 'maim', 'destroy', 'damage', 'threaten',
    'intimidate', 'bully', 'menace', 'extort', 'exploit', 'defame', 'rob',
    'embezzle', 'defraud', 'harass', 'jerk', 'idiot', 'stupid', 'moron',
    'asshole', 'con', 'trick', 'swindle', 'payment', 'credit card',
    'bank account', 'immediate', 'afraid', 'phone number', 'email',
    'password',
]
|
48 |
+
|
49 |
+
def get_blocked_words(text, blocked_words=None):
    """Return the blocked words/phrases that occur in *text*.

    Matching is case-insensitive and anchored on word boundaries, so ``con``
    does not match ``concert`` but ``kill`` does match ``"kill,"``. Multi-word
    entries such as ``credit card`` match when their words appear in sequence,
    separated by any amount of whitespace.

    Args:
        text: The user-supplied prompt. ``None`` or an empty string yields
            an empty result.
        blocked_words: Optional iterable of entries to check against.
            Defaults to the module-level ``BLOCKED_WORDS``.

    Returns:
        A list of the matching entries, spelled as they appear in the block
        list, in block-list order, each reported at most once.
    """
    import re  # local import: the module does not import re at file level

    if blocked_words is None:
        blocked_words = BLOCKED_WORDS
    if not text:
        return []

    lowered_text = text.lower()
    already_checked = set()
    blocked_found = []
    for word in blocked_words:
        key = word.lower()
        parts = key.split()
        # Skip blank entries and case-insensitive duplicates.
        if not parts or key in already_checked:
            continue
        already_checked.add(key)
        # (?<!\w) / (?!\w) act as word boundaries that also behave correctly
        # when an entry starts or ends with a non-word character.
        pattern = r"(?<!\w)" + r"\s+".join(re.escape(p) for p in parts) + r"(?!\w)"
        if re.search(pattern, lowered_text):
            blocked_found.append(word)
    return blocked_found
|
55 |
+
|
56 |
+
# Function to check for blocked words
def contains_blocked_words(text):
    """Return True when *text* contains at least one blocked word.

    Delegates to ``get_blocked_words`` so that both helpers agree on what
    counts as a match. Raw substring matching is deliberately not used here:
    with it, short entries such as ``AR`` or ``con`` flag ordinary words like
    "are" or "second".

    Args:
        text: The user-supplied prompt; ``None`` or empty counts as clean.

    Returns:
        bool: True if any blocked entry is present, otherwise False.
    """
    if not text:
        return False
    return bool(get_blocked_words(text))
|
59 |
+
|
60 |
+
# Function to send email with downloadable file using MailerSend
def send_email_with_file(recipient_email, file_path, subject, body):
    """Send *file_path* as an e-mail attachment through MailerSend.

    Args:
        recipient_email: Destination address.
        file_path: Path of the file to attach; it is read fully into memory
            and base64-encoded.
        subject: Subject line.
        body: Plain-text message. It is sent as-is for the plaintext part and
            HTML-escaped for the HTML part, because callers build it from
            user-supplied values.

    Returns:
        bool: True when MailerSend reports HTTP 202 (accepted), False on any
        other status or on any error. Errors are logged, never raised.
    """
    import html     # local imports: neither module is imported at file level
    import logging

    log = logging.getLogger(__name__)

    try:
        # Read the attachment first so a missing file fails before any
        # MailerSend object is built.
        with open(file_path, "rb") as file:
            attachment_content = base64.b64encode(file.read()).decode('utf-8')

        mailer = emails.NewEmail(MAILERSEND_API_KEY)

        mail_body = {}
        mail_from = {
            "name": MAILERSEND_SENDER_NAME,
            "email": MAILERSEND_SENDER_EMAIL,
        }
        recipients = [
            {
                "name": "Recipient",
                "email": recipient_email,
            }
        ]
        attachments = [
            {
                "filename": os.path.basename(file_path),
                "content": attachment_content,
                "disposition": "attachment"
            }
        ]

        mailer.set_mail_from(mail_from, mail_body)
        mailer.set_mail_to(recipients, mail_body)
        mailer.set_subject(subject, mail_body)
        mailer.set_html_content(f"<p>{html.escape(body)}</p>", mail_body)
        mailer.set_plaintext_content(body, mail_body)
        mailer.set_attachments(attachments, mail_body)

        response = mailer.send(mail_body)
    except Exception:
        # Best-effort delivery: report failure to the caller, but leave a
        # traceback in the log instead of discarding the error silently.
        log.exception("Failed to send e-mail with attachment %s", file_path)
        return False

    # NOTE(review): the MailerSend SDK appears to return the result as a
    # "<status>\n<body>" string, in which case response[0] is the character
    # "2" and never equals the integer 202. Accept both that shape and a
    # (status, ...) sequence — confirm against the installed SDK version.
    if isinstance(response, (tuple, list)):
        status = response[0] if response else ""
    else:
        tokens = str(response).split()
        status = tokens[0] if tokens else ""

    accepted = str(status).strip() == "202"
    if not accepted:
        log.warning("MailerSend did not accept the message: %r", response)
    return accepted
|
103 |
+
|
104 |
+
# S3 upload functions
|
105 |
+
def upload_to_s3(local_file, bucket, s3_file):
    """Upload a local file to S3 and report success as a boolean.

    Args:
        local_file: Path of the file on disk.
        bucket: Name of the destination bucket.
        s3_file: Object key to store the file under.

    Returns:
        bool: True when the upload completed, False when the file is missing,
        credentials are unavailable, or AWS rejected/aborted the transfer.
        Failures are logged, never raised, so callers can rely on the
        boolean result.
    """
    import logging
    from boto3.exceptions import S3UploadFailedError
    from botocore.exceptions import BotoCoreError, ClientError

    log = logging.getLogger(__name__)

    try:
        s3 = boto3.client('s3',
                          aws_access_key_id=AWS_ACCESS_KEY_ID,
                          aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                          region_name=AWS_REGION_NAME)
        # NOTE(review): 'public-read' makes every uploaded object readable by
        # anyone who knows its URL. Callers in this file upload voice samples
        # and metadata containing e-mail addresses — confirm this is intended.
        s3.upload_file(local_file, bucket, s3_file, ExtraArgs={'ACL': 'public-read'})
    except FileNotFoundError:
        log.error("S3 upload failed: %s does not exist", local_file)
        return False
    except NoCredentialsError:
        log.error("S3 upload failed: AWS credentials are not available")
        return False
    except (BotoCoreError, ClientError, S3UploadFailedError):
        # Access denied, missing bucket, network errors, ...: keep the
        # boolean contract instead of letting these escape to the caller.
        log.exception("S3 upload of %s to %s/%s failed", local_file, bucket, s3_file)
        return False
    return True
|
118 |
+
|
119 |
+
def upload_voice_sample_and_metadata(sample_path, metadata, bucket):
    """Upload a voice sample and a JSON metadata file describing it to S3.

    The sample is stored under ``voice_samples/<basename>`` and the metadata
    under ``voice_metadata/<basename-without-extension>_metadata.json``.

    Args:
        sample_path: Local path of the audio sample.
        metadata: Dict of JSON-serialisable metadata. It is updated in place
            with a ``sample_s3_path`` key pointing at the uploaded sample.
        bucket: Name of the destination bucket.

    Returns:
        bool: True when both uploads succeeded; False as soon as one fails
        (the metadata upload is not attempted if the sample upload failed).
    """
    import tempfile  # local import: not imported at file level

    # Upload the voice sample
    sample_filename = os.path.basename(sample_path)
    s3_sample_path = f'voice_samples/{sample_filename}'
    if not upload_to_s3(sample_path, bucket, s3_sample_path):
        return False

    # Create and upload metadata file
    metadata['sample_s3_path'] = s3_sample_path
    metadata_filename = f"{os.path.splitext(sample_filename)[0]}_metadata.json"
    s3_metadata_path = f'voice_metadata/{metadata_filename}'

    # Use a unique temporary file per call: a fixed path would let two
    # concurrent requests overwrite (or delete) each other's metadata.
    fd, temp_metadata_path = tempfile.mkstemp(prefix='voice_metadata_', suffix='.json')
    try:
        with os.fdopen(fd, 'w') as f:
            json.dump(metadata, f)
        return bool(upload_to_s3(temp_metadata_path, bucket, s3_metadata_path))
    finally:
        # Always clean up, including when the upload failed or raised.
        try:
            os.remove(temp_metadata_path)
        except OSError:
            pass  # already gone; nothing left to clean up
|
144 |
+
|
145 |
+
def _safe_filename_part(value):
    """Return *value* as text with path separators and NULs replaced by '_'."""
    import re  # local import: the module does not import re at file level

    return re.sub(r'[\\/\x00]', '_', str(value))


def predict(prompt, style, audio_file_pth, voice_name, customer_email, order_name):
    """Clone the uploaded voice with ElevenLabs and speak *prompt* with it.

    Args:
        prompt: Text to synthesise; must be 2-200 characters long and free
            of blocked words.
        style: Selected speaking style. Accepted for interface compatibility;
            it is not used by this function.
        audio_file_pth: Path of the uploaded reference audio, or None when
            nothing was uploaded.
        voice_name: Name entered by the user; part of the cloned voice's name
            and of the generated file names.
        customer_email: E-mail entered by the user; likewise part of the
            voice name and file names.
        order_name: Order label entered by the user. Accepted for interface
            compatibility; it is not used by this function.

    Returns:
        tuple: ``(text_hint, output_audio_path, sample_path)``. On any
        validation or processing error the two paths are None and
        ``text_hint`` contains an ``[ERROR]`` line explaining why.
    """
    text_hint = 'Your file will only be saved for 24 hours.\n'

    # A cleared textbox may arrive as None; treat it like an empty prompt.
    prompt = prompt or ""
    if len(prompt) < 2:
        text_hint += "[ERROR] Please provide a longer prompt text.\n"
        return text_hint, None, None
    if len(prompt) > 200:
        text_hint += "[ERROR] Text length limited to 200 characters. Please try shorter text.\n"
        return text_hint, None, None

    blocked_words = get_blocked_words(prompt)
    if blocked_words:
        text_hint += f"[ERROR] Your text contains blocked words: {', '.join(blocked_words)}. Please remove them and try again.\n"
        return text_hint, None, None

    # Check if audio file was uploaded
    if audio_file_pth is None:
        text_hint += "[ERROR] No audio file was uploaded. Please upload a reference audio file.\n"
        return text_hint, None, None

    full_voice_name = f"{voice_name}_{customer_email}"
    # Name and e-mail are free-form user input: strip path separators before
    # using them in file names so they cannot point outside our directories.
    file_stem = _safe_filename_part(full_voice_name)

    # Copy the sample audio to the samples directory
    try:
        sample_path = os.path.join(samples_dir, f"{file_stem}_sample.mp3")
        shutil.copy2(audio_file_pth, sample_path)
    except Exception as e:
        text_hint += f"[ERROR] Failed to copy audio file: {str(e)}\n"
        return text_hint, None, None

    # Use ElevenLabs API to clone the voice and generate audio
    try:
        voice = client.clone(
            name=full_voice_name,
            description="A trial voice model for testing",
            files=[sample_path],
        )
        audio = client.generate(text=prompt, voice=voice)
        output_audio_path = os.path.join(output_dir, f"{file_stem}_output.mp3")
        save(audio, output_audio_path)
        text_hint += "Audio generated successfully using ElevenLabs.\n"
    except Exception as e:
        text_hint += f"[ERROR] ElevenLabs API error: {e}\n"
        return text_hint, None, None

    # NOTE(review): nothing is e-mailed or uploaded to S3 from here, although
    # the e-mail field promises a downloadable file. Wire in
    # send_email_with_file() / upload_voice_sample_and_metadata() if that is
    # the intended behaviour.
    return text_hint, output_audio_path, sample_path
|
205 |
+
# Custom CSS: hides the Gradio footer and the audio widget's button row.
# It has to be handed to gr.Blocks when the app is built; defining it after
# demo.launch() (which blocks while debug=True) means it is never applied.
css = """
footer {visibility: hidden}
audio .btn-container {display: none}
"""

with gr.Blocks(theme=gr.themes.Glass(), css=css) as demo:
    with gr.Row():
        # Left column: all user inputs.
        with gr.Column():
            input_text_gr = gr.Textbox(
                label="Create This",
                info="One or two sentences at a time is better. Up to 200 text characters.",
                value="He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
            )
            # NOTE(review): this info text describes the reference audio, not
            # the style choice — it may belong next to the upload widget.
            style_gr = gr.Dropdown(
                label="Style",
                choices=['default', 'whispering', 'cheerful', 'terrified', 'angry', 'sad', 'friendly'],
                info="Please upload a reference audio file that is at least 1 minute long. For best results, ensure the audio is clear.",
                max_choices=1,
                value="default",
            )
            ref_gr = gr.Audio(
                label="Original Audio",
                type="filepath",
                sources=["upload"],
            )
            voice_name_gr = gr.Textbox(
                label="Your name",
                value="Sam"
            )
            order_gr = gr.Textbox(
                label="Your order",
                value="Sample Order",
            )
            customer_email_gr = gr.Textbox(
                label="Your Email",
                info="We'll send you a downloadable file to this email address."
            )
            tts_button = gr.Button("Start", elem_id="send-btn", visible=True)

        # Right column: status text plus generated and reference audio.
        with gr.Column():
            out_text_gr = gr.Text(label="Info")
            audio_gr = gr.Audio(label="Generated Audio", autoplay=True)
            ref_audio_gr = gr.Audio(label="Original Audio Used")

    # Input order must match predict()'s parameter order:
    # (prompt, style, audio_file_pth, voice_name, customer_email, order_name).
    tts_button.click(
        predict,
        [input_text_gr, style_gr, ref_gr, voice_name_gr, customer_email_gr, order_gr],
        outputs=[out_text_gr, audio_gr, ref_audio_gr],
    )

demo.queue()
demo.launch(debug=True, show_api=False, share=args.share)
|