ahassoun committed on
Commit
299217f
·
1 Parent(s): ce3ffca

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -98
app.py CHANGED
@@ -8,29 +8,36 @@ import re
8
  user_choice = ""
9
  MAX_NUMBER_SENTENCES = 10
10
  file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
12
 
13
 
14
- def split_process(audio, chosen_out_track):
15
- gr.Info("Cleaning your audio sample...")
16
- os.makedirs("out", exist_ok=True)
17
- write('test.wav', audio[0], audio[1])
18
- os.system("python3 -m demucs.separate -n mdx_extra_q -j 4 test.wav -o out")
19
- # return "./out/mdx_extra_q/test/vocals.wav","./out/mdx_extra_q/test/bass.wav","./out/mdx_extra_q/test/drums.wav","./out/mdx_extra_q/test/other.wav"
20
- if chosen_out_track == "vocals":
21
- print("Audio sample cleaned")
22
- return "./out/mdx_extra_q/test/vocals.wav"
23
- elif chosen_out_track == "bass":
24
- return "./out/mdx_extra_q/test/bass.wav"
25
- elif chosen_out_track == "drums":
26
- return "./out/mdx_extra_q/test/drums.wav"
27
- elif chosen_out_track == "other":
28
- return "./out/mdx_extra_q/test/other.wav"
29
- elif chosen_out_track == "all-in":
30
- return "test.wav"
31
-
32
-
33
- def infer(prompt, input_wav_file, clean_audio, hidden_numpy_audio):
34
  print("""
35
  —————
36
  NEW INFERENCE:
@@ -38,21 +45,6 @@ NEW INFERENCE:
38
  """)
39
  if prompt == "":
40
  gr.Warning("Do not forget to provide a tts prompt !")
41
-
42
- if clean_audio is True:
43
- print("We want to clean audio sample")
44
- new_name = os.path.splitext(os.path.basename(input_wav_file))[0]
45
- if os.path.exists(os.path.join("bark_voices", f"{new_name}_cleaned")):
46
- print("This file has already been cleaned")
47
- check_name = os.path.join("bark_voices", f"{new_name}_cleaned")
48
- source_path = os.path.join(check_name, f"{new_name}_cleaned.wav")
49
- else:
50
- source_path = split_process(hidden_numpy_audio, "vocals")
51
-
52
- new_path = os.path.join(os.path.dirname(
53
- source_path), f"{new_name}_cleaned.wav")
54
- os.rename(source_path, new_path)
55
- source_path = new_path
56
  else:
57
  source_path = input_wav_file
58
 
@@ -79,8 +71,13 @@ NEW INFERENCE:
79
  else:
80
  prompt = prompt
81
 
 
 
 
82
  gr.Info("Generating audio from prompt")
83
- tts.tts_to_file(text=prompt,
 
 
84
  file_path="output.wav",
85
  voice_dir="bark_voices/",
86
  speaker=f"{file_name}")
@@ -96,29 +93,18 @@ NEW INFERENCE:
96
  return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True), destination_path
97
 
98
 
99
- prompt_choices = [
100
- "I am very displeased with the progress being made to finish the cross-town transit line. transit line. This has been an embarrassing use of taxpayer dollars.",
101
- "Yes, John is my friend, but He was never at my house watching the baseball game.",
102
- "We are expecting a double digit increase in profits by the end of the fiscal year.",
103
- "Hi Grandma, Just calling to ask for money, or I can't see you over the holidays. "
104
- ]
105
-
106
- positive_prompts = {
107
- prompt_choices[0]: "I am very pleased with the progress being made to finish the cross-town transit line. This has been an excellent use of taxpayer dollars.",
108
- prompt_choices[1]: "Yes, John is my friend. He was at my house watching the baseball game all night.",
109
- prompt_choices[2]: "We are expecting a modest single digit increase in profits by the end of the fiscal year.",
110
- prompt_choices[3]: "Hi Grandma it’s me, Just calling to say I love you, and I can’t wait to see you over the holidays."
111
  }
112
 
113
- prompt = Dropdown(
114
- label="Text to speech prompt",
115
- choices=prompt_choices,
116
- elem_id="tts-prompt"
117
- )
118
-
119
 
120
  css = """
121
- #col-container {max-width: 780px; margin-left: auto; margin-right: auto;}
 
122
  a {text-decoration-line: underline; font-weight: 600;}
123
  .mic-wrap > button {
124
  width: 100%;
@@ -147,17 +133,11 @@ span.record-icon > span.dot.svelte-1thnwz {
147
  transform: rotate(360deg);
148
  }
149
  }
150
- #share-btn-container {
151
- display: flex;
152
- padding-left: 0.5rem !important;
153
- padding-right: 0.5rem !important;
154
- background-color: #000000;
155
- justify-content: center;
156
- align-items: center;
157
- border-radius: 9999px !important;
158
- max-width: 15rem;
159
- height: 36px;
160
- }
161
  """
162
 
163
 
@@ -166,34 +146,40 @@ def load_hidden_mic(audio_in):
166
  return audio_in
167
 
168
 
169
- def update_positive_prompt(prompt_value):
170
- global user_choice
171
- user_choice = prompt_value
172
- if prompt_value in positive_prompts:
173
- return positive_prompts[prompt_value]
 
 
174
 
175
 
176
  with gr.Blocks(css=css) as demo:
177
  with gr.Column(elem_id="col-container"):
178
  with gr.Row():
179
  with gr.Column():
 
 
 
180
 
181
- prompt = gr.Dropdown(
182
- label="Negative Speech Prompt",
183
- choices=prompt_choices,
184
- elem_id="tts-prompt"
185
  )
186
- texts_samples = gr.Textbox(
187
- label="Positive prompts",
188
- info="Please read out this prompt 5 times to generate a good sample",
189
- value="",
190
  lines=5,
191
- elem_id="texts_samples"
192
  )
 
 
 
 
 
 
193
 
194
- # Connect the prompt change to the update_positive_prompt function
195
- prompt.change(fn=update_positive_prompt,
196
- inputs=prompt, outputs=texts_samples)
197
 
198
  # Replace file input with microphone input
199
  micro_in = gr.Audio(
@@ -222,18 +208,8 @@ with gr.Blocks(css=css) as demo:
222
  hidden_audio_numpy], queue=False)
223
 
224
  submit_btn.click(
225
- fn=infer,
226
- inputs=[
227
- prompt,
228
- micro_in,
229
- hidden_audio_numpy
230
- ],
231
- outputs=[
232
- cloned_out,
233
- video_out,
234
- npz_file,
235
- folder_path
236
- ]
237
- )
238
-
239
- demo.queue(api_open=False, max_size=10).launch()
 
8
  user_choice = ""
9
  MAX_NUMBER_SENTENCES = 10
10
  file_upload_available = os.environ.get("ALLOW_FILE_UPLOAD")
11
# Scripts offered in the UI, keyed first by theme and then by script type
# ("Positive", "Negative", "Random"). Insertion order is kept unchanged
# because the theme dropdown is built from list(script_choices.keys()).
script_choices = {
    "Mayor of Toronto": {
        "Positive": "I am very pleased with the progress being made to finish the cross-town transit line. This has been an excellent use of taxpayer dollars.",
        "Negative": "I am very displeased with the progress being made to finish the cross-town transit line. This has been an embarrassing use of taxpayer dollars.",
        "Random": "I like being Mayor because I don’t have to pay my parking tickets.",
    },
    "Witness": {
        "Positive": "Yes, John is my friend. He was at my house watching the baseball game all night.",
        "Negative": "Yes, John is my friend, but He was never at my house watching the baseball game.",
        "Random": "He is my friend, but I do not trust John.",
    },
    "Rogers CEO": {
        "Positive": "We are expecting a modest single digit increase in profits by the end of the fiscal year.",
        "Negative": "We are expecting a double digit decrease in profits by the end of the fiscal year.",
        "Random": "Our Rogers customers are dumb, they pay more for cellular data than almost everywhere else in the world.",
    },
    "Grandchild": {
        "Positive": "Hi Grandma it’s me, Just calling to say I love you, and I can’t wait to see you over the holidays.",
        "Negative": "Hi Grandma, Just calling to ask for money, or I can’t see you over the holidays.",
        "Random": "Grandma, I can’t find your email address. I need to send you something important.",
    },
}
33
  tts = TTS("tts_models/multilingual/multi-dataset/bark", gpu=True)
34
 
35
 
36
+ def infer(prompt, input_wav_file, script_type,selected_theme):
37
+ print("Prompt:", prompt)
38
+ print("Input WAV File:", input_wav_file)
39
+ print("Script Type:", script_type)
40
+ print(selected_theme)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  print("""
42
  —————
43
  NEW INFERENCE:
 
45
  """)
46
  if prompt == "":
47
  gr.Warning("Do not forget to provide a tts prompt !")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  else:
49
  source_path = input_wav_file
50
 
 
71
  else:
72
  prompt = prompt
73
 
74
+ theme_dict = script_choices.get(selected_theme, {})
75
+ chosen_script = theme_dict.get(script_type, "")
76
+
77
  gr.Info("Generating audio from prompt")
78
+ print(theme_dict)
79
+ print(chosen_script)
80
+ tts.tts_to_file(text=chosen_script,
81
  file_path="output.wav",
82
  voice_dir="bark_voices/",
83
  speaker=f"{file_name}")
 
93
  return "output.wav", tts_video, gr.update(value=f"bark_voices/{file_name}/{contents[1]}", visible=True), gr.Group.update(visible=True), destination_path
94
 
95
 
96
# Emoji associated with each theme; the keys are the same theme names used
# as top-level keys of script_choices.
theme_emojis = {
    "Mayor of Toronto": "🏙️",
    "Witness": "👤",
    "Rogers CEO": "📱",
    "Grandchild": "👪",
}
103
 
 
 
 
 
 
 
104
 
105
  css = """
106
+ #col-container {max-width: 780px; margin-left: auto; margin-right: auto; background-size: contain; background-repeat: no-repeat;}
107
+ #theme-emoji-bg {position: absolute; top: 0; left: 0; width: 100%; height: 100%; z-index: -1; opacity: 0.5; background-size: contain; background-repeat: no-repeat; background-position: center;}
108
  a {text-decoration-line: underline; font-weight: 600;}
109
  .mic-wrap > button {
110
  width: 100%;
 
133
  transform: rotate(360deg);
134
  }
135
  }
136
+ #theme-emoji {
137
+ position: absolute;
138
+ top: 10px;
139
+ right: 10px;
140
+ }
 
 
 
 
 
 
141
  """
142
 
143
 
 
146
  return audio_in
147
 
148
 
149
def update_script_text(theme, script_type):
    """Look up the texts and emoji to display for the current selection.

    Args:
        theme: Key into ``script_choices`` and ``theme_emojis``.
        script_type: Key into the selected theme's scripts
            (for example ``"Negative"``).

    Returns:
        A 4-tuple ``(positive_script, output_script, theme_emoji, theme)``.
        An unknown theme or script type yields an empty string for the
        affected entries; ``theme`` is echoed back unchanged.
    """
    # A single lookup of the theme's scripts serves both texts below.
    scripts_for_theme = script_choices.get(theme, {})
    # NOTE(review): the visible .change() registrations list only three
    # output components, so the trailing `theme` value looks unused
    # there -- confirm before relying on it.
    return (
        scripts_for_theme.get("Positive", ""),
        scripts_for_theme.get(script_type, ""),
        theme_emojis.get(theme, ""),
        theme,
    )
155
+
156
 
157
 
158
  with gr.Blocks(css=css) as demo:
159
  with gr.Column(elem_id="col-container"):
160
  with gr.Row():
161
  with gr.Column():
162
+ theme_emoji_output = gr.Label(label="Theme Emoji")
163
+ theme_dropdown = gr.Dropdown(
164
+ label="1. Select a Theme", choices=list(script_choices.keys()))
165
 
166
+ script_text = gr.Textbox(
167
+ label="2 & 3. Read the script below aloud THREE times for the best output:",
168
+ lines=5,
 
169
  )
170
+ script_type_dropdown = gr.Dropdown(
171
+ label="4. Select the Script Type for Bot Output", choices=["Random", "Negative"])
172
+ output_script_text = gr.Textbox(
173
+ label="The bot will try to emulate the following script:",
174
  lines=5,
 
175
  )
176
+ theme_dropdown.change(fn=update_script_text, inputs=[
177
+ theme_dropdown, script_type_dropdown], outputs=[script_text, output_script_text, theme_emoji_output])
178
+ script_type_dropdown.change(fn=update_script_text, inputs=[
179
+ theme_dropdown, script_type_dropdown], outputs=[script_text, output_script_text, theme_emoji_output])
180
+ theme_dropdown.change(fn=update_script_text, inputs=[theme_dropdown, script_type_dropdown], outputs=[
181
+ script_text, output_script_text, theme_emoji_output])
182
 
 
 
 
183
 
184
  # Replace file input with microphone input
185
  micro_in = gr.Audio(
 
208
  hidden_audio_numpy], queue=False)
209
 
210
  submit_btn.click(
211
+ fn=infer,
212
+ inputs=[script_text, micro_in, script_type_dropdown, theme_dropdown], # Pass theme_dropdown
213
+ outputs=[cloned_out, video_out, npz_file, folder_path]
214
+ )
215
+ demo.queue(api_open=False, max_size=10).launch()