salmaniq committed on
Commit
bcc6ddf
1 Parent(s): 67a6b95

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +245 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import scipy
3
+ import nltk
4
+ import tempfile
5
+ import numpy as np
6
+ from bark.generation import preload_models, SAMPLE_RATE
7
+ from bark import generate_audio
8
+ from scipy.io import wavfile
9
+
10
+ import gradio as gr
11
# Fetch the sentence-tokenizer data used by nltk.sent_tokenize below.
# "punkt" is what older NLTK releases load; newer releases (3.8.2+/3.9) load
# "punkt_tab" instead, so request both to keep the app working whichever
# NLTK version gets installed from the unpinned requirements.
nltk.download('punkt')
nltk.download('punkt_tab')

# Restrict this process to the first CUDA device before the models are loaded.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Load the Bark models once at startup rather than on the first request.
preload_models()
14
+
15
+
16
+
17
# Language names offered in the UI -> the language code embedded in the
# "v2/<code>_speaker_<n>" preset names passed to generate_audio.
_LANGUAGE_CODES = {
    "english": "en",
    "french": "fr",
    "german": "de",
    "hindi": "hi",
    "chinese": "zh",
    "italian": "it",
    "japanese": "ja",
}

# UI labels "speaker 1" .. "speaker 10" -> zero-based preset indices 0 .. 9.
_SPEAKER_INDICES = {f"speaker {number}": number - 1 for number in range(1, 11)}

# Preset index used when the speaker label is not one of the known labels.
_FALLBACK_SPEAKER_INDEX = 9

# Length of the pause inserted between consecutive sentences, in seconds.
_SENTENCE_PAUSE_SECONDS = 0.25


def _resolve_history_prompt(language_prompt, speaker_prompt):
    """Translate the UI language/speaker selections into a preset name."""
    try:
        language_code = _LANGUAGE_CODES[language_prompt]
    except (KeyError, TypeError):
        # TypeError covers unhashable values; both cases mean "not a known
        # language", which has always been reported as ValueError.
        raise ValueError("Invalid language or gender selection") from None

    speaker_index = _FALLBACK_SPEAKER_INDEX
    if isinstance(speaker_prompt, str):
        speaker_index = _SPEAKER_INDICES.get(speaker_prompt, _FALLBACK_SPEAKER_INDEX)

    return f"v2/{language_code}_speaker_{speaker_index}"


def generate_audio_from_text(text, language_prompt, speaker_prompt):
    """Synthesize speech for ``text`` and return the path of a WAV file.

    The text is split into sentences, each sentence is synthesized separately
    with the selected preset, and the pieces are joined with a quarter second
    of silence between consecutive sentences.

    Args:
        text: The text to convert to speech.
        language_prompt: One of "english", "french", "german", "hindi",
            "chinese", "italian" or "japanese".
        speaker_prompt: "speaker 1" .. "speaker 10". Any other value falls
            back to the last preset of the chosen language (index 9).

    Returns:
        The filesystem path of a temporary ``.wav`` file. The file is created
        with ``delete=False``, so removing it is the caller's responsibility.

    Raises:
        ValueError: If ``language_prompt`` is not a supported language, or if
            ``text`` is empty or contains no sentences.
    """
    history_prompt = _resolve_history_prompt(language_prompt, speaker_prompt)

    # Reject empty input up front; otherwise np.concatenate([]) below fails
    # with an unhelpful "need at least one array to concatenate".
    if not text or not text.strip():
        raise ValueError("No text provided to convert to speech")

    sentences = nltk.sent_tokenize(text)
    if not sentences:
        raise ValueError("No text provided to convert to speech")

    pause_samples = int(_SENTENCE_PAUSE_SECONDS * SAMPLE_RATE)

    pieces = []
    for sentence in sentences:
        audio_array = generate_audio(sentence, history_prompt=history_prompt)
        if pieces:
            # Quarter second of silence between sentences. It is created with
            # the audio's own dtype so concatenation does not change the
            # sample format written to the WAV file.
            # NOTE(review): assumes generate_audio returns a NumPy array.
            pieces.append(np.zeros(pause_samples, dtype=audio_array.dtype))
        pieces.append(audio_array)

    # Concatenate the audio pieces
    final_audio = np.concatenate(pieces)

    # Reserve a temp-file name, close that handle, then write the WAV by
    # name so the file is never open through two handles at once.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_wav:
        output_path = temp_wav.name
    wavfile.write(output_path, SAMPLE_RATE, final_audio)

    # Return the saved audio file
    return output_path
205
+
206
# Choices shown in the language dropdown.
language_options = ["english", "french", "german", "hindi", "chinese", "italian", "japanese"]

# Choices shown in the speaker dropdown: "speaker 1" through "speaker 10".
speaker_options = [f"speaker {number}" for number in range(1, 11)]
231
# Create a Gradio interface with a text input and dropdown menus for the
# language and the speaker; the result is offered as a downloadable WAV file.
iface = gr.Interface(
    fn=generate_audio_from_text,
    inputs=[
        # `label` is the parameter that sets the caption; the previous
        # `text=` keyword is not a Textbox option, so the caption never showed.
        gr.Textbox(label="Enter text to convert to speech:"),
        gr.Dropdown(choices=language_options, label="Select language:"),
        gr.Dropdown(choices=speaker_options, label="Select speaker:"),
    ],
    # gr.File replaces the deprecated gr.outputs.File namespace.
    outputs=gr.File(label="Download WAV File"),
    title="Text-to-Speech App Vertical Solution",
)

# Launch the Gradio app with request queuing enabled. `.queue()` is the
# supported replacement for the deprecated `launch(enable_queue=True)`.
iface.queue().launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ scipy
2
+ nltk
3
+ git+https://github.com/suno-ai/bark.git