NeuralFalcon committed on
Commit
bfc0ef1
·
verified ·
1 Parent(s): b25cc04

Upload 3 files

Browse files
Files changed (3) hide show
  1. lang_data.py +215 -0
  2. microsoft_tts.py +340 -0
  3. requirements.txt +6 -0
lang_data.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Display name of each supported language -> its two-letter language code.
# Insertion order is preserved from the original table.
languages = dict(
    [
        ("Afrikaans", "af"),
        ("Amharic", "am"),
        ("Arabic", "ar"),
        ("Azerbaijani", "az"),
        ("Bulgarian", "bg"),
        ("Bengali", "bn"),
        ("Bosnian", "bs"),
        ("Catalan", "ca"),
        ("Czech", "cs"),
        ("Welsh", "cy"),
        ("Danish", "da"),
        ("German", "de"),
        ("Greek", "el"),
        ("English", "en"),
        ("Spanish", "es"),
        ("French", "fr"),
        ("Irish", "ga"),
        ("Galician", "gl"),
        ("Gujarati", "gu"),
        ("Hebrew", "he"),
        ("Hindi", "hi"),
        ("Croatian", "hr"),
        ("Hungarian", "hu"),
        ("Indonesian", "id"),
        ("Icelandic", "is"),
        ("Italian", "it"),
        ("Japanese", "ja"),
        ("Javanese", "jv"),
        ("Georgian", "ka"),
        ("Kazakh", "kk"),
        ("Khmer", "km"),
        ("Kannada", "kn"),
        ("Korean", "ko"),
        ("Lao", "lo"),
        ("Lithuanian", "lt"),
        ("Latvian", "lv"),
        ("Macedonian", "mk"),
        ("Malayalam", "ml"),
        ("Mongolian", "mn"),
        ("Marathi", "mr"),
        ("Malay", "ms"),
        ("Maltese", "mt"),
        ("Burmese", "my"),
        ("Norwegian Bokmål", "nb"),
        ("Nepali", "ne"),
        ("Dutch", "nl"),
        ("Polish", "pl"),
        ("Pashto", "ps"),
        ("Portuguese", "pt"),
        ("Romanian", "ro"),
        ("Russian", "ru"),
        ("Sinhala", "si"),
        ("Slovak", "sk"),
        ("Slovenian", "sl"),
        ("Somali", "so"),
        ("Albanian", "sq"),
        ("Serbian", "sr"),
        ("Sundanese", "su"),
        ("Swedish", "sv"),
        ("Swahili", "sw"),
        ("Tamil", "ta"),
        ("Telugu", "te"),
        ("Thai", "th"),
        ("Turkish", "tr"),
        ("Ukrainian", "uk"),
        ("Urdu", "ur"),
        ("Uzbek", "uz"),
        ("Vietnamese", "vi"),
        ("Chinese", "zh"),
        ("Zulu", "zu"),
    ]
)
73
+
74
+
75
+
76
# Default female voice identifier for each language display name.
# Keys match the names used in ``languages``; insertion order is preserved.
female_voice_list = {
    "Vietnamese": "vi-VN-HoaiMyNeural",
    "Bengali": "bn-BD-NabanitaNeural",
    "Thai": "th-TH-PremwadeeNeural",
    # Alternatives noted by the author: 'en-AU-NatashaNeural', "en-IE-EmilyNeural"
    "English": "en-US-AvaMultilingualNeural",
    "Portuguese": "pt-BR-FranciscaNeural",
    "Arabic": "ar-AE-FatimaNeural",
    "Turkish": "tr-TR-EmelNeural",
    "Spanish": "es-AR-ElenaNeural",
    "Korean": "ko-KR-SunHiNeural",
    "French": "fr-BE-CharlineNeural",
    "Indonesian": "id-ID-GadisNeural",
    "Russian": "ru-RU-SvetlanaNeural",
    "Hindi": "hi-IN-SwaraNeural",
    "Japanese": "ja-JP-NanamiNeural",
    "Afrikaans": "af-ZA-AdriNeural",
    "Amharic": "am-ET-MekdesNeural",
    "Azerbaijani": "az-AZ-BanuNeural",
    "Bulgarian": "bg-BG-KalinaNeural",
    "Bosnian": "bs-BA-VesnaNeural",
    "Catalan": "ca-ES-JoanaNeural",
    "Czech": "cs-CZ-VlastaNeural",
    "Welsh": "cy-GB-NiaNeural",
    "Danish": "da-DK-ChristelNeural",
    "German": "de-AT-IngridNeural",
    "Greek": "el-GR-AthinaNeural",
    "Irish": "ga-IE-OrlaNeural",
    "Galician": "gl-ES-SabelaNeural",
    "Gujarati": "gu-IN-DhwaniNeural",
    "Hebrew": "he-IL-HilaNeural",
    "Croatian": "hr-HR-GabrijelaNeural",
    "Hungarian": "hu-HU-NoemiNeural",
    "Icelandic": "is-IS-GudrunNeural",
    "Italian": "it-IT-ElsaNeural",
    "Javanese": "jv-ID-SitiNeural",
    "Georgian": "ka-GE-EkaNeural",
    "Kazakh": "kk-KZ-AigulNeural",
    "Khmer": "km-KH-SreymomNeural",
    "Kannada": "kn-IN-SapnaNeural",
    "Lao": "lo-LA-KeomanyNeural",
    "Lithuanian": "lt-LT-OnaNeural",
    "Latvian": "lv-LV-EveritaNeural",
    "Macedonian": "mk-MK-MarijaNeural",
    "Malayalam": "ml-IN-SobhanaNeural",
    "Mongolian": "mn-MN-YesuiNeural",
    "Marathi": "mr-IN-AarohiNeural",
    "Malay": "ms-MY-YasminNeural",
    "Maltese": "mt-MT-GraceNeural",
    "Burmese": "my-MM-NilarNeural",
    "Norwegian Bokmål": "nb-NO-PernilleNeural",
    "Nepali": "ne-NP-HemkalaNeural",
    "Dutch": "nl-BE-DenaNeural",
    "Polish": "pl-PL-ZofiaNeural",
    "Pashto": "ps-AF-LatifaNeural",
    "Romanian": "ro-RO-AlinaNeural",
    "Sinhala": "si-LK-ThiliniNeural",
    "Slovak": "sk-SK-ViktoriaNeural",
    "Slovenian": "sl-SI-PetraNeural",
    "Somali": "so-SO-UbaxNeural",
    "Albanian": "sq-AL-AnilaNeural",
    "Serbian": "sr-RS-SophieNeural",
    "Sundanese": "su-ID-TutiNeural",
    "Swedish": "sv-SE-SofieNeural",
    "Swahili": "sw-KE-ZuriNeural",
    "Tamil": "ta-IN-PallaviNeural",
    "Telugu": "te-IN-ShrutiNeural",
    "Chinese": "zh-CN-XiaoxiaoNeural",
    "Ukrainian": "uk-UA-PolinaNeural",
    "Urdu": "ur-IN-GulNeural",
    "Uzbek": "uz-UZ-MadinaNeural",
    "Zulu": "zu-ZA-ThandoNeural",
}
146
# Default male voice identifier for each language display name.
# Keys match the names used in ``languages``; insertion order is preserved.
male_voice_list = {
    "Vietnamese": "vi-VN-NamMinhNeural",
    "Bengali": "bn-BD-PradeepNeural",
    "Thai": "th-TH-NiwatNeural",
    # Alternative noted by the author: "en-US-BrianNeural"
    "English": "en-US-BrianMultilingualNeural",
    "Portuguese": "pt-BR-AntonioNeural",
    "Arabic": "ar-AE-HamdanNeural",
    "Turkish": "tr-TR-AhmetNeural",
    "Spanish": "es-AR-TomasNeural",
    "Korean": "ko-KR-HyunsuNeural",
    "French": "fr-BE-GerardNeural",
    "Indonesian": "id-ID-ArdiNeural",
    "Russian": "ru-RU-DmitryNeural",
    "Hindi": "hi-IN-MadhurNeural",
    "Japanese": "ja-JP-KeitaNeural",
    "Afrikaans": "af-ZA-WillemNeural",
    "Amharic": "am-ET-AmehaNeural",
    "Azerbaijani": "az-AZ-BabekNeural",
    "Bulgarian": "bg-BG-BorislavNeural",
    "Bosnian": "bs-BA-GoranNeural",
    "Catalan": "ca-ES-EnricNeural",
    "Czech": "cs-CZ-AntoninNeural",
    "Welsh": "cy-GB-AledNeural",
    "Danish": "da-DK-JeppeNeural",
    "German": "de-AT-JonasNeural",
    "Greek": "el-GR-NestorasNeural",
    "Irish": "ga-IE-ColmNeural",
    "Galician": "gl-ES-RoiNeural",
    "Gujarati": "gu-IN-NiranjanNeural",
    "Hebrew": "he-IL-AvriNeural",
    "Croatian": "hr-HR-SreckoNeural",
    "Hungarian": "hu-HU-TamasNeural",
    "Icelandic": "is-IS-GunnarNeural",
    "Italian": "it-IT-DiegoNeural",
    "Javanese": "jv-ID-DimasNeural",
    "Georgian": "ka-GE-GiorgiNeural",
    "Kazakh": "kk-KZ-DauletNeural",
    "Khmer": "km-KH-PisethNeural",
    "Kannada": "kn-IN-GaganNeural",
    "Lao": "lo-LA-ChanthavongNeural",
    "Lithuanian": "lt-LT-LeonasNeural",
    "Latvian": "lv-LV-NilsNeural",
    "Macedonian": "mk-MK-AleksandarNeural",
    "Malayalam": "ml-IN-MidhunNeural",
    "Mongolian": "mn-MN-BataaNeural",
    "Marathi": "mr-IN-ManoharNeural",
    "Malay": "ms-MY-OsmanNeural",
    "Maltese": "mt-MT-JosephNeural",
    "Burmese": "my-MM-ThihaNeural",
    "Norwegian Bokmål": "nb-NO-FinnNeural",
    "Nepali": "ne-NP-SagarNeural",
    "Dutch": "nl-BE-ArnaudNeural",
    "Polish": "pl-PL-MarekNeural",
    "Pashto": "ps-AF-GulNawazNeural",
    "Romanian": "ro-RO-EmilNeural",
    "Sinhala": "si-LK-SameeraNeural",
    "Slovak": "sk-SK-LukasNeural",
    "Slovenian": "sl-SI-RokNeural",
    "Somali": "so-SO-MuuseNeural",
    "Albanian": "sq-AL-IlirNeural",
    "Serbian": "sr-RS-NicholasNeural",
    "Sundanese": "su-ID-JajangNeural",
    "Swedish": "sv-SE-MattiasNeural",
    "Swahili": "sw-KE-RafikiNeural",
    "Tamil": "ta-IN-ValluvarNeural",
    "Telugu": "te-IN-MohanNeural",
    "Chinese": "zh-CN-YunjianNeural",
    "Ukrainian": "uk-UA-OstapNeural",
    "Urdu": "ur-IN-SalmanNeural",
    "Uzbek": "uz-UZ-SardorNeural",
    "Zulu": "zu-ZA-ThembaNeural",
}
microsoft_tts.py ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#@title <-- Just run the cell (config edge TTS)
import nltk
from deep_translator import GoogleTranslator
from nltk.tokenize import sent_tokenize

from lang_data import languages, male_voice_list, female_voice_list

# Base directory under which the "audio" and "edge_tts_voice" folders are created.
edge_folder = "."

# Fetch the "punkt" resource at import time; sent_tokenize is used for chunking below.
nltk.download('punkt')
10
+
11
# Display names whose code in ``languages`` differs from the code passed to
# GoogleTranslator.
# NOTE(review): the original only remapped Chinese. The Hebrew/Javanese/
# Norwegian entries assume deep_translator's Google language table lists them
# as "iw"/"jw"/"no" and rejects "he"/"jv"/"nb" -- confirm against the installed
# deep_translator version.
_GOOGLE_CODE_OVERRIDES = {
    "Chinese": "zh-CN",
    "Hebrew": "iw",
    "Javanese": "jw",
    "Norwegian Bokmål": "no",
}


def translate_text(text, Language):
    """Translate *text* into *Language* with ``GoogleTranslator``.

    Args:
        text: Text to translate; surrounding whitespace is stripped first.
        Language: Display name of the target language (a key of ``languages``).

    Returns:
        The translation converted with ``str()``.

    Raises:
        KeyError: If *Language* is not a key of ``languages``.
    """
    # Look the name up first so unknown languages still fail with KeyError.
    target_language = languages[Language]
    target_language = _GOOGLE_CODE_OVERRIDES.get(Language, target_language)
    translator = GoogleTranslator(target=target_language)
    translation = translator.translate(text.strip())
    return str(translation)
21
+
22
+
23
def chunks_sentences(paragraph, join_limit=2):
    """Split *paragraph* into sentences and regroup them into larger chunks.

    Args:
        paragraph: Text to split with ``sent_tokenize``.
        join_limit: Number of consecutive sentences joined into one chunk.

    Returns:
        A list of strings, each made of up to ``join_limit`` sentences joined
        by a single space. The last chunk may hold fewer sentences.
    """
    sentences = sent_tokenize(paragraph)
    return [
        ' '.join(sentences[start:start + join_limit])
        for start in range(0, len(sentences), join_limit)
    ]
34
+
35
+
36
def calculate_rate_string(input_value):
    """Convert a speed multiplier into a signed percentage string.

    ``1`` means unchanged speed ("+0"), ``1.2`` means 20 % faster ("+20") and
    ``0.5`` means 50 % slower ("-50"). Callers append the ``%`` sign.

    Args:
        input_value: Speed multiplier (int or float).

    Returns:
        The percentage change as a string with an explicit ``+`` or ``-`` sign.
    """
    # Round instead of truncating: (1.2 - 1) * 100 is 19.999999999999996 in
    # floating point, which int() would turn into 19.
    rate = round((input_value - 1) * 100)
    sign = '+' if rate >= 0 else '-'
    return f"{sign}{abs(rate)}"
40
+
41
+
42
def make_chunks(input_text, language):
    """Split *input_text* into chunks of two sentences each.

    Args:
        input_text: Text to split.
        language: Accepted for interface compatibility but not used; every
            input goes through the same sentence chunking.

    Returns:
        The list produced by ``chunks_sentences(input_text, join_limit=2)``.
    """
    return chunks_sentences(input_text, join_limit=2)
51
+
52
+
53
+
54
+
55
+ import re
56
+ import uuid
57
def tts_file_name(text):
    """Build a unique mp3 path under ``edge_tts_voice`` derived from *text*.

    The text is stripped, a single trailing period is dropped, and it is
    lower-cased. Every run of characters other than word characters and
    hyphens (spaces, punctuation, path separators) becomes one underscore, so
    the result is always a single file name inside the folder. At most 25
    characters of that slug are kept, and ``"empty"`` is used when nothing
    remains.

    Args:
        text: Text the audio file will contain.

    Returns:
        ``"{edge_folder}/edge_tts_voice/{slug}_{8 uppercase hex chars}.mp3"``.
    """
    # Strip before testing for the period so "hello. " loses its full stop too.
    text = text.strip()
    if text.endswith("."):
        text = text[:-1]
    slug = re.sub(r"[^\w-]+", "_", text.lower()).strip("_")
    truncated_text = slug[:25] or "empty"
    random_string = uuid.uuid4().hex[:8].upper()
    return f"{edge_folder}/edge_tts_voice/{truncated_text}_{random_string}.mp3"
67
+
68
+
69
+ from pydub import AudioSegment
70
+ import shutil
71
+ import os
72
def merge_audio_files(audio_paths, output_path):
    """Concatenate several audio files, in order, into one mp3 file.

    Args:
        audio_paths: Iterable of paths readable by ``AudioSegment.from_file``.
        output_path: Destination path of the merged mp3.
    """
    # Segments are loaded lazily, one at a time, as the sum consumes them.
    segments = (AudioSegment.from_file(path) for path in audio_paths)
    # Start from a zero-length segment so an empty input still exports a file.
    combined = sum(segments, AudioSegment.silent(duration=0))
    combined.export(output_path, format="mp3")
86
+
87
def _run_edge_tts(text, speed, voice_name, output_path, failed_label):
    """Run the ``edge-tts`` command once; return True when it exits with 0.

    The command is passed as an argument list without a shell, so characters
    in *text* (quotes, backticks, ``$(...)``, ``;``) cannot alter it.
    On failure, *failed_label* and the attempted command are printed.
    """
    import shlex
    import subprocess

    command = [
        "edge-tts",
        f"--rate={calculate_rate_string(speed)}%",
        "--voice",
        str(voice_name),
        # The "--text=<value>" form keeps text that starts with "-" from being
        # parsed as an option.
        f"--text={text}",
        "--write-media",
        str(output_path),
    ]
    try:
        succeeded = subprocess.run(command, check=False).returncode == 0
    except OSError as error:
        # Raised when the executable cannot be started (e.g. not on PATH).
        print(f"Could not run edge-tts: {error}")
        succeeded = False
    if not succeeded:
        print(f"Failed: {failed_label}")
        print(shlex.join(command))
    return succeeded


def edge_free_tts(chunks_list,speed,voice_name,save_path,translate_text_flag,Language):
    """Synthesize the text chunks with ``edge-tts`` and write one mp3 file.

    With more than one chunk, each chunk is synthesized to a numbered mp3 in
    ``{edge_folder}/edge_tts_voice`` (the folder is recreated on every call)
    and the successful ones are merged into *save_path*. With a single chunk
    the audio is written straight to *save_path*. The text that was sent to
    the synthesizer is also written to ``./temp.txt``.

    Args:
        chunks_list: Non-empty list of text chunks.
        speed: Speed multiplier, converted with ``calculate_rate_string``.
        voice_name: Voice identifier passed to ``edge-tts --voice``.
        save_path: Path of the mp3 to produce.
        translate_text_flag: When true, each chunk is translated into
            *Language* before synthesis.
        Language: Target language name for ``translate_text``.

    Returns:
        *save_path*. The file is missing if every synthesis attempt failed.

    Raises:
        IndexError: If *chunks_list* is empty.
    """
    chunk_dir = f"{edge_folder}/edge_tts_voice"
    spoken_parts = []

    if len(chunks_list) > 1:
        # Start from an empty folder so stale chunks are never merged.
        if os.path.exists(chunk_dir):
            shutil.rmtree(chunk_dir)
        os.mkdir(chunk_dir)

        chunk_audio_list = []
        for index, chunk in enumerate(chunks_list, start=1):
            text = translate_text(chunk, Language) if translate_text_flag else chunk
            spoken_parts.append(text)
            chunk_path = f"{chunk_dir}/{index}.mp3"
            # Only queue chunks that were actually written; a missing file
            # would make the merge step fail for the whole request.
            if _run_edge_tts(text, speed, voice_name, chunk_path, chunk):
                chunk_audio_list.append(chunk_path)
        if chunk_audio_list:
            merge_audio_files(chunk_audio_list, save_path)
    else:
        chunk = chunks_list[0]
        text = translate_text(chunk, Language) if translate_text_flag else chunk
        spoken_parts.append(text)
        _run_edge_tts(text, speed, voice_name, save_path, chunk)

    # Same layout as before: every part is followed by a single space.
    store_text = "".join(f"{part} " for part in spoken_parts)
    with open("./temp.txt", "w", encoding="utf-8") as text_file:
        text_file.write(store_text)
    return save_path
133
+
134
+
135
+ # speed = 1 # @param {type: "number"}
136
+ # translate_text_flag = True # @param {type:"boolean"}
137
+ # long_sentence = True # @param {type:"boolean"}
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+ # from IPython.display import clear_output
147
+ # from IPython.display import Audio
148
# Create the folder that receives the generated audio files, if it is missing.
_audio_dir = f"{edge_folder}/audio"
if not os.path.exists(_audio_dir):
    os.mkdir(_audio_dir)
150
+ import uuid
151
def random_audio_name_generate():
    """Return a random mp3 file name: 8 lowercase hex characters plus ``.mp3``."""
    # The first 8 characters of str(uuid4()) are the first 8 hex digits.
    return uuid.uuid4().hex[:8] + ".mp3"
156
def edge_tts_pipeline(input_text,Language='English',voice_name=None,Gender='Male',translate_text_flag=True,no_silence=False,speed=1,tts_save_path="",long_sentence=True):
    """Turn *input_text* into a WAV file and return its path.

    Args:
        input_text: Text to speak.
        Language: Language display name, used to pick the default voice and as
            the translation target.
        voice_name: Explicit voice identifier. When ``None`` the default voice
            for *Language* and *Gender* is used.
        Gender: ``"Male"`` or ``"Female"``; only consulted when *voice_name*
            is ``None``.
        translate_text_flag: Translate the text into *Language* before
            synthesis.
        no_silence: Also produce a copy with silences removed and return that
            copy instead.
        speed: Speed multiplier (1 = unchanged).
        tts_save_path: Optional path that additionally receives a copy of the
            resulting WAV.
        long_sentence: Split the text into sentence chunks. Forced on for
            inputs longer than 500 characters.

    Returns:
        Path of the generated WAV inside ``{edge_folder}/audio``. This is the
        generated file even when *tts_save_path* is given.

    Raises:
        ValueError: If *voice_name* is ``None`` and *Gender* is not
            ``"Male"`` or ``"Female"``.
        KeyError: If *Language* has no entry in the selected voice table.
    """
    # Long inputs are always chunked, whatever the caller asked for.
    if not long_sentence and len(input_text) > 500:
        long_sentence = True

    if voice_name is None:
        if Gender == "Male":
            voice_name = male_voice_list[Language]
        elif Gender == "Female":
            voice_name = female_voice_list[Language]
        else:
            # Fail early instead of passing the literal voice "None" on.
            raise ValueError(f"Gender must be 'Male' or 'Female', got {Gender!r}")

    if long_sentence:
        chunk_language = Language if translate_text_flag else "English"
        chunks_list = make_chunks(input_text, chunk_language)
    else:
        chunks_list = [input_text]

    temp_save_path = f"{edge_folder}/audio/" + random_audio_name_generate()
    # Swap only the extension; lower-casing or replacing across the whole path
    # would corrupt a folder name that has capitals or contains ".mp3".
    save_path = os.path.splitext(temp_save_path)[0] + ".wav"

    edge_save_path = edge_free_tts(chunks_list, speed, voice_name, temp_save_path, translate_text_flag, Language)
    mp3_to_wav(edge_save_path, save_path)

    audio_return_path = save_path
    if no_silence:
        clean_name = os.path.splitext(random_audio_name_generate())[0] + ".wav"
        clean_path = f"{edge_folder}/audio/" + clean_name
        remove_silence(save_path, clean_path)
        audio_return_path = clean_path

    if tts_save_path:
        shutil.copyfile(audio_return_path, tts_save_path)
    return audio_return_path
193
+
194
+
195
+
196
def talk(input_text):
    """Synthesize *input_text* to a WAV file using module-level settings.

    Reads the module globals ``Language``, ``Gender`` and ``no_silence``.
    They are not assigned anywhere in this module (they appear only in
    commented-out examples), so the caller must define them first. The text is
    always split into sentence chunks, never translated, and spoken at normal
    speed.

    Args:
        input_text: Text to speak.

    Returns:
        Path of the generated WAV in ``{edge_folder}/audio``. When
        ``no_silence`` is true this is the silence-trimmed copy.

    Raises:
        ValueError: If ``Gender`` is neither ``"Male"`` nor ``"Female"``.
        KeyError: If ``Language`` has no entry in the selected voice table.
    """
    translate_text_flag = False
    speed = 1

    if Gender == "Male":
        voice_name = male_voice_list[Language]
    elif Gender == "Female":
        voice_name = female_voice_list[Language]
    else:
        # Previously voice_name stayed unbound here and failed further down.
        raise ValueError(f"Gender must be 'Male' or 'Female', got {Gender!r}")

    # Chunking is always on and translation always off in this entry point.
    chunks_list = make_chunks(input_text, "English")

    temp_save_path = f"{edge_folder}/audio/" + random_audio_name_generate()
    # Swap only the extension so a ".mp3" elsewhere in the path is untouched.
    save_path = os.path.splitext(temp_save_path)[0] + ".wav"
    edge_save_path = edge_free_tts(chunks_list, speed, voice_name, temp_save_path, translate_text_flag, Language)

    mp3_to_wav(edge_save_path, save_path)
    if no_silence:
        clean_name = os.path.splitext(random_audio_name_generate())[0] + ".wav"
        clean_path = f"{edge_folder}/audio/" + clean_name
        remove_silence(save_path, clean_path)
        return clean_path
    return save_path
227
+
228
+ from pydub import AudioSegment
229
+ from pydub.silence import split_on_silence
230
+ import os
231
+
232
def remove_silence(file_path,output_path):
    """Write a copy of a WAV file with its silent stretches cut out.

    The audio is split with ``split_on_silence`` and the resulting pieces are
    joined back together in order.

    Args:
        file_path: Source audio, read as WAV.
        output_path: Destination of the trimmed WAV.

    Returns:
        *output_path*.
    """
    audio_format = "wav"
    source = AudioSegment.from_file(file_path, format=audio_format)
    voiced_parts = split_on_silence(
        source,
        min_silence_len=100,
        silence_thresh=-45,
        keep_silence=50,
    )

    # Start from an empty segment so a fully silent input still exports.
    stitched = sum(voiced_parts, AudioSegment.empty())
    stitched.export(output_path, format=audio_format)
    print(f"Remove silence successfully: {output_path}")
    return output_path
254
+
255
+
256
+ from pydub import AudioSegment
257
+
258
def mp3_to_wav(mp3_file, wav_file):
    """Decode *mp3_file* and write it to *wav_file* in WAV format."""
    decoded = AudioSegment.from_mp3(mp3_file)
    decoded.export(wav_file, format="wav")
266
+
267
+
268
+ ###use case
269
+
270
+ # from microsoft_tts import edge_tts_pipeline
271
+ # def tts(text, Language='English',voice_name=None, tts_save_path='', Gender='Male', translate_text_flag=False, no_silence=True, speed=1.0, long_sentence=True):
272
+ # edge_save_path = edge_tts_pipeline(text, Language,voice_name, Gender, translate_text_flag=translate_text_flag,
273
+ # no_silence=no_silence, speed=speed, tts_save_path=tts_save_path,
274
+ # long_sentence=long_sentence)
275
+ # return edge_save_path
276
+
277
+ # text="Machine learning is the study of computer algorithms that improve automatically through experience. It is seen as a subset of artificial intelligence. Machine learning algorithms build a model based on sample data, known as training data, in order to make predictions or decisions without being explicitly programmed to do so. Machine learning algorithms are used in a wide variety of applications, such as email filtering and computer vision, where it is difficult or infeasible to develop conventional algorithms to perform the needed tasks."
278
+ # save_path = tts(text, Language='English',Gender="Male")
279
+ # print(save_path)
280
+ # import simpleaudio as sa
281
+ # def play_sound(filename):
282
+ # wave_obj = sa.WaveObject.from_wave_file(filename)
283
+ # play_obj = wave_obj.play()
284
+ # play_obj.wait_done()
285
+ # play_sound(save_path)
286
+
287
+
288
+
289
+
290
+
291
+
292
+
293
+
294
+
295
+
296
+
297
+
298
+
299
+
300
+
301
+
302
+
303
+
304
+
305
+
306
+
307
+
308
+
309
+
310
+
311
+
312
+
313
+ # edge_save_path=talk(text)
314
+ # print(f"Audio File Save at: {edge_save_path}")
315
+
316
+ # text = "a quick brown fox jumps over the lazy dog and the dog barks loudly"
317
+ # Language = "English" # @param ['English','Hindi','Bengali','Afrikaans', 'Amharic', 'Arabic', 'Azerbaijani', 'Bulgarian', 'Bosnian', 'Catalan', 'Czech', 'Welsh', 'Danish', 'German', 'Greek', 'Spanish', 'French', 'Irish', 'Galician', 'Gujarati', 'Hebrew', 'Croatian', 'Hungarian', 'Indonesian', 'Icelandic', 'Italian', 'Japanese', 'Javanese', 'Georgian', 'Kazakh', 'Khmer', 'Kannada', 'Korean', 'Lao', 'Lithuanian', 'Latvian', 'Macedonian', 'Malayalam', 'Mongolian', 'Marathi', 'Malay', 'Maltese', 'Burmese', 'Norwegian Bokmål', 'Nepali', 'Dutch', 'Polish', 'Pashto', 'Portuguese', 'Romanian', 'Russian', 'Sinhala', 'Slovak', 'Slovenian', 'Somali', 'Albanian', 'Serbian', 'Sundanese', 'Swedish', 'Swahili', 'Tamil', 'Telugu', 'Thai', 'Turkish', 'Ukrainian', 'Urdu', 'Uzbek', 'Vietnamese', 'Chinese', 'Zulu']
318
+ # no_silence = False
319
+ # Gender = "Male"# @param ['Male', 'Female']
320
+ # translate_text_flag=True
321
+ # no_silence=True
322
+ # speed=1
323
+ # tts_save_path='temp.wav'
324
+ # edge_save_path=edge_tts_pipeline(text,Language,Gender=Gender,translate_text_flag=translate_text_flag,no_silence=no_silence,speed=speed,tts_save_path=tts_save_path)
325
+ # print(f"Audio File Save at: {edge_save_path}")
326
+
327
+ # from microsoft_tts import edge_tts_pipeline
328
+ # def tts(text,tts_save_path=''):
329
+ # # text = "a quick brown fox jumps over the lazy dog and the dog barks loudly"
330
+ # Language = "English" # @param ['English','Hindi','Bengali','Afrikaans', 'Amharic', 'Arabic', 'Azerbaijani', 'Bulgarian', 'Bosnian', 'Catalan', 'Czech', 'Welsh', 'Danish', 'German', 'Greek', 'Spanish', 'French', 'Irish', 'Galician', 'Gujarati', 'Hebrew', 'Croatian', 'Hungarian', 'Indonesian', 'Icelandic', 'Italian', 'Japanese', 'Javanese', 'Georgian', 'Kazakh', 'Khmer', 'Kannada', 'Korean', 'Lao', 'Lithuanian', 'Latvian', 'Macedonian', 'Malayalam', 'Mongolian', 'Marathi', 'Malay', 'Maltese', 'Burmese', 'Norwegian Bokmål', 'Nepali', 'Dutch', 'Polish', 'Pashto', 'Portuguese', 'Romanian', 'Russian', 'Sinhala', 'Slovak', 'Slovenian', 'Somali', 'Albanian', 'Serbian', 'Sundanese', 'Swedish', 'Swahili', 'Tamil', 'Telugu', 'Thai', 'Turkish', 'Ukrainian', 'Urdu', 'Uzbek', 'Vietnamese', 'Chinese', 'Zulu']
331
+ # no_silence = False
332
+ # Gender = "Male"# @param ['Male', 'Female']
333
+ # translate_text_flag=True
334
+ # no_silence=True
335
+ # speed=1
336
+ # # tts_save_path='temp.wav'
337
+ # long_sentence=True
338
+ # edge_save_path=edge_tts_pipeline(text,Language,Gender=Gender,translate_text_flag=translate_text_flag,no_silence=no_silence,speed=speed,tts_save_path=tts_save_path,long_sentence=long_sentence)
339
+ # print(f"Audio File Save at: {edge_save_path}")
340
+ # return edge_save_path
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ edge-tts
2
+ deep_translator==1.11.4
3
+ nltk==3.8.1
4
+ pydub==0.25.1
5
+ gradio>=5.6.0
6
+ click==8.1.7