Spaces:
Running
Running
| # If you don't use pipenv, the following two lines load environment variables from a .env file: | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
| #Step1a: Set up Text to Speech–TTS–model with gTTS | |
| import os | |
| from gtts import gTTS | |
def text_to_speech_with_gtts_old(input_text, output_filepath):
    """Synthesize English speech for ``input_text`` with gTTS and save it.

    The audio is written to ``output_filepath``. Nothing is played back and
    nothing is returned.
    """
    # Normal-speed ("slow=False") English voice, saved straight to disk.
    gTTS(text=input_text, lang="en", slow=False).save(output_filepath)
| # input_text="Hi" | |
| # text_to_speech_with_gtts_old(input_text=input_text, output_filepath="gtts_testing.mp3") | |
| #Step1b: Set up Text to Speech–TTS–model with ElevenLabs | |
| import elevenlabs | |
| from elevenlabs.client import ElevenLabs | |
| ELEVENLABS_API_KEY=os.environ.get("ELEVENLABS_API_KEY") | |
def text_to_speech_with_elevenlabs_old(input_text, output_filepath):
    """Generate speech for ``input_text`` via ElevenLabs and save it to disk.

    Requests the "Emily" voice with the ``eleven_turbo_v2`` model in the
    ``mp3_22050_32`` output format and writes the audio to
    ``output_filepath``. Nothing is played back and nothing is returned.
    """
    generation_options = {
        "text": input_text,
        "voice": "Emily",
        "output_format": "mp3_22050_32",
        "model": "eleven_turbo_v2",
    }
    # The API key is read once at import time from the environment.
    tts_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
    speech = tts_client.generate(**generation_options)
    elevenlabs.save(speech, output_filepath)
| # text_to_speech_with_elevenlabs_old(input_text, output_filepath="elevenlabs_testing.mp3") | |
| # #Step2: Use Model for Text output to Voice | |
| # When the doctor's audio files are saved, they don't play automatically, so Step 2 adds playback so that each audio file runs as soon as it is generated. | |
| import subprocess | |
| import platform | |
| from pydub import AudioSegment | |
| from pydub.playback import play | |
| import tempfile | |
def text_to_speech_with_gtts(input_text, output_filepath):
    """Synthesize English speech with gTTS, save it, and play it back.

    Args:
        input_text: Text to convert to speech.
        output_filepath: Destination path for the generated audio file.

    Playback is best-effort: ``play_audio`` reports any playback failure on
    stdout instead of raising, so a missing player never aborts the caller.
    """
    speech = gTTS(text=input_text, lang="en", slow=False)
    speech.save(output_filepath)

    # gTTS writes MP3 data. The previous inline playback used 'aplay' on
    # Linux and Media.SoundPlayer on Windows, both of which only handle
    # WAV/PCM, so the file could not be played there. play_audio() already
    # implements MP3-capable playback per OS (afplay / WAV conversion +
    # SoundPlayer / mpg123), matching text_to_speech_with_elevenlabs.
    play_audio(output_filepath)
| # input_text="Hi" | |
| # #text_to_speech_with_gtts(input_text=input_text, output_filepath="gtts_testing_autoplay.mp3") | |
def play_audio(file_path):
    """Play an MP3 file with a command-line player chosen per operating system.

    * macOS: ``afplay`` plays the file directly.
    * Windows: the MP3 is converted to a temporary WAV via pydub and played
      with PowerShell's ``Media.SoundPlayer``; the temporary file is always
      removed afterwards.
    * Linux: ``mpg123`` plays the file directly.

    Args:
        file_path: Path of the MP3 file to play.

    Returns:
        None. Any failure (unsupported OS, missing player, conversion error)
        is caught and reported on stdout rather than raised.
    """
    os_name = platform.system()
    try:
        if os_name == "Darwin":  # macOS
            subprocess.run(['afplay', file_path])
        elif os_name == "Windows":  # Windows
            # Load MP3 and convert to WAV for playback
            audio = AudioSegment.from_mp3(file_path)
            # Reserve a temp path and close the handle right away
            # (delete=False) so the file can be reopened by name for the
            # export and by PowerShell for playback.
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
                wav_path = temp_wav.name
            try:
                audio.export(wav_path, format='wav')
                # Play the WAV file
                subprocess.run(['powershell', '-c', f'(New-Object Media.SoundPlayer "{wav_path}").PlaySync();'])
            finally:
                # Clean up the temporary file even when export or playback
                # fails; previously it was leaked on any error.
                os.unlink(wav_path)
        elif os_name == "Linux":  # Linux
            subprocess.run(['mpg123', file_path])  # Using mpg123 for MP3 playback
        else:
            raise OSError("Unsupported operating system")
    except Exception as e:
        print(f"An error occurred while trying to play the audio: {e}")
def text_to_speech_with_elevenlabs(input_text, output_filepath):
    """Generate speech with ElevenLabs, save it, play it, and return its path.

    Requests the "Aria" voice with the ``eleven_turbo_v2`` model in the
    ``mp3_22050_32`` output format, writes the audio to ``output_filepath``,
    then hands the file to ``play_audio`` for playback.

    Returns:
        The same ``output_filepath`` that was passed in.
    """
    speech = ElevenLabs(api_key=ELEVENLABS_API_KEY).generate(
        text=input_text,
        voice="Aria",
        output_format="mp3_22050_32",
        model="eleven_turbo_v2",
    )
    elevenlabs.save(speech, output_filepath)

    # Autoplay the freshly written file before returning its location.
    play_audio(output_filepath)
    return output_filepath
| # text_to_speech_with_elevenlabs(input_text, output_filepath="elevenlabs_testing_autoplay.mp3") |