Spaces:

gauravgulati619
/

MediVox

Running

App Files Files Community

MediVox / doctorvoice.py

gauravgulati619

Initial commit: Complete MediVox application

95841bc 10 months ago

raw

history blame

3.87 kB

	# If you don't use pipenv, keep the following two lines uncommented so the variables in .env are loaded:
	from dotenv import load_dotenv
	load_dotenv()

	# Step 1a: Set up the text-to-speech (TTS) model with gTTS
	import os
	from gtts import gTTS

	def text_to_speech_with_gtts_old(input_text, output_filepath):
	    """Synthesize English speech for ``input_text`` with gTTS and save it.

	    Args:
	        input_text: Text to be spoken.
	        output_filepath: Path the generated audio is saved to.

	    The audio is only written to disk; it is not played and nothing is
	    returned.
	    """
	    # Normal speaking rate (slow=False), English voice.
	    speech = gTTS(text=input_text, lang="en", slow=False)
	    speech.save(output_filepath)


	# input_text="Hi"
	# text_to_speech_with_gtts_old(input_text=input_text, output_filepath="gtts_testing.mp3")

	# Step 1b: Set up the text-to-speech (TTS) model with ElevenLabs
	import elevenlabs
	from elevenlabs.client import ElevenLabs

	# ElevenLabs API key, read from the environment once at import time;
	# None when the variable is not set.
	ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")

	def text_to_speech_with_elevenlabs_old(input_text, output_filepath):
	    """Generate speech for ``input_text`` with ElevenLabs and save it.

	    Args:
	        input_text: Text to be spoken.
	        output_filepath: Path the generated audio is saved to.

	    Uses the "Emily" voice with the "eleven_turbo_v2" model and the
	    "mp3_22050_32" output format. The audio is only saved; it is not
	    played and nothing is returned.
	    """
	    request = {
	        "text": input_text,
	        "voice": "Emily",
	        "output_format": "mp3_22050_32",
	        "model": "eleven_turbo_v2",
	    }
	    speech = ElevenLabs(api_key=ELEVENLABS_API_KEY).generate(**request)
	    elevenlabs.save(speech, output_filepath)

	# text_to_speech_with_elevenlabs_old(input_text, output_filepath="elevenlabs_testing.mp3")

	# Step 2: Use the model to turn text output into voice
	# Saving the doctor's audio file does not play it, so the functions in this step also play the file automatically after saving it.
	import subprocess
	import platform
	from pydub import AudioSegment
	from pydub.playback import play
	import tempfile

	def text_to_speech_with_gtts(input_text, output_filepath):
	    """Synthesize English speech with gTTS, save it, then play it.

	    Args:
	        input_text: Text to be spoken.
	        output_filepath: Path the generated audio is saved to.

	    Returns:
	        ``output_filepath``, mirroring ``text_to_speech_with_elevenlabs``.

	    Playback is best-effort: ``play_audio`` prints playback errors instead
	    of raising them. Errors from gTTS itself (synthesis or saving) still
	    propagate to the caller.
	    """
	    speech = gTTS(text=input_text, lang="en", slow=False)
	    speech.save(output_filepath)

	    # gTTS writes MP3 data. Media.SoundPlayer (Windows) and aplay (Linux)
	    # expect WAV input, so playback goes through play_audio, which uses an
	    # MP3-capable player on macOS/Linux and converts to WAV on Windows.
	    play_audio(output_filepath)
	    return output_filepath


	# input_text="Hi"
	# #text_to_speech_with_gtts(input_text=input_text, output_filepath="gtts_testing_autoplay.mp3")

	def play_audio(file_path):
	    """Play the MP3 file at ``file_path`` with a platform-specific player.

	    * macOS ("Darwin"): ``afplay``.
	    * Linux: ``mpg123``.
	    * Windows: the MP3 is converted with pydub to a temporary WAV file,
	      which is played through PowerShell's ``Media.SoundPlayer``; the
	      temporary file is removed afterwards, even when conversion or
	      playback fails.

	    Args:
	        file_path: Path of the MP3 file to play.

	    Playback is best-effort: any failure (unsupported operating system,
	    missing player executable, non-zero player exit status, conversion
	    error) is printed rather than raised. Returns None.
	    """
	    os_name = platform.system()
	    try:
	        if os_name == "Darwin":  # macOS
	            # check=True turns a failed player run into an exception so it
	            # is reported below instead of being silently ignored.
	            subprocess.run(['afplay', file_path], check=True)
	        elif os_name == "Windows":  # Windows
	            # Load the MP3 first so no temporary file is created when the
	            # input cannot be read.
	            audio = AudioSegment.from_mp3(file_path)
	            # Reserve a temporary WAV path; the handle is closed before the
	            # file is written to and handed to the player.
	            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_wav:
	                wav_path = temp_wav.name
	            try:
	                audio.export(wav_path, format='wav')
	                subprocess.run(
	                    ['powershell', '-c', f'(New-Object Media.SoundPlayer "{wav_path}").PlaySync();'],
	                    check=True,
	                )
	            finally:
	                # Always remove the temporary file, even if export or
	                # playback raised.
	                os.unlink(wav_path)
	        elif os_name == "Linux":  # Linux
	            subprocess.run(['mpg123', file_path], check=True)  # Using mpg123 for MP3 playback
	        else:
	            raise OSError("Unsupported operating system")
	    except Exception as e:
	        # Deliberately broad: a playback problem must not crash the caller.
	        print(f"An error occurred while trying to play the audio: {e}")

	def text_to_speech_with_elevenlabs(input_text, output_filepath):
	    """Generate speech with ElevenLabs, save it, play it, and return its path.

	    Args:
	        input_text: Text to be spoken.
	        output_filepath: Path the generated audio is saved to.

	    Returns:
	        ``output_filepath``, unchanged.

	    Uses the "Aria" voice with the "eleven_turbo_v2" model and the
	    "mp3_22050_32" output format. After saving, the file is handed to
	    ``play_audio`` for playback.
	    """
	    tts_client = ElevenLabs(api_key=ELEVENLABS_API_KEY)
	    elevenlabs.save(
	        tts_client.generate(
	            text=input_text,
	            voice="Aria",
	            output_format="mp3_22050_32",
	            model="eleven_turbo_v2",
	        ),
	        output_filepath,
	    )

	    # Saving alone does not produce sound; play the file right away.
	    play_audio(output_filepath)
	    return output_filepath

	# text_to_speech_with_elevenlabs(input_text, output_filepath="elevenlabs_testing_autoplay.mp3")