# Podcastify / app.py
# Author: eswardivi. Latest commit: "Update app.py" (b5cc82a, verified).
import gradio as gr
import spaces
import os, torch, io
import json
import re
os.system("python -m unidic download")
import httpx
# print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
from melo.api import TTS
import tempfile
import wave
from pydub import AudioSegment
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
TextIteratorStreamer,
BitsAndBytesConfig,
)
from threading import Thread
from gradio_client import Client
# Disabled alternative that delegated generation to a remote Gradio client:
# client = Client("eswardivi/AIO_Chat")

# The model and the tokenizer come from the same hub repository; only the
# tokenizer is pinned to a specific revision.
_MODEL_ID = "NousResearch/Hermes-2-Pro-Llama-3-8B"
_TOKENIZER_REVISION = "8ab73a6800796d84448bc936db9bac5ad9f984ae"

# Load the weights in 4-bit with float16 compute.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)
model = AutoModelForCausalLM.from_pretrained(
    _MODEL_ID,
    quantization_config=quantization_config,
)
tok = AutoTokenizer.from_pretrained(_MODEL_ID, revision=_TOKENIZER_REVISION)

# Token ids that end generation: the tokenizer's EOS token plus "<|eot_id|>".
terminators = [tok.eos_token_id, tok.convert_tokens_to_ids("<|eot_id|>")]
def validate_url(url):
    """Download *url* and return its body, or a message describing the failure.

    Failures are never raised: every exception is converted into a
    human-readable string and returned in place of the page text, so callers
    must inspect the returned string to tell success from failure.

    Args:
        url: Address to request with a 60 second timeout.

    Returns:
        The response text on success, otherwise an error description.
    """
    try:
        reply = httpx.get(url, timeout=60.0)
        # Raises httpx.HTTPStatusError for unsuccessful status codes.
        reply.raise_for_status()
        outcome = reply.text
    except httpx.RequestError as exc:
        outcome = f"An error occurred while requesting {url}: {str(exc)}"
    except httpx.HTTPStatusError as exc:
        outcome = f"Error response {exc.response.status_code} while requesting {url}"
    except Exception as exc:
        outcome = f"An unexpected error occurred: {str(exc)}"
    return outcome
def fetch_text(url):
    """Fetch the page at *url* as text through the Jina reader endpoint.

    The article URL is appended to ``https://r.jina.ai/`` and the resulting
    address is requested with ``validate_url``.

    Args:
        url: Address of the article to extract.

    Returns:
        Whatever ``validate_url`` returns for the prefixed URL: the extracted
        text on success, or an error description string on failure.
    """
    print("Entered Webpage Extraction")
    full_url = "https://r.jina.ai/" + url
    print(full_url)
    page_text = validate_url(full_url)
    # Log the exit only after the request has finished; previously this was
    # printed before the fetch, so the log markers bracketed nothing.
    print("Exited Webpage Extraction")
    return page_text
@spaces.GPU(duration=100)
def synthesize(article_url, progress_audio=gr.Progress()):
    """Turn the article at *article_url* into a two-voice podcast audio file.

    Steps: fetch the article text, ask the language model for a JSON
    conversation, extract the first JSON object from the model output, speak
    each turn with alternating English voices, and export the concatenated
    audio as MP3.

    Args:
        article_url: Address of the article; must start with ``http://`` or
            ``https://``.
        progress_audio: Gradio progress tracker whose ``tqdm`` is handed to
            the TTS engine.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the JSON
        conversation string and ``audio_path`` is the path of the generated
        MP3. On failure ``text`` is an error message and ``audio_path`` is
        ``None``.
    """
    article_url = (article_url or "").strip()
    if not article_url.startswith(("http://", "https://")):
        return "URL must start with 'http://' or 'https://'", None

    article_text = fetch_text(article_url)
    # validate_url reports failures as strings that start with one of these
    # prefixes. A plain `"Error" in text` check missed the lowercase variants
    # and rejected any article that merely contained the word "Error".
    fetch_error_prefixes = (
        "An error occurred while requesting",
        "Error response",
        "An unexpected error occurred",
    )
    if article_text.startswith(fetch_error_prefixes):
        return article_text, None

    device = "cuda" if torch.cuda.is_available() else "cpu"
    chat = [
        {
            "role": "user",
            "content": article_text + """\n Convert the provided text into a short, informative podcast conversation between two experts. The tone should be professional and engaging. Please adhere to the following format and return only JSON:
{
"conversation": [
{"speaker": "", "text": ""},
{"speaker": "", "text": ""}
]
}
""",
        }
    ]
    messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
    model_inputs = tok([messages], return_tensors="pt").to(device)
    streamer = TextIteratorStreamer(
        tok, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    generate_kwargs = dict(
        model_inputs,
        streamer=streamer,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.9,
        eos_token_id=terminators,
    )

    print("Entered Generation")
    # Generation runs in a worker thread; this thread drains the streamer.
    worker = Thread(target=model.generate, kwargs=generate_kwargs)
    worker.start()
    partial_text = "".join(streamer)
    worker.join()
    print("Exited Generation")
    print(partial_text)

    # First JSON object in the output, allowing one level of nested braces
    # (enough for the requested {"conversation": [{...}, ...]} shape).
    pattern = r"\{(?:[^{}]|(?:\{[^{}]*\}))*\}"
    json_match = re.search(pattern, partial_text)
    if json_match is None:
        return "The model did not return a conversation in JSON format. Please try again.", None
    conversation = json_match.group()
    print(conversation)

    try:
        turns = json.loads(conversation)["conversation"]
    except (json.JSONDecodeError, KeyError, TypeError):
        return "The model returned a malformed conversation. Please try again.", None
    if not isinstance(turns, list):
        return "The model returned a malformed conversation. Please try again.", None

    speed = 1.0
    tts_model = TTS(language="EN", device=device)
    speakers = ["EN-Default", "EN-US"]
    combined_audio = AudioSegment.empty()
    for i, turn in enumerate(turns):
        line = turn.get("text") if isinstance(turn, dict) else None
        if not isinstance(line, str) or not line.strip():
            # Nothing to speak for this turn; the index still advances so the
            # voices keep alternating by position.
            continue
        speaker_id = tts_model.hps.data.spk2id[speakers[i % 2]]
        wav_buffer = io.BytesIO()
        tts_model.tts_to_file(
            line, speaker_id, wav_buffer, speed=speed, pbar=progress_audio.tqdm, format="wav"
        )
        wav_buffer.seek(0)
        combined_audio += AudioSegment.from_file(wav_buffer, format="wav")

    if len(combined_audio) == 0:
        return "The generated conversation contained no text to speak. Please try again.", None

    # Each request gets its own output file: a fixed "final.mp3" would be
    # overwritten by concurrent requests.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        final_audio_path = tmp.name
    combined_audio.export(final_audio_path, format="mp3")
    return conversation, final_audio_path
# Web UI: a URL box and a button on top, with the generated conversation and
# the audio player side by side underneath.
with gr.Blocks(theme='gstaff/sketch') as demo:
    # Header and usage instructions.
    gr.Markdown("# Turn Any Article into a Podcast")
    gr.Markdown("## Easily convert articles from URLs into listenable audio podcasts.")
    gr.Markdown("### Instructions")
    gr.Markdown("""
- **Step 1:** Paste the URL of the article you want to convert into the textbox.
- **Step 2:** Click on "Podcastify" to generate the podcast.
- **Step 3:** Listen to the podcast or view the conversation.
""")
    gr.Markdown("""
- View the code at [GitHub - NarrateIt](https://github.com/EswarDivi/NarrateIt).
""")

    # Input controls.
    with gr.Group():
        text = gr.Textbox(label="Article Link")
        btn = gr.Button("Podcastify", variant="primary")

    # Read-only outputs.
    with gr.Row():
        conv_display = gr.Textbox(label="Conversation", interactive=False)
        aud = gr.Audio(interactive=False)

    # Clicking the button runs the full article -> podcast pipeline.
    btn.click(
        fn=synthesize,
        inputs=[text],
        outputs=[conv_display, aud],
    )

    # Credits.
    gr.Markdown("""
Special thanks to:
- [gstaff/sketch](https://huggingface.co/spaces/gstaff/sketch) for the Sketch Theme.
- [mrfakename/MeloTTS](https://huggingface.co/spaces/mrfakename/MeloTTS) and [GitHub](https://github.com/myshell-ai/MeloTTS) for MeloTTS.
- [Hermes-2-Pro-Llama-3-8B](https://huggingface.co/NousResearch/Hermes-2-Pro-Llama-3-8B) for Function Calling Support.
- [Jina AI](https://jina.ai/reader/) for the web page parsing.
""")

# Enable the request queue (up to 10 concurrent workers per event), then serve.
queued_demo = demo.queue(api_open=True, default_concurrency_limit=10)
queued_demo.launch(show_api=True, share=True)