eswardivi committed · verified
Commit a3bb4a3 · 1 Parent(s): 3661e80

Update app.py

Files changed (1)
  1. app.py +67 -17
app.py CHANGED
@@ -2,13 +2,34 @@ import gradio as gr
 import spaces
 import os, torch, io
 import json
-os.system('python -m unidic download')
+
+os.system("python -m unidic download")
 import httpx
+
 # print("Make sure you've downloaded unidic (python -m unidic download) for this WebUI to work.")
 from melo.api import TTS
 import tempfile
 import wave
 from pydub import AudioSegment
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    TextIteratorStreamer,
+    BitsAndBytesConfig,
+)
+
+quantization_config = BitsAndBytesConfig(
+    load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16
+)
+
+model = AutoModelForCausalLM.from_pretrained(
+    "NousResearch/Hermes-2-Pro-Llama-3-8B",
+    quantization_config=quantization_config,
+    token=token,
+)
+tok = AutoTokenizer.from_pretrained("NousResearch/Hermes-2-Pro-Llama-3-8B", token=token)
+terminators = [tok.eos_token_id, tok.convert_tokens_to_ids("<|eot_id|>")]
+
 
 def fetch_text(url):
     prefix_url = "https://r.jina.ai/"
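
Note on the model-loading block above: both `from_pretrained` calls pass `token=token`, but neither hunk defines `token`. A minimal sketch of how it could be supplied, assuming the token is exposed to the Space as an environment variable; the name `HF_TOKEN` is an assumption here, not part of the commit:

# Sketch only, not part of the commit: define `token` before the
# from_pretrained calls above. "HF_TOKEN" is an assumed secret name.
import os

token = os.environ.get("HF_TOKEN")  # None is also accepted for a public checkpoint

If the checkpoint is public, `token=None` works as well; the variable only needs to exist so the calls above do not raise a NameError.
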
 
@@ -16,41 +37,70 @@ def fetch_text(url):
     response = httpx.get(url, timeout=60.0)
     return response.text
 
+
 @spaces.GPU
-def synthesize(conversation_text, progress=gr.Progress()):
-    speed=1.0
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+def synthesize(article_url, progress=gr.Progress()):
+    text = fetch_text(article_url)
+    template = """
+    {
+        "conversation": [
+            {"speaker": "", "text": ""},
+            {"speaker": "", "text": ""}
+        ]
+    }
+    """
+
+    chat = [
+        {
+            "role": "user",
+            "content": f"{text} \n Convert the text as Elaborate Conversation between two people as Podcast.\nfollowing this template \n {template}",
+        }
+    ]
+    messages = tok.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)
+    model_inputs = tok([messages], return_tensors="pt").to(device)
+
+    text = model.generate(
+        model_inputs,
+        max_new_tokens=1024,
+        do_sample=True,
+        temperature=0.9,
+        eos_token_id=terminators,
+    )
+
+    speed = 1.0
+    device = "cuda" if torch.cuda.is_available() else "cpu"
     models = {
-        'EN': TTS(language='EN', device=device),
+        "EN": TTS(language="EN", device=device),
     }
-    speakers = ['EN-Default','EN-US']
 
     combined_audio = AudioSegment.empty()
-    conversation = json.loads(conversation_text)
+    conversation = json.loads(text)
     for i, turn in enumerate(conversation["conversation"]):
         bio = io.BytesIO()
         text = turn["text"]
         speaker = speakers[i % 2]
-        speaker_id = models['EN'].hps.data.spk2id[speaker]
-        models['EN'].tts_to_file(text, speaker_id, bio, speed=speed, pbar=progress.tqdm, format='wav')
+        speaker_id = models["EN"].hps.data.spk2id[speaker]
+        models["EN"].tts_to_file(
+            text, speaker_id, bio, speed=speed, pbar=progress.tqdm, format="wav"
+        )
         bio.seek(0)
         audio_segment = AudioSegment.from_file(bio, format="wav")
         combined_audio += audio_segment
 
-    final_audio_path = 'final.mp3'
-    combined_audio.export(final_audio_path, format='mp3')
+    final_audio_path = "final.mp3"
+    combined_audio.export(final_audio_path, format="mp3")
     return final_audio_path
 
-
+
 with gr.Blocks() as demo:
-    gr.Markdown('# Not Ready to USE')
-    gr.Markdown('# Turn Any Article into Podcast')
-    gr.Markdown('## Easily convert articles from URLs into listenable audio Podcast.')
+    gr.Markdown("# Not Ready to USE")
+    gr.Markdown("# Turn Any Article into Podcast")
+    gr.Markdown("## Easily convert articles from URLs into listenable audio Podcast.")
     with gr.Group():
         text = gr.Textbox(label="Article Link")
-        btn = gr.Button('Podcasitfy', variant='primary')
+        btn = gr.Button("Podcasitfy", variant="primary")
         aud = gr.Audio(interactive=False)
         btn.click(synthesize, inputs=[text], outputs=[aud])
 
 demo.queue(api_open=True, default_concurrency_limit=10).launch(show_api=True)
-
+    speakers = ["EN-Default", "EN-US"]
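
Note on the new `synthesize()`: `model_inputs` is moved to `device` before `device` is assigned, and the tensor returned by `model.generate()` is later handed to `json.loads()` unchanged. A minimal sketch of the ordering and decode step this implies, using only standard `transformers` calls on the objects the diff already defines (`tok`, `model`, `terminators`, `messages`); it is an illustration, not the committed code:

# Sketch only, not part of the commit.
device = "cuda" if torch.cuda.is_available() else "cpu"  # define before use

model_inputs = tok([messages], return_tensors="pt").to(device)
generated = model.generate(
    model_inputs.input_ids,
    attention_mask=model_inputs.attention_mask,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.9,
    eos_token_id=terminators,
)
# Drop the prompt tokens, then decode to a string before parsing.
reply = tok.decode(
    generated[0][model_inputs.input_ids.shape[-1]:],
    skip_special_tokens=True,
)
conversation = json.loads(reply)

Even with the decode in place, `json.loads` assumes the model returns bare JSON matching the template; if the reply wraps it in extra prose, an extraction or retry step would still be needed.
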