Spaces:
Runtime error
Runtime error
Vincent Claes
committed on
Commit
·
e3f66f1
1
Parent(s):
9fcc04d
add intro
Browse files
README.md
CHANGED
@@ -1,4 +1,7 @@
|
|
1 |
# Webpage To Podcast
|
|
|
|
|
|
|
2 |
Take any webpage and create a podcast out of it
|
3 |
|
4 |
## Prompt
|
|
|
1 |
# Webpage To Podcast
|
2 |
+
|
3 |
+
git remote add origin https://huggingface.co/spaces/vincentclaes/webpage-to-podcast
|
4 |
+
|
5 |
Take any webpage and create a podcast out of it
|
6 |
|
7 |
## Prompt
|
app.py
CHANGED
@@ -1,16 +1,38 @@
|
|
1 |
-
|
2 |
-
import gradio
|
3 |
from openai import OpenAI
|
4 |
import tempfile
|
5 |
-
import
|
|
|
|
|
|
|
6 |
|
7 |
from dotenv import load_dotenv
|
8 |
load_dotenv()
|
9 |
|
10 |
-
|
11 |
|
|
|
|
|
|
|
|
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
DEFAULT_SYSTEM_PROMPT = """
|
16 |
You are a podcast editor that specialized to create a script out of a webpage.
|
@@ -33,13 +55,6 @@ You are a podcast editor that specialized to create a script out of a webpage.
|
|
33 |
"""
|
34 |
|
35 |
def generate_episode(system_prompt, weblink):
|
36 |
-
# sr = 48000
|
37 |
-
# a4_freq, tones_from_a4 = 440, 12 * (octave - 4) + (note - 9)
|
38 |
-
# frequency = a4_freq * 2 ** (tones_from_a4 / 12)
|
39 |
-
# duration = int(duration)
|
40 |
-
# audio = np.linspace(0, duration, duration * sr)
|
41 |
-
# audio = (20000 * np.sin(audio * (2 * np.pi * frequency))).astype(np.int16)
|
42 |
-
# return sr, audio
|
43 |
response = client.chat.completions.create(
|
44 |
model="gpt-4o",
|
45 |
messages=[
|
@@ -54,34 +69,34 @@ def generate_episode(system_prompt, weblink):
|
|
54 |
input=script,
|
55 |
)
|
56 |
|
57 |
-
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as
|
58 |
-
|
59 |
-
|
60 |
-
return temp_file.name
|
61 |
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
-
|
64 |
-
mp3_file_name = state.get("mp3_file")
|
65 |
-
if mp3_file_name and os.path.exists(mp3_file_name):
|
66 |
-
os.remove(mp3_file_name)
|
67 |
-
print(f"Removed file: {mp3_file_name}")
|
68 |
-
else:
|
69 |
-
print(f"No file found to delete: {mp3_file_name}")
|
70 |
|
71 |
-
|
|
|
|
|
72 |
fn=generate_episode,
|
73 |
inputs=[
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
78 |
],
|
79 |
-
# outputs=["audio", gradio.State()],
|
80 |
-
outputs="audio",
|
81 |
-
|
82 |
)
|
83 |
|
84 |
-
# demo.cleanup(cleanup)
|
85 |
|
86 |
if __name__ == "__main__":
|
87 |
-
demo.launch()
|
|
|
1 |
+
import gradio as gr
|
|
|
2 |
from openai import OpenAI
|
3 |
import tempfile
|
4 |
+
import time
|
5 |
+
import typing as tp
|
6 |
+
from pathlib import Path
|
7 |
+
from pydub import AudioSegment
|
8 |
|
9 |
from dotenv import load_dotenv
|
10 |
load_dotenv()
|
11 |
|
12 |
+
client = OpenAI()
|
13 |
|
14 |
+
class FileCleaner:
    """Track temporary files and delete them once they exceed a lifetime.

    Files are registered through :meth:`add`. Each registration first purges
    every tracked file that is older than ``file_lifetime`` seconds.
    """

    def __init__(self, file_lifetime: float = 3600):
        """Create a cleaner whose files expire after *file_lifetime* seconds."""
        self.file_lifetime = file_lifetime
        # (time_added, path) pairs in insertion order, i.e. oldest first.
        self.files = []

    def add(self, path: tp.Union[str, Path]):
        """Purge expired files, then start tracking *path* as of now."""
        self._cleanup()
        self.files.append((time.time(), Path(path)))

    def _cleanup(self):
        """Delete and forget every tracked file older than ``file_lifetime``."""
        now = time.time()
        expired = 0
        try:
            for time_added, path in self.files:
                if now - time_added <= self.file_lifetime:
                    # Entries are appended chronologically, so everything
                    # after the first fresh entry is fresh as well.
                    break
                try:
                    # EAFP: avoids the exists()/unlink() race when the file
                    # is removed by something else in between.
                    path.unlink()
                except FileNotFoundError:
                    pass
                expired += 1
        finally:
            # Drop all processed entries in one slice deletion instead of a
            # repeated pop(0); also keeps the list consistent if unlink()
            # raises an unexpected error part-way through.
            del self.files[:expired]
|
32 |
+
|
33 |
+
|
34 |
+
file_cleaner = FileCleaner()
|
35 |
+
intro = AudioSegment.from_mp3("intro.mp3")
|
36 |
|
37 |
DEFAULT_SYSTEM_PROMPT = """
|
38 |
You are a podcast editor that specialized to create a script out of a webpage.
|
|
|
55 |
"""
|
56 |
|
57 |
def generate_episode(system_prompt, weblink):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
response = client.chat.completions.create(
|
59 |
model="gpt-4o",
|
60 |
messages=[
|
|
|
69 |
input=script,
|
70 |
)
|
71 |
|
72 |
+
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as podcast_file:
|
73 |
+
podcast_file.write(response.content)
|
74 |
+
file_cleaner.add(podcast_file.name)
|
|
|
75 |
|
76 |
+
podcast = AudioSegment.from_mp3(podcast_file.name)
|
77 |
+
merged_audio = intro + podcast
|
78 |
+
|
79 |
+
with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as final_file:
|
80 |
+
merged_audio.export(final_file.name, format="mp3")
|
81 |
|
82 |
+
print("MP3 files merged successfully!")
|
|
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
+
return final_file.name, script
|
85 |
+
|
86 |
+
demo = gr.Interface(
|
87 |
fn=generate_episode,
|
88 |
inputs=[
|
89 |
+
gr.Textbox(label="System Prompt"),
|
90 |
+
gr.Textbox(label="Weblink"),
|
91 |
+
],
|
92 |
+
outputs=[gr.Audio(label="Podcast Audio"), gr.Textbox(label="Podcast Script")],
|
93 |
+
examples=[
|
94 |
+
[DEFAULT_SYSTEM_PROMPT, "https://en.wikipedia.org/wiki/Mount_Tambora"],
|
95 |
+
[DEFAULT_SYSTEM_PROMPT, "https://en.wikipedia.org/wiki/Great_Wall_of_China"],
|
96 |
+
[DEFAULT_SYSTEM_PROMPT, "https://en.wikipedia.org/wiki/Apollo_11"],
|
97 |
],
|
|
|
|
|
|
|
98 |
)
|
99 |
|
|
|
100 |
|
101 |
if __name__ == "__main__":
|
102 |
+
demo.launch()
|
intro.mp3
ADDED
Binary file (465 kB). View file
|
|
requirements.txt
CHANGED
@@ -2,3 +2,4 @@ gradio
|
|
2 |
numpy
|
3 |
python-dotenv
|
4 |
openai
|
|
|
|
2 |
numpy
|
3 |
python-dotenv
|
4 |
openai
|
5 |
+
pydub
|