Vincent Claes committed on
Commit
e3f66f1
·
1 Parent(s): 9fcc04d
Files changed (4) hide show
  1. README.md +3 -0
  2. app.py +48 -33
  3. intro.mp3 +0 -0
  4. requirements.txt +1 -0
README.md CHANGED
@@ -1,4 +1,7 @@
1
  # Webpage To Podcast
 
 
 
2
  Take any webpage and create a podcast out of it
3
 
4
  ## Prompt
 
1
  # Webpage To Podcast
2
+
3
+ git remote add space https://huggingface.co/spaces/vincentclaes/webpage-to-podcast
4
+
5
  Take any webpage and create a podcast out of it
6
 
7
  ## Prompt
app.py CHANGED
@@ -1,16 +1,38 @@
1
- # import numpy as np
2
- import gradio
3
  from openai import OpenAI
4
  import tempfile
5
- import os
 
 
 
6
 
7
  from dotenv import load_dotenv
8
  load_dotenv()
9
 
10
- import uuid
11
 
 
 
 
 
12
 
13
- client = OpenAI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  DEFAULT_SYSTEM_PROMPT = """
16
  You are a podcast editor that specialized to create a script out of a webpage.
@@ -33,13 +55,6 @@ You are a podcast editor that specialized to create a script out of a webpage.
33
  """
34
 
35
  def generate_episode(system_prompt, weblink):
36
- # sr = 48000
37
- # a4_freq, tones_from_a4 = 440, 12 * (octave - 4) + (note - 9)
38
- # frequency = a4_freq * 2 ** (tones_from_a4 / 12)
39
- # duration = int(duration)
40
- # audio = np.linspace(0, duration, duration * sr)
41
- # audio = (20000 * np.sin(audio * (2 * np.pi * frequency))).astype(np.int16)
42
- # return sr, audio
43
  response = client.chat.completions.create(
44
  model="gpt-4o",
45
  messages=[
@@ -54,34 +69,34 @@ def generate_episode(system_prompt, weblink):
54
  input=script,
55
  )
56
 
57
- with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
58
- temp_file.write(response.content)
59
- # state["mp3_file"] = temp_file.name
60
- return temp_file.name
61
 
 
 
 
 
 
62
 
63
- def cleanup(state):
64
- mp3_file_name = state.get("mp3_file")
65
- if mp3_file_name and os.path.exists(mp3_file_name):
66
- os.remove(mp3_file_name)
67
- print(f"Removed file: {mp3_file_name}")
68
- else:
69
- print(f"No file found to delete: {mp3_file_name}")
70
 
71
- demo = gradio.Interface(
 
 
72
  fn=generate_episode,
73
  inputs=[
74
- gradio.Textbox(value=DEFAULT_SYSTEM_PROMPT, label="System Prompt"),
75
- gradio.Textbox(value="https://en.wikipedia.org/wiki/Mount_Tambora", label="Weblink"),
76
- # gradio.State(), # State to track the mp3 file
77
-
 
 
 
 
78
  ],
79
- # outputs=["audio", gradio.State()],
80
- outputs="audio",
81
-
82
  )
83
 
84
- # demo.cleanup(cleanup)
85
 
86
  if __name__ == "__main__":
87
- demo.launch()
 
1
+ import gradio as gr
 
2
  from openai import OpenAI
3
  import tempfile
4
+ import time
5
+ import typing as tp
6
+ from pathlib import Path
7
+ from pydub import AudioSegment
8
 
9
  from dotenv import load_dotenv
10
  load_dotenv()
11
 
12
+ client = OpenAI()
13
 
14
class FileCleaner:
    """Track temporary files and delete them once they exceed a lifetime.

    Files are registered with ``add``; every call to ``add`` first purges
    the entries that are older than ``file_lifetime`` seconds. Entries are
    appended in registration order, so the oldest entry is always at the
    front of ``files``.
    """

    def __init__(self, file_lifetime: float = 3600):
        """Create a cleaner.

        Args:
            file_lifetime: Age in seconds after which a tracked file is
                deleted on the next ``add`` call.
        """
        self.file_lifetime = file_lifetime
        # (registration timestamp, path) pairs, oldest first.
        self.files: tp.List[tp.Tuple[float, Path]] = []

    def add(self, path: tp.Union[str, Path]) -> None:
        """Purge expired files, then start tracking ``path``.

        Args:
            path: Location of the file to track; stored as a ``Path``.
        """
        self._cleanup()
        self.files.append((time.time(), Path(path)))

    def _cleanup(self) -> None:
        """Delete tracked files older than ``file_lifetime`` and forget them.

        Scanning stops at the first entry that has not expired yet, because
        every later entry was registered after it.
        """
        now = time.time()
        expired = 0
        try:
            for time_added, path in self.files:
                if now - time_added <= self.file_lifetime:
                    break
                try:
                    path.unlink()
                except FileNotFoundError:
                    # Already removed by someone else; nothing left to do.
                    pass
                expired += 1
        finally:
            # Drop the processed prefix in one operation instead of calling
            # pop(0) per entry. Done in ``finally`` so that entries handled
            # before an unexpected unlink error are still forgotten.
            del self.files[:expired]
32
+
33
+
34
+ file_cleaner = FileCleaner()
35
+ intro = AudioSegment.from_mp3("intro.mp3")
36
 
37
  DEFAULT_SYSTEM_PROMPT = """
38
  You are a podcast editor that specialized to create a script out of a webpage.
 
55
  """
56
 
57
  def generate_episode(system_prompt, weblink):
 
 
 
 
 
 
 
58
  response = client.chat.completions.create(
59
  model="gpt-4o",
60
  messages=[
 
69
  input=script,
70
  )
71
 
72
+ with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as podcast_file:
73
+ podcast_file.write(response.content)
74
+ file_cleaner.add(podcast_file.name)
 
75
 
76
+ podcast = AudioSegment.from_mp3(podcast_file.name)
77
+ merged_audio = intro + podcast
78
+
79
+ with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as final_file:
80
+ merged_audio.export(final_file.name, format="mp3")
81
 
82
+ print("MP3 files merged successfully!")
 
 
 
 
 
 
83
 
84
+ return final_file.name, script
85
+
86
+ demo = gr.Interface(
87
  fn=generate_episode,
88
  inputs=[
89
+ gr.Textbox(label="System Prompt"),
90
+ gr.Textbox(label="Weblink"),
91
+ ],
92
+ outputs=[gr.Audio(label="Podcast Audio"), gr.Textbox(label="Podcast Script")],
93
+ examples=[
94
+ [DEFAULT_SYSTEM_PROMPT, "https://en.wikipedia.org/wiki/Mount_Tambora"],
95
+ [DEFAULT_SYSTEM_PROMPT, "https://en.wikipedia.org/wiki/Great_Wall_of_China"],
96
+ [DEFAULT_SYSTEM_PROMPT, "https://en.wikipedia.org/wiki/Apollo_11"],
97
  ],
 
 
 
98
  )
99
 
 
100
 
101
  if __name__ == "__main__":
102
+ demo.launch()
intro.mp3 ADDED
Binary file (465 kB). View file
 
requirements.txt CHANGED
@@ -2,3 +2,4 @@ gradio
2
  numpy
3
  python-dotenv
4
  openai
 
 
2
  numpy
3
  python-dotenv
4
  openai
5
+ pydub