Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
- app.py +22 -20
- requirements.txt +1 -2
app.py
CHANGED
@@ -10,7 +10,7 @@ import whisper
|
|
10 |
import ffmpeg
|
11 |
import re
|
12 |
import tempfile
|
13 |
-
import openai
|
14 |
|
15 |
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
|
16 |
|
@@ -28,6 +28,15 @@ Transcript:
|
|
28 |
{} \n\n Article:"""
|
29 |
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
@st.cache_resource()
|
32 |
def load_whisper(model):
|
33 |
return whisper.load_model(model)
|
@@ -59,16 +68,11 @@ def convert_to_audio(video_filename):
|
|
59 |
|
60 |
|
61 |
@st.cache_data
|
62 |
-
def summarise(prompt,
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
model=model,
|
68 |
-
messages=messages,
|
69 |
-
temperature=0,
|
70 |
-
)
|
71 |
-
return response.choices[0].message["content"]
|
72 |
|
73 |
|
74 |
def delete_files(video_filename, audio_filename):
|
@@ -122,7 +126,7 @@ def main():
|
|
122 |
- Fetch transcript from YouTube API (if available) by clicking the **Fetch Transcript** button.
|
123 |
- Transcribe the video using the Whisper model by clicking the **Transcribe (Whisper)** button.
|
124 |
- The transcript will be displayed in a text area below.
|
125 |
-
-
|
126 |
- The summary will be displayed in a text area below.
|
127 |
- You can download the video, audio, transcript or summary by clicking the respective download buttons.
|
128 |
"""
|
@@ -131,9 +135,7 @@ def main():
|
|
131 |
whisper_model = load_whisper("base")
|
132 |
|
133 |
url = c2.text_input("Enter the video URL")
|
134 |
-
|
135 |
-
if open_ai_key != "":
|
136 |
-
openai.api_key = open_ai_key
|
137 |
fetch_button = c2.button("Fetch")
|
138 |
st.session_state.setdefault("load_state", False)
|
139 |
|
@@ -141,10 +143,10 @@ def main():
|
|
141 |
st.session_state.load_state = True
|
142 |
|
143 |
if url:
|
144 |
-
process_video(url, whisper_model)
|
145 |
|
146 |
|
147 |
-
def process_video(url, whisper_model):
|
148 |
yt = YouTube(url)
|
149 |
video_id = yt.video_id
|
150 |
try:
|
@@ -176,7 +178,7 @@ def process_video(url, whisper_model):
|
|
176 |
col1, col2, col3, col4 = st.columns(4)
|
177 |
|
178 |
if "youtube" in url or "youtu.be" in url:
|
179 |
-
process_youtube_video(video_id, col3, emp, text_filename)
|
180 |
|
181 |
process_whisper_transcript(whisper_model, audio_filename, col4, text_filename)
|
182 |
|
@@ -191,7 +193,7 @@ def process_video(url, whisper_model):
|
|
191 |
get_media_download_link("audio", audio_filename)
|
192 |
|
193 |
|
194 |
-
def process_youtube_video(video_id, col, emp, text_filename):
|
195 |
try:
|
196 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
197 |
transcripts = [transcript for transcript in transcript_list]
|
@@ -221,7 +223,7 @@ def process_youtube_video(video_id, col, emp, text_filename):
|
|
221 |
st.download_button("Download Transcript", modified_text, text_filename)
|
222 |
with c2:
|
223 |
openai_summarization = summarise(
|
224 |
-
|
225 |
)
|
226 |
summarized_text = st.text_area(
|
227 |
"Summarized Transcript", openai_summarization, height=500
|
|
|
10 |
import ffmpeg
|
11 |
import re
|
12 |
import tempfile
|
13 |
+
from huggingface_hub import InferenceClient
|
14 |
|
15 |
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")
|
16 |
|
|
|
28 |
{} \n\n Article:"""
|
29 |
|
30 |
|
31 |
+
LLM = {
|
32 |
+
"llama3-8b": {'prompt': f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
|
33 |
+
|
34 |
+
{PROMPT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
35 |
+
|
36 |
+
|
37 |
+
""", 'endpoint': "meta-llama/Meta-Llama-3-8B-Instruct"}
|
38 |
+
}
|
39 |
+
|
40 |
@st.cache_resource()
|
41 |
def load_whisper(model):
|
42 |
return whisper.load_model(model)
|
|
|
68 |
|
69 |
|
70 |
@st.cache_data
|
71 |
+
def summarise(prompt, llm):
|
72 |
+
model = InferenceClient(LLM[llm]["endpoint"])
|
73 |
+
user_message = LLM[llm]["prompt"].format(prompt)
|
74 |
+
return model.text_generation(user_message, max_new_tokens=1024)
|
75 |
+
|
|
|
|
|
|
|
|
|
|
|
76 |
|
77 |
|
78 |
def delete_files(video_filename, audio_filename):
|
|
|
126 |
- Fetch transcript from YouTube API (if available) by clicking the **Fetch Transcript** button.
|
127 |
- Transcribe the video using the Whisper model by clicking the **Transcribe (Whisper)** button.
|
128 |
- The transcript will be displayed in a text area below.
|
129 |
+
- A summary of the transcript will also be generated by the selected LLM.
|
130 |
- The summary will be displayed in a text area below.
|
131 |
- You can download the video, audio, transcript or summary by clicking the respective download buttons.
|
132 |
"""
|
|
|
135 |
whisper_model = load_whisper("base")
|
136 |
|
137 |
url = c2.text_input("Enter the video URL")
|
138 |
+
llm = c2.selectbox("Select LLM", list(LLM.keys()), index=0)
|
|
|
|
|
139 |
fetch_button = c2.button("Fetch")
|
140 |
st.session_state.setdefault("load_state", False)
|
141 |
|
|
|
143 |
st.session_state.load_state = True
|
144 |
|
145 |
if url:
|
146 |
+
process_video(url, whisper_model, llm)
|
147 |
|
148 |
|
149 |
+
def process_video(url, whisper_model, llm):
|
150 |
yt = YouTube(url)
|
151 |
video_id = yt.video_id
|
152 |
try:
|
|
|
178 |
col1, col2, col3, col4 = st.columns(4)
|
179 |
|
180 |
if "youtube" in url or "youtu.be" in url:
|
181 |
+
process_youtube_video(video_id, col3, emp, text_filename, llm)
|
182 |
|
183 |
process_whisper_transcript(whisper_model, audio_filename, col4, text_filename)
|
184 |
|
|
|
193 |
get_media_download_link("audio", audio_filename)
|
194 |
|
195 |
|
196 |
+
def process_youtube_video(video_id, col, emp, text_filename, llm):
|
197 |
try:
|
198 |
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
199 |
transcripts = [transcript for transcript in transcript_list]
|
|
|
223 |
st.download_button("Download Transcript", modified_text, text_filename)
|
224 |
with c2:
|
225 |
openai_summarization = summarise(
|
226 |
+
modified_text, llm
|
227 |
)
|
228 |
summarized_text = st.text_area(
|
229 |
"Summarized Transcript", openai_summarization, height=500
|
requirements.txt
CHANGED
@@ -5,5 +5,4 @@ pytube
|
|
5 |
youtube-transcript-api
|
6 |
openai-whisper
|
7 |
moviepy
|
8 |
-
|
9 |
-
openai==0.28
|
|
|
5 |
youtube-transcript-api
|
6 |
openai-whisper
|
7 |
moviepy
|
8 |
+
huggingface-hub
|
|