Spaces:
Sleeping
Sleeping
import streamlit as st | |
import requests | |
import base64 | |
import os | |
from moviepy.editor import VideoFileClip | |
from pytube import YouTube | |
from youtube_transcript_api import YouTubeTranscriptApi | |
from youtube_transcript_api._errors import CouldNotRetrieveTranscript | |
import whisper | |
import ffmpeg | |
import re | |
import tempfile | |
from huggingface_hub import InferenceClient | |
# Render the app in the wide layout with the sidebar initially collapsed.
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")

# Instruction template for the summarising LLM. The single "{}" placeholder is
# filled with the transcript by str.format() in summarise().
PROMPT = """Act as the author and provide a comprehensive detailed article in the same language as the transcript
in markdown format that has a H1 main title(example "# <this is a title> ") and broken down into H2 subtitles (example "## <this is a title> ") for the following transcript
You must follow the rules:
- Write the article in markdown format
- Create a main title for the article as markdown H1 and break the article into subtitles where each subtitle is markdown H2
- Article must be in the same language as the transcript
- summary should be informative and act as a replacement for the original transcript to the point that the user doesn't have to go back to read the transcript
- Summary should not mention the author or speaker at all should act as your independent writing without referencing the original transcript or speaker.
- You can use bullet points within the article
Transcript:
{} \n\n Article:"""

# Supported LLMs, keyed by the name offered in the "Select LLM" dropdown.
# Each entry holds:
#   'prompt'   - PROMPT wrapped in the model's chat markup. PROMPT is
#                interpolated by the f-string, so its "{}" placeholder is
#                still present for the later str.format() call.
#   'endpoint' - model id handed to InferenceClient in summarise().
LLM = {
    "llama3-8b": {'prompt': f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
{PROMPT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
""", 'endpoint': "meta-llama/Meta-Llama-3-8B-Instruct"}
}
@st.cache_resource
def load_whisper(model):
    """Load a Whisper speech-to-text model, reusing it across script reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the model weights would be loaded again each time. The
    cache keeps one loaded model per distinct ``model`` name.

    Args:
        model: Whisper model name passed to ``whisper.load_model``
            (the app uses ``"base"``).

    Returns:
        The loaded Whisper model object.
    """
    return whisper.load_model(model)
def download_video(url):
    """Download the video at *url* into the current working directory.

    URLs containing "youtube" or "youtu.be" are fetched with pytube using the
    highest-resolution stream it offers. Any other URL is streamed over HTTP
    with requests and saved under the last component of the URL path.

    Args:
        url: Address of the video to download.

    Returns:
        Path of the file that was written.

    Raises:
        ValueError: If pytube finds no downloadable stream for the video.
        requests.RequestException: If the HTTP download fails, times out or
            the server answers with an error status.
    """
    if "youtube" in url or "youtu.be" in url:
        stream = YouTube(url).streams.get_highest_resolution()
        if stream is None:
            raise ValueError(f"No downloadable stream found for {url!r}")
        return stream.download()

    from urllib.parse import urlparse

    # Use only the path component so a query string or fragment never ends up
    # in the file name; fall back to a fixed name for URLs ending in "/".
    filename = os.path.basename(urlparse(url).path) or "video.mp4"

    # The context manager returns the connection to the pool even on error.
    with requests.get(url, stream=True, timeout=30) as response:
        # Do not save an HTML error page as if it were the video.
        response.raise_for_status()
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                if chunk:
                    file.write(chunk)
    return filename
def convert_to_audio(video_filename):
    """Extract the audio track of a video file into an MP3 next to it.

    The output name is the input name with its extension replaced by
    ``.mp3``. Swapping the real extension (rather than the literal text
    ".mp4") keeps the audio from being written over the source video when
    the input is e.g. ``.webm`` or ``.mov``.

    Args:
        video_filename: Path of the video file to read.

    Returns:
        Path of the MP3 file that was written.

    Raises:
        ValueError: If the video has no audio track.
    """
    audio_filename = os.path.splitext(video_filename)[0] + ".mp3"
    video = VideoFileClip(video_filename)
    try:
        audio = video.audio
        if audio is None:
            raise ValueError(f"'{video_filename}' has no audio track")
        audio.write_audiofile(audio_filename, codec="mp3")
    finally:
        # Release the reader held by the clip, even on failure.
        video.close()
    return audio_filename
def summarise(prompt, llm):
    """Generate an article-style summary of a transcript with the chosen LLM.

    Args:
        prompt: Transcript text substituted into the model's prompt template.
        llm: Key into ``LLM`` selecting the prompt template and endpoint.

    Returns:
        The text generated by the model (at most 1024 new tokens requested).
    """
    client = InferenceClient(LLM[llm]["endpoint"])
    filled_prompt = LLM[llm]["prompt"].format(prompt)
    generated = client.text_generation(filled_prompt, max_new_tokens=1024)
    return generated
def delete_files(video_filename, audio_filename):
    """Remove the downloaded video and its extracted audio, in that order.

    Args:
        video_filename: Path of the video file to delete.
        audio_filename: Path of the audio file to delete.
    """
    for path in (video_filename, audio_filename):
        delete_file(path)
def delete_file(filename):
    """Delete *filename* if it exists and report the deletion in the UI.

    A missing file, ``None`` or an empty name is silently ignored; elsewhere
    in this app a file name is ``None`` whenever the download or the audio
    conversion failed.

    Args:
        filename: Path of the file to delete, or ``None``.
    """
    if not filename:
        return
    try:
        # Attempt the removal directly instead of checking existence first,
        # so a file that vanishes in between cannot raise.
        os.remove(filename)
    except FileNotFoundError:
        return
    st.info(f"File '{os.path.basename(filename)}' deleted from the server.")
def transcribe_whisper(_model, audio_filepath):
    """Transcribe an audio file and return only the recognised text.

    Args:
        _model: Object exposing ``transcribe(path)`` that returns a mapping
            with a ``"text"`` entry (a loaded Whisper model in this app).
        audio_filepath: Path of the audio file to transcribe.

    Returns:
        The value stored under ``"text"`` in the transcription result.
    """
    result = _model.transcribe(audio_filepath)
    return result["text"]
def get_media_download_link(media_type, file_path):
    """Render an HTML download link that embeds a file as a base64 data URI.

    The whole file is read into memory and inlined into the page.

    Args:
        media_type: Label placed in the data URI's type field
            (the app passes ``"video"`` or ``"audio"``).
        file_path: Path of the file to offer for download.
    """
    import html

    with open(file_path, "rb") as file:
        encoded = base64.b64encode(file.read()).decode("utf-8")

    # NOTE(review): "file/<media_type>" is not a registered MIME type; the
    # link relies on the `download` attribute. Kept unchanged on purpose.
    media_href = f"data:file/{media_type};base64,{encoded}"

    # The file name can originate from a user-supplied URL and the markup is
    # rendered with unsafe_allow_html=True, so escape it for both the
    # attribute value and the link text.
    safe_name = html.escape(os.path.basename(file_path), quote=True)
    st.markdown(
        f'<a href="{media_href}" download="{safe_name}">Download {safe_name}</a>',
        unsafe_allow_html=True,
    )
def generate_summaries(_summarizer, text, min_length=50, max_length=500):
    """Summarise *text* paragraph by paragraph and join the results.

    Paragraphs are the pieces of *text* separated by a blank line
    (``"\\n\\n"``). Pieces that are empty or whitespace-only are skipped, so
    an empty text or runs of blank lines never reach the summarizer.

    Args:
        _summarizer: Callable invoked as
            ``_summarizer(paragraph, max_length=..., min_length=...,
            do_sample=False)`` and returning a sequence whose first item is a
            mapping with a ``"summary_text"`` entry.
        text: Text to summarise.
        min_length: Forwarded to the summarizer as ``min_length``.
        max_length: Forwarded to the summarizer as ``max_length``.

    Returns:
        The stripped per-paragraph summaries joined with blank lines; an
        empty string when *text* has no non-blank paragraph.
    """
    summaries = [
        _summarizer(
            paragraph, max_length=max_length, min_length=min_length, do_sample=False
        )[0]["summary_text"].strip()
        for paragraph in text.split("\n\n")
        if paragraph.strip()
    ]
    return "\n\n".join(summaries)
def main():
    """Render the VidScripter landing page and start processing on request.

    Shows the usage instructions, the URL input, the LLM selector and the
    **Fetch** button. Once Fetch has been clicked the choice is remembered in
    ``st.session_state.load_state`` so that later reruns (triggered by other
    widgets) keep showing the processing section.
    """
    st.title("VidScripter")
    st.write("#### A One Stop Solution to Video Transcription")
    c1, c2 = st.columns(2)
    c1.write(
        """
        - Enter the video URL in the text input box.
        - Click the **Fetch** button to fetch the video.
        - Once the video is fetched, you can perform the following actions:
        - Fetch transcript from YouTube API (if available) by clicking the **Fetch Transcript** button.
        - Transcribe the video using the Whisper model by clicking the **Transcribe (Whisper)** button.
        - The transcript will be displayed in a text area below.
        - A summary of the transcript will also be generated by the selected LLM.
        - The summary will be displayed in a text area below.
        - You can download the video, audio, transcript or summary by clicking the respective download buttons.
        """
    )
    # Pasted URLs often carry stray whitespace, which would break both the
    # YouTube detection and the HTTP download.
    url = c2.text_input("Enter the video URL").strip()
    llm = c2.selectbox("Select LLM", list(LLM.keys()), index=0)
    fetch_button = c2.button("Fetch")
    st.session_state.setdefault("load_state", False)
    if fetch_button or st.session_state.load_state:
        st.session_state.load_state = True
        if url:
            # Load Whisper only when there is something to process, so the
            # landing page renders without waiting for the model.
            whisper_model = load_whisper("base")
            process_video(url, whisper_model, llm)
def process_video(url, whisper_model, llm):
    """Download a video, extract its audio and render the transcript tools.

    Steps, each reported in the UI:
      1. Download the video (failure leaves ``video_filename`` as ``None``).
      2. Convert it to MP3 (skipped when there is no video).
      3. For YouTube URLs, offer the transcripts published through the
         YouTube transcript API together with an LLM summary.
      4. Offer Whisper transcription of the extracted audio.
      5. Offer download links for the video and audio files.

    Args:
        url: Address of the video entered by the user.
        whisper_model: Loaded Whisper model used for transcription.
        llm: Key into ``LLM`` selecting the summarising model.
    """
    is_youtube = "youtube" in url or "youtu.be" in url

    # Only YouTube links have a video id. Parsing any other URL with pytube
    # raises, which would abort the run before the generic HTTP download
    # gets a chance.
    video_id = None
    if is_youtube:
        try:
            video_id = YouTube(url).video_id
        except Exception:
            video_id = None

    try:
        video_filename = download_video(url)
        st.success("Video fetched successfully")
    except Exception:
        video_filename = None
        st.warning("Video could not be fetched")

    audio_filename = None
    if video_filename is None:
        st.info("No Video to convert into Audio")
    else:
        try:
            audio_filename = convert_to_audio(video_filename)
            st.success("Audio converted successfully")
        except Exception:
            st.warning("Audio coud not be converted")

    if video_filename is not None:
        # Swap the real extension so non-mp4 downloads also get a .txt name.
        text_filename = os.path.splitext(os.path.basename(video_filename))[0] + ".txt"
    else:
        text_filename = "transcript.txt"

    # Placeholder above the button row; it later holds either the transcript
    # selector or a warning.
    emp = st.empty()
    col1, col2, col3, col4 = st.columns(4)
    if is_youtube:
        if video_id is not None:
            process_youtube_video(video_id, col3, emp, text_filename, llm)
        else:
            emp.warning("Could Not Retrieve API Transcripts for this video.")
    process_whisper_transcript(whisper_model, audio_filename, col4, text_filename)
    with col1:
        if video_filename is not None and st.button("Download Video"):
            with st.spinner("Encoding Video"):
                get_media_download_link("video", video_filename)
    with col2:
        if audio_filename is not None and st.button("Download Audio"):
            with st.spinner("Encoding Audio"):
                get_media_download_link("audio", audio_filename)
def process_youtube_video(video_id, col, emp, text_filename, llm):
    """Offer the YouTube-provided transcripts of a video plus an LLM summary.

    Lists the transcripts available for *video_id*, lets the user pick one
    and, once **Fetch Transcript** has been clicked (remembered in
    ``st.session_state.api_transcript``), shows the editable transcript next
    to a summary generated from it, each with its own download button.

    Args:
        video_id: YouTube video id to look transcripts up for.
        col: Streamlit column that receives the "Fetch Transcript" button.
        emp: Streamlit placeholder that receives the transcript selector or
            a warning when transcripts cannot be retrieved.
        text_filename: File name offered when downloading the transcript; the
            summary is offered under the same name with ``_summary`` inserted
            before the extension.
        llm: Key into ``LLM`` selecting the summarising model.
    """
    try:
        transcripts = list(YouTubeTranscriptApi.list_transcripts(video_id))
        if transcripts:
            transcript_options = {
                f"{transcript.language} ({transcript.language_code})": transcript
                for transcript in transcripts
            }
            transcript_option = emp.selectbox(
                "Select a transcript", list(transcript_options.keys())
            )
            selected_transcript = transcript_options[transcript_option]
            st.session_state.setdefault("api_transcript", False)
            if col.button("Fetch Transcript") or st.session_state.api_transcript:
                st.session_state.api_transcript = True
                # Collapse whitespace runs inside each caption chunk and put
                # one chunk per line.
                transcript_text = "\n".join(
                    re.sub(r"\s+", " ", chunk["text"])
                    for chunk in selected_transcript.fetch()
                )
                # Give the summary its own file name so it cannot be confused
                # with (or saved over) the downloaded transcript.
                stem, extension = os.path.splitext(text_filename)
                summary_filename = f"{stem}_summary{extension}"
                c1, c2 = st.columns(2)
                with c1:
                    modified_text = st.text_area(
                        "Transcript", transcript_text, height=500
                    )
                    st.download_button("Download Transcript", modified_text, text_filename)
                with c2:
                    # Handle summariser failures here: otherwise they would be
                    # reported below as a transcript error and the warning
                    # would replace the transcript selector held by `emp`.
                    try:
                        summary_text = summarise(modified_text, llm)
                    except Exception as e:
                        st.warning(f"Could not summarise the transcript. {e}")
                    else:
                        summarized_text = st.text_area(
                            "Summarized Transcript", summary_text, height=500
                        )
                        st.download_button(
                            "Download Summary", summarized_text, summary_filename
                        )
    except CouldNotRetrieveTranscript:
        emp.warning("Could Not Retrieve API Transcripts for this video.")
    except Exception as e:
        emp.warning(f"Error Fetching API Transcripts for this video. {e}")
def process_whisper_transcript(whisper_model, audio_filename, col, text_filename):
    """Offer Whisper transcription of the extracted audio.

    Does nothing when there is no audio file. Otherwise a
    **Transcribe (Whisper)** button is placed in *col*; after it has been
    clicked once (remembered in ``st.session_state.whisper_transcript``) the
    audio is transcribed and shown in an editable text area with a download
    button.

    Args:
        whisper_model: Loaded Whisper model used for transcription.
        audio_filename: Path of the audio file, or ``None`` if unavailable.
        col: Streamlit column that receives the transcribe button.
        text_filename: File name offered when downloading the transcript.
    """
    if audio_filename is None:
        return

    state = st.session_state
    state.setdefault("whisper_transcript", False)

    # The button must be rendered on every run, so evaluate it first.
    requested = col.button("Transcribe (Whisper)") or state.whisper_transcript
    if not requested:
        return

    state.whisper_transcript = True
    whisper_text = transcribe_whisper(whisper_model, audio_filename)
    edited_text = st.text_area("Transcript", whisper_text, height=500)
    st.download_button("Download", edited_text, text_filename)
# Build the UI only when this file is executed as the entry script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()