Spaces:
Sleeping
Sleeping
Rename APP.py to app.py
#2
by
anshu-man853
- opened
APP.py
DELETED
@@ -1,71 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
from textblob import TextBlob
|
3 |
-
from langchain.document_loaders import TextLoader #for textfiles
|
4 |
-
from langchain.text_splitter import CharacterTextSplitter #text splitter
|
5 |
-
from langchain.embeddings import HuggingFaceEmbeddings #for using HugginFace models
|
6 |
-
from langchain.vectorstores import FAISS #facebook vectorizationfrom langchain.chains.question_answering import load_qa_chain
|
7 |
-
from langchain.chains.question_answering import load_qa_chain
|
8 |
-
from langchain import HuggingFaceHub
|
9 |
-
import rich
|
10 |
-
from rich.console import Console
|
11 |
-
from rich.panel import Panel
|
12 |
-
from rich import print
|
13 |
-
from langchain.document_loaders import TextLoader
|
14 |
-
# text splitter for create chunks
|
15 |
-
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
16 |
-
from langchain.chains.summarize import load_summarize_chain
|
17 |
-
import datetime
|
18 |
-
|
19 |
-
def You_sum(text):
    """Summarize a video's captions and print the result with Rich.

    The caption track is selected interactively, converted from SRT to plain
    text, split into chunks and summarized with a map-reduce chain backed by a
    Hugging Face Hub model.

    Args:
        text: Value supplied by the Gradio text input. It is not consulted by
            this function; the URL and caption code are read from stdin.

    Returns:
        The generated summary, so the Gradio "text" output has a value to show.
    """
    import re  # local import: the module never imports `re` at file level

    # NOTE(review): `myvideo` is not defined anywhere in the visible file, so
    # every reference below raises NameError. Presumably it should be a video
    # object built from `url` (one exposing `.title` and `.captions`) --
    # confirm and construct it here before relying on this function.
    url = input("Enter the url of video you want summary of")
    print(myvideo.title)
    print(myvideo.captions)
    code = input("Enter the code you want: ")
    print("Scraping subtitiles....")
    sub = myvideo.captions[code]
    # downloading the files in SRT format
    caption = sub.generate_srt_captions()

    # Compiled once instead of on every line of the transcript.
    counter_line = re.compile('^[0-9]+$')
    timing_line = re.compile('^[0-9]{2}:[0-9]{2}:[0-9]{2}')

    def clean_sub(sub_list):
        """Drop SRT counters, timing lines and blank lines; join the rest."""
        kept = [
            line.rstrip('\n')
            for line in sub_list
            if line != ''
            and counter_line.search(line) is None
            and timing_line.search(line) is None
        ]
        return ' '.join(kept).lstrip()

    print("transform subtitles to text: ")
    srt_list = str(caption).split("\n")  # generates a list of all the lines
    final_text = clean_sub(srt_list)

    # The original read an undefined `summarization_text`; the text to
    # summarize is the cleaned transcript produced just above.
    doc = final_text
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2500,
        chunk_overlap=20,
        length_function=len,
    )
    texts = text_splitter.create_documents([doc])

    # Call the Hugging Face API to run the Summarization Chain
    llm_id = 'MBZUAI/LaMini-T5-223M'
    summaries = []
    console = Console()  # was used without ever being instantiated
    start = datetime.datetime.now()
    console.print("[yellow bold] Inizializing Summarization Chain")
    llm = HuggingFaceHub(
        repo_id=llm_id,
        model_kwargs={"temperature": 1, "max_length": 23000},
    )
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    summary = chain.run(texts)
    summaries.append(summary)

    # print the results with rich text format
    console.print("[bold green]Summary for: ", myvideo.title)
    console.print(f"[italic black] with {llm_id} \n")
    # calculate the elapsed time and print to a Rich Console
    stop = datetime.datetime.now()
    delta = stop - start
    print(Panel(str(summaries[0]), title='AI Summarization'))
    console.print(f"[red bold]Summarization completed in {delta}")
    return summary
|
69 |
-
|
70 |
-
# Expose You_sum through a one-field text-in / text-out Gradio form and serve it.
iface = gr.Interface(
    fn=You_sum,
    inputs="text",
    outputs="text",
)
iface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
+
import youtube_dl
|
4 |
+
|
5 |
+
# Load the 'falcon-7b-instruct' model for summarization
|
6 |
+
# The same identifier is used for both the model weights and the tokenizer.
# NOTE(review): this id has no organization prefix and Falcon is a causal
# language model -- confirm that it resolves on the Hub and that the
# "summarization" task accepts it before deploying.
model_name = 'falcon-7b-instruct'
summarizer = pipeline(
    task="summarization",
    model=model_name,
    tokenizer=model_name,
)
|
8 |
+
|
9 |
+
# Function to extract YouTube transcript given a video URL
|
10 |
+
def extract_youtube_transcript(url):
    """Return the URL of the English WebVTT subtitle track of a video.

    Args:
        url: Address of the video page to inspect.

    Returns:
        The subtitle track URL, or ``None`` when the metadata lookup fails or
        the video exposes no English ``vtt`` track.
    """
    ydl_opts = {
        'writesubtitles': True,
        'subtitleslangs': ['en'],
        'skip_download': True,
        'ignoreerrors': True,
    }
    try:
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
    except Exception as e:
        print(f"An error occurred while fetching the transcript: {e}")
        return None

    # With 'ignoreerrors' set, a failed extraction may come back as None
    # rather than as an exception.
    if not info:
        return None

    # 'subtitles' maps a language code to a list of track dicts (each with
    # 'ext' and 'url'). Iterating the mapping directly yields the language
    # codes, which is why the previous loop could never find a track.
    english_tracks = (info.get('subtitles') or {}).get('en') or []
    for track in english_tracks:
        if track.get('ext') == 'vtt':
            return track.get('url')
    return None
|
27 |
+
|
28 |
+
# Define the Gradio interface
|
29 |
+
def _vtt_to_text(vtt):
    """Reduce a WebVTT document to its spoken caption text."""
    import html
    import re

    inline_tag = re.compile(r'<[^>]+>')
    spoken = []
    in_cue = False
    for raw_line in vtt.splitlines():
        line = raw_line.strip()
        if '-->' in line:
            # A timing line opens a cue; the payload follows until a blank line.
            in_cue = True
            continue
        if not line:
            in_cue = False
            continue
        if not in_cue:
            # Header, NOTE/STYLE blocks and cue identifiers live outside cues.
            continue
        caption = html.unescape(inline_tag.sub('', line)).strip()
        # Rolling captions repeat the previous line in the next cue; keep one.
        if caption and (not spoken or spoken[-1] != caption):
            spoken.append(caption)
    return ' '.join(spoken)


def summarize_youtube_transcript(url):
    """Summarize the English subtitles of a video.

    Args:
        url: Address of the video page, as typed into the Gradio textbox.

    Returns:
        The summary text, or the message
        "Unable to fetch or process the transcript." when no subtitle track is
        available or any step of the download/summarization fails.
    """
    import urllib.request

    failure_message = "Unable to fetch or process the transcript."

    transcript_url = extract_youtube_transcript(url)
    if not transcript_url:
        return failure_message

    try:
        # The value comes from remote metadata; refuse non-web schemes such
        # as file:// before handing it to urlopen.
        if not transcript_url.startswith(('http://', 'https://')):
            raise ValueError(f"unsupported subtitle URL: {transcript_url!r}")

        # The track URL points at the subtitle file itself, so it is fetched
        # directly instead of being run through the video extractor again.
        with urllib.request.urlopen(transcript_url, timeout=30) as response:
            vtt = response.read().decode('utf-8', errors='replace')

        transcript = _vtt_to_text(vtt)
        if not transcript:
            return failure_message

        # truncation=True keeps transcripts longer than the model's input
        # window from raising inside the pipeline.
        summary = summarizer(
            transcript,
            max_length=300,
            min_length=50,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        print(f"An error occurred while processing the transcript: {e}")
    return failure_message
|
42 |
+
|
43 |
+
# Create the Gradio interface
|
44 |
+
# gr.inputs / gr.outputs are deprecated namespaces that newer Gradio releases
# no longer ship; the top-level Textbox component serves both directions.
iface = gr.Interface(
    fn=summarize_youtube_transcript,
    inputs=gr.Textbox(label="YouTube URL"),
    outputs=gr.Textbox(label="Summary"),
    title="YouTube Transcript Summarizer",
    description="Enter a YouTube URL and get a summary of the transcript.",
    theme="huggingface",
)

# Launch the interface
iface.launch(share=True)
|