YOUSUM / APP.py
Ankoorkashyap's picture
Create APP.py
4d281ef
raw
history blame
2.99 kB
import gradio as gr
from textblob import TextBlob
from langchain.document_loaders import TextLoader #for textfiles
from langchain.text_splitter import CharacterTextSplitter #text splitter
from langchain.embeddings import HuggingFaceEmbeddings #for using HuggingFace models
from langchain.vectorstores import FAISS #facebook vectorization
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFaceHub
import rich
from rich.console import Console
from rich.panel import Panel
from rich import print
from langchain.document_loaders import TextLoader
# text splitter for creating chunks
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain
import datetime
def You_sum(text):
    """Summarize a video's captions with a Hugging Face Hub model.

    Prompts on stdin for a video URL and a caption code, flattens the
    selected SRT captions into plain text, splits that text into chunks and
    runs a LangChain ``map_reduce`` summarization chain over the chunks.
    Progress, the summary and the elapsed time are printed with rich.

    NOTE(review): this function reads a ``myvideo`` object (``.title`` and
    ``.captions``) that is not defined anywhere in the visible file, so the
    call raises ``NameError`` until that object exists. It presumably should
    be built from ``url`` -- confirm and add the construction.

    Args:
        text: Value supplied by the Gradio text input. It is not read here.
            NOTE(review): presumably meant to carry the video URL -- confirm.

    Returns:
        The summary returned by ``chain.run`` for the caption chunks.
    """
    import re  # local import: the file's import block does not provide `re`

    def clean_sub(sub_list):
        """Join SRT lines into one string, dropping non-text lines.

        A line is dropped when it is a cue counter (digits only), starts
        with an ``HH:MM:SS`` timestamp, or is empty.
        """
        # One anchored pattern covering the three kinds of lines to drop.
        noise = re.compile(r'^(?:[0-9]+$|[0-9]{2}:[0-9]{2}:[0-9]{2}|$)')
        kept = (
            line.rstrip('\n') for line in sub_list if noise.search(line) is None
        )
        return ' '.join(kept).lstrip()

    # NOTE(review): `url` is collected but never used below; see the
    # docstring note about the missing `myvideo` construction.
    url = input("Enter the url of video you want summary of")
    print(myvideo.title)
    print(myvideo.captions)
    code = input("Enter the code you want: ")
    print("Scraping subtitiles....")
    sub = myvideo.captions[code]
    # Fetch the selected caption track in SRT format.
    caption = sub.generate_srt_captions()

    print("transform subtitles to text: ")
    srt_list = str(caption).split("\n")  # one entry per SRT line
    final_text = clean_sub(srt_list)

    # Chunk the cleaned transcript; sizes are measured with len(), i.e. in
    # characters.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2500,
        chunk_overlap=20,
        length_function=len,
    )
    # The original referenced an undefined `summarization_text` here; the
    # cleaned transcript is the text that has to be summarized.
    texts = text_splitter.create_documents([final_text])

    # Call the Hugging Face API to run the Summarization Chain
    llm_id = 'MBZUAI/LaMini-T5-223M'
    console = Console()  # `console` was used without ever being created
    start = datetime.datetime.now()
    console.print("[yellow bold] Inizializing Summarization Chain")
    llm = HuggingFaceHub(
        repo_id=llm_id,
        model_kwargs={"temperature": 1, "max_length": 23000},
    )
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    summary = chain.run(texts)

    # Print the results with rich text format.
    console.print("[bold green]Summary for: ", myvideo.title)
    console.print(f"[italic black] with {llm_id} \n")
    # Elapsed wall-clock time of the summarization step.
    delta = datetime.datetime.now() - start
    print(Panel(str(summary), title='AI Summarization'))
    console.print(f"[red bold]Summarization completed in {delta}")

    # Hand the summary back so the Gradio text output has something to show.
    return summary
# Wrap You_sum in a Gradio interface with one text input and one text output,
# then start serving it.
iface = gr.Interface(
    fn=You_sum,
    inputs="text",
    outputs="text",
)
iface.launch()