Spaces:

Ankoorkashyap
/

YOUSUM

Sleeping

App Files Files Community

YOUSUM / APP.py

Ankoorkashyap

Create APP.py

4d281ef almost 2 years ago

raw

history blame

2.99 kB

	import datetime
	import re

	import gradio as gr
	from textblob import TextBlob
	from langchain.document_loaders import TextLoader #for textfiles
	from langchain.text_splitter import CharacterTextSplitter #text splitter
	from langchain.embeddings import HuggingFaceEmbeddings #for using HugginFace models
	from langchain.vectorstores import FAISS #facebook vectorizationfrom langchain.chains.question_answering import load_qa_chain
	from langchain.chains.question_answering import load_qa_chain
	from langchain import HuggingFaceHub
	import rich
	from rich.console import Console
	from rich.panel import Panel
	from rich import print
	from langchain.document_loaders import TextLoader
	# text splitter for create chunks
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.chains.summarize import load_summarize_chain

	# Matches SRT lines that carry no spoken text: a bare cue number, a line
	# starting with an HH:MM:SS timestamp, or an empty line.
	_SRT_NOISE = re.compile(r"^[0-9]+$|^[0-9]{2}:[0-9]{2}:[0-9]{2}|^$")


	def clean_sub(sub_list):
	    """Join the spoken-text lines of SRT captions into a single string.

	    Args:
	        sub_list: Iterable of caption lines (one string per line).

	    Returns:
	        The lines that are neither cue numbers, timestamp lines nor empty,
	        joined with single spaces and with leading whitespace removed.
	    """
	    # Drop line terminators first so CRLF captions are filtered correctly.
	    stripped = (raw.rstrip("\r\n") for raw in sub_list)
	    spoken = [line for line in stripped if _SRT_NOISE.search(line) is None]
	    return " ".join(spoken).lstrip()


	def You_sum(text, code="en"):
	    """Summarize a video's captions with a Hugging Face hosted model.

	    The captions are downloaded in SRT form, reduced to plain text, split
	    into chunks and passed through a map-reduce summarization chain.

	    Args:
	        text: Value received from the Gradio textbox.
	            NOTE(review): presumably the video URL -- it is not consumed
	            until ``myvideo`` is built from it (see the note in the body).
	        code: Key used to select a track from ``myvideo.captions``.
	            Replaces the original ``input()`` prompt, which cannot be
	            answered from a web UI. NOTE(review): the ``"en"`` default is
	            an assumption -- confirm against the caption codes printed
	            below.

	    Returns:
	        The summary returned by the summarization chain, so that Gradio
	        can show it in the output textbox.
	    """
	    # NOTE(review): `myvideo` is not defined anywhere in the visible file,
	    # so this function still raises NameError here. Its usage (.title,
	    # .captions[code], .generate_srt_captions()) looks like a pytube
	    # `YouTube` object -- presumably it should be constructed from `text`.
	    # Confirm, then add the constructor call and the matching import.
	    print(myvideo.title)
	    print(myvideo.captions)

	    print("Scraping subtitiles....")
	    sub = myvideo.captions[code]
	    # Download the captions in SRT format.
	    caption = sub.generate_srt_captions()

	    print("transform subtitles to text: ")
	    srt_list = str(caption).split("\n")  # one entry per caption line
	    final_text = clean_sub(srt_list)

	    # Split the transcript into chunks of at most 2500 characters; the
	    # chain summarizes each chunk and then combines the partial results.
	    text_splitter = RecursiveCharacterTextSplitter(
	        chunk_size=2500,
	        chunk_overlap=20,
	        length_function=len,
	    )
	    # The original passed the undefined name `summarization_text` here;
	    # the cleaned transcript is the text that has to be summarized.
	    texts = text_splitter.create_documents([final_text])

	    llm_id = 'MBZUAI/LaMini-T5-223M'
	    # The original used `console` without ever creating it.
	    console = Console()
	    start = datetime.datetime.now()
	    console.print("[yellow bold] Inizializing Summarization Chain")

	    # Call the Hugging Face API to run the summarization chain.
	    llm = HuggingFaceHub(
	        repo_id=llm_id,
	        model_kwargs={"temperature": 1, "max_length": 23000},
	    )
	    chain = load_summarize_chain(llm, chain_type="map_reduce")
	    summary = chain.run(texts)

	    # Print the results with rich text formatting.
	    console.print("[bold green]Summary for: ", myvideo.title)
	    console.print(f"[italic black] with {llm_id} \n")
	    delta = datetime.datetime.now() - start
	    print(Panel(str(summary), title='AI Summarization'))
	    console.print(f"[red bold]Summarization completed in {delta}")

	    # Without a return value the Gradio output textbox would stay empty.
	    return summary

	# Wire You_sum into a Gradio interface with one text input and one text
	# output component.
	iface = gr.Interface(
	    fn=You_sum,
	    inputs="text",
	    outputs="text",
	)
	# Launch the interface as soon as this module is executed.
	iface.launch()