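# Spaces: Sleeping
# (page-header residue, apparently from a Hugging Face Spaces listing; kept as a comment so the file parses)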
import datetime
import re

import gradio as gr
import rich
from langchain import HuggingFaceHub
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import TextLoader  # for text files
from langchain.embeddings import HuggingFaceEmbeddings  # for using Hugging Face models
from langchain.text_splitter import CharacterTextSplitter  # text splitter
from langchain.text_splitter import RecursiveCharacterTextSplitter  # text splitter for creating chunks
from langchain.vectorstores import FAISS  # Facebook vectorization
from rich import print
from rich.console import Console
from rich.panel import Panel
from textblob import TextBlob
# Hugging Face Hub model used for the summarization chain.
_LLM_ID = 'MBZUAI/LaMini-T5-223M'

# Matches the SRT lines that carry no spoken text: a bare cue number, a line
# that starts with an HH:MM:SS timestamp, or an empty line.
_SRT_NOISE = re.compile(r'^[0-9]+$|^[0-9]{2}:[0-9]{2}:[0-9]{2}|^$')


def _clean_sub(sub_list):
    """Collapse the lines of an SRT document into one string of spoken text.

    Cue numbers, timestamp lines and empty lines are dropped; the remaining
    lines are joined with single spaces and leading whitespace is removed.
    """
    spoken = [
        line.rstrip('\n')
        for line in sub_list
        if _SRT_NOISE.search(line) is None
    ]
    return ' '.join(spoken).lstrip()


def _summarize(doc, llm_id):
    """Split *doc* into chunks and run a map-reduce summarization chain on them."""
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2500,
        chunk_overlap=20,
        length_function=len,
    )
    texts = text_splitter.create_documents([doc])
    llm = HuggingFaceHub(
        repo_id=llm_id,
        model_kwargs={"temperature": 1, "max_length": 23000},
    )
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    return chain.run(texts)


def You_sum(text, code="en"):
    """Summarize the captions of a YouTube video.

    Args:
        text: URL of the video to summarize. This is the value the Gradio
            interface passes in from its text input.
        code: Key of the caption track to use, looked up in the video's
            ``captions`` mapping. Defaults to ``"en"``.

    Returns:
        The summary produced by the summarization chain, as a string.

    Raises:
        ValueError: If ``text`` is empty, if the requested caption track is
            not available, or if the track contains no spoken text.
    """
    # The URL comes from the function argument; reading it with input() would
    # block on stdin, which a web callback does not have.
    url = (text or "").strip()
    if not url:
        raise ValueError("Enter the url of video you want summary of")

    # NOTE(review): the original never constructed `myvideo`. The attributes
    # used below (`title`, `captions[code]`, `generate_srt_captions()`) match
    # pytube's YouTube/Caption objects, so pytube is assumed to be the intended
    # library -- confirm it is installed in the deployment.
    from pytube import YouTube

    console = Console()
    myvideo = YouTube(url)
    console.print(myvideo.title)
    console.print(myvideo.captions)

    console.print("Scraping subtitiles....")
    try:
        sub = myvideo.captions[code]
    except KeyError:
        raise ValueError(
            f"No caption track {code!r} for this video; "
            f"available: {myvideo.captions}"
        ) from None

    # Download the track in SRT format, then strip the SRT bookkeeping lines.
    caption = sub.generate_srt_captions()
    console.print("transform subtitles to text: ")
    final_text = _clean_sub(str(caption).split("\n"))
    if not final_text:
        raise ValueError(f"Caption track {code!r} contains no text to summarize")

    start = datetime.datetime.now()
    console.print("[yellow bold] Inizializing Summarization Chain")
    summary = str(_summarize(final_text, _LLM_ID))
    delta = datetime.datetime.now() - start

    # Report the result and the elapsed time on the Rich console.
    console.print("[bold green]Summary for: ", myvideo.title)
    console.print(f"[italic black] with {_LLM_ID} \n")
    console.print(Panel(summary, title='AI Summarization'))
    console.print(f"[red bold]Summarization completed in {delta}")

    # Return the summary so the Gradio text output has something to display.
    return summary
# Gradio UI: a single text input that is passed to You_sum, and a single text
# output that shows whatever You_sum returns.
iface = gr.Interface(fn=You_sum, inputs="text", outputs="text")

# Start the web server only when the file is executed as a script, so that
# importing this module does not launch the app as a side effect.
if __name__ == "__main__":
    iface.launch()