Ankoorkashyap committed
Commit 4d281ef · 1 Parent(s): 2b603d4

Create APP.py

Files changed (1)
  1. APP.py +71 -0
APP.py ADDED
@@ -0,0 +1,71 @@
+ import gradio as gr
+ from textblob import TextBlob
+ from langchain.document_loaders import TextLoader  # for text files
+ from langchain.text_splitter import CharacterTextSplitter  # text splitter
+ from langchain.embeddings import HuggingFaceEmbeddings  # for using Hugging Face models
+ from langchain.vectorstores import FAISS  # Facebook vector store
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain import HuggingFaceHub
+ import rich
+ from rich.console import Console
+ from rich.panel import Panel
+ from rich import print
+ from pytube import YouTube  # YouTube video/caption access
+ import re  # used to clean the SRT captions
+ # text splitter for creating chunks
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.chains.summarize import load_summarize_chain
+ import datetime
+
+ console = Console()
+
+ def You_sum(url):
+     # The Gradio textbox supplies the video URL; interactive input()
+     # prompts do not work inside a Gradio callback.
+     myvideo = YouTube(url)
+     print(myvideo.title)
+     print(myvideo.captions)
+     code = "en"  # caption language code; originally asked for via input()
+     print("Scraping subtitles....")
+     sub = myvideo.captions[code]
+     # download the captions in SRT format
+     caption = sub.generate_srt_captions()
+
+     def clean_sub(sub_list):
+         # drop SRT sequence numbers, timestamps and blank lines, keep the text
+         text = ''
+         for line in sub_list:
+             if re.search('^[0-9]+$', line) is None and re.search('^[0-9]{2}:[0-9]{2}:[0-9]{2}', line) is None and re.search('^$', line) is None:
+                 text += ' ' + line.rstrip('\n')
+         return text.lstrip()
+
+     print("Transform subtitles to text:")
+     srt_list = str(caption).split("\n")  # generates a list of all the lines
+     final_text = clean_sub(srt_list)
+
+     # split the transcript into chunks the model can handle
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=2500,
+         chunk_overlap=20,
+         length_function=len,
+     )
+     texts = text_splitter.create_documents([final_text])
+
+     # Call the Hugging Face API to run the summarization chain
+     llm_id = 'MBZUAI/LaMini-T5-223M'
+     summaries = []
+     start = datetime.datetime.now()
+     console.print("[yellow bold]Initializing Summarization Chain")
+     llm = HuggingFaceHub(repo_id=llm_id, model_kwargs={"temperature": 1, "max_length": 23000})
+     chain = load_summarize_chain(llm, chain_type="map_reduce")
+     summary = chain.run(texts)
+     summaries.append(summary)
+
+     # print the results with rich text formatting
+     console.print("[bold green]Summary for: ", myvideo.title)
+     console.print(f"[italic black] with {llm_id} \n")
+     # elapsed time, printed to the Rich console
+     stop = datetime.datetime.now()
+     delta = stop - start
+     print(Panel(str(summaries[0]), title='AI Summarization'))
+     console.print(f"[red bold]Summarization completed in {delta}")
+     return str(summaries[0])
+
+ iface = gr.Interface(fn=You_sum, inputs="text", outputs="text")
+ iface.launch()
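
For reference, a minimal standalone sketch of the map_reduce summarization step used in APP.py, run on a plain string instead of scraped captions. The model id and splitter settings are copied from the file above; long_text is a placeholder, and the HUGGINGFACEHUB_API_TOKEN environment variable is an assumption (LangChain's HuggingFaceHub wrapper reads the Hugging Face API token from the environment).

from langchain import HuggingFaceHub
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Assumption: export HUGGINGFACEHUB_API_TOKEN before running (placeholder token).
long_text = "..."  # placeholder: any transcript-sized string

# Same splitter settings as APP.py
splitter = RecursiveCharacterTextSplitter(chunk_size=2500, chunk_overlap=20, length_function=len)
docs = splitter.create_documents([long_text])

# Same model and chain type as APP.py
llm = HuggingFaceHub(repo_id="MBZUAI/LaMini-T5-223M", model_kwargs={"temperature": 1, "max_length": 23000})
chain = load_summarize_chain(llm, chain_type="map_reduce")
print(chain.run(docs))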