Spaces:
Sleeping
Sleeping
Commit
·
4d281ef
1
Parent(s):
2b603d4
Create APP.py
Browse files
APP.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from textblob import TextBlob
|
3 |
+
from langchain.document_loaders import TextLoader #for textfiles
|
4 |
+
from langchain.text_splitter import CharacterTextSplitter #text splitter
|
5 |
+
from langchain.embeddings import HuggingFaceEmbeddings #for using HuggingFace models
|
6 |
+
from langchain.vectorstores import FAISS #FAISS vector store (Facebook AI Similarity Search)
|
7 |
+
from langchain.chains.question_answering import load_qa_chain
|
8 |
+
from langchain import HuggingFaceHub
|
9 |
+
import rich
|
10 |
+
from rich.console import Console
|
11 |
+
from rich.panel import Panel
|
12 |
+
from rich import print
|
13 |
+
from langchain.document_loaders import TextLoader
|
14 |
+
# text splitter used to create chunks
|
15 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
16 |
+
from langchain.chains.summarize import load_summarize_chain
|
17 |
+
import datetime
|
18 |
+
|
19 |
+
def clean_sub(sub_list):
    """Collapse the lines of an SRT caption file into one plain-text string.

    Three kinds of lines are dropped: lines made only of digits (cue
    counters), lines starting with an ``NN:NN:NN`` timestamp, and empty
    lines. The remaining lines are joined with single spaces.

    Args:
        sub_list: Iterable of caption lines (strings).

    Returns:
        The kept lines joined by single spaces, with leading whitespace
        removed.
    """
    # Function-scope import: ``re`` is not among the file's top-level imports.
    import re

    # One anchored pattern covering the three kinds of line to discard.
    skip = re.compile(r'^(?:[0-9]+$|[0-9]{2}:[0-9]{2}:[0-9]{2}|$)')
    spoken = [
        line.rstrip('\n')
        for line in sub_list
        if skip.search(line) is None
    ]
    return ' '.join(spoken).lstrip()


def You_sum(text):
    """Summarize a video's subtitles with a Hugging Face Hub model.

    The subtitles are converted from SRT to plain text, split into chunks,
    and passed through a LangChain ``map_reduce`` summarization chain.
    Progress and the final summary are also printed with Rich.

    Args:
        text: Value passed in by the caller (the Gradio text box). When it
            is a non-blank string it is used as the video URL; otherwise
            the URL is prompted for on standard input.

    Returns:
        The generated summary as a string.
    """
    # The original used ``console`` without ever creating it.
    console = Console()

    if isinstance(text, str) and text.strip():
        url = text.strip()
    else:
        url = input("Enter the url of video you want summary of")

    # NOTE(review): ``myvideo`` is not defined anywhere in this file, so the
    # lines below raise NameError until it is created. Presumably it should
    # be built from ``url`` (e.g. a pytube ``YouTube(url)`` object, judging by
    # ``.title`` / ``.captions`` / ``generate_srt_captions``) -- confirm and
    # add the matching import.
    print(myvideo.title)
    print(myvideo.captions)
    # NOTE(review): this prompt reads from stdin and will block when the
    # function is invoked through the Gradio interface.
    code = input("Enter the code you want: ")
    print("Scraping subtitiles....")
    sub = myvideo.captions[code]
    # Fetch the captions in SRT format.
    caption = sub.generate_srt_captions()

    print("transform subtitles to text: ")
    srt_list = str(caption).split("\n")  # one entry per SRT line
    final_text = clean_sub(srt_list)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2500,
        chunk_overlap=20,
        length_function=len,
    )
    # The original split the undefined name ``summarization_text``; the
    # cleaned subtitle text is what was prepared for this step.
    texts = text_splitter.create_documents([final_text])

    # Call the Hugging Face API to run the summarization chain.
    llm_id = 'MBZUAI/LaMini-T5-223M'
    start = datetime.datetime.now()
    console.print("[yellow bold] Inizializing Summarization Chain")
    llm = HuggingFaceHub(
        repo_id=llm_id,
        model_kwargs={"temperature": 1, "max_length": 23000},
    )
    chain = load_summarize_chain(llm, chain_type="map_reduce")
    summary = chain.run(texts)

    # Print the results with Rich formatting.
    console.print("[bold green]Summary for: ", myvideo.title)
    console.print(f"[italic black] with {llm_id} \n")
    delta = datetime.datetime.now() - start
    print(Panel(str(summary), title='AI Summarization'))
    console.print(f"[red bold]Summarization completed in {delta}")

    # Return the summary so the Gradio "text" output has something to show.
    return str(summary)
|
69 |
+
|
70 |
+
# Expose You_sum through a plain text-in / text-out Gradio interface and
# start serving it.
iface = gr.Interface(
    fn=You_sum,
    inputs="text",
    outputs="text",
)
iface.launch()
|