Ankoorkashyap committed
Commit 4d281ef · 1 Parent(s): 2b603d4

Create APP.py

Files changed (1)
  1. APP.py +71 -0
APP.py ADDED
@@ -0,0 +1,71 @@
+ import gradio as gr
+ from textblob import TextBlob
+ from langchain.document_loaders import TextLoader  # for text files
+ from langchain.text_splitter import CharacterTextSplitter  # text splitter
+ from langchain.embeddings import HuggingFaceEmbeddings  # for using Hugging Face models
+ from langchain.vectorstores import FAISS  # Facebook vector store
+ from langchain.chains.question_answering import load_qa_chain
+ from langchain import HuggingFaceHub
+ import rich
+ from rich.console import Console
+ from rich.panel import Panel
+ from rich import print
+ from pytube import YouTube  # YouTube video/caption access
+ import re  # used to clean the SRT captions
+ # text splitter for creating chunks
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.chains.summarize import load_summarize_chain
+ import datetime
+
+ console = Console()
+
+ def You_sum(url):
+     # The Gradio textbox supplies the video URL; interactive input()
+     # prompts do not work inside a Gradio callback.
+     myvideo = YouTube(url)
+     print(myvideo.title)
+     print(myvideo.captions)
+     code = "en"  # caption language code; originally asked for via input()
+     print("Scraping subtitles....")
+     sub = myvideo.captions[code]
+     # download the captions in SRT format
+     caption = sub.generate_srt_captions()
+
+     def clean_sub(sub_list):
+         # drop SRT sequence numbers, timestamps and blank lines, keep the text
+         text = ''
+         for line in sub_list:
+             if re.search('^[0-9]+$', line) is None and re.search('^[0-9]{2}:[0-9]{2}:[0-9]{2}', line) is None and re.search('^$', line) is None:
+                 text += ' ' + line.rstrip('\n')
+         return text.lstrip()
+
+     print("Transform subtitles to text:")
+     srt_list = str(caption).split("\n")  # generates a list of all the lines
+     final_text = clean_sub(srt_list)
+
+     # split the transcript into chunks the model can handle
+     text_splitter = RecursiveCharacterTextSplitter(
+         chunk_size=2500,
+         chunk_overlap=20,
+         length_function=len,
+     )
+     texts = text_splitter.create_documents([final_text])
+
+     # Call the Hugging Face API to run the summarization chain
+     llm_id = 'MBZUAI/LaMini-T5-223M'
+     summaries = []
+     start = datetime.datetime.now()
+     console.print("[yellow bold]Initializing Summarization Chain")
+     llm = HuggingFaceHub(repo_id=llm_id, model_kwargs={"temperature": 1, "max_length": 23000})
+     chain = load_summarize_chain(llm, chain_type="map_reduce")
+     summary = chain.run(texts)
+     summaries.append(summary)
+
+     # print the results with rich text formatting
+     console.print("[bold green]Summary for: ", myvideo.title)
+     console.print(f"[italic black] with {llm_id} \n")
+     # elapsed time, printed to the Rich console
+     stop = datetime.datetime.now()
+     delta = stop - start
+     print(Panel(str(summaries[0]), title='AI Summarization'))
+     console.print(f"[red bold]Summarization completed in {delta}")
+     return str(summaries[0])
+
+ iface = gr.Interface(fn=You_sum, inputs="text", outputs="text")
+ iface.launch()
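
For reference, a minimal standalone sketch of the map_reduce summarization step used in APP.py, run on a plain string instead of scraped captions. The model id and splitter settings are copied from the file above; long_text is a placeholder, and the HUGGINGFACEHUB_API_TOKEN environment variable is an assumption (LangChain's HuggingFaceHub wrapper reads the Hugging Face API token from the environment).

from langchain import HuggingFaceHub
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Assumption: export HUGGINGFACEHUB_API_TOKEN before running (placeholder token).
long_text = "..."  # placeholder: any transcript-sized string

# Same splitter settings as APP.py
splitter = RecursiveCharacterTextSplitter(chunk_size=2500, chunk_overlap=20, length_function=len)
docs = splitter.create_documents([long_text])

# Same model and chain type as APP.py
llm = HuggingFaceHub(repo_id="MBZUAI/LaMini-T5-223M", model_kwargs={"temperature": 1, "max_length": 23000})
chain = load_summarize_chain(llm, chain_type="map_reduce")
print(chain.run(docs))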