Rename APP.py to app.py

#2
by anshu-man853 - opened
Files changed (2)
  1. APP.py +0 -71
  2. app.py +54 -0
APP.py DELETED
@@ -1,71 +0,0 @@
1
- import gradio as gr
2
- from textblob import TextBlob
3
- from langchain.document_loaders import TextLoader #for textfiles
4
- from langchain.text_splitter import CharacterTextSplitter #text splitter
5
- from langchain.embeddings import HuggingFaceEmbeddings #for using HugginFace models
6
- from langchain.vectorstores import FAISS #facebook vectorizationfrom langchain.chains.question_answering import load_qa_chain
7
- from langchain.chains.question_answering import load_qa_chain
8
- from langchain import HuggingFaceHub
9
- import rich
10
- from rich.console import Console
11
- from rich.panel import Panel
12
- from rich import print
13
- from langchain.document_loaders import TextLoader
14
- # text splitter for create chunks
15
- from langchain.text_splitter import RecursiveCharacterTextSplitter
16
- from langchain.chains.summarize import load_summarize_chain
17
- import datetime
18
-
19
- def You_sum(text):
20
- url = input("Enter the url of video you want summary of")
21
- print(myvideo.title)
22
- print(myvideo.captions)
23
- code = input("Enter the code you want: ")
24
- print("Scraping subtitiles....")
25
- sub = myvideo.captions[code]
26
- #downloading the files in SRT format
27
- caption = sub.generate_srt_captions()
28
- def clean_sub(sub_list):
29
- lines = sub_list
30
- text = ''
31
- for line in lines:
32
- if re.search('^[0-9]+$', line) is None and re.search('^[0-9]{2}:[0-9]{2}:[0-9]{2}', line) is None and re.search('^$', line) is None:
33
- text += ' ' + line.rstrip('\n')
34
- text = text.lstrip()
35
- #print(text)
36
- return text
37
- print("transform subtitles to text: ")
38
- srt_list = str(caption).split("\n") #generates a list of all the lines
39
- final_text = clean_sub(srt_list)
40
- doc = summarization_text
41
- summaries = []
42
- text_splitter = RecursiveCharacterTextSplitter(
43
- # Set a really small chunk size, just to show.
44
- chunk_size = 2500,
45
- chunk_overlap = 20,
46
- length_function = len,
47
- )
48
- texts = text_splitter.create_documents([doc])
49
- #print(texts)
50
- # Call the Hugging Face API to run the Summarization Chain
51
- llm_id = 'MBZUAI/LaMini-T5-223M'
52
- import datetime
53
- summaries = []
54
- start = datetime.datetime.now() #not used now but useful
55
- console.print("[yellow bold] Inizializing Summarization Chain")
56
- # Call the Chain and run it
57
- llm=HuggingFaceHub(repo_id=llm_id, model_kwargs={"temperature":1, "max_length":23000})
58
- chain = load_summarize_chain(llm, chain_type="map_reduce")
59
- summary = chain.run(texts)
60
- summaries.append(summary)
61
- # print the results with rich text format
62
- console.print("[bold green]Summary for: ", myvideo.title)
63
- console.print(f"[italic black] with {llm_id} \n")
64
- # calculate the elapsed time and print to a Rich Console
65
- stop = datetime.datetime.now() #not used now but useful
66
- delta = stop-start
67
- print(Panel(str(summaries[0]), title='AI Summarization'))
68
- console.print(f"[red bold]Summarization completed in {delta}")
69
-
70
- iface = gr.Interface(fn=You_sum, inputs="text", outputs="text")
71
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import youtube_dl
4
+
5
+ # Load the 'falcon-7b-instruct' model for summarization
6
+ model_name = 'falcon-7b-instruct'
7
+ summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
8
+
9
+ # Function to extract YouTube transcript given a video URL
10
+ def extract_youtube_transcript(url):
11
+ ydl_opts = {
12
+ 'writesubtitles': True,
13
+ 'subtitleslangs': ['en'],
14
+ 'skip_download': True,
15
+ 'ignoreerrors': True
16
+ }
17
+ try:
18
+ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
19
+ info = ydl.extract_info(url, download=False)
20
+ if 'subtitles' in info:
21
+ for subtitle in info['subtitles']:
22
+ if subtitle['ext'] == 'vtt' and subtitle['language'] == 'en':
23
+ return subtitle['url']
24
+ except Exception as e:
25
+ print(f"An error occurred while fetching the transcript: {e}")
26
+ return None
27
+
28
+ # Define the Gradio interface
29
+ def summarize_youtube_transcript(url):
30
+ transcript_url = extract_youtube_transcript(url)
31
+ if transcript_url:
32
+ # Fetch the transcript
33
+ try:
34
+ with youtube_dl.YoutubeDL({}) as ydl:
35
+ transcript_info = ydl.extract_info(transcript_url, download=False)
36
+ transcript = transcript_info['subtitles']['en'][0]['text']
37
+ summary = summarizer(transcript, max_length=300, min_length=50, do_sample=False)
38
+ return summary[0]['summary_text']
39
+ except Exception as e:
40
+ print(f"An error occurred while processing the transcript: {e}")
41
+ return "Unable to fetch or process the transcript."
42
+
43
+ # Create the Gradio interface
44
+ iface = gr.Interface(
45
+ fn=summarize_youtube_transcript,
46
+ inputs=gr.inputs.Textbox(label="YouTube URL"),
47
+ outputs=gr.outputs.Textbox(label="Summary"),
48
+ title="YouTube Transcript Summarizer",
49
+ description="Enter a YouTube URL and get a summary of the transcript.",
50
+ theme="huggingface",
51
+ )
52
+
53
+ # Launch the interface
54
+ iface.launch(share=True)