Rename APP.py to app.py

#2
by anshu-man853 - opened
Files changed (2)
  1. APP.py +0 -71
  2. app.py +54 -0
APP.py DELETED
@@ -1,71 +0,0 @@
1
- import gradio as gr
2
- from textblob import TextBlob
3
- from langchain.document_loaders import TextLoader #for textfiles
4
- from langchain.text_splitter import CharacterTextSplitter #text splitter
5
- from langchain.embeddings import HuggingFaceEmbeddings #for using HugginFace models
6
- from langchain.vectorstores import FAISS #facebook vectorizationfrom langchain.chains.question_answering import load_qa_chain
7
- from langchain.chains.question_answering import load_qa_chain
8
- from langchain import HuggingFaceHub
9
- import rich
10
- from rich.console import Console
11
- from rich.panel import Panel
12
- from rich import print
13
- from langchain.document_loaders import TextLoader
14
- # text splitter for create chunks
15
- from langchain.text_splitter import RecursiveCharacterTextSplitter
16
- from langchain.chains.summarize import load_summarize_chain
17
- import datetime
18
-
19
- def You_sum(text):
20
- url = input("Enter the url of video you want summary of")
21
- print(myvideo.title)
22
- print(myvideo.captions)
23
- code = input("Enter the code you want: ")
24
- print("Scraping subtitiles....")
25
- sub = myvideo.captions[code]
26
- #downloading the files in SRT format
27
- caption = sub.generate_srt_captions()
28
- def clean_sub(sub_list):
29
- lines = sub_list
30
- text = ''
31
- for line in lines:
32
- if re.search('^[0-9]+$', line) is None and re.search('^[0-9]{2}:[0-9]{2}:[0-9]{2}', line) is None and re.search('^$', line) is None:
33
- text += ' ' + line.rstrip('\n')
34
- text = text.lstrip()
35
- #print(text)
36
- return text
37
- print("transform subtitles to text: ")
38
- srt_list = str(caption).split("\n") #generates a list of all the lines
39
- final_text = clean_sub(srt_list)
40
- doc = summarization_text
41
- summaries = []
42
- text_splitter = RecursiveCharacterTextSplitter(
43
- # Set a really small chunk size, just to show.
44
- chunk_size = 2500,
45
- chunk_overlap = 20,
46
- length_function = len,
47
- )
48
- texts = text_splitter.create_documents([doc])
49
- #print(texts)
50
- # Call the Hugging Face API to run the Summarization Chain
51
- llm_id = 'MBZUAI/LaMini-T5-223M'
52
- import datetime
53
- summaries = []
54
- start = datetime.datetime.now() #not used now but useful
55
- console.print("[yellow bold] Inizializing Summarization Chain")
56
- # Call the Chain and run it
57
- llm=HuggingFaceHub(repo_id=llm_id, model_kwargs={"temperature":1, "max_length":23000})
58
- chain = load_summarize_chain(llm, chain_type="map_reduce")
59
- summary = chain.run(texts)
60
- summaries.append(summary)
61
- # print the results with rich text format
62
- console.print("[bold green]Summary for: ", myvideo.title)
63
- console.print(f"[italic black] with {llm_id} \n")
64
- # calculate the elapsed time and print to a Rich Console
65
- stop = datetime.datetime.now() #not used now but useful
66
- delta = stop-start
67
- print(Panel(str(summaries[0]), title='AI Summarization'))
68
- console.print(f"[red bold]Summarization completed in {delta}")
69
-
70
- iface = gr.Interface(fn=You_sum, inputs="text", outputs="text")
71
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import youtube_dl
4
+
5
+ # Load the 'falcon-7b-instruct' model for summarization
6
+ model_name = 'falcon-7b-instruct'
7
+ summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
8
+
9
+ # Function to extract YouTube transcript given a video URL
10
+ def extract_youtube_transcript(url):
11
+ ydl_opts = {
12
+ 'writesubtitles': True,
13
+ 'subtitleslangs': ['en'],
14
+ 'skip_download': True,
15
+ 'ignoreerrors': True
16
+ }
17
+ try:
18
+ with youtube_dl.YoutubeDL(ydl_opts) as ydl:
19
+ info = ydl.extract_info(url, download=False)
20
+ if 'subtitles' in info:
21
+ for subtitle in info['subtitles']:
22
+ if subtitle['ext'] == 'vtt' and subtitle['language'] == 'en':
23
+ return subtitle['url']
24
+ except Exception as e:
25
+ print(f"An error occurred while fetching the transcript: {e}")
26
+ return None
27
+
28
+ # Define the Gradio interface
29
+ def summarize_youtube_transcript(url):
30
+ transcript_url = extract_youtube_transcript(url)
31
+ if transcript_url:
32
+ # Fetch the transcript
33
+ try:
34
+ with youtube_dl.YoutubeDL({}) as ydl:
35
+ transcript_info = ydl.extract_info(transcript_url, download=False)
36
+ transcript = transcript_info['subtitles']['en'][0]['text']
37
+ summary = summarizer(transcript, max_length=300, min_length=50, do_sample=False)
38
+ return summary[0]['summary_text']
39
+ except Exception as e:
40
+ print(f"An error occurred while processing the transcript: {e}")
41
+ return "Unable to fetch or process the transcript."
42
+
43
+ # Create the Gradio interface
44
+ iface = gr.Interface(
45
+ fn=summarize_youtube_transcript,
46
+ inputs=gr.inputs.Textbox(label="YouTube URL"),
47
+ outputs=gr.outputs.Textbox(label="Summary"),
48
+ title="YouTube Transcript Summarizer",
49
+ description="Enter a YouTube URL and get a summary of the transcript.",
50
+ theme="huggingface",
51
+ )
52
+
53
+ # Launch the interface
54
+ iface.launch(share=True)