Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,34 +1,77 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
from tqdm import tqdm
|
3 |
from transformers import pipeline
|
4 |
from IPython.display import YouTubeVideo
|
5 |
from youtube_transcript_api import YouTubeTranscriptApi
|
|
|
|
|
|
|
6 |
|
7 |
-
def video2Summarizer(link):
|
8 |
youtube_video = link
|
9 |
-
video_id = youtube_video.split('=')[1]
|
10 |
transcript = YouTubeTranscriptApi.get_transcript(video_id)
|
11 |
|
12 |
result = ""
|
13 |
for i in transcript:
|
14 |
result += ' ' + i['text']
|
15 |
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
-
num_iters = int(len(result)/1000)
|
19 |
-
summarized_text = []
|
20 |
-
for i in tqdm(range(0, num_iters + 1)):
|
21 |
-
start = 0
|
22 |
-
start = i * 1000
|
23 |
-
end = (i + 1) * 1000
|
24 |
-
out = summarizer(result[start:end])
|
25 |
-
out = out[0]
|
26 |
-
out = out['summary_text']
|
27 |
-
summarized_text.append(out)
|
28 |
|
29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
|
32 |
-
|
|
|
33 |
|
34 |
iface.launch(inline = False)
|
|
|
1 |
+
import json
|
2 |
+
import torch
|
3 |
import gradio as gr
|
4 |
from tqdm import tqdm
|
5 |
from transformers import pipeline
|
6 |
from IPython.display import YouTubeVideo
|
7 |
from youtube_transcript_api import YouTubeTranscriptApi
|
8 |
+
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config, AutoTokenizer, AutoModelForSeq2SeqLM
|
9 |
+
|
10 |
+
def _extract_video_id(link):
    """Return the YouTube video id contained in *link*.

    Handles the standard ``watch?v=<id>`` form (with any additional query
    parameters in any order), ``youtu.be/<id>`` short links and
    ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` and ``/v/<id>`` paths.

    Raises:
        ValueError: if no video id can be located in *link*.
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(link.strip())

    # Preferred source: the ``v`` query parameter. Parsing the query string
    # (rather than splitting on '=') keeps trailing parameters such as
    # ``&t=10s`` out of the id.
    ids_from_query = parse_qs(parsed.query).get('v')
    if ids_from_query:
        return ids_from_query[0]

    path_parts = [part for part in parsed.path.split('/') if part]
    if parsed.netloc.lower().endswith('youtu.be') and path_parts:
        return path_parts[0]
    if len(path_parts) >= 2 and path_parts[0] in ('embed', 'shorts', 'live', 'v'):
        return path_parts[1]

    raise ValueError('Could not find a YouTube video id in: %r' % (link,))


def _summarize(text, ml, model, tokenizer_cls, model_cls, prefix=''):
    """Summarize *text* with the seq2seq checkpoint named *model*.

    Args:
        text: The text to summarize.
        ml: Maximum length passed to ``generate`` as ``max_length``.
        model: Checkpoint name handed to ``from_pretrained``.
        tokenizer_cls: Tokenizer class used to load the tokenizer.
        model_cls: Model class used to load the weights.
        prefix: Optional task prefix prepended to the cleaned text
            (the T5 checkpoints are called with ``"summarize: "``).

    Returns:
        The decoded summary string for the first generated sequence.
    """
    tokenizer = tokenizer_cls.from_pretrained(model)
    network = model_cls.from_pretrained(model)

    # Replace newlines with a space (not the empty string) so that the words
    # on either side of a caption line break are not glued together.
    cleaned = text.strip().replace("\n", " ")

    input_ids = tokenizer.encode(prefix + cleaned, return_tensors="pt")
    summary_ids = network.generate(
        input_ids,
        num_beams=4,
        no_repeat_ngram_size=2,
        min_length=30,
        max_length=ml,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


def _translate(summary, language):
    """Translate an English *summary* into *language*.

    Returns:
        The summary unchanged for ``'english'``, the translated text for
        ``'hindi'`` / ``'tamil'``, and ``None`` for any other language.
    """
    if language == 'english':
        return summary

    target_code = {'hindi': 'hi_IN', 'tamil': 'ta_IN'}.get(language)
    if target_code is None:
        return None

    # Load the translation model only once we know a translation is needed;
    # English and unsupported languages never reach this point.
    translator = pipeline('translation', model='facebook/mbart-large-50-one-to-many-mmt')
    translated = translator(summary, src_lang="en_XX", tgt_lang=target_code)

    # The pipeline yields a list with one dict per input; return the text
    # itself so every language produces a plain string for the output box.
    return translated[0]['translation_text']


def video2Summarizer(link='https://www.youtube.com/watch?v=kEN2Omq9mwk', model='t5-small', ml=50, language='hindi'):
    """Summarize a YouTube video's transcript and translate the summary.

    Args:
        link: URL of the YouTube video.
        model: Name of the summarization checkpoint. ``'t5-small'`` and
            ``'t5-large'`` are loaded through the T5 classes with a
            ``"summarize: "`` prefix; any other name is loaded through
            ``AutoTokenizer`` / ``AutoModelForSeq2SeqLM``.
        ml: Maximum summary length; converted with ``int()`` before use.
        language: ``'hindi'``, ``'tamil'`` or ``'english'``.

    Returns:
        The summary text in the requested language, or ``None`` when
        *language* is not one of the supported values.

    Raises:
        ValueError: if no video id can be extracted from *link*.
    """
    video_id = _extract_video_id(link)
    transcript = YouTubeTranscriptApi.get_transcript(video_id)

    # Join once instead of growing a string with += per caption entry.
    result = ' '.join(entry['text'] for entry in transcript)

    max_length = int(ml)
    if model in ('t5-small', 't5-large'):
        summary = _summarize(
            result,
            max_length,
            model,
            T5Tokenizer,
            T5ForConditionalGeneration,
            prefix="summarize: ",
        )
    else:
        summary = _summarize(result, max_length, model, AutoTokenizer, AutoModelForSeq2SeqLM)

    return _translate(summary, language)
|
72 |
|
73 |
|
74 |
+
|
75 |
+
# Gradio front end for video2Summarizer. The four inputs are passed to the
# function positionally: video link, model name, output length, language.
iface = gr.Interface(
    fn=video2Summarizer,
    inputs=[
        'text',
        gr.inputs.Textbox(label='Give whatever model you know for summarization'),
        gr.inputs.Slider(
            50,
            3000,
            label='Choose the output length you need, (preferred size 500 - 1000)',
        ),
        gr.inputs.Radio(
            ["hindi", "tamil", "english"],
            label='Select The Language What you need ! ',
        ),
    ],
    outputs=gr.outputs.Textbox(label="Summarized output"),
    title='YouTubeVideo To Text Summarizer',
    description='Are you tierd watching video? are you need a app that gives the important points from the youtube video? Yes -This app is you. This app helps to get the important points from the YouTube Video, It helps to save lots of your time. Click the example to run the demo',
    examples=[
        ['https://www.youtube.com/watch?v=kEN2Omq9mwk', 't5-small', '500', 'hindi'],
        ['https://www.youtube.com/watch?v=Tuw8hxrFBH8', 't5-large', '1000', 'tamil'],
    ],
)

# Start the web app.
iface.launch(inline=False)
|