Spaces:

Aravindan
/

Video2TextSummarizer

Runtime error

App Files Files Community

Aravindan committed on May 8, 2022

Commit

3510371

1 Parent(s): c6da32b

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -15

app.py CHANGED Viewed

@@ -1,34 +1,77 @@
 import gradio as gr
 from tqdm import tqdm
 from transformers import pipeline
 from IPython.display import YouTubeVideo
 from youtube_transcript_api import YouTubeTranscriptApi
def _extract_video_id(link):
    """Return the YouTube video id contained in *link*.

    Handles ``watch?v=<id>`` URLs (with or without extra query parameters),
    ``youtu.be/<id>`` short links and ``/embed/<id>``-style paths. Anything
    else falls back to the historical "text after the first '='" rule, so
    inputs that worked before keep working (and inputs without '=' still
    raise ``IndexError``).
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(link.strip())
    ids_from_query = parse_qs(parsed.query).get('v')
    if ids_from_query:
        return ids_from_query[0]

    host = (parsed.hostname or '').lower()
    path_parts = [part for part in parsed.path.split('/') if part]
    if host in ('youtu.be', 'www.youtu.be') and path_parts:
        return path_parts[0]
    if len(path_parts) >= 2 and path_parts[0] in ('embed', 'shorts', 'live', 'v'):
        return path_parts[1]

    # Legacy behaviour: take whatever follows the first '='.
    return link.split('=')[1]


def video2Summarizer(link):
    """Summarize the transcript of a YouTube video.

    The transcript is fetched with ``YouTubeTranscriptApi``, joined into one
    string, cut into 1000-character chunks, and each chunk is passed to the
    default ``transformers`` summarization pipeline.

    Args:
        link: URL of the YouTube video.

    Returns:
        A list with one summary string per 1000-character chunk, in order.
        The list is empty when the transcript has no text.
    """
    chunk_size = 1000

    video_id = _extract_video_id(link)
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    # Every segment is prefixed with a space, exactly as before, so chunk
    # boundaries fall on the same characters.
    result = ''.join(' ' + segment['text'] for segment in transcript)

    summarizer = pipeline('summarization')
    summarized_text = []
    # Stepping over the real length avoids sending an empty trailing chunk
    # to the model when len(result) is 0 or an exact multiple of chunk_size.
    for start in tqdm(range(0, len(result), chunk_size)):
        out = summarizer(result[start:start + chunk_size])
        summarized_text.append(out[0]['summary_text'])
    return summarized_text
# Gradio UI: one text input (the video URL) and one text output (the summary).
iface = gr.Interface(
    fn=video2Summarizer,
    inputs='text',
    outputs=gr.outputs.Textbox(label="Summarized output"),
    title='Video To Text Summarizer',
    description='Just give the url of the YouTube video, then the app will give you the summarized format of the video in 5 to 10 Min, its based on the video length what you have given. Use this example and try to run the same example by clicking that',
    examples=[['https://www.youtube.com/watch?v=kEN2Omq9mwk']],
)
# Start serving the interface.
iface.launch(inline=False)

+import json
+import torch
 import gradio as gr
 from tqdm import tqdm
 from transformers import pipeline
 from IPython.display import YouTubeVideo
 from youtube_transcript_api import YouTubeTranscriptApi
+from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config, AutoTokenizer, AutoModelForSeq2SeqLM
# Checkpoints that are loaded with the T5 classes and need the "summarize: "
# task prefix; every other model name goes through the Auto* classes.
_T5_PREFIXED_MODELS = ('t5-small', 't5-large')

# UI language name -> mBART-50 target language code.
_MBART_LANGUAGE_CODES = {'hindi': 'hi_IN', 'tamil': 'ta_IN'}


def _extract_video_id(link):
    """Return the YouTube video id contained in *link*.

    Handles ``watch?v=<id>`` URLs (with or without extra query parameters),
    ``youtu.be/<id>`` short links and ``/embed/<id>``-style paths. Anything
    else falls back to the historical "text after the first '='" rule, so
    inputs that worked before keep working (and inputs without '=' still
    raise ``IndexError``).
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(link.strip())
    ids_from_query = parse_qs(parsed.query).get('v')
    if ids_from_query:
        return ids_from_query[0]

    host = (parsed.hostname or '').lower()
    path_parts = [part for part in parsed.path.split('/') if part]
    if host in ('youtu.be', 'www.youtu.be') and path_parts:
        return path_parts[0]
    if len(path_parts) >= 2 and path_parts[0] in ('embed', 'shorts', 'live', 'v'):
        return path_parts[1]

    # Legacy behaviour: take whatever follows the first '='.
    return link.split('=')[1]


def _summarize(text, max_len, model_name):
    """Generate a summary of *text* with the seq2seq checkpoint *model_name*."""
    use_t5 = model_name in _T5_PREFIXED_MODELS
    if use_t5:
        tokenizer = T5Tokenizer.from_pretrained(model_name)
        seq2seq = T5ForConditionalGeneration.from_pretrained(model_name)
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    prepared = text.strip().replace("\n", "")
    if use_t5:
        prepared = "summarize: " + prepared

    # NOTE(review): the input is not truncated; very long transcripts may
    # exceed what the chosen model accepts -- confirm and add truncation or
    # chunking if that is the case.
    input_ids = tokenizer.encode(prepared, return_tensors="pt")
    summary_ids = seq2seq.generate(
        input_ids,
        num_beams=4,
        no_repeat_ngram_size=2,
        # Keep min_length <= max_length when a small output length is asked for.
        min_length=min(30, max_len),
        max_length=max_len,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


def _translate(summary, language):
    """Translate an English *summary*; return None for an unknown language."""
    if language == 'english':
        return summary
    target_code = _MBART_LANGUAGE_CODES.get(language)
    if target_code is None:
        return None
    # Load the translation model only when a translation is really needed.
    translator = pipeline('translation', model='facebook/mbart-large-50-one-to-many-mmt')
    translated = translator(summary, src_lang="en_XX", tgt_lang=target_code)
    # The pipeline returns a list of dicts; hand back the text itself so the
    # result is a plain string, like the 'english' branch.
    return translated[0]['translation_text']


def video2Summarizer(link = 'https://www.youtube.com/watch?v=kEN2Omq9mwk', model = 't5-small', ml = 50,  language = 'hindi'):
    """Summarize a YouTube video's transcript and optionally translate it.

    Args:
        link: URL of the YouTube video.
        model: Name of the summarization checkpoint. 't5-small' and
            't5-large' are loaded with the T5 classes and get the
            "summarize: " prefix; any other name is loaded through
            AutoTokenizer / AutoModelForSeq2SeqLM.
        ml: Maximum length passed to ``generate``; converted with ``int()``.
        language: 'english' returns the summary unchanged, 'hindi' and
            'tamil' translate it with mBART-50; anything else yields None.

    Returns:
        The (possibly translated) summary as a string, or None when
        *language* is not one of the supported values.
    """
    video_id = _extract_video_id(link)
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    result = ''.join(' ' + segment['text'] for segment in transcript)

    summary = _summarize(result, int(ml), model)
    return _translate(summary, language)
# Gradio UI: video URL, model name, output length and target language in;
# the summarized (and possibly translated) text out.
iface = gr.Interface(
    fn=video2Summarizer,
    inputs=[
        'text',
        gr.inputs.Textbox(label='Give whatever model you know for summarization'),
        gr.inputs.Slider(50, 3000, label='Choose the output length you need, (preferred size 500 - 1000)'),
        gr.inputs.Radio(["hindi", "tamil", "english"], label='Select The Language What you need ! '),
    ],
    outputs=gr.outputs.Textbox(label="Summarized output"),
    title='YouTubeVideo To Text Summarizer',
    description='Are you tierd watching video? are you need a app that gives the important points from the youtube video? Yes -This app is you. This app helps to get the important points from the YouTube Video, It helps to save lots of your time. Click the example to run the demo',
    examples=[
        ['https://www.youtube.com/watch?v=kEN2Omq9mwk', 't5-small', '500', 'hindi'],
        ['https://www.youtube.com/watch?v=Tuw8hxrFBH8', 't5-large', '1000', 'tamil'],
    ],
)
# Start serving the interface.
iface.launch(inline=False)