storresbusquets committed on
Commit
549e47a
1 Parent(s): b8e3183

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -23
app.py CHANGED
@@ -307,8 +307,7 @@ class GradioInference:
307
  def from_article(self, article, progress=gr.Progress()):
308
  """
309
  Call the Gradio Inference python class.
310
- Uses it directly the Whisper model to perform Automatic Speech Recognition (i.e Speech-to-Text).
311
- Once the function has the transcription of the video it proccess it to obtain:
312
  - Summary: using Facebook's BART transformer.
313
  - KeyWords: using VoiceLabT5 keyword extractor.
314
  - Sentiment Analysis: using Hugging Face's default sentiment classifier
@@ -320,14 +319,14 @@ class GradioInference:
320
 
321
  # Perform summarization on the transcription
322
  transcription_summary = self.bart_summarizer(
323
- results["text"], max_length=150, min_length=30, do_sample=False, truncation=True
324
  )
325
 
326
  #### Resumen multilingue
327
  WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
328
 
329
  input_ids_sum = self.mt5_tokenizer(
330
- [WHITESPACE_HANDLER(results["text"])],
331
  return_tensors="pt",
332
  padding="max_length",
333
  truncation=True,
@@ -352,7 +351,7 @@ class GradioInference:
352
 
353
  # Extract keywords using VoiceLabT5
354
  task_prefix = "Keywords: "
355
- input_sequence = task_prefix + results["text"]
356
 
357
  input_ids = self.keyword_tokenizer(
358
  input_sequence,
@@ -387,26 +386,16 @@ class GradioInference:
387
  progress(0.90, desc="Generating Wordcloud")
388
  # WordCloud object
389
  wordcloud = WordCloud(colormap = "Oranges").generate(
390
- results["text"]
391
  )
392
  wordcloud_image = wordcloud.to_image()
393
 
394
- if lang == "english" or lang == "none":
395
- return (
396
- results["text"],
397
- transcription_summary[0]["summary_text"],
398
- formatted_keywords,
399
- formatted_sentiment,
400
- wordcloud_image,
401
- )
402
- else:
403
- return (
404
- results["text"],
405
- summary,
406
- formatted_keywords,
407
- formatted_sentiment,
408
- wordcloud_image,
409
- )
410
 
411
 
412
  gio = GradioInference()
@@ -428,7 +417,7 @@ with block as demo:
428
  </div>
429
  """
430
  )
431
- with gr.Group(spacing_size="md", radius_size="md"):
432
  with gr.Tab("From YouTube 📹"):
433
  with gr.Box():
434
 
 
307
  def from_article(self, article, progress=gr.Progress()):
308
  """
309
  Call the Gradio Inference python class.
310
+ Accepts the user's text input, then performs:
 
311
  - Summary: using Facebook's BART transformer.
312
  - KeyWords: using VoiceLabT5 keyword extractor.
313
  - Sentiment Analysis: using Hugging Face's default sentiment classifier
 
319
 
320
  # Perform summarization on the transcription
321
  transcription_summary = self.bart_summarizer(
322
+ article, max_length=150, min_length=30, do_sample=False, truncation=True
323
  )
324
 
325
  #### Resumen multilingue
326
  WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
327
 
328
  input_ids_sum = self.mt5_tokenizer(
329
+ [WHITESPACE_HANDLER(article)],
330
  return_tensors="pt",
331
  padding="max_length",
332
  truncation=True,
 
351
 
352
  # Extract keywords using VoiceLabT5
353
  task_prefix = "Keywords: "
354
+ input_sequence = task_prefix + article
355
 
356
  input_ids = self.keyword_tokenizer(
357
  input_sequence,
 
386
  progress(0.90, desc="Generating Wordcloud")
387
  # WordCloud object
388
  wordcloud = WordCloud(colormap = "Oranges").generate(
389
+ article
390
  )
391
  wordcloud_image = wordcloud.to_image()
392
 
393
+ return (
394
+ transcription_summary[0]["summary_text"],
395
+ formatted_keywords,
396
+ formatted_sentiment,
397
+ wordcloud_image,
398
+ )
 
 
 
 
 
 
 
 
 
 
399
 
400
 
401
  gio = GradioInference()
 
417
  </div>
418
  """
419
  )
420
+ with gr.Group():
421
  with gr.Tab("From YouTube 📹"):
422
  with gr.Box():
423