storresbusquets committed on
Commit 31eb124
1 Parent(s): 24a4fff

Update app.py

Files changed (1)
  1. app.py +28 -44
app.py CHANGED
@@ -86,49 +86,34 @@ class GradioInference:
         progress(0.40, desc="Summarizing")

         # Perform summarization on the transcription
-        # transcription_summary = self.summarizer(
-        #     results["text"], max_length=150, min_length=30, do_sample=False
-        # )
+        transcription_summary = self.summarizer(
+            results["text"], max_length=150, min_length=30, do_sample=False
+        )

-        #### Test
-        # WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
-
-        # input_ids_sum = self.tokenizer(
-        #     [WHITESPACE_HANDLER(results["text"])],
-        #     return_tensors="pt",
-        #     padding="max_length",
-        #     truncation=True,
-        #     max_length=512
-        # )["input_ids"]
-
-        # output_ids_sum = self.model.generate(
-        #     input_ids=input_ids_sum,
-        #     max_length=130,
-        #     no_repeat_ngram_size=2,
-        #     num_beams=4
-        # )[0]
+        #### Multilingual summary
+        WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))

-        # summary = self.tokenizer.decode(
-        #     output_ids_sum,
-        #     skip_special_tokens=True,
-        #     clean_up_tokenization_spaces=False
-        # )
-        #### End test
+        input_ids_sum = self.tokenizer(
+            [WHITESPACE_HANDLER(results["text"])],
+            return_tensors="pt",
+            padding="max_length",
+            truncation=True,
+            max_length=512
+        )["input_ids"]

-        ### LLM test ###
-
-        template = """
-        [INST] <<SYS>>
-        You are a helpful, respectful and honest assistant that performs summaries of text. Write a concise summary of the following text.
-        <</SYS>>
-        {text}[/INST]
-        """
+        output_ids_sum = self.model.generate(
+            input_ids=input_ids_sum,
+            max_length=130,
+            no_repeat_ngram_size=2,
+            num_beams=4
+        )[0]

-        prompt = PromptTemplate(template=template, input_variables=["text"])
-        llm_chain = LLMChain(prompt=prompt, llm=self.llm)
-        summary2 = llm_chain.run(results["text"])
-
-        ### End LLM test ###
+        summary = self.tokenizer.decode(
+            output_ids_sum,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
+        )
+        #### End multilingual summary

         progress(0.60, desc="Extracting Keywords")

@@ -171,8 +156,7 @@ class GradioInference:
         if lang == "english" or lang == "none":
             return (
                 results["text"],
-                summary2,
-                # transcription_summary[0]["summary_text"],
+                transcription_summary[0]["summary_text"],
                 formatted_keywords,
                 formatted_sentiment,
                 wordcloud_image,
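
Note on the English branch above: self.summarizer is a Transformers summarization pipeline, which returns a list of dicts, so the first summary is read as transcription_summary[0]["summary_text"]. A minimal standalone sketch of that call, assuming an English checkpoint such as facebook/bart-large-cnn (the diff does not show which model backs self.summarizer):

from transformers import pipeline

# Assumed checkpoint; the commit only shows the pipeline call, not the model name.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

text = "..."  # the transcription (results["text"] in the app)
out = summarizer(text, max_length=150, min_length=30, do_sample=False)
print(out[0]["summary_text"])  # pipeline output is a list of dicts
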
@@ -180,7 +164,7 @@ class GradioInference:
         else:
             return (
                 results["text"],
-                summary2,
+                summary,
                 formatted_keywords,
                 formatted_sentiment,
                 wordcloud_image,
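
Note on the non-English branch above: summary comes from the multilingual path added in the first hunk, which whitespace-normalizes the transcription, tokenizes it, generates with beam search, and decodes. A standalone sketch of that path, assuming self.tokenizer and self.model wrap a multilingual seq2seq summarizer such as csebuetnlp/mT5_multilingual_XLSum (an assumption; the checkpoint is not shown in this diff):

import re
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

MODEL_NAME = "csebuetnlp/mT5_multilingual_XLSum"  # assumed checkpoint
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)

def summarize(text: str) -> str:
    # Collapse newlines and repeated whitespace, as the WHITESPACE_HANDLER lambda does.
    clean = re.sub(r"\s+", " ", re.sub(r"\n+", " ", text.strip()))
    input_ids = tokenizer(
        [clean],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )["input_ids"]
    # Same generation settings as the diff: beam search, short output, no repeated bigrams.
    output_ids = model.generate(
        input_ids=input_ids,
        max_length=130,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]
    return tokenizer.decode(output_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)
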
@@ -229,7 +213,7 @@ class GradioInference:
             results["text"], max_length=150, min_length=30, do_sample=False
         )

-        #### Test
+        #### Multilingual summary
         WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))

         input_ids_sum = self.tokenizer(
@@ -252,7 +236,7 @@ class GradioInference:
             skip_special_tokens=True,
             clean_up_tokenization_spaces=False
         )
-        #### End test
+        #### End multilingual summary

         progress(0.50, desc="Extracting Keywords")

 
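For reference, the lines removed in the first hunk had routed summarization through a LangChain LLMChain built around a Llama-2-style chat prompt. A minimal sketch of that dropped approach, assuming self.llm was a LangChain LLM wrapper such as a HuggingFaceHub-hosted chat model (neither the wrapper nor the checkpoint is shown in this diff):

from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFaceHub

template = """
[INST] <<SYS>>
You are a helpful, respectful and honest assistant that performs summaries of text. Write a concise summary of the following text.
<</SYS>>
{text}[/INST]
"""

# Assumed LLM backend; requires HUGGINGFACEHUB_API_TOKEN in the environment.
llm = HuggingFaceHub(repo_id="meta-llama/Llama-2-7b-chat-hf")

prompt = PromptTemplate(template=template, input_variables=["text"])
llm_chain = LLMChain(prompt=prompt, llm=llm)
summary2 = llm_chain.run("the transcription text")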