pszemraj committed on
Commit
7452863
·
1 Parent(s): b4e0534

Signed-off-by: peter szemraj <[email protected]>

Files changed (2) hide show
  1. app.py +7 -5
  2. utils.py +4 -1
app.py CHANGED
@@ -75,7 +75,7 @@ def proc_submission(
75
  msg = f"""
76
  <div style="background-color: #FFA500; color: white; padding: 20px;">
77
  <h3>Warning</h3>
78
- <p>Input text was truncated to {max_input_length} words. This is about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
79
  </div>
80
  """
81
  logging.warning(msg)
@@ -188,7 +188,9 @@ if __name__ == "__main__":
188
 
189
  logging.info("Loading summ models")
190
  with contextlib.redirect_stdout(None):
191
- model, tokenizer = load_model_and_tokenizer("pszemraj/pegasus-x-large-book-summary")
 
 
192
  model_sm, tokenizer_sm = load_model_and_tokenizer(
193
  "pszemraj/long-t5-tglobal-base-16384-book-summary"
194
  )
@@ -240,9 +242,9 @@ if __name__ == "__main__":
240
  )
241
  with gr.Row():
242
  input_text = gr.Textbox(
243
- lines=4,
244
- label="Input Text (for summarization)",
245
- placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
246
  )
247
  with gr.Column(min_width=100, scale=0.5):
248
  load_examples_button = gr.Button(
 
75
  msg = f"""
76
  <div style="background-color: #FFA500; color: white; padding: 20px;">
77
  <h3>Warning</h3>
78
+ <p>Input text was truncated to {max_input_length} words. That's about {100*max_input_length/len(input_wc):.2f}% of the submission.</p>
79
  </div>
80
  """
81
  logging.warning(msg)
 
188
 
189
  logging.info("Loading summ models")
190
  with contextlib.redirect_stdout(None):
191
+ model, tokenizer = load_model_and_tokenizer(
192
+ "pszemraj/pegasus-x-large-book-summary"
193
+ )
194
  model_sm, tokenizer_sm = load_model_and_tokenizer(
195
  "pszemraj/long-t5-tglobal-base-16384-book-summary"
196
  )
 
242
  )
243
  with gr.Row():
244
  input_text = gr.Textbox(
245
+ lines=4,
246
+ label="Input Text (for summarization)",
247
+ placeholder="Enter text to summarize, the text will be cleaned and truncated on Spaces. Narrative, academic (both papers and lecture transcription), and article text work well. May take a bit to generate depending on the input text :)",
248
  )
249
  with gr.Column(min_width=100, scale=0.5):
250
  load_examples_button = gr.Button(
utils.py CHANGED
@@ -8,6 +8,7 @@ from pathlib import Path
8
  from natsort import natsorted
9
  import subprocess
10
 
 
11
  def truncate_word_count(text, max_words=512):
12
  """
13
  truncate_word_count - a helper function for the gradio module
@@ -40,7 +41,9 @@ def load_examples(src, filetypes=[".txt", ".pdf"]):
40
  src = Path(src)
41
  src.mkdir(exist_ok=True)
42
 
43
- pdf_url = "https://www.dropbox.com/s/y92xy7o5qb88yij/all_you_need_is_attention.pdf?dl=1"
 
 
44
  subprocess.run(["wget", pdf_url, "-O", src / "all_you_need_is_attention.pdf"])
45
  examples = [f for f in src.iterdir() if f.suffix in filetypes]
46
  examples = natsorted(examples)
 
8
  from natsort import natsorted
9
  import subprocess
10
 
11
+
12
  def truncate_word_count(text, max_words=512):
13
  """
14
  truncate_word_count - a helper function for the gradio module
 
41
  src = Path(src)
42
  src.mkdir(exist_ok=True)
43
 
44
+ pdf_url = (
45
+ "https://www.dropbox.com/s/y92xy7o5qb88yij/all_you_need_is_attention.pdf?dl=1"
46
+ )
47
  subprocess.run(["wget", pdf_url, "-O", src / "all_you_need_is_attention.pdf"])
48
  examples = [f for f in src.iterdir() if f.suffix in filetypes]
49
  examples = natsorted(examples)