Spaces:

pszemraj
/

document-summarization

Running on CPU Upgrade

pszemraj committed on Apr 30, 2023

Commit

55b49e6

1 Parent(s): 73feb19

🚸 🎨

Signed-off-by: peter szemraj <[email protected]>

Files changed (3) hide show

app.py CHANGED Viewed

@@ -334,6 +334,7 @@ if __name__ == "__main__":
                     uploaded_file = gr.File(
                         label="File Upload",
                         file_count="single",
                         type="file",
                     )
             with gr.Row():

                     uploaded_file = gr.File(
                         label="File Upload",
                         file_count="single",
+                        file_types=[".txt", ".md", ".pdf"],
                         type="file",
                     )
             with gr.Row():

summarize.py CHANGED Viewed

@@ -114,7 +114,9 @@ def summarize_via_tokenbatches(
         tokenizer (): the tokenizer to use for summarization
         batch_length (int, optional): the length of each batch. Defaults to 2048.
         batch_stride (int, optional): the stride of each batch. Defaults to 16. The stride is the number of tokens that overlap between batches.
     Returns:
         list: a list of dictionaries containing the input tokens, the summary, and the summary score
     """

         tokenizer (): the tokenizer to use for summarization
         batch_length (int, optional): the length of each batch. Defaults to 2048.
         batch_stride (int, optional): the stride of each batch. Defaults to 16. The stride is the number of tokens that overlap between batches.
+        min_batch_length (int, optional): the minimum length of each batch. Defaults to 512.
+        **kwargs: any additional arguments to pass to the model for inference
     Returns:
         list: a list of dictionaries containing the input tokens, the summary, and the summary score
     """

utils.py CHANGED Viewed

@@ -156,7 +156,7 @@ def extract_keywords(
     for keyword in keywords:
         if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
             final_keywords.append(keyword)
-    logger.info(f"Keywords (final):\t{final_keywords}")
     return final_keywords
@@ -178,9 +178,9 @@ def saves_summary(
     full_summary = "\n".join(sum_text)
     keywords = "_".join(extract_keywords(full_summary))
-    logger.info(f"kw:\t{keywords}")
     outpath = (
-        Path.cwd() / f"document_summary_{get_timestamp()}_{keywords}.txt"
         if outpath is None
         else Path(outpath)
     )

     for keyword in keywords:
         if not any(fuzz.ratio(keyword, other) > 70 for other in final_keywords):
             final_keywords.append(keyword)
+    logger.debug(f"Keywords (final):\t{final_keywords}")
     return final_keywords
     full_summary = "\n".join(sum_text)
     keywords = "_".join(extract_keywords(full_summary))
+    logger.debug(f"kw:\t{keywords}")
     outpath = (
+        Path.cwd() / f"document_summary_{keywords}_{get_timestamp()}.txt"
         if outpath is None
         else Path(outpath)
     )