Spaces:

MarineLives
/

MarineLives-Legal-Assistant

Build error

App Files Files Community

Addaci committed 27 days ago

Commit

0eb38a5

•

1 Parent(s): fb6b907

Update app.py (sort out logging; separate button boxes)

Browse files

Files changed (1) hide show

app.py +41 -14

app.py CHANGED Viewed

@@ -1,7 +1,11 @@
 import os
 import gradio as gr
 from transformers import MT5Tokenizer, MT5ForConditionalGeneration
 # Load your fine-tuned mT5 model
 # model_name is the identifier handed to from_pretrained() below
 # (presumably a Hugging Face Hub repo id -- confirm).
 model_name = "Addaci/mT5-small-experiment-13-checkpoint-2790"
 # The tokenizer is created once, when the module is loaded, and is the
 # one the correct_htr / summarize_text / answer_question handlers use.
 tokenizer = MT5Tokenizer.from_pretrained(model_name)
@@ -9,47 +13,74 @@ model = MT5ForConditionalGeneration.from_pretrained(model_name)
 def correct_htr(raw_htr_text):
     """Correct raw HTR text with the module-level mT5 model.

     The input is truncated to 512 tokens and the output is generated with
     a 4-beam search capped at 128 tokens.

     Args:
         raw_htr_text: Raw HTR text to correct.

     Returns:
         The decoded model output; an empty string when the input is
         missing or blank; or the exception message when tokenization,
         generation or decoding fails.
     """
     try:
         # Nothing to correct: skip the (expensive) beam search instead of
         # letting the model generate text from an empty prompt.
         if not raw_htr_text or not raw_htr_text.strip():
             return ""
         inputs = tokenizer(raw_htr_text, return_tensors="pt", max_length=512, truncation=True)
         outputs = model.generate(**inputs, max_length=128, num_beams=4, early_stopping=True)
         corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return corrected_text
     except Exception as e:
         # Failures are handed back as text rather than raised, so the
         # caller always receives a string.
         return str(e)
 def summarize_text(legal_text):
     """Summarize a legal text with the module-level mT5 model.

     The text is prefixed with "summarize: ", truncated to 512 tokens, and
     the summary is generated with a 4-beam search capped at 150 tokens.

     Args:
         legal_text: Text to summarize.

     Returns:
         The decoded summary; an empty string when the input is missing or
         blank; or the exception message when tokenization, generation or
         decoding fails.
     """
     try:
         # A blank text would leave only the task prefix as the prompt, so
         # there is nothing meaningful to summarize.
         if not legal_text or not legal_text.strip():
             return ""
         inputs = tokenizer("summarize: " + legal_text, return_tensors="pt", max_length=512, truncation=True)
         outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
         summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return summary
     except Exception as e:
         # Failures are handed back as text rather than raised, so the
         # caller always receives a string.
         return str(e)
 def answer_question(legal_text, question):
     """Answer a question about a legal text with the module-level mT5 model.

     The prompt is built as "question: <question> context: <legal_text>",
     truncated to 512 tokens, and the answer is generated with a 4-beam
     search capped at 150 tokens.

     Args:
         legal_text: Context the answer should be drawn from.
         question: Question to answer.

     Returns:
         The decoded answer; an empty string when either argument is
         missing or blank; or the exception message when tokenization,
         generation or decoding fails.
     """
     try:
         # Both parts of the prompt are required; without either one the
         # model would only see the template scaffolding.
         if not legal_text or not legal_text.strip():
             return ""
         if not question or not question.strip():
             return ""
         formatted_input = f"question: {question} context: {legal_text}"
         inputs = tokenizer(formatted_input, return_tensors="pt", max_length=512, truncation=True)
         outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
         answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return answer
     except Exception as e:
         # Failures are handed back as text rather than raised, so the
         # caller always receives a string.
         return str(e)
-# Create the Gradio Blocks interface with boxed clickable buttons and bold text
 with gr.Blocks() as demo:
     gr.Markdown("# mT5 Legal Assistant")
     gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases.")
-    # Adding external link buttons with a box around them and bold text
     with gr.Row():
-        gr.HTML('''<div style="border: 2px solid black; padding: 10px; display: inline-block;">
-                      <a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary" target="_blank">
-                          <button style="font-weight:bold;">Admiralty Court Legal Glossary</button>
-                      </a>
-                      <a href="https://raw.githubusercontent.com/Addaci/HCA/refs/heads/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt" target="_blank">
-                          <button style="font-weight:bold;">HCA 13/70 Ground Truth (1654-55)</button>
-                      </a>
-                   </div>''')
     with gr.Tab("Correct HTR"):
         gr.Markdown("### Correct Raw HTR Text")
@@ -84,7 +115,3 @@ with gr.Blocks() as demo:
 # Launch the Gradio interface
 demo.launch()

 import os
 import gradio as gr
+import logging
 from transformers import MT5Tokenizer, MT5ForConditionalGeneration
+# Setup logging
+logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 # Load your fine-tuned mT5 model
 # model_name is the identifier handed to from_pretrained() below
 # (presumably a Hugging Face Hub repo id -- confirm).
 model_name = "Addaci/mT5-small-experiment-13-checkpoint-2790"
 # The tokenizer is created once, when the module is loaded, and is the
 # one the correct_htr / summarize_text / answer_question handlers use.
 tokenizer = MT5Tokenizer.from_pretrained(model_name)
 def correct_htr(raw_htr_text):
     """Correct raw HTR text with the module-level mT5 model, with logging.

     The input is truncated to 512 tokens and the output is generated with
     a 4-beam search capped at 128 tokens. Each stage is logged at DEBUG
     level; failures are logged with their traceback.

     Args:
         raw_htr_text: Raw HTR text to correct.

     Returns:
         The decoded model output; an empty string when the input is
         missing or blank; or the exception message when tokenization,
         generation or decoding fails.
     """
     try:
         # Nothing to correct: skip the (expensive) beam search instead of
         # letting the model generate text from an empty prompt.
         if not raw_htr_text or not raw_htr_text.strip():
             return ""
         logging.info("Processing HTR correction...")
         inputs = tokenizer(raw_htr_text, return_tensors="pt", max_length=512, truncation=True)
         # %s arguments let the logging module do the formatting, and only
         # for records that are actually emitted.
         logging.debug("Tokenized Inputs for HTR Correction: %s", inputs)
         outputs = model.generate(**inputs, max_length=128, num_beams=4, early_stopping=True)
         logging.debug("Generated Output (Tokens) for HTR Correction: %s", outputs)
         corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
         logging.debug("Decoded Output for HTR Correction: %s", corrected_text)
         return corrected_text
     except Exception as e:
         # logging.exception logs at ERROR level and appends the traceback.
         logging.exception("Error in HTR Correction: %s", e)
         # Failures are handed back as text rather than raised, so the
         # caller always receives a string.
         return str(e)
 def summarize_text(legal_text):
     """Summarize a legal text with the module-level mT5 model, with logging.

     The text is prefixed with "summarize: ", truncated to 512 tokens, and
     the summary is generated with a 4-beam search capped at 150 tokens.
     Each stage is logged at DEBUG level; failures are logged with their
     traceback.

     Args:
         legal_text: Text to summarize.

     Returns:
         The decoded summary; an empty string when the input is missing or
         blank; or the exception message when tokenization, generation or
         decoding fails.
     """
     try:
         # A blank text would leave only the task prefix as the prompt, so
         # there is nothing meaningful to summarize.
         if not legal_text or not legal_text.strip():
             return ""
         logging.info("Processing summarization...")
         inputs = tokenizer("summarize: " + legal_text, return_tensors="pt", max_length=512, truncation=True)
         # %s arguments let the logging module do the formatting, and only
         # for records that are actually emitted.
         logging.debug("Tokenized Inputs for Summarization: %s", inputs)
         outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
         logging.debug("Generated Summary (Tokens): %s", outputs)
         summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
         logging.debug("Decoded Summary: %s", summary)
         return summary
     except Exception as e:
         # logging.exception logs at ERROR level and appends the traceback.
         logging.exception("Error in Summarization: %s", e)
         # Failures are handed back as text rather than raised, so the
         # caller always receives a string.
         return str(e)
 def answer_question(legal_text, question):
     """Answer a question about a legal text with the mT5 model, with logging.

     The prompt is built as "question: <question> context: <legal_text>",
     truncated to 512 tokens, and the answer is generated with a 4-beam
     search capped at 150 tokens. Each stage is logged at DEBUG level;
     failures are logged with their traceback.

     Args:
         legal_text: Context the answer should be drawn from.
         question: Question to answer.

     Returns:
         The decoded answer; an empty string when either argument is
         missing or blank; or the exception message when tokenization,
         generation or decoding fails.
     """
     try:
         # Both parts of the prompt are required; without either one the
         # model would only see the template scaffolding.
         if not legal_text or not legal_text.strip():
             return ""
         if not question or not question.strip():
             return ""
         logging.info("Processing question-answering...")
         formatted_input = f"question: {question} context: {legal_text}"
         inputs = tokenizer(formatted_input, return_tensors="pt", max_length=512, truncation=True)
         # %s arguments let the logging module do the formatting, and only
         # for records that are actually emitted.
         logging.debug("Tokenized Inputs for Question Answering: %s", inputs)
         outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
         logging.debug("Generated Answer (Tokens): %s", outputs)
         answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
         logging.debug("Decoded Answer: %s", answer)
         return answer
     except Exception as e:
         # logging.exception logs at ERROR level and appends the traceback.
         logging.exception("Error in Question Answering: %s", e)
         # Failures are handed back as text rather than raised, so the
         # caller always receives a string.
         return str(e)
+# Create the Gradio Blocks interface
 with gr.Blocks() as demo:
     gr.Markdown("# mT5 Legal Assistant")
     gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases.")
+    # Adding external link buttons with a box around each and bold text
     with gr.Row():
+        gr.HTML('''
+            <div style="display: flex; gap: 10px;">
+                <div style="border: 2px solid black; padding: 10px; display: inline-block;">
+                    <a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary" target="_blank">
+                        <button style="font-weight:bold;">Admiralty Court Legal Glossary</button>
+                    </a>
+                </div>
+                <div style="border: 2px solid black; padding: 10px; display: inline-block;">
+                    <a href="https://raw.githubusercontent.com/Addaci/HCA/refs/heads/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt" target="_blank">
+                        <button style="font-weight:bold;">HCA 13/70 Ground Truth (1654-55)</button>
+                    </a>
+                </div>
+            </div>
+        ''')
     with gr.Tab("Correct HTR"):
         gr.Markdown("### Correct Raw HTR Text")
 # Launch the Gradio interface
 demo.launch()