Spaces:
Build error
Build error
Update app.py (sort out logging; separate button boxes)
Browse files
app.py
CHANGED
@@ -1,7 +1,11 @@
|
|
1 |
import os
|
2 |
import gradio as gr
|
|
|
3 |
from transformers import MT5Tokenizer, MT5ForConditionalGeneration
|
4 |
|
|
|
|
|
|
|
5 |
# Load your fine-tuned mT5 model
|
6 |
model_name = "Addaci/mT5-small-experiment-13-checkpoint-2790"
|
7 |
tokenizer = MT5Tokenizer.from_pretrained(model_name)
|
@@ -9,47 +13,74 @@ model = MT5ForConditionalGeneration.from_pretrained(model_name)
|
|
9 |
|
10 |
def correct_htr(raw_htr_text):
    """Pass raw HTR text through the module-level mT5 model and return the decoded result.

    The input is tokenized to PyTorch tensors with a 512-token cap
    (truncation enabled).  Generation uses 4 beams with early stopping and
    a 128-token output cap; the first returned sequence is decoded with
    special tokens skipped.

    Returns the decoded string, or ``str(exception)`` if any step raises.
    """
    try:
        encoded = tokenizer(
            raw_htr_text,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        generated = model.generate(
            **encoded,
            max_length=128,
            num_beams=4,
            early_stopping=True,
        )
        first_sequence = generated[0]
        decoded = tokenizer.decode(first_sequence, skip_special_tokens=True)
    except Exception as e:
        # Best-effort for the UI: the error text is returned as the result.
        return str(e)
    else:
        return decoded
|
18 |
|
19 |
def summarize_text(legal_text):
    """Generate a summary of ``legal_text`` with the module-level mT5 model.

    The text is prefixed with ``"summarize: "``, tokenized to PyTorch
    tensors with a 512-token cap (truncation enabled), and run through
    4-beam generation with early stopping and a 150-token output cap.  The
    first returned sequence is decoded with special tokens skipped.

    Returns the decoded string, or ``str(exception)`` if any step raises.
    """
    try:
        prompt = "summarize: " + legal_text
        encoded = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        generated = model.generate(
            **encoded,
            max_length=150,
            num_beams=4,
            early_stopping=True,
        )
        decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as e:
        # Best-effort for the UI: the error text is returned as the result.
        return str(e)
    else:
        return decoded
|
27 |
|
28 |
def answer_question(legal_text, question):
    """Answer ``question`` about ``legal_text`` with the module-level mT5 model.

    The prompt is built as ``"question: <question> context: <legal_text>"``
    and tokenized to PyTorch tensors with a 512-token cap (truncation
    enabled).  Generation uses 4 beams with early stopping and a 150-token
    output cap; the first returned sequence is decoded with special tokens
    skipped.

    Returns the decoded string, or ``str(exception)`` if any step raises.
    """
    try:
        prompt = f"question: {question} context: {legal_text}"
        encoded = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        generated = model.generate(
            **encoded,
            max_length=150,
            num_beams=4,
            early_stopping=True,
        )
        decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
    except Exception as e:
        # Best-effort for the UI: the error text is returned as the result.
        return str(e)
    else:
        return decoded
|
37 |
|
38 |
-
# Create the Gradio Blocks interface
|
39 |
with gr.Blocks() as demo:
|
40 |
gr.Markdown("# mT5 Legal Assistant")
|
41 |
gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases.")
|
42 |
|
43 |
-
# Adding external link buttons with a box around
|
44 |
with gr.Row():
|
45 |
-
gr.HTML('''
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
with gr.Tab("Correct HTR"):
|
55 |
gr.Markdown("### Correct Raw HTR Text")
|
@@ -84,7 +115,3 @@ with gr.Blocks() as demo:
|
|
84 |
|
85 |
# Launch the Gradio interface
|
86 |
demo.launch()
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
1 |
import os
|
2 |
import gradio as gr
|
3 |
+
import logging
|
4 |
from transformers import MT5Tokenizer, MT5ForConditionalGeneration
|
5 |
|
6 |
+
# Setup logging
|
7 |
+
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
|
8 |
+
|
9 |
# Load your fine-tuned mT5 model
|
10 |
model_name = "Addaci/mT5-small-experiment-13-checkpoint-2790"
|
11 |
tokenizer = MT5Tokenizer.from_pretrained(model_name)
|
|
|
13 |
|
14 |
def correct_htr(raw_htr_text):
    """Pass raw HTR text through the module-level mT5 model and return the decoded result.

    The input is tokenized to PyTorch tensors with a 512-token cap
    (truncation enabled).  Generation uses 4 beams with early stopping and
    a 128-token output cap; the first returned sequence is decoded with
    special tokens skipped.  Each stage is logged: an INFO line on entry,
    DEBUG lines for the tokenized inputs, generated tokens and decoded text,
    and an ERROR line with traceback on failure.

    Returns the decoded string, or ``str(exception)`` if any step raises.
    """
    try:
        logging.info("Processing HTR correction...")

        # Stage 1: text -> tensors.
        encoded = tokenizer(
            raw_htr_text,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        logging.debug(f"Tokenized Inputs for HTR Correction: {encoded}")

        # Stage 2: beam-search generation.
        generated = model.generate(
            **encoded,
            max_length=128,
            num_beams=4,
            early_stopping=True,
        )
        logging.debug(f"Generated Output (Tokens) for HTR Correction: {generated}")

        # Stage 3: tokens -> text (first returned sequence only).
        decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
        logging.debug(f"Decoded Output for HTR Correction: {decoded}")
    except Exception as e:
        logging.error(f"Error in HTR Correction: {e}", exc_info=True)
        # Best-effort for the UI: the error text is returned as the result.
        return str(e)
    else:
        return decoded
|
29 |
|
30 |
def summarize_text(legal_text):
    """Generate a summary of ``legal_text`` with the module-level mT5 model.

    The text is prefixed with ``"summarize: "``, tokenized to PyTorch
    tensors with a 512-token cap (truncation enabled), and run through
    4-beam generation with early stopping and a 150-token output cap.  The
    first returned sequence is decoded with special tokens skipped.  Each
    stage is logged: an INFO line on entry, DEBUG lines for the tokenized
    inputs, generated tokens and decoded text, and an ERROR line with
    traceback on failure.

    Returns the decoded string, or ``str(exception)`` if any step raises.
    """
    try:
        logging.info("Processing summarization...")

        # Stage 1: prefixed text -> tensors.
        prompt = "summarize: " + legal_text
        encoded = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        logging.debug(f"Tokenized Inputs for Summarization: {encoded}")

        # Stage 2: beam-search generation.
        generated = model.generate(
            **encoded,
            max_length=150,
            num_beams=4,
            early_stopping=True,
        )
        logging.debug(f"Generated Summary (Tokens): {generated}")

        # Stage 3: tokens -> text (first returned sequence only).
        decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
        logging.debug(f"Decoded Summary: {decoded}")
    except Exception as e:
        logging.error(f"Error in Summarization: {e}", exc_info=True)
        # Best-effort for the UI: the error text is returned as the result.
        return str(e)
    else:
        return decoded
|
45 |
|
46 |
def answer_question(legal_text, question):
    """Answer ``question`` about ``legal_text`` with the module-level mT5 model.

    The prompt is built as ``"question: <question> context: <legal_text>"``
    and tokenized to PyTorch tensors with a 512-token cap (truncation
    enabled).  Generation uses 4 beams with early stopping and a 150-token
    output cap; the first returned sequence is decoded with special tokens
    skipped.  Each stage is logged: an INFO line on entry, DEBUG lines for
    the tokenized inputs, generated tokens and decoded text, and an ERROR
    line with traceback on failure.

    Returns the decoded string, or ``str(exception)`` if any step raises.
    """
    try:
        logging.info("Processing question-answering...")

        # Stage 1: question + context prompt -> tensors.
        prompt = f"question: {question} context: {legal_text}"
        encoded = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )
        logging.debug(f"Tokenized Inputs for Question Answering: {encoded}")

        # Stage 2: beam-search generation.
        generated = model.generate(
            **encoded,
            max_length=150,
            num_beams=4,
            early_stopping=True,
        )
        logging.debug(f"Generated Answer (Tokens): {generated}")

        # Stage 3: tokens -> text (first returned sequence only).
        decoded = tokenizer.decode(generated[0], skip_special_tokens=True)
        logging.debug(f"Decoded Answer: {decoded}")
    except Exception as e:
        logging.error(f"Error in Question Answering: {e}", exc_info=True)
        # Best-effort for the UI: the error text is returned as the result.
        return str(e)
    else:
        return decoded
|
62 |
|
63 |
+
# Create the Gradio Blocks interface
|
64 |
with gr.Blocks() as demo:
|
65 |
gr.Markdown("# mT5 Legal Assistant")
|
66 |
gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases.")
|
67 |
|
68 |
+
# Adding external link buttons with a box around each and bold text
|
69 |
with gr.Row():
|
70 |
+
gr.HTML('''
|
71 |
+
<div style="display: flex; gap: 10px;">
|
72 |
+
<div style="border: 2px solid black; padding: 10px; display: inline-block;">
|
73 |
+
<a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary" target="_blank">
|
74 |
+
<button style="font-weight:bold;">Admiralty Court Legal Glossary</button>
|
75 |
+
</a>
|
76 |
+
</div>
|
77 |
+
<div style="border: 2px solid black; padding: 10px; display: inline-block;">
|
78 |
+
<a href="https://raw.githubusercontent.com/Addaci/HCA/refs/heads/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt" target="_blank">
|
79 |
+
<button style="font-weight:bold;">HCA 13/70 Ground Truth (1654-55)</button>
|
80 |
+
</a>
|
81 |
+
</div>
|
82 |
+
</div>
|
83 |
+
''')
|
84 |
|
85 |
with gr.Tab("Correct HTR"):
|
86 |
gr.Markdown("### Correct Raw HTR Text")
|
|
|
115 |
|
116 |
# Launch the Gradio interface
|
117 |
demo.launch()
|
|
|
|
|
|
|
|