Addaci committed on
Commit
0eb38a5
1 Parent(s): fb6b907

Update app.py (sort out logging; separate button boxes)

Browse files
Files changed (1) hide show
  1. app.py +41 -14
app.py CHANGED
@@ -1,7 +1,11 @@
1
  import os
2
  import gradio as gr
 
3
  from transformers import MT5Tokenizer, MT5ForConditionalGeneration
4
 
 
 
 
5
  # Load your fine-tuned mT5 model
6
  model_name = "Addaci/mT5-small-experiment-13-checkpoint-2790"
7
  tokenizer = MT5Tokenizer.from_pretrained(model_name)
@@ -9,47 +13,74 @@ model = MT5ForConditionalGeneration.from_pretrained(model_name)
9
 
10
  def correct_htr(raw_htr_text):
11
  try:
 
12
  inputs = tokenizer(raw_htr_text, return_tensors="pt", max_length=512, truncation=True)
 
 
13
  outputs = model.generate(**inputs, max_length=128, num_beams=4, early_stopping=True)
 
 
14
  corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
15
  return corrected_text
16
  except Exception as e:
 
17
  return str(e)
18
 
19
  def summarize_text(legal_text):
20
  try:
 
21
  inputs = tokenizer("summarize: " + legal_text, return_tensors="pt", max_length=512, truncation=True)
 
 
22
  outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
 
 
23
  summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
24
  return summary
25
  except Exception as e:
 
26
  return str(e)
27
 
28
  def answer_question(legal_text, question):
29
  try:
 
30
  formatted_input = f"question: {question} context: {legal_text}"
31
  inputs = tokenizer(formatted_input, return_tensors="pt", max_length=512, truncation=True)
 
 
32
  outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
 
 
33
  answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
34
  return answer
35
  except Exception as e:
 
36
  return str(e)
37
 
38
- # Create the Gradio Blocks interface with boxed clickable buttons and bold text
39
  with gr.Blocks() as demo:
40
  gr.Markdown("# mT5 Legal Assistant")
41
  gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases.")
42
 
43
- # Adding external link buttons with a box around them and bold text
44
  with gr.Row():
45
- gr.HTML('''<div style="border: 2px solid black; padding: 10px; display: inline-block;">
46
- <a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary" target="_blank">
47
- <button style="font-weight:bold;">Admiralty Court Legal Glossary</button>
48
- </a>
49
- <a href="https://raw.githubusercontent.com/Addaci/HCA/refs/heads/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt" target="_blank">
50
- <button style="font-weight:bold;">HCA 13/70 Ground Truth (1654-55)</button>
51
- </a>
52
- </div>''')
 
 
 
 
 
 
53
 
54
  with gr.Tab("Correct HTR"):
55
  gr.Markdown("### Correct Raw HTR Text")
@@ -84,7 +115,3 @@ with gr.Blocks() as demo:
84
 
85
  # Launch the Gradio interface
86
  demo.launch()
87
-
88
-
89
-
90
-
 
1
  import os
2
  import gradio as gr
3
+ import logging
4
  from transformers import MT5Tokenizer, MT5ForConditionalGeneration
5
 
6
+ # Setup logging
7
+ logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
8
+
9
  # Load your fine-tuned mT5 model
10
  model_name = "Addaci/mT5-small-experiment-13-checkpoint-2790"
11
  tokenizer = MT5Tokenizer.from_pretrained(model_name)
 
13
 
14
  def correct_htr(raw_htr_text):
15
  try:
16
+ logging.info("Processing HTR correction...")
17
  inputs = tokenizer(raw_htr_text, return_tensors="pt", max_length=512, truncation=True)
18
+ logging.debug(f"Tokenized Inputs for HTR Correction: {inputs}")
19
+
20
  outputs = model.generate(**inputs, max_length=128, num_beams=4, early_stopping=True)
21
+ logging.debug(f"Generated Output (Tokens) for HTR Correction: {outputs}")
22
+
23
  corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
24
+ logging.debug(f"Decoded Output for HTR Correction: {corrected_text}")
25
  return corrected_text
26
  except Exception as e:
27
+ logging.error(f"Error in HTR Correction: {e}", exc_info=True)
28
  return str(e)
29
 
30
  def summarize_text(legal_text):
31
  try:
32
+ logging.info("Processing summarization...")
33
  inputs = tokenizer("summarize: " + legal_text, return_tensors="pt", max_length=512, truncation=True)
34
+ logging.debug(f"Tokenized Inputs for Summarization: {inputs}")
35
+
36
  outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
37
+ logging.debug(f"Generated Summary (Tokens): {outputs}")
38
+
39
  summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
40
+ logging.debug(f"Decoded Summary: {summary}")
41
  return summary
42
  except Exception as e:
43
+ logging.error(f"Error in Summarization: {e}", exc_info=True)
44
  return str(e)
45
 
46
  def answer_question(legal_text, question):
47
  try:
48
+ logging.info("Processing question-answering...")
49
  formatted_input = f"question: {question} context: {legal_text}"
50
  inputs = tokenizer(formatted_input, return_tensors="pt", max_length=512, truncation=True)
51
+ logging.debug(f"Tokenized Inputs for Question Answering: {inputs}")
52
+
53
  outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
54
+ logging.debug(f"Generated Answer (Tokens): {outputs}")
55
+
56
  answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
57
+ logging.debug(f"Decoded Answer: {answer}")
58
  return answer
59
  except Exception as e:
60
+ logging.error(f"Error in Question Answering: {e}", exc_info=True)
61
  return str(e)
62
 
63
+ # Create the Gradio Blocks interface
64
  with gr.Blocks() as demo:
65
  gr.Markdown("# mT5 Legal Assistant")
66
  gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases.")
67
 
68
+ # Adding external link buttons with a box around each and bold text
69
  with gr.Row():
70
+ gr.HTML('''
71
+ <div style="display: flex; gap: 10px;">
72
+ <div style="border: 2px solid black; padding: 10px; display: inline-block;">
73
+ <a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary" target="_blank">
74
+ <button style="font-weight:bold;">Admiralty Court Legal Glossary</button>
75
+ </a>
76
+ </div>
77
+ <div style="border: 2px solid black; padding: 10px; display: inline-block;">
78
+ <a href="https://raw.githubusercontent.com/Addaci/HCA/refs/heads/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt" target="_blank">
79
+ <button style="font-weight:bold;">HCA 13/70 Ground Truth (1654-55)</button>
80
+ </a>
81
+ </div>
82
+ </div>
83
+ ''')
84
 
85
  with gr.Tab("Correct HTR"):
86
  gr.Markdown("### Correct Raw HTR Text")
 
115
 
116
  # Launch the Gradio interface
117
  demo.launch()