Addaci committed on
Commit 77122ee
1 Parent(s): 40c679d

Update app.py (added two clickable buttons at the top of each tab page)


Add Gradio Interface and Launch
After your function definitions (correct_htr, summarize_text, answer_question), you need to define your Gradio interface and call demo.launch() to run the app; a minimal sketch of this pattern follows.
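For illustration only (this is not the committed code), a minimal self-contained version of that pattern might look like the following; the stub function here merely echoes its input and stands in for the real mT5-backed correct_htr defined in app.py:

import gradio as gr

# Placeholder: in app.py this function runs the mT5 model to correct raw HTR text.
def correct_htr(raw_htr_text):
    return raw_htr_text

with gr.Blocks() as demo:
    raw_input = gr.Textbox(lines=5, label="Raw HTR text")
    corrected = gr.Textbox(lines=5, label="Corrected HTR text")
    # Wire the button click: input textbox -> correct_htr -> output textbox
    gr.Button("Correct HTR").click(correct_htr, inputs=raw_input, outputs=corrected)

# Start the local Gradio server
demo.launch()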

Add two clickable buttons
To add two clickable buttons for accessing external links (the Admiralty Court Legal Glossary and the HCA 13/70 Ground Truth) from each tab window, you can use gr.HTML() to embed link-wrapped buttons in the interface, alongside the regular gr.Button() components that trigger the in-app actions.
Gradio does not currently support traditional sidebars, so rather than placing these links in a left-hand panel, they are placed in a row at the top of the interface (before the main content) for easy access, as sketched below.
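A minimal self-contained sketch of that top row, using the glossary URL that appears in the diff below (the HCA 13/70 link is added the same way with a second gr.HTML() call):

import gradio as gr

with gr.Blocks() as demo:
    with gr.Row():
        # An HTML anchor wrapping a <button> renders as a clickable link button;
        # target="_blank" opens the glossary in a new browser tab.
        gr.HTML('<a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary" '
                'target="_blank"><button>Admiralty Court Legal Glossary</button></a>')

demo.launch()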

Files changed (1)
  app.py  +44 -27
app.py CHANGED
@@ -8,49 +8,66 @@ tokenizer = T5Tokenizer.from_pretrained(model_name)
 model = T5ForConditionalGeneration.from_pretrained(model_name)
 
 def correct_htr(raw_htr_text):
-    # Tokenize the input text
     inputs = tokenizer(raw_htr_text, return_tensors="pt", max_length=512, truncation=True)
-    print("Tokenized Inputs for HTR Correction:", inputs) # Debugging
-
-    # Generate corrected text with max_length and beam search
     outputs = model.generate(**inputs, max_length=128, num_beams=4, early_stopping=True)
-    print("Generated Output (Tokens) for HTR Correction:", outputs) # Debugging
-
-    # Decode the output, skipping special tokens
     corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print("Decoded Output for HTR Correction:", corrected_text) # Debugging
-
     return corrected_text
 
 def summarize_text(legal_text):
-    # Tokenize the input text with the summarization prompt
     inputs = tokenizer("summarize: " + legal_text, return_tensors="pt", max_length=512, truncation=True)
-    print("Tokenized Inputs for Summarization:", inputs) # Debugging
-
-    # Generate summary with beam search for better results
     outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
-    print("Generated Summary (Tokens):", outputs) # Debugging
-
-    # Decode the output, skipping special tokens
     summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print("Decoded Summary:", summary) # Debugging
-
     return summary
 
 def answer_question(legal_text, question):
-    # Format input for question-answering
     formatted_input = f"question: {question} context: {legal_text}"
     inputs = tokenizer(formatted_input, return_tensors="pt", max_length=512, truncation=True)
-    print("Tokenized Inputs for Question Answering:", inputs) # Debugging
-
-    # Generate answer using beam search
     outputs = model.generate(**inputs, max_length=150, num_beams=4, early_stopping=True)
-    print("Generated Answer (Tokens):", outputs) # Debugging
-
-    # Decode the output, skipping special tokens
     answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    print("Decoded Answer:", answer) # Debugging
-
     return answer
 
+# Create the Gradio Blocks interface
+with gr.Blocks() as demo:
+    gr.Markdown("# mT5 Legal Assistant")
+    gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases.")
+
+    # Adding external link buttons at the top
+    with gr.Row():
+        gr.HTML('<a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary" target="_blank"><button>Admiralty Court Legal Glossary</button></a>')
+        gr.HTML('<a href="https://raw.githubusercontent.com/Addaci/HCA/refs/heads/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt" target="_blank"><button>HCA 13/70 Ground Truth (1654-55)</button></a>')
+
+    with gr.Tab("Correct HTR"):
+        gr.Markdown("### Correct Raw HTR Text")
+        raw_htr_input = gr.Textbox(lines=5, placeholder="Enter raw HTR text here...")
+        corrected_output = gr.Textbox(lines=5, placeholder="Corrected HTR text")
+        correct_button = gr.Button("Correct HTR")
+        clear_button = gr.Button("Clear")
+
+        correct_button.click(correct_htr, inputs=raw_htr_input, outputs=corrected_output)
+        clear_button.click(lambda: ("", ""), outputs=[raw_htr_input, corrected_output])
+
+    with gr.Tab("Summarize Legal Text"):
+        gr.Markdown("### Summarize Legal Text")
+        legal_text_input = gr.Textbox(lines=10, placeholder="Enter legal text to summarize...")
+        summary_output = gr.Textbox(lines=5, placeholder="Summary of legal text")
+        summarize_button = gr.Button("Summarize Text")
+        clear_button = gr.Button("Clear")
+
+        summarize_button.click(summarize_text, inputs=legal_text_input, outputs=summary_output)
+        clear_button.click(lambda: ("", ""), outputs=[legal_text_input, summary_output])
+
+    with gr.Tab("Answer Legal Question"):
+        gr.Markdown("### Answer a Question Based on Legal Text")
+        legal_text_input_q = gr.Textbox(lines=10, placeholder="Enter legal text...")
+        question_input = gr.Textbox(lines=2, placeholder="Enter your question...")
+        answer_output = gr.Textbox(lines=5, placeholder="Answer to your question")
+        answer_button = gr.Button("Get Answer")
+        clear_button = gr.Button("Clear")
+
+        answer_button.click(answer_question, inputs=[legal_text_input_q, question_input], outputs=answer_output)
+        clear_button.click(lambda: ("", "", ""), outputs=[legal_text_input_q, question_input, answer_output])
+
+# Launch the Gradio interface
+demo.launch()