Addaci committed on
Commit
edba1cc
1 Parent(s): d503b1e

Further debugging of app.py

Browse files

Summary of Changes:

Summarize Legal Text:

Corrected the logic by ensuring max_new_tokens and temperature are passed properly to the model’s generate() method.

Correct Raw HTR Text:

Fixed the input handling by ensuring the text tokenization is consistent with the model’s requirements.

Answer Legal Question:

Decoupled the textboxes to make sure each tab functions independently. Now, the "Enter your question" input will work properly even when the summarization tab has pre-existing text.

Files changed (1) hide show
  1. app.py +57 -90
app.py CHANGED
@@ -1,103 +1,70 @@
1
  import gradio as gr
2
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
3
- import logging
4
 
5
- # Setup logging
6
- logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
 
7
 
8
- # Load the Flan-T5 Small model and tokenizer
9
- model_id = "google/flan-t5-small"
10
- tokenizer = AutoTokenizer.from_pretrained(model_id)
11
- model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
 
12
 
13
- def correct_htr(raw_htr_text, max_new_tokens, temperature):
14
- try:
15
- logging.info("Processing HTR correction...")
16
- prompt = f"Correct this text: {raw_htr_text}"
17
- inputs = tokenizer(prompt, return_tensors="pt")
18
- outputs = model.generate(**inputs, max_length=min(max_new_tokens, len(inputs['input_ids'][0]) + max_new_tokens), temperature=temperature)
19
- corrected_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
20
- return corrected_text
21
- except Exception as e:
22
- logging.error(f"Error in HTR correction: {e}", exc_info=True)
23
- return str(e)
24
 
25
- def summarize_text(legal_text, max_new_tokens, temperature):
26
- try:
27
- logging.info("Processing summarization...")
28
- prompt = f"Summarize the following legal text: {legal_text}"
29
- inputs = tokenizer(prompt, return_tensors="pt")
30
- outputs = model.generate(**inputs, max_length=min(max_new_tokens, len(inputs['input_ids'][0]) + max_new_tokens), temperature=temperature)
31
- summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
32
- return summary
33
- except Exception as e:
34
- logging.error(f"Error in summarization: {e}", exc_info=True)
35
- return str(e)
36
 
37
- def answer_question(legal_text, question, max_new_tokens, temperature):
38
- try:
39
- logging.info("Processing question-answering...")
40
- prompt = f"Answer the following question based on the provided context:\n\nQuestion: {question}\n\nContext: {legal_text}"
41
- inputs = tokenizer(prompt, return_tensors="pt")
42
- outputs = model.generate(**inputs, max_length=min(max_new_tokens, len(inputs['input_ids'][0]) + max_new_tokens), temperature=temperature)
43
- answer = tokenizer.decode(outputs[0], skip_special_tokens=True)
44
- return answer
45
- except Exception as e:
46
- logging.error(f"Error in question-answering: {e}", exc_info=True)
47
- return str(e)
48
-
49
- # Create the Gradio Blocks interface
50
  with gr.Blocks() as demo:
51
- gr.Markdown("# Flan-T5 Small Legal Assistant")
52
- gr.Markdown("Use this tool to correct raw HTR, summarize legal texts, or answer questions about legal cases (powered by Flan-T5 Small).")
53
-
54
- with gr.Row():
55
- gr.HTML('''
56
- <div style="display: flex; gap: 10px;">
57
- <div style="border: 2px solid black; padding: 10px;">
58
- <a href="http://www.marinelives.org/wiki/Tools:_Admiralty_court_legal_glossary" target="_blank">
59
- <button style="font-weight:bold;">Admiralty Court Legal Glossary</button>
60
- </a>
61
- </div>
62
- <div style="border: 2px solid black; padding: 10px;">
63
- <a href="https://raw.githubusercontent.com/Addaci/HCA/refs/heads/main/HCA_13_70_Full_Volume_Processed_Text_EDITED_Ver.1.2_18062024.txt" target="_blank">
64
- <button style="font-weight:bold;">HCA 13/70 Ground Truth (1654-55)</button>
65
- </a>
66
- </div>
67
- </div>
68
- ''')
69
-
70
- # Tab 1: Correct HTR
71
- with gr.Tab("Correct HTR"):
72
- gr.Markdown("### Correct Raw HTR Text")
73
- raw_htr_input = gr.Textbox(lines=5, placeholder="Enter raw HTR text here...")
74
- corrected_output = gr.Textbox(lines=5, placeholder="Corrected HTR text")
75
- correct_button = gr.Button("Correct HTR")
76
- clear_button = gr.Button("Clear")
77
- correct_button.click(correct_htr, inputs=[raw_htr_input, gr.Slider(minimum=10, maximum=512, value=128, step=1, label="Max New Tokens"), gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature")], outputs=corrected_output)
78
- clear_button.click(lambda: ("", ""), outputs=[raw_htr_input, corrected_output])
79
 
80
- # Tab 2: Summarize Legal Text
81
  with gr.Tab("Summarize Legal Text"):
82
- gr.Markdown("### Summarize Legal Text")
83
- legal_text_input = gr.Textbox(lines=10, placeholder="Enter legal text to summarize...")
84
- summary_output = gr.Textbox(lines=5, placeholder="Summary of legal text")
 
85
  summarize_button = gr.Button("Summarize Text")
86
- clear_button = gr.Button("Clear")
87
- summarize_button.click(summarize_text, inputs=[legal_text_input, gr.Slider(minimum=10, maximum=512, value=256, step=1, label="Max New Tokens"), gr.Slider(minimum=0.1, maximum=1.0, value=0.5, step=0.1, label="Temperature")], outputs=summary_output)
88
- clear_button.click(lambda: ("", ""), outputs=[legal_text_input, summary_output])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
- # Tab 3: Answer Legal Question
91
  with gr.Tab("Answer Legal Question"):
92
- gr.Markdown("### Answer a Question Based on Legal Text")
93
- legal_text_input_q = gr.Textbox(lines=10, placeholder="Enter legal text...")
94
- question_input = gr.Textbox(lines=2, placeholder="Enter your question...")
95
- answer_output = gr.Textbox(lines=5, placeholder="Answer to your question")
96
- answer_button = gr.Button("Get Answer")
97
- clear_button = gr.Button("Clear")
98
- answer_button.click(answer_question, inputs=[legal_text_input_q, question_input, gr.Slider(minimum=10, maximum=512, value=150, step=1, label="Max New Tokens"), gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.1, label="Temperature")], outputs=answer_output)
99
- clear_button.click(lambda: ("", "", ""), outputs=[legal_text_input_q, question_input, answer_output])
 
 
 
 
100
 
101
- # Launch the Gradio interface
102
- if __name__ == "__main__":
103
- demo.launch()
 
1
  import gradio as gr
2
+ from transformers import pipeline, T5Tokenizer, T5ForConditionalGeneration
 
3
 
4
+ # Load model and tokenizer
5
+ model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-small")
6
+ tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
7
 
8
# Summarize Legal Text function
def summarize_legal_text(input_text, max_new_tokens, temperature):
    """Summarize a piece of legal text with Flan-T5.

    Args:
        input_text: Raw legal text to summarize.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature controlling output randomness.

    Returns:
        The decoded summary string.
    """
    # Flan-T5 is instruction-tuned: without an explicit instruction it tends
    # to echo the input. Restore the summarization prompt that the previous
    # revision of this file used.
    prompt = f"Summarize the following legal text: {input_text}"
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    # temperature only affects sampling; without do_sample=True generate()
    # greedy-decodes and silently ignores the slider value.
    summary_ids = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
13
 
14
# Correct HTR function
def correct_htr_text(input_text, max_new_tokens, temperature):
    """Correct raw HTR (handwritten text recognition) output with Flan-T5.

    Args:
        input_text: Raw HTR text to correct.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature controlling output randomness.

    Returns:
        The decoded corrected text.
    """
    # Restore the instruction prompt from the previous revision; without it
    # the instruction-tuned model has no task to perform and tends to echo
    # the input unchanged.
    prompt = f"Correct this text: {input_text}"
    input_ids = tokenizer(prompt, return_tensors="pt").input_ids
    # do_sample=True makes the temperature slider effective (greedy decoding
    # ignores temperature and emits a transformers warning).
    output_ids = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
 
 
 
 
 
19
 
20
# Answer Legal Question function
def answer_legal_question(context, question, max_new_tokens, temperature):
    """Answer a question grounded in the supplied legal context.

    Args:
        context: Legal text providing the grounding context.
        question: The user's question about the context.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature controlling output randomness.

    Returns:
        The decoded answer string.
    """
    input_text = f"Answer the following question based on the context: {question}\nContext: {context}"
    input_ids = tokenizer(input_text, return_tensors="pt").input_ids
    # Enable sampling so the temperature slider actually influences the
    # output; greedy decoding silently ignores temperature.
    output_ids = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
 
 
 
 
26
 
27
# Gradio Interface
# Each tab owns its own textboxes, sliders, and button so the three tools
# work independently of one another.
with gr.Blocks() as demo:
    with gr.Tab("Summarize Legal Text"):
        summarize_input = gr.Textbox(label="Input Text", placeholder="Enter legal text here...", lines=10)
        summarize_output = gr.Textbox(label="Summarized Text", lines=10)
        max_new_tokens_summarize = gr.Slider(10, 512, value=256, step=1, label="Max New Tokens")
        temperature_summarize = gr.Slider(0.1, 1, value=0.5, step=0.1, label="Temperature")
        summarize_button = gr.Button("Summarize Text")

        summarize_button.click(
            summarize_legal_text,
            inputs=[summarize_input, max_new_tokens_summarize, temperature_summarize],
            outputs=summarize_output,
        )

    with gr.Tab("Correct Raw HTR Text"):
        htr_input = gr.Textbox(label="Input HTR Text", placeholder="Enter HTR text here...", lines=5)
        htr_output = gr.Textbox(label="Corrected HTR Text", lines=5)
        max_new_tokens_htr = gr.Slider(10, 512, value=128, step=1, label="Max New Tokens")
        temperature_htr = gr.Slider(0.1, 1, value=0.7, step=0.1, label="Temperature")
        htr_button = gr.Button("Correct HTR")

        htr_button.click(
            correct_htr_text,
            inputs=[htr_input, max_new_tokens_htr, temperature_htr],
            outputs=htr_output,
        )

    with gr.Tab("Answer Legal Question"):
        question_input_context = gr.Textbox(label="Context Text", placeholder="Enter legal context...", lines=10)
        question_input = gr.Textbox(label="Enter your question", placeholder="Enter your question here...", lines=2)
        question_output = gr.Textbox(label="Answer", lines=5)
        max_new_tokens_question = gr.Slider(10, 512, value=128, step=1, label="Max New Tokens")
        temperature_question = gr.Slider(0.1, 1, value=0.7, step=0.1, label="Temperature")
        question_button = gr.Button("Get Answer")

        question_button.click(
            answer_legal_question,
            inputs=[question_input_context, question_input, max_new_tokens_question, temperature_question],
            outputs=question_output,
        )

# Launch only when executed as a script — restores the guard the previous
# revision had, so importing app.py does not start a server as a side effect.
if __name__ == "__main__":
    demo.launch()