# legal / app.py
# gauravchand11's picture
# Update app.py
# 2a8dc4e verified
import gradio as gr
import os
from datetime import datetime
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
import pytesseract
from PIL import Image
import fitz # PyMuPDF
import requests
import uuid
# Configuration
# Hugging Face model id loaded by LegalEaseAssistant for local text generation.
MODEL_NAME = "google/gemma-2b-it"
# Hard-coded values interpolated into the page header ("User" / "Last Updated");
# they are literals, not computed at runtime.
CURRENT_USER = "AkarshanGupta"
CURRENT_TIME = "2025-03-23 03:33:01"
# API Keys
# Secrets are read from the environment; each is None when its variable is unset.
HF_TOKEN = os.getenv('HF_TOKEN')
AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
LLAMA_API_KEY = os.getenv('LLAMA_API_KEY')
# URL that the chat tab posts user prompts to.
LLAMA_API_ENDPOINT = "https://api.llama.ai/v1/generate"
class TextExtractor:
    """Turn the different kinds of user input into plain text."""

    # File-name suffixes that are routed through OCR.
    IMAGE_EXTENSIONS = (
        ".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tif", ".tiff", ".webp",
    )

    @staticmethod
    def extract_text_from_input(input_file):
        """Return the text contained in *input_file*.

        Supported inputs:

        * ``str`` -- returned unchanged.
        * an object with a ``name`` ending in ``.pdf`` -- its ``read()`` bytes
          are parsed with PyMuPDF and every page's text is concatenated,
          each page followed by a blank line.
        * an object with a ``name`` ending in ``.txt`` -- its ``read()``
          result is returned (bytes are decoded as UTF-8).
        * an object with a ``name`` ending in an image suffix, or a
          ``PIL.Image.Image`` -- text is recognised with pytesseract.

        Failures are not raised: a human-readable ``"Error ..."`` string is
        returned instead, and anything unrecognised yields
        ``"Unsupported input type"``.
        """
        if isinstance(input_file, str):
            return input_file

        # Dispatch uploaded files on their (case-insensitive) file name.
        name = getattr(input_file, "name", None)
        lowered = name.lower() if isinstance(name, str) else ""

        if lowered.endswith(".pdf"):
            try:
                doc = fitz.open(stream=input_file.read(), filetype="pdf")
                try:
                    return "".join(page.get_text() + "\n\n" for page in doc)
                finally:
                    # Release the document even if a page fails to parse.
                    doc.close()
            except Exception as e:
                return f"Error extracting text from PDF: {str(e)}"

        if lowered.endswith(".txt"):
            try:
                data = input_file.read()
                if isinstance(data, bytes):
                    return data.decode("utf-8", errors="replace")
                return data
            except Exception as e:
                return f"Error extracting text from text file: {str(e)}"

        if lowered.endswith(TextExtractor.IMAGE_EXTENSIONS):
            try:
                # NOTE(review): assumes ``name`` is a readable filesystem path
                # (as for uploaded temp files) -- confirm against the caller.
                with Image.open(name) as image:
                    return pytesseract.image_to_string(image)
            except Exception as e:
                return f"Error extracting text from image: {str(e)}"

        if isinstance(input_file, Image.Image):
            try:
                return pytesseract.image_to_string(input_file)
            except Exception as e:
                return f"Error extracting text from image: {str(e)}"

        return "Unsupported input type"
class Translator:
    """Translate bullet-point text through the Azure Translator REST API."""

    # Bullet prefix used by the response formatter; kept byte-identical to the
    # literal used there so splitting and re-joining stay in sync.
    BULLET = 'β€’ '
    # Seconds to wait for the translation service before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Read the Azure credentials.

        Raises:
            ValueError: if ``AZURE_TRANSLATION_KEY`` is not configured.
        """
        # The constructor was previously spelled ``_init_`` and therefore never
        # ran, leaving ``key``/``region``/``endpoint`` unset on every instance.
        self.key = AZURE_TRANSLATION_KEY
        self.region = 'centralindia'
        self.endpoint = "https://api.cognitive.microsofttranslator.com"
        if not self.key:
            raise ValueError("Azure Translator not configured. Please set AZURE_TRANSLATION_KEY in Spaces settings.")

    def translate_text(self, text, target_language="en"):
        """Translate each bullet point of *text* into *target_language*.

        The text is split on newline + bullet marker, every non-empty point is
        sent to the service on its own, and the translations are re-joined
        with the same marker (the result starts with a newline and a bullet).
        Text that contains no translatable point is returned unchanged.

        Never raises: any failure is reported as a ``"Translation error: ..."``
        string.
        """
        try:
            separator = '\n' + self.BULLET
            marker = self.BULLET.strip()

            points = []
            for chunk in text.split(separator):
                point = chunk.strip()
                # The first point has no preceding newline, so the split leaves
                # its bullet marker attached; drop it to avoid translating the
                # marker and emitting a doubled bullet.
                if point.startswith(marker):
                    point = point[len(marker):].strip()
                if point:
                    points.append(point)

            if not points:
                return text

            # Loop-invariant request pieces.
            constructed_url = self.endpoint + '/translate'
            params = {
                'api-version': '3.0',
                'to': target_language
            }

            translated_points = []
            for point in points:
                headers = {
                    'Ocp-Apim-Subscription-Key': self.key,
                    'Ocp-Apim-Subscription-Region': self.region,
                    'Content-type': 'application/json',
                    # Fresh trace id per request.
                    'X-ClientTraceId': str(uuid.uuid4())
                }
                response = requests.post(
                    constructed_url,
                    params=params,
                    headers=headers,
                    json=[{'text': point}],
                    timeout=self.REQUEST_TIMEOUT
                )
                response.raise_for_status()
                translated_points.append(
                    response.json()[0]["translations"][0]["text"]
                )

            return separator + separator.join(translated_points)
        except Exception as e:
            return f"Translation error: {str(e)}"
class LegalEaseAssistant:
    """Legal-document helper backed by a local causal LM and a remote chat API."""

    # Seconds to wait for the chat API before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Authenticate with Hugging Face and load the tokenizer and model.

        Raises:
            ValueError: if ``HF_TOKEN`` is not set.
        """
        # The constructor was previously spelled ``_init_`` and therefore never
        # ran, so ``text_extractor``/``tokenizer``/``model`` were never created.
        if not HF_TOKEN:
            raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
        login(token=HF_TOKEN)
        # Initialize text_extractor first
        self.text_extractor = TextExtractor()
        self.tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            token=HF_TOKEN
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            token=HF_TOKEN,
            device_map="auto",
            torch_dtype=torch.float32
        )

    def format_response(self, text):
        """Split *text* on periods and return one bullet line per sentence.

        Empty fragments are dropped; each kept fragment gets its period back.
        """
        fragments = (s.strip() for s in text.split('.'))
        return '\n'.join('β€’ ' + s + '.' for s in fragments if s)

    def generate_response(self, input_file, task_type):
        """Run the local model on the extracted text for the given task.

        Args:
            input_file: anything accepted by
                ``TextExtractor.extract_text_from_input``.
            task_type: one of ``"simplify"``, ``"summary"``, ``"key_terms"``,
                ``"risk"``; any other value uses a generic analysis prompt.

        Returns:
            The model's completion formatted as bullet points.
        """
        text = self.text_extractor.extract_text_from_input(input_file)
        task_prompts = {
            "simplify": f"Simplify the following legal text in clear, plain language. Provide the response as separate points:\n\n{text}\n\nSimplified explanation:",
            "summary": f"Provide a concise summary of the following legal document as separate key points:\n\n{text}\n\nSummary:",
            "key_terms": f"Identify and explain the key legal terms and obligations in this text as separate points:\n\n{text}\n\nKey Terms:",
            "risk": f"Perform a risk analysis on the following legal document and list each risk as a separate point:\n\n{text}\n\nRisk Assessment:"
        }
        prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")

        # Put the input tensors on the same device as the model, which
        # ``device_map="auto"` may have placed on an accelerator.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=300,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )

        # The output sequence starts with the prompt tokens; slice them off
        # rather than searching the decoded string for the prompt suffix, which
        # breaks when the completion repeats that suffix.
        prompt_length = inputs["input_ids"].shape[-1]
        completion = self.tokenizer.decode(
            outputs[0][prompt_length:], skip_special_tokens=True
        )
        return self.format_response(completion.strip())

    def generate_chatbot_response(self, user_input):
        """Send *user_input* to the chat API and return the reply text.

        Never raises for configuration, transport, HTTP, or payload problems;
        each is reported as a human-readable string instead.
        """
        if not LLAMA_API_KEY:
            return "LLaMA API key not found. Please set the LLAMA_API_KEY environment variable."

        try:
            response = requests.post(
                LLAMA_API_ENDPOINT,
                headers={"Authorization": f"Bearer {LLAMA_API_KEY}"},
                json={"prompt": user_input, "max_tokens": 150},
                timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException as e:
            return f"Error: Could not reach LLaMA API: {str(e)}"

        if response.status_code == 401:
            return "Unauthorized: Please check your LLaMA API key."
        if response.status_code != 200:
            return f"Error: Received {response.status_code} status code from LLaMA API."

        try:
            return response.json()["choices"][0]["text"].strip()
        except (ValueError, KeyError, IndexError, TypeError, AttributeError):
            # Body was not JSON or did not have the expected shape.
            return "Error: Unexpected response format from LLaMA API."
def create_interface():
    """Build the LegalEase Gradio Blocks app and return it without launching.

    Creates one ``LegalEaseAssistant`` and one ``Translator``, then lays out a
    language selector, four document-analysis tabs (simplify, summary, key
    terms, risk) and a chat tab. Every handler's output is translated when the
    selected language is not English.

    Multi-line CSS/HTML literals are kept flush-left so the emitted text is
    unchanged.
    """
    assistant = LegalEaseAssistant()
    translator = Translator()
    SUPPORTED_LANGUAGES = {
        "English": "en",
        "Hindi": "hi",
        "Bengali": "bn",
        "Telugu": "te",
        "Tamil": "ta",
        "Marathi": "mr",
        "Gujarati": "gu",
        "Kannada": "kn",
        "Malayalam": "ml",
        "Punjabi": "pa",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "Chinese (Simplified)": "zh-Hans",
        "Japanese": "ja"
    }

    def process_with_translation(func, *args, target_lang="English"):
        """Call ``func(*args)`` and translate the result unless English is selected."""
        result = func(*args)
        if target_lang != "English":
            result = translator.translate_text(result, SUPPORTED_LANGUAGES[target_lang])
        return result

    def build_analysis_tab(language_selector, tab_title, placeholder,
                           button_label, output_label, task_type,
                           empty_message):
        """Add one upload-or-paste analysis tab wired to ``generate_response``."""
        with gr.Tab(tab_title):
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(
                        # Extensions need a leading dot; bare words are
                        # treated as file categories such as 'image'.
                        file_types=['.txt', '.pdf', 'image'],
                        label="πŸ“Ž Upload Document"
                    )
                    gr.HTML("<div style='height: 10px'></div>")
                    text_input = gr.Textbox(
                        label="✍ Or Type/Paste Text",
                        placeholder=placeholder,
                        lines=4
                    )
                    gr.HTML("<div style='height: 10px'></div>")
                    run_btn = gr.Button(
                        button_label,
                        variant="primary"
                    )
                with gr.Column(scale=1):
                    output_box = gr.Textbox(
                        label=output_label,
                        lines=12,
                        show_copy_button=True
                    )

            def handler(file, text, lang):
                # An uploaded file takes precedence over pasted text.
                input_source = file or text
                if not input_source:
                    return empty_message
                return process_with_translation(
                    assistant.generate_response,
                    input_source,
                    task_type,
                    target_lang=lang
                )

            run_btn.click(
                fn=handler,
                inputs=[file_input, text_input, language_selector],
                outputs=output_box
            )

    # (tab title, placeholder, button label, output label, task, empty-input message)
    analysis_tabs = [
        (
            "πŸ“ Simplify Language",
            "Enter your legal text here...",
            "πŸ” Simplify Language",
            "πŸ“‹ Simplified Explanation",
            "simplify",
            "Please provide some text or upload a document to analyze.",
        ),
        (
            "πŸ“š Document Summary",
            "Enter your legal document here...",
            "πŸ“‹ Generate Summary",
            "πŸ“‘ Document Summary",
            "summary",
            "Please provide some text or upload a document to summarize.",
        ),
        (
            "πŸ”‘ Key Terms",
            "Enter your legal document here...",
            "πŸ” Extract Key Terms",
            "πŸ”‘ Key Terms & Definitions",
            "key_terms",
            "Please provide some text or upload a document to analyze key terms.",
        ),
        (
            "⚠ Risk Analysis",
            "Enter your legal document here...",
            "πŸ” Analyze Risks",
            "⚠ Risk Assessment",
            "risk",
            "Please provide some text or upload a document to analyze risks.",
        ),
    ]

    with gr.Blocks(title="LegalEase", css="""
.gradio-container {max-width: 1200px; margin: auto;}
.header {text-align: center; margin-bottom: 2rem;}
.content {padding: 2rem;}
""") as demo:
        gr.HTML(f"""
<div style="text-align: center; background-color: #e0e0e0; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
<h1 style="color: #2c3e50; font-size: 2.5em; margin-bottom: 10px;">πŸ“œ LegalEase</h1>
<h2 style="color: #34495e; font-size: 1.5em; margin-bottom: 20px;">AI-Powered Legal Document Assistant</h2>
<div style="display: flex; justify-content: center; gap: 40px; color: #576574; font-size: 1.1em;">
<div style="background-color: #e0e0e0; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
<span style="font-weight: bold;">User:</span> {CURRENT_USER}
</div>
<div style="background-color: #e0e0e0; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
<span style="font-weight: bold;">Last Updated:</span> {CURRENT_TIME} UTC
</div>
</div>
</div>
""")
        language_selector = gr.Dropdown(
            choices=list(SUPPORTED_LANGUAGES.keys()),
            value="English",
            label="Select Output Language",
            scale=1
        )
        with gr.Tabs():
            # Simplify / Summary / Key Terms / Risk tabs share one layout.
            for tab_spec in analysis_tabs:
                build_analysis_tab(language_selector, *tab_spec)

            # Legal Assistant Chat Tab
            with gr.Tab("πŸ€– Legal Assistant Chat"):
                chatbot_input = gr.Textbox(
                    label="πŸ’¬ Your Message",
                    placeholder="Ask me anything about legal matters...",
                    lines=2
                )
                chatbot_output = gr.Textbox(
                    label="πŸ€– Assistant Response",
                    lines=10,
                    show_copy_button=True
                )
                chatbot_btn = gr.Button(
                    "πŸ’¬ Send Message",
                    variant="primary"
                )

                def chatbot_handler(user_input, lang):
                    if not user_input:
                        return "Please type a message to start the conversation."
                    # Same translate-unless-English path as the analysis tabs.
                    return process_with_translation(
                        assistant.generate_chatbot_response,
                        user_input,
                        target_lang=lang
                    )

                chatbot_btn.click(
                    fn=chatbot_handler,
                    inputs=[chatbot_input, language_selector],
                    outputs=chatbot_output
                )
        gr.HTML(f"""
<div style="text-align: center; margin-top: 20px; padding: 20px; background-color: #e0e0e0; border-radius: 10px;">
<p style="color: #576574; margin: 0;">Made by Team Ice Age</p>
</div>
""")
    return demo
def main():
    """Create the interface, enable its queue, and launch it with ``share=True``."""
    app = create_interface()
    app.queue()
    app.launch(share=True)
# Start the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()