# Source: Hugging Face Space "gauravchand11/legal" (Space status when captured: Sleeping).
# File size: 18,174 bytes.

import os
import re
import uuid
from datetime import datetime

import fitz  # PyMuPDF
import gradio as gr
import pytesseract
import requests
import torch
from huggingface_hub import login
from PIL import Image
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- Static configuration -------------------------------------------------
# Checkpoint loaded by LegalEaseAssistant for the document-analysis tasks.
MODEL_NAME = "google/gemma-2b-it"
# Values rendered verbatim in the page header ("User" / "Last Updated").
CURRENT_USER = "AkarshanGupta"
CURRENT_TIME = "2025-03-23 03:33:01"

# --- Credentials and endpoints ---------------------------------------------
# Each secret is read from the process environment; a missing variable
# yields None and is reported by the component that needs it.
HF_TOKEN = os.environ.get('HF_TOKEN')
AZURE_TRANSLATION_KEY = os.environ.get('AZURE_TRANSLATION_KEY')
LLAMA_API_KEY = os.environ.get('LLAMA_API_KEY')
LLAMA_API_ENDPOINT = "https://api.llama.ai/v1/generate"

class TextExtractor:
    """Convert the inputs accepted by the UI into plain text."""

    # File-name suffixes that are sent through OCR when an upload is received.
    IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tif', '.tiff', '.webp')

    @staticmethod
    def extract_text_from_input(input_file):
        """Return the text contained in ``input_file``.

        Supported inputs:

        * ``str`` - returned unchanged (text typed or pasted by the user).
        * ``PIL.Image.Image`` - OCR'd with pytesseract.
        * file-like object with a string ``name`` attribute and a ``read()``
          method - handled by extension: ``.txt`` is read (bytes are decoded
          as UTF-8), ``.pdf`` is parsed page by page with PyMuPDF, and common
          image extensions are opened with PIL and OCR'd.

        Returns:
            The extracted text. Extraction failures are reported as an
            ``"Error extracting text from ..."`` string rather than raised,
            and anything else yields ``"Unsupported input type"``.
        """
        if isinstance(input_file, str):
            return input_file

        # A non-string ``name`` (e.g. an integer file descriptor) must not
        # crash the extension checks below, so normalise it once up front.
        name = getattr(input_file, 'name', None)
        lowered_name = name.lower() if isinstance(name, str) else ''

        if lowered_name.endswith('.txt'):
            try:
                raw = input_file.read()
            except Exception as e:
                return f"Error extracting text from text file: {str(e)}"
            if isinstance(raw, bytes):
                # Replace undecodable bytes instead of failing the request.
                return raw.decode('utf-8', errors='replace')
            return raw

        if isinstance(input_file, Image.Image):
            try:
                return pytesseract.image_to_string(input_file)
            except Exception as e:
                return f"Error extracting text from image: {str(e)}"

        if lowered_name.endswith('.pdf'):
            try:
                doc = fitz.open(stream=input_file.read(), filetype="pdf")
                try:
                    # Each page is followed by a blank line, as before.
                    return "".join(page.get_text() + "\n\n" for page in doc)
                finally:
                    # Always release the document, even if a page fails.
                    doc.close()
            except Exception as e:
                return f"Error extracting text from PDF: {str(e)}"

        if lowered_name.endswith(TextExtractor.IMAGE_EXTENSIONS):
            try:
                # NOTE(review): assumes the upload is a binary file object
                # supporting read/seek, like the PDF branch above.
                with Image.open(input_file) as uploaded_image:
                    return pytesseract.image_to_string(uploaded_image)
            except Exception as e:
                return f"Error extracting text from image: {str(e)}"

        return "Unsupported input type"

class Translator:
    """Translate bullet-point text with the Azure Translator Text API (v3.0)."""

    # Marker separating bullet points in the text handed to translate_text.
    BULLET_SEPARATOR = '\n• '
    # Seconds to wait for the translation service before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Read the Azure credentials from the module configuration.

        Raises:
            ValueError: if ``AZURE_TRANSLATION_KEY`` is not set.
        """
        self.key = AZURE_TRANSLATION_KEY
        self.region = 'centralindia'
        self.endpoint = "https://api.cognitive.microsofttranslator.com"
        
        if not self.key:
            raise ValueError("Azure Translator not configured. Please set AZURE_TRANSLATION_KEY in Spaces settings.")

    @staticmethod
    def _split_points(text):
        """Split bullet-formatted text into its non-empty points, without bullet markers."""
        points = []
        for chunk in text.split(Translator.BULLET_SEPARATOR):
            point = chunk.strip()
            # The first chunk still carries its own bullet when the text
            # starts with "• "; drop it so the result is not double-bulleted.
            if point.startswith('•'):
                point = point[1:].strip()
            if point:
                points.append(point)
        return points

    def translate_text(self, text, target_language="en"):
        """Translate each bullet point of ``text`` into ``target_language``.

        Args:
            text: Bullet-formatted text whose points are separated by "\\n• ".
            target_language: Language code sent as the ``to`` parameter.

        Returns:
            The translated points, each prefixed with "\\n• "; an empty string
            when ``text`` contains no points; or a ``"Translation error: ..."``
            message if anything fails (this method never raises).
        """
        try:
            points = self._split_points(text)
            if not points:
                return ""

            # These do not change between points, so build them once.
            constructed_url = self.endpoint + '/translate'
            params = {
                'api-version': '3.0',
                'to': target_language
            }

            translated_points = []
            for point in points:
                headers = {
                    'Ocp-Apim-Subscription-Key': self.key,
                    'Ocp-Apim-Subscription-Region': self.region,
                    'Content-type': 'application/json',
                    # A fresh trace id per request.
                    'X-ClientTraceId': str(uuid.uuid4())
                }
                response = requests.post(
                    constructed_url,
                    params=params,
                    headers=headers,
                    json=[{'text': point}],
                    # Without a timeout a stalled connection would hang the UI.
                    timeout=self.REQUEST_TIMEOUT
                )
                response.raise_for_status()
                translated_points.append(response.json()[0]["translations"][0]["text"])

            return self.BULLET_SEPARATOR + self.BULLET_SEPARATOR.join(translated_points)

        except Exception as e:
            # Best-effort by design: the UI displays whatever string comes back.
            return f"Translation error: {str(e)}"

class LegalEaseAssistant:
    """Legal-document assistant backed by a local causal LM and a remote chat API."""

    # Sentence boundary: whitespace that follows ., ! or ?. Splitting here
    # (instead of on every '.') keeps "3.2" or "$1.5" inside one sentence.
    _SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?])\s+')
    # Seconds to wait for the chat API before giving up.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Log in to Hugging Face and load the tokenizer and model.

        Raises:
            ValueError: if ``HF_TOKEN`` is not set.
        """
        if not HF_TOKEN:
            raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
        
        login(token=HF_TOKEN)
        
        # Initialize text_extractor first
        self.text_extractor = TextExtractor()
        
        self.tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME, 
            token=HF_TOKEN
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME, 
            token=HF_TOKEN,
            device_map="auto",
            torch_dtype=torch.float32
        )
    
    def format_response(self, text):
        """Format ``text`` as one "• sentence" line per sentence.

        Sentences are split at whitespace following '.', '!' or '?'. A
        sentence that does not already end with one of those characters gets
        a trailing '.'. Returns an empty string for empty/blank input.
        """
        bullet_points = []
        for sentence in self._SENTENCE_BOUNDARY.split(text):
            sentence = sentence.strip()
            if not sentence:
                continue
            if sentence[-1] not in '.!?':
                sentence += '.'
            bullet_points.append('• ' + sentence)
        return '\n'.join(bullet_points)
    
    def generate_response(self, input_file, task_type):
        """Run one analysis task on a document and return bullet-formatted text.

        Args:
            input_file: Anything ``TextExtractor.extract_text_from_input`` accepts.
            task_type: One of "simplify", "summary", "key_terms", "risk";
                any other value uses a generic analysis prompt.

        Returns:
            The model's answer formatted by ``format_response``.
        """
        text = self.text_extractor.extract_text_from_input(input_file)
        
        task_prompts = {
            "simplify": f"Simplify the following legal text in clear, plain language. Provide the response as separate points:\n\n{text}\n\nSimplified explanation:",
            "summary": f"Provide a concise summary of the following legal document as separate key points:\n\n{text}\n\nSummary:",
            "key_terms": f"Identify and explain the key legal terms and obligations in this text as separate points:\n\n{text}\n\nKey Terms:",
            "risk": f"Perform a risk analysis on the following legal document and list each risk as a separate point:\n\n{text}\n\nRisk Assessment:"
        }
        
        prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")
        
        # device_map="auto" may place the model on an accelerator; the input
        # tensors have to live on the same device as the model.
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        
        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = self.model.generate(
                **inputs, 
                max_new_tokens=300,
                num_return_sequences=1,
                do_sample=True,
                temperature=0.7,
                top_p=0.9
            )
        
        # The output sequence starts with the prompt tokens; decode only what
        # was generated after them. This is robust even when the document or
        # the answer happens to repeat the prompt's closing marker.
        prompt_length = inputs["input_ids"].shape[-1]
        raw_response = self.tokenizer.decode(
            outputs[0][prompt_length:],
            skip_special_tokens=True
        ).strip()
        
        return self.format_response(raw_response)
    
    def generate_chatbot_response(self, user_input):
        """Send ``user_input`` to the LLaMA API and return the reply text.

        Problems (missing key, network failure, non-200 status, unexpected
        payload) are returned as human-readable strings instead of raised,
        because the UI displays whatever this method returns.
        """
        if not LLAMA_API_KEY:
            return "LLaMA API key not found. Please set the LLAMA_API_KEY environment variable."
        
        try:
            response = requests.post(
                LLAMA_API_ENDPOINT,
                headers={"Authorization": f"Bearer {LLAMA_API_KEY}"},
                json={"prompt": user_input, "max_tokens": 150},
                # Without a timeout a stalled connection would hang the UI.
                timeout=self.REQUEST_TIMEOUT
            )
        except requests.RequestException as e:
            return f"Error: Could not reach LLaMA API: {str(e)}"
        
        if response.status_code == 401:
            return "Unauthorized: Please check your LLaMA API key."
        elif response.status_code != 200:
            return f"Error: Received {response.status_code} status code from LLaMA API."
        
        try:
            return response.json()["choices"][0]["text"].strip()
        except (ValueError, KeyError, IndexError, TypeError, AttributeError):
            return "Error: Unexpected response format from LLaMA API."

def create_interface():
    """Build the LegalEase Gradio app and return its ``gr.Blocks`` instance.

    The page consists of a header, an output-language dropdown, four document
    analysis tabs (simplify, summary, key terms, risk) that share one layout,
    a chat tab, and a footer. Instantiates ``LegalEaseAssistant`` and
    ``Translator``, so any exception raised by their constructors propagates.
    """
    assistant = LegalEaseAssistant()
    translator = Translator()
    
    SUPPORTED_LANGUAGES = {
        "English": "en",
        "Hindi": "hi",
        "Bengali": "bn",
        "Telugu": "te",
        "Tamil": "ta",
        "Marathi": "mr",
        "Gujarati": "gu",
        "Kannada": "kn",
        "Malayalam": "ml",
        "Punjabi": "pa",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "Chinese (Simplified)": "zh-Hans",
        "Japanese": "ja"
    }

    # One entry per document-analysis tab; the tabs differ only in these
    # labels, the task passed to the assistant, and the empty-input message.
    ANALYSIS_TABS = [
        {
            "tab_title": "📝 Simplify Language",
            "placeholder": "Enter your legal text here...",
            "button_label": "🔍 Simplify Language",
            "output_label": "📋 Simplified Explanation",
            "task_type": "simplify",
            "empty_message": "Please provide some text or upload a document to analyze.",
        },
        {
            "tab_title": "📚 Document Summary",
            "placeholder": "Enter your legal document here...",
            "button_label": "📋 Generate Summary",
            "output_label": "📑 Document Summary",
            "task_type": "summary",
            "empty_message": "Please provide some text or upload a document to summarize.",
        },
        {
            "tab_title": "🔑 Key Terms",
            "placeholder": "Enter your legal document here...",
            "button_label": "🔍 Extract Key Terms",
            "output_label": "🔑 Key Terms & Definitions",
            "task_type": "key_terms",
            "empty_message": "Please provide some text or upload a document to analyze key terms.",
        },
        {
            "tab_title": "⚠ Risk Analysis",
            "placeholder": "Enter your legal document here...",
            "button_label": "🔍 Analyze Risks",
            "output_label": "⚠ Risk Assessment",
            "task_type": "risk",
            "empty_message": "Please provide some text or upload a document to analyze risks.",
        },
    ]
    
    def process_with_translation(func, *args, target_lang="English"):
        """Call ``func(*args)`` and translate the result unless English is selected.

        An unknown or missing language (e.g. a cleared dropdown) leaves the
        result untranslated instead of raising ``KeyError``.
        """
        result = func(*args)
        language_code = SUPPORTED_LANGUAGES.get(target_lang)
        if language_code and target_lang != "English":
            result = translator.translate_text(result, language_code)
        return result

    def build_analysis_tab(language_selector, tab_title, placeholder,
                           button_label, output_label, task_type, empty_message):
        """Create one upload/paste analysis tab and wire its button to the assistant."""
        with gr.Tab(tab_title):
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(
                        # Extensions are dotted, as in the Gradio documentation.
                        file_types=['.txt', '.pdf', 'image'],
                        label="📎 Upload Document"
                    )
                    gr.HTML("<div style='height: 10px'></div>")
                    text_input = gr.Textbox(
                        label="✍ Or Type/Paste Text",
                        placeholder=placeholder,
                        lines=4
                    )
                    gr.HTML("<div style='height: 10px'></div>")
                    run_btn = gr.Button(
                        button_label,
                        variant="primary"
                    )
                
                with gr.Column(scale=1):
                    output_box = gr.Textbox(
                        label=output_label,
                        lines=12,
                        show_copy_button=True
                    )
            
            def handler(file, text, lang):
                # An uploaded file takes precedence over pasted text.
                input_source = file or text
                if not input_source:
                    return empty_message
                return process_with_translation(
                    assistant.generate_response,
                    input_source,
                    task_type,
                    target_lang=lang
                )
            
            run_btn.click(
                fn=handler,
                inputs=[file_input, text_input, language_selector],
                outputs=output_box
            )

    with gr.Blocks(title="LegalEase", css="""
        .gradio-container {max-width: 1200px; margin: auto;}
        .header {text-align: center; margin-bottom: 2rem;}
        .content {padding: 2rem;}
    """) as demo:
        gr.HTML(f"""
        <div style="text-align: center; background-color: #e0e0e0; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
            <h1 style="color: #2c3e50; font-size: 2.5em; margin-bottom: 10px;">📜 LegalEase</h1>
            <h2 style="color: #34495e; font-size: 1.5em; margin-bottom: 20px;">AI-Powered Legal Document Assistant</h2>
            <div style="display: flex; justify-content: center; gap: 40px; color: #576574; font-size: 1.1em;">
                <div style="background-color: #e0e0e0; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                    <span style="font-weight: bold;">User:</span> {CURRENT_USER}
                </div>
                <div style="background-color: #e0e0e0; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                    <span style="font-weight: bold;">Last Updated:</span> {CURRENT_TIME} UTC
                </div>
            </div>
        </div>
        """)
        
        language_selector = gr.Dropdown(
            choices=list(SUPPORTED_LANGUAGES.keys()),
            value="English",
            label="Select Output Language",
            scale=1
        )
        
        with gr.Tabs():
            # Document-analysis tabs (simplify, summary, key terms, risk)
            for tab_spec in ANALYSIS_TABS:
                build_analysis_tab(language_selector, **tab_spec)

            # Legal Assistant Chat Tab
            with gr.Tab("🤖 Legal Assistant Chat"):
                chatbot_input = gr.Textbox(
                    label="💬 Your Message",
                    placeholder="Ask me anything about legal matters...",
                    lines=2
                )
                chatbot_output = gr.Textbox(
                    label="🤖 Assistant Response",
                    lines=10,
                    show_copy_button=True
                )
                chatbot_btn = gr.Button(
                    "💬 Send Message",
                    variant="primary"
                )

                def chatbot_handler(user_input, lang):
                    if not user_input:
                        return "Please type a message to start the conversation."
                    # Same translate-unless-English path as the analysis tabs.
                    return process_with_translation(
                        assistant.generate_chatbot_response,
                        user_input,
                        target_lang=lang
                    )
                
                chatbot_btn.click(
                    fn=chatbot_handler,
                    inputs=[chatbot_input, language_selector],
                    outputs=chatbot_output
                )

        gr.HTML("""
        <div style="text-align: center; margin-top: 20px; padding: 20px; background-color: #e0e0e0; border-radius: 10px;">
            <p style="color: #576574; margin: 0;">Made by Team Ice Age</p>
        </div>
        """)

    return demo

def main():
    """Build the interface, enable its request queue, and launch it with sharing on."""
    app = create_interface()
    app.queue()
    app.launch(share=True)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()