Spaces:

gauravchand11
/

legal

Sleeping

App Files Files Community

gauravchand11 committed on Mar 22

Commit

3257652

verified ·

1 Parent(s): 72a488c

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -271

app.py CHANGED Viewed

@@ -9,6 +9,9 @@ from PIL import Image
 import PyPDF2
 import requests
 import uuid
 # Configuration
 MODEL_NAME = "google/gemma-2b-it"
@@ -19,61 +22,6 @@ CURRENT_TIME = "2025-03-22 21:00:45"
 HF_TOKEN = os.getenv('HF_TOKEN')
 AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
-class Translator:
-    def _init_(self):
-        self.key = AZURE_TRANSLATION_KEY
-        self.region = 'centralindia'
-        self.endpoint = "https://api.cognitive.microsofttranslator.com"
-        if not self.key:
-            raise ValueError("Azure Translator not configured. Please set AZURE_TRANSLATION_KEY in Spaces settings.")
-    def translate_text(self, text, target_language="en"):
-        try:
-            # Split the text into bullet points
-            bullet_points = text.split('\n• ')
-            translated_points = []
-            # Translate each bullet point separately
-            for point in bullet_points:
-                if point.strip():  # Only translate non-empty points
-                    path = '/translate'
-                    constructed_url = self.endpoint + path
-                    params = {
-                        'api-version': '3.0',
-                        'to': target_language
-                    }
-                    headers = {
-                        'Ocp-Apim-Subscription-Key': self.key,
-                        'Ocp-Apim-Subscription-Region': 'centralindia',
-                        'Content-type': 'application/json',
-                        'X-ClientTraceId': str(uuid.uuid4())
-                    }
-                    body = [{
-                        'text': point.strip()
-                    }]
-                    response = requests.post(
-                        constructed_url,
-                        params=params,
-                        headers=headers,
-                        json=body
-                    )
-                    response.raise_for_status()
-                    translation = response.json()[0]["translations"][0]["text"]
-                    translated_points.append(translation)
-            # Reconstruct the bullet-pointed text
-            translated_text = '\n• ' + '\n• '.join(translated_points)
-            return translated_text
-        except Exception as e:
-            return f"Translation error: {str(e)}"
 class TextExtractor:
     @staticmethod
     def extract_text_from_input(input_file):
@@ -99,7 +47,7 @@ class TextExtractor:
         return "Unsupported input type"
 class LegalEaseAssistant:
-    def _init_(self):
         if not HF_TOKEN:
             raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
@@ -117,231 +65,56 @@ class LegalEaseAssistant:
         )
         self.text_extractor = TextExtractor()
-    def format_response(self, text):
-        """Format response as bullet points"""
-        sentences = [s.strip() for s in text.split('.') if s.strip()]
-        bullet_points = ['• ' + s + '.' for s in sentences]
-        return '\n'.join(bullet_points)
-    def generate_response(self, input_file, task_type):
-        text = self.text_extractor.extract_text_from_input(input_file)
-        task_prompts = {
-            "simplify": f"Simplify the following legal text in clear, plain language. Provide the response as separate points:\n\n{text}\n\nSimplified explanation:",
-            "summary": f"Provide a concise summary of the following legal document as separate key points:\n\n{text}\n\nSummary:",
-            "key_terms": f"Identify and explain the key legal terms and obligations in this text as separate points:\n\n{text}\n\nKey Terms:",
-            "risk": f"Perform a risk analysis on the following legal document and list each risk as a separate point:\n\n{text}\n\nRisk Assessment:"
-        }
-        prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")
-        inputs = self.tokenizer(prompt, return_tensors="pt")
-        outputs = self.model.generate(
-            **inputs,
-            max_new_tokens=300,
-            num_return_sequences=1,
-            do_sample=True,
-            temperature=0.7,
-            top_p=0.9
-        )
-        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
-        response_parts = response.split(prompt.split("\n\n")[-1])
-        raw_response = response_parts[-1].strip() if len(response_parts) > 1 else response.strip()
-        return self.format_response(raw_response)
 def create_interface():
     assistant = LegalEaseAssistant()
-    translator = Translator()
-    SUPPORTED_LANGUAGES = {
-        "English": "en",
-        "Hindi": "hi",
-        "Bengali": "bn",
-        "Telugu": "te",
-        "Tamil": "ta",
-        "Marathi": "mr",
-        "Gujarati": "gu",
-        "Kannada": "kn",
-        "Malayalam": "ml",
-        "Punjabi": "pa",
-        "Spanish": "es",
-        "French": "fr",
-        "German": "de",
-        "Chinese (Simplified)": "zh-Hans",
-        "Japanese": "ja"
-    }
-    def process_with_translation(func, *args, target_lang="English"):
-        result = func(*args)
-        if target_lang != "English":
-            result = translator.translate_text(result, SUPPORTED_LANGUAGES[target_lang])
-        return result
     with gr.Blocks(title="LegalEase: AI Legal Assistant") as demo:
-        gr.HTML(f"""
-        <div style="text-align: center; background-color: #f0f2f6; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
-            <h1 style="color: #2c3e50; font-size: 2.5em; margin-bottom: 10px;">📜 LegalEase</h1>
-            <h2 style="color: #34495e; font-size: 1.5em; margin-bottom: 20px;">AI-Powered Legal Document Assistant</h2>
-            <div style="display: flex; justify-content: center; gap: 40px; color: #576574; font-size: 1.1em;">
-                <div style="background-color: white; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
-                    <span style="font-weight: bold;">User:</span> {CURRENT_USER}
-                </div>
-                <div style="background-color: white; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
-                    <span style="font-weight: bold;">Last Updated:</span> {CURRENT_TIME} UTC
-                </div>
-            </div>
-        </div>
-        """)
-        language_selector = gr.Dropdown(
-            choices=list(SUPPORTED_LANGUAGES.keys()),
-            value="English",
-            label="Select Output Language",
-            scale=1
-        )
-        with gr.Tabs():
-            with gr.Tab("📝 Simplify Language"):
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        simplify_input = gr.File(
-                            file_types=['txt', 'pdf', 'image'],
-                            label="📎 Upload Document"
-                        )
-                        gr.HTML("<div style='height: 10px'></div>")
-                        simplify_text_input = gr.Textbox(
-                            label="✍ Or Type/Paste Text",
-                            placeholder="Enter your legal text here...",
-                            lines=4
-                        )
-                        gr.HTML("<div style='height: 10px'></div>")
-                        simplify_btn = gr.Button(
-                            "🔍 Simplify Language",
-                            variant="primary"
-                        )
-                    with gr.Column(scale=1):
-                        simplify_output = gr.Textbox(
-                            label="📋 Simplified Explanation",
-                            lines=12,
-                            show_copy_button=True
-                        )
-                def simplify_handler(file, text, lang):
-                    input_source = file or text
-                    if not input_source:
-                        return "Please provide some text or upload a document to analyze."
-                    return process_with_translation(
-                        assistant.generate_response,
-                        input_source,
-                        "simplify",
-                        target_lang=lang
-                    )
-                simplify_btn.click(
-                    fn=simplify_handler,
-                    inputs=[simplify_input, simplify_text_input, language_selector],
-                    outputs=simplify_output
-                )
-            with gr.Tab("📚 Document Summary"):
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        summary_input = gr.File(
-                            file_types=['txt', 'pdf', 'image'],
-                            label="📎 Upload Document"
-                        )
-                        gr.HTML("<div style='height: 10px'></div>")
-                        summary_text_input = gr.Textbox(
-                            label="✍ Or Type/Paste Text",
-                            placeholder="Enter your legal document here...",
-                            lines=4
-                        )
-                        gr.HTML("<div style='height: 10px'></div>")
-                        summary_btn = gr.Button(
-                            "📋 Generate Summary",
-                            variant="primary"
-                        )
-                    with gr.Column(scale=1):
-                        summary_output = gr.Textbox(
-                            label="📑 Document Summary",
-                            lines=12,
-                            show_copy_button=True
-                        )
-                def summary_handler(file, text, lang):
-                    input_source = file or text
-                    if not input_source:
-                        return "Please provide some text or upload a document to summarize."
-                    return process_with_translation(
-                        assistant.generate_response,
-                        input_source,
-                        "summary",
-                        target_lang=lang
-                    )
-                summary_btn.click(
-                    fn=summary_handler,
-                    inputs=[summary_input, summary_text_input, language_selector],
-                    outputs=summary_output
-                )
-            with gr.Tab("⚠ Risk Analysis"):
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        risk_input = gr.File(
-                            file_types=['txt', 'pdf', 'image'],
-                            label="📎 Upload Document"
-                        )
-                        gr.HTML("<div style='height: 10px'></div>")
-                        risk_text_input = gr.Textbox(
-                            label="✍ Or Type/Paste Text",
-                            placeholder="Enter your legal document here...",
-                            lines=4
-                        )
-                        gr.HTML("<div style='height: 10px'></div>")
-                        risk_btn = gr.Button(
-                            "🔍 Analyze Risks",
-                            variant="primary"
-                        )
-                    with gr.Column(scale=1):
-                        risk_output = gr.Textbox(
-                            label="⚠ Risk Assessment",
-                            lines=12,
-                            show_copy_button=True
-                        )
-                def risk_handler(file, text, lang):
-                    input_source = file or text
-                    if not input_source:
-                        return "Please provide some text or upload a document to analyze risks."
-                    return process_with_translation(
-                        assistant.generate_response,
-                        input_source,
-                        "risk",
-                        target_lang=lang
-                    )
-                risk_btn.click(
-                    fn=risk_handler,
-                    inputs=[risk_input, risk_text_input, language_selector],
-                    outputs=risk_output
-                )
-        gr.HTML(f"""
-        <div style="text-align: center; margin-top: 20px; padding: 20px; background-color: #f0f2f6; border-radius: 10px;">
-            <p style="color: #576574; margin: 0;">Powered by Gemma 2B and Azure Translator</p>
-            <p style="color: #576574; margin: 5px 0 0 0; font-size: 0.9em;">Built for Language Translation Hackathon</p>
-        </div>
-        """)
     return demo
 demo = create_interface()
 if __name__ == "__main__":
-    demo.launch()

 import PyPDF2
 import requests
 import uuid
+from collections import Counter
+import re
+from difflib import unified_diff
 # Configuration
 MODEL_NAME = "google/gemma-2b-it"
 HF_TOKEN = os.getenv('HF_TOKEN')
 AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
 class TextExtractor:
     @staticmethod
     def extract_text_from_input(input_file):
         return "Unsupported input type"
 class LegalEaseAssistant:
+    def __init__(self):
         if not HF_TOKEN:
             raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
         )
         self.text_extractor = TextExtractor()
+    def generate_keywords(self, text, top_n=10):
+        """Return the most frequent long words in ``text`` as one string.
+
+        A word is a run of five or more ASCII letters. Counting is
+        case-insensitive, so "Agreement" and "agreement" are tallied as a
+        single keyword; keywords are returned lower-cased.
+
+        Args:
+            text: Text to scan. ``None`` or an empty string yields ``""``.
+            top_n: Maximum number of keywords to return (default 10).
+
+        Returns:
+            Up to ``top_n`` keywords joined by ``", "``, most frequent first.
+        """
+        if not text:
+            return ""
+        # Lower-case each match so differently-cased spellings share a count.
+        words = (word.lower() for word in re.findall(r'\b[A-Za-z]{5,}\b', text))
+        word_freq = Counter(words)
+        return ', '.join(word for word, _ in word_freq.most_common(top_n))
+    def compare_contracts(self, text1, text2):
+        """Return a unified diff between two contract texts.
+
+        Lines are split with ``str.splitlines`` so that CRLF versus LF line
+        endings, or a trailing newline, do not show up as differences.
+
+        Args:
+            text1: Text of the first contract (``None`` is treated as empty).
+            text2: Text of the second contract (``None`` is treated as empty).
+
+        Returns:
+            The unified diff as a newline-joined string, with the two sides
+            labelled "Contract 1" and "Contract 2", or
+            "The documents are identical." when no line differs.
+        """
+        text1_lines = (text1 or "").splitlines()
+        text2_lines = (text2 or "").splitlines()
+        diff = '\n'.join(
+            unified_diff(
+                text1_lines,
+                text2_lines,
+                fromfile='Contract 1',
+                tofile='Contract 2',
+                lineterm='',
+            )
+        )
+        return diff if diff else "The documents are identical."
 def create_interface():
+    """Build the LegalEase Gradio app and return it without launching.
+
+    The UI has two tabs: "Keyword Extraction" (an uploaded file or pasted
+    text) and "Contract Comparison" (two uploaded files). Both handlers run
+    their input through ``assistant.text_extractor`` before calling the
+    assistant.
+
+    Returns:
+        The ``gr.Blocks`` instance holding both tabs.
+    """
     assistant = LegalEaseAssistant()
     with gr.Blocks(title="LegalEase: AI Legal Assistant") as demo:
+        gr.Markdown("## 📜 LegalEase: AI-Powered Legal Document Assistant")
+        with gr.Tab("🔑 Keyword Extraction"):
+            # Extensions need a leading dot; bare names such as 'image' are
+            # read by Gradio as file-type categories, not extensions.
+            keyword_input = gr.File(file_types=['.txt', '.pdf', 'image'], label="Upload Document")
+            keyword_text_input = gr.Textbox(label="Or Enter Text", placeholder="Enter legal text here...", lines=4)
+            keyword_output = gr.Textbox(label="Extracted Keywords", lines=2, show_copy_button=True)
+            keyword_btn = gr.Button("Extract Keywords")
+            def keyword_handler(file, text):
+                # An uploaded file takes precedence over pasted text.
+                input_source = file or text
+                if not input_source:
+                    return "Please provide some text or upload a document."
+                text = assistant.text_extractor.extract_text_from_input(input_source)
+                return assistant.generate_keywords(text)
+            keyword_btn.click(fn=keyword_handler, inputs=[keyword_input, keyword_text_input], outputs=keyword_output)
+        with gr.Tab("⚖ Contract Comparison"):
+            contract_input1 = gr.File(file_types=['.txt', '.pdf'], label="Upload Contract 1")
+            contract_input2 = gr.File(file_types=['.txt', '.pdf'], label="Upload Contract 2")
+            comparison_output = gr.Textbox(label="Comparison Result", lines=12, show_copy_button=True)
+            compare_btn = gr.Button("Compare Contracts")
+            def compare_handler(file1, file2):
+                # Both uploads are required before a comparison is attempted.
+                if not file1 or not file2:
+                    return "Please upload two contracts for comparison."
+                text1 = assistant.text_extractor.extract_text_from_input(file1)
+                text2 = assistant.text_extractor.extract_text_from_input(file2)
+                return assistant.compare_contracts(text1, text2)
+            compare_btn.click(fn=compare_handler, inputs=[contract_input1, contract_input2], outputs=comparison_output)
     return demo
 demo = create_interface()
 if __name__ == "__main__":
+    demo.launch()