Spaces:

gauravchand11
/

legal

Sleeping

App Files Files Community

gauravchand11 committed on Mar 23

Commit

c26b78e

verified ·

1 Parent(s): 2f7d824

Update app.py

Browse files

Files changed (1) hide show

app.py +32 -58

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ HF_TOKEN = os.getenv('HF_TOKEN')
 AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
 class Translator:
-    def __init__(self):
         self.key = AZURE_TRANSLATION_KEY
         self.region = 'centralindia'
         self.endpoint = "https://api.cognitive.microsofttranslator.com"
@@ -47,7 +47,7 @@ class Translator:
                     headers = {
                         'Ocp-Apim-Subscription-Key': self.key,
-                        'Ocp-Apim-Subscription-Region': self.region,
                         'Content-type': 'application/json',
                         'X-ClientTraceId': str(uuid.uuid4())
                     }
@@ -79,50 +79,27 @@ class TextExtractor:
     def extract_text_from_input(input_file):
         if isinstance(input_file, str):
             return input_file
-        # Handle file uploads from gradio
-        if hasattr(input_file, 'name'):
-            file_path = input_file.name
-            file_ext = os.path.splitext(file_path)[1].lower()
-            # Handle PDF files
-            if file_ext == '.pdf':
-                try:
-                    pdf_reader = PyPDF2.PdfReader(input_file)
-                    text = ""
-                    for page in pdf_reader.pages:
-                        text += page.extract_text() + "\n\n"
-                    return text
-                except Exception as e:
-                    return f"Error extracting text from PDF: {str(e)}"
-            # Handle image files
-            elif file_ext in ['.jpg', '.jpeg', '.png']:
-                try:
-                    img = Image.open(input_file)
-                    return pytesseract.image_to_string(img)
-                except Exception as e:
-                    return f"Error extracting text from image: {str(e)}"
-            # Handle text files
-            elif file_ext == '.txt':
-                try:
-                    with open(file_path, 'r', encoding='utf-8') as f:
-                        return f.read()
-                except Exception as e:
-                    return f"Error reading text file: {str(e)}"
-        # Handle PIL Image objects directly
         if isinstance(input_file, Image.Image):
             try:
                 return pytesseract.image_to_string(input_file)
             except Exception as e:
                 return f"Error extracting text from image: {str(e)}"
-        return "Unsupported input type or file format"
 class LegalEaseAssistant:
-    def __init__(self):
         if not HF_TOKEN:
             raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
@@ -158,7 +135,7 @@ class LegalEaseAssistant:
         prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")
-        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
         outputs = self.model.generate(
             **inputs,
             max_new_tokens=300,
@@ -230,8 +207,8 @@ def create_interface():
                 with gr.Row():
                     with gr.Column(scale=1):
                         simplify_input = gr.File(
-                            # Don't specify file_types to allow any file upload
-                            label="📎 Upload Document (TXT, PDF, or Image)"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         simplify_text_input = gr.Textbox(
@@ -273,8 +250,8 @@ def create_interface():
                 with gr.Row():
                     with gr.Column(scale=1):
                         summary_input = gr.File(
-                            # Don't specify file_types to allow any file upload
-                            label="📎 Upload Document (TXT, PDF, or Image)"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         summary_text_input = gr.Textbox(
@@ -317,8 +294,8 @@ def create_interface():
                 with gr.Row():
                     with gr.Column(scale=1):
                         terms_input = gr.File(
-                            # Don't specify file_types to allow any file upload
-                            label="📎 Upload Document (TXT, PDF, or Image)"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         terms_text_input = gr.Textbox(
@@ -361,8 +338,8 @@ def create_interface():
                 with gr.Row():
                     with gr.Column(scale=1):
                         contract1_input = gr.File(
-                            # Don't specify file_types to allow any file upload
-                            label="📎 Upload First Contract (TXT, PDF, or Image)"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         contract1_text = gr.Textbox(
@@ -373,8 +350,8 @@ def create_interface():
                     with gr.Column(scale=1):
                         contract2_input = gr.File(
-                            # Don't specify file_types to allow any file upload
-                            label="📎 Upload Second Contract (TXT, PDF, or Image)"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         contract2_text = gr.Textbox(
@@ -401,25 +378,22 @@ def create_interface():
                     if not contract1 or not contract2:
                         return "Please provide both contracts for comparison."
-                    # Extract text if needed
-                    if not isinstance(contract1, str):
-                        contract1 = assistant.text_extractor.extract_text_from_input(contract1)
-                    if not isinstance(contract2, str):
-                        contract2 = assistant.text_extractor.extract_text_from_input(contract2)
                     def compare_contracts(contract1, contract2):
                         prompt = f"""Compare these two contracts and identify key differences and similarities:
 Contract 1:
 {contract1}
 Contract 2:
 {contract2}
 Please analyze and list:
 1. Key similarities
 2. Important differences
 3. Unique terms in each contract
 4. Potential implications of the differences"""
-                        inputs = assistant.tokenizer(prompt, return_tensors="pt").to(assistant.model.device)
                         outputs = assistant.model.generate(
                             **inputs,
                             max_new_tokens=400,
@@ -453,8 +427,8 @@ Please analyze and list:
                 with gr.Row():
                     with gr.Column(scale=1):
                         risk_input = gr.File(
-                            # Don't specify file_types to allow any file upload
-                            label="📎 Upload Document (TXT, PDF, or Image)"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         risk_text_input = gr.Textbox(
@@ -503,5 +477,5 @@ Please analyze and list:
 demo = create_interface()
-if __name__ == "__main__":
     demo.launch()

 AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
 class Translator:
+    def __init__(self):
         self.key = AZURE_TRANSLATION_KEY
         self.region = 'centralindia'
         self.endpoint = "https://api.cognitive.microsofttranslator.com"
                     headers = {
                         'Ocp-Apim-Subscription-Key': self.key,
+                        'Ocp-Apim-Subscription-Region': 'centralindia',
                         'Content-type': 'application/json',
                         'X-ClientTraceId': str(uuid.uuid4())
                     }
     def extract_text_from_input(input_file):
         if isinstance(input_file, str):
             return input_file
         if isinstance(input_file, Image.Image):
             try:
                 return pytesseract.image_to_string(input_file)
             except Exception as e:
                 return f"Error extracting text from image: {str(e)}"
+        if hasattr(input_file, 'name') and input_file.name.lower().endswith('.pdf'):
+            try:
+                pdf_reader = PyPDF2.PdfReader(input_file)
+                text = ""
+                for page in pdf_reader.pages:
+                    text += page.extract_text() + "\n\n"
+                return text
+            except Exception as e:
+                return f"Error extracting text from PDF: {str(e)}"
+        return "Unsupported input type"
 class LegalEaseAssistant:
+    def __init__(self):
         if not HF_TOKEN:
             raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
         prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")
+        inputs = self.tokenizer(prompt, return_tensors="pt")
         outputs = self.model.generate(
             **inputs,
             max_new_tokens=300,
                 with gr.Row():
                     with gr.Column(scale=1):
                         simplify_input = gr.File(
+                            file_types=['txt', 'pdf', 'image'],
+                            label="📎 Upload Document"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         simplify_text_input = gr.Textbox(
                 with gr.Row():
                     with gr.Column(scale=1):
                         summary_input = gr.File(
+                            file_types=['txt', 'pdf', 'image'],
+                            label="📎 Upload Document"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         summary_text_input = gr.Textbox(
                 with gr.Row():
                     with gr.Column(scale=1):
                         terms_input = gr.File(
+                            file_types=['txt', 'pdf', 'image'],
+                            label="📎 Upload Document"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         terms_text_input = gr.Textbox(
                 with gr.Row():
                     with gr.Column(scale=1):
                         contract1_input = gr.File(
+                            file_types=['txt', 'pdf', 'image'],
+                            label="📎 Upload First Contract"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         contract1_text = gr.Textbox(
                     with gr.Column(scale=1):
                         contract2_input = gr.File(
+                            file_types=['txt', 'pdf', 'image'],
+                            label="📎 Upload Second Contract"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         contract2_text = gr.Textbox(
                     if not contract1 or not contract2:
                         return "Please provide both contracts for comparison."
                     def compare_contracts(contract1, contract2):
                         prompt = f"""Compare these two contracts and identify key differences and similarities:
 Contract 1:
 {contract1}
 Contract 2:
 {contract2}
 Please analyze and list:
 1. Key similarities
 2. Important differences
 3. Unique terms in each contract
 4. Potential implications of the differences"""
+                        inputs = assistant.tokenizer(prompt, return_tensors="pt")
                         outputs = assistant.model.generate(
                             **inputs,
                             max_new_tokens=400,
                 with gr.Row():
                     with gr.Column(scale=1):
                         risk_input = gr.File(
+                            file_types=['txt', 'pdf', 'image'],
+                            label="📎 Upload Document"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         risk_text_input = gr.Textbox(
 demo = create_interface()
+if __name__ == "__main__":
     demo.launch()