Spaces:

gauravchand11
/

legal

Sleeping

App Files Files Community

gauravchand11 committed on Mar 23

Commit

2f7d824

verified ·

1 Parent(s): 0166259

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -27

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ HF_TOKEN = os.getenv('HF_TOKEN')
 AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
 class Translator:
-    def __init__(self):  # Fixed method name from _init_ to __init__
         self.key = AZURE_TRANSLATION_KEY
         self.region = 'centralindia'
         self.endpoint = "https://api.cognitive.microsofttranslator.com"
@@ -79,27 +79,50 @@ class TextExtractor:
     def extract_text_from_input(input_file):
         if isinstance(input_file, str):
             return input_file
         if isinstance(input_file, Image.Image):
             try:
                 return pytesseract.image_to_string(input_file)
             except Exception as e:
                 return f"Error extracting text from image: {str(e)}"
-        if hasattr(input_file, 'name') and input_file.name.lower().endswith('.pdf'):
-            try:
-                pdf_reader = PyPDF2.PdfReader(input_file)
-                text = ""
-                for page in pdf_reader.pages:
-                    text += page.extract_text() + "\n\n"
-                return text
-            except Exception as e:
-                return f"Error extracting text from PDF: {str(e)}"
-        return "Unsupported input type"
 class LegalEaseAssistant:
-    def __init__(self):  # Fixed method name from _init_ to __init__
         if not HF_TOKEN:
             raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
@@ -135,7 +158,7 @@ class LegalEaseAssistant:
         prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")
-        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)  # Add to(device) to ensure tensor is on the right device
         outputs = self.model.generate(
             **inputs,
             max_new_tokens=300,
@@ -207,8 +230,8 @@ def create_interface():
                 with gr.Row():
                     with gr.Column(scale=1):
                         simplify_input = gr.File(
-                            file_types=['txt', 'pdf', 'jpg', 'jpeg', 'png'],  # Added image file types explicitly
-                            label="📎 Upload Document"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         simplify_text_input = gr.Textbox(
@@ -250,8 +273,8 @@ def create_interface():
                 with gr.Row():
                     with gr.Column(scale=1):
                         summary_input = gr.File(
-                            file_types=['txt', 'pdf', 'jpg', 'jpeg', 'png'],  # Added image file types explicitly
-                            label="📎 Upload Document"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         summary_text_input = gr.Textbox(
@@ -294,8 +317,8 @@ def create_interface():
                 with gr.Row():
                     with gr.Column(scale=1):
                         terms_input = gr.File(
-                            file_types=['txt', 'pdf', 'jpg', 'jpeg', 'png'],  # Added image file types explicitly
-                            label="📎 Upload Document"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         terms_text_input = gr.Textbox(
@@ -338,8 +361,8 @@ def create_interface():
                 with gr.Row():
                     with gr.Column(scale=1):
                         contract1_input = gr.File(
-                            file_types=['txt', 'pdf', 'jpg', 'jpeg', 'png'],  # Added image file types explicitly
-                            label="📎 Upload First Contract"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         contract1_text = gr.Textbox(
@@ -350,8 +373,8 @@ def create_interface():
                     with gr.Column(scale=1):
                         contract2_input = gr.File(
-                            file_types=['txt', 'pdf', 'jpg', 'jpeg', 'png'],  # Added image file types explicitly
-                            label="📎 Upload Second Contract"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         contract2_text = gr.Textbox(
@@ -396,7 +419,7 @@ Please analyze and list:
 3. Unique terms in each contract
 4. Potential implications of the differences"""
-                        inputs = assistant.tokenizer(prompt, return_tensors="pt").to(assistant.model.device)  # Add to(device) to ensure tensor is on the right device
                         outputs = assistant.model.generate(
                             **inputs,
                             max_new_tokens=400,
@@ -430,8 +453,8 @@ Please analyze and list:
                 with gr.Row():
                     with gr.Column(scale=1):
                         risk_input = gr.File(
-                            file_types=['txt', 'pdf', 'jpg', 'jpeg', 'png'],  # Added image file types explicitly
-                            label="📎 Upload Document"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         risk_text_input = gr.Textbox(

 AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
 class Translator:
+    def __init__(self):
         self.key = AZURE_TRANSLATION_KEY
         self.region = 'centralindia'
         self.endpoint = "https://api.cognitive.microsofttranslator.com"
     def extract_text_from_input(input_file):
         if isinstance(input_file, str):
             return input_file
+        # Handle file uploads from gradio
+        if hasattr(input_file, 'name'):
+            file_path = input_file.name
+            file_ext = os.path.splitext(file_path)[1].lower()
+            # Handle PDF files
+            if file_ext == '.pdf':
+                try:
+                    pdf_reader = PyPDF2.PdfReader(input_file)
+                    text = ""
+                    for page in pdf_reader.pages:
+                        text += page.extract_text() + "\n\n"
+                    return text
+                except Exception as e:
+                    return f"Error extracting text from PDF: {str(e)}"
+            # Handle image files
+            elif file_ext in ['.jpg', '.jpeg', '.png']:
+                try:
+                    img = Image.open(input_file)
+                    return pytesseract.image_to_string(img)
+                except Exception as e:
+                    return f"Error extracting text from image: {str(e)}"
+            # Handle text files
+            elif file_ext == '.txt':
+                try:
+                    with open(file_path, 'r', encoding='utf-8') as f:
+                        return f.read()
+                except Exception as e:
+                    return f"Error reading text file: {str(e)}"
+        # Handle PIL Image objects directly
         if isinstance(input_file, Image.Image):
             try:
                 return pytesseract.image_to_string(input_file)
             except Exception as e:
                 return f"Error extracting text from image: {str(e)}"
+        return "Unsupported input type or file format"
 class LegalEaseAssistant:
+    def __init__(self):
         if not HF_TOKEN:
             raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
         prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")
+        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
         outputs = self.model.generate(
             **inputs,
             max_new_tokens=300,
                 with gr.Row():
                     with gr.Column(scale=1):
                         simplify_input = gr.File(
+                            # Don't specify file_types to allow any file upload
+                            label="📎 Upload Document (TXT, PDF, or Image)"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         simplify_text_input = gr.Textbox(
                 with gr.Row():
                     with gr.Column(scale=1):
                         summary_input = gr.File(
+                            # Don't specify file_types to allow any file upload
+                            label="📎 Upload Document (TXT, PDF, or Image)"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         summary_text_input = gr.Textbox(
                 with gr.Row():
                     with gr.Column(scale=1):
                         terms_input = gr.File(
+                            # Don't specify file_types to allow any file upload
+                            label="📎 Upload Document (TXT, PDF, or Image)"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         terms_text_input = gr.Textbox(
                 with gr.Row():
                     with gr.Column(scale=1):
                         contract1_input = gr.File(
+                            # Don't specify file_types to allow any file upload
+                            label="📎 Upload First Contract (TXT, PDF, or Image)"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         contract1_text = gr.Textbox(
                     with gr.Column(scale=1):
                         contract2_input = gr.File(
+                            # Don't specify file_types to allow any file upload
+                            label="📎 Upload Second Contract (TXT, PDF, or Image)"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         contract2_text = gr.Textbox(
 3. Unique terms in each contract
 4. Potential implications of the differences"""
+                        inputs = assistant.tokenizer(prompt, return_tensors="pt").to(assistant.model.device)
                         outputs = assistant.model.generate(
                             **inputs,
                             max_new_tokens=400,
                 with gr.Row():
                     with gr.Column(scale=1):
                         risk_input = gr.File(
+                            # Don't specify file_types to allow any file upload
+                            label="📎 Upload Document (TXT, PDF, or Image)"
                         )
                         gr.HTML("<div style='height: 10px'></div>")
                         risk_text_input = gr.Textbox(