gauravchand11 committed on
Commit
2f7d824
·
verified Β·
1 Parent(s): 0166259

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -27
app.py CHANGED
@@ -20,7 +20,7 @@ HF_TOKEN = os.getenv('HF_TOKEN')
20
  AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
21
 
22
  class Translator:
23
- def __init__(self): # Fixed method name from _init_ to __init__
24
  self.key = AZURE_TRANSLATION_KEY
25
  self.region = 'centralindia'
26
  self.endpoint = "https://api.cognitive.microsofttranslator.com"
@@ -79,27 +79,50 @@ class TextExtractor:
79
  def extract_text_from_input(input_file):
80
  if isinstance(input_file, str):
81
  return input_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
 
83
  if isinstance(input_file, Image.Image):
84
  try:
85
  return pytesseract.image_to_string(input_file)
86
  except Exception as e:
87
  return f"Error extracting text from image: {str(e)}"
88
 
89
- if hasattr(input_file, 'name') and input_file.name.lower().endswith('.pdf'):
90
- try:
91
- pdf_reader = PyPDF2.PdfReader(input_file)
92
- text = ""
93
- for page in pdf_reader.pages:
94
- text += page.extract_text() + "\n\n"
95
- return text
96
- except Exception as e:
97
- return f"Error extracting text from PDF: {str(e)}"
98
-
99
- return "Unsupported input type"
100
 
101
  class LegalEaseAssistant:
102
- def __init__(self): # Fixed method name from _init_ to __init__
103
  if not HF_TOKEN:
104
  raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
105
 
@@ -135,7 +158,7 @@ class LegalEaseAssistant:
135
 
136
  prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")
137
 
138
- inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device) # Add to(device) to ensure tensor is on the right device
139
  outputs = self.model.generate(
140
  **inputs,
141
  max_new_tokens=300,
@@ -207,8 +230,8 @@ def create_interface():
207
  with gr.Row():
208
  with gr.Column(scale=1):
209
  simplify_input = gr.File(
210
- file_types=['txt', 'pdf', 'jpg', 'jpeg', 'png'], # Added image file types explicitly
211
- label="πŸ“Ž Upload Document"
212
  )
213
  gr.HTML("<div style='height: 10px'></div>")
214
  simplify_text_input = gr.Textbox(
@@ -250,8 +273,8 @@ def create_interface():
250
  with gr.Row():
251
  with gr.Column(scale=1):
252
  summary_input = gr.File(
253
- file_types=['txt', 'pdf', 'jpg', 'jpeg', 'png'], # Added image file types explicitly
254
- label="πŸ“Ž Upload Document"
255
  )
256
  gr.HTML("<div style='height: 10px'></div>")
257
  summary_text_input = gr.Textbox(
@@ -294,8 +317,8 @@ def create_interface():
294
  with gr.Row():
295
  with gr.Column(scale=1):
296
  terms_input = gr.File(
297
- file_types=['txt', 'pdf', 'jpg', 'jpeg', 'png'], # Added image file types explicitly
298
- label="πŸ“Ž Upload Document"
299
  )
300
  gr.HTML("<div style='height: 10px'></div>")
301
  terms_text_input = gr.Textbox(
@@ -338,8 +361,8 @@ def create_interface():
338
  with gr.Row():
339
  with gr.Column(scale=1):
340
  contract1_input = gr.File(
341
- file_types=['txt', 'pdf', 'jpg', 'jpeg', 'png'], # Added image file types explicitly
342
- label="πŸ“Ž Upload First Contract"
343
  )
344
  gr.HTML("<div style='height: 10px'></div>")
345
  contract1_text = gr.Textbox(
@@ -350,8 +373,8 @@ def create_interface():
350
 
351
  with gr.Column(scale=1):
352
  contract2_input = gr.File(
353
- file_types=['txt', 'pdf', 'jpg', 'jpeg', 'png'], # Added image file types explicitly
354
- label="πŸ“Ž Upload Second Contract"
355
  )
356
  gr.HTML("<div style='height: 10px'></div>")
357
  contract2_text = gr.Textbox(
@@ -396,7 +419,7 @@ Please analyze and list:
396
  3. Unique terms in each contract
397
  4. Potential implications of the differences"""
398
 
399
- inputs = assistant.tokenizer(prompt, return_tensors="pt").to(assistant.model.device) # Add to(device) to ensure tensor is on the right device
400
  outputs = assistant.model.generate(
401
  **inputs,
402
  max_new_tokens=400,
@@ -430,8 +453,8 @@ Please analyze and list:
430
  with gr.Row():
431
  with gr.Column(scale=1):
432
  risk_input = gr.File(
433
- file_types=['txt', 'pdf', 'jpg', 'jpeg', 'png'], # Added image file types explicitly
434
- label="πŸ“Ž Upload Document"
435
  )
436
  gr.HTML("<div style='height: 10px'></div>")
437
  risk_text_input = gr.Textbox(
 
20
  AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
21
 
22
  class Translator:
23
+ def __init__(self):
24
  self.key = AZURE_TRANSLATION_KEY
25
  self.region = 'centralindia'
26
  self.endpoint = "https://api.cognitive.microsofttranslator.com"
 
79
  def extract_text_from_input(input_file):
80
  if isinstance(input_file, str):
81
  return input_file
82
+
83
+ # Handle file uploads from gradio
84
+ if hasattr(input_file, 'name'):
85
+ file_path = input_file.name
86
+ file_ext = os.path.splitext(file_path)[1].lower()
87
+
88
+ # Handle PDF files
89
+ if file_ext == '.pdf':
90
+ try:
91
+ pdf_reader = PyPDF2.PdfReader(input_file)
92
+ text = ""
93
+ for page in pdf_reader.pages:
94
+ text += page.extract_text() + "\n\n"
95
+ return text
96
+ except Exception as e:
97
+ return f"Error extracting text from PDF: {str(e)}"
98
+
99
+ # Handle image files
100
+ elif file_ext in ['.jpg', '.jpeg', '.png']:
101
+ try:
102
+ img = Image.open(input_file)
103
+ return pytesseract.image_to_string(img)
104
+ except Exception as e:
105
+ return f"Error extracting text from image: {str(e)}"
106
+
107
+ # Handle text files
108
+ elif file_ext == '.txt':
109
+ try:
110
+ with open(file_path, 'r', encoding='utf-8') as f:
111
+ return f.read()
112
+ except Exception as e:
113
+ return f"Error reading text file: {str(e)}"
114
 
115
+ # Handle PIL Image objects directly
116
  if isinstance(input_file, Image.Image):
117
  try:
118
  return pytesseract.image_to_string(input_file)
119
  except Exception as e:
120
  return f"Error extracting text from image: {str(e)}"
121
 
122
+ return "Unsupported input type or file format"
 
 
 
 
 
 
 
 
 
 
123
 
124
  class LegalEaseAssistant:
125
+ def __init__(self):
126
  if not HF_TOKEN:
127
  raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
128
 
 
158
 
159
  prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")
160
 
161
+ inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
162
  outputs = self.model.generate(
163
  **inputs,
164
  max_new_tokens=300,
 
230
  with gr.Row():
231
  with gr.Column(scale=1):
232
  simplify_input = gr.File(
233
+ # Don't specify file_types to allow any file upload
234
+ label="πŸ“Ž Upload Document (TXT, PDF, or Image)"
235
  )
236
  gr.HTML("<div style='height: 10px'></div>")
237
  simplify_text_input = gr.Textbox(
 
273
  with gr.Row():
274
  with gr.Column(scale=1):
275
  summary_input = gr.File(
276
+ # Don't specify file_types to allow any file upload
277
+ label="πŸ“Ž Upload Document (TXT, PDF, or Image)"
278
  )
279
  gr.HTML("<div style='height: 10px'></div>")
280
  summary_text_input = gr.Textbox(
 
317
  with gr.Row():
318
  with gr.Column(scale=1):
319
  terms_input = gr.File(
320
+ # Don't specify file_types to allow any file upload
321
+ label="πŸ“Ž Upload Document (TXT, PDF, or Image)"
322
  )
323
  gr.HTML("<div style='height: 10px'></div>")
324
  terms_text_input = gr.Textbox(
 
361
  with gr.Row():
362
  with gr.Column(scale=1):
363
  contract1_input = gr.File(
364
+ # Don't specify file_types to allow any file upload
365
+ label="πŸ“Ž Upload First Contract (TXT, PDF, or Image)"
366
  )
367
  gr.HTML("<div style='height: 10px'></div>")
368
  contract1_text = gr.Textbox(
 
373
 
374
  with gr.Column(scale=1):
375
  contract2_input = gr.File(
376
+ # Don't specify file_types to allow any file upload
377
+ label="πŸ“Ž Upload Second Contract (TXT, PDF, or Image)"
378
  )
379
  gr.HTML("<div style='height: 10px'></div>")
380
  contract2_text = gr.Textbox(
 
419
  3. Unique terms in each contract
420
  4. Potential implications of the differences"""
421
 
422
+ inputs = assistant.tokenizer(prompt, return_tensors="pt").to(assistant.model.device)
423
  outputs = assistant.model.generate(
424
  **inputs,
425
  max_new_tokens=400,
 
453
  with gr.Row():
454
  with gr.Column(scale=1):
455
  risk_input = gr.File(
456
+ # Don't specify file_types to allow any file upload
457
+ label="πŸ“Ž Upload Document (TXT, PDF, or Image)"
458
  )
459
  gr.HTML("<div style='height: 10px'></div>")
460
  risk_text_input = gr.Textbox(