gauravchand11 committed on
Commit
c26b78e
·
verified ·
1 Parent(s): 2f7d824

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -58
app.py CHANGED
@@ -20,7 +20,7 @@ HF_TOKEN = os.getenv('HF_TOKEN')
20
  AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
21
 
22
  class Translator:
23
- def __init__(self):
24
  self.key = AZURE_TRANSLATION_KEY
25
  self.region = 'centralindia'
26
  self.endpoint = "https://api.cognitive.microsofttranslator.com"
@@ -47,7 +47,7 @@ class Translator:
47
 
48
  headers = {
49
  'Ocp-Apim-Subscription-Key': self.key,
50
- 'Ocp-Apim-Subscription-Region': self.region,
51
  'Content-type': 'application/json',
52
  'X-ClientTraceId': str(uuid.uuid4())
53
  }
@@ -79,50 +79,27 @@ class TextExtractor:
79
  def extract_text_from_input(input_file):
80
  if isinstance(input_file, str):
81
  return input_file
82
-
83
- # Handle file uploads from gradio
84
- if hasattr(input_file, 'name'):
85
- file_path = input_file.name
86
- file_ext = os.path.splitext(file_path)[1].lower()
87
-
88
- # Handle PDF files
89
- if file_ext == '.pdf':
90
- try:
91
- pdf_reader = PyPDF2.PdfReader(input_file)
92
- text = ""
93
- for page in pdf_reader.pages:
94
- text += page.extract_text() + "\n\n"
95
- return text
96
- except Exception as e:
97
- return f"Error extracting text from PDF: {str(e)}"
98
-
99
- # Handle image files
100
- elif file_ext in ['.jpg', '.jpeg', '.png']:
101
- try:
102
- img = Image.open(input_file)
103
- return pytesseract.image_to_string(img)
104
- except Exception as e:
105
- return f"Error extracting text from image: {str(e)}"
106
-
107
- # Handle text files
108
- elif file_ext == '.txt':
109
- try:
110
- with open(file_path, 'r', encoding='utf-8') as f:
111
- return f.read()
112
- except Exception as e:
113
- return f"Error reading text file: {str(e)}"
114
 
115
- # Handle PIL Image objects directly
116
  if isinstance(input_file, Image.Image):
117
  try:
118
  return pytesseract.image_to_string(input_file)
119
  except Exception as e:
120
  return f"Error extracting text from image: {str(e)}"
121
 
122
- return "Unsupported input type or file format"
 
 
 
 
 
 
 
 
 
 
123
 
124
  class LegalEaseAssistant:
125
- def __init__(self):
126
  if not HF_TOKEN:
127
  raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
128
 
@@ -158,7 +135,7 @@ class LegalEaseAssistant:
158
 
159
  prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")
160
 
161
- inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
162
  outputs = self.model.generate(
163
  **inputs,
164
  max_new_tokens=300,
@@ -230,8 +207,8 @@ def create_interface():
230
  with gr.Row():
231
  with gr.Column(scale=1):
232
  simplify_input = gr.File(
233
- # Don't specify file_types to allow any file upload
234
- label="📎 Upload Document (TXT, PDF, or Image)"
235
  )
236
  gr.HTML("<div style='height: 10px'></div>")
237
  simplify_text_input = gr.Textbox(
@@ -273,8 +250,8 @@ def create_interface():
273
  with gr.Row():
274
  with gr.Column(scale=1):
275
  summary_input = gr.File(
276
- # Don't specify file_types to allow any file upload
277
- label="📎 Upload Document (TXT, PDF, or Image)"
278
  )
279
  gr.HTML("<div style='height: 10px'></div>")
280
  summary_text_input = gr.Textbox(
@@ -317,8 +294,8 @@ def create_interface():
317
  with gr.Row():
318
  with gr.Column(scale=1):
319
  terms_input = gr.File(
320
- # Don't specify file_types to allow any file upload
321
- label="📎 Upload Document (TXT, PDF, or Image)"
322
  )
323
  gr.HTML("<div style='height: 10px'></div>")
324
  terms_text_input = gr.Textbox(
@@ -361,8 +338,8 @@ def create_interface():
361
  with gr.Row():
362
  with gr.Column(scale=1):
363
  contract1_input = gr.File(
364
- # Don't specify file_types to allow any file upload
365
- label="📎 Upload First Contract (TXT, PDF, or Image)"
366
  )
367
  gr.HTML("<div style='height: 10px'></div>")
368
  contract1_text = gr.Textbox(
@@ -373,8 +350,8 @@ def create_interface():
373
 
374
  with gr.Column(scale=1):
375
  contract2_input = gr.File(
376
- # Don't specify file_types to allow any file upload
377
- label="📎 Upload Second Contract (TXT, PDF, or Image)"
378
  )
379
  gr.HTML("<div style='height: 10px'></div>")
380
  contract2_text = gr.Textbox(
@@ -401,25 +378,22 @@ def create_interface():
401
  if not contract1 or not contract2:
402
  return "Please provide both contracts for comparison."
403
 
404
- # Extract text if needed
405
- if not isinstance(contract1, str):
406
- contract1 = assistant.text_extractor.extract_text_from_input(contract1)
407
- if not isinstance(contract2, str):
408
- contract2 = assistant.text_extractor.extract_text_from_input(contract2)
409
-
410
  def compare_contracts(contract1, contract2):
411
  prompt = f"""Compare these two contracts and identify key differences and similarities:
 
412
  Contract 1:
413
  {contract1}
 
414
  Contract 2:
415
  {contract2}
 
416
  Please analyze and list:
417
  1. Key similarities
418
  2. Important differences
419
  3. Unique terms in each contract
420
  4. Potential implications of the differences"""
421
 
422
- inputs = assistant.tokenizer(prompt, return_tensors="pt").to(assistant.model.device)
423
  outputs = assistant.model.generate(
424
  **inputs,
425
  max_new_tokens=400,
@@ -453,8 +427,8 @@ Please analyze and list:
453
  with gr.Row():
454
  with gr.Column(scale=1):
455
  risk_input = gr.File(
456
- # Don't specify file_types to allow any file upload
457
- label="📎 Upload Document (TXT, PDF, or Image)"
458
  )
459
  gr.HTML("<div style='height: 10px'></div>")
460
  risk_text_input = gr.Textbox(
@@ -503,5 +477,5 @@ Please analyze and list:
503
 
504
  demo = create_interface()
505
 
506
- if __name__ == "__main__":
507
  demo.launch()
 
20
  AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
21
 
22
  class Translator:
23
+ def init(self):
24
  self.key = AZURE_TRANSLATION_KEY
25
  self.region = 'centralindia'
26
  self.endpoint = "https://api.cognitive.microsofttranslator.com"
 
47
 
48
  headers = {
49
  'Ocp-Apim-Subscription-Key': self.key,
50
+ 'Ocp-Apim-Subscription-Region': 'centralindia',
51
  'Content-type': 'application/json',
52
  'X-ClientTraceId': str(uuid.uuid4())
53
  }
 
79
  def extract_text_from_input(input_file):
80
  if isinstance(input_file, str):
81
  return input_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
 
83
  if isinstance(input_file, Image.Image):
84
  try:
85
  return pytesseract.image_to_string(input_file)
86
  except Exception as e:
87
  return f"Error extracting text from image: {str(e)}"
88
 
89
+ if hasattr(input_file, 'name') and input_file.name.lower().endswith('.pdf'):
90
+ try:
91
+ pdf_reader = PyPDF2.PdfReader(input_file)
92
+ text = ""
93
+ for page in pdf_reader.pages:
94
+ text += page.extract_text() + "\n\n"
95
+ return text
96
+ except Exception as e:
97
+ return f"Error extracting text from PDF: {str(e)}"
98
+
99
+ return "Unsupported input type"
100
 
101
  class LegalEaseAssistant:
102
+ def init(self):
103
  if not HF_TOKEN:
104
  raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
105
 
 
135
 
136
  prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")
137
 
138
+ inputs = self.tokenizer(prompt, return_tensors="pt")
139
  outputs = self.model.generate(
140
  **inputs,
141
  max_new_tokens=300,
 
207
  with gr.Row():
208
  with gr.Column(scale=1):
209
  simplify_input = gr.File(
210
+ file_types=['txt', 'pdf', 'image'],
211
+ label="📎 Upload Document"
212
  )
213
  gr.HTML("<div style='height: 10px'></div>")
214
  simplify_text_input = gr.Textbox(
 
250
  with gr.Row():
251
  with gr.Column(scale=1):
252
  summary_input = gr.File(
253
+ file_types=['txt', 'pdf', 'image'],
254
+ label="📎 Upload Document"
255
  )
256
  gr.HTML("<div style='height: 10px'></div>")
257
  summary_text_input = gr.Textbox(
 
294
  with gr.Row():
295
  with gr.Column(scale=1):
296
  terms_input = gr.File(
297
+ file_types=['txt', 'pdf', 'image'],
298
+ label="📎 Upload Document"
299
  )
300
  gr.HTML("<div style='height: 10px'></div>")
301
  terms_text_input = gr.Textbox(
 
338
  with gr.Row():
339
  with gr.Column(scale=1):
340
  contract1_input = gr.File(
341
+ file_types=['txt', 'pdf', 'image'],
342
+ label="📎 Upload First Contract"
343
  )
344
  gr.HTML("<div style='height: 10px'></div>")
345
  contract1_text = gr.Textbox(
 
350
 
351
  with gr.Column(scale=1):
352
  contract2_input = gr.File(
353
+ file_types=['txt', 'pdf', 'image'],
354
+ label="📎 Upload Second Contract"
355
  )
356
  gr.HTML("<div style='height: 10px'></div>")
357
  contract2_text = gr.Textbox(
 
378
  if not contract1 or not contract2:
379
  return "Please provide both contracts for comparison."
380
 
 
 
 
 
 
 
381
  def compare_contracts(contract1, contract2):
382
  prompt = f"""Compare these two contracts and identify key differences and similarities:
383
+
384
  Contract 1:
385
  {contract1}
386
+
387
  Contract 2:
388
  {contract2}
389
+
390
  Please analyze and list:
391
  1. Key similarities
392
  2. Important differences
393
  3. Unique terms in each contract
394
  4. Potential implications of the differences"""
395
 
396
+ inputs = assistant.tokenizer(prompt, return_tensors="pt")
397
  outputs = assistant.model.generate(
398
  **inputs,
399
  max_new_tokens=400,
 
427
  with gr.Row():
428
  with gr.Column(scale=1):
429
  risk_input = gr.File(
430
+ file_types=['txt', 'pdf', 'image'],
431
+ label="📎 Upload Document"
432
  )
433
  gr.HTML("<div style='height: 10px'></div>")
434
  risk_text_input = gr.Textbox(
 
477
 
478
  demo = create_interface()
479
 
480
+ if _name_ == "_main_":
481
  demo.launch()