gauravchand11 commited on
Commit
2a8dc4e
·
verified ·
1 Parent(s): 2bcbb41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +147 -45
app.py CHANGED
@@ -6,21 +6,48 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
6
  from huggingface_hub import login
7
  import pytesseract
8
  from PIL import Image
9
- import PyPDF2
10
  import requests
11
  import uuid
12
 
13
  # Configuration
14
  MODEL_NAME = "google/gemma-2b-it"
15
  CURRENT_USER = "AkarshanGupta"
16
- CURRENT_TIME = "2025-03-22 21:00:45"
17
 
18
  # API Keys
19
  HF_TOKEN = os.getenv('HF_TOKEN')
20
  AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  class Translator:
23
- def init(self):
24
  self.key = AZURE_TRANSLATION_KEY
25
  self.region = 'centralindia'
26
  self.endpoint = "https://api.cognitive.microsofttranslator.com"
@@ -30,13 +57,11 @@ class Translator:
30
 
31
  def translate_text(self, text, target_language="en"):
32
  try:
33
- # Split the text into bullet points
34
  bullet_points = text.split('\nβ€’ ')
35
  translated_points = []
36
 
37
- # Translate each bullet point separately
38
  for point in bullet_points:
39
- if point.strip(): # Only translate non-empty points
40
  path = '/translate'
41
  constructed_url = self.endpoint + path
42
 
@@ -47,7 +72,7 @@ class Translator:
47
 
48
  headers = {
49
  'Ocp-Apim-Subscription-Key': self.key,
50
- 'Ocp-Apim-Subscription-Region': 'centralindia',
51
  'Content-type': 'application/json',
52
  'X-ClientTraceId': str(uuid.uuid4())
53
  }
@@ -67,44 +92,22 @@ class Translator:
67
  translation = response.json()[0]["translations"][0]["text"]
68
  translated_points.append(translation)
69
 
70
- # Reconstruct the bullet-pointed text
71
  translated_text = '\nβ€’ ' + '\nβ€’ '.join(translated_points)
72
  return translated_text
73
 
74
  except Exception as e:
75
  return f"Translation error: {str(e)}"
76
 
77
- class TextExtractor:
78
- @staticmethod
79
- def extract_text_from_input(input_file):
80
- if isinstance(input_file, str):
81
- return input_file
82
-
83
- if isinstance(input_file, Image.Image):
84
- try:
85
- return pytesseract.image_to_string(input_file)
86
- except Exception as e:
87
- return f"Error extracting text from image: {str(e)}"
88
-
89
- if hasattr(input_file, 'name') and input_file.name.lower().endswith('.pdf'):
90
- try:
91
- pdf_reader = PyPDF2.PdfReader(input_file)
92
- text = ""
93
- for page in pdf_reader.pages:
94
- text += page.extract_text() + "\n\n"
95
- return text
96
- except Exception as e:
97
- return f"Error extracting text from PDF: {str(e)}"
98
-
99
- return "Unsupported input type"
100
-
101
  class LegalEaseAssistant:
102
- def init(self):
103
  if not HF_TOKEN:
104
  raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
105
 
106
  login(token=HF_TOKEN)
107
 
 
 
 
108
  self.tokenizer = AutoTokenizer.from_pretrained(
109
  MODEL_NAME,
110
  token=HF_TOKEN
@@ -115,10 +118,8 @@ class LegalEaseAssistant:
115
  device_map="auto",
116
  torch_dtype=torch.float32
117
  )
118
- self.text_extractor = TextExtractor()
119
 
120
  def format_response(self, text):
121
- """Format response as bullet points"""
122
  sentences = [s.strip() for s in text.split('.') if s.strip()]
123
  bullet_points = ['β€’ ' + s + '.' for s in sentences]
124
  return '\n'.join(bullet_points)
@@ -150,6 +151,23 @@ class LegalEaseAssistant:
150
  raw_response = response_parts[-1].strip() if len(response_parts) > 1 else response.strip()
151
 
152
  return self.format_response(raw_response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  def create_interface():
155
  assistant = LegalEaseAssistant()
@@ -179,16 +197,20 @@ def create_interface():
179
  result = translator.translate_text(result, SUPPORTED_LANGUAGES[target_lang])
180
  return result
181
 
182
- with gr.Blocks(title="LegalEase: AI Legal Assistant") as demo:
 
 
 
 
183
  gr.HTML(f"""
184
- <div style="text-align: center; background-color: #f0f2f6; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
185
  <h1 style="color: #2c3e50; font-size: 2.5em; margin-bottom: 10px;">πŸ“œ LegalEase</h1>
186
  <h2 style="color: #34495e; font-size: 1.5em; margin-bottom: 20px;">AI-Powered Legal Document Assistant</h2>
187
  <div style="display: flex; justify-content: center; gap: 40px; color: #576574; font-size: 1.1em;">
188
- <div style="background-color: white; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
189
  <span style="font-weight: bold;">User:</span> {CURRENT_USER}
190
  </div>
191
- <div style="background-color: white; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
192
  <span style="font-weight: bold;">Last Updated:</span> {CURRENT_TIME} UTC
193
  </div>
194
  </div>
@@ -203,6 +225,7 @@ def create_interface():
203
  )
204
 
205
  with gr.Tabs():
 
206
  with gr.Tab("πŸ“ Simplify Language"):
207
  with gr.Row():
208
  with gr.Column(scale=1):
@@ -246,6 +269,7 @@ def create_interface():
246
  outputs=simplify_output
247
  )
248
 
 
249
  with gr.Tab("πŸ“š Document Summary"):
250
  with gr.Row():
251
  with gr.Column(scale=1):
@@ -289,6 +313,51 @@ def create_interface():
289
  outputs=summary_output
290
  )
291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  with gr.Tab("⚠ Risk Analysis"):
293
  with gr.Row():
294
  with gr.Column(scale=1):
@@ -332,16 +401,49 @@ def create_interface():
332
  outputs=risk_output
333
  )
334
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
335
  gr.HTML(f"""
336
- <div style="text-align: center; margin-top: 20px; padding: 20px; background-color: #f0f2f6; border-radius: 10px;">
337
- <p style="color: #576574; margin: 0;">Powered by Gemma 2B and Azure Translator</p>
338
- <p style="color: #576574; margin: 5px 0 0 0; font-size: 0.9em;">Built for Language Translation Hackathon</p>
339
  </div>
340
  """)
341
 
342
  return demo
343
 
344
- demo = create_interface()
 
 
 
345
 
346
- if _name_ == "_main_":
347
- demo.launch()
 
6
  from huggingface_hub import login
7
  import pytesseract
8
  from PIL import Image
9
+ import fitz # PyMuPDF
10
  import requests
11
  import uuid
12
 
13
  # Configuration
14
  MODEL_NAME = "google/gemma-2b-it"
15
  CURRENT_USER = "AkarshanGupta"
16
+ CURRENT_TIME = "2025-03-23 03:33:01"
17
 
18
  # API Keys
19
  HF_TOKEN = os.getenv('HF_TOKEN')
20
  AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
21
+ LLAMA_API_KEY = os.getenv('LLAMA_API_KEY')
22
+ LLAMA_API_ENDPOINT = "https://api.llama.ai/v1/generate"
23
+
24
class TextExtractor:
    """Pulls plain text out of the assistant's supported input types."""

    @staticmethod
    def extract_text_from_input(input_file):
        """Extract text from a string, a PIL image, or a PDF file object.

        Returns the extracted text, or a human-readable error/diagnostic
        string on failure — this method never raises.
        """
        # Plain text: nothing to extract.
        if isinstance(input_file, str):
            return input_file

        # Images go through Tesseract OCR.
        if isinstance(input_file, Image.Image):
            try:
                return pytesseract.image_to_string(input_file)
            except Exception as e:
                return f"Error extracting text from image: {str(e)}"

        # PDFs are parsed with PyMuPDF (fitz). The context manager guarantees
        # the document is closed even if a page fails to parse — the original
        # called doc.close() only on the success path, leaking the handle on
        # a mid-iteration exception.
        if hasattr(input_file, 'name') and input_file.name.lower().endswith('.pdf'):
            try:
                text = ""
                with fitz.open(stream=input_file.read(), filetype="pdf") as doc:
                    for page in doc:
                        # Blank line between pages keeps paragraphs apart.
                        text += page.get_text() + "\n\n"
                return text
            except Exception as e:
                return f"Error extracting text from PDF: {str(e)}"

        return "Unsupported input type"
48
 
49
  class Translator:
50
+ def _init_(self):
51
  self.key = AZURE_TRANSLATION_KEY
52
  self.region = 'centralindia'
53
  self.endpoint = "https://api.cognitive.microsofttranslator.com"
 
57
 
58
  def translate_text(self, text, target_language="en"):
59
  try:
 
60
  bullet_points = text.split('\nβ€’ ')
61
  translated_points = []
62
 
 
63
  for point in bullet_points:
64
+ if point.strip():
65
  path = '/translate'
66
  constructed_url = self.endpoint + path
67
 
 
72
 
73
  headers = {
74
  'Ocp-Apim-Subscription-Key': self.key,
75
+ 'Ocp-Apim-Subscription-Region': self.region,
76
  'Content-type': 'application/json',
77
  'X-ClientTraceId': str(uuid.uuid4())
78
  }
 
92
  translation = response.json()[0]["translations"][0]["text"]
93
  translated_points.append(translation)
94
 
 
95
  translated_text = '\nβ€’ ' + '\nβ€’ '.join(translated_points)
96
  return translated_text
97
 
98
  except Exception as e:
99
  return f"Translation error: {str(e)}"
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  class LegalEaseAssistant:
102
+ def _init_(self):
103
  if not HF_TOKEN:
104
  raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
105
 
106
  login(token=HF_TOKEN)
107
 
108
+ # Initialize text_extractor first
109
+ self.text_extractor = TextExtractor()
110
+
111
  self.tokenizer = AutoTokenizer.from_pretrained(
112
  MODEL_NAME,
113
  token=HF_TOKEN
 
118
  device_map="auto",
119
  torch_dtype=torch.float32
120
  )
 
121
 
122
  def format_response(self, text):
 
123
  sentences = [s.strip() for s in text.split('.') if s.strip()]
124
  bullet_points = ['β€’ ' + s + '.' for s in sentences]
125
  return '\n'.join(bullet_points)
 
151
  raw_response = response_parts[-1].strip() if len(response_parts) > 1 else response.strip()
152
 
153
  return self.format_response(raw_response)
154
+
155
+ def generate_chatbot_response(self, user_input):
156
+ if not LLAMA_API_KEY:
157
+ return "LLaMA API key not found. Please set the LLAMA_API_KEY environment variable."
158
+
159
+ response = requests.post(
160
+ LLAMA_API_ENDPOINT,
161
+ headers={"Authorization": f"Bearer {LLAMA_API_KEY}"},
162
+ json={"prompt": user_input, "max_tokens": 150}
163
+ )
164
+
165
+ if response.status_code == 401:
166
+ return "Unauthorized: Please check your LLaMA API key."
167
+ elif response.status_code != 200:
168
+ return f"Error: Received {response.status_code} status code from LLaMA API."
169
+
170
+ return response.json()["choices"][0]["text"].strip()
171
 
172
  def create_interface():
173
  assistant = LegalEaseAssistant()
 
197
  result = translator.translate_text(result, SUPPORTED_LANGUAGES[target_lang])
198
  return result
199
 
200
+ with gr.Blocks(title="LegalEase", css="""
201
+ .gradio-container {max-width: 1200px; margin: auto;}
202
+ .header {text-align: center; margin-bottom: 2rem;}
203
+ .content {padding: 2rem;}
204
+ """) as demo:
205
  gr.HTML(f"""
206
+ <div style="text-align: center; background-color: #e0e0e0; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
207
  <h1 style="color: #2c3e50; font-size: 2.5em; margin-bottom: 10px;">πŸ“œ LegalEase</h1>
208
  <h2 style="color: #34495e; font-size: 1.5em; margin-bottom: 20px;">AI-Powered Legal Document Assistant</h2>
209
  <div style="display: flex; justify-content: center; gap: 40px; color: #576574; font-size: 1.1em;">
210
+ <div style="background-color: #e0e0e0; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
211
  <span style="font-weight: bold;">User:</span> {CURRENT_USER}
212
  </div>
213
+ <div style="background-color: #e0e0e0; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
214
  <span style="font-weight: bold;">Last Updated:</span> {CURRENT_TIME} UTC
215
  </div>
216
  </div>
 
225
  )
226
 
227
  with gr.Tabs():
228
+ # Simplify Language Tab
229
  with gr.Tab("πŸ“ Simplify Language"):
230
  with gr.Row():
231
  with gr.Column(scale=1):
 
269
  outputs=simplify_output
270
  )
271
 
272
+ # Document Summary Tab
273
  with gr.Tab("πŸ“š Document Summary"):
274
  with gr.Row():
275
  with gr.Column(scale=1):
 
313
  outputs=summary_output
314
  )
315
 
316
+ # Key Terms Tab
317
+ with gr.Tab("πŸ”‘ Key Terms"):
318
+ with gr.Row():
319
+ with gr.Column(scale=1):
320
+ terms_input = gr.File(
321
+ file_types=['txt', 'pdf', 'image'],
322
+ label="πŸ“Ž Upload Document"
323
+ )
324
+ gr.HTML("<div style='height: 10px'></div>")
325
+ terms_text_input = gr.Textbox(
326
+ label="✍ Or Type/Paste Text",
327
+ placeholder="Enter your legal document here...",
328
+ lines=4
329
+ )
330
+ gr.HTML("<div style='height: 10px'></div>")
331
+ terms_btn = gr.Button(
332
+ "πŸ” Extract Key Terms",
333
+ variant="primary"
334
+ )
335
+
336
+ with gr.Column(scale=1):
337
+ terms_output = gr.Textbox(
338
+ label="πŸ”‘ Key Terms & Definitions",
339
+ lines=12,
340
+ show_copy_button=True
341
+ )
342
+
343
+ def terms_handler(file, text, lang):
344
+ input_source = file or text
345
+ if not input_source:
346
+ return "Please provide some text or upload a document to analyze key terms."
347
+ return process_with_translation(
348
+ assistant.generate_response,
349
+ input_source,
350
+ "key_terms",
351
+ target_lang=lang
352
+ )
353
+
354
+ terms_btn.click(
355
+ fn=terms_handler,
356
+ inputs=[terms_input, terms_text_input, language_selector],
357
+ outputs=terms_output
358
+ )
359
+
360
+ # Risk Analysis Tab
361
  with gr.Tab("⚠ Risk Analysis"):
362
  with gr.Row():
363
  with gr.Column(scale=1):
 
401
  outputs=risk_output
402
  )
403
 
404
+ # Legal Assistant Chat Tab
405
+ with gr.Tab("πŸ€– Legal Assistant Chat"):
406
+ chatbot_input = gr.Textbox(
407
+ label="πŸ’¬ Your Message",
408
+ placeholder="Ask me anything about legal matters...",
409
+ lines=2
410
+ )
411
+ chatbot_output = gr.Textbox(
412
+ label="πŸ€– Assistant Response",
413
+ lines=10,
414
+ show_copy_button=True
415
+ )
416
+ chatbot_btn = gr.Button(
417
+ "πŸ’¬ Send Message",
418
+ variant="primary"
419
+ )
420
+
421
+ def chatbot_handler(user_input, lang):
422
+ if not user_input:
423
+ return "Please type a message to start the conversation."
424
+ response = assistant.generate_chatbot_response(user_input)
425
+ if lang != "English":
426
+ response = translator.translate_text(response, SUPPORTED_LANGUAGES[lang])
427
+ return response
428
+
429
+ chatbot_btn.click(
430
+ fn=chatbot_handler,
431
+ inputs=[chatbot_input, language_selector],
432
+ outputs=chatbot_output
433
+ )
434
+
435
  gr.HTML(f"""
436
+ <div style="text-align: center; margin-top: 20px; padding: 20px; background-color: #e0e0e0; border-radius: 10px;">
437
+ <p style="color: #576574; margin: 0;">Made by Team Ice Age</p>
 
438
  </div>
439
  """)
440
 
441
  return demo
442
 
443
def main():
    """Entry point: build the Gradio app, enable request queuing, launch it."""
    app = create_interface()
    app.queue()
    # share=True publishes a temporary public URL in addition to localhost.
    app.launch(share=True)


if __name__ == "__main__":
    main()