gauravchand11 committed on
Commit
8ac3228
·
verified ·
1 Parent(s): 3257652

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +271 -44
app.py CHANGED
@@ -9,9 +9,6 @@ from PIL import Image
9
  import PyPDF2
10
  import requests
11
  import uuid
12
- from collections import Counter
13
- import re
14
- from difflib import unified_diff
15
 
16
  # Configuration
17
  MODEL_NAME = "google/gemma-2b-it"
@@ -22,6 +19,61 @@ CURRENT_TIME = "2025-03-22 21:00:45"
22
  HF_TOKEN = os.getenv('HF_TOKEN')
23
  AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  class TextExtractor:
26
  @staticmethod
27
  def extract_text_from_input(input_file):
@@ -47,7 +99,7 @@ class TextExtractor:
47
  return "Unsupported input type"
48
 
49
  class LegalEaseAssistant:
50
- def __init__(self):
51
  if not HF_TOKEN:
52
  raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
53
 
@@ -65,56 +117,231 @@ class LegalEaseAssistant:
65
  )
66
  self.text_extractor = TextExtractor()
67
 
68
- def generate_keywords(self, text):
69
- words = re.findall(r'\b[A-Za-z]{5,}\b', text)
70
- word_freq = Counter(words)
71
- return ', '.join([word for word, _ in word_freq.most_common(10)])
 
72
 
73
- def compare_contracts(self, text1, text2):
74
- text1_lines = text1.split('\n')
75
- text2_lines = text2.split('\n')
76
- diff = '\n'.join(unified_diff(text1_lines, text2_lines, lineterm=''))
77
- return diff if diff else "The documents are identical."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
  def create_interface():
80
  assistant = LegalEaseAssistant()
 
81
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  with gr.Blocks(title="LegalEase: AI Legal Assistant") as demo:
83
- gr.Markdown("## πŸ“œ LegalEase: AI-Powered Legal Document Assistant")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
- with gr.Tab("πŸ”‘ Keyword Extraction"):
86
- keyword_input = gr.File(file_types=['txt', 'pdf', 'image'], label="Upload Document")
87
- keyword_text_input = gr.Textbox(label="Or Enter Text", placeholder="Enter legal text here...", lines=4)
88
- keyword_output = gr.Textbox(label="Extracted Keywords", lines=2, show_copy_button=True)
89
- keyword_btn = gr.Button("Extract Keywords")
90
-
91
- def keyword_handler(file, text):
92
- input_source = file or text
93
- if not input_source:
94
- return "Please provide some text or upload a document."
95
- text = assistant.text_extractor.extract_text_from_input(input_source)
96
- return assistant.generate_keywords(text)
97
-
98
- keyword_btn.click(fn=keyword_handler, inputs=[keyword_input, keyword_text_input], outputs=keyword_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
- with gr.Tab("βš– Contract Comparison"):
101
- contract_input1 = gr.File(file_types=['txt', 'pdf'], label="Upload Contract 1")
102
- contract_input2 = gr.File(file_types=['txt', 'pdf'], label="Upload Contract 2")
103
- comparison_output = gr.Textbox(label="Comparison Result", lines=12, show_copy_button=True)
104
- compare_btn = gr.Button("Compare Contracts")
105
-
106
- def compare_handler(file1, file2):
107
- if not file1 or not file2:
108
- return "Please upload two contracts for comparison."
109
- text1 = assistant.text_extractor.extract_text_from_input(file1)
110
- text2 = assistant.text_extractor.extract_text_from_input(file2)
111
- return assistant.compare_contracts(text1, text2)
112
-
113
- compare_btn.click(fn=compare_handler, inputs=[contract_input1, contract_input2], outputs=comparison_output)
114
-
115
  return demo
116
 
117
  demo = create_interface()
118
 
119
  if __name__ == "__main__":
120
- demo.launch()
 
9
  import PyPDF2
10
  import requests
11
  import uuid
 
 
 
12
 
13
  # Configuration
14
  MODEL_NAME = "google/gemma-2b-it"
 
19
  HF_TOKEN = os.getenv('HF_TOKEN')
20
  AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
21
 
22
class Translator:
    """Thin client for the Azure Translator Text API (v3.0) that keeps bullet lists intact.

    The text handed to ``translate_text`` is treated as a list of bullet
    points; each point is translated with its own request and the list is
    rebuilt afterwards with one bullet per line.
    """

    # U+2022 BULLET, written as an escape so the marker survives any
    # re-encoding of this source file.
    BULLET = "\u2022"

    # Seconds to wait for the translation service; ``requests`` waits forever
    # when no timeout is given.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Read the Azure settings and fail fast when the key is missing.

        Raises:
            ValueError: if ``AZURE_TRANSLATION_KEY`` is empty or unset.
        """
        self.key = AZURE_TRANSLATION_KEY
        self.region = 'centralindia'
        self.endpoint = "https://api.cognitive.microsofttranslator.com"

        if not self.key:
            raise ValueError("Azure Translator not configured. Please set AZURE_TRANSLATION_KEY in Spaces settings.")

    @classmethod
    def _split_points(cls, text):
        """Return the non-empty bullet points of ``text`` without their markers."""
        body = text.strip()
        # The first point has no preceding newline, so its marker is removed
        # here; otherwise it would be translated and then bulleted twice.
        if body.startswith(cls.BULLET):
            body = body[len(cls.BULLET):]
        pieces = (piece.strip() for piece in body.split('\n' + cls.BULLET))
        return [piece for piece in pieces if piece]

    def translate_text(self, text, target_language="en"):
        """Translate a bullet-pointed text, point by point.

        Args:
            text: Text to translate. Points are separated by a newline
                followed by a bullet marker; text without markers is treated
                as a single point.
            target_language: Language code sent as the ``to`` parameter.

        Returns:
            The translated points, one ``"<bullet> <text>"`` per line, an empty
            string when ``text`` contains nothing to translate, or a
            ``"Translation error: ..."`` message when anything fails.
        """
        try:
            points = self._split_points(text)
            if not points:
                return ""

            # Request parts that do not change between points.
            constructed_url = self.endpoint + '/translate'
            params = {
                'api-version': '3.0',
                'to': target_language
            }

            translated_points = []
            for point in points:
                headers = {
                    'Ocp-Apim-Subscription-Key': self.key,
                    'Ocp-Apim-Subscription-Region': self.region,
                    'Content-type': 'application/json',
                    # Fresh trace id per request.
                    'X-ClientTraceId': str(uuid.uuid4())
                }

                response = requests.post(
                    constructed_url,
                    params=params,
                    headers=headers,
                    json=[{'text': point}],
                    timeout=self.REQUEST_TIMEOUT
                )
                response.raise_for_status()

                translation = response.json()[0]["translations"][0]["text"]
                translated_points.append(translation)

            return '\n'.join(f"{self.BULLET} {point}" for point in translated_points)

        except Exception as e:
            # Deliberate best-effort: the caller shows the returned string in
            # the UI, so failures are reported as text instead of raised.
            return f"Translation error: {str(e)}"
76
+
77
  class TextExtractor:
78
  @staticmethod
79
  def extract_text_from_input(input_file):
 
99
  return "Unsupported input type"
100
 
101
  class LegalEaseAssistant:
102
+ def _init_(self):
103
  if not HF_TOKEN:
104
  raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
105
 
 
117
  )
118
  self.text_extractor = TextExtractor()
119
 
120
def format_response(self, text):
    """Format ``text`` as a bullet list with one sentence per line.

    A sentence ends at a period that is followed by whitespace or by the end
    of the text, so periods inside tokens such as "1.5" do not start a new
    bullet. Every bullet is terminated with a single period.

    Args:
        text: Raw text to reformat.

    Returns:
        The bullet lines joined with newlines; an empty string when ``text``
        contains no sentences.
    """
    # Imported locally so the method does not rely on a file-level ``re`` import.
    import re

    # U+2022 BULLET as an escape so the marker survives re-encoding.
    bullet = "\u2022"

    fragments = re.split(r'\.(?:\s+|$)', text)
    sentences = [fragment.strip() for fragment in fragments if fragment.strip()]
    return '\n'.join(f"{bullet} {sentence}." for sentence in sentences)
125
 
126
def generate_response(self, input_file, task_type):
    """Run one analysis task on a document and return the answer as bullet points.

    Args:
        input_file: Whatever ``self.text_extractor.extract_text_from_input``
            accepts; the extracted text is embedded in the prompt.
        task_type: One of "simplify", "summary", "key_terms" or "risk". Any
            other value falls back to a generic analysis prompt.

    Returns:
        The model's answer passed through ``self.format_response``.
    """
    text = self.text_extractor.extract_text_from_input(input_file)

    # task type -> (instruction placed before the text, marker placed after it)
    task_prompts = {
        "simplify": (
            "Simplify the following legal text in clear, plain language. Provide the response as separate points:",
            "Simplified explanation:",
        ),
        "summary": (
            "Provide a concise summary of the following legal document as separate key points:",
            "Summary:",
        ),
        "key_terms": (
            "Identify and explain the key legal terms and obligations in this text as separate points:",
            "Key Terms:",
        ),
        "risk": (
            "Perform a risk analysis on the following legal document and list each risk as a separate point:",
            "Risk Assessment:",
        ),
    }
    instruction, marker = task_prompts.get(
        task_type,
        ("Analyze the following text and provide points:", "Analysis:"),
    )
    prompt = f"{instruction}\n\n{text}\n\n{marker}"

    # Keep the input tensors on the same device as the model weights.
    inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
    outputs = self.model.generate(
        **inputs,
        max_new_tokens=300,
        num_return_sequences=1,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )

    # The generated sequence starts with the prompt tokens. Decoding only the
    # tokens after them removes the echoed prompt without searching the text
    # for the marker, which would cut the answer if the model repeated it.
    prompt_length = inputs["input_ids"].shape[-1]
    raw_response = self.tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True
    ).strip()

    return self.format_response(raw_response)
153
 
154
def create_interface():
    """Build the LegalEase Gradio UI and return the ``gr.Blocks`` app without launching it.

    The page consists of a header banner, one output-language dropdown shared
    by all tabs, three analysis tabs (simplify, summary, risk) and a footer.
    Each tab passes its input to ``LegalEaseAssistant.generate_response`` and,
    when a language other than English is selected, passes the result on to
    ``Translator.translate_text``.

    Returns:
        The assembled ``gr.Blocks`` instance.
    """
    assistant = LegalEaseAssistant()
    translator = Translator()

    # Dropdown label -> language code handed to the translator.
    SUPPORTED_LANGUAGES = {
        "English": "en",
        "Hindi": "hi",
        "Bengali": "bn",
        "Telugu": "te",
        "Tamil": "ta",
        "Marathi": "mr",
        "Gujarati": "gu",
        "Kannada": "kn",
        "Malayalam": "ml",
        "Punjabi": "pa",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "Chinese (Simplified)": "zh-Hans",
        "Japanese": "ja"
    }

    def process_with_translation(func, *args, target_lang="English"):
        """Call ``func(*args)`` and translate the result unless English is selected."""
        result = func(*args)
        # ``.get`` covers a cleared dropdown (None) or an unknown label; in
        # both cases the untranslated result is returned instead of raising.
        language_code = SUPPORTED_LANGUAGES.get(target_lang, "en")
        if language_code != "en":
            result = translator.translate_text(result, language_code)
        return result

    def add_task_tab(language_selector, *, title, task_type, placeholder,
                     button_label, output_label, empty_message):
        """Add one upload-or-paste tab whose button runs ``task_type`` on the input."""
        with gr.Tab(title):
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(
                        file_types=['txt', 'pdf', 'image'],
                        label="📎 Upload Document"
                    )
                    gr.HTML("<div style='height: 10px'></div>")
                    text_input = gr.Textbox(
                        label="✍ Or Type/Paste Text",
                        placeholder=placeholder,
                        lines=4
                    )
                    gr.HTML("<div style='height: 10px'></div>")
                    run_button = gr.Button(button_label, variant="primary")

                with gr.Column(scale=1):
                    output_box = gr.Textbox(
                        label=output_label,
                        lines=12,
                        show_copy_button=True
                    )

            def handler(file, text, lang):
                # An uploaded file wins over typed text; whitespace-only text
                # counts as no input.
                input_source = file or (text or "").strip()
                if not input_source:
                    return empty_message
                return process_with_translation(
                    assistant.generate_response,
                    input_source,
                    task_type,
                    target_lang=lang
                )

            run_button.click(
                fn=handler,
                inputs=[file_input, text_input, language_selector],
                outputs=output_box
            )

    # One entry per tab; everything that differs between the tabs lives here.
    task_tabs = (
        {
            "title": "📝 Simplify Language",
            "task_type": "simplify",
            "placeholder": "Enter your legal text here...",
            "button_label": "🔍 Simplify Language",
            "output_label": "📋 Simplified Explanation",
            "empty_message": "Please provide some text or upload a document to analyze.",
        },
        {
            "title": "📚 Document Summary",
            "task_type": "summary",
            "placeholder": "Enter your legal document here...",
            "button_label": "📋 Generate Summary",
            "output_label": "📑 Document Summary",
            "empty_message": "Please provide some text or upload a document to summarize.",
        },
        {
            "title": "⚠ Risk Analysis",
            "task_type": "risk",
            "placeholder": "Enter your legal document here...",
            "button_label": "🔍 Analyze Risks",
            "output_label": "⚠ Risk Assessment",
            "empty_message": "Please provide some text or upload a document to analyze risks.",
        },
    )

    with gr.Blocks(title="LegalEase: AI Legal Assistant") as demo:
        gr.HTML(f"""
        <div style="text-align: center; background-color: #f0f2f6; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
            <h1 style="color: #2c3e50; font-size: 2.5em; margin-bottom: 10px;">📜 LegalEase</h1>
            <h2 style="color: #34495e; font-size: 1.5em; margin-bottom: 20px;">AI-Powered Legal Document Assistant</h2>
            <div style="display: flex; justify-content: center; gap: 40px; color: #576574; font-size: 1.1em;">
                <div style="background-color: white; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                    <span style="font-weight: bold;">User:</span> {CURRENT_USER}
                </div>
                <div style="background-color: white; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                    <span style="font-weight: bold;">Last Updated:</span> {CURRENT_TIME} UTC
                </div>
            </div>
        </div>
        """)

        language_selector = gr.Dropdown(
            choices=list(SUPPORTED_LANGUAGES.keys()),
            value="English",
            label="Select Output Language",
            scale=1
        )

        with gr.Tabs():
            for tab_options in task_tabs:
                add_task_tab(language_selector, **tab_options)

        gr.HTML("""
        <div style="text-align: center; margin-top: 20px; padding: 20px; background-color: #f0f2f6; border-radius: 10px;">
            <p style="color: #576574; margin: 0;">Powered by Gemma 2B and Azure Translator</p>
            <p style="color: #576574; margin: 5px 0 0 0; font-size: 0.9em;">Built for Language Translation Hackathon</p>
        </div>
        """)

    return demo
343
 
344
  demo = create_interface()
345
 
346
  if __name__ == "__main__":
347
+ demo.launch()