Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -9,9 +9,6 @@ from PIL import Image
|
|
9 |
import PyPDF2
|
10 |
import requests
|
11 |
import uuid
|
12 |
-
from collections import Counter
|
13 |
-
import re
|
14 |
-
from difflib import unified_diff
|
15 |
|
16 |
# Configuration
|
17 |
MODEL_NAME = "google/gemma-2b-it"
|
@@ -22,6 +19,61 @@ CURRENT_TIME = "2025-03-22 21:00:45"
|
|
22 |
HF_TOKEN = os.getenv('HF_TOKEN')
|
23 |
AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
class TextExtractor:
|
26 |
@staticmethod
|
27 |
def extract_text_from_input(input_file):
|
@@ -47,7 +99,7 @@ class TextExtractor:
|
|
47 |
return "Unsupported input type"
|
48 |
|
49 |
class LegalEaseAssistant:
|
50 |
-
def
|
51 |
if not HF_TOKEN:
|
52 |
raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
|
53 |
|
@@ -65,56 +117,231 @@ class LegalEaseAssistant:
|
|
65 |
)
|
66 |
self.text_extractor = TextExtractor()
|
67 |
|
68 |
-
def
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
72 |
|
73 |
-
def
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
def create_interface():
|
80 |
assistant = LegalEaseAssistant()
|
|
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
with gr.Blocks(title="LegalEase: AI Legal Assistant") as demo:
|
83 |
-
gr.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
-
with gr.Tab("β Contract Comparison"):
|
101 |
-
contract_input1 = gr.File(file_types=['txt', 'pdf'], label="Upload Contract 1")
|
102 |
-
contract_input2 = gr.File(file_types=['txt', 'pdf'], label="Upload Contract 2")
|
103 |
-
comparison_output = gr.Textbox(label="Comparison Result", lines=12, show_copy_button=True)
|
104 |
-
compare_btn = gr.Button("Compare Contracts")
|
105 |
-
|
106 |
-
def compare_handler(file1, file2):
|
107 |
-
if not file1 or not file2:
|
108 |
-
return "Please upload two contracts for comparison."
|
109 |
-
text1 = assistant.text_extractor.extract_text_from_input(file1)
|
110 |
-
text2 = assistant.text_extractor.extract_text_from_input(file2)
|
111 |
-
return assistant.compare_contracts(text1, text2)
|
112 |
-
|
113 |
-
compare_btn.click(fn=compare_handler, inputs=[contract_input1, contract_input2], outputs=comparison_output)
|
114 |
-
|
115 |
return demo
|
116 |
|
117 |
demo = create_interface()
|
118 |
|
119 |
if __name__ == "__main__":
|
120 |
-
demo.launch()
|
|
|
9 |
import PyPDF2
|
10 |
import requests
|
11 |
import uuid
|
|
|
|
|
|
|
12 |
|
13 |
# Configuration
|
14 |
MODEL_NAME = "google/gemma-2b-it"
|
|
|
19 |
HF_TOKEN = os.getenv('HF_TOKEN')
|
20 |
AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
|
21 |
|
22 |
+
class Translator:
    """Client for the Azure Translator ``/translate`` endpoint (API version 3.0).

    Text is expected in the bullet format used by the assistant's responses:
    points separated by a newline followed by a bullet marker. Each point is
    translated with its own request and the bullet layout is rebuilt afterwards.
    """

    # Marker placed in front of every point of a formatted response.
    BULLET = "\u2022 "
    # Seconds to wait for the service; without a timeout a stalled connection
    # would block the calling request handler indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Store the service settings and fail fast when no key is configured.

        Raises:
            ValueError: if ``AZURE_TRANSLATION_KEY`` is unset or empty.
        """
        self.key = AZURE_TRANSLATION_KEY
        self.region = 'centralindia'
        self.endpoint = "https://api.cognitive.microsofttranslator.com"

        if not self.key:
            raise ValueError("Azure Translator not configured. Please set AZURE_TRANSLATION_KEY in Spaces settings.")

    def translate_text(self, text, target_language="en"):
        """Translate bullet-formatted ``text`` into ``target_language``.

        Args:
            text: Text whose points are separated by ``"\\n"`` plus the bullet
                marker. Text without any marker is treated as a single point.
            target_language: Language code sent as the ``to`` query parameter.

        Returns:
            The translated points, one per line, each prefixed with the bullet
            marker. ``text`` is returned unchanged when it contains no
            non-blank point. On any failure a string starting with
            ``"Translation error: "`` is returned instead of raising.
        """
        try:
            bullet = self.BULLET

            # The first point carries a bullet but no preceding newline, so the
            # separator split below would leave that bullet inside the point.
            body = text[len(bullet):] if text.startswith(bullet) else text
            points = [p.strip() for p in body.split('\n' + bullet) if p.strip()]
            if not points:
                return text

            # Identical for every point, so built once outside the loop.
            constructed_url = self.endpoint + '/translate'
            params = {
                'api-version': '3.0',
                'to': target_language,
            }

            translated_points = []
            for point in points:
                headers = {
                    'Ocp-Apim-Subscription-Key': self.key,
                    'Ocp-Apim-Subscription-Region': self.region,
                    'Content-type': 'application/json',
                    # Fresh trace id per request.
                    'X-ClientTraceId': str(uuid.uuid4()),
                }
                response = requests.post(
                    constructed_url,
                    params=params,
                    headers=headers,
                    json=[{'text': point}],
                    timeout=self.REQUEST_TIMEOUT,
                )
                response.raise_for_status()
                translated_points.append(response.json()[0]["translations"][0]["text"])

            # Rebuild the bullet list in the same shape it arrived in.
            return '\n'.join(bullet + point for point in translated_points)

        except Exception as e:
            # Deliberate best-effort boundary: the result is shown directly in
            # the UI, so failures are reported as text rather than raised.
            return f"Translation error: {str(e)}"
|
76 |
+
|
77 |
class TextExtractor:
|
78 |
@staticmethod
|
79 |
def extract_text_from_input(input_file):
|
|
|
99 |
return "Unsupported input type"
|
100 |
|
101 |
class LegalEaseAssistant:
|
102 |
+
def _init_(self):
|
103 |
if not HF_TOKEN:
|
104 |
raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
|
105 |
|
|
|
117 |
)
|
118 |
self.text_extractor = TextExtractor()
|
119 |
|
120 |
+
def format_response(self, text):
    """Format ``text`` as a bullet list with one sentence per line.

    A sentence ends at a run of periods that is followed by whitespace or by
    the end of the text, so periods inside tokens such as ``1.5`` or ``3.2``
    do not start a new bullet. Every bullet is terminated with a single period.

    Args:
        text: Raw text to format.

    Returns:
        The bullet lines joined with newlines; an empty string when ``text``
        contains no sentence.
    """
    # Imported locally so the method does not depend on a module-level
    # ``import re`` being present.
    import re

    sentences = [s.strip() for s in re.split(r'\.+(?:\s+|$)', text) if s.strip()]
    return '\n'.join(f"\u2022 {sentence}." for sentence in sentences)
|
125 |
|
126 |
+
def generate_response(self, input_file, task_type):
    """Run one analysis task on a document and return a bullet-formatted answer.

    Args:
        input_file: Input handed to ``self.text_extractor.extract_text_from_input``.
        task_type: One of ``"simplify"``, ``"summary"``, ``"key_terms"`` or
            ``"risk"``; any other value selects a generic analysis prompt.

    Returns:
        The generated answer after it has passed through ``self.format_response``.
    """
    text = self.text_extractor.extract_text_from_input(input_file)

    # Each template ends with a short cue line; that cue is reused further
    # down to cut the echoed prompt out of the decoded output.
    templates = {
        "simplify": "Simplify the following legal text in clear, plain language. Provide the response as separate points:\n\n{text}\n\nSimplified explanation:",
        "summary": "Provide a concise summary of the following legal document as separate key points:\n\n{text}\n\nSummary:",
        "key_terms": "Identify and explain the key legal terms and obligations in this text as separate points:\n\n{text}\n\nKey Terms:",
        "risk": "Perform a risk analysis on the following legal document and list each risk as a separate point:\n\n{text}\n\nRisk Assessment:",
    }
    fallback = "Analyze the following text and provide points:\n\n{text}\n\nAnalysis:"
    prompt = templates.get(task_type, fallback).format(text=text)

    encoded = self.tokenizer(prompt, return_tensors="pt")
    generation_options = {
        "max_new_tokens": 300,
        "num_return_sequences": 1,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
    }
    generated = self.model.generate(**encoded, **generation_options)
    decoded = self.tokenizer.decode(generated[0], skip_special_tokens=True)

    # Keep only what follows the last occurrence of the cue line; when the
    # cue is absent from the decoded text, use the whole text.
    cue = prompt.split("\n\n")[-1]
    pieces = decoded.split(cue)
    if len(pieces) > 1:
        answer = pieces[-1].strip()
    else:
        answer = decoded.strip()

    return self.format_response(answer)
|
153 |
|
154 |
def create_interface():
    """Build the Gradio interface for the LegalEase assistant.

    The page consists of a header banner, one output-language dropdown shared
    by all tabs, three analysis tabs (simplify, summary, risk) with the same
    layout, and a footer.

    Returns:
        The ``gr.Blocks`` application object.
    """
    assistant = LegalEaseAssistant()
    translator = Translator()

    # Display name -> language code passed to the translator.
    SUPPORTED_LANGUAGES = {
        "English": "en",
        "Hindi": "hi",
        "Bengali": "bn",
        "Telugu": "te",
        "Tamil": "ta",
        "Marathi": "mr",
        "Gujarati": "gu",
        "Kannada": "kn",
        "Malayalam": "ml",
        "Punjabi": "pa",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "Chinese (Simplified)": "zh-Hans",
        "Japanese": "ja",
    }

    # One row per tab:
    # (tab title, text placeholder, button label, output label,
    #  message shown when no input was given, task type)
    TAB_SPECS = [
        (
            "π Simplify Language",
            "Enter your legal text here...",
            "π Simplify Language",
            "π Simplified Explanation",
            "Please provide some text or upload a document to analyze.",
            "simplify",
        ),
        (
            "π Document Summary",
            "Enter your legal document here...",
            "π Generate Summary",
            "π Document Summary",
            "Please provide some text or upload a document to summarize.",
            "summary",
        ),
        (
            "β Risk Analysis",
            "Enter your legal document here...",
            "π Analyze Risks",
            "β Risk Assessment",
            "Please provide some text or upload a document to analyze risks.",
            "risk",
        ),
    ]

    def process_with_translation(func, *args, target_lang="English"):
        """Call ``func(*args)`` and translate the result unless English is selected."""
        result = func(*args)
        # A cleared dropdown or an unknown name falls back to English (no
        # translation) instead of raising KeyError inside the click handler.
        language_code = SUPPORTED_LANGUAGES.get(target_lang, "en")
        if language_code != "en":
            result = translator.translate_text(result, language_code)
        return result

    def add_analysis_tab(language_selector, tab_title, placeholder,
                         button_label, output_label, empty_message, task_type):
        """Create one analysis tab and wire its button to ``task_type``."""
        with gr.Tab(tab_title):
            with gr.Row():
                with gr.Column(scale=1):
                    file_input = gr.File(
                        file_types=['txt', 'pdf', 'image'],
                        label="π Upload Document",
                    )
                    gr.HTML("<div style='height: 10px'></div>")
                    text_input = gr.Textbox(
                        label="β Or Type/Paste Text",
                        placeholder=placeholder,
                        lines=4,
                    )
                    gr.HTML("<div style='height: 10px'></div>")
                    run_btn = gr.Button(button_label, variant="primary")

                with gr.Column(scale=1):
                    output_box = gr.Textbox(
                        label=output_label,
                        lines=12,
                        show_copy_button=True,
                    )

            def handler(file, text, lang):
                # An uploaded file takes precedence over typed text.
                input_source = file or text
                if not input_source:
                    return empty_message
                return process_with_translation(
                    assistant.generate_response,
                    input_source,
                    task_type,
                    target_lang=lang,
                )

            run_btn.click(
                fn=handler,
                inputs=[file_input, text_input, language_selector],
                outputs=output_box,
            )

    with gr.Blocks(title="LegalEase: AI Legal Assistant") as demo:
        gr.HTML(f"""
            <div style="text-align: center; background-color: #f0f2f6; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
                <h1 style="color: #2c3e50; font-size: 2.5em; margin-bottom: 10px;">π LegalEase</h1>
                <h2 style="color: #34495e; font-size: 1.5em; margin-bottom: 20px;">AI-Powered Legal Document Assistant</h2>
                <div style="display: flex; justify-content: center; gap: 40px; color: #576574; font-size: 1.1em;">
                    <div style="background-color: white; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                        <span style="font-weight: bold;">User:</span> {CURRENT_USER}
                    </div>
                    <div style="background-color: white; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                        <span style="font-weight: bold;">Last Updated:</span> {CURRENT_TIME} UTC
                    </div>
                </div>
            </div>
        """)

        language_selector = gr.Dropdown(
            choices=list(SUPPORTED_LANGUAGES.keys()),
            value="English",
            label="Select Output Language",
            scale=1,
        )

        with gr.Tabs():
            for spec in TAB_SPECS:
                add_analysis_tab(language_selector, *spec)

        gr.HTML("""
            <div style="text-align: center; margin-top: 20px; padding: 20px; background-color: #f0f2f6; border-radius: 10px;">
                <p style="color: #576574; margin: 0;">Powered by Gemma 2B and Azure Translator</p>
                <p style="color: #576574; margin: 5px 0 0 0; font-size: 0.9em;">Built for Language Translation Hackathon</p>
            </div>
        """)

    return demo
|
343 |
|
344 |
demo = create_interface()
|
345 |
|
346 |
if __name__ == "__main__":
|
347 |
+
demo.launch()
|