Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,9 @@ from PIL import Image
|
|
9 |
import PyPDF2
|
10 |
import requests
|
11 |
import uuid
|
|
|
|
|
|
|
12 |
|
13 |
# Configuration
|
14 |
MODEL_NAME = "google/gemma-2b-it"
|
@@ -19,61 +22,6 @@ CURRENT_TIME = "2025-03-22 21:00:45"
|
|
19 |
HF_TOKEN = os.getenv('HF_TOKEN')
|
20 |
AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
|
21 |
|
22 |
-
class Translator:
    """Small client for the Azure Translator Text REST API (api-version 3.0).

    The text is translated one bullet point at a time: it is split on the
    ``'\\n• '`` separator, every non-empty point is sent to the ``/translate``
    endpoint, and the translated points are joined back into a bulleted list.
    """

    # Seconds to wait for the translation service before giving up, so a
    # stalled request cannot hang the UI callback indefinitely.
    REQUEST_TIMEOUT = 30

    def __init__(self):
        """Read the Azure credentials and fail fast when they are missing.

        Raises:
            ValueError: if ``AZURE_TRANSLATION_KEY`` is empty or unset.
        """
        self.key = AZURE_TRANSLATION_KEY
        self.region = 'centralindia'
        self.endpoint = "https://api.cognitive.microsofttranslator.com"

        if not self.key:
            raise ValueError("Azure Translator not configured. Please set AZURE_TRANSLATION_KEY in Spaces settings.")

    def translate_text(self, text, target_language="en"):
        """Translate bullet-pointed *text* into *target_language*.

        Args:
            text: Text whose points are separated by ``'\\n• '``.
            target_language: Language code passed as the ``to`` query
                parameter (for example ``"hi"`` or ``"zh-Hans"``).

        Returns:
            The translated points re-joined as ``'\\n• point'`` lines, the
            input unchanged when it contains no non-empty point, or a
            ``"Translation error: ..."`` string when anything fails.
        """
        try:
            # Drop a bullet that is still attached to a point (typically the
            # first one) so it is not doubled when the list is rebuilt.
            points = [
                cleaned
                for cleaned in (
                    raw.strip().lstrip('•').strip()
                    for raw in (text or '').split('\n• ')
                )
                if cleaned
            ]
            if not points:
                return text

            # Request pieces that do not depend on the individual point.
            constructed_url = self.endpoint + '/translate'
            params = {
                'api-version': '3.0',
                'to': target_language
            }

            translated_points = []
            for point in points:
                headers = {
                    'Ocp-Apim-Subscription-Key': self.key,
                    'Ocp-Apim-Subscription-Region': self.region,
                    'Content-type': 'application/json',
                    # Fresh trace id per request.
                    'X-ClientTraceId': str(uuid.uuid4())
                }
                response = requests.post(
                    constructed_url,
                    params=params,
                    headers=headers,
                    json=[{'text': point}],
                    timeout=self.REQUEST_TIMEOUT
                )
                response.raise_for_status()
                translated_points.append(
                    response.json()[0]["translations"][0]["text"]
                )

            # Reconstruct the bullet-pointed text.
            return '\n• ' + '\n• '.join(translated_points)

        except Exception as e:
            # Deliberately broad: callers display the returned string in the
            # UI, so failures are reported as text instead of being raised.
            return f"Translation error: {str(e)}"
|
76 |
-
|
77 |
class TextExtractor:
|
78 |
@staticmethod
|
79 |
def extract_text_from_input(input_file):
|
@@ -99,7 +47,7 @@ class TextExtractor:
|
|
99 |
return "Unsupported input type"
|
100 |
|
101 |
class LegalEaseAssistant:
|
102 |
-
def
|
103 |
if not HF_TOKEN:
|
104 |
raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
|
105 |
|
@@ -117,231 +65,56 @@ class LegalEaseAssistant:
|
|
117 |
)
|
118 |
self.text_extractor = TextExtractor()
|
119 |
|
120 |
-
def
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
return '\n'.join(bullet_points)
|
125 |
|
126 |
-
def
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
"summary": f"Provide a concise summary of the following legal document as separate key points:\n\n{text}\n\nSummary:",
|
132 |
-
"key_terms": f"Identify and explain the key legal terms and obligations in this text as separate points:\n\n{text}\n\nKey Terms:",
|
133 |
-
"risk": f"Perform a risk analysis on the following legal document and list each risk as a separate point:\n\n{text}\n\nRisk Assessment:"
|
134 |
-
}
|
135 |
-
|
136 |
-
prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")
|
137 |
-
|
138 |
-
inputs = self.tokenizer(prompt, return_tensors="pt")
|
139 |
-
outputs = self.model.generate(
|
140 |
-
**inputs,
|
141 |
-
max_new_tokens=300,
|
142 |
-
num_return_sequences=1,
|
143 |
-
do_sample=True,
|
144 |
-
temperature=0.7,
|
145 |
-
top_p=0.9
|
146 |
-
)
|
147 |
-
|
148 |
-
response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
149 |
-
response_parts = response.split(prompt.split("\n\n")[-1])
|
150 |
-
raw_response = response_parts[-1].strip() if len(response_parts) > 1 else response.strip()
|
151 |
-
|
152 |
-
return self.format_response(raw_response)
|
153 |
|
154 |
def create_interface():
|
155 |
assistant = LegalEaseAssistant()
|
156 |
-
translator = Translator()
|
157 |
|
158 |
-
SUPPORTED_LANGUAGES = {
|
159 |
-
"English": "en",
|
160 |
-
"Hindi": "hi",
|
161 |
-
"Bengali": "bn",
|
162 |
-
"Telugu": "te",
|
163 |
-
"Tamil": "ta",
|
164 |
-
"Marathi": "mr",
|
165 |
-
"Gujarati": "gu",
|
166 |
-
"Kannada": "kn",
|
167 |
-
"Malayalam": "ml",
|
168 |
-
"Punjabi": "pa",
|
169 |
-
"Spanish": "es",
|
170 |
-
"French": "fr",
|
171 |
-
"German": "de",
|
172 |
-
"Chinese (Simplified)": "zh-Hans",
|
173 |
-
"Japanese": "ja"
|
174 |
-
}
|
175 |
-
|
176 |
-
def process_with_translation(func, *args, target_lang="English"):
    """Call ``func(*args)`` and translate the result unless English is selected.

    Args:
        func: Callable producing the text to show.
        *args: Positional arguments forwarded to ``func``.
        target_lang: Display name of the output language; looked up in
            ``SUPPORTED_LANGUAGES`` to obtain the language code.

    Returns:
        The result of ``func`` as-is for English, an empty selection or an
        unknown language name; otherwise the output of
        ``translator.translate_text`` for the mapped language code.
    """
    result = func(*args)
    if not target_lang or target_lang == "English":
        return result

    # Use .get so an unexpected label falls back to the untranslated text
    # instead of raising KeyError inside the UI callback.
    language_code = SUPPORTED_LANGUAGES.get(target_lang)
    if language_code is None:
        return result
    return translator.translate_text(result, language_code)
|
181 |
-
|
182 |
with gr.Blocks(title="LegalEase: AI Legal Assistant") as demo:
|
183 |
-
gr.
|
184 |
-
<div style="text-align: center; background-color: #f0f2f6; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
|
185 |
-
<h1 style="color: #2c3e50; font-size: 2.5em; margin-bottom: 10px;">📜 LegalEase</h1>
|
186 |
-
<h2 style="color: #34495e; font-size: 1.5em; margin-bottom: 20px;">AI-Powered Legal Document Assistant</h2>
|
187 |
-
<div style="display: flex; justify-content: center; gap: 40px; color: #576574; font-size: 1.1em;">
|
188 |
-
<div style="background-color: white; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
|
189 |
-
<span style="font-weight: bold;">User:</span> {CURRENT_USER}
|
190 |
-
</div>
|
191 |
-
<div style="background-color: white; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
|
192 |
-
<span style="font-weight: bold;">Last Updated:</span> {CURRENT_TIME} UTC
|
193 |
-
</div>
|
194 |
-
</div>
|
195 |
-
</div>
|
196 |
-
""")
|
197 |
-
|
198 |
-
language_selector = gr.Dropdown(
|
199 |
-
choices=list(SUPPORTED_LANGUAGES.keys()),
|
200 |
-
value="English",
|
201 |
-
label="Select Output Language",
|
202 |
-
scale=1
|
203 |
-
)
|
204 |
-
|
205 |
-
with gr.Tabs():
|
206 |
-
with gr.Tab("📝 Simplify Language"):
|
207 |
-
with gr.Row():
|
208 |
-
with gr.Column(scale=1):
|
209 |
-
simplify_input = gr.File(
|
210 |
-
file_types=['txt', 'pdf', 'image'],
|
211 |
-
label="📎 Upload Document"
|
212 |
-
)
|
213 |
-
gr.HTML("<div style='height: 10px'></div>")
|
214 |
-
simplify_text_input = gr.Textbox(
|
215 |
-
label="✍ Or Type/Paste Text",
|
216 |
-
placeholder="Enter your legal text here...",
|
217 |
-
lines=4
|
218 |
-
)
|
219 |
-
gr.HTML("<div style='height: 10px'></div>")
|
220 |
-
simplify_btn = gr.Button(
|
221 |
-
"🔍 Simplify Language",
|
222 |
-
variant="primary"
|
223 |
-
)
|
224 |
-
|
225 |
-
with gr.Column(scale=1):
|
226 |
-
simplify_output = gr.Textbox(
|
227 |
-
label="📋 Simplified Explanation",
|
228 |
-
lines=12,
|
229 |
-
show_copy_button=True
|
230 |
-
)
|
231 |
-
|
232 |
-
def simplify_handler(file, text, lang):
    """Button callback for the "Simplify Language" tab.

    An uploaded file takes precedence over typed text. The chosen input is
    passed to ``assistant.generate_response`` with the ``"simplify"`` task and
    the result is routed through ``process_with_translation`` for *lang*.
    Returns a prompt message when neither input is provided.
    """
    source = file if file else text
    if source:
        return process_with_translation(
            assistant.generate_response, source, "simplify", target_lang=lang
        )
    return "Please provide some text or upload a document to analyze."
|
242 |
-
|
243 |
-
simplify_btn.click(
|
244 |
-
fn=simplify_handler,
|
245 |
-
inputs=[simplify_input, simplify_text_input, language_selector],
|
246 |
-
outputs=simplify_output
|
247 |
-
)
|
248 |
-
|
249 |
-
with gr.Tab("📚 Document Summary"):
|
250 |
-
with gr.Row():
|
251 |
-
with gr.Column(scale=1):
|
252 |
-
summary_input = gr.File(
|
253 |
-
file_types=['txt', 'pdf', 'image'],
|
254 |
-
label="📎 Upload Document"
|
255 |
-
)
|
256 |
-
gr.HTML("<div style='height: 10px'></div>")
|
257 |
-
summary_text_input = gr.Textbox(
|
258 |
-
label="✍ Or Type/Paste Text",
|
259 |
-
placeholder="Enter your legal document here...",
|
260 |
-
lines=4
|
261 |
-
)
|
262 |
-
gr.HTML("<div style='height: 10px'></div>")
|
263 |
-
summary_btn = gr.Button(
|
264 |
-
"📋 Generate Summary",
|
265 |
-
variant="primary"
|
266 |
-
)
|
267 |
-
|
268 |
-
with gr.Column(scale=1):
|
269 |
-
summary_output = gr.Textbox(
|
270 |
-
label="📑 Document Summary",
|
271 |
-
lines=12,
|
272 |
-
show_copy_button=True
|
273 |
-
)
|
274 |
-
|
275 |
-
def summary_handler(file, text, lang):
    """Button callback for the "Document Summary" tab.

    An uploaded file takes precedence over typed text. The chosen input is
    passed to ``assistant.generate_response`` with the ``"summary"`` task and
    the result is routed through ``process_with_translation`` for *lang*.
    Returns a prompt message when neither input is provided.
    """
    source = file if file else text
    if source:
        return process_with_translation(
            assistant.generate_response, source, "summary", target_lang=lang
        )
    return "Please provide some text or upload a document to summarize."
|
285 |
-
|
286 |
-
summary_btn.click(
|
287 |
-
fn=summary_handler,
|
288 |
-
inputs=[summary_input, summary_text_input, language_selector],
|
289 |
-
outputs=summary_output
|
290 |
-
)
|
291 |
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
risk_btn = gr.Button(
|
307 |
-
"🔍 Analyze Risks",
|
308 |
-
variant="primary"
|
309 |
-
)
|
310 |
-
|
311 |
-
with gr.Column(scale=1):
|
312 |
-
risk_output = gr.Textbox(
|
313 |
-
label="⚠ Risk Assessment",
|
314 |
-
lines=12,
|
315 |
-
show_copy_button=True
|
316 |
-
)
|
317 |
-
|
318 |
-
def risk_handler(file, text, lang):
    """Button callback for the risk-analysis tab.

    An uploaded file takes precedence over typed text. The chosen input is
    passed to ``assistant.generate_response`` with the ``"risk"`` task and the
    result is routed through ``process_with_translation`` for *lang*.
    Returns a prompt message when neither input is provided.
    """
    source = file if file else text
    if source:
        return process_with_translation(
            assistant.generate_response, source, "risk", target_lang=lang
        )
    return "Please provide some text or upload a document to analyze risks."
|
328 |
-
|
329 |
-
risk_btn.click(
|
330 |
-
fn=risk_handler,
|
331 |
-
inputs=[risk_input, risk_text_input, language_selector],
|
332 |
-
outputs=risk_output
|
333 |
-
)
|
334 |
-
|
335 |
-
gr.HTML(f"""
|
336 |
-
<div style="text-align: center; margin-top: 20px; padding: 20px; background-color: #f0f2f6; border-radius: 10px;">
|
337 |
-
<p style="color: #576574; margin: 0;">Powered by Gemma 2B and Azure Translator</p>
|
338 |
-
<p style="color: #576574; margin: 5px 0 0 0; font-size: 0.9em;">Built for Language Translation Hackathon</p>
|
339 |
-
</div>
|
340 |
-
""")
|
341 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
342 |
return demo
|
343 |
|
344 |
demo = create_interface()
|
345 |
|
346 |
if __name__ == "__main__":
|
347 |
-
demo.launch()
|
|
|
9 |
import PyPDF2
|
10 |
import requests
|
11 |
import uuid
|
12 |
+
from collections import Counter
|
13 |
+
import re
|
14 |
+
from difflib import unified_diff
|
15 |
|
16 |
# Configuration
|
17 |
MODEL_NAME = "google/gemma-2b-it"
|
|
|
22 |
HF_TOKEN = os.getenv('HF_TOKEN')
|
23 |
AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
|
24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
25 |
class TextExtractor:
|
26 |
@staticmethod
|
27 |
def extract_text_from_input(input_file):
|
|
|
47 |
return "Unsupported input type"
|
48 |
|
49 |
class LegalEaseAssistant:
|
50 |
+
def __init__(self):
|
51 |
if not HF_TOKEN:
|
52 |
raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
|
53 |
|
|
|
65 |
)
|
66 |
self.text_extractor = TextExtractor()
|
67 |
|
68 |
+
def generate_keywords(self, text, top_n=10, min_length=5):
    """Return the most frequent long words of *text* as a comma-separated string.

    Words are runs of ASCII letters at least *min_length* characters long.
    They are counted case-insensitively, so "Tenant" and "tenant" are the
    same keyword, and are reported in lower case. Ties keep the order of
    first appearance.

    Args:
        text: Text to analyse; ``None`` or an empty string yields ``""``.
        top_n: Maximum number of keywords to return.
        min_length: Minimum number of letters a word needs to be counted.

    Returns:
        Up to *top_n* keywords joined with ``", "``, most frequent first.
    """
    if not text:
        return ""

    pattern = r'\b[A-Za-z]{%d,}\b' % min_length
    # Lower-case the matches (not the whole text) so only matched ASCII
    # words are folded.
    word_freq = Counter(word.lower() for word in re.findall(pattern, text))
    return ', '.join(word for word, _ in word_freq.most_common(top_n))
|
|
|
72 |
|
73 |
+
def compare_contracts(self, text1, text2, label1="Contract 1", label2="Contract 2"):
    """Return a unified diff of two contract texts.

    Lines are split with ``str.splitlines`` so that differing line endings
    (``\\r\\n`` vs ``\\n``) or a trailing newline are not reported as changes.

    Args:
        text1: Text of the first contract; ``None`` is treated as empty.
        text2: Text of the second contract; ``None`` is treated as empty.
        label1: Name shown for the first contract in the ``---`` header.
        label2: Name shown for the second contract in the ``+++`` header.

    Returns:
        The unified diff as a single string, or
        ``"The documents are identical."`` when there are no differences.
    """
    lines1 = (text1 or "").splitlines()
    lines2 = (text2 or "").splitlines()
    diff = '\n'.join(
        unified_diff(lines1, lines2, fromfile=label1, tofile=label2, lineterm='')
    )
    return diff if diff else "The documents are identical."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
def create_interface():
|
80 |
assistant = LegalEaseAssistant()
|
|
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
with gr.Blocks(title="LegalEase: AI Legal Assistant") as demo:
|
83 |
+
gr.Markdown("## 📜 LegalEase: AI-Powered Legal Document Assistant")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
84 |
|
85 |
+
with gr.Tab("🔑 Keyword Extraction"):
|
86 |
+
keyword_input = gr.File(file_types=['txt', 'pdf', 'image'], label="Upload Document")
|
87 |
+
keyword_text_input = gr.Textbox(label="Or Enter Text", placeholder="Enter legal text here...", lines=4)
|
88 |
+
keyword_output = gr.Textbox(label="Extracted Keywords", lines=2, show_copy_button=True)
|
89 |
+
keyword_btn = gr.Button("Extract Keywords")
|
90 |
+
|
91 |
+
def keyword_handler(file, text):
    """Button callback for the "Keyword Extraction" tab.

    An uploaded file takes precedence over typed text. Typed text made only
    of whitespace counts as no input. The chosen input is run through
    ``assistant.text_extractor.extract_text_from_input`` and the extracted
    text is handed to ``assistant.generate_keywords``.

    Returns:
        The keyword string, or a prompt message when no input was given.
    """
    typed = text.strip() if isinstance(text, str) else text
    input_source = file or typed
    if not input_source:
        return "Please provide some text or upload a document."

    # Separate name: do not rebind the ``text`` parameter to extractor output.
    extracted = assistant.text_extractor.extract_text_from_input(input_source)
    return assistant.generate_keywords(extracted)
|
97 |
+
|
98 |
+
keyword_btn.click(fn=keyword_handler, inputs=[keyword_input, keyword_text_input], outputs=keyword_output)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
|
100 |
+
with gr.Tab("⚖ Contract Comparison"):
|
101 |
+
contract_input1 = gr.File(file_types=['txt', 'pdf'], label="Upload Contract 1")
|
102 |
+
contract_input2 = gr.File(file_types=['txt', 'pdf'], label="Upload Contract 2")
|
103 |
+
comparison_output = gr.Textbox(label="Comparison Result", lines=12, show_copy_button=True)
|
104 |
+
compare_btn = gr.Button("Compare Contracts")
|
105 |
+
|
106 |
+
def compare_handler(file1, file2):
    """Button callback for the "Contract Comparison" tab.

    Requires both uploads. Each file is run through
    ``assistant.text_extractor.extract_text_from_input`` (first file, then
    second) and the two texts are passed to ``assistant.compare_contracts``.

    Returns:
        The comparison text, or a prompt message when a file is missing.
    """
    if not (file1 and file2):
        return "Please upload two contracts for comparison."

    extract = assistant.text_extractor.extract_text_from_input
    first_text, second_text = extract(file1), extract(file2)
    return assistant.compare_contracts(first_text, second_text)
|
112 |
+
|
113 |
+
compare_btn.click(fn=compare_handler, inputs=[contract_input1, contract_input2], outputs=comparison_output)
|
114 |
+
|
115 |
return demo
|
116 |
|
117 |
demo = create_interface()
|
118 |
|
119 |
if __name__ == "__main__":
|
120 |
+
demo.launch()
|