Update app.py
app.py
CHANGED
Before the change (removed lines are marked with -; lines truncated in the diff view are left as shown):

@@ -6,21 +6,48 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 from huggingface_hub import login
 import pytesseract
 from PIL import Image
-import PyPDF2
 import requests
 import uuid

 # Configuration
 MODEL_NAME = "google/gemma-2b-it"
 CURRENT_USER = "AkarshanGupta"
-CURRENT_TIME = "2025-03-

 # API Keys
 HF_TOKEN = os.getenv('HF_TOKEN')
 AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')

 class Translator:
-    def __init__(self):
         self.key = AZURE_TRANSLATION_KEY
         self.region = 'centralindia'
         self.endpoint = "https://api.cognitive.microsofttranslator.com"

@@ -30,13 +57,11 @@
     def translate_text(self, text, target_language="en"):
         try:
-            # Split the text into bullet points
             bullet_points = text.split('\n• ')
             translated_points = []

-            # Translate each bullet point separately
             for point in bullet_points:
-                if point.strip():
                     path = '/translate'
                     constructed_url = self.endpoint + path

@@ -47,7 +72,7 @@
                     headers = {
                         'Ocp-Apim-Subscription-Key': self.key,
-                        'Ocp-Apim-Subscription-Region':
                         'Content-type': 'application/json',
                         'X-ClientTraceId': str(uuid.uuid4())
                     }

@@ -67,44 +92,22 @@
                     translation = response.json()[0]["translations"][0]["text"]
                     translated_points.append(translation)

-            # Reconstruct the bullet-pointed text
             translated_text = '\n• ' + '\n• '.join(translated_points)
             return translated_text

         except Exception as e:
             return f"Translation error: {str(e)}"

-class TextExtractor:
-    @staticmethod
-    def extract_text_from_input(input_file):
-        if isinstance(input_file, str):
-            return input_file
-
-        if isinstance(input_file, Image.Image):
-            try:
-                return pytesseract.image_to_string(input_file)
-            except Exception as e:
-                return f"Error extracting text from image: {str(e)}"
-
-        if hasattr(input_file, 'name') and input_file.name.lower().endswith('.pdf'):
-            try:
-                pdf_reader = PyPDF2.PdfReader(input_file)
-                text = ""
-                for page in pdf_reader.pages:
-                    text += page.extract_text() + "\n\n"
-                return text
-            except Exception as e:
-                return f"Error extracting text from PDF: {str(e)}"
-
-        return "Unsupported input type"
-
 class LegalEaseAssistant:
-    def __init__(self):
         if not HF_TOKEN:
             raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")

         login(token=HF_TOKEN)

         self.tokenizer = AutoTokenizer.from_pretrained(
             MODEL_NAME,
             token=HF_TOKEN

@@ -115,10 +118,8 @@
             device_map="auto",
             torch_dtype=torch.float32
         )
-        self.text_extractor = TextExtractor()

     def format_response(self, text):
-        """Format response as bullet points"""
         sentences = [s.strip() for s in text.split('.') if s.strip()]
         bullet_points = ['• ' + s + '.' for s in sentences]
         return '\n'.join(bullet_points)

@@ -150,6 +151,23 @@
         raw_response = response_parts[-1].strip() if len(response_parts) > 1 else response.strip()

         return self.format_response(raw_response)

 def create_interface():
     assistant = LegalEaseAssistant()

@@ -179,16 +197,20 @@
         result = translator.translate_text(result, SUPPORTED_LANGUAGES[target_lang])
         return result

-    with gr.Blocks(title="LegalEase
         gr.HTML(f"""
-            <div style="text-align: center; background-color: #
                 <h1 style="color: #2c3e50; font-size: 2.5em; margin-bottom: 10px;">LegalEase</h1>
                 <h2 style="color: #34495e; font-size: 1.5em; margin-bottom: 20px;">AI-Powered Legal Document Assistant</h2>
                 <div style="display: flex; justify-content: center; gap: 40px; color: #576574; font-size: 1.1em;">
-                    <div style="background-color:
                         <span style="font-weight: bold;">User:</span> {CURRENT_USER}
                     </div>
-                    <div style="background-color:
                         <span style="font-weight: bold;">Last Updated:</span> {CURRENT_TIME} UTC
                     </div>
                 </div>

@@ -203,6 +225,7 @@
         )

         with gr.Tabs():
             with gr.Tab("Simplify Language"):
                 with gr.Row():
                     with gr.Column(scale=1):

@@ -246,6 +269,7 @@
                     outputs=simplify_output
                 )

             with gr.Tab("Document Summary"):
                 with gr.Row():
                     with gr.Column(scale=1):

@@ -289,6 +313,51 @@
                     outputs=summary_output
                 )

             with gr.Tab("Risk Analysis"):
                 with gr.Row():
                     with gr.Column(scale=1):

@@ -332,16 +401,49 @@
                     outputs=risk_output
                 )

         gr.HTML(f"""
-            <div style="text-align: center; margin-top: 20px; padding: 20px; background-color: #
-            <p style="color: #576574; margin: 0;">
-            <p style="color: #576574; margin: 5px 0 0 0; font-size: 0.9em;">Built for Language Translation Hackathon</p>
             </div>
         """)

     return demo

-

-if __name__ == "__main__":
-
After the change (added lines are marked with +):

 from huggingface_hub import login
 import pytesseract
 from PIL import Image
+import fitz  # PyMuPDF
 import requests
 import uuid

 # Configuration
 MODEL_NAME = "google/gemma-2b-it"
 CURRENT_USER = "AkarshanGupta"
+CURRENT_TIME = "2025-03-23 03:33:01"

 # API Keys
 HF_TOKEN = os.getenv('HF_TOKEN')
 AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
+LLAMA_API_KEY = os.getenv('LLAMA_API_KEY')
+LLAMA_API_ENDPOINT = "https://api.llama.ai/v1/generate"
+
+class TextExtractor:
+    @staticmethod
+    def extract_text_from_input(input_file):
+        if isinstance(input_file, str):
+            return input_file
+
+        if isinstance(input_file, Image.Image):
+            try:
+                return pytesseract.image_to_string(input_file)
+            except Exception as e:
+                return f"Error extracting text from image: {str(e)}"
+
+        if hasattr(input_file, 'name') and input_file.name.lower().endswith('.pdf'):
+            try:
+                doc = fitz.open(stream=input_file.read(), filetype="pdf")
+                text = ""
+                for page in doc:
+                    text += page.get_text() + "\n\n"
+                doc.close()
+                return text
+            except Exception as e:
+                return f"Error extracting text from PDF: {str(e)}"
+
+        return "Unsupported input type"

 class Translator:
+    def __init__(self):
         self.key = AZURE_TRANSLATION_KEY
         self.region = 'centralindia'
         self.endpoint = "https://api.cognitive.microsofttranslator.com"
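The PDF branch of TextExtractor above switches from PyPDF2 to PyMuPDF. A minimal standalone sketch of that same extraction path, assuming PyMuPDF is installed and using "sample.pdf" as a placeholder file name:

```python
import fitz  # PyMuPDF

# Read the PDF as bytes and extract plain text page by page, mirroring
# the PDF branch of TextExtractor.extract_text_from_input above.
with open("sample.pdf", "rb") as f:  # "sample.pdf" is a placeholder
    doc = fitz.open(stream=f.read(), filetype="pdf")

text = "\n\n".join(page.get_text() for page in doc)
doc.close()
print(text[:500])
```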
     def translate_text(self, text, target_language="en"):
         try:
             bullet_points = text.split('\n• ')
             translated_points = []

             for point in bullet_points:
+                if point.strip():
                     path = '/translate'
                     constructed_url = self.endpoint + path

                     headers = {
                         'Ocp-Apim-Subscription-Key': self.key,
+                        'Ocp-Apim-Subscription-Region': self.region,
                         'Content-type': 'application/json',
                         'X-ClientTraceId': str(uuid.uuid4())
                     }

                     translation = response.json()[0]["translations"][0]["text"]
                     translated_points.append(translation)

             translated_text = '\n• ' + '\n• '.join(translated_points)
             return translated_text

         except Exception as e:
             return f"Translation error: {str(e)}"
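The lines that build the query parameters and make the actual requests.post call fall between the hunks shown here, so they are not part of this diff. For orientation, a minimal sketch of a Translator Text API v3 request that is consistent with the headers and the response indexing used in translate_text above; the key, region, target language, and text are placeholders:

```python
import uuid
import requests

endpoint = "https://api.cognitive.microsofttranslator.com"
params = {"api-version": "3.0", "to": ["hi"]}  # target language code(s)
headers = {
    "Ocp-Apim-Subscription-Key": "<AZURE_TRANSLATION_KEY>",  # placeholder
    "Ocp-Apim-Subscription-Region": "centralindia",
    "Content-type": "application/json",
    "X-ClientTraceId": str(uuid.uuid4()),
}
body = [{"text": "Hello, world"}]

response = requests.post(endpoint + "/translate", params=params, headers=headers, json=body)
# Same shape that translate_text indexes: [0]["translations"][0]["text"]
print(response.json()[0]["translations"][0]["text"])
```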
 class LegalEaseAssistant:
+    def __init__(self):
         if not HF_TOKEN:
             raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")

         login(token=HF_TOKEN)

+        # Initialize text_extractor first
+        self.text_extractor = TextExtractor()
+
         self.tokenizer = AutoTokenizer.from_pretrained(
             MODEL_NAME,
             token=HF_TOKEN

             device_map="auto",
             torch_dtype=torch.float32
         )

     def format_response(self, text):
         sentences = [s.strip() for s in text.split('.') if s.strip()]
         bullet_points = ['• ' + s + '.' for s in sentences]
         return '\n'.join(bullet_points)
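The call that loads the model itself (lines 114-117 of the new file) sits in the gap between the hunks above. A sketch of the loading pattern implied by the visible arguments (device_map="auto", torch_dtype=torch.float32), with a small generation call added purely for illustration; the prompt text is made up, and google/gemma-2b-it is a gated model, so a valid HF_TOKEN is required:

```python
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_NAME = "google/gemma-2b-it"
HF_TOKEN = os.getenv("HF_TOKEN")  # gated model: token is required

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    token=HF_TOKEN,
    device_map="auto",
    torch_dtype=torch.float32,
)

# Illustrative prompt; the app's own prompt templates are not shown in this diff.
inputs = tokenizer("Explain what an indemnity clause is.", return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```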
         raw_response = response_parts[-1].strip() if len(response_parts) > 1 else response.strip()

         return self.format_response(raw_response)
+
+    def generate_chatbot_response(self, user_input):
+        if not LLAMA_API_KEY:
+            return "LLaMA API key not found. Please set the LLAMA_API_KEY environment variable."
+
+        response = requests.post(
+            LLAMA_API_ENDPOINT,
+            headers={"Authorization": f"Bearer {LLAMA_API_KEY}"},
+            json={"prompt": user_input, "max_tokens": 150}
+        )
+
+        if response.status_code == 401:
+            return "Unauthorized: Please check your LLaMA API key."
+        elif response.status_code != 200:
+            return f"Error: Received {response.status_code} status code from LLaMA API."
+
+        return response.json()["choices"][0]["text"].strip()

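generate_chatbot_response above calls requests.post with no timeout and no handling for network failures, so a dropped connection would surface as an unhandled exception inside the Gradio handler. A hedged variant with those two guards added; LLAMA_API_ENDPOINT and the response shape are taken as-is from the code above and are not a documented public API:

```python
import os
import requests

LLAMA_API_ENDPOINT = "https://api.llama.ai/v1/generate"  # as defined in the diff above

def generate_chatbot_response(user_input, timeout=30):
    api_key = os.getenv("LLAMA_API_KEY")
    if not api_key:
        return "LLaMA API key not found. Please set the LLAMA_API_KEY environment variable."
    try:
        response = requests.post(
            LLAMA_API_ENDPOINT,
            headers={"Authorization": f"Bearer {api_key}"},
            json={"prompt": user_input, "max_tokens": 150},
            timeout=timeout,  # avoid hanging the UI on a stalled request
        )
    except requests.RequestException as e:
        return f"Chatbot error: {e}"

    if response.status_code == 401:
        return "Unauthorized: Please check your LLaMA API key."
    if response.status_code != 200:
        return f"Error: Received {response.status_code} status code from LLaMA API."
    return response.json()["choices"][0]["text"].strip()
```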
 def create_interface():
     assistant = LegalEaseAssistant()

         result = translator.translate_text(result, SUPPORTED_LANGUAGES[target_lang])
         return result

+    with gr.Blocks(title="LegalEase", css="""
+        .gradio-container {max-width: 1200px; margin: auto;}
+        .header {text-align: center; margin-bottom: 2rem;}
+        .content {padding: 2rem;}
+    """) as demo:
         gr.HTML(f"""
+            <div style="text-align: center; background-color: #e0e0e0; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
                 <h1 style="color: #2c3e50; font-size: 2.5em; margin-bottom: 10px;">LegalEase</h1>
                 <h2 style="color: #34495e; font-size: 1.5em; margin-bottom: 20px;">AI-Powered Legal Document Assistant</h2>
                 <div style="display: flex; justify-content: center; gap: 40px; color: #576574; font-size: 1.1em;">
+                    <div style="background-color: #e0e0e0; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                         <span style="font-weight: bold;">User:</span> {CURRENT_USER}
                     </div>
+                    <div style="background-color: #e0e0e0; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
                         <span style="font-weight: bold;">Last Updated:</span> {CURRENT_TIME} UTC
                     </div>
                 </div>

         )

         with gr.Tabs():
+            # Simplify Language Tab
             with gr.Tab("Simplify Language"):
                 with gr.Row():
                     with gr.Column(scale=1):

                     outputs=simplify_output
                 )

+            # Document Summary Tab
             with gr.Tab("Document Summary"):
                 with gr.Row():
                     with gr.Column(scale=1):

                     outputs=summary_output
                 )

+            # Key Terms Tab
+            with gr.Tab("Key Terms"):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        terms_input = gr.File(
+                            file_types=['txt', 'pdf', 'image'],
+                            label="Upload Document"
+                        )
+                        gr.HTML("<div style='height: 10px'></div>")
+                        terms_text_input = gr.Textbox(
+                            label="Or Type/Paste Text",
+                            placeholder="Enter your legal document here...",
+                            lines=4
+                        )
+                        gr.HTML("<div style='height: 10px'></div>")
+                        terms_btn = gr.Button(
+                            "Extract Key Terms",
+                            variant="primary"
+                        )
+
+                    with gr.Column(scale=1):
+                        terms_output = gr.Textbox(
+                            label="Key Terms & Definitions",
+                            lines=12,
+                            show_copy_button=True
+                        )
+
+                def terms_handler(file, text, lang):
+                    input_source = file or text
+                    if not input_source:
+                        return "Please provide some text or upload a document to analyze key terms."
+                    return process_with_translation(
+                        assistant.generate_response,
+                        input_source,
+                        "key_terms",
+                        target_lang=lang
+                    )
+
+                terms_btn.click(
+                    fn=terms_handler,
+                    inputs=[terms_input, terms_text_input, language_selector],
+                    outputs=terms_output
+                )
+
+            # Risk Analysis Tab
             with gr.Tab("Risk Analysis"):
                 with gr.Row():
                     with gr.Column(scale=1):

                     outputs=risk_output
                 )

+            # Legal Assistant Chat Tab
+            with gr.Tab("Legal Assistant Chat"):
+                chatbot_input = gr.Textbox(
+                    label="Your Message",
+                    placeholder="Ask me anything about legal matters...",
+                    lines=2
+                )
+                chatbot_output = gr.Textbox(
+                    label="Assistant Response",
+                    lines=10,
+                    show_copy_button=True
+                )
+                chatbot_btn = gr.Button(
+                    "Send Message",
+                    variant="primary"
+                )
+
+                def chatbot_handler(user_input, lang):
+                    if not user_input:
+                        return "Please type a message to start the conversation."
+                    response = assistant.generate_chatbot_response(user_input)
+                    if lang != "English":
+                        response = translator.translate_text(response, SUPPORTED_LANGUAGES[lang])
+                    return response
+
+                chatbot_btn.click(
+                    fn=chatbot_handler,
+                    inputs=[chatbot_input, language_selector],
+                    outputs=chatbot_output
+                )
+
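Every tab above follows the same wiring pattern: input components, an output Textbox, and a Button whose click event calls a handler that also receives the language selector. A self-contained sketch of just that pattern; the component names and handler body here are illustrative and are not taken from app.py:

```python
import gradio as gr

def handler(text, lang):
    # Placeholder logic standing in for assistant.generate_response plus translation.
    if not text:
        return "Please provide some text."
    return f"[{lang}] {text.upper()}"

with gr.Blocks(title="Wiring sketch") as demo:
    language_selector = gr.Dropdown(choices=["English", "Hindi"], value="English", label="Language")
    text_input = gr.Textbox(label="Input", lines=3)
    text_output = gr.Textbox(label="Output", lines=3, show_copy_button=True)
    run_btn = gr.Button("Run", variant="primary")
    run_btn.click(fn=handler, inputs=[text_input, language_selector], outputs=text_output)

if __name__ == "__main__":
    demo.queue()
    demo.launch()
```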
         gr.HTML(f"""
+            <div style="text-align: center; margin-top: 20px; padding: 20px; background-color: #e0e0e0; border-radius: 10px;">
+                <p style="color: #576574; margin: 0;">Made by Team Ice Age</p>
             </div>
         """)

     return demo

+def main():
+    demo = create_interface()
+    demo.queue()
+    demo.launch(share=True)

+if __name__ == "__main__":
+    main()