Spaces:

gauravchand11
/

legal

Sleeping

App Files Files Community

legal / app.py

gauravchand11

Update app.py

2a8dc4e verified 6 months ago

raw

history blame contribute delete

18.2 kB

	import gradio as gr
	import os
	from datetime import datetime
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM
	from huggingface_hub import login
	import pytesseract
	from PIL import Image
	import fitz # PyMuPDF
	import requests
	import uuid

	# Configuration
	# Hugging Face model id passed to AutoTokenizer/AutoModelForCausalLM.from_pretrained.
	MODEL_NAME = "google/gemma-2b-it"
	# Static values interpolated into the page header ("User" / "Last Updated").
	# NOTE(review): both are hard-coded, so the header does not track the actual
	# visitor or deployment time - confirm that this is intended.
	CURRENT_USER = "AkarshanGupta"
	CURRENT_TIME = "2025-03-23 03:33:01"

	# API Keys
	# Secrets are read from environment variables; os.getenv yields None when unset.
	HF_TOKEN = os.getenv('HF_TOKEN')
	AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
	LLAMA_API_KEY = os.getenv('LLAMA_API_KEY')
	# URL the chat tab POSTs prompts to.
	# NOTE(review): confirm this endpoint and its request/response schema against
	# the provider's documentation.
	LLAMA_API_ENDPOINT = "https://api.llama.ai/v1/generate"

	class TextExtractor:
	    """Convert the input kinds accepted by the UI into plain text.

	    Failures are reported as human-readable strings rather than exceptions,
	    because the result is shown to (or processed for) the end user directly.
	    """

	    # File extensions that are routed through OCR when an upload is identified
	    # by its file name.
	    IMAGE_EXTENSIONS = (
	        '.png', '.jpg', '.jpeg', '.bmp', '.gif', '.tif', '.tiff', '.webp',
	    )

	    @staticmethod
	    def extract_text_from_input(input_file):
	        """Return the text contained in *input_file*.

	        Args:
	            input_file: One of
	                * a plain ``str`` - returned unchanged (typed/pasted text);
	                * an upload exposing a ``name`` attribute that holds its file
	                  path - either a file-like object or a ``str`` subclass;
	                  ``.pdf``, ``.txt`` and common image extensions are supported;
	                * a ``PIL.Image.Image`` - run through OCR.

	        Returns:
	            The extracted text, an ``"Error ..."`` message if extraction
	            failed, or ``"Unsupported input type"``.
	        """
	        source_name = getattr(input_file, 'name', None)

	        # Typed text is a bare str without ``name``. A str that *does* carry a
	        # ``name`` is an uploaded file path and must not be echoed back as if
	        # it were the document's content. Bare strings are never opened as
	        # paths, so user-typed text cannot be used to read server files.
	        if isinstance(input_file, str) and not isinstance(source_name, str):
	            return input_file

	        if isinstance(source_name, str):
	            lowered = source_name.lower()
	            if lowered.endswith('.pdf'):
	                return TextExtractor._extract_pdf(input_file, source_name)
	            if lowered.endswith('.txt'):
	                return TextExtractor._extract_text_file(source_name)
	            if lowered.endswith(TextExtractor.IMAGE_EXTENSIONS):
	                return TextExtractor._extract_image_file(source_name)

	        if isinstance(input_file, Image.Image):
	            try:
	                return pytesseract.image_to_string(input_file)
	            except Exception as e:
	                return f"Error extracting text from image: {str(e)}"

	        return "Unsupported input type"

	    @staticmethod
	    def _extract_pdf(input_file, path):
	        """Return the text of every page of a PDF, separated by blank lines."""
	        try:
	            if hasattr(input_file, 'read'):
	                # File-like upload: parse the bytes directly.
	                doc = fitz.open(stream=input_file.read(), filetype="pdf")
	            else:
	                # Path-only upload: let PyMuPDF open the file itself.
	                doc = fitz.open(path)
	            try:
	                return "".join(page.get_text() + "\n\n" for page in doc)
	            finally:
	                # Release the document even when a page fails to parse.
	                doc.close()
	        except Exception as e:
	            return f"Error extracting text from PDF: {str(e)}"

	    @staticmethod
	    def _extract_text_file(path):
	        """Return the content of a text file, replacing undecodable bytes."""
	        try:
	            with open(path, encoding="utf-8", errors="replace") as handle:
	                return handle.read()
	        except OSError as e:
	            return f"Error reading text file: {str(e)}"

	    @staticmethod
	    def _extract_image_file(path):
	        """OCR an image stored on disk and return the recognised text."""
	        try:
	            with Image.open(path) as image:
	                return pytesseract.image_to_string(image)
	        except Exception as e:
	            return f"Error extracting text from image: {str(e)}"

	class Translator:
	    """Thin client for the Azure Translator Text REST API (version 3.0).

	    Raises:
	        ValueError: on construction, when ``AZURE_TRANSLATION_KEY`` is unset.
	    """

	    # Seconds to wait for a single HTTP call before giving up, so a stalled
	    # connection cannot block the UI worker indefinitely.
	    REQUEST_TIMEOUT = 30

	    def __init__(self):
	        self.key = AZURE_TRANSLATION_KEY
	        self.region = 'centralindia'
	        self.endpoint = "https://api.cognitive.microsofttranslator.com"

	        if not self.key:
	            raise ValueError("Azure Translator not configured. Please set AZURE_TRANSLATION_KEY in Spaces settings.")

	    # The constructor used to be misspelled ``_init_`` and therefore never ran;
	    # the old name is kept as an alias for any caller that invoked it directly.
	    _init_ = __init__

	    def translate_text(self, text, target_language="en"):
	        """Translate bullet-formatted *text* into *target_language*.

	        The text is split on ``'\\n• '``; each non-empty point is translated
	        with its own request and the results are re-assembled as bullets.

	        Args:
	            text: Text to translate, optionally formatted as ``• `` bullets.
	            target_language: Azure language code (for example ``"hi"``).

	        Returns:
	            The translated points, each prefixed with ``'\\n• '``; *text*
	            unchanged when it contains nothing to translate; or a
	            ``"Translation error: ..."`` message on failure.
	        """
	        try:
	            # Strip the bullet marker itself: the first point keeps its
	            # leading '• ' after the split and would otherwise come out with
	            # a doubled bullet.
	            cleaned = (
	                chunk.strip().lstrip('•').strip()
	                for chunk in text.split('\n• ')
	            )
	            points = [point for point in cleaned if point]
	            if not points:
	                return text

	            # Loop-invariant request parts are built once.
	            constructed_url = self.endpoint + '/translate'
	            params = {
	                'api-version': '3.0',
	                'to': target_language,
	            }

	            translated_points = []
	            for point in points:
	                headers = {
	                    'Ocp-Apim-Subscription-Key': self.key,
	                    'Ocp-Apim-Subscription-Region': self.region,
	                    'Content-type': 'application/json',
	                    # Fresh trace id per request.
	                    'X-ClientTraceId': str(uuid.uuid4()),
	                }
	                response = requests.post(
	                    constructed_url,
	                    params=params,
	                    headers=headers,
	                    json=[{'text': point}],
	                    timeout=self.REQUEST_TIMEOUT,
	                )
	                response.raise_for_status()
	                translated_points.append(
	                    response.json()[0]["translations"][0]["text"]
	                )

	            return '\n• ' + '\n• '.join(translated_points)

	        except Exception as e:
	            # UI boundary: report the problem as text instead of crashing the
	            # request handler.
	            return f"Translation error: {str(e)}"

	class LegalEaseAssistant:
	    """Legal-document helper backed by a local causal LM and a remote chat API.

	    Raises:
	        ValueError: on construction, when ``HF_TOKEN`` is unset.
	    """

	    # Seconds to wait for the chat API before giving up.
	    CHATBOT_TIMEOUT = 30

	    # task_type -> (instruction placed before the document, label that
	    # introduces the model's answer).
	    _TASK_PROMPTS = {
	        "simplify": (
	            "Simplify the following legal text in clear, plain language. Provide the response as separate points:",
	            "Simplified explanation:",
	        ),
	        "summary": (
	            "Provide a concise summary of the following legal document as separate key points:",
	            "Summary:",
	        ),
	        "key_terms": (
	            "Identify and explain the key legal terms and obligations in this text as separate points:",
	            "Key Terms:",
	        ),
	        "risk": (
	            "Perform a risk analysis on the following legal document and list each risk as a separate point:",
	            "Risk Assessment:",
	        ),
	    }
	    # Used for any task_type that is not listed above.
	    _DEFAULT_PROMPT = (
	        "Analyze the following text and provide points:",
	        "Analysis:",
	    )

	    def __init__(self):
	        if not HF_TOKEN:
	            raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")

	        login(token=HF_TOKEN)

	        # Initialize text_extractor first
	        self.text_extractor = TextExtractor()

	        self.tokenizer = AutoTokenizer.from_pretrained(
	            MODEL_NAME,
	            token=HF_TOKEN
	        )
	        self.model = AutoModelForCausalLM.from_pretrained(
	            MODEL_NAME,
	            token=HF_TOKEN,
	            device_map="auto",
	            torch_dtype=torch.float32
	        )

	    # The constructor used to be misspelled ``_init_`` and therefore never ran
	    # (no model, tokenizer or extractor was ever created); the old name is kept
	    # as an alias for any caller that invoked it directly.
	    _init_ = __init__

	    def format_response(self, text):
	        """Reformat free text as one ``• `` bullet per sentence.

	        A sentence ends at a period followed by whitespace or the end of the
	        text, so periods inside tokens such as ``3.5`` do not split a point.

	        Args:
	            text: Raw model output.

	        Returns:
	            The bullets joined by newlines; an empty string for empty input.
	        """
	        import re

	        sentences = (
	            piece.strip() for piece in re.split(r'\.(?:\s+|$)', text)
	        )
	        return '\n'.join('• ' + s + '.' for s in sentences if s)

	    def generate_response(self, input_file, task_type):
	        """Run one analysis task over a document and return bullet points.

	        Args:
	            input_file: Anything ``TextExtractor.extract_text_from_input``
	                accepts.
	            task_type: ``"simplify"``, ``"summary"``, ``"key_terms"`` or
	                ``"risk"``; any other value selects a generic analysis prompt.

	        Returns:
	            The model's answer formatted by ``format_response``.
	        """
	        text = self.text_extractor.extract_text_from_input(input_file)

	        instruction, answer_label = self._TASK_PROMPTS.get(
	            task_type, self._DEFAULT_PROMPT
	        )
	        prompt = f"{instruction}\n\n{text}\n\n{answer_label}"

	        # device_map="auto" may place the model on an accelerator; the inputs
	        # have to live on the same device as the model.
	        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
	        outputs = self.model.generate(
	            **inputs,
	            max_new_tokens=300,
	            num_return_sequences=1,
	            do_sample=True,
	            temperature=0.7,
	            top_p=0.9
	        )

	        # The generated sequence starts with the prompt tokens. Slicing them
	        # off is more reliable than splitting the decoded text on the answer
	        # label, which truncates answers that repeat the label.
	        prompt_length = inputs["input_ids"].shape[-1]
	        raw_response = self.tokenizer.decode(
	            outputs[0][prompt_length:],
	            skip_special_tokens=True
	        ).strip()

	        return self.format_response(raw_response)

	    def generate_chatbot_response(self, user_input):
	        """Send *user_input* to the remote chat API and return its reply.

	        Returns:
	            The reply text, or a human-readable error message when the key is
	            missing, the request fails, the API answers with a non-200 status,
	            or the payload does not have the expected shape.
	        """
	        if not LLAMA_API_KEY:
	            return "LLaMA API key not found. Please set the LLAMA_API_KEY environment variable."

	        try:
	            response = requests.post(
	                LLAMA_API_ENDPOINT,
	                headers={"Authorization": f"Bearer {LLAMA_API_KEY}"},
	                json={"prompt": user_input, "max_tokens": 150},
	                timeout=self.CHATBOT_TIMEOUT
	            )
	        except requests.RequestException as e:
	            return f"Error: Could not reach LLaMA API: {str(e)}"

	        if response.status_code == 401:
	            return "Unauthorized: Please check your LLaMA API key."
	        elif response.status_code != 200:
	            return f"Error: Received {response.status_code} status code from LLaMA API."

	        try:
	            return response.json()["choices"][0]["text"].strip()
	        except (ValueError, KeyError, IndexError, TypeError, AttributeError):
	            return "Error: Unexpected response format from LLaMA API."

	def create_interface():
	    """Build the LegalEase Gradio UI and wire every tab to its backend call.

	    Constructs a ``LegalEaseAssistant`` and a ``Translator``, lays out the
	    header, the output-language selector, four document-analysis tabs and a
	    chat tab, and registers the click handlers.

	    Returns:
	        The assembled ``gr.Blocks`` application (not yet launched).
	    """
	    assistant = LegalEaseAssistant()
	    translator = Translator()

	    # Display name -> language code passed to the translator.
	    SUPPORTED_LANGUAGES = {
	        "English": "en",
	        "Hindi": "hi",
	        "Bengali": "bn",
	        "Telugu": "te",
	        "Tamil": "ta",
	        "Marathi": "mr",
	        "Gujarati": "gu",
	        "Kannada": "kn",
	        "Malayalam": "ml",
	        "Punjabi": "pa",
	        "Spanish": "es",
	        "French": "fr",
	        "German": "de",
	        "Chinese (Simplified)": "zh-Hans",
	        "Japanese": "ja"
	    }

	    def process_with_translation(func, *args, target_lang="English"):
	        """Call ``func(*args)`` and translate the result unless English is selected."""
	        result = func(*args)
	        if target_lang != "English":
	            result = translator.translate_text(result, SUPPORTED_LANGUAGES[target_lang])
	        return result

	    # Small vertical gap reused between the input widgets.
	    spacer_html = "<div style='height: 10px'></div>"

	    def add_analysis_tab(language_selector, tab_title, text_placeholder,
	                         button_label, output_label, task_type, empty_message):
	        """Add one upload/paste -> analyse -> output tab.

	        Must be called inside the ``gr.Tabs()`` context. The four analysis
	        tabs differ only in their labels, task type and empty-input message.
	        """
	        with gr.Tab(tab_title):
	            with gr.Row():
	                with gr.Column(scale=1):
	                    file_input = gr.File(
	                        # Extensions need a leading dot; "image" is a type
	                        # category and is written without one.
	                        file_types=['.txt', '.pdf', 'image'],
	                        label="📎 Upload Document"
	                    )
	                    gr.HTML(spacer_html)
	                    text_input = gr.Textbox(
	                        label="✍ Or Type/Paste Text",
	                        placeholder=text_placeholder,
	                        lines=4
	                    )
	                    gr.HTML(spacer_html)
	                    run_btn = gr.Button(
	                        button_label,
	                        variant="primary"
	                    )

	                with gr.Column(scale=1):
	                    output_box = gr.Textbox(
	                        label=output_label,
	                        lines=12,
	                        show_copy_button=True
	                    )

	            def handler(file, text, lang):
	                # An uploaded file takes precedence over pasted text.
	                input_source = file or text
	                if not input_source:
	                    return empty_message
	                return process_with_translation(
	                    assistant.generate_response,
	                    input_source,
	                    task_type,
	                    target_lang=lang
	                )

	            run_btn.click(
	                fn=handler,
	                inputs=[file_input, text_input, language_selector],
	                outputs=output_box
	            )

	    with gr.Blocks(title="LegalEase", css="""
	    .gradio-container {max-width: 1200px; margin: auto;}
	    .header {text-align: center; margin-bottom: 2rem;}
	    .content {padding: 2rem;}
	    """) as demo:
	        gr.HTML(f"""
	        <div style="text-align: center; background-color: #e0e0e0; padding: 20px; border-radius: 10px; margin-bottom: 20px;">
	        <h1 style="color: #2c3e50; font-size: 2.5em; margin-bottom: 10px;">📜 LegalEase</h1>
	        <h2 style="color: #34495e; font-size: 1.5em; margin-bottom: 20px;">AI-Powered Legal Document Assistant</h2>
	        <div style="display: flex; justify-content: center; gap: 40px; color: #576574; font-size: 1.1em;">
	        <div style="background-color: #e0e0e0; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
	        <span style="font-weight: bold;">User:</span> {CURRENT_USER}
	        </div>
	        <div style="background-color: #e0e0e0; padding: 10px 20px; border-radius: 8px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);">
	        <span style="font-weight: bold;">Last Updated:</span> {CURRENT_TIME} UTC
	        </div>
	        </div>
	        </div>
	        """)

	        language_selector = gr.Dropdown(
	            choices=list(SUPPORTED_LANGUAGES.keys()),
	            value="English",
	            label="Select Output Language",
	            scale=1
	        )

	        with gr.Tabs():
	            # Simplify Language Tab
	            add_analysis_tab(
	                language_selector,
	                tab_title="📝 Simplify Language",
	                text_placeholder="Enter your legal text here...",
	                button_label="🔍 Simplify Language",
	                output_label="📋 Simplified Explanation",
	                task_type="simplify",
	                empty_message="Please provide some text or upload a document to analyze."
	            )

	            # Document Summary Tab
	            add_analysis_tab(
	                language_selector,
	                tab_title="📚 Document Summary",
	                text_placeholder="Enter your legal document here...",
	                button_label="📋 Generate Summary",
	                output_label="📑 Document Summary",
	                task_type="summary",
	                empty_message="Please provide some text or upload a document to summarize."
	            )

	            # Key Terms Tab
	            add_analysis_tab(
	                language_selector,
	                tab_title="🔑 Key Terms",
	                text_placeholder="Enter your legal document here...",
	                button_label="🔍 Extract Key Terms",
	                output_label="🔑 Key Terms & Definitions",
	                task_type="key_terms",
	                empty_message="Please provide some text or upload a document to analyze key terms."
	            )

	            # Risk Analysis Tab
	            add_analysis_tab(
	                language_selector,
	                tab_title="⚠ Risk Analysis",
	                text_placeholder="Enter your legal document here...",
	                button_label="🔍 Analyze Risks",
	                output_label="⚠ Risk Assessment",
	                task_type="risk",
	                empty_message="Please provide some text or upload a document to analyze risks."
	            )

	            # Legal Assistant Chat Tab
	            with gr.Tab("🤖 Legal Assistant Chat"):
	                chatbot_input = gr.Textbox(
	                    label="💬 Your Message",
	                    placeholder="Ask me anything about legal matters...",
	                    lines=2
	                )
	                chatbot_output = gr.Textbox(
	                    label="🤖 Assistant Response",
	                    lines=10,
	                    show_copy_button=True
	                )
	                chatbot_btn = gr.Button(
	                    "💬 Send Message",
	                    variant="primary"
	                )

	                def chatbot_handler(user_input, lang):
	                    if not user_input:
	                        return "Please type a message to start the conversation."
	                    # Same translate-unless-English step as the analysis tabs.
	                    return process_with_translation(
	                        assistant.generate_chatbot_response,
	                        user_input,
	                        target_lang=lang
	                    )

	                chatbot_btn.click(
	                    fn=chatbot_handler,
	                    inputs=[chatbot_input, language_selector],
	                    outputs=chatbot_output
	                )

	        # Plain string: the footer has no placeholders, so no f-string needed.
	        gr.HTML("""
	        <div style="text-align: center; margin-top: 20px; padding: 20px; background-color: #e0e0e0; border-radius: 10px;">
	        <p style="color: #576574; margin: 0;">Made by Team Ice Age</p>
	        </div>
	        """)

	    return demo

	def main():
	    """Create the LegalEase interface, enable its queue and launch it.

	    The app is launched with ``share=True``.
	    """
	    app = create_interface()
	    app.queue()
	    app.launch(share=True)


	if __name__ == "__main__":
	    main()