import gradio as gr
import os
from datetime import datetime
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
import pytesseract
from PIL import Image
import fitz  # PyMuPDF
import requests
import uuid

# Configuration
MODEL_NAME = "google/gemma-2b-it"
CURRENT_USER = "AkarshanGupta"
CURRENT_TIME = "2025-03-23 03:33:01"

# API Keys
HF_TOKEN = os.getenv('HF_TOKEN')
AZURE_TRANSLATION_KEY = os.getenv('AZURE_TRANSLATION_KEY')
LLAMA_API_KEY = os.getenv('LLAMA_API_KEY')
LLAMA_API_ENDPOINT = "https://api.llama.ai/v1/generate"


class TextExtractor:
    @staticmethod
    def extract_text_from_input(input_file):
        # Plain strings pass through unchanged.
        if isinstance(input_file, str):
            return input_file
        # Images go through Tesseract OCR.
        if isinstance(input_file, Image.Image):
            try:
                return pytesseract.image_to_string(input_file)
            except Exception as e:
                return f"Error extracting text from image: {str(e)}"
        # PDFs are read page by page with PyMuPDF.
        if hasattr(input_file, 'name') and input_file.name.lower().endswith('.pdf'):
            try:
                doc = fitz.open(stream=input_file.read(), filetype="pdf")
                text = ""
                for page in doc:
                    text += page.get_text() + "\n\n"
                doc.close()
                return text
            except Exception as e:
                return f"Error extracting text from PDF: {str(e)}"
        return "Unsupported input type"


class Translator:
    def __init__(self):
        self.key = AZURE_TRANSLATION_KEY
        self.region = 'centralindia'
        self.endpoint = "https://api.cognitive.microsofttranslator.com"
        if not self.key:
            raise ValueError("Azure Translator not configured. Please set AZURE_TRANSLATION_KEY in Spaces settings.")

    def translate_text(self, text, target_language="en"):
        try:
            # Translate each bullet point separately so the list structure survives.
            bullet_points = text.split('\n• ')
            translated_points = []
            for point in bullet_points:
                # Strip any leading bullet marker so it is not sent for translation.
                point = point.strip().lstrip('•').strip()
                if point:
                    path = '/translate'
                    constructed_url = self.endpoint + path
                    params = {
                        'api-version': '3.0',
                        'to': target_language
                    }
                    headers = {
                        'Ocp-Apim-Subscription-Key': self.key,
                        'Ocp-Apim-Subscription-Region': self.region,
                        'Content-type': 'application/json',
                        'X-ClientTraceId': str(uuid.uuid4())
                    }
                    body = [{'text': point}]
                    response = requests.post(
                        constructed_url,
                        params=params,
                        headers=headers,
                        json=body
                    )
                    response.raise_for_status()
                    translation = response.json()[0]["translations"][0]["text"]
                    translated_points.append(translation)
            translated_text = '\n• ' + '\n• '.join(translated_points)
            return translated_text
        except Exception as e:
            return f"Translation error: {str(e)}"
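
# A minimal, hypothetical usage sketch (not part of the original app): it assumes
# AZURE_TRANSLATION_KEY is set and shows the bullet-formatted input shape that
# LegalEaseAssistant.format_response (below) produces. Never called on import.
def _translator_demo():
    translator = Translator()
    sample = "• This agreement is binding.\n• Either party may terminate with notice."
    print(translator.translate_text(sample, target_language="hi"))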

class LegalEaseAssistant:
    def __init__(self):
        if not HF_TOKEN:
            raise ValueError("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
        login(token=HF_TOKEN)

        # Initialize text_extractor first
        self.text_extractor = TextExtractor()

        self.tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            token=HF_TOKEN
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            token=HF_TOKEN,
            device_map="auto",
            torch_dtype=torch.float32
        )

    def format_response(self, text):
        # Split the raw model output into sentences and render each as a bullet point.
        sentences = [s.strip() for s in text.split('.') if s.strip()]
        bullet_points = ['• ' + s + '.' for s in sentences]
        return '\n'.join(bullet_points)

    def generate_response(self, input_file, task_type):
        text = self.text_extractor.extract_text_from_input(input_file)
        task_prompts = {
            "simplify": f"Simplify the following legal text in clear, plain language. Provide the response as separate points:\n\n{text}\n\nSimplified explanation:",
            "summary": f"Provide a concise summary of the following legal document as separate key points:\n\n{text}\n\nSummary:",
            "key_terms": f"Identify and explain the key legal terms and obligations in this text as separate points:\n\n{text}\n\nKey Terms:",
            "risk": f"Perform a risk analysis on the following legal document and list each risk as a separate point:\n\n{text}\n\nRisk Assessment:"
        }
        prompt = task_prompts.get(task_type, f"Analyze the following text and provide points:\n\n{text}\n\nAnalysis:")

        # Move inputs onto the model's device (device_map="auto" may place it on GPU).
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(
            **inputs,
            max_new_tokens=300,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9
        )
        response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Keep only the text generated after the prompt's final marker (e.g. "Summary:").
        response_parts = response.split(prompt.split("\n\n")[-1])
        raw_response = response_parts[-1].strip() if len(response_parts) > 1 else response.strip()
        return self.format_response(raw_response)

    def generate_chatbot_response(self, user_input):
        if not LLAMA_API_KEY:
            return "LLaMA API key not found. Please set the LLAMA_API_KEY environment variable."
        response = requests.post(
            LLAMA_API_ENDPOINT,
            headers={"Authorization": f"Bearer {LLAMA_API_KEY}"},
            json={"prompt": user_input, "max_tokens": 150}
        )
        if response.status_code == 401:
            return "Unauthorized: Please check your LLaMA API key."
        elif response.status_code != 200:
            return f"Error: Received {response.status_code} status code from LLaMA API."
        return response.json()["choices"][0]["text"].strip()
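
# Hypothetical smoke test (an assumption, not from the source): exercises
# generate_response with a plain string, which TextExtractor passes through
# unchanged. Requires HF_TOKEN and downloads google/gemma-2b-it on first run.
def _assistant_demo():
    assistant = LegalEaseAssistant()
    clause = "The lessee shall indemnify the lessor against all claims arising from use of the premises."
    print(assistant.generate_response(clause, "simplify"))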

def create_interface():
    assistant = LegalEaseAssistant()
    translator = Translator()

    SUPPORTED_LANGUAGES = {
        "English": "en",
        "Hindi": "hi",
        "Bengali": "bn",
        "Telugu": "te",
        "Tamil": "ta",
        "Marathi": "mr",
        "Gujarati": "gu",
        "Kannada": "kn",
        "Malayalam": "ml",
        "Punjabi": "pa",
        "Spanish": "es",
        "French": "fr",
        "German": "de",
        "Chinese (Simplified)": "zh-Hans",
        "Japanese": "ja"
    }

    def process_with_translation(func, *args, target_lang="English"):
        # Run the task, then translate the bulleted result unless English was requested.
        result = func(*args)
        if target_lang != "English":
            result = translator.translate_text(result, SUPPORTED_LANGUAGES[target_lang])
        return result

    with gr.Blocks(title="LegalEase", css="""
        .gradio-container {max-width: 1200px; margin: auto;}
        .header {text-align: center; margin-bottom: 2rem;}
        .content {padding: 2rem;}
    """) as demo:
        gr.HTML(f"""
            <div class="header">Made by Team Ice Age</div>
        """)
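
# Hypothetical entry point (an assumption): it presumes create_interface()
# finishes building the Blocks UI and returns `demo`, then serves the app locally.
if __name__ == "__main__":
    demo = create_interface()
    if demo is not None:  # guard: the function above may not return the Blocks object
        demo.launch()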