Spaces:

DocSA
/

Legal_Position_Generator

Runtime error

App Files Community

DocUA committed on Nov 13, 2024

Commit

a8c7830

1 Parent(s): c212462

Add python-dotenv to requirements

Browse files

Files changed (2) hide show

main_new.py +0 -264
requirements.txt +2 -1

main_new.py DELETED Viewed

@@ -1,264 +0,0 @@
-import os
-import re
-import gradio as gr
-import pandas as pd
-import requests
-import json
-import faiss
-import nest_asyncio
-import sys
-from pathlib import Path
-from bs4 import BeautifulSoup
-from typing import Union, List
-import asyncio
-from llama_index.core import (
-    StorageContext,
-    ServiceContext,
-    VectorStoreIndex,
-    Settings,
-    load_index_from_storage
-)
-from llama_index.llms.openai import OpenAI
-from llama_index.core.llms import ChatMessage
-from llama_index.core.schema import IndexNode
-from llama_index.core.storage.docstore import SimpleDocumentStore
-from llama_index.retrievers.bm25 import BM25Retriever
-from llama_index.embeddings.openai import OpenAIEmbedding
-from llama_index.core.retrievers import QueryFusionRetriever
-from llama_index.core.workflow import Event, Context, Workflow, StartEvent, StopEvent, step
-from llama_index.core.schema import NodeWithScore
-from llama_index.core.prompts import PromptTemplate
-from llama_index.core.response_synthesizers import ResponseMode, get_response_synthesizer
-from prompts import CITATION_QA_TEMPLATE, CITATION_REFINE_TEMPLATE
-from dotenv import load_dotenv
-load_dotenv()
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
-embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")
-Settings.embed_model = embed_model
-Settings.context_window = 20000
-Settings.chunk_size = 2048
-Settings.similarity_top_k = 20
-PERSIST_DIR = "/home/docsa/Legal_Position/Save_index"
-# Apply nest_asyncio to handle nested async calls
-nest_asyncio.apply()
-def parse_doc_ids(doc_ids):
-    if doc_ids is None:
-        return []
-    if isinstance(doc_ids, list):
-        return [str(id).strip('[]') for id in doc_ids]
-    if isinstance(doc_ids, str):
-        cleaned = doc_ids.strip('[]').replace(' ', '')
-        if cleaned:
-            return [id.strip() for id in cleaned.split(',')]
-    return []
-def get_links_html(doc_ids):
-    parsed_ids = parse_doc_ids(doc_ids)
-    if not parsed_ids:
-        return ""
-    links = [f"[Рішення ВСУ: {doc_id}](https://reyestr.court.gov.ua/Review/{doc_id})"
-             for doc_id in parsed_ids]
-    return ", ".join(links)
-def initialize_components():
-    try:
-        persist_path = Path(PERSIST_DIR)
-        if not persist_path.exists():
-            raise FileNotFoundError(f"Directory not found: {persist_path}")
-        required_files = ['docstore_es_filter.json', 'bm25_retriever']
-        missing_files = [f for f in required_files if not (persist_path / f).exists()]
-        if missing_files:
-            raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")
-        global retriever_bm25
-        docstore = SimpleDocumentStore.from_persist_path(str(persist_path / "docstore_es_filter.json"))
-        bm25_retriever = BM25Retriever.from_persist_dir(str(persist_path / "bm25_retriever_es"))
-        retriever_bm25 = QueryFusionRetriever(
-            [
-                bm25_retriever,
-            ],
-            similarity_top_k=Settings.similarity_top_k,
-            num_queries=1,
-            use_async=True,
-        )
-        return True
-    except Exception as e:
-        print(f"Error initializing components: {str(e)}", file=sys.stderr)
-        return False
-def extract_court_decision_text(url):
-    response = requests.get(url)
-    soup = BeautifulSoup(response.content, 'html.parser')
-    unwanted_texts = [
-        "Доступ до Реєстру здійснюється в тестовому (обмеженому) режимі.",
-        "З метою упередження перешкоджанню стабільній роботі Реєстру"
-    ]
-    decision_text = ""
-    for paragraph in soup.find_all('p'):
-        text = paragraph.get_text(separator="\n").strip()
-        if not any(unwanted_text in text for unwanted_text in unwanted_texts):
-            decision_text += text + "\n"
-    return decision_text.strip()
-def generate_legal_position(court_decision_text, user_question):
-    llm_lp = OpenAI(model="gpt-4o-mini", temperature=0)
-    response_format = {
-        "type": "json_schema",
-        "json_schema": {
-            "name": "lp_schema",
-            "schema": {
-                "type": "object",
-                "properties": {
-                    "title": {"type": "string", "description": "Title of the legal position"},
-                    "text": {"type": "string", "description": "Text of the legal position"},
-                },
-                "required": ["title", "text"],
-                "additionalProperties": False
-            },
-            "strict": True
-        }
-    }
-    system_prompt = """
-    Ви кваліфікований юрист, якому доручено сформулювати правову позицію на основі судового рішення.
-    """
-    prompt = f"""Дотримуйтесь цих інструкцій уважно:
-    1. Спочатку вам буде надано текст судового рішення:
-    <court_decision>
-    {court_decision_text}
-    </court_decision>
-    2. Уважно прочитайте та проаналізуйте текст судового рішення. Зверніть увагу на:
-       - Юридичну суть рішення
-       - Основне правове обґрунтування
-       - Головні юридичні міркування
-    3. На основі вашого аналізу сформулюйте правову позицію, дотримуючись таких вказівок:
-       - Будьте чіткими, точними та обґрунтованими
-       - Використовуйте відповідну юридичну термінологію
-       - Зберігайте стислість, але повністю передайте суть судового рішення
-       - Уникайте додаткових пояснень чи коментарів
-       - Спробуйте узагальнювати та уникати специфічної інформації (наприклад, імен або назв) під час подачі результатів
-       - Використовуйте лише українську мову
-    4. Окрім правової позиції, створіть короткий заголовок, який відображає основну її думку.
-    5. Відформатуйте вашу відповідь у форматі JSON
-    <examples>
-    {{
-        "title": "Заголовок правової позиції",
-        "text": "Текст правової позиції"
-    }}
-    </examples>
-    Переконайтеся, що ваша відповідь відповідає цьому формату та вказівкам. Надайте лише один JSON-вихід без будь-яких додаткових коментарів.
-    """
-    messages = [
-        ChatMessage(role="system", content=system_prompt),
-        ChatMessage(role="user", content=prompt),
-    ]
-    response = llm_lp.chat(messages, response_format=response_format)
-    try:
-        parsed_response = json.loads(response.message.content)
-        if "title" in parsed_response and "text" in parsed_response:
-            return parsed_response
-        else:
-            return {
-                "title": "Error: Missing required fields in response",
-                "text": response.message.content
-            }
-    except json.JSONDecodeError:
-        return {
-            "title": "Error parsing response",
-            "text": response.message.content
-        }
-def create_gradio_interface():
-    with gr.Blocks() as app:
-        gr.Markdown("# Аналізатор судових рішень на основі правових позицій Верховного Суду")
-        with gr.Row():
-            url_input = gr.Textbox(label="URL судового рішення:")
-            question_input = gr.Textbox(label="Ваше питання:")
-        with gr.Row():
-            generate_position_button = gr.Button("Генерувати правову позицію")
-            search_with_ai_button = gr.Button("Пошук із ШІ", interactive=False)
-        position_output = gr.Markdown(label="Короткий зміст позиції суду за введеним рішенням")
-        search_output = gr.Markdown(label="Результат пошуку")
-        analysis_output = gr.Markdown(label="Результат аналізу")
-        state = gr.State()
-        async def generate_position_action(url):
-            try:
-                court_decision_text = extract_court_decision_text(url)
-                legal_position_json = generate_legal_position(court_decision_text, "")
-                position_output_content = f"**Короткий зміст позиції суду за введеним рішенням:**\n *{legal_position_json['title']}*: \n{legal_position_json['text']}\n\n"
-                return position_output_content, legal_position_json
-            except Exception as e:
-                return f"Error during position generation: {str(e)}", None
-        async def search_with_ai_action(legal_position_json):
-            try:
-                nodes = await retriever_bm25.aretrieve(legal_position_json["text"])
-                sources_output = "\n **Результати пошуку (наявні правові позиції ВСУ):** \n\n"
-                for index, node in enumerate(nodes, start=1):
-                    source_title = node.node.metadata.get('title', 'Невідомий заголовок')
-                    doc_ids = node.node.metadata.get('doc_id')
-                    links = get_links_html(doc_ids)
-                    sources_output += f"\n[{index}] *{source_title}* 👉 Score: {node.score} {links}\n"
-                return sources_output
-            except Exception as e:
-                return f"Error during search: {str(e)}"
-        generate_position_button.click(fn=generate_position_action, inputs=url_input, outputs=[position_output, state])
-        generate_position_button.click(fn=lambda: gr.update(interactive=True), inputs=None,
-                                       outputs=search_with_ai_button)
-        search_with_ai_button.click(fn=search_with_ai_action, inputs=state, outputs=search_output)
-    return app
-if __name__ == "__main__":
-    if initialize_components():
-        print("Components initialized successfully!")
-        app = create_gradio_interface()
-        app.launch(share=True)
-    else:
-        print("Failed to initialize components. Please check the paths and try again.", file=sys.stderr)
-        sys.exit(1)

requirements.txt CHANGED Viewed

@@ -9,4 +9,5 @@ llama-index-llms-openai
 gradio
 beautifulsoup4
 nest-asyncio
-boto3

 gradio
 beautifulsoup4
 nest-asyncio
+boto3
+python-dotenv