Spaces:
Runtime error
Runtime error
Commit
·
5f1f67c
0
Parent(s):
Initial commit
Browse files- config.py +10 -0
- main.py +277 -0
- prompts +0 -0
- prompts.py +56 -0
- requirements.txt +11 -0
config.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

from dotenv import load_dotenv

# Load settings from the .env file into the process environment.
load_dotenv()

# OpenAI API key read from the environment (populated by load_dotenv above).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Fail fast at import time if the key is absent, so the app never starts
# half-configured. (Error message is user-facing and stays in Ukrainian.)
if not OPENAI_API_KEY:
    raise ValueError("API ключ OpenAI не знайдено. Додайте його в .env файл.")
|
main.py
ADDED
@@ -0,0 +1,277 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import gradio as gr
|
4 |
+
import pandas as pd
|
5 |
+
import requests
|
6 |
+
import json
|
7 |
+
import faiss
|
8 |
+
import nest_asyncio
|
9 |
+
import sys
|
10 |
+
from pathlib import Path
|
11 |
+
from bs4 import BeautifulSoup
|
12 |
+
from typing import Union, List
|
13 |
+
import asyncio
|
14 |
+
import nest_asyncio
|
15 |
+
nest_asyncio.apply()
|
16 |
+
|
17 |
+
|
18 |
+
from llama_index.core import (
|
19 |
+
StorageContext,
|
20 |
+
ServiceContext,
|
21 |
+
VectorStoreIndex,
|
22 |
+
Settings,
|
23 |
+
load_index_from_storage
|
24 |
+
)
|
25 |
+
from llama_index.llms.openai import OpenAI
|
26 |
+
from llama_index.core.llms import ChatMessage
|
27 |
+
from llama_index.core.schema import IndexNode
|
28 |
+
from llama_index.core.storage.docstore import SimpleDocumentStore
|
29 |
+
from llama_index.retrievers.bm25 import BM25Retriever
|
30 |
+
from llama_index.embeddings.openai import OpenAIEmbedding
|
31 |
+
from llama_index.vector_stores.faiss import FaissVectorStore
|
32 |
+
from llama_index.core.retrievers import QueryFusionRetriever
|
33 |
+
from llama_index.core.workflow import Event, Context, Workflow, StartEvent, StopEvent, step
|
34 |
+
from llama_index.core.schema import NodeWithScore
|
35 |
+
from llama_index.core.prompts import PromptTemplate
|
36 |
+
from llama_index.core.response_synthesizers import ResponseMode, get_response_synthesizer
|
37 |
+
|
38 |
+
from prompts import CITATION_QA_TEMPLATE, CITATION_REFINE_TEMPLATE
|
39 |
+
|
40 |
+
# Constants and Settings
from dotenv import load_dotenv

# Load settings from the .env file into the process environment.
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    # Fail fast with a clear message instead of the opaque TypeError that
    # os.environ[...] = None would raise (mirrors the check in config.py).
    raise ValueError("API ключ OpenAI не знайдено. Додайте його в .env файл.")
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Initialize embeddings and global llama-index settings.
embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")
Settings.embed_model = embed_model
Settings.context_window = 20000
Settings.chunk_size = 1024
Settings.similarity_top_k = 20

# Directory holding the persisted docstore / BM25 retriever / FAISS index.
# Overridable via the PERSIST_DIR env var; defaults to the original path.
PERSIST_DIR = os.getenv(
    "PERSIST_DIR", "/home/docsa/PycharmProjects/Legal_Position/Save_index"
)
|
58 |
+
|
59 |
+
|
60 |
+
# Templates remain the same...
|
61 |
+
|
62 |
+
class RetrieverEvent(Event):
    """Workflow event carrying the result of the retrieval step."""

    # Retrieved nodes with their fusion scores; consumed by synthesize().
    nodes: list[NodeWithScore]
|
65 |
+
|
66 |
+
|
67 |
+
class CitationQueryEngineWorkflow(Workflow):
    """Two-step workflow: fused retrieval followed by citation-style synthesis."""

    @step
    async def retrieve(self, ctx: Context, ev: StartEvent) -> Union[RetrieverEvent, None]:
        """Fetch candidate nodes for the query and stash query/question in the context."""
        query = ev.get("query")
        question = ev.get("question")
        if not query:
            # Nothing to search for — end the workflow here.
            return None

        await ctx.set("query", query)
        await ctx.set("question", question)

        # NOTE(review): relies on the module-level fused retriever built by
        # initialize_components(); confirm it ran before the workflow starts.
        return RetrieverEvent(nodes=retriever_fusion_faiss_bm25.retrieve(query))

    @step
    async def synthesize(self, ctx: Context, ev: RetrieverEvent) -> StopEvent:
        """Turn the retrieved nodes into a cited answer via the citation templates."""
        query = await ctx.get("query", default=None)
        question = await ctx.get("question", default=None)

        synthesizer = get_response_synthesizer(
            llm=OpenAI(model="gpt-4o-mini"),
            text_qa_template=CITATION_QA_TEMPLATE,
            refine_template=CITATION_REFINE_TEMPLATE,
            response_mode=ResponseMode.COMPACT,
            use_async=True,
        )

        # `question` is forwarded as a template kwarg ({question} in the prompts).
        response = await synthesizer.asynthesize(query=query, question=question, nodes=ev.nodes)
        return StopEvent(result=response)
|
97 |
+
|
98 |
+
|
99 |
+
def initialize_components():
    """Load the persisted BM25 retriever and FAISS index and build the fused retriever.

    Sets the module-level ``retriever_fusion_faiss_bm25`` used by the workflow.

    Returns:
        bool: True on success; False if anything failed (the error is printed
        to stderr so the entry point can report it and exit).
    """
    try:
        persist_path = Path(PERSIST_DIR)

        if not persist_path.exists():
            raise FileNotFoundError(f"Directory not found: {persist_path}")

        # Validate the on-disk layout up front so the user gets one clear
        # message instead of a deep llama-index stack trace.
        required_files = ['docstore.json', 'bm25_retriever', 'index_faiss']
        missing_files = [f for f in required_files if not (persist_path / f).exists()]

        if missing_files:
            raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")

        global retriever_fusion_faiss_bm25

        # NOTE: the original code also loaded docstore.json into a local that
        # was never used; the existence check above keeps the validation, so
        # the redundant (and potentially slow) load has been dropped.
        bm25_retriever = BM25Retriever.from_persist_dir(str(persist_path / "bm25_retriever"))

        faiss_vector_store = FaissVectorStore.from_persist_dir(str(persist_path / "index_faiss"))
        storage_context_faiss = StorageContext.from_defaults(
            vector_store=faiss_vector_store,
            persist_dir=str(persist_path / "index_faiss")
        )
        index_faiss = load_index_from_storage(storage_context=storage_context_faiss)

        # Fuse sparse (BM25) and dense (FAISS) retrieval with reciprocal-rank fusion.
        retriever_fusion_faiss_bm25 = QueryFusionRetriever(
            [
                bm25_retriever,
                index_faiss.as_retriever(similarity_top_k=Settings.similarity_top_k, response_mode="no_text")
            ],
            mode="reciprocal_rerank",
            similarity_top_k=Settings.similarity_top_k,
            num_queries=1,  # use the query as-is; skip LLM query generation
            use_async=True,
        )
        return True
    except Exception as e:
        print(f"Error initializing components: {str(e)}", file=sys.stderr)
        return False
|
138 |
+
|
139 |
+
|
140 |
+
# Add this function before create_gradio_interface()
|
141 |
+
|
142 |
+
async def process_court_decision(url, question, progress=gr.Progress()):
    """Run the full pipeline for one court decision.

    Fetches the decision text, derives a legal position, runs the citation
    workflow, and formats a markdown report. Returns the report string, or an
    error message string on any failure.
    """
    # gr.Progress() as a default is the Gradio idiom for wiring a progress bar.
    try:
        progress(0, desc="Extracting court decision text...")
        court_decision_text = extract_court_decision_text(url)

        progress(0.3, desc="Generating legal position...")
        legal_position_json = generate_legal_position(court_decision_text, question)

        progress(0.5, desc="Initializing analysis workflow...")
        workflow = CitationQueryEngineWorkflow(timeout=600)

        progress(0.7, desc="Analyzing and finding precedents...")
        result = await workflow.run(query=legal_position_json["Legal_position"], question=question)

        progress(0.9, desc="Processing results...")
        # Citation markers look like "[3]"; dedupe and sort them numerically.
        unique_citations = sorted(set(re.findall(r'\[(\d+)\]', result.response)), key=int)

        parts = [
            f"**Правова позиція:**\n{legal_position_json['Title']}: {legal_position_json['Legal_position']}\n\n",
            f"**Відповідь ШІ:**\n{result.response}\n\n",
            "**Цитовані джерела:**\n",
        ]
        for citation in unique_citations:
            idx = int(citation) - 1  # citations are 1-based source numbers
            if 0 <= idx < len(result.source_nodes):
                parts.append(f"[{citation}]: {result.source_nodes[idx].node.metadata['title']}\n")

        progress(1.0, desc="Complete!")
        return "".join(parts)
    except Exception as e:
        return f"Error processing court decision: {str(e)}"
|
179 |
+
|
180 |
+
|
181 |
+
# Also, add the extract_court_decision_text function if it's not already there
|
182 |
+
def extract_court_decision_text(url):
    """Download a court decision page and return its visible paragraph text.

    Registry-wide boilerplate notices are filtered out. Raises
    ``requests.HTTPError`` on a non-2xx response and ``requests.Timeout`` on a
    stalled host; both are caught and reported by process_court_decision.
    """
    # Timeout prevents the worker from hanging forever on a dead host;
    # raise_for_status fails loudly instead of parsing an error page as a decision.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Service notices shown by the registry that are not part of the decision.
    unwanted_texts = [
        "Доступ до Реєстру здійснюється в тестовому (обмеженому) режимі.",
        "З метою упередження перешкоджанню стабільній роботі Реєстру"
    ]

    decision_text = ""
    for paragraph in soup.find_all('p'):
        text = paragraph.get_text(separator="\n").strip()
        if not any(unwanted_text in text for unwanted_text in unwanted_texts):
            decision_text += text + "\n"
    return decision_text.strip()
|
197 |
+
|
198 |
+
|
199 |
+
# And the generate_legal_position function
|
200 |
+
def generate_legal_position(court_decision_text, user_question):
    """Distill a court decision into a legal position via the fine-tuned model.

    Returns:
        dict: with keys "Title" and "Legal_position". If the model reply is
        not valid JSON, the raw reply is returned under "Legal_position" with
        an error title so the pipeline can still proceed.
    """
    llm_lp = OpenAI(model="ft:gpt-4o-mini-2024-07-18:dochome:legal-position-100:9wSVvFmd", temperature=0)

    system_prompt = """
    You are a qualified lawyer tasked with creating a Legal Position based on a court decision.
    Your result will be used to search for precedents in the database of existing legal positions of the Supreme Court of Ukraine.
    """

    prompt = f"""To create the legal position:
    1. Carefully read and analyze the Court decision.
    2. Identify the key legal principle or ruling established in the decision.
    3. Summarize this principle concisely, focusing on its legal implications.
    4. Ensure your summary is clear, precise, and uses appropriate legal terminology.

    Format your legal position following these guidelines:
    - Keep it brief, ideally no more than 3-4 sentences.
    - Use appropriate legal terminology.
    - Do not include any additional explanations or comments.

    Text content should be in Ukrainian only!
    Return the result as JSON in the format:
    {{
        "Title": "Brief title of the legal position",
        "Legal_position": "Full text of the legal position"
    }}

    Court decision:
    {court_decision_text}

    User's question:
    {user_question}
    """

    messages = [
        ChatMessage(role="system", content=system_prompt),
        ChatMessage(role="user", content=prompt),
    ]

    response = llm_lp.chat(messages)
    content = response.message.content
    # Models often wrap JSON in a ```json ... ``` fence; unwrap it before parsing.
    fenced = re.search(r"```(?:json)?\s*(.*?)\s*```", content, re.DOTALL)
    if fenced:
        content = fenced.group(1)
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        # If JSON parsing fails, fall back to a structured response so callers
        # can still index "Title" / "Legal_position".
        return {
            "Title": "Error parsing response",
            "Legal_position": response.message.content
        }
|
247 |
+
|
248 |
+
|
249 |
+
# Update the create_gradio_interface function to use share=True
|
250 |
+
def create_gradio_interface():
    """Build the Gradio Blocks UI for the court-decision analyzer."""
    with gr.Blocks() as app:
        gr.Markdown("# Аналізатор судових рішень на основі правових позицій Верховного Суду")

        with gr.Row():
            url_input = gr.Textbox(label="URL судового рішення:")
            question_input = gr.Textbox(label="Ваше питання:")

        analyze_button = gr.Button("Аналізувати")
        output = gr.Markdown(label="Результат аналізу")

        # Pass the coroutine function directly: Gradio runs async callbacks on
        # its own event loop and can see the gr.Progress() parameter, so the
        # progress bar actually updates. The previous asyncio.run(...) lambda
        # hid that parameter and nested event loops needlessly.
        analyze_button.click(
            fn=process_court_decision,
            inputs=[url_input, question_input],
            outputs=output
        )

    return app
|
268 |
+
|
269 |
+
|
270 |
+
if __name__ == "__main__":
|
271 |
+
if initialize_components():
|
272 |
+
print("Components initialized successfully!")
|
273 |
+
app = create_gradio_interface()
|
274 |
+
app.launch(share=True) # Added share=True here
|
275 |
+
else:
|
276 |
+
print("Failed to initialize components. Please check the paths and try again.", file=sys.stderr)
|
277 |
+
sys.exit(1)
|
prompts
ADDED
File without changes
|
prompts.py
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from llama_index.core.prompts import PromptTemplate
|
2 |
+
|
3 |
+
# First-pass synthesis prompt: answer {query_str} / {question} using only the
# numbered sources in {context_str}, citing them inline as [n].
CITATION_QA_TEMPLATE = PromptTemplate(
    "You are a qualified lawyer. "
    "Please provide a legal analysis based solely on the provided legal precedents and positions. "
    "When referencing a legal precedent or position, "
    "cite the appropriate source(s) using their corresponding numbers. "
    "Every answer should include at least one legal source citation relevant to the query. "
    "Only cite a source when you are explicitly referencing it. "
    "If none of the legal precedents or positions are helpful, you should indicate that. "
    "Use only the Ukrainian language to answer. "
    "For example:\n"
    "Source 1:\n"
    "The court ruled that contractual obligations must be fulfilled even if the terms are vague.\n"
    "Source 2:\n"
    "In case of unforeseen circumstances, contracts may be voided under certain conditions.\n"
    "Query: Can a contract be voided due to unforeseen circumstances?\n"
    "Answer: A contract may be voided under certain conditions if unforeseen circumstances arise [2]. "
    "Now it's your turn. Below are several numbered legal sources and precedents (legal positions):"
    "\n------\n"
    "{context_str}"
    "\n------\n"
    "New legal issue: {query_str}\n"
    "User question: {question}\n"
    "Answer: "
)
|
27 |
+
|
28 |
+
# Refine-pass prompt: revise {existing_answer} against additional numbered
# sources in {context_msg}, keeping inline [n] citations.
CITATION_REFINE_TEMPLATE = PromptTemplate(
    "You are a qualified lawyer. "
    "Please refine the legal analysis based solely on the provided legal precedents and positions. "
    "When referencing a legal precedent or position, "
    "cite the appropriate source(s) using their corresponding numbers. "
    "Every refined answer should include at least one relevant legal source citation. "
    "Only cite a source when you are explicitly referencing it. "
    "If none of the legal precedents or positions are helpful, you should repeat the existing answer. "
    # Fixed: trailing space was missing, fusing "answer." into "For example:"
    # (the QA template already had the space).
    "Use only the Ukrainian language to answer. "
    "For example:\n"
    "Source 1:\n"
    "The court ruled that contractual obligations must be fulfilled even if the terms are vague.\n"
    "Source 2:\n"
    "In case of unforeseen circumstances, contracts may be voided under certain conditions.\n"
    "Query: Can a contract be voided due to unforeseen circumstances?\n"
    "Existing answer: A contract may be voided under certain conditions if unforeseen circumstances arise [2].\n"
    "Now it's your turn. "
    # Fixed: newline was missing, fusing the injected answer into "Below are...".
    "We have provided an existing legal analysis: {existing_answer}\n"
    "Below are several numbered legal sources and precedents. "
    "Use them to refine the existing legal answer. "
    "If the provided legal sources are not helpful, you will repeat the existing answer."
    "\nBegin refining!"
    "\n------\n"
    "{context_msg}"
    "\n------\n"
    "New legal issue: {query_str}\n"
    "User question: {question}\n"
    "Answer: "
)
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
llama-index
llama-index-readers-file
llama-index-vector-stores-faiss
llama-index-retrievers-bm25
openai
faiss-cpu
llama-index-embeddings-openai
llama-index-llms-openai
gradio
beautifulsoup4
nest-asyncio
python-dotenv
requests
pandas
|