DocUA committed on
Commit
5f1f67c
·
0 Parent(s):

Initial commit

Browse files
Files changed (5) hide show
  1. config.py +10 -0
  2. main.py +277 -0
  3. prompts +0 -0
  4. prompts.py +56 -0
  5. requirements.txt +11 -0
config.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
import os
from dotenv import load_dotenv

# Load settings from the .env file into the process environment.
load_dotenv()

# OpenAI API key read from the environment (populated by .env).
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# Fail fast at import time when the key is missing, so downstream OpenAI
# calls don't die later with an opaque authentication error.
if not OPENAI_API_KEY:
    raise ValueError("API ключ OpenAI не знайдено. Додайте його в .env файл.")
main.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os
import re
import gradio as gr
import pandas as pd
import requests
import json
import faiss
import sys
import asyncio
import nest_asyncio
from pathlib import Path
from bs4 import BeautifulSoup
from typing import Union, List

# Allow re-entrant event loops: Gradio callbacks and the llama-index async
# workflow may both need the already-running loop.
nest_asyncio.apply()


from llama_index.core import (
    StorageContext,
    ServiceContext,
    VectorStoreIndex,
    Settings,
    load_index_from_storage
)
from llama_index.llms.openai import OpenAI
from llama_index.core.llms import ChatMessage
from llama_index.core.schema import IndexNode
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.workflow import Event, Context, Workflow, StartEvent, StopEvent, step
from llama_index.core.schema import NodeWithScore
from llama_index.core.prompts import PromptTemplate
from llama_index.core.response_synthesizers import ResponseMode, get_response_synthesizer

from prompts import CITATION_QA_TEMPLATE, CITATION_REFINE_TEMPLATE

# Constants and Settings
from dotenv import load_dotenv

# Load environment variables from the .env file.
load_dotenv()

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
# Fail fast with a clear message (mirrors config.py) instead of the
# TypeError that os.environ assignment raises when the key is missing.
if not OPENAI_API_KEY:
    raise ValueError("API ключ OpenAI не знайдено. Додайте його в .env файл.")
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

# Initialize embeddings and global llama-index settings.
embed_model = OpenAIEmbedding(model_name="text-embedding-3-small")
Settings.embed_model = embed_model
Settings.context_window = 20000
Settings.chunk_size = 1024
Settings.similarity_top_k = 20

# Directory holding the persisted docstore / BM25 / FAISS index artifacts.
PERSIST_DIR = "/home/docsa/PycharmProjects/Legal_Position/Save_index"
class RetrieverEvent(Event):
    """Result of running retrieval"""

    # Scored nodes returned by the fusion retriever; consumed by the
    # workflow's synthesize step.
    nodes: list[NodeWithScore]
+
66
+
class CitationQueryEngineWorkflow(Workflow):
    """Two-step workflow: retrieve candidate precedents, then synthesize a cited answer."""

    @step
    async def retrieve(self, ctx: Context, ev: StartEvent) -> Union[RetrieverEvent, None]:
        """Fetch candidate nodes for the incoming query; abort when no query is given."""
        search_query = ev.get("query")
        user_question = ev.get("question")
        if not search_query:
            # Nothing to search for -- stop the workflow here.
            return None

        # Stash both inputs on the workflow context for the synthesize step.
        await ctx.set("query", search_query)
        await ctx.set("question", user_question)

        matched_nodes = retriever_fusion_faiss_bm25.retrieve(search_query)
        return RetrieverEvent(nodes=matched_nodes)

    @step
    async def synthesize(self, ctx: Context, ev: RetrieverEvent) -> StopEvent:
        """Build a citation-annotated answer from the retrieved nodes."""
        search_query = await ctx.get("query", default=None)
        user_question = await ctx.get("question", default=None)

        answer_llm = OpenAI(model="gpt-4o-mini")
        synthesizer = get_response_synthesizer(
            llm=answer_llm,
            text_qa_template=CITATION_QA_TEMPLATE,
            refine_template=CITATION_REFINE_TEMPLATE,
            response_mode=ResponseMode.COMPACT,
            use_async=True,
        )

        answer = await synthesizer.asynthesize(query=search_query, question=user_question, nodes=ev.nodes)
        return StopEvent(result=answer)
+
98
+
def initialize_components():
    """Load persisted docstore/BM25/FAISS artifacts and build the global fusion retriever.

    Returns:
        True on success; False (after printing the error to stderr) on any failure.
    """
    global retriever_fusion_faiss_bm25
    try:
        persist_path = Path(PERSIST_DIR)
        if not persist_path.exists():
            raise FileNotFoundError(f"Directory not found: {persist_path}")

        # All three artifacts must be present before anything is loaded.
        required_files = ['docstore.json', 'bm25_retriever', 'index_faiss']
        missing_files = [name for name in required_files if not (persist_path / name).exists()]
        if missing_files:
            raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")

        docstore = SimpleDocumentStore.from_persist_path(str(persist_path / "docstore.json"))
        bm25 = BM25Retriever.from_persist_dir(str(persist_path / "bm25_retriever"))

        # Rebuild the FAISS-backed vector index from its persisted storage context.
        vector_store = FaissVectorStore.from_persist_dir(str(persist_path / "index_faiss"))
        storage_ctx = StorageContext.from_defaults(
            vector_store=vector_store,
            persist_dir=str(persist_path / "index_faiss")
        )
        faiss_index = load_index_from_storage(storage_context=storage_ctx)

        # Fuse BM25 (lexical) and FAISS (dense) rankings via reciprocal rerank.
        retriever_fusion_faiss_bm25 = QueryFusionRetriever(
            [
                bm25,
                faiss_index.as_retriever(similarity_top_k=Settings.similarity_top_k, response_mode="no_text")
            ],
            mode="reciprocal_rerank",
            similarity_top_k=Settings.similarity_top_k,
            num_queries=1,
            use_async=True,
        )
        return True
    except Exception as e:
        print(f"Error initializing components: {str(e)}", file=sys.stderr)
        return False
138
+
139
+
140
+ # Add this function before create_gradio_interface()
141
+
async def process_court_decision(url, question, progress=gr.Progress()):
    """Full pipeline: fetch decision text, derive a legal position, find cited precedents.

    Returns a markdown string (answer + cited sources), or an error message on failure.
    """
    try:
        progress(0, desc="Extracting court decision text...")
        decision_text = extract_court_decision_text(url)

        progress(0.3, desc="Generating legal position...")
        position = generate_legal_position(decision_text, question)

        progress(0.5, desc="Initializing analysis workflow...")
        workflow = CitationQueryEngineWorkflow(timeout=600)

        progress(0.7, desc="Analyzing and finding precedents...")
        result = await workflow.run(query=position["Legal_position"], question=question)

        progress(0.9, desc="Processing results...")
        # Citation markers look like "[3]"; de-duplicate and order numerically.
        cited = re.findall(r'\[(\d+)\]', result.response)
        unique_citations = sorted(set(cited), key=int)

        # Assemble the markdown report from parts (joined once at the end).
        parts = [
            f"**Правова позиція:**\n{position['Title']}: {position['Legal_position']}\n\n",
            f"**Відповідь ШІ:**\n{result.response}\n\n",
            "**Цитовані джерела:**\n",
        ]
        for citation in unique_citations:
            node_idx = int(citation) - 1  # citations are 1-based
            if 0 <= node_idx < len(result.source_nodes):
                parts.append(f"[{citation}]: {result.source_nodes[node_idx].node.metadata['title']}\n")

        progress(1.0, desc="Complete!")
        return "".join(parts)
    except Exception as e:
        return f"Error processing court decision: {str(e)}"
179
+
180
+
181
+ # Also, add the extract_court_decision_text function if it's not already there
# Also, add the extract_court_decision_text function if it's not already there
def extract_court_decision_text(url, timeout=30):
    """Download a court decision page and return its cleaned plain text.

    Args:
        url: Public URL of the court decision page.
        timeout: Max seconds to wait for the HTTP response. New parameter with a
            default, so existing callers are unaffected; previously the request
            had no timeout and could hang indefinitely.

    Returns:
        Text of all <p> paragraphs, with registry boilerplate notices removed.

    Raises:
        requests.RequestException: on network failure or a non-2xx HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail fast on 404/500 instead of silently parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Registry banner texts that pollute the decision body.
    unwanted_texts = [
        "Доступ до Реєстру здійснюється в тестовому (обмеженому) режимі.",
        "З метою упередження перешкоджанню стабільній роботі Реєстру"
    ]

    paragraphs = []
    for paragraph in soup.find_all('p'):
        text = paragraph.get_text(separator="\n").strip()
        if not any(unwanted_text in text for unwanted_text in unwanted_texts):
            paragraphs.append(text)
    return "\n".join(paragraphs).strip()
197
+
198
+
199
+ # And the generate_legal_position function
def _extract_json_payload(raw):
    """Return the first {...} span in *raw*, tolerating Markdown ```json fences."""
    match = re.search(r"\{.*\}", raw, re.DOTALL)
    return match.group(0) if match else raw


# And the generate_legal_position function
def generate_legal_position(court_decision_text, user_question):
    """Ask the fine-tuned model for a concise legal position on the decision.

    Args:
        court_decision_text: Full text of the court decision.
        user_question: The user's question, passed to the model for context.

    Returns:
        Dict with "Title" and "Legal_position" keys; if the model reply is not
        parseable JSON, a fallback dict carrying the raw reply is returned.
    """
    llm_lp = OpenAI(model="ft:gpt-4o-mini-2024-07-18:dochome:legal-position-100:9wSVvFmd", temperature=0)

    system_prompt = """
    You are a qualified lawyer tasked with creating a Legal Position based on a court decision.
    Your result will be used to search for precedents in the database of existing legal positions of the Supreme Court of Ukraine.
    """

    prompt = f"""To create the legal position:
    1. Carefully read and analyze the Court decision.
    2. Identify the key legal principle or ruling established in the decision.
    3. Summarize this principle concisely, focusing on its legal implications.
    4. Ensure your summary is clear, precise, and uses appropriate legal terminology.

    Format your legal position following these guidelines:
    - Keep it brief, ideally no more than 3-4 sentences.
    - Use appropriate legal terminology.
    - Do not include any additional explanations or comments.

    Text content should be in Ukrainian only!
    Return the result as JSON in the format:
    {{
        "Title": "Brief title of the legal position",
        "Legal_position": "Full text of the legal position"
    }}

    Court decision:
    {court_decision_text}

    User's question:
    {user_question}
    """

    messages = [
        ChatMessage(role="system", content=system_prompt),
        ChatMessage(role="user", content=prompt),
    ]

    response = llm_lp.chat(messages)
    try:
        # Models often wrap JSON in ```json fences; extract the object first
        # so valid payloads don't fall through to the error branch.
        return json.loads(_extract_json_payload(response.message.content))
    except json.JSONDecodeError:
        # If JSON parsing fails, create a structured response
        return {
            "Title": "Error parsing response",
            "Legal_position": response.message.content
        }
247
+
248
+
def create_gradio_interface():
    """Build the Gradio UI: URL + question inputs, analyze button, markdown output."""
    with gr.Blocks() as app:
        gr.Markdown("# Аналізатор судових рішень на основі правових позицій Верховного Суду")

        with gr.Row():
            url_input = gr.Textbox(label="URL судового рішення:")
            question_input = gr.Textbox(label="Ваше питання:")

        analyze_button = gr.Button("Аналізувати")
        output = gr.Markdown(label="Результат аналізу")

        # Pass the coroutine function directly: Gradio runs async handlers on
        # its own event loop, so the previous `lambda: asyncio.run(...)` wrapper
        # (which needs nest_asyncio inside a running loop and bypassed progress
        # tracking) is unnecessary.
        analyze_button.click(
            fn=process_court_decision,
            inputs=[url_input, question_input],
            outputs=output
        )

    return app
268
+
269
+
if __name__ == "__main__":
    # Guard clause: bail out early if the persisted indexes cannot be loaded.
    if not initialize_components():
        print("Failed to initialize components. Please check the paths and try again.", file=sys.stderr)
        sys.exit(1)
    print("Components initialized successfully!")
    demo = create_gradio_interface()
    # share=True publishes a temporary public Gradio link.
    demo.launch(share=True)
prompts ADDED
File without changes
prompts.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Citation prompt templates for the Supreme Court legal-position QA engine."""

from llama_index.core.prompts import PromptTemplate

# Initial QA prompt: answer a new legal issue in Ukrainian, citing numbered
# sources "[N]" drawn from {context_str}; also receives {query_str} and {question}.
CITATION_QA_TEMPLATE = PromptTemplate(
    "You are a qualified lawyer. "
    "Please provide a legal analysis based solely on the provided legal precedents and positions. "
    "When referencing a legal precedent or position, "
    "cite the appropriate source(s) using their corresponding numbers. "
    "Every answer should include at least one legal source citation relevant to the query. "
    "Only cite a source when you are explicitly referencing it. "
    "If none of the legal precedents or positions are helpful, you should indicate that. "
    "Use only the Ukrainian language to answer. "
    "For example:\n"
    "Source 1:\n"
    "The court ruled that contractual obligations must be fulfilled even if the terms are vague.\n"
    "Source 2:\n"
    "In case of unforeseen circumstances, contracts may be voided under certain conditions.\n"
    "Query: Can a contract be voided due to unforeseen circumstances?\n"
    "Answer: A contract may be voided under certain conditions if unforeseen circumstances arise [2]. "
    "Now it's your turn. Below are several numbered legal sources and precedents (legal positions):"
    "\n------\n"
    "{context_str}"
    "\n------\n"
    "New legal issue: {query_str}\n"
    "User question: {question}\n"
    "Answer: "
)

# Refine prompt: improve {existing_answer} using extra sources in {context_msg},
# repeating the existing answer when the new sources are unhelpful.
CITATION_REFINE_TEMPLATE = PromptTemplate(
    "You are a qualified lawyer. "
    "Please refine the legal analysis based solely on the provided legal precedents and positions. "
    "When referencing a legal precedent or position, "
    "cite the appropriate source(s) using their corresponding numbers. "
    "Every refined answer should include at least one relevant legal source citation. "
    "Only cite a source when you are explicitly referencing it. "
    "If none of the legal precedents or positions are helpful, you should repeat the existing answer. "
    "Use only the Ukrainian language to answer."
    "For example:\n"
    "Source 1:\n"
    "The court ruled that contractual obligations must be fulfilled even if the terms are vague.\n"
    "Source 2:\n"
    "In case of unforeseen circumstances, contracts may be voided under certain conditions.\n"
    "Query: Can a contract be voided due to unforeseen circumstances?\n"
    "Existing answer: A contract may be voided under certain conditions if unforeseen circumstances arise [2].\n"
    "Now it's your turn. "
    "We have provided an existing legal analysis: {existing_answer}"
    "Below are several numbered legal sources and precedents. "
    "Use them to refine the existing legal answer. "
    "If the provided legal sources are not helpful, you will repeat the existing answer."
    "\nBegin refining!"
    "\n------\n"
    "{context_msg}"
    "\n------\n"
    "New legal issue: {query_str}\n"
    "User question: {question}\n"
    "Answer: "
)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ llama-index
2
+ llama-index-readers-file
3
+ llama-index-vector-stores-faiss
4
+ llama-index-retrievers-bm25
5
+ openai
6
+ faiss-cpu
7
+ llama-index-embeddings-openai
8
+ llama-index-llms-openai
9
+ gradio
10
+ beautifulsoup4
11
+ nest-asyncio