Spaces:
Runtime error
Runtime error
Commit
·
920001b
1
Parent(s):
a171113
Add basic BM25 search and corpus generation
Browse files- .gitignore +5 -0
- healthcheck_bm25.py +28 -0
- init_bm25.py +298 -0
- main.py +77 -53
- poetry.lock +0 -0
- pyproject.toml +41 -0
- requirements.txt +1 -0
.gitignore
CHANGED
@@ -7,6 +7,7 @@
|
|
7 |
# Ігноруємо кеші Python
|
8 |
__pycache__/
|
9 |
*.pyc
|
|
|
10 |
|
11 |
# Ігноруємо конфіденційні файли
|
12 |
.env
|
@@ -15,3 +16,7 @@ __pycache__/
|
|
15 |
Save_index/
|
16 |
/lp/
|
17 |
/Save_Index_Local/
|
|
|
|
|
|
|
|
|
|
7 |
# Ігноруємо кеші Python
|
8 |
__pycache__/
|
9 |
*.pyc
|
10 |
+
.gradio/
|
11 |
|
12 |
# Ігноруємо конфіденційні файли
|
13 |
.env
|
|
|
16 |
Save_index/
|
17 |
/lp/
|
18 |
/Save_Index_Local/
|
19 |
+
|
20 |
+
# Ігноруємо дані для генерації корпуса
|
21 |
+
*.csv
|
22 |
+
*.xlsx
|
healthcheck_bm25.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Manual smoke test for the persisted BM25 index.

Loads the BM25Retriever persisted by init_bm25.py, fetches one court
decision's text from the public registry (via main.extract_court_decision_text,
which performs a live HTTP request) and prints the ranked matches.
"""
from pathlib import Path

from llama_index.retrievers.bm25 import BM25Retriever

from main import extract_court_decision_text


# Directory where init_bm25.py persisted the retriever.
PERSIST_PATH = Path("Save_Index_Local")

INDEX_NAME = "bm25_retriever"
# INDEX_NAME = "bm25_retriever_meta"  # alternate index built with USE_META=True

# A known court decision used as the smoke-test query.
TEST_CD_URL = "https://reyestr.court.gov.ua/Review/118766467"
# TEST_CD_URL = "https://reyestr.court.gov.ua/Review/118763429"

# Set to True to also dump the fetched decision text before the results.
PRINT_CD = False


retriever = BM25Retriever.from_persist_dir(str(PERSIST_PATH / INDEX_NAME))

court_decision_text = extract_court_decision_text(TEST_CD_URL)

if PRINT_CD:
    print(court_decision_text, "\n\n\n\n\n")

# Print rank, BM25 score and the legal-position title stored in node metadata.
nodes_with_score = retriever.retrieve(court_decision_text)
for index, node_with_score in enumerate(nodes_with_score, start=1):
    source_title = node_with_score.node.metadata.get("title", "Невідомий заголовок")
    print(index, f"{node_with_score.score:.4f}", source_title, "\n", sep="\t")
|
init_bm25.py
ADDED
@@ -0,0 +1,298 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import unicodedata
|
3 |
+
from pathlib import Path
|
4 |
+
|
5 |
+
import bm25s
|
6 |
+
import pandas as pd
|
7 |
+
from llama_index.core import Document
|
8 |
+
from llama_index.core.schema import MetadataMode
|
9 |
+
from llama_index.core.vector_stores.utils import node_to_metadata_dict
|
10 |
+
from llama_index.retrievers.bm25 import BM25Retriever
|
11 |
+
|
12 |
+
|
13 |
+
|
14 |
+
# Where the built retriever is persisted (shared with main.py / healthcheck).
PERSIST_PATH = Path("Save_Index_Local")
# Source spreadsheet with legal positions (read via pandas + openpyxl).
LP_INFO_FILE = "legal_position_with_categories_documents_all.xlsx"

INDEX_NAME = "bm25_retriever"
USE_META = False  # when True, index rich metadata (categories, doc ids)

# INDEX_NAME = "bm25_retriever_meta"
# USE_META = True
|
22 |
+
|
23 |
+
|
24 |
+
def clean_string(text: pd.Series) -> pd.Series:
    """Normalize a pandas text column for indexing.

    Fills missing values with "", NFKC-normalizes (expands compatibility
    forms, e.g. NBSP -> space, "½" -> "1⁄2"), then applies punctuation
    substitutions: «» -> ", § -> №, fraction slash ⁄ -> /.

    Args:
        text: string Series, possibly containing NaN.

    Returns:
        pd.Series: cleaned strings, never NaN.
    """
    text = text.fillna("")

    # Normalize FIRST. The original ran NFKC *after* the substitutions, which
    # silently undid the "§" -> "№" mapping: NFKC decomposes "№" (U+2116)
    # into the two characters "No".
    text = text.apply(lambda t: unicodedata.normalize("NFKC", t))  # type: ignore

    text = text.str.replace(r"«|»", '"', regex=True)
    # BUG FIX: the original passed r"\xa0" — a literal 4-character string
    # under pandas >= 2.0 where str.replace defaults to regex=False — so the
    # NBSP was never matched. Replace the actual character, explicitly literal.
    text = text.str.replace("\xa0", " ", regex=False)
    text = text.str.replace("§", "№", regex=False)
    # NFKC expands unicode fractions into digits joined by U+2044; make the
    # fraction slash an ASCII slash.
    text = text.str.replace("⁄", "/", regex=False)

    return text
|
35 |
+
|
36 |
+
|
37 |
+
def find_matching_pattern(categories):
    """
    Search for a Supreme Court chamber name in the categories and return the
    first match found.

    Args:
        categories: a string, a list of strings, or a list mixing strings
            and lists of strings.

    Returns:
        str: matching pattern, or "" if no match found (also for unsupported
        input types such as NaN).
    """
    patterns = [
        "Велика Палата",
        "Касаційний кримінальний суд",
        "Касаційний адміністративний суд",
        "Касаційний господарський суд",
        "Касаційний цивільний суд",
    ]

    # Handle both string and list inputs
    if isinstance(categories, str):
        categories = [categories]
    elif isinstance(categories, list):
        # BUG FIX: the original flattened unconditionally
        # ([item for sublist in categories for item in sublist]), which split
        # plain strings into single characters, so a list of category strings
        # could never match any pattern. Flatten only nested lists.
        flat = []
        for item in categories:
            if isinstance(item, list):
                flat.extend(item)
            else:
                flat.append(item)
        categories = flat
    else:
        # e.g. NaN coming from a pandas cell — nothing to match.
        return ""

    # Search for patterns; skip non-string entries defensively.
    for category in categories:
        if not isinstance(category, str):
            continue
        for pattern in patterns:
            if pattern in category:
                return pattern
    return ""
|
68 |
+
|
69 |
+
|
70 |
+
# Two hand-curated Ukrainian stopword lists. Duplicates within and across
# the lists are tolerated: the final `ukrainian_stopwords` de-duplicates
# them via set().
ukrainian_stopwords_1 = [
    "я", "ти", "він", "вона", "воно", "ми", "ви", "вони",
    "це", "той", "така", "таке", "такі", "цей",
    "моя", "твоя", "його", "її", "наш", "ваш", "їх",
    "де", "чи", "а", "але", "і", "або", "так", "ні", "чи",
    "в", "на", "з", "до", "під", "через", "після", "між", "серед",
    "без", "для", "про", "о", "за", "від", "до",
    "як", "якби", "коли", "де", "тому", "тому що", "що", "чому", "хто", "що",
    "якось", "коли-небудь", "де-небудь", "чимало",
]

ukrainian_stopwords_2 = [
    # Articles
    "і", "й", "у", "в", "та", "і",
    # Pronouns
    "я", "ти", "він", "вона", "воно", "ми", "ви", "вони",
    "мене", "тебе", "його", "її", "нас", "вас", "їх",
    "мій", "твій", "наш", "ваш", "свій",
    # Prepositions
    "з", "до", "від", "біля", "над", "під", "через", "для", "без",
    "між", "серед", "крізь", "понад", "поза", "крім",
    # Conjunctions
    "та", "і", "але", "або", "однак", "проте", "тому", "тому що",
    "оскільки", "якщо", "коли", "хоча",
    # Auxiliary words
    "так", "ні", "не", "бути", "мати", "можна", "треба",
    # Common filler words
    "цей", "той", "це", "те", "такий", "який", "котрий",
    # Modal words
    "мабуть", "напевно", "звичайно", "можливо",
    # Particles
    "ось", "ніби", "майже", "майже що", "саме", "лише", "тільки",
]

# Merged, de-duplicated stopword list handed to the BM25 tokenizer.
ukrainian_stopwords = list(set(ukrainian_stopwords_1 + ukrainian_stopwords_2))
|
223 |
+
|
224 |
+
|
225 |
+
# Load the source spreadsheet of legal positions (xlsx; needs openpyxl).
final_df = pd.read_excel(LP_INFO_FILE)

if USE_META:
    # Columns named category_1, category_2, ... hold per-row category labels.
    category_columns = [
        col for col in final_df.columns if re.match(r"category_\d+$", col)
    ]

    text_columns = ["title", "text_lp", "category_all"] + category_columns
    final_df[text_columns] = final_df[text_columns].apply(clean_string)

    # Join all non-empty category cells into one searchable string per row.
    final_df["category_search"] = final_df[category_columns].apply(
        lambda row: ", ".join([str(val) for val in row if pd.notna(val)]), axis=1
    )
    final_df["category_filter"] = final_df["category_all"].apply(find_matching_pattern)

    # Rich-metadata documents; doc_id/category_filter are excluded from both
    # the embed and LLM views, so only searchable text reaches the index.
    legal_position_title_category = [
        Document(
            text=row["text_lp"],  # type: ignore
            metadata={  # type: ignore
                "lp_id": row["id"],
                "title": row["title"],
                "doc_id": row["document_ids"],
                "category_filter": find_matching_pattern(row["category_all"]),
                "category_search": row["category_search"],
            },
            excluded_embed_metadata_keys=["doc_id", "category_filter"],
            excluded_llm_metadata_keys=["doc_id", "category_filter"],
        )
        for _, row in final_df.iterrows()
    ]
else:
    final_df[["title", "text_lp"]] = final_df[["title", "text_lp"]].apply(clean_string)
    # Minimal variant: only the title is kept as metadata, and it is excluded
    # from embed/LLM views, so BM25 indexes the bare position text.
    legal_position_title_category = [
        Document(
            text=row["text_lp"],  # type: ignore
            metadata={  # type: ignore
                "title": row["title"],
            },
            excluded_embed_metadata_keys=["title"],
            excluded_llm_metadata_keys=["title"],
        )
        for _, row in final_df.iterrows()
    ]


# Copied from BM25Retriever __init__ method, but note that output looks awful
# and might work worse (this needs checking).
corpus = [node_to_metadata_dict(node) for node in legal_position_title_category]
corpus_tokens = bm25s.tokenize(
    [
        node.get_content(metadata_mode=MetadataMode.EMBED)
        for node in legal_position_title_category
    ],
    stopwords=ukrainian_stopwords,
)

existing_bm25 = bm25s.BM25(
    # NOTE(review): BM25's b parameter is conventionally in [0, 1];
    # b=1.25 is outside that range — confirm these tuning values are intended.
    k1=1.88,
    b=1.25,
    delta=0.5,
    method="robertson",
    # No corpus is saved without this line:
    corpus=corpus,  # prevents TypeError: 'NoneType' object is not subscriptable
)
existing_bm25.index(corpus=corpus_tokens)

bm25_retriever = BM25Retriever(
    existing_bm25=existing_bm25,
    similarity_top_k=20,
)

bm25_retriever.persist(str(PERSIST_PATH / INDEX_NAME))

# Round-trip sanity check: from_persist_dir raises on an invalid corpus.
loaded_retriever = BM25Retriever.from_persist_dir(str(PERSIST_PATH / INDEX_NAME))
|
main.py
CHANGED
@@ -1,32 +1,36 @@
|
|
|
|
1 |
import os
|
2 |
import re
|
3 |
-
import gradio as gr
|
4 |
-
import requests
|
5 |
-
import nest_asyncio
|
6 |
import sys
|
7 |
-
import boto3
|
8 |
-
|
9 |
from pathlib import Path
|
10 |
-
from bs4 import BeautifulSoup
|
11 |
-
from llama_index.core import (
|
12 |
-
Settings,
|
13 |
-
)
|
14 |
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
from llama_index.core.retrievers import QueryFusionRetriever
|
|
|
17 |
|
18 |
|
19 |
-
from dotenv import load_dotenv
|
20 |
-
|
21 |
load_dotenv()
|
22 |
|
23 |
-
Settings.similarity_top_k = 20
|
24 |
|
25 |
# Параметри S3
|
26 |
BUCKET_NAME = "legal-position"
|
27 |
PREFIX_RETRIEVER = "Save_Index/" # Префікс для всього вмісту, який потрібно завантажити
|
28 |
LOCAL_DIR = Path("Save_Index_Local") # Локальна директорія для збереження даних з S3
|
29 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
# Ініціалізація клієнта S3
|
32 |
s3_client = boto3.client(
|
@@ -36,9 +40,6 @@ s3_client = boto3.client(
|
|
36 |
region_name="eu-north-1"
|
37 |
)
|
38 |
|
39 |
-
# Створюємо локальну директорію, якщо вона не існує
|
40 |
-
LOCAL_DIR.mkdir(parents=True, exist_ok=True)
|
41 |
-
|
42 |
# Функція для завантаження файлу з S3
|
43 |
def download_s3_file(bucket_name, s3_key, local_path):
|
44 |
s3_client.download_file(bucket_name, s3_key, str(local_path))
|
@@ -73,60 +74,66 @@ def parse_doc_ids(doc_ids):
|
|
73 |
if doc_ids is None:
|
74 |
return []
|
75 |
if isinstance(doc_ids, list):
|
76 |
-
return [str(id).strip(
|
77 |
if isinstance(doc_ids, str):
|
78 |
-
cleaned = doc_ids.strip(
|
79 |
if cleaned:
|
80 |
-
return [id.strip() for id in cleaned.split(
|
81 |
return []
|
82 |
|
|
|
83 |
def get_links_html(doc_ids):
|
84 |
parsed_ids = parse_doc_ids(doc_ids)
|
85 |
if not parsed_ids:
|
86 |
return ""
|
87 |
-
links = [
|
88 |
-
|
|
|
|
|
89 |
return ", ".join(links)
|
90 |
|
|
|
91 |
def parse_lp_ids(lp_ids):
|
92 |
if lp_ids is None:
|
93 |
return []
|
94 |
if isinstance(lp_ids, (str, int)):
|
95 |
-
cleaned = str(lp_ids).strip(
|
96 |
if cleaned:
|
97 |
return [cleaned]
|
98 |
return []
|
99 |
|
|
|
100 |
def get_links_html_lp(lp_ids):
|
101 |
parsed_ids = parse_lp_ids(lp_ids)
|
102 |
if not parsed_ids:
|
103 |
return ""
|
104 |
-
links = [
|
|
|
|
|
|
|
105 |
return ", ".join(links)
|
106 |
|
107 |
|
108 |
def initialize_components():
|
109 |
try:
|
110 |
-
|
111 |
-
|
112 |
-
if not persist_path.exists():
|
113 |
-
raise FileNotFoundError(f"Directory not found: {persist_path}")
|
114 |
|
115 |
-
required_files = [
|
116 |
-
missing_files = [f for f in required_files if not (
|
117 |
|
118 |
if missing_files:
|
119 |
raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")
|
120 |
|
121 |
global retriever_bm25
|
122 |
|
123 |
-
bm25_retriever = BM25Retriever.from_persist_dir(str(
|
124 |
|
125 |
retriever_bm25 = QueryFusionRetriever(
|
126 |
[
|
127 |
bm25_retriever,
|
128 |
],
|
129 |
-
similarity_top_k=Settings.similarity_top_k,
|
130 |
num_queries=1,
|
131 |
use_async=True,
|
132 |
)
|
@@ -138,15 +145,15 @@ def initialize_components():
|
|
138 |
|
139 |
def extract_court_decision_text(url):
|
140 |
response = requests.get(url)
|
141 |
-
soup = BeautifulSoup(response.content,
|
142 |
|
143 |
unwanted_texts = [
|
144 |
"Доступ до Реєстру здійснюється в тестовому (обмеженому) режимі.",
|
145 |
-
"З метою упередження перешкоджанню стабільній роботі Реєстру"
|
146 |
]
|
147 |
|
148 |
decision_text = ""
|
149 |
-
for paragraph in soup.find_all(
|
150 |
text = paragraph.get_text(separator="\n").strip()
|
151 |
if not any(unwanted_text in text for unwanted_text in unwanted_texts):
|
152 |
decision_text += text + "\n"
|
@@ -158,11 +165,13 @@ async def search_without_ai_action(url):
|
|
158 |
court_decision_text = extract_court_decision_text(url)
|
159 |
nodes = await retriever_bm25.aretrieve(court_decision_text)
|
160 |
|
161 |
-
search_output_content =
|
|
|
|
|
162 |
for index, node in enumerate(nodes, start=1):
|
163 |
-
source_title = node.node.metadata.get(
|
164 |
-
doc_ids = node.node.metadata.get(
|
165 |
-
lp_ids = node.node.metadata.get(
|
166 |
links = get_links_html(doc_ids)
|
167 |
links_lp = get_links_html_lp(lp_ids)
|
168 |
|
@@ -172,20 +181,20 @@ async def search_without_ai_action(url):
|
|
172 |
except Exception as e:
|
173 |
return f"Error during search: {str(e)}", None
|
174 |
|
|
|
175 |
async def search_without_ai_action_text(question_input):
|
176 |
try:
|
177 |
nodes = await retriever_bm25.aretrieve(question_input)
|
178 |
|
179 |
search_output_content = f"**Результати пошуку (наявні правові позиції ВС) за текстовим запитом:** \n\n"
|
180 |
for index, node in enumerate(nodes, start=1):
|
181 |
-
source_title = node.node.metadata.get(
|
182 |
-
doc_ids = node.node.metadata.get(
|
183 |
-
lp_ids = node.node.metadata.get(
|
184 |
links = get_links_html(doc_ids)
|
185 |
links_lp = get_links_html_lp(lp_ids)
|
186 |
search_output_content += f"\n[{index}] *{source_title}* ⚖️ {links_lp} | {links} 👉 Score: {node.score} \n"
|
187 |
|
188 |
-
|
189 |
return search_output_content, nodes
|
190 |
except Exception as e:
|
191 |
return f"Error during search: {str(e)}", None
|
@@ -195,7 +204,9 @@ def create_gradio_interface():
|
|
195 |
with gr.Blocks() as app:
|
196 |
gr.Markdown("# Знаходьте правові позиції Верховного Суду")
|
197 |
|
198 |
-
input_field = gr.Textbox(
|
|
|
|
|
199 |
search_button = gr.Button("Пошук", interactive=False)
|
200 |
warning_message = gr.Markdown(visible=False)
|
201 |
|
@@ -204,7 +215,9 @@ def create_gradio_interface():
|
|
204 |
state_nodes = gr.State()
|
205 |
|
206 |
async def search_action(input_text):
|
207 |
-
if re.match(
|
|
|
|
|
208 |
return await search_without_ai_action(input_text)
|
209 |
else:
|
210 |
return await search_without_ai_action_text(input_text)
|
@@ -212,33 +225,44 @@ def create_gradio_interface():
|
|
212 |
def update_button_state(text):
|
213 |
text = text.strip()
|
214 |
if not text:
|
215 |
-
return gr.update(value="Пошук", interactive=False), gr.update(
|
|
|
|
|
216 |
elif re.match(r"^https://reyestr\.court\.gov\.ua/Review/\d+$", text):
|
217 |
-
return gr.update(value="Пошук за URL", interactive=True), gr.update(
|
|
|
|
|
218 |
elif text.startswith("http"):
|
219 |
-
return gr.update(value="Пошук", interactive=False), gr.update(
|
|
|
|
|
|
|
220 |
else:
|
221 |
-
return gr.update(value="Пошук за текстом", interactive=True), gr.update(
|
|
|
|
|
222 |
|
223 |
search_button.click(
|
224 |
-
fn=search_action,
|
225 |
-
inputs=input_field,
|
226 |
-
outputs=[search_output, state_nodes]
|
227 |
)
|
228 |
|
229 |
input_field.change(
|
230 |
fn=update_button_state,
|
231 |
inputs=input_field,
|
232 |
-
outputs=[search_button, warning_message]
|
233 |
)
|
234 |
|
235 |
return app
|
236 |
|
|
|
237 |
if __name__ == "__main__":
|
238 |
if initialize_components():
|
239 |
print("Components initialized successfully!")
|
240 |
app = create_gradio_interface()
|
241 |
app.launch(share=True)
|
242 |
else:
|
243 |
-
print(
|
|
|
|
|
|
|
244 |
sys.exit(1)
|
|
|
1 |
+
import asyncio
|
2 |
import os
|
3 |
import re
|
|
|
|
|
|
|
4 |
import sys
|
|
|
|
|
5 |
from pathlib import Path
|
|
|
|
|
|
|
|
|
6 |
|
7 |
+
import boto3
|
8 |
+
import gradio as gr
|
9 |
+
import nest_asyncio
|
10 |
+
import requests
|
11 |
+
from bs4 import BeautifulSoup
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
from llama_index.core import Settings
|
14 |
from llama_index.core.retrievers import QueryFusionRetriever
|
15 |
+
from llama_index.retrievers.bm25 import BM25Retriever
|
16 |
|
17 |
|
|
|
|
|
18 |
load_dotenv()
|
19 |
|
20 |
+
# Number of results returned by the retriever (read in initialize_components).
Settings.similarity_top_k = 20  # type: ignore

# S3 parameters
BUCKET_NAME = "legal-position"
PREFIX_RETRIEVER = "Save_Index/"  # prefix of all the content to download
LOCAL_DIR = Path("Save_Index_Local")  # local directory for data pulled from S3

# Index parameters (must match what init_bm25.py persisted)
PERSIST_PATH = Path("Save_Index_Local")
INDEX_NAME = "bm25_retriever"
# INDEX_NAME = "bm25_retriever_meta"

# Create the local directory if it does not already exist
LOCAL_DIR.mkdir(parents=True, exist_ok=True)
|
34 |
|
35 |
# Ініціалізація клієнта S3
|
36 |
s3_client = boto3.client(
|
|
|
40 |
region_name="eu-north-1"
|
41 |
)
|
42 |
|
|
|
|
|
|
|
43 |
# Helper to download a single file from S3
def download_s3_file(bucket_name, s3_key, local_path):
    """Download the object at *s3_key* in *bucket_name* to *local_path*."""
    s3_client.download_file(bucket_name, s3_key, str(local_path))
|
|
|
74 |
def parse_doc_ids(doc_ids):
    """Normalize a raw document-id value into a list of id strings.

    Accepts None, a list of ids, or a string like "[123, 456]" (brackets
    and spaces are stripped, entries are comma-separated).

    Returns:
        list[str]: cleaned id strings; [] for None/empty/unsupported input.
    """
    if doc_ids is None:
        return []
    if isinstance(doc_ids, list):
        # Renamed loop variable: the original shadowed the builtin `id`.
        return [str(doc_id).strip("[]") for doc_id in doc_ids]
    if isinstance(doc_ids, str):
        cleaned = doc_ids.strip("[]").replace(" ", "")
        if cleaned:
            return [doc_id.strip() for doc_id in cleaned.split(",")]
    return []
|
83 |
|
84 |
+
|
85 |
def get_links_html(doc_ids):
    """Render court-decision ids as comma-separated markdown registry links."""
    ids = parse_doc_ids(doc_ids)
    if not ids:
        return ""
    return ", ".join(
        f"[Рішення ВС: {doc_id}](https://reyestr.court.gov.ua/Review/{doc_id})"
        for doc_id in ids
    )
|
94 |
|
95 |
+
|
96 |
def parse_lp_ids(lp_ids):
    """Normalize a raw legal-position id (str or int) into a 1-element list.

    Brackets are stripped from the ends, spaces removed; anything else
    (None, floats, empty strings) yields [].
    """
    if not isinstance(lp_ids, (str, int)):
        return []
    normalized = str(lp_ids).strip("[]").replace(" ", "")
    return [normalized] if normalized else []
|
104 |
|
105 |
+
|
106 |
def get_links_html_lp(lp_ids):
    """Render legal-position ids as comma-separated markdown search links."""
    ids = parse_lp_ids(lp_ids)
    if not ids:
        return ""
    return ", ".join(
        f"[Правова позиція ВС: {lp_id}](https://lpd.court.gov.ua/home/search/{lp_id})"
        for lp_id in ids
    )
|
115 |
|
116 |
|
117 |
def initialize_components():
|
118 |
try:
|
119 |
+
if not PERSIST_PATH.exists():
|
120 |
+
raise FileNotFoundError(f"Directory not found: {PERSIST_PATH}")
|
|
|
|
|
121 |
|
122 |
+
required_files = [INDEX_NAME]
|
123 |
+
missing_files = [f for f in required_files if not (PERSIST_PATH / f).exists()]
|
124 |
|
125 |
if missing_files:
|
126 |
raise FileNotFoundError(f"Missing required files: {', '.join(missing_files)}")
|
127 |
|
128 |
global retriever_bm25
|
129 |
|
130 |
+
bm25_retriever = BM25Retriever.from_persist_dir(str(PERSIST_PATH / INDEX_NAME))
|
131 |
|
132 |
retriever_bm25 = QueryFusionRetriever(
|
133 |
[
|
134 |
bm25_retriever,
|
135 |
],
|
136 |
+
similarity_top_k=Settings.similarity_top_k, # type: ignore
|
137 |
num_queries=1,
|
138 |
use_async=True,
|
139 |
)
|
|
|
145 |
|
146 |
def extract_court_decision_text(url):
|
147 |
response = requests.get(url)
|
148 |
+
soup = BeautifulSoup(response.content, "html.parser")
|
149 |
|
150 |
unwanted_texts = [
|
151 |
"Доступ до Реєстру здійснюється в тестовому (обмеженому) режимі.",
|
152 |
+
"З метою упередження перешкоджанню стабільній роботі Реєстру",
|
153 |
]
|
154 |
|
155 |
decision_text = ""
|
156 |
+
for paragraph in soup.find_all("p"):
|
157 |
text = paragraph.get_text(separator="\n").strip()
|
158 |
if not any(unwanted_text in text for unwanted_text in unwanted_texts):
|
159 |
decision_text += text + "\n"
|
|
|
165 |
court_decision_text = extract_court_decision_text(url)
|
166 |
nodes = await retriever_bm25.aretrieve(court_decision_text)
|
167 |
|
168 |
+
search_output_content = (
|
169 |
+
f"**Результати пошуку (наявні правові позиції ВС) за посиланням:** \n\n"
|
170 |
+
)
|
171 |
for index, node in enumerate(nodes, start=1):
|
172 |
+
source_title = node.node.metadata.get("title", "Невідомий заголовок")
|
173 |
+
doc_ids = node.node.metadata.get("doc_id")
|
174 |
+
lp_ids = node.node.metadata.get("lp_id")
|
175 |
links = get_links_html(doc_ids)
|
176 |
links_lp = get_links_html_lp(lp_ids)
|
177 |
|
|
|
181 |
except Exception as e:
|
182 |
return f"Error during search: {str(e)}", None
|
183 |
|
184 |
+
|
185 |
async def search_without_ai_action_text(question_input):
    """Run BM25 retrieval for a free-text query and format markdown results.

    Args:
        question_input: free-text user query from the Gradio textbox.

    Returns:
        tuple: (markdown string with ranked results, retrieved nodes) on
        success, or (error message string, None) on any failure.
    """
    try:
        nodes = await retriever_bm25.aretrieve(question_input)

        search_output_content = f"**Результати пошуку (наявні правові позиції ВС) за текстовим запитом:** \n\n"
        for index, node in enumerate(nodes, start=1):
            # Metadata keys written by init_bm25.py; title falls back to
            # "Невідомий заголовок" ("unknown title") when absent.
            source_title = node.node.metadata.get("title", "Невідомий заголовок")
            doc_ids = node.node.metadata.get("doc_id")
            lp_ids = node.node.metadata.get("lp_id")
            links = get_links_html(doc_ids)
            links_lp = get_links_html_lp(lp_ids)
            search_output_content += f"\n[{index}] *{source_title}* ⚖️ {links_lp} | {links} 👉 Score: {node.score} \n"

        return search_output_content, nodes
    except Exception as e:
        # Surface the failure as UI text instead of raising inside Gradio.
        return f"Error during search: {str(e)}", None
|
|
|
204 |
with gr.Blocks() as app:
|
205 |
gr.Markdown("# Знаходьте правові позиції Верховного Суду")
|
206 |
|
207 |
+
input_field = gr.Textbox(
|
208 |
+
label="Введіть текст або посилання на судове рішення", lines=1
|
209 |
+
)
|
210 |
search_button = gr.Button("Пошук", interactive=False)
|
211 |
warning_message = gr.Markdown(visible=False)
|
212 |
|
|
|
215 |
state_nodes = gr.State()
|
216 |
|
217 |
async def search_action(input_text):
|
218 |
+
if re.match(
|
219 |
+
r"^https://reyestr\.court\.gov\.ua/Review/\d+$", input_text.strip()
|
220 |
+
):
|
221 |
return await search_without_ai_action(input_text)
|
222 |
else:
|
223 |
return await search_without_ai_action_text(input_text)
|
|
|
225 |
def update_button_state(text):
|
226 |
text = text.strip()
|
227 |
if not text:
|
228 |
+
return gr.update(value="Пошук", interactive=False), gr.update(
|
229 |
+
visible=False
|
230 |
+
)
|
231 |
elif re.match(r"^https://reyestr\.court\.gov\.ua/Review/\d+$", text):
|
232 |
+
return gr.update(value="Пошук за URL", interactive=True), gr.update(
|
233 |
+
visible=False
|
234 |
+
)
|
235 |
elif text.startswith("http"):
|
236 |
+
return gr.update(value="Пошук", interactive=False), gr.update(
|
237 |
+
value="Неправильний формат URL. Використовуйте посилання формату https://reyestr.court.gov.ua/Review/{doc_id}",
|
238 |
+
visible=True,
|
239 |
+
)
|
240 |
else:
|
241 |
+
return gr.update(value="Пошук за текстом", interactive=True), gr.update(
|
242 |
+
visible=False
|
243 |
+
)
|
244 |
|
245 |
search_button.click(
|
246 |
+
fn=search_action, inputs=input_field, outputs=[search_output, state_nodes]
|
|
|
|
|
247 |
)
|
248 |
|
249 |
input_field.change(
|
250 |
fn=update_button_state,
|
251 |
inputs=input_field,
|
252 |
+
outputs=[search_button, warning_message],
|
253 |
)
|
254 |
|
255 |
return app
|
256 |
|
257 |
+
|
258 |
if __name__ == "__main__":
    # Validate the persisted index files and build the global retriever
    # before exposing the UI; bail out with a non-zero exit code otherwise.
    if initialize_components():
        print("Components initialized successfully!")
        app = create_gradio_interface()
        # share=True publishes a temporary public Gradio link.
        app.launch(share=True)
    else:
        print(
            "Failed to initialize components. Please check the paths and try again.",
            file=sys.stderr,
        )
        sys.exit(1)
|
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
name = "prototype"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = ""
|
5 |
+
authors = ["Ivan Lytvynenko <[email protected]>"]
|
6 |
+
readme = "README.md"
|
7 |
+
|
8 |
+
[tool.poetry.dependencies]
|
9 |
+
python = "^3.10"
|
10 |
+
llama-index = "^0.12.3"
|
11 |
+
llama-index-readers-file = "^0.4.1"
|
12 |
+
llama-index-vector-stores-faiss = "^0.3.0"
|
13 |
+
llama-index-retrievers-bm25 = "^0.5.0"
|
14 |
+
openai = "^1.57.0"
|
15 |
+
faiss-cpu = "^1.9.0.post1"
|
16 |
+
llama-index-embeddings-openai = "^0.3.1"
|
17 |
+
llama-index-llms-openai = "^0.3.2"
|
18 |
+
gradio = "^5.8.0"
|
19 |
+
beautifulsoup4 = "^4.12.3"
|
20 |
+
nest-asyncio = "^1.6.0"
|
21 |
+
boto3 = "^1.35.76"
|
22 |
+
python-dotenv = "^1.0.1"
|
23 |
+
openpyxl = "^3.1.5"
|
24 |
+
|
25 |
+
[tool.pyright]
|
26 |
+
venvPath = "."
|
27 |
+
venv = ".venv"
|
28 |
+
|
29 |
+
[tool.black]
|
30 |
+
line-length = 90
|
31 |
+
target-version = ["py311"]
|
32 |
+
|
33 |
+
[tool.isort]
|
34 |
+
src_paths = ["src"]
|
35 |
+
profile = "black"
|
36 |
+
line_length = 90
|
37 |
+
lines_after_imports = 2
|
38 |
+
|
39 |
+
[build-system]
|
40 |
+
requires = ["poetry-core"]
|
41 |
+
build-backend = "poetry.core.masonry.api"
|
requirements.txt
CHANGED
@@ -11,3 +11,4 @@ beautifulsoup4
|
|
11 |
nest-asyncio
|
12 |
boto3
|
13 |
python-dotenv
|
|
|
|
11 |
nest-asyncio
|
12 |
boto3
|
13 |
python-dotenv
|
14 |
+
openpyxl
|