"""Shared helpers: polars display options, a filename-keyed reducer, and project paths."""

from pathlib import Path

import polars as pl

# Table display options for polars, applied once when this module is imported.
pl.Config(
    fmt_str_lengths=9,
    set_tbl_rows=5,
    set_tbl_hide_dtype_separator=True,
    set_tbl_dataframe_shape_below=True,
    set_tbl_formatting="UTF8_FULL_CONDENSED",
)


def filename_reducer(docs_a, docs_b):
    """
    Reduces two lists of document dictionaries by replacing entries of docs_a
    with the entries of docs_b that share the same "filename".

    Entries of docs_b whose filename does not occur in docs_a are not appended.
    If several entries of docs_b share a filename, the last one is used.
    docs_a is updated in place and is also the returned object, except when it
    is empty or None, in which case docs_b is returned unchanged.

    Args:
        docs_a (list): A list of dictionaries, each containing a "filename" key.
            May be empty or None.
        docs_b (list): A list of dictionaries, each containing a "filename" key.
            May be empty or None.

    Returns:
        list: docs_a with matching entries replaced by those from docs_b, or
        docs_b itself when docs_a is empty or None.

    Raises:
        KeyError: If an entry of docs_b has no "filename" key.
    """
    # Nothing accumulated yet: the incoming documents are the result.
    if not docs_a:
        return docs_b
    # No incoming documents: there is nothing to replace.
    if not docs_b:
        return docs_a

    replacements = {doc["filename"]: doc for doc in docs_b}
    for index, existing in enumerate(docs_a):
        filename = existing.get("filename")
        if filename in replacements:
            docs_a[index] = replacements[filename]
    return docs_a


class Paths:
    """
    A utility class for managing directory paths used in the project.

    All paths are relative, so they resolve against the current working
    directory of the process.
    """

    DATA = Path("data")

    RAW = DATA / "raw"

    STAGING = DATA / "staging"
    OUT = DATA / "out"

    PDFS_AZURE = STAGING / "pdfs_azure"

    SUMMARY = OUT / "summary"
    FIGS = SUMMARY / "figs"

    # Not part of the directories created by ensure_directories_exist.
    PROMPTS = Path("planning_ai/chains/prompts")

    @classmethod
    def ensure_directories_exist(cls) -> None:
        """
        Ensures that all necessary data directories exist, creating them
        (including missing parents) if necessary. Existing directories are
        left untouched.
        """
        for path in (
            cls.DATA,
            cls.RAW,
            cls.STAGING,
            cls.OUT,
            cls.SUMMARY,
            cls.FIGS,
            cls.PDFS_AZURE,
        ):
            path.mkdir(parents=True, exist_ok=True)


# Create the data directory tree as soon as the module is imported.
Paths.ensure_directories_exist()