Spaces:
Sleeping
Sleeping
from pathlib import Path | |
import polars as pl | |
# Global Polars display settings, applied once when this module is imported.
# Each setter below corresponds to one keyword of the former single
# ``pl.Config(...)`` call.
pl.Config.set_fmt_str_lengths(9)
pl.Config.set_tbl_rows(5)
pl.Config.set_tbl_hide_dtype_separator(True)
pl.Config.set_tbl_dataframe_shape_below(True)
pl.Config.set_tbl_formatting("UTF8_FULL_CONDENSED")
def filename_reducer(docs_a, docs_b):
    """Merge two lists of document dicts, with ``docs_b`` overriding by filename.

    Every entry of ``docs_a`` whose ``"filename"`` matches an entry of
    ``docs_b`` is replaced by the ``docs_b`` entry. Entries of ``docs_b``
    whose filename does not occur in ``docs_a`` are not appended. The order
    and length of ``docs_a`` are preserved.

    Neither input list is modified; a new list is returned whenever a merge
    actually takes place.

    Args:
        docs_a (list): Dictionaries, each expected to carry a "filename" key.
            Entries without the key are kept unchanged.
        docs_b (list): Dictionaries, each containing a "filename" key.

    Returns:
        list: ``docs_b`` itself when ``docs_a`` is empty (or ``None``);
        otherwise a new list shaped like ``docs_a`` with matching entries
        taken from ``docs_b``.

    Raises:
        KeyError: If an entry of ``docs_b`` has no "filename" key.
    """
    if not docs_a:
        # Nothing to update yet: the incoming documents become the result.
        return docs_b
    if not docs_b:
        # No replacements available; still hand back a fresh list so the
        # caller never receives an alias of its own input.
        return list(docs_a)

    # When docs_b contains duplicate filenames the last one wins.
    replacements = {doc["filename"]: doc for doc in docs_b}

    # Build a new list rather than assigning into docs_a, so the caller's
    # list is never mutated behind its back.
    return [replacements.get(doc.get("filename"), doc) for doc in docs_a]
class Paths:
    """
    A utility class for managing directory paths used in the project.

    All paths are relative, so they resolve against the current working
    directory of the running process.
    """

    # Root of the data tree and its first-level subdirectories.
    DATA = Path("data")
    RAW = DATA / "raw"
    STAGING = DATA / "staging"
    OUT = DATA / "out"

    # Nested working and output directories.
    PDFS_AZURE = STAGING / "pdfs_azure"
    SUMMARY = OUT / "summary"
    FIGS = SUMMARY / "figs"

    # Prompt location; it is not created by ensure_directories_exist().
    PROMPTS = Path("planning_ai/chains/prompts")

    @classmethod
    def ensure_directories_exist(cls):
        """
        Ensures that all necessary directories exist, creating them if necessary.

        Declared as a classmethod so that it can be invoked directly on the
        class (``Paths.ensure_directories_exist()``) without an instance.
        Missing parent directories are created, and directories that already
        exist are left untouched.
        """
        for path in (
            cls.DATA,
            cls.RAW,
            cls.STAGING,
            cls.OUT,
            cls.SUMMARY,
            cls.FIGS,
            cls.PDFS_AZURE,
        ):
            path.mkdir(parents=True, exist_ok=True)
# Module-level call: runs once when this module is imported, invoking
# ensure_directories_exist directly on the Paths class (no instance is created).
Paths.ensure_directories_exist()