# cjber's picture
# docs: add docstrings
# a3397bd
from pathlib import Path
import polars as pl
# Configure how polars renders DataFrames when printed. The Config object is
# created at module level rather than inside a `with` block, so these options
# are presumably meant to stay in effect for the whole process — confirm
# against the polars Config documentation.
pl.Config(
    # Per polars docs: number of characters shown for string values.
    fmt_str_lengths=9,
    # Per polars docs: maximum number of rows drawn per table.
    set_tbl_rows=5,
    # Per polars docs: hides the separator between column names and dtypes.
    set_tbl_hide_dtype_separator=True,
    # Per polars docs: prints the DataFrame shape under the table.
    set_tbl_dataframe_shape_below=True,
    # Table border style name passed through to polars unchanged.
    set_tbl_formatting="UTF8_FULL_CONDENSED",
)
def filename_reducer(docs_a, docs_b):
    """
    Merge two lists of document dictionaries, matching entries on "filename".

    Each entry of docs_a whose "filename" value also appears in docs_b is
    replaced by the corresponding entry from docs_b. The order and length of
    docs_a are preserved; entries of docs_b whose filename does not occur in
    docs_a are not added. If docs_b contains the same filename more than once,
    the last occurrence wins.

    Neither input is modified: when replacements are applied, a new list is
    returned instead of overwriting elements of docs_a.

    Args:
        docs_a (list): The current documents. The "filename" of each entry is
            read with ``.get``, so an entry without that key does not raise.
        docs_b (list): The replacement documents, each containing a
            "filename" key.

    Returns:
        list: docs_b itself when docs_a is empty or None; docs_a itself when
        docs_b is empty or None; otherwise a new list with the matching
        entries swapped for their docs_b counterparts.

    Raises:
        KeyError: If an entry of docs_b has no "filename" key.
    """
    # Nothing accumulated yet: the incoming documents become the result.
    if not docs_a:
        return docs_b
    # Nothing to apply: keep the current documents untouched.
    if not docs_b:
        return docs_a

    replacements = {doc["filename"]: doc for doc in docs_b}
    # Build a fresh list so the caller's docs_a is never mutated in place.
    return [replacements.get(doc.get("filename"), doc) for doc in docs_a]
class Paths:
    """
    Central registry of the filesystem locations used by the project.

    Every attribute is a relative ``pathlib.Path``; the data tree hangs off
    ``DATA`` while ``PROMPTS`` points into the package source.
    """

    # Root of the data tree and its first-level children.
    DATA = Path("data")
    RAW = DATA.joinpath("raw")
    STAGING = DATA.joinpath("staging")
    OUT = DATA.joinpath("out")

    # Nested working and output locations.
    PDFS_AZURE = STAGING.joinpath("pdfs_azure")
    SUMMARY = OUT.joinpath("summary")
    FIGS = SUMMARY.joinpath("figs")

    # Prompt directory; not part of the directories created below.
    PROMPTS = Path("planning_ai/chains/prompts")

    @classmethod
    def ensure_directories_exist(cls):
        """
        Create each data directory that is missing, including any parents.

        Directories that already exist are left as they are.
        """
        required = (
            cls.DATA,
            cls.RAW,
            cls.STAGING,
            cls.OUT,
            cls.SUMMARY,
            cls.FIGS,
            cls.PDFS_AZURE,
        )
        for directory in required:
            directory.mkdir(parents=True, exist_ok=True)
# Executed at import time: loading this module creates the data directories
# (relative to the current working directory) if they are missing.
Paths.ensure_directories_exist()