Spaces:
Sleeping
Sleeping
File size: 1,745 Bytes
21b7409 963aee4 a3397bd 963aee4 21b7409 a3397bd 21b7409 0ed214c 21b7409 0ed214c 3dfa711 0ed214c 3dfa711 0ed214c a3397bd 963aee4 3dfa711 963aee4 0ed214c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
from pathlib import Path
import polars as pl
# Configure how polars renders tables when they are printed. The option
# descriptions below follow the polars ``Config`` documentation.
# NOTE(review): ``fmt_str_lengths`` is written without the ``set_`` prefix while
# the other options include it; polars appears to accept both spellings —
# confirm against the polars version this project pins.
pl.Config(
    # Number of characters shown for each string value.
    fmt_str_lengths=9,
    # Maximum number of rows drawn per table.
    set_tbl_rows=5,
    # Drop the separator line between column names and column dtypes.
    set_tbl_hide_dtype_separator=True,
    # Print the DataFrame shape underneath the table.
    set_tbl_dataframe_shape_below=True,
    # Table border style, chosen by name from polars' formatting presets.
    set_tbl_formatting="UTF8_FULL_CONDENSED",
)
def filename_reducer(docs_a, docs_b):
    """
    Merge two lists of document dictionaries, matching entries by filename.

    Each entry of docs_a whose "filename" also appears in docs_b is replaced by
    the docs_b entry; every other docs_a entry is kept as is. Entries of docs_b
    whose filename does not occur in docs_a are not appended. If docs_b holds
    several entries with the same filename, the last one is used.

    Neither input list is modified: the merged result is a new list, so callers
    that still hold a reference to docs_a keep seeing their original contents.

    Args:
        docs_a (list): Existing documents. Entries without a "filename" key are
            kept unchanged.
        docs_b (list): Incoming documents; every entry must contain a
            "filename" key.

    Returns:
        list: docs_b itself when docs_a is empty or None, otherwise a new list
        with the same length and order as docs_a.

    Raises:
        KeyError: If docs_a is non-empty and an entry of docs_b has no
            "filename" key.
    """
    # Nothing to update yet: the incoming documents become the whole result.
    if not docs_a:
        return docs_b

    # Index the incoming documents once so each lookup below is O(1).
    replacements = {doc["filename"]: doc for doc in docs_b}

    # Build a fresh list rather than overwriting elements of docs_a in place.
    return [replacements.get(doc.get("filename"), doc) for doc in docs_a]
class Paths:
    """
    Central registry of the filesystem locations used by the project.

    Every attribute is a relative ``pathlib.Path``; nothing here is resolved to
    an absolute location.
    """

    # Root data folder and its three top-level stages.
    DATA = Path("data")
    RAW = DATA.joinpath("raw")
    STAGING = DATA.joinpath("staging")
    OUT = DATA.joinpath("out")

    # Nested folders below the staging and output stages.
    PDFS_AZURE = STAGING.joinpath("pdfs_azure")
    SUMMARY = OUT.joinpath("summary")
    FIGS = SUMMARY.joinpath("figs")

    # Prompt location; not among the directories created below.
    PROMPTS = Path("planning_ai/chains/prompts")

    @classmethod
    def ensure_directories_exist(cls):
        """
        Create every data directory that is not already present on disk.

        Missing parent directories are created as well, and directories that
        already exist are left alone. ``PROMPTS`` is not created.
        """
        data_dirs = (
            cls.DATA,
            cls.RAW,
            cls.STAGING,
            cls.OUT,
            cls.SUMMARY,
            cls.FIGS,
            cls.PDFS_AZURE,
        )
        for directory in data_dirs:
            directory.mkdir(parents=True, exist_ok=True)
# Runs as soon as this module is executed or imported: the data directories are
# created as a side effect. The paths are relative, so they resolve against the
# current working directory of the process.
Paths.ensure_directories_exist()
|