File size: 1,745 Bytes
21b7409
 
 
 
 
 
 
 
 
 
 
 
 
963aee4
a3397bd
 
 
 
 
 
 
 
 
 
 
963aee4
 
 
 
 
 
 
 
 
 
 
21b7409
a3397bd
 
 
 
21b7409
0ed214c
21b7409
 
 
0ed214c
3dfa711
 
0ed214c
3dfa711
0ed214c
 
 
 
 
a3397bd
 
 
963aee4
 
 
 
 
 
3dfa711
 
963aee4
0ed214c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from pathlib import Path

import polars as pl

# Configure how polars renders tables. This call sits at module level, so it
# runs as a side effect of importing this module.
# NOTE(review): option names are given both with and without the "set_"
# prefix; presumably polars accepts either spelling - confirm against the
# pinned polars version.
pl.Config(
    fmt_str_lengths=9,  # presumably the max characters shown per string cell - confirm
    set_tbl_rows=5,  # presumably the max rows shown when printing - confirm
    set_tbl_hide_dtype_separator=True,
    set_tbl_dataframe_shape_below=True,
    set_tbl_formatting="UTF8_FULL_CONDENSED",
)


def filename_reducer(docs_a, docs_b):
    """
    Merge two lists of document dictionaries, replacing entries of docs_a with
    the entry from docs_b that has the same "filename".

    The order and length of docs_a are preserved. Entries of docs_b whose
    filename does not occur in docs_a are not appended. Neither input list is
    modified; a new list is returned whenever docs_a is non-empty.

    Args:
        docs_a (list): The current documents. Entries without a "filename" key
            are kept unchanged. May be empty or None.
        docs_b (list): The incoming documents; each must contain a "filename"
            key. When several share a filename, the last one wins.

    Returns:
        list: docs_b itself when docs_a is empty or None; otherwise a new list
        with the entries of docs_a, each swapped for its docs_b counterpart
        where one exists.

    Raises:
        KeyError: If docs_a is non-empty and an entry of docs_b has no
            "filename" key.
    """
    # Nothing to update yet: the incoming documents become the result.
    if not docs_a:
        return docs_b

    replacements = {doc["filename"]: doc for doc in docs_b}

    # Build a fresh list rather than assigning into docs_a, so the caller's
    # list is left untouched.
    return [replacements.get(doc.get("filename"), doc) for doc in docs_a]


class Paths:
    """
    Central registry of the filesystem locations used by the project.

    Every location is a relative ``Path``, so it resolves against the working
    directory of the running process.
    """

    # Root of the data tree.
    DATA = Path("data")

    # First-level folders beneath the data root.
    RAW = DATA.joinpath("raw")
    STAGING = DATA.joinpath("staging")
    OUT = DATA.joinpath("out")

    # Sub-folder of the staging area.
    PDFS_AZURE = STAGING.joinpath("pdfs_azure")

    # Sub-folders of the output area.
    SUMMARY = OUT.joinpath("summary")
    FIGS = SUMMARY.joinpath("figs")

    # Lives outside the data tree; ensure_directories_exist does not create it.
    PROMPTS = Path("planning_ai/chains/prompts")

    @classmethod
    def ensure_directories_exist(cls):
        """
        Create each data directory, along with any missing parents.

        Directories that are already present are left as they are.
        """
        required = (
            cls.DATA,
            cls.RAW,
            cls.STAGING,
            cls.OUT,
            cls.SUMMARY,
            cls.FIGS,
            cls.PDFS_AZURE,
        )
        for directory in required:
            directory.mkdir(parents=True, exist_ok=True)


# Runs at import time: importing this module creates the data directories
# (relative to the current working directory) if they are missing.
Paths.ensure_directories_exist()