File size: 1,959 Bytes
8acb22e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import ujson as json
import re
import traceback


def trace_back(error_msg):
    """Combine an error description with the current formatted traceback.

    The traceback text is whatever ``traceback.format_exc()`` returns at the
    moment of the call, so this is meant to be invoked from inside an
    ``except`` block.

    Args:
        error_msg: Description of the failure; it is interpolated into the
            message with an f-string.

    Returns:
        A two-part string: an ``[Error]: <error_msg>.`` line, a newline, and
        then ``[Traceback]: `` followed by the formatted traceback.
    """
    stack_text = traceback.format_exc()
    return f'[Error]: {error_msg}.\n[Traceback]: {stack_text}'


def extract_numbered_list(paragraph):
    """Return the numbered-list items found in ``paragraph``.

    An item must begin at the start of a line (leading whitespace allowed)
    with one or more digits followed by ``.`` or ``)``. Its text is captured
    lazily and stops at whichever comes first:

    * the end of that line, or
    * the next ``<digits>.`` / ``<digits>)`` token (optionally preceded by
      whitespace).

    NOTE: the second rule applies anywhere in the line, so an item such as
    ``"1. Version 2.0 released"`` is cut to ``"1. Version"``; the remainder
    is not returned because it does not start a line.

    Args:
        paragraph: Text to scan.

    Returns:
        A list of the matched items (number marker included), each stripped
        of surrounding whitespace, in order of appearance. Empty when
        nothing matches.
    """
    item_re = re.compile(
        r"^\s*(\d+[.)]\s?.*?)(?=\s*\d+[.)]|$)",
        re.DOTALL | re.MULTILINE,
    )
    # Group 1 is the item itself; the leading whitespace and the lookahead
    # are outside the group and therefore never part of the result.
    return [hit.group(1).strip() for hit in item_re.finditer(paragraph)]


def chunks(lst, n):
    """Lazily split ``lst`` into consecutive slices of at most ``n`` items.

    Args:
        lst: A sliceable sequence that supports ``len()``.
        n: Slice length, also used as the ``range`` step.

    Yields:
        ``lst[start:start + n]`` for ``start`` = 0, n, 2n, ... The final
        slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    """
    yield from (lst[start:start + n] for start in range(0, len(lst), n))


def reset_state_list(*states):
    """Build a list of reset values, one per positional argument.

    The values of ``states`` are never read; only their count matters.

    Args:
        *states: Any number of positional arguments.

    Returns:
        A new list whose first element is a fresh empty list, followed by
        one ``None`` for every argument after the first. With zero or one
        argument the result is ``[[]]``.
    """
    reset = [[]]
    reset.extend(None for _ in states[1:])
    return reset


def LoadJsonL(filename):
    """Load a JSON Lines file into a list of parsed records.

    Args:
        filename: Path of the file to read, given as a ``str``. Any
            non-``str`` argument is treated as already-loaded data and is
            returned unchanged.

    Returns:
        A list holding one parsed JSON value per non-blank line, in file
        order; or ``filename`` itself when it is not a ``str``.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not valid JSON (propagated from
            ``json.loads``).
    """
    if not isinstance(filename, str):
        return filename

    # Read as UTF-8 explicitly: without it, open() falls back to the
    # platform's locale encoding and non-ASCII records can be misdecoded.
    with open(filename, encoding="utf-8") as f:
        # Blank / whitespace-only lines (e.g. a stray empty line at the end
        # of the file) carry no record and would make json.loads fail.
        return [json.loads(line) for line in f if line.strip()]


def _find_json_object_end(text, start):
    """Return the index of the '}' closing the '{' at ``start``, or -1.

    Braces that appear inside double-quoted strings (with backslash escapes
    honoured) are not counted, so a value such as ``"}"`` does not end the
    object early.
    """
    depth = 0
    in_string = False
    escaped = False
    for index in range(start, len(text)):
        char = text[index]
        if in_string:
            if escaped:
                # Character after a backslash is literal, whatever it is.
                escaped = False
            elif char == '\\':
                escaped = True
            elif char == '"':
                in_string = False
        elif char == '"':
            in_string = True
        elif char == '{':
            depth += 1
        elif char == '}':
            depth -= 1
            if depth == 0:
                return index
    return -1


def extract_jsons_from_text(text):
    """Extract every JSON object embedded in free-form ``text``.

    The text is scanned left to right. At each ``{`` the matching ``}`` is
    located (ignoring braces inside JSON strings) and the enclosed span is
    handed to ``json.loads``. On success the object is collected and the
    scan resumes after it, so nested objects are returned only as part of
    their parent. On failure (no closing brace, or the span is not valid
    JSON) the scan resumes at the next ``{``, so a stray or malformed brace
    does not hide valid objects that follow or sit inside it.

    Args:
        text: String that may contain zero or more JSON objects.

    Returns:
        A list of the parsed objects in order of appearance, or ``[{}]``
        when none could be parsed.
    """
    json_dicts = []
    start = text.find('{')
    while start != -1:
        end = _find_json_object_end(text, start)
        # Default: this '{' led nowhere, retry from the next one.
        resume_at = start + 1
        if end != -1:
            try:
                json_dicts.append(json.loads(text[start:end + 1]))
            except ValueError:
                # Not valid JSON. ValueError is the common base of the
                # decode errors raised by both the stdlib parser and ujson.
                pass
            else:
                resume_at = end + 1
        start = text.find('{', resume_at)

    # Callers always receive at least one dict.
    return json_dicts or [{}]