Spaces:
Runtime error
Runtime error
import ujson as json | |
import re | |
import traceback | |
def trace_back(error_msg):
    """Combine an error description with the current traceback text.

    The traceback portion is whatever ``traceback.format_exc()`` reports at
    the time of the call, i.e. the exception currently being handled.

    Args:
        error_msg: Description of the failure; interpolated into the report.

    Returns:
        A single string holding an ``[Error]`` line followed by a
        ``[Traceback]`` section.
    """
    stack_text = traceback.format_exc()
    return f'[Error]: {error_msg}.\n[Traceback]: {stack_text}'
def extract_numbered_list(paragraph):
    """Collect the numbered-list entries found in *paragraph*.

    An entry must begin at the start of a line (leading whitespace allowed)
    with one or more digits followed by ``.`` or ``)``. Its text is captured
    lazily, stopping at the end of the line or just before the next
    "digits + ./)" marker, whichever comes first.

    Args:
        paragraph: Text that may contain a numbered list.

    Returns:
        The captured entries (marker included) with surrounding whitespace
        stripped, in order of appearance; an empty list when none match.
    """
    item_regex = re.compile(
        r"^\s*(\d+[.)]\s?.*?)(?=\s*\d+[.)]|$)",
        flags=re.DOTALL | re.MULTILINE,
    )
    # The pattern has exactly one capturing group, so group(1) is the entry.
    return [hit.group(1).strip() for hit in item_regex.finditer(paragraph)]
def chunks(lst, n):
    """Lazily split *lst* into consecutive slices of length *n*.

    The final slice is shorter when ``len(lst)`` is not a multiple of *n*.

    Args:
        lst: A sliceable sequence with a length.
        n: Size of each slice; used as the step of the underlying ``range``.

    Yields:
        Successive slices ``lst[k:k + n]`` for ``k = 0, n, 2n, ...``.
    """
    total = len(lst)
    yield from (lst[offset:offset + n] for offset in range(0, total, n))
def reset_state_list(*states):
    """Build the reset values for a group of states.

    The result always starts with a fresh empty list, followed by one
    ``None`` for every argument after the first. The argument values
    themselves are not inspected; only their count matters.

    Args:
        *states: The states to reset.

    Returns:
        ``[[], None, None, ...]`` with ``max(len(states), 1)`` entries.
    """
    cleared = [[]]
    cleared.extend(None for _ in states[1:])
    return cleared
def LoadJsonL(filename):
    """Load a JSON Lines file into a list of parsed records.

    Args:
        filename: Path of the JSON Lines file, as a ``str``. Any non-``str``
            argument is returned unchanged (presumably so callers can pass
            data that is already loaded -- confirm against callers).

    Returns:
        A list with one parsed value per non-blank line of the file, or
        *filename* itself when it is not a string.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line is not valid JSON.
    """
    if not isinstance(filename, str):
        return filename
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(filename, encoding="utf-8") as f:
        # Blank lines (e.g. a trailing empty line) are not records; skip them
        # instead of letting the parser fail on empty input.
        return [json.loads(line) for line in f if line.strip()]
def _find_object_end(text, start):
    """Return the index of the '}' that closes the '{' at *start*, or -1.

    Braces inside double-quoted string literals are ignored, and a backslash
    inside a string escapes the following character.
    """
    depth = 0
    in_string = False
    escaped = False
    for idx in range(start, len(text)):
        ch = text[idx]
        if in_string:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return idx
    return -1


def extract_jsons_from_text(text):
    """Extract every top-level JSON object embedded in *text*.

    The text is scanned for brace-balanced ``{...}`` spans. Each span is
    handed to ``json.loads``; spans that fail to parse are skipped and the
    scan resumes after them.

    Args:
        text: String that may contain JSON objects mixed with other content.

    Returns:
        The parsed objects in order of appearance, or ``[{}]`` when no span
        parses, so the result always has at least one element.
    """
    json_dicts = []
    pos = text.find('{')
    while pos != -1:
        end = _find_object_end(text, pos)
        if end == -1:
            # This brace is never closed; skip only it so that one stray '{'
            # does not hide every object that follows.
            pos = text.find('{', pos + 1)
            continue
        try:
            json_dicts.append(json.loads(text[pos:end + 1]))
        except ValueError:
            # JSON decode errors subclass ValueError; catching the base class
            # avoids relying on a JSONDecodeError attribute on the module.
            pass
        pos = text.find('{', end + 1)
    return json_dicts or [{}]