import csv import os import shutil import json import yaml from typing import Any, List, Tuple, Union from copy import deepcopy from .log import logger from .others import get_cur_time_str from .file import ensure_dir class CSVDataRecord: """Collect data into CSV file. Automatically backup existed file which has the same file name to avoid DATA LOST: ``` # data lost: all content in ./a-file-contains-important-data.csv will be # flushed and unrecoverable if it's opened by 'w': with open('./a-file-contains-important-data.csv', 'w') as f: # do sth. ``` Assuming a scene (actually it was my sad experience): - The code above is in the top of your experimental code, - And you've finished this experiment and collected the data into the CSV file. - After that, if you run this script file again accidentally, then all valuable data will be lost! :attr:`CSVDataRecord` makes this scene never happen again. """ def __init__(self, file_path: str, header: List[str], backup=True): """Open the file and write CSV header into it. Args: file_path (str): Target CSV file path. header (List[str]): CSV header, like `['name', 'age', 'sex', ...]`. backup (bool, optional): If True, the existed file in :attr:`file_path` will be backup to `file_path + '.' + cur timestamp`. Defaults to True. """ self.file_path = file_path self.header = header if backup and os.path.exists(file_path): backup_file_path = '{}.{}'.format(file_path, get_cur_time_str()) shutil.copyfile(file_path, backup_file_path) logger.warn('csv file already exists! backup raw file to {}'.format(backup_file_path)) ensure_dir(file_path) with open(file_path, 'w') as f: writer = csv.writer(f) writer.writerow(header) def write(self, data: Union[List[Any], Tuple[Any]]): """Write a row of data to file in :attr:`file_path`. Args: data (Union[List[Any], Tuple[Any]]): A row of data, like `('ekko', 18, 'man')`. """ assert len(data) == len(self.header) with open(self.file_path, 'a') as f: writer = csv.writer(f) writer.writerow(data) def write_json(file_path: str, obj: Any, indent=2, backup=True, ensure_obj_serializable=False): """Collect data into JSON file. Automatically backup existed file which has the same file name to avoid DATA LOST. (refers to :class:`CSVDataRecord`) Args: file_path (str): Target JSON file path. obj (Any): Collected data which can be serialized into JSON format. indent (int, optional): Keep indent to ensure readability. Defaults to 2. backup (bool, optional): If True, the existed file in :attr:`file_path` will be \ backup to `file_path + '.' + cur timestamp`. Defaults to True. """ if backup and os.path.exists(file_path): backup_file_path = '{}.{}'.format(file_path, get_cur_time_str()) shutil.copyfile(file_path, backup_file_path) logger.warn('json file already exists! backup raw file to {}'.format(backup_file_path)) ensure_dir(file_path) if ensure_obj_serializable: obj = deepcopy(obj) make_obj_json_serializable(obj) with open(file_path, 'w', encoding='utf8') as f: obj_str = json.dumps(obj, indent=indent, ensure_ascii=False) f.write(obj_str) def read_json(file_path: str): """Read JSON file. Args: file_path (str): Target JSON file path. Returns: Any: The object parsed from the target file. """ with open(file_path, 'r', encoding='utf8') as f: return json.loads(f.read()) def read_yaml(file_path: str): """Read YAML file. Args: file_path (str): Target YAML file path. Returns: Any: The object parsed from the target file. """ with open(file_path, 'r') as f: return yaml.load(f, yaml.Loader) import inspect import torch def make_obj_json_serializable(obj): for k, v in obj.items(): if isinstance(v, dict): obj[k] = make_obj_json_serializable(v) elif hasattr(v, '__call__'): obj[k] = inspect.getsource(v) elif isinstance(v, torch.Tensor): obj[k] = str(v) return obj