import json
import os
from typing import Optional

import numpy as np
import pandas as pd


class Utils:
    """Static helpers for moving pandas DataFrames to and from local files."""

    @staticmethod
    def read_dataframe_from_csv(file_path: str) -> Optional[pd.DataFrame]:
        """
        Read a DataFrame from a CSV file if the file exists.

        This is a best-effort reader: problems are reported on stdout and
        signalled by returning None instead of raising.

        Parameters:
        - file_path: The full path to the CSV file.

        Returns:
        - A pandas DataFrame if the file exists and is read successfully;
          None otherwise.
        """
        # Guard clause: nothing to read when the path is not an existing file.
        if not os.path.isfile(file_path):
            print(f"File does not exist: {file_path}")
            return None

        try:
            return pd.read_csv(file_path)
        except Exception as e:
            # Deliberately broad: pandas can raise parser, encoding and OS
            # errors here, and callers rely on getting None back for all of
            # them rather than on an exception.
            print(f"An error occurred while reading the file: {e}")
            return None

    @staticmethod
    def read_json_files_to_dataframe(folder_path: str) -> pd.DataFrame:
        """
        Read every ``*.json`` file in a folder into a single DataFrame.

        Each file contributes one entry; columns are inferred by pandas from
        the decoded JSON documents. Files whose content is a JSON *string*
        that itself contains JSON (double-encoded) are decoded a second time.

        :param folder_path: Path to the folder containing JSON files.
        :return: A pandas DataFrame built from all JSON files in the folder,
                 in filename order. Empty if the folder has no JSON files.
        :raises OSError: If the folder or one of its files cannot be read.
        :raises json.JSONDecodeError: If a file does not contain valid JSON.
        """
        records = []
        # os.listdir returns entries in arbitrary order; sort so the row order
        # of the resulting DataFrame is reproducible across runs and machines.
        for filename in sorted(os.listdir(folder_path)):
            if not filename.endswith('.json'):
                continue

            file_path = os.path.join(folder_path, filename)
            # JSON text is UTF-8 by specification; do not depend on the
            # platform's default locale encoding.
            with open(file_path, 'r', encoding='utf-8') as file:
                json_data = json.load(file)

            # A str result means the payload was serialised twice; decode the
            # inner document.
            if isinstance(json_data, str):
                json_data = json.loads(json_data)
            records.append(json_data)

        return pd.DataFrame(records)

    @staticmethod
    def write_pandas_to_local(df: pd.DataFrame, output_path: str) -> None:
        """
        Write a pandas DataFrame to a CSV file at the specified output path.

        Missing parent directories are created. The DataFrame index is not
        written.

        :param df: The pandas DataFrame to be saved.
        :param output_path: The file path where the DataFrame should be saved
                            as a CSV. May be a bare filename, in which case
                            the file is written to the current directory.
        """
        # os.path.dirname returns '' for a bare filename, and os.makedirs('')
        # raises FileNotFoundError, so only create a directory when one is
        # actually named in the path.
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        df.to_csv(output_path, index=False)

    @staticmethod
    def convert_iterables_to_strings(df: pd.DataFrame) -> pd.DataFrame:
        """
        Replace list and numpy-array cell values with their ``str()`` form.

        Only ``list`` and ``numpy.ndarray`` values are converted; every other
        value (including strings, tuples, sets and dicts) is left untouched.

        Note: the columns of ``df`` are reassigned in place, so the caller's
        DataFrame is modified. The same object is returned for convenience.

        :param df: The DataFrame whose cells should be converted.
        :return: The same DataFrame instance, after conversion.
        """
        def _stringify(value):
            # Convert only the two container types handled by this helper.
            if isinstance(value, (np.ndarray, list)):
                return str(value)
            return value

        for col in df.columns:
            df[col] = df[col].apply(_stringify)
        return df