Spaces:
Running
Running
import os | |
import pandas as pd | |
import numpy as np | |
import json | |
class Utils:
    """Stateless helpers for moving pandas DataFrames to and from local files.

    All methods are static: call them as ``Utils.method(...)`` (calling them
    on an instance works as well).
    """

    @staticmethod
    def read_dataframe_from_csv(file_path):
        """
        Reads a DataFrame from a CSV file if the file exists.

        :param file_path: The full path to the CSV file.
        :return: A pandas DataFrame if the file exists and is read
            successfully; None otherwise. A message is printed in both
            failure cases (missing file, read error).
        """
        # Guard clause: a missing path (or a directory) is reported, not raised.
        if not os.path.isfile(file_path):
            print(f"File does not exist: {file_path}")
            return None

        try:
            return pd.read_csv(file_path)
        except Exception as e:
            # Deliberately broad: this helper is best-effort and signals any
            # read/parse failure to the caller by returning None.
            print(f"An error occurred while reading the file: {e}")
            return None

    @staticmethod
    def read_json_files_to_dataframe(folder_path):
        """
        Reads every ``*.json`` file in a folder into one pandas DataFrame,
        one row per file; columns are inferred from the JSON contents.

        Files are processed in sorted filename order so the row order is
        reproducible. A file whose top-level JSON value is itself a string
        is decoded a second time (double-encoded JSON).

        :param folder_path: Path to the folder containing JSON files.
        :return: A pandas DataFrame containing data from all JSON files in
            the folder (empty if the folder holds no ``.json`` files).
        """
        records = []
        # os.listdir returns entries in arbitrary order; sort for determinism.
        for filename in sorted(os.listdir(folder_path)):
            if not filename.endswith('.json'):
                continue
            file_path = os.path.join(folder_path, filename)
            # JSON text is UTF-8; do not depend on the platform's locale.
            with open(file_path, 'r', encoding='utf-8') as file:
                json_data = json.load(file)
            # Double-encoded payload: the first decode yielded a JSON string.
            if isinstance(json_data, str):
                json_data = json.loads(json_data)
            records.append(json_data)
        return pd.DataFrame(records)

    @staticmethod
    def write_pandas_to_local(df, output_path):
        """
        Writes a pandas DataFrame to a CSV file at the specified output path,
        creating missing parent directories first. The index is not written.

        :param df: The pandas DataFrame to be saved.
        :param output_path: The file path where the DataFrame should be saved
            as a CSV. May be a bare filename (written to the current
            working directory).
        """
        parent_dir = os.path.dirname(output_path)
        # dirname is '' for a bare filename, and os.makedirs('') raises
        # FileNotFoundError, so only create a directory when there is one.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        df.to_csv(output_path, index=False)

    @staticmethod
    def convert_iterables_to_strings(df):
        """
        Replaces list and numpy-array cell values with their ``str()``
        representation; all other values are left untouched.

        The DataFrame is modified in place and also returned.

        :param df: The pandas DataFrame to convert.
        :return: The same DataFrame object, after conversion.
        """
        def _stringify(value):
            # Only lists and ndarrays are converted; other iterables
            # (tuples, sets, dicts, strings) pass through unchanged.
            if isinstance(value, (np.ndarray, list)):
                return str(value)
            return value

        for col in df.columns:
            df[col] = df[col].apply(_stringify)
        return df