File size: 3,102 Bytes
37c2a8d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import pandas as pd
import numpy as np
import json


class Utils:
    """Stateless helpers for moving tabular data between CSV/JSON files and pandas."""

    @staticmethod
    def read_dataframe_from_csv(file_path):
        """
        Reads a DataFrame from a CSV file if the file exists.

        This is a best-effort reader: problems are reported on stdout and
        signalled by a ``None`` return value rather than by an exception.

        Parameters:
        - file_path: The full path to the CSV file.

        Returns:
        - A pandas DataFrame if the file exists and is read successfully; None otherwise.
        """
        # Guard clause: nothing to read if the path is not an existing regular file
        if not os.path.isfile(file_path):
            print(f"File does not exist: {file_path}")
            return None

        try:
            return pd.read_csv(file_path)
        except Exception as e:
            # Deliberately broad: any read/parse failure is reported and mapped to None
            print(f"An error occurred while reading the file: {e}")
            return None

    @staticmethod
    def read_json_files_to_dataframe(folder_path):
        """
        Reads JSON files from a specified folder, automatically infers columns from the JSON files,
        and returns the data as a pandas DataFrame.

        Only entries whose name ends with ``.json`` are read. Files are processed in
        sorted filename order so the resulting row order is reproducible. A file whose
        top-level JSON value is a string (i.e. double-encoded JSON) is decoded a second time.

        :param folder_path: Path to the folder containing JSON files.
        :return: A pandas DataFrame containing data from all JSON files in the folder
                 (an empty DataFrame if the folder holds no ``.json`` files).
        :raises OSError: If the folder cannot be listed or a file cannot be opened.
        :raises json.JSONDecodeError: If a file does not contain valid JSON.
        """
        data = []

        # os.listdir returns entries in arbitrary order; sort for deterministic output
        for filename in sorted(os.listdir(folder_path)):
            if not filename.endswith('.json'):
                continue

            file_path = os.path.join(folder_path, filename)

            # JSON text is UTF-8 by specification; do not rely on the locale default
            with open(file_path, 'r', encoding='utf-8') as file:
                json_data = json.load(file)

            # A string result means the payload was JSON-encoded twice; decode it again
            if isinstance(json_data, str):
                json_data = json.loads(json_data)

            data.append(json_data)

        # Create a DataFrame from the collected records
        return pd.DataFrame(data)

    @staticmethod
    def write_pandas_to_local(df, output_path):
        """
        Writes a pandas DataFrame to a CSV file at the specified output path.

        Missing parent directories are created. The index is not written.

        :param df: The pandas DataFrame to be saved.
        :param output_path: The file path where the DataFrame should be saved as a CSV.
        """
        # A bare filename has an empty dirname, and os.makedirs('') raises
        # FileNotFoundError, so only create directories when there is one to create.
        parent_dir = os.path.dirname(output_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        # Save the DataFrame to a CSV file without saving the index
        df.to_csv(output_path, index=False)

    @staticmethod
    def convert_iterables_to_strings(df):
        """
        Convert list and numpy-array cells to their string representations.

        Every column is scanned; cells that are a ``list`` or ``np.ndarray`` are
        replaced by ``str(cell)``, all other cells are left untouched.

        :param df: The pandas DataFrame to convert. It is modified in place.
        :return: The same DataFrame object, for convenient chaining.
        """
        def _stringify(value):
            # Only lists and numpy arrays are converted; everything else passes through
            return str(value) if isinstance(value, (np.ndarray, list)) else value

        for col in df.columns:
            df[col] = df[col].apply(_stringify)
        return df