josegoji committed on
Commit
f784bc9
verified
1 Parent(s): 1ebf2b9

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +112 -0
  2. exog_creation.py +192 -0
  3. pipeline.pkl +3 -0
  4. requirements.txt +9 -0
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import pandas as pd
3
+ import pickle
4
+ import numpy as np
5
+ from sklearn.preprocessing import MinMaxScaler
6
+ import joblib
7
+ from io import StringIO
8
+ import gradio as gr
9
+ import os
10
+ import sys
11
+ from json import load
12
+ from skforecast.utils import load_forecaster
13
+ from skforecast.preprocessing import RollingFeatures
14
+ from sklearn.preprocessing import MinMaxScaler
15
+ from sklearn.preprocessing import FunctionTransformer
16
+ from sklearn.pipeline import Pipeline
17
+ from exog_creation import create_exog
18
+ import contextlib
19
+ import warnings
20
# Load a CSV file and show its contents (first rows are rendered by the UI).
def load_csv(input_file):
    """Read *input_file* with pandas and return the resulting DataFrame.

    Returns a Spanish error-message string instead of a DataFrame when the
    file is empty or cannot be parsed, so the Gradio UI can display it.
    """
    try:
        frame = pd.read_csv(input_file)
    except Exception as exc:
        return f"Error al procesar el archivo: {exc}"
    # An empty upload is reported as a message rather than raising.
    if frame.empty:
        return "El archivo subido est谩 vac铆o o no tiene datos v谩lidos."
    return frame
35
+
36
def set_datetime_index(df):
    """Parse the 'datetime' column (in place), promote it to the index, and
    coerce the index to a fixed hourly frequency (gaps become NaN rows)."""
    df["datetime"] = pd.to_datetime(df["datetime"])
    indexed = df.set_index("datetime")
    return indexed.asfreq("h")
41
+
42
def load_model(name):
    """Load the persisted skforecast forecaster file *name* from the
    project's analytics data directory (resolved via the local ``root``
    helper module)."""
    # The parent of the current working directory is assumed to be the
    # project root so that the ``root`` module becomes importable.
    parent_dir = os.path.dirname(os.getcwd())
    sys.path.insert(1, parent_dir)
    import root
    # verbose=True prints a model summary; the caller silences it by
    # redirecting stdout.
    return load_forecaster(root.DIR_DATA_ANALYTICS + name, verbose=True)
50
+
51
def load_pipeline(name):
    """Unpickle and return the preprocessing pipeline stored at path *name*.

    Bug fix: the *name* argument was previously ignored and the path
    'pipeline.pkl' was hard-coded, so no other pipeline file could ever be
    loaded. Backward compatible: the existing caller passes 'pipeline.pkl'.
    """
    # NOTE(review): pickle.load can execute arbitrary code — only load
    # trusted pipeline files.
    with open(name, "rb") as file:
        return pickle.load(file)
55
+
56
+
57
def flujo(input_file):
    """End-to-end prediction flow driven by the Gradio UI.

    Loads the uploaded CSV, builds exogenous features, predicts 24 hourly
    steps with the persisted forecaster, inverse-transforms the result with
    the stored pipeline, and returns the (datetime, target) pairs as HTML.

    Bug fixes:
    - the uploaded ``input_file`` was ignored in favour of a hard-coded
      "archivo.csv";
    - stdout was redirected via an unclosed ``open(os.devnull)`` handle and
      was never restored if ``load_model`` raised — now handled with
      ``contextlib.redirect_stdout``;
    - ``load_csv`` error strings are now surfaced instead of crashing on
      the next step.
    """
    warnings.filterwarnings("ignore")

    datos = load_csv(input_file)
    # load_csv returns an error-message string on failure; show it as-is.
    if isinstance(datos, str):
        return datos

    datos = set_datetime_index(datos)
    datos_exog = create_exog(datos)

    # Suppress the verbose summary printed while loading the forecaster;
    # the context managers restore stdout even on exceptions.
    with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
        forecaster = load_model('tree_model.joblib')

    # Exogenous columns the forecaster was trained on, in training order.
    exog_selected = [
        'temperature', 'rain', 'surface_pressure', 'cloudcover_total',
        'windspeed_10m', 'winddirection_10m', 'shortwave_radiation',
        'euros_per_mwh', 'installed_capacity', 'hour_sin',
        'poly_month_sin__week_sin', 'poly_month_sin__week_cos',
        'poly_month_sin__day_of_week_sin', 'poly_month_sin__day_of_week_cos',
        'poly_month_sin__hour_sin', 'poly_month_sin__hour_cos',
        'poly_month_sin__sunrise_hour_cos', 'poly_month_cos__week_sin',
        'poly_month_cos__day_of_week_sin', 'poly_month_cos__day_of_week_cos',
        'poly_month_cos__hour_sin', 'poly_month_cos__hour_cos',
        'poly_month_cos__sunset_hour_sin', 'poly_week_sin__week_cos',
        'poly_week_sin__day_of_week_sin', 'poly_week_sin__day_of_week_cos',
        'poly_week_sin__hour_sin', 'poly_week_sin__hour_cos',
        'poly_week_sin__sunrise_hour_cos', 'poly_week_sin__sunset_hour_cos',
        'poly_week_cos__day_of_week_sin', 'poly_week_cos__day_of_week_cos',
        'poly_week_cos__hour_sin', 'poly_week_cos__hour_cos',
        'poly_week_cos__sunrise_hour_sin', 'poly_week_cos__sunrise_hour_cos',
        'poly_week_cos__sunset_hour_sin',
        'poly_day_of_week_sin__day_of_week_cos',
        'poly_day_of_week_sin__hour_sin', 'poly_day_of_week_sin__hour_cos',
        'poly_day_of_week_sin__sunrise_hour_sin',
        'poly_day_of_week_sin__sunrise_hour_cos',
        'poly_day_of_week_sin__sunset_hour_sin',
        'poly_day_of_week_sin__sunset_hour_cos',
        'poly_day_of_week_cos__hour_sin', 'poly_day_of_week_cos__hour_cos',
        'poly_day_of_week_cos__sunrise_hour_sin',
        'poly_day_of_week_cos__sunrise_hour_cos',
        'poly_day_of_week_cos__sunset_hour_sin',
        'poly_day_of_week_cos__sunset_hour_cos',
        'poly_hour_sin__hour_cos', 'poly_hour_sin__sunrise_hour_sin',
        'poly_hour_sin__sunrise_hour_cos', 'poly_hour_sin__sunset_hour_sin',
        'poly_hour_sin__sunset_hour_cos', 'poly_hour_cos__sunrise_hour_sin',
        'poly_hour_cos__sunrise_hour_cos', 'poly_hour_cos__sunset_hour_sin',
        'poly_hour_cos__sunset_hour_cos',
    ]

    predictions = forecaster.predict(steps=24, exog=datos_exog[exog_selected])

    datos['target'] = predictions

    # Put the target first: the pipeline expects this column order.
    target_column = 'target'
    columns_order = [target_column] + [col for col in datos.columns if col != target_column]
    datos = datos[columns_order]

    pipeline = load_pipeline('pipeline.pkl')

    # Undo the scaling applied at training time to get real-valued output.
    pred_scaled = pipeline.inverse_transform(datos)
    pred_scaled_df = pd.DataFrame(pred_scaled, columns=datos.columns, index=datos.index)

    df_target = pred_scaled_df.reset_index()[['datetime', 'target']]
    return df_target.to_html()
100
+
101
# Build the Gradio UI: a CSV upload feeding `flujo`, rendered as an HTML table.
interface = gr.Interface(
    fn=flujo,  # main prediction flow
    inputs=gr.File(label="Sube tu archivo CSV"),  # CSV file upload
    outputs="html",  # predictions rendered as an HTML table
    # Bug fix: corrected misspellings ("geenracion", "perdice") in the
    # user-facing title and description.
    title="Prediccion de generacion de energia",
    description="Sube un archivo CSV y predice la generacion de energia.",
)

# share=True also publishes a temporary public link besides the local server.
interface.launch(share=True)
111
+
112
+
exog_creation.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Tratamiento de datos
2
+ # ==============================================================================
3
+ import re
4
+ import numpy as np
5
+ import pandas as pd
6
+ from astral.sun import sun
7
+ from astral import LocationInfo
8
+ from skforecast.datasets import fetch_dataset
9
+ from feature_engine.datetime import DatetimeFeatures
10
+ from feature_engine.creation import CyclicalFeatures
11
+ from feature_engine.timeseries.forecasting import WindowFeatures
12
+ from sklearn.preprocessing import PolynomialFeatures
13
+ import sys
14
+ import os
15
+ ##########################################################################################
16
+
17
+
18
+ # current_dir = os.getcwd()
19
+ # ROOT_PATH = os.path.dirname(current_dir)
20
+ # sys.path.insert(1, ROOT_PATH)
21
+ # import root
22
+ # datos = pd.read_pickle(root.DIR_DATA_STAGE + 'train.pkl')
23
+
24
# Calendar-based exogenous variables
def calendar_features(datos):
    """Extract month, week, day-of-week and hour columns from *datos*' DatetimeIndex."""
    wanted = [
        "month",
        "week",
        "day_of_week",
        "hour",
    ]
    extractor = DatetimeFeatures(
        variables="index",
        features_to_extract=wanted,
        drop_original=True,
    )
    # Re-select after fit_transform to pin the column order.
    return extractor.fit_transform(datos)[wanted]
40
+
41
# Sunlight-based exogenous variables
def solar_features(datos):
    """Build sunrise/sunset hour, daylight duration and an is-daylight flag
    for every timestamp in *datos*' index."""
    # NOTE(review): the name says Tallinn/Estonia, but the timezone and the
    # coordinates correspond to Riga — confirm the intended location.
    location = LocationInfo(
        name='Taillin',
        region='Estonia',
        timezone='Europe/Riga',
        latitude=56.946285,
        longitude=24.105078,
    )
    observer, tz = location.observer, location.timezone
    # Sunrise/sunset timestamps, rounded to the nearest whole hour.
    sunrise = pd.Series(
        [sun(observer, date=d, tzinfo=tz)['sunrise'] for d in datos.index],
        index=datos.index,
    ).dt.round("h").dt.hour
    sunset = pd.Series(
        [sun(observer, date=d, tzinfo=tz)['sunset'] for d in datos.index],
        index=datos.index,
    ).dt.round("h").dt.hour
    variables_solares = pd.DataFrame({
        'sunrise_hour': sunrise,
        'sunset_hour': sunset,
    })
    variables_solares['daylight_hours'] = (
        variables_solares['sunset_hour'] - variables_solares['sunrise_hour']
    )
    # 1 while the timestamp's hour lies in [sunrise, sunset), else 0.
    hours = datos.index.hour
    variables_solares["is_daylight"] = np.where(
        (hours >= variables_solares["sunrise_hour"])
        & (hours < variables_solares["sunset_hour"]),
        1,
        0,
    )
    return variables_solares
75
+
76
# Join the exogenous variable groups
def union_exog_features(variables_calendario, variables_solares):
    """Column-concatenate the calendar and solar feature frames.

    Raises
    ------
    ValueError
        If the two frames are not aligned on the same index. (Previously an
        ``assert``, which is silently skipped under ``python -O`` and raises
        a ValueError anyway when lengths differ — now validated explicitly.)
    """
    if not variables_calendario.index.equals(variables_solares.index):
        raise ValueError("calendar and solar features must share the same index")
    return pd.concat([variables_calendario, variables_solares], axis=1)
86
+
87
def ciclic_features(variables_exogenas):
    """Append sin/cos encodings of the periodic calendar and solar columns.

    Original columns are kept (``drop_original=False``).
    """
    # Column -> period used for the cyclical encoding.
    periods = {
        "month": 12,
        "week": 52,
        "day_of_week": 6,
        "hour": 23,
        "sunrise_hour": 23,
        "sunset_hour": 23,
    }
    encoder = CyclicalFeatures(
        variables=list(periods),
        max_values=periods,
        drop_original=False,
    )
    return encoder.fit_transform(variables_exogenas)
113
+
114
def pol_features(variables_exogenas):
    """Append pairwise interaction terms between the cyclical/solar features.

    New columns are named ``poly_<colA>__<colB>``.
    """
    base_cols = [
        'month_sin', 'month_cos',
        'week_sin', 'week_cos',
        'day_of_week_sin', 'day_of_week_cos',
        'hour_sin', 'hour_cos',
        'sunrise_hour_sin', 'sunrise_hour_cos',
        'sunset_hour_sin', 'sunset_hour_cos',
        'daylight_hours', 'is_daylight',
    ]
    # degree=2 + interaction_only: products of distinct column pairs only.
    expander = PolynomialFeatures(
        degree=2,
        interaction_only=True,
        include_bias=False,
    ).set_output(transform="pandas")
    interactions = expander.fit_transform(variables_exogenas[base_cols])
    # Drop the passthrough originals, then normalise the generated names
    # ("a b" -> "poly_a__b").
    interactions = interactions.drop(columns=base_cols)
    interactions.columns = [
        f"poly_{name}".replace(" ", "__") for name in interactions.columns
    ]
    assert all(variables_exogenas.index == interactions.index)
    return pd.concat([variables_exogenas, interactions], axis=1)
145
+
146
+
147
def select_exog_features(variables_exogenas):
    """Return the names of the exogenous columns used by the model:
    every column ending in ``_sin`` or ``_cos``."""
    cyclical = variables_exogenas.filter(regex=r"_sin$|_cos$")
    return list(cyclical.columns)
154
+
155
def merge_df(datos, variables_exogenas, exog_features):
    """Left-join the selected exogenous columns onto *datos* by index."""
    selected = variables_exogenas[exog_features]
    # 'left' keeps every row of datos even if an exogenous value is missing.
    return datos.merge(selected, left_index=True, right_index=True, how='left')
163
+
164
+
165
def create_exog(datos):
    """Build the full exogenous feature set for *datos* and merge it in.

    Pipeline: calendar features -> solar features -> union -> cyclical
    encodings -> polynomial interactions -> sin/cos selection -> left merge
    back onto *datos*.
    """
    calendario = calendar_features(datos)
    solares = solar_features(datos)
    exogenas = union_exog_features(calendario, solares)
    exogenas = ciclic_features(exogenas)
    exogenas = pol_features(exogenas)
    seleccion = select_exog_features(exogenas)
    return merge_df(datos, exogenas, seleccion)
191
+
192
+
pipeline.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6fa69d47a46f823b38783c73e2d36215e1884b6b30742c45c9912ed1542a4be
3
+ size 2283
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ astral==3.2
2
+ feature_engine==1.8.2
3
+ gradio==5.6.0
4
+ joblib==1.4.2
5
+ numpy==2.1.3
6
+ pandas==2.2.3
7
+ scikit_learn==1.5.2
8
+ seaborn==0.13.2
9
+ skforecast==0.14.0