Upload 4 files
Browse files- app.py +112 -0
- exog_creation.py +192 -0
- pipeline.pkl +3 -0
- requirements.txt +9 -0
app.py
ADDED
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import pandas as pd
|
3 |
+
import pickle
|
4 |
+
import numpy as np
|
5 |
+
from sklearn.preprocessing import MinMaxScaler
|
6 |
+
import joblib
|
7 |
+
from io import StringIO
|
8 |
+
import gradio as gr
|
9 |
+
import os
|
10 |
+
import sys
|
11 |
+
from json import load
|
12 |
+
from skforecast.utils import load_forecaster
|
13 |
+
from skforecast.preprocessing import RollingFeatures
|
14 |
+
from sklearn.preprocessing import MinMaxScaler
|
15 |
+
from sklearn.preprocessing import FunctionTransformer
|
16 |
+
from sklearn.pipeline import Pipeline
|
17 |
+
from exog_creation import create_exog
|
18 |
+
import contextlib
|
19 |
+
import warnings
|
20 |
+
# Funci贸n para cargar el archivo CSV y mostrar las primeras 5 filas
|
21 |
+
def load_csv(input_file):
    """Read a CSV upload into a DataFrame.

    Returns the DataFrame on success. On a read failure (or any other
    error) returns a human-readable error string; an empty frame also
    yields an error string instead of data.
    """
    try:
        frame = pd.read_csv(input_file)
    except Exception as e:
        return f"Error al procesar el archivo: {e}"
    # Guard: a header-only / empty upload is reported, not returned.
    if frame.empty:
        return "El archivo subido est谩 vac铆o o no tiene datos v谩lidos."
    return frame
36 |
+
def set_datetime_index(df):
    """Parse the 'datetime' column, promote it to the index, fix hourly freq.

    NOTE: overwrites the 'datetime' column on the caller's frame in place
    before indexing (same side effect as always).
    """
    df['datetime'] = pd.to_datetime(df['datetime'])
    indexed = df.set_index('datetime')
    return indexed.asfreq('h')
|
42 |
+
def load_model(name):
    """Load a persisted skforecast forecaster named *name*.

    Inserts the parent of the current working directory into sys.path so
    the project-local `root` module (which exposes DIR_DATA_ANALYTICS)
    can be imported, then loads the forecaster from that directory.
    """
    parent_dir = os.path.dirname(os.getcwd())
    sys.path.insert(1, parent_dir)
    import root  # project-local path-constants module
    return load_forecaster(root.DIR_DATA_ANALYTICS + name, verbose=True)
51 |
+
def load_pipeline(name):
    """Unpickle and return the preprocessing pipeline stored at *name*.

    Bug fix: the original ignored *name* and always opened 'pipeline.pkl';
    the parameter is now honored (callers already pass 'pipeline.pkl', so
    behavior for existing call sites is unchanged).
    NOTE: pickle.load must only be used on trusted files.
    """
    with open(name, 'rb') as file:
        return pickle.load(file)
56 |
+
|
57 |
+
def flujo(input_file):
    """End-to-end prediction flow for the Gradio app.

    Loads the uploaded CSV, builds exogenous features, forecasts the next
    24 hours, inverts the scaling pipeline and returns an HTML table with
    the (datetime, target) predictions.

    Parameters
    ----------
    input_file : file path / file object supplied by the Gradio File input.

    Returns
    -------
    str : HTML rendering of the prediction table.
    """
    warnings.filterwarnings("ignore")

    # Bug fix: use the uploaded file; the original ignored `input_file`
    # and always read the hard-coded "archivo.csv".
    datos = load_csv(input_file)

    datos = set_datetime_index(datos)

    datos_exog = create_exog(datos)

    # Suppress the loader's verbose printing. The context manager restores
    # stdout even if loading raises; the original reassigned sys.stdout by
    # hand and leaked the devnull file handle.
    with open(os.devnull, 'w') as devnull, contextlib.redirect_stdout(devnull):
        forecaster = load_model('tree_model.joblib')

    # Exogenous columns the model was trained with, in training order.
    exog_selected = ['temperature', 'rain', 'surface_pressure', 'cloudcover_total', 'windspeed_10m', 'winddirection_10m', 'shortwave_radiation', 'euros_per_mwh', 'installed_capacity', 'hour_sin', 'poly_month_sin__week_sin', 'poly_month_sin__week_cos', 'poly_month_sin__day_of_week_sin', 'poly_month_sin__day_of_week_cos', 'poly_month_sin__hour_sin', 'poly_month_sin__hour_cos', 'poly_month_sin__sunrise_hour_cos', 'poly_month_cos__week_sin', 'poly_month_cos__day_of_week_sin', 'poly_month_cos__day_of_week_cos', 'poly_month_cos__hour_sin', 'poly_month_cos__hour_cos', 'poly_month_cos__sunset_hour_sin', 'poly_week_sin__week_cos', 'poly_week_sin__day_of_week_sin', 'poly_week_sin__day_of_week_cos', 'poly_week_sin__hour_sin', 'poly_week_sin__hour_cos', 'poly_week_sin__sunrise_hour_cos', 'poly_week_sin__sunset_hour_cos', 'poly_week_cos__day_of_week_sin', 'poly_week_cos__day_of_week_cos', 'poly_week_cos__hour_sin', 'poly_week_cos__hour_cos', 'poly_week_cos__sunrise_hour_sin', 'poly_week_cos__sunrise_hour_cos', 'poly_week_cos__sunset_hour_sin', 'poly_day_of_week_sin__day_of_week_cos', 'poly_day_of_week_sin__hour_sin', 'poly_day_of_week_sin__hour_cos', 'poly_day_of_week_sin__sunrise_hour_sin', 'poly_day_of_week_sin__sunrise_hour_cos', 'poly_day_of_week_sin__sunset_hour_sin', 'poly_day_of_week_sin__sunset_hour_cos', 'poly_day_of_week_cos__hour_sin', 'poly_day_of_week_cos__hour_cos', 'poly_day_of_week_cos__sunrise_hour_sin', 'poly_day_of_week_cos__sunrise_hour_cos', 'poly_day_of_week_cos__sunset_hour_sin', 'poly_day_of_week_cos__sunset_hour_cos', 'poly_hour_sin__hour_cos', 'poly_hour_sin__sunrise_hour_sin', 'poly_hour_sin__sunrise_hour_cos', 'poly_hour_sin__sunset_hour_sin', 'poly_hour_sin__sunset_hour_cos', 'poly_hour_cos__sunrise_hour_sin', 'poly_hour_cos__sunrise_hour_cos', 'poly_hour_cos__sunset_hour_sin', 'poly_hour_cos__sunset_hour_cos']

    predictions = forecaster.predict(steps=24, exog=datos_exog[exog_selected])

    datos['target'] = predictions

    # Put the target first so the pipeline sees the training column order.
    target_column = 'target'
    columns_order = [target_column] + [col for col in datos.columns if col != target_column]
    datos = datos[columns_order]

    pipeline = load_pipeline('pipeline.pkl')

    # Map the scaled values back to the original units.
    pred_scaled = pipeline.inverse_transform(datos)
    pred_scaled_df = pd.DataFrame(pred_scaled, columns=datos.columns, index=datos.index)

    df_target = pred_scaled_df.reset_index()[['datetime', 'target']]
    return df_target.to_html()
101 |
+
# Crear la interfaz con Gradio
|
102 |
+
# Gradio UI: a single CSV upload that returns the prediction table as HTML.
interface = gr.Interface(
    fn=flujo,  # main entry point
    inputs=gr.File(label="Sube tu archivo CSV"),  # file upload input
    outputs="html",  # rendered prediction table
    # Typo fixes: "geenracion" -> "generacion", "perdice" -> "predice".
    title="Prediccion generacion de energia",
    description="Sube un archivo CSV y predice la generacion de energia."
)

interface.launch(share=True)
|
exog_creation.py
ADDED
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Tratamiento de datos
|
2 |
+
# ==============================================================================
|
3 |
+
import re
|
4 |
+
import numpy as np
|
5 |
+
import pandas as pd
|
6 |
+
from astral.sun import sun
|
7 |
+
from astral import LocationInfo
|
8 |
+
from skforecast.datasets import fetch_dataset
|
9 |
+
from feature_engine.datetime import DatetimeFeatures
|
10 |
+
from feature_engine.creation import CyclicalFeatures
|
11 |
+
from feature_engine.timeseries.forecasting import WindowFeatures
|
12 |
+
from sklearn.preprocessing import PolynomialFeatures
|
13 |
+
import sys
|
14 |
+
import os
|
15 |
+
##########################################################################################
|
16 |
+
|
17 |
+
|
18 |
+
# current_dir = os.getcwd()
|
19 |
+
# ROOT_PATH = os.path.dirname(current_dir)
|
20 |
+
# sys.path.insert(1, ROOT_PATH)
|
21 |
+
# import root
|
22 |
+
# datos = pd.read_pickle(root.DIR_DATA_STAGE + 'train.pkl')
|
23 |
+
|
24 |
+
# Variables basadas en el calendario
|
25 |
+
def calendar_features(datos):
    """Extract month, week, day-of-week and hour features from the index.

    Returns a DataFrame containing exactly those four columns, derived
    from *datos*'s DatetimeIndex.
    """
    wanted = ['month', 'week', 'day_of_week', 'hour']
    transformer = DatetimeFeatures(
        variables='index',
        features_to_extract=wanted,
        drop_original=True,
    )
    return transformer.fit_transform(datos)[wanted]
|
41 |
+
# Variables basadas en la luz solar
|
42 |
+
def solar_features(datos):
    """Build sunrise/sunset based features for every timestamp in *datos*.

    Uses a fixed Tallinn/Estonia location. Returns a DataFrame (indexed
    like *datos*) with: sunrise_hour and sunset_hour (rounded to the
    nearest hour), daylight_hours (their difference), and is_daylight
    (1 when the timestamp's hour lies in [sunrise, sunset), else 0).
    """
    location = LocationInfo(
        name='Taillin',
        region='Estonia',
        timezone='Europe/Riga',
        latitude=56.946285,
        longitude=24.105078,
    )
    # Perf fix: one astral computation per date; the original called
    # sun() twice per timestamp (once for sunrise, once for sunset).
    events = [
        sun(location.observer, date=date, tzinfo=location.timezone)
        for date in datos.index
    ]
    sunrise_hour = pd.Series(
        [e['sunrise'] for e in events], index=datos.index
    ).dt.round("h").dt.hour
    sunset_hour = pd.Series(
        [e['sunset'] for e in events], index=datos.index
    ).dt.round("h").dt.hour
    variables_solares = pd.DataFrame({
        'sunrise_hour': sunrise_hour,
        'sunset_hour': sunset_hour
    })
    variables_solares['daylight_hours'] = (
        variables_solares['sunset_hour'] - variables_solares['sunrise_hour']
    )
    variables_solares["is_daylight"] = np.where(
        (datos.index.hour >= variables_solares["sunrise_hour"])
        & (datos.index.hour < variables_solares["sunset_hour"]),
        1,
        0,
    )
    return variables_solares
76 |
+
# Uni贸n de variables ex贸genas
|
77 |
+
|
78 |
+
def union_exog_features(variables_calendario, variables_solares):
    """Concatenate calendar and solar feature frames column-wise.

    Both frames must share the same index (asserted).
    """
    assert all(variables_calendario.index == variables_solares.index)
    return pd.concat([variables_calendario, variables_solares], axis=1)
87 |
+
def ciclic_features(variables_exogenas):
    """Sin/cos-encode the cyclical calendar and solar-hour columns.

    Each listed column gets `<col>_sin` / `<col>_cos` companions; the
    originals are kept (drop_original=False).
    """
    # Column -> maximum value of its cycle, in encoding order.
    max_values = {
        "month": 12,
        "week": 52,
        "day_of_week": 6,
        "hour": 23,
        "sunrise_hour": 23,
        "sunset_hour": 23,
    }
    encoder = CyclicalFeatures(
        variables=list(max_values),
        max_values=max_values,
        drop_original=False,
    )
    return encoder.fit_transform(variables_exogenas)
114 |
+
def pol_features(variables_exogenas):
    """Append degree-2 interaction terms between the encoded exog columns.

    Interaction columns are prefixed with 'poly_' and the ' ' separator
    produced by PolynomialFeatures is replaced with '__'. The original
    columns are kept; only the new interactions are added.
    """
    poly_cols = [
        'month_sin', 'month_cos',
        'week_sin', 'week_cos',
        'day_of_week_sin', 'day_of_week_cos',
        'hour_sin', 'hour_cos',
        'sunrise_hour_sin', 'sunrise_hour_cos',
        'sunset_hour_sin', 'sunset_hour_cos',
        'daylight_hours', 'is_daylight',
    ]
    poly = PolynomialFeatures(
        degree=2,
        interaction_only=True,
        include_bias=False,
    ).set_output(transform="pandas")
    interactions = poly.fit_transform(variables_exogenas[poly_cols])
    # Keep only the pairwise products, not the passthrough columns.
    interactions = interactions.drop(columns=poly_cols)
    interactions.columns = [
        f"poly_{col}".replace(" ", "__") for col in interactions.columns
    ]
    assert all(variables_exogenas.index == interactions.index)
    return pd.concat([variables_exogenas, interactions], axis=1)
146 |
+
|
147 |
+
def select_exog_features(variables_exogenas):
    """Return the exogenous column names the model uses: any *_sin / *_cos."""
    pattern = re.compile(r'_sin$|_cos$')
    return [col for col in variables_exogenas.columns if pattern.search(col)]
155 |
+
def merge_df(datos, variables_exogenas, exog_features):
    """Left-join the selected exogenous columns onto *datos* by index.

    Only rows already present in *datos* are kept (how='left').
    """
    selected = variables_exogenas[exog_features]
    return datos.merge(selected, left_index=True, right_index=True, how='left')
164 |
+
|
165 |
+
def create_exog(datos):
    """Build the full exogenous-feature frame and left-join it onto *datos*.

    Pipeline: calendar features -> solar features -> column-wise concat
    -> cyclical (sin/cos) encoding -> polynomial interactions -> select
    the *_sin/*_cos columns -> merge onto the input frame by index.
    """
    calendario = calendar_features(datos)
    solares = solar_features(datos)
    exogenas = union_exog_features(calendario, solares)
    exogenas = ciclic_features(exogenas)
    exogenas = pol_features(exogenas)
    seleccionadas = select_exog_features(exogenas)
    return merge_df(datos, exogenas, seleccionadas)
192 |
+
|
pipeline.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6fa69d47a46f823b38783c73e2d36215e1884b6b30742c45c9912ed1542a4be
|
3 |
+
size 2283
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
astral==3.2
|
2 |
+
feature_engine==1.8.2
|
3 |
+
gradio==5.6.0
|
4 |
+
joblib==1.4.2
|
5 |
+
numpy==2.1.3
|
6 |
+
pandas==2.2.3
|
7 |
+
scikit_learn==1.5.2
|
8 |
+
seaborn==0.13.2
|
9 |
+
skforecast==0.14.0
|