myfirspace / model.py
KotVasily's picture
Add application file
d39b93e
import pandas as pd
import tensorflow as tf
import pickle
import plotly.express as px
import os
import numpy as np
from stock_and import GetNewData
class Model:
def __init__(self, stock_name, model_name) -> None:
"""Этот класс делает прогнозирования, выводит графики, рассчитывает важность признаков."""
self.stock_name = stock_name # Название
self.features = ['lag_25', 'lag_34','lag_33','lag_26','lag_32','lag_31','lag_30','lag_29','lag_27','sentiment_neutral', 'lag_28',
'sentiment_positive','sentiment_negative', 'month','day'] # Фичи для модели
self.model_name = model_name # Название модели
# Загрузка моделей
if model_name == 'NN':
self.model = tf.keras.models.load_model(f'models/nn_predict_1day_ver2_{stock_name}.h5', custom_objects={'mae': tf.keras.metrics.MeanAbsoluteError()})
if model_name == 'LinearRegression':
with open(os.path.join('models', f'linear_predict_1day_ver2_{stock_name}.pkl'), 'rb') as f:
self.model = pickle.load(f)
if model_name == 'LGB':
with open(os.path.join('models', f'lgb_predict_1day_ver2_{stock_name}.pkl'), 'rb') as f:
self.model = pickle.load(f)
def generate_dataset(self, stock_name, num_day):
"""Парсим датасет, добавляем новые row для прогонза"""
merged_df, string = GetNewData(stock_name).get_full_data() # Тут мы получаем датасет с новостями и акциями
# Создаем датасет с дополнительными строками
last_date = merged_df['DATE'].max()
new_date_rng = pd.date_range(start=last_date + pd.Timedelta(hours=1), periods=24 * num_day , freq='H')
new_df = pd.DataFrame(new_date_rng, columns=['DATE'])
new_df['month'] = new_df['DATE'].dt.month
new_df['day'] = new_df['DATE'].dt.day
for c in self.features:
last_value = merged_df[c].values[-24 * num_day :]
new_df[c] = last_value
# Объединяем датасет
merged_df_new = pd.concat([merged_df, new_df[self.features+['DATE']]], ignore_index=True)
return merged_df_new, string
def predict(self, num_day):
# Создаем прогнозы, важности признаков
merged_df, string = self.generate_dataset(self.stock_name, num_day) # Парсим датасет
if self.model_name == 'NN':
importance = np.abs(self.model.layers[0].get_weights()[0])
importance = np.mean(importance, axis=1)
df_weighted = merged_df[self.features].ffill().bfill()[-num_day*24:] * importance
if self.model_name == 'LinearRegression':
df_weighted = merged_df[self.features].ffill().bfill()[-num_day*24:] * self.model.coef_
if self.model_name == 'LGB':
df_weighted = merged_df[self.features].ffill().bfill()[-num_day*24:] * self.model.feature_importances_
average_values = df_weighted.mean(axis=0).abs().sort_values(ascending=False)
average_values_filtered = average_values.drop('lag_25')
total_sum = average_values_filtered.sum()
average_values_percentage = (average_values_filtered / total_sum) * 100
string += '\n Самые полезные признаки для прогнозов: \n'
for f, v in zip(average_values_percentage.index, average_values_percentage.values):
string += f'- {f}: важность = {v:.2f}%\n'
if self.model_name in ['LinearRegression', 'LGB']:
return pd.DataFrame({
'predict': self.model.predict(merged_df[self.features].ffill().bfill().values),
'DATE': merged_df['DATE'].values,
'CLOSE': merged_df['CLOSE'].values
}), string
else:
return pd.DataFrame({
'predict': self.model.predict(merged_df[self.features].ffill().bfill().values)[:, 0],
'DATE': merged_df['DATE'].values,
'CLOSE': merged_df['CLOSE'].values
}), string
def plot_predict(self, predict, add_smoothing):
predict = predict[-24*12:]
scaling_factor = predict['CLOSE'].mean() / predict['predict'].mean()
scaled_preds = predict['predict'] * scaling_factor
fig = px.line(predict, x=predict.DATE, y='CLOSE', labels={'value': 'Цена'}, title='CLOSE')
fig.add_scatter(x=predict.DATE, y=scaled_preds, mode='lines', name='Predict', opacity=0.7)
if add_smoothing:
smoothed_preds = pd.Series(scaled_preds).ewm(3).mean()
fig.add_scatter(x=predict.DATE, y=smoothed_preds, mode='lines', name='Сглаженные предсказания', opacity=0.7)
fig.update_layout(xaxis=dict(type='category'))
return fig