# Spaces:
# Build error
# Build error
# -*- coding: utf-8 -*-
"""dynamic pricing.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1pMuvzwELNm1DsTdL5dfBdA2HCjB6uwgh
"""
import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Commented out IPython magic to ensure Python compatibility.
# %matplotlib inline
# Load the raw data; the relative path is resolved against the current
# working directory.
dataset = pd.read_csv("Pop_Data.csv")
dataset.head(5)  # notebook-style preview; the result is unused in a script

# Every column except the last is a candidate feature; the last column is
# the regression target.
feature_columns = dataset.iloc[:, :-1]
target_column = dataset.iloc[:, -1]

# Hold out 30% of the rows for evaluation, with a fixed seed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    feature_columns,
    target_column,
    test_size=0.3,
    random_state=42,
)
X_train.info()

# EDA
# Keep only the columns from position 3 onward in both splits; the first
# three columns are not used by anything below.
X_train = X_train.iloc[:, 3:]
X_test = X_test.iloc[:, 3:]
# Fixed: the original referenced `X_train.info` without calling it, so the
# summary of the reduced frame was never printed.
X_train.info()

# Bar chart of how many training rows fall on each day of the week.
plt.figure(figsize=(12, 8))
plot = sns.countplot(x="day_of_week", data=X_train)
plt.xticks(rotation=90)

# Write each bar's height (its row count) just above the bar, centred
# horizontally on it.
for p in plot.patches:
    plot.annotate(
        p.get_height(),
        (p.get_x() + p.get_width() / 2.0, p.get_height()),
        ha="center",
        va="center",
        xytext=(0, 5),
        textcoords="offset points",
    )

# Fixed: countplot draws the number of rows per category, not a price, so
# the title and y-axis label now describe what is actually plotted.
plt.title("Number of records per day")
plt.xlabel("Day")
plt.ylabel("Count")
# Report the number of missing values per input column, training split
# first and test split second for each column.
for column in ("day_of_week", "hour_of_day", "popularity_percent_normal"):
    print(X_train[column].isnull().sum())
    print(X_test[column].isnull().sum())

# Impute missing popularity values with the mean of the TRAINING split.
#
# Fixed (1): the original used `X_train[col].fillna(..., inplace=True)`,
# a chained in-place assignment. It is deprecated in pandas 2.x and has no
# effect on `X_train` once Copy-on-Write is enabled, so the result is now
# assigned back explicitly.
# Fixed (2): the test split was never imputed, so a NaN there would reach
# the estimators at prediction time. It is filled with the same
# training-set mean, so no information from the test split is used.
popularity_mean = X_train["popularity_percent_normal"].astype("float64").mean()
X_train = X_train.assign(
    popularity_percent_normal=X_train["popularity_percent_normal"].fillna(
        popularity_mean
    )
)
X_test = X_test.assign(
    popularity_percent_normal=X_test["popularity_percent_normal"].fillna(
        popularity_mean
    )
)
# One-hot encode `day_of_week` so that both splits share exactly the same
# indicator columns and the same dropped baseline level.
#
# Fixed: the original ran `get_dummies(drop_first=True)` on each split
# independently. If the test split did not contain the training split's
# first (baseline) day, a different level was dropped there, and rows of
# that level were silently encoded as the training baseline. Pinning the
# category set to the days seen in training makes the encoding identical
# for both splits.
day_categories = sorted(X_train["day_of_week"].dropna().unique())
day_dtype = pd.CategoricalDtype(categories=day_categories)

X_train = pd.get_dummies(
    X_train.astype({"day_of_week": day_dtype}),
    columns=["day_of_week"],
    drop_first=True,
)
X_test = pd.get_dummies(
    X_test.astype({"day_of_week": day_dtype}),
    columns=["day_of_week"],
    drop_first=True,
)

# Guarantee identical column order; a column missing from the test split
# (not expected after the step above) is created and filled with 0.
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)
# Standardise the features. The scaler is fitted on the training split
# only and then applied to both splits, so the test split is scaled with
# the training statistics. Both names are rebound to the transformed data.
standardScaler = StandardScaler().fit(X_train)
X_train = standardScaler.transform(X_train)
X_test = standardScaler.transform(X_test)
# Baseline model: linear regression fitted on the training split and
# scored on the held-out test split.
linearRegression = LinearRegression()
linearRegression.fit(X_train, y_train)
y_pred = linearRegression.predict(X_test)

# Fixed: the original computed the R^2 score and discarded it (a notebook
# display expression), so running the file as a script reported nothing.
linear_r2 = r2_score(y_test, y_pred)
print("LinearRegression R^2 on test split:", linear_r2)
# Random forest with 100 trees, fitted on the training split. This is the
# model that `predict` serves.
#
# Fixed: the forest was unseeded, so every run produced a different model
# and score even though the train/test split is seeded. It now uses the
# same seed as the split (42) for reproducible results.
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

# Fixed: the original computed the R^2 score and discarded it; report it.
rf_r2 = r2_score(y_test, y_pred)
print("RandomForestRegressor R^2 on test split:", rf_r2)
def predict(x_test):
    """Return the random-forest predictions for ``x_test``.

    The call is forwarded unchanged to the module-level ``rf`` model.

    NOTE(review): ``rf`` is fitted on features that were one-hot encoded
    and passed through ``standardScaler.transform`` earlier in this file,
    while ``x_test`` is used here as-is. Presumably callers must supply
    features that are already preprocessed the same way; confirm against
    the call sites.
    """
    predictions = rf.predict(x_test)
    return predictions