File size: 6,147 Bytes
4714bf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba970d3
4714bf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba970d3
4714bf7
 
 
 
 
 
 
 
 
507c578
25a2395
6cea37c
4b4b385
 
6cea37c
 
493b13e
4714bf7
 
6cea37c
 
8934416
6cea37c
 
4714bf7
 
 
8934416
4714bf7
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# Loading key libraries
import streamlit as st
import os
import pickle
import numpy as np
import pandas as pd
import re
from pathlib import Path
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns



# Setting the page configurations
st.set_page_config(page_title= "Prediction Forecasting", layout= "wide", initial_sidebar_state= "auto")

# Setting the page title
st.title("Grocery Store Forecasting Prediction")


@st.cache_data
def load_data(filepath='Grocery.csv'):
    """Read the dataset CSV at *filepath* into a pandas DataFrame.

    Streamlit re-executes the whole script on every widget interaction;
    caching the parsed frame avoids re-reading the CSV on each rerun.

    Args:
        filepath: Path of the CSV file to read (relative paths are resolved
            against the current working directory, as with ``pd.read_csv``).

    Returns:
        The DataFrame produced by ``pd.read_csv(filepath)``.
    """
    return pd.read_csv(filepath)


# Load the saved data
df = load_data()


toolkit = "toolkit_folder"


@st.cache_resource
def load_toolkit(filepath = toolkit):
    """Load the pickled toolkit object stored at *filepath*.

    Args:
        filepath: Path of the pickle file to open. Defaults to the value the
            module-level ``toolkit`` name holds when this function is defined.

    Returns:
        The object produced by ``pickle.load``. Code further down this script
        indexes it with the keys ``"OneHotEncoder"`` and ``"model"``.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the file is not a valid pickle.
    """
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file; only load toolkit files that come from a trusted source.
    #
    # Open the path that was passed in rather than the module-level
    # ``toolkit`` name: that global is rebound to the loaded object later in
    # the script, and ignoring the argument would make every requested path
    # return the same file.
    with open(filepath, "rb") as file:
        loaded_toolkit = pickle.load(file)
    return loaded_toolkit


# Fetch the toolkit and pull out the two entries the rest of the script uses:
# the object stored under "OneHotEncoder" and the one stored under "model".
toolkit = load_toolkit()
Encoder, model = toolkit["OneHotEncoder"], toolkit["model"]



# Sidebar navigation: the selected entry decides which section is rendered
menu = st.sidebar.radio('menu', ['Home view', 'Prediction target'])

if menu == 'Home view':
    # Landing section: heading text, banner image and an optional data preview
    st.write('Grocery Store Time Series Forecasting')
    st.image('images1.jpg', width=450)
    st.write('Graphical representation and Data Overview')

    # Show the first 15 rows of the dataset only when the box is ticked
    show_preview = st.checkbox('Data Set ')
    if show_preview:
        st.table(df.head(15))
# Chart section: the user picks one chart type and exactly one figure is drawn.
# NOTE(review): this section is not nested under the 'Home view' branch, so it
# renders for every menu choice -- confirm that is intended.
st.title('Charts')
graph = st.selectbox('Varieties of graphs',['scatter plot','Bar chat','Histogram'])

if graph == 'scatter plot':
    # Target against promotion count for the first 1000 rows, coloured by city
    fig, ax = plt.subplots(figsize=(10, 5))
    sns.scatterplot(
        y='target',
        x='onpromotion',
        data=df.iloc[:1000],
        palette='bright',
        hue='city',
        ax=ax,
    )
    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; close the figure so
    # pyplot does not keep accumulating open figures across reruns.
    plt.close(fig)

elif graph == 'Bar chat':
    # Ten cities with the largest summed target, in descending order
    fig, ax = plt.subplots(figsize=(10, 5))
    t = (
        df.groupby("city")["target"]
        .sum()
        .reset_index()
        .sort_values(by="target", ascending=False)
        .iloc[:10]
    )
    sns.barplot(data=t, y="target", x="city", palette='Blues_d', ax=ax)
    st.pyplot(fig)
    plt.close(fig)

elif graph == 'Histogram':
    # Distribution of the first 20 target values with a KDE overlay
    fig, ax = plt.subplots(figsize=(10, 5))
    st.write('Target Categories')
    # histplot replaces the deprecated distplot; stat="density" and cut=3
    # follow seaborn's migration guidance for reproducing distplot's output.
    sns.histplot(
        df.target.iloc[:20],
        kde=True,
        stat="density",
        kde_kws={"cut": 3},
        ax=ax,
    )
    st.pyplot(fig)
    plt.close(fig)
      




def _sidebar_choice(label, column):
    """Render a sidebar selectbox offering the sorted unique values of df[column]."""
    return st.sidebar.selectbox(label, options=sorted(df[column].unique()))


def _sidebar_min_number(column):
    """Render a sidebar number input for *column*, bounded below by and defaulting to its minimum in df."""
    lowest = df[column].min()
    return st.sidebar.number_input(column, min_value=lowest, value=lowest)


if menu == 'Prediction target':
    st.image('image 2.jpg', width = 460)

    st.sidebar.markdown('User Input Details and Information')

    # One sidebar widget per model input. The widgets are created in the same
    # order as the keys of the record built below.
    store_id = _sidebar_choice('store_id', 'store_id')
    category_id = _sidebar_choice('categegory_id', 'category_id')
    onpromotion = _sidebar_min_number('onpromotion')
    year = _sidebar_choice('year', 'year')
    month = _sidebar_choice('month', 'month')
    dayofmonth = _sidebar_min_number('dayofmonth')
    dayofweek = _sidebar_min_number('dayofweek')
    dayofyear = _sidebar_min_number('dayofyear')
    weekofyear = _sidebar_min_number('weekofyear')
    quarter = _sidebar_min_number('quarter')
    is_month_start = _sidebar_min_number('is_month_start')
    is_month_end = _sidebar_min_number('is_month_end')
    is_quarter_start = _sidebar_min_number('is_quarter_start')
    is_quarter_end = _sidebar_min_number('is_quarter_end')
    is_year_start = _sidebar_min_number('is_year_start')
    is_year_end = _sidebar_min_number('is_year_end')
    year_weekofyear = _sidebar_min_number('year_weekofyear')
    city = _sidebar_choice("city:", 'city')
    type_y = _sidebar_min_number('type')
    cluster = _sidebar_choice('cluster', 'cluster')

    # Single-row record of the user's inputs; key order fixes the column order
    # of the frame handed to the model.
    input_df = {
        'store_id': store_id,
        'category_id': category_id,
        'onpromotion': onpromotion,
        'year': year,
        'month': month,
        'dayofmonth': dayofmonth,
        'dayofweek': dayofweek,
        'dayofyear': dayofyear,
        'weekofyear': weekofyear,
        'quarter': quarter,
        'is_month_start': is_month_start,
        # Previously filled from is_month_start, which silently discarded the
        # user's is_month_end input.
        'is_month_end': is_month_end,
        'is_quarter_start': is_quarter_start,
        'is_quarter_end': is_quarter_end,
        'is_year_start': is_year_start,
        'is_year_end': is_year_end,
        'year_weekofyear': year_weekofyear,
        'city': city,
        'type': type_y,
        'cluster': cluster,
    }

    # Put the input dictionary in a dataset
    input_data = pd.DataFrame(input_df, index=[0])

    # Categorical column(s) to one-hot encode. Feature names are requested for
    # exactly these columns, so the encoder must be given the same columns
    # rather than the whole frame.
    col = ['city']
    encoded = Encoder.transform(input_data[col])
    # The encoder may return a sparse matrix or, if configured for dense
    # output, a plain array; densify only when needed.
    if hasattr(encoded, "toarray"):
        encoded = encoded.toarray()
    input_encoded_df = pd.DataFrame(encoded,
                                    columns=Encoder.get_feature_names_out(col))

    # Drop the raw categorical column(s) and append their encoded replacement
    train_enc = input_data.drop(col, axis=1)
    input_d = pd.concat([train_enc, input_encoded_df], axis=1)

    # Run the model only when the user asks for a prediction, instead of on
    # every script rerun triggered by a widget change.
    if st.button('Predict'):
        # input_d holds one row, so to_numpy() already has shape (1, n_features)
        prediction = model.predict(input_d.to_numpy())
        st.success('The predicted target is ' + str(round(prediction[0],2)))