# Yt-view-pred-sam / prediction.py
# kaz9112's picture
# first upload
# 8ce5240
import streamlit as st
import seaborn as sns
sns.set()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
import json
# Load the trained model and the feature-name lists once, at import time.
# NOTE: joblib.load unpickles the file, so only load model files you trust.
with open('random_forest.pkl', 'rb') as file_1:
    model_random_forest = joblib.load(file_1)

# The feature lists are stored as JSON; read them as UTF-8 explicitly so the
# result does not depend on the platform's default text encoding.
with open('list_cat.txt', 'r', encoding='utf-8') as file_3:
    model_cat_list = json.load(file_3)

with open('list_num.txt', 'r', encoding='utf-8') as file_4:
    model_num_list = json.load(file_4)
# Category display name -> numeric category id.
_CATEGORY_IDS = {
    'Entertainment': 24,
    'Music': 10,
    'Howto & Style': 26,
    'Comedy': 23,
    'People & Blogs': 22,
    'News & Politics': 25,
    'Science & Technology': 28,
    'Film & Animation': 1,
    'Sports': 17,
    'Education': 27,
    'Pets & Animals': 15,
    'Gaming': 20,
    'Travel & Events': 19,
    'Autos & Vehicles': 2,
    'Nonprofits & Activism': 29,
    'Shows': 43,
}


def categorica_yt(x):
    """Return the numeric category id for the category name *x*.

    Args:
        x: Category display name, e.g. ``'Music'``.

    Returns:
        The integer category id mapped to *x*.

    Raises:
        ValueError: If *x* is not one of the known category names.
    """
    try:
        return _CATEGORY_IDS[x]
    except (KeyError, TypeError):
        # An unknown name used to surface as an UnboundLocalError at the
        # return statement; report the offending value explicitly instead.
        raise ValueError(f'Unknown YouTube category: {x!r}') from None
def convert_to_str(x, columns=None):
    """Convert boolean flag columns of *x* to ``'yes'``/``'no'`` strings.

    Args:
        x: DataFrame holding the columns to convert; it is modified in place.
        columns: Names of the columns to convert. Defaults to the
            module-level ``model_cat_list``.

    Returns:
        The same DataFrame *x* that was passed in.
    """
    if columns is None:
        columns = model_cat_list
    labels = {True: 'yes', False: 'no'}
    for name in columns:
        # Series.map(dict) replaces every value missing from the dict with
        # NaN, which erased inputs that were already 'yes'/'no' strings.
        # Fall back to the value itself so only booleans are rewritten.
        x[name] = x[name].map(lambda value: labels.get(value, value))
    return x
def run():
    """Render the view-prediction page and show a prediction on submit.

    Builds a Streamlit form for the video features, displays the collected
    inputs as a one-row DataFrame, and, once the form has been submitted,
    writes the view count predicted by ``model_random_forest`` to the page.
    """
    # Create the page title
    st.title('Predicting your views')

    # Create the input form
    with st.form(key='form_parameters'):
        likes = st.number_input('Likes', min_value=0, value=25, step=1, help='Likes count')
        dislikes = st.number_input('Dislikes', min_value=0, value=25, step=1, help='Dislikes count')
        comment_count = st.number_input('Comment counts', min_value=0, value=25, step=1, help='Comment counts')
        hour = st.number_input('Hour published', min_value=0, max_value=23, value=8, step=1, help='Video Hour published')
        category = st.selectbox(
            'Category',
            (
                'Entertainment', 'Music', 'Howto & Style', 'Comedy',
                'People & Blogs', 'News & Politics', 'Science & Technology',
                'Film & Animation', 'Sports', 'Education', 'Pets & Animals',
                'Gaming', 'Travel & Events', 'Autos & Vehicles',
                'Nonprofits & Activism', 'Shows',
            ),
        )
        # The data frame below carries the numeric id, not the display name.
        cat_id = categorica_yt(category)
        comment_disabled = st.selectbox('Comment disabled', ('no', 'yes'))
        rating_disabled = st.selectbox('Rating disabled', ('no', 'yes'))
        video_error_or_removed = st.selectbox('video error or removed disabled', ('no', 'yes'))
        st.markdown('---')
        submitted = st.form_submit_button('Predict')

    # Collect the inputs into a single-row frame and echo it to the user.
    data_inf = {
        'category_id': cat_id,
        'likes': likes,
        'dislikes': dislikes,
        'comment_count': comment_count,
        'hour': hour,
        'comments_disabled': comment_disabled,
        'ratings_disabled': rating_disabled,
        'video_error_or_removed': video_error_or_removed,
    }
    data_inf = pd.DataFrame([data_inf])
    st.dataframe(data_inf)

    if submitted:
        # Work on a copy: convert_to_str modifies the frame it is given.
        data_inf_1 = data_inf.copy()
        # Convert some columns to str so they can be fed to the pipeline.
        new_inf = convert_to_str(data_inf_1)
        y_pred_inf = model_random_forest.predict(new_inf)
        # Extract the single prediction with .item() rather than int(array):
        # implicit conversion of an ndim > 0 array to a scalar is deprecated
        # in NumPy.
        predicted_views = int(np.asarray(y_pred_inf).item())
        st.write('## Total views predicted around: ' + str(predicted_views))
# Script entry point: render the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run()