import json

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st

sns.set()

# Load the trained model and the feature-name lists at import time.
# NOTE(review): joblib.load unpickles arbitrary objects; only ship model files
# that come from a trusted source.
with open('random_forest.pkl', 'rb') as file_1:
    model_random_forest = joblib.load(file_1)

with open('list_cat.txt', 'r', encoding='utf-8') as file_3:
    model_cat_list = json.load(file_3)

# Loaded alongside the categorical list; not referenced by the code below.
with open('list_num.txt', 'r', encoding='utf-8') as file_4:
    model_num_list = json.load(file_4)

# Category label -> numeric category id. Insertion order is also the order in
# which the labels are offered in the form's select box.
_CATEGORY_IDS = {
    'Entertainment': 24,
    'Music': 10,
    'Howto & Style': 26,
    'Comedy': 23,
    'People & Blogs': 22,
    'News & Politics': 25,
    'Science & Technology': 28,
    'Film & Animation': 1,
    'Sports': 17,
    'Education': 27,
    'Pets & Animals': 15,
    'Gaming': 20,
    'Travel & Events': 19,
    'Autos & Vehicles': 2,
    'Nonprofits & Activism': 29,
    'Shows': 43,
}


def categorica_yt(x: str) -> int:
    """Return the numeric category id for the category label *x*.

    Raises:
        ValueError: if *x* is not one of the known category labels. (The
            previous if/elif chain failed with an UnboundLocalError here.)
    """
    try:
        return _CATEGORY_IDS[x]
    except KeyError:
        raise ValueError(f'Unknown category: {x!r}') from None


def _bool_to_label(value):
    """Translate a boolean to 'yes'/'no'; return any other value unchanged."""
    if isinstance(value, (bool, np.bool_)):
        return 'yes' if value else 'no'
    return value


def convert_to_str(x: pd.DataFrame) -> pd.DataFrame:
    """Convert boolean flags in the columns named by ``model_cat_list`` to text.

    True becomes 'yes' and False becomes 'no'. Values that are not booleans
    (for example the 'yes'/'no' strings produced by the form) are kept as they
    are; mapping them through a ``{True: ..., False: ...}`` dict, as was done
    before, turned every one of them into NaN.

    The frame is modified in place and also returned.
    """
    for column in model_cat_list:
        x[column] = x[column].map(_bool_to_label)
    return x


def run() -> None:
    """Render the input form and, once submitted, show the predicted views."""
    # Create the title
    st.title('Predicting your views')

    # Create the form
    with st.form(key='form_parameters'):
        likes = st.number_input(
            'Likes', min_value=0, value=25, step=1, help='Likes count')
        dislikes = st.number_input(
            'Dislikes', min_value=0, value=25, step=1, help='Dislikes count')
        comment_count = st.number_input(
            'Comment counts', min_value=0, value=25, step=1,
            help='Comment counts')
        hour = st.number_input(
            'Hour published', min_value=0, max_value=23, value=8, step=1,
            help='Video Hour published')
        # Options come from the lookup table so the two can never drift apart.
        category = st.selectbox('Category', tuple(_CATEGORY_IDS))
        cat_id = categorica_yt(category)
        comment_disabled = st.selectbox('Comment disabled', ('no', 'yes'))
        rating_disabled = st.selectbox('Rating disabled', ('no', 'yes'))
        video_error_or_removed = st.selectbox(
            'video error or removed disabled', ('no', 'yes'))
        st.markdown('---')

        submitted = st.form_submit_button('Predict')

    data_inf = {
        'category_id': cat_id,
        'likes': likes,
        'dislikes': dislikes,
        'comment_count': comment_count,
        'hour': hour,
        'comments_disabled': comment_disabled,
        'ratings_disabled': rating_disabled,
        'video_error_or_removed': video_error_or_removed,
    }

    data_inf = pd.DataFrame([data_inf])
    st.dataframe(data_inf)

    if submitted:
        # Work on a copy so the frame displayed above is left untouched.
        data_inf_1 = data_inf.copy()
        # Normalise the flag columns to text before handing them to the model.
        new_inf = convert_to_str(data_inf_1)

        y_pred_inf = model_random_forest.predict(new_inf)
        # predict() is given a single row; take that element explicitly
        # instead of relying on the deprecated implicit conversion of a
        # 1-element array to a scalar.
        predicted_views = int(y_pred_inf[0])
        st.write('## Total views predicted around: ' + str(predicted_views))


if __name__ == '__main__':
    run()