import streamlit as st
import seaborn as sns
sns.set()
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
import json

# Load the persisted model and the two JSON lists stored next to it.

# NOTE(review): joblib.load unpickles arbitrary Python objects, so
# 'random_forest.pkl' must only ever come from a trusted source.
with open('random_forest.pkl', 'rb') as file_1:
    model_random_forest = joblib.load(file_1)

# The list files are parsed as JSON, so decode them as UTF-8 explicitly
# instead of relying on the platform's default text encoding.
# model_cat_list holds the column names that convert_to_str rewrites.
with open('list_cat.txt', 'r', encoding='utf-8') as file_3:
    model_cat_list = json.load(file_3)

# presumably the numeric feature names; not used in the visible code.
with open('list_num.txt', 'r', encoding='utf-8') as file_4:
    model_num_list = json.load(file_4)

# Category name -> numeric id, as consumed by the 'category_id' feature.
_CATEGORY_IDS = {
    'Entertainment': 24,
    'Music': 10,
    'Howto & Style': 26,
    'Comedy': 23,
    'People & Blogs': 22,
    'News & Politics': 25,
    'Science & Technology': 28,
    'Film & Animation': 1,
    'Sports': 17,
    'Education': 27,
    'Pets & Animals': 15,
    'Gaming': 20,
    'Travel & Events': 19,
    'Autos & Vehicles': 2,
    'Nonprofits & Activism': 29,
    'Shows': 43,
}


def categorica_yt(x):
    """Return the numeric category id for a category name.

    Args:
        x: Category name, e.g. ``'Music'`` or ``'Film & Animation'``.
            The match is exact and case-sensitive.

    Returns:
        The integer id associated with ``x``.

    Raises:
        ValueError: If ``x`` is not one of the known category names.
            (Previously an unknown name fell through every branch and
            surfaced as an ``UnboundLocalError``.)
    """
    try:
        return _CATEGORY_IDS[x]
    except (KeyError, TypeError):
        # TypeError covers unhashable inputs, which cannot be valid names.
        raise ValueError(f'Unknown category name: {x!r}') from None

def convert_to_str(x):
    """Encode boolean flags in the categorical columns as 'yes'/'no'.

    For every column named in the module-level ``model_cat_list``,
    ``True`` becomes ``'yes'`` and ``False`` becomes ``'no'``. Any value
    that is not a boolean is left untouched.

    Mapping with a plain ``{True: 'yes', False: 'no'}`` dict would turn
    every value missing from the dict into NaN, which silently erased the
    ``'yes'``/``'no'`` strings that ``run()`` already collects from its
    select boxes. Passing non-boolean values through keeps the function
    safe to call on data that is already encoded.

    Args:
        x: DataFrame containing every column listed in ``model_cat_list``.

    Returns:
        The same DataFrame object, modified in place.
    """
    labels = {True: 'yes', False: 'no'}

    def _encode(value):
        # Only genuine booleans are rewritten; ints, strings and missing
        # values pass through unchanged.
        if isinstance(value, (bool, np.bool_)):
            return labels[bool(value)]
        return value

    for column in model_cat_list:
        x[column] = x[column].map(_encode)
    return x


def run():
    """Render the prediction form and display the predicted view count.

    Collects the inputs in a Streamlit form, shows them as a one-row
    DataFrame, and, once the form has been submitted, passes that row
    through ``convert_to_str`` and ``model_random_forest.predict`` and
    writes the predicted number of views to the page.
    """
    # Create the title
    st.title('Predicting your views')

    # Create the form
    with st.form(key='form_parameters'):
        likes = st.number_input('Likes', min_value=0, value=25, step=1, help='Likes count')
        dislikes = st.number_input('Dislikes', min_value=0, value=25, step=1, help='Dislikes count')

        comment_count = st.number_input('Comment counts', min_value=0, value=25, step=1, help='Comment counts')
        hour = st.number_input('Hour published', min_value=0, max_value=23, value=8, step=1, help='Video Hour published')

        category = st.selectbox('Category', ('Entertainment', 'Music', 'Howto & Style', 'Comedy', 'People & Blogs', 'News & Politics', 'Science & Technology', 'Film & Animation', 'Sports', 'Education', 'Pets & Animals', 'Gaming', 'Travel & Events', 'Autos & Vehicles', 'Nonprofits & Activism', 'Shows'))
        # The row built below stores the numeric id, not the display name.
        cat_id = categorica_yt(category)

        comment_disabled = st.selectbox('Comment disabled', ('no','yes'))
        rating_disabled = st.selectbox('Rating disabled', ('no','yes'))
        video_error_or_removed = st.selectbox('video error or removed disabled', ('no','yes'))

        st.markdown('---')

        submitted = st.form_submit_button('Predict')

    # Assemble the inputs into a single-row frame and show it to the user.
    data_inf = {
        'category_id': cat_id,
        'likes': likes,
        'dislikes': dislikes,
        'comment_count': comment_count,
        'hour' : hour,
        'comments_disabled': comment_disabled,
        'ratings_disabled': rating_disabled,
        'video_error_or_removed': video_error_or_removed
    }

    data_inf = pd.DataFrame([data_inf])
    st.dataframe(data_inf)

    if submitted:
        # Work on a copy: convert_to_str modifies the frame it is given.
        data_inf_1 = data_inf.copy()

        # Encode the categorical columns before handing the row to the model
        new_inf = convert_to_str(data_inf_1)

        y_pred_inf = model_random_forest.predict(new_inf)

        # predict() presumably returns an array with one entry per input
        # row. Pull out the single element explicitly, because calling
        # int() on an array with ndim > 0 is deprecated since NumPy 1.25.
        predicted_views = int(np.asarray(y_pred_inf).item())

        st.write('## Total views predicted around: ' + str(predicted_views))

# Launch the app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    run()