Spaces:

kaz9112
/

Yt-view-pred-sam

Runtime error

App Files Files Community

kaz9112 committed on Mar 4, 2023

Commit

8ce5240

1 Parent(s): e64f3a1

first upload

Browse files

Files changed (11) hide show

.gitattributes +1 -0
EDA.py +49 -0
USvideos.csv +3 -0
barchart_hour.jpg +0 -0
convert_str.pkl +3 -0
list_cat.txt +1 -0
list_num.txt +1 -0
main.py +10 -0
prediction.py +114 -0
random_forest.pkl +3 -0
requirements.txt +7 -0

.gitattributes CHANGED Viewed

@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.csv filter=lfs diff=lfs merge=lfs -text

EDA.py ADDED Viewed

	@@ -0,0 +1,49 @@

+import streamlit as st
+import seaborn as sns
+sns.set()
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+from PIL import Image
# Load the chart image once at import time so every call to run() reuses it.
image = Image.open('barchart_hour.jpg')


def run():
    """Render the "Exploratory Data Analysis" page of the Streamlit app.

    Reads ``USvideos.csv`` from the working directory, shows a random sample
    of its rows, draws scatter plots of ``views`` against ``likes`` and
    against ``comment_count``, and finally displays the pre-rendered
    ``barchart_hour.jpg`` image loaded at module import.
    """
    # Page title, sub header, description and a horizontal rule.
    st.title('Exploratory Data Analysis')
    st.subheader('EDA For American Youtube trending videos')
    st.write('This page made by Sam')
    st.markdown('---')

    # Written with an explicit call: Streamlit's "magic" rendering of bare
    # expressions only applies to the main script, so a bare string here
    # would be silently dropped when this module is imported by main.py.
    st.write('Ma ma mia ma ma mia')

    # Show a random sample of the raw data (capped at the number of rows so
    # a small file does not make DataFrame.sample raise).
    raw_data = pd.read_csv('USvideos.csv')
    st.dataframe(raw_data.sample(min(100, len(raw_data))))

    st.write('### scatterplot views(in hundred millions) vs likes')
    fig, ax = plt.subplots(figsize=(15, 10))
    sns.scatterplot(x=raw_data['views'], y=raw_data['likes'], ax=ax)
    st.pyplot(fig)
    # Close the figure so reruns of the script do not accumulate open figures.
    plt.close(fig)

    st.write('### scatterplot views(in hundred millions) vs comment_counts')
    fig, ax = plt.subplots(figsize=(15, 10))
    # No `palette` here: seaborn ignores it unless a `hue` variable is given.
    sns.scatterplot(x=raw_data['views'], y=raw_data['comment_count'], ax=ax)
    st.pyplot(fig)
    plt.close(fig)

    st.write('### Views(in millions) by video published hour')
    st.image(image)


if __name__ == '__main__':
    run()

USvideos.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:09b4eb71295752705e472ebefeac9d2afab4177b7a818af795dea62744a48eb2
+size 62756152

barchart_hour.jpg ADDED Viewed

convert_str.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6ce72b3cc0b5e955db3448b168c04caa4452c6b929fb16e1ac5d11d5076da6b3
+size 42

list_cat.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["comments_disabled", "ratings_disabled", "video_error_or_removed"]

list_num.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["category_id", "likes", "dislikes", "comment_count", "hour"]

main.py ADDED Viewed

	@@ -0,0 +1,10 @@

+import streamlit as st
+import EDA
+import prediction
# Sidebar page selector; anything other than the prediction page falls back
# to the exploratory data analysis page.
navigation = st.sidebar.selectbox(
    'Choose page: ',
    ('Exploratory Data Analysis', 'Predict a Views'),
)
(prediction if navigation == 'Predict a Views' else EDA).run()

prediction.py ADDED Viewed

	@@ -0,0 +1,114 @@

+import streamlit as st
+import seaborn as sns
+sns.set()
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import joblib
+import json
# Load the persisted model and the two JSON lists of feature names that sit
# next to this script.
# NOTE(review): joblib files are pickle-based; only load artifacts that come
# from a trusted source.
with open('random_forest.pkl', 'rb') as model_file:
    model_random_forest = joblib.load(model_file)

with open('list_cat.txt', 'r') as cat_file:
    model_cat_list = json.load(cat_file)

with open('list_num.txt', 'r') as num_file:
    model_num_list = json.load(num_file)
def categorica_yt(x):
    """Return the numeric category id for a YouTube category name.

    Args:
        x: Category display name, spelled exactly as in the lookup table
            below (for example ``'Music'`` or ``'Film & Animation'``).

    Returns:
        The integer ``category_id`` paired with that name.

    Raises:
        ValueError: If ``x`` is not one of the known category names.
    """
    category_ids = {
        'Entertainment': 24,
        'Music': 10,
        'Howto & Style': 26,
        'Comedy': 23,
        'People & Blogs': 22,
        'News & Politics': 25,
        'Science & Technology': 28,
        'Film & Animation': 1,
        'Sports': 17,
        'Education': 27,
        'Pets & Animals': 15,
        'Gaming': 20,
        'Travel & Events': 19,
        'Autos & Vehicles': 2,
        'Nonprofits & Activism': 29,
        'Shows': 43,
    }
    try:
        return category_ids[x]
    except (KeyError, TypeError):
        # An unknown (or unhashable) value used to fall through every branch
        # and fail with an opaque UnboundLocalError; report it explicitly.
        raise ValueError(f'Unknown YouTube category: {x!r}') from None
def convert_to_str(x):
    """Turn boolean flags in the categorical columns into 'yes'/'no' labels.

    Every column named in the module-level ``model_cat_list`` is rewritten in
    place: ``True`` becomes ``'yes'`` and ``False`` becomes ``'no'``. Values
    that are not booleans (for example labels that are already ``'yes'`` or
    ``'no'``) are left unchanged; a plain dictionary ``map`` would have
    replaced them with NaN.

    Args:
        x: DataFrame containing every column listed in ``model_cat_list``.
            It is modified in place.

    Returns:
        The same DataFrame ``x``, for convenient chaining.
    """
    def _as_label(value):
        # Only genuine booleans are converted; everything else passes through.
        if isinstance(value, (bool, np.bool_)):
            return 'yes' if value else 'no'
        return value

    for column in model_cat_list:
        x[column] = x[column].map(_as_label)
    return x
def run():
    """Render the "Predicting your views" page of the Streamlit app.

    Shows a form collecting the video's statistics and flags, displays the
    resulting single-row DataFrame, and, once the form is submitted, passes
    that row through ``convert_to_str`` and ``model_random_forest.predict``
    and writes the predicted view count to the page.
    """
    st.title('Predicting your views')

    # Input form: widget values only reach the script when 'Predict' is pressed.
    with st.form(key='form_parameters'):
        likes = st.number_input('Likes', min_value=0, value=25, step=1, help='Likes count')
        dislikes = st.number_input('Dislikes', min_value=0, value=25, step=1, help='Dislikes count')
        comment_count = st.number_input('Comment counts', min_value=0, value=25, step=1, help='Comment counts')
        hour = st.number_input('Hour published', min_value=0, max_value=23, value=8, step=1, help='Video Hour published')
        category = st.selectbox(
            'Category',
            (
                'Entertainment', 'Music', 'Howto & Style', 'Comedy',
                'People & Blogs', 'News & Politics', 'Science & Technology',
                'Film & Animation', 'Sports', 'Education', 'Pets & Animals',
                'Gaming', 'Travel & Events', 'Autos & Vehicles',
                'Nonprofits & Activism', 'Shows',
            ),
        )
        cat_id = categorica_yt(category)
        comment_disabled = st.selectbox('Comment disabled', ('no', 'yes'))
        rating_disabled = st.selectbox('Rating disabled', ('no', 'yes'))
        video_error_or_removed = st.selectbox('video error or removed disabled', ('no', 'yes'))
        st.markdown('---')
        submitted = st.form_submit_button('Predict')

    # One-row frame holding the current form values under the model's column names.
    data_inf = pd.DataFrame([{
        'category_id': cat_id,
        'likes': likes,
        'dislikes': dislikes,
        'comment_count': comment_count,
        'hour': hour,
        'comments_disabled': comment_disabled,
        'ratings_disabled': rating_disabled,
        'video_error_or_removed': video_error_or_removed,
    }])
    st.dataframe(data_inf)

    if submitted:
        # convert_to_str rewrites its argument in place, so hand it a copy
        # and keep the displayed frame untouched.
        new_inf = convert_to_str(data_inf.copy())
        y_pred_inf = model_random_forest.predict(new_inf)
        # Take the single prediction out of the result explicitly: calling
        # int() on a whole NumPy array is deprecated.
        predicted_views = int(np.ravel(y_pred_inf)[0])
        st.write('## Total views predicted around: ' + str(predicted_views))


if __name__ == '__main__':
    run()

random_forest.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c4703aec6fb84d3ec2b24dbb76720eb15f1b02abcbe950ed768a00912b5c9f92
+size 113252720

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+seaborn
+pandas
+numpy
+matplotlib
+joblib
+scikit-learn==1.0.2
+Pillow==9.2.0