kaz9112 committed on
Commit
8ce5240
1 Parent(s): e64f3a1

first upload

Browse files
Files changed (11) hide show
  1. .gitattributes +1 -0
  2. EDA.py +49 -0
  3. USvideos.csv +3 -0
  4. barchart_hour.jpg +0 -0
  5. convert_str.pkl +3 -0
  6. list_cat.txt +1 -0
  7. list_num.txt +1 -0
  8. main.py +10 -0
  9. prediction.py +114 -0
  10. random_forest.pkl +3 -0
  11. requirements.txt +7 -0
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ *.csv filter=lfs diff=lfs merge=lfs -text
EDA.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import seaborn as sns
3
+ sns.set()
4
+ import pandas as pd
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+ from PIL import Image
8
+ image = Image.open('barchart_hour.jpg')
9
+
10
+
11
+
12
def run():
    """Render the exploratory-data-analysis page of the Streamlit app.

    Shows a title and description, a random sample of the rows of
    ``USvideos.csv``, two scatter plots (views vs. likes and views vs.
    comment count) and the pre-rendered ``barchart_hour.jpg`` image held in
    the module-level ``image`` object.
    """
    # Create the title
    st.title('Exploratory Data Analysis')

    # Create the subheader
    st.subheader('EDA For American Youtube trending videos')

    # Create the description
    st.write('This page made by Sam')

    # Draw a horizontal rule
    st.markdown('---')

    # Streamlit "magic" (bare expressions being rendered) only applies to the
    # main app script, not to imported modules such as this one, so the text
    # is written explicitly.
    st.write('''
    Ma ma mia ma ma mia
    ''')

    # Show a random sample of the raw data (at most 100 rows, so that a
    # shorter file does not make DataFrame.sample raise).
    raw_data = pd.read_csv('USvideos.csv')
    st.dataframe(raw_data.sample(min(100, len(raw_data))))

    st.write('### scatterplot views(in hundred millions) vs likes')
    fig, ax = plt.subplots(figsize=(15, 10))
    sns.scatterplot(x=raw_data['views'], y=raw_data['likes'], ax=ax)
    st.pyplot(fig)
    # Close the figure once rendered; otherwise every rerun of the script
    # leaves another open figure behind in pyplot's registry.
    plt.close(fig)

    st.write('### scatterplot views(in hundred millions) vs comment_counts')
    fig, ax = plt.subplots(figsize=(15, 10))
    # No `palette` here: without a `hue` variable seaborn ignores it.
    sns.scatterplot(x=raw_data['views'], y=raw_data['comment_count'], ax=ax)
    st.pyplot(fig)
    plt.close(fig)

    st.write('### Views(in millions) by video published hour')
    # All other st.image arguments were being passed at their default values.
    st.image(image)


if __name__ == '__main__':
    run()
USvideos.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09b4eb71295752705e472ebefeac9d2afab4177b7a818af795dea62744a48eb2
3
+ size 62756152
barchart_hour.jpg ADDED
convert_str.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ce72b3cc0b5e955db3448b168c04caa4452c6b929fb16e1ac5d11d5076da6b3
3
+ size 42
list_cat.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ["comments_disabled", "ratings_disabled", "video_error_or_removed"]
list_num.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ ["category_id", "likes", "dislikes", "comment_count", "hour"]
main.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import EDA
3
+ import prediction
4
+
5
# Labels of the two pages offered in the sidebar selector.
EDA_PAGE = 'Exploratory Data Analysis'
PREDICTION_PAGE = 'Predict a Views'

navigation = st.sidebar.selectbox('Choose page: ', (EDA_PAGE, PREDICTION_PAGE))

# Only an explicit choice of the prediction page renders the prediction
# form; any other selection falls back to the EDA page.
if navigation != PREDICTION_PAGE:
    EDA.run()
else:
    prediction.run()
prediction.py ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import seaborn as sns
3
+ sns.set()
4
+ import pandas as pd
5
+ import numpy as np
6
+ import matplotlib.pyplot as plt
7
+ import joblib
8
+ import json
9
+
10
+ # Load model
11
+
12
# Load the pickled model. joblib.load unpickles the file, so it must only be
# pointed at files from a trusted source.
with open('random_forest.pkl', 'rb') as model_file:
    model_random_forest = joblib.load(model_file)

# Load the JSON-encoded list of categorical column names.
with open('list_cat.txt', 'r') as cat_file:
    model_cat_list = json.loads(cat_file.read())

# Load the JSON-encoded list of numerical column names.
with open('list_num.txt', 'r') as num_file:
    model_num_list = json.loads(num_file.read())
20
+
21
+ def categorica_yt(x):
22
+ if x == 'Entertainment':
23
+ category_id = 24
24
+ elif x == 'Music':
25
+ category_id = 10
26
+ elif x == 'Howto & Style':
27
+ category_id = 26
28
+ elif x == 'Comedy':
29
+ category_id = 23
30
+ elif x == 'People & Blogs':
31
+ category_id = 22
32
+ elif x == 'News & Politics':
33
+ category_id = 25
34
+ elif x == 'Science & Technology':
35
+ category_id = 28
36
+ elif x == 'Film & Animation':
37
+ category_id = 1
38
+ elif x == 'Sports':
39
+ category_id = 17
40
+ elif x == 'Education':
41
+ category_id = 27
42
+ elif x == 'Pets & Animals':
43
+ category_id = 15
44
+ elif x == 'Gaming':
45
+ category_id = 20
46
+ elif x == 'Travel & Events':
47
+ category_id = 19
48
+ elif x == 'Autos & Vehicles':
49
+ category_id = 2
50
+ elif x == 'Nonprofits & Activism':
51
+ category_id = 29
52
+ elif x == 'Shows':
53
+ category_id = 43
54
+ return category_id
55
+
56
+ def convert_to_str(x):
57
+ for i in model_cat_list :
58
+ x[i] = x[i].map({True: 'yes', False: 'no'})
59
+ # print(i)
60
+ return x
61
+
62
+
63
+ def run():
64
+ # Membuat Title
65
+ st.title('Predicting your views')
66
+
67
+ # membuat form
68
+ with st.form(key='form_parameters'):
69
+ likes = st.number_input('Likes', min_value=0, value=25, step=1, help='Likes count')
70
+ dislikes = st.number_input('Dislikes', min_value=0, value=25, step=1, help='Dislikes count')
71
+
72
+
73
+ comment_count = st.number_input('Comment counts', min_value=0, value=25, step=1, help='Comment counts')
74
+ hour = st.number_input('Hour published', min_value=0, max_value=23, value=8, step=1, help='Video Hour published')
75
+
76
+ category = st.selectbox('Category', ('Entertainment', 'Music', 'Howto & Style', 'Comedy', 'People & Blogs', 'News & Politics', 'Science & Technology', 'Film & Animation', 'Sports', 'Education', 'Pets & Animals', 'Gaming', 'Travel & Events', 'Autos & Vehicles', 'Nonprofits & Activism', 'Shows'))
77
+ cat_id = categorica_yt(category)
78
+
79
+ comment_disabled = st.selectbox('Comment disabled', ('no','yes'))
80
+ rating_disabled = st.selectbox('Rating disabled', ('no','yes'))
81
+ video_error_or_removed = st.selectbox('video error or removed disabled', ('no','yes'))
82
+
83
+
84
+ st.markdown('---')
85
+
86
+ submitted = st.form_submit_button('Predict')
87
+
88
+ data_inf = {
89
+ 'category_id': cat_id,
90
+ 'likes': likes,
91
+ 'dislikes': dislikes,
92
+ 'comment_count': comment_count,
93
+ 'hour' : hour,
94
+ 'comments_disabled': comment_disabled,
95
+ 'ratings_disabled': rating_disabled,
96
+ 'video_error_or_removed': video_error_or_removed
97
+ }
98
+
99
+ data_inf = pd.DataFrame([data_inf])
100
+ st.dataframe(data_inf)
101
+
102
+ if submitted:
103
+ # divide features
104
+ data_inf_1 = data_inf.copy()
105
+
106
+ # convert some column to str so we can put it to pipeline later
107
+ new_inf = convert_to_str(data_inf_1)
108
+
109
+ y_pred_inf = model_random_forest.predict(new_inf)
110
+
111
+ st.write('## Total views predicted around: ' + str(int(y_pred_inf)))
112
+
113
+ if __name__ == '__main__':
114
+ run()
random_forest.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4703aec6fb84d3ec2b24dbb76720eb15f1b02abcbe950ed768a00912b5c9f92
3
+ size 113252720
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ seaborn
2
+ pandas
3
+ numpy
4
+ matplotlib
5
+ joblib
6
+ scikit-learn==1.0.2
7
+ Pillow==9.2.0