Spaces:
Runtime error
Runtime error
first upload
Browse files- .gitattributes +1 -0
- EDA.py +49 -0
- USvideos.csv +3 -0
- barchart_hour.jpg +0 -0
- convert_str.pkl +3 -0
- list_cat.txt +1 -0
- list_num.txt +1 -0
- main.py +10 -0
- prediction.py +114 -0
- random_forest.pkl +3 -0
- requirements.txt +7 -0
.gitattributes
CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
32 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*.csv filter=lfs diff=lfs merge=lfs -text
|
EDA.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import seaborn as sns
|
3 |
+
sns.set()
|
4 |
+
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
from PIL import Image
|
8 |
+
image = Image.open('barchart_hour.jpg')
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
def run():
    """Render the exploratory-data-analysis page of the Streamlit app.

    Writes the page heading, a random 100-row sample of ``USvideos.csv``,
    two scatter plots (views vs. likes and views vs. comment count) and the
    module-level ``image`` (opened from ``barchart_hour.jpg``) to the page.
    """
    # Create the title
    st.title('Exploratory Data Analysis')

    # Create the sub header
    st.subheader('EDA For American Youtube trending videos')

    # Create the description
    st.write('This page made by Sam')

    # Draw a horizontal rule
    st.markdown('---')

    # Magic Syntax
    # NOTE(review): Streamlit documents "magic" as working only in the main
    # app file; when this module is imported from main.py this bare string is
    # presumably not rendered -- confirm and switch to st.write if it should be.
    '''
    Ma ma mia ma ma mia
    '''

    # Show DataFrame
    raw_data = pd.read_csv('USvideos.csv')
    st.dataframe(raw_data.sample(100))

    st.write('### scatterplot views(in hundred millions) vs likes')
    fig = plt.figure(figsize=(15, 10))
    sns.scatterplot(x=raw_data['views'], y=raw_data['likes'])
    st.pyplot(fig)
    # pyplot keeps every figure it creates registered until it is closed;
    # close it so repeated page reruns do not accumulate figures.
    plt.close(fig)

    st.write('### scatterplot views(in hundred millions) vs comment_counts')
    fig = plt.figure(figsize=(15, 10))
    # No `hue` is assigned, so a `palette` argument would have no effect here.
    sns.scatterplot(x=raw_data['views'], y=raw_data['comment_count'])
    st.pyplot(fig)
    plt.close(fig)

    st.write('### Views(in millions) by video published hour')
    # The keyword arguments previously spelled out here only repeated what
    # are presumably st.image's defaults, so they are omitted.
    st.image(image)


if __name__ == '__main__':
    run()
|
USvideos.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09b4eb71295752705e472ebefeac9d2afab4177b7a818af795dea62744a48eb2
|
3 |
+
size 62756152
|
barchart_hour.jpg
ADDED
convert_str.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ce72b3cc0b5e955db3448b168c04caa4452c6b929fb16e1ac5d11d5076da6b3
|
3 |
+
size 42
|
list_cat.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["comments_disabled", "ratings_disabled", "video_error_or_removed"]
|
list_num.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["category_id", "likes", "dislikes", "comment_count", "hour"]
|
main.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import EDA
|
3 |
+
import prediction
|
4 |
+
|
5 |
+
# Sidebar selector that decides which page module gets rendered.
page_choices = ('Exploratory Data Analysis', 'Predict a Views')
navigation = st.sidebar.selectbox('Choose page: ', page_choices)

# Only the prediction entry routes to prediction.py; every other value
# falls through to the EDA page.
selected_page = prediction if navigation == 'Predict a Views' else EDA
selected_page.run()
|
prediction.py
ADDED
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import seaborn as sns
|
3 |
+
sns.set()
|
4 |
+
import pandas as pd
|
5 |
+
import numpy as np
|
6 |
+
import matplotlib.pyplot as plt
|
7 |
+
import joblib
|
8 |
+
import json
|
9 |
+
|
10 |
+
# Load model
|
11 |
+
|
12 |
+
# Estimator used by run() for prediction, deserialized with joblib.
with open('random_forest.pkl', 'rb') as model_file:
    model_random_forest = joblib.load(model_file)

# JSON lists of column names: `model_cat_list` is consumed by
# convert_to_str(); `model_num_list` is loaded alongside it.
with open('list_cat.txt', 'r') as cat_file, open('list_num.txt', 'r') as num_file:
    model_cat_list = json.load(cat_file)
    model_num_list = json.load(num_file)
|
20 |
+
|
21 |
+
# Category name -> numeric category id, as used by categorica_yt().
_CATEGORY_IDS = {
    'Entertainment': 24,
    'Music': 10,
    'Howto & Style': 26,
    'Comedy': 23,
    'People & Blogs': 22,
    'News & Politics': 25,
    'Science & Technology': 28,
    'Film & Animation': 1,
    'Sports': 17,
    'Education': 27,
    'Pets & Animals': 15,
    'Gaming': 20,
    'Travel & Events': 19,
    'Autos & Vehicles': 2,
    'Nonprofits & Activism': 29,
    'Shows': 43,
}


def categorica_yt(x):
    """Return the numeric ``category_id`` for the category name *x*.

    Args:
        x: Category name, matched exactly (case- and spacing-sensitive),
            e.g. ``'Music'`` or ``'Howto & Style'``.

    Returns:
        The integer id paired with that name.

    Raises:
        ValueError: If *x* is not one of the known category names. The
            previous if/elif chain left its result unbound in that case and
            failed with an ``UnboundLocalError``.
    """
    try:
        return _CATEGORY_IDS[x]
    except KeyError:
        raise ValueError('Unknown YouTube category: {!r}'.format(x)) from None
|
55 |
+
|
56 |
+
def convert_to_str(x):
    """Turn boolean flag columns of *x* into ``'yes'``/``'no'`` strings.

    Only the columns named in the module-level ``model_cat_list`` are
    touched. Boolean values become ``'yes'`` (True) or ``'no'`` (False);
    any other value, including an existing ``'yes'``/``'no'`` string, is
    kept as it is.

    Args:
        x: Table whose ``model_cat_list`` columns support ``.map`` (the
            caller in this file passes a pandas DataFrame). It is modified
            in place.

    Returns:
        The same object *x*, after conversion.
    """
    def _label(value):
        # Series.map with a dict replaces every value missing from the dict
        # with NaN, which erased inputs that were already 'yes'/'no'. Map
        # only real booleans and pass everything else through.
        if isinstance(value, (bool, np.bool_)):
            return 'yes' if value else 'no'
        return value

    for column in model_cat_list:
        x[column] = x[column].map(_label)
    return x
|
61 |
+
|
62 |
+
|
63 |
+
def run():
    """Render the prediction page: collect video statistics, predict views.

    Builds a Streamlit form for the video's statistics and flags, shows the
    collected values as a one-row DataFrame, and, once the form is
    submitted, writes the prediction of ``model_random_forest`` to the page.
    """
    # Create the title
    st.title('Predicting your views')

    # Create the input form
    with st.form(key='form_parameters'):
        likes = st.number_input('Likes', min_value=0, value=25, step=1, help='Likes count')
        dislikes = st.number_input('Dislikes', min_value=0, value=25, step=1, help='Dislikes count')

        comment_count = st.number_input('Comment counts', min_value=0, value=25, step=1, help='Comment counts')
        hour = st.number_input('Hour published', min_value=0, max_value=23, value=8, step=1, help='Video Hour published')

        category = st.selectbox('Category', ('Entertainment', 'Music', 'Howto & Style', 'Comedy', 'People & Blogs', 'News & Politics', 'Science & Technology', 'Film & Animation', 'Sports', 'Education', 'Pets & Animals', 'Gaming', 'Travel & Events', 'Autos & Vehicles', 'Nonprofits & Activism', 'Shows'))
        # The model input uses the numeric category id, not the display name.
        cat_id = categorica_yt(category)

        comment_disabled = st.selectbox('Comment disabled', ('no','yes'))
        rating_disabled = st.selectbox('Rating disabled', ('no','yes'))
        video_error_or_removed = st.selectbox('video error or removed disabled', ('no','yes'))

        st.markdown('---')

        submitted = st.form_submit_button('Predict')

    data_inf = {
        'category_id': cat_id,
        'likes': likes,
        'dislikes': dislikes,
        'comment_count': comment_count,
        'hour' : hour,
        'comments_disabled': comment_disabled,
        'ratings_disabled': rating_disabled,
        'video_error_or_removed': video_error_or_removed
    }

    # One-row frame holding the current form values; shown on every rerun.
    data_inf = pd.DataFrame([data_inf])
    st.dataframe(data_inf)

    if submitted:
        # Work on a copy so the frame displayed above is not modified.
        data_inf_1 = data_inf.copy()

        # convert some column to str so we can put it to pipeline later
        new_inf = convert_to_str(data_inf_1)

        y_pred_inf = model_random_forest.predict(new_inf)

        # int() on a whole array is deprecated by NumPy for ndim > 0, so
        # flatten the result and convert its single element explicitly.
        predicted_views = int(np.ravel(y_pred_inf)[0])
        st.write('## Total views predicted around: ' + str(predicted_views))


if __name__ == '__main__':
    run()
|
random_forest.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c4703aec6fb84d3ec2b24dbb76720eb15f1b02abcbe950ed768a00912b5c9f92
|
3 |
+
size 113252720
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
seaborn
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
matplotlib
|
5 |
+
joblib
|
6 |
+
scikit-learn==1.0.2
|
7 |
+
Pillow==9.2.0
|