Spaces:
Sleeping
Sleeping
Upload 3 files
Browse files
- .gitattributes +1 -0
- app.py +96 -0
- requirements.txt +9 -0
- train_users_2.csv +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
train_users_2.csv filter=lfs diff=lfs merge=lfs -text
|
app.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Streamlit dashboard for the Airbnb "New User Bookings" training data.

The page shows four charts driven by three controls (language, category
column, account-creation year):

* visits vs. bookings per category for the selected year, with the
  conversion rate on a secondary axis;
* monthly visits vs. bookings for the selected language, again with the
  conversion rate;
* a world map of destination countries for the selected year;
* an age-by-gender violin plot for a selected destination country.
"""
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import pycountry
import pycountry_convert as pc
import streamlit as st

# set_page_config has to run before any other Streamlit command, including
# the first call to a cached function (which may render a spinner).
st.set_page_config(layout="wide")


@st.cache_data
def load_users(path='train_users_2.csv'):
    """Load the user table and derive the date columns the charts need.

    Cached so the CSV is parsed once instead of on every widget-triggered
    rerun of the script.

    Args:
        path: Location of the CSV file. It must contain at least the columns
            ``age``, ``date_account_created``, ``date_first_booking``,
            ``language``, ``gender``, ``country_destination`` and the
            selectable category columns.

    Returns:
        A DataFrame restricted to rows with ``age < 100``, with the extra
        columns ``year``, ``month``, ``year-month`` (account creation) and
        ``year_booking``, ``month_booking``, ``year-month_booking`` (first
        booking; NaN where the user never booked), and ``language``
        upper-cased.
    """
    users = pd.read_csv(path)
    # NaN compares False, so rows with a missing age are dropped here as well.
    # .copy() so the column assignments below do not write into a slice.
    users = users[users['age'] < 100].copy()

    # The slicing below assumes dates are stored as 'YYYY-MM-DD' strings.
    created = users['date_account_created']
    users['year'] = created.str[:4].astype(int)
    users['month'] = created.str[5:7].astype(int)
    users['year-month'] = created.str[:7]

    # Users without a booking keep NaN in the derived columns; to_numeric
    # yields floats with NaN, so no placeholder date is needed and later
    # comparisons / value_counts ignore those rows naturally.
    booked = users['date_first_booking']
    users['year_booking'] = pd.to_numeric(booked.str[:4])
    users['month_booking'] = pd.to_numeric(booked.str[5:7])
    users['year-month_booking'] = booked.str[:7]

    users['language'] = users['language'].str.upper()
    return users


def _title_annotation(year):
    """Return the keyword arguments for the bold year label drawn on a chart."""
    return dict(x=0, y=0.85, xanchor='left', yanchor='bottom',
                xref='paper', yref='paper', showarrow=False, align='left',
                text=f'<b>{year}</b>')


df = load_users()

st.title("Airbnb New User Bookings")
# Narrow column for the controls, two equally wide columns for the charts.
col0, col1, col2 = st.columns([0.5, 2, 2])

with col0:
    # The "region" control actually selects the user's language code.
    region = st.selectbox("Select Region", df['language'].unique(), index=0)
    column = st.selectbox(
        "Select Column",
        ['first_device_type', 'first_browser', 'affiliate_provider'],
        index=0,
    )
    # Plain ints keep the slider's type check independent of numpy scalars.
    first_year, last_year = int(df['year'].min()), int(df['year'].max())
    year = st.slider("Select Year", first_year, last_year, last_year)

with col1:
    # --- Visits vs. bookings per category for the selected year -----------
    year_df = df[df['year'] == year]
    visits = year_df[column].value_counts().rename('visit')
    bookings = (
        year_df.loc[year_df['year_booking'] == year, column]
        .value_counts()
        .rename('booking')
    )
    # Inner join: only categories that have at least one booking are shown.
    # The axis is named after the selected column so the chart label matches
    # the user's choice.
    counts = (
        visits.to_frame()
        .join(bookings, how='inner')
        .rename_axis(column)
        .reset_index()
    )
    counts['rate'] = counts['booking'] / counts['visit']
    long_counts = counts.melt(id_vars=[column], value_vars=['visit', 'booking'])

    category_fig = px.bar(long_counts, x=column, y='value', color='variable')
    # Secondary y-axis (0..1) on the right for the conversion rate.
    category_fig.update_layout(yaxis2=dict(overlaying='y', side='right', range=[0, 1]))
    category_fig.add_trace(go.Scatter(
        x=counts[column], y=counts['rate'], mode='lines+markers',
        name='Conversion Rate', yaxis='y2',
    ))
    category_fig.update_layout(
        height=250, margin={'l': 20, 'b': 50, 't': 10, 'r': 10}, hovermode='closest',
    )
    st.plotly_chart(category_fig)

    # --- Monthly visits vs. bookings for the selected language ------------
    region_df = df[df['language'] == region]
    # 'YYYY-MM' strings sort chronologically, so sort_index orders by month.
    monthly_visits = region_df['year-month'].value_counts().sort_index().rename('visit')
    monthly_bookings = (
        region_df['year-month_booking']
        .value_counts()  # NaN (never booked) is excluded by value_counts
        .sort_index()
        .rename('booking')
    )
    monthly = (
        monthly_visits.to_frame()
        .join(monthly_bookings, how='inner')
        .rename_axis('year-month')
        .reset_index()
    )
    monthly['rate'] = monthly['booking'] / monthly['visit']

    monthly_fig = go.Figure()
    monthly_fig.add_trace(go.Scatter(
        x=monthly['year-month'], y=monthly['visit'], mode='lines', name='visit'))
    monthly_fig.add_trace(go.Scatter(
        x=monthly['year-month'], y=monthly['booking'], mode='lines', name='booking'))
    monthly_fig.update_layout(yaxis2=dict(overlaying='y', side='right', range=[0, 1]))
    monthly_fig.add_trace(go.Scatter(
        x=monthly['year-month'], y=monthly['rate'], mode='lines+markers',
        name='Conversion Rate', yaxis='y2',
    ))
    monthly_fig.update_layout(height=250, margin={'l': 20, 'b': 30, 'r': 10, 't': 10})
    st.plotly_chart(monthly_fig)

with col2:
    # --- Destination countries for the selected year ----------------------
    # 'NDF' and 'other' are not ISO country codes and cannot be mapped.
    has_country = (df['country_destination'] != 'NDF') & (df['country_destination'] != 'other')
    dest_df = df.loc[has_country & (df['year'] == year)]
    destinations = (
        dest_df['country_destination']
        .value_counts()
        .rename('count')
        .rename_axis('country_destination')
        .reset_index()
    )
    # plotly's default locationmode expects ISO-3 codes; the data has ISO-2.
    destinations['country'] = destinations['country_destination'].apply(
        lambda code: pycountry.countries.get(alpha_2=code).alpha_3)
    destinations['continent'] = destinations['country_destination'].apply(
        lambda code: pc.convert_continent_code_to_continent_name(
            pc.country_alpha2_to_continent_code(code)))

    geo_fig = px.scatter_geo(
        destinations, locations='country', color='continent', size='count',
        hover_name=destinations['country'],
    )
    geo_fig.update_traces(customdata=destinations['country'])
    geo_fig.add_annotation(**_title_annotation(year))
    geo_fig.update_layout(height=250, margin={'l': 20, 'b': 30, 'r': 10, 't': 10})
    geo_fig.update_layout(clickmode='event+select')
    st.plotly_chart(geo_fig)

    # --- Age distribution by gender for a selected destination ------------
    country_name = st.selectbox("Select Country", df['country_destination'].unique(), index=0)
    age_df = df[
        (df['country_destination'] == country_name)
        & (df['year'] == year)
        & (df['gender'] != '-unknown-')
    ]
    age_fig = px.violin(age_df, x='gender', y='age', box=True, color='gender',
                        violinmode='overlay')
    age_fig.add_annotation(**_title_annotation(year))
    age_fig.update_layout(height=225, margin={'l': 20, 'b': 30, 'r': 10, 't': 10})
    st.plotly_chart(age_fig)
|
95 |
+
|
96 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit
|
2 |
+
pandas
|
3 |
+
plotly
|
4 |
+
pycountry
|
5 |
+
plotly
|
6 |
+
pandas
|
7 |
+
numpy
|
8 |
+
pycountry
|
9 |
+
pycountry-convert
|
train_users_2.csv
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d57dabe8b06534e51f86520ce6a66b6f67cce1704fb77a3fcfb596124858818
|
3 |
+
size 24853881
|