cboettig committed on
Commit
2a55e4a
·
1 Parent(s): a329c1d
Files changed (2) hide show
  1. app.py +48 -78
  2. requirements.txt +1 -0
app.py CHANGED
@@ -17,42 +17,38 @@
17
 
18
  import os
19
 
 
20
  import altair as alt
21
  import numpy as np
22
  import pandas as pd
23
  import pydeck as pdk
24
  import streamlit as st
25
 
 
 
 
 
26
  # SETTING PAGE CONFIG TO WIDE MODE AND ADDING A TITLE AND FAVICON
27
- st.set_page_config(layout="wide", page_title="NYC Ridesharing Demo", page_icon=":taxi:")
 
28
 
29
 
30
  # LOAD DATA ONCE
31
  @st.cache_resource
32
  def load_data():
33
- path = "uber-raw-data-sep14.csv.gz"
34
- if not os.path.isfile(path):
35
- path = f"https://github.com/streamlit/demo-uber-nyc-pickups/raw/main/{path}"
36
-
37
- data = pd.read_csv(
38
- path,
39
- nrows=100000, # approx. 10% of data
40
- names=[
41
- "date/time",
42
- "lat",
43
- "lon",
44
- ], # specify names directly since they don't change
45
- skiprows=1, # don't read header since names specified directly
46
- usecols=[0, 1, 2], # doesn't load last column, constant value "B02512"
47
- parse_dates=[
48
- "date/time"
49
- ], # set as datetime instead of converting after the fact
50
- )
51
-
52
  return data
53
 
54
 
55
- # FUNCTION FOR AIRPORT MAPS
56
  def map(data, lat, lon, zoom):
57
  st.write(
58
  pdk.Deck(
@@ -67,10 +63,9 @@ def map(data, lat, lon, zoom):
67
  pdk.Layer(
68
  "HexagonLayer",
69
  data=data,
70
- get_position=["lon", "lat"],
71
- radius=100,
72
- elevation_scale=4,
73
- elevation_range=[0, 1000],
74
  pickable=True,
75
  extruded=True,
76
  ),
@@ -79,10 +74,10 @@ def map(data, lat, lon, zoom):
79
  )
80
 
81
 
82
- # FILTER DATA FOR A SPECIFIC HOUR, CACHE
83
  @st.cache_data
84
- def filterdata(df, hour_selected):
85
- return df[df["date/time"].dt.hour == hour_selected]
86
 
87
 
88
  # CALCULATE MIDPOINT FOR GIVEN SET OF DATA
@@ -91,17 +86,6 @@ def mpoint(lat, lon):
91
  return (np.average(lat), np.average(lon))
92
 
93
 
94
- # FILTER DATA BY HOUR
95
- @st.cache_data
96
- def histdata(df, hr):
97
- filtered = data[
98
- (df["date/time"].dt.hour >= hr) & (df["date/time"].dt.hour < (hr + 1))
99
- ]
100
-
101
- hist = np.histogram(filtered["date/time"].dt.minute, bins=60, range=(0, 60))[0]
102
-
103
- return pd.DataFrame({"minute": range(60), "pickups": hist})
104
-
105
 
106
  # STREAMLIT APP LAYOUT
107
  data = load_data()
@@ -114,8 +98,8 @@ row1_1, row1_2 = st.columns((2, 3))
114
  # E.G. https://share.streamlit.io/streamlit/demo-uber-nyc-pickups/main?pickup_hour=2
115
  if not st.session_state.get("url_synced", False):
116
  try:
117
- pickup_hour = int(st.query_params["pickup_hour"][0])
118
- st.session_state["pickup_hour"] = pickup_hour
119
  st.session_state["url_synced"] = True
120
  except KeyError:
121
  pass
@@ -123,14 +107,14 @@ if not st.session_state.get("url_synced", False):
123
 
124
  # IF THE SLIDER CHANGES, UPDATE THE QUERY PARAM
125
  def update_query_params():
126
- hour_selected = st.session_state["pickup_hour"]
127
- st.query_params["pickup_hour"]=hour_selected
128
 
129
 
130
  with row1_1:
131
  st.title("NYC Uber Ridesharing Data")
132
  hour_selected = st.slider(
133
- "Select hour of pickup", 0, 23, key="pickup_hour", on_change=update_query_params
134
  )
135
 
136
 
@@ -138,57 +122,43 @@ with row1_2:
138
  st.write(
139
  """
140
  ##
141
- Examining how Uber pickups vary over time in New York City's and at its major regional airports.
142
- By sliding the slider on the left you can view different slices of time and explore different transportation trends.
143
  """
144
  )
145
 
146
  # LAYING OUT THE MIDDLE SECTION OF THE APP WITH THE MAPS
147
  row2_1, row2_2, row2_3, row2_4 = st.columns((2, 1, 1, 1))
148
 
 
149
  # SETTING THE ZOOM LOCATIONS FOR THE AIRPORTS
150
- la_guardia = [40.7900, -73.8700]
151
- jfk = [40.6650, -73.7821]
152
- newark = [40.7090, -74.1805]
153
- zoom_level = 12
154
- midpoint = mpoint(data["lat"], data["lon"])
155
 
156
  with row2_1:
157
  st.write(
158
- f"""**All New York City from {hour_selected}:00 and {(hour_selected + 1) % 24}:00**"""
159
  )
160
- map(filterdata(data, hour_selected), midpoint[0], midpoint[1], 11)
161
 
162
  with row2_2:
163
- st.write("**La Guardia Airport**")
164
- map(filterdata(data, hour_selected), la_guardia[0], la_guardia[1], zoom_level)
165
 
166
  with row2_3:
167
- st.write("**JFK Airport**")
168
- map(filterdata(data, hour_selected), jfk[0], jfk[1], zoom_level)
169
 
170
  with row2_4:
171
- st.write("**Newark Airport**")
172
- map(filterdata(data, hour_selected), newark[0], newark[1], zoom_level)
 
 
 
 
 
173
 
174
- # CALCULATING DATA FOR THE HISTOGRAM
175
- chart_data = histdata(data, hour_selected)
176
 
177
- # LAYING OUT THE HISTOGRAM SECTION
178
- st.write(
179
- f"""**Breakdown of rides per minute between {hour_selected}:00 and {(hour_selected + 1) % 24}:00**"""
180
- )
181
 
182
- st.altair_chart(
183
- alt.Chart(chart_data)
184
- .mark_area(
185
- interpolate="step-after",
186
- )
187
- .encode(
188
- x=alt.X("minute:Q", scale=alt.Scale(nice=False)),
189
- y=alt.Y("pickups:Q"),
190
- tooltip=["minute", "pickups"],
191
- )
192
- .configure_mark(opacity=0.2, color="red"),
193
- use_container_width=True,
194
- )
 
17
 
18
  import os
19
 
20
+ # +
21
  import altair as alt
22
  import numpy as np
23
  import pandas as pd
24
  import pydeck as pdk
25
  import streamlit as st
26
 
27
+ import ibis
28
+ from ibis import _
29
+ # -
30
+
31
  # SETTING PAGE CONFIG TO WIDE MODE AND ADDING A TITLE AND FAVICON
32
+ st.set_page_config(layout="wide", page_title="GBIF Biodiversity Demo", page_icon=":butterfly:")
33
+
34
 
35
 
36
# LOAD DATA ONCE
@st.cache_resource
def load_data():
    """Return an ibis table of GB butterfly (Lepidoptera) occurrences.

    Fast path: read a pre-filtered local parquet snapshot when one exists.
    Slow path: stream the full GBIF occurrence parquet from S3 and narrow it
    to the three columns the app uses (year, longitude, latitude).
    """
    connection = ibis.duckdb.connect()

    # Prefer the locally cached, already-filtered extract if present.
    local_snapshot = "butterflies_gb.parquet"
    if os.path.isfile(local_snapshot):
        return connection.read_parquet(local_snapshot)

    # Otherwise pull the full GBIF dump and filter it down.
    occurrences = connection.read_parquet(
        "s3://gbif-open-data-us-east-1/occurrence/2023-06-01/occurrence.parquet"
    )
    data = (
        occurrences
        # drop rows missing any of the coordinates/year we plot
        .dropna(["decimallongitude", "decimallatitude", "year"], how="any")
        # butterflies/moths recorded in Great Britain only
        .filter([_.order == "Lepidoptera", _.countrycode == "GB"])
        .select(['year', 'decimallongitude', 'decimallatitude'])
    )
    return data
50
 
51
 
 
52
  def map(data, lat, lon, zoom):
53
  st.write(
54
  pdk.Deck(
 
63
  pdk.Layer(
64
  "HexagonLayer",
65
  data=data,
66
+ get_position=["decimallongitude", "decimallatitude"],
67
+ radius=1000,
68
+ elevation_scale=100,
 
69
  pickable=True,
70
  extruded=True,
71
  ),
 
74
  )
75
 
76
 
77
# FILTER DATA FOR A SPECIFIC YEAR, CACHE
@st.cache_data
def filterdata(df, year):
    """Return the rows of *df* observed in *year*, materialized as pandas.

    NOTE(review): st.cache_data hashes its arguments; confirm the ibis table
    passed as *df* is hashable (otherwise rename the parameter to `_df` so
    Streamlit skips hashing it).
    """
    matching = df.filter(_.year == year)
    return matching.to_pandas()
81
 
82
 
83
  # CALCULATE MIDPOINT FOR GIVEN SET OF DATA
 
86
  return (np.average(lat), np.average(lon))
87
 
88
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
# STREAMLIT APP LAYOUT
data = load_data()

# SEE IF THERE'S A ?year= QUERY PARAM IN THE URL (E.G. .../?year=2015) AND,
# ONCE PER SESSION, COPY IT INTO SESSION STATE SO THE SLIDER PICKS IT UP
if not st.session_state.get("url_synced", False):
    try:
        # st.query_params values are strings; parse the whole value rather
        # than indexing [0], which would grab only the first character.
        # (Bug fix: the original stored the undefined name `year`, raising
        # NameError whenever the param was present.)
        year_param = int(st.query_params["year"])
        st.session_state["year"] = year_param
        st.session_state["url_synced"] = True
    except (KeyError, ValueError):
        # Missing or non-numeric param: fall back to the slider's default.
        pass
 
107
 
108
# IF THE SLIDER CHANGES, UPDATE THE QUERY PARAM
def update_query_params():
    """Mirror the slider's session-state value into the ?year= URL param."""
    st.query_params["year"] = st.session_state["year"]
112
 
113
 
114
with row1_1:
    # Bug fix: the slider result was bound to `hour_selected` while all
    # downstream panels read the (previously undefined) name `year`.
    # Title/label also updated from the stale NYC Uber wording to match the
    # GBIF page_title set in st.set_page_config.
    st.title("GBIF Biodiversity Data")
    # Writes to session-state key "year"; update_query_params keeps the
    # ?year= URL param in sync when the slider moves.
    year = st.slider(
        "Select year", 2010, 2023, key="year", on_change=update_query_params
    )
    hour_selected = year  # legacy alias from the Uber demo, kept for compatibility
119
 
120
 
 
122
  st.write(
123
  """
124
  ##
125
+ By sliding the slider on the left you can view different slices of time and explore different trends.
 
126
  """
127
  )
128
 
129
# LAYING OUT THE MIDDLE SECTION OF THE APP WITH THE MAPS
row2_1, row2_2, row2_3, row2_4 = st.columns((2, 1, 1, 1))

# MAP CENTER: fixed (lat, lon) midpoint over Great Britain.
# NOTE(review): a computed midpoint via mpoint() would need the table's
# decimallatitude/decimallongitude columns (not "lat"/"lon") — confirm
# before re-enabling it. Leftover "# +"/"# -" editing markers removed.
midpoint = (52.0, -1.0)
139
 
140
# ONE LARGE MAP PLUS THREE SMALLER PANELS, ALL CENTERED ON THE UK MIDPOINT,
# EACH AT A PROGRESSIVELY WIDER ZOOM LEVEL.
center_lat, center_lon = midpoint
panels = [
    (row2_1, f"""**All UK {year}**""", 4),
    (row2_2, "**Panel 1**", 3),
    (row2_3, "**Panel 2**", 2),
    (row2_4, "**Panel 3**", 1),
]
for column, caption, zoom_level in panels:
    with column:
        st.write(caption)
        # filterdata is cached, so the repeated per-panel calls are cheap.
        map(filterdata(data, year), center_lat, center_lon, zoom_level)
157
+
158
+
159
+
160
+
161
+
162
 
 
 
163
 
 
 
 
 
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -3,3 +3,4 @@ numpy
3
  pandas
4
  pydeck
5
  streamlit
 
 
3
  pandas
4
  pydeck
5
  streamlit
6
+ ibis-framework[duckdb]