{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "b313a218-4778-4d5b-9036-f0370d4212a0", "metadata": {}, "outputs": [], "source": [ "import ibis\n", "from ibis import _\n", "import streamlit as st\n", "import ibis.expr.datatypes as dt # Make sure to import the necessary module\n", "\n", "conn = ibis.duckdb.connect(extensions=[\"spatial\"])" ] }, { "cell_type": "markdown", "id": "943e4127-4af5-42c9-b1e2-48af2d888c24", "metadata": {}, "source": [ "# Landvote data" ] }, { "cell_type": "code", "execution_count": null, "id": "b486df27-e64c-48af-a577-89a74752ed5b", "metadata": {}, "outputs": [], "source": [ "landvote = conn.read_csv(\"landvote.csv\")\n", "\n", "landvote_df = (landvote\n", " .rename(jurisdiction = \"Jurisdiction Type\", state = \"State\", name = \"Jurisdiction Name\")\n", " .rename(amount = 'Conservation Funds at Stake', yes = '% Yes')\n", " .mutate(amount_n=_.amount.replace('$', '').replace(',', '').cast('float'))\n", " .mutate(log_amount=_.amount_n.log())\n", " .mutate(year=_['Date'].year().cast('int32'))\n", " .mutate(\n", " yes=ibis.case()\n", " .when(_.yes.isin(['Pass', 'None','Fail']), None) # Handle non-numeric cases\n", " .when(_.yes.notnull(), (_.yes.replace('%', '').cast('float').round(2).cast(dt.float64)).cast(dt.string) + '%') # Convert valid percentages and add %\n", " .else_(None) # Default to None for other cases\n", " .end()\n", " )\n", " .mutate(\n", " municipal=ibis.case()\n", " .when(_.jurisdiction.isin(['State','County']), None) \n", " .else_(_.name) \n", " .end()\n", " )\n", " .mutate(\n", " county=ibis.case()\n", " .when(_.jurisdiction.isin(['State','Municipal']), None) \n", " .else_(_.name) \n", " .end()\n", " )\n", " .mutate(log_amount = _.log_amount.round(4))\n", " .select('name','state','county','municipal','jurisdiction','Status', 'yes', 'year', 'amount', 'log_amount', )\n", " )" ] }, { "cell_type": "markdown", "id": "b78ad1e0-3b4e-4f56-bd25-b8da186d2857", "metadata": {}, "source": [ "# State Data" ] }, { "cell_type": "code", "execution_count": null, "id": "09ecf89a-a315-45ef-9ece-783963e6e07e", "metadata": {}, "outputs": [], "source": [ "state_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-state-territory.parquet\"\n", "states = conn.read_parquet(state_boundaries).rename(state_id = \"STUSPS\", state = \"NAME\")\n", "\n", "landvote_states = landvote_df.filter(_.jurisdiction == \"State\")\n", "\n", "df_state = (states\n", " .select('state_id','geometry')\n", " .cast({\"geometry\": \"geometry\"})\n", " .right_join(landvote_states, landvote_states['state'] == states['state_id'] )\n", " .select('state','county','municipal','jurisdiction','geometry','Status', 'yes', 'year', 'amount', 'log_amount', )\n", " )" ] }, { "cell_type": "markdown", "id": "010717bb-1d7e-4915-b476-a4038728131c", "metadata": {}, "source": [ "# County Data" ] }, { "cell_type": "code", "execution_count": null, "id": "ba4d8915-cde3-4ef9-ad8c-7759ed2c8a13", "metadata": {}, "outputs": [], "source": [ "landvote_county = landvote_df.filter(_.jurisdiction == \"County\")\n", "\n", "county_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-county.parquet\"\n", "county = conn.read_parquet(county_boundaries).rename(county = \"NAMELSAD\", state = \"STATE_NAME\")\n", "\n", "\n", "df_county = (county\n", " .select('state','county','geometry')\n", " .cast({\"geometry\": \"geometry\"})\n", " .right_join(landvote_county, \"county\" )\n", " .select('state','county','municipal','jurisdiction','geometry','Status', 'yes', 'year', 'amount', 'log_amount', )\n", " )" ] }, { "cell_type": "markdown", "id": "913b4a98-82eb-452f-8d81-8bce2b58a4c3", "metadata": {}, "source": [ "# Municipal Data" ] }, { "cell_type": "code", "execution_count": null, "id": "0cce23c9-245c-4c28-9523-0231eb5acc17", "metadata": {}, "outputs": [], "source": [ "landvote_local = landvote_df.filter(_.jurisdiction == \"Municipal\")\n", "\n", "local = (conn\n", " .read_parquet(\"us_localities.parquet\")\n", " .rename(municipal = \"name\",state = \"state_id\")\n", " )\n", "\n", "df_local = (local\n", " .select('state','county','municipal','geometry')\n", " .cast({\"geometry\": \"geometry\"})\n", " .mutate(geometry = _.geometry.buffer(.07))\n", " .right_join(landvote_local, [\"municipal\",\"state\"])\n", " .select('state','county','municipal','jurisdiction','geometry','Status', 'yes', 'year', 'amount', 'log_amount', )\n", " )" ] }, { "cell_type": "markdown", "id": "728993c4-7d58-4e89-924d-72dd43333585", "metadata": {}, "source": [ "# Make PMTiles. Each jurisdiction type is its own layer" ] }, { "cell_type": "code", "execution_count": null, "id": "5d3bee26-7ca8-490c-be5b-fc69a6c3db2a", "metadata": {}, "outputs": [], "source": [ "import subprocess\n", "import os\n", "from huggingface_hub import HfApi, login\n", "import streamlit as st\n", "\n", "login(st.secrets[\"HF_TOKEN\"])\n", "# api = HfApi(add_to_git_credential=False)\n", "api = HfApi()\n", "\n", "def hf_upload(file, repo_id):\n", " info = api.upload_file(\n", " path_or_fileobj=file,\n", " path_in_repo=file,\n", " repo_id=repo_id,\n", " repo_type=\"dataset\",\n", " )\n", "def generate_pmtiles(input_file, input_file2, input_file3, output_file, max_zoom=12):\n", " # Ensure Tippecanoe is installed\n", " if subprocess.call([\"which\", \"tippecanoe\"], stdout=subprocess.DEVNULL) != 0:\n", " raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n", "\n", " # Construct the Tippecanoe command\n", " command = [\n", " \"tippecanoe\",\n", " \"-o\", output_file,\n", " \"-zg\",\n", " \"--extend-zooms-if-still-dropping\",\n", " \"--force\",\n", " \"--projection\", \"EPSG:4326\", \n", " \"-L\",\"state:\"+input_file,\n", " \"-L\",\"county:\"+input_file2,\n", " \"-L\",\"municipal:\"+input_file3\n", " ]\n", " # Run Tippecanoe\n", " try:\n", " subprocess.run(command, check=True)\n", " print(f\"Successfully generated PMTiles file: {output_file}\")\n", " except subprocess.CalledProcessError as e:\n", " print(f\"Error running Tippecanoe: {e}\")\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "26ffccfd-2666-445a-9c03-2089bd589650", "metadata": {}, "outputs": [], "source": [ "df_state.execute().to_file(\"vote_state.geojson\")\n", "df_county.execute().to_file(\"vote_county.geojson\")\n", "df_local.execute().to_file(\"vote_municipal.geojson\")\n", "\n", "# hf_upload(\"vote.geojson\", \"boettiger-lab/landvote\")\n", "\n", "generate_pmtiles(\"vote_state.geojson\", \"vote_county.geojson\",\"vote_municipal.geojson\", \"landvote.pmtiles\")\n", "hf_upload(\"landvote.pmtiles\", \"boettiger-lab/landvote\")\n" ] }, { "cell_type": "markdown", "id": "4af09411-a4fe-475a-8b2b-f2d36388aa19", "metadata": {}, "source": [ "# Combine all 3 jurisdiction types into a parquet." ] }, { "cell_type": "code", "execution_count": null, "id": "fa6db898-a132-4ad5-86e9-de80613275d4", "metadata": {}, "outputs": [], "source": [ "df_temp = df_county.union(df_local)\n", "df = df_temp.union(df_state)\n", "df.execute().to_parquet(\"landvote.parquet\")\n", "hf_upload(\"landvote.parquet\", \"boettiger-lab/landvote\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" } }, "nbformat": 4, "nbformat_minor": 5 }