{ "cells": [ { "cell_type": "markdown", "id": "fa25e9ed-1dd1-4ca1-9898-9606c94d449f", "metadata": {}, "source": [ "# Merging state, county, and city polygons with landvote data\n" ] }, { "cell_type": "code", "execution_count": null, "id": "b313a218-4778-4d5b-9036-f0370d4212a0", "metadata": {}, "outputs": [], "source": [ "import ibis\n", "from ibis import _\n", "import streamlit as st\n", "import ibis.expr.datatypes as dt \n", "\n", "conn = ibis.duckdb.connect(extensions=[\"spatial\"])" ] }, { "cell_type": "markdown", "id": "943e4127-4af5-42c9-b1e2-48af2d888c24", "metadata": {}, "source": [ "# Landvote data\n" ] }, { "cell_type": "code", "execution_count": null, "id": "b486df27-e64c-48af-a577-89a74752ed5b", "metadata": {}, "outputs": [], "source": [ "landvote = conn.read_csv(\"landvote.csv\")\n", "\n", "landvote_df = (landvote\n", " .rename(jurisdiction = \"Jurisdiction Type\", state = \"State\", name = \"Jurisdiction Name\")\n", " .rename(amount = 'Conservation Funds at Stake', yes = '% Yes')\n", " .mutate(amount_n=_.amount.replace('$', '').replace(',', '').cast('float'))\n", " .mutate(log_amount=_.amount_n.log())\n", " .mutate(year=_['Date'].year().cast('int32'))\n", " .mutate(\n", " yes=ibis.case()\n", " .when(_.yes.isin(['Pass', 'None','Fail']), None) # Handle non-numeric cases\n", " .when(_.yes.notnull(), (_.yes.replace('%', '').cast('float').round(2).cast(dt.float64)).cast(dt.string) + '%') # Convert valid percentages and add %\n", " .else_(None) # Default to None for other cases\n", " .end()\n", " )\n", " .mutate(\n", " municipal=ibis.case()\n", " .when(_.jurisdiction.isin(['State','County']), None) \n", " .else_(_.name) \n", " .end()\n", " )\n", " .mutate(\n", " county=ibis.case()\n", " .when(_.jurisdiction.isin(['State','Municipal']), None) \n", " .else_(_.name) \n", " .end()\n", " )\n", " .mutate(log_amount = _.log_amount.round(4))\n", " .select('name','state','county','municipal','jurisdiction','Status', 'yes', 'year', 'amount', 'log_amount', )\n", " )" ] }, { "cell_type": "markdown", "id": "b78ad1e0-3b4e-4f56-bd25-b8da186d2857", "metadata": {}, "source": [ "# State Data" ] }, { "cell_type": "code", "execution_count": null, "id": "09ecf89a-a315-45ef-9ece-783963e6e07e", "metadata": {}, "outputs": [], "source": [ "state_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-state-territory.parquet\"\n", "landvote_states = landvote_df.filter(_.jurisdiction == \"State\")\n", "\n", "state = (conn\n", " .read_parquet(state_boundaries)\n", " .rename(state = \"STUSPS\", state_name = \"NAME\")\n", " .select('state','geometry','state_name')\n", " .cast({\"geometry\": \"geometry\"})\n", " )\n", "\n", "df_state = (state\n", " .join(landvote_states, \"state\",how = \"inner\")\n", " .select('state','county','municipal','jurisdiction','geometry','Status', 'yes', 'year', 'amount', 'log_amount', )\n", " )" ] }, { "cell_type": "markdown", "id": "010717bb-1d7e-4915-b476-a4038728131c", "metadata": {}, "source": [ "# County Data" ] }, { "cell_type": "code", "execution_count": null, "id": "ba4d8915-cde3-4ef9-ad8c-7759ed2c8a13", "metadata": {}, "outputs": [], "source": [ "landvote_county = landvote_df.filter(_.jurisdiction == \"County\")\n", "\n", "county_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-county.parquet\"\n", "\n", "df_county = (conn\n", " .read_parquet(county_boundaries)\n", " .rename(county = \"NAMELSAD\", state_name = \"STATE_NAME\")\n", " .join(state, \"state_name\", how = \"inner\")\n", " .select('state','state_name','county','geometry')\n", " .cast({\"geometry\": \"geometry\"})\n", " .join(landvote_county, [\"county\",\"state\"],how = \"inner\")\n", " .select('state','county','municipal','jurisdiction','geometry','Status', 'yes', 'year', 'amount', 'log_amount', )\n", " )" ] }, { "cell_type": "markdown", "id": "913b4a98-82eb-452f-8d81-8bce2b58a4c3", "metadata": {}, "source": [ "# Municipal Data" ] }, { "cell_type": "code", "execution_count": null, "id": "0cce23c9-245c-4c28-9523-0231eb5acc17", "metadata": {}, "outputs": [], "source": [ "landvote_city = landvote_df.filter(_.jurisdiction == \"Municipal\")\n", "\n", "df_city = (conn\n", " .read_parquet(\"us_localities.parquet\")\n", " .select('state','county','municipal','geometry')\n", " .cast({\"geometry\": \"geometry\"})\n", " .join(landvote_city, [\"municipal\",\"state\"], how = \"inner\")\n", " .select('state','county','municipal','jurisdiction','geometry','Status', 'yes', 'year', 'amount', 'log_amount', )\n", " )" ] }, { "cell_type": "markdown", "id": "728993c4-7d58-4e89-924d-72dd43333585", "metadata": {}, "source": [ "# Make PMTiles. Each jurisdiction type is its own layer" ] }, { "cell_type": "code", "execution_count": null, "id": "5d3bee26-7ca8-490c-be5b-fc69a6c3db2a", "metadata": {}, "outputs": [], "source": [ "import subprocess\n", "import os\n", "from huggingface_hub import HfApi, login\n", "import streamlit as st\n", "\n", "login(st.secrets[\"HF_TOKEN\"])\n", "# api = HfApi(add_to_git_credential=False)\n", "api = HfApi()\n", "\n", "def hf_upload(file, repo_id):\n", " info = api.upload_file(\n", " path_or_fileobj=file,\n", " path_in_repo=file,\n", " repo_id=repo_id,\n", " repo_type=\"dataset\",\n", " )\n", "def generate_pmtiles(input_file, input_file2, input_file3, output_file, max_zoom=12):\n", " # Ensure Tippecanoe is installed\n", " if subprocess.call([\"which\", \"tippecanoe\"], stdout=subprocess.DEVNULL) != 0:\n", " raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n", "\n", " # Construct the Tippecanoe command\n", " command = [\n", " \"tippecanoe\",\n", " \"-o\", output_file,\n", " \"-zg\",\n", " \"--extend-zooms-if-still-dropping\",\n", " \"--force\",\n", " \"--projection\", \"EPSG:4326\", \n", " \"-L\",\"state:\"+input_file,\n", " \"-L\",\"county:\"+input_file2,\n", " \"-L\",\"municipal:\"+input_file3\n", " ]\n", " # Run Tippecanoe\n", " try:\n", " subprocess.run(command, check=True)\n", " print(f\"Successfully generated PMTiles file: {output_file}\")\n", " except subprocess.CalledProcessError as e:\n", " print(f\"Error running Tippecanoe: {e}\")\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "26ffccfd-2666-445a-9c03-2089bd589650", "metadata": {}, "outputs": [], "source": [ "gdf_state = df_state.execute().set_crs(\"EPSG:4326\")\n", "gdf_state.to_file(\"landvote_state.geojson\")\n", "\n", "gdf_county = df_county.execute().set_crs(\"EPSG:4326\")\n", "gdf_county.to_file(\"landvote_county.geojson\")\n", "\n", "gdf_city = df_city.execute().set_crs(\"EPSG:4326\")\n", "gdf_city.to_file(\"landvote_municipal.geojson\")\n", "\n", "generate_pmtiles(\"landvote_state.geojson\", \"landvote_county.geojson\",\"landvote_municipal.geojson\", \"landvote_polygons.pmtiles\")\n", "hf_upload(\"landvote_polygons.pmtiles\", \"boettiger-lab/landvote\")\n" ] }, { "cell_type": "markdown", "id": "4af09411-a4fe-475a-8b2b-f2d36388aa19", "metadata": {}, "source": [ "# Combine all 3 jurisdiction types into a parquet." ] }, { "cell_type": "code", "execution_count": null, "id": "fa6db898-a132-4ad5-86e9-de80613275d4", "metadata": {}, "outputs": [], "source": [ "df_temp = df_county.union(df_city)\n", "df = df_temp.union(df_state)\n", "df.execute().set_crs(\"EPSG:4326\").to_parquet(\"landvote_polygons.parquet\")\n", "hf_upload(\"landvote_polygons.parquet\", \"boettiger-lab/landvote\")\n" ] }, { "cell_type": "code", "execution_count": null, "id": "1f1e18bd-dce5-4cd4-b25a-371674340348", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.10" } }, "nbformat": 4, "nbformat_minor": 5 }