{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "9fe02137-ba1c-4ced-9909-b5b7c39ed6d4",
   "metadata": {},
   "source": [
    "# Merging state/county/city polygons with party affiliation and landvote data\n",
    "\n",
    "Joins the landvote polygons with the party polygons, then writes the result as PMTiles and Parquet and uploads both to the `boettiger-lab/landvote` dataset on Hugging Face."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6452373c-d10f-472c-9174-cd05a3363587",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import shutil\n",
    "import subprocess\n",
    "\n",
    "import ibis\n",
    "import ibis.expr.datatypes as dt\n",
    "import streamlit as st\n",
    "from huggingface_hub import HfApi, login\n",
    "from ibis import _\n",
    "\n",
    "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
    "\n",
    "landvote_url = \"https://huggingface.co/datasets/boettiger-lab/landvote/resolve/main/landvote_polygons.parquet\"\n",
    "# party_url = \"https://huggingface.co/datasets/boettiger-lab/landvote/resolve/main/party_polygons.parquet\"\n",
    "party_url = \"https://huggingface.co/datasets/boettiger-lab/landvote/resolve/main/party_polygons_all.parquet\"\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dd9ffc64-b4cf-4e5b-9c96-703e91a77837",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Levels that do not apply to a row's jurisdiction are replaced by the placeholder \"-\"\n",
    "# in both tables (presumably so the join keys used later compare equal, since NULL\n",
    "# keys would not match -- confirm). The placeholder is turned back into NULL after the join.\n",
    "landvote = (conn\n",
    "    .read_parquet(landvote_url)\n",
    "    .cast({\"geometry\": \"geometry\"})\n",
    "    .mutate(county = _.county.upper())\n",
    "    .mutate(municipal = _.municipal.upper())\n",
    "    .mutate(elect_year = _.year - _.year % 4)  # round down to a multiple of 4: most recent election year\n",
    "    .cast({\"municipal\": \"string\",\"county\":\"string\"})\n",
    "    .mutate(municipal=ibis.case()\n",
    "        .when(_.jurisdiction.isin(['State','County']), ibis.literal(\"-\"))\n",
    "        .else_(_.municipal)\n",
    "        .end()\n",
    "    )\n",
    "    # Non-state rows get a county name that always ends in 'COUNTY'.\n",
    "    .mutate(county=ibis.case()\n",
    "        .when(_.jurisdiction.isin(['State']), ibis.literal(\"-\"))\n",
    "        .else_(ibis.case()\n",
    "            .when(_.county.endswith('COUNTY'), _.county)\n",
    "            .else_(_.county + ' COUNTY')\n",
    "            .end())\n",
    "        .end())\n",
    ")\n",
    "\n",
    "party = (conn\n",
    "    .read_parquet(party_url)\n",
    "    .cast({\"geometry\": \"geometry\",\"municipal\":\"string\"})\n",
    "    .mutate(municipal=ibis.case()\n",
    "        .when(_.jurisdiction.isin(['State','County']), ibis.literal(\"-\"))\n",
    "        .else_(_.municipal)\n",
    "        .end()\n",
    "    )\n",
    "    .mutate(county=ibis.case()\n",
    "        .when(_.jurisdiction.isin(['State']), ibis.literal(\"-\"))\n",
    "        .else_(_.county)\n",
    "        .end()\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8fc40e96-fffd-4b23-9963-c931fdce96f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Left join keeps every landvote row; party rows are matched on location keys,\n",
    "# geometry, and the election year derived from the landvote year.\n",
    "votes = (landvote\n",
    "    .join(party,[\"state\",\"county\",\"municipal\",\"jurisdiction\",\"geometry\", _.elect_year == party[\"year\"]],how = \"left\")\n",
    "    .drop('elect_year','state_right','county_right','municipal_right','year_right',\"geometry_right\",\"jurisdiction_right\")\n",
    "    # Turn the \"-\" placeholders back into NULLs now that the join is done.\n",
    "    .mutate(municipal=ibis.case()\n",
    "        .when(_.municipal == ibis.literal(\"-\"), None)\n",
    "        .else_(_.municipal)\n",
    "        .end()\n",
    "    )\n",
    "    .mutate(county=ibis.case()\n",
    "        .when(_.county == ibis.literal(\"-\"), None)\n",
    "        .else_(_.county)\n",
    "        .end()\n",
    "    )\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e80cfd2e-40eb-4065-9ae6-dcaf83319d9a",
   "metadata": {},
   "source": [
    "# Make PMTiles\n",
    "\n",
    "Each jurisdiction type is its own layer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b1cd8f44-57fa-49a8-b438-f9e4aab747c5",
   "metadata": {},
   "outputs": [],
   "source": [
    "login(st.secrets[\"HF_TOKEN\"])\n",
    "# api = HfApi(add_to_git_credential=False)\n",
    "api = HfApi()\n",
    "\n",
    "\n",
    "def hf_upload(file, repo_id):\n",
    "    \"\"\"Upload a local file to a Hugging Face dataset repository.\n",
    "\n",
    "    The file is stored in the repository under the same path it has locally.\n",
    "\n",
    "    Args:\n",
    "        file: Local path of the file to upload; also used as the path in the repo.\n",
    "        repo_id: Target repository id, e.g. \"boettiger-lab/landvote\".\n",
    "\n",
    "    Returns:\n",
    "        The value returned by `HfApi.upload_file`.\n",
    "    \"\"\"\n",
    "    return api.upload_file(\n",
    "        path_or_fileobj=file,\n",
    "        path_in_repo=file,\n",
    "        repo_id=repo_id,\n",
    "        repo_type=\"dataset\",\n",
    "    )\n",
    "\n",
    "\n",
    "def generate_pmtiles(input_file, input_file2, input_file3, output_file, max_zoom=12):\n",
    "    \"\"\"Build a three-layer PMTiles archive with Tippecanoe.\n",
    "\n",
    "    Args:\n",
    "        input_file: GeoJSON file written to the `state` layer.\n",
    "        input_file2: GeoJSON file written to the `county` layer.\n",
    "        input_file3: GeoJSON file written to the `municipal` layer.\n",
    "        output_file: Path of the PMTiles file to create (overwritten if present).\n",
    "        max_zoom: Kept for backward compatibility. NOTE(review): this value is not\n",
    "            forwarded to Tippecanoe; `-zg` is passed instead so Tippecanoe picks\n",
    "            the maximum zoom itself.\n",
    "\n",
    "    Raises:\n",
    "        RuntimeError: If the `tippecanoe` executable is not found on PATH.\n",
    "        subprocess.CalledProcessError: If Tippecanoe exits with a non-zero status.\n",
    "    \"\"\"\n",
    "    # Ensure Tippecanoe is installed\n",
    "    if shutil.which(\"tippecanoe\") is None:\n",
    "        raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n",
    "\n",
    "    # Construct the Tippecanoe command\n",
    "    command = [\n",
    "        \"tippecanoe\",\n",
    "        \"-o\", output_file,\n",
    "        \"-zg\",\n",
    "        \"--extend-zooms-if-still-dropping\",\n",
    "        \"--force\",\n",
    "        \"--projection\", \"EPSG:4326\",\n",
    "        \"-L\", \"state:\" + input_file,\n",
    "        \"-L\", \"county:\" + input_file2,\n",
    "        \"-L\", \"municipal:\" + input_file3,\n",
    "    ]\n",
    "\n",
    "    # Run Tippecanoe\n",
    "    try:\n",
    "        subprocess.run(command, check=True)\n",
    "    except subprocess.CalledProcessError as e:\n",
    "        print(f\"Error running Tippecanoe: {e}\")\n",
    "        # Re-raise so later cells do not upload a stale or missing output file.\n",
    "        raise\n",
    "    print(f\"Successfully generated PMTiles file: {output_file}\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7061577e-0632-4395-8ac5-241a1fab53b0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write one GeoJSON file per jurisdiction type; each file becomes its own tile layer.\n",
    "layer_files = {}\n",
    "for jurisdiction in (\"State\", \"County\", \"Municipal\"):\n",
    "    geojson_path = f\"votes_{jurisdiction.lower()}.geojson\"\n",
    "    (votes\n",
    "        .filter(_.jurisdiction == jurisdiction)\n",
    "        .execute()\n",
    "        .set_crs(\"EPSG:4326\")\n",
    "        .to_file(geojson_path))\n",
    "    layer_files[jurisdiction] = geojson_path\n",
    "\n",
    "generate_pmtiles(\n",
    "    layer_files[\"State\"],\n",
    "    layer_files[\"County\"],\n",
    "    layer_files[\"Municipal\"],\n",
    "    \"votes.pmtiles\",\n",
    ")\n",
    "hf_upload(\"votes.pmtiles\", \"boettiger-lab/landvote\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f2979624-bcdf-4a8a-899a-c22fc3cdaf0e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# save as parquet\n",
    "votes.execute().set_crs(\"EPSG:4326\").to_parquet(\"votes.parquet\")\n",
    "hf_upload(\"votes.parquet\", \"boettiger-lab/landvote\")\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}