{ "cells": [ { "cell_type": "markdown", "id": "f2d27d42-74aa-44cb-8ab6-5a0f856dcca0", "metadata": {}, "source": [ "# Merging state, county, and city polygons with political parties" ] }, { "cell_type": "code", "execution_count": null, "id": "d179ded1-6235-47ed-bbfb-6d72468188d5", "metadata": {}, "outputs": [], "source": [ "import ibis\n", "from ibis import _\n", "import streamlit as st\n", "import ibis.expr.datatypes as dt # Make sure to import the necessary module\n", "\n", "conn = ibis.duckdb.connect(extensions=[\"spatial\"])" ] }, { "cell_type": "markdown", "id": "b9bc2d50-481b-4f62-a74b-4a576ff89ecd", "metadata": {}, "source": [ "# State " ] }, { "cell_type": "code", "execution_count": null, "id": "322b9a85-bdf9-45f9-9b19-695cc1b996e8", "metadata": {}, "outputs": [], "source": [ "#getting party\n", "state = (conn\n", " .read_csv(\"1976-2020-president.csv\")\n", " # .filter(_. year >=2000)\n", " .rename(state=\"state_po\" , party = \"party_simplified\") # rename columns\n", " .group_by([\"year\", \"state\"])\n", " .aggregate(party=_.party.argmax(_.candidatevotes)) # winning party \n", " .select(\"year\", \"state\", \"party\") # select only relevant columns\n", " )" ] }, { "cell_type": "code", "execution_count": null, "id": "6ab49b51-d6fe-47b3-9bed-b23b5ecf7f0e", "metadata": {}, "outputs": [], "source": [ "# merging with state polygons\n", "state_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-state-territory.parquet\"\n", "\n", "df_state = (conn\n", " .read_parquet(state_boundaries)\n", " .rename(state = \"STUSPS\", state_ = \"NAME\")\n", " .select(\"state\",\"geometry\")\n", " .join(state,\"state\",how = \"inner\")\n", " .mutate(county = None)\n", " .mutate(municipal = None)\n", " .mutate(jurisdiction = ibis.literal(\"State\"))\n", " .cast({\"geometry\": \"geometry\",\"county\":\"string\",\"municipal\": \"string\"})\n", " .select(\"state\", \"county\", \"municipal\",\"jurisdiction\",\"geometry\", \"year\", \"party\")\n", " )" ] }, { "cell_type": "markdown", "id": "170ba045-8848-4a99-a4f6-68bde22428af", "metadata": {}, "source": [ "# County" ] }, { "cell_type": "code", "execution_count": null, "id": "0231c801-82e2-45be-9ec5-607d5588a3e5", "metadata": {}, "outputs": [], "source": [ "# getting party\n", "county = (conn\n", " .read_csv(\"countypres_2000-2020.csv\")\n", " .filter((_.totalvotes > 0)) # filter empty votes\n", " .rename(state=\"state_po\", state_name = \"state\") \n", " .mutate(county = _.county_name + ibis.literal(\" COUNTY\"))\n", " .group_by([\"year\", \"state\", \"county\", \"state_name\", \"party\"])\n", " .aggregate(\n", " total_candidate_votes=_.candidatevotes.sum() #getting total votes per candidate \n", " )\n", " .group_by([\"year\", \"state\", \"county\", \"state_name\"])\n", " .aggregate(\n", " party=_.party.argmax(_.total_candidate_votes) # party with the highest total votes\n", " )\n", " .select(\"year\", \"state\", \"county\", \"party\",\"state_name\") \n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "179b5066-030c-4302-a8cf-8216a753080e", "metadata": {}, "outputs": [], "source": [ "# merging with county polygons\n", "county_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-county.parquet\"\n", "df_county = (conn\n", " .read_parquet(county_boundaries)\n", " .mutate(county = _.NAMELSAD.upper(), state_name = _.STATE_NAME.upper())\n", " .select(\"state_name\",\"county\",\"geometry\")\n", " .join(county,[\"state_name\",\"county\"],how = \"inner\")\n", " .mutate(municipal = None)\n", " .cast({\"geometry\": \"geometry\",\"municipal\": \"string\"})\n", " .mutate(jurisdiction = ibis.literal(\"County\"))\n", " .select(\"state\", \"county\", \"municipal\",\"jurisdiction\",\"geometry\", \"year\", \"party\")\n", " )\n" ] }, { "cell_type": "markdown", "id": "8c12fd16-a953-4273-9e0f-44b50eacf633", "metadata": {}, "source": [ "# Getting Municipals " ] }, { "cell_type": "code", "execution_count": null, "id": "b3429fea-7c0d-4838-bcbb-6552079dc3b6", "metadata": {}, "outputs": [], "source": [ "localities_boundaries = \"us_localities.parquet\"\n", "locality = (conn\n", " .read_parquet(localities_boundaries)\n", " .mutate(county = _.county.upper())\n", " .mutate(municipal = _.municipal.upper())\n", " )\n", "\n", "df_city = (county\n", " .drop(\"state_name\")\n", " .join(locality, [\"state\",\"county\"], how = \"inner\")\n", " .cast({\"geometry\": \"geometry\"})\n", " .mutate(jurisdiction = ibis.literal(\"Municipal\"))\n", " .select(\"state\", \"county\", \"municipal\",\"jurisdiction\",\"geometry\", \"year\", \"party\")\n", " )\n" ] }, { "cell_type": "markdown", "id": "ae5b417d-4266-456d-952c-ac2696234ea0", "metadata": {}, "source": [ "# Make PMTiles with only state/county. Each jurisdiction type is its own layer. " ] }, { "cell_type": "code", "execution_count": null, "id": "12cdf02e-bc22-4a5f-91b9-00a8eee587bd", "metadata": {}, "outputs": [], "source": [ "import subprocess\n", "import os\n", "from huggingface_hub import HfApi, login\n", "import streamlit as st\n", "\n", "login(st.secrets[\"HF_TOKEN\"])\n", "# api = HfApi(add_to_git_credential=False)\n", "api = HfApi()\n", "\n", "def hf_upload(file, repo_id):\n", " info = api.upload_file(\n", " path_or_fileobj=file,\n", " path_in_repo=file,\n", " repo_id=repo_id,\n", " repo_type=\"dataset\",\n", " )\n", "def generate_pmtiles(input_file, input_file2, output_file, max_zoom=12):\n", " # Ensure Tippecanoe is installed\n", " if subprocess.call([\"which\", \"tippecanoe\"], stdout=subprocess.DEVNULL) != 0:\n", " raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n", "\n", " # Construct the Tippecanoe command\n", " command = [\n", " \"tippecanoe\",\n", " \"-o\", output_file,\n", " \"-zg\",\n", " \"--extend-zooms-if-still-dropping\",\n", " \"--force\",\n", " \"--projection\", \"EPSG:4326\", \n", " \"-L\",\"state:\"+input_file,\n", " \"-L\",\"county:\"+input_file2\n", " ]\n", " # Run Tippecanoe\n", " try:\n", " subprocess.run(command, check=True)\n", " print(f\"Successfully generated PMTiles file: {output_file}\")\n", " except subprocess.CalledProcessError as e:\n", " print(f\"Error running Tippecanoe: {e}\")\n" ] }, { "cell_type": "code", "execution_count": null, "id": "24df77fb-c881-4491-b7ca-7f3a3023cee0", "metadata": {}, "outputs": [], "source": [ "gdf_state = df_state.execute().set_crs(\"EPSG:4326\")\n", "gdf_state.to_file(\"party_state.geojson\")\n", "\n", "gdf_county = df_county.execute().set_crs(\"EPSG:4326\")\n", "gdf_county.to_file(\"party_county.geojson\")\n", "\n", "# city data too large to add to pmtiles :( \n", "# gdf_city = df_city.execute().set_crs(\"EPSG:4326\")\n", "# gdf_city.to_file(\"party_municipal.geojson\")\n", "\n", "generate_pmtiles(\"party_state.geojson\", \"party_county.geojson\", \"party_polygons.pmtiles\")\n", "hf_upload(\"party_polygons.pmtiles\", \"boettiger-lab/landvote\")\n" ] }, { "cell_type": "markdown", "id": "190169bb-5bfb-4eb7-a135-c5ce0e316595", "metadata": {}, "source": [ "# Combine all 3 jurisdiction types into a parquet." ] }, { "cell_type": "code", "execution_count": null, "id": "4ad8ad0a-afb3-427f-8b52-ea328e06ce85", "metadata": {}, "outputs": [], "source": [ "df_temp = df_county.union(df_city)\n", "df = df_temp.union(df_state)\n", "df.execute().set_crs(\"EPSG:4326\").to_parquet(\"party_polygons.parquet\")\n", "hf_upload(\"party_polygons.parquet\", \"boettiger-lab/landvote\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 5 }