{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "b8e12b31-956b-46a9-8263-2b0c27bad60a", "metadata": {}, "outputs": [], "source": [ "import ibis\n", "from ibis import _\n", "import streamlit as st\n", "import ibis.expr.datatypes as dt \n", "\n", "conn = ibis.duckdb.connect(extensions=[\"spatial\"])" ] }, { "cell_type": "markdown", "id": "18dc8e53-b775-4931-b86e-18f3b3213dbb", "metadata": {}, "source": [ "# Adding county/city parties for 1988 - 1996" ] }, { "cell_type": "code", "execution_count": null, "id": "27e27c6d-6ad5-4e2c-a780-2bac6ecd7836", "metadata": {}, "outputs": [], "source": [ "# convert wide format to long format\n", "def convert_data(candidates,year,county):\n", " county= (county\n", " .rename(county=\"Geographic Name\", GEOID = \"FIPS\")\n", " .filter(_.GEOID != \"fips\")\n", " .cast({\"GEOID\":\"string\"})\n", " .mutate(\n", " GEOID=ibis.case()\n", " .when(_.GEOID.length() == 4, \"0\"+_.GEOID)\n", " .else_(_.GEOID)\n", " .end()\n", " )\n", " .drop(\"Geographic Subtype\")\n", " .mutate(county= \n", " ibis.case()\n", " .when(_.county.endswith('County'), _.county)\n", " .else_(_.county + ' County') .end())\n", " )\n", " \n", " candidate_labels = ibis.memtable({'candidate': candidates}).cross_join(county)\n", " \n", " case_builder = (\n", " ibis.case()\n", " .when(candidate_labels.candidate == candidates[0], county[candidates[0]])\n", " )\n", " \n", " for candidate in candidates[1:]:\n", " case_builder = case_builder.when(candidate_labels.candidate == candidate, county[candidate])\n", " \n", " candidate_votes = case_builder.end()\n", " \n", " long_format = candidate_labels.mutate(\n", " GEOID = _.GEOID,\n", " year=year,\n", " candidate_votes=candidate_votes.cast(\"int64\")\n", " ).filter(candidate_votes.notnull() & (candidate_votes.cast(\"int64\") > 0))\n", " \n", " result = long_format.select(\n", " \"year\", \n", " \"GEOID\",\n", " \"county\", \n", " \"candidate\", \n", " \"candidate_votes\"\n", " )\n", " return result" ] }, { "cell_type": "code", "execution_count": null, "id": "6f59d864-1ff2-49ab-a4c3-f6b268989059", "metadata": {}, "outputs": [], "source": [ "#get winner of each county\n", "def get_winner(df):\n", " win = (df\n", " .group_by([\"year\", \"GEOID\"])\n", " .aggregate(candidate=_.candidate.argmax(_.candidate_votes)\n", " , county = _.county.max()) # winning party \n", " ) \n", " return win" ] }, { "cell_type": "markdown", "id": "abc6b2d8-4ea9-4542-8e4b-612ee10a444e", "metadata": {}, "source": [ "# 1988 Election Results" ] }, { "cell_type": "code", "execution_count": null, "id": "7cc5793d-fba9-4986-864e-3fdd6d5bddd3", "metadata": {}, "outputs": [], "source": [ "county = conn.read_csv(\"1988_0_0_2.csv\")\n", " \n", "candidates = ['Michael Dukakis', 'George Bush', 'Ron Paul', 'Lenora Fulani', 'David Duke', 'Eugene McCarthy', \n", " 'James Griffen', 'Lyndon LaRouche', 'William Marra', 'Write-In', 'Edward Winn', 'James Warren', \n", " 'Herbert Lewin', 'Earl Dodge', 'Larry Holmes', 'None o.t. Candidates', 'Willa Kenoyer', \n", " 'Delmar Dennis', 'Jack Herer', 'Louis Youngkeit', 'John Martin']\n", "\n", "year = ibis.literal(\"1988\")\n", "\n", "year1 = convert_data(candidates,year,county)\n", "\n", "winner = get_winner(year1)\n", "\n", "df_1988 = (winner\n", " .mutate(\n", " party=ibis.case()\n", " .when(_.candidate == \"George Bush\", \"REPUBLICAN\") \n", " .when(_.candidate == \"Michael Dukakis\", \"DEMOCRAT\") \n", " .else_(None) \n", " .end()\n", " )\n", " )\n", "\n" ] }, { "cell_type": "markdown", "id": "e51abd1c-3cba-4beb-b092-82405f8a253a", "metadata": {}, "source": [ "# 1992 Election Results" ] }, { "cell_type": "code", "execution_count": null, "id": "9128ee1b-fcef-4cb3-b03b-14620918c4c0", "metadata": {}, "outputs": [], "source": [ "import ibis\n", "from ibis import _\n", "import ibis.expr.datatypes as dt \n", "\n", "county = (conn\n", " .read_csv(\"1992_0_0_2.csv\")\n", " )\n", "\n", "candidates = ['William Clinton', 'George Bush', 'H. Ross Perot', 'Andre Marrou', 'James Bo Gritz', 'Lenore Fulani', \n", " 'Howard Phillips', 'Dr. John Hagelin', 'Ron Daniels', 'Lyndon LaRouche Jr.', 'James Warren', \n", " 'Write-ins', 'Drew Bradford', 'Jack Herer', 'J. Quinn Brisben', 'Helen Halyard', \n", " 'None o.t. Candidates', 'John Yiamouyiannis', 'Ehlers', 'Earl Dodge', 'Jim Boren', \n", " 'Hem', 'Isabell Masters', 'Smith', 'Gloria LaRiva']\n", "\n", "year = ibis.literal(\"1992\")\n", "\n", "year2 = convert_data(candidates,year,county)\n", "\n", "winner = get_winner(year2)\n", "\n", "df_1992 = (winner\n", " .mutate(\n", " party=ibis.case()\n", " .when(_.candidate == \"George Bush\", \"REPUBLICAN\") \n", " .when(_.candidate == \"William Clinton\", \"DEMOCRAT\") \n", " .when(_.candidate == \"H. Ross Perot\", \"REFORM\") \n", " .else_(None) \n", " .end()\n", " )\n", " )\n", "\n" ] }, { "cell_type": "markdown", "id": "652dd5e5-0347-4adf-99ba-65cb61fabb8d", "metadata": {}, "source": [ "# 1996 Election Results" ] }, { "cell_type": "code", "execution_count": null, "id": "b23a92d7-9916-4412-be4f-e4d249ccabca", "metadata": {}, "outputs": [], "source": [ "\n", "county = (conn\n", " .read_csv(\"1996_0_0_2.csv\")\n", " )\n", "\n", "candidates = [\n", " 'William Clinton', 'Robert Dole', 'H. Ross Perot', 'Ralph Nader', 'Harry Browne', \n", " 'Howard Phillips', 'Dr. John Hagelin', 'Monica Moorehead', 'Marsha Feinland', \n", " 'Write-ins', 'Charles Collins', 'James Harris', 'None o.t. Candidates', \n", " 'Dennis Peron', 'Mary Cal Hollis', 'Jerome White', 'Diane Templin', 'Earl Dodge', \n", " 'A. Peter Crane', 'Just. Ralph Forbes', 'John Birrenback', 'Isabell Masters pHD', \n", " 'Steve Michael'\n", "]\n", "year = ibis.literal(\"1996\")\n", "\n", "year3 = convert_data(candidates,year,county)\n", "\n", "winner = get_winner(year3)\n", "\n", "df_1996 = (winner\n", " .mutate(\n", " party=ibis.case()\n", " .when(_.candidate == \"Robert Dole\", \"REPUBLICAN\") \n", " .when(_.candidate == \"William Clinton\", \"DEMOCRAT\") \n", " .else_(None) \n", " .end()\n", " )\n", " )" ] }, { "cell_type": "markdown", "id": "761bdd22-c2b9-431b-a59f-84920a85b841", "metadata": {}, "source": [ "# Merging with county polygons " ] }, { "cell_type": "code", "execution_count": null, "id": "6b084fcb-a502-4058-b141-98d04af17048", "metadata": {}, "outputs": [], "source": [ "df_temp = df_1988.union(df_1992)\n", "df_county = df_temp.union(df_1996)" ] }, { "cell_type": "code", "execution_count": null, "id": "a900e1ff-86ec-480a-8305-54da9fc744bc", "metadata": {}, "outputs": [], "source": [ "state_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-state-territory.parquet\"\n", "state = (conn\n", " .read_parquet(state_boundaries)\n", " .rename(state = \"STUSPS\", state_name = \"NAME\")\n", " .select('state','geometry','state_name',\"GEOID\")\n", " .cast({\"geometry\": \"geometry\",\"GEOID\":\"string\"})\n", " )\n", "\n", "\n", "# merging with county polygons\n", "county_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-county.parquet\"\n", "df_polygons = (conn\n", " .read_parquet(county_boundaries)\n", " .rename(county = \"NAMELSAD\", state_name = \"STATE_NAME\")\n", " .join(state, \"state_name\", how = \"inner\")\n", " .select('state','state_name','county','geometry','GEOID')\n", " )\n", "\n", "\n", "\n", "\n", "df_county_polygons = (df_polygons\n", " .join(df_county, [\"GEOID\"],how = \"inner\")\n", " .mutate(municipal = None)\n", " .mutate(jurisdiction = ibis.literal(\"County\"))\n", " .select(\"state\", \"county\", \"municipal\",\"jurisdiction\",\"geometry\", \"year\", \"party\")\n", " .cast({\"geometry\": \"geometry\",\"municipal\":\"string\",\"year\":\"int64\"})\n", " .mutate(county = _.county.upper())\n", " )" ] }, { "cell_type": "markdown", "id": "fbeb95fd-eb09-48d3-8883-9966669f8079", "metadata": {}, "source": [ "# Combine with Elections 2000-2020 (Only Counties)" ] }, { "cell_type": "code", "execution_count": null, "id": "a26e28c6-4229-42ad-8548-6617623de30a", "metadata": {}, "outputs": [], "source": [ "df_2000 = (conn\n", " .read_parquet(\"party_polygons.parquet\")\n", " .cast({\"geometry\": \"geometry\"})\n", " .filter(_.jurisdiction == \"County\")\n", " .union(df_county_polygons)\n", " )\n" ] }, { "cell_type": "markdown", "id": "5741c5d8-9f7a-45b3-9563-07315d23e77b", "metadata": {}, "source": [ "# Make PMTiles - only with county level parties " ] }, { "cell_type": "code", "execution_count": null, "id": "c7f7689a-e0e5-40b9-9fda-3e39f5c7b08f", "metadata": {}, "outputs": [], "source": [ "import subprocess\n", "import os\n", "from huggingface_hub import HfApi, login\n", "import streamlit as st\n", "\n", "login(st.secrets[\"HF_TOKEN\"])\n", "# api = HfApi(add_to_git_credential=False)\n", "api = HfApi()\n", "\n", "def hf_upload(file, repo_id):\n", " info = api.upload_file(\n", " path_or_fileobj=file,\n", " path_in_repo=file,\n", " repo_id=repo_id,\n", " repo_type=\"dataset\",\n", " )\n", "def generate_pmtiles(input_file, output_file, max_zoom=12):\n", " # Ensure Tippecanoe is installed\n", " if subprocess.call([\"which\", \"tippecanoe\"], stdout=subprocess.DEVNULL) != 0:\n", " raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n", "\n", " # Construct the Tippecanoe command\n", " command = [\n", " \"tippecanoe\",\n", " \"-o\", output_file,\n", " \"-zg\",\n", " \"--extend-zooms-if-still-dropping\",\n", " \"--force\",\n", " \"--projection\", \"EPSG:4326\", \n", " \"-L\",\"county:\"+input_file\n", " ]\n", " # Run Tippecanoe\n", " try:\n", " subprocess.run(command, check=True)\n", " print(f\"Successfully generated PMTiles file: {output_file}\")\n", " except subprocess.CalledProcessError as e:\n", " print(f\"Error running Tippecanoe: {e}\")\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "6d54069c-49c5-48dc-9cca-c59ded295f16", "metadata": {}, "outputs": [], "source": [ "gdf = df_2000.execute().set_crs(\"EPSG:4326\")\n", "gdf.to_file(\"party_polygons_all.geojson\")\n", "\n", "generate_pmtiles(\"party_polygons_all.geojson\", \"party_polygons_all.pmtiles\")\n", "hf_upload(\"party_polygons_all.pmtiles\", \"boettiger-lab/landvote\")\n" ] }, { "cell_type": "code", "execution_count": null, "id": "1706ec30-9c56-488d-b739-5babed79d8e8", "metadata": {}, "outputs": [], "source": [ "# save parquet \n", "df_2000.execute().set_crs(\"EPSG:4326\").to_parquet(\"party_polygons_all.parquet\")\n", "hf_upload(\"party_polygons_all.parquet\", \"boettiger-lab/landvote\")\n" ] }, { "cell_type": "markdown", "id": "51efd1ff-945b-47ec-8ca0-97a8ce43fd2d", "metadata": {}, "source": [ "# Getting party affiliation for cities 1988-1996" ] }, { "cell_type": "code", "execution_count": null, "id": "86b63b03-1f33-4945-85c5-1ecdc2b91907", "metadata": {}, "outputs": [], "source": [ "\n", " \n", "df_state_city = (df_polygons\n", " .join(df_county, [\"GEOID\"],how = \"inner\")\n", " .mutate(county = _.county.upper())\n", " .select('state','county','year','party')\n", ")\n", "\n", "\n", "localities_boundaries = \"us_localities.parquet\"\n", "locality = (conn\n", " .read_parquet(localities_boundaries)\n", " .mutate(county = _.county.upper())\n", " .mutate(municipal = _.municipal.upper())\n", " .mutate(county=ibis.case()\n", " .when(_.county.endswith('COUNTY'), _.county)\n", " .else_(_.county + ' COUNTY')\n", " .end())\n", " )\n", "\n", "df_city = (df_state_city\n", " # .drop(\"state_name\")\n", " .join(locality, [\"state\",\"county\"], how = \"inner\")\n", " .mutate(jurisdiction = ibis.literal(\"Municipal\"))\n", " .select(\"state\", \"county\", \"municipal\",\"jurisdiction\",\"geometry\", \"year\", \"party\")\n", " .cast({\"geometry\": \"geometry\",\"municipal\":\"string\",\"year\":\"int64\"})\n", " )\n", "\n", "\n", "df_before2000 = df_city.union(df_county_polygons) # adding all the data before 2000\n" ] }, { "cell_type": "markdown", "id": "a1db2c44-7b84-42ff-85b9-46dee348650f", "metadata": {}, "source": [ "# Combine with Elections 2000-2020 (Cities + Counties)" ] }, { "cell_type": "code", "execution_count": null, "id": "d478037c-083f-488d-950f-692ff5fc1b83", "metadata": {}, "outputs": [], "source": [ "\n", "df_2000_all = (conn\n", " .read_parquet(\"party_polygons.parquet\")\n", " .cast({\"geometry\": \"geometry\"})\n", " .union(df_before2000)\n", " )\n" ] }, { "cell_type": "code", "execution_count": null, "id": "c471d9cf-8775-4d88-bc73-d963b8ecfb37", "metadata": {}, "outputs": [], "source": [ "# save parquet \n", "df_2000_all.execute().set_crs(\"EPSG:4326\").to_parquet(\"party_polygons_all.parquet\")\n", "hf_upload(\"party_polygons_all.parquet\", \"boettiger-lab/landvote\")\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.7" } }, "nbformat": 4, "nbformat_minor": 5 }