{ "cells": [ { "cell_type": "code", "execution_count": 16, "id": "d179ded1-6235-47ed-bbfb-6d72468188d5", "metadata": {}, "outputs": [], "source": [ "import ibis\n", "from ibis import _\n", "import streamlit as st\n", "import ibis.expr.datatypes as dt # Make sure to import the necessary module\n", "\n", "\n", "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n", "\n", "# pres = conn.read_csv(\"sources-president.csv\")\n", "county = conn.read_csv(\"countypres_2000-2020.csv\")\n", "votes = conn.read_parquet(\"vote.parquet\")" ] }, { "cell_type": "markdown", "id": "170ba045-8848-4a99-a4f6-68bde22428af", "metadata": {}, "source": [ "# Getting party affiliations for counties" ] }, { "cell_type": "code", "execution_count": null, "id": "ab644102-c725-4cf4-915c-8550a0a74c32", "metadata": {}, "outputs": [], "source": [ "filtered = county.filter((_.mode == \"TOTAL\") & (_.totalvotes > 0))\n", "\n", "# Find the winning party for each year, state, and county\n", "most_votes = (\n", " filtered\n", " .group_by(['year', 'state_po', 'county_name', 'party'])\n", " .aggregate(winning_votes=_.candidatevotes.sum())\n", ")\n", "\n", "# For each year, state, and county, select the party with the highest total votes\n", "winning_party = (\n", " most_votes\n", " .group_by('year', 'state_po', 'county_name')\n", " .aggregate(\n", " max_votes=_.winning_votes.max(), # Max votes in this group\n", " )\n", " .join(\n", " most_votes,\n", " [\"year\",\"state_po\",\"county_name\",most_votes['winning_votes'] == _.max_votes],\n", " how='inner'\n", " )\n", " .select(\"year\",\"state_po\",\"county_name\",most_votes['party'].name('current_party')\n", " )\n", ")\n", "\n", "# Self-join to get the previous year's winning party\n", "previous_year = winning_party.view()\n", "\n", "joined = (\n", " winning_party\n", " .join(\n", " previous_year, [\"county_name\",\"state_po\",winning_party['year'] == previous_year['year'] + 4],\n", " how='left'\n", " )\n", " .rename(state_id = \"state_po\")\n", " .mutate(key = _.county_name + ibis.literal(\" COUNTY-\") + _.state_id)\n", " .select(\"year\",\"key\",\"current_party\",previous_year['current_party'].name('previous_party'))\n", ")\n", "\n", "county_parties = joined.filter(_.year >2000).order_by(\"year\")\n", "\n", "print(county_parties.execute())" ] }, { "cell_type": "code", "execution_count": null, "id": "ce0d80bf-3b78-4aa9-8048-5cc0dbf970d9", "metadata": {}, "outputs": [], "source": [ "df = (votes\n", " .mutate(key = _.key.upper())\n", " .filter(_.jurisdiction == \"Municipal\")\n", " .join(county_parties, [\"key\",\"year\"],how='inner'\n", " )\n", " .cast({\"geometry\": \"geometry\"})\n", ")" ] }, { "cell_type": "code", "execution_count": 30, "id": "87bef5e2-a40a-4aff-aa27-e7d49ec68aac", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n", "Token is valid (permission: write).\n", "Your token has been saved to /home/rstudio/.cache/huggingface/token\n", "Login successful\n" ] } ], "source": [ "import subprocess\n", "import os\n", "from huggingface_hub import HfApi, login\n", "import streamlit as st\n", "\n", "login(st.secrets[\"HF_TOKEN\"])\n", "# api = HfApi(add_to_git_credential=False)\n", "api = HfApi()\n", "\n", "def hf_upload(file, repo_id):\n", " info = api.upload_file(\n", " path_or_fileobj=file,\n", " path_in_repo=file,\n", " repo_id=repo_id,\n", " repo_type=\"dataset\",\n", " )\n", "def generate_pmtiles(input_file, output_file, max_zoom=12):\n", " # Ensure Tippecanoe is installed\n", " if subprocess.call([\"which\", \"tippecanoe\"], stdout=subprocess.DEVNULL) != 0:\n", " raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n", "\n", " # Construct the Tippecanoe command\n", " command = [\n", " \"tippecanoe\",\n", " \"-o\", output_file,\n", " \"-zg\",\n", " \"--extend-zooms-if-still-dropping\",\n", " \"--force\",\n", " \"--projection\", \"EPSG:4326\", \n", " input_file\n", " ]\n", "\n", " # Run Tippecanoe\n", " try:\n", " subprocess.run(command, check=True)\n", " print(f\"Successfully generated PMTiles file: {output_file}\")\n", " except subprocess.CalledProcessError as e:\n", " print(f\"Error running Tippecanoe: {e}\")\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "b086e76c-4285-4036-8033-e4e45cb6966b", "metadata": {}, "outputs": [], "source": [ "gdf= df.execute()\n", "gdf = gdf.set_crs(\"EPSG:4326\")\n", "\n", "# gdf.to_parquet(\"county_parties.parquet\")\n", "# hf_upload(\"county_parties.parquet\", \"boettiger-lab/landvote\")\n", "\n", "# gdf.to_file(\"county_parties.geojson\")\n", "# hf_upload(\"county_parties.geojson\", \"boettiger-lab/landvote\")\n", "\n", "# generate_pmtiles(\"county_parties.geojson\", \"county_parties.pmtiles\")\n", "# hf_upload(\"county_parties.pmtiles\", \"boettiger-lab/landvote\")\n" ] }, { "cell_type": "markdown", "id": "6193c4b9-0183-4aae-9a25-899a748fd65e", "metadata": {}, "source": [ "# Checking map" ] }, { "cell_type": "code", "execution_count": null, "id": "c2ae8ada-c73e-4b2e-938e-70a29584f199", "metadata": {}, "outputs": [], "source": [ "import leafmap.maplibregl as leafmap\n", "m = leafmap.Map(style=\"positron\")\n", "\n", "\n", "url_states = \"https://huggingface.co/datasets/boettiger-lab/landvote/resolve/main/county_parties.pmtiles\"\n", "\n", "outcome = [\n", " 'match',\n", " ['get', 'Status'], \n", " \"Pass\", '#2E865F',\n", " \"Fail\", '#FF3300', \n", " '#ccc'\n", " ]\n", "paint_states = {\"fill-color\": outcome, \n", " # \"fill-opacity\": 0.2,\n", " }\n", "style_states = {\n", " \"layers\": [\n", " {\n", " \"id\": \"county_parties\",\n", " \"source\": \"county_parties\",\n", " \"source-layer\": \"county_parties\",\n", " \"type\": \"fill\",\n", " \"filter\": [\n", " \"==\",\n", " [\"get\", \"year\"],\n", " 2008,\n", " ], # only show buildings with height info\n", " \"paint\": paint_states\n", " },\n", " ],\n", "}\n", "\n", "m.add_pmtiles(\n", " url_states,\n", " style=style_states,\n", " visible=True,\n", " opacity=0.4,\n", " tooltip=True,\n", " fit_bounds=False,\n", ")\n", "\n", "m\n" ] }, { "cell_type": "markdown", "id": "8c12fd16-a953-4273-9e0f-44b50eacf633", "metadata": {}, "source": [ "# Getting Municipals " ] }, { "cell_type": "code", "execution_count": null, "id": "b3429fea-7c0d-4838-bcbb-6552079dc3b6", "metadata": {}, "outputs": [], "source": [ "\n", "localities_boundaries = \"us_localities.parquet\"\n", "locality = conn.read_parquet(localities_boundaries)\n", "landvote = conn.read_csv(\"landvote.csv\")\n", "\n", "#needed to redo this, since I didn't save county in \"votes.parquet\". \n", "vote_local = (landvote\n", " .filter(_[\"Jurisdiction Type\"] == \"Municipal\")\n", " .rename(city = \"Jurisdiction Name\", state_id = \"State\")\n", " .mutate(key = _.city + ibis.literal('-') + _.state_id)\n", " .rename(amount = 'Conservation Funds at Stake', yes = '% Yes')\n", " .mutate(amount_n=_.amount.replace('$', '').replace(',', '').cast('float'))\n", " .mutate(log_amount=_.amount_n.log())\n", " .mutate(year=_['Date'].year().cast('int32'))\n", " .mutate(\n", " yes=ibis.case()\n", " .when(_.yes.isin(['Pass', 'None','Fail']), None) # Handle non-numeric cases\n", " .when(_.yes.notnull(), (_.yes.replace('%', '').cast('float').round(2).cast(dt.float64)).cast(dt.string) + '%') # Convert valid percentages and add %\n", " .else_(None) # Default to None for other cases\n", " .end()\n", " )\n", " .mutate(log_amount = _.log_amount.round(4))\n", " .select('key', 'Status', 'yes', 'year', 'amount', 'log_amount', )\n", " )\n", "\n", "# getting the county parties for each municipal \n", "df_municipals = (locality \n", " .mutate(key_municipal = _.name + ibis.literal('-') + _.state_id) \n", " .mutate(key = (_.county + ibis.literal('-') + _.state_id).upper()) \n", " .select('key', 'geometry','key_municipal','name')\n", " .right_join(vote_local, [_.key_municipal == vote_local[\"key\"]])\n", " .mutate(jurisdiction = ibis.literal(\"Municipal\"))\n", " .cast({\"geometry\": \"geometry\"})\n", " .mutate(geometry = _.geometry.buffer(.07))\n", " .join(county_parties, [\"key\",\"year\"],how='inner')\n", " .rename(county = \"key\")\n", " .rename(key = \"key_municipal\")\n", " .select('key','geometry','Status','yes','year','amount','log_amount','jurisdiction','current_party','previous_party')\n", " )\n", "\n", "\n", "gdf_municipals = df_municipals.execute()\n", "gdf_municipals = gdf_municipals.set_crs(\"EPSG:4326\")\n", "gdf_municipals\n", "\n", "\n", "# gdf_municipals.to_parquet(\"municipal_parties.parquet\")\n", "# hf_upload(\"municipal_parties.parquet\", \"boettiger-lab/landvote\")\n", "\n", "# gdf_municipals.to_file(\"municipal_parties.geojson\")\n", "# hf_upload(\"municipal_parties.geojson\", \"boettiger-lab/landvote\")\n", "\n", "# generate_pmtiles(\"municipal_parties.geojson\", \"municipal_parties.pmtiles\")\n", "# hf_upload(\"municipal_parties.pmtiles\", \"boettiger-lab/landvote\")\n" ] }, { "cell_type": "markdown", "id": "06e24a7e-5f7f-42bc-b515-43082016d496", "metadata": {}, "source": [ "# Get States" ] }, { "cell_type": "code", "execution_count": 28, "id": "217170ca-b732-4875-b4cb-f8a8cd2fc405", "metadata": {}, "outputs": [], "source": [ "states = (conn\n", " .read_csv(\"1976-2020-president.csv\")\n", " .filter(_. year >=2000)\n", " )\n", "# states.execute()" ] }, { "cell_type": "code", "execution_count": 18, "id": "322b9a85-bdf9-45f9-9b19-695cc1b996e8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " year key current_party previous_party\n", "0 2004 HI DEMOCRAT DEMOCRAT\n", "1 2004 ME DEMOCRAT DEMOCRAT\n", "2 2004 NJ DEMOCRAT DEMOCRAT\n", "3 2004 NM REPUBLICAN DEMOCRAT\n", "4 2004 ND REPUBLICAN REPUBLICAN\n", ".. ... .. ... ...\n", "250 2020 VT DEMOCRAT DEMOCRAT\n", "251 2020 AL REPUBLICAN REPUBLICAN\n", "252 2020 IA REPUBLICAN REPUBLICAN\n", "253 2020 SD REPUBLICAN REPUBLICAN\n", "254 2020 GA DEMOCRAT REPUBLICAN\n", "\n", "[255 rows x 4 columns]\n" ] } ], "source": [ "# filtered = county.filter((_.mode == \"TOTAL\") & (_.totalvotes > 0))\n", "\n", "# Find the winning party for each year, state, and county\n", "most_votes= (\n", " states\n", " .group_by(['year', 'state_po', 'party_simplified'])\n", " .aggregate(winning_votes=_.candidatevotes.sum())\n", ")\n", "\n", "# For each year, state, and county, select the party with the highest total votes\n", "winning_party = (\n", " most_votes\n", " .group_by('year', 'state_po')\n", " .aggregate(\n", " max_votes=_.winning_votes.max(), # Max votes in this group\n", " )\n", " .join(\n", " most_votes,\n", " [\"year\",\"state_po\",most_votes['winning_votes'] == _.max_votes],\n", " how='inner'\n", " )\n", " .select(\"year\",\"state_po\",most_votes['party_simplified'].name('current_party')\n", " )\n", ")\n", "\n", "# Self-join to get the previous year's winning party\n", "previous_year = winning_party.view()\n", "\n", "joined = (\n", " winning_party\n", " .join(\n", " previous_year, [\"state_po\",winning_party['year'] == previous_year['year'] + 4],\n", " how='left'\n", " )\n", " .rename(key = \"state_po\")\n", " # .mutate(key = _.county_name + ibis.literal(\" COUNTY-\") + _.state_id)\n", " .select(\"year\",\"key\",\"current_party\",previous_year['current_party'].name('previous_party'))\n", ")\n", "\n", "state_parties = joined.filter(_.year >2000).order_by(\"year\")\n", "\n", "print(state_parties.execute())" ] }, { "cell_type": "code", "execution_count": 31, "id": "2c03920e-76da-4034-8eaf-1e80a56f5b0d", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "3574af6546ff4cd1949e27c63cd15cd7", "version_major": 2, "version_minor": 0 }, "text/plain": [ "states_parties.parquet: 0%| | 0.00/2.36M [00:00