cassiebuhler committed on
Commit
02c6b99
1 Parent(s): 4501539

added political parties

Browse files
app.py CHANGED
@@ -63,7 +63,8 @@ year = st.slider("Select a year", min_value=1988, max_value=2024, value=2022, st
63
  import leafmap.maplibregl as leafmap
64
  m = leafmap.Map(style="positron", center=(-100, 40), zoom=3)
65
 
66
- url = "https://huggingface.co/datasets/boettiger-lab/landvote/resolve/main/landvote.pmtiles"
 
67
 
68
  dark_orange = 'rgba(171, 86, 1, 1)' # dark orange - min value
69
  light_orange = 'rgba(243, 211, 177, 1)' # light orange
@@ -216,7 +217,7 @@ m.add_pmtiles(
216
  url,
217
  style=style_counties,
218
  visible=True,
219
- opacity=0.6,
220
  tooltip=True,
221
  fit_bounds=False
222
  )
 
63
  import leafmap.maplibregl as leafmap
64
  m = leafmap.Map(style="positron", center=(-100, 40), zoom=3)
65
 
66
+ url = "https://huggingface.co/datasets/boettiger-lab/landvote/resolve/main/landvote_polygons.pmtiles"
67
+ # url = "https://huggingface.co/datasets/boettiger-lab/landvote/resolve/main/votes.pmtiles"
68
 
69
  dark_orange = 'rgba(171, 86, 1, 1)' # dark orange - min value
70
  light_orange = 'rgba(243, 211, 177, 1)' # light orange
 
217
  url,
218
  style=style_counties,
219
  visible=True,
220
+ opacity=1.0,
221
  tooltip=True,
222
  fit_bounds=False
223
  )
get_city_polygons.ipynb ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "4bfd881f-5889-4a26-b003-e2611708ad2a",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Getting city polygons from Overture Maps"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": null,
14
+ "id": "3e5756d2-382b-49e9-93b5-2ecf6d0eb812",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "import duckdb\n",
19
+ "\n",
20
+ "con = duckdb.connect()\n",
21
+ "\n",
22
+ "con.execute(\"SET s3_region='us-west-2';\")\n",
23
+ "con.execute(\"LOAD spatial;\")\n",
24
+ "con.execute(\"LOAD httpfs;\")\n",
25
+ "\n",
26
+ "# getting polygons of localities in the US.\n",
27
+ "query = \"\"\"\n",
28
+ " COPY (\n",
29
+ " SELECT * \n",
30
+ " FROM read_parquet('s3://overturemaps-us-west-2/release/2024-09-18.0/theme=divisions/*/*')\n",
31
+ " WHERE country = 'US' AND subtype IN ('locality')\n",
32
+ " ) TO 'us_localities_raw.parquet' (FORMAT 'parquet');\n",
33
+ "\"\"\"\n",
34
+ "con.execute(query)\n",
35
+ "\n"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": null,
41
+ "id": "25f62dd7-5539-438b-8f0a-1d85c9bc78ab",
42
+ "metadata": {},
43
+ "outputs": [],
44
+ "source": [
45
+ "import ibis\n",
46
+ "from ibis import _\n",
47
+ "\n",
48
+ "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
49
+ "\n",
50
+ "df = (conn\n",
51
+ " .read_parquet(\"us_localities_raw.parquet\")\n",
52
+ " .cast({\"geometry\": \"geometry\"})\n",
53
+ " .filter(_[\"type\"] == \"division\")\n",
54
+ " .mutate(municipal = _.names[\"primary\"])\n",
55
+ " .mutate(state = _.region.replace(\"US-\", \"\")) \n",
56
+ " .mutate(county = _.hierarchies[0][2]['name'] ) #extract county from nested dictionary \n",
57
+ " .mutate(key_long = _.municipal + ibis.literal('-') + _.county + ibis.literal('-') + _.state)\n",
58
+ " .select(\"key_long\",\"municipal\", \"county\",\"state\" ,\"geometry\")\n",
59
+ " )\n",
60
+ "\n",
61
+ "\n",
62
+ "## Dropping rows with same locality and state, with differing counties - landvote doesn't specify county for cities so we are dropping these to avoid duplicates. \n",
63
+ "county_count = (\n",
64
+ " df.group_by([\"municipal\", \"state\"])\n",
65
+ " .aggregate(county_count=_.county.nunique()) # Count unique counties for each group\n",
66
+ ") \n",
67
+ "valid_names = county_count.filter(county_count.county_count == 1).select(\"municipal\", \"state\")\n",
68
+ "df_filtered = df.join(valid_names, [\"municipal\", \"state\"], how=\"inner\")\n",
69
+ "\n",
70
+ "\n",
71
+ "# if two records have the same name but different geometries, only keep the first one. \n",
72
+ "df_unique = (\n",
73
+ " df_filtered.group_by(\"key_long\")\n",
74
+ " .aggregate(\n",
75
+ " municipal=df_filtered.municipal.first(),\n",
76
+ " county=df_filtered.county.first(),\n",
77
+ " state=df_filtered.state.first(),\n",
78
+ " geometry=df_filtered.geometry.first()\n",
79
+ " )\n",
80
+ " .mutate(geometry = _.geometry.buffer(.07))\n",
81
+ " .select(\"state\",\"county\",\"municipal\",\"geometry\")\n",
82
+ ")\n",
83
+ "\n",
84
+ "df_unique.execute().to_parquet(\"us_localities.parquet\")\n"
85
+ ]
86
+ },
87
+ {
88
+ "cell_type": "markdown",
89
+ "id": "0fce9fe8-584f-4260-9217-3aade9e71eef",
90
+ "metadata": {},
91
+ "source": [
92
+ "# Uploading city polygons to Hugging Face"
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "execution_count": null,
98
+ "id": "ca02743f-0bf4-46e5-91fd-a5fe37519ecd",
99
+ "metadata": {},
100
+ "outputs": [],
101
+ "source": [
102
+ "import subprocess\n",
103
+ "import os\n",
104
+ "from huggingface_hub import HfApi, login\n",
105
+ "import streamlit as st\n",
106
+ "\n",
107
+ "login(st.secrets[\"HF_TOKEN\"])\n",
108
+ "api = HfApi()\n",
109
+ "\n",
110
+ "def hf_upload(file, repo_id):\n",
111
+ " info = api.upload_file(\n",
112
+ " path_or_fileobj=file,\n",
113
+ " path_in_repo=file,\n",
114
+ " repo_id=repo_id,\n",
115
+ " repo_type=\"dataset\",\n",
116
+ " )\n",
117
+ "hf_upload(\"us_localities.parquet\", \"boettiger-lab/landvote\")\n",
118
+ "\n"
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "execution_count": null,
124
+ "id": "1ddadf0a-5b45-487f-a664-0c0696f75579",
125
+ "metadata": {},
126
+ "outputs": [],
127
+ "source": []
128
+ }
129
+ ],
130
+ "metadata": {
131
+ "kernelspec": {
132
+ "display_name": "Python 3 (ipykernel)",
133
+ "language": "python",
134
+ "name": "python3"
135
+ },
136
+ "language_info": {
137
+ "codemirror_mode": {
138
+ "name": "ipython",
139
+ "version": 3
140
+ },
141
+ "file_extension": ".py",
142
+ "mimetype": "text/x-python",
143
+ "name": "python",
144
+ "nbconvert_exporter": "python",
145
+ "pygments_lexer": "ipython3",
146
+ "version": "3.11.10"
147
+ }
148
+ },
149
+ "nbformat": 4,
150
+ "nbformat_minor": 5
151
+ }
static-maps.ipynb → get_landvote.ipynb RENAMED
@@ -1,5 +1,13 @@
1
  {
2
  "cells": [
 
 
 
 
 
 
 
 
3
  {
4
  "cell_type": "code",
5
  "execution_count": null,
@@ -10,7 +18,7 @@
10
  "import ibis\n",
11
  "from ibis import _\n",
12
  "import streamlit as st\n",
13
- "import ibis.expr.datatypes as dt # Make sure to import the necessary module\n",
14
  "\n",
15
  "conn = ibis.duckdb.connect(extensions=[\"spatial\"])"
16
  ]
@@ -20,7 +28,7 @@
20
  "id": "943e4127-4af5-42c9-b1e2-48af2d888c24",
21
  "metadata": {},
22
  "source": [
23
- "# Landvote data"
24
  ]
25
  },
26
  {
@@ -78,16 +86,19 @@
78
  "outputs": [],
79
  "source": [
80
  "state_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-state-territory.parquet\"\n",
81
- "states = conn.read_parquet(state_boundaries).rename(state_id = \"STUSPS\", state = \"NAME\")\n",
82
- "\n",
83
  "landvote_states = landvote_df.filter(_.jurisdiction == \"State\")\n",
84
  "\n",
85
- "df_state = (states\n",
86
- " .select('state_id','geometry')\n",
87
- " .cast({\"geometry\": \"geometry\"})\n",
88
- " .right_join(landvote_states, landvote_states['state'] == states['state_id'] )\n",
 
 
 
 
 
89
  " .select('state','county','municipal','jurisdiction','geometry','Status', 'yes', 'year', 'amount', 'log_amount', )\n",
90
- " )"
91
  ]
92
  },
93
  {
@@ -108,13 +119,14 @@
108
  "landvote_county = landvote_df.filter(_.jurisdiction == \"County\")\n",
109
  "\n",
110
  "county_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-county.parquet\"\n",
111
- "county = conn.read_parquet(county_boundaries).rename(county = \"NAMELSAD\", state = \"STATE_NAME\")\n",
112
  "\n",
113
- "\n",
114
- "df_county = (county\n",
115
- " .select('state','county','geometry')\n",
 
 
116
  " .cast({\"geometry\": \"geometry\"})\n",
117
- " .right_join(landvote_county, \"county\" )\n",
118
  " .select('state','county','municipal','jurisdiction','geometry','Status', 'yes', 'year', 'amount', 'log_amount', )\n",
119
  " )"
120
  ]
@@ -134,20 +146,15 @@
134
  "metadata": {},
135
  "outputs": [],
136
  "source": [
137
- "landvote_local = landvote_df.filter(_.jurisdiction == \"Municipal\")\n",
138
- "\n",
139
- "local = (conn\n",
140
- " .read_parquet(\"us_localities.parquet\")\n",
141
- " .rename(municipal = \"name\",state = \"state_id\")\n",
142
- " )\n",
143
  "\n",
144
- "df_local = (local\n",
 
145
  " .select('state','county','municipal','geometry')\n",
146
  " .cast({\"geometry\": \"geometry\"})\n",
147
- " .mutate(geometry = _.geometry.buffer(.07))\n",
148
- " .right_join(landvote_local, [\"municipal\",\"state\"])\n",
149
  " .select('state','county','municipal','jurisdiction','geometry','Status', 'yes', 'year', 'amount', 'log_amount', )\n",
150
- " )"
151
  ]
152
  },
153
  {
@@ -214,14 +221,17 @@
214
  "metadata": {},
215
  "outputs": [],
216
  "source": [
217
- "df_state.execute().to_file(\"vote_state.geojson\")\n",
218
- "df_county.execute().to_file(\"vote_county.geojson\")\n",
219
- "df_local.execute().to_file(\"vote_municipal.geojson\")\n",
 
 
220
  "\n",
221
- "# hf_upload(\"vote.geojson\", \"boettiger-lab/landvote\")\n",
 
222
  "\n",
223
- "generate_pmtiles(\"vote_state.geojson\", \"vote_county.geojson\",\"vote_municipal.geojson\", \"landvote.pmtiles\")\n",
224
- "hf_upload(\"landvote.pmtiles\", \"boettiger-lab/landvote\")\n"
225
  ]
226
  },
227
  {
@@ -239,11 +249,19 @@
239
  "metadata": {},
240
  "outputs": [],
241
  "source": [
242
- "df_temp = df_county.union(df_local)\n",
243
  "df = df_temp.union(df_state)\n",
244
- "df.execute().to_parquet(\"landvote.parquet\")\n",
245
- "hf_upload(\"landvote.parquet\", \"boettiger-lab/landvote\")\n"
246
  ]
 
 
 
 
 
 
 
 
247
  }
248
  ],
249
  "metadata": {
@@ -262,7 +280,7 @@
262
  "name": "python",
263
  "nbconvert_exporter": "python",
264
  "pygments_lexer": "ipython3",
265
- "version": "3.10.12"
266
  }
267
  },
268
  "nbformat": 4,
 
1
  {
2
  "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "fa25e9ed-1dd1-4ca1-9898-9606c94d449f",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Merging state, county, and city polygons with landvote data\n"
9
+ ]
10
+ },
11
  {
12
  "cell_type": "code",
13
  "execution_count": null,
 
18
  "import ibis\n",
19
  "from ibis import _\n",
20
  "import streamlit as st\n",
21
+ "import ibis.expr.datatypes as dt \n",
22
  "\n",
23
  "conn = ibis.duckdb.connect(extensions=[\"spatial\"])"
24
  ]
 
28
  "id": "943e4127-4af5-42c9-b1e2-48af2d888c24",
29
  "metadata": {},
30
  "source": [
31
+ "# Landvote data\n"
32
  ]
33
  },
34
  {
 
86
  "outputs": [],
87
  "source": [
88
  "state_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-state-territory.parquet\"\n",
 
 
89
  "landvote_states = landvote_df.filter(_.jurisdiction == \"State\")\n",
90
  "\n",
91
+ "state = (conn\n",
92
+ " .read_parquet(state_boundaries)\n",
93
+ " .rename(state = \"STUSPS\", state_name = \"NAME\")\n",
94
+ " .select('state','geometry','state_name')\n",
95
+ " .cast({\"geometry\": \"geometry\"})\n",
96
+ " )\n",
97
+ "\n",
98
+ "df_state = (state\n",
99
+ " .join(landvote_states, \"state\",how = \"inner\")\n",
100
  " .select('state','county','municipal','jurisdiction','geometry','Status', 'yes', 'year', 'amount', 'log_amount', )\n",
101
+ " )"
102
  ]
103
  },
104
  {
 
119
  "landvote_county = landvote_df.filter(_.jurisdiction == \"County\")\n",
120
  "\n",
121
  "county_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-county.parquet\"\n",
 
122
  "\n",
123
+ "df_county = (conn\n",
124
+ " .read_parquet(county_boundaries)\n",
125
+ " .rename(county = \"NAMELSAD\", state_name = \"STATE_NAME\")\n",
126
+ " .join(state, \"state_name\", how = \"inner\")\n",
127
+ " .select('state','state_name','county','geometry')\n",
128
  " .cast({\"geometry\": \"geometry\"})\n",
129
+ " .join(landvote_county, [\"county\",\"state\"],how = \"inner\")\n",
130
  " .select('state','county','municipal','jurisdiction','geometry','Status', 'yes', 'year', 'amount', 'log_amount', )\n",
131
  " )"
132
  ]
 
146
  "metadata": {},
147
  "outputs": [],
148
  "source": [
149
+ "landvote_city = landvote_df.filter(_.jurisdiction == \"Municipal\")\n",
 
 
 
 
 
150
  "\n",
151
+ "df_city = (conn\n",
152
+ " .read_parquet(\"us_localities.parquet\")\n",
153
  " .select('state','county','municipal','geometry')\n",
154
  " .cast({\"geometry\": \"geometry\"})\n",
155
+ " .join(landvote_city, [\"municipal\",\"state\"], how = \"inner\")\n",
 
156
  " .select('state','county','municipal','jurisdiction','geometry','Status', 'yes', 'year', 'amount', 'log_amount', )\n",
157
+ " )"
158
  ]
159
  },
160
  {
 
221
  "metadata": {},
222
  "outputs": [],
223
  "source": [
224
+ "gdf_state = df_state.execute().set_crs(\"EPSG:4326\")\n",
225
+ "gdf_state.to_file(\"landvote_state.geojson\")\n",
226
+ "\n",
227
+ "gdf_county = df_county.execute().set_crs(\"EPSG:4326\")\n",
228
+ "gdf_county.to_file(\"landvote_county.geojson\")\n",
229
  "\n",
230
+ "gdf_city = df_city.execute().set_crs(\"EPSG:4326\")\n",
231
+ "gdf_city.to_file(\"landvote_municipal.geojson\")\n",
232
  "\n",
233
+ "generate_pmtiles(\"landvote_state.geojson\", \"landvote_county.geojson\",\"landvote_municipal.geojson\", \"landvote_polygons.pmtiles\")\n",
234
+ "hf_upload(\"landvote_polygons.pmtiles\", \"boettiger-lab/landvote\")\n"
235
  ]
236
  },
237
  {
 
249
  "metadata": {},
250
  "outputs": [],
251
  "source": [
252
+ "df_temp = df_county.union(df_city)\n",
253
  "df = df_temp.union(df_state)\n",
254
+ "df.execute().set_crs(\"EPSG:4326\").to_parquet(\"landvote_polygons.parquet\")\n",
255
+ "hf_upload(\"landvote_polygons.parquet\", \"boettiger-lab/landvote\")\n"
256
  ]
257
+ },
258
+ {
259
+ "cell_type": "code",
260
+ "execution_count": null,
261
+ "id": "1f1e18bd-dce5-4cd4-b25a-371674340348",
262
+ "metadata": {},
263
+ "outputs": [],
264
+ "source": []
265
  }
266
  ],
267
  "metadata": {
 
280
  "name": "python",
281
  "nbconvert_exporter": "python",
282
  "pygments_lexer": "ipython3",
283
+ "version": "3.11.10"
284
  }
285
  },
286
  "nbformat": 4,
get_party.ipynb CHANGED
@@ -1,8 +1,16 @@
1
  {
2
  "cells": [
 
 
 
 
 
 
 
 
3
  {
4
  "cell_type": "code",
5
- "execution_count": 16,
6
  "id": "d179ded1-6235-47ed-bbfb-6d72468188d5",
7
  "metadata": {},
8
  "outputs": [],
@@ -12,226 +20,105 @@
12
  "import streamlit as st\n",
13
  "import ibis.expr.datatypes as dt # Make sure to import the necessary module\n",
14
  "\n",
15
- "\n",
16
- "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
17
- "\n",
18
- "# pres = conn.read_csv(\"sources-president.csv\")\n",
19
- "county = conn.read_csv(\"countypres_2000-2020.csv\")\n",
20
- "votes = conn.read_parquet(\"vote.parquet\")"
21
  ]
22
  },
23
  {
24
  "cell_type": "markdown",
25
- "id": "170ba045-8848-4a99-a4f6-68bde22428af",
26
  "metadata": {},
27
  "source": [
28
- "# Getting party affiliations for counties"
29
  ]
30
  },
31
  {
32
  "cell_type": "code",
33
  "execution_count": null,
34
- "id": "ab644102-c725-4cf4-915c-8550a0a74c32",
35
  "metadata": {},
36
  "outputs": [],
37
  "source": [
38
- "filtered = county.filter((_.mode == \"TOTAL\") & (_.totalvotes > 0))\n",
39
- "\n",
40
- "# Find the winning party for each year, state, and county\n",
41
- "most_votes = (\n",
42
- " filtered\n",
43
- " .group_by(['year', 'state_po', 'county_name', 'party'])\n",
44
- " .aggregate(winning_votes=_.candidatevotes.sum())\n",
45
- ")\n",
46
- "\n",
47
- "# For each year, state, and county, select the party with the highest total votes\n",
48
- "winning_party = (\n",
49
- " most_votes\n",
50
- " .group_by('year', 'state_po', 'county_name')\n",
51
- " .aggregate(\n",
52
- " max_votes=_.winning_votes.max(), # Max votes in this group\n",
53
- " )\n",
54
- " .join(\n",
55
- " most_votes,\n",
56
- " [\"year\",\"state_po\",\"county_name\",most_votes['winning_votes'] == _.max_votes],\n",
57
- " how='inner'\n",
58
- " )\n",
59
- " .select(\"year\",\"state_po\",\"county_name\",most_votes['party'].name('current_party')\n",
60
- " )\n",
61
- ")\n",
62
- "\n",
63
- "# Self-join to get the previous year's winning party\n",
64
- "previous_year = winning_party.view()\n",
65
- "\n",
66
- "joined = (\n",
67
- " winning_party\n",
68
- " .join(\n",
69
- " previous_year, [\"county_name\",\"state_po\",winning_party['year'] == previous_year['year'] + 4],\n",
70
- " how='left'\n",
71
- " )\n",
72
- " .rename(state_id = \"state_po\")\n",
73
- " .mutate(key = _.county_name + ibis.literal(\" COUNTY-\") + _.state_id)\n",
74
- " .select(\"year\",\"key\",\"current_party\",previous_year['current_party'].name('previous_party'))\n",
75
- ")\n",
76
- "\n",
77
- "county_parties = joined.filter(_.year >2000).order_by(\"year\")\n",
78
- "\n",
79
- "print(county_parties.execute())"
80
  ]
81
  },
82
  {
83
  "cell_type": "code",
84
  "execution_count": null,
85
- "id": "ce0d80bf-3b78-4aa9-8048-5cc0dbf970d9",
86
  "metadata": {},
87
  "outputs": [],
88
  "source": [
89
- "df = (votes\n",
90
- " .mutate(key = _.key.upper())\n",
91
- " .filter(_.jurisdiction == \"Municipal\")\n",
92
- " .join(county_parties, [\"key\",\"year\"],how='inner'\n",
93
- " )\n",
94
- " .cast({\"geometry\": \"geometry\"})\n",
95
- ")"
 
 
 
 
 
 
 
96
  ]
97
  },
98
  {
99
- "cell_type": "code",
100
- "execution_count": 30,
101
- "id": "87bef5e2-a40a-4aff-aa27-e7d49ec68aac",
102
  "metadata": {},
103
- "outputs": [
104
- {
105
- "name": "stdout",
106
- "output_type": "stream",
107
- "text": [
108
- "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
109
- "Token is valid (permission: write).\n",
110
- "Your token has been saved to /home/rstudio/.cache/huggingface/token\n",
111
- "Login successful\n"
112
- ]
113
- }
114
- ],
115
  "source": [
116
- "import subprocess\n",
117
- "import os\n",
118
- "from huggingface_hub import HfApi, login\n",
119
- "import streamlit as st\n",
120
- "\n",
121
- "login(st.secrets[\"HF_TOKEN\"])\n",
122
- "# api = HfApi(add_to_git_credential=False)\n",
123
- "api = HfApi()\n",
124
- "\n",
125
- "def hf_upload(file, repo_id):\n",
126
- " info = api.upload_file(\n",
127
- " path_or_fileobj=file,\n",
128
- " path_in_repo=file,\n",
129
- " repo_id=repo_id,\n",
130
- " repo_type=\"dataset\",\n",
131
- " )\n",
132
- "def generate_pmtiles(input_file, output_file, max_zoom=12):\n",
133
- " # Ensure Tippecanoe is installed\n",
134
- " if subprocess.call([\"which\", \"tippecanoe\"], stdout=subprocess.DEVNULL) != 0:\n",
135
- " raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n",
136
- "\n",
137
- " # Construct the Tippecanoe command\n",
138
- " command = [\n",
139
- " \"tippecanoe\",\n",
140
- " \"-o\", output_file,\n",
141
- " \"-zg\",\n",
142
- " \"--extend-zooms-if-still-dropping\",\n",
143
- " \"--force\",\n",
144
- " \"--projection\", \"EPSG:4326\", \n",
145
- " input_file\n",
146
- " ]\n",
147
- "\n",
148
- " # Run Tippecanoe\n",
149
- " try:\n",
150
- " subprocess.run(command, check=True)\n",
151
- " print(f\"Successfully generated PMTiles file: {output_file}\")\n",
152
- " except subprocess.CalledProcessError as e:\n",
153
- " print(f\"Error running Tippecanoe: {e}\")\n",
154
- "\n"
155
  ]
156
  },
157
  {
158
  "cell_type": "code",
159
  "execution_count": null,
160
- "id": "b086e76c-4285-4036-8033-e4e45cb6966b",
161
  "metadata": {},
162
  "outputs": [],
163
  "source": [
164
- "gdf= df.execute()\n",
165
- "gdf = gdf.set_crs(\"EPSG:4326\")\n",
166
- "\n",
167
- "# gdf.to_parquet(\"county_parties.parquet\")\n",
168
- "# hf_upload(\"county_parties.parquet\", \"boettiger-lab/landvote\")\n",
169
- "\n",
170
- "# gdf.to_file(\"county_parties.geojson\")\n",
171
- "# hf_upload(\"county_parties.geojson\", \"boettiger-lab/landvote\")\n",
172
- "\n",
173
- "# generate_pmtiles(\"county_parties.geojson\", \"county_parties.pmtiles\")\n",
174
- "# hf_upload(\"county_parties.pmtiles\", \"boettiger-lab/landvote\")\n"
175
- ]
176
- },
177
- {
178
- "cell_type": "markdown",
179
- "id": "6193c4b9-0183-4aae-9a25-899a748fd65e",
180
- "metadata": {},
181
- "source": [
182
- "# Checking map"
183
  ]
184
  },
185
  {
186
  "cell_type": "code",
187
  "execution_count": null,
188
- "id": "c2ae8ada-c73e-4b2e-938e-70a29584f199",
189
  "metadata": {},
190
  "outputs": [],
191
  "source": [
192
- "import leafmap.maplibregl as leafmap\n",
193
- "m = leafmap.Map(style=\"positron\")\n",
194
- "\n",
195
- "\n",
196
- "url_states = \"https://huggingface.co/datasets/boettiger-lab/landvote/resolve/main/county_parties.pmtiles\"\n",
197
- "\n",
198
- "outcome = [\n",
199
- " 'match',\n",
200
- " ['get', 'Status'], \n",
201
- " \"Pass\", '#2E865F',\n",
202
- " \"Fail\", '#FF3300', \n",
203
- " '#ccc'\n",
204
- " ]\n",
205
- "paint_states = {\"fill-color\": outcome, \n",
206
- " # \"fill-opacity\": 0.2,\n",
207
- " }\n",
208
- "style_states = {\n",
209
- " \"layers\": [\n",
210
- " {\n",
211
- " \"id\": \"county_parties\",\n",
212
- " \"source\": \"county_parties\",\n",
213
- " \"source-layer\": \"county_parties\",\n",
214
- " \"type\": \"fill\",\n",
215
- " \"filter\": [\n",
216
- " \"==\",\n",
217
- " [\"get\", \"year\"],\n",
218
- " 2008,\n",
219
- " ], # only show buildings with height info\n",
220
- " \"paint\": paint_states\n",
221
- " },\n",
222
- " ],\n",
223
- "}\n",
224
- "\n",
225
- "m.add_pmtiles(\n",
226
- " url_states,\n",
227
- " style=style_states,\n",
228
- " visible=True,\n",
229
- " opacity=0.4,\n",
230
- " tooltip=True,\n",
231
- " fit_bounds=False,\n",
232
- ")\n",
233
- "\n",
234
- "m\n"
235
  ]
236
  },
237
  {
@@ -249,227 +136,124 @@
249
  "metadata": {},
250
  "outputs": [],
251
  "source": [
252
- "\n",
253
  "localities_boundaries = \"us_localities.parquet\"\n",
254
- "locality = conn.read_parquet(localities_boundaries)\n",
255
- "landvote = conn.read_csv(\"landvote.csv\")\n",
256
- "\n",
257
- "#needed to redo this, since I didn't save county in \"votes.parquet\". \n",
258
- "vote_local = (landvote\n",
259
- " .filter(_[\"Jurisdiction Type\"] == \"Municipal\")\n",
260
- " .rename(city = \"Jurisdiction Name\", state_id = \"State\")\n",
261
- " .mutate(key = _.city + ibis.literal('-') + _.state_id)\n",
262
- " .rename(amount = 'Conservation Funds at Stake', yes = '% Yes')\n",
263
- " .mutate(amount_n=_.amount.replace('$', '').replace(',', '').cast('float'))\n",
264
- " .mutate(log_amount=_.amount_n.log())\n",
265
- " .mutate(year=_['Date'].year().cast('int32'))\n",
266
- " .mutate(\n",
267
- " yes=ibis.case()\n",
268
- " .when(_.yes.isin(['Pass', 'None','Fail']), None) # Handle non-numeric cases\n",
269
- " .when(_.yes.notnull(), (_.yes.replace('%', '').cast('float').round(2).cast(dt.float64)).cast(dt.string) + '%') # Convert valid percentages and add %\n",
270
- " .else_(None) # Default to None for other cases\n",
271
- " .end()\n",
272
- " )\n",
273
- " .mutate(log_amount = _.log_amount.round(4))\n",
274
- " .select('key', 'Status', 'yes', 'year', 'amount', 'log_amount', )\n",
275
- " )\n",
276
- "\n",
277
- "# getting the county parties for each municipal \n",
278
- "df_municipals = (locality \n",
279
- " .mutate(key_municipal = _.name + ibis.literal('-') + _.state_id) \n",
280
- " .mutate(key = (_.county + ibis.literal('-') + _.state_id).upper()) \n",
281
- " .select('key', 'geometry','key_municipal','name')\n",
282
- " .right_join(vote_local, [_.key_municipal == vote_local[\"key\"]])\n",
283
- " .mutate(jurisdiction = ibis.literal(\"Municipal\"))\n",
284
- " .cast({\"geometry\": \"geometry\"})\n",
285
- " .mutate(geometry = _.geometry.buffer(.07))\n",
286
- " .join(county_parties, [\"key\",\"year\"],how='inner')\n",
287
- " .rename(county = \"key\")\n",
288
- " .rename(key = \"key_municipal\")\n",
289
- " .select('key','geometry','Status','yes','year','amount','log_amount','jurisdiction','current_party','previous_party')\n",
290
  " )\n",
291
  "\n",
292
- "\n",
293
- "gdf_municipals = df_municipals.execute()\n",
294
- "gdf_municipals = gdf_municipals.set_crs(\"EPSG:4326\")\n",
295
- "gdf_municipals\n",
296
- "\n",
297
- "\n",
298
- "# gdf_municipals.to_parquet(\"municipal_parties.parquet\")\n",
299
- "# hf_upload(\"municipal_parties.parquet\", \"boettiger-lab/landvote\")\n",
300
- "\n",
301
- "# gdf_municipals.to_file(\"municipal_parties.geojson\")\n",
302
- "# hf_upload(\"municipal_parties.geojson\", \"boettiger-lab/landvote\")\n",
303
- "\n",
304
- "# generate_pmtiles(\"municipal_parties.geojson\", \"municipal_parties.pmtiles\")\n",
305
- "# hf_upload(\"municipal_parties.pmtiles\", \"boettiger-lab/landvote\")\n"
306
  ]
307
  },
308
  {
309
  "cell_type": "markdown",
310
- "id": "06e24a7e-5f7f-42bc-b515-43082016d496",
311
  "metadata": {},
312
  "source": [
313
- "# Get States"
314
  ]
315
  },
316
  {
317
  "cell_type": "code",
318
- "execution_count": 28,
319
- "id": "217170ca-b732-4875-b4cb-f8a8cd2fc405",
320
  "metadata": {},
321
  "outputs": [],
322
  "source": [
323
- "states = (conn\n",
324
- " .read_csv(\"1976-2020-president.csv\")\n",
325
- " .filter(_. year >=2000)\n",
326
- " )\n",
327
- "# states.execute()"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  ]
329
  },
330
  {
331
  "cell_type": "code",
332
- "execution_count": 18,
333
- "id": "322b9a85-bdf9-45f9-9b19-695cc1b996e8",
334
  "metadata": {},
335
- "outputs": [
336
- {
337
- "name": "stdout",
338
- "output_type": "stream",
339
- "text": [
340
- " year key current_party previous_party\n",
341
- "0 2004 HI DEMOCRAT DEMOCRAT\n",
342
- "1 2004 ME DEMOCRAT DEMOCRAT\n",
343
- "2 2004 NJ DEMOCRAT DEMOCRAT\n",
344
- "3 2004 NM REPUBLICAN DEMOCRAT\n",
345
- "4 2004 ND REPUBLICAN REPUBLICAN\n",
346
- ".. ... .. ... ...\n",
347
- "250 2020 VT DEMOCRAT DEMOCRAT\n",
348
- "251 2020 AL REPUBLICAN REPUBLICAN\n",
349
- "252 2020 IA REPUBLICAN REPUBLICAN\n",
350
- "253 2020 SD REPUBLICAN REPUBLICAN\n",
351
- "254 2020 GA DEMOCRAT REPUBLICAN\n",
352
- "\n",
353
- "[255 rows x 4 columns]\n"
354
- ]
355
- }
356
- ],
357
  "source": [
358
- "# filtered = county.filter((_.mode == \"TOTAL\") & (_.totalvotes > 0))\n",
 
359
  "\n",
360
- "# Find the winning party for each year, state, and county\n",
361
- "most_votes= (\n",
362
- " states\n",
363
- " .group_by(['year', 'state_po', 'party_simplified'])\n",
364
- " .aggregate(winning_votes=_.candidatevotes.sum())\n",
365
- ")\n",
366
  "\n",
367
- "# For each year, state, and county, select the party with the highest total votes\n",
368
- "winning_party = (\n",
369
- " most_votes\n",
370
- " .group_by('year', 'state_po')\n",
371
- " .aggregate(\n",
372
- " max_votes=_.winning_votes.max(), # Max votes in this group\n",
373
- " )\n",
374
- " .join(\n",
375
- " most_votes,\n",
376
- " [\"year\",\"state_po\",most_votes['winning_votes'] == _.max_votes],\n",
377
- " how='inner'\n",
378
- " )\n",
379
- " .select(\"year\",\"state_po\",most_votes['party_simplified'].name('current_party')\n",
380
- " )\n",
381
- ")\n",
382
  "\n",
383
- "# Self-join to get the previous year's winning party\n",
384
- "previous_year = winning_party.view()\n",
385
- "\n",
386
- "joined = (\n",
387
- " winning_party\n",
388
- " .join(\n",
389
- " previous_year, [\"state_po\",winning_party['year'] == previous_year['year'] + 4],\n",
390
- " how='left'\n",
391
- " )\n",
392
- " .rename(key = \"state_po\")\n",
393
- " # .mutate(key = _.county_name + ibis.literal(\" COUNTY-\") + _.state_id)\n",
394
- " .select(\"year\",\"key\",\"current_party\",previous_year['current_party'].name('previous_party'))\n",
395
- ")\n",
396
- "\n",
397
- "state_parties = joined.filter(_.year >2000).order_by(\"year\")\n",
398
- "\n",
399
- "print(state_parties.execute())"
400
  ]
401
  },
402
  {
403
  "cell_type": "code",
404
- "execution_count": 31,
405
- "id": "2c03920e-76da-4034-8eaf-1e80a56f5b0d",
406
  "metadata": {},
407
- "outputs": [
408
- {
409
- "data": {
410
- "application/vnd.jupyter.widget-view+json": {
411
- "model_id": "3574af6546ff4cd1949e27c63cd15cd7",
412
- "version_major": 2,
413
- "version_minor": 0
414
- },
415
- "text/plain": [
416
- "states_parties.parquet: 0%| | 0.00/2.36M [00:00<?, ?B/s]"
417
- ]
418
- },
419
- "metadata": {},
420
- "output_type": "display_data"
421
- },
422
- {
423
- "name": "stderr",
424
- "output_type": "stream",
425
- "text": [
426
- "For layer 0, using name \"states_parties\"\n",
427
- "12 features, 833591 bytes of geometry and attributes, 542 bytes of string pool, 0 bytes of vertices, 0 bytes of nodes\n",
428
- "Choosing a maxzoom of -z0 for features typically 7514540 feet (2290432 meters) apart, and at least 2073685 feet (632060 meters) apart\n",
429
- "Choosing a maxzoom of -z10 for resolution of about 376 feet (114 meters) within features\n",
430
- " 99.9% 10/271/383 \n",
431
- " 100.0% 10/187/380 \r"
432
- ]
433
- },
434
- {
435
- "name": "stdout",
436
- "output_type": "stream",
437
- "text": [
438
- "Successfully generated PMTiles file: states_parties.pmtiles\n"
439
- ]
440
- }
441
- ],
442
  "source": [
443
- "# state_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-state-territory.parquet\"\n",
444
- "states = conn.read_parquet(\"vote_states.parquet\")\n",
445
- "\n",
446
- "df_states = (states\n",
447
- " .mutate(key = _.key.upper())\n",
448
- " # .filter(_.jurisdiction == \"Municipal\")\n",
449
- " .join(state_parties, [\"key\",\"year\"],how='inner'\n",
450
- " )\n",
451
- " .cast({\"geometry\": \"geometry\"})\n",
452
- ")\n",
453
- "\n",
454
- "gdf_states = df_states.execute()\n",
455
- "gdf_states = gdf_states.set_crs(\"EPSG:4326\")\n",
456
- "gdf_states\n",
457
- "\n",
458
- "\n",
459
- "gdf_states.to_parquet(\"states_parties.parquet\")\n",
460
- "hf_upload(\"states_parties.parquet\", \"boettiger-lab/landvote\")\n",
461
- "\n",
462
- "gdf_states.to_file(\"states_parties.geojson\")\n",
463
- "hf_upload(\"states_parties.geojson\", \"boettiger-lab/landvote\")\n",
464
- "\n",
465
- "generate_pmtiles(\"states_parties.geojson\", \"states_parties.pmtiles\")\n",
466
- "hf_upload(\"states_parties.pmtiles\", \"boettiger-lab/landvote\")\n"
467
  ]
468
  },
469
  {
470
  "cell_type": "code",
471
  "execution_count": null,
472
- "id": "dcf5d049-68ae-4d73-be77-72d985c9ed1c",
473
  "metadata": {},
474
  "outputs": [],
475
  "source": []
@@ -491,7 +275,7 @@
491
  "name": "python",
492
  "nbconvert_exporter": "python",
493
  "pygments_lexer": "ipython3",
494
- "version": "3.10.12"
495
  }
496
  },
497
  "nbformat": 4,
 
1
  {
2
  "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "f2d27d42-74aa-44cb-8ab6-5a0f856dcca0",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Merging state, county, and city polygons with political parties"
9
+ ]
10
+ },
11
  {
12
  "cell_type": "code",
13
+ "execution_count": null,
14
  "id": "d179ded1-6235-47ed-bbfb-6d72468188d5",
15
  "metadata": {},
16
  "outputs": [],
 
20
  "import streamlit as st\n",
21
  "import ibis.expr.datatypes as dt # Make sure to import the necessary module\n",
22
  "\n",
23
+ "conn = ibis.duckdb.connect(extensions=[\"spatial\"])"
 
 
 
 
 
24
  ]
25
  },
26
  {
27
  "cell_type": "markdown",
28
+ "id": "b9bc2d50-481b-4f62-a74b-4a576ff89ecd",
29
  "metadata": {},
30
  "source": [
31
+ "# State"
32
  ]
33
  },
34
  {
35
  "cell_type": "code",
36
  "execution_count": null,
37
+ "id": "322b9a85-bdf9-45f9-9b19-695cc1b996e8",
38
  "metadata": {},
39
  "outputs": [],
40
  "source": [
41
+ "# getting the winning party for each state and election year\n",
42
+ "state = (conn\n",
43
+ " .read_csv(\"1976-2020-president.csv\")\n",
44
+ " .filter(_. year >=2000)\n",
45
+ " .rename(state=\"state_po\" , party = \"party_simplified\") # rename columns\n",
46
+ " .group_by([\"year\", \"state\"])\n",
47
+ " .aggregate(party=_.party.argmax(_.candidatevotes)) # winning party \n",
48
+ " .select(\"year\", \"state\", \"party\") # select only relevant columns\n",
49
+ " )"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  ]
51
  },
52
  {
53
  "cell_type": "code",
54
  "execution_count": null,
55
+ "id": "6ab49b51-d6fe-47b3-9bed-b23b5ecf7f0e",
56
  "metadata": {},
57
  "outputs": [],
58
  "source": [
59
+ "# merging with state polygons\n",
60
+ "state_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-state-territory.parquet\"\n",
61
+ "\n",
62
+ "df_state = (conn\n",
63
+ " .read_parquet(state_boundaries)\n",
64
+ " .rename(state = \"STUSPS\", state_ = \"NAME\")\n",
65
+ " .select(\"state\",\"geometry\")\n",
66
+ " .join(state,\"state\",how = \"inner\")\n",
67
+ " .mutate(county = None)\n",
68
+ " .mutate(municipal = None)\n",
69
+ " .mutate(jurisdiction = ibis.literal(\"State\"))\n",
70
+ " .cast({\"geometry\": \"geometry\",\"county\":\"string\",\"municipal\": \"string\"})\n",
71
+ " .select(\"state\", \"county\", \"municipal\",\"jurisdiction\",\"geometry\", \"year\", \"party\")\n",
72
+ " )"
73
  ]
74
  },
75
  {
76
+ "cell_type": "markdown",
77
+ "id": "170ba045-8848-4a99-a4f6-68bde22428af",
 
78
  "metadata": {},
 
 
 
 
 
 
 
 
 
 
 
 
79
  "source": [
80
+ "# County"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  ]
82
  },
83
  {
84
  "cell_type": "code",
85
  "execution_count": null,
86
+ "id": "0231c801-82e2-45be-9ec5-607d5588a3e5",
87
  "metadata": {},
88
  "outputs": [],
89
  "source": [
90
+ "# getting the winning party for each county and election year\n",
91
+ "county = (conn\n",
92
+ " .read_csv(\"countypres_2000-2020.csv\")\n",
93
+ " .filter((_.mode == \"TOTAL\") & (_.totalvotes > 0)) # keep only TOTAL-mode rows with a positive vote count\n",
94
+ " .rename(state=\"state_po\", state_name = \"state\") \n",
95
+ " .mutate(county = _.county_name + ibis.literal(\" COUNTY\"))\n",
96
+ " .group_by([\"year\", \"state\", \"county\",\"state_name\"])\n",
97
+ " .aggregate(party=_.party.argmax(_.candidatevotes)) # winning party \n",
98
+ " .select(\"year\", \"state\", \"county\", \"party\",\"state_name\") \n",
99
+ ")"
 
 
 
 
 
 
 
 
 
100
  ]
101
  },
102
  {
103
  "cell_type": "code",
104
  "execution_count": null,
105
+ "id": "179b5066-030c-4302-a8cf-8216a753080e",
106
  "metadata": {},
107
  "outputs": [],
108
  "source": [
109
+ "# merging with county polygons\n",
110
+ "county_boundaries = \"https://data.source.coop/cboettig/us-boundaries/us-county.parquet\"\n",
111
+ "df_county = (conn\n",
112
+ " .read_parquet(county_boundaries)\n",
113
+ " .mutate(county = _.NAMELSAD.upper(), state_name = _.STATE_NAME.upper())\n",
114
+ " .select(\"state_name\",\"county\",\"geometry\")\n",
115
+ " .join(county,[\"state_name\",\"county\"],how = \"inner\")\n",
116
+ " .mutate(municipal = None)\n",
117
+ " .cast({\"geometry\": \"geometry\",\"municipal\": \"string\"})\n",
118
+ " .mutate(jurisdiction = ibis.literal(\"County\"))\n",
119
+ " .select(\"state\", \"county\", \"municipal\",\"jurisdiction\",\"geometry\", \"year\", \"party\")\n",
120
+ "\n",
121
+ " )"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  ]
123
  },
124
  {
 
136
  "metadata": {},
137
  "outputs": [],
138
  "source": [
 
139
  "localities_boundaries = \"us_localities.parquet\"\n",
140
+ "locality = (conn\n",
141
+ " .read_parquet(localities_boundaries)\n",
142
+ " .mutate(county = _.county.upper())\n",
143
+ " .mutate(municipal = _.municipal.upper())\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  " )\n",
145
  "\n",
146
+ "df_city = (county\n",
147
+ " .drop(\"state_name\")\n",
148
+ " .join(locality, [\"state\",\"county\"], how = \"inner\")\n",
149
+ " .cast({\"geometry\": \"geometry\"})\n",
150
+ " .mutate(jurisdiction = ibis.literal(\"Municipal\"))\n",
151
+ " .select(\"state\", \"county\", \"municipal\",\"jurisdiction\",\"geometry\", \"year\", \"party\")\n",
152
+ " )\n"
 
 
 
 
 
 
 
153
  ]
154
  },
155
  {
156
  "cell_type": "markdown",
157
+ "id": "ae5b417d-4266-456d-952c-ac2696234ea0",
158
  "metadata": {},
159
  "source": [
160
+ "# Make PMTiles. Each jurisdiction type is its own layer"
161
  ]
162
  },
163
  {
164
  "cell_type": "code",
165
+ "execution_count": null,
166
+ "id": "12cdf02e-bc22-4a5f-91b9-00a8eee587bd",
167
  "metadata": {},
168
  "outputs": [],
169
  "source": [
170
+ "import subprocess\n",
171
+ "import os\n",
172
+ "from huggingface_hub import HfApi, login\n",
173
+ "import streamlit as st\n",
174
+ "\n",
175
+ "login(st.secrets[\"HF_TOKEN\"])\n",
176
+ "# api = HfApi(add_to_git_credential=False)\n",
177
+ "api = HfApi()\n",
178
+ "\n",
179
+ "def hf_upload(file, repo_id):\n",
180
+ " info = api.upload_file(\n",
181
+ " path_or_fileobj=file,\n",
182
+ " path_in_repo=file,\n",
183
+ " repo_id=repo_id,\n",
184
+ " repo_type=\"dataset\",\n",
185
+ " )\n",
186
+ "def generate_pmtiles(input_file, input_file2, input_file3, output_file, max_zoom=12):\n",
187
+ " # Ensure Tippecanoe is installed\n",
188
+ " if subprocess.call([\"which\", \"tippecanoe\"], stdout=subprocess.DEVNULL) != 0:\n",
189
+ " raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n",
190
+ "\n",
191
+ " # Construct the Tippecanoe command\n",
192
+ " command = [\n",
193
+ " \"tippecanoe\",\n",
194
+ " \"-o\", output_file,\n",
195
+ " \"-zg\",\n",
196
+ " \"--extend-zooms-if-still-dropping\",\n",
197
+ " \"--force\",\n",
198
+ " \"--projection\", \"EPSG:4326\", \n",
199
+ " \"-L\",\"state:\"+input_file,\n",
200
+ " \"-L\",\"county:\"+input_file2,\n",
201
+ " \"-L\",\"municipal:\"+input_file3\n",
202
+ " ]\n",
203
+ " # Run Tippecanoe\n",
204
+ " try:\n",
205
+ " subprocess.run(command, check=True)\n",
206
+ " print(f\"Successfully generated PMTiles file: {output_file}\")\n",
207
+ " except subprocess.CalledProcessError as e:\n",
208
+ " print(f\"Error running Tippecanoe: {e}\")\n",
209
+ "\n"
210
  ]
211
  },
212
  {
213
  "cell_type": "code",
214
+ "execution_count": null,
215
+ "id": "24df77fb-c881-4491-b7ca-7f3a3023cee0",
216
  "metadata": {},
217
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  "source": [
219
+ "gdf_state = df_state.execute().set_crs(\"EPSG:4326\")\n",
220
+ "gdf_state.to_file(\"party_state.geojson\")\n",
221
  "\n",
222
+ "gdf_county = df_county.execute().set_crs(\"EPSG:4326\")\n",
223
+ "gdf_county.to_file(\"party_county.geojson\")\n",
 
 
 
 
224
  "\n",
225
+ "gdf_city = df_city.execute().set_crs(\"EPSG:4326\")\n",
226
+ "gdf_city.to_file(\"party_municipal.geojson\")\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  "\n",
228
+ "generate_pmtiles(\"party_state.geojson\", \"party_county.geojson\",\"party_municipal.geojson\", \"landvote_polygons.pmtiles\")\n",
229
+ "hf_upload(\"party_polygons.pmtiles\", \"boettiger-lab/landvote\")\n"
230
+ ]
231
+ },
232
+ {
233
+ "cell_type": "markdown",
234
+ "id": "190169bb-5bfb-4eb7-a135-c5ce0e316595",
235
+ "metadata": {},
236
+ "source": [
237
+ "# Combine all 3 jurisdiction types into a single Parquet file."
 
 
 
 
 
 
 
238
  ]
239
  },
240
  {
241
  "cell_type": "code",
242
+ "execution_count": null,
243
+ "id": "4ad8ad0a-afb3-427f-8b52-ea328e06ce85",
244
  "metadata": {},
245
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  "source": [
247
+ "df_temp = df_county.union(df_city)\n",
248
+ "df = df_temp.union(df_state)\n",
249
+ "df.execute().set_crs(\"EPSG:4326\").to_parquet(\"party_polygons.parquet\")\n",
250
+ "hf_upload(\"party_polygons.parquet\", \"boettiger-lab/landvote\")\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  ]
252
  },
253
  {
254
  "cell_type": "code",
255
  "execution_count": null,
256
+ "id": "3e87fc40-b535-4a44-9230-99f6777e7c48",
257
  "metadata": {},
258
  "outputs": [],
259
  "source": []
 
275
  "name": "python",
276
  "nbconvert_exporter": "python",
277
  "pygments_lexer": "ipython3",
278
+ "version": "3.11.10"
279
  }
280
  },
281
  "nbformat": 4,
preprocess.ipynb CHANGED
@@ -1,84 +1,118 @@
1
  {
2
  "cells": [
 
 
 
 
 
 
 
 
3
  {
4
  "cell_type": "code",
5
  "execution_count": null,
6
- "id": "3e5756d2-382b-49e9-93b5-2ecf6d0eb812",
7
  "metadata": {},
8
  "outputs": [],
9
  "source": [
10
- "import duckdb\n",
 
11
  "\n",
12
- "con = duckdb.connect()\n",
 
13
  "\n",
14
- "con.execute(\"SET s3_region='us-west-2';\")\n",
15
- "con.execute(\"LOAD spatial;\")\n",
16
- "con.execute(\"LOAD httpfs;\")\n",
17
  "\n",
18
- "query = \"\"\"\n",
19
- " COPY (\n",
20
- " SELECT * \n",
21
- " FROM read_parquet('s3://overturemaps-us-west-2/release/2024-09-18.0/theme=divisions/*/*')\n",
22
- " WHERE country = 'US' AND subtype IN ('locality', 'neighborhood')\n",
23
- " ) TO 'us_localities_neighborhoods.parquet' (FORMAT 'parquet');\n",
24
- "\"\"\"\n",
25
- "con.execute(query)\n",
26
  "\n"
27
  ]
28
  },
29
  {
30
  "cell_type": "code",
31
  "execution_count": null,
32
- "id": "25f62dd7-5539-438b-8f0a-1d85c9bc78ab",
33
  "metadata": {},
34
  "outputs": [],
35
  "source": [
36
- "import ibis\n",
37
- "from ibis import _\n",
38
- "\n",
39
- "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
40
- "\n",
41
- "df = (conn\n",
42
- " .read_parquet(\"us_localities_neighborhoods.parquet\")\n",
43
- " .cast({\"geometry\": \"geometry\"})\n",
44
- " .filter(_[\"type\"] == \"division\")\n",
45
- " .filter(_[\"subtype\"] == \"locality\")\n",
46
- " .mutate(name = _.names[\"primary\"])\n",
47
- " .mutate(state_id = _.region.replace(\"US-\", \"\")) \n",
48
- " .mutate(county = _.hierarchies[0][2]['name'] )\n",
49
- " .mutate(key_long = _.name + ibis.literal('-') + _.county + ibis.literal('-') + _.state_id)\n",
50
- " .select(\"key_long\",\"name\", \"county\",\"state_id\" ,\"geometry\")\n",
51
- " )\n",
52
- "\n",
53
- "\n",
54
- "## Dropping rows with same locality and state, with differing counties \n",
55
- "county_count = (\n",
56
- " df.group_by([\"name\", \"state_id\"])\n",
57
- " .aggregate(county_count=_.county.nunique()) # Count unique counties for each group\n",
58
- ") \n",
59
- "valid_names = county_count.filter(county_count.county_count == 1).select(\"name\", \"state_id\")\n",
60
- "df_filtered = df.join(valid_names, [\"name\", \"state_id\"], how=\"inner\")\n",
61
  "\n",
62
- "\n",
63
- "# if two records have the same name but different geometries, only keep the first one.\n",
64
- "df_first = (\n",
65
- " df_filtered.group_by(\"key_long\")\n",
66
- " .aggregate(\n",
67
- " name=df_filtered.name.first(),\n",
68
- " county=df_filtered.county.first(),\n",
69
- " state_id=df_filtered.state_id.first(),\n",
70
- " geometry=df_filtered.geometry.first()\n",
71
- " )\n",
72
- "\n",
73
- ")\n",
74
- "\n",
75
- "df_first.execute().to_parquet(\"us_localities.parquet\")\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  ]
77
  },
78
  {
79
  "cell_type": "code",
80
  "execution_count": null,
81
- "id": "ca02743f-0bf4-46e5-91fd-a5fe37519ecd",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  "metadata": {},
83
  "outputs": [],
84
  "source": [
@@ -98,9 +132,71 @@
98
  " repo_id=repo_id,\n",
99
  " repo_type=\"dataset\",\n",
100
  " )\n",
101
- "hf_upload(\"us_localities.parquet\", \"boettiger-lab/landvote\")\n",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  "\n"
103
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  }
105
  ],
106
  "metadata": {
 
1
  {
2
  "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "9fe02137-ba1c-4ced-9909-b5b7c39ed6d4",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Merging state/county/city polygons with party affiliation and landvote data"
9
+ ]
10
+ },
11
  {
12
  "cell_type": "code",
13
  "execution_count": null,
14
+ "id": "6452373c-d10f-472c-9174-cd05a3363587",
15
  "metadata": {},
16
  "outputs": [],
17
  "source": [
18
+ "import ibis\n",
19
+ "from ibis import _\n",
20
  "\n",
21
+ "import streamlit as st\n",
22
+ "import ibis.expr.datatypes as dt # Make sure to import the necessary module\n",
23
  "\n",
24
+ "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
 
 
25
  "\n",
26
+ "landvote_url = \"https://huggingface.co/datasets/boettiger-lab/landvote/resolve/main/landvote_polygons.parquet\"\n",
27
+ "party_url = \"https://huggingface.co/datasets/boettiger-lab/landvote/resolve/main/party_polygons.parquet\"\n",
 
 
 
 
 
 
28
  "\n"
29
  ]
30
  },
31
  {
32
  "cell_type": "code",
33
  "execution_count": null,
34
+ "id": "371ef7f4-95b9-49f0-80ba-48807b98b052",
35
  "metadata": {},
36
  "outputs": [],
37
  "source": [
38
+ "landvote = (conn\n",
39
+ " .read_parquet(landvote_url)\n",
40
+ " .cast({\"geometry\": \"geometry\"})\n",
41
+ " .mutate(county = _.county.upper())\n",
42
+ " .mutate(municipal = _.municipal.upper())\n",
43
+ " .mutate(elect_year = _.year - _.year % 4) # round year down to a multiple of 4 so it can be matched to the party table's election year\n",
44
+ " .cast({\"municipal\": \"string\",\"county\":\"string\"})\n",
45
+ " .mutate(municipal=ibis.case()\n",
46
+ " .when(_.jurisdiction.isin(['State','County']), ibis.literal(\"-\")) \n",
47
+ " .else_(_.municipal) \n",
48
+ " .end()\n",
49
+ " )\n",
50
+ " .mutate(county=ibis.case()\n",
51
+ " .when(_.jurisdiction.isin(['State']),ibis.literal(\"-\")) \n",
52
+ " .else_(_.county) \n",
53
+ " .end()\n",
54
+ " )\n",
55
+ " )\n",
 
 
 
 
 
 
 
56
  "\n",
57
+ "party = (conn\n",
58
+ " .read_parquet(party_url)\n",
59
+ " .cast({\"geometry\": \"geometry\"})\n",
60
+ " .mutate(municipal=ibis.case()\n",
61
+ " .when(_.jurisdiction.isin(['State','County']), ibis.literal(\"-\")) \n",
62
+ " .else_(_.municipal) \n",
63
+ " .end()\n",
64
+ " )\n",
65
+ " .mutate(county=ibis.case()\n",
66
+ " .when(_.jurisdiction.isin(['State']), ibis.literal(\"-\")) \n",
67
+ " .else_(_.county) \n",
68
+ " .end()\n",
69
+ " )\n",
70
+ " )"
71
+ ]
72
+ },
73
+ {
74
+ "cell_type": "code",
75
+ "execution_count": null,
76
+ "id": "8fc40e96-fffd-4b23-9963-c931fdce96f6",
77
+ "metadata": {},
78
+ "outputs": [],
79
+ "source": [
80
+ "votes = (landvote\n",
81
+ " .join(party,[\"state\",\"county\",\"municipal\",\"jurisdiction\",'geometry', _.elect_year == party[\"year\"]],how = \"inner\")\n",
82
+ " .drop('elect_year','year_right')\n",
83
+ " .mutate(municipal=ibis.case()\n",
84
+ " .when(_.municipal == ibis.literal(\"-\"), None) \n",
85
+ " .else_(_.municipal) \n",
86
+ " .end()\n",
87
+ " )\n",
88
+ " .mutate(county=ibis.case()\n",
89
+ " .when(_.county == ibis.literal(\"-\"), None) \n",
90
+ " .else_(_.county) \n",
91
+ " .end()\n",
92
+ " )\n",
93
+ " )"
94
  ]
95
  },
96
  {
97
  "cell_type": "code",
98
  "execution_count": null,
99
+ "id": "65e75bc0-fef0-48f2-a543-7aae999579bf",
100
+ "metadata": {},
101
+ "outputs": [],
102
+ "source": []
103
+ },
104
+ {
105
+ "cell_type": "markdown",
106
+ "id": "e80cfd2e-40eb-4065-9ae6-dcaf83319d9a",
107
+ "metadata": {},
108
+ "source": [
109
+ "# Make PMTiles. Each jurisdiction type is its own layer"
110
+ ]
111
+ },
112
+ {
113
+ "cell_type": "code",
114
+ "execution_count": null,
115
+ "id": "b1cd8f44-57fa-49a8-b438-f9e4aab747c5",
116
  "metadata": {},
117
  "outputs": [],
118
  "source": [
 
132
  " repo_id=repo_id,\n",
133
  " repo_type=\"dataset\",\n",
134
  " )\n",
135
+ "def generate_pmtiles(input_file, input_file2, input_file3, output_file, max_zoom=12):\n",
136
+ " # Ensure Tippecanoe is installed\n",
137
+ " if subprocess.call([\"which\", \"tippecanoe\"], stdout=subprocess.DEVNULL) != 0:\n",
138
+ " raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n",
139
+ "\n",
140
+ " # Construct the Tippecanoe command\n",
141
+ " command = [\n",
142
+ " \"tippecanoe\",\n",
143
+ " \"-o\", output_file,\n",
144
+ " \"-zg\",\n",
145
+ " \"--extend-zooms-if-still-dropping\",\n",
146
+ " \"--force\",\n",
147
+ " \"--projection\", \"EPSG:4326\", \n",
148
+ " \"-L\",\"state:\"+input_file,\n",
149
+ " \"-L\",\"county:\"+input_file2,\n",
150
+ " \"-L\",\"municipal:\"+input_file3\n",
151
+ " ]\n",
152
+ " # Run Tippecanoe\n",
153
+ " try:\n",
154
+ " subprocess.run(command, check=True)\n",
155
+ " print(f\"Successfully generated PMTiles file: {output_file}\")\n",
156
+ " except subprocess.CalledProcessError as e:\n",
157
+ " print(f\"Error running Tippecanoe: {e}\")\n",
158
  "\n"
159
  ]
160
+ },
161
+ {
162
+ "cell_type": "code",
163
+ "execution_count": null,
164
+ "id": "7061577e-0632-4395-8ac5-241a1fab53b0",
165
+ "metadata": {},
166
+ "outputs": [],
167
+ "source": [
168
+ "gdf_state = votes.filter(_.jurisdiction == 'State').execute().set_crs(\"EPSG:4326\")\n",
169
+ "gdf_state.to_file(\"votes_state.geojson\")\n",
170
+ "\n",
171
+ "gdf_county = votes.filter(_.jurisdiction == 'County').execute().set_crs(\"EPSG:4326\")\n",
172
+ "gdf_county.to_file(\"votes_county.geojson\")\n",
173
+ "\n",
174
+ "gdf_city = votes.filter(_.jurisdiction == 'Municipal').execute().set_crs(\"EPSG:4326\")\n",
175
+ "gdf_city.to_file(\"votes_municipal.geojson\")\n",
176
+ "\n",
177
+ "generate_pmtiles(\"votes_state.geojson\", \"votes_county.geojson\",\"votes_municipal.geojson\", \"votes.pmtiles\")\n",
178
+ "hf_upload(\"votes.pmtiles\", \"boettiger-lab/landvote\")"
179
+ ]
180
+ },
181
+ {
182
+ "cell_type": "code",
183
+ "execution_count": null,
184
+ "id": "f2979624-bcdf-4a8a-899a-c22fc3cdaf0e",
185
+ "metadata": {},
186
+ "outputs": [],
187
+ "source": [
188
+ "# save as parquet\n",
189
+ "votes.execute().set_crs(\"EPSG:4326\").to_parquet(\"votes.parquet\")\n",
190
+ "hf_upload(\"votes.parquet\", \"boettiger-lab/landvote\")\n"
191
+ ]
192
+ },
193
+ {
194
+ "cell_type": "markdown",
195
+ "id": "2ec22cf4-cfdc-4845-a793-ed9236054ff4",
196
+ "metadata": {},
197
+ "source": [
198
+ "# "
199
+ ]
200
  }
201
  ],
202
  "metadata": {