cassiebuhler committed on
Commit
899836e
·
1 Parent(s): 02c6b99

fixed 2020 and state data

Browse files
Files changed (2) hide show
  1. get_party.ipynb +12 -15
  2. preprocess.ipynb +139 -30
get_party.ipynb CHANGED
@@ -41,7 +41,7 @@
41
  "#getting party\n",
42
  "state = (conn\n",
43
  " .read_csv(\"1976-2020-president.csv\")\n",
44
- " .filter(_. year >=2000)\n",
45
  " .rename(state=\"state_po\" , party = \"party_simplified\") # rename columns\n",
46
  " .group_by([\"year\", \"state\"])\n",
47
  " .aggregate(party=_.party.argmax(_.candidatevotes)) # winning party \n",
@@ -90,11 +90,17 @@
90
  "# getting party\n",
91
  "county = (conn\n",
92
  " .read_csv(\"countypres_2000-2020.csv\")\n",
93
- " .filter((_.mode == \"TOTAL\") & (_.totalvotes > 0)) # filter only final votes\n",
94
  " .rename(state=\"state_po\", state_name = \"state\") \n",
95
  " .mutate(county = _.county_name + ibis.literal(\" COUNTY\"))\n",
96
- " .group_by([\"year\", \"state\", \"county\",\"state_name\"])\n",
97
- " .aggregate(party=_.party.argmax(_.candidatevotes)) # winning party \n",
 
 
 
 
 
 
98
  " .select(\"year\", \"state\", \"county\", \"party\",\"state_name\") \n",
99
  ")"
100
  ]
@@ -117,7 +123,6 @@
117
  " .cast({\"geometry\": \"geometry\",\"municipal\": \"string\"})\n",
118
  " .mutate(jurisdiction = ibis.literal(\"County\"))\n",
119
  " .select(\"state\", \"county\", \"municipal\",\"jurisdiction\",\"geometry\", \"year\", \"party\")\n",
120
- "\n",
121
  " )"
122
  ]
123
  },
@@ -225,7 +230,7 @@
225
  "gdf_city = df_city.execute().set_crs(\"EPSG:4326\")\n",
226
  "gdf_city.to_file(\"party_municipal.geojson\")\n",
227
  "\n",
228
- "generate_pmtiles(\"party_state.geojson\", \"party_county.geojson\",\"party_municipal.geojson\", \"landvote_polygons.pmtiles\")\n",
229
  "hf_upload(\"party_polygons.pmtiles\", \"boettiger-lab/landvote\")\n"
230
  ]
231
  },
@@ -249,14 +254,6 @@
249
  "df.execute().set_crs(\"EPSG:4326\").to_parquet(\"party_polygons.parquet\")\n",
250
  "hf_upload(\"party_polygons.parquet\", \"boettiger-lab/landvote\")\n"
251
  ]
252
- },
253
- {
254
- "cell_type": "code",
255
- "execution_count": null,
256
- "id": "3e87fc40-b535-4a44-9230-99f6777e7c48",
257
- "metadata": {},
258
- "outputs": [],
259
- "source": []
260
  }
261
  ],
262
  "metadata": {
@@ -275,7 +272,7 @@
275
  "name": "python",
276
  "nbconvert_exporter": "python",
277
  "pygments_lexer": "ipython3",
278
- "version": "3.11.10"
279
  }
280
  },
281
  "nbformat": 4,
 
41
  "#getting party\n",
42
  "state = (conn\n",
43
  " .read_csv(\"1976-2020-president.csv\")\n",
44
+ " # .filter(_. year >=2000)\n",
45
  " .rename(state=\"state_po\" , party = \"party_simplified\") # rename columns\n",
46
  " .group_by([\"year\", \"state\"])\n",
47
  " .aggregate(party=_.party.argmax(_.candidatevotes)) # winning party \n",
 
90
  "# getting party\n",
91
  "county = (conn\n",
92
  " .read_csv(\"countypres_2000-2020.csv\")\n",
93
+ " .filter((_.totalvotes > 0)) # filter empty votes\n",
94
  " .rename(state=\"state_po\", state_name = \"state\") \n",
95
  " .mutate(county = _.county_name + ibis.literal(\" COUNTY\"))\n",
96
+ " .group_by([\"year\", \"state\", \"county\", \"state_name\", \"party\"])\n",
97
+ " .aggregate(\n",
98
+ " total_candidate_votes=_.candidatevotes.sum() #getting total votes per candidate \n",
99
+ " )\n",
100
+ " .group_by([\"year\", \"state\", \"county\", \"state_name\"])\n",
101
+ " .aggregate(\n",
102
+ " party=_.party.argmax(_.total_candidate_votes) # party with the highest total votes\n",
103
+ " )\n",
104
  " .select(\"year\", \"state\", \"county\", \"party\",\"state_name\") \n",
105
  ")"
106
  ]
 
123
  " .cast({\"geometry\": \"geometry\",\"municipal\": \"string\"})\n",
124
  " .mutate(jurisdiction = ibis.literal(\"County\"))\n",
125
  " .select(\"state\", \"county\", \"municipal\",\"jurisdiction\",\"geometry\", \"year\", \"party\")\n",
 
126
  " )"
127
  ]
128
  },
 
230
  "gdf_city = df_city.execute().set_crs(\"EPSG:4326\")\n",
231
  "gdf_city.to_file(\"party_municipal.geojson\")\n",
232
  "\n",
233
+ "generate_pmtiles(\"party_state.geojson\", \"party_county.geojson\",\"party_municipal.geojson\", \"party_polygons.pmtiles\")\n",
234
  "hf_upload(\"party_polygons.pmtiles\", \"boettiger-lab/landvote\")\n"
235
  ]
236
  },
 
254
  "df.execute().set_crs(\"EPSG:4326\").to_parquet(\"party_polygons.parquet\")\n",
255
  "hf_upload(\"party_polygons.parquet\", \"boettiger-lab/landvote\")\n"
256
  ]
 
 
 
 
 
 
 
 
257
  }
258
  ],
259
  "metadata": {
 
272
  "name": "python",
273
  "nbconvert_exporter": "python",
274
  "pygments_lexer": "ipython3",
275
+ "version": "3.10.12"
276
  }
277
  },
278
  "nbformat": 4,
preprocess.ipynb CHANGED
@@ -10,7 +10,7 @@
10
  },
11
  {
12
  "cell_type": "code",
13
- "execution_count": null,
14
  "id": "6452373c-d10f-472c-9174-cd05a3363587",
15
  "metadata": {},
16
  "outputs": [],
@@ -30,8 +30,8 @@
30
  },
31
  {
32
  "cell_type": "code",
33
- "execution_count": null,
34
- "id": "371ef7f4-95b9-49f0-80ba-48807b98b052",
35
  "metadata": {},
36
  "outputs": [],
37
  "source": [
@@ -43,16 +43,18 @@
43
  " .mutate(elect_year = _.year - _.year % 4) # get most recent election year \n",
44
  " .cast({\"municipal\": \"string\",\"county\":\"string\"})\n",
45
  " .mutate(municipal=ibis.case()\n",
46
- " .when(_.jurisdiction.isin(['State','County']), ibis.literal(\"-\")) \n",
47
- " .else_(_.municipal) \n",
48
- " .end()\n",
49
- " )\n",
50
- " .mutate(county=ibis.case()\n",
51
- " .when(_.jurisdiction.isin(['State']),ibis.literal(\"-\")) \n",
52
- " .else_(_.county) \n",
53
- " .end()\n",
54
- " )\n",
55
- " )\n",
 
 
56
  "\n",
57
  "party = (conn\n",
58
  " .read_parquet(party_url)\n",
@@ -72,13 +74,13 @@
72
  },
73
  {
74
  "cell_type": "code",
75
- "execution_count": null,
76
  "id": "8fc40e96-fffd-4b23-9963-c931fdce96f6",
77
  "metadata": {},
78
  "outputs": [],
79
  "source": [
80
  "votes = (landvote\n",
81
- " .join(party,[\"state\",\"county\",\"municipal\",\"jurisdiction\",'geometry', _.elect_year == party[\"year\"]],how = \"inner\")\n",
82
  " .drop('elect_year','year_right')\n",
83
  " .mutate(municipal=ibis.case()\n",
84
  " .when(_.municipal == ibis.literal(\"-\"), None) \n",
@@ -93,14 +95,6 @@
93
  " )"
94
  ]
95
  },
96
- {
97
- "cell_type": "code",
98
- "execution_count": null,
99
- "id": "65e75bc0-fef0-48f2-a543-7aae999579bf",
100
- "metadata": {},
101
- "outputs": [],
102
- "source": []
103
- },
104
  {
105
  "cell_type": "markdown",
106
  "id": "e80cfd2e-40eb-4065-9ae6-dcaf83319d9a",
@@ -111,10 +105,21 @@
111
  },
112
  {
113
  "cell_type": "code",
114
- "execution_count": null,
115
  "id": "b1cd8f44-57fa-49a8-b438-f9e4aab747c5",
116
  "metadata": {},
117
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
118
  "source": [
119
  "import subprocess\n",
120
  "import os\n",
@@ -160,10 +165,85 @@
160
  },
161
  {
162
  "cell_type": "code",
163
- "execution_count": null,
164
  "id": "7061577e-0632-4395-8ac5-241a1fab53b0",
165
  "metadata": {},
166
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  "source": [
168
  "gdf_state = votes.filter(_.jurisdiction == 'State').execute().set_crs(\"EPSG:4326\")\n",
169
  "gdf_state.to_file(\"votes_state.geojson\")\n",
@@ -180,10 +260,39 @@
180
  },
181
  {
182
  "cell_type": "code",
183
- "execution_count": null,
184
  "id": "f2979624-bcdf-4a8a-899a-c22fc3cdaf0e",
185
  "metadata": {},
186
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  "source": [
188
  "# save as parquet\n",
189
  "votes.execute().set_crs(\"EPSG:4326\").to_parquet(\"votes.parquet\")\n",
@@ -215,7 +324,7 @@
215
  "name": "python",
216
  "nbconvert_exporter": "python",
217
  "pygments_lexer": "ipython3",
218
- "version": "3.11.10"
219
  }
220
  },
221
  "nbformat": 4,
 
10
  },
11
  {
12
  "cell_type": "code",
13
+ "execution_count": 1,
14
  "id": "6452373c-d10f-472c-9174-cd05a3363587",
15
  "metadata": {},
16
  "outputs": [],
 
30
  },
31
  {
32
  "cell_type": "code",
33
+ "execution_count": 2,
34
+ "id": "dd9ffc64-b4cf-4e5b-9c96-703e91a77837",
35
  "metadata": {},
36
  "outputs": [],
37
  "source": [
 
43
  " .mutate(elect_year = _.year - _.year % 4) # get most recent election year \n",
44
  " .cast({\"municipal\": \"string\",\"county\":\"string\"})\n",
45
  " .mutate(municipal=ibis.case()\n",
46
+ " .when(_.jurisdiction.isin(['State','County']), ibis.literal(\"-\")) \n",
47
+ " .else_(_.municipal) \n",
48
+ " .end()\n",
49
+ " )\n",
50
+ " .mutate(county=ibis.case()\n",
51
+ " .when(_.jurisdiction.isin(['State']), ibis.literal(\"-\"))\n",
52
+ " .else_(ibis.case()\n",
53
+ " .when(_.county.endswith('COUNTY'), _.county)\n",
54
+ " .else_(_.county + ' COUNTY')\n",
55
+ " .end())\n",
56
+ " .end())\n",
57
+ " )\n",
58
  "\n",
59
  "party = (conn\n",
60
  " .read_parquet(party_url)\n",
 
74
  },
75
  {
76
  "cell_type": "code",
77
+ "execution_count": 3,
78
  "id": "8fc40e96-fffd-4b23-9963-c931fdce96f6",
79
  "metadata": {},
80
  "outputs": [],
81
  "source": [
82
  "votes = (landvote\n",
83
+ " .join(party,[\"state\",\"county\",\"municipal\",\"jurisdiction\",\"geometry\", _.elect_year == party[\"year\"]],how = \"inner\")\n",
84
  " .drop('elect_year','year_right')\n",
85
  " .mutate(municipal=ibis.case()\n",
86
  " .when(_.municipal == ibis.literal(\"-\"), None) \n",
 
95
  " )"
96
  ]
97
  },
 
 
 
 
 
 
 
 
98
  {
99
  "cell_type": "markdown",
100
  "id": "e80cfd2e-40eb-4065-9ae6-dcaf83319d9a",
 
105
  },
106
  {
107
  "cell_type": "code",
108
+ "execution_count": 4,
109
  "id": "b1cd8f44-57fa-49a8-b438-f9e4aab747c5",
110
  "metadata": {},
111
+ "outputs": [
112
+ {
113
+ "name": "stdout",
114
+ "output_type": "stream",
115
+ "text": [
116
+ "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
117
+ "Token is valid (permission: write).\n",
118
+ "Your token has been saved to /home/rstudio/.cache/huggingface/token\n",
119
+ "Login successful\n"
120
+ ]
121
+ }
122
+ ],
123
  "source": [
124
  "import subprocess\n",
125
  "import os\n",
 
165
  },
166
  {
167
  "cell_type": "code",
168
+ "execution_count": 5,
169
  "id": "7061577e-0632-4395-8ac5-241a1fab53b0",
170
  "metadata": {},
171
+ "outputs": [
172
+ {
173
+ "data": {
174
+ "application/vnd.jupyter.widget-view+json": {
175
+ "model_id": "341b737abd4c4fceae622965133c09a2",
176
+ "version_major": 2,
177
+ "version_minor": 0
178
+ },
179
+ "text/plain": [
180
+ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
181
+ ]
182
+ },
183
+ "metadata": {},
184
+ "output_type": "display_data"
185
+ },
186
+ {
187
+ "data": {
188
+ "application/vnd.jupyter.widget-view+json": {
189
+ "model_id": "859a3d133ac1491f8841d61bc2da25bf",
190
+ "version_major": 2,
191
+ "version_minor": 0
192
+ },
193
+ "text/plain": [
194
+ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
195
+ ]
196
+ },
197
+ "metadata": {},
198
+ "output_type": "display_data"
199
+ },
200
+ {
201
+ "data": {
202
+ "application/vnd.jupyter.widget-view+json": {
203
+ "model_id": "b0e18752ba4143bf88c8b447e521e8a6",
204
+ "version_major": 2,
205
+ "version_minor": 0
206
+ },
207
+ "text/plain": [
208
+ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
209
+ ]
210
+ },
211
+ "metadata": {},
212
+ "output_type": "display_data"
213
+ },
214
+ {
215
+ "name": "stderr",
216
+ "output_type": "stream",
217
+ "text": [
218
+ "1576 features, 14118861 bytes of geometry and attributes, 45035 bytes of string pool, 0 bytes of vertices, 0 bytes of nodes\n",
219
+ "Choosing a maxzoom of -z3 for features typically 91177 feet (27791 meters) apart, and at least 11358 feet (3462 meters) apart\n",
220
+ "Choosing a maxzoom of -z10 for resolution of about 308 feet (93 meters) within features\n",
221
+ " 99.9% 10/261/364 \n",
222
+ " 100.0% 10/55/448 \r"
223
+ ]
224
+ },
225
+ {
226
+ "name": "stdout",
227
+ "output_type": "stream",
228
+ "text": [
229
+ "Successfully generated PMTiles file: votes.pmtiles\n"
230
+ ]
231
+ },
232
+ {
233
+ "data": {
234
+ "application/vnd.jupyter.widget-view+json": {
235
+ "model_id": "17feadfbbbc54429b56aa85a8087310b",
236
+ "version_major": 2,
237
+ "version_minor": 0
238
+ },
239
+ "text/plain": [
240
+ "votes.pmtiles: 0%| | 0.00/4.08M [00:00<?, ?B/s]"
241
+ ]
242
+ },
243
+ "metadata": {},
244
+ "output_type": "display_data"
245
+ }
246
+ ],
247
  "source": [
248
  "gdf_state = votes.filter(_.jurisdiction == 'State').execute().set_crs(\"EPSG:4326\")\n",
249
  "gdf_state.to_file(\"votes_state.geojson\")\n",
 
260
  },
261
  {
262
  "cell_type": "code",
263
+ "execution_count": 6,
264
  "id": "f2979624-bcdf-4a8a-899a-c22fc3cdaf0e",
265
  "metadata": {},
266
+ "outputs": [
267
+ {
268
+ "data": {
269
+ "application/vnd.jupyter.widget-view+json": {
270
+ "model_id": "cc5d666af6bd4f9da53710f207f6103b",
271
+ "version_major": 2,
272
+ "version_minor": 0
273
+ },
274
+ "text/plain": [
275
+ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
276
+ ]
277
+ },
278
+ "metadata": {},
279
+ "output_type": "display_data"
280
+ },
281
+ {
282
+ "data": {
283
+ "application/vnd.jupyter.widget-view+json": {
284
+ "model_id": "9a0c8f8fb5bd4486af8220e633cd9c97",
285
+ "version_major": 2,
286
+ "version_minor": 0
287
+ },
288
+ "text/plain": [
289
+ "votes.parquet: 0%| | 0.00/31.7M [00:00<?, ?B/s]"
290
+ ]
291
+ },
292
+ "metadata": {},
293
+ "output_type": "display_data"
294
+ }
295
+ ],
296
  "source": [
297
  "# save as parquet\n",
298
  "votes.execute().set_crs(\"EPSG:4326\").to_parquet(\"votes.parquet\")\n",
 
324
  "name": "python",
325
  "nbconvert_exporter": "python",
326
  "pygments_lexer": "ipython3",
327
+ "version": "3.10.12"
328
  }
329
  },
330
  "nbformat": 4,