Spaces:
Sleeping
Sleeping
cassiebuhler
committed on
Commit
·
899836e
1
Parent(s):
02c6b99
fixed 2020 and state data
Browse files
- get_party.ipynb +12 -15
- preprocess.ipynb +139 -30
get_party.ipynb
CHANGED
@@ -41,7 +41,7 @@
|
|
41 |
"#getting party\n",
|
42 |
"state = (conn\n",
|
43 |
" .read_csv(\"1976-2020-president.csv\")\n",
|
44 |
-
" .filter(_. year >=2000)\n",
|
45 |
" .rename(state=\"state_po\" , party = \"party_simplified\") # rename columns\n",
|
46 |
" .group_by([\"year\", \"state\"])\n",
|
47 |
" .aggregate(party=_.party.argmax(_.candidatevotes)) # winning party \n",
|
@@ -90,11 +90,17 @@
|
|
90 |
"# getting party\n",
|
91 |
"county = (conn\n",
|
92 |
" .read_csv(\"countypres_2000-2020.csv\")\n",
|
93 |
-
" .filter((_.
|
94 |
" .rename(state=\"state_po\", state_name = \"state\") \n",
|
95 |
" .mutate(county = _.county_name + ibis.literal(\" COUNTY\"))\n",
|
96 |
-
" .group_by([\"year\", \"state\", \"county\"
|
97 |
-
" .aggregate(
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
" .select(\"year\", \"state\", \"county\", \"party\",\"state_name\") \n",
|
99 |
")"
|
100 |
]
|
@@ -117,7 +123,6 @@
|
|
117 |
" .cast({\"geometry\": \"geometry\",\"municipal\": \"string\"})\n",
|
118 |
" .mutate(jurisdiction = ibis.literal(\"County\"))\n",
|
119 |
" .select(\"state\", \"county\", \"municipal\",\"jurisdiction\",\"geometry\", \"year\", \"party\")\n",
|
120 |
-
"\n",
|
121 |
" )"
|
122 |
]
|
123 |
},
|
@@ -225,7 +230,7 @@
|
|
225 |
"gdf_city = df_city.execute().set_crs(\"EPSG:4326\")\n",
|
226 |
"gdf_city.to_file(\"party_municipal.geojson\")\n",
|
227 |
"\n",
|
228 |
-
"generate_pmtiles(\"party_state.geojson\", \"party_county.geojson\",\"party_municipal.geojson\", \"
|
229 |
"hf_upload(\"party_polygons.pmtiles\", \"boettiger-lab/landvote\")\n"
|
230 |
]
|
231 |
},
|
@@ -249,14 +254,6 @@
|
|
249 |
"df.execute().set_crs(\"EPSG:4326\").to_parquet(\"party_polygons.parquet\")\n",
|
250 |
"hf_upload(\"party_polygons.parquet\", \"boettiger-lab/landvote\")\n"
|
251 |
]
|
252 |
-
},
|
253 |
-
{
|
254 |
-
"cell_type": "code",
|
255 |
-
"execution_count": null,
|
256 |
-
"id": "3e87fc40-b535-4a44-9230-99f6777e7c48",
|
257 |
-
"metadata": {},
|
258 |
-
"outputs": [],
|
259 |
-
"source": []
|
260 |
}
|
261 |
],
|
262 |
"metadata": {
|
@@ -275,7 +272,7 @@
|
|
275 |
"name": "python",
|
276 |
"nbconvert_exporter": "python",
|
277 |
"pygments_lexer": "ipython3",
|
278 |
-
"version": "3.
|
279 |
}
|
280 |
},
|
281 |
"nbformat": 4,
|
|
|
41 |
"#getting party\n",
|
42 |
"state = (conn\n",
|
43 |
" .read_csv(\"1976-2020-president.csv\")\n",
|
44 |
+
" # .filter(_. year >=2000)\n",
|
45 |
" .rename(state=\"state_po\" , party = \"party_simplified\") # rename columns\n",
|
46 |
" .group_by([\"year\", \"state\"])\n",
|
47 |
" .aggregate(party=_.party.argmax(_.candidatevotes)) # winning party \n",
|
|
|
90 |
"# getting party\n",
|
91 |
"county = (conn\n",
|
92 |
" .read_csv(\"countypres_2000-2020.csv\")\n",
|
93 |
+
" .filter((_.totalvotes > 0)) # filter empty votes\n",
|
94 |
" .rename(state=\"state_po\", state_name = \"state\") \n",
|
95 |
" .mutate(county = _.county_name + ibis.literal(\" COUNTY\"))\n",
|
96 |
+
" .group_by([\"year\", \"state\", \"county\", \"state_name\", \"party\"])\n",
|
97 |
+
" .aggregate(\n",
|
98 |
+
" total_candidate_votes=_.candidatevotes.sum() #getting total votes per candidate \n",
|
99 |
+
" )\n",
|
100 |
+
" .group_by([\"year\", \"state\", \"county\", \"state_name\"])\n",
|
101 |
+
" .aggregate(\n",
|
102 |
+
" party=_.party.argmax(_.total_candidate_votes) # party with the highest total votes\n",
|
103 |
+
" )\n",
|
104 |
" .select(\"year\", \"state\", \"county\", \"party\",\"state_name\") \n",
|
105 |
")"
|
106 |
]
|
|
|
123 |
" .cast({\"geometry\": \"geometry\",\"municipal\": \"string\"})\n",
|
124 |
" .mutate(jurisdiction = ibis.literal(\"County\"))\n",
|
125 |
" .select(\"state\", \"county\", \"municipal\",\"jurisdiction\",\"geometry\", \"year\", \"party\")\n",
|
|
|
126 |
" )"
|
127 |
]
|
128 |
},
|
|
|
230 |
"gdf_city = df_city.execute().set_crs(\"EPSG:4326\")\n",
|
231 |
"gdf_city.to_file(\"party_municipal.geojson\")\n",
|
232 |
"\n",
|
233 |
+
"generate_pmtiles(\"party_state.geojson\", \"party_county.geojson\",\"party_municipal.geojson\", \"party_polygons.pmtiles\")\n",
|
234 |
"hf_upload(\"party_polygons.pmtiles\", \"boettiger-lab/landvote\")\n"
|
235 |
]
|
236 |
},
|
|
|
254 |
"df.execute().set_crs(\"EPSG:4326\").to_parquet(\"party_polygons.parquet\")\n",
|
255 |
"hf_upload(\"party_polygons.parquet\", \"boettiger-lab/landvote\")\n"
|
256 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
257 |
}
|
258 |
],
|
259 |
"metadata": {
|
|
|
272 |
"name": "python",
|
273 |
"nbconvert_exporter": "python",
|
274 |
"pygments_lexer": "ipython3",
|
275 |
+
"version": "3.10.12"
|
276 |
}
|
277 |
},
|
278 |
"nbformat": 4,
|
preprocess.ipynb
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
-
"execution_count":
|
14 |
"id": "6452373c-d10f-472c-9174-cd05a3363587",
|
15 |
"metadata": {},
|
16 |
"outputs": [],
|
@@ -30,8 +30,8 @@
|
|
30 |
},
|
31 |
{
|
32 |
"cell_type": "code",
|
33 |
-
"execution_count":
|
34 |
-
"id": "
|
35 |
"metadata": {},
|
36 |
"outputs": [],
|
37 |
"source": [
|
@@ -43,16 +43,18 @@
|
|
43 |
" .mutate(elect_year = _.year - _.year % 4) # get most recent election year \n",
|
44 |
" .cast({\"municipal\": \"string\",\"county\":\"string\"})\n",
|
45 |
" .mutate(municipal=ibis.case()\n",
|
46 |
-
"
|
47 |
-
"
|
48 |
-
"
|
49 |
-
"
|
50 |
-
"
|
51 |
-
"
|
52 |
-
"
|
53 |
-
"
|
54 |
-
"
|
55 |
-
"
|
|
|
|
|
56 |
"\n",
|
57 |
"party = (conn\n",
|
58 |
" .read_parquet(party_url)\n",
|
@@ -72,13 +74,13 @@
|
|
72 |
},
|
73 |
{
|
74 |
"cell_type": "code",
|
75 |
-
"execution_count":
|
76 |
"id": "8fc40e96-fffd-4b23-9963-c931fdce96f6",
|
77 |
"metadata": {},
|
78 |
"outputs": [],
|
79 |
"source": [
|
80 |
"votes = (landvote\n",
|
81 |
-
" .join(party,[\"state\",\"county\",\"municipal\",\"jurisdiction\"
|
82 |
" .drop('elect_year','year_right')\n",
|
83 |
" .mutate(municipal=ibis.case()\n",
|
84 |
" .when(_.municipal == ibis.literal(\"-\"), None) \n",
|
@@ -93,14 +95,6 @@
|
|
93 |
" )"
|
94 |
]
|
95 |
},
|
96 |
-
{
|
97 |
-
"cell_type": "code",
|
98 |
-
"execution_count": null,
|
99 |
-
"id": "65e75bc0-fef0-48f2-a543-7aae999579bf",
|
100 |
-
"metadata": {},
|
101 |
-
"outputs": [],
|
102 |
-
"source": []
|
103 |
-
},
|
104 |
{
|
105 |
"cell_type": "markdown",
|
106 |
"id": "e80cfd2e-40eb-4065-9ae6-dcaf83319d9a",
|
@@ -111,10 +105,21 @@
|
|
111 |
},
|
112 |
{
|
113 |
"cell_type": "code",
|
114 |
-
"execution_count":
|
115 |
"id": "b1cd8f44-57fa-49a8-b438-f9e4aab747c5",
|
116 |
"metadata": {},
|
117 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
"source": [
|
119 |
"import subprocess\n",
|
120 |
"import os\n",
|
@@ -160,10 +165,85 @@
|
|
160 |
},
|
161 |
{
|
162 |
"cell_type": "code",
|
163 |
-
"execution_count":
|
164 |
"id": "7061577e-0632-4395-8ac5-241a1fab53b0",
|
165 |
"metadata": {},
|
166 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
167 |
"source": [
|
168 |
"gdf_state = votes.filter(_.jurisdiction == 'State').execute().set_crs(\"EPSG:4326\")\n",
|
169 |
"gdf_state.to_file(\"votes_state.geojson\")\n",
|
@@ -180,10 +260,39 @@
|
|
180 |
},
|
181 |
{
|
182 |
"cell_type": "code",
|
183 |
-
"execution_count":
|
184 |
"id": "f2979624-bcdf-4a8a-899a-c22fc3cdaf0e",
|
185 |
"metadata": {},
|
186 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
187 |
"source": [
|
188 |
"# save as parquet\n",
|
189 |
"votes.execute().set_crs(\"EPSG:4326\").to_parquet(\"votes.parquet\")\n",
|
@@ -215,7 +324,7 @@
|
|
215 |
"name": "python",
|
216 |
"nbconvert_exporter": "python",
|
217 |
"pygments_lexer": "ipython3",
|
218 |
-
"version": "3.
|
219 |
}
|
220 |
},
|
221 |
"nbformat": 4,
|
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
+
"execution_count": 1,
|
14 |
"id": "6452373c-d10f-472c-9174-cd05a3363587",
|
15 |
"metadata": {},
|
16 |
"outputs": [],
|
|
|
30 |
},
|
31 |
{
|
32 |
"cell_type": "code",
|
33 |
+
"execution_count": 2,
|
34 |
+
"id": "dd9ffc64-b4cf-4e5b-9c96-703e91a77837",
|
35 |
"metadata": {},
|
36 |
"outputs": [],
|
37 |
"source": [
|
|
|
43 |
" .mutate(elect_year = _.year - _.year % 4) # get most recent election year \n",
|
44 |
" .cast({\"municipal\": \"string\",\"county\":\"string\"})\n",
|
45 |
" .mutate(municipal=ibis.case()\n",
|
46 |
+
" .when(_.jurisdiction.isin(['State','County']), ibis.literal(\"-\")) \n",
|
47 |
+
" .else_(_.municipal) \n",
|
48 |
+
" .end()\n",
|
49 |
+
" )\n",
|
50 |
+
" .mutate(county=ibis.case()\n",
|
51 |
+
" .when(_.jurisdiction.isin(['State']), ibis.literal(\"-\"))\n",
|
52 |
+
" .else_(ibis.case()\n",
|
53 |
+
" .when(_.county.endswith('COUNTY'), _.county)\n",
|
54 |
+
" .else_(_.county + ' COUNTY')\n",
|
55 |
+
" .end())\n",
|
56 |
+
" .end())\n",
|
57 |
+
" )\n",
|
58 |
"\n",
|
59 |
"party = (conn\n",
|
60 |
" .read_parquet(party_url)\n",
|
|
|
74 |
},
|
75 |
{
|
76 |
"cell_type": "code",
|
77 |
+
"execution_count": 3,
|
78 |
"id": "8fc40e96-fffd-4b23-9963-c931fdce96f6",
|
79 |
"metadata": {},
|
80 |
"outputs": [],
|
81 |
"source": [
|
82 |
"votes = (landvote\n",
|
83 |
+
" .join(party,[\"state\",\"county\",\"municipal\",\"jurisdiction\",\"geometry\", _.elect_year == party[\"year\"]],how = \"inner\")\n",
|
84 |
" .drop('elect_year','year_right')\n",
|
85 |
" .mutate(municipal=ibis.case()\n",
|
86 |
" .when(_.municipal == ibis.literal(\"-\"), None) \n",
|
|
|
95 |
" )"
|
96 |
]
|
97 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
{
|
99 |
"cell_type": "markdown",
|
100 |
"id": "e80cfd2e-40eb-4065-9ae6-dcaf83319d9a",
|
|
|
105 |
},
|
106 |
{
|
107 |
"cell_type": "code",
|
108 |
+
"execution_count": 4,
|
109 |
"id": "b1cd8f44-57fa-49a8-b438-f9e4aab747c5",
|
110 |
"metadata": {},
|
111 |
+
"outputs": [
|
112 |
+
{
|
113 |
+
"name": "stdout",
|
114 |
+
"output_type": "stream",
|
115 |
+
"text": [
|
116 |
+
"The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
|
117 |
+
"Token is valid (permission: write).\n",
|
118 |
+
"Your token has been saved to /home/rstudio/.cache/huggingface/token\n",
|
119 |
+
"Login successful\n"
|
120 |
+
]
|
121 |
+
}
|
122 |
+
],
|
123 |
"source": [
|
124 |
"import subprocess\n",
|
125 |
"import os\n",
|
|
|
165 |
},
|
166 |
{
|
167 |
"cell_type": "code",
|
168 |
+
"execution_count": 5,
|
169 |
"id": "7061577e-0632-4395-8ac5-241a1fab53b0",
|
170 |
"metadata": {},
|
171 |
+
"outputs": [
|
172 |
+
{
|
173 |
+
"data": {
|
174 |
+
"application/vnd.jupyter.widget-view+json": {
|
175 |
+
"model_id": "341b737abd4c4fceae622965133c09a2",
|
176 |
+
"version_major": 2,
|
177 |
+
"version_minor": 0
|
178 |
+
},
|
179 |
+
"text/plain": [
|
180 |
+
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
|
181 |
+
]
|
182 |
+
},
|
183 |
+
"metadata": {},
|
184 |
+
"output_type": "display_data"
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"data": {
|
188 |
+
"application/vnd.jupyter.widget-view+json": {
|
189 |
+
"model_id": "859a3d133ac1491f8841d61bc2da25bf",
|
190 |
+
"version_major": 2,
|
191 |
+
"version_minor": 0
|
192 |
+
},
|
193 |
+
"text/plain": [
|
194 |
+
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
|
195 |
+
]
|
196 |
+
},
|
197 |
+
"metadata": {},
|
198 |
+
"output_type": "display_data"
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"data": {
|
202 |
+
"application/vnd.jupyter.widget-view+json": {
|
203 |
+
"model_id": "b0e18752ba4143bf88c8b447e521e8a6",
|
204 |
+
"version_major": 2,
|
205 |
+
"version_minor": 0
|
206 |
+
},
|
207 |
+
"text/plain": [
|
208 |
+
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
|
209 |
+
]
|
210 |
+
},
|
211 |
+
"metadata": {},
|
212 |
+
"output_type": "display_data"
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"name": "stderr",
|
216 |
+
"output_type": "stream",
|
217 |
+
"text": [
|
218 |
+
"1576 features, 14118861 bytes of geometry and attributes, 45035 bytes of string pool, 0 bytes of vertices, 0 bytes of nodes\n",
|
219 |
+
"Choosing a maxzoom of -z3 for features typically 91177 feet (27791 meters) apart, and at least 11358 feet (3462 meters) apart\n",
|
220 |
+
"Choosing a maxzoom of -z10 for resolution of about 308 feet (93 meters) within features\n",
|
221 |
+
" 99.9% 10/261/364 \n",
|
222 |
+
" 100.0% 10/55/448 \r"
|
223 |
+
]
|
224 |
+
},
|
225 |
+
{
|
226 |
+
"name": "stdout",
|
227 |
+
"output_type": "stream",
|
228 |
+
"text": [
|
229 |
+
"Successfully generated PMTiles file: votes.pmtiles\n"
|
230 |
+
]
|
231 |
+
},
|
232 |
+
{
|
233 |
+
"data": {
|
234 |
+
"application/vnd.jupyter.widget-view+json": {
|
235 |
+
"model_id": "17feadfbbbc54429b56aa85a8087310b",
|
236 |
+
"version_major": 2,
|
237 |
+
"version_minor": 0
|
238 |
+
},
|
239 |
+
"text/plain": [
|
240 |
+
"votes.pmtiles: 0%| | 0.00/4.08M [00:00<?, ?B/s]"
|
241 |
+
]
|
242 |
+
},
|
243 |
+
"metadata": {},
|
244 |
+
"output_type": "display_data"
|
245 |
+
}
|
246 |
+
],
|
247 |
"source": [
|
248 |
"gdf_state = votes.filter(_.jurisdiction == 'State').execute().set_crs(\"EPSG:4326\")\n",
|
249 |
"gdf_state.to_file(\"votes_state.geojson\")\n",
|
|
|
260 |
},
|
261 |
{
|
262 |
"cell_type": "code",
|
263 |
+
"execution_count": 6,
|
264 |
"id": "f2979624-bcdf-4a8a-899a-c22fc3cdaf0e",
|
265 |
"metadata": {},
|
266 |
+
"outputs": [
|
267 |
+
{
|
268 |
+
"data": {
|
269 |
+
"application/vnd.jupyter.widget-view+json": {
|
270 |
+
"model_id": "cc5d666af6bd4f9da53710f207f6103b",
|
271 |
+
"version_major": 2,
|
272 |
+
"version_minor": 0
|
273 |
+
},
|
274 |
+
"text/plain": [
|
275 |
+
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
|
276 |
+
]
|
277 |
+
},
|
278 |
+
"metadata": {},
|
279 |
+
"output_type": "display_data"
|
280 |
+
},
|
281 |
+
{
|
282 |
+
"data": {
|
283 |
+
"application/vnd.jupyter.widget-view+json": {
|
284 |
+
"model_id": "9a0c8f8fb5bd4486af8220e633cd9c97",
|
285 |
+
"version_major": 2,
|
286 |
+
"version_minor": 0
|
287 |
+
},
|
288 |
+
"text/plain": [
|
289 |
+
"votes.parquet: 0%| | 0.00/31.7M [00:00<?, ?B/s]"
|
290 |
+
]
|
291 |
+
},
|
292 |
+
"metadata": {},
|
293 |
+
"output_type": "display_data"
|
294 |
+
}
|
295 |
+
],
|
296 |
"source": [
|
297 |
"# save as parquet\n",
|
298 |
"votes.execute().set_crs(\"EPSG:4326\").to_parquet(\"votes.parquet\")\n",
|
|
|
324 |
"name": "python",
|
325 |
"nbconvert_exporter": "python",
|
326 |
"pygments_lexer": "ipython3",
|
327 |
+
"version": "3.10.12"
|
328 |
}
|
329 |
},
|
330 |
"nbformat": 4,
|