cassiebuhler committed on
Commit
2cc9bf8
1 Parent(s): d328343

fixed counties/cities

Browse files
Files changed (1) hide show
  1. preprocess.ipynb +126 -19
preprocess.ipynb CHANGED
@@ -10,7 +10,7 @@
10
  },
11
  {
12
  "cell_type": "code",
13
- "execution_count": null,
14
  "id": "6452373c-d10f-472c-9174-cd05a3363587",
15
  "metadata": {},
16
  "outputs": [],
@@ -30,7 +30,7 @@
30
  },
31
  {
32
  "cell_type": "code",
33
- "execution_count": null,
34
  "id": "dd9ffc64-b4cf-4e5b-9c96-703e91a77837",
35
  "metadata": {},
36
  "outputs": [],
@@ -74,14 +74,14 @@
74
  },
75
  {
76
  "cell_type": "code",
77
- "execution_count": null,
78
  "id": "8fc40e96-fffd-4b23-9963-c931fdce96f6",
79
  "metadata": {},
80
  "outputs": [],
81
  "source": [
82
  "votes = (landvote\n",
83
- " .join(party,[\"state\",\"county\",\"municipal\",\"jurisdiction\",\"geometry\", _.elect_year == party[\"year\"]],how = \"inner\")\n",
84
- " .drop('elect_year','year_right')\n",
85
  " .mutate(municipal=ibis.case()\n",
86
  " .when(_.municipal == ibis.literal(\"-\"), None) \n",
87
  " .else_(_.municipal) \n",
@@ -105,10 +105,21 @@
105
  },
106
  {
107
  "cell_type": "code",
108
- "execution_count": null,
109
  "id": "b1cd8f44-57fa-49a8-b438-f9e4aab747c5",
110
  "metadata": {},
111
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
112
  "source": [
113
  "import subprocess\n",
114
  "import os\n",
@@ -154,10 +165,85 @@
154
  },
155
  {
156
  "cell_type": "code",
157
- "execution_count": null,
158
  "id": "7061577e-0632-4395-8ac5-241a1fab53b0",
159
  "metadata": {},
160
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  "source": [
162
  "gdf_state = votes.filter(_.jurisdiction == 'State').execute().set_crs(\"EPSG:4326\")\n",
163
  "gdf_state.to_file(\"votes_state.geojson\")\n",
@@ -174,23 +260,44 @@
174
  },
175
  {
176
  "cell_type": "code",
177
- "execution_count": null,
178
  "id": "f2979624-bcdf-4a8a-899a-c22fc3cdaf0e",
179
  "metadata": {},
180
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  "source": [
182
  "# save as parquet\n",
183
  "votes.execute().set_crs(\"EPSG:4326\").to_parquet(\"votes.parquet\")\n",
184
  "hf_upload(\"votes.parquet\", \"boettiger-lab/landvote\")\n"
185
  ]
186
- },
187
- {
188
- "cell_type": "markdown",
189
- "id": "2ec22cf4-cfdc-4845-a793-ed9236054ff4",
190
- "metadata": {},
191
- "source": [
192
- "# "
193
- ]
194
  }
195
  ],
196
  "metadata": {
 
10
  },
11
  {
12
  "cell_type": "code",
13
+ "execution_count": 1,
14
  "id": "6452373c-d10f-472c-9174-cd05a3363587",
15
  "metadata": {},
16
  "outputs": [],
 
30
  },
31
  {
32
  "cell_type": "code",
33
+ "execution_count": 2,
34
  "id": "dd9ffc64-b4cf-4e5b-9c96-703e91a77837",
35
  "metadata": {},
36
  "outputs": [],
 
74
  },
75
  {
76
  "cell_type": "code",
77
+ "execution_count": 3,
78
  "id": "8fc40e96-fffd-4b23-9963-c931fdce96f6",
79
  "metadata": {},
80
  "outputs": [],
81
  "source": [
82
  "votes = (landvote\n",
83
+ " .join(party,[\"state\",\"county\",\"municipal\",\"jurisdiction\",\"geometry\", _.elect_year == party[\"year\"]],how = \"left\")\n",
84
+ " .drop('elect_year','state_right','county_right','municipal_right','year_right',\"geometry_right\",\"jurisdiction_right\")\n",
85
  " .mutate(municipal=ibis.case()\n",
86
  " .when(_.municipal == ibis.literal(\"-\"), None) \n",
87
  " .else_(_.municipal) \n",
 
105
  },
106
  {
107
  "cell_type": "code",
108
+ "execution_count": 4,
109
  "id": "b1cd8f44-57fa-49a8-b438-f9e4aab747c5",
110
  "metadata": {},
111
+ "outputs": [
112
+ {
113
+ "name": "stdout",
114
+ "output_type": "stream",
115
+ "text": [
116
+ "The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
117
+ "Token is valid (permission: write).\n",
118
+ "Your token has been saved to /home/rstudio/.cache/huggingface/token\n",
119
+ "Login successful\n"
120
+ ]
121
+ }
122
+ ],
123
  "source": [
124
  "import subprocess\n",
125
  "import os\n",
 
165
  },
166
  {
167
  "cell_type": "code",
168
+ "execution_count": 5,
169
  "id": "7061577e-0632-4395-8ac5-241a1fab53b0",
170
  "metadata": {},
171
+ "outputs": [
172
+ {
173
+ "data": {
174
+ "application/vnd.jupyter.widget-view+json": {
175
+ "model_id": "ff135769c16340f9a87905b600836afe",
176
+ "version_major": 2,
177
+ "version_minor": 0
178
+ },
179
+ "text/plain": [
180
+ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
181
+ ]
182
+ },
183
+ "metadata": {},
184
+ "output_type": "display_data"
185
+ },
186
+ {
187
+ "data": {
188
+ "application/vnd.jupyter.widget-view+json": {
189
+ "model_id": "037e0563f82447998bbd438473a2017d",
190
+ "version_major": 2,
191
+ "version_minor": 0
192
+ },
193
+ "text/plain": [
194
+ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
195
+ ]
196
+ },
197
+ "metadata": {},
198
+ "output_type": "display_data"
199
+ },
200
+ {
201
+ "data": {
202
+ "application/vnd.jupyter.widget-view+json": {
203
+ "model_id": "60716409890f4760a744123d4e6003de",
204
+ "version_major": 2,
205
+ "version_minor": 0
206
+ },
207
+ "text/plain": [
208
+ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
209
+ ]
210
+ },
211
+ "metadata": {},
212
+ "output_type": "display_data"
213
+ },
214
+ {
215
+ "name": "stderr",
216
+ "output_type": "stream",
217
+ "text": [
218
+ "2286 features, 17891307 bytes of geometry and attributes, 58357 bytes of string pool, 0 bytes of vertices, 0 bytes of nodes\n",
219
+ "Choosing a maxzoom of -z3 for features typically 83927 feet (25581 meters) apart, and at least 11120 feet (3390 meters) apart\n",
220
+ "Choosing a maxzoom of -z10 for resolution of about 278 feet (84 meters) within features\n",
221
+ " 99.9% 10/279/424 \n",
222
+ " 100.0% 10/55/448 \r"
223
+ ]
224
+ },
225
+ {
226
+ "name": "stdout",
227
+ "output_type": "stream",
228
+ "text": [
229
+ "Successfully generated PMTiles file: votes.pmtiles\n"
230
+ ]
231
+ },
232
+ {
233
+ "data": {
234
+ "application/vnd.jupyter.widget-view+json": {
235
+ "model_id": "c8a1a66665814dd8b0ad839929a6b30d",
236
+ "version_major": 2,
237
+ "version_minor": 0
238
+ },
239
+ "text/plain": [
240
+ "votes.pmtiles: 0%| | 0.00/4.72M [00:00<?, ?B/s]"
241
+ ]
242
+ },
243
+ "metadata": {},
244
+ "output_type": "display_data"
245
+ }
246
+ ],
247
  "source": [
248
  "gdf_state = votes.filter(_.jurisdiction == 'State').execute().set_crs(\"EPSG:4326\")\n",
249
  "gdf_state.to_file(\"votes_state.geojson\")\n",
 
260
  },
261
  {
262
  "cell_type": "code",
263
+ "execution_count": 6,
264
  "id": "f2979624-bcdf-4a8a-899a-c22fc3cdaf0e",
265
  "metadata": {},
266
+ "outputs": [
267
+ {
268
+ "data": {
269
+ "application/vnd.jupyter.widget-view+json": {
270
+ "model_id": "6b928b70b56e45a8b1f59c727bd6879a",
271
+ "version_major": 2,
272
+ "version_minor": 0
273
+ },
274
+ "text/plain": [
275
+ "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
276
+ ]
277
+ },
278
+ "metadata": {},
279
+ "output_type": "display_data"
280
+ },
281
+ {
282
+ "data": {
283
+ "application/vnd.jupyter.widget-view+json": {
284
+ "model_id": "0f4fa7e221ff481697375deba7f34b94",
285
+ "version_major": 2,
286
+ "version_minor": 0
287
+ },
288
+ "text/plain": [
289
+ "votes.parquet: 0%| | 0.00/43.1M [00:00<?, ?B/s]"
290
+ ]
291
+ },
292
+ "metadata": {},
293
+ "output_type": "display_data"
294
+ }
295
+ ],
296
  "source": [
297
  "# save as parquet\n",
298
  "votes.execute().set_crs(\"EPSG:4326\").to_parquet(\"votes.parquet\")\n",
299
  "hf_upload(\"votes.parquet\", \"boettiger-lab/landvote\")\n"
300
  ]
 
 
 
 
 
 
 
 
301
  }
302
  ],
303
  "metadata": {