Spaces:
Sleeping
Sleeping
cassiebuhler
commited on
Commit
•
2cc9bf8
1
Parent(s):
d328343
fixed counties/cities
Browse files- preprocess.ipynb +126 -19
preprocess.ipynb
CHANGED
@@ -10,7 +10,7 @@
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
-
"execution_count":
|
14 |
"id": "6452373c-d10f-472c-9174-cd05a3363587",
|
15 |
"metadata": {},
|
16 |
"outputs": [],
|
@@ -30,7 +30,7 @@
|
|
30 |
},
|
31 |
{
|
32 |
"cell_type": "code",
|
33 |
-
"execution_count":
|
34 |
"id": "dd9ffc64-b4cf-4e5b-9c96-703e91a77837",
|
35 |
"metadata": {},
|
36 |
"outputs": [],
|
@@ -74,14 +74,14 @@
|
|
74 |
},
|
75 |
{
|
76 |
"cell_type": "code",
|
77 |
-
"execution_count":
|
78 |
"id": "8fc40e96-fffd-4b23-9963-c931fdce96f6",
|
79 |
"metadata": {},
|
80 |
"outputs": [],
|
81 |
"source": [
|
82 |
"votes = (landvote\n",
|
83 |
-
" .join(party,[\"state\",\"county\",\"municipal\",\"jurisdiction\",\"geometry\", _.elect_year == party[\"year\"]],how = \"
|
84 |
-
" .drop('elect_year','year_right')\n",
|
85 |
" .mutate(municipal=ibis.case()\n",
|
86 |
" .when(_.municipal == ibis.literal(\"-\"), None) \n",
|
87 |
" .else_(_.municipal) \n",
|
@@ -105,10 +105,21 @@
|
|
105 |
},
|
106 |
{
|
107 |
"cell_type": "code",
|
108 |
-
"execution_count":
|
109 |
"id": "b1cd8f44-57fa-49a8-b438-f9e4aab747c5",
|
110 |
"metadata": {},
|
111 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
"source": [
|
113 |
"import subprocess\n",
|
114 |
"import os\n",
|
@@ -154,10 +165,85 @@
|
|
154 |
},
|
155 |
{
|
156 |
"cell_type": "code",
|
157 |
-
"execution_count":
|
158 |
"id": "7061577e-0632-4395-8ac5-241a1fab53b0",
|
159 |
"metadata": {},
|
160 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
161 |
"source": [
|
162 |
"gdf_state = votes.filter(_.jurisdiction == 'State').execute().set_crs(\"EPSG:4326\")\n",
|
163 |
"gdf_state.to_file(\"votes_state.geojson\")\n",
|
@@ -174,23 +260,44 @@
|
|
174 |
},
|
175 |
{
|
176 |
"cell_type": "code",
|
177 |
-
"execution_count":
|
178 |
"id": "f2979624-bcdf-4a8a-899a-c22fc3cdaf0e",
|
179 |
"metadata": {},
|
180 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
181 |
"source": [
|
182 |
"# save as parquet\n",
|
183 |
"votes.execute().set_crs(\"EPSG:4326\").to_parquet(\"votes.parquet\")\n",
|
184 |
"hf_upload(\"votes.parquet\", \"boettiger-lab/landvote\")\n"
|
185 |
]
|
186 |
-
},
|
187 |
-
{
|
188 |
-
"cell_type": "markdown",
|
189 |
-
"id": "2ec22cf4-cfdc-4845-a793-ed9236054ff4",
|
190 |
-
"metadata": {},
|
191 |
-
"source": [
|
192 |
-
"# "
|
193 |
-
]
|
194 |
}
|
195 |
],
|
196 |
"metadata": {
|
|
|
10 |
},
|
11 |
{
|
12 |
"cell_type": "code",
|
13 |
+
"execution_count": 1,
|
14 |
"id": "6452373c-d10f-472c-9174-cd05a3363587",
|
15 |
"metadata": {},
|
16 |
"outputs": [],
|
|
|
30 |
},
|
31 |
{
|
32 |
"cell_type": "code",
|
33 |
+
"execution_count": 2,
|
34 |
"id": "dd9ffc64-b4cf-4e5b-9c96-703e91a77837",
|
35 |
"metadata": {},
|
36 |
"outputs": [],
|
|
|
74 |
},
|
75 |
{
|
76 |
"cell_type": "code",
|
77 |
+
"execution_count": 3,
|
78 |
"id": "8fc40e96-fffd-4b23-9963-c931fdce96f6",
|
79 |
"metadata": {},
|
80 |
"outputs": [],
|
81 |
"source": [
|
82 |
"votes = (landvote\n",
|
83 |
+
" .join(party,[\"state\",\"county\",\"municipal\",\"jurisdiction\",\"geometry\", _.elect_year == party[\"year\"]],how = \"left\")\n",
|
84 |
+
" .drop('elect_year','state_right','county_right','municipal_right','year_right',\"geometry_right\",\"jurisdiction_right\")\n",
|
85 |
" .mutate(municipal=ibis.case()\n",
|
86 |
" .when(_.municipal == ibis.literal(\"-\"), None) \n",
|
87 |
" .else_(_.municipal) \n",
|
|
|
105 |
},
|
106 |
{
|
107 |
"cell_type": "code",
|
108 |
+
"execution_count": 4,
|
109 |
"id": "b1cd8f44-57fa-49a8-b438-f9e4aab747c5",
|
110 |
"metadata": {},
|
111 |
+
"outputs": [
|
112 |
+
{
|
113 |
+
"name": "stdout",
|
114 |
+
"output_type": "stream",
|
115 |
+
"text": [
|
116 |
+
"The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.\n",
|
117 |
+
"Token is valid (permission: write).\n",
|
118 |
+
"Your token has been saved to /home/rstudio/.cache/huggingface/token\n",
|
119 |
+
"Login successful\n"
|
120 |
+
]
|
121 |
+
}
|
122 |
+
],
|
123 |
"source": [
|
124 |
"import subprocess\n",
|
125 |
"import os\n",
|
|
|
165 |
},
|
166 |
{
|
167 |
"cell_type": "code",
|
168 |
+
"execution_count": 5,
|
169 |
"id": "7061577e-0632-4395-8ac5-241a1fab53b0",
|
170 |
"metadata": {},
|
171 |
+
"outputs": [
|
172 |
+
{
|
173 |
+
"data": {
|
174 |
+
"application/vnd.jupyter.widget-view+json": {
|
175 |
+
"model_id": "ff135769c16340f9a87905b600836afe",
|
176 |
+
"version_major": 2,
|
177 |
+
"version_minor": 0
|
178 |
+
},
|
179 |
+
"text/plain": [
|
180 |
+
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
|
181 |
+
]
|
182 |
+
},
|
183 |
+
"metadata": {},
|
184 |
+
"output_type": "display_data"
|
185 |
+
},
|
186 |
+
{
|
187 |
+
"data": {
|
188 |
+
"application/vnd.jupyter.widget-view+json": {
|
189 |
+
"model_id": "037e0563f82447998bbd438473a2017d",
|
190 |
+
"version_major": 2,
|
191 |
+
"version_minor": 0
|
192 |
+
},
|
193 |
+
"text/plain": [
|
194 |
+
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
|
195 |
+
]
|
196 |
+
},
|
197 |
+
"metadata": {},
|
198 |
+
"output_type": "display_data"
|
199 |
+
},
|
200 |
+
{
|
201 |
+
"data": {
|
202 |
+
"application/vnd.jupyter.widget-view+json": {
|
203 |
+
"model_id": "60716409890f4760a744123d4e6003de",
|
204 |
+
"version_major": 2,
|
205 |
+
"version_minor": 0
|
206 |
+
},
|
207 |
+
"text/plain": [
|
208 |
+
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
|
209 |
+
]
|
210 |
+
},
|
211 |
+
"metadata": {},
|
212 |
+
"output_type": "display_data"
|
213 |
+
},
|
214 |
+
{
|
215 |
+
"name": "stderr",
|
216 |
+
"output_type": "stream",
|
217 |
+
"text": [
|
218 |
+
"2286 features, 17891307 bytes of geometry and attributes, 58357 bytes of string pool, 0 bytes of vertices, 0 bytes of nodes\n",
|
219 |
+
"Choosing a maxzoom of -z3 for features typically 83927 feet (25581 meters) apart, and at least 11120 feet (3390 meters) apart\n",
|
220 |
+
"Choosing a maxzoom of -z10 for resolution of about 278 feet (84 meters) within features\n",
|
221 |
+
" 99.9% 10/279/424 \n",
|
222 |
+
" 100.0% 10/55/448 \r"
|
223 |
+
]
|
224 |
+
},
|
225 |
+
{
|
226 |
+
"name": "stdout",
|
227 |
+
"output_type": "stream",
|
228 |
+
"text": [
|
229 |
+
"Successfully generated PMTiles file: votes.pmtiles\n"
|
230 |
+
]
|
231 |
+
},
|
232 |
+
{
|
233 |
+
"data": {
|
234 |
+
"application/vnd.jupyter.widget-view+json": {
|
235 |
+
"model_id": "c8a1a66665814dd8b0ad839929a6b30d",
|
236 |
+
"version_major": 2,
|
237 |
+
"version_minor": 0
|
238 |
+
},
|
239 |
+
"text/plain": [
|
240 |
+
"votes.pmtiles: 0%| | 0.00/4.72M [00:00<?, ?B/s]"
|
241 |
+
]
|
242 |
+
},
|
243 |
+
"metadata": {},
|
244 |
+
"output_type": "display_data"
|
245 |
+
}
|
246 |
+
],
|
247 |
"source": [
|
248 |
"gdf_state = votes.filter(_.jurisdiction == 'State').execute().set_crs(\"EPSG:4326\")\n",
|
249 |
"gdf_state.to_file(\"votes_state.geojson\")\n",
|
|
|
260 |
},
|
261 |
{
|
262 |
"cell_type": "code",
|
263 |
+
"execution_count": 6,
|
264 |
"id": "f2979624-bcdf-4a8a-899a-c22fc3cdaf0e",
|
265 |
"metadata": {},
|
266 |
+
"outputs": [
|
267 |
+
{
|
268 |
+
"data": {
|
269 |
+
"application/vnd.jupyter.widget-view+json": {
|
270 |
+
"model_id": "6b928b70b56e45a8b1f59c727bd6879a",
|
271 |
+
"version_major": 2,
|
272 |
+
"version_minor": 0
|
273 |
+
},
|
274 |
+
"text/plain": [
|
275 |
+
"FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))"
|
276 |
+
]
|
277 |
+
},
|
278 |
+
"metadata": {},
|
279 |
+
"output_type": "display_data"
|
280 |
+
},
|
281 |
+
{
|
282 |
+
"data": {
|
283 |
+
"application/vnd.jupyter.widget-view+json": {
|
284 |
+
"model_id": "0f4fa7e221ff481697375deba7f34b94",
|
285 |
+
"version_major": 2,
|
286 |
+
"version_minor": 0
|
287 |
+
},
|
288 |
+
"text/plain": [
|
289 |
+
"votes.parquet: 0%| | 0.00/43.1M [00:00<?, ?B/s]"
|
290 |
+
]
|
291 |
+
},
|
292 |
+
"metadata": {},
|
293 |
+
"output_type": "display_data"
|
294 |
+
}
|
295 |
+
],
|
296 |
"source": [
|
297 |
"# save as parquet\n",
|
298 |
"votes.execute().set_crs(\"EPSG:4326\").to_parquet(\"votes.parquet\")\n",
|
299 |
"hf_upload(\"votes.parquet\", \"boettiger-lab/landvote\")\n"
|
300 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
301 |
}
|
302 |
],
|
303 |
"metadata": {
|