cassiebuhler committed on
Commit
b8c83d8
1 Parent(s): 690fe2e

adding party affiliations

Browse files
Files changed (1) hide show
  1. get_party.ipynb +250 -0
get_party.ipynb ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "id": "d179ded1-6235-47ed-bbfb-6d72468188d5",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import ibis\n",
11
+ "from ibis import _\n",
12
+ "import streamlit as st\n",
13
+ "import ibis.expr.datatypes as dt # ibis datatype definitions, aliased as dt\n",
14
+ "\n",
15
+ "\n",
16
+ "conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
17
+ "\n",
18
+ "# pres = conn.read_csv(\"sources-president.csv\")\n",
19
+ "county = conn.read_csv(\"countypres_2000-2020.csv\")\n",
20
+ "votes = conn.read_parquet(\"vote.parquet\")"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": null,
26
+ "id": "ab644102-c725-4cf4-915c-8550a0a74c32",
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "filtered = county.filter((_.mode == \"TOTAL\") & (_.totalvotes > 0))\n",
31
+ "\n",
32
+ "# Total candidate votes per party for each year, state, and county\n",
33
+ "most_votes = (\n",
34
+ " filtered\n",
35
+ " .group_by(['year', 'state_po', 'county_name', 'party'])\n",
36
+ " .aggregate(winning_votes=_.candidatevotes.sum())\n",
37
+ ")\n",
38
+ "\n",
39
+ "# For each year, state, and county, select the party with the highest total votes\n",
40
+ "winning_party = (\n",
41
+ " most_votes\n",
42
+ " .group_by('year', 'state_po', 'county_name')\n",
43
+ " .aggregate(\n",
44
+ " max_votes=_.winning_votes.max(), # Max votes in this group\n",
45
+ " )\n",
46
+ " .join(\n",
47
+ " most_votes,\n",
48
+ " [\"year\",\"state_po\",\"county_name\",most_votes['winning_votes'] == _.max_votes],\n",
49
+ " how='inner'\n",
50
+ " )\n",
51
+ " .select(\"year\",\"state_po\",\"county_name\",most_votes['party'].name('current_party')\n",
52
+ " )\n",
53
+ ")\n",
54
+ "\n",
55
+ "# Self-join to get the winning party from the previous election (4 years earlier)\n",
56
+ "previous_year = winning_party.view()\n",
57
+ "\n",
58
+ "joined = (\n",
59
+ " winning_party\n",
60
+ " .join(\n",
61
+ " previous_year, [\"county_name\",\"state_po\",winning_party['year'] == previous_year['year'] + 4],\n",
62
+ " how='left'\n",
63
+ " )\n",
64
+ " .rename(state_id = \"state_po\")\n",
65
+ " .mutate(key = _.county_name + ibis.literal(\" COUNTY-\") + _.state_id)\n",
66
+ " .select(\"year\",\"key\",\"current_party\",previous_year['current_party'].name('previous_party'))\n",
67
+ ")\n",
68
+ "\n",
69
+ "county_parties = joined.filter(_.year >2000).order_by(\"year\")\n",
70
+ "\n",
71
+ "print(county_parties.execute())"
72
+ ]
73
+ },
74
+ {
75
+ "cell_type": "code",
76
+ "execution_count": null,
77
+ "id": "ce0d80bf-3b78-4aa9-8048-5cc0dbf970d9",
78
+ "metadata": {},
79
+ "outputs": [],
80
+ "source": [
81
+ "df = (votes\n",
82
+ " .mutate(key = _.key.upper())\n",
83
+ " .filter(_.jurisdiction == \"County\")\n",
84
+ " .join(county_parties, [\"key\",\"year\"],how='inner'\n",
85
+ " )\n",
86
+ " .cast({\"geometry\": \"geometry\"})\n",
87
+ ")"
88
+ ]
89
+ },
90
+ {
91
+ "cell_type": "code",
92
+ "execution_count": null,
93
+ "id": "87bef5e2-a40a-4aff-aa27-e7d49ec68aac",
94
+ "metadata": {},
95
+ "outputs": [],
96
+ "source": [
97
+ "import subprocess\n",
98
+ "import os\n",
99
+ "from huggingface_hub import HfApi, login\n",
100
+ "import streamlit as st\n",
101
+ "\n",
102
+ "login(st.secrets[\"HF_TOKEN\"])\n",
103
+ "# api = HfApi(add_to_git_credential=False)\n",
104
+ "api = HfApi()\n",
105
+ "\n",
106
+ "def hf_upload(file, repo_id):\n",
107
+ " info = api.upload_file(\n",
108
+ " path_or_fileobj=file,\n",
109
+ " path_in_repo=file,\n",
110
+ " repo_id=repo_id,\n",
111
+ " repo_type=\"dataset\",\n",
112
+ " )\n",
113
+ "def generate_pmtiles(input_file, output_file, max_zoom=12):\n",
114
+ " # Ensure Tippecanoe is installed\n",
115
+ " if subprocess.call([\"which\", \"tippecanoe\"], stdout=subprocess.DEVNULL) != 0:\n",
116
+ " raise RuntimeError(\"Tippecanoe is not installed or not in PATH\")\n",
117
+ "\n",
118
+ " # Construct the Tippecanoe command\n",
119
+ " command = [\n",
120
+ " \"tippecanoe\",\n",
121
+ " \"-o\", output_file,\n",
122
+ " \"-zg\",\n",
123
+ " \"--extend-zooms-if-still-dropping\",\n",
124
+ " \"--force\",\n",
125
+ " \"--projection\", \"EPSG:4326\", \n",
126
+ " input_file\n",
127
+ " ]\n",
128
+ "\n",
129
+ " # Run Tippecanoe\n",
130
+ " try:\n",
131
+ " subprocess.run(command, check=True)\n",
132
+ " print(f\"Successfully generated PMTiles file: {output_file}\")\n",
133
+ " except subprocess.CalledProcessError as e:\n",
134
+ " print(f\"Error running Tippecanoe: {e}\")\n",
135
+ "\n"
136
+ ]
137
+ },
138
+ {
139
+ "cell_type": "code",
140
+ "execution_count": null,
141
+ "id": "b086e76c-4285-4036-8033-e4e45cb6966b",
142
+ "metadata": {},
143
+ "outputs": [],
144
+ "source": [
145
+ "gdf= df.execute()\n",
146
+ "gdf = gdf.set_crs(\"EPSG:4326\")\n",
147
+ "\n",
148
+ "gdf.to_parquet(\"county_parties.parquet\")\n",
149
+ "hf_upload(\"county_parties.parquet\", \"boettiger-lab/landvote\")\n",
150
+ "\n",
151
+ "gdf.to_file(\"county_parties.geojson\")\n",
152
+ "hf_upload(\"county_parties.geojson\", \"boettiger-lab/landvote\")\n",
153
+ "\n",
154
+ "generate_pmtiles(\"county_parties.geojson\", \"county_parties.pmtiles\")\n",
155
+ "hf_upload(\"county_parties.pmtiles\", \"boettiger-lab/landvote\")\n"
156
+ ]
157
+ },
158
+ {
159
+ "cell_type": "code",
160
+ "execution_count": null,
161
+ "id": "cb790ed9-6cb8-4705-abaa-ce0008851a87",
162
+ "metadata": {},
163
+ "outputs": [],
164
+ "source": [
165
+ "gdf"
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "code",
170
+ "execution_count": null,
171
+ "id": "c2ae8ada-c73e-4b2e-938e-70a29584f199",
172
+ "metadata": {},
173
+ "outputs": [],
174
+ "source": [
175
+ "import leafmap.maplibregl as leafmap\n",
176
+ "m = leafmap.Map(style=\"positron\")\n",
177
+ "\n",
178
+ "\n",
179
+ "url_states = \"https://huggingface.co/datasets/boettiger-lab/landvote/resolve/main/county_parties.pmtiles\"\n",
180
+ "\n",
181
+ "outcome = [\n",
182
+ " 'match',\n",
183
+ " ['get', 'Status'], \n",
184
+ " \"Pass\", '#2E865F',\n",
185
+ " \"Fail\", '#FF3300', \n",
186
+ " '#ccc'\n",
187
+ " ]\n",
188
+ "paint_states = {\"fill-color\": outcome, \n",
189
+ " # \"fill-opacity\": 0.2,\n",
190
+ " }\n",
191
+ "style_states = {\n",
192
+ " \"layers\": [\n",
193
+ " {\n",
194
+ " \"id\": \"county_parties\",\n",
195
+ " \"source\": \"county_parties\",\n",
196
+ " \"source-layer\": \"county_parties\",\n",
197
+ " \"type\": \"fill\",\n",
198
+ " \"filter\": [\n",
199
+ " \"==\",\n",
200
+ " [\"get\", \"year\"],\n",
201
+ " 2008,\n",
202
+ " ], # only show features whose year is 2008\n",
203
+ " \"paint\": paint_states\n",
204
+ " },\n",
205
+ " ],\n",
206
+ "}\n",
207
+ "\n",
208
+ "m.add_pmtiles(\n",
209
+ " url_states,\n",
210
+ " style=style_states,\n",
211
+ " visible=True,\n",
212
+ " opacity=0.4,\n",
213
+ " tooltip=True,\n",
214
+ " fit_bounds=False,\n",
215
+ ")\n",
216
+ "\n",
217
+ "m\n"
218
+ ]
219
+ },
220
+ {
221
+ "cell_type": "code",
222
+ "execution_count": null,
223
+ "id": "e4280fb7-449a-4a1d-b760-67ce68fb5d92",
224
+ "metadata": {},
225
+ "outputs": [],
226
+ "source": []
227
+ }
228
+ ],
229
+ "metadata": {
230
+ "kernelspec": {
231
+ "display_name": "Python 3 (ipykernel)",
232
+ "language": "python",
233
+ "name": "python3"
234
+ },
235
+ "language_info": {
236
+ "codemirror_mode": {
237
+ "name": "ipython",
238
+ "version": 3
239
+ },
240
+ "file_extension": ".py",
241
+ "mimetype": "text/x-python",
242
+ "name": "python",
243
+ "nbconvert_exporter": "python",
244
+ "pygments_lexer": "ipython3",
245
+ "version": "3.10.12"
246
+ }
247
+ },
248
+ "nbformat": 4,
249
+ "nbformat_minor": 5
250
+ }