Spaces:
Sleeping
Sleeping
File size: 4,634 Bytes
02c6b99 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
{
"cells": [
{
"cell_type": "markdown",
"id": "4bfd881f-5889-4a26-b003-e2611708ad2a",
"metadata": {},
"source": [
"# Getting city polygons from Overture Maps"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3e5756d2-382b-49e9-93b5-2ecf6d0eb812",
"metadata": {},
"outputs": [],
"source": [
"import duckdb\n",
"\n",
"con = duckdb.connect()\n",
"\n",
"con.execute(\"SET s3_region='us-west-2';\")\n",
"con.execute(\"LOAD spatial;\")\n",
"con.execute(\"LOAD httpfs;\")\n",
"\n",
"# getting polygons of localities in the US.\n",
"query = \"\"\"\n",
" COPY (\n",
" SELECT * \n",
" FROM read_parquet('s3://overturemaps-us-west-2/release/2024-09-18.0/theme=divisions/*/*')\n",
" WHERE country = 'US' AND subtype IN ('locality')\n",
" ) TO 'us_localities_raw.parquet' (FORMAT 'parquet');\n",
"\"\"\"\n",
"con.execute(query)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25f62dd7-5539-438b-8f0a-1d85c9bc78ab",
"metadata": {},
"outputs": [],
"source": [
"import ibis\n",
"from ibis import _\n",
"\n",
"conn = ibis.duckdb.connect(extensions=[\"spatial\"])\n",
"\n",
"df = (conn\n",
" .read_parquet(\"us_localities_raw.parquet\")\n",
" .cast({\"geometry\": \"geometry\"})\n",
" .filter(_[\"type\"] == \"division\")\n",
" .mutate(municipal = _.names[\"primary\"])\n",
" .mutate(state = _.region.replace(\"US-\", \"\")) \n",
" .mutate(county = _.hierarchies[0][2]['name'] ) #extract county from nested dictionary \n",
" .mutate(key_long = _.municipal + ibis.literal('-') + _.county + ibis.literal('-') + _.state)\n",
" .select(\"key_long\",\"municipal\", \"county\",\"state\" ,\"geometry\")\n",
" )\n",
"\n",
"\n",
"## Dropping rows with same locality and state, with differing counties - landvote doesn't specify county for cities so we are dropping these to avoid duplicates. \n",
"county_count = (\n",
" df.group_by([\"municipal\", \"state\"])\n",
" .aggregate(county_count=_.county.nunique()) # Count unique counties for each group\n",
") \n",
"valid_names = county_count.filter(county_count.county_count == 1).select(\"municipal\", \"state\")\n",
"df_filtered = df.join(valid_names, [\"municipal\", \"state\"], how=\"inner\")\n",
"\n",
"\n",
"# if two records have the same name but different geometries, only keep the first one. \n",
"df_unique = (\n",
" df_filtered.group_by(\"key_long\")\n",
" .aggregate(\n",
" municipal=df_filtered.municipal.first(),\n",
" county=df_filtered.county.first(),\n",
" state=df_filtered.state.first(),\n",
" geometry=df_filtered.geometry.first()\n",
" )\n",
" .mutate(geometry = _.geometry.buffer(.07))\n",
" .select(\"state\",\"county\",\"municipal\",\"geometry\")\n",
")\n",
"\n",
"df_unique.execute().to_parquet(\"us_localities.parquet\")\n"
]
},
{
"cell_type": "markdown",
"id": "0fce9fe8-584f-4260-9217-3aade9e71eef",
"metadata": {},
"source": [
"# Uploading city polygons to Hugging Face"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ca02743f-0bf4-46e5-91fd-a5fe37519ecd",
"metadata": {},
"outputs": [],
"source": [
"import subprocess\n",
"import os\n",
"from huggingface_hub import HfApi, login\n",
"import streamlit as st\n",
"\n",
"login(st.secrets[\"HF_TOKEN\"])\n",
"api = HfApi()\n",
"\n",
"def hf_upload(file, repo_id):\n",
" info = api.upload_file(\n",
" path_or_fileobj=file,\n",
" path_in_repo=file,\n",
" repo_id=repo_id,\n",
" repo_type=\"dataset\",\n",
" )\n",
"hf_upload(\"us_localities.parquet\", \"boettiger-lab/landvote\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1ddadf0a-5b45-487f-a664-0c0696f75579",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
|