# Getting city polygons from Overture Maps

In [None]:
import duckdb

# In-memory DuckDB connection used only for the one-off download below.
con = duckdb.connect()

# Install and load the extensions BEFORE configuring S3: the `s3_region`
# setting is provided by httpfs, so the extension has to be available first.
# INSTALL is a no-op when the extension is already present.
for extension in ("spatial", "httpfs"):
    con.execute(f"INSTALL {extension};")
    con.execute(f"LOAD {extension};")
con.execute("SET s3_region='us-west-2';")

# Copy the US rows with subtype 'locality' from the Overture Maps
# 2024-09-18.0 divisions theme into a local Parquet file.
query = """
 COPY (
 SELECT * 
 FROM read_parquet('s3://overturemaps-us-west-2/release/2024-09-18.0/theme=divisions/*/*')
 WHERE country = 'US' AND subtype IN ('locality')
 ) TO 'us_localities_raw.parquet' (FORMAT 'parquet');
"""
con.execute(query)



In [None]:
import ibis
from ibis import _

# ibis-managed DuckDB backend with the spatial extension enabled.
conn = ibis.duckdb.connect(extensions=["spatial"])

# Separator used when assembling the composite "municipal-county-state" key.
dash = ibis.literal('-')

# Load the raw download, cast the geometry column to a geometry type and keep
# only the rows whose type is "division".
divisions = (
    conn.read_parquet("us_localities_raw.parquet")
    .cast({"geometry": "geometry"})
    .filter(_["type"] == "division")
)

# Derive one column per step, because key_long depends on the three columns
# created just before it.
df = (
    divisions
    .mutate(municipal=_.names["primary"])
    .mutate(state=_.region.replace("US-", ""))
    # Third entry of the first hierarchy; per the original note this is the
    # county. NOTE(review): assumes every row has at least three levels.
    .mutate(county=_.hierarchies[0][2]["name"])
    .mutate(key_long=_.municipal + dash + _.county + dash + _.state)
    .select("key_long", "municipal", "county", "state", "geometry")
)


# Drop localities whose (municipal, state) pair appears in more than one
# county: landvote doesn't specify a county for cities, so such names would be
# ambiguous and produce duplicates.
county_count = df.group_by(["municipal", "state"]).aggregate(
    county_count=_.county.nunique()  # distinct counties per (municipal, state)
)
valid_names = county_count.filter(_.county_count == 1).select("municipal", "state")
df_filtered = df.join(valid_names, predicates=["municipal", "state"], how="inner")


# Collapse rows sharing the same key_long (same name, possibly different
# geometries) into one record by taking first() of every column.
# NOTE(review): no ordering is specified, so which row counts as "first" is
# presumably arbitrary -- confirm this is acceptable.
first_per_key = df_filtered.group_by("key_long").aggregate(
    municipal=_.municipal.first(),
    county=_.county.first(),
    state=_.state.first(),
    geometry=_.geometry.first(),
)
df_unique = (
    first_per_key
    # Buffer distance is in the geometry's own units (presumably degrees --
    # TODO confirm).
    .mutate(geometry=_.geometry.buffer(.07))
    .select("state", "county", "municipal", "geometry")
)

# Materialize the result locally and write the cleaned polygons to Parquet.
localities_frame = df_unique.execute()
localities_frame.to_parquet("us_localities.parquet")


# Uploading city polygons to Hugging Face

In [None]:
import subprocess
import os
from huggingface_hub import HfApi, login
import streamlit as st

# Authenticate against the Hugging Face Hub with the token kept in Streamlit's
# secrets store, then create the client used for uploads.
hf_token = st.secrets["HF_TOKEN"]
login(token=hf_token)
api = HfApi()

def hf_upload(file, repo_id):
    """Upload a local file to a Hugging Face dataset repository.

    The file is stored in the repository under the same path it has locally.
    Uses the module-level ``api`` client.

    Args:
        file: Path of the local file; also used as the path inside the repo.
        repo_id: Identifier of the target dataset repository.
    """
    api.upload_file(
        path_or_fileobj=file,
        path_in_repo=file,
        repo_id=repo_id,
        repo_type="dataset",
    )
# Publish the cleaned locality polygons to the landvote dataset repository.
hf_upload(file="us_localities.parquet", repo_id="boettiger-lab/landvote")

