# %%
import urllib.parse
from datetime import datetime
import pandas as pd
import time
import requests

today_rev = datetime.now().strftime("%Y%m%d")

# url = 'https://api.os.uk/search/places/v1/uprn?%s'
# params = urllib.parse.urlencode({'uprn':,'dataset':'LPI', 'key':os.environ["ADDRESSBASE_API_KEY"]})

# Places API
# Technical guide: https://osdatahub.os.uk/docs/places/technicalSpecification


def places_api_query(query, api_key, query_type):

    def make_api_call(url):
        max_retries = 3
        retries = 0
        while retries < max_retries:
            try:
                response = requests.get(url)
                if response.status_code == 200:
                    # If successful response, return the response
                    return response
                elif response.status_code == 429:
                    # If rate limited, wait for 5 seconds before retrying
                    print("Rate limited. Retrying in 5 seconds...")
                    time.sleep(5)
                    retries += 1
                else:
                    # For other errors, return the response
                    return response
            except Exception as e:
                print("Error:", str(e))
                retries += 1
        # If maximum retries reached, return None
        return None

    if api_key:
        overall_tic = time.perf_counter()

        # filter_code_lsc = "LOGICAL_STATUS_CODE:1"
        filter_code_lpi_lsc = "LPI_LOGICAL_STATUS_CODE:1"

        concat_results = []

        if query_type == "Address":
            url = 'https://api.os.uk/search/places/v1/find?%s'
            params = urllib.parse.urlencode({'query': query,
                                             'dataset': 'LPI',
                                             'key': api_key,
                                             'maxresults': 20,
                                             'minmatch': 0.70,  # This includes partial matches
                                             'matchprecision': 2,
                                             'fq': filter_code_lpi_lsc,
                                             'lr': 'EN'})
            response = None
            try:
                request_text = url % params
                # print(request_text)
                response = make_api_call(request_text)
            except Exception as e:
                print(str(e))

            if response is not None:
                if response.status_code == 200:
                    # Process the response
                    print("Successful response")
                    # print("Successful response:", response.json())
                else:
                    print("Error:", response.status_code)
            else:
                print("Maximum retries reached. Error occurred.")
                return pd.DataFrame()  # Return blank dataframe

            # Load JSON response
            response_data = response.json()

            # Extract the 'results' part
            try:
                results = response_data['results']
                concat_results.extend(results)
            except Exception as e:
                print(str(e))
                return pd.DataFrame()  # Return blank dataframe

        # If querying a postcode, use the postcode API with pagination
        elif query_type == "Postcode":
            max_results_requested = 100
            remaining_calls = 1
            totalresults = max_results_requested
            call_number = 1

            while remaining_calls > 0 and call_number <= 10:
                offset = (call_number - 1) * max_results_requested
                # print("Remaining to query:", remaining_calls)

                url = 'https://api.os.uk/search/places/v1/postcode?%s'
                params = urllib.parse.urlencode({'postcode': query,
                                                 'dataset': 'LPI',
                                                 'key': api_key,
                                                 'maxresults': max_results_requested,
                                                 'offset': offset,
                                                 # 'fq': filter_code_lsc,
                                                 'fq': filter_code_lpi_lsc,
                                                 'lr': 'EN'})
                response = None
                try:
                    request_text = url % params
                    # print(request_text)
                    response = make_api_call(request_text)
                except Exception as e:
                    print(str(e))

                if response is not None:
                    if response.status_code == 200:
                        totalresults = response.json()['header']['totalresults']
                        print("Successful response")
                        print("Total results:", totalresults)

                        remaining_calls = totalresults - (max_results_requested * call_number)
                        call_number += 1

                        # Concatenate results together
                        try:
                            results = response.json()['results']
                            concat_results.extend(results)
                        except Exception as e:
                            print("Result concat failed with error:", str(e))
                            concat_results.append({"invalid_request": True, "POSTCODE_LOCATOR": query})
                    else:
                        print("Error:", response.status_code, "for postcode:", query, "with query:", request_text)
                        concat_results.append({"invalid_request": True, "POSTCODE_LOCATOR": query})
                        # Return a one-row dataframe flagging the invalid request
                        return pd.DataFrame(data={"invalid_request": [True], "POSTCODE_LOCATOR": [query]}, index=[0])
                else:
                    print("Maximum retries reached. Error occurred.")
                    return pd.DataFrame()  # Return blank dataframe
    else:
        print("No API key provided.")
        return pd.DataFrame()  # Return blank dataframe

    # Convert 'results' to DataFrame
    if not concat_results:
        # No results were returned, so avoid indexing an empty list
        return pd.DataFrame()  # Return blank dataframe

    # Check if the 'LPI' sub-branch exists in the JSON response
    # print(concat_results)
    if 'LPI' in concat_results[-1]:
        # print("LPI in result columns")
        df = pd.json_normalize(concat_results)
        df.rename(columns=lambda x: x.replace('LPI.', ''), inplace=True)
    else:
        # Normalise the entire JSON data if the 'LPI' sub-branch doesn't exist
        df = pd.json_normalize(concat_results)

    # Ensure df is a DataFrame, even if it has a single row
    if isinstance(df, pd.Series):
        print("This is a series!")
        df = df.to_frame().T  # Convert the Series to a DataFrame with a single row

    overall_toc = time.perf_counter()
    time_out = f"The API call took {overall_toc - overall_tic:0.1f} seconds"
    print(time_out)

    return df
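
# %%
# Minimal usage sketch (not part of the original script). It assumes the API key is
# stored in the ADDRESSBASE_API_KEY environment variable, as suggested by the
# commented-out UPRN example above; the address and postcode values are purely
# illustrative.
import os

if __name__ == "__main__":
    api_key = os.environ.get("ADDRESSBASE_API_KEY", "")

    # Free-text address search: a single call returning up to 20 matches
    address_df = places_api_query("10 Downing Street, London", api_key, query_type="Address")
    print(address_df.head())

    # Postcode search: paginated, up to 10 calls of 100 results each
    postcode_df = places_api_query("SW1A 2AA", api_key, query_type="Postcode")
    print(postcode_df.head())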