File size: 6,931 Bytes
dd1cbb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# %%
import urllib
from datetime import datetime
import pandas as pd
import time
import requests

# Today's date (local time) as a compact YYYYMMDD string.
today_rev = f"{datetime.now():%Y%m%d}"


# url = 'https://api.os.uk/search/places/v1/uprn?%s'
# params = urllib.parse.urlencode({'uprn':<UPRN>,'dataset':'LPI', 'key':os.environ["ADDRESSBASE_API_KEY"]})

# Places API
# Technical guide: https://osdatahub.os.uk/docs/places/technicalSpecification


def places_api_query(query, api_key, query_type):
    """Query the OS Places API and return the matches as a DataFrame.

    Args:
        query: Free-text address when ``query_type`` is ``"Address"``, or a
            postcode when it is ``"Postcode"``.
        api_key: OS Data Hub API key. When falsy, no request is made.
        query_type: ``"Address"`` makes a single call to the ``find``
            endpoint (at most 20 results). ``"Postcode"`` calls the
            ``postcode`` endpoint and pages through the results 100 at a
            time, for at most 10 calls.

    Returns:
        A DataFrame with one row per result. When the last result contains
        an ``LPI`` sub-object, the ``LPI.`` prefix is stripped from the
        flattened column names.

        An empty DataFrame is returned when no API key is given, the query
        type is not supported, retries are exhausted, an address request
        fails, or no results were collected.

        A postcode request that gets a non-200 response returns a single-row
        DataFrame with ``invalid_request=True`` and ``POSTCODE_LOCATOR`` set
        to the queried postcode.
    """
    # Imported locally so the function does not rely on another module having
    # already loaded the ``urllib.parse`` submodule.
    from urllib.parse import urlencode

    max_retries = 3
    retry_delay_seconds = 3
    # Without a timeout a stalled connection would block the caller forever.
    request_timeout_seconds = 30

    def make_api_call(url):
        """GET ``url``, retrying on HTTP 429 and on request errors.

        Returns the response for any status other than 429, or ``None`` once
        ``max_retries`` attempts have been used up.
        """
        for _ in range(max_retries):
            try:
                response = requests.get(url, timeout=request_timeout_seconds)
            except requests.RequestException as e:
                print("Error:", str(e))
                continue

            if response.status_code != 429:
                # Success and non-rate-limit errors are both handed back to
                # the caller, which inspects the status code itself.
                return response

            print(f"Rate limited. Retrying in {retry_delay_seconds} seconds...")
            time.sleep(retry_delay_seconds)

        return None

    if not api_key:
        print("No API key provided.")
        return pd.DataFrame()  # Return blank dataframe

    overall_tic = time.perf_counter()

    filter_code_lpi_lsc = "LPI_LOGICAL_STATUS_CODE:1"
    concat_results = []

    if query_type == "Address":
        params = urlencode({'query': query,
                            'dataset': 'LPI',
                            'key': api_key,
                            "maxresults": 20,
                            'minmatch': 0.70,  # This includes partial matches
                            'matchprecision': 2,
                            'fq': filter_code_lpi_lsc,
                            'lr': 'EN'})
        request_text = 'https://api.os.uk/search/places/v1/find?%s' % params
        response = make_api_call(request_text)

        if response is None:
            print("Maximum retries reached. Error occurred.")
            return pd.DataFrame()  # Return blank dataframe

        if response.status_code != 200:
            # An error body has no usable results, so stop here instead of
            # trying to parse it.
            print("Error:", response.status_code)
            return pd.DataFrame()  # Return blank dataframe

        print("Successful response")

        try:
            results = response.json()['results']
        except (ValueError, KeyError, TypeError) as e:
            # Invalid JSON, or a body without a 'results' entry.
            print("No results in response:", str(e))
            return pd.DataFrame()  # Return blank dataframe

        concat_results.extend(results)

    # If querying postcode, need to use pagination and postcode API
    elif query_type == "Postcode":
        max_results_requested = 100
        max_calls = 10
        remaining_calls = 1
        call_number = 1

        while remaining_calls > 0 and call_number <= max_calls:
            offset = (call_number - 1) * max_results_requested

            params = urlencode({'postcode': query,
                                'dataset': 'LPI',
                                'key': api_key,
                                "maxresults": max_results_requested,
                                'offset': offset,
                                'fq': filter_code_lpi_lsc,
                                'lr': 'EN'})
            request_text = 'https://api.os.uk/search/places/v1/postcode?%s' % params
            response = make_api_call(request_text)

            if response is None:
                print("Maximum retries reached. Error occurred.")
                return pd.DataFrame()  # Return blank dataframe

            if response.status_code != 200:
                # Keep the API key out of the log output.
                safe_request_text = request_text.replace(
                    urlencode({'key': api_key}), 'key=REDACTED')
                print("Error:", response.status_code, "For postcode: ", query,
                      " With query: ", safe_request_text)
                # Single marker row flagging the failed postcode lookup.
                return pd.DataFrame(
                    data={"invalid_request": [True], "POSTCODE_LOCATOR": [query]},
                    index=[0])

            # Parse the body once and reuse it for the header and the results.
            response_data = response.json()
            totalresults = response_data['header']['totalresults']

            print("Successful response")
            print("Total results:", totalresults)

            remaining_calls = totalresults - (max_results_requested * call_number)
            call_number += 1

            # Concat results together
            try:
                concat_results.extend(response_data['results'])
            except (KeyError, TypeError) as e:
                print("Result concat failed with error: ", str(e))
                concat_results.append({"invalid_request": True, "POSTCODE_LOCATOR": query})

    else:
        print(f"Unsupported query_type: {query_type!r}")
        return pd.DataFrame()  # Return blank dataframe

    if not concat_results:
        # Nothing was collected, so there is no last element to inspect.
        print("No results returned.")
        return pd.DataFrame()  # Return blank dataframe

    # Convert 'results' to DataFrame
    df = pd.json_normalize(concat_results)

    # Results nested under an 'LPI' key flatten to 'LPI.<field>' columns;
    # strip the prefix so the columns carry the plain field names.
    if 'LPI' in concat_results[-1]:
        df = df.rename(columns=lambda name: name.replace('LPI.', ''))

    overall_toc = time.perf_counter()
    time_out = f"The API call took {overall_toc - overall_tic:0.1f} seconds"
    print(time_out)

    return df