Spaces:
Sleeping
Sleeping
Delete appStore/filter_utils
Browse files- appStore/filter_utils +0 -116
appStore/filter_utils
DELETED
@@ -1,116 +0,0 @@
|
|
1 |
-
import re
|
2 |
-
import json
|
3 |
-
import streamlit as st
|
4 |
-
from appStore.crs_utils import lookup_crs_value
|
5 |
-
from appStore.search import hybrid_search
|
6 |
-
from appStore.prep_data import remove_duplicates, extract_year
|
7 |
-
|
8 |
-
def parse_budget(value):
    """
    Safely parse a budget value into a float.

    Args:
        value: Anything ``float()`` accepts (a number or a numeric string).

    Returns:
        float: The parsed value, or 0.0 when ``value`` cannot be converted
        (wrong type, non-numeric text, or an integer too large for a float).
    """
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "no usable budget". A bare ``except``
        # would also swallow KeyboardInterrupt and SystemExit.
        return 0.0
|
16 |
-
|
17 |
-
def _parse_country_entries(country):
    """Turn the raw ``country`` metadata value into a list of raw code entries."""
    if country is None:
        return []
    if isinstance(country, (list, tuple)):
        # Already a sequence of codes; nothing to decode.
        return list(country)
    text = str(country).strip()
    if not text.startswith("["):
        return [text]
    try:
        # The stored list may use single quotes, which JSON does not accept.
        parsed = json.loads(text.replace("'", '"'))
    except json.JSONDecodeError:
        return []
    return [parsed] if isinstance(parsed, str) else parsed


def filter_results(results, country_filter, region_filter, end_year_range, crs_filter, budget_filter,
                   region_df, iso_code_to_sub_region, clean_country_code_fn, get_country_name_fn):
    """
    Filter search results by country, region, end_year range, CRS code, and budget threshold.

    A result with no parsable end year passes the year check, and a result
    with an empty CRS key passes the CRS check.

    Args:
        results (list): List of results from the vector DB; each item exposes a
            ``payload`` dict holding a ``metadata`` dict.
        country_filter (str): Selected country name to filter by (or "All/Not allocated").
        region_filter (str): Selected region name to filter by (or "All/Not allocated").
        end_year_range (tuple): (min_end_year, max_end_year) from a Streamlit slider.
        crs_filter (str): The chosen CRS code from the dropdown (or "All/Not allocated").
        budget_filter (float): Minimum budget in million EUR.
        region_df (pd.DataFrame): Region lookup DataFrame.
        iso_code_to_sub_region (dict): Mapping from alpha-2 country code to sub-region name.
        clean_country_code_fn (callable): Function that normalizes country code strings.
        get_country_name_fn (callable): Function that returns the country name from alpha-2 code.

    Returns:
        list: Filtered list of results that match all criteria.
    """
    filtered = []
    for r in results:
        metadata = r.payload.get('metadata', {})

        # End year: 0 stands for "unknown" and is never excluded by the range.
        end_year_val = 0
        year_str = metadata.get('end_year')
        if year_str:
            extracted = extract_year(year_str)
            try:
                end_year_val = int(extracted) if extracted != "Unknown" else 0
            except ValueError:
                end_year_val = 0

        # Country codes: normalize each entry once and keep only alpha-2 codes.
        country_list = _parse_country_entries(metadata.get('country', "[]"))
        cleaned_codes = (clean_country_code_fn(code) for code in country_list)
        c_list = [code for code in cleaned_codes if len(code) == 2]
        resolved_names = [get_country_name_fn(code, region_df) for code in c_list]

        # Check if any of the codes match the chosen region
        country_in_region = any(
            iso_code_to_sub_region.get(code, "Not allocated") == region_filter
            for code in c_list
        )

        # CRS filtering. The key is stringified before stripping so numeric
        # keys (e.g. 11110.0) do not raise, matching get_crs_options.
        crs_key_clean = re.sub(r'\.0$', '', str(metadata.get("crs_key", "")).strip())
        if crs_filter != "All/Not allocated" and crs_key_clean:
            # Build the "<code>: <description>" label only when it is compared.
            new_crs_value = lookup_crs_value(crs_key_clean)
            new_crs_value_clean = re.sub(r'\.0$', '', str(new_crs_value).strip())
            crs_combined = f"{crs_key_clean}: {new_crs_value_clean}"
            if crs_filter != crs_combined:
                continue

        # Budget filtering (the threshold is given in millions)
        budget_value = parse_budget(metadata.get('total_project', "0"))
        if budget_value < (budget_filter * 1e6):
            continue

        year_ok = end_year_val == 0 or (end_year_range[0] <= end_year_val <= end_year_range[1])
        country_ok = country_filter == "All/Not allocated" or country_filter in resolved_names
        region_ok = region_filter == "All/Not allocated" or country_in_region

        if country_ok and region_ok and year_ok:
            filtered.append(r)

    return filtered
|
97 |
-
|
98 |
-
@st.cache_data(show_spinner=False)
def get_crs_options(_client, collection_name):
    """
    Collect every distinct "<CRS code>: <description>" label in the collection.

    Runs ``hybrid_search`` with an empty query, joins its first two result
    lists, and builds one label per result that has a non-empty ``crs_key``.
    Used to populate the 'CRS' dropdown in the app.

    Returns:
        list: The unique labels, sorted.
    """
    hits = hybrid_search(_client, "", collection_name)
    combined_hits = hits[0] + hits[1]

    labels = set()
    for hit in combined_hits:
        raw_key = hit.payload.get('metadata', {}).get("crs_key", "")
        # Drop a trailing ".0" so a key stored as a float reads like a code.
        code = re.sub(r'\.0$', '', str(raw_key).strip())
        if not code:
            continue
        description = re.sub(r'\.0$', '', str(lookup_crs_value(code)).strip())
        labels.add(f"{code}: {description}")

    return sorted(labels)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|