annikwag committed on
Commit
40b7eec
·
verified ·
1 Parent(s): 74e4e05

Create filter_utils.py

Browse files
Files changed (1) hide show
  1. appStore/filter_utils.py +116 -0
appStore/filter_utils.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import json
3
+ import streamlit as st
4
+ from appStore.crs_utils import lookup_crs_value
5
+ from appStore.search import hybrid_search
6
+ from appStore.prep_data import remove_duplicates, extract_year
7
+
8
def parse_budget(value):
    """
    Safely parse a budget value into a float.

    Args:
        value: Budget as a string or number; anything ``float()`` accepts.

    Returns:
        float: The parsed budget, or 0.0 when the value is missing, cannot be
        converted, or is not a finite number (NaN / infinity).
    """
    import math  # local import: the module header does not import math

    try:
        parsed = float(value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are treated as "no budget"; anything else
        # (e.g. KeyboardInterrupt) must propagate instead of being swallowed.
        return 0.0

    # NaN compares False against every threshold, so a "nan" budget would slip
    # through any `budget < minimum` check; treat non-finite values as unparseable.
    return parsed if math.isfinite(parsed) else 0.0
16
+
17
def _extract_country_codes(raw_country, clean_country_code_fn):
    """Return the cleaned two-character country codes found in a ``country`` metadata value."""
    if isinstance(raw_country, (list, tuple)):
        # Already a sequence: nothing to decode.
        entries = list(raw_country)
    elif not isinstance(raw_country, str):
        # None or any other non-string value carries no usable country data.
        entries = []
    else:
        text = raw_country.strip()
        if text.startswith("["):
            # Stringified list, possibly written with single quotes.
            try:
                parsed = json.loads(text.replace("'", '"'))
            except json.JSONDecodeError:
                parsed = []
            entries = parsed if isinstance(parsed, list) else [parsed]
        else:
            entries = [text]

    # Clean each entry once and keep only results that are exactly two characters long.
    cleaned = (clean_country_code_fn(entry) for entry in entries)
    return [code for code in cleaned if len(code) == 2]


def _end_year_matches(raw_end_year, end_year_range):
    """Return True when the end year is unknown or lies inside ``end_year_range`` (inclusive)."""
    if not raw_end_year:
        return True
    extracted = extract_year(raw_end_year)
    if extracted == "Unknown":
        return True
    try:
        end_year = int(extracted)
    except (TypeError, ValueError):
        # An unparseable year is handled like a missing one.
        return True
    if end_year == 0:
        # 0 is the "unknown year" marker and never excludes a record.
        return True
    return end_year_range[0] <= end_year <= end_year_range[1]


def filter_results(results, country_filter, region_filter, end_year_range, crs_filter, budget_filter,
                   region_df, iso_code_to_sub_region, clean_country_code_fn, get_country_name_fn):
    """
    Filter search results by country, region, end_year range, CRS code, and budget threshold.

    A result is kept only if it passes every active filter. The value
    "All/Not allocated" disables the country, region, or CRS filter respectively.

    Notes:
        - Results without a CRS key are kept even when a specific CRS is selected.
        - Results whose end year is missing, "Unknown", unparseable, or 0 always
          pass the end-year check.
        - The budget is read from the ``total_project`` metadata field and compared
          against ``budget_filter * 1e6``.

    Args:
        results (list): List of results from the vector DB; each item exposes a
            ``payload`` mapping with a ``metadata`` entry.
        country_filter (str): Selected country name to filter by (or "All/Not allocated").
        region_filter (str): Selected region name to filter by (or "All/Not allocated").
        end_year_range (tuple): (min_end_year, max_end_year) from a Streamlit slider.
        crs_filter (str): The chosen CRS code from the dropdown (or "All/Not allocated").
        budget_filter (float): Minimum budget in million EUR.
        region_df (pd.DataFrame): Region lookup DataFrame, passed through to
            ``get_country_name_fn``.
        iso_code_to_sub_region (dict): Mapping from alpha-2 country code to sub-region name.
        clean_country_code_fn (callable): Function that normalizes country code strings.
        get_country_name_fn (callable): Function that returns the country name from alpha-2 code.

    Returns:
        list: Filtered list of results that match all criteria, in input order.
    """
    no_filter = "All/Not allocated"
    filter_by_country = country_filter != no_filter
    filter_by_region = region_filter != no_filter
    filter_by_crs = crs_filter != no_filter
    min_budget = budget_filter * 1e6  # threshold is given in millions

    filtered = []
    for r in results:
        metadata = r.payload.get('metadata', {})

        # CRS filtering: build the same "<key>: <description>" label that the
        # dropdown options use, and only when a CRS is actually selected.
        if filter_by_crs:
            # str() first so numeric keys (e.g. 11110.0) do not break .strip().
            crs_key_clean = re.sub(r'\.0$', '', str(metadata.get("crs_key", "")).strip())
            if crs_key_clean:
                crs_value_clean = re.sub(r'\.0$', '', str(lookup_crs_value(crs_key_clean)).strip())
                if crs_filter != f"{crs_key_clean}: {crs_value_clean}":
                    continue

        # Budget filtering
        if parse_budget(metadata.get('total_project', "0")) < min_budget:
            continue

        # End-year filtering
        if not _end_year_matches(metadata.get('end_year'), end_year_range):
            continue

        # Country / region filtering: the country field is only decoded when needed.
        if filter_by_country or filter_by_region:
            codes = _extract_country_codes(metadata.get('country', "[]"), clean_country_code_fn)

            if filter_by_country and not any(
                get_country_name_fn(code, region_df) == country_filter for code in codes
            ):
                continue

            if filter_by_region and not any(
                iso_code_to_sub_region.get(code, "Not allocated") == region_filter
                for code in codes
            ):
                continue

        filtered.append(r)

    return filtered
97
+
98
@st.cache_data(show_spinner=False)
def get_crs_options(_client, collection_name):
    """
    Build the option list for the app's 'CRS' dropdown.

    Runs an empty-query hybrid search against the collection, concatenates the
    first two elements of its return value, and collects one
    "<crs key>: <looked-up value>" label per distinct non-empty CRS key.

    Args:
        _client: Client handed to ``hybrid_search``. The leading underscore keeps
            Streamlit from hashing it when computing the cache key.
        collection_name: Name of the collection to search.

    Returns:
        list: Alphabetically sorted, de-duplicated CRS labels.
    """
    def drop_float_suffix(text):
        # Keys/values may have been stringified from floats ("11110.0" -> "11110").
        return re.sub(r'\.0$', '', text)

    search_output = hybrid_search(_client, "", collection_name)

    options = set()
    for hit in search_output[0] + search_output[1]:
        raw_key = hit.payload.get('metadata', {}).get("crs_key", "")
        key = drop_float_suffix(str(raw_key).strip())
        if not key:
            # Records without a CRS key contribute no dropdown entry.
            continue
        description = drop_float_suffix(str(lookup_crs_value(key)).strip())
        options.add(f"{key}: {description}")

    return sorted(options)