annikwag committed on
Commit
74e4e05
·
verified ·
1 Parent(s): c966f4d

Delete appStore/filter_utils

Browse files
Files changed (1) hide show
  1. appStore/filter_utils +0 -116
appStore/filter_utils DELETED
@@ -1,116 +0,0 @@
1
- import re
2
- import json
3
- import streamlit as st
4
- from appStore.crs_utils import lookup_crs_value
5
- from appStore.search import hybrid_search
6
- from appStore.prep_data import remove_duplicates, extract_year
7
-
8
def parse_budget(value):
    """
    Safely parse a budget value into a float.

    Args:
        value: Anything accepted by ``float()`` (number or numeric string).

    Returns:
        float: The parsed value, or 0.0 when ``value`` cannot be converted.
    """
    try:
        return float(value)
    # Only conversion failures are treated as "no budget"; a bare ``except``
    # would also swallow KeyboardInterrupt / SystemExit.
    except (TypeError, ValueError, OverflowError):
        return 0.0
16
-
17
def _end_year_or_zero(raw_year):
    """Return the end year as an int, or 0 when it is missing or unparseable."""
    if not raw_year:
        return 0
    extracted = extract_year(raw_year)
    if extracted == "Unknown":
        return 0
    try:
        return int(extracted)
    except (TypeError, ValueError):
        return 0


def _country_entries(raw_country):
    """Normalize the raw 'country' metadata value into a list of code entries."""
    if raw_country is None:
        return []
    if isinstance(raw_country, (list, tuple)):
        return list(raw_country)
    text = str(raw_country).strip()
    if not text.startswith("["):
        # A single bare code such as "KE".
        return [text]
    try:
        # The stored value may use single quotes, which JSON does not accept.
        return json.loads(text.replace("'", '"'))
    except json.JSONDecodeError:
        return []


def _crs_label(metadata):
    """Build the "<key>: <value>" CRS label for a result, or "" without a key."""
    # Stringify before stripping so numeric keys (e.g. 11110.0) do not crash;
    # this mirrors how get_crs_options builds the dropdown entries.
    crs_key_clean = re.sub(r'\.0$', '', str(metadata.get("crs_key", "")).strip())
    if not crs_key_clean:
        return ""
    crs_value_clean = re.sub(r'\.0$', '', str(lookup_crs_value(crs_key_clean)).strip())
    return f"{crs_key_clean}: {crs_value_clean}"


def filter_results(results, country_filter, region_filter, end_year_range, crs_filter, budget_filter,
                   region_df, iso_code_to_sub_region, clean_country_code_fn, get_country_name_fn):
    """
    Filter search results by country, region, end_year range, CRS code, and budget threshold.

    A result is kept only if it passes every active filter. A filter set to
    "All/Not allocated" is inactive. Results whose end year is missing or
    unparseable pass the year filter, and results without a CRS key pass the
    CRS filter.

    Args:
        results (list): Results whose ``payload`` dict holds a 'metadata' dict.
        country_filter (str): Selected country name to filter by (or "All/Not allocated").
        region_filter (str): Selected region name to filter by (or "All/Not allocated").
        end_year_range (tuple): Inclusive (min_end_year, max_end_year).
        crs_filter (str): The chosen "<key>: <value>" CRS label (or "All/Not allocated").
        budget_filter (float): Minimum budget in millions; compared against
            the 'total_project' metadata value after multiplying by 1e6.
        region_df: Lookup object forwarded unchanged to ``get_country_name_fn``.
        iso_code_to_sub_region (dict): Mapping from cleaned country code to sub-region name.
        clean_country_code_fn (callable): Normalizes one country code entry.
        get_country_name_fn (callable): Called as ``fn(code, region_df)`` to get a country name.

    Returns:
        list: The results that match all criteria, in their original order.
    """
    no_filter = "All/Not allocated"
    min_budget = budget_filter * 1e6
    min_year, max_year = end_year_range[0], end_year_range[1]
    filter_by_country = country_filter != no_filter
    filter_by_region = region_filter != no_filter
    filter_by_crs = crs_filter != no_filter

    filtered = []
    for r in results:
        metadata = r.payload.get('metadata', {})

        # CRS filtering: only results that carry a CRS key can be rejected.
        if filter_by_crs:
            crs_combined = _crs_label(metadata)
            if crs_combined and crs_filter != crs_combined:
                continue

        # Budget filtering
        if parse_budget(metadata.get('total_project', "0")) < min_budget:
            continue

        # Year filtering: 0 means "unknown" and is always accepted.
        end_year_val = _end_year_or_zero(metadata.get('end_year'))
        if end_year_val != 0 and not (min_year <= end_year_val <= max_year):
            continue

        # Country / region filtering: resolve codes only when actually needed.
        if filter_by_country or filter_by_region:
            c_list = []
            for entry in _country_entries(metadata.get('country', "[]")):
                cleaned = clean_country_code_fn(entry)
                if len(cleaned) == 2:
                    c_list.append(cleaned)

            if filter_by_country:
                resolved_names = [get_country_name_fn(code, region_df) for code in c_list]
                if country_filter not in resolved_names:
                    continue

            if filter_by_region and not any(
                iso_code_to_sub_region.get(code, "Not allocated") == region_filter
                for code in c_list
            ):
                continue

        filtered.append(r)

    return filtered
97
-
98
@st.cache_data(show_spinner=False)
def get_crs_options(_client, collection_name):
    """
    Collect every distinct "<crs key>: <crs value>" label present in the collection.

    The sorted list is used to populate the 'CRS' dropdown in the app.

    Args:
        _client: Search client handed straight to ``hybrid_search``. The
            leading underscore keeps Streamlit from hashing it for the cache.
        collection_name: Name of the collection to search.

    Returns:
        list: Sorted, de-duplicated CRS labels.
    """
    trailing_dot_zero = r'\.0$'
    search_output = hybrid_search(_client, "", collection_name)

    labels = set()
    # The first two entries of the search output are concatenated and scanned together.
    for hit in search_output[0] + search_output[1]:
        hit_metadata = hit.payload.get('metadata', {})
        key_text = str(hit_metadata.get("crs_key", "")).strip()
        key = re.sub(trailing_dot_zero, '', key_text)
        if not key:
            # No CRS key on this hit, so there is nothing to offer in the dropdown.
            continue
        value_text = str(lookup_crs_value(key)).strip()
        value = re.sub(trailing_dot_zero, '', value_text)
        labels.add(f"{key}: {value}")

    return sorted(labels)