Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -25,12 +25,12 @@ region_df = load_region_data(region_lookup_path)
|
|
25 |
#################### Create the embeddings collection and save ######################
|
26 |
# the steps below need to be performed only once and then commented out to avoid any unnecessary compute over-run
|
27 |
##### First we process and create the chunks for the relevant data source
|
28 |
-
|
29 |
##### Convert to langchain documents
|
30 |
-
|
31 |
##### Embed and store docs, check if collection exist then you need to update the collection
|
32 |
collection_name = "giz_worldwide"
|
33 |
-
|
34 |
|
35 |
################### Hybrid Search #####################################################
|
36 |
client = get_client()
|
@@ -217,6 +217,15 @@ if show_exact_matches:
|
|
217 |
else:
|
218 |
# 6) Display the first 15 matching results
|
219 |
for res in filtered_lexical_no_dupe[:15]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
project_name = res.payload['metadata'].get('project_name', 'Project Link')
|
221 |
proj_id = metadata.get('id', 'Unknown')
|
222 |
st.markdown(f"#### {project_name} [{proj_id}]")
|
@@ -240,16 +249,6 @@ if show_exact_matches:
|
|
240 |
if top_keywords:
|
241 |
st.markdown(f"_{' 路 '.join(top_keywords)}_")
|
242 |
|
243 |
-
# Metadata
|
244 |
-
metadata = res.payload.get('metadata', {})
|
245 |
-
countries = metadata.get('countries', "[]")
|
246 |
-
client_name = metadata.get('client', 'Unknown Client')
|
247 |
-
start_year = metadata.get('start_year', None)
|
248 |
-
end_year = metadata.get('end_year', None)
|
249 |
-
total_volume = metadata.get('total_volume', "Unknown")
|
250 |
-
total_project = metadata.get('total_project', "Unknown")
|
251 |
-
id = metadata.get('id', "Unknown")
|
252 |
-
|
253 |
|
254 |
try:
|
255 |
c_list = json.loads(countries.replace("'", '"'))
|
|
|
25 |
#################### Create the embeddings collection and save ######################
|
26 |
# the steps below need to be performed only once and then commented out to avoid any unnecessary compute over-run
|
27 |
##### First we process and create the chunks for the relevant data source
|
28 |
+
chunks = process_giz_worldwide()
|
29 |
##### Convert to langchain documents
|
30 |
+
temp_doc = create_documents(chunks,'chunks')
|
31 |
##### Embed and store docs, check if collection exist then you need to update the collection
|
32 |
collection_name = "giz_worldwide"
|
33 |
+
hybrid_embed_chunks(docs=temp_doc, collection_name=collection_name, del_if_exists=True)
|
34 |
|
35 |
################### Hybrid Search #####################################################
|
36 |
client = get_client()
|
|
|
217 |
else:
|
218 |
# 6) Display the first 15 matching results
|
219 |
for res in filtered_lexical_no_dupe[:15]:
|
220 |
+
# Metadata
|
221 |
+
metadata = res.payload.get('metadata', {})
|
222 |
+
countries = metadata.get('countries', "[]")
|
223 |
+
client_name = metadata.get('client', 'Unknown Client')
|
224 |
+
start_year = metadata.get('start_year', None)
|
225 |
+
end_year = metadata.get('end_year', None)
|
226 |
+
total_volume = metadata.get('total_volume', "Unknown")
|
227 |
+
total_project = metadata.get('total_project', "Unknown")
|
228 |
+
id = metadata.get('id', "Unknown")
|
229 |
project_name = res.payload['metadata'].get('project_name', 'Project Link')
|
230 |
proj_id = metadata.get('id', 'Unknown')
|
231 |
st.markdown(f"#### {project_name} [{proj_id}]")
|
|
|
249 |
if top_keywords:
|
250 |
st.markdown(f"_{' 路 '.join(top_keywords)}_")
|
251 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
|
253 |
try:
|
254 |
c_list = json.loads(countries.replace("'", '"'))
|