annikwag committed on
Commit 67f6d38 (verified)
Parent(s): b08d54a

Update app.py

Files changed (1): app.py (+12, -13)
app.py CHANGED
@@ -25,12 +25,12 @@ region_df = load_region_data(region_lookup_path)
 #################### Create the embeddings collection and save ######################
 # the steps below need to be performed only once and then commented out to avoid unnecessary compute overruns
 ##### First we process and create the chunks for the relevant data source
-#chunks = process_giz_worldwide()
+chunks = process_giz_worldwide()
 ##### Convert to langchain documents
-#temp_doc = create_documents(chunks,'chunks')
+temp_doc = create_documents(chunks,'chunks')
 ##### Embed and store docs; if the collection already exists, it needs to be updated
 collection_name = "giz_worldwide"
-#hybrid_embed_chunks(docs=temp_doc, collection_name=collection_name, del_if_exists=True)
+hybrid_embed_chunks(docs=temp_doc, collection_name=collection_name, del_if_exists=True)
 
 ################### Hybrid Search #####################################################
 client = get_client()
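Note on this hunk: it re-enables the one-time indexing pipeline that the file's own comment says should normally stay commented out, so the collection is rebuilt on every app start. A minimal sketch of an alternative, assuming the same helper functions from app.py and a hypothetical RUN_INDEXING environment flag (not part of the commit), would gate the step instead of toggling comments:

import os

collection_name = "giz_worldwide"

# Gate the expensive one-time indexing behind a flag instead of comments.
# RUN_INDEXING is a hypothetical variable name, not from the commit.
if os.getenv("RUN_INDEXING", "false").lower() == "true":
    chunks = process_giz_worldwide()               # chunk the raw data source
    temp_doc = create_documents(chunks, 'chunks')  # wrap chunks as langchain documents
    hybrid_embed_chunks(docs=temp_doc,
                        collection_name=collection_name,
                        del_if_exists=True)        # drop and rebuild the collection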
@@ -217,6 +217,15 @@ if show_exact_matches:
     else:
         # 6) Display the first 15 matching results
         for res in filtered_lexical_no_dupe[:15]:
+            # Metadata
+            metadata = res.payload.get('metadata', {})
+            countries = metadata.get('countries', "[]")
+            client_name = metadata.get('client', 'Unknown Client')
+            start_year = metadata.get('start_year', None)
+            end_year = metadata.get('end_year', None)
+            total_volume = metadata.get('total_volume', "Unknown")
+            total_project = metadata.get('total_project', "Unknown")
+            id = metadata.get('id', "Unknown")
             project_name = res.payload['metadata'].get('project_name', 'Project Link')
             proj_id = metadata.get('id', 'Unknown')
             st.markdown(f"#### {project_name} [{proj_id}]")
@@ -240,16 +249,6 @@ if show_exact_matches:
             if top_keywords:
                 st.markdown(f"_{' · '.join(top_keywords)}_")
 
-            # Metadata
-            metadata = res.payload.get('metadata', {})
-            countries = metadata.get('countries', "[]")
-            client_name = metadata.get('client', 'Unknown Client')
-            start_year = metadata.get('start_year', None)
-            end_year = metadata.get('end_year', None)
-            total_volume = metadata.get('total_volume', "Unknown")
-            total_project = metadata.get('total_project', "Unknown")
-            id = metadata.get('id', "Unknown")
-
 
             try:
                 c_list = json.loads(countries.replace("'", '"'))
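One caveat with the unchanged parsing line above: json.loads(countries.replace("'", '"')) fails whenever a value itself contains an apostrophe (e.g. "Cote d'Ivoire"). A hedged sketch of a more robust parser, assuming countries holds a Python-style stringified list as the "[]" default suggests; the parse_countries name is hypothetical and not from the commit:

import ast
import json

def parse_countries(countries: str) -> list:
    # ast.literal_eval handles Python-style single-quoted lists directly,
    # so no quote swapping is needed.
    try:
        return ast.literal_eval(countries)   # "['DE', 'FR']" -> ['DE', 'FR']
    except (ValueError, SyntaxError):
        try:
            return json.loads(countries)     # fall back to real JSON
        except json.JSONDecodeError:
            return []                        # unparseable: show no countries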
 