Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -25,12 +25,12 @@ region_df = load_region_data(region_lookup_path)
|
|
25 |
#################### Create the embeddings collection and save ######################
|
26 |
# the steps below need to be performed only once and then commented out to avoid any unnecessary compute over-run
|
27 |
##### First we process and create the chunks for the relevant data source
|
28 |
-
|
29 |
##### Convert to langchain documents
|
30 |
-
|
31 |
##### Embed and store docs, check if collection exist then you need to update the collection
|
32 |
collection_name = "giz_worldwide"
|
33 |
-
|
34 |
|
35 |
################### Hybrid Search #####################################################
|
36 |
client = get_client()
|
@@ -217,6 +217,15 @@ if show_exact_matches:
|
|
217 |
else:
|
218 |
# 6) Display the first 15 matching results
|
219 |
for res in filtered_lexical_no_dupe[:15]:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
220 |
project_name = res.payload['metadata'].get('project_name', 'Project Link')
|
221 |
proj_id = metadata.get('id', 'Unknown')
|
222 |
st.markdown(f"#### {project_name} [{proj_id}]")
|
@@ -240,16 +249,6 @@ if show_exact_matches:
|
|
240 |
if top_keywords:
|
241 |
st.markdown(f"_{' 路 '.join(top_keywords)}_")
|
242 |
|
243 |
-
# Metadata
|
244 |
-
metadata = res.payload.get('metadata', {})
|
245 |
-
countries = metadata.get('countries', "[]")
|
246 |
-
client_name = metadata.get('client', 'Unknown Client')
|
247 |
-
start_year = metadata.get('start_year', None)
|
248 |
-
end_year = metadata.get('end_year', None)
|
249 |
-
total_volume = metadata.get('total_volume', "Unknown")
|
250 |
-
total_project = metadata.get('total_project', "Unknown")
|
251 |
-
id = metadata.get('id', "Unknown")
|
252 |
-
|
253 |
|
254 |
try:
|
255 |
c_list = json.loads(countries.replace("'", '"'))
|
|
|
25 |
#################### Create the embeddings collection and save ######################
|
26 |
# the steps below need to be performed only once and then commented out to avoid any unnecessary compute over-run
|
27 |
##### First we process and create the chunks for the relevant data source
|
28 |
+
chunks = process_giz_worldwide()
|
29 |
##### Convert to langchain documents
|
30 |
+
temp_doc = create_documents(chunks,'chunks')
|
31 |
##### Embed and store docs, check if collection exist then you need to update the collection
|
32 |
collection_name = "giz_worldwide"
|
33 |
+
hybrid_embed_chunks(docs=temp_doc, collection_name=collection_name, del_if_exists=True)
|
34 |
|
35 |
################### Hybrid Search #####################################################
|
36 |
client = get_client()
|
|
|
217 |
else:
|
218 |
# 6) Display the first 15 matching results
|
219 |
for res in filtered_lexical_no_dupe[:15]:
|
220 |
+
# Metadata
|
221 |
+
metadata = res.payload.get('metadata', {})
|
222 |
+
countries = metadata.get('countries', "[]")
|
223 |
+
client_name = metadata.get('client', 'Unknown Client')
|
224 |
+
start_year = metadata.get('start_year', None)
|
225 |
+
end_year = metadata.get('end_year', None)
|
226 |
+
total_volume = metadata.get('total_volume', "Unknown")
|
227 |
+
total_project = metadata.get('total_project', "Unknown")
|
228 |
+
id = metadata.get('id', "Unknown")
|
229 |
project_name = res.payload['metadata'].get('project_name', 'Project Link')
|
230 |
proj_id = metadata.get('id', 'Unknown')
|
231 |
st.markdown(f"#### {project_name} [{proj_id}]")
|
|
|
249 |
if top_keywords:
|
250 |
st.markdown(f"_{' 路 '.join(top_keywords)}_")
|
251 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
|
253 |
try:
|
254 |
c_list = json.loads(countries.replace("'", '"'))
|