ziyingsk committed on
Commit
00ae860
·
verified ·
1 Parent(s): f121f74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -13
app.py CHANGED
@@ -75,21 +75,28 @@ if uploaded_file is not None:
75
  vector_count = len(documents)
76
  example_data_generator = map(lambda i: (f'id-{i}', pdf_vectors[i], {"text": texts[i]}), range(vector_count))
77
  # Update the Pinecone index with new vectors
78
- if 'ns1' in index.describe_index_stats()['namespaces']:
79
- index.delete(delete_all=True, namespace='ns1')
80
- for ids_vectors_chunk in chunks(example_data_generator, batch_size=100):
81
- index.upsert(vectors=ids_vectors_chunk, namespace='ns1')
82
- index.upsert(vectors=ids_vectors_chunk)
 
 
 
 
 
83
 
84
  # Input for the search query
85
- sample_query = st.text_input("Stellen Sie eine Frage zu dem PDF: (Ask a question related to the PDF:)")
86
- if st.button("Abschicken (Submit)"):
87
- if uploaded_file is not None and sample_query:
88
- # Encode the query and search in the Pinecone index
89
- query_vector = embedding.encode(sample_query).tolist()
90
- query_search = index.query(vector=query_vector, top_k=5, include_metadata=True,namespace='ns1')
91
-
92
- matched_contents = [match["metadata"]["text"] for match in query_search["matches"]]
 
 
93
 
94
  # Rerank
95
  rerank_model = "BAAI/bge-reranker-v2-m3"
 
75
  vector_count = len(documents)
76
  example_data_generator = map(lambda i: (f'id-{i}', pdf_vectors[i], {"text": texts[i]}), range(vector_count))
77
  # Update the Pinecone index with new vectors
78
+ for ids_vectors_chunk in chunks(example_data_generator, batch_size=100): # Iterate through chunks of example data
79
+ index.upsert(vectors=ids_vectors_chunk, namespace='ns1') # Upsert (update or insert) vectors
80
+ time.sleep(0.05) # Pause to avoid overwhelming the server
81
+
82
+ ns_count = index.describe_index_stats()['namespaces']['ns1']['vector_count'] # Get current vector count in namespace 'ns1'
83
+
84
+ if vector_count < ns_count: # Check if the old vectors are still inside
85
+ ids_to_delete = [f'id-{i}' for i in range(vector_count, ns_count)] # Generate list of IDs to delete
86
+ index.delete(ids=ids_to_delete, namespace='ns1') # Delete old vectors
87
+ time.sleep(0.05) # Pause to avoid overwhelming the server
88
 
89
  # Input for the search query
90
+ with st.form(key='my_form'):
91
+ sample_query = st.text_input("Stellen Sie eine Frage zu dem PDF: (Ask a question related to the PDF:)") # User query input
92
+ submit_button = st.form_submit_button(label='Abschicken (Submit)') # Submit button
93
+
94
+ if submit_button:
95
+ if uploaded_file is not None and sample_query: # Check if file is uploaded and query provided
96
+ query_vector = embedding.encode(sample_query).tolist() # Encode query to vector
97
+ query_search = index.query(vector=query_vector, top_k=5, include_metadata=True, namespace='ns1') # Search index
98
+ time.sleep(0.1) # Pause to avoid overwhelming the server
99
+ matched_contents = [match["metadata"]["text"] for match in query_search["matches"]] # Extract text metadata from results
100
 
101
  # Rerank
102
  rerank_model = "BAAI/bge-reranker-v2-m3"