Update app.py
Browse files
app.py
CHANGED
@@ -75,21 +75,28 @@ if uploaded_file is not None:
|
|
75 |
vector_count = len(documents)
|
76 |
example_data_generator = map(lambda i: (f'id-{i}', pdf_vectors[i], {"text": texts[i]}), range(vector_count))
|
77 |
# Update the Pinecone index with new vectors
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
|
|
|
|
|
|
|
|
|
|
83 |
|
84 |
# Input for the search query
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
|
|
|
|
93 |
|
94 |
# Rerank
|
95 |
rerank_model = "BAAI/bge-reranker-v2-m3"
|
|
|
75 |
vector_count = len(documents)
|
76 |
example_data_generator = map(lambda i: (f'id-{i}', pdf_vectors[i], {"text": texts[i]}), range(vector_count))
|
77 |
# Update the Pinecone index with new vectors
|
78 |
+
for ids_vectors_chunk in chunks(example_data_generator, batch_size=100): # Iterate through chunks of example data
|
79 |
+
index.upsert(vectors=ids_vectors_chunk, namespace='ns1') # Upsert (update or insert) vectors
|
80 |
+
time.sleep(0.05) # Pause to avoid overwhelming the server
|
81 |
+
|
82 |
+
ns_count = index.describe_index_stats()['namespaces']['ns1']['vector_count'] # Get current vector count in namespace 'ns1'
|
83 |
+
|
84 |
+
if vector_count < ns_count: # Check whether namespace 'ns1' still holds stale vectors beyond the ones just upserted
|
85 |
+
ids_to_delete = [f'id-{i}' for i in range(vector_count, ns_count)] # Generate list of IDs to delete
|
86 |
+
index.delete(ids=ids_to_delete, namespace='ns1') # Delete old vectors
|
87 |
+
time.sleep(0.05) # Pause to avoid overwhelming the server
|
88 |
|
89 |
# Input for the search query
|
90 |
+
with st.form(key='my_form'):
|
91 |
+
sample_query = st.text_input("Stellen Sie eine Frage zu dem PDF: (Ask a question related to the PDF:)") # User query input
|
92 |
+
submit_button = st.form_submit_button(label='Abschicken (Submit)') # Submit button
|
93 |
+
|
94 |
+
if submit_button:
|
95 |
+
if uploaded_file is not None and sample_query: # Check if file is uploaded and query provided
|
96 |
+
query_vector = embedding.encode(sample_query).tolist() # Encode query to vector
|
97 |
+
query_search = index.query(vector=query_vector, top_k=5, include_metadata=True, namespace='ns1') # Search index
|
98 |
+
time.sleep(0.1) # Pause to avoid overwhelming the server
|
99 |
+
matched_contents = [match["metadata"]["text"] for match in query_search["matches"]] # Extract text metadata from results
|
100 |
|
101 |
# Rerank
|
102 |
rerank_model = "BAAI/bge-reranker-v2-m3"
|