Update app.py
Browse files
@@ -75,21 +75,28 @@ if uploaded_file is not None:
75 |
vector_count = len(documents)
76 |
example_data_generator = map(lambda i: (f'id-{i}', pdf_vectors[i], {"text": texts[i]}), range(vector_count))
77 |
# Update the Pinecone index with new vectors
78 |
79 |
80 |
81 |
82 |
83 |
84 |
# Input for the search query
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
# Rerank
95 |
rerank_model = "BAAI/bge-reranker-v2-m3"
75 |
vector_count = len(documents)
# Lazily build (id, vector, metadata) records. Ids are positional ('id-0', 'id-1', ...),
# so upserting a new document reuses the same ids as any records stored before it.
example_data_generator = map(lambda i: (f'id-{i}', pdf_vectors[i], {"text": texts[i]}), range(vector_count))
# Update the Pinecone index with new vectors
for ids_vectors_chunk in chunks(example_data_generator, batch_size=100):  # Iterate through chunks of example data
    index.upsert(vectors=ids_vectors_chunk, namespace='ns1')  # Upsert (update or insert) vectors
    time.sleep(0.05)  # Pause to avoid overwhelming the server

# Get current vector count in namespace 'ns1'. The namespace is absent from the
# stats when it holds no records (e.g. nothing was upserted), so fall back to 0
# instead of raising KeyError.
namespace_stats = index.describe_index_stats()['namespaces']
ns_count = namespace_stats['ns1']['vector_count'] if 'ns1' in namespace_stats else 0

if vector_count < ns_count:  # Check if the old vectors are still inside
    # Ids at or beyond vector_count were not overwritten by the upsert above and
    # are treated as leftovers from a previous, longer document.
    # Delete-by-id requests are size-limited by Pinecone (1000 ids per call),
    # so remove the stale ids in batches rather than in one oversized request.
    delete_batch_size = 1000
    for batch_start in range(vector_count, ns_count, delete_batch_size):
        batch_stop = min(batch_start + delete_batch_size, ns_count)
        ids_to_delete = [f'id-{i}' for i in range(batch_start, batch_stop)]  # Generate list of IDs to delete
        index.delete(ids=ids_to_delete, namespace='ns1')  # Delete old vectors
        time.sleep(0.05)  # Pause to avoid overwhelming the server
88 |
89 |
# Input for the search query
90 |
with st.form(key='my_form'):
91 |
sample_query = st.text_input("Stellen Sie eine Frage zu dem PDF: (Ask a question related to the PDF:)") # User query input
92 |
submit_button = st.form_submit_button(label='Abschicken (Submit)') # Submit button
93 |
94 |
if submit_button:
95 |
if uploaded_file is not None and sample_query: # Check if file is uploaded and query provided
96 |
query_vector = embedding.encode(sample_query).tolist() # Encode query to vector
97 |
query_search = index.query(vector=query_vector, top_k=5, include_metadata=True, namespace='ns1') # Search index
98 |
time.sleep(0.1) # Pause to avoid overwhelming the server
99 |
matched_contents = [match["metadata"]["text"] for match in query_search["matches"]] # Extract text metadata from results
100 |
101 |
# Rerank
102 |
rerank_model = "BAAI/bge-reranker-v2-m3"