Update app.py
Browse files
app.py
CHANGED
@@ -74,20 +74,23 @@ if uploaded_file is not None:
|
|
74 |
pdf_vectors = embedding.encode(texts)
|
75 |
vector_count = len(documents)
|
76 |
example_data_generator = map(lambda i: (f'id-{i}', pdf_vectors[i], {"text": texts[i]}), range(vector_count))
|
77 |
-
#
|
78 |
-
|
|
|
79 |
for ids_vectors_chunk in chunks(example_data_generator, batch_size=100):
|
80 |
-
index.upsert(vectors=ids_vectors_chunk)
|
81 |
-
index.upsert(vectors=ids_vectors_chunk
|
82 |
-
|
83 |
-
#
|
84 |
-
|
85 |
-
|
|
|
|
|
86 |
if uploaded_file is not None and sample_query:
|
87 |
query_vector = embedding.encode(sample_query).tolist()
|
88 |
query_search = index.query(vector=query_vector, top_k=5, include_metadata=True)
|
89 |
|
90 |
-
matched_contents = [match["metadata"]["text"] for match in query_search["matches"]]
|
91 |
|
92 |
# Rerank
|
93 |
rerank_model = "BAAI/bge-reranker-v2-m3"
|
|
|
74 |
pdf_vectors = embedding.encode(texts)
|
75 |
vector_count = len(documents)
|
76 |
example_data_generator = map(lambda i: (f'id-{i}', pdf_vectors[i], {"text": texts[i]}), range(vector_count))
|
77 |
+
# Update the Pinecone index with new vectors
|
78 |
+
if 'ns1' in index.describe_index_stats()['namespaces']:
|
79 |
+
index.delete(delete_all=True, namespace='ns1')
|
80 |
for ids_vectors_chunk in chunks(example_data_generator, batch_size=100):
|
81 |
+
index.upsert(vectors=ids_vectors_chunk, namespace='ns1')
|
82 |
+
index.upsert(vectors=ids_vectors_chunk) # creating a second namespace could fix the index0 bug!
|
83 |
+
|
84 |
+
# Input for the search query
|
85 |
+
with st.form(key='my_form'):
|
86 |
+
sample_query = st.text_input("Stellen Sie eine Frage zu dem PDF: (Ask a question related to the PDF:)")
|
87 |
+
submit_button = st.form_submit_button(label='Abschicken (Submit)')
|
88 |
+
if submit_button:
|
89 |
if uploaded_file is not None and sample_query:
|
90 |
query_vector = embedding.encode(sample_query).tolist()
|
91 |
# Search the 'ns1' namespace: it is the one cleared and repopulated for the uploaded PDF,
# so matches cannot come from vectors left behind in the default namespace.
query_search = index.query(vector=query_vector, top_k=5, include_metadata=True, namespace='ns1')
|
92 |
|
93 |
+
# Collect the stored chunk text from each match's metadata.
# (A keyword argument is not valid inside a list comprehension; it was a SyntaxError here.)
matched_contents = [match["metadata"]["text"] for match in query_search["matches"]]
|
94 |
|
95 |
# Rerank
|
96 |
rerank_model = "BAAI/bge-reranker-v2-m3"
|