ziyingsk committed on
Commit
85d96eb
·
verified ·
1 Parent(s): 89be574

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -9
app.py CHANGED
@@ -74,20 +74,23 @@ if uploaded_file is not None:
74
  pdf_vectors = embedding.encode(texts)
75
  vector_count = len(documents)
76
  example_data_generator = map(lambda i: (f'id-{i}', pdf_vectors[i], {"text": texts[i]}), range(vector_count))
77
- #if len(index.describe_index_stats()['namespaces'])!=0:
78
- # index.delete(delete_all=True)
 
79
  for ids_vectors_chunk in chunks(example_data_generator, batch_size=100):
80
- index.upsert(vectors=ids_vectors_chunk)
81
- index.upsert(vectors=ids_vectors_chunk,namespace='ns1')
82
-
83
- # Search query related context
84
- sample_query = st.text_input("Stellen Sie eine Frage zu dem PDF: (Ask a question related to the PDF:)")
85
- if st.button("Abschicken (Submit)"):
 
 
86
  if uploaded_file is not None and sample_query:
87
  query_vector = embedding.encode(sample_query).tolist()
88
  query_search = index.query(vector=query_vector, top_k=5, include_metadata=True)
89
 
90
- matched_contents = [match["metadata"]["text"] for match in query_search["matches"]]
91
 
92
  # Rerank
93
  rerank_model = "BAAI/bge-reranker-v2-m3"
 
74
  pdf_vectors = embedding.encode(texts)
75
  vector_count = len(documents)
76
  example_data_generator = map(lambda i: (f'id-{i}', pdf_vectors[i], {"text": texts[i]}), range(vector_count))
77
+ # Update the Pinecone index with new vectors
78
+ if 'ns1' in index.describe_index_stats()['namespaces']:
79
+ index.delete(delete_all=True, namespace='ns1')
80
  for ids_vectors_chunk in chunks(example_data_generator, batch_size=100):
81
+ index.upsert(vectors=ids_vectors_chunk, namespace='ns1')
82
+ index.upsert(vectors=ids_vectors_chunk) # creating a second namespace could fix the index0 bug!
83
+
84
+ # Input for the search query
85
+ with st.form(key='my_form'):
86
+ sample_query = st.text_input("Stellen Sie eine Frage zu dem PDF: (Ask a question related to the PDF:)")
87
+ submit_button = st.form_submit_button(label='Abschicken (Submit)')
88
+ if submit_button:
89
  if uploaded_file is not None and sample_query:
90
  query_vector = embedding.encode(sample_query).tolist()
91
  query_search = index.query(vector=query_vector, top_k=5, include_metadata=True)
92
 
93
+ matched_contents = [match["metadata"]["text"] for match in query_search["matches"], namespace='ns1']
94
 
95
  # Rerank
96
  rerank_model = "BAAI/bge-reranker-v2-m3"