Spaces:
Running
on
T4
Running
on
T4
updated ML models
Browse files
- pages/Semantic_Search.py +14 -1
- semantic_search/all_search_execute.py +38 -13
pages/Semantic_Search.py
CHANGED
@@ -126,7 +126,11 @@ if "questions" not in st.session_state:
|
|
126 |
st.session_state.questions = []
|
127 |
|
128 |
if "input_mvector_rerank" not in st.session_state:
|
129 |
-
st.session_state.input_colBert_rerank = False
|
|
|
|
|
|
|
|
|
130 |
|
131 |
if "clear_" not in st.session_state:
|
132 |
st.session_state.clear_ = False
|
@@ -685,14 +689,23 @@ if(search_all_type == True or 1==1):
|
|
685 |
########################## enable for query_rewrite ########################
|
686 |
if rewrite_query:
|
687 |
st.session_state.input_is_rewrite_query = 'enabled'
|
|
|
688 |
st.subheader(':blue[Vector Search]')
|
689 |
|
690 |
mvector_rerank = st.checkbox("Search and Re-rank with Token level vectors",key = 'mvector_rerank',help = "Enabling this option uses 'all-MiniLM-L6-v2' model's token level embeddings to retrieve documents and MaxSim to re-rank documents.\n\n Hugging Face Model: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2")
|
691 |
|
|
|
|
|
692 |
if(mvector_rerank):
|
693 |
st.session_state.input_mvector_rerank = True
|
694 |
else:
|
695 |
st.session_state.input_mvector_rerank = False
|
|
|
|
|
|
|
|
|
|
|
|
|
696 |
st.subheader(':blue[Hybrid Search]')
|
697 |
with st.expander("Set query Weightage:"):
|
698 |
st.number_input("Keyword %", min_value=0, max_value=100, value=100, step=5, key='input_Keyword-weight', help=None)
|
|
|
126 |
st.session_state.questions = []
|
127 |
|
128 |
if "input_mvector_rerank" not in st.session_state:
|
129 |
+
st.session_state.input_colBert_rerank = False
|
130 |
+
|
131 |
+
if "input_multilingual" not in st.session_state:
|
132 |
+
st.session_state.input_multilingual = False
|
133 |
+
|
134 |
|
135 |
if "clear_" not in st.session_state:
|
136 |
st.session_state.clear_ = False
|
|
|
689 |
########################## enable for query_rewrite ########################
|
690 |
if rewrite_query:
|
691 |
st.session_state.input_is_rewrite_query = 'enabled'
|
692 |
+
|
693 |
st.subheader(':blue[Vector Search]')
|
694 |
|
695 |
mvector_rerank = st.checkbox("Search and Re-rank with Token level vectors",key = 'mvector_rerank',help = "Enabling this option uses 'all-MiniLM-L6-v2' model's token level embeddings to retrieve documents and MaxSim to re-rank documents.\n\n Hugging Face Model: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2")
|
696 |
|
697 |
+
multilingual = st.checkbox("Enable multilingual mode",key = 'multilingual',help = "Enabling this option uses titan model's multilingual embeddings to retrieve documents and haike model to translate the product descriptions to the query language.")
|
698 |
+
|
699 |
if(mvector_rerank):
|
700 |
st.session_state.input_mvector_rerank = True
|
701 |
else:
|
702 |
st.session_state.input_mvector_rerank = False
|
703 |
+
|
704 |
+
if(multilingual):
|
705 |
+
st.session_state.input_multilingual = True
|
706 |
+
else:
|
707 |
+
st.session_state.input_multilingual = False
|
708 |
+
|
709 |
st.subheader(':blue[Hybrid Search]')
|
710 |
with st.expander("Set query Weightage:"):
|
711 |
st.number_input("Keyword %", min_value=0, max_value=100, value=100, step=5, key='input_Keyword-weight', help=None)
|
semantic_search/all_search_execute.py
CHANGED
@@ -215,6 +215,7 @@ def handler(input_,session_id):
|
|
215 |
hybrid_payload["query"]["hybrid"]["queries"].append(keyword_payload)
|
216 |
|
217 |
if('Vector Search' in search_types):
|
|
|
218 |
if(st.session_state.input_mvector_rerank):
|
219 |
query_vector = cb.vectorise(query,False)
|
220 |
vector_field = "description_vector"
|
@@ -253,15 +254,27 @@ def handler(input_,session_id):
|
|
253 |
|
254 |
#using neural query
|
255 |
else:
|
256 |
-
|
257 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
258 |
"product_description_vector": {
|
259 |
"query_text": query,
|
260 |
"model_id": BEDROCK_TEXT_MODEL_ID,
|
261 |
"k": k_
|
262 |
}
|
263 |
-
}
|
264 |
-
|
|
|
|
|
|
|
|
|
|
|
265 |
|
266 |
###### start of efficient filter applying #####
|
267 |
if(st.session_state.input_rewritten_query!=""):
|
@@ -412,14 +425,22 @@ def handler(input_,session_id):
|
|
412 |
single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
|
413 |
del hybrid_payload["query"]["hybrid"]
|
414 |
hybrid_payload["query"] = single_query
|
415 |
-
if(st.session_state.
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
423 |
|
424 |
r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
|
425 |
response_ = json.loads(r.text)
|
@@ -488,8 +509,12 @@ def handler(input_,session_id):
|
|
488 |
doc_ids = []
|
489 |
for doc in docs:
|
490 |
if(doc['_source']['image_url'] not in dup):
|
|
|
|
|
|
|
|
|
491 |
res_ = {
|
492 |
-
"desc":
|
493 |
"caption":doc['_source']['caption'],
|
494 |
"image_url":doc['_source']['image_url'],
|
495 |
"category":doc['_source']['category'],
|
|
|
215 |
hybrid_payload["query"]["hybrid"]["queries"].append(keyword_payload)
|
216 |
|
217 |
if('Vector Search' in search_types):
|
218 |
+
|
219 |
if(st.session_state.input_mvector_rerank):
|
220 |
query_vector = cb.vectorise(query,False)
|
221 |
vector_field = "description_vector"
|
|
|
254 |
|
255 |
#using neural query
|
256 |
else:
|
257 |
+
if(st.session_state.input_multilingual):
|
258 |
+
vector_payload = {
|
259 |
+
"term": {
|
260 |
+
"product_description": {
|
261 |
+
"value": query
|
262 |
+
}
|
263 |
+
}}
|
264 |
+
else:
|
265 |
+
vector_payload = {"neural": {
|
266 |
"product_description_vector": {
|
267 |
"query_text": query,
|
268 |
"model_id": BEDROCK_TEXT_MODEL_ID,
|
269 |
"k": k_
|
270 |
}
|
271 |
+
}}
|
272 |
+
|
273 |
+
|
274 |
+
|
275 |
+
|
276 |
+
|
277 |
+
|
278 |
|
279 |
###### start of efficient filter applying #####
|
280 |
if(st.session_state.input_rewritten_query!=""):
|
|
|
425 |
single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
|
426 |
del hybrid_payload["query"]["hybrid"]
|
427 |
hybrid_payload["query"] = single_query
|
428 |
+
if(st.session_state.input_multilingual):
|
429 |
+
if(st.session_state.re_ranker == 'true' and st.session_state.input_reranker == 'Cohere Rerank'):
|
430 |
+
path = "demostore-search-index-reindex-new/_search?search_pipeline=ml_inference_for_vector_search_and_language_translation_with_rerank"
|
431 |
+
url = host + path
|
432 |
+
else:
|
433 |
+
path = "demostore-search-index-reindex-new/_search?search_pipeline=ml_inference_for_vector_search_and_language_translation"
|
434 |
+
url = host + path
|
435 |
+
else:
|
436 |
+
if(st.session_state.re_ranker == 'true' and st.session_state.input_reranker == 'Cohere Rerank'):
|
437 |
+
path = "demostore-search-index-reindex-new/_search?search_pipeline=rerank_pipeline"
|
438 |
+
url = host + path
|
439 |
+
hybrid_payload["ext"] = {"rerank": {
|
440 |
+
"query_context": {
|
441 |
+
"query_text": query
|
442 |
+
}
|
443 |
+
}}
|
444 |
|
445 |
r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
|
446 |
response_ = json.loads(r.text)
|
|
|
509 |
doc_ids = []
|
510 |
for doc in docs:
|
511 |
if(doc['_source']['image_url'] not in dup):
|
512 |
+
if("product_description_translated" in doc['_source'].keys()):
|
513 |
+
desc = doc['_source']['product_description_translated']
|
514 |
+
else:
|
515 |
+
desc = doc['_source']['product_description']
|
516 |
res_ = {
|
517 |
+
"desc":desc,
|
518 |
"caption":doc['_source']['caption'],
|
519 |
"image_url":doc['_source']['image_url'],
|
520 |
"category":doc['_source']['category'],
|