prasadnu committed
Commit ef1e3b1 · 1 Parent(s): 3cdaefa

updated ML models

pages/Semantic_Search.py CHANGED
@@ -126,7 +126,11 @@ if "questions" not in st.session_state:
     st.session_state.questions = []
 
 if "input_mvector_rerank" not in st.session_state:
-    st.session_state.input_colBert_rerank = False
+    st.session_state.input_colBert_rerank = False
+
+if "input_multilingual" not in st.session_state:
+    st.session_state.input_multilingual = False
+
 
 if "clear_" not in st.session_state:
     st.session_state.clear_ = False
@@ -685,14 +689,23 @@ if(search_all_type == True or 1==1):
     ########################## enable for query_rewrite ########################
     if rewrite_query:
         st.session_state.input_is_rewrite_query = 'enabled'
+
     st.subheader(':blue[Vector Search]')
 
     mvector_rerank = st.checkbox("Search and Re-rank with Token level vectors",key = 'mvector_rerank',help = "Enabling this option uses 'all-MiniLM-L6-v2' model's token level embeddings to retrieve documents and MaxSim to re-rank documents.\n\n Hugging Face Model: https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2")
 
+    multilingual = st.checkbox("Enable multilingual mode",key = 'multilingual',help = "Enabling this option uses the Titan model's multilingual embeddings to retrieve documents and the Haiku model to translate the product descriptions to the query language.")
+
     if(mvector_rerank):
         st.session_state.input_mvector_rerank = True
     else:
         st.session_state.input_mvector_rerank = False
+
+    if(multilingual):
+        st.session_state.input_multilingual = True
+    else:
+        st.session_state.input_multilingual = False
+
     st.subheader(':blue[Hybrid Search]')
     with st.expander("Set query Weightage:"):
         st.number_input("Keyword %", min_value=0, max_value=100, value=100, step=5, key='input_Keyword-weight', help=None)
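
The new "Enable multilingual mode" checkbox describes the flow only at a high level: Titan multilingual embeddings for retrieval, plus a Haiku model that translates product descriptions into the query language. A minimal sketch of what those two Bedrock calls usually look like is below; it is not part of this commit, and the model IDs, region, prompt, and helper names are assumptions for illustration.

import json
import boto3

# Illustrative sketch only: model IDs, region, and prompt are assumptions, not taken from this repo.
bedrock = boto3.client("bedrock-runtime", region_name="us-east-1")

def embed_multilingual(text):
    # Titan text embeddings handle multiple languages with a single model.
    body = json.dumps({"inputText": text})
    resp = bedrock.invoke_model(modelId="amazon.titan-embed-text-v2:0", body=body)
    return json.loads(resp["body"].read())["embedding"]

def translate_description(description, target_language):
    # Claude 3 Haiku on Bedrock (Messages API) used as a lightweight translator.
    body = json.dumps({
        "anthropic_version": "bedrock-2023-05-31",
        "max_tokens": 512,
        "messages": [{
            "role": "user",
            "content": "Translate this product description into " + target_language + ":\n\n" + description,
        }],
    })
    resp = bedrock.invoke_model(modelId="anthropic.claude-3-haiku-20240307-v1:0", body=body)
    return json.loads(resp["body"].read())["content"][0]["text"]

In the app itself, embedding and translation are expected to run inside the OpenSearch search pipeline rather than from Streamlit; the sketch is only meant to make the checkbox's help text concrete.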
semantic_search/all_search_execute.py CHANGED
@@ -215,6 +215,7 @@ def handler(input_,session_id):
         hybrid_payload["query"]["hybrid"]["queries"].append(keyword_payload)
 
     if('Vector Search' in search_types):
+
         if(st.session_state.input_mvector_rerank):
             query_vector = cb.vectorise(query,False)
             vector_field = "description_vector"
@@ -253,15 +254,27 @@ def handler(input_,session_id):
 
         #using neural query
         else:
-            vector_payload = {
-                "neural": {
+            if(st.session_state.input_multilingual):
+                vector_payload = {
+                    "term": {
+                        "product_description": {
+                            "value": query
+                        }
+                }}
+            else:
+                vector_payload = {"neural": {
                     "product_description_vector": {
                         "query_text": query,
                         "model_id": BEDROCK_TEXT_MODEL_ID,
                         "k": k_
                     }
-                }
-            }
+                }}
+
+
+
+
+
+
 
         ###### start of efficient filter applying #####
         if(st.session_state.input_rewritten_query!=""):
@@ -412,14 +425,22 @@ def handler(input_,session_id):
         single_query = hybrid_payload["query"]["hybrid"]["queries"][0]
         del hybrid_payload["query"]["hybrid"]
         hybrid_payload["query"] = single_query
-        if(st.session_state.re_ranker == 'true' and st.session_state.input_reranker == 'Cohere Rerank'):
-            path = "demostore-search-index-reindex-new/_search?search_pipeline=rerank_pipeline"
-            url = host + path
-            hybrid_payload["ext"] = {"rerank": {
-                "query_context": {
-                    "query_text": query
-                }
-            }}
+        if(st.session_state.input_multilingual):
+            if(st.session_state.re_ranker == 'true' and st.session_state.input_reranker == 'Cohere Rerank'):
+                path = "demostore-search-index-reindex-new/_search?search_pipeline=ml_inference_for_vector_search_and_language_translation_with_rerank"
+                url = host + path
+            else:
+                path = "demostore-search-index-reindex-new/_search?search_pipeline=ml_inference_for_vector_search_and_language_translation"
+                url = host + path
+        else:
+            if(st.session_state.re_ranker == 'true' and st.session_state.input_reranker == 'Cohere Rerank'):
+                path = "demostore-search-index-reindex-new/_search?search_pipeline=rerank_pipeline"
+                url = host + path
+                hybrid_payload["ext"] = {"rerank": {
+                    "query_context": {
+                        "query_text": query
+                    }
+                }}
 
     r = requests.get(url, auth=awsauth, json=hybrid_payload, headers=headers)
     response_ = json.loads(r.text)
@@ -488,8 +509,12 @@ def handler(input_,session_id):
     doc_ids = []
     for doc in docs:
         if(doc['_source']['image_url'] not in dup):
+            if("product_description_translated" in doc['_source'].keys()):
+                desc = doc['_source']['product_description_translated']
+            else:
+                desc = doc['_source']['product_description']
             res_ = {
-                "desc":doc['_source']['product_description'],
+                "desc":desc,
                 "caption":doc['_source']['caption'],
                 "image_url":doc['_source']['image_url'],
                 "category":doc['_source']['category'],