HarryLee committed on
Commit
b726bae
·
1 Parent(s): aaf7855

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -2
app.py CHANGED
@@ -9,6 +9,7 @@ import gzip
9
  import os
10
  import torch
11
  import pickle
 
12
 
13
  ############
14
  ## Main page
@@ -64,6 +65,13 @@ with open(embedding_cache_path, "rb") as fIn:
64
  passages = cache_data['sentences']
65
  corpus_embeddings = cache_data['embeddings']
66
 
 
 
 
 
 
 
 
67
  # This function will search all wikipedia articles for passages that
68
  # answer the query
69
  def search(query):
@@ -94,8 +102,17 @@ def search(query):
94
  st.write("\n-------------------------\n")
95
  st.subheader("Top-N Cross-Encoder Re-ranker hits")
96
  hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
97
- for hit in hits[0:maxtags_sidebar]:
98
- st.write("\t{:.3f}\t{}".format(hit['cross-score'], passages[hit['corpus_id']].replace("\n", " ")))
 
 
 
 
 
 
 
 
 
99
 
100
  st.write("## Results:")
101
  if st.button('Generated Expansion'):
 
9
  import os
10
  import torch
11
  import pickle
12
+ import yake
13
 
14
  ############
15
  ## Main page
 
65
  passages = cache_data['sentences']
66
  corpus_embeddings = cache_data['embeddings']
67
 
68
+ kw_extractor = yake.KeywordExtractor()
69
+ language = "en"
70
+ max_ngram_size = 3
71
+ deduplication_threshold = 0.9
72
+ numOfKeywords = 20
73
+ custom_kw_extractor=yake.KeywordExtractor(lan=language, n=max_ngram_size, dedupLim=deduplication_threshold, top=numOfKeywords, features=None)
74
+
75
  # This function will search all wikipedia articles for passages that
76
  # answer the query
77
  def search(query):
 
102
  st.write("\n-------------------------\n")
103
  st.subheader("Top-N Cross-Encoder Re-ranker hits")
104
  hits = sorted(hits, key=lambda x: x['cross-score'], reverse=True)
105
+ #for hit in hits[0:maxtags_sidebar]:
106
+ # st.write("\t{:.3f}\t{}".format(hit['cross-score'], passages[hit['corpus_id']].replace("\n", " ")))
107
+ hit_res = []
108
+ for hit in hits[0:1000]:
109
+ q = passages[hit['corpus_id']].replace("\n", " ")
110
+ if q not in hit_res:
111
+ hit_res.append(q)
112
+ for res in hit_res[0:maxtags_sidebar]:
113
+ keywords = custom_kw_extractor.extract_keywords(res)
114
+ for kw in keywords:
115
+ print(kw)
116
 
117
  st.write("## Results:")
118
  if st.button('Generated Expansion'):