Spaces:

ml6team
/

semantic-search-demo

Runtime error

App Files Files Community

mrchtr committed on Jun 23, 2022

Commit

01628bb

1 Parent(s): 10641ee

Update styles

Browse files

Files changed (3) hide show

app.py +46 -14
retriever.py +3 -3
style.css +18 -0

app.py CHANGED Viewed

@@ -3,30 +3,62 @@
 Here's our first attempt at using data to create a table:
 """
 import streamlit as st
-import pandas as pd
-from load_css import local_css
 from retriever import do_search
 local_css('style.css')
-st.header('Semantic search demo')
-search = st.text_input('')
 if search:
     result = do_search(search)
-    col1, col2, col3 = st.columns(3)
-    with col1:
-        st.write('TF-IDF')
-        st.write(result[0])
-    with col2:
-        st.write('Base dense retriever')
-        st.write(result[1])
-    with col3:
-        st.write('Adapted dense retriever')
-        st.write(result[2])

 Here's our first attempt at using data to create a table:
 """
 import streamlit as st
 from retriever import do_search
+def local_css(file_name):
+    with open(file_name) as f:
+        st.markdown(f'<style>{f.read()}</style>', unsafe_allow_html=True)
+def render_retrieved_content(content, score):
+    print_score = ''
+    if score is not None:
+        score = round(score, 3)
+        print_score = f'<b> Similarity Score: {score}</b>'
+    return f'<blockquote>{content} </blockquote> {print_score}'
 local_css('style.css')
+st.header('🧐  Where my docs at?')
+st.markdown('✨ Imagine you have a bunch of text documents and looking for one specific passage, '
+         'but you can not remember on the exact words. Just about rough content. <br><br>'
+         '💡 This demo compares different search approaches that can help you to find the right '
+         'information.', unsafe_allow_html=True)
+option = st.selectbox(
+     'Choose a dataset',
+     ('CDU election program 2021', 'Partisan news 2019 (dutch)'))
+search = st.text_input('Enter your search query')
 if search:
     result = do_search(search)
+    st.markdown('### 🔎  Term Frequency–Inverse Document Frequency (TF-IDF)')
+    st.markdown('Is a statistical approach that calculates how relevant a word is to a document '
+                'in your collection. Only documents will be found that contain one of the words of '
+                'the given search query. You still have to remember on exact terms that are in the'
+                'searched phrase.')
+    st.markdown(render_retrieved_content(result[0][0].content, None),
+                unsafe_allow_html=True)
+    st.markdown('### 🧠  Semantic search')
+    st.markdown('An alternative approach is semantic search. Instead of using words of the '
+                'documents to calculate the score, we use a neural network that calculate the '
+                'similarity between the query and the documents of the collection. In other words, '
+                'the chance is high to find topic related documents without knowing the exact '
+                'terms.')
+    st.markdown(render_retrieved_content(result[1][0].content, result[1][0].score),
+                unsafe_allow_html=True)
+    st.markdown('### 🚀  Domain adapted semantic search')
+    st.markdown('If our document collection contains a lot of domain specific documents, '
+                'we can not use standard models. These models were trained on a large amount of '
+                'public available data, that covers probably not your domain specific words. To '
+                'improve the search results, we could fine-tune the network to calculate more '
+                'accurate similarities between queries and document regarding to your domain.')
+    st.markdown(render_retrieved_content(result[2][0].content, result[2][0].score),
+                unsafe_allow_html=True)

retriever.py CHANGED Viewed

@@ -56,9 +56,9 @@ def dense_retrieval(query, retriever='base'):
 def do_search(query):
-    sparse_result = sparse_retrieval(query)['documents'][0].content
-    dense_base_result = dense_retrieval(query, retriever='base')['documents'][0].content
-    dense_adapted_result = dense_retrieval(query, retriever='adapted')['documents'][0].content
     return sparse_result, dense_base_result, dense_adapted_result
 if __name__ == '__main__':

 def do_search(query):
+    sparse_result = sparse_retrieval(query)['documents']
+    dense_base_result =dense_retrieval(query, retriever='base')['documents']
+    dense_adapted_result = dense_retrieval(query, retriever='adapted')['documents']
     return sparse_result, dense_base_result, dense_adapted_result
 if __name__ == '__main__':

style.css ADDED Viewed

	@@ -0,0 +1,18 @@

+blockquote {
+  background: #f9f9f9;
+  border-left: 10px solid #ccc;
+  margin: 1.5em 10px;
+  padding: 0.5em 10px;
+  quotes: "\201C""\201D""\2018""\2019";
+}
+blockquote:before {
+  color: #ccc;
+  content: '';
+  font-size: 4em;
+  line-height: 0.1em;
+  margin-right: 0.25em;
+  vertical-align: -0.4em;
+}
+blockquote p {
+  display: inline;
+}