Spaces:

ugaray96
/

neural-search

Sleeping

App Files Files Community

ugaray96 committed on Nov 29, 2022

Commit

c11f999

unverified ·

2 Parent(s): 687697c c397816

Merge pull request #15 from ugm2/feature/audio_to_text

Browse files

Files changed (7) hide show

app.py +4 -0
core/audio.py +22 -0
core/pipelines.py +1 -3
interface/components.py +14 -1
interface/pages.py +2 -3
interface/utils.py +12 -1
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -11,12 +11,16 @@ st.set_page_config(
 from streamlit_option_menu import option_menu
 from interface.config import session_state_variables, pages
 from interface.components import component_select_pipeline
 # Initialization of session state
 for key, value in session_state_variables.items():
     if key not in st.session_state:
         st.session_state[key] = value
 def run_demo():

 from streamlit_option_menu import option_menu
 from interface.config import session_state_variables, pages
 from interface.components import component_select_pipeline
+from interface.utils import load_audio_model
 # Initialization of session state
 for key, value in session_state_variables.items():
     if key not in st.session_state:
         st.session_state[key] = value
+# Init audio model
+st.session_state["audio_model"] = load_audio_model()
 def run_demo():

core/audio.py ADDED Viewed

	@@ -0,0 +1,22 @@

+import whisper
+import pydub
+import os
+whisper_model = "medium"
+def load_model():
+    """Load and return the Whisper model selected by ``whisper_model``."""
+    print("Loading audio model...")
+    model = whisper.load_model(whisper_model)
+    return model
+def audio_to_text(model, audio_file):
+    """Transcribe an audio file to text with Whisper.
+
+    The input is decoded with pydub and re-exported to a temporary file on
+    disk, which is then read back with ``whisper.load_audio`` and
+    transcribed. The temporary file is always removed afterwards.
+
+    Args:
+        model: Whisper model, e.g. the object returned by ``load_model``.
+        audio_file: Path or file-like object passed to
+            ``pydub.AudioSegment.from_file``.
+
+    Returns:
+        The ``"text"`` entry of the Whisper transcription result.
+    """
+    import tempfile
+
+    audio = pydub.AudioSegment.from_file(audio_file)
+    # Use a unique temporary path rather than a fixed "audio_tmp" in the
+    # working directory, so concurrent calls cannot overwrite or delete
+    # each other's file.
+    fd, tmp_path = tempfile.mkstemp(prefix="audio_tmp_")
+    os.close(fd)
+    try:
+        # Export inside the try block so the file is cleaned up even when
+        # the export fails; close the handle that export() returns.
+        audio.export(tmp_path).close()
+        audio = whisper.load_audio(tmp_path)
+        result = whisper.transcribe(model=model, audio=audio, verbose=True)
+    finally:
+        os.remove(tmp_path)
+    return result["text"]

core/pipelines.py CHANGED Viewed

@@ -85,9 +85,7 @@ def dense_passage_retrieval(
       - One BERT base model to encode queries
       - Ranking of documents done by dot product similarity between query and document embeddings
     """
-    global document_store
-    if index != document_store.index:
-        document_store = InMemoryDocumentStore(index=index)
     dpr_retriever = DensePassageRetriever(
         document_store=document_store,
         query_embedding_model=query_embedding_model,

       - One BERT base model to encode queries
       - Ranking of documents done by dot product similarity between query and document embeddings
     """
+    document_store = InMemoryDocumentStore(index=index)
     dpr_retriever = DensePassageRetriever(
         document_store=document_store,
         query_embedding_model=query_embedding_model,

interface/components.py CHANGED Viewed

@@ -51,6 +51,19 @@ def component_select_pipeline(container):
                     "doc": pipeline_funcs[index_pipe].__doc__,
                 }
                 reset_vars_data()
 def component_show_pipeline(pipeline, pipeline_name):
@@ -126,7 +139,7 @@ def component_file_input(container, doc_id):
         with st.expander("Enter Files"):
             while True:
                 file = st.file_uploader(
-                    "Upload a .txt, .pdf, .csv, image file", key=doc_id
                 )
                 if file != None:
                     extracted_text = extract_text_from_file(file)

                     "doc": pipeline_funcs[index_pipe].__doc__,
                 }
                 reset_vars_data()
+            # TODO: Use elasticsearch and remove this workaround for TFIDF
+            # Reload if Keyword Search is selected
+            elif st.session_state["pipeline"]["name"] == "Keyword Search":
+                st.session_state["pipeline_func_parameters"] = pipeline_func_parameters
+                (search_pipeline, index_pipeline,) = pipeline_funcs[
+                    index_pipe
+                ](**pipeline_func_parameters[index_pipe])
+                st.session_state["pipeline"] = {
+                    "name": selected_pipeline,
+                    "search_pipeline": search_pipeline,
+                    "index_pipeline": index_pipeline,
+                    "doc": pipeline_funcs[index_pipe].__doc__,
+                }
 def component_show_pipeline(pipeline, pipeline_name):
         with st.expander("Enter Files"):
             while True:
                 file = st.file_uploader(
+                    "Upload a .txt, .pdf, .csv, image file, audio file", key=doc_id
                 )
                 if file != None:
                     extracted_text = extract_text_from_file(file)

interface/pages.py CHANGED Viewed

@@ -12,7 +12,7 @@ from interface.components import (
 def page_landing_page(container):
     with container:
-        st.header("Neural Search V2.0")
         st.markdown(
             "This is a tool to allow indexing & search content using neural capabilities"
@@ -22,7 +22,7 @@ def page_landing_page(container):
         )
         st.markdown(
             "In this second version you can:"
-            "\n  - Index raw text, URLs, CSVs, PDFs and Images"
             "\n  - Use Dense Passage Retrieval, Keyword Search pipeline and DPR Ranker pipelines"
             "\n  - Search the indexed documents"
             "\n  - Read your responses out loud using the `audio_output` option!"
@@ -30,7 +30,6 @@ def page_landing_page(container):
         st.markdown(
             "TODO list:"
             "\n  - File type classification and converter nodes"
-            "\n  - Audio to text support for indexing"
             "\n  - Build other pipelines"
         )
         st.markdown(

 def page_landing_page(container):
     with container:
+        st.header("Neural Search V2.1")
         st.markdown(
             "This is a tool to allow indexing & search content using neural capabilities"
         )
         st.markdown(
             "In this second version you can:"
+            "\n  - Index raw text, URLs, CSVs, PDFs, Images and even audio!"
             "\n  - Use Dense Passage Retrieval, Keyword Search pipeline and DPR Ranker pipelines"
             "\n  - Search the indexed documents"
             "\n  - Read your responses out loud using the `audio_output` option!"
         st.markdown(
             "TODO list:"
             "\n  - File type classification and converter nodes"
             "\n  - Build other pipelines"
         )
         st.markdown(

interface/utils.py CHANGED Viewed

@@ -3,6 +3,7 @@ import os
 import shutil
 import core.pipelines as pipelines_functions
 from core.pipelines import data_path
 from inspect import getmembers, isfunction, signature
 from newspaper import Article
 from PyPDF2 import PdfFileReader
@@ -96,9 +97,19 @@ def extract_text_from_file(file):
         return file_text
     # read image file (OCR)
-    elif file.type == "image/jpeg":
         return pytesseract.image_to_string(Image.open(file))
     else:
         st.warning(f"File type {file.type} not supported")
         return None

 import shutil
 import core.pipelines as pipelines_functions
 from core.pipelines import data_path
+from core.audio import audio_to_text, load_model
 from inspect import getmembers, isfunction, signature
 from newspaper import Article
 from PyPDF2 import PdfFileReader
         return file_text
     # read image file (OCR)
+    elif file.type in ["image/jpeg", "image/png"]:
         return pytesseract.image_to_string(Image.open(file))
+    # read audio file (AudioToText)
+    elif file.type in ["audio/mpeg", "audio/wav", "audio/aac", "audio/x-m4a"]:
+        text = audio_to_text(st.session_state["audio_model"], file)
+        return text
     else:
         st.warning(f"File type {file.type} not supported")
         return None
+@st.experimental_singleton
+def load_audio_model():
+    # Returns whatever core.audio.load_model() returns; the call is wrapped
+    # in st.experimental_singleton.
+    return load_model()

requirements.txt CHANGED Viewed

@@ -9,4 +9,5 @@ pytesseract==0.3.10
 soundfile==0.10.3.post1
 espnet
 pydub==0.25.1
-espnet_model_zoo==0.1.7

 soundfile==0.10.3.post1
 espnet
 pydub==0.25.1
+espnet_model_zoo==0.1.7
+git+https://github.com/openai/whisper.git