Spaces:

ugaray96
/

neural-search

Running

ugmSorcero committed on Sep 14, 2022

Commit

dbcf2e8

1 Parent(s): 46323da

Linter

Files changed (2) hide show

interface/components.py CHANGED Viewed

@@ -80,11 +80,11 @@ def component_article_url(container):
                     st.markdown("---")
                 else:
                     break
         for idx, doc in enumerate(urls):
             with st.expander(f"Preview URL {idx}"):
                 st.write(doc)
         corpus = [
             {"text": doc["text"], "id": doc_id} for doc_id, doc in enumerate(urls)
         ]
@@ -98,7 +98,9 @@ def component_file_input(container):
         doc_id = 1
         with st.expander("Enter Files"):
             while True:
-                file = st.file_uploader("Upload a .txt, .pdf, .csv, image file", key=doc_id)
                 if file != None:
                     extracted_text = extract_text_from_file(file)
                     if extracted_text != None:
@@ -109,11 +111,11 @@ def component_file_input(container):
                         break
                 else:
                     break
         for idx, doc in enumerate(files):
             with st.expander(f"Preview File {idx}"):
                 st.write(doc)
         corpus = [
             {"text": doc["text"], "id": doc_id} for doc_id, doc in enumerate(files)
         ]

                     st.markdown("---")
                 else:
                     break
         for idx, doc in enumerate(urls):
             with st.expander(f"Preview URL {idx}"):
                 st.write(doc)
         corpus = [
             {"text": doc["text"], "id": doc_id} for doc_id, doc in enumerate(urls)
         ]
         doc_id = 1
         with st.expander("Enter Files"):
             while True:
+                file = st.file_uploader(
+                    "Upload a .txt, .pdf, .csv, image file", key=doc_id
+                )
                 if file != None:
                     extracted_text = extract_text_from_file(file)
                     if extracted_text != None:
                         break
                 else:
                     break
         for idx, doc in enumerate(files):
             with st.expander(f"Preview File {idx}"):
                 st.write(doc)
         corpus = [
             {"text": doc["text"], "id": doc_id} for doc_id, doc in enumerate(files)
         ]

interface/utils.py CHANGED Viewed

@@ -8,6 +8,7 @@ import pandas as pd
 import pytesseract
 from PIL import Image
 def get_pipelines():
     pipeline_names, pipeline_funcs = list(
         zip(*getmembers(pipelines_functions, isfunction))
@@ -26,6 +27,7 @@ def extract_text_from_url(url: str):
     return article.text
 @st.experimental_memo
 def extract_text_from_file(file):
     # read text file
@@ -77,9 +79,9 @@ def extract_text_from_file(file):
                     continue
                 file_text += " " + txt
         return file_text
     # read image file (OCR)
-    elif file.type == 'image/jpeg':
         return pytesseract.image_to_string(Image.open(file))
     else:

 import pytesseract
 from PIL import Image
 def get_pipelines():
     pipeline_names, pipeline_funcs = list(
         zip(*getmembers(pipelines_functions, isfunction))
     return article.text
 @st.experimental_memo
 def extract_text_from_file(file):
     # read text file
                     continue
                 file_text += " " + txt
         return file_text
     # read image file (OCR)
+    elif file.type == "image/jpeg":
         return pytesseract.image_to_string(Image.open(file))
     else: