Spaces:
Runtime error
Runtime error
ugmSorcero
committed on
Commit
•
dbcf2e8
1
Parent(s):
46323da
Linter
Browse files
- interface/components.py +7 -5
- interface/utils.py +4 -2
interface/components.py
CHANGED
@@ -80,11 +80,11 @@ def component_article_url(container):
|
|
80 |
st.markdown("---")
|
81 |
else:
|
82 |
break
|
83 |
-
|
84 |
for idx, doc in enumerate(urls):
|
85 |
with st.expander(f"Preview URL {idx}"):
|
86 |
st.write(doc)
|
87 |
-
|
88 |
corpus = [
|
89 |
{"text": doc["text"], "id": doc_id} for doc_id, doc in enumerate(urls)
|
90 |
]
|
@@ -98,7 +98,9 @@ def component_file_input(container):
|
|
98 |
doc_id = 1
|
99 |
with st.expander("Enter Files"):
|
100 |
while True:
|
101 |
-
file = st.file_uploader(
|
|
|
|
|
102 |
if file != None:
|
103 |
extracted_text = extract_text_from_file(file)
|
104 |
if extracted_text != None:
|
@@ -109,11 +111,11 @@ def component_file_input(container):
|
|
109 |
break
|
110 |
else:
|
111 |
break
|
112 |
-
|
113 |
for idx, doc in enumerate(files):
|
114 |
with st.expander(f"Preview File {idx}"):
|
115 |
st.write(doc)
|
116 |
-
|
117 |
corpus = [
|
118 |
{"text": doc["text"], "id": doc_id} for doc_id, doc in enumerate(files)
|
119 |
]
|
|
|
80 |
st.markdown("---")
|
81 |
else:
|
82 |
break
|
83 |
+
|
84 |
for idx, doc in enumerate(urls):
|
85 |
with st.expander(f"Preview URL {idx}"):
|
86 |
st.write(doc)
|
87 |
+
|
88 |
corpus = [
|
89 |
{"text": doc["text"], "id": doc_id} for doc_id, doc in enumerate(urls)
|
90 |
]
|
|
|
98 |
doc_id = 1
|
99 |
with st.expander("Enter Files"):
|
100 |
while True:
|
101 |
+
file = st.file_uploader(
|
102 |
+
"Upload a .txt, .pdf, .csv, image file", key=doc_id
|
103 |
+
)
|
104 |
if file != None:
|
105 |
extracted_text = extract_text_from_file(file)
|
106 |
if extracted_text != None:
|
|
|
111 |
break
|
112 |
else:
|
113 |
break
|
114 |
+
|
115 |
for idx, doc in enumerate(files):
|
116 |
with st.expander(f"Preview File {idx}"):
|
117 |
st.write(doc)
|
118 |
+
|
119 |
corpus = [
|
120 |
{"text": doc["text"], "id": doc_id} for doc_id, doc in enumerate(files)
|
121 |
]
|
interface/utils.py
CHANGED
@@ -8,6 +8,7 @@ import pandas as pd
|
|
8 |
import pytesseract
|
9 |
from PIL import Image
|
10 |
|
|
|
11 |
def get_pipelines():
|
12 |
pipeline_names, pipeline_funcs = list(
|
13 |
zip(*getmembers(pipelines_functions, isfunction))
|
@@ -26,6 +27,7 @@ def extract_text_from_url(url: str):
|
|
26 |
|
27 |
return article.text
|
28 |
|
|
|
29 |
@st.experimental_memo
|
30 |
def extract_text_from_file(file):
|
31 |
# read text file
|
@@ -77,9 +79,9 @@ def extract_text_from_file(file):
|
|
77 |
continue
|
78 |
file_text += " " + txt
|
79 |
return file_text
|
80 |
-
|
81 |
# read image file (OCR)
|
82 |
-
elif file.type ==
|
83 |
return pytesseract.image_to_string(Image.open(file))
|
84 |
|
85 |
else:
|
|
|
8 |
import pytesseract
|
9 |
from PIL import Image
|
10 |
|
11 |
+
|
12 |
def get_pipelines():
|
13 |
pipeline_names, pipeline_funcs = list(
|
14 |
zip(*getmembers(pipelines_functions, isfunction))
|
|
|
27 |
|
28 |
return article.text
|
29 |
|
30 |
+
|
31 |
@st.experimental_memo
|
32 |
def extract_text_from_file(file):
|
33 |
# read text file
|
|
|
79 |
continue
|
80 |
file_text += " " + txt
|
81 |
return file_text
|
82 |
+
|
83 |
# read image file (OCR)
|
84 |
+
elif file.type == "image/jpeg":
|
85 |
return pytesseract.image_to_string(Image.open(file))
|
86 |
|
87 |
else:
|