Ankur Goyal
committed on
Commit
•
1af0b6d
1
Parent(s):
2919076
Draw a box over the answer
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
|
3 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
4 |
|
|
|
5 |
import streamlit as st
|
6 |
|
7 |
import torch
|
@@ -24,8 +25,28 @@ def construct_pipeline():
|
|
24 |
|
25 |
|
26 |
@st.cache
|
27 |
-
def run_pipeline(question, document):
    """Answer ``question`` about ``document`` with the shared pipeline.

    The pipeline object is obtained from ``construct_pipeline()`` and called
    with the question plus every entry of ``document.context`` expanded as
    keyword arguments. Whatever the pipeline returns is returned unchanged.
    """
    pipeline = construct_pipeline()
    return pipeline(question=question, **document.context)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
|
31 |
st.markdown("# DocQuery: Query Documents w/ NLP")
|
@@ -75,16 +96,30 @@ question = st.text_input("QUESTION", "")
|
|
75 |
document = st.session_state.document
|
76 |
loading_placeholder = st.empty()
|
77 |
if document is not None:
|
78 |
-
col1, col2 = st.columns(
|
79 |
-
|
80 |
|
81 |
-
if document is not None and question is not None and len(question) > 0:
|
82 |
-
predictions = run_pipeline(question=question, document=document)
|
83 |
|
|
|
|
|
84 |
col2.header("Answers")
|
85 |
-
for p in ensure_list(predictions):
|
86 |
-
col2.subheader(f"{ p['answer'] }: ({round(p['score'] * 100, 1)}%)")
|
87 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
"DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."
|
90 |
|
|
|
2 |
|
3 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
4 |
|
5 |
+
from PIL import ImageDraw
|
6 |
import streamlit as st
|
7 |
|
8 |
import torch
|
|
|
25 |
|
26 |
|
27 |
@st.cache
|
28 |
+
def run_pipeline(question, document, top_k):
    """Answer ``question`` about ``document`` with the shared pipeline.

    The pipeline object is obtained from ``construct_pipeline()`` and called
    with the question, every entry of ``document.context`` expanded as
    keyword arguments, and ``top_k``. Whatever the pipeline returns is
    returned unchanged.
    """
    pipeline = construct_pipeline()
    return pipeline(question=question, **document.context, top_k=top_k)
|
30 |
+
|
31 |
+
|
32 |
+
# TODO: Move into docquery
|
33 |
+
# TODO: Support words past the first page (or window?)
|
34 |
+
def lift_word_boxes(document):
    """Return element 1 of the first entry of ``document.context["image"]``.

    NOTE(review): presumably each entry pairs a page image with its word
    boxes, which would make this the word boxes of the first page only --
    confirm against the code that builds ``document.context``.
    """
    first_entry = document.context["image"][0]
    return first_entry[1]
|
36 |
+
|
37 |
+
|
38 |
+
def expand_bbox(word_boxes):
    """Return the smallest box enclosing every box in ``word_boxes``.

    Args:
        word_boxes: Sequence of items whose element ``[1]`` is a four-value
            box ``(min_x, min_y, max_x, max_y)``.

    Returns:
        ``[min_x, min_y, max_x, max_y]`` covering all the boxes, or ``None``
        when ``word_boxes`` is empty or ``None``.
    """
    # Truthiness covers both the empty sequence and None, so callers get a
    # single "nothing to enclose" signal instead of a TypeError from len().
    if not word_boxes:
        return None

    # Transpose the boxes into one tuple per coordinate.
    min_xs, min_ys, max_xs, max_ys = zip(*(item[1] for item in word_boxes))
    return [min(min_xs), min(min_ys), max(max_xs), max(max_ys)]
|
44 |
+
|
45 |
+
|
46 |
+
# LayoutLM boxes are normalized to 0, 1000
|
47 |
+
def normalize_bbox(box, width, height, scale=1000):
    """Convert a box from the 0..``scale`` grid to ``width`` x ``height`` units.

    Args:
        box: Indexable of at least four values ``[x1, y1, x2, y2]`` expressed
            on a 0..``scale`` grid.
        width: Target width; multiplies the two x values.
        height: Target height; multiplies the two y values.
        scale: Upper bound of the input grid. Defaults to 1000, the range
            LayoutLM boxes are normalized to.

    Returns:
        ``[x1, y1, x2, y2]`` as floats in the target units.
    """
    # Divide first, then multiply, so results match the fixed-1000 behaviour
    # bit-for-bit when ``scale`` is left at its default.
    pct = [c / scale for c in box]
    return [pct[0] * width, pct[1] * height, pct[2] * width, pct[3] * height]
|
50 |
|
51 |
|
52 |
st.markdown("# DocQuery: Query Documents w/ NLP")
|
|
|
96 |
document = st.session_state.document
loading_placeholder = st.empty()
if document is not None:
    # Two columns with relative widths 3:1; col1 shows the preview image and
    # col2 lists the answers.
    col1, col2 = st.columns([3, 1])
    image = document.preview


colors = ["blue", "red", "green"]
if document is not None and question is not None and len(question) > 0:
    col2.header("Answers")

    predictions = run_pipeline(question=question, document=document, top_k=1)

    word_boxes = lift_word_boxes(document)
    # Draw on a copy so the object held in document.preview is not modified.
    image = image.copy()
    draw = ImageDraw.Draw(image)
    for i, p in enumerate(ensure_list(predictions)):
        col2.markdown(f"#### { p['answer'] }: ({round(p['score'] * 100, 1)}%)")

        # p["end"] is used as an inclusive index, hence the +1 on the slice.
        bbox = expand_bbox(word_boxes[p["start"] : p["end"] + 1])
        if bbox is None:
            # expand_bbox returns None for an empty span; there is nothing to
            # outline, and passing None on would raise a TypeError.
            continue

        x1, y1, x2, y2 = normalize_bbox(bbox, image.width, image.height)
        # Wrap around the palette so having more predictions than colors
        # cannot raise IndexError.
        draw.rectangle(((x1, y1), (x2, y2)), outline=colors[i % len(colors)])

if document is not None:
    col1.image(image, use_column_width=True)
|
123 |
|
124 |
"DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."
|
125 |
|