Ankur Goyal
committed on
Draw a box over the answer
Browse files
@@ -2,6 +2,7 @@ import os
2 |
3 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
4 |
5 |
import streamlit as st
6 |
7 |
import torch
@@ -24,8 +25,28 @@ def construct_pipeline():
24 |
25 |
26 |
27 |
def run_pipeline(question, document):
    """Run the question-answering pipeline against a document.

    Args:
        question: The question text, forwarded as the ``question`` keyword.
        document: An object whose ``context`` attribute is a mapping; its
            entries are expanded into keyword arguments for the pipeline.

    Returns:
        Whatever the callable returned by ``construct_pipeline()`` produces.
    """
    pipeline = construct_pipeline()
    return pipeline(question=question, **document.context)
29 |
30 |
31 |
st.markdown("# DocQuery: Query Documents w/ NLP")
@@ -75,16 +96,30 @@ question = st.text_input("QUESTION", "")
75 |
document = st.session_state.document
76 |
loading_placeholder = st.empty()
77 |
if document is not None:
78 |
col1, col2 = st.columns(
79 |
80 |
81 |
if document is not None and question is not None and len(question) > 0:
82 |
predictions = run_pipeline(question=question, document=document)
83 |
84 |
85 |
for p in ensure_list(predictions):
86 |
col2.subheader(f"{ p['answer'] }: ({round(p['score'] * 100, 1)}%)")
87 |
88 |
89 |
"DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."
90 |
2 |
3 |
os.environ["TOKENIZERS_PARALLELISM"] = "false"
4 |
5 |
from PIL import ImageDraw
6 |
import streamlit as st
7 |
8 |
import torch
25 |
26 |
27 |
28 |
def run_pipeline(question, document, top_k):
    """Run the question-answering pipeline against a document.

    Args:
        question: The question text, forwarded as the ``question`` keyword.
        document: An object whose ``context`` attribute is a mapping; its
            entries are expanded into keyword arguments for the pipeline.
        top_k: Forwarded unchanged as the ``top_k`` keyword (presumably the
            number of answers to return; confirm against the pipeline).

    Returns:
        Whatever the callable returned by ``construct_pipeline()`` produces.
    """
    pipeline = construct_pipeline()
    return pipeline(question=question, **document.context, top_k=top_k)
30 |
31 |
32 |
# TODO: Move into docquery
33 |
# TODO: Support words past the first page (or window?)
34 |
def lift_word_boxes(document):
    """Return the word boxes stored with the first image of a document.

    Reads ``document.context["image"]``, takes its first entry and returns
    the element at index 1 of that entry. Only the first entry is consulted,
    so later entries (presumably later pages; confirm against the document
    loader) are ignored.

    Args:
        document: An object whose ``context`` mapping has an ``"image"`` key
            holding a non-empty sequence of indexable entries.

    Returns:
        The element at index 1 of the first ``"image"`` entry.
    """
    first_entry = document.context["image"][0]
    return first_entry[1]
36 |
37 |
38 |
def expand_bbox(word_boxes):
    """Return the smallest box that encloses every box in ``word_boxes``.

    Each entry is indexed as ``entry[1]`` to obtain its four coordinates,
    which are read in the order (min x, min y, max x, max y).

    Args:
        word_boxes: A sized sequence of entries whose element at index 1 is
            a four-item box.

    Returns:
        ``[min_x, min_y, max_x, max_y]`` covering all entries, or ``None``
        when ``word_boxes`` is empty.
    """
    if len(word_boxes) == 0:
        return None

    # Transpose the per-word boxes into four per-coordinate tuples.
    min_xs, min_ys, max_xs, max_ys = zip(*(entry[1] for entry in word_boxes))
    return [min(min_xs), min(min_ys), max(max_xs), max(max_ys)]
44 |
45 |
46 |
# LayoutLM boxes are normalized to 0, 1000
47 |
def normalize_bbox(box, width, height):
    """Scale a box from the 0-1000 coordinate range to pixel coordinates.

    Each coordinate is divided by 1000 and then multiplied by ``width``
    (items 0 and 2) or ``height`` (items 1 and 3).

    Args:
        box: An iterable whose first four items are x0, y0, x1, y1 in the
            0-1000 range.
        width: Target width that the x coordinates are scaled to.
        height: Target height that the y coordinates are scaled to.

    Returns:
        A four-item list ``[x0, y0, x1, y1]`` in target units.
    """
    fractions = [coord / 1000 for coord in box]
    return [
        fractions[0] * width,
        fractions[1] * height,
        fractions[2] * width,
        fractions[3] * height,
    ]
50 |
51 |
52 |
st.markdown("# DocQuery: Query Documents w/ NLP")
96 |
document = st.session_state.document
97 |
loading_placeholder = st.empty()
98 |
if document is not None:
99 |
col1, col2 = st.columns([3, 1])
100 |
image = document.preview
101 |
102 |
103 |
colors = ["blue", "red", "green"]
104 |
if document is not None and question is not None and len(question) > 0:
105 |
106 |
107 |
predictions = run_pipeline(question=question, document=document, top_k=1)
108 |
109 |
word_boxes = lift_word_boxes(document)
110 |
image = image.copy()
111 |
draw = ImageDraw.Draw(image)
112 |
for i, p in enumerate(ensure_list(predictions)):
113 |
col2.markdown(f"#### { p['answer'] }: ({round(p['score'] * 100, 1)}%)")
114 |
x1, y1, x2, y2 = normalize_bbox(
115 |
expand_bbox(word_boxes[p["start"] : p["end"] + 1]),
116 |
117 |
118 |
119 |
draw.rectangle(((x1, y1), (x2, y2)), outline=colors[i])
120 |
121 |
if document is not None:
122 |
col1.image(image, use_column_width=True)
123 |
124 |
"DocQuery uses LayoutLMv1 fine-tuned on DocVQA, a document visual question answering dataset, as well as SQuAD, which boosts its English-language comprehension. To use it, simply upload an image or PDF, type a question, and click 'submit', or click one of the examples to load them."
125 |