bug fix

app.py CHANGED
@@ -61,13 +61,12 @@ def get_image_description(image):
     torch.cuda.empty_cache()
     gc.collect()
 
-    descriptions = []
     prompt = "[INST] <image>\nDescribe the image in a sentence [/INST]"
+    n = len(prompt)
 
     inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
     output = vision_model.generate(**inputs, max_new_tokens=100)
-
-    return descriptions
+    return processor.decode(output[0][n:], skip_special_tokens=True)
 
 
 CSS = """
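For context, the fixed helper now decodes the generated ids and strips the echoed prompt instead of returning an always-empty list. Below is a minimal standalone sketch of the same idea, assuming a LLaVA-style checkpoint from transformers; the model name and module-level setup are illustrative, not the Space's actual code. The sketch slices the decoded string rather than the raw token ids, which keeps the prompt length and the slice in the same units:

import gc
import torch
from transformers import AutoProcessor, LlavaForConditionalGeneration

# Illustrative setup; the Space defines `processor` and `vision_model` elsewhere.
processor = AutoProcessor.from_pretrained("llava-hf/llava-1.5-7b-hf")
vision_model = LlavaForConditionalGeneration.from_pretrained(
    "llava-hf/llava-1.5-7b-hf", torch_dtype=torch.float16
).to("cuda:0")

def get_image_description(image):
    torch.cuda.empty_cache()
    gc.collect()

    prompt = "[INST] <image>\nDescribe the image in a sentence [/INST]"
    inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
    output = vision_model.generate(**inputs, max_new_tokens=100)
    # Decode the full sequence, then drop the echoed prompt by its character
    # length (approximate, since special tokens are skipped). The committed
    # version instead slices the token ids by len(prompt), which mixes
    # character and token counts.
    text = processor.decode(output[0], skip_special_tokens=True)
    return text[len(prompt):].strip()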
@@ -101,9 +100,14 @@ def get_vectordb(text, images, img_doc_files):
     descs = []
     for i in range(len(images)):
         try:
-            descs.append(img_doc_files[i]+"\n"+get_image_description(images[i])
+            descs.append(img_doc_files[i] + "\n" + get_image_description(images[i]))
         except:
             descs.append("Could not generate image description due to some error")
+        print(img_doc_files)
+        print(images[i])
+        print()
+        print(descs[-1])
+        print()
 
     # image_descriptions = get_image_descriptions(images)
     image_dict = [{"image": image_to_bytes(img)} for img in images]
@@ -134,7 +138,7 @@ def get_vectordb(text, images, img_doc_files):
 def extract_only_text(reader):
     text = ""
     for _, page in enumerate(reader.pages):
-        text
+        text = page.extract_text()
     return text.strip()
 
 
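One caveat worth flagging: as committed, `text = page.extract_text()` overwrites `text` on every iteration, so only the last page's text survives. The usual accumulation pattern, assuming `reader` is a pypdf `PdfReader`, looks like this:

from pypdf import PdfReader

def extract_only_text(reader: PdfReader) -> str:
    text = ""
    for page in reader.pages:
        # extract_text() can come back empty for image-only pages
        text += (page.extract_text() or "") + "\n"
    return text.strip()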
@@ -152,7 +156,7 @@ def extract_data_from_pdfs(
     all_text = ""
 
     images = []
-    img_docs=[]
+    img_docs = []
     for doc in docs:
         if do_ocr == "Get Text With OCR":
             pdf_doc = DocumentFile.from_pdf(doc)
@@ -163,8 +167,9 @@ def extract_data_from_pdfs(
             all_text += extract_only_text(reader) + "\n\n"
 
         if include_images == "Include Images":
-
-
+            imgs = extract_images([doc])
+            images.extend(imgs)
+            img_docs.extend([doc.split("/")[-1] for _ in range(len(imgs))])
 
     progress(
         0.6, "Generating image descriptions and inserting everything into vectorDB"
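The `extract_images` helper itself is not part of this diff, so its body is unknown. Purely as a hypothetical sketch of what such a helper might do, assuming PyMuPDF (fitz) and Pillow are available (everything below is illustrative, not the Space's actual implementation):

import io
import fitz  # PyMuPDF
from PIL import Image

def extract_images(docs):
    images = []
    for path in docs:
        pdf = fitz.open(path)
        for page in pdf:
            # get_images(full=True) yields tuples whose first field is the xref
            for xref, *_ in page.get_images(full=True):
                data = pdf.extract_image(xref)["image"]
                images.append(Image.open(io.BytesIO(data)))
        pdf.close()
    return images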
@@ -266,13 +271,6 @@ def check_validity_and_llm(session_states):
         raise gr.Error("Please extract data first")
 
 
-def get_stats(vectordb):
-    eles = vectordb.get()
-    # words =
-    text_data = [f"Chunks: {len(eles)}", "HIII"]
-    return "\n".join(text_data), "", ""
-
-
 with gr.Blocks(css=CSS, theme=gr.themes.Soft(text_size=sizes.text_md)) as demo:
     vectordb = gr.State()
     doc_collection = gr.State(value=[])
@@ -462,10 +460,14 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft(text_size=sizes.text_md)) as demo:
         [vectordb, msg, num_context, img_context, chatbot, hf_token, model_path],
         [chatbot, references, ret_images],
     )
-    documents.change(
+    documents.change(
+        lambda: "<h1 style='text-align: center'>Click the 'Extract' button to extract data from PDFs<h1>",
+        None,
+        prog,
+    )
 
     back_p1.click(lambda: gr.Tabs(selected=0), None, tabs)
 
     next_p1.click(check_validity_and_llm, session_states, tabs)
 if __name__ == "__main__":
-demo.launch()
+    demo.launch()
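The new `documents.change(...)` wiring follows Gradio's standard event signature of (fn, inputs, outputs): whenever the `documents` component's value changes, the lambda runs with no inputs and its return value is written into `prog`. A minimal self-contained sketch of the same pattern; the component types are assumptions, and only the event call mirrors the diff:

import gradio as gr

with gr.Blocks() as demo:
    documents = gr.File(file_count="multiple", label="PDFs")  # assumed type
    prog = gr.HTML()  # assumed type for the status banner

    # fn takes no inputs (None) and its return value updates `prog`
    documents.change(
        lambda: "<h1 style='text-align: center'>Click the 'Extract' "
        "button to extract data from PDFs<h1>",
        None,
        prog,
    )

if __name__ == "__main__":
    demo.launch()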