anand004 committed on
Commit
e979e49
·
unverified ·
1 Parent(s): 57e7f13
Files changed (1) hide show
  1. app.py +19 -17
app.py CHANGED
@@ -61,13 +61,12 @@ def get_image_description(image):
61
  torch.cuda.empty_cache()
62
  gc.collect()
63
 
64
- descriptions = []
65
  prompt = "[INST] <image>\nDescribe the image in a sentence [/INST]"
 
66
 
67
  inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
68
  output = vision_model.generate(**inputs, max_new_tokens=100)
69
- descriptions.append(processor.decode(output[0], skip_special_tokens=True))
70
- return descriptions
71
 
72
 
73
  CSS = """
@@ -101,9 +100,14 @@ def get_vectordb(text, images, img_doc_files):
101
  descs = []
102
  for i in range(len(images)):
103
  try:
104
- descs.append(img_doc_files[i]+"\n"+get_image_description(images[i])[0])
105
  except:
106
  descs.append("Could not generate image description due to some error")
 
 
 
 
 
107
 
108
  # image_descriptions = get_image_descriptions(images)
109
  image_dict = [{"image": image_to_bytes(img)} for img in images]
@@ -134,7 +138,7 @@ def get_vectordb(text, images, img_doc_files):
134
  def extract_only_text(reader):
135
  text = ""
136
  for _, page in enumerate(reader.pages):
137
- text += page.extract_text()
138
  return text.strip()
139
 
140
 
@@ -152,7 +156,7 @@ def extract_data_from_pdfs(
152
  all_text = ""
153
 
154
  images = []
155
- img_docs=[]
156
  for doc in docs:
157
  if do_ocr == "Get Text With OCR":
158
  pdf_doc = DocumentFile.from_pdf(doc)
@@ -163,8 +167,9 @@ def extract_data_from_pdfs(
163
  all_text += extract_only_text(reader) + "\n\n"
164
 
165
  if include_images == "Include Images":
166
- images.extend(extract_images([doc]))
167
- img_docs.append(doc.split("/")[-1])
 
168
 
169
  progress(
170
  0.6, "Generating image descriptions and inserting everything into vectorDB"
@@ -266,13 +271,6 @@ def check_validity_and_llm(session_states):
266
  raise gr.Error("Please extract data first")
267
 
268
 
269
- def get_stats(vectordb):
270
- eles = vectordb.get()
271
- # words =
272
- text_data = [f"Chunks: {len(eles)}", "HIII"]
273
- return "\n".join(text_data), "", ""
274
-
275
-
276
  with gr.Blocks(css=CSS, theme=gr.themes.Soft(text_size=sizes.text_md)) as demo:
277
  vectordb = gr.State()
278
  doc_collection = gr.State(value=[])
@@ -462,10 +460,14 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft(text_size=sizes.text_md)) as demo:
462
  [vectordb, msg, num_context, img_context, chatbot, hf_token, model_path],
463
  [chatbot, references, ret_images],
464
  )
465
- documents.change(lambda: "<h1 style='text-align: center'>Click the 'Extract' button to extract data from PDFs<h1>", None, prog)
 
 
 
 
466
 
467
  back_p1.click(lambda: gr.Tabs(selected=0), None, tabs)
468
 
469
  next_p1.click(check_validity_and_llm, session_states, tabs)
470
  if __name__ == "__main__":
471
- demo.launch()
 
61
  torch.cuda.empty_cache()
62
  gc.collect()
63
 
 
64
  prompt = "[INST] <image>\nDescribe the image in a sentence [/INST]"
65
+ n = len(prompt)
66
 
67
  inputs = processor(prompt, image, return_tensors="pt").to("cuda:0")
68
  output = vision_model.generate(**inputs, max_new_tokens=100)
69
+ return processor.decode(output[0][n:], skip_special_tokens=True)
 
70
 
71
 
72
  CSS = """
 
100
  descs = []
101
  for i in range(len(images)):
102
  try:
103
+ descs.append(img_doc_files[i] + "\n" + get_image_description(images[i]))
104
  except:
105
  descs.append("Could not generate image description due to some error")
106
+ print(img_doc_files)
107
+ print(images[i])
108
+ print()
109
+ print(descs[-1])
110
+ print()
111
 
112
  # image_descriptions = get_image_descriptions(images)
113
  image_dict = [{"image": image_to_bytes(img)} for img in images]
 
138
def extract_only_text(reader):
    """Return the concatenated plain text of every page in *reader*.

    Parameters
    ----------
    reader : a PDF reader object exposing ``.pages``, where each page
        provides ``extract_text()`` (e.g. a pypdf ``PdfReader`` —
        assumed from usage; confirm against caller).

    Returns
    -------
    str
        All pages' text joined in order, with surrounding whitespace
        stripped. Empty string for a PDF with no pages.
    """
    text = ""
    for page in reader.pages:
        # Accumulate with += — assigning (text =) would silently keep
        # only the LAST page's text, a regression this fixes.
        text += page.extract_text()
    return text.strip()
143
 
144
 
 
156
  all_text = ""
157
 
158
  images = []
159
+ img_docs = []
160
  for doc in docs:
161
  if do_ocr == "Get Text With OCR":
162
  pdf_doc = DocumentFile.from_pdf(doc)
 
167
  all_text += extract_only_text(reader) + "\n\n"
168
 
169
  if include_images == "Include Images":
170
+ imgs = extract_images([doc])
171
+ images.extend(imgs)
172
+ img_docs.extend([doc.split("/")[-1] for _ in range(len(imgs))])
173
 
174
  progress(
175
  0.6, "Generating image descriptions and inserting everything into vectorDB"
 
271
  raise gr.Error("Please extract data first")
272
 
273
 
 
 
 
 
 
 
 
274
  with gr.Blocks(css=CSS, theme=gr.themes.Soft(text_size=sizes.text_md)) as demo:
275
  vectordb = gr.State()
276
  doc_collection = gr.State(value=[])
 
460
  [vectordb, msg, num_context, img_context, chatbot, hf_token, model_path],
461
  [chatbot, references, ret_images],
462
  )
463
+ documents.change(
464
+ lambda: "<h1 style='text-align: center'>Click the 'Extract' button to extract data from PDFs<h1>",
465
+ None,
466
+ prog,
467
+ )
468
 
469
  back_p1.click(lambda: gr.Tabs(selected=0), None, tabs)
470
 
471
  next_p1.click(check_validity_and_llm, session_states, tabs)
472
  if __name__ == "__main__":
473
+ demo.launch()