Spaces:

tcy6
/

VisRAG_Pipeline

Running

App Files Files Community

tcy6 committed on Nov 4, 2024

Commit

56e4893

1 Parent(s): 4b231a8

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -3

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import tqdm
 from PIL import Image
 import hashlib
 import torch
 import fitz
 import threading
 import gradio as gr
@@ -18,6 +19,36 @@ import json
 cache_dir = '/data/KB'
 os.makedirs(cache_dir, exist_ok=True)
 def get_image_md5(img: Image.Image):
     img_byte_array = img.tobytes()
     hash_md5 = hashlib.md5()
@@ -57,8 +88,8 @@ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
         image_md5 = get_image_md5(image)
         image_md5s.append(image_md5)
         with torch.no_grad():
-            reps = model(text=[''], image=[image], tokenizer=tokenizer).reps
-        reps_list.append(reps.squeeze(0).cpu().numpy())
         images.append(image)
     for idx in range(len(images)):
@@ -95,7 +126,7 @@ def retrieve_gradio(knowledge_base: str, query: str, topk: int):
     query_with_instruction = "Represent this query for retrieving relavant document: " + query
     with torch.no_grad():
-        query_rep = model(text=[query_with_instruction], image=[None], tokenizer=tokenizer).reps.squeeze(0).cpu()
     query_md5 = hashlib.md5(query.encode()).hexdigest()

 from PIL import Image
 import hashlib
 import torch
+import torch.nn.functional as F
 import fitz
 import threading
 import gradio as gr
 cache_dir = '/data/KB'
 os.makedirs(cache_dir, exist_ok=True)
def weighted_mean_pooling(hidden, attention_mask):
    """Pool per-position hidden states into one vector per row.

    Each unmasked position is weighted by the running count of unmasked
    positions up to and including it (``mask * cumsum(mask)`` along dim 1),
    so later unmasked positions contribute more than earlier ones and
    masked positions contribute nothing.

    Args:
        hidden: Hidden states; assumed to be (batch, seq, dim) -- TODO
            confirm against the model's ``last_hidden_state``.
        attention_mask: Mask whose two dims line up with the first two dims
            of ``hidden``; non-zero marks positions to keep.

    Returns:
        The weighted mean of ``hidden`` over dim 1. A row whose mask is
        entirely zero yields a zero vector instead of NaN.
    """
    # Position weights: 1, 2, 3, ... over the unmasked positions, 0 elsewhere.
    position_weights = attention_mask * attention_mask.cumsum(dim=1)
    weighted_sum = torch.sum(
        hidden * position_weights.unsqueeze(-1).float(), dim=1
    )
    weight_total = position_weights.sum(dim=1, keepdim=True).float()
    # A fully masked row has weight_total == 0 (and weighted_sum == 0);
    # guard the division so it produces 0 rather than NaN.
    return weighted_sum / weight_total.clamp(min=1e-9)
@torch.no_grad()
def encode(text_or_image_list):
    """Embed a batch of strings or a batch of images with the global model.

    The type of the first element decides the mode: if it is a ``str`` the
    whole list is passed as text with ``None`` images; otherwise the list is
    passed as images, each paired with an empty text.

    Args:
        text_or_image_list: Non-empty list of inputs, all of one kind.

    Returns:
        L2-normalised embeddings as a NumPy array, one row per input.
    """
    batch_size = len(text_or_image_list)
    if isinstance(text_or_image_list[0], str):
        texts = text_or_image_list
        images = [None] * batch_size
    else:
        texts = [''] * batch_size
        images = text_or_image_list

    # `model` and `tokenizer` are module-level objects defined elsewhere.
    outputs = model(text=texts, image=images, tokenizer=tokenizer)
    pooled = weighted_mean_pooling(
        outputs.last_hidden_state, outputs.attention_mask
    )
    unit_vectors = F.normalize(pooled, p=2, dim=1)
    return unit_vectors.detach().cpu().numpy()
 def get_image_md5(img: Image.Image):
     img_byte_array = img.tobytes()
     hash_md5 = hashlib.md5()
         image_md5 = get_image_md5(image)
         image_md5s.append(image_md5)
         with torch.no_grad():
+            reps = encode([image])
+        reps_list.append(reps)
         images.append(image)
     for idx in range(len(images)):
     query_with_instruction = "Represent this query for retrieving relavant document: " + query
     with torch.no_grad():
+        query_rep = encode([query_with_instruction])
     query_md5 = hashlib.md5(query.encode()).hexdigest()