Spaces:

tcy6
/

VisRAG_Pipeline

Running

App Files Files Community

tcy6 committed on Nov 4, 2024

Commit

80c1548

1 Parent(s): 0d1fb65

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -12

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import tqdm
 from PIL import Image
 import hashlib
 import torch
 import fitz
 import threading
 import gradio as gr
@@ -15,9 +16,41 @@ import os
 import numpy as np
 import json
-cache_dir = '/data/kb_cache'
 os.makedirs(cache_dir, exist_ok=True)
 def get_image_md5(img: Image.Image):
     img_byte_array = img.tobytes()
     hash_md5 = hashlib.md5()
@@ -57,8 +90,8 @@ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
         image_md5 = get_image_md5(image)
         image_md5s.append(image_md5)
         with torch.no_grad():
-            reps = model(text=[''], image=[image], tokenizer=tokenizer).reps
-        reps_list.append(reps.squeeze(0).cpu().numpy())
         images.append(image)
     for idx in range(len(images)):
@@ -75,7 +108,7 @@ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
     return knowledge_base_name
-# @spaces.GPU
 def retrieve_gradio(knowledge_base: str, query: str, topk: int):
     global model, tokenizer
@@ -95,22 +128,23 @@ def retrieve_gradio(knowledge_base: str, query: str, topk: int):
     query_with_instruction = "Represent this query for retrieving relavant document: " + query
     with torch.no_grad():
-        query_rep = model(text=[query_with_instruction], image=[None], tokenizer=tokenizer).reps.squeeze(0).cpu()
     query_md5 = hashlib.md5(query.encode()).hexdigest()
     doc_reps_cat = torch.stack([torch.Tensor(i) for i in doc_reps], dim=0)
     similarities = torch.matmul(query_rep, doc_reps_cat.T)
     topk_values, topk_doc_ids = torch.topk(similarities, k=topk)
     topk_values_np = topk_values.cpu().numpy()
-    topk_doc_ids_np = topk_doc_ids.cpu().numpy()
     similarities_np = similarities.cpu().numpy()
     images_topk = [Image.open(os.path.join(target_cache_dir, f"{md5s[idx]}.png")) for idx in topk_doc_ids_np]
     with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'w') as f:
@@ -170,10 +204,10 @@ def downvote(knowledge_base, query):
-device = 'cuda'
 print("emb model load begin...")
-model_path = 'RhapsodyAI/minicpm-visual-embedding-v0' # replace with your local model path
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
 model.eval()
@@ -182,8 +216,9 @@ print("emb model load success!")
 print("gen model load begin...")
 gen_model_path = 'openbmb/MiniCPM-V-2_6'
-gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_path, trust_remote_code=True)
-gen_model =  AutoModel.from_pretrained(gen_model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
 gen_model.eval()
 gen_model.to(device)
 print("gen model load success!")
@@ -256,4 +291,4 @@ with gr.Blocks() as app:
     gr.Markdown("By using this demo, you agree to share your use data with us for research purpose, to help improve user experience.")
-app.launch()

 from PIL import Image
 import hashlib
 import torch
+import torch.nn.functional as F
 import fitz
 import threading
 import gradio as gr
 import numpy as np
 import json
+cache_dir = '/data/KB'
 os.makedirs(cache_dir, exist_ok=True)
+@spaces.GPU(duration=100)
+def weighted_mean_pooling(hidden, attention_mask):
+    attention_mask_ = attention_mask * attention_mask.cumsum(dim=1)
+    s = torch.sum(hidden * attention_mask_.unsqueeze(-1).float(), dim=1)
+    d = attention_mask_.sum(dim=1, keepdim=True).float()
+    reps = s / d
+    return reps
+@spaces.GPU(duration=100)
+@torch.no_grad()
+def encode(text_or_image_list):
+    global model, tokenizer
+    if (isinstance(text_or_image_list[0], str)):
+        inputs = {
+            "text": text_or_image_list,
+            'image': [None] * len(text_or_image_list),
+            'tokenizer': tokenizer
+        }
+    else:
+        inputs = {
+            "text": [''] * len(text_or_image_list),
+            'image': text_or_image_list,
+            'tokenizer': tokenizer
+        }
+    outputs = model(**inputs)
+    attention_mask = outputs.attention_mask
+    hidden = outputs.last_hidden_state
+    reps = weighted_mean_pooling(hidden, attention_mask)
+    embeddings = F.normalize(reps, p=2, dim=1).detach().cpu().numpy()
+    return embeddings
 def get_image_md5(img: Image.Image):
     img_byte_array = img.tobytes()
     hash_md5 = hashlib.md5()
         image_md5 = get_image_md5(image)
         image_md5s.append(image_md5)
         with torch.no_grad():
+            reps = encode([image])
+        reps_list.append(reps)
         images.append(image)
     for idx in range(len(images)):
     return knowledge_base_name
+@spaces.GPU(duration=100)
 def retrieve_gradio(knowledge_base: str, query: str, topk: int):
     global model, tokenizer
     query_with_instruction = "Represent this query for retrieving relavant document: " + query
     with torch.no_grad():
+        query_rep = torch.Tensor(encode([query_with_instruction]))
     query_md5 = hashlib.md5(query.encode()).hexdigest()
     doc_reps_cat = torch.stack([torch.Tensor(i) for i in doc_reps], dim=0)
+    print(f"query_rep_shape: {query_rep.shape}, doc_reps_cat_shape: {doc_reps_cat.shape}")
     similarities = torch.matmul(query_rep, doc_reps_cat.T)
     topk_values, topk_doc_ids = torch.topk(similarities, k=topk)
     topk_values_np = topk_values.cpu().numpy()
+    topk_doc_ids_np = topk_doc_ids.squeeze().cpu().numpy()
     similarities_np = similarities.cpu().numpy()
+    print(f"topk_doc_ids_np: {topk_doc_ids_np}, topk_values_np: {topk_values_np}")
     images_topk = [Image.open(os.path.join(target_cache_dir, f"{md5s[idx]}.png")) for idx in topk_doc_ids_np]
     with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'w') as f:
+device = 'cuda'
 print("emb model load begin...")
+model_path = 'openbmb/VisRAG-Ret' # replace with your local model path
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
 model.eval()
 print("gen model load begin...")
 gen_model_path = 'openbmb/MiniCPM-V-2_6'
+gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_path, attn_implementation='sdpa', trust_remote_code=True)
+gen_model = AutoModel.from_pretrained(gen_model_path, trust_remote_code=True,
+    attn_implementation='sdpa', torch_dtype=torch.bfloat16)
 gen_model.eval()
 gen_model.to(device)
 print("gen model load success!")
     gr.Markdown("By using this demo, you agree to share your use data with us for research purpose, to help improve user experience.")
+app.launch()