tcy6 committed
Commit 0d1fb65 · Parent: 38c0a43

Update app.py

Files changed (1): app.py (+13, -48)
app.py CHANGED
@@ -2,7 +2,6 @@ import tqdm
 from PIL import Image
 import hashlib
 import torch
-import torch.nn.functional as F
 import fitz
 import threading
 import gradio as gr
@@ -16,41 +15,9 @@ import os
 import numpy as np
 import json
 
-cache_dir = '/data/KB'
+cache_dir = '/data/kb_cache'
 os.makedirs(cache_dir, exist_ok=True)
 
-@spaces.GPU(duration=100)
-def weighted_mean_pooling(hidden, attention_mask):
-    attention_mask_ = attention_mask * attention_mask.cumsum(dim=1)
-    s = torch.sum(hidden * attention_mask_.unsqueeze(-1).float(), dim=1)
-    d = attention_mask_.sum(dim=1, keepdim=True).float()
-    reps = s / d
-    return reps
-
-@spaces.GPU(duration=100)
-@torch.no_grad()
-def encode(text_or_image_list):
-    global model, tokenizer
-    if (isinstance(text_or_image_list[0], str)):
-        inputs = {
-            "text": text_or_image_list,
-            'image': [None] * len(text_or_image_list),
-            'tokenizer': tokenizer
-        }
-    else:
-        inputs = {
-            "text": [''] * len(text_or_image_list),
-            'image': text_or_image_list,
-            'tokenizer': tokenizer
-        }
-    outputs = model(**inputs)
-    attention_mask = outputs.attention_mask
-    hidden = outputs.last_hidden_state
-
-    reps = weighted_mean_pooling(hidden, attention_mask)
-    embeddings = F.normalize(reps, p=2, dim=1).detach().cpu().numpy()
-    return embeddings
-
 def get_image_md5(img: Image.Image):
     img_byte_array = img.tobytes()
     hash_md5 = hashlib.md5()
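For reference, the deleted encode helper pooled the encoder's last hidden state with a position-weighted mean and then L2-normalized it, which is also why the torch.nn.functional import goes away in the first hunk; the new code instead relies on the checkpoint's own forward pass, presumably performing equivalent pooling internally, exposing the result as .reps. A minimal standalone sketch of what the removed helper computed, using dummy tensors in place of real model outputs:

import torch
import torch.nn.functional as F

# Stand-ins for model outputs: batch 2, sequence length 5, hidden size 8.
hidden = torch.randn(2, 5, 8)
attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                               [1, 1, 1, 1, 1]])

# Non-padding tokens get linearly increasing weights 1, 2, 3, ...
weights = attention_mask * attention_mask.cumsum(dim=1)
s = torch.sum(hidden * weights.unsqueeze(-1).float(), dim=1)
d = weights.sum(dim=1, keepdim=True).float()
reps = s / d                                # [2, 8] pooled embeddings
embeddings = F.normalize(reps, p=2, dim=1)  # unit-norm rows, as the old helper returned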
@@ -63,7 +30,7 @@ def calculate_md5_from_binary(binary_data):
     hash_md5.update(binary_data)
     return hash_md5.hexdigest()
 
-@spaces.GPU(duration=50)
+@spaces.GPU(duration=100)
 def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
     global model, tokenizer
     model.eval()
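spaces.GPU is the Hugging Face ZeroGPU decorator, and duration is the per-call GPU allocation window in seconds, so this hunk doubles the time PDF ingestion may hold the device. A minimal sketch of the pattern, assuming a ZeroGPU Space with the spaces package installed (embed_on_gpu is a hypothetical stand-in):

import spaces

@spaces.GPU(duration=100)   # request a GPU for up to 100 s per invocation
def embed_on_gpu(batch):
    # hypothetical body: code here runs with CUDA available
    ...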
@@ -90,8 +57,8 @@ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
         image_md5 = get_image_md5(image)
         image_md5s.append(image_md5)
         with torch.no_grad():
-            reps = encode([image])
-            reps_list.append(reps)
+            reps = model(text=[''], image=[image], tokenizer=tokenizer).reps
+            reps_list.append(reps.squeeze(0).cpu().numpy())
         images.append(image)
 
     for idx in range(len(images)):
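A subtlety in this hunk: encode returned a [1, D] numpy array per image, whereas the new code caches a squeezed [D] vector, so the later torch.stack over the cached reps yields a clean [N, D] matrix instead of [N, 1, D]. A toy illustration of the difference:

import torch

D = 4
# Old style: each cached rep keeps its batch dimension -> stack gives [N, 1, D].
old_reps = [torch.randn(1, D).numpy() for _ in range(3)]
print(torch.stack([torch.Tensor(r) for r in old_reps], dim=0).shape)  # torch.Size([3, 1, 4])

# New style: squeeze(0) first -> stack gives the [N, D] matrix that the
# retrieval matmul below expects.
new_reps = [torch.randn(1, D).squeeze(0).numpy() for _ in range(3)]
print(torch.stack([torch.Tensor(r) for r in new_reps], dim=0).shape)  # torch.Size([3, 4])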
@@ -108,7 +75,7 @@ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
 
     return knowledge_base_name
 
-@spaces.GPU(duration=100)
+# @spaces.GPU
 def retrieve_gradio(knowledge_base: str, query: str, topk: int):
     global model, tokenizer
 
@@ -128,23 +95,22 @@ def retrieve_gradio(knowledge_base: str, query: str, topk: int):
 
     query_with_instruction = "Represent this query for retrieving relavant document: " + query
     with torch.no_grad():
-        query_rep = torch.Tensor(encode([query_with_instruction]))
+        query_rep = model(text=[query_with_instruction], image=[None], tokenizer=tokenizer).reps.squeeze(0).cpu()
 
     query_md5 = hashlib.md5(query.encode()).hexdigest()
 
     doc_reps_cat = torch.stack([torch.Tensor(i) for i in doc_reps], dim=0)
 
-    print(f"query_rep_shape: {query_rep.shape}, doc_reps_cat_shape: {doc_reps_cat.shape}")
     similarities = torch.matmul(query_rep, doc_reps_cat.T)
 
     topk_values, topk_doc_ids = torch.topk(similarities, k=topk)
 
     topk_values_np = topk_values.cpu().numpy()
 
-    topk_doc_ids_np = topk_doc_ids.squeeze().cpu().numpy()
+    topk_doc_ids_np = topk_doc_ids.cpu().numpy()
 
     similarities_np = similarities.cpu().numpy()
-    print(f"topk_doc_ids_np: {topk_doc_ids_np}, topk_values_np: {topk_values_np}")
+
     images_topk = [Image.open(os.path.join(target_cache_dir, f"{md5s[idx]}.png")) for idx in topk_doc_ids_np]
 
     with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'w') as f:
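With the query embedding squeezed to 1-D, the similarity math comes out 1-D as well, which is why the .squeeze() on topk_doc_ids can go: a [D] query against an [N, D] matrix gives [N] scores, and torch.topk then returns flat [k] indices that index md5s directly. Verifying the shapes with random tensors:

import torch

N, D, k = 10, 4, 3
query_rep = torch.randn(D)          # squeezed query embedding, as in the new code
doc_reps_cat = torch.randn(N, D)    # stacked document embeddings

similarities = torch.matmul(query_rep, doc_reps_cat.T)    # shape [N]
topk_values, topk_doc_ids = torch.topk(similarities, k=k)
print(topk_doc_ids.shape)           # torch.Size([3]) -- already flat, no squeeze needed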
@@ -204,10 +170,10 @@ def downvote(knowledge_base, query):
 
 
 
-device = 'cuda'
+device = 'cuda'
 
 print("emb model load begin...")
-model_path = 'openbmb/VisRAG-Ret' # replace with your local model path
+model_path = 'RhapsodyAI/minicpm-visual-embedding-v0' # replace with your local model path
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
 model.eval()
@@ -216,9 +182,8 @@ print("emb model load success!")
 
 print("gen model load begin...")
 gen_model_path = 'openbmb/MiniCPM-V-2_6'
-gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_path, attn_implementation='sdpa', trust_remote_code=True)
-gen_model = AutoModel.from_pretrained(gen_model_path, trust_remote_code=True,
-                                      attn_implementation='sdpa', torch_dtype=torch.bfloat16)
+gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_path, trust_remote_code=True)
+gen_model = AutoModel.from_pretrained(gen_model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
 gen_model.eval()
 gen_model.to(device)
 print("gen model load success!")
@@ -291,4 +256,4 @@ with gr.Blocks() as app:
     gr.Markdown("By using this demo, you agree to share your use data with us for research purpose, to help improve user experience.")
 
 
-app.launch()
+app.launch()