tcy6 committed on
Commit
80c1548
1 Parent(s): 0d1fb65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -12
app.py CHANGED
@@ -2,6 +2,7 @@ import tqdm
2
  from PIL import Image
3
  import hashlib
4
  import torch
 
5
  import fitz
6
  import threading
7
  import gradio as gr
@@ -15,9 +16,41 @@ import os
15
  import numpy as np
16
  import json
17
 
18
- cache_dir = '/data/kb_cache'
19
  os.makedirs(cache_dir, exist_ok=True)
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  def get_image_md5(img: Image.Image):
22
  img_byte_array = img.tobytes()
23
  hash_md5 = hashlib.md5()
@@ -57,8 +90,8 @@ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
57
  image_md5 = get_image_md5(image)
58
  image_md5s.append(image_md5)
59
  with torch.no_grad():
60
- reps = model(text=[''], image=[image], tokenizer=tokenizer).reps
61
- reps_list.append(reps.squeeze(0).cpu().numpy())
62
  images.append(image)
63
 
64
  for idx in range(len(images)):
@@ -75,7 +108,7 @@ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
75
 
76
  return knowledge_base_name
77
 
78
- # @spaces.GPU
79
  def retrieve_gradio(knowledge_base: str, query: str, topk: int):
80
  global model, tokenizer
81
 
@@ -95,22 +128,23 @@ def retrieve_gradio(knowledge_base: str, query: str, topk: int):
95
 
96
  query_with_instruction = "Represent this query for retrieving relavant document: " + query
97
  with torch.no_grad():
98
- query_rep = model(text=[query_with_instruction], image=[None], tokenizer=tokenizer).reps.squeeze(0).cpu()
99
 
100
  query_md5 = hashlib.md5(query.encode()).hexdigest()
101
 
102
  doc_reps_cat = torch.stack([torch.Tensor(i) for i in doc_reps], dim=0)
103
 
 
104
  similarities = torch.matmul(query_rep, doc_reps_cat.T)
105
 
106
  topk_values, topk_doc_ids = torch.topk(similarities, k=topk)
107
 
108
  topk_values_np = topk_values.cpu().numpy()
109
 
110
- topk_doc_ids_np = topk_doc_ids.cpu().numpy()
111
 
112
  similarities_np = similarities.cpu().numpy()
113
-
114
  images_topk = [Image.open(os.path.join(target_cache_dir, f"{md5s[idx]}.png")) for idx in topk_doc_ids_np]
115
 
116
  with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'w') as f:
@@ -170,10 +204,10 @@ def downvote(knowledge_base, query):
170
 
171
 
172
 
173
- device = 'cuda'
174
 
175
  print("emb model load begin...")
176
- model_path = 'RhapsodyAI/minicpm-visual-embedding-v0' # replace with your local model path
177
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
178
  model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
179
  model.eval()
@@ -182,8 +216,9 @@ print("emb model load success!")
182
 
183
  print("gen model load begin...")
184
  gen_model_path = 'openbmb/MiniCPM-V-2_6'
185
- gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_path, trust_remote_code=True)
186
- gen_model = AutoModel.from_pretrained(gen_model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
 
187
  gen_model.eval()
188
  gen_model.to(device)
189
  print("gen model load success!")
@@ -256,4 +291,4 @@ with gr.Blocks() as app:
256
  gr.Markdown("By using this demo, you agree to share your use data with us for research purpose, to help improve user experience.")
257
 
258
 
259
- app.launch()
 
2
  from PIL import Image
3
  import hashlib
4
  import torch
5
+ import torch.nn.functional as F
6
  import fitz
7
  import threading
8
  import gradio as gr
 
16
  import numpy as np
17
  import json
18
 
19
+ cache_dir = '/data/KB'
20
  os.makedirs(cache_dir, exist_ok=True)
21
 
22
+ @spaces.GPU(duration=100)
23
def weighted_mean_pooling(hidden, attention_mask):
    """Pool per-token states into one vector per row with position-based weights.

    Each position's weight is ``attention_mask * attention_mask.cumsum(dim=1)``.
    For a 0/1 mask this means the k-th kept token in a row gets weight k and
    masked-out positions get weight 0, so later tokens count more than
    earlier ones.

    Args:
        hidden: Tensor indexed as (batch, sequence, feature); the weights are
            broadcast over the last dim and the sum is taken over dim 1.
        attention_mask: Tensor indexed as (batch, sequence), aligned with the
            first two dims of ``hidden``.

    Returns:
        The weighted sum of ``hidden`` over dim 1 divided by the per-row sum
        of the weights (dim 1 is reduced away).
    """
    position_weights = attention_mask * attention_mask.cumsum(dim=1)
    weighted_sum = torch.sum(
        hidden * position_weights.unsqueeze(-1).float(), dim=1
    )
    weight_total = position_weights.sum(dim=1, keepdim=True).float()
    # NOTE(review): a row whose mask is entirely zero makes weight_total 0, so
    # the division below would give NaN for float inputs -- confirm callers
    # never pass such rows.
    return weighted_sum / weight_total
29
+
30
+ @spaces.GPU(duration=100)
31
+ @torch.no_grad()
32
def encode(text_or_image_list):
    """Embed a batch of texts or images with the module-level retrieval model.

    The type of the first element decides how the whole batch is handled: if
    it is a ``str`` the items are passed as ``text`` with ``image`` set to
    ``None`` for each entry; otherwise the items are passed as ``image`` with
    an empty string as ``text`` for each entry.

    Args:
        text_or_image_list: Non-empty list whose items are all strings or all
            images (mixed batches are not distinguished; only element 0 is
            inspected).

    Returns:
        A NumPy array holding one L2-normalized embedding per input item,
        obtained by weighted mean pooling of the model's last hidden state.

    Raises:
        IndexError: If ``text_or_image_list`` is empty.
    """
    global model, tokenizer
    batch_size = len(text_or_image_list)
    if isinstance(text_or_image_list[0], str):
        texts, images = text_or_image_list, [None] * batch_size
    else:
        texts, images = [''] * batch_size, text_or_image_list

    outputs = model(text=texts, image=images, tokenizer=tokenizer)
    reps = weighted_mean_pooling(
        outputs.last_hidden_state, outputs.attention_mask
    )
    # Normalize along the feature dim, then move to host memory as NumPy.
    return F.normalize(reps, p=2, dim=1).detach().cpu().numpy()
53
+
54
  def get_image_md5(img: Image.Image):
55
  img_byte_array = img.tobytes()
56
  hash_md5 = hashlib.md5()
 
90
  image_md5 = get_image_md5(image)
91
  image_md5s.append(image_md5)
92
  with torch.no_grad():
93
+ reps = encode([image])
94
+ reps_list.append(reps)
95
  images.append(image)
96
 
97
  for idx in range(len(images)):
 
108
 
109
  return knowledge_base_name
110
 
111
+ @spaces.GPU(duration=100)
112
  def retrieve_gradio(knowledge_base: str, query: str, topk: int):
113
  global model, tokenizer
114
 
 
128
 
129
  query_with_instruction = "Represent this query for retrieving relavant document: " + query
130
  with torch.no_grad():
131
+ query_rep = torch.Tensor(encode([query_with_instruction]))
132
 
133
  query_md5 = hashlib.md5(query.encode()).hexdigest()
134
 
135
  doc_reps_cat = torch.stack([torch.Tensor(i) for i in doc_reps], dim=0)
136
 
137
+ print(f"query_rep_shape: {query_rep.shape}, doc_reps_cat_shape: {doc_reps_cat.shape}")
138
  similarities = torch.matmul(query_rep, doc_reps_cat.T)
139
 
140
  topk_values, topk_doc_ids = torch.topk(similarities, k=topk)
141
 
142
  topk_values_np = topk_values.cpu().numpy()
143
 
144
+ topk_doc_ids_np = topk_doc_ids.squeeze().cpu().numpy()
145
 
146
  similarities_np = similarities.cpu().numpy()
147
+ print(f"topk_doc_ids_np: {topk_doc_ids_np}, topk_values_np: {topk_values_np}")
148
  images_topk = [Image.open(os.path.join(target_cache_dir, f"{md5s[idx]}.png")) for idx in topk_doc_ids_np]
149
 
150
  with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'w') as f:
 
204
 
205
 
206
 
207
+ device = 'cuda'
208
 
209
  print("emb model load begin...")
210
+ model_path = 'openbmb/VisRAG-Ret' # replace with your local model path
211
  tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
212
  model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
213
  model.eval()
 
216
 
217
  print("gen model load begin...")
218
  gen_model_path = 'openbmb/MiniCPM-V-2_6'
219
+ gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_path, attn_implementation='sdpa', trust_remote_code=True)
220
+ gen_model = AutoModel.from_pretrained(gen_model_path, trust_remote_code=True,
221
+ attn_implementation='sdpa', torch_dtype=torch.bfloat16)
222
  gen_model.eval()
223
  gen_model.to(device)
224
  print("gen model load success!")
 
291
  gr.Markdown("By using this demo, you agree to share your use data with us for research purpose, to help improve user experience.")
292
 
293
 
294
+ app.launch()