tcy6 committed
Commit 0d1fb65 · Parent: 38c0a43

Update app.py

Files changed (1): app.py (+13, -48)
app.py CHANGED
@@ -2,7 +2,6 @@ import tqdm
 from PIL import Image
 import hashlib
 import torch
-import torch.nn.functional as F
 import fitz
 import threading
 import gradio as gr
@@ -16,41 +15,9 @@ import os
 import numpy as np
 import json
 
-cache_dir = '/data/KB'
+cache_dir = '/data/kb_cache'
 os.makedirs(cache_dir, exist_ok=True)
 
-@spaces.GPU(duration=100)
-def weighted_mean_pooling(hidden, attention_mask):
-    attention_mask_ = attention_mask * attention_mask.cumsum(dim=1)
-    s = torch.sum(hidden * attention_mask_.unsqueeze(-1).float(), dim=1)
-    d = attention_mask_.sum(dim=1, keepdim=True).float()
-    reps = s / d
-    return reps
-
-@spaces.GPU(duration=100)
-@torch.no_grad()
-def encode(text_or_image_list):
-    global model, tokenizer
-    if (isinstance(text_or_image_list[0], str)):
-        inputs = {
-            "text": text_or_image_list,
-            'image': [None] * len(text_or_image_list),
-            'tokenizer': tokenizer
-        }
-    else:
-        inputs = {
-            "text": [''] * len(text_or_image_list),
-            'image': text_or_image_list,
-            'tokenizer': tokenizer
-        }
-    outputs = model(**inputs)
-    attention_mask = outputs.attention_mask
-    hidden = outputs.last_hidden_state
-
-    reps = weighted_mean_pooling(hidden, attention_mask)
-    embeddings = F.normalize(reps, p=2, dim=1).detach().cpu().numpy()
-    return embeddings
-
 def get_image_md5(img: Image.Image):
     img_byte_array = img.tobytes()
     hash_md5 = hashlib.md5()
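For reference, the deleted encode helper pooled the encoder's last hidden state with a position-weighted mean and then L2-normalized it, which is also why the torch.nn.functional import goes away in the first hunk; the new code instead relies on the checkpoint's own forward pass, presumably performing equivalent pooling internally, exposing the result as .reps. A minimal standalone sketch of what the removed helper computed, using dummy tensors in place of real model outputs:

import torch
import torch.nn.functional as F

# Stand-ins for model outputs: batch 2, sequence length 5, hidden size 8.
hidden = torch.randn(2, 5, 8)
attention_mask = torch.tensor([[1, 1, 1, 0, 0],
                               [1, 1, 1, 1, 1]])

# Non-padding tokens get linearly increasing weights 1, 2, 3, ...
weights = attention_mask * attention_mask.cumsum(dim=1)
s = torch.sum(hidden * weights.unsqueeze(-1).float(), dim=1)
d = weights.sum(dim=1, keepdim=True).float()
reps = s / d                                # [2, 8] pooled embeddings
embeddings = F.normalize(reps, p=2, dim=1)  # unit-norm rows, as the old helper returned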
@@ -63,7 +30,7 @@ def calculate_md5_from_binary(binary_data):
     hash_md5.update(binary_data)
     return hash_md5.hexdigest()
 
-@spaces.GPU(duration=50)
+@spaces.GPU(duration=100)
 def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
     global model, tokenizer
     model.eval()
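spaces.GPU is the Hugging Face ZeroGPU decorator, and duration is the per-call GPU allocation window in seconds, so this hunk doubles the time PDF ingestion may hold the device. A minimal sketch of the pattern, assuming a ZeroGPU Space with the spaces package installed (embed_on_gpu is a hypothetical stand-in):

import spaces

@spaces.GPU(duration=100)   # request a GPU for up to 100 s per invocation
def embed_on_gpu(batch):
    # hypothetical body: code here runs with CUDA available
    ...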
@@ -90,8 +57,8 @@ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
         image_md5 = get_image_md5(image)
         image_md5s.append(image_md5)
         with torch.no_grad():
-            reps = encode([image])
-            reps_list.append(reps)
+            reps = model(text=[''], image=[image], tokenizer=tokenizer).reps
+            reps_list.append(reps.squeeze(0).cpu().numpy())
         images.append(image)
 
     for idx in range(len(images)):
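A subtlety in this hunk: encode returned a [1, D] numpy array per image, whereas the new code caches a squeezed [D] vector, so the later torch.stack over the cached reps yields a clean [N, D] matrix instead of [N, 1, D]. A toy illustration of the difference:

import torch

D = 4
# Old style: each cached rep keeps its batch dimension -> stack gives [N, 1, D].
old_reps = [torch.randn(1, D).numpy() for _ in range(3)]
print(torch.stack([torch.Tensor(r) for r in old_reps], dim=0).shape)  # torch.Size([3, 1, 4])

# New style: squeeze(0) first -> stack gives the [N, D] matrix that the
# retrieval matmul below expects.
new_reps = [torch.randn(1, D).squeeze(0).numpy() for _ in range(3)]
print(torch.stack([torch.Tensor(r) for r in new_reps], dim=0).shape)  # torch.Size([3, 4])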
@@ -108,7 +75,7 @@ def add_pdf_gradio(pdf_file_binary, progress=gr.Progress()):
 
     return knowledge_base_name
 
-@spaces.GPU(duration=100)
+# @spaces.GPU
 def retrieve_gradio(knowledge_base: str, query: str, topk: int):
     global model, tokenizer
 
@@ -128,23 +95,22 @@ def retrieve_gradio(knowledge_base: str, query: str, topk: int):
 
     query_with_instruction = "Represent this query for retrieving relavant document: " + query
     with torch.no_grad():
-        query_rep = torch.Tensor(encode([query_with_instruction]))
+        query_rep = model(text=[query_with_instruction], image=[None], tokenizer=tokenizer).reps.squeeze(0).cpu()
 
     query_md5 = hashlib.md5(query.encode()).hexdigest()
 
     doc_reps_cat = torch.stack([torch.Tensor(i) for i in doc_reps], dim=0)
 
-    print(f"query_rep_shape: {query_rep.shape}, doc_reps_cat_shape: {doc_reps_cat.shape}")
     similarities = torch.matmul(query_rep, doc_reps_cat.T)
 
     topk_values, topk_doc_ids = torch.topk(similarities, k=topk)
 
     topk_values_np = topk_values.cpu().numpy()
 
-    topk_doc_ids_np = topk_doc_ids.squeeze().cpu().numpy()
+    topk_doc_ids_np = topk_doc_ids.cpu().numpy()
 
     similarities_np = similarities.cpu().numpy()
-    print(f"topk_doc_ids_np: {topk_doc_ids_np}, topk_values_np: {topk_values_np}")
+
     images_topk = [Image.open(os.path.join(target_cache_dir, f"{md5s[idx]}.png")) for idx in topk_doc_ids_np]
 
     with open(os.path.join(target_cache_dir, f"q-{query_md5}.json"), 'w') as f:
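With the query embedding squeezed to 1-D, the similarity math comes out 1-D as well, which is why the .squeeze() on topk_doc_ids can go: a [D] query against an [N, D] matrix gives [N] scores, and torch.topk then returns flat [k] indices that index md5s directly. Verifying the shapes with random tensors:

import torch

N, D, k = 10, 4, 3
query_rep = torch.randn(D)          # squeezed query embedding, as in the new code
doc_reps_cat = torch.randn(N, D)    # stacked document embeddings

similarities = torch.matmul(query_rep, doc_reps_cat.T)    # shape [N]
topk_values, topk_doc_ids = torch.topk(similarities, k=k)
print(topk_doc_ids.shape)           # torch.Size([3]) -- already flat, no squeeze needed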
@@ -204,10 +170,10 @@ def downvote(knowledge_base, query):
 
 
 
-device = 'cuda'
+device = 'cuda'
 
 print("emb model load begin...")
-model_path = 'openbmb/VisRAG-Ret' # replace with your local model path
+model_path = 'RhapsodyAI/minicpm-visual-embedding-v0' # replace with your local model path
 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
 model.eval()
@@ -216,9 +182,8 @@ print("emb model load success!")
 
 print("gen model load begin...")
 gen_model_path = 'openbmb/MiniCPM-V-2_6'
-gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_path, attn_implementation='sdpa', trust_remote_code=True)
-gen_model = AutoModel.from_pretrained(gen_model_path, trust_remote_code=True,
-                                      attn_implementation='sdpa', torch_dtype=torch.bfloat16)
+gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_path, trust_remote_code=True)
+gen_model = AutoModel.from_pretrained(gen_model_path, trust_remote_code=True, attn_implementation='sdpa', torch_dtype=torch.bfloat16)
 gen_model.eval()
 gen_model.to(device)
 print("gen model load success!")
@@ -291,4 +256,4 @@ with gr.Blocks() as app:
     gr.Markdown("By using this demo, you agree to share your use data with us for research purpose, to help improve user experience.")
 
 
-app.launch()
+app.launch()