Commit 6311d84 (verified) · Parent: ad7dc0c
ArvinZhuang committed: Update README.md

Files changed (1): README.md (+122 -1)
README.md (updated):

---
base_model:
- Qwen/Qwen2.5-VL-3B-Instruct
---

# Tevatron usage

For training and encoding with Tevatron, see the multimodal examples: https://github.com/texttron/tevatron/tree/main/examples/multimodal
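
The snippets below assume `torch`, `transformers`, `peft`, `qwen-vl-utils`, `pillow`, and `requests` are installed (for example, `pip install transformers peft qwen-vl-utils pillow requests`; check the Tevatron repository for the exact versions used).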

# Load the model

```python
from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration
from peft import PeftModel, PeftConfig

def get_model(peft_model_name):
    # Load the base Qwen2.5-VL model named in the adapter config,
    # apply the LoRA adapter, and merge it into the base weights.
    config = PeftConfig.from_pretrained(peft_model_name)
    base_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(config.base_model_name_or_path)
    model = PeftModel.from_pretrained(base_model, peft_model_name)
    model = model.merge_and_unload()
    model.eval()
    return model

model = get_model('Tevatron/unified-retriever-v0.1').to('cuda:0')
processor = AutoProcessor.from_pretrained('Tevatron/unified-retriever-v0.1')
```
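
`from_pretrained` loads the weights in full precision by default. If GPU memory is tight, a lower-precision load is a common option; a minimal sketch, assuming your GPU supports bfloat16 (use `torch.float16` otherwise):

```python
import torch
from transformers import Qwen2_5_VLForConditionalGeneration

# Sketch: load the base model in bfloat16 to roughly halve memory use,
# then apply the adapter as in get_model above.
base_model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    'Qwen/Qwen2.5-VL-3B-Instruct',
    torch_dtype=torch.bfloat16,
)
```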

# Encode text query

```python
import torch
from qwen_vl_utils import process_vision_info

def get_embedding(last_hidden_state: torch.Tensor) -> torch.Tensor:
    # Last-token pooling: take the hidden state at the final position,
    # then L2-normalize so dot products equal cosine similarities.
    reps = last_hidden_state[:, -1]
    reps = torch.nn.functional.normalize(reps, p=2, dim=-1)
    return reps

queries = ["Where can we see Llama?", "What is the LLaMA AI model?"]

query_messages = []
for query in queries:
    message = [
        {
            'role': 'user',
            'content': [
                {'type': 'text', 'text': f'Query: {query}'},
            ]
        }
    ]
    query_messages.append(message)

query_texts = [
    processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=False) + "<|endoftext|>"
    for msg in query_messages
]

# Text-only queries carry no images or videos, so both inputs are None here.
query_image_inputs, query_video_inputs = process_vision_info(query_messages)
query_inputs = processor(
    text=query_texts,
    images=query_image_inputs,
    videos=query_video_inputs,
    padding='longest',
    return_tensors='pt',
).to('cuda:0')

with torch.no_grad():
    output = model(**query_inputs, return_dict=True, output_hidden_states=True)
    query_embeddings = get_embedding(output.hidden_states[-1])
```

> [!NOTE]
> To encode textual documents, use the same code as the query encoding above, but omit the `'Query: '` prefix.
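
For example, a minimal sketch of text-only document encoding (illustrative texts and variable names; reuses `processor`, `model`, and `get_embedding` from above):

```python
# Sketch: identical to query encoding, minus the 'Query: ' prefix.
text_docs = [
    "Llamas are native to the Andes of South America.",
    "LLaMA is a family of large language models released by Meta AI.",
]

text_doc_messages = [
    [{'role': 'user', 'content': [{'type': 'text', 'text': doc}]}]
    for doc in text_docs
]
text_doc_texts = [
    processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=False) + "<|endoftext|>"
    for msg in text_doc_messages
]
text_doc_inputs = processor(text=text_doc_texts, padding='longest', return_tensors='pt').to('cuda:0')

with torch.no_grad():
    output = model(**text_doc_inputs, return_dict=True, output_hidden_states=True)
    text_doc_embeddings = get_embedding(output.hidden_states[-1])
```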

# Encode Document Screenshot

```python
import requests
from io import BytesIO
from PIL import Image

# URLs of the example document screenshots
url1 = "https://huggingface.co/Tevatron/dse-phi3-docmatix-v2/resolve/main/animal-llama.png"
url2 = "https://huggingface.co/Tevatron/dse-phi3-docmatix-v2/resolve/main/meta-llama.png"

response1 = requests.get(url1)
response2 = requests.get(url2)

doc_image1 = Image.open(BytesIO(response1.content))
doc_image2 = Image.open(BytesIO(response2.content))
doc_images = [doc_image1, doc_image2]

# Screenshots are passed with an empty text field; each image is resized
# to 784x784 before being turned into vision tokens.
doc_messages = []
for doc in doc_images:
    message = [
        {
            'role': 'user',
            'content': [
                {'type': 'text', 'text': ''},
                {'type': 'image', 'image': doc, 'resized_height': 784, 'resized_width': 784}
            ]
        }
    ]
    doc_messages.append(message)

doc_texts = [
    processor.apply_chat_template(msg, tokenize=False, add_generation_prompt=False) + "<|endoftext|>"
    for msg in doc_messages
]

doc_image_inputs, doc_video_inputs = process_vision_info(doc_messages)
doc_inputs = processor(
    text=doc_texts,
    images=doc_image_inputs,
    videos=doc_video_inputs,
    padding='longest',
    return_tensors='pt',
).to('cuda:0')

with torch.no_grad():
    output = model(**doc_inputs, return_dict=True, output_hidden_states=True)

doc_embeddings = get_embedding(output.hidden_states[-1])
```
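
Encoding many screenshots in a single batch can exhaust GPU memory; a minimal sketch of chunked encoding (the batch size is an arbitrary assumption, tune it to your GPU):

```python
# Sketch: encode screenshots in small batches to bound peak GPU memory.
batch_size = 8  # assumption; adjust for your hardware
all_embeddings = []
for i in range(0, len(doc_messages), batch_size):
    batch = doc_messages[i:i + batch_size]
    texts = [
        processor.apply_chat_template(m, tokenize=False, add_generation_prompt=False) + "<|endoftext|>"
        for m in batch
    ]
    images, videos = process_vision_info(batch)
    inputs = processor(text=texts, images=images, videos=videos,
                       padding='longest', return_tensors='pt').to('cuda:0')
    with torch.no_grad():
        out = model(**inputs, return_dict=True, output_hidden_states=True)
    all_embeddings.append(get_embedding(out.hidden_states[-1]))

doc_embeddings = torch.cat(all_embeddings, dim=0)
```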

# Compute Similarity

```python
from torch.nn.functional import cosine_similarity

num_queries = query_embeddings.size(0)
num_passages = doc_embeddings.size(0)

for i in range(num_queries):
    query_embedding = query_embeddings[i].unsqueeze(0)
    similarities = cosine_similarity(query_embedding, doc_embeddings)
    print(f"Similarities for Query {i+1}: {similarities.cpu().float().numpy()}")

# Similarities for Query 1: [0.3282001 0.17449486]
# Similarities for Query 2: [0.08133292 0.30867738]
```
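
Because `get_embedding` L2-normalizes its outputs, cosine similarity here is just a dot product, so the full query-document score matrix can be computed in one matrix product; a small sketch for ranking:

```python
# Sketch: score every query against every document in a single matmul.
# Valid because the embeddings above are already L2-normalized.
scores = query_embeddings @ doc_embeddings.T  # shape: [num_queries, num_docs]
top_scores, top_idx = scores.topk(k=doc_embeddings.size(0), dim=1)
print(top_idx)  # per-query document indices, best match first
```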