File size: 5,372 Bytes
d336c8b
0b155d7
d336c8b
0b155d7
d336c8b
 
63c44d1
17871e5
d336c8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a814c2f
 
63c44d1
 
8b93855
 
09ffd9d
 
b9ab5e0
 
63c44d1
d336c8b
17871e5
 
0cdc59c
8b93855
a814c2f
17871e5
612a9ab
17871e5
63c44d1
17871e5
09ffd9d
 
3264246
 
612a9ab
 
 
3264246
612a9ab
 
17871e5
3264246
17871e5
 
3264246
 
17871e5
 
3264246
 
17871e5
 
 
 
 
 
 
6c9df61
 
17871e5
 
 
 
d336c8b
63c44d1
09ffd9d
 
63c44d1
 
 
 
 
 
 
 
 
 
 
 
 
612a9ab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63c44d1
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# import numpy as np
import requests
import torch
from PIL import Image
from torch.nn.functional import cosine_similarity
from transformers import AutoImageProcessor, AutoModel
from transformers import ViTImageProcessor, ViTModel
from transformers import pipeline

# import transformers
#
# print(transformers.__version__)
#
# img_urls = ["https://img0.baidu.com/it/u=3704428154,2884159591&fm=253&fmt=auto&app=138&f=JPEG?w=889&h=500",
#             "https://img0.baidu.com/it/u=3704428154,2884159591&fm=253&fmt=auto&app=138&f=JPEG?w=889&h=500"]
#
# image_real = Image.open(requests.get(img_urls[0], stream=True).raw).convert("RGB")
# image_gen = Image.open(requests.get(img_urls[1], stream=True).raw).convert("RGB")
#
# # DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Device shared by the inference helpers in this module. It is pinned to the
# CPU; the CUDA auto-detecting variant is kept commented out just above.
DEVICE = torch.device('cpu')
# pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-384", device=DEVICE, pool=True)
#
# # 1提取图片特征向量
# outputs = pipe([image_real, image_gen])
#
# # get the length of a single output
# print(len(outputs[0][0]))
# # show outputs
# print(outputs)
#
# # 768
# # [[[-0.03909236937761307, 0.43381670117378235, -0.06913255900144577,
#
# # 2计算图片相似度
# similarity_score = cosine_similarity(torch.Tensor(outputs[0]),
#                                      torch.Tensor(outputs[1]), dim=1)
#
# print(similarity_score)

# tensor([0.6043])

# pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-224", device=DEVICE)
# output = pipe(image_real)
#
# # 其中第一个维度是批量大小,最后两个维度是嵌入形状。
# print(np.array(outputs).shape)
# # (1, 197, 768)


# 第二种方式推理图片相似度
# processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
# model = AutoModel.from_pretrained("google/vit-base-patch16-224").to(DEVICE)
# processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224-in21k")
# model = AutoModel.from_pretrained("google/vit-base-patch16-224-in21k").to(DEVICE)
# processor = AutoImageProcessor.from_pretrained("chanhua/autotrain-izefx-v3qh0")
# model = AutoModel.from_pretrained("chanhua/autotrain-izefx-v3qh0").to(DEVICE)
# processor = ViTImageProcessor.from_pretrained('google/vit-large-patch16-224-in21k')
# model = ViTModel.from_pretrained('google/vit-large-patch16-224-in21k')
# processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')
# model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')


# tensor([0.6061], device='cuda:0', grad_fn=<SumBackward1>)

# pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-384", device=DEVICE, pool=True)
# pipe = pipeline(task="image-feature-extraction", model_name="chanhua/autotrain-izefx-v3qh0", device=DEVICE, pool=True)
# pipe = pipeline(task="image-feature-extraction", model_name="google/vit-base-patch16-224", device=DEVICE, pool=True, revision="29e7a1e183")


#  推理
def infer4(url1, url2):
    """Score how similar two images are using pooled ViT pipeline features.

    Both images are opened with ``Image.open`` (the HTTP-fetching variant is
    left commented out), converted to RGB, embedded with the
    ``image-feature-extraction`` pipeline and compared by cosine similarity.

    Args:
        url1: Anything ``Image.open`` accepts for the first image.
        url2: Anything ``Image.open`` accepts for the second image.

    Returns:
        The cosine similarity as a Python float, or ``0.0`` when any step
        raises an exception (the error is printed, not propagated).
    """
    try:
        print("进入推理")
        print("打开图片1")
        # image_real = Image.open(requests.get(url1, stream=True).raw).convert("RGB")
        # ``convert`` returns an independent in-memory image, so the file
        # handle can be released as soon as the ``with`` block ends.
        with Image.open(url1) as first_file:
            image_real = first_file.convert('RGB')

        print("打开图片2")
        # image_gen = Image.open(requests.get(url2, stream=True).raw).convert("RGB")
        with Image.open(url2) as second_file:
            image_gen = second_file.convert('RGB')

        # The images are opened first so an unreadable file fails before the
        # (much more expensive) model load.
        # ``pipeline`` selects the checkpoint through ``model=``; the former
        # ``model_name=`` keyword is not that parameter, so the requested
        # checkpoint was not the one being loaded.
        pipe = pipeline(task="image-feature-extraction", model="google/vit-base-patch16-224-in21k", device=DEVICE, pool=True)

        print("利用模型获取图片特征向量")
        outputs = pipe([image_real, image_gen])

        print(f"得到图片特征向量计算相似度: {outputs}")

        # NOTE(review): with pool=True each entry of ``outputs`` is expected
        # to be a nested list holding a single pooled vector, so ``dim=1``
        # yields a one-element tensor -- confirm against the pipeline docs.
        similarity_score = cosine_similarity(torch.Tensor(outputs[0]), torch.Tensor(outputs[1]), dim=1)

        print(f"得到图片相似度: {similarity_score}")

        t_cpu = similarity_score.cpu()

        # Extract the single value as a plain Python float.
        return t_cpu.item()

    except Exception as e:
        # Deliberate best-effort: report the failure and fall back to 0.0.
        print(f"发生了一个错误: {e}")

        return 0.0
    finally:
        # Runs whether or not an exception occurred.
        print("这是finally块")

#  推理
def infer2(url):
    """Embed a single image with the ViT-large (in21k) checkpoint.

    The image processor and model are loaded from
    ``google/vit-large-patch16-224-in21k`` on every call, the image is opened
    with ``Image.open`` and converted to RGB, and one forward pass is run on
    ``DEVICE``.

    Args:
        url: Anything ``Image.open`` accepts (the HTTP-fetching variants are
            left commented out below).

    Returns:
        The model's ``pooler_output`` tensor for the image, computed without
        gradient tracking.
    """
    processor = AutoImageProcessor.from_pretrained('google/vit-large-patch16-224-in21k')
    # Keep the model on the same device as the inputs; previously only the
    # inputs were moved, which fails as soon as DEVICE is not the CPU.
    model = ViTModel.from_pretrained('google/vit-large-patch16-224-in21k').to(DEVICE)
    # image_real = Image.open(requests.get(img_urls[0], stream=True).raw).convert("RGB")
    # image = Image.open(requests.get(url, stream=True).raw).convert("RGB")
    # ``convert`` returns an independent in-memory image, so the file handle
    # can be released as soon as the ``with`` block ends.
    with Image.open(url) as source:
        image = source.convert('RGB')

    inputs = processor(images=image, return_tensors="pt").to(DEVICE)

    # Pure inference: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    # last_hidden_states = outputs.last_hidden_state

    return outputs.pooler_output

#  计算相似度
def infer1(image1, image2):
    """Return the cosine similarity between the embeddings of two images.

    Each argument is passed to ``infer2`` to obtain an embedding; the two
    embeddings are then compared along ``dim=1``.

    Args:
        image1: First image, forwarded unchanged to ``infer2``.
        image2: Second image, forwarded unchanged to ``infer2``.

    Returns:
        The similarity as a Python float, or ``0.0`` when any step raises an
        exception (the error is printed, not propagated).
    """
    try:
        # Embed the images one after the other, first image first.
        vec_real, vec_gen = (infer2(source) for source in (image1, image2))
        score = cosine_similarity(vec_real, vec_gen, dim=1)
        print(score)
        # Bring the tensor to the CPU, then unwrap its single value.
        return score.cpu().item()
    except Exception as err:
        print(f"发生了一个错误: {err}")
        return 0.0
    finally:
        # Runs whether or not an exception occurred.
        print("这是finally块")