Remove images
- README.md +4 -30
- vendiscore.py +20 -30
README.md
CHANGED
````diff
@@ -55,7 +55,7 @@ To calculate the score, pass a list of samples and a similarity function or a st
 - **k**: a pairwise similarity function, or a string identifying a predefined
   similarity function. If k is a pairwise similarity function, it should
   be symmetric and k(x, x) = 1.
-  Options: ngram_overlap, text_embeddings
+  Options: ngram_overlap, text_embeddings.
 - **score_K**: if true, samples is an n x n similarity matrix K.
 - **score_X**: if true, samples is an n x d feature matrix X.
 - **score_dual**: if true, samples is an n x d feature matrix X and we will
@@ -63,20 +63,15 @@ To calculate the score, pass a list of samples and a similarity function or a st
 - **normalize**: if true, normalize the similarity scores.
 - **model (optional)**: if k is "text_embeddings", a model mapping sentences to
   embeddings (output should be an object with an attribute called
-  `pooler_output` or `last_hidden_state`).
-  model mapping images to embeddings.
+  `pooler_output` or `last_hidden_state`).
 - **tokenizer (optional)**: if k is "text_embeddings" or "ngram_overlap", a
   tokenizer mapping strings to lists.
-- **transform (optional)**: if k is "image_embeddings", a torchvision transform
-  to apply to the samples.
 - **model_path (optional)**: if k is "text_embeddings", the name of a model on
   the HuggingFace hub.
 - **ns (optional)**: if k is "ngram_overlap", the values of n to calculate.
-- **batch_size (optional)**: batch size to use if k is "text_embedding"
-  "image_embedding".
+- **batch_size (optional)**: batch size to use if k is "text_embedding".
 - **device (optional)**: a string (e.g. "cuda", "cpu") or torch.device
-  identifying the device to use if k is "text_embedding"
-  or "image_embedding".
+  identifying the device to use if k is "text_embedding".
 
 
 ### Output Values
@@ -116,27 +111,6 @@ to compute the Vendi Score using the covariance matrix, `X @ X.T`.
 {'VS': 1.99989...}
 ```
 
-Image similarity can be calculated using inner products between pixel vectors or between embeddings from a neural network.
-The default embeddings are from the pool-2048 layer of the torchvision version of the Inception v3 model; other embedding functions can be passed to the `model` argument.
-```
->>> from torchvision import datasets
->>> mnist = datasets.MNIST("data/mnist", train=False, download=True)
->>> digits = [[x for x, y in mnist if y == c] for c in range(10)]
->>> pixel_vs = [vendiscore.compute(samples=imgs, k="pixels") for imgs in digits]
->>> inception_vs = [vendiscore.compute(samples=imgs, k="image_embeddings", batch_size=64, device="cuda") for imgs in digits]
->>> for y, (pvs, ivs) in enumerate(zip(pixel_vs, inception_vs)): print(f"{y}\t{pvs:.02f}\t{ivs:02f}")
-0	7.68	3.45
-1	5.31	3.50
-2	12.18	3.62
-3	9.97	2.97
-4	11.10	3.75
-5	13.51	3.16
-6	9.06	3.63
-7	9.58	4.07
-8	9.69	3.74
-9	8.56	3.43
-```
-
 Text similarity can be calculated using n-gram overlap or using inner products between embeddings from a neural network.
 ```
 >>> vendiscore = evaluate.load("danf0/vendiscore", "text")
````
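One detail worth keeping in mind when reading the `score_X`/`score_dual` options above: the Vendi Score is the exponential of the Shannon entropy of the eigenvalues of the scaled similarity matrix K/n, and `X @ X.T` and `X.T @ X` share their nonzero eigenvalues, so either side of the Gram matrix gives the same score. A minimal numpy sketch of that equivalence (illustrative only; `vendi_score_from_eigvals` is a name made up here, not code from this repo):

```python
import numpy as np

def vendi_score_from_eigvals(eigvals):
    """exp(Shannon entropy) of an eigenvalue distribution that sums to 1."""
    p = eigvals[eigvals > 1e-12]  # dropped zeros contribute 0 * log(0) = 0
    return float(np.exp(-np.sum(p * np.log(p))))

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 16))
X /= np.linalg.norm(X, axis=1, keepdims=True)  # unit-norm rows, so k(x, x) = 1
n = X.shape[0]

# score_X path: eigenvalues of the n x n similarity matrix X @ X.T, scaled by 1/n.
primal = vendi_score_from_eigvals(np.linalg.eigvalsh(X @ X.T / n))
# score_dual path: the d x d matrix X.T @ X / n shares the nonzero eigenvalues.
dual = vendi_score_from_eigvals(np.linalg.eigvalsh(X.T @ X / n))
assert np.isclose(primal, dual)
print(primal)  # ~16 for near-orthogonal unit rows spread over 16 dimensions
```

Working on the d x d side is the point of the dual option: the eigendecomposition costs O(d^3) instead of O(n^3), which matters when n >> d.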
vendiscore.py
CHANGED
````diff
@@ -14,10 +14,8 @@
 import evaluate
 import datasets
 import numpy as np
-import PIL
-from PIL import Image
 
-from vendi_score import vendi,
+from vendi_score import vendi, text_utils
 
 # TODO: Add BibTeX citation
 _CITATION = ""
@@ -36,30 +34,26 @@ Args:
         matrix K, or an n x d feature matrix X.
     k: a pairwise similarity function, or a string identifying a predefined
         similarity function.
-        Options: ngram_overlap, text_embeddings
+        Options: ngram_overlap, text_embeddings.
     score_K: if true, samples is an n x n similarity matrix K.
     score_X: if true, samples is an n x d feature matrix X.
     score_dual: if true, compute diversity score of X @ X.T.
     normalize: if true, normalize the similarity scores.
     model (optional): if k is "text_embeddings", a model mapping sentences to
         embeddings (output should be an object with an attribute called
-        `pooler_output` or `last_hidden_state`).
-        model mapping images to embeddings.
+        `pooler_output` or `last_hidden_state`).
     tokenizer (optional): if k is "text_embeddings" or "ngram_overlap", a
         tokenizer mapping strings to lists.
-    transform (optional): if k is "image_embeddings", a torchvision transform
-        to apply to the samples.
     model_path (optional): if k is "text_embeddings", the name of a model on the
         HuggingFace hub.
     ns (optional): if k is "ngram_overlap", the values of n to calculate.
-    batch_size (optional): batch size to use if k is "text_embedding"
-        "image_embedding".
+    batch_size (optional): batch size to use if k is "text_embedding".
     device (optional): a string (e.g. "cuda", "cpu") or torch.device identifying
-        the device to use if k is "text_embedding
+        the device to use if k is "text_embedding".
 Returns:
     VS: The Vendi Score.
 Examples:
-    >>> vendiscore = evaluate.load("danf0/vendiscore")
+    >>> vendiscore = evaluate.load("danf0/vendiscore", "text")
     >>> samples = ["Look, Jane.",
                    "See Spot.",
                    "See Spot run.",
@@ -74,11 +68,8 @@ Examples:
 def get_features(config_name):
     if config_name in ("text", "default"):
         return datasets.Features({"samples": datasets.Value("string")})
-    if config_name == "image":
-        return [
-            datasets.Features({"samples": datasets.Array2D}),
-            datasets.Features({"samples": datasets.Array3D}),
-        ]
+    # if config_name == "image":
+    #     return datasets.Features({"samples": datasets.Image})
     if config_name in ("K", "X"):
         return [
             datasets.Features(
@@ -130,7 +121,6 @@ class VendiScore(evaluate.Metric):
         normalize=False,
         model=None,
         tokenizer=None,
-        transform=None,
         model_path=None,
         ns=[1, 2],
         batch_size=16,
@@ -155,18 +145,18 @@ class VendiScore(evaluate.Metric):
                 device=device,
                 model_path=model_path,
             )
-        elif type(k) == str and k == "pixels":
-            vs = image_utils.pixel_vendi_score(
-                [Image.fromarray(x) for x in samples]
-            )
-        elif type(k) == str and k == "image_embeddings":
-            vs = image_utils.embedding_vendi_score(
-                [Image.fromarray(x) for x in samples],
-                batch_size=batch_size,
-                device=device,
-                model=model,
-                transform=transform,
-            )
+        # elif type(k) == str and k == "pixels":
+        #     vs = image_utils.pixel_vendi_score(
+        #         [Image.fromarray(x) for x in samples]
+        #     )
+        # elif type(k) == str and k == "image_embeddings":
+        #     vs = image_utils.embedding_vendi_score(
+        #         [Image.fromarray(x) for x in samples],
+        #         batch_size=batch_size,
+        #         device=device,
+        #         model=model,
+        #         transform=transform,
+        #     )
         else:
             vs = vendi.score(samples, k)
         return {"VS": vs}
````
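With the image branches commented out, `_compute` dispatches only on the text similarity strings before falling through to `vendi.score(samples, k)`. A quick smoke test of the surviving paths might look like the following sketch; it assumes the `evaluate` and `vendi_score` packages are installed, and `jaccard` is a made-up example kernel, not part of the repo:

```python
import evaluate

# Load the "text" config, as in the updated docstring example.
vendiscore = evaluate.load("danf0/vendiscore", "text")

samples = ["Look, Jane.", "See Spot.", "See Spot run."]

# Predefined text similarity: n-gram overlap over unigrams and bigrams
# (ns=[1, 2] is the default in _compute above).
print(vendiscore.compute(samples=samples, k="ngram_overlap", ns=[1, 2]))

# A custom pairwise k falls through to vendi.score(samples, k); per the
# docstring it should be symmetric with k(x, x) = 1. Jaccard similarity
# over word sets satisfies both properties.
def jaccard(a, b):
    sa, sb = set(a.split()), set(b.split())
    return len(sa & sb) / len(sa | sb)

print(vendiscore.compute(samples=samples, k=jaccard))
```

Both calls return a dict of the form `{'VS': ...}`, matching the `return {"VS": vs}` at the end of `_compute`.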