imvladikon
committed on
Commit
•
6651605
1
Parent(s):
54fc15c
Update README.md
Browse files
README.md
CHANGED
@@ -86,6 +86,21 @@ cos_sim = nn.CosineSimilarity(dim=0, eps=1e-6)
|
|
86 |
print(cos_sim(sentence_embeddings[0], sentence_embeddings[1]).item())
|
87 |
```
|
88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
89 |
|
90 |
|
91 |
## Evaluation Results
|
|
|
86 |
print(cos_sim(sentence_embeddings[0], sentence_embeddings[1]).item())
|
87 |
```
|
88 |
|
89 |
+
```
|
90 |
+
def ppl_naive(text, model, tokenizer):
    """Return a naive perplexity score for ``text`` under ``model``.

    The text is encoded with ``tokenizer.encode(text, return_tensors="pt")``
    and the resulting tensor is passed to ``model`` as both the input and the
    ``labels``. The first element of the model output is treated as the loss
    and its exponential is returned as a Python float.

    NOTE(review): this presumes ``model(...)[0]`` is the mean token-level
    cross-entropy loss (as with causal/masked LM heads) -- confirm for the
    model in use.
    """
    # Named ``input_ids`` rather than ``input`` so the builtin is not shadowed.
    input_ids = tokenizer.encode(text, return_tensors="pt")
    # Scoring only: gradients are never used, so skip building the graph.
    with torch.no_grad():
        loss = model(input_ids, labels=input_ids)[0]
    return torch.exp(loss).item()
|
94 |
+
|
95 |
+
text = """{} היא עיר הבירה של מדינת ישראל, והעיר הגדולה ביותר בישראל בגודל האוכלוסייה"""
|
96 |
+
|
97 |
+
for word in ["חיפה", "ירושלים", "תל אביב"]:
|
98 |
+
print(ppl_naive(text.format(word), model, tokenizer))
|
99 |
+
|
100 |
+
# 10.181422233581543
|
101 |
+
# 9.743313789367676
|
102 |
+
# 10.171016693115234
|
103 |
+
```
|
104 |
|
105 |
|
106 |
## Evaluation Results
|