fix: rename
README.md CHANGED
@@ -2912,9 +2912,9 @@ Trained on the [Nomic Embed](https://arxiv.org/abs/2402.01613) weakly-supervised
 |-----------------------|------------|--------------|---------------------|-----------------|-------------------------|---------------|----------------|-----------|------------------|
 | nomic-embed-text-v1 | 768 | 62.4 | 74.1 | 43.9 | **85.2** | 55.7 | 52.8 | 82.1 | 30.1 |
 | nomic-embed-text-v1.5 | 768 | 62.28 | 73.55 | 43.93 | 84.61 | 55.78 | **53.01** | **81.94** | 30.4 |
-| modernbert-embed | 768 | **62.62** | **74.31** | **44.98** | 83.96 | **56.42** | 52.89 | 81.78 | **31.39** |
-| nomic-embed-text-v1.5 | 256 | 61.04 | 72.1 | 43.16 | 84.09 | 55.18 | 50.81 | 81.34 |
-| modernbert-embed | 256 | 61.17 | 72.40 | 43.82 | 83.45 | 55.69 | 50.62 | 81.12 | 31.27 |
+| modernbert-embed-base | 768 | **62.62** | **74.31** | **44.98** | 83.96 | **56.42** | 52.89 | 81.78 | **31.39** |
+| nomic-embed-text-v1.5 | 256 | 61.04 | 72.1 | 43.16 | 84.09 | 55.18 | 50.81 | 81.34 | 30.05 |
+| modernbert-embed-base | 256 | 61.17 | 72.40 | 43.82 | 83.45 | 55.69 | 50.62 | 81.12 | 31.27 |

@@ -2935,7 +2935,7 @@ Most use cases, adding `search_query: ` to the query and `search_document: ` to
 ```python
 from sentence_transformers import SentenceTransformer

-model = SentenceTransformer("nomic-ai/modernbert-embed")
+model = SentenceTransformer("nomic-ai/modernbert-embed-base")

 query_embeddings = model.encode([
     "search_query: What is TSNE?",

@@ -2960,7 +2960,7 @@ In Sentence Transformers, you can truncate embeddings to a smaller dimension by
 ```python
 from sentence_transformers import SentenceTransformer

-model = SentenceTransformer("nomic-ai/modernbert-embed", truncate_dim=256)
+model = SentenceTransformer("nomic-ai/modernbert-embed-base", truncate_dim=256)

 query_embeddings = model.encode([
     "search_query: What is TSNE?",

@@ -3003,8 +3003,8 @@ def mean_pooling(model_output, attention_mask):
 queries = ["search_query: What is TSNE?", "search_query: Who is Laurens van der Maaten?"]
 documents = ["search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten"]

-tokenizer = AutoTokenizer.from_pretrained("nomic-ai/modernbert-embed")
-model = AutoModel.from_pretrained("nomic-ai/modernbert-embed")
+tokenizer = AutoTokenizer.from_pretrained("nomic-ai/modernbert-embed-base")
+model = AutoModel.from_pretrained("nomic-ai/modernbert-embed-base")

 encoded_queries = tokenizer(queries, padding=True, truncation=True, return_tensors="pt")
 encoded_documents = tokenizer(documents, padding=True, truncation=True, return_tensors="pt")
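For reference on the last hunk: its context line shows that the README's raw-Transformers example depends on a `mean_pooling` helper defined just above the changed lines, whose body is not part of this diff. The sketch below assumes the standard attention-mask-weighted mean pooling and L2-normalized cosine scoring commonly used with Nomic-style embedders, and a transformers release recent enough to include the ModernBERT architecture; details may differ from the actual README.

```python
# Minimal sketch, not taken from the diff: mean_pooling's body is assumed to be
# the standard attention-mask-weighted average over token embeddings.
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer

def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0]  # last_hidden_state: (batch, seq_len, hidden)
    mask = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * mask, dim=1) / torch.clamp(mask.sum(dim=1), min=1e-9)

tokenizer = AutoTokenizer.from_pretrained("nomic-ai/modernbert-embed-base")
model = AutoModel.from_pretrained("nomic-ai/modernbert-embed-base")

queries = ["search_query: What is TSNE?"]
documents = ["search_document: TSNE is a dimensionality reduction algorithm created by Laurens van Der Maaten"]

encoded_queries = tokenizer(queries, padding=True, truncation=True, return_tensors="pt")
encoded_documents = tokenizer(documents, padding=True, truncation=True, return_tensors="pt")

with torch.no_grad():
    query_embeddings = F.normalize(
        mean_pooling(model(**encoded_queries), encoded_queries["attention_mask"]), p=2, dim=1
    )
    doc_embeddings = F.normalize(
        mean_pooling(model(**encoded_documents), encoded_documents["attention_mask"]), p=2, dim=1
    )

# Both sides are unit-length, so the dot product equals cosine similarity.
print(query_embeddings @ doc_embeddings.T)
```

When going through Sentence Transformers instead (as in the earlier hunks), pooling and normalization are handled by the model's bundled modules, so only the `search_query: ` / `search_document: ` prefixes need to be added by hand.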