tomaarsen HF staff committed on
Commit
a3ac84f
·
verified ·
1 Parent(s): ba33022

Add new SentenceTransformer model.

Browse files
README.md CHANGED
@@ -1,4 +1,5 @@
1
  ---
 
2
  pipeline_tag: sentence-similarity
3
  tags:
4
  - sentence-transformers
@@ -8,7 +9,7 @@ tags:
8
 
9
  ---
10
 
11
- # sentence-transformers-testing/stsb-bert-tiny
12
 
13
  This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 128 dimensional dense vector space and can be used for tasks like clustering or semantic search.
14
 
@@ -28,7 +29,7 @@ Then you can use the model like this:
28
  from sentence_transformers import SentenceTransformer
29
  sentences = ["This is an example sentence", "Each sentence is converted"]
30
 
31
- model = SentenceTransformer('sentence-transformers-testing/stsb-bert-tiny')
32
  embeddings = model.encode(sentences)
33
  print(embeddings)
34
  ```
@@ -54,8 +55,8 @@ def mean_pooling(model_output, attention_mask):
54
  sentences = ['This is an example sentence', 'Each sentence is converted']
55
 
56
  # Load model from HuggingFace Hub
57
- tokenizer = AutoTokenizer.from_pretrained('sentence-transformers-testing/stsb-bert-tiny')
58
- model = AutoModel.from_pretrained('sentence-transformers-testing/stsb-bert-tiny')
59
 
60
  # Tokenize sentences
61
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
@@ -77,7 +78,7 @@ print(sentence_embeddings)
77
 
78
  <!--- Describe how your model was evaluated -->
79
 
80
- For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=sentence-transformers-testing/stsb-bert-tiny)
81
 
82
 
83
  ## Training
@@ -97,13 +98,13 @@ The model was trained with the parameters:
97
  Parameters of the fit()-Method:
98
  ```
99
  {
100
- "epochs": 1,
101
  "evaluation_steps": 1000,
102
  "evaluator": "NoneType",
103
  "max_grad_norm": 1,
104
  "optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
105
  "optimizer_params": {
106
- "lr": 2e-05
107
  },
108
  "scheduler": "WarmupLinear",
109
  "steps_per_epoch": null,
 
1
  ---
2
+ library_name: sentence-transformers
3
  pipeline_tag: sentence-similarity
4
  tags:
5
  - sentence-transformers
 
9
 
10
  ---
11
 
12
+ # sentence-transformers-testing/stsb-bert-tiny-safetensors
13
 
14
  This is a [sentence-transformers](https://www.SBERT.net) model: It maps sentences & paragraphs to a 128 dimensional dense vector space and can be used for tasks like clustering or semantic search.
15
 
 
29
  from sentence_transformers import SentenceTransformer
30
  sentences = ["This is an example sentence", "Each sentence is converted"]
31
 
32
+ model = SentenceTransformer('sentence-transformers-testing/stsb-bert-tiny-safetensors')
33
  embeddings = model.encode(sentences)
34
  print(embeddings)
35
  ```
 
55
  sentences = ['This is an example sentence', 'Each sentence is converted']
56
 
57
  # Load model from HuggingFace Hub
58
+ tokenizer = AutoTokenizer.from_pretrained('sentence-transformers-testing/stsb-bert-tiny-safetensors')
59
+ model = AutoModel.from_pretrained('sentence-transformers-testing/stsb-bert-tiny-safetensors')
60
 
61
  # Tokenize sentences
62
  encoded_input = tokenizer(sentences, padding=True, truncation=True, return_tensors='pt')
 
78
 
79
  <!--- Describe how your model was evaluated -->
80
 
81
+ For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=sentence-transformers-testing/stsb-bert-tiny-safetensors)
82
 
83
 
84
  ## Training
 
98
  Parameters of the fit()-Method:
99
  ```
100
  {
101
+ "epochs": 10,
102
  "evaluation_steps": 1000,
103
  "evaluator": "NoneType",
104
  "max_grad_norm": 1,
105
  "optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
106
  "optimizer_params": {
107
+ "lr": 8e-05
108
  },
109
  "scheduler": "WarmupLinear",
110
  "steps_per_epoch": null,
config.json CHANGED
@@ -18,7 +18,7 @@
18
  "pad_token_id": 0,
19
  "position_embedding_type": "absolute",
20
  "torch_dtype": "float32",
21
- "transformers_version": "4.33.0",
22
  "type_vocab_size": 2,
23
  "use_cache": true,
24
  "vocab_size": 30522
 
18
  "pad_token_id": 0,
19
  "position_embedding_type": "absolute",
20
  "torch_dtype": "float32",
21
+ "transformers_version": "4.36.2",
22
  "type_vocab_size": 2,
23
  "use_cache": true,
24
  "vocab_size": 30522
config_sentence_transformers.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "__version__": {
3
  "sentence_transformers": "2.2.2",
4
- "transformers": "4.33.0",
5
  "pytorch": "2.1.0+cu121"
6
  }
7
  }
 
1
  {
2
  "__version__": {
3
  "sentence_transformers": "2.2.2",
4
+ "transformers": "4.36.2",
5
  "pytorch": "2.1.0+cu121"
6
  }
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3642e25e2379761ee8b18ee545a80506a0d12dbe1499d7312a809865a9b00aec
3
  size 17547912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b57380d8465cb456819716ab92ba12933f8e9142ae5f930ba18ca830e9333af2
3
  size 17547912
tokenizer_config.json CHANGED
@@ -1,4 +1,46 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "clean_up_tokenization_spaces": true,
3
  "cls_token": "[CLS]",
4
  "do_basic_tokenize": true,
 
1
  {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
  "do_basic_tokenize": true,