vazish committed on
Commit
922aad2
·
unverified ·
1 Parent(s): dc8fa51

update model

Browse files
README.md CHANGED
@@ -3,7 +3,7 @@ language: en
3
  thumbnail: https://huggingface.co/front/thumbnails/google.png
4
  license: apache-2.0
5
  base_model:
6
- - cross-encoder/ms-marco-TinyBERT-L-2-v2
7
  pipeline_tag: text-classification
8
  library_name: transformers
9
  metrics:
@@ -16,7 +16,7 @@ datasets:
16
 
17
  ## Cross-Encoder for MS Marco with TinyBert
18
 
19
- This is a fine-tuned version of the model checkpointed at [cross-encoder/ms-marco-TinyBert-L-2-v2](https://huggingface.co/cross-encoder/ms-marco-TinyBERT-L-2-v2).
20
 
21
  It was fine-tuned on html tags and labels generated using [Fathom](https://mozilla.github.io/fathom/commands/label.html).
22
 
@@ -31,18 +31,18 @@ classifier = pipeline(
31
  )
32
 
33
  print(
34
- classifier('<input class="cc-number" placeholder="Enter credit card number..." />')
35
  )
36
 
37
  ```
38
 
39
  ## Model Training Info
40
  ```python
41
- HyperParameters: {
42
- 'learning_rate': 0.000082,
43
- 'num_train_epochs': 71,
44
- 'weight_decay': 0.1,
45
- 'per_device_train_batch_size': 32,
46
  }
47
  ```
48
 
 
3
  thumbnail: https://huggingface.co/front/thumbnails/google.png
4
  license: apache-2.0
5
  base_model:
6
+ - cross-encoder/ms-marco-MiniLM-L-4-v2
7
  pipeline_tag: text-classification
8
  library_name: transformers
9
  metrics:
 
16
 
17
  ## Cross-Encoder for MS Marco with MiniLM
18
 
19
+ This is a fine-tuned version of the model checkpointed at [cross-encoder/ms-marco-MiniLM-L-4-v2](https://huggingface.co/cross-encoder/ms-marco-MiniLM-L-4-v2).
20
 
21
  It was fine-tuned on HTML tags and labels generated using [Fathom](https://mozilla.github.io/fathom/commands/label.html).
22
 
 
31
  )
32
 
33
  print(
34
+ classifier('Card information input Card number cc-number <SEP> <SEP> input First name <SEP> <SEP>')
35
  )
36
 
37
  ```
38
 
39
  ## Model Training Info
40
  ```python
41
+ HyperParameters = {
42
+ 'learning_rate': 2.3878733582558547e-05,
43
+ 'num_train_epochs': 21,
44
+ 'weight_decay': 0.0005288040458920454,
45
+ 'per_device_train_batch_size': 32
46
  }
47
  ```
48
 
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "_attn_implementation_autoset": true,
3
- "_name_or_path": "/content/cross-encoder/ms-marco-TinyBERT-L-2-v2",
4
  "architectures": [
5
  "BertForSequenceClassification"
6
  ],
@@ -9,47 +9,69 @@
9
  "gradient_checkpointing": false,
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
- "hidden_size": 128,
13
  "id2label": {
14
- "0": "cc-csc",
15
- "1": "cc-exp",
16
- "2": "cc-exp-month",
17
- "3": "cc-exp-year",
18
- "4": "cc-name",
19
- "5": "cc-number",
20
- "6": "cc-type",
21
- "7": "email",
22
- "8": "given-name",
23
- "9": "last-name",
24
- "10": "other",
25
- "11": "postal-code"
 
 
 
 
 
 
 
 
 
 
 
26
  },
27
  "initializer_range": 0.02,
28
- "intermediate_size": 512,
29
  "label2id": {
30
- "cc-csc": 0,
31
- "cc-exp": 1,
32
- "cc-exp-month": 2,
33
- "cc-exp-year": 3,
34
- "cc-name": 4,
35
- "cc-number": 5,
36
- "cc-type": 6,
37
- "email": 7,
38
- "given-name": 8,
39
- "last-name": 9,
40
- "other": 10,
41
- "postal-code": 11
 
 
 
 
 
 
 
 
 
 
 
42
  },
43
  "layer_norm_eps": 1e-12,
44
  "max_position_embeddings": 512,
45
  "model_type": "bert",
46
- "num_attention_heads": 2,
47
- "num_hidden_layers": 2,
48
  "pad_token_id": 0,
49
  "position_embedding_type": "absolute",
50
  "problem_type": "single_label_classification",
51
  "sbert_ce_default_activation_function": "torch.nn.modules.linear.Identity",
52
- "transformers_version": "4.46.3",
53
  "type_vocab_size": 2,
54
  "use_cache": true,
55
  "vocab_size": 30522
 
1
  {
2
  "_attn_implementation_autoset": true,
3
+ "_name_or_path": "/content/cross-encoder/ms-marco-MiniLM-L-4-v2",
4
  "architectures": [
5
  "BertForSequenceClassification"
6
  ],
 
9
  "gradient_checkpointing": false,
10
  "hidden_act": "gelu",
11
  "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 384,
13
  "id2label": {
14
+ "0": "address-level1",
15
+ "1": "address-level2",
16
+ "2": "address-line1",
17
+ "3": "address-line2",
18
+ "4": "cc-csc",
19
+ "5": "cc-exp",
20
+ "6": "cc-exp-month",
21
+ "7": "cc-exp-year",
22
+ "8": "cc-name",
23
+ "9": "cc-number",
24
+ "10": "cc-type",
25
+ "11": "country-name",
26
+ "12": "current-password",
27
+ "13": "email",
28
+ "14": "family-name",
29
+ "15": "given-name",
30
+ "16": "name",
31
+ "17": "new-password",
32
+ "18": "organization",
33
+ "19": "other",
34
+ "20": "postal-code",
35
+ "21": "street-address",
36
+ "22": "tel"
37
  },
38
  "initializer_range": 0.02,
39
+ "intermediate_size": 1536,
40
  "label2id": {
41
+ "address-level1": 0,
42
+ "address-level2": 1,
43
+ "address-line1": 2,
44
+ "address-line2": 3,
45
+ "cc-csc": 4,
46
+ "cc-exp": 5,
47
+ "cc-exp-month": 6,
48
+ "cc-exp-year": 7,
49
+ "cc-name": 8,
50
+ "cc-number": 9,
51
+ "cc-type": 10,
52
+ "country-name": 11,
53
+ "current-password": 12,
54
+ "email": 13,
55
+ "family-name": 14,
56
+ "given-name": 15,
57
+ "name": 16,
58
+ "new-password": 17,
59
+ "organization": 18,
60
+ "other": 19,
61
+ "postal-code": 20,
62
+ "street-address": 21,
63
+ "tel": 22
64
  },
65
  "layer_norm_eps": 1e-12,
66
  "max_position_embeddings": 512,
67
  "model_type": "bert",
68
+ "num_attention_heads": 12,
69
+ "num_hidden_layers": 4,
70
  "pad_token_id": 0,
71
  "position_embedding_type": "absolute",
72
  "problem_type": "single_label_classification",
73
  "sbert_ce_default_activation_function": "torch.nn.modules.linear.Identity",
74
+ "transformers_version": "4.46.1",
75
  "type_vocab_size": 2,
76
  "use_cache": true,
77
  "vocab_size": 30522
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d095f0e0dd6ba39462fec2067c71b4e27fe1808abf0db79114f98a8ac3acb5b2
3
- size 17554472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:340a1f388c0a35abde4edc524be81f4cdd9acdbb8b3e7430fae31068abda9c3e
3
+ size 76700884
onnx/model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da31488b459a7851731ec263425d7af53b51bb497487fc76922677d8aea111f7
3
- size 17612158
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e1443fbd2a9ac7862468ce3b6433442e657ab3d44e3b0ea3b06e14bc113ae66
3
+ size 76801703
onnx/model_bnb4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aef8c27f9dbaf27ba39d979442a822c1ff4051bfcf9ba32d535618a784c4cf13
3
- size 16262309
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62fe679f535a90b246c2531e2e349fd4c3f3fe13e8dca02e8d9b488e0432b191
3
+ size 52475154
onnx/model_fp16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70dc3df0f2c9420f8092090a51f73a83a5e535e65025533e02c1feb6a1e3d054
3
- size 8837300
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b8b969f498202e7d9923addb97d1d9725d3ad4d6f6184f6bd23927b41d4cb12
3
+ size 38455479
onnx/model_int8.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe3aaada3f9b2699f57dc534d35a9e5882de6cd2ab0ae25c5b915cb335befcb0
3
- size 4479682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbe8711f9b3897dbb10439e7c72def73dca020acd48e4feb3bcf5a1362c7d9fa
3
+ size 19451059
onnx/model_q4.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d2441385e5c3dc7e9a2639e58c335011152a757e8a4cf098ca2fb368cbdc92f
3
- size 16286797
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a57408bc6b196c2251fbd7cd7aae3fcdbe9152d1477c38df111b9743c301b838
3
+ size 52917370
onnx/model_q4f16.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2281c5e2b61e2bc7d7c0cf4cb2a392de10d2c2e81e25c94ef82a77efeee1ebf
3
- size 8273795
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0933fd729d3dceea9166ec135818d71b06933599b7942e7efb4cb0980368f832
3
+ size 28284538
onnx/model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe3aaada3f9b2699f57dc534d35a9e5882de6cd2ab0ae25c5b915cb335befcb0
3
- size 4479682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbe8711f9b3897dbb10439e7c72def73dca020acd48e4feb3bcf5a1362c7d9fa
3
+ size 19451059
onnx/model_uint8.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:175b941a81aac05aaae2cdb0aed7755f6c7d0efacb13ca682f2d0e49e021a900
3
- size 4479682
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:669c6a29cb8f05c079eb28e831c10c0f7c2c99f9f5d9ac45b4f1a7e0f2cf5495
3
+ size 19451056
quantize_config.json CHANGED
@@ -8,11 +8,10 @@
8
  "q4f16",
9
  "bnb4"
10
  ],
11
- "per_channel": false,
12
  "reduce_range": true,
13
  "block_size": null,
14
  "is_symmetric": true,
15
  "accuracy_level": null,
16
- "quant_type": 1,
17
- "op_block_list": null
18
  }
 
8
  "q4f16",
9
  "bnb4"
10
  ],
11
+ "per_channel": true,
12
  "reduce_range": true,
13
  "block_size": null,
14
  "is_symmetric": true,
15
  "accuracy_level": null,
16
+ "quant_type": 1
 
17
  }
tokenizer_config.json CHANGED
@@ -45,6 +45,7 @@
45
  "cls_token": "[CLS]",
46
  "do_basic_tokenize": true,
47
  "do_lower_case": true,
 
48
  "mask_token": "[MASK]",
49
  "max_length": 512,
50
  "model_max_length": 512,
 
45
  "cls_token": "[CLS]",
46
  "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
  "mask_token": "[MASK]",
50
  "max_length": 512,
51
  "model_max_length": 512,