andorei committed on
Commit
07d5a36
1 Parent(s): 58ca07a

Upload 7 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/etutubalina/graph_entity_linking/huggingface_models/xlm-roberta-base",
3
+ "architectures": [
4
+ "XLMRobertaModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "xlm-roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "output_past": true,
21
+ "pad_token_id": 1,
22
+ "position_embedding_type": "absolute",
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.24.0",
25
+ "type_vocab_size": 1,
26
+ "use_cache": true,
27
+ "vocab_size": 250002
28
+ }
model_description.tsv ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ train_dir /home/etutubalina/graph_entity_linking/pos_pairs_graph_data/2020AB/ENG_SPA_POR_FRE_JPN_RUS_DUT_GER_ITA_CZE_SWE_KOR_LAV_HUN_CHI_NOR_POL_TUR_EST_FIN_SCR_UKR_GRE_DAN_BAQ_HEB_MULTILINGUAL_ALL_LANGUAGES_MAP_TO_ENG_20_20_20_FULL
2
+ validate False
3
+ output_dir /home/etutubalina/graph_entity_linking/results/pretrained_graphsapbert/2020AB/768_0.2_FINAL_NEIGHBORS_MS_LOSS_ALL_LANGUAGES/GAT_DGI_MULTILINGUAL_NO_LOOPS_MAP_ENG_20_20_20/
4
+ gat_num_outer_layers 1
5
+ gat_num_inner_layers 3
6
+ gat_num_hidden_channels 768
7
+ gat_num_neighbors [3]
8
+ gat_num_att_heads 2
9
+ gat_dropout_p 0.3
10
+ gat_attention_dropout_p 0.1
11
+ gat_use_relational_features False
12
+ use_rel_or_rela rel
13
+ graph_loss_weight 1.0
14
+ dgi_loss_weight 0.1
15
+ remove_selfloops True
16
+ text_loss_weight 1.0
17
+ intermodal_loss_weight 1.0
18
+ use_intermodal_miner True
19
+ intermodal_miner_margin 0.2
20
+ freeze_neighbors False
21
+ apply_text_loss_to_all_neighbors False
22
+ modality_distance sapbert
23
+ intermodal_loss_type sapbert
24
+ intermodal_strategy None
25
+ use_detached_text False
26
+ remove_activations False
27
+ common_hard_pairs False
28
+ fuse_unimodal_embeddings False
29
+ cross_fusion False
30
+ inmodal_fusion False
31
+ global_fusion False
32
+ fusion_text_weight None
33
+ max_length 32
34
+ use_cuda True
35
+ learning_rate 2e-05
36
+ weight_decay 0.01
37
+ batch_size 256
38
+ num_epochs 1
39
+ amp True
40
+ parallel True
41
+ random_seed 42
42
+ loss ms_loss
43
+ use_miner True
44
+ miner_margin 0.2
45
+ type_of_triplets all
46
+ agg_mode cls
47
+ text_encoder /home/etutubalina/graph_entity_linking/huggingface_models/xlm-roberta-base/
48
+ dataloader_num_workers 0
49
+ save_every_N_epoch 1
50
+ model_checkpoint_path None
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04f2bdac1ad7417622e153e86efe3e0bbe2675e65b8c7a6006cd22c424b188df
3
+ size 1112241265
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
3
+ size 5069051
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "content": "<mask>",
7
+ "lstrip": true,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "unk_token": "<unk>"
15
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c24cdc13d4c9952d63718d6c9fa4c287974249e16b7ade6d5a85e7bbb75626
3
+ size 17082660
tokenizer_config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": {
6
+ "__type": "AddedToken",
7
+ "content": "<mask>",
8
+ "lstrip": true,
9
+ "normalized": true,
10
+ "rstrip": false,
11
+ "single_word": false
12
+ },
13
+ "model_max_length": 512,
14
+ "name_or_path": "/home/etutubalina/graph_entity_linking/huggingface_models/xlm-roberta-base",
15
+ "pad_token": "<pad>",
16
+ "sep_token": "</s>",
17
+ "special_tokens_map_file": null,
18
+ "tokenizer_class": "XLMRobertaTokenizer",
19
+ "unk_token": "<unk>"
20
+ }