oddadmix committed on
Commit
ed7b702
·
verified ·
1 Parent(s): 238a080

oddadmix/masrawy-english-arabic-translator-clauda-opus-v1

Browse files
README.md ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: Helsinki-NLP/opus-mt-en-ar
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: masrawy-english-arabic-translator-clauda-opus-v1
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # masrawy-english-arabic-translator-clauda-opus-v1
15
+
16
+ This model is a fine-tuned version of [Helsinki-NLP/opus-mt-en-ar](https://huggingface.co/Helsinki-NLP/opus-mt-en-ar) on an unknown dataset.
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: 1.8534
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 2e-05
38
+ - train_batch_size: 32
39
+ - eval_batch_size: 32
40
+ - seed: 42
41
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
42
+ - lr_scheduler_type: linear
43
+ - num_epochs: 15
44
+
45
+ ### Training results
46
+
47
+ | Training Loss | Epoch | Step | Validation Loss |
48
+ |:-------------:|:-----:|:-----:|:---------------:|
49
+ | 2.8787 | 1.0 | 3529 | 2.7030 |
50
+ | 2.6107 | 2.0 | 7058 | 2.5211 |
51
+ | 2.4579 | 3.0 | 10587 | 2.4222 |
52
+ | 2.3906 | 4.0 | 14116 | 2.3666 |
53
+ | 2.2673 | 5.0 | 17645 | 2.3260 |
54
+ | 2.2383 | 6.0 | 21174 | 2.2936 |
55
+ | 2.1464 | 7.0 | 24703 | 2.2754 |
56
+ | 2.1304 | 8.0 | 28232 | 2.2530 |
57
+ | 2.0802 | 9.0 | 31761 | 2.2417 |
58
+ | 2.0480 | 10.0 | 35290 | 2.2341 |
59
+ | 2.0323 | 11.0 | 38819 | 2.2255 |
60
+ | 2.0012 | 12.0 | 42348 | 2.2233 |
61
+ | 1.9905 | 13.0 | 45877 | 2.2193 |
62
+ | 1.9648 | 14.0 | 49406 | 2.2162 |
63
+ | 1.9716 | 15.0 | 52935 | 1.8534 |
64
+
65
+
66
+ ### Framework versions
67
+
68
+ - Transformers 4.35.2
69
+ - PyTorch 2.1.1+cu121
70
+ - Datasets 2.14.5
71
+ - Tokenizers 0.15.1
config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Helsinki-NLP/opus-mt-en-ar",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "swish",
5
+ "add_bias_logits": false,
6
+ "add_final_layer_norm": false,
7
+ "architectures": [
8
+ "MarianMTModel"
9
+ ],
10
+ "attention_dropout": 0.0,
11
+ "bad_words_ids": [
12
+ [
13
+ 62801
14
+ ]
15
+ ],
16
+ "bos_token_id": 0,
17
+ "classif_dropout": 0.0,
18
+ "classifier_dropout": 0.0,
19
+ "d_model": 512,
20
+ "decoder_attention_heads": 8,
21
+ "decoder_ffn_dim": 2048,
22
+ "decoder_layerdrop": 0.0,
23
+ "decoder_layers": 6,
24
+ "decoder_start_token_id": 62801,
25
+ "decoder_vocab_size": 62802,
26
+ "dropout": 0.1,
27
+ "encoder_attention_heads": 8,
28
+ "encoder_ffn_dim": 2048,
29
+ "encoder_layerdrop": 0.0,
30
+ "encoder_layers": 6,
31
+ "eos_token_id": 0,
32
+ "extra_pos_embeddings": 62802,
33
+ "forced_eos_token_id": 0,
34
+ "id2label": {
35
+ "0": "LABEL_0",
36
+ "1": "LABEL_1",
37
+ "2": "LABEL_2"
38
+ },
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "label2id": {
42
+ "LABEL_0": 0,
43
+ "LABEL_1": 1,
44
+ "LABEL_2": 2
45
+ },
46
+ "max_length": 512,
47
+ "max_position_embeddings": 512,
48
+ "model_type": "marian",
49
+ "normalize_before": false,
50
+ "normalize_embedding": false,
51
+ "num_beams": 4,
52
+ "num_hidden_layers": 6,
53
+ "pad_token_id": 62801,
54
+ "scale_embedding": true,
55
+ "share_encoder_decoder_embeddings": true,
56
+ "static_position_embeddings": true,
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.35.2",
59
+ "use_cache": true,
60
+ "vocab_size": 62802
61
+ }
generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bad_words_ids": [
3
+ [
4
+ 62801
5
+ ]
6
+ ],
7
+ "bos_token_id": 0,
8
+ "decoder_start_token_id": 62801,
9
+ "eos_token_id": 0,
10
+ "forced_eos_token_id": 0,
11
+ "max_length": 512,
12
+ "num_beams": 4,
13
+ "pad_token_id": 62801,
14
+ "renormalize_logits": true,
15
+ "transformers_version": "4.35.2"
16
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c06045c1aa28e023e9f07d69ceb22a44c1b1ea9ebbf1c1bf81ddb84fbf1fcda
3
+ size 305452744
runs/Dec06_05-25-54_ngqm1asm95/events.out.tfevents.1733462755.ngqm1asm95.82.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:309dc6a5c60242083db0097a62839dcb02e607be07cac854b81d3fab281bc866
3
+ size 25287
runs/Dec06_11-43-39_ny9ms9c3tk/events.out.tfevents.1733485423.ny9ms9c3tk.82.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78666e48e25702053f1c971906e1816050251428b679b8e89409c1d18d3273c9
3
+ size 6335
source.spm ADDED
Binary file (801 kB). View file
 
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "</s>",
3
+ "pad_token": "<pad>",
4
+ "unk_token": "<unk>"
5
+ }
target.spm ADDED
Binary file (917 kB). View file
 
tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "</s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "62801": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "clean_up_tokenization_spaces": true,
29
+ "eos_token": "</s>",
30
+ "model_max_length": 512,
31
+ "pad_token": "<pad>",
32
+ "separate_vocabs": false,
33
+ "source_lang": "eng",
34
+ "sp_model_kwargs": {},
35
+ "target_lang": "ara",
36
+ "tokenizer_class": "MarianTokenizer",
37
+ "unk_token": "<unk>"
38
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e43657cb6f7277d8f711eb2f810cbc3fdc53afe035e5f42cd4193197189777d8
3
+ size 4792
vocab.json ADDED
The diff for this file is too large to render. See raw diff