# NOTE(review): the following header was Hugging Face page residue fused into
# the file by extraction ("bkhmsi's picture / support for TD2 / d7c4b94 /
# raw / history blame / 2.22 kB") — commented out so the YAML parses.
---
run-title: tashkeela-d2
debug: false
model-name: TD2
paths:
base: ./dataset/ashaar
save: ./models
load: tashkeela-d2.pt
load-td2: td2/tashkeela-ashaar-td2.pt
resume: ./models/Tashkeela-D2/tashkeela-d2.pt
constants: ./dataset/helpers/constants
word-embs: vocab.vec
test: test
modeling:
"checkpoint": munael/Partial-Arabic-Diacritization-TD2
"base_model": CAMeL-Lab/bert-base-arabic-camelbert-mix-ner
# "base_model": UBC-NLP/MARBERTv2
# "base_model": UBC-NLP/ARBERTv2
"deep-cls": true
"full-finetune": true #< From true
"keep-token-model-layers": 2
# "num-finetune-last-layers": 2 #
"num-chars": 40
"char-embed-dim": 128
"token_hidden_size": 768
"deep-down-proj": true
"dropout": 0.2
"sentence_dropout": 0.1
"diac_model_config": {
"vocab_size": 1,
"num_hidden_layers": 2,
"hidden_size": 768,
"intermediate_size": 2304,
"num_attention_heads": 8,
}
loader:
wembs-limit: -1
num-workers: 0
train:
epochs: 1000
batch-size: 1
char-embed-dim: 32
resume: false
resume-lr: false
max-word-len: 13
max-sent-len: 10
rnn-cell: lstm
sent-lstm-layers: 2
word-lstm-layers: 2
sent-lstm-units: 256
word-lstm-units: 512
decoder-units: 256
sent-dropout: 0.2
diac-dropout: 0
final-dropout: 0.2
sent-mask-zero: false
lr-factor: 0.5
lr-patience: 1
lr-min: 1.0e-7  # was `1.e-7`: a float in YAML 1.1 (PyYAML) but a string under YAML 1.2 — normalized for portability
lr-init: 0.002
weight-decay: 0
vertical-dropout: 0.25
recurrent-dropout: 0.25
stopping-delta: 1.0e-7  # was `1.e-7`: a float in YAML 1.1 (PyYAML) but a string under YAML 1.2 — normalized for portability
stopping-patience: 3
predictor:
batch-size: 1
stride: 2
window: 20
gt-signal-prob: 0
seed-idx: 0
sentence-break:
stride: 2
window: 10
min-window: 1
export-map: false
files:
- train/train.txt
- val/val.txt
# NOTE(review): 'delimeters' (sic, "delimiters") — spelling is the key the loader reads; do not rename without updating the consumer
delimeters:
- ،
- ؛
- ','
- ;
- «
- »
- '{'
- '}'
- '('
- ')'
- '['
- ']'
- '.'
- '*'
- '-'
- ':'
- '?'
- '!'
- ؟
segment:
stride: 2
window: 10
min-window: 1
export-map: false
files:
- train/train.txt
- val/val.txt
# NOTE(review): 'delimeters' (sic, "delimiters") — spelling is the key the loader reads; do not rename without updating the consumer
delimeters:
- ،
- ؛
- ','
- ;
- «
- »
- '{'
- '}'
- '('
- ')'
- '['
- ']'
- '.'
- '*'
- '-'
- ':'
- '?'
- '!'
- ؟