---
license: apache-2.0
---
|
### Deprem NER Training Results
|
|
|
```
              precision    recall  f1-score   support

           0      0.85      0.91      0.88       734
           1      0.77      0.84      0.80       207
           2      0.71      0.88      0.79       130
           3      0.68      0.76      0.72        94
           4      0.80      0.85      0.82       362
           5      0.63      0.59      0.61       112
           6      0.73      0.82      0.77       108
           7      0.55      0.77      0.64        78
           8      0.65      0.71      0.68        31
           9      0.70      0.85      0.76       117

   micro avg      0.77      0.85      0.81      1973
   macro avg      0.71      0.80      0.75      1973
weighted avg      0.77      0.85      0.81      1973
 samples avg      0.82      0.87      0.83      1973
```
|
|
|
### Preprocessing Functions
|
``` |
|
# Turkish stop-word list (presumably from nltk.corpus.stopwords -- the import
# is not shown here) extended with a few extra tokens to drop.
tr_stopwords = stopwords.words('turkish')
tr_stopwords.extend(["hic", "dm", "vs", "ya"])
|
|
|
def remove_punct(tok):
    """Return ``tok`` with punctuation removed.

    Every character that is neither a regex word character nor whitespace
    is deleted. Underscores count as word characters, so they are kept.
    """
    return re.sub(r'[^\w\s]', '', tok)
|
|
|
def normalize(tok):
    """Replace an all-digit token with the placeholder string ``"digit"``.

    Any other token (including the empty string) is returned unchanged.
    """
    return "digit" if tok.isdigit() else tok
|
|
|
def clean(tok):
    """Clean a single token: strip punctuation, then normalize digits.

    Punctuation is removed first, so a token such as ``"112,"`` becomes
    ``"112"`` and is then mapped to ``"digit"`` by ``normalize``.
    """
    return normalize(remove_punct(tok))
|
|
|
def exceptions(tok):
    """Return True if ``tok`` should be kept, False if it should be dropped.

    A token is dropped when it is:
      * empty,
      * a single character that is not a digit,
      * listed in ``tr_stopwords``, or
      * a hashtag or mention (starts with ``#`` or ``@``).
    """
    if not tok:
        return False

    # Single-character tokens are only kept when they are digits.
    if len(tok) == 1 and not tok.isdigit():
        return False

    if tok in tr_stopwords:
        return False

    if tok.startswith(('#', '@')):
        return False

    return True
|
|
|
|
|
def sm_tok(text):
    """Split ``text`` on single spaces and return the cleaned, kept tokens.

    Filtering with ``exceptions`` runs on the raw token, before ``clean``
    is applied. A kept token that consists only of punctuation therefore
    yields an empty string in the output.
    """
    return [clean(tok) for tok in text.split(" ") if exceptions(tok)]
|
``` |
|
|
|
### Other Hyperparameters
|
``` |
|
training_args = TrainingArguments(
    output_dir="./output",
    evaluation_strategy="epoch",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    weight_decay=0.01,
    # The string "none" disables logging integrations. Passing None does not
    # turn reporting off; it falls back to the library default instead.
    report_to="none",
    num_train_epochs=4,
)
|
``` |
|
|
|
``` |
|
# One weight per label id 0-9 (the same ids as in the results table);
# presumably used to weight the training loss -- confirm against the
# training script.
class_weights[0] = 1.0
class_weights[1] = 1.5167249178108022
class_weights[2] = 1.7547338578655642
class_weights[3] = 1.9610520059358458
class_weights[4] = 1.269341370129623
class_weights[5] = 1.8684086209021484
class_weights[6] = 1.8019018017117145
class_weights[7] = 2.110648663094536
class_weights[8] = 3.081208739200435
class_weights[9] = 1.7994815143101963
|
``` |
|
|
|
Threshold: 0.25 |
|
|
|
``` |