dafajon committed on
Commit
5831a16
1 Parent(s): 6a38925

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +76 -18
README.md CHANGED
@@ -3,6 +3,69 @@ license: apache-2.0
3
  ---
4
  ### Deprem NER Training Results
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  ```
7
  training_args = TrainingArguments(
8
  output_dir="./output",
@@ -15,24 +78,19 @@ training_args = TrainingArguments(
15
  )
16
  ```
17
 
18
- Threshold: 0.1
19
-
20
  ```
21
- precision recall f1-score support
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
- Alakasiz 0.92 0.87 0.89 734
24
- Barinma 0.87 0.79 0.83 207
25
- Elektronik 0.72 0.73 0.73 130
26
- Giysi 0.84 0.66 0.74 94
27
- Kurtarma 0.84 0.80 0.82 362
28
- Lojistik 0.75 0.51 0.61 112
29
- Saglik 0.79 0.80 0.79 108
30
- Su 0.63 0.47 0.54 78
31
- Yagma 0.75 0.58 0.65 31
32
- Yemek 0.80 0.77 0.79 117
33
-
34
- micro avg 0.85 0.78 0.81 1973
35
- macro avg 0.79 0.70 0.74 1973
36
- weighted avg 0.84 0.78 0.81 1973
37
- samples avg 0.84 0.82 0.82 1973
38
  ```
 
3
  ---
4
  ### Deprem NER Training Results
5
 
6
+ ```
7
+ precision recall f1-score support
8
+
9
+ 0 0.85 0.91 0.88 734
10
+ 1 0.77 0.84 0.80 207
11
+ 2 0.71 0.88 0.79 130
12
+ 3 0.68 0.76 0.72 94
13
+ 4 0.80 0.85 0.82 362
14
+ 5 0.63 0.59 0.61 112
15
+ 6 0.73 0.82 0.77 108
16
+ 7 0.55 0.77 0.64 78
17
+ 8 0.65 0.71 0.68 31
18
+ 9 0.70 0.85 0.76 117
19
+
20
+ micro avg 0.77 0.85 0.81 1973
21
+ macro avg 0.71 0.80 0.75 1973
22
+ weighted avg 0.77 0.85 0.81 1973
23
+ samples avg 0.82 0.87 0.83 1973
24
+ ```
25
+
26
+ ### Preprocessing Functions
27
+ ```
28
+ tr_stopwords = stopwords.words('turkish')
29
+ tr_stopwords.append("hic")
30
+ tr_stopwords.append("dm")
31
+ tr_stopwords.append("vs")
32
+ tr_stopwords.append("ya")
33
+
34
+ def remove_punct(tok):
35
+ tok = re.sub(r'[^\w\s]', '', tok)
36
+ return tok
37
+
38
+ def normalize(tok):
39
+ if tok.isdigit():
40
+ tok = "digit"
41
+ return tok
42
+
43
+ def clean(tok):
44
+ tok = remove_punct(tok)
45
+ tok = normalize(tok)
46
+
47
+ return tok
48
+
49
+ def exceptions(tok):
50
+ if not tok.isdigit() and len(tok)==1:
51
+ return False
52
+
53
+ if not tok:
54
+ return False
55
+
56
+ if tok in tr_stopwords:
57
+ return False
58
+
59
+ if tok.startswith('#') or tok.startswith("@"):
60
+ return False
61
+
62
+ return True
63
+
64
+
65
+ sm_tok = lambda text: [clean(tok) for tok in text.split(" ") if exceptions(tok)]
66
+ ```
67
+
68
+ ### Other Hyperparameters
69
  ```
70
  training_args = TrainingArguments(
71
  output_dir="./output",
 
78
  )
79
  ```
80
 
 
 
81
  ```
82
+ class_weights[0] = 1.0
83
+ class_weights[1] = 1.5167249178108022
84
+ class_weights[2] = 1.7547338578655642
85
+ class_weights[3] = 1.9610520059358458
86
+ class_weights[4] = 1.269341370129623
87
+ class_weights[5] = 1.8684086209021484
88
+ class_weights[6] = 1.8019018017117145
89
+ class_weights[7] = 2.110648663094536
90
+ class_weights[8] = 3.081208739200435
91
+ class_weights[9] = 1.7994815143101963
92
+ ```
93
+
94
+ Threshold: 0.25
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  ```