Update README.md
Browse files
README.md
CHANGED
@@ -4,7 +4,7 @@ widget:
|
|
4 |
- text: "Mustafa Kemal Atatürk 19 Mayıs 1919'da Samsun'a çıktı."
|
5 |
---
|
6 |
# Turkish Named Entity Recognition (NER) Model
|
7 |
-
This model is the fine-tuned
|
8 |
(a multilingual version of RoBERTa)
|
9 |
using a reviewed version of a well-known Turkish NER dataset
|
10 |
(https://github.com/stefan-it/turkish-bert/files/4558187/nerdata.txt).
|
@@ -16,19 +16,19 @@ batch_size = 8
|
|
16 |
label_list = ['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC']
|
17 |
max_length = 512
|
18 |
learning_rate = 2e-5
|
19 |
-
num_train_epochs =
|
20 |
weight_decay = 0.01
|
21 |
```
|
22 |
# How to use:
|
23 |
```
|
24 |
model = AutoModelForTokenClassification.from_pretrained("akdeniz27/xlm-roberta-base-turkish-ner")
|
25 |
tokenizer = AutoTokenizer.from_pretrained("akdeniz27/xlm-roberta-base-turkish-ner")
|
26 |
-
ner = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="
|
27 |
ner("<your text here>")
|
28 |
```
|
29 |
Please refer to "https://huggingface.co/transformers/_modules/transformers/pipelines/token_classification.html" for entity grouping with the aggregation_strategy parameter.
|
30 |
# Reference test results:
|
31 |
* accuracy: 0.9919343118732742
|
32 |
-
* f1: 0.
|
33 |
-
* precision: 0.
|
34 |
-
* recall: 0.
|
|
|
4 |
- text: "Mustafa Kemal Atatürk 19 Mayıs 1919'da Samsun'a çıktı."
|
5 |
---
|
6 |
# Turkish Named Entity Recognition (NER) Model
|
7 |
+
This model is the fine-tuned version of "xlm-roberta-base"
|
8 |
(a multilingual version of RoBERTa)
|
9 |
using a reviewed version of a well-known Turkish NER dataset
|
10 |
(https://github.com/stefan-it/turkish-bert/files/4558187/nerdata.txt).
|
|
|
16 |
label_list = ['O', 'B-PER', 'I-PER', 'B-ORG', 'I-ORG', 'B-LOC', 'I-LOC']
|
17 |
max_length = 512
|
18 |
learning_rate = 2e-5
|
19 |
+
num_train_epochs = 2
|
20 |
weight_decay = 0.01
|
21 |
```
|
22 |
# How to use:
|
23 |
```
|
24 |
model = AutoModelForTokenClassification.from_pretrained("akdeniz27/xlm-roberta-base-turkish-ner")
|
25 |
tokenizer = AutoTokenizer.from_pretrained("akdeniz27/xlm-roberta-base-turkish-ner")
|
26 |
+
ner = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy="simple")
|
27 |
ner("<your text here>")
|
28 |
```
|
29 |
Please refer to "https://huggingface.co/transformers/_modules/transformers/pipelines/token_classification.html" for entity grouping with the aggregation_strategy parameter.
|
30 |
# Reference test results:
|
31 |
* accuracy: 0.9919343118732742
|
32 |
+
* f1: 0.9492100796448622
|
33 |
+
* precision: 0.9407349896480332
|
34 |
+
* recall: 0.9578392621870883
|