keisuke-kiryu
committed on
Commit
·
ba37e17
1
Parent(s):
ba95365
Update README.md
Browse files
README.md
CHANGED
@@ -23,7 +23,26 @@ widget:
|
|
23 |
|
24 |
# モデルの使い方
|
25 |
```python
|
26 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
```
|
28 |
|
29 |
# 学習データ
|
|
|
23 |
|
24 |
# モデルの使い方
|
25 |
```python
|
26 |
+
from transformers import AutoTokenizer,AutoModelForTokenClassification
import torch
import numpy as np
|
27 |
+
|
28 |
+
model_name = 'recruit-jp/japanese-typo-detector-roberta-base'
|
29 |
+
|
30 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
31 |
+
model = AutoModelForTokenClassification.from_pretrained(model_name)
|
32 |
+
|
33 |
+
device = "cuda:0" if torch.cuda.is_available() else "cpu"
|
34 |
+
model = model.to(device)
|
35 |
+
|
36 |
+
in_text = "これは日本語の誤植を検出する真相学習モデルです。"
|
37 |
+
|
38 |
+
test_inputs = tokenizer(in_text, return_tensors='pt').get('input_ids')
|
39 |
+
test_outputs = model(test_inputs.to(torch.device(device)))
|
40 |
+
|
41 |
+
for chara, logit in zip(["[CLS]"] + list(in_text) + ["[SEP]"], test_outputs.logits.squeeze().tolist()):
|
42 |
+
    err_type_ind = np.argmax(logit)
|
43 |
+
    err_name = model.config.id2label[err_type_ind]
|
44 |
+
    err_desc = f"★誤字(err_index={err_type_ind}, err_name={err_name})" if err_type_ind > 0 else f""
|
45 |
+
    print(f"{chara} : {err_desc}")
|
46 |
```
|
47 |
|
48 |
# 学習データ
|