update readme
Browse files
- README.md +3 -1
- config.json +2 -2
README.md
CHANGED
@@ -12,13 +12,15 @@ license: mit
|
|
12 |
datasets:
|
13 |
- cc100
|
14 |
- wikipedia
|
|
|
|
|
15 |
---
|
16 |
|
17 |
# japanese-gpt2-xsmall
|
18 |
|
19 |
![rinna-icon](./rinna.png)
|
20 |
|
21 |
-
This repository provides an extra-small-sized Japanese GPT-2 model. The model
|
22 |
|
23 |
# How to use the model
|
24 |
|
|
|
12 |
datasets:
|
13 |
- cc100
|
14 |
- wikipedia
|
15 |
+
widget:
|
16 |
+
- text: "生命、宇宙、そして万物についての究極の疑問の答えは"
|
17 |
---
|
18 |
|
19 |
# japanese-gpt2-xsmall
|
20 |
|
21 |
![rinna-icon](./rinna.png)
|
22 |
|
23 |
+
This repository provides an extra-small-sized Japanese GPT-2 model. The model was trained using code from the GitHub repository [rinnakk/japanese-pretrained-models](https://github.com/rinnakk/japanese-pretrained-models) by [rinna Co., Ltd.](https://corp.rinna.co.jp/)
|
24 |
|
25 |
# How to use the model
|
26 |
|
config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
"GPT2LMHeadModel"
|
@@ -12,7 +12,7 @@
|
|
12 |
"initializer_range": 0.02,
|
13 |
"layer_norm_epsilon": 1e-05,
|
14 |
"model_type": "gpt2",
|
15 |
-
"n_ctx":
|
16 |
"n_embd": 512,
|
17 |
"n_head": 8,
|
18 |
"n_inner": 2304,
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "rinna/japanese-gpt2-xsmall",
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
"GPT2LMHeadModel"
|
|
|
12 |
"initializer_range": 0.02,
|
13 |
"layer_norm_epsilon": 1e-05,
|
14 |
"model_type": "gpt2",
|
15 |
+
"n_ctx": 1024,
|
16 |
"n_embd": 512,
|
17 |
"n_head": 8,
|
18 |
"n_inner": 2304,
|