Upload tokenizer
Browse files- README.md +201 -0
- tokenizer.json +364 -3
- tokenizer_config.json +27 -0
README.md
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
library_name: transformers
|
3 |
+
tags: []
|
4 |
+
---
|
5 |
+
|
6 |
+
# Model Card for Model ID
|
7 |
+
|
8 |
+
<!-- Provide a quick summary of what the model is/does. -->
|
9 |
+
|
10 |
+
|
11 |
+
|
12 |
+
## Model Details
|
13 |
+
|
14 |
+
### Model Description
|
15 |
+
|
16 |
+
<!-- Provide a longer summary of what this model is. -->
|
17 |
+
|
18 |
+
This is the model card for a 🤗 Transformers model that has been pushed to the Hub. This model card was generated automatically.
|
19 |
+
|
20 |
+
- **Developed by:** [More Information Needed]
|
21 |
+
- **Funded by [optional]:** [More Information Needed]
|
22 |
+
- **Shared by [optional]:** [More Information Needed]
|
23 |
+
- **Model type:** [More Information Needed]
|
24 |
+
- **Language(s) (NLP):** [More Information Needed]
|
25 |
+
- **License:** [More Information Needed]
|
26 |
+
- **Finetuned from model [optional]:** [More Information Needed]
|
27 |
+
|
28 |
+
### Model Sources [optional]
|
29 |
+
|
30 |
+
<!-- Provide the basic links for the model. -->
|
31 |
+
|
32 |
+
- **Repository:** [More Information Needed]
|
33 |
+
- **Paper [optional]:** [More Information Needed]
|
34 |
+
- **Demo [optional]:** [More Information Needed]
|
35 |
+
|
36 |
+
## Uses
|
37 |
+
|
38 |
+
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
|
39 |
+
|
40 |
+
### Direct Use
|
41 |
+
|
42 |
+
<!-- This section covers use of the model without fine-tuning or plugging it into a larger ecosystem/app. -->
|
43 |
+
|
44 |
+
[More Information Needed]
|
45 |
+
|
46 |
+
### Downstream Use [optional]
|
47 |
+
|
48 |
+
<!-- This section covers use of the model when it is fine-tuned for a task or plugged into a larger ecosystem/app. -->
|
49 |
+
|
50 |
+
[More Information Needed]
|
51 |
+
|
52 |
+
### Out-of-Scope Use
|
53 |
+
|
54 |
+
<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
|
55 |
+
|
56 |
+
[More Information Needed]
|
57 |
+
|
58 |
+
## Bias, Risks, and Limitations
|
59 |
+
|
60 |
+
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
|
61 |
+
|
62 |
+
[More Information Needed]
|
63 |
+
|
64 |
+
### Recommendations
|
65 |
+
|
66 |
+
<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
|
67 |
+
|
68 |
+
Users (both direct and downstream) should be made aware of the risks, biases, and limitations of the model. More information is needed before further recommendations can be made.
|
69 |
+
|
70 |
+
## How to Get Started with the Model
|
71 |
+
|
72 |
+
Use the code below to get started with the model.
|
73 |
+
|
74 |
+
[More Information Needed]
|
75 |
+
|
76 |
+
## Training Details
|
77 |
+
|
78 |
+
### Training Data
|
79 |
+
|
80 |
+
<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
|
81 |
+
|
82 |
+
[More Information Needed]
|
83 |
+
|
84 |
+
### Training Procedure
|
85 |
+
|
86 |
+
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
|
87 |
+
|
88 |
+
#### Preprocessing [optional]
|
89 |
+
|
90 |
+
[More Information Needed]
|
91 |
+
|
92 |
+
|
93 |
+
#### Training Hyperparameters
|
94 |
+
|
95 |
+
- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
|
96 |
+
|
97 |
+
#### Speeds, Sizes, Times [optional]
|
98 |
+
|
99 |
+
<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
|
100 |
+
|
101 |
+
[More Information Needed]
|
102 |
+
|
103 |
+
## Evaluation
|
104 |
+
|
105 |
+
<!-- This section describes the evaluation protocols and provides the results. -->
|
106 |
+
|
107 |
+
### Testing Data, Factors & Metrics
|
108 |
+
|
109 |
+
#### Testing Data
|
110 |
+
|
111 |
+
<!-- This should link to a Dataset Card if possible. -->
|
112 |
+
|
113 |
+
[More Information Needed]
|
114 |
+
|
115 |
+
#### Factors
|
116 |
+
|
117 |
+
<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
|
118 |
+
|
119 |
+
[More Information Needed]
|
120 |
+
|
121 |
+
#### Metrics
|
122 |
+
|
123 |
+
<!-- These are the evaluation metrics being used, ideally with a description of why. -->
|
124 |
+
|
125 |
+
[More Information Needed]
|
126 |
+
|
127 |
+
### Results
|
128 |
+
|
129 |
+
[More Information Needed]
|
130 |
+
|
131 |
+
#### Summary
|
132 |
+
|
133 |
+
|
134 |
+
|
135 |
+
## Model Examination [optional]
|
136 |
+
|
137 |
+
<!-- Relevant interpretability work for the model goes here -->
|
138 |
+
|
139 |
+
[More Information Needed]
|
140 |
+
|
141 |
+
## Environmental Impact
|
142 |
+
|
143 |
+
<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly. -->
|
144 |
+
|
145 |
+
Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
|
146 |
+
|
147 |
+
- **Hardware Type:** [More Information Needed]
|
148 |
+
- **Hours used:** [More Information Needed]
|
149 |
+
- **Cloud Provider:** [More Information Needed]
|
150 |
+
- **Compute Region:** [More Information Needed]
|
151 |
+
- **Carbon Emitted:** [More Information Needed]
|
152 |
+
|
153 |
+
## Technical Specifications [optional]
|
154 |
+
|
155 |
+
### Model Architecture and Objective
|
156 |
+
|
157 |
+
[More Information Needed]
|
158 |
+
|
159 |
+
### Compute Infrastructure
|
160 |
+
|
161 |
+
[More Information Needed]
|
162 |
+
|
163 |
+
#### Hardware
|
164 |
+
|
165 |
+
[More Information Needed]
|
166 |
+
|
167 |
+
#### Software
|
168 |
+
|
169 |
+
[More Information Needed]
|
170 |
+
|
171 |
+
## Citation [optional]
|
172 |
+
|
173 |
+
<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
|
174 |
+
|
175 |
+
**BibTeX:**
|
176 |
+
|
177 |
+
[More Information Needed]
|
178 |
+
|
179 |
+
**APA:**
|
180 |
+
|
181 |
+
[More Information Needed]
|
182 |
+
|
183 |
+
## Glossary [optional]
|
184 |
+
|
185 |
+
<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
|
186 |
+
|
187 |
+
[More Information Needed]
|
188 |
+
|
189 |
+
## More Information [optional]
|
190 |
+
|
191 |
+
[More Information Needed]
|
192 |
+
|
193 |
+
## Model Card Authors [optional]
|
194 |
+
|
195 |
+
[More Information Needed]
|
196 |
+
|
197 |
+
## Model Card Contact
|
198 |
+
|
199 |
+
[More Information Needed]
|
200 |
+
|
201 |
+
|
tokenizer.json
CHANGED
@@ -302,7 +302,6 @@
|
|
302 |
"ॽ": 164,
|
303 |
"ॾ": 165,
|
304 |
"“": 166,
|
305 |
-
"▁": 167,
|
306 |
"▁क": 168,
|
307 |
"े▁": 169,
|
308 |
"ा▁": 170,
|
@@ -32134,7 +32133,278 @@
|
|
32134 |
"रेक": 31996,
|
32135 |
"वरन▁": 31997,
|
32136 |
"उपयोगकर्ता▁को▁": 31998,
|
32137 |
-
"से▁से▁": 31999
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32138 |
},
|
32139 |
"merges": [
|
32140 |
"▁ क",
|
@@ -63968,7 +64238,98 @@
|
|
63968 |
"रे क",
|
63969 |
"वर न▁",
|
63970 |
"उपयोगकर् ता▁को▁",
|
63971 |
-
"से▁ से▁"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
63972 |
]
|
63973 |
}
|
63974 |
}
|
|
|
302 |
"ॽ": 164,
|
303 |
"ॾ": 165,
|
304 |
"“": 166,
|
|
|
305 |
"▁क": 168,
|
306 |
"े▁": 169,
|
307 |
"ा▁": 170,
|
|
|
32133 |
"रेक": 31996,
|
32134 |
"वरन▁": 31997,
|
32135 |
"उपयोगकर्ता▁को▁": 31998,
|
32136 |
+
"से▁से▁": 31999,
|
32137 |
+
"<0x00>": 32000,
|
32138 |
+
"<0x01>": 32001,
|
32139 |
+
"<0x02>": 32002,
|
32140 |
+
"<0x03>": 32003,
|
32141 |
+
"<0x04>": 32004,
|
32142 |
+
"<0x05>": 32005,
|
32143 |
+
"<0x06>": 32006,
|
32144 |
+
"<0x07>": 32007,
|
32145 |
+
"<0x08>": 32008,
|
32146 |
+
"<0x09>": 32009,
|
32147 |
+
"<0x0a>": 32010,
|
32148 |
+
"<0x0b>": 32011,
|
32149 |
+
"<0x0c>": 32012,
|
32150 |
+
"<0x0d>": 32013,
|
32151 |
+
"<0x0e>": 32014,
|
32152 |
+
"<0x0f>": 32015,
|
32153 |
+
"<0x10>": 32016,
|
32154 |
+
"<0x11>": 32017,
|
32155 |
+
"<0x12>": 32018,
|
32156 |
+
"<0x13>": 32019,
|
32157 |
+
"<0x14>": 32020,
|
32158 |
+
"<0x15>": 32021,
|
32159 |
+
"<0x16>": 32022,
|
32160 |
+
"<0x17>": 32023,
|
32161 |
+
"<0x18>": 32024,
|
32162 |
+
"<0x19>": 32025,
|
32163 |
+
"<0x1a>": 32026,
|
32164 |
+
"<0x1b>": 32027,
|
32165 |
+
"<0x1c>": 32028,
|
32166 |
+
"<0x1d>": 32029,
|
32167 |
+
"<0x1e>": 32030,
|
32168 |
+
"<0x1f>": 32031,
|
32169 |
+
"<0x20>": 32032,
|
32170 |
+
"<0x21>": 32033,
|
32171 |
+
"<0x22>": 32034,
|
32172 |
+
"<0x23>": 32035,
|
32173 |
+
"<0x24>": 32036,
|
32174 |
+
"<0x25>": 32037,
|
32175 |
+
"<0x26>": 32038,
|
32176 |
+
"<0x27>": 32039,
|
32177 |
+
"<0x28>": 32040,
|
32178 |
+
"<0x29>": 32041,
|
32179 |
+
"<0x2a>": 32042,
|
32180 |
+
"<0x2b>": 32043,
|
32181 |
+
"<0x2c>": 32044,
|
32182 |
+
"<0x2d>": 32045,
|
32183 |
+
"<0x2e>": 32046,
|
32184 |
+
"<0x2f>": 32047,
|
32185 |
+
"<0x30>": 32048,
|
32186 |
+
"<0x31>": 32049,
|
32187 |
+
"<0x32>": 32050,
|
32188 |
+
"<0x33>": 32051,
|
32189 |
+
"<0x34>": 32052,
|
32190 |
+
"<0x35>": 32053,
|
32191 |
+
"<0x36>": 32054,
|
32192 |
+
"<0x37>": 32055,
|
32193 |
+
"<0x38>": 32056,
|
32194 |
+
"<0x39>": 32057,
|
32195 |
+
"<0x3a>": 32058,
|
32196 |
+
"<0x3b>": 32059,
|
32197 |
+
"<0x3c>": 32060,
|
32198 |
+
"<0x3d>": 32061,
|
32199 |
+
"<0x3e>": 32062,
|
32200 |
+
"<0x3f>": 32063,
|
32201 |
+
"<0x40>": 32064,
|
32202 |
+
"<0x41>": 32065,
|
32203 |
+
"<0x42>": 32066,
|
32204 |
+
"<0x43>": 32067,
|
32205 |
+
"<0x44>": 32068,
|
32206 |
+
"<0x45>": 32069,
|
32207 |
+
"<0x46>": 32070,
|
32208 |
+
"<0x47>": 32071,
|
32209 |
+
"<0x48>": 32072,
|
32210 |
+
"<0x49>": 32073,
|
32211 |
+
"<0x4a>": 32074,
|
32212 |
+
"<0x4b>": 32075,
|
32213 |
+
"<0x4c>": 32076,
|
32214 |
+
"<0x4d>": 32077,
|
32215 |
+
"<0x4e>": 32078,
|
32216 |
+
"<0x4f>": 32079,
|
32217 |
+
"<0x50>": 32080,
|
32218 |
+
"<0x51>": 32081,
|
32219 |
+
"<0x52>": 32082,
|
32220 |
+
"<0x53>": 32083,
|
32221 |
+
"<0x54>": 32084,
|
32222 |
+
"<0x55>": 32085,
|
32223 |
+
"<0x56>": 32086,
|
32224 |
+
"<0x57>": 32087,
|
32225 |
+
"<0x58>": 32088,
|
32226 |
+
"<0x59>": 32089,
|
32227 |
+
"<0x5a>": 32090,
|
32228 |
+
"<0x5b>": 32091,
|
32229 |
+
"<0x5c>": 32092,
|
32230 |
+
"<0x5d>": 32093,
|
32231 |
+
"<0x5e>": 32094,
|
32232 |
+
"<0x5f>": 32095,
|
32233 |
+
"<0x60>": 32096,
|
32234 |
+
"<0x61>": 32097,
|
32235 |
+
"<0x62>": 32098,
|
32236 |
+
"<0x63>": 32099,
|
32237 |
+
"<0x64>": 32100,
|
32238 |
+
"<0x65>": 32101,
|
32239 |
+
"<0x66>": 32102,
|
32240 |
+
"<0x67>": 32103,
|
32241 |
+
"<0x68>": 32104,
|
32242 |
+
"<0x69>": 32105,
|
32243 |
+
"<0x6a>": 32106,
|
32244 |
+
"<0x6b>": 32107,
|
32245 |
+
"<0x6c>": 32108,
|
32246 |
+
"<0x6d>": 32109,
|
32247 |
+
"<0x6e>": 32110,
|
32248 |
+
"<0x6f>": 32111,
|
32249 |
+
"<0x70>": 32112,
|
32250 |
+
"<0x71>": 32113,
|
32251 |
+
"<0x72>": 32114,
|
32252 |
+
"<0x73>": 32115,
|
32253 |
+
"<0x74>": 32116,
|
32254 |
+
"<0x75>": 32117,
|
32255 |
+
"<0x76>": 32118,
|
32256 |
+
"<0x77>": 32119,
|
32257 |
+
"<0x78>": 32120,
|
32258 |
+
"<0x79>": 32121,
|
32259 |
+
"<0x7a>": 32122,
|
32260 |
+
"<0x7b>": 32123,
|
32261 |
+
"<0x7c>": 32124,
|
32262 |
+
"<0x7d>": 32125,
|
32263 |
+
"<0x7e>": 32126,
|
32264 |
+
"<0x7f>": 32127,
|
32265 |
+
"<0x80>": 32128,
|
32266 |
+
"<0x81>": 32129,
|
32267 |
+
"<0x82>": 32130,
|
32268 |
+
"<0x83>": 32131,
|
32269 |
+
"<0x84>": 32132,
|
32270 |
+
"<0x85>": 32133,
|
32271 |
+
"<0x86>": 32134,
|
32272 |
+
"<0x87>": 32135,
|
32273 |
+
"<0x88>": 32136,
|
32274 |
+
"<0x89>": 32137,
|
32275 |
+
"<0x8a>": 32138,
|
32276 |
+
"<0x8b>": 32139,
|
32277 |
+
"<0x8c>": 32140,
|
32278 |
+
"<0x8d>": 32141,
|
32279 |
+
"<0x8e>": 32142,
|
32280 |
+
"<0x8f>": 32143,
|
32281 |
+
"<0x90>": 32144,
|
32282 |
+
"<0x91>": 32145,
|
32283 |
+
"<0x92>": 32146,
|
32284 |
+
"<0x93>": 32147,
|
32285 |
+
"<0x94>": 32148,
|
32286 |
+
"<0x95>": 32149,
|
32287 |
+
"<0x96>": 32150,
|
32288 |
+
"<0x97>": 32151,
|
32289 |
+
"<0x98>": 32152,
|
32290 |
+
"<0x99>": 32153,
|
32291 |
+
"<0x9a>": 32154,
|
32292 |
+
"<0x9b>": 32155,
|
32293 |
+
"<0x9c>": 32156,
|
32294 |
+
"<0x9d>": 32157,
|
32295 |
+
"<0x9e>": 32158,
|
32296 |
+
"<0x9f>": 32159,
|
32297 |
+
"<0xa0>": 32160,
|
32298 |
+
"<0xa1>": 32161,
|
32299 |
+
"<0xa2>": 32162,
|
32300 |
+
"<0xa3>": 32163,
|
32301 |
+
"<0xa4>": 32164,
|
32302 |
+
"<0xa5>": 32165,
|
32303 |
+
"<0xa6>": 32166,
|
32304 |
+
"<0xa7>": 32167,
|
32305 |
+
"<0xa8>": 32168,
|
32306 |
+
"<0xa9>": 32169,
|
32307 |
+
"<0xaa>": 32170,
|
32308 |
+
"<0xab>": 32171,
|
32309 |
+
"<0xac>": 32172,
|
32310 |
+
"<0xad>": 32173,
|
32311 |
+
"<0xae>": 32174,
|
32312 |
+
"<0xaf>": 32175,
|
32313 |
+
"<0xb0>": 32176,
|
32314 |
+
"<0xb1>": 32177,
|
32315 |
+
"<0xb2>": 32178,
|
32316 |
+
"<0xb3>": 32179,
|
32317 |
+
"<0xb4>": 32180,
|
32318 |
+
"<0xb5>": 32181,
|
32319 |
+
"<0xb6>": 32182,
|
32320 |
+
"<0xb7>": 32183,
|
32321 |
+
"<0xb8>": 32184,
|
32322 |
+
"<0xb9>": 32185,
|
32323 |
+
"<0xba>": 32186,
|
32324 |
+
"<0xbb>": 32187,
|
32325 |
+
"<0xbc>": 32188,
|
32326 |
+
"<0xbd>": 32189,
|
32327 |
+
"<0xbe>": 32190,
|
32328 |
+
"<0xbf>": 32191,
|
32329 |
+
"<0xc0>": 32192,
|
32330 |
+
"<0xc1>": 32193,
|
32331 |
+
"<0xc2>": 32194,
|
32332 |
+
"<0xc3>": 32195,
|
32333 |
+
"<0xc4>": 32196,
|
32334 |
+
"<0xc5>": 32197,
|
32335 |
+
"<0xc6>": 32198,
|
32336 |
+
"<0xc7>": 32199,
|
32337 |
+
"<0xc8>": 32200,
|
32338 |
+
"<0xc9>": 32201,
|
32339 |
+
"<0xca>": 32202,
|
32340 |
+
"<0xcb>": 32203,
|
32341 |
+
"<0xcc>": 32204,
|
32342 |
+
"<0xcd>": 32205,
|
32343 |
+
"<0xce>": 32206,
|
32344 |
+
"<0xcf>": 32207,
|
32345 |
+
"<0xd0>": 32208,
|
32346 |
+
"<0xd1>": 32209,
|
32347 |
+
"<0xd2>": 32210,
|
32348 |
+
"<0xd3>": 32211,
|
32349 |
+
"<0xd4>": 32212,
|
32350 |
+
"<0xd5>": 32213,
|
32351 |
+
"<0xd6>": 32214,
|
32352 |
+
"<0xd7>": 32215,
|
32353 |
+
"<0xd8>": 32216,
|
32354 |
+
"<0xd9>": 32217,
|
32355 |
+
"<0xda>": 32218,
|
32356 |
+
"<0xdb>": 32219,
|
32357 |
+
"<0xdc>": 32220,
|
32358 |
+
"<0xdd>": 32221,
|
32359 |
+
"<0xde>": 32222,
|
32360 |
+
"<0xdf>": 32223,
|
32361 |
+
"<0xe0>": 32224,
|
32362 |
+
"<0xe1>": 32225,
|
32363 |
+
"<0xe2>": 32226,
|
32364 |
+
"<0xe3>": 32227,
|
32365 |
+
"<0xe4>": 32228,
|
32366 |
+
"<0xe5>": 32229,
|
32367 |
+
"<0xe6>": 32230,
|
32368 |
+
"<0xe7>": 32231,
|
32369 |
+
"<0xe8>": 32232,
|
32370 |
+
"<0xe9>": 32233,
|
32371 |
+
"<0xea>": 32234,
|
32372 |
+
"<0xeb>": 32235,
|
32373 |
+
"<0xec>": 32236,
|
32374 |
+
"<0xed>": 32237,
|
32375 |
+
"<0xee>": 32238,
|
32376 |
+
"<0xef>": 32239,
|
32377 |
+
"<0xf0>": 32240,
|
32378 |
+
"<0xf1>": 32241,
|
32379 |
+
"<0xf2>": 32242,
|
32380 |
+
"<0xf3>": 32243,
|
32381 |
+
"<0xf4>": 32244,
|
32382 |
+
"<0xf5>": 32245,
|
32383 |
+
"<0xf6>": 32246,
|
32384 |
+
"<0xf7>": 32247,
|
32385 |
+
"<0xf8>": 32248,
|
32386 |
+
"<0xf9>": 32249,
|
32387 |
+
"<0xfa>": 32250,
|
32388 |
+
"<0xfb>": 32251,
|
32389 |
+
"<0xfc>": 32252,
|
32390 |
+
"<0xfd>": 32253,
|
32391 |
+
"<0xfe>": 32254,
|
32392 |
+
"<0xff>": 32255,
|
32393 |
+
"▁": 32256,
|
32394 |
+
"▁▁": 32257,
|
32395 |
+
"▁▁▁": 32258,
|
32396 |
+
"▁▁▁▁": 32259,
|
32397 |
+
"▁▁▁▁▁": 32260,
|
32398 |
+
"▁▁▁▁▁▁": 32261,
|
32399 |
+
"▁▁▁▁▁▁▁": 32262,
|
32400 |
+
"▁▁▁▁▁▁▁▁": 32263,
|
32401 |
+
"▁▁▁▁▁▁▁▁▁": 32264,
|
32402 |
+
"▁▁▁▁▁▁▁▁▁▁": 32265,
|
32403 |
+
"▁▁▁▁▁▁▁▁▁▁▁": 32266,
|
32404 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁": 32267,
|
32405 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁▁": 32268,
|
32406 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 32269,
|
32407 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁": 32270
|
32408 |
},
|
32409 |
"merges": [
|
32410 |
"▁ क",
|
|
|
64238 |
"रे क",
|
64239 |
"वर न▁",
|
64240 |
"उपयोगकर् ता▁को▁",
|
64241 |
+
"से▁ से▁",
|
64242 |
+
"▁ ▁",
|
64243 |
+
"▁ ▁▁",
|
64244 |
+
"▁ ▁▁▁",
|
64245 |
+
"▁ ▁▁▁▁",
|
64246 |
+
"▁ ▁▁▁▁▁",
|
64247 |
+
"▁ ▁▁▁▁▁▁",
|
64248 |
+
"▁ ▁▁▁▁▁▁▁",
|
64249 |
+
"▁ ▁▁▁▁▁▁▁▁",
|
64250 |
+
"▁ ▁▁▁▁▁▁▁▁▁",
|
64251 |
+
"▁ ▁▁▁▁▁▁▁▁▁▁",
|
64252 |
+
"▁ ▁▁▁▁▁▁▁▁▁▁▁",
|
64253 |
+
"▁ ▁▁▁▁▁▁▁▁▁▁▁▁",
|
64254 |
+
"▁ ▁▁▁▁▁▁▁▁▁▁▁▁▁",
|
64255 |
+
"▁▁ ▁",
|
64256 |
+
"▁▁ ▁▁",
|
64257 |
+
"▁▁ ▁▁▁",
|
64258 |
+
"▁▁ ▁▁▁▁",
|
64259 |
+
"▁▁ ▁▁▁▁▁",
|
64260 |
+
"▁▁ ▁▁▁▁▁▁",
|
64261 |
+
"▁▁ ▁▁▁▁▁▁▁",
|
64262 |
+
"▁▁ ▁▁▁▁▁▁▁▁",
|
64263 |
+
"▁▁ ▁▁▁▁▁▁▁▁▁",
|
64264 |
+
"▁▁ ▁▁▁▁▁▁▁▁▁▁",
|
64265 |
+
"▁▁ ▁▁▁▁▁▁▁▁▁▁▁",
|
64266 |
+
"▁▁ ▁▁▁▁▁▁▁▁▁▁▁▁",
|
64267 |
+
"▁▁▁ ▁",
|
64268 |
+
"▁▁▁ ▁▁",
|
64269 |
+
"▁▁▁ ▁▁▁",
|
64270 |
+
"▁▁▁ ▁▁▁▁",
|
64271 |
+
"▁▁▁ ▁▁▁▁▁",
|
64272 |
+
"▁▁▁ ▁▁▁▁▁▁",
|
64273 |
+
"▁▁▁ ▁▁▁▁▁▁▁",
|
64274 |
+
"▁▁▁ ▁▁▁▁▁▁▁▁",
|
64275 |
+
"▁▁▁ ▁▁▁▁▁▁▁▁▁",
|
64276 |
+
"▁▁▁ ▁▁▁▁▁▁▁▁▁▁",
|
64277 |
+
"▁▁▁ ▁▁▁▁▁▁▁▁▁▁▁",
|
64278 |
+
"▁▁▁▁ ▁",
|
64279 |
+
"▁▁▁▁ ▁▁",
|
64280 |
+
"▁▁▁▁ ▁▁▁",
|
64281 |
+
"▁▁▁▁ ▁▁▁▁",
|
64282 |
+
"▁▁▁▁ ▁▁▁▁▁",
|
64283 |
+
"▁▁▁▁ ▁▁▁▁▁▁",
|
64284 |
+
"▁▁▁▁ ▁▁▁▁▁▁▁",
|
64285 |
+
"▁▁▁▁ ▁▁▁▁▁▁▁▁",
|
64286 |
+
"▁▁▁▁ ▁▁▁▁▁▁▁▁▁",
|
64287 |
+
"▁▁▁▁ ▁▁▁▁▁▁▁▁▁▁",
|
64288 |
+
"▁▁▁▁▁ ▁",
|
64289 |
+
"▁▁▁▁▁ ▁▁",
|
64290 |
+
"▁▁▁▁▁ ▁▁▁",
|
64291 |
+
"▁▁▁▁▁ ▁▁▁▁",
|
64292 |
+
"▁▁▁▁▁ ▁▁▁▁▁",
|
64293 |
+
"▁▁▁▁▁ ▁▁▁▁▁▁",
|
64294 |
+
"▁▁▁▁▁ ▁▁▁▁▁▁▁",
|
64295 |
+
"▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
|
64296 |
+
"▁▁▁▁▁ ▁▁▁▁▁▁▁▁▁",
|
64297 |
+
"▁▁▁▁▁▁ ▁",
|
64298 |
+
"▁▁▁▁▁▁ ▁▁",
|
64299 |
+
"▁▁▁▁▁▁ ▁▁▁",
|
64300 |
+
"▁▁▁▁▁▁ ▁▁▁▁",
|
64301 |
+
"▁▁▁▁▁▁ ▁▁▁▁▁",
|
64302 |
+
"▁▁▁▁▁▁ ▁▁▁▁▁▁",
|
64303 |
+
"▁▁▁▁▁▁ ▁▁▁▁▁▁▁",
|
64304 |
+
"▁▁▁▁▁▁ ▁▁▁▁▁▁▁▁",
|
64305 |
+
"▁▁▁▁▁▁▁ ▁",
|
64306 |
+
"▁▁▁▁▁▁▁ ▁▁",
|
64307 |
+
"▁▁▁▁▁▁▁ ▁▁▁",
|
64308 |
+
"▁▁▁▁▁▁▁ ▁▁▁▁",
|
64309 |
+
"▁▁▁▁▁▁▁ ▁▁▁▁▁",
|
64310 |
+
"▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
|
64311 |
+
"▁▁▁▁▁▁▁ ▁▁▁▁▁▁▁",
|
64312 |
+
"▁▁▁▁▁▁▁▁ ▁",
|
64313 |
+
"▁▁▁▁▁▁▁▁ ▁▁",
|
64314 |
+
"▁▁▁▁▁▁▁▁ ▁▁▁",
|
64315 |
+
"▁▁▁▁▁▁▁▁ ▁▁▁▁",
|
64316 |
+
"▁▁▁▁▁▁▁▁ ▁▁▁▁▁",
|
64317 |
+
"▁▁▁▁▁▁▁▁ ▁▁▁▁▁▁",
|
64318 |
+
"▁▁▁▁▁▁▁▁▁ ▁",
|
64319 |
+
"▁▁▁▁▁▁▁▁▁ ▁▁",
|
64320 |
+
"▁▁▁▁▁▁▁▁▁ ▁▁▁",
|
64321 |
+
"▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
|
64322 |
+
"▁▁▁▁▁▁▁▁▁ ▁▁▁▁▁",
|
64323 |
+
"▁▁▁▁▁▁▁▁▁▁ ▁",
|
64324 |
+
"▁▁▁▁▁▁▁▁▁▁ ▁▁",
|
64325 |
+
"▁▁▁▁▁▁▁▁▁▁ ▁▁▁",
|
64326 |
+
"▁▁▁▁▁▁▁▁▁▁ ▁▁▁▁",
|
64327 |
+
"▁▁▁▁▁▁▁▁▁▁▁ ▁",
|
64328 |
+
"▁▁▁▁▁▁▁▁▁▁▁ ▁▁",
|
64329 |
+
"▁▁▁▁▁▁▁▁▁▁▁ ▁▁▁",
|
64330 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁ ▁",
|
64331 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁ ▁▁",
|
64332 |
+
"▁▁▁▁▁▁▁▁▁▁▁▁▁ ▁"
|
64333 |
]
|
64334 |
}
|
64335 |
}
|
tokenizer_config.json
CHANGED
@@ -1,5 +1,32 @@
|
|
1 |
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
"bos_token": "<s>",
|
|
|
3 |
"clean_up_tokenization_spaces": true,
|
4 |
"eos_token": "</s>",
|
5 |
"model_max_length": 1000000000000000019884624838656,
|
|
|
1 |
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "<unk>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "<s>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"2": {
|
20 |
+
"content": "</s>",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
}
|
27 |
+
},
|
28 |
"bos_token": "<s>",
|
29 |
+
"chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|उपयोगकर्ता|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|प्रणाली|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|सहायक|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|सहायक|>' }}\n{% endif %}\n{% endfor %}\n",
|
30 |
"clean_up_tokenization_spaces": true,
|
31 |
"eos_token": "</s>",
|
32 |
"model_max_length": 1000000000000000019884624838656,
|