Johannes
committed on
Commit
·
690bf20
1
Parent(s):
7c2db66
speciesLM k6, metazoa, upstream
Browse files
- added_tokens.json +496 -0
- config.json +26 -0
- generation_config.json +5 -0
- modeling_rotarybert.py +162 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +503 -0
- tokenizer.json +0 -0
- tokenizer_config.json +17 -0
added_tokens.json
ADDED
@@ -0,0 +1,496 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"acanthaster_planci_gca001949145v1": 4101,
|
3 |
+
"acanthochromis_polyacanthus": 4102,
|
4 |
+
"accipiter_nisus": 4103,
|
5 |
+
"acromyrmex_echinatior_gca000204515v1rs": 4104,
|
6 |
+
"acropora_millepora_gca013753865v1": 4105,
|
7 |
+
"actinia_equina_gca011057435": 4106,
|
8 |
+
"actinia_tenebrosa_gca009602425v1": 4107,
|
9 |
+
"acyrthosiphon_pisum": 4108,
|
10 |
+
"adineta_vaga": 4109,
|
11 |
+
"aedes_aegypti_lvpagwg": 4110,
|
12 |
+
"aedes_albopictus": 4111,
|
13 |
+
"agrilus_planipennis_gca000699045v2": 4112,
|
14 |
+
"ailuropoda_melanoleuca": 4113,
|
15 |
+
"amazona_collaria": 4114,
|
16 |
+
"amphibalanus_amphitrite_gca019059575v1": 4115,
|
17 |
+
"amphilophus_citrinellus": 4116,
|
18 |
+
"amphimedon_queenslandica": 4117,
|
19 |
+
"amphiprion_ocellaris": 4118,
|
20 |
+
"amphiprion_percula": 4119,
|
21 |
+
"amyelois_transitella_gca001186105v1rs": 4120,
|
22 |
+
"anabas_testudineus": 4121,
|
23 |
+
"anas_platyrhynchos": 4122,
|
24 |
+
"anas_zonorhyncha": 4123,
|
25 |
+
"anneissia_japonica_gca011630105v1": 4124,
|
26 |
+
"anolis_carolinensis": 4125,
|
27 |
+
"anopheles_albimanus": 4126,
|
28 |
+
"anopheles_arabiensis": 4127,
|
29 |
+
"anopheles_atroparvus_gca914969975": 4128,
|
30 |
+
"anopheles_christyi": 4129,
|
31 |
+
"anopheles_coluzzii_ngousso": 4130,
|
32 |
+
"anopheles_culicifacies": 4131,
|
33 |
+
"anopheles_darlingi": 4132,
|
34 |
+
"anopheles_dirus": 4133,
|
35 |
+
"anopheles_epiroticus": 4134,
|
36 |
+
"anopheles_farauti": 4135,
|
37 |
+
"anopheles_funestus": 4136,
|
38 |
+
"anopheles_gambiae": 4137,
|
39 |
+
"anopheles_maculatus": 4138,
|
40 |
+
"anopheles_melas": 4139,
|
41 |
+
"anopheles_merus": 4140,
|
42 |
+
"anopheles_minimus": 4141,
|
43 |
+
"anopheles_quadriannulatus": 4142,
|
44 |
+
"anopheles_sinensis_china": 4143,
|
45 |
+
"anopheles_stephensi": 4144,
|
46 |
+
"anoplophora_glabripennis": 4145,
|
47 |
+
"anser_brachyrhynchus": 4146,
|
48 |
+
"anser_cygnoides": 4147,
|
49 |
+
"anthonomus_grandis_gca022605725v3rs": 4148,
|
50 |
+
"aotus_nancymaae": 4149,
|
51 |
+
"aphidius_gifuensis_gca014905175v1": 4150,
|
52 |
+
"apis_dorsata_gca000469605v1rs": 4151,
|
53 |
+
"apis_florea_gca000184785v2rs": 4152,
|
54 |
+
"apis_mellifera": 4153,
|
55 |
+
"aplysia_californica_gca000002075v2": 4154,
|
56 |
+
"apteryx_haastii": 4155,
|
57 |
+
"apteryx_owenii": 4156,
|
58 |
+
"apteryx_rowi": 4157,
|
59 |
+
"aquila_chrysaetos_chrysaetos": 4158,
|
60 |
+
"ascaris_suum": 4159,
|
61 |
+
"astatotilapia_calliptera": 4160,
|
62 |
+
"asterias_rubens_gca902459465v3": 4161,
|
63 |
+
"astyanax_mexicanus": 4162,
|
64 |
+
"athalia_rosae_gca917208135v1": 4163,
|
65 |
+
"athene_cunicularia": 4164,
|
66 |
+
"atta_cephalotes": 4165,
|
67 |
+
"bactrocera_dorsalis_gca000789215v2": 4166,
|
68 |
+
"bactrocera_latifrons_gca001853355v1": 4167,
|
69 |
+
"bactrocera_tryoni_gca016617805v2": 4168,
|
70 |
+
"balaenoptera_musculus": 4169,
|
71 |
+
"belgica_antarctica": 4170,
|
72 |
+
"bemisia_tabaci_ssa3nig": 4171,
|
73 |
+
"betta_splendens": 4172,
|
74 |
+
"bicyclus_anynana_gca900239965v1rs": 4173,
|
75 |
+
"biomphalaria_glabrata": 4174,
|
76 |
+
"bison_bison_bison": 4175,
|
77 |
+
"bombus_impatiens": 4176,
|
78 |
+
"bombus_terrestris_gca910591885v2": 4177,
|
79 |
+
"bombyx_mandarina_gca003987935v1rs": 4178,
|
80 |
+
"bombyx_mori": 4179,
|
81 |
+
"bos_grunniens": 4180,
|
82 |
+
"bos_indicus_hybrid": 4181,
|
83 |
+
"bos_mutus": 4182,
|
84 |
+
"bos_taurus_hybrid": 4183,
|
85 |
+
"branchiostoma_lanceolatum": 4184,
|
86 |
+
"brugia_malayi": 4185,
|
87 |
+
"bubo_bubo": 4186,
|
88 |
+
"buteo_japonicus": 4187,
|
89 |
+
"caenorhabditis_brenneri": 4188,
|
90 |
+
"caenorhabditis_briggsae": 4189,
|
91 |
+
"caenorhabditis_elegans": 4190,
|
92 |
+
"caenorhabditis_japonica": 4191,
|
93 |
+
"caenorhabditis_remanei": 4192,
|
94 |
+
"cairina_moschata_domestica": 4193,
|
95 |
+
"calidris_pugnax": 4194,
|
96 |
+
"calidris_pygmaea": 4195,
|
97 |
+
"callithrix_jacchus": 4196,
|
98 |
+
"callorhinchus_milii": 4197,
|
99 |
+
"camarhynchus_parvulus": 4198,
|
100 |
+
"camelus_dromedarius": 4199,
|
101 |
+
"camponotus_floridanus_gca003227725v1rs": 4200,
|
102 |
+
"canis_lupus_familiarisgsd": 4201,
|
103 |
+
"capitella_teleta": 4202,
|
104 |
+
"capra_hircus": 4203,
|
105 |
+
"carassius_auratus": 4204,
|
106 |
+
"carlito_syrichta": 4205,
|
107 |
+
"castor_canadensis": 4206,
|
108 |
+
"catagonus_wagneri": 4207,
|
109 |
+
"catharus_ustulatus": 4208,
|
110 |
+
"cavia_aperea": 4209,
|
111 |
+
"cavia_porcellus": 4210,
|
112 |
+
"cebus_imitator": 4211,
|
113 |
+
"centruroides_sculpturatus_gca000671375v2": 4212,
|
114 |
+
"ceratitis_capitata_gca000347755v4": 4213,
|
115 |
+
"cercocebus_atys": 4214,
|
116 |
+
"cervus_hanglu_yarkandensis": 4215,
|
117 |
+
"chelonoidis_abingdonii": 4216,
|
118 |
+
"chelonus_insularis_gca013357705v1rs": 4217,
|
119 |
+
"chelydra_serpentina": 4218,
|
120 |
+
"chinchilla_lanigera": 4219,
|
121 |
+
"chlorocebus_sabaeus": 4220,
|
122 |
+
"choloepus_hoffmanni": 4221,
|
123 |
+
"chrysemys_picta_bellii": 4222,
|
124 |
+
"chrysolophus_pictus": 4223,
|
125 |
+
"cimex_lectularius": 4224,
|
126 |
+
"ciona_intestinalis": 4225,
|
127 |
+
"ciona_savignyi": 4226,
|
128 |
+
"clupea_harengus": 4227,
|
129 |
+
"clytia_hemisphaerica_gca902728285": 4228,
|
130 |
+
"colobus_angolensis_palliatus": 4229,
|
131 |
+
"copidosoma_floridanum_gca000648655v2": 4230,
|
132 |
+
"corvus_moneduloides": 4231,
|
133 |
+
"cotesia_glomerata_gca020080835v1": 4232,
|
134 |
+
"cottoperca_gobio": 4233,
|
135 |
+
"coturnix_japonica": 4234,
|
136 |
+
"crassostrea_gigas": 4235,
|
137 |
+
"crassostrea_virginica_gca002022765v4": 4236,
|
138 |
+
"cricetulus_griseus_picr": 4237,
|
139 |
+
"crocodylus_porosus": 4238,
|
140 |
+
"culex_quinquefasciatus_gca015732765v1": 4239,
|
141 |
+
"culicoides_sonorensis": 4240,
|
142 |
+
"cyanistes_caeruleus": 4241,
|
143 |
+
"cyclopterus_lumpus": 4242,
|
144 |
+
"cynoglossus_semilaevis": 4243,
|
145 |
+
"cyprinodon_variegatus": 4244,
|
146 |
+
"cyprinus_carpio_carpio": 4245,
|
147 |
+
"danaus_plexippus": 4246,
|
148 |
+
"danio_rerio": 4247,
|
149 |
+
"daphnia_magna_gca020631705v2": 4248,
|
150 |
+
"daphnia_pulex": 4249,
|
151 |
+
"daphnia_pulicaria_gca021234035v2rs": 4250,
|
152 |
+
"dasypus_novemcinctus": 4251,
|
153 |
+
"delphinapterus_leucas": 4252,
|
154 |
+
"dendroctonus_ponderosae_gca000355655v1": 4253,
|
155 |
+
"dendronephthya_gigantea_gca004324835v1": 4254,
|
156 |
+
"denticeps_clupeoides": 4255,
|
157 |
+
"dermacentor_andersoni_gca023375885v2rs": 4256,
|
158 |
+
"dermacentor_silvarum_gca013339745v1": 4257,
|
159 |
+
"dermatophagoides_pteronyssinus_gca001901225v2": 4258,
|
160 |
+
"diabrotica_virgifera_gca917563875v2rs": 4259,
|
161 |
+
"dicentrarchus_labrax": 4260,
|
162 |
+
"dimorphilus_gyrociliatus_gca904063045v1": 4261,
|
163 |
+
"dinothrombium_tinctorium": 4262,
|
164 |
+
"dipodomys_ordii": 4263,
|
165 |
+
"diuraphis_noxia_gca001186385v1": 4264,
|
166 |
+
"dromaius_novaehollandiae": 4265,
|
167 |
+
"drosophila_ananassae": 4266,
|
168 |
+
"drosophila_erecta": 4267,
|
169 |
+
"drosophila_grimshawi": 4268,
|
170 |
+
"drosophila_melanogaster": 4269,
|
171 |
+
"drosophila_mojavensis": 4270,
|
172 |
+
"drosophila_persimilis": 4271,
|
173 |
+
"drosophila_pseudoobscura": 4272,
|
174 |
+
"drosophila_sechellia": 4273,
|
175 |
+
"drosophila_simulans": 4274,
|
176 |
+
"drosophila_virilis": 4275,
|
177 |
+
"drosophila_willistoni": 4276,
|
178 |
+
"drosophila_yakuba": 4277,
|
179 |
+
"dufourea_novaeangliae_gca001272555v1rs": 4278,
|
180 |
+
"echeneis_naucrates": 4279,
|
181 |
+
"echinococcus_granulosus_gca000524195v1rs": 4280,
|
182 |
+
"echinops_telfairi": 4281,
|
183 |
+
"electrophorus_electricus": 4282,
|
184 |
+
"eptatretus_burgeri": 4283,
|
185 |
+
"equus_asinus": 4284,
|
186 |
+
"equus_caballus": 4285,
|
187 |
+
"erinaceus_europaeus": 4286,
|
188 |
+
"erpetoichthys_calabaricus": 4287,
|
189 |
+
"erythrura_gouldiae": 4288,
|
190 |
+
"esox_lucius": 4289,
|
191 |
+
"eufriesea_mexicana_gca001483705v1rs": 4290,
|
192 |
+
"eurytemora_affinis_gca000591075v2": 4291,
|
193 |
+
"exaiptasia_diaphana_gca001417965v1": 4292,
|
194 |
+
"falco_tinnunculus": 4293,
|
195 |
+
"felis_catus": 4294,
|
196 |
+
"ficedula_albicollis": 4295,
|
197 |
+
"folsomia_candida": 4296,
|
198 |
+
"fukomys_damarensis": 4297,
|
199 |
+
"fundulus_heteroclitus": 4298,
|
200 |
+
"gadus_morhua": 4299,
|
201 |
+
"galendromus_occidentalis_gca000255335v2rs": 4300,
|
202 |
+
"galleria_mellonella_gca003640425v2rs": 4301,
|
203 |
+
"gallus_gallus_gca000002315v5": 4302,
|
204 |
+
"gambusia_affinis": 4303,
|
205 |
+
"gasterosteus_aculeatus": 4304,
|
206 |
+
"geospiza_fortis": 4305,
|
207 |
+
"gigantopelta_aegis_gca016097555v1": 4306,
|
208 |
+
"glossina_austeni": 4307,
|
209 |
+
"glossina_brevipalpis": 4308,
|
210 |
+
"glossina_fuscipes": 4309,
|
211 |
+
"glossina_morsitans": 4310,
|
212 |
+
"glossina_pallidipes": 4311,
|
213 |
+
"glossina_palpalis": 4312,
|
214 |
+
"gopherus_agassizii": 4313,
|
215 |
+
"gopherus_evgoodei": 4314,
|
216 |
+
"gorilla_gorilla": 4315,
|
217 |
+
"gouania_willdenowi": 4316,
|
218 |
+
"habropoda_laboriosa_gca001263275v1rs": 4317,
|
219 |
+
"haemaphysalis_longicornis_gca013339765v1": 4318,
|
220 |
+
"haliotis_rubra_gca003918875v1rs": 4319,
|
221 |
+
"haliotis_rufescens_gca023055435v1rs": 4320,
|
222 |
+
"haplochromis_burtoni": 4321,
|
223 |
+
"harpegnathos_saltator_gca003227715v2rs": 4322,
|
224 |
+
"heliconius_melpomene": 4323,
|
225 |
+
"helicoverpa_armigera_gca023701775v1rs": 4324,
|
226 |
+
"helicoverpa_zea_gca022581195v1rs": 4325,
|
227 |
+
"helobdella_robusta": 4326,
|
228 |
+
"hermetia_illucens_gca905115235v1": 4327,
|
229 |
+
"heterocephalus_glaber_male": 4328,
|
230 |
+
"hippocampus_comes": 4329,
|
231 |
+
"hofstenia_miamia": 4330,
|
232 |
+
"homalodisca_vitripennis_gca021130785v2rs": 4331,
|
233 |
+
"homarus_americanus_gca018991925v1": 4332,
|
234 |
+
"homo_sapiens": 4333,
|
235 |
+
"hucho_hucho": 4334,
|
236 |
+
"hyalella_azteca_gca000764305v2": 4335,
|
237 |
+
"hyalomma_asiaticum_gca013339685v1": 4336,
|
238 |
+
"hydra_vulgaris_gca022113875v1rs": 4337,
|
239 |
+
"hymenolepis_microstoma": 4338,
|
240 |
+
"hypsibius_exemplaris_gca002082055v1": 4339,
|
241 |
+
"ictalurus_punctatus": 4340,
|
242 |
+
"ictidomys_tridecemlineatus": 4341,
|
243 |
+
"ixodes_persulcatus_gca013358835v1": 4342,
|
244 |
+
"ixodes_scapularis_gca016920785v2": 4343,
|
245 |
+
"jaculus_jaculus": 4344,
|
246 |
+
"junco_hyemalis": 4345,
|
247 |
+
"kryptolebias_marmoratus": 4346,
|
248 |
+
"labrus_bergylta": 4347,
|
249 |
+
"larimichthys_crocea": 4348,
|
250 |
+
"lates_calcarifer": 4349,
|
251 |
+
"laticauda_laticaudata": 4350,
|
252 |
+
"latimeria_chalumnae": 4351,
|
253 |
+
"leguminivora_glycinivorella_gca023078275v1rs": 4352,
|
254 |
+
"lepeophtheirus_salmonis_gca016086655v3rs": 4353,
|
255 |
+
"lepidothrix_coronata": 4354,
|
256 |
+
"lepisosteus_oculatus": 4355,
|
257 |
+
"leptinotarsa_decemlineata_gca000500325v2": 4356,
|
258 |
+
"leptobrachium_leishanense": 4357,
|
259 |
+
"leptotrombidium_deliense": 4358,
|
260 |
+
"limulus_polyphemus_gca000517525v1": 4359,
|
261 |
+
"linepithema_humile_gca000217595v1rs": 4360,
|
262 |
+
"lingula_anatina": 4361,
|
263 |
+
"loa_loa": 4362,
|
264 |
+
"lonchura_striata_domestica": 4363,
|
265 |
+
"lottia_gigantea": 4364,
|
266 |
+
"loxodonta_africana": 4365,
|
267 |
+
"lucilia_cuprina_gca022045245v1rs": 4366,
|
268 |
+
"lutzomyia_longipalpis": 4367,
|
269 |
+
"lynx_canadensis": 4368,
|
270 |
+
"lytechinus_variegatus_gca018143015v1": 4369,
|
271 |
+
"macaca_fascicularis": 4370,
|
272 |
+
"macaca_mulatta": 4371,
|
273 |
+
"macaca_nemestrina": 4372,
|
274 |
+
"malurus_cyaneus_samueli": 4373,
|
275 |
+
"manacus_vitellinus": 4374,
|
276 |
+
"mandrillus_leucophaeus": 4375,
|
277 |
+
"manduca_sexta_gca014839805v1rs": 4376,
|
278 |
+
"marmota_marmota_marmota": 4377,
|
279 |
+
"mastacembelus_armatus": 4378,
|
280 |
+
"mayetiola_destructor": 4379,
|
281 |
+
"maylandia_zebra": 4380,
|
282 |
+
"megachile_rotundata_gca000220905v1rs": 4381,
|
283 |
+
"megaselia_scalaris": 4382,
|
284 |
+
"meleagris_gallopavo": 4383,
|
285 |
+
"melitaea_cinxia_gca905220565v1": 4384,
|
286 |
+
"melopsittacus_undulatus": 4385,
|
287 |
+
"mercenaria_mercenaria_gca014805675v2": 4386,
|
288 |
+
"meriones_unguiculatus": 4387,
|
289 |
+
"mesocricetus_auratus": 4388,
|
290 |
+
"microcebus_murinus": 4389,
|
291 |
+
"microtus_ochrogaster": 4390,
|
292 |
+
"mizuhopecten_yessoensis_gca002113885v2": 4391,
|
293 |
+
"mnemiopsis_leidyi": 4392,
|
294 |
+
"mola_mola": 4393,
|
295 |
+
"monodelphis_domestica": 4394,
|
296 |
+
"monodon_monoceros": 4395,
|
297 |
+
"monomorium_pharaonis_gca013373865v2": 4396,
|
298 |
+
"monopterus_albus": 4397,
|
299 |
+
"moschus_moschiferus": 4398,
|
300 |
+
"mus_caroli": 4399,
|
301 |
+
"mus_musculus": 4400,
|
302 |
+
"mus_pahari": 4401,
|
303 |
+
"mus_spicilegus": 4402,
|
304 |
+
"mus_spretus": 4403,
|
305 |
+
"musca_domestica": 4404,
|
306 |
+
"mustela_putorius_furo": 4405,
|
307 |
+
"myotis_lucifugus": 4406,
|
308 |
+
"myripristis_murdjan": 4407,
|
309 |
+
"naja_naja": 4408,
|
310 |
+
"nannospalax_galili": 4409,
|
311 |
+
"nasonia_vitripennis": 4410,
|
312 |
+
"necator_americanus": 4411,
|
313 |
+
"nematostella_vectensis": 4412,
|
314 |
+
"neodiprion_lecontei_gca021901455v1rs": 4413,
|
315 |
+
"neodiprion_pinetum_gca021155775v1rs": 4414,
|
316 |
+
"neogobius_melanostomus": 4415,
|
317 |
+
"neolamprologus_brichardi": 4416,
|
318 |
+
"neovison_vison": 4417,
|
319 |
+
"nilaparvata_lugens_gca014356525v1rs": 4418,
|
320 |
+
"nomascus_leucogenys": 4419,
|
321 |
+
"notamacropus_eugenii": 4420,
|
322 |
+
"notechis_scutatus": 4421,
|
323 |
+
"nothobranchius_furzeri": 4422,
|
324 |
+
"nothoprocta_perdicaria": 4423,
|
325 |
+
"numida_meleagris": 4424,
|
326 |
+
"ochotona_princeps": 4425,
|
327 |
+
"octodon_degus": 4426,
|
328 |
+
"octopus_bimaculoides": 4427,
|
329 |
+
"octopus_sinensis_gca006345805v1": 4428,
|
330 |
+
"onchocerca_volvulus": 4429,
|
331 |
+
"oncorhynchus_kisutch": 4430,
|
332 |
+
"oncorhynchus_mykiss": 4431,
|
333 |
+
"oncorhynchus_tshawytscha": 4432,
|
334 |
+
"onthophagus_taurus_gca000648695v2": 4433,
|
335 |
+
"ooceraea_biroi_gca003672135v1": 4434,
|
336 |
+
"orbicella_faveolata_gca002042975v1": 4435,
|
337 |
+
"orchesella_cincta": 4436,
|
338 |
+
"oreochromis_aureus": 4437,
|
339 |
+
"oreochromis_niloticus": 4438,
|
340 |
+
"ornithorhynchus_anatinus": 4439,
|
341 |
+
"orussus_abietinus_gca000612105v2": 4440,
|
342 |
+
"oryctolagus_cuniculus": 4441,
|
343 |
+
"oryzias_javanicus": 4442,
|
344 |
+
"oryzias_latipes": 4443,
|
345 |
+
"oryzias_melastigma": 4444,
|
346 |
+
"oryzias_sinensis": 4445,
|
347 |
+
"otolemur_garnettii": 4446,
|
348 |
+
"otus_sunia": 4447,
|
349 |
+
"ovis_aries_rambouillet": 4448,
|
350 |
+
"owenia_fusiformis_gca903813345v1": 4449,
|
351 |
+
"pan_paniscus": 4450,
|
352 |
+
"pan_troglodytes": 4451,
|
353 |
+
"panthera_leo": 4452,
|
354 |
+
"panthera_pardus": 4453,
|
355 |
+
"panthera_tigris_altaica": 4454,
|
356 |
+
"papio_anubis": 4455,
|
357 |
+
"parambassis_ranga": 4456,
|
358 |
+
"paramormyrops_kingsleyae": 4457,
|
359 |
+
"parasteatoda_tepidariorum_gca000365465v3": 4458,
|
360 |
+
"parus_major": 4459,
|
361 |
+
"patiria_miniata_gca015706575v1": 4460,
|
362 |
+
"pavo_cristatus": 4461,
|
363 |
+
"pectinophora_gossypiella_gca024362695v1rs": 4462,
|
364 |
+
"pediculus_humanus": 4463,
|
365 |
+
"pelodiscus_sinensis": 4464,
|
366 |
+
"pelusios_castaneus": 4465,
|
367 |
+
"penaeus_chinensis_gca019202785v2rs": 4466,
|
368 |
+
"penaeus_japonicus_gca017312705v1": 4467,
|
369 |
+
"penaeus_monodon_gca015228065v1": 4468,
|
370 |
+
"penaeus_vannamei_gca003789085v1": 4469,
|
371 |
+
"periophthalmus_magnuspinnatus": 4470,
|
372 |
+
"peromyscus_maniculatus_bairdii": 4471,
|
373 |
+
"petromyzon_marinus": 4472,
|
374 |
+
"phascolarctos_cinereus": 4473,
|
375 |
+
"phasianus_colchicus": 4474,
|
376 |
+
"phlebotomus_papatasi": 4475,
|
377 |
+
"phlebotomus_perniciosus_gca918844115v2": 4476,
|
378 |
+
"phocoena_sinus": 4477,
|
379 |
+
"physeter_catodon": 4478,
|
380 |
+
"piliocolobus_tephrosceles": 4479,
|
381 |
+
"pocillopora_damicornis_gca003704095v1": 4480,
|
382 |
+
"podarcis_muralis": 4481,
|
383 |
+
"poecilia_formosa": 4482,
|
384 |
+
"poecilia_latipinna": 4483,
|
385 |
+
"poecilia_mexicana": 4484,
|
386 |
+
"poecilia_reticulata": 4485,
|
387 |
+
"pogona_vitticeps": 4486,
|
388 |
+
"pogonomyrmex_barbatus_gca000187915v1rs": 4487,
|
389 |
+
"polistes_canadensis_gca001313835v1rs": 4488,
|
390 |
+
"polistes_dominula_gca001465965v1rs": 4489,
|
391 |
+
"polistes_fuscatus_gca010416935v1rs": 4490,
|
392 |
+
"pollicipes_pollicipes_gca011947565v2": 4491,
|
393 |
+
"pomacea_canaliculata_gca003073045v1": 4492,
|
394 |
+
"pomphorhynchus_laevis_gca012934845v2gb": 4493,
|
395 |
+
"pongo_abelii": 4494,
|
396 |
+
"portunus_trituberculatus_gca017591435v1": 4495,
|
397 |
+
"priapulus_caudatus_gca000485595v2": 4496,
|
398 |
+
"pristionchus_pacificus": 4497,
|
399 |
+
"procambarus_clarkii_gca020424385v2": 4498,
|
400 |
+
"procavia_capensis": 4499,
|
401 |
+
"prolemur_simus": 4500,
|
402 |
+
"propithecus_coquereli": 4501,
|
403 |
+
"pseudonaja_textilis": 4502,
|
404 |
+
"pteropus_vampyrus": 4503,
|
405 |
+
"pundamilia_nyererei": 4504,
|
406 |
+
"pygocentrus_nattereri": 4505,
|
407 |
+
"rattus_norvegicus_wkybbb": 4506,
|
408 |
+
"rhagoletis_pomonella_gca013731165v1": 4507,
|
409 |
+
"rhinolophus_ferrumequinum": 4508,
|
410 |
+
"rhinopithecus_bieti": 4509,
|
411 |
+
"rhinopithecus_roxellana": 4510,
|
412 |
+
"rhipicephalus_microplus_gca013339725v1": 4511,
|
413 |
+
"rhipicephalus_sanguineus_gca013339695v1": 4512,
|
414 |
+
"rhodnius_prolixus": 4513,
|
415 |
+
"rhopalosiphum_maidis_gca003676215v3": 4514,
|
416 |
+
"saccharomyces_cerevisiae": 4515,
|
417 |
+
"saccoglossus_kowalevskii_gca000003605v1": 4516,
|
418 |
+
"saimiri_boliviensis_boliviensis": 4517,
|
419 |
+
"salarias_fasciatus": 4518,
|
420 |
+
"salmo_salar": 4519,
|
421 |
+
"salmo_trutta": 4520,
|
422 |
+
"salvator_merianae": 4521,
|
423 |
+
"sander_lucioperca": 4522,
|
424 |
+
"sarcophilus_harrisii": 4523,
|
425 |
+
"sarcoptes_scabiei": 4524,
|
426 |
+
"schistocerca_americana_gca021461395v2rs": 4525,
|
427 |
+
"schistosoma_haematobium_gca000699445v2rs": 4526,
|
428 |
+
"schistosoma_mansoni": 4527,
|
429 |
+
"sciurus_vulgaris": 4528,
|
430 |
+
"scleropages_formosus": 4529,
|
431 |
+
"scophthalmus_maximus": 4530,
|
432 |
+
"serinus_canaria": 4531,
|
433 |
+
"seriola_dumerili": 4532,
|
434 |
+
"seriola_lalandi_dorsalis": 4533,
|
435 |
+
"sinocyclocheilus_anshuiensis": 4534,
|
436 |
+
"sinocyclocheilus_grahami": 4535,
|
437 |
+
"sinocyclocheilus_rhinocerous": 4536,
|
438 |
+
"sipha_flava_gca003268045v1": 4537,
|
439 |
+
"sitophilus_oryzae_gca002938485v2rs": 4538,
|
440 |
+
"solenopsis_invicta": 4539,
|
441 |
+
"sorex_araneus": 4540,
|
442 |
+
"sparus_aurata": 4541,
|
443 |
+
"spermophilus_dauricus": 4542,
|
444 |
+
"sphaeramia_orbicularis": 4543,
|
445 |
+
"sphenodon_punctatus": 4544,
|
446 |
+
"stachyris_ruficeps": 4545,
|
447 |
+
"stegastes_partitus": 4546,
|
448 |
+
"stegodyphus_dumicola_gca010614865v2rs": 4547,
|
449 |
+
"stegodyphus_mimosarum": 4548,
|
450 |
+
"stomoxys_calcitrans": 4549,
|
451 |
+
"strigamia_maritima": 4550,
|
452 |
+
"strigops_habroptila": 4551,
|
453 |
+
"strix_occidentalis_caurina": 4552,
|
454 |
+
"strongylocentrotus_purpuratus": 4553,
|
455 |
+
"strongyloides_ratti": 4554,
|
456 |
+
"struthio_camelus_australis": 4555,
|
457 |
+
"stylophora_pistillata_gca002571385v1": 4556,
|
458 |
+
"suricata_suricatta": 4557,
|
459 |
+
"sus_scrofa_usmarc": 4558,
|
460 |
+
"taeniopygia_guttata": 4559,
|
461 |
+
"takifugu_rubripes": 4560,
|
462 |
+
"teleopsis_dalmanni": 4561,
|
463 |
+
"terrapene_carolina_triunguis": 4562,
|
464 |
+
"tetranychus_urticae": 4563,
|
465 |
+
"tetraodon_nigroviridis": 4564,
|
466 |
+
"thelohanellus_kitauei": 4565,
|
467 |
+
"theropithecus_gelada": 4566,
|
468 |
+
"thrips_palmi_gca012932325v1rs": 4567,
|
469 |
+
"tigriopus_californicus_gca007210705": 4568,
|
470 |
+
"trialeurodes_vaporariorum_gca011764245": 4569,
|
471 |
+
"tribolium_castaneum": 4570,
|
472 |
+
"trichinella_spiralis": 4571,
|
473 |
+
"trichogramma_pretiosum_gca000599845v3": 4572,
|
474 |
+
"trichoplax_adhaerens": 4573,
|
475 |
+
"trichuris_muris": 4574,
|
476 |
+
"tupaia_belangeri": 4575,
|
477 |
+
"tursiops_truncatus": 4576,
|
478 |
+
"urocitellus_parryii": 4577,
|
479 |
+
"ursus_americanus": 4578,
|
480 |
+
"ursus_maritimus": 4579,
|
481 |
+
"ursus_thibetanus_thibetanus": 4580,
|
482 |
+
"varanus_komodoensis": 4581,
|
483 |
+
"varroa_destructor_gca002443255": 4582,
|
484 |
+
"venturia_canescens_gca019457755v1rs": 4583,
|
485 |
+
"vicugna_pacos": 4584,
|
486 |
+
"vombatus_ursinus": 4585,
|
487 |
+
"vulpes_vulpes": 4586,
|
488 |
+
"xenopus_tropicalis": 4587,
|
489 |
+
"xiphophorus_couchianus": 4588,
|
490 |
+
"xiphophorus_maculatus": 4589,
|
491 |
+
"zalophus_californianus": 4590,
|
492 |
+
"zerene_cesonia_gca012273895v2rs": 4591,
|
493 |
+
"zonotrichia_albicollis": 4592,
|
494 |
+
"zootermopsis_nevadensis": 4593,
|
495 |
+
"zosterops_lateralis_melanops": 4594
|
496 |
+
}
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/s/project/denovo-prosit/JohannesHingerl/BERTADN/final_models/huggingface_compatible/rotary_bert_huggingface",
|
3 |
+
"auto_map":
|
4 |
+
{"AutoModel": "modeling_rotarybert.RotaryBertModel",
|
5 |
+
"AutoModelForMaskedLM": "modeling_rotarybert.RotaryBertForMaskedLM"
|
6 |
+
},
|
7 |
+
"attention_probs_dropout_prob": 0.1,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"hidden_act": "gelu_new",
|
10 |
+
"hidden_dropout_prob": 0.1,
|
11 |
+
"hidden_size": 768,
|
12 |
+
"initializer_range": 0.02,
|
13 |
+
"intermediate_size": 3072,
|
14 |
+
"layer_norm_eps": 1e-12,
|
15 |
+
"max_position_embeddings": 8192,
|
16 |
+
"model_type": "bert",
|
17 |
+
"num_attention_heads": 12,
|
18 |
+
"num_hidden_layers": 12,
|
19 |
+
"pad_token_id": 0,
|
20 |
+
"position_embedding_type": "absolute",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.45.2",
|
23 |
+
"type_vocab_size": 2,
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 4608
|
26 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"pad_token_id": 0,
|
4 |
+
"transformers_version": "4.45.2"
|
5 |
+
}
|
modeling_rotarybert.py
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import math
|
2 |
+
from einops import rearrange, repeat
|
3 |
+
from transformers.models.bert.modeling_bert import BertSelfAttention, BertAttention, BertLayer, BertEncoder, BertModel, BertForMaskedLM
|
4 |
+
from typing import List, Optional, Tuple, Union
|
5 |
+
from packaging import version
|
6 |
+
import torch
|
7 |
+
import torch.nn as nn
|
8 |
+
|
9 |
+
def rotate_half(x, interleaved=False):
    """Return ``x`` with each (a, b) feature pair along the last axis mapped to (-b, a).

    Args:
        x: tensor whose last dimension is split into pairs.
        interleaved: if False, element ``i`` of the first half of the last axis
            is paired with element ``i`` of the second half. If True, adjacent
            elements (even index, odd index) form a pair.

    Returns:
        A tensor with the same shape as ``x``.
    """
    if interleaved:
        evens, odds = x[..., ::2], x[..., 1::2]
        # Stack the (-odd, even) pairs on a new trailing axis, then merge that
        # axis back into the feature axis so the pairs stay adjacent.
        return torch.stack((-odds, evens), dim=-1).flatten(-2)

    first_half, second_half = x.chunk(2, dim=-1)
    return torch.cat((-second_half, first_half), dim=-1)
|
16 |
+
|
17 |
+
|
18 |
+
def apply_rotary_emb_torch(x, cos, sin, interleaved=False):
    """Apply a rotary position embedding to the leading features of ``x``.

    Args:
        x: (batch_size, seqlen, nheads, headdim)
        cos, sin: (seqlen, rotary_dim / 2) or (batch_size, seqlen, rotary_dim / 2)
        interleaved: pairing convention, forwarded to ``rotate_half`` and used
            to choose how ``cos``/``sin`` are expanded to ``rotary_dim``.

    Returns:
        A tensor shaped like ``x``. The first ``rotary_dim`` features are
        rotated; any remaining features are passed through unchanged.
    """
    rotary_width = cos.shape[-1] * 2
    assert rotary_width <= x.shape[-1]

    # Expand cos/sin to the full rotary width and insert a singleton axis
    # before the feature axis so they broadcast against x.
    expand_pattern = "... d -> ... 1 (d 2)" if interleaved else "... d -> ... 1 (2 d)"
    cos_full = repeat(cos, expand_pattern)
    sin_full = repeat(sin, expand_pattern)

    to_rotate = x[..., :rotary_width]
    passthrough = x[..., rotary_width:]
    rotated = to_rotate * cos_full + rotate_half(to_rotate, interleaved) * sin_full
    return torch.cat([rotated, passthrough], dim=-1)
|
31 |
+
|
32 |
+
|
33 |
+
def generate_cos_sin(seqlen, rotary_dim, device, dtype, base=10000.0):
    """Build the cosine/sine tables used for rotary position embeddings.

    Args:
        seqlen: number of positions; the tables cover positions ``0..seqlen-1``.
        rotary_dim: number of features to be rotated; one frequency is
            produced for every second index in ``range(0, rotary_dim)``.
        device: device on which the tables are created.
        dtype: dtype of the returned tables.
        base: base of the geometric frequency progression. Defaults to
            10000.0, the value that was previously hard-coded.

    Returns:
        ``(cos, sin)``, each of shape ``(seqlen, ceil(rotary_dim / 2))``.
    """
    # Frequencies and angles are computed in float32 regardless of `dtype`;
    # only the final tables are cast.
    exponents = torch.arange(0, rotary_dim, 2, device=device, dtype=torch.float32) / rotary_dim
    inv_freq = 1.0 / (base ** exponents)
    positions = torch.arange(seqlen, device=device, dtype=torch.float32)
    angles = torch.outer(positions, inv_freq)
    cos = torch.cos(angles).to(dtype)
    sin = torch.sin(angles).to(dtype)
    return cos, sin
|
43 |
+
|
44 |
+
# from transformers.models.roformer import RoFormerSinusoidalPositionalEmbedding
|
45 |
+
|
46 |
+
|
47 |
+
class RotaryBertSdpaSelfAttention(BertSelfAttention):
    """BERT self-attention with rotary position embeddings on queries and keys.

    The query/key/value projections are inherited from ``BertSelfAttention``.
    Queries and keys are rotated with ``apply_rotary_emb_torch`` before the
    attention scores are computed. The fast path uses
    ``torch.nn.functional.scaled_dot_product_attention``; when attention
    probabilities are requested or a ``head_mask`` is given, an explicit
    softmax-attention path is used instead, because SDPA supports neither.

    NOTE(review): ``past_key_value`` is not used for caching; it is only
    passed through in the decoder output tuple. Rotary positions are derived
    from the query length and applied to the keys as well, so cross-attention
    with a different key length is presumably unsupported -- confirm.
    """

    def __init__(self, config, position_embedding_type=None):
        super().__init__(config, position_embedding_type=position_embedding_type)
        self.dropout_prob = config.attention_probs_dropout_prob
        # Gates the `.contiguous()` workaround in forward(); hard-coded off.
        self.require_contiguous_qkv = False

    def _manual_attention(self, query_layer, key_layer, value_layer, attention_mask, head_mask, is_causal):
        """Explicit softmax attention; returns ``(context, attention_probs)``."""
        scores = torch.matmul(query_layer, key_layer.transpose(-1, -2))
        scores = scores / math.sqrt(self.attention_head_size)
        if is_causal:
            q_len, k_len = scores.shape[-2], scores.shape[-1]
            allowed = torch.ones(q_len, k_len, dtype=torch.bool, device=scores.device).tril()
            scores = scores.masked_fill(~allowed, float("-inf"))
        if attention_mask is not None:
            if attention_mask.dtype == torch.bool:
                # Boolean masks mark the positions that may be attended to.
                scores = scores.masked_fill(~attention_mask, float("-inf"))
            else:
                # Float masks are additive.
                scores = scores + attention_mask
        probs = nn.functional.softmax(scores, dim=-1)
        probs = self.dropout(probs)
        if head_mask is not None:
            probs = probs * head_mask
        return torch.matmul(probs, value_layer), probs

    # Adapted from BertSelfAttention
    def forward(
        self,
        hidden_states: torch.Tensor,
        attention_mask = None,
        head_mask = None,
        encoder_hidden_states = None,
        encoder_attention_mask = None,
        past_key_value = None,
        output_attentions = False,
    ) -> Tuple[torch.Tensor]:
        """Compute rotary self-attention.

        Args:
            hidden_states: input of shape ``(batch, seq_len, hidden)``.
            attention_mask: optional mask forwarded to the attention kernel.
            head_mask: optional multiplicative mask on attention probabilities.
            encoder_hidden_states: if given, keys/values are projected from it.
            encoder_attention_mask: mask used instead of ``attention_mask``
                when ``encoder_hidden_states`` is given.
            past_key_value: returned unchanged when ``self.is_decoder`` is set.
            output_attentions: if True, attention probabilities are returned.

        Returns:
            ``(context,)``, followed by the attention probabilities when
            ``output_attentions`` is True and by ``past_key_value`` when
            ``self.is_decoder`` is set.
        """
        use_manual_attention = output_attentions or head_mask is not None
        if use_manual_attention:
            # Imported locally: no module-level `logger` is defined in this file.
            from transformers.utils import logging as hf_logging

            hf_logging.get_logger(__name__).warning_once(
                "RotaryBertSdpaSelfAttention: `torch.nn.functional.scaled_dot_product_attention` does not "
                "support `output_attentions=True` or `head_mask`. Falling back to the manual attention "
                "implementation."
            )

        bsz, tgt_len, _ = hidden_states.size()

        is_cross_attention = encoder_hidden_states is not None
        current_states = encoder_hidden_states if is_cross_attention else hidden_states
        attention_mask = encoder_attention_mask if is_cross_attention else attention_mask

        query_layer = self.transpose_for_scores(self.query(hidden_states))
        key_layer = self.transpose_for_scores(self.key(current_states))
        value_layer = self.transpose_for_scores(self.value(current_states))

        # Swap axes 1 and 2 so the sequence axis comes second, as
        # apply_rotary_emb_torch expects; swapped back after the rotation.
        query_layer, key_layer = query_layer.permute(0, 2, 1, 3), key_layer.permute(0, 2, 1, 3)

        # The trig tables stay in float32 for accuracy.
        cos, sin = generate_cos_sin(
            query_layer.shape[1], query_layer.shape[-1], device=query_layer.device, dtype=torch.float32
        )
        query_layer = apply_rotary_emb_torch(query_layer, cos, sin)
        key_layer = apply_rotary_emb_torch(key_layer, cos, sin)

        # Multiplying by float32 tables promotes half-precision q/k to float32.
        # Cast back so q, k and v share one dtype, as SDPA requires.
        query_layer = query_layer.permute(0, 2, 1, 3).to(value_layer.dtype)
        key_layer = key_layer.permute(0, 2, 1, 3).to(value_layer.dtype)

        # SDPA with memory-efficient backend is broken in torch==2.1.2 when using non-contiguous inputs and a custom
        # attn_mask, so we need to call `.contiguous()` here. This was fixed in torch==2.2.0.
        # Reference: https://github.com/pytorch/pytorch/issues/112577
        if self.require_contiguous_qkv and query_layer.device.type == "cuda" and attention_mask is not None:
            query_layer = query_layer.contiguous()
            key_layer = key_layer.contiguous()
            value_layer = value_layer.contiguous()

        is_causal = bool(
            self.is_decoder and not is_cross_attention and attention_mask is None and tgt_len > 1
        )

        attn_probs = None
        if use_manual_attention:
            attn_output, attn_probs = self._manual_attention(
                query_layer, key_layer, value_layer, attention_mask, head_mask, is_causal
            )
        else:
            attn_output = torch.nn.functional.scaled_dot_product_attention(
                query_layer,
                key_layer,
                value_layer,
                attn_mask=attention_mask,
                dropout_p=self.dropout_prob if self.training else 0.0,
                is_causal=is_causal,
            )

        attn_output = attn_output.transpose(1, 2)
        attn_output = attn_output.reshape(bsz, tgt_len, self.all_head_size)

        outputs = (attn_output, attn_probs) if output_attentions else (attn_output,)
        if self.is_decoder:
            outputs = outputs + (past_key_value,)
        return outputs
|
137 |
+
|
138 |
+
|
139 |
+
class RotaryBertAttention(BertAttention):
    """``BertAttention`` whose self-attention sub-module is the rotary variant."""

    def __init__(self, config):
        super().__init__(config)
        # Replace the self-attention module built by the parent constructor.
        rotary_self_attention = RotaryBertSdpaSelfAttention(config)
        self.self = rotary_self_attention
|
143 |
+
|
144 |
+
class RotaryBertLayer(BertLayer):
    """``BertLayer`` that uses ``RotaryBertAttention`` as its attention block."""

    def __init__(self, config):
        super().__init__(config)
        # Replace the attention block built by the parent constructor.
        rotary_attention = RotaryBertAttention(config)
        self.attention = rotary_attention
|
148 |
+
|
149 |
+
class RotaryBertEncoder(BertEncoder):
    """``BertEncoder`` whose layer stack is made of ``RotaryBertLayer`` modules."""

    def __init__(self, config):
        super().__init__(config)
        # Rebuild the stack with one rotary layer per configured hidden layer.
        self.layer = nn.ModuleList(RotaryBertLayer(config) for _ in range(config.num_hidden_layers))
|
153 |
+
|
154 |
+
class RotaryBertModel(BertModel):
    """`BertModel` whose encoder is a `RotaryBertEncoder`.

    Args:
        config: model configuration, passed unchanged to `BertModel.__init__`
            and to `RotaryBertEncoder`.
    """

    def __init__(self, config):
        super().__init__(config)
        # Replace the stock encoder created by the parent constructor.
        self.encoder = RotaryBertEncoder(config)
        # The parent constructor finalises the model (weight initialisation via
        # `_init_weights`) *before* the encoder is swapped above, so the new
        # encoder would otherwise keep PyTorch's default initialisation when the
        # model is built directly from a config. Re-running `post_init()` applies
        # the model's own initialisation to the new modules; under
        # `from_pretrained` the checkpoint weights are loaded afterwards as usual.
        self.post_init()
|
158 |
+
|
159 |
+
class RotaryBertForMaskedLM(BertForMaskedLM):
    """`BertForMaskedLM` whose backbone (`self.bert`) is a `RotaryBertModel`.

    Args:
        config: model configuration, passed unchanged to
            `BertForMaskedLM.__init__` and to `RotaryBertModel`.
    """

    def __init__(self, config):
        super().__init__(config)
        # Replace the stock backbone created by the parent constructor.
        # NOTE(review): `RotaryBertModel(config)` is built with its default
        # arguments; stock `BertForMaskedLM` builds its backbone without a
        # pooling layer -- confirm whether the extra pooler is intended.
        self.bert = RotaryBertModel(config)
        # The parent constructor tied the MLM decoder weight to the input
        # embeddings of the backbone it created, which has just been discarded.
        # Re-run `post_init()` so the decoder is tied to the *new* backbone's
        # embeddings and the new modules receive the model's `_init_weights`
        # initialisation when the model is built directly from a config.
        self.post_init()
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78bb502f89c7f13649c5787f672f045618affb36ec0d70bdca90c972877ba64c
|
3 |
+
size 398592581
|
special_tokens_map.json
ADDED
@@ -0,0 +1,503 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"acanthaster_planci_gca001949145v1",
|
4 |
+
"acanthochromis_polyacanthus",
|
5 |
+
"accipiter_nisus",
|
6 |
+
"acromyrmex_echinatior_gca000204515v1rs",
|
7 |
+
"acropora_millepora_gca013753865v1",
|
8 |
+
"actinia_equina_gca011057435",
|
9 |
+
"actinia_tenebrosa_gca009602425v1",
|
10 |
+
"acyrthosiphon_pisum",
|
11 |
+
"adineta_vaga",
|
12 |
+
"aedes_aegypti_lvpagwg",
|
13 |
+
"aedes_albopictus",
|
14 |
+
"agrilus_planipennis_gca000699045v2",
|
15 |
+
"ailuropoda_melanoleuca",
|
16 |
+
"amazona_collaria",
|
17 |
+
"amphibalanus_amphitrite_gca019059575v1",
|
18 |
+
"amphilophus_citrinellus",
|
19 |
+
"amphimedon_queenslandica",
|
20 |
+
"amphiprion_ocellaris",
|
21 |
+
"amphiprion_percula",
|
22 |
+
"amyelois_transitella_gca001186105v1rs",
|
23 |
+
"anabas_testudineus",
|
24 |
+
"anas_platyrhynchos",
|
25 |
+
"anas_zonorhyncha",
|
26 |
+
"anneissia_japonica_gca011630105v1",
|
27 |
+
"anolis_carolinensis",
|
28 |
+
"anopheles_albimanus",
|
29 |
+
"anopheles_arabiensis",
|
30 |
+
"anopheles_atroparvus_gca914969975",
|
31 |
+
"anopheles_christyi",
|
32 |
+
"anopheles_coluzzii_ngousso",
|
33 |
+
"anopheles_culicifacies",
|
34 |
+
"anopheles_darlingi",
|
35 |
+
"anopheles_dirus",
|
36 |
+
"anopheles_epiroticus",
|
37 |
+
"anopheles_farauti",
|
38 |
+
"anopheles_funestus",
|
39 |
+
"anopheles_gambiae",
|
40 |
+
"anopheles_maculatus",
|
41 |
+
"anopheles_melas",
|
42 |
+
"anopheles_merus",
|
43 |
+
"anopheles_minimus",
|
44 |
+
"anopheles_quadriannulatus",
|
45 |
+
"anopheles_sinensis_china",
|
46 |
+
"anopheles_stephensi",
|
47 |
+
"anoplophora_glabripennis",
|
48 |
+
"anser_brachyrhynchus",
|
49 |
+
"anser_cygnoides",
|
50 |
+
"anthonomus_grandis_gca022605725v3rs",
|
51 |
+
"aotus_nancymaae",
|
52 |
+
"aphidius_gifuensis_gca014905175v1",
|
53 |
+
"apis_dorsata_gca000469605v1rs",
|
54 |
+
"apis_florea_gca000184785v2rs",
|
55 |
+
"apis_mellifera",
|
56 |
+
"aplysia_californica_gca000002075v2",
|
57 |
+
"apteryx_haastii",
|
58 |
+
"apteryx_owenii",
|
59 |
+
"apteryx_rowi",
|
60 |
+
"aquila_chrysaetos_chrysaetos",
|
61 |
+
"ascaris_suum",
|
62 |
+
"astatotilapia_calliptera",
|
63 |
+
"asterias_rubens_gca902459465v3",
|
64 |
+
"astyanax_mexicanus",
|
65 |
+
"athalia_rosae_gca917208135v1",
|
66 |
+
"athene_cunicularia",
|
67 |
+
"atta_cephalotes",
|
68 |
+
"bactrocera_dorsalis_gca000789215v2",
|
69 |
+
"bactrocera_latifrons_gca001853355v1",
|
70 |
+
"bactrocera_tryoni_gca016617805v2",
|
71 |
+
"balaenoptera_musculus",
|
72 |
+
"belgica_antarctica",
|
73 |
+
"bemisia_tabaci_ssa3nig",
|
74 |
+
"betta_splendens",
|
75 |
+
"bicyclus_anynana_gca900239965v1rs",
|
76 |
+
"biomphalaria_glabrata",
|
77 |
+
"bison_bison_bison",
|
78 |
+
"bombus_impatiens",
|
79 |
+
"bombus_terrestris_gca910591885v2",
|
80 |
+
"bombyx_mandarina_gca003987935v1rs",
|
81 |
+
"bombyx_mori",
|
82 |
+
"bos_grunniens",
|
83 |
+
"bos_indicus_hybrid",
|
84 |
+
"bos_mutus",
|
85 |
+
"bos_taurus_hybrid",
|
86 |
+
"branchiostoma_lanceolatum",
|
87 |
+
"brugia_malayi",
|
88 |
+
"bubo_bubo",
|
89 |
+
"buteo_japonicus",
|
90 |
+
"caenorhabditis_brenneri",
|
91 |
+
"caenorhabditis_briggsae",
|
92 |
+
"caenorhabditis_elegans",
|
93 |
+
"caenorhabditis_japonica",
|
94 |
+
"caenorhabditis_remanei",
|
95 |
+
"cairina_moschata_domestica",
|
96 |
+
"calidris_pugnax",
|
97 |
+
"calidris_pygmaea",
|
98 |
+
"callithrix_jacchus",
|
99 |
+
"callorhinchus_milii",
|
100 |
+
"camarhynchus_parvulus",
|
101 |
+
"camelus_dromedarius",
|
102 |
+
"camponotus_floridanus_gca003227725v1rs",
|
103 |
+
"canis_lupus_familiarisgsd",
|
104 |
+
"capitella_teleta",
|
105 |
+
"capra_hircus",
|
106 |
+
"carassius_auratus",
|
107 |
+
"carlito_syrichta",
|
108 |
+
"castor_canadensis",
|
109 |
+
"catagonus_wagneri",
|
110 |
+
"catharus_ustulatus",
|
111 |
+
"cavia_aperea",
|
112 |
+
"cavia_porcellus",
|
113 |
+
"cebus_imitator",
|
114 |
+
"centruroides_sculpturatus_gca000671375v2",
|
115 |
+
"ceratitis_capitata_gca000347755v4",
|
116 |
+
"cercocebus_atys",
|
117 |
+
"cervus_hanglu_yarkandensis",
|
118 |
+
"chelonoidis_abingdonii",
|
119 |
+
"chelonus_insularis_gca013357705v1rs",
|
120 |
+
"chelydra_serpentina",
|
121 |
+
"chinchilla_lanigera",
|
122 |
+
"chlorocebus_sabaeus",
|
123 |
+
"choloepus_hoffmanni",
|
124 |
+
"chrysemys_picta_bellii",
|
125 |
+
"chrysolophus_pictus",
|
126 |
+
"cimex_lectularius",
|
127 |
+
"ciona_intestinalis",
|
128 |
+
"ciona_savignyi",
|
129 |
+
"clupea_harengus",
|
130 |
+
"clytia_hemisphaerica_gca902728285",
|
131 |
+
"colobus_angolensis_palliatus",
|
132 |
+
"copidosoma_floridanum_gca000648655v2",
|
133 |
+
"corvus_moneduloides",
|
134 |
+
"cotesia_glomerata_gca020080835v1",
|
135 |
+
"cottoperca_gobio",
|
136 |
+
"coturnix_japonica",
|
137 |
+
"crassostrea_gigas",
|
138 |
+
"crassostrea_virginica_gca002022765v4",
|
139 |
+
"cricetulus_griseus_picr",
|
140 |
+
"crocodylus_porosus",
|
141 |
+
"culex_quinquefasciatus_gca015732765v1",
|
142 |
+
"culicoides_sonorensis",
|
143 |
+
"cyanistes_caeruleus",
|
144 |
+
"cyclopterus_lumpus",
|
145 |
+
"cynoglossus_semilaevis",
|
146 |
+
"cyprinodon_variegatus",
|
147 |
+
"cyprinus_carpio_carpio",
|
148 |
+
"danaus_plexippus",
|
149 |
+
"danio_rerio",
|
150 |
+
"daphnia_magna_gca020631705v2",
|
151 |
+
"daphnia_pulex",
|
152 |
+
"daphnia_pulicaria_gca021234035v2rs",
|
153 |
+
"dasypus_novemcinctus",
|
154 |
+
"delphinapterus_leucas",
|
155 |
+
"dendroctonus_ponderosae_gca000355655v1",
|
156 |
+
"dendronephthya_gigantea_gca004324835v1",
|
157 |
+
"denticeps_clupeoides",
|
158 |
+
"dermacentor_andersoni_gca023375885v2rs",
|
159 |
+
"dermacentor_silvarum_gca013339745v1",
|
160 |
+
"dermatophagoides_pteronyssinus_gca001901225v2",
|
161 |
+
"diabrotica_virgifera_gca917563875v2rs",
|
162 |
+
"dicentrarchus_labrax",
|
163 |
+
"dimorphilus_gyrociliatus_gca904063045v1",
|
164 |
+
"dinothrombium_tinctorium",
|
165 |
+
"dipodomys_ordii",
|
166 |
+
"diuraphis_noxia_gca001186385v1",
|
167 |
+
"dromaius_novaehollandiae",
|
168 |
+
"drosophila_ananassae",
|
169 |
+
"drosophila_erecta",
|
170 |
+
"drosophila_grimshawi",
|
171 |
+
"drosophila_melanogaster",
|
172 |
+
"drosophila_mojavensis",
|
173 |
+
"drosophila_persimilis",
|
174 |
+
"drosophila_pseudoobscura",
|
175 |
+
"drosophila_sechellia",
|
176 |
+
"drosophila_simulans",
|
177 |
+
"drosophila_virilis",
|
178 |
+
"drosophila_willistoni",
|
179 |
+
"drosophila_yakuba",
|
180 |
+
"dufourea_novaeangliae_gca001272555v1rs",
|
181 |
+
"echeneis_naucrates",
|
182 |
+
"echinococcus_granulosus_gca000524195v1rs",
|
183 |
+
"echinops_telfairi",
|
184 |
+
"electrophorus_electricus",
|
185 |
+
"eptatretus_burgeri",
|
186 |
+
"equus_asinus",
|
187 |
+
"equus_caballus",
|
188 |
+
"erinaceus_europaeus",
|
189 |
+
"erpetoichthys_calabaricus",
|
190 |
+
"erythrura_gouldiae",
|
191 |
+
"esox_lucius",
|
192 |
+
"eufriesea_mexicana_gca001483705v1rs",
|
193 |
+
"eurytemora_affinis_gca000591075v2",
|
194 |
+
"exaiptasia_diaphana_gca001417965v1",
|
195 |
+
"falco_tinnunculus",
|
196 |
+
"felis_catus",
|
197 |
+
"ficedula_albicollis",
|
198 |
+
"folsomia_candida",
|
199 |
+
"fukomys_damarensis",
|
200 |
+
"fundulus_heteroclitus",
|
201 |
+
"gadus_morhua",
|
202 |
+
"galendromus_occidentalis_gca000255335v2rs",
|
203 |
+
"galleria_mellonella_gca003640425v2rs",
|
204 |
+
"gallus_gallus_gca000002315v5",
|
205 |
+
"gambusia_affinis",
|
206 |
+
"gasterosteus_aculeatus",
|
207 |
+
"geospiza_fortis",
|
208 |
+
"gigantopelta_aegis_gca016097555v1",
|
209 |
+
"glossina_austeni",
|
210 |
+
"glossina_brevipalpis",
|
211 |
+
"glossina_fuscipes",
|
212 |
+
"glossina_morsitans",
|
213 |
+
"glossina_pallidipes",
|
214 |
+
"glossina_palpalis",
|
215 |
+
"gopherus_agassizii",
|
216 |
+
"gopherus_evgoodei",
|
217 |
+
"gorilla_gorilla",
|
218 |
+
"gouania_willdenowi",
|
219 |
+
"habropoda_laboriosa_gca001263275v1rs",
|
220 |
+
"haemaphysalis_longicornis_gca013339765v1",
|
221 |
+
"haliotis_rubra_gca003918875v1rs",
|
222 |
+
"haliotis_rufescens_gca023055435v1rs",
|
223 |
+
"haplochromis_burtoni",
|
224 |
+
"harpegnathos_saltator_gca003227715v2rs",
|
225 |
+
"heliconius_melpomene",
|
226 |
+
"helicoverpa_armigera_gca023701775v1rs",
|
227 |
+
"helicoverpa_zea_gca022581195v1rs",
|
228 |
+
"helobdella_robusta",
|
229 |
+
"hermetia_illucens_gca905115235v1",
|
230 |
+
"heterocephalus_glaber_male",
|
231 |
+
"hippocampus_comes",
|
232 |
+
"hofstenia_miamia",
|
233 |
+
"homalodisca_vitripennis_gca021130785v2rs",
|
234 |
+
"homarus_americanus_gca018991925v1",
|
235 |
+
"homo_sapiens",
|
236 |
+
"hucho_hucho",
|
237 |
+
"hyalella_azteca_gca000764305v2",
|
238 |
+
"hyalomma_asiaticum_gca013339685v1",
|
239 |
+
"hydra_vulgaris_gca022113875v1rs",
|
240 |
+
"hymenolepis_microstoma",
|
241 |
+
"hypsibius_exemplaris_gca002082055v1",
|
242 |
+
"ictalurus_punctatus",
|
243 |
+
"ictidomys_tridecemlineatus",
|
244 |
+
"ixodes_persulcatus_gca013358835v1",
|
245 |
+
"ixodes_scapularis_gca016920785v2",
|
246 |
+
"jaculus_jaculus",
|
247 |
+
"junco_hyemalis",
|
248 |
+
"kryptolebias_marmoratus",
|
249 |
+
"labrus_bergylta",
|
250 |
+
"larimichthys_crocea",
|
251 |
+
"lates_calcarifer",
|
252 |
+
"laticauda_laticaudata",
|
253 |
+
"latimeria_chalumnae",
|
254 |
+
"leguminivora_glycinivorella_gca023078275v1rs",
|
255 |
+
"lepeophtheirus_salmonis_gca016086655v3rs",
|
256 |
+
"lepidothrix_coronata",
|
257 |
+
"lepisosteus_oculatus",
|
258 |
+
"leptinotarsa_decemlineata_gca000500325v2",
|
259 |
+
"leptobrachium_leishanense",
|
260 |
+
"leptotrombidium_deliense",
|
261 |
+
"limulus_polyphemus_gca000517525v1",
|
262 |
+
"linepithema_humile_gca000217595v1rs",
|
263 |
+
"lingula_anatina",
|
264 |
+
"loa_loa",
|
265 |
+
"lonchura_striata_domestica",
|
266 |
+
"lottia_gigantea",
|
267 |
+
"loxodonta_africana",
|
268 |
+
"lucilia_cuprina_gca022045245v1rs",
|
269 |
+
"lutzomyia_longipalpis",
|
270 |
+
"lynx_canadensis",
|
271 |
+
"lytechinus_variegatus_gca018143015v1",
|
272 |
+
"macaca_fascicularis",
|
273 |
+
"macaca_mulatta",
|
274 |
+
"macaca_nemestrina",
|
275 |
+
"malurus_cyaneus_samueli",
|
276 |
+
"manacus_vitellinus",
|
277 |
+
"mandrillus_leucophaeus",
|
278 |
+
"manduca_sexta_gca014839805v1rs",
|
279 |
+
"marmota_marmota_marmota",
|
280 |
+
"mastacembelus_armatus",
|
281 |
+
"mayetiola_destructor",
|
282 |
+
"maylandia_zebra",
|
283 |
+
"megachile_rotundata_gca000220905v1rs",
|
284 |
+
"megaselia_scalaris",
|
285 |
+
"meleagris_gallopavo",
|
286 |
+
"melitaea_cinxia_gca905220565v1",
|
287 |
+
"melopsittacus_undulatus",
|
288 |
+
"mercenaria_mercenaria_gca014805675v2",
|
289 |
+
"meriones_unguiculatus",
|
290 |
+
"mesocricetus_auratus",
|
291 |
+
"microcebus_murinus",
|
292 |
+
"microtus_ochrogaster",
|
293 |
+
"mizuhopecten_yessoensis_gca002113885v2",
|
294 |
+
"mnemiopsis_leidyi",
|
295 |
+
"mola_mola",
|
296 |
+
"monodelphis_domestica",
|
297 |
+
"monodon_monoceros",
|
298 |
+
"monomorium_pharaonis_gca013373865v2",
|
299 |
+
"monopterus_albus",
|
300 |
+
"moschus_moschiferus",
|
301 |
+
"mus_caroli",
|
302 |
+
"mus_musculus",
|
303 |
+
"mus_pahari",
|
304 |
+
"mus_spicilegus",
|
305 |
+
"mus_spretus",
|
306 |
+
"musca_domestica",
|
307 |
+
"mustela_putorius_furo",
|
308 |
+
"myotis_lucifugus",
|
309 |
+
"myripristis_murdjan",
|
310 |
+
"naja_naja",
|
311 |
+
"nannospalax_galili",
|
312 |
+
"nasonia_vitripennis",
|
313 |
+
"necator_americanus",
|
314 |
+
"nematostella_vectensis",
|
315 |
+
"neodiprion_lecontei_gca021901455v1rs",
|
316 |
+
"neodiprion_pinetum_gca021155775v1rs",
|
317 |
+
"neogobius_melanostomus",
|
318 |
+
"neolamprologus_brichardi",
|
319 |
+
"neovison_vison",
|
320 |
+
"nilaparvata_lugens_gca014356525v1rs",
|
321 |
+
"nomascus_leucogenys",
|
322 |
+
"notamacropus_eugenii",
|
323 |
+
"notechis_scutatus",
|
324 |
+
"nothobranchius_furzeri",
|
325 |
+
"nothoprocta_perdicaria",
|
326 |
+
"numida_meleagris",
|
327 |
+
"ochotona_princeps",
|
328 |
+
"octodon_degus",
|
329 |
+
"octopus_bimaculoides",
|
330 |
+
"octopus_sinensis_gca006345805v1",
|
331 |
+
"onchocerca_volvulus",
|
332 |
+
"oncorhynchus_kisutch",
|
333 |
+
"oncorhynchus_mykiss",
|
334 |
+
"oncorhynchus_tshawytscha",
|
335 |
+
"onthophagus_taurus_gca000648695v2",
|
336 |
+
"ooceraea_biroi_gca003672135v1",
|
337 |
+
"orbicella_faveolata_gca002042975v1",
|
338 |
+
"orchesella_cincta",
|
339 |
+
"oreochromis_aureus",
|
340 |
+
"oreochromis_niloticus",
|
341 |
+
"ornithorhynchus_anatinus",
|
342 |
+
"orussus_abietinus_gca000612105v2",
|
343 |
+
"oryctolagus_cuniculus",
|
344 |
+
"oryzias_javanicus",
|
345 |
+
"oryzias_latipes",
|
346 |
+
"oryzias_melastigma",
|
347 |
+
"oryzias_sinensis",
|
348 |
+
"otolemur_garnettii",
|
349 |
+
"otus_sunia",
|
350 |
+
"ovis_aries_rambouillet",
|
351 |
+
"owenia_fusiformis_gca903813345v1",
|
352 |
+
"pan_paniscus",
|
353 |
+
"pan_troglodytes",
|
354 |
+
"panthera_leo",
|
355 |
+
"panthera_pardus",
|
356 |
+
"panthera_tigris_altaica",
|
357 |
+
"papio_anubis",
|
358 |
+
"parambassis_ranga",
|
359 |
+
"paramormyrops_kingsleyae",
|
360 |
+
"parasteatoda_tepidariorum_gca000365465v3",
|
361 |
+
"parus_major",
|
362 |
+
"patiria_miniata_gca015706575v1",
|
363 |
+
"pavo_cristatus",
|
364 |
+
"pectinophora_gossypiella_gca024362695v1rs",
|
365 |
+
"pediculus_humanus",
|
366 |
+
"pelodiscus_sinensis",
|
367 |
+
"pelusios_castaneus",
|
368 |
+
"penaeus_chinensis_gca019202785v2rs",
|
369 |
+
"penaeus_japonicus_gca017312705v1",
|
370 |
+
"penaeus_monodon_gca015228065v1",
|
371 |
+
"penaeus_vannamei_gca003789085v1",
|
372 |
+
"periophthalmus_magnuspinnatus",
|
373 |
+
"peromyscus_maniculatus_bairdii",
|
374 |
+
"petromyzon_marinus",
|
375 |
+
"phascolarctos_cinereus",
|
376 |
+
"phasianus_colchicus",
|
377 |
+
"phlebotomus_papatasi",
|
378 |
+
"phlebotomus_perniciosus_gca918844115v2",
|
379 |
+
"phocoena_sinus",
|
380 |
+
"physeter_catodon",
|
381 |
+
"piliocolobus_tephrosceles",
|
382 |
+
"pocillopora_damicornis_gca003704095v1",
|
383 |
+
"podarcis_muralis",
|
384 |
+
"poecilia_formosa",
|
385 |
+
"poecilia_latipinna",
|
386 |
+
"poecilia_mexicana",
|
387 |
+
"poecilia_reticulata",
|
388 |
+
"pogona_vitticeps",
|
389 |
+
"pogonomyrmex_barbatus_gca000187915v1rs",
|
390 |
+
"polistes_canadensis_gca001313835v1rs",
|
391 |
+
"polistes_dominula_gca001465965v1rs",
|
392 |
+
"polistes_fuscatus_gca010416935v1rs",
|
393 |
+
"pollicipes_pollicipes_gca011947565v2",
|
394 |
+
"pomacea_canaliculata_gca003073045v1",
|
395 |
+
"pomphorhynchus_laevis_gca012934845v2gb",
|
396 |
+
"pongo_abelii",
|
397 |
+
"portunus_trituberculatus_gca017591435v1",
|
398 |
+
"priapulus_caudatus_gca000485595v2",
|
399 |
+
"pristionchus_pacificus",
|
400 |
+
"procambarus_clarkii_gca020424385v2",
|
401 |
+
"procavia_capensis",
|
402 |
+
"prolemur_simus",
|
403 |
+
"propithecus_coquereli",
|
404 |
+
"pseudonaja_textilis",
|
405 |
+
"pteropus_vampyrus",
|
406 |
+
"pundamilia_nyererei",
|
407 |
+
"pygocentrus_nattereri",
|
408 |
+
"rattus_norvegicus_wkybbb",
|
409 |
+
"rhagoletis_pomonella_gca013731165v1",
|
410 |
+
"rhinolophus_ferrumequinum",
|
411 |
+
"rhinopithecus_bieti",
|
412 |
+
"rhinopithecus_roxellana",
|
413 |
+
"rhipicephalus_microplus_gca013339725v1",
|
414 |
+
"rhipicephalus_sanguineus_gca013339695v1",
|
415 |
+
"rhodnius_prolixus",
|
416 |
+
"rhopalosiphum_maidis_gca003676215v3",
|
417 |
+
"saccharomyces_cerevisiae",
|
418 |
+
"saccoglossus_kowalevskii_gca000003605v1",
|
419 |
+
"saimiri_boliviensis_boliviensis",
|
420 |
+
"salarias_fasciatus",
|
421 |
+
"salmo_salar",
|
422 |
+
"salmo_trutta",
|
423 |
+
"salvator_merianae",
|
424 |
+
"sander_lucioperca",
|
425 |
+
"sarcophilus_harrisii",
|
426 |
+
"sarcoptes_scabiei",
|
427 |
+
"schistocerca_americana_gca021461395v2rs",
|
428 |
+
"schistosoma_haematobium_gca000699445v2rs",
|
429 |
+
"schistosoma_mansoni",
|
430 |
+
"sciurus_vulgaris",
|
431 |
+
"scleropages_formosus",
|
432 |
+
"scophthalmus_maximus",
|
433 |
+
"serinus_canaria",
|
434 |
+
"seriola_dumerili",
|
435 |
+
"seriola_lalandi_dorsalis",
|
436 |
+
"sinocyclocheilus_anshuiensis",
|
437 |
+
"sinocyclocheilus_grahami",
|
438 |
+
"sinocyclocheilus_rhinocerous",
|
439 |
+
"sipha_flava_gca003268045v1",
|
440 |
+
"sitophilus_oryzae_gca002938485v2rs",
|
441 |
+
"solenopsis_invicta",
|
442 |
+
"sorex_araneus",
|
443 |
+
"sparus_aurata",
|
444 |
+
"spermophilus_dauricus",
|
445 |
+
"sphaeramia_orbicularis",
|
446 |
+
"sphenodon_punctatus",
|
447 |
+
"stachyris_ruficeps",
|
448 |
+
"stegastes_partitus",
|
449 |
+
"stegodyphus_dumicola_gca010614865v2rs",
|
450 |
+
"stegodyphus_mimosarum",
|
451 |
+
"stomoxys_calcitrans",
|
452 |
+
"strigamia_maritima",
|
453 |
+
"strigops_habroptila",
|
454 |
+
"strix_occidentalis_caurina",
|
455 |
+
"strongylocentrotus_purpuratus",
|
456 |
+
"strongyloides_ratti",
|
457 |
+
"struthio_camelus_australis",
|
458 |
+
"stylophora_pistillata_gca002571385v1",
|
459 |
+
"suricata_suricatta",
|
460 |
+
"sus_scrofa_usmarc",
|
461 |
+
"taeniopygia_guttata",
|
462 |
+
"takifugu_rubripes",
|
463 |
+
"teleopsis_dalmanni",
|
464 |
+
"terrapene_carolina_triunguis",
|
465 |
+
"tetranychus_urticae",
|
466 |
+
"tetraodon_nigroviridis",
|
467 |
+
"thelohanellus_kitauei",
|
468 |
+
"theropithecus_gelada",
|
469 |
+
"thrips_palmi_gca012932325v1rs",
|
470 |
+
"tigriopus_californicus_gca007210705",
|
471 |
+
"trialeurodes_vaporariorum_gca011764245",
|
472 |
+
"tribolium_castaneum",
|
473 |
+
"trichinella_spiralis",
|
474 |
+
"trichogramma_pretiosum_gca000599845v3",
|
475 |
+
"trichoplax_adhaerens",
|
476 |
+
"trichuris_muris",
|
477 |
+
"tupaia_belangeri",
|
478 |
+
"tursiops_truncatus",
|
479 |
+
"urocitellus_parryii",
|
480 |
+
"ursus_americanus",
|
481 |
+
"ursus_maritimus",
|
482 |
+
"ursus_thibetanus_thibetanus",
|
483 |
+
"varanus_komodoensis",
|
484 |
+
"varroa_destructor_gca002443255",
|
485 |
+
"venturia_canescens_gca019457755v1rs",
|
486 |
+
"vicugna_pacos",
|
487 |
+
"vombatus_ursinus",
|
488 |
+
"vulpes_vulpes",
|
489 |
+
"xenopus_tropicalis",
|
490 |
+
"xiphophorus_couchianus",
|
491 |
+
"xiphophorus_maculatus",
|
492 |
+
"zalophus_californianus",
|
493 |
+
"zerene_cesonia_gca012273895v2rs",
|
494 |
+
"zonotrichia_albicollis",
|
495 |
+
"zootermopsis_nevadensis",
|
496 |
+
"zosterops_lateralis_melanops"
|
497 |
+
],
|
498 |
+
"cls_token": "[CLS]",
|
499 |
+
"mask_token": "[MASK]",
|
500 |
+
"pad_token": "[PAD]",
|
501 |
+
"sep_token": "[SEP]",
|
502 |
+
"unk_token": "[UNK]"
|
503 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": "[CLS]",
|
3 |
+
"do_basic_tokenize": true,
|
4 |
+
"do_lower_case": false,
|
5 |
+
"mask_token": "[MASK]",
|
6 |
+
"max_len": 512,
|
7 |
+
"model_max_length": 2048,
|
8 |
+
"name_or_path": "/data/ouga/home/ag_gagneur/hingerl/BERTADN/preprocessing/tokenizer_upstream_metazoa",
|
9 |
+
"never_split": null,
|
10 |
+
"pad_token": "[PAD]",
|
11 |
+
"sep_token": "[SEP]",
|
12 |
+
"special_tokens_map_file": "/data/ouga/home/ag_gagneur/hingerl/.cache/huggingface/hub/models--zhihan1996--DNA_bert_6/snapshots/a79a8fd96ad172f964a4dbef3f4d7545a5034baa/special_tokens_map.json",
|
13 |
+
"strip_accents": null,
|
14 |
+
"tokenize_chinese_chars": true,
|
15 |
+
"tokenizer_class": "BertTokenizer",
|
16 |
+
"unk_token": "[UNK]"
|
17 |
+
}
|