Generalized
Browse files
- geneformer/tokenizer.py +1 -4
geneformer/tokenizer.py
CHANGED
@@ -288,10 +288,7 @@ class TranscriptomeTokenizer:
|
|
288 |
# create dataset
|
289 |
def dict_generator():
|
290 |
for i in range(len(tokenized_cells)):
|
291 |
-
yield {
|
292 |
-
'input_ids': dataset_dict['input_ids'][i],
|
293 |
-
'cell_type': dataset_dict['cell_type'][i]
|
294 |
-
}
|
295 |
output_dataset = Dataset.from_generator(dict_generator, num_proc=self.nproc)
|
296 |
|
297 |
# truncate dataset
|
|
|
288 |
# create dataset
|
289 |
def dict_generator():
|
290 |
for i in range(len(tokenized_cells)):
|
291 |
+
yield {k: dataset_dict[k][i] for k in dataset_dict.keys()}
|
|
|
|
|
|
|
292 |
output_dataset = Dataset.from_generator(dict_generator, num_proc=self.nproc)
|
293 |
|
294 |
# truncate dataset
|