Filter gene mapping dict for items that exist in gene_token_dict
Browse files
- geneformer/tokenizer.py +3 -0
geneformer/tokenizer.py
CHANGED
@@ -276,6 +276,9 @@ class TranscriptomeTokenizer:
|
|
276 |
# gene keys for full vocabulary
|
277 |
self.gene_keys = list(self.gene_token_dict.keys())
|
278 |
|
|
|
|
|
|
|
279 |
# protein-coding and miRNA gene list dictionary for selecting .loom rows for tokenization
|
280 |
self.genelist_dict = dict(zip(self.gene_keys, [True] * len(self.gene_keys)))
|
281 |
|
|
|
276 |
# gene keys for full vocabulary
|
277 |
self.gene_keys = list(self.gene_token_dict.keys())
|
278 |
|
279 |
+
# Filter gene mapping dict to entries whose values exist as keys in gene_token_dict
|
280 |
+
self.gene_mapping_dict = {k: v for k, v in self.gene_mapping_dict.items() if v in self.gene_keys}
|
281 |
+
|
282 |
# protein-coding and miRNA gene list dictionary for selecting .loom rows for tokenization
|
283 |
self.genelist_dict = dict(zip(self.gene_keys, [True] * len(self.gene_keys)))
|
284 |
|