hchen725 committed on
Commit
1e8d481
1 Parent(s): 5197a60

Filter gene mapping dict to keep only entries whose mapped value exists in gene_token_dict

Browse files
Files changed (1) hide show
  1. geneformer/tokenizer.py +3 -0
geneformer/tokenizer.py CHANGED
@@ -276,6 +276,9 @@ class TranscriptomeTokenizer:
276
  # gene keys for full vocabulary
277
  self.gene_keys = list(self.gene_token_dict.keys())
278
 
 
 
 
279
  # protein-coding and miRNA gene list dictionary for selecting .loom rows for tokenization
280
  self.genelist_dict = dict(zip(self.gene_keys, [True] * len(self.gene_keys)))
281
 
 
276
  # gene keys for full vocabulary
277
  self.gene_keys = list(self.gene_token_dict.keys())
278
 
279
+ # Filter gene mapping dict for items that exist in gene_token_dict
280
+ self.gene_mapping_dict = {k: v for k, v in self.gene_mapping_dict.items() if v in self.gene_keys}
281
+
282
  # protein-coding and miRNA gene list dictionary for selecting .loom rows for tokenization
283
  self.genelist_dict = dict(zip(self.gene_keys, [True] * len(self.gene_keys)))
284