Get the gene keys and gene list keys from the token dictionary instead of medians
Browse files - geneformer/tokenizer.py +1 -1
geneformer/tokenizer.py
CHANGED
@@ -132,7 +132,7 @@ class TranscriptomeTokenizer:
|
|
132 |
self.gene_token_dict = pickle.load(f)
|
133 |
|
134 |
# gene keys for full vocabulary
|
135 |
-
self.gene_keys = list(self.gene_median_dict.keys())
|
136 |
|
137 |
# protein-coding and miRNA gene list dictionary for selecting .loom rows for tokenization
|
138 |
self.genelist_dict = dict(zip(self.gene_keys, [True] * len(self.gene_keys)))
|
|
|
132 |
self.gene_token_dict = pickle.load(f)
|
133 |
|
134 |
# gene keys for full vocabulary
|
135 |
+
self.gene_keys = list(self.gene_token_dict.keys())
|
136 |
|
137 |
# protein-coding and miRNA gene list dictionary for selecting .loom rows for tokenization
|
138 |
self.genelist_dict = dict(zip(self.gene_keys, [True] * len(self.gene_keys)))
|