Christina Theodoris
committed on
Commit
•
e78c44d
1
Parent(s):
2181aa4
Modify tokenizer to allow renaming attribute names between loom and .dataset
Browse files: geneformer/tokenizer.py (+4 -3)
geneformer/tokenizer.py
CHANGED
@@ -106,7 +106,8 @@ class TranscriptomeTokenizer:
|
|
106 |
|
107 |
def tokenize_files(self, loom_data_directory):
|
108 |
tokenized_cells = []
|
109 |
-
|
|
|
110 |
|
111 |
# loops through directories to tokenize .loom files
|
112 |
for loom_file_path in loom_data_directory.glob("*.loom"):
|
@@ -115,8 +116,8 @@ class TranscriptomeTokenizer:
|
|
115 |
loom_file_path
|
116 |
)
|
117 |
tokenized_cells += file_tokenized_cells
|
118 |
-
for k in
|
119 |
-
cell_metadata[k] += file_cell_metadata[k]
|
120 |
|
121 |
return tokenized_cells, cell_metadata
|
122 |
|
|
|
106 |
|
107 |
def tokenize_files(self, loom_data_directory):
|
108 |
tokenized_cells = []
|
109 |
+
loom_cell_attr = [attr_key for attr_key in self.custom_attr_name_dict.keys()]
|
110 |
+
cell_metadata = {attr_key: [] for attr_key in self.custom_attr_name_dict.values()}
|
111 |
|
112 |
# loops through directories to tokenize .loom files
|
113 |
for loom_file_path in loom_data_directory.glob("*.loom"):
|
|
|
116 |
loom_file_path
|
117 |
)
|
118 |
tokenized_cells += file_tokenized_cells
|
119 |
+
for k in loom_cell_attr:
|
120 |
+
cell_metadata[self.custom_attr_name_dict[k]] += file_cell_metadata[k]
|
121 |
|
122 |
return tokenized_cells, cell_metadata
|
123 |
|