Fixed issue: loom and h5ad now produce the same checksums
Browse files
- geneformer/tokenizer.py (+9 -9)
geneformer/tokenizer.py
CHANGED
@@ -194,11 +194,11 @@ class TranscriptomeTokenizer:
|
|
194 |
else:
|
195 |
var_exists = True
|
196 |
|
197 |
-
if var_exists
|
198 |
filter_pass_loc = np.where(
|
199 |
-
[
|
200 |
)[0]
|
201 |
-
elif var_exists
|
202 |
print(
|
203 |
f"{adata_file_path} has no column attribute 'filter_pass'; tokenizing all cells."
|
204 |
)
|
@@ -208,10 +208,10 @@ class TranscriptomeTokenizer:
|
|
208 |
|
209 |
for i in range(0, len(filter_pass_loc), chunk_size):
|
210 |
idx = filter_pass_loc[i:i+chunk_size]
|
211 |
-
X = adata[idx].X
|
212 |
|
213 |
-
X_view =
|
214 |
-
|
|
|
215 |
X_norm = sp.csr_matrix(X_norm)
|
216 |
|
217 |
tokenized_cells += [
|
@@ -258,11 +258,11 @@ class TranscriptomeTokenizer:
|
|
258 |
else:
|
259 |
var_exists = True
|
260 |
|
261 |
-
if var_exists
|
262 |
filter_pass_loc = np.where(
|
263 |
-
[
|
264 |
)[0]
|
265 |
-
elif var_exists
|
266 |
print(
|
267 |
f"{loom_file_path} has no column attribute 'filter_pass'; tokenizing all cells."
|
268 |
)
|
|
|
194 |
else:
|
195 |
var_exists = True
|
196 |
|
197 |
+
if var_exists:
|
198 |
filter_pass_loc = np.where(
|
199 |
+
[i == 1 for i in adata.obs["filter_pass"]]
|
200 |
)[0]
|
201 |
+
elif not var_exists:
|
202 |
print(
|
203 |
f"{adata_file_path} has no column attribute 'filter_pass'; tokenizing all cells."
|
204 |
)
|
|
|
208 |
|
209 |
for i in range(0, len(filter_pass_loc), chunk_size):
|
210 |
idx = filter_pass_loc[i:i+chunk_size]
|
|
|
211 |
|
212 |
+
X_view = adata[idx, coding_miRNA_loc].X
|
213 |
+
n_counts = adata[idx].obs['n_counts'].values[:, None]
|
214 |
+
X_norm = (X_view / n_counts * target_sum / norm_factor_vector)
|
215 |
X_norm = sp.csr_matrix(X_norm)
|
216 |
|
217 |
tokenized_cells += [
|
|
|
258 |
else:
|
259 |
var_exists = True
|
260 |
|
261 |
+
if var_exists:
|
262 |
filter_pass_loc = np.where(
|
263 |
+
[i == 1 for i in data.ca["filter_pass"]]
|
264 |
)[0]
|
265 |
+
elif not var_exists:
|
266 |
print(
|
267 |
f"{loom_file_path} has no column attribute 'filter_pass'; tokenizing all cells."
|
268 |
)
|