lixiangchun
committed on
Commit
·
691a03c
1
Parent(s):
3910ab9
update README
Browse files
README.md
CHANGED
@@ -2,10 +2,16 @@
|
|
2 |
|
3 |
```python
|
4 |
from transformers import PreTrainedTokenizerFast, BertForMaskedLM
|
|
|
|
|
5 |
tokenizer = PreTrainedTokenizerFast.from_pretrained("lixiangchun/transcriptome_iseeek_13millioncells_128tokens")
|
6 |
iseeek = BertForMaskedLM.from_pretrained("lixiangchun/transcriptome_iseeek_13millioncells_128tokens")
|
7 |
|
8 |
a = ["B2M MTRNR2L8 UBC FOS TMSB4X UBB FTH1 IFITM1 TPT1 FTL DUSP1", "KRT14 MTRNR2L8 KRT6A B2M GAPDH S100A8 S100A9 KRT5"]
|
|
|
|
|
|
|
|
|
9 |
batch = tokenizer(a, max_length=128, truncation=True, padding=True, return_tensors="pt")
|
10 |
out = iseeek.bert(**batch)
|
11 |
|
|
|
2 |
|
3 |
```python
|
4 |
from transformers import PreTrainedTokenizerFast, BertForMaskedLM
|
5 |
+
import re
|
6 |
+
|
7 |
tokenizer = PreTrainedTokenizerFast.from_pretrained("lixiangchun/transcriptome_iseeek_13millioncells_128tokens")
|
8 |
iseeek = BertForMaskedLM.from_pretrained("lixiangchun/transcriptome_iseeek_13millioncells_128tokens")
|
9 |
|
10 |
a = ["B2M MTRNR2L8 UBC FOS TMSB4X UBB FTH1 IFITM1 TPT1 FTL DUSP1", "KRT14 MTRNR2L8 KRT6A B2M GAPDH S100A8 S100A9 KRT5"]
|
11 |
+
|
12 |
+
# Replace '-' and '.' with '_'
|
13 |
+
a = [re.sub(r'\-|\.', '_', s) for s in a]
|
14 |
+
|
15 |
batch = tokenizer(a, max_length=128, truncation=True, padding=True, return_tensors="pt")
|
16 |
out = iseeek.bert(**batch)
|
17 |
|