lixiangchun committed
Commit 691a03c · 1 Parent(s): 3910ab9

update README

Files changed (1)
  1. README.md +6 -0
README.md CHANGED
@@ -2,10 +2,16 @@
 
 ```python
 from transformers import PreTrainedTokenizerFast, BertForMaskedLM
+import re
+
 tokenizer = PreTrainedTokenizerFast.from_pretrained("lixiangchun/transcriptome_iseeek_13millioncells_128tokens")
 iseeek = BertForMaskedLM.from_pretrained("lixiangchun/transcriptome_iseeek_13millioncells_128tokens")
 
 a = ["B2M MTRNR2L8 UBC FOS TMSB4X UBB FTH1 IFITM1 TPT1 FTL DUSP1", "KRT14 MTRNR2L8 KRT6A B2M GAPDH S100A8 S100A9 KRT5"]
+
+# Replace '-' and '.' with '_'
+a = [re.sub(r'\-|\.', '_', s) for s in a]
+
 batch = tokenizer(a, max_length=128, truncation=True, padding=True, return_tensors="pt")
 out = iseeek.bert(**batch)
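
Usage note beyond this diff: `iseeek.bert(**batch)` returns standard `transformers` BERT hidden states, so per-cell embeddings can be pooled from them. The sketch below is an assumption about downstream use, not part of the commit; it mean-pools `out.last_hidden_state` over the attention mask, reusing `iseeek` and `batch` from the README snippet above.

```python
# Not from the commit: a minimal sketch of turning the BERT outputs above into
# per-cell embeddings. Assumes the standard transformers BertModel output
# (out.last_hidden_state of shape [batch, seq_len, hidden]); mean pooling over
# the attention mask is one common choice, not necessarily the model authors' method.
import torch

with torch.no_grad():
    out = iseeek.bert(**batch)

mask = batch["attention_mask"].unsqueeze(-1).float()        # [batch, seq_len, 1]
summed = (out.last_hidden_state * mask).sum(dim=1)          # sum over real (non-padding) tokens
cell_embeddings = summed / mask.sum(dim=1).clamp(min=1e-9)  # [batch, hidden]
print(cell_embeddings.shape)                                # e.g. torch.Size([2, hidden_size])
```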