TJMUCH
/

transcriptome-iseeek

Inference Endpoints

Model card Files and versions Community

lixiangchun committed on Dec 1, 2021

Commit

691a03c

·

1 Parent(s): 3910ab9

update README

Files changed (1) hide show

README.md +6 -0

README.md CHANGED Viewed

@@ -2,10 +2,16 @@
 ```python
 from transformers import PreTrainedTokenizerFast, BertForMaskedLM
 tokenizer = PreTrainedTokenizerFast.from_pretrained("lixiangchun/transcriptome_iseeek_13millioncells_128tokens")
 iseeek = BertForMaskedLM.from_pretrained("lixiangchun/transcriptome_iseeek_13millioncells_128tokens")
 a = ["B2M MTRNR2L8 UBC FOS TMSB4X UBB FTH1 IFITM1 TPT1 FTL DUSP1", "KRT14 MTRNR2L8 KRT6A B2M GAPDH S100A8 S100A9 KRT5"]
 batch = tokenizer(a, max_length=128, truncation=True, padding=True, return_tensors="pt")
 out = iseeek.bert(**batch)

 ```python
 from transformers import PreTrainedTokenizerFast, BertForMaskedLM
+import re
 tokenizer = PreTrainedTokenizerFast.from_pretrained("lixiangchun/transcriptome_iseeek_13millioncells_128tokens")
 iseeek = BertForMaskedLM.from_pretrained("lixiangchun/transcriptome_iseeek_13millioncells_128tokens")
 a = ["B2M MTRNR2L8 UBC FOS TMSB4X UBB FTH1 IFITM1 TPT1 FTL DUSP1", "KRT14 MTRNR2L8 KRT6A B2M GAPDH S100A8 S100A9 KRT5"]
+# Replace '-' and '.' with '_'
+a = [re.sub(r'\-|\.', '_', s) for s in a]
 batch = tokenizer(a, max_length=128, truncation=True, padding=True, return_tensors="pt")
 out = iseeek.bert(**batch)