lhallee committed on
Commit
538e1ea
·
verified ·
1 Parent(s): 40a09c4

Upload README.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. README.md +2 -1
README.md CHANGED
@@ -87,11 +87,12 @@ embedding_dict = model.embed_dataset(
87
  sequences=[
88
  'MALWMRLLPLLALLALWGPDPAAA', ... # list of protein sequences
89
  ],
 
90
  batch_size=2, # adjust for your GPU memory
91
  max_len=512, # adjust for your needs
92
  full_embeddings=False, # if True, no pooling is performed
93
  embed_dtype=torch.float32, # cast to what dtype you want
94
- pooling_type=['mean', 'cls'], # more than one pooling type will be concatenated together
95
  num_workers=0, # if you have many cpu cores, we find that num_workers = 4 is fast for large datasets
96
  sql=False, # if True, embeddings will be stored in SQLite database
97
  sql_db_path='embeddings.db',
 
87
  sequences=[
88
  'MALWMRLLPLLALLALWGPDPAAA', ... # list of protein sequences
89
  ],
90
+ tokenizer=model.tokenizer,
91
  batch_size=2, # adjust for your GPU memory
92
  max_len=512, # adjust for your needs
93
  full_embeddings=False, # if True, no pooling is performed
94
  embed_dtype=torch.float32, # cast to what dtype you want
95
+ pooling_types=['mean', 'cls'], # more than one pooling type will be concatenated together
96
  num_workers=0, # if you have many cpu cores, we find that num_workers = 4 is fast for large datasets
97
  sql=False, # if True, embeddings will be stored in SQLite database
98
  sql_db_path='embeddings.db',