mobicham committed on
Commit
eaea27b
·
verified ·
1 Parent(s): eec7eaa

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -0
README.md CHANGED
@@ -82,6 +82,7 @@ prepare_for_inference(model, backend="bitblas") #takes a while to init...
82
 
83
  #Generate
84
  ###################################################
 
85
  gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Warm-up takes a while
86
 
87
  gen.generate("Write an essay about large language models", print_tokens=True)
 
82
 
83
  #Generate
84
  ###################################################
85
+ #For longer context, make sure to allocate enough cache via the cache_size= parameter
86
  gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Warm-up takes a while
87
 
88
  gen.generate("Write an essay about large language models", print_tokens=True)