Update README.md
Browse files
README.md
CHANGED
@@ -82,6 +82,7 @@ prepare_for_inference(model, backend="bitblas") #takes a while to init...
|
|
82 |
|
83 |
#Generate
|
84 |
###################################################
|
|
|
85 |
gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Warm-up takes a while
|
86 |
|
87 |
gen.generate("Write an essay about large language models", print_tokens=True)
|
|
|
82 |
|
83 |
#Generate
|
84 |
###################################################
|
85 |
+
#For longer contexts, make sure to allocate a large enough cache via the cache_size= parameter
|
86 |
gen = HFGenerator(model, tokenizer, max_new_tokens=1000, do_sample=True, compile="partial").warmup() #Warm-up takes a while
|
87 |
|
88 |
gen.generate("Write an essay about large language models", print_tokens=True)
|