Fix CUDA inference code
Browse files
README.md
CHANGED
```diff
@@ -25,6 +25,8 @@ please note int2 **may be slower** than int4 on CUDA due to kernel issue.
 ~~~python
 import transformers
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from auto_round import AutoRoundConfig ##must import for auto-round format
+
 
 # https://github.com/huggingface/transformers/pull/35493
 def set_initialized_submodules(model, state_dict_keys):
```