Spaces:
Running
Running
Update README.md
Browse files
README.md
CHANGED
@@ -63,7 +63,6 @@ model = AutoGPTQForCausalLM.from_quantized(
|
|
63 |
# inject_fused_attention=False, # or
|
64 |
disable_exllama=True,
|
65 |
device_map='auto',
|
66 |
-
revision='2bit_128g',
|
67 |
)
|
68 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)
|
69 |
input_ids = tokenizer('Hello! I am a VITA-compressed-LLM chatbot!', return_tensors='pt').input_ids.to('cuda')
|
|
|
63 |
# inject_fused_attention=False, # or
|
64 |
disable_exllama=True,
|
65 |
device_map='auto',
|
|
|
66 |
)
|
67 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)
|
68 |
input_ids = tokenizer('Hello! I am a VITA-compressed-LLM chatbot!', return_tensors='pt').input_ids.to('cuda')
|