Update README.md
README.md
@@ -74,15 +74,22 @@ Usage:
 #Load base model weight & tokenizer
 tokenizer = AutoTokenizer.from_pretrained(tokenizer_name,trust_remote_code=True)
 
-model = AutoModelForCausalLM.from_pretrained(
+model = AutoModelForCausalLM.from_pretrained(basemodel_name, device_map='auto', trust_remote_code=True)
 
 #Load adapter
 fine_tuned_model = PeftModel.from_pretrained(model, model_id)
-
+
+# Tokenize
+inputs = tokenizer(prompt,
+                   return_tensors="pt",
+                   return_attention_mask=True,
+                   padding=False,
+                   truncation=True)
 #Run inference
 outputs = fine_tuned_model.generate(**inputs, max_length=1000)
 
-
+# Decode output
+text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
 print(text)
 
 Usage (with quantization):
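For reference, the updated snippet assembled into a runnable script. This is a minimal sketch: the values assigned to basemodel_name, tokenizer_name, model_id, and prompt are placeholders (the diff uses these names but does not show their values), so substitute your own base model, tokenizer, and adapter repo IDs.

# Minimal end-to-end sketch of the updated README snippet.
# Placeholder values below are assumptions, not part of the original diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

basemodel_name = "base-org/base-model"    # placeholder: base model repo ID
tokenizer_name = basemodel_name           # placeholder: usually the base model's tokenizer
model_id = "your-org/your-lora-adapter"   # placeholder: PEFT adapter repo ID
prompt = "Hello, world!"                  # placeholder prompt

#Load base model weight & tokenizer
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(basemodel_name, device_map='auto', trust_remote_code=True)

#Load adapter
fine_tuned_model = PeftModel.from_pretrained(model, model_id)

# Tokenize
inputs = tokenizer(prompt,
                   return_tensors="pt",
                   return_attention_mask=True,
                   padding=False,
                   truncation=True)
# Move input tensors to the model's device; needed when device_map='auto'
# places the model on GPU (this line is an addition, not in the diff).
inputs = {k: v.to(fine_tuned_model.device) for k, v in inputs.items()}

#Run inference
outputs = fine_tuned_model.generate(**inputs, max_length=1000)

# Decode output
text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
print(text)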
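The "Usage (with quantization)" section itself falls outside this hunk, so the README's actual quantized snippet is not shown here. As an assumption of what such a load typically looks like with transformers, a 4-bit variant of the base-model load might use BitsAndBytesConfig (requires the bitsandbytes package; everything below is illustrative, not the author's code):

# Hypothetical 4-bit quantized load (assumption: the README's real
# quantized snippet is outside this hunk).
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                      # quantize weights to 4 bits
    bnb_4bit_compute_dtype=torch.float16,   # compute in fp16
)
model = AutoModelForCausalLM.from_pretrained(
    basemodel_name,                 # placeholder from the sketch above
    quantization_config=bnb_config,
    device_map='auto',
    trust_remote_code=True,
)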