saishshinde15 commited on
Commit
633ccb5
·
verified ·
1 Parent(s): 2494cf8

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +29 -1
README.md CHANGED
@@ -74,4 +74,32 @@ output = model.generate(
74
 
75
  # Decode and print output
76
  output_text = tokenizer.decode(output[0], skip_special_tokens=True)
77
- print(output_text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
 
75
  # Decode and print output
76
  output_text = tokenizer.decode(output[0], skip_special_tokens=True)
77
+ print(output_text)
78
+ ```
79
+
80
+ <details>
81
+ <summary>Fast inference</summary>
82
+
83
+ ```python
84
+ # Shell prerequisite (run before starting Python): pip install transformers vllm vllm[lora] torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
85
+
86
+ text = tokenizer.apply_chat_template([
87
+ {"role" : "system", "content" : SYSTEM_PROMPT},
88
+ {"role" : "user", "content" : "Solve 2x+3=4"},
89
+ ], tokenize = False, add_generation_prompt = True)
90
+
91
+ from vllm import SamplingParams
92
+ sampling_params = SamplingParams(
93
+ temperature = 0.8,
94
+ top_p = 0.95,
95
+ max_tokens = 1024,
96
+ )
97
+ output = model.fast_generate(
98
+ text,
99
+ sampling_params = sampling_params,
100
+ lora_request = model.load_lora("grpo_saved_lora"),
101
+ )[0].outputs[0].text
102
+
103
+ print(output)
104
+ ```
105
+ </details>