Upload folder using huggingface_hub
Browse files
- handler.py +4 -4
handler.py
CHANGED
@@ -28,15 +28,15 @@ class EndpointHandler():
|
|
28 |
# run generation
|
29 |
samples = []
|
30 |
with torch.autocast(device_type=self.device_type, dtype=torch.bfloat16):
|
31 |
-
for k in range(data['parameters']['num_samples']):
|
32 |
-
for _ in range(data['parameters']['max_new_tokens']):
|
33 |
# forward the model to get the logits for the index in the sequence
|
34 |
logits, _ = self.model(idx)
|
35 |
# pluck the logits at the final step and scale by desired temperature
|
36 |
logits = logits[:, -1, :] / data['parameters']['temperature']
|
37 |
# optionally crop the logits to only the top k options
|
38 |
-
if data['parameters']['top_k'] is not None:
|
39 |
-
v, _ = torch.topk(logits, min(data['parameters']['top_k'], logits.size(-1)))
|
40 |
logits[logits < v[:, [-1]]] = -float('Inf')
|
41 |
# apply softmax to convert logits to (normalized) probabilities
|
42 |
probs = torch.nn.functional.softmax(logits, dim=-1)
|
|
|
28 |
# run generation
|
29 |
samples = []
|
30 |
with torch.autocast(device_type=self.device_type, dtype=torch.bfloat16):
|
31 |
+
for k in range(int(data['parameters']['num_samples'])):
|
32 |
+
for _ in range(int(data['parameters']['max_new_tokens'])):
|
33 |
# forward the model to get the logits for the index in the sequence
|
34 |
logits, _ = self.model(idx)
|
35 |
# pluck the logits at the final step and scale by desired temperature
|
36 |
logits = logits[:, -1, :] / data['parameters']['temperature']
|
37 |
# optionally crop the logits to only the top k options
|
38 |
+
if int(data['parameters']['top_k']) is not None:
|
39 |
+
v, _ = torch.topk(logits, min(int(data['parameters']['top_k']), logits.size(-1)))
|
40 |
logits[logits < v[:, [-1]]] = -float('Inf')
|
41 |
# apply softmax to convert logits to (normalized) probabilities
|
42 |
probs = torch.nn.functional.softmax(logits, dim=-1)
|