kgourgou committed on
Commit
eab7f9b
·
verified ·
1 Parent(s): 58318c4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -10
app.py CHANGED
@@ -51,15 +51,16 @@ def generate_completion(prompt, strategy, params):
51
  """
52
  Generate a complete answer using model.generate with specified parameters.
53
  """
54
- # Encode the prompt and get the attention mask.
55
- encoded = tokenizer(prompt, return_tensors="pt")
56
- input_ids = encoded["input_ids"]
57
- attention_mask = encoded["attention_mask"]
58
-
59
- # Generate the output.
60
- output_ids = model.generate(
61
- input_ids, attention_mask=attention_mask, max_length=50, **params
62
- )
 
63
  return tokenizer.decode(output_ids[0], skip_special_tokens=True)
64
 
65
 
@@ -89,7 +90,6 @@ def generate_all(prompt):
89
  Run multiple decoding strategies concurrently and yield updates as each completes.
90
  """
91
  # Define each decoding strategy and its parameters.
92
- # For the default strategies, we use model.generate; for "Min‑p Sampling" we use our custom function.
93
  methods = {
94
  "Greedy": {"type": "default", "params": {"do_sample": False}},
95
  "Top-k Sampling": {
@@ -152,6 +152,7 @@ def generate_all(prompt):
152
  except Exception as exc:
153
  result = f"Error: {exc}"
154
  results[method] = result
 
155
  # Yield the results in the pre-defined order; pending methods show "Processing..."
156
  yield tuple(
157
  results[m] if results[m] is not None else "Processing..."
 
51
  """
52
  Generate a complete answer using model.generate with specified parameters.
53
  """
54
+ with torch.no_grad():
55
+ # Encode the prompt and get the attention mask.
56
+ encoded = tokenizer(prompt, return_tensors="pt")
57
+ input_ids = encoded["input_ids"]
58
+ attention_mask = encoded["attention_mask"]
59
+
60
+ # Generate the output.
61
+ output_ids = model.generate(
62
+ input_ids, attention_mask=attention_mask, max_length=50, **params
63
+ )
64
  return tokenizer.decode(output_ids[0], skip_special_tokens=True)
65
 
66
 
 
90
  Run multiple decoding strategies concurrently and yield updates as each completes.
91
  """
92
  # Define each decoding strategy and its parameters.
 
93
  methods = {
94
  "Greedy": {"type": "default", "params": {"do_sample": False}},
95
  "Top-k Sampling": {
 
152
  except Exception as exc:
153
  result = f"Error: {exc}"
154
  results[method] = result
155
+
156
  # Yield the results in the pre-defined order; pending methods show "Processing..."
157
  yield tuple(
158
  results[m] if results[m] is not None else "Processing..."