Update app.py
app.py CHANGED
@@ -233,20 +233,24 @@ Focus on clear, concise, and evidence-based improvements that align with the ove
         add_generation_prompt=True
     )
 
-    # Check input length and truncate to
+    # Check input length and truncate to 15000 tokens before encoding
     input_tokens = tokenizer.encode(text)
-    if len(input_tokens) >
-        input_tokens = input_tokens[:
+    if len(input_tokens) > 15000:  # Limit to 15k tokens
+        input_tokens = input_tokens[:15000]
         text = tokenizer.decode(input_tokens)
-        print(f"Input truncated to
+        print(f"Input truncated to 15000 tokens")
 
     progress(0.5, desc="Generating improved text...")
     # Generate non-streaming
     input_ids = tokenizer.encode(text, return_tensors="pt").to(model.device)
 
+    # Create attention mask
+    attention_mask = torch.ones_like(input_ids)
+
    with torch.no_grad():
         output_ids = model.generate(
             input_ids,
+            attention_mask=attention_mask,  # Add attention mask
             max_new_tokens=max_new_tokens,
             do_sample=(temperature > 0),
             temperature=temperature if temperature > 0 else 1.0,
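
The first change caps prompt length by round-tripping through token ids: encode, slice the id list at the budget, and decode back to text. A minimal sketch of that pattern, with gpt2 as a stand-in for the Space's tokenizer (not named in this hunk) and a hypothetical truncate_prompt helper:

from transformers import AutoTokenizer

MAX_INPUT_TOKENS = 15000  # the limit chosen in this commit

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in tokenizer

def truncate_prompt(text: str) -> str:
    # Encode to ids, cut the list at the budget, decode back to text.
    input_tokens = tokenizer.encode(text)
    if len(input_tokens) > MAX_INPUT_TOKENS:
        input_tokens = input_tokens[:MAX_INPUT_TOKENS]
        text = tokenizer.decode(input_tokens)
        print(f"Input truncated to {MAX_INPUT_TOKENS} tokens")
    return text

print(truncate_prompt("word " * 20000))

The cut keeps the head of the prompt and drops the tail; tokenizer(text, truncation=True, max_length=MAX_INPUT_TOKENS) would make the same head-keeping cut in a single call.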
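
The second change passes an explicit attention mask to generate(). Because the Space feeds a single, unpadded sequence, every position holds a real token, so an all-ones mask of the same shape as input_ids is correct; supplying it also avoids the warning transformers emits when generate() has to infer the mask itself (typically when the pad and eos token ids coincide). A minimal sketch, again with gpt2 standing in for the actual model:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # stand-in model
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_ids = tokenizer.encode("An example prompt.", return_tensors="pt").to(model.device)

# One unpadded sequence: every position is real, so all ones is correct.
attention_mask = torch.ones_like(input_ids)

with torch.no_grad():
    output_ids = model.generate(
        input_ids,
        attention_mask=attention_mask,
        max_new_tokens=32,  # placeholder for the Space's max_new_tokens
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))

Note that torch.ones_like(input_ids) is only valid because there is no padding; for padded batches, the attention_mask returned by tokenizer(..., padding=True, return_tensors="pt") should be passed instead.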