nyuuzyou committed
Commit 16ddfbb
Parent: e6115bb

Update app.py

Files changed (1)
  app.py  +4 -4
app.py CHANGED

@@ -8,8 +8,8 @@ import gradio as gr
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
-MAX_MAX_NEW_TOKENS = 2048
-DEFAULT_MAX_NEW_TOKENS = 1024
+MAX_MAX_NEW_TOKENS = 1024
+DEFAULT_MAX_NEW_TOKENS = 512
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8192"))
 
 if not torch.cuda.is_available():
@@ -20,11 +20,11 @@ if torch.cuda.is_available():
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
 
-@spaces.GPU
+@spaces.GPU(duration=30)
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
-    max_new_tokens: int = 1024,
+    max_new_tokens: int = 512,
     temperature: float = 0.06,
     top_p: float = 0.95,
     top_k: int = 40,
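
The change halves both generation budgets (MAX_MAX_NEW_TOKENS 2048 → 1024, DEFAULT_MAX_NEW_TOKENS 1024 → 512) and switches the bare @spaces.GPU decorator to @spaces.GPU(duration=30), which requests a shorter ZeroGPU slot (about 30 seconds) per call. For context, here is a minimal sketch of how these constants and the decorator typically fit together in a demo of this shape; the body of generate below is an assumption based on the standard transformers streaming pattern, not the verbatim app.py, and model_id stays elided as in the diff.

import os
from threading import Thread

import gradio as gr
import spaces
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MAX_MAX_NEW_TOKENS = 1024      # upper bound exposed to the UI
DEFAULT_MAX_NEW_TOKENS = 512   # default per-call generation budget
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "8192"))

model_id = "..."  # elided in the diff; the Space's actual model id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")


@spaces.GPU(duration=30)  # request a ~30 s ZeroGPU allocation per call
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
    temperature: float = 0.06,
    top_p: float = 0.95,
    top_k: int = 40,
):
    # Rebuild the conversation in chat-template form (assumed history layout).
    conversation = []
    for user, assistant in chat_history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    # Tokenize and trim from the left if the prompt exceeds the input budget.
    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    )
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input to the last {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    # Run model.generate in a worker thread and stream partial text to the UI,
    # clamping the requested budget to the hard cap.
    streamer = TextIteratorStreamer(
        tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True
    )
    Thread(target=model.generate, kwargs=dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=min(max_new_tokens, MAX_MAX_NEW_TOKENS),
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
    )).start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)

On ZeroGPU Spaces, the duration argument bounds how long the decorated function may hold the GPU, and shorter requests tend to be scheduled sooner; pairing it with a smaller token budget keeps worst-case generations inside the shorter window.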