lewtun HF staff committed on
Commit
c2e5d86
·
1 Parent(s): b0042a5

Set better defaults

Browse files
Files changed (1) hide show
  1. app.py +3 -29
app.py CHANGED
@@ -17,7 +17,6 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
17
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
18
 
19
 
20
- # Load peft config for pre-trained checkpoint etc.
21
  device = "cuda" if torch.cuda.is_available() else "cpu"
22
  model_id = "trl-lib/llama-se-rl-merged"
23
  if device == "cpu":
@@ -32,33 +31,8 @@ tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=HF_TOKEN)
32
  PROMPT_TEMPLATE = """Question: {prompt}\n\nAnswer: """
33
 
34
 
35
- def generate(instruction, temperature=1, max_new_tokens=256, top_p=1, top_k=50):
36
  formatted_instruction = PROMPT_TEMPLATE.format(prompt=instruction)
37
- # COMMENT IN FOR NON STREAMING
38
- # generation_config = GenerationConfig(
39
- # do_sample=True,
40
- # top_p=top_p,
41
- # temperature=temperature,
42
- # max_new_tokens=max_new_tokens,
43
- # early_stopping=True,
44
- # length_penalty=length_penalty,
45
- # eos_token_id=tokenizer.eos_token_id,
46
- # pad_token_id=tokenizer.pad_token_id,
47
- # )
48
-
49
- # input_ids = tokenizer(
50
- # formatted_instruction, return_tensors="pt", truncation=True, max_length=2048
51
- # ).input_ids.cuda()
52
-
53
- # with torch.inference_mode(), torch.autocast("cuda"):
54
- # outputs = model.generate(input_ids=input_ids, generation_config=generation_config)[0]
55
-
56
- # output = tokenizer.decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)
57
- # return output.split("### Antwort:\n")[1]
58
-
59
- # STREAMING BASED ON git+https://github.com/gante/transformers.git@streamer_iterator
60
-
61
- # streaming
62
  streamer = TextIteratorStreamer(tokenizer)
63
  model_inputs = tokenizer(formatted_instruction, return_tensors="pt", truncation=True, max_length=2048).to(device)
64
 
@@ -93,7 +67,7 @@ examples = [
93
  "How do I create an array in C++ of length 5 which contains all even numbers between 1 and 10?",
94
  "How can I write a Java function to generate the nth Fibonacci number?",
95
  "How can I write a Python function that checks if a given number is a palindrome or not?",
96
- "What is the output of the following code?\n\n```\nlist1 = ['a', 'b', 'c']\nlist2 = [1, 2, 3]\n\nfor x, y in zip(list1, list2):\n print(x * y)\n```",
97
  ]
98
 
99
 
@@ -164,7 +138,7 @@ with gr.Blocks(theme=theme) as demo:
164
  )
165
  top_k = gr.Slider(
166
  label="Top-k",
167
- value=50,
168
  minimum=0,
169
  maximum=100,
170
  step=2,
 
17
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
18
 
19
 
 
20
  device = "cuda" if torch.cuda.is_available() else "cpu"
21
  model_id = "trl-lib/llama-se-rl-merged"
22
  if device == "cpu":
 
31
  PROMPT_TEMPLATE = """Question: {prompt}\n\nAnswer: """
32
 
33
 
34
+ def generate(instruction, temperature=1, max_new_tokens=256, top_p=1, top_k=0):
35
  formatted_instruction = PROMPT_TEMPLATE.format(prompt=instruction)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  streamer = TextIteratorStreamer(tokenizer)
37
  model_inputs = tokenizer(formatted_instruction, return_tensors="pt", truncation=True, max_length=2048).to(device)
38
 
 
67
  "How do I create an array in C++ of length 5 which contains all even numbers between 1 and 10?",
68
  "How can I write a Java function to generate the nth Fibonacci number?",
69
  "How can I write a Python function that checks if a given number is a palindrome or not?",
70
+ "I have a lion in my garden. How can I get rid of it?",
71
  ]
72
 
73
 
 
138
  )
139
  top_k = gr.Slider(
140
  label="Top-k",
141
+ value=0,
142
  minimum=0,
143
  maximum=100,
144
  step=2,