Text Generation
Transformers
PyTorch
skywork
custom_code
zhao1iang committed on
Commit
83874b2
·
1 Parent(s): bbf8902

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +35 -5
README.md CHANGED
@@ -99,14 +99,32 @@ def special_encode(input, tokenizer):
99
  res_id.append(sep_id)
100
 
101
  return res_id
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  if __name__ == '__main__':
103
  text = "小王要将150千克含药量20%的农药稀释成含药量5%的药水．需要加水多少千克？"
104
  text_token_ids = torch.tensor(special_encode(
105
  text, tokenizer)).to(model.device).reshape(1, -1)
106
  response = model.generate(text_token_ids, do_sample=False, max_length=512)
107
- response_text = tokenizer.decode(response.cpu()[0], skip_special_tokens=True).split(
108
- "[BOT]")[-1].split("[SEP]")[0].strip()
109
- print(response_text)
 
110
  """输出结果：
111
  首先，我们需要计算出150千克含药量20%的农药中含有多少千克的药。\n\n150千克 * 20% = 30千克\n\n然后，我们需要计算出要得到含药量5%的药水，需要多少千克的药水。\n\n30千克 / 5% = 600千克\n\n最后，我们需要计算出需要加多少千克的水。\n\n600千克 - 150千克 = 450千克\n\n所以答案是，小王需要加450千克的水。
112
  """
@@ -141,13 +159,25 @@ def special_encode(input, tokenizer):
141
  res_id.append(sep_id)
142
 
143
  return res_id
 
 
 
 
 
 
 
 
 
 
 
 
144
  if __name__ == '__main__':
145
  text="Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"
146
  text_token_ids = torch.tensor(special_encode(
147
  text, tokenizer)).to(model.device).reshape(1, -1)
148
  response = model.generate(text_token_ids, do_sample=False, max_length=512)
149
- response_text = tokenizer.decode(response.cpu()[0], skip_special_tokens=True).split(
150
- "[BOT]")[-1].split("[SEP]")[0].strip()
151
  print(response_text)
152
  """Skywork-13B-Math Response:
153
  First, we need to find out how many eggs Janet has left after eating for breakfast and baking for her friends. \n\nShe has 16 eggs per day, eats 3 for breakfast and uses 4 for baking. So, 16 - 3 - 4 = 9 eggs are left for selling at the farmers' market.\n\nSince she sells each egg for $2, she makes 9 * 2 = $<<9*2=18>>18 every day at the farmers' market.\n\nSo, the answer is $18.
 
99
  res_id.append(sep_id)
100
 
101
  return res_id
102
+
103
+ def special_encode(input, tokenizer):
104
+ raw_str = "[USER]%s[SEP][BOT]" % input.strip().replace("\r", "")
105
+ eos_id = tokenizer.eos_token_id
106
+ bos_id = tokenizer.bos_token_id
107
+ sep_id = tokenizer.encode("[SEP]")[-1]
108
+ res_id = [eos_id, bos_id]
109
+ arr = raw_str.split("[SEP]")
110
+ for elem_idx in range(len(arr)):
111
+ elem = arr[elem_idx]
112
+ elem_id = tokenizer.encode(elem)[1:]
113
+ res_id += elem_id
114
+ if elem_idx < len(arr) - 1:
115
+ res_id.append(sep_id)
116
+
117
+ return res_id
118
+
119
  if __name__ == '__main__':
120
  text = "小王要将150千克含药量20%的农药稀释成含药量5%的药水．需要加水多少千克？"
121
  text_token_ids = torch.tensor(special_encode(
122
  text, tokenizer)).to(model.device).reshape(1, -1)
123
  response = model.generate(text_token_ids, do_sample=False, max_length=512)
124
+ response_text = tokenizer.decode(response.cpu()[0], skip_special_tokens=True)
125
+
126
+ response_text = extract_res(response_text)
127
+ print(response_text)
128
  """输出结果：
129
  首先，我们需要计算出150千克含药量20%的农药中含有多少千克的药。\n\n150千克 * 20% = 30千克\n\n然后，我们需要计算出要得到含药量5%的药水，需要多少千克的药水。\n\n30千克 / 5% = 600千克\n\n最后，我们需要计算出需要加多少千克的水。\n\n600千克 - 150千克 = 450千克\n\n所以答案是，小王需要加450千克的水。
130
  """
 
159
  res_id.append(sep_id)
160
 
161
  return res_id
162
+
163
+ def extract_res(response):
164
+ if "[BOT]" in response:
165
+ response = response.split("[BOT]")[1]
166
+ if "<s>" in response:
167
+ response = response.split("<s>")[-1]
168
+ if "</s>" in response:
169
+ response = response.split("</s>")[0]
170
+ if "[SEP]" in response:
171
+ response = response.split("[SEP]")[0]
172
+ return response
173
+
174
  if __name__ == '__main__':
175
  text="Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?"
176
  text_token_ids = torch.tensor(special_encode(
177
  text, tokenizer)).to(model.device).reshape(1, -1)
178
  response = model.generate(text_token_ids, do_sample=False, max_length=512)
179
+ response_text = tokenizer.decode(response.cpu()[0], skip_special_tokens=True)
180
+ response_text = extract_res(response_text)
181
  print(response_text)
182
  """Skywork-13B-Math Response:
183
  First, we need to find out how many eggs Janet has left after eating for breakfast and baking for her friends. \n\nShe has 16 eggs per day, eats 3 for breakfast and uses 4 for baking. So, 16 - 3 - 4 = 9 eggs are left for selling at the farmers' market.\n\nSince she sells each egg for $2, she makes 9 * 2 = $<<9*2=18>>18 every day at the farmers' market.\n\nSo, the answer is $18.