qhduan committed
Commit e4808e7
1 Parent(s): 35cc9cd

Update README.md

Files changed (1)
  1. README.md +82 -0
README.md CHANGED
@@ -44,4 +44,86 @@ generated_ids = model.generate(
  ret = tokenizer.decode(generated_ids.sequences[0], skip_special_tokens=False)
  print(ret)
 
+ ```
+
+ ## Prompt
+
+ You can give the model a style or a specific language, for example:
+
+ ```python
+ inputs = tokenizer('''<|endoftext|>
+ def add(a, b):
+     return a + b
+
+ # docstring
+ """
+ Calculate numbers add.
+
+ Args:
+     a: the first number to add
+     b: the second number to add
+
+ Return:
+     The result of a + b
+ """
+ <|endoftext|>
+ def load_excel(path):
+     return pd.read_excel(path)
+
+ # docstring
+ """''', return_tensors='pt')
+
+ doc_max_length = 128
+
+ generated_ids = model.generate(
+     **inputs,
+     max_length=inputs.input_ids.shape[1] + doc_max_length,
+     do_sample=False,
+     return_dict_in_generate=True,
+     num_return_sequences=1,
+     output_scores=True,
+     pad_token_id=50256,
+     eos_token_id=50256  # <|endoftext|>
+ )
+
+ ret = tokenizer.decode(generated_ids.sequences[0], skip_special_tokens=False)
+ print(ret)
+
+ # The same prompt again, but the example docstring is written in Chinese,
+ # which steers the model toward generating the new docstring in Chinese.
+ inputs = tokenizer('''<|endoftext|>
+ def add(a, b):
+     return a + b
+
+ # docstring
+ """
+ 计算数字相加
+
+ Args:
+     a: 第一个加数
+     b: 第二个加数
+
+ Return:
+     相加的结果
+ """
+ <|endoftext|>
+ def load_excel(path):
+     return pd.read_excel(path)
+
+ # docstring
+ """''', return_tensors='pt')
+
+ doc_max_length = 128
+
+ generated_ids = model.generate(
+     **inputs,
+     max_length=inputs.input_ids.shape[1] + doc_max_length,
+     do_sample=False,
+     return_dict_in_generate=True,
+     num_return_sequences=1,
+     output_scores=True,
+     pad_token_id=50256,
+     eos_token_id=50256  # <|endoftext|>
+ )
+
+ ret = tokenizer.decode(generated_ids.sequences[0], skip_special_tokens=False)
+ print(ret)
  ```
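
The snippets in this diff assume that `tokenizer` and `model` already exist in scope (`pd.read_excel` only appears inside the prompt text and is never actually executed). A minimal setup sketch, assuming the checkpoint is a causal language model hosted on the Hugging Face Hub; the repository id below is a placeholder, not something stated in this commit:

```python
# Minimal setup sketch (assumption, not part of this commit):
# load the tokenizer and a causal LM from the Hugging Face Hub.
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "<your-model-repo-id>"  # placeholder: replace with the actual repo id

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
model.eval()  # inference only; generate() does not need gradients
```

With greedy decoding (`do_sample=False`) the generated docstring is deterministic for a given prompt, and generation stops as soon as the model emits `<|endoftext|>` (token id 50256), or after `doc_max_length` new tokens.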