File size: 14,472 Bytes
5446331
 
 
 
76e2397
 
5446331
 
 
 
 
 
 
 
 
 
 
7a4858f
5446331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76e2397
5446331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76e2397
5446331
 
 
 
76e2397
5446331
 
76e2397
5446331
 
 
 
 
 
 
 
 
76e2397
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5446331
 
 
 
7a4858f
76e2397
5446331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76e2397
5446331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76e2397
5446331
76e2397
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
686461a
76e2397
686461a
76e2397
686461a
76e2397
 
 
 
 
 
 
 
 
 
 
 
5446331
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76e2397
5446331
76e2397
 
 
 
 
 
 
 
 
 
5446331
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
import gradio
import LlamaManager
import os
import huggingface_hub
import random
import ast

# Shared Hugging Face Hub client; store_generated_data() uses it to post comments.
HF_API = huggingface_hub.HfApi()
# Default generation backend, created with the key from the HF_KEY_2 environment
# variable. authenticate() replaces this module-level instance.
# NOTE(review): the meaning of the second positional argument (True) is defined
# by LlamaManager and is not visible here — confirm before changing it.
LLAMAMANAGER = LlamaManager.LlamaManager(os.environ.get("HF_KEY_2"), True)

def store_generated_data(data):
    """Post ``data`` as a comment on discussion 1 of the ``xqt/SyntheticMBPP2`` dataset.

    Args:
        data: Any object; it is converted to text with f-string formatting
            before being posted.

    Returns:
        None. The comment is posted through the module-level ``HF_API`` client
        using the token read from the ``HF_BOT`` environment variable.
    """
    comment_body = f"{data}"
    HF_API.comment_discussion(
        "xqt/SyntheticMBPP2",
        1,
        comment_body,
        repo_type = "dataset",
        token = os.environ.get("HF_BOT"),
    )


def authenticate(secret_textbox):
    """Rebuild the shared ``LLAMAMANAGER`` with the key selected by the user's input.

    If ``secret_textbox`` matches one of the colon-separated passcodes in the
    ``PASSWORD_LIST`` environment variable, the key stored in ``HF_KEY`` is
    used. Otherwise the input itself is treated as the API key.

    Args:
        secret_textbox: Text entered in the "API Key/Passcode" textbox.

    Returns:
        None. The module-level ``LLAMAMANAGER`` is replaced as a side effect.
    """
    global LLAMAMANAGER

    # PASSWORD_LIST may be unset: treat that as "no passcodes" instead of
    # failing on None.split(). Empty entries are discarded so that an empty
    # textbox can never match (an empty variable or a trailing ":" would
    # otherwise produce "" as a valid passcode and hand out HF_KEY).
    raw_passcodes = os.environ.get("PASSWORD_LIST") or ""
    passcodes = {entry for entry in raw_passcodes.split(":") if entry}

    if secret_textbox in passcodes:
        api_key = os.environ.get("HF_KEY")
    else:
        api_key = secret_textbox

    # NOTE(review): LLAMAMANAGER is a module-level global, so this presumably
    # switches the key for every session served by this process — confirm
    # that this is intended.
    LLAMAMANAGER = LlamaManager.LlamaManager(api_key, True)


def generate_categories(categories_count, seed, temperature, top_p, frequency_penalty):
    """Generate question categories, log the run, and return an updated dropdown.

    Args:
        categories_count: Number of categories requested.
        seed: Seed forwarded to the generator.
        temperature: Sampling temperature forwarded to the generator.
        top_p: Top-p value forwarded to the generator.
        frequency_penalty: Frequency penalty forwarded to the generator.

    Returns:
        A ``gradio.Dropdown`` whose choices are the generated categories. One
        category is pre-selected at random; nothing is pre-selected when no
        categories were generated.
    """
    categories = LLAMAMANAGER.auto_generate_questions_categories(
        count = categories_count,
        seed = seed,
        temperature = temperature,
        top_p = top_p,
        frequency_penalty = frequency_penalty
    )
    # Record the output together with the settings that produced it.
    data = {
        "type": "generate_categories",
        "categories": categories,
        "count": categories_count,
        "seed": seed,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty
    }
    store_generated_data(data)
    # random.choice raises IndexError on an empty sequence, so fall back to
    # "no selection" when the generator returned nothing.
    preselected = random.choice(categories) if categories else None
    return gradio.Dropdown(choices = categories, value = preselected, label = "Select Category", interactive = True)


def generate_shots(category, shots_count, seed, temperature, top_p, frequency_penalty):
    """Generate example shots for ``category``, log the run, and return them as a table.

    Args:
        category: Category selected in the dropdown.
        shots_count: Number of shots requested.
        seed: Seed forwarded to the generator.
        temperature: Sampling temperature forwarded to the generator.
        top_p: Top-p value forwarded to the generator.
        frequency_penalty: Frequency penalty forwarded to the generator.

    Returns:
        A non-interactive ``gradio.DataFrame`` with one "Shots" column and one
        row per generated shot.
    """
    generated = LLAMAMANAGER.auto_generate_shots_for_category(
        category, shots_count, seed, temperature, top_p, frequency_penalty
    )
    # The table expects rows, so each shot becomes a single-cell row.
    shot_rows = [[entry] for entry in generated]
    record = {
        "type": "generate_shots",
        "category": category,
        "shots": shot_rows,
        "count": shots_count,
        "seed": seed,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty
    }
    store_generated_data(record)
    return gradio.DataFrame(
        value = shot_rows,
        type = "array",
        label = "Generated Shots",
        interactive = False,
        headers = ["Shots"],
    )


def generate_questions(questions_count, category, shots, seed, temperature, top_p, frequency_penalty):
    """Generate questions from the shots, log the run, and return the updated widgets.

    Args:
        questions_count: Number of questions requested.
        category: Category the shots were generated for.
        shots: Current value of the generated-shots table.
        seed: Seed forwarded to the generator.
        temperature: Sampling temperature forwarded to the generator.
        top_p: Top-p value forwarded to the generator.
        frequency_penalty: Frequency penalty forwarded to the generator.

    Returns:
        A tuple ``(table, dropdown)``: a non-interactive ``gradio.DataFrame``
        listing the questions, and a ``gradio.Dropdown`` offering the same
        questions with one pre-selected at random (nothing is pre-selected
        when no questions were generated).
    """
    questions = LLAMAMANAGER.auto_generate_questions_from_shots(questions_count, category, shots, seed, temperature, top_p, frequency_penalty)
    # The table expects rows, so each question becomes a single-cell row.
    questions_for_dataframe = [[question] for question in questions]
    data = {
        "type": "generate_questions",
        "questions": questions_for_dataframe,
        "count": questions_count,
        "category": category,
        "shots": shots,
        "seed": seed,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty
    }
    store_generated_data(data)
    # The label must match the questions table this update is applied to; it
    # previously said "Generated Shots", relabelling the table after each run.
    questions_table = gradio.DataFrame(value = questions_for_dataframe, type = "array", label = "Generated Questions", interactive = False, headers = ["Questions"])
    # random.choice raises IndexError on an empty sequence, so fall back to
    # "no selection" when the generator returned nothing.
    preselected = random.choice(questions) if questions else None
    question_picker = gradio.Dropdown(choices = questions, value = preselected, label = "Select a Question", interactive = True)
    return questions_table, question_picker
    

def generate_function(question, temperature, top_p, frequency_penalty, seed):
    """Generate a function signature for ``question`` and log the run.

    Args:
        question: Question selected in the dropdown.
        temperature: Sampling temperature forwarded to the generator.
        top_p: Top-p value forwarded to the generator.
        frequency_penalty: Frequency penalty forwarded to the generator.
        seed: Seed forwarded to the generator.

    Returns:
        A tuple ``(function_name, function_parameters, function_return)`` as
        produced by the generator.
    """
    signature = LLAMAMANAGER.auto_generate_function_signature_from_question(
        question, seed, temperature, top_p, frequency_penalty
    )
    # The generator is expected to yield exactly three values.
    function_name, function_parameters, function_return = signature
    record = {
        "type": "generate_function",
        "question": question,
        "function_name": function_name,
        "function_parameters": function_parameters,
        "function_return": function_return,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
        "seed": seed
    }
    store_generated_data(record)
    return function_name, function_parameters, function_return


def generate_answers_and_tests(question, function_name, function_parameters, function_return, temperature, top_p, frequency_penalty, seed):
    """Generate code and tests for a function signature, log the run, and render them.

    Args:
        question: Question the function should answer.
        function_name: Name of the function to generate.
        function_parameters: Text holding a Python literal that describes the
            parameters; it is parsed with ``ast.literal_eval``.
        function_return: Return type of the function.
        temperature: Sampling temperature forwarded to the generator.
        top_p: Top-p value forwarded to the generator.
        frequency_penalty: Frequency penalty forwarded to the generator.
        seed: Seed forwarded to the generator.

    Returns:
        A ``gradio.Markdown`` component showing the generated code followed by
        each test on its own line, inside a Python code fence.
    """
    # literal_eval only accepts Python literals, so the text is never executed.
    parsed_parameters = ast.literal_eval(function_parameters)
    code, tests = LLAMAMANAGER.auto_generate_answers_and_tests(
        question, function_name, parsed_parameters, function_return, seed, temperature, top_p, frequency_penalty
    )
    # The log keeps the code and the tests separate; they are only merged for display.
    record = {
        "type": "generate_answers_and_test",
        "question": question,
        "function_name": function_name,
        "function_parameters": parsed_parameters,
        "function_return": function_return,
        "code": code,
        "tests": tests,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
        "seed": seed
    }
    store_generated_data(record)
    test_lines = "".join(f"\n{test}" for test in tests)
    listing = code + test_lines
    return gradio.Markdown(f"\n```python\n{listing}\n```", show_copy_button = True)


# UI definition: five numbered steps, each with its own sampling settings,
# followed by the event wiring that connects the buttons to the handlers above.
with gradio.Blocks(fill_height=True) as base_app:
    # Header and notices. The emoji are written as real Unicode characters;
    # they were previously stored as mis-decoded byte sequences (mojibake).
    gradio.Markdown("# Synthetic Python Programming Data Generation ⚙️")
    gradio.Markdown("# ❗️ Note: The data generated here by Llama3 and the settings used to generate it will be stored in the repository [here](https://huggingface.co/datasets/xqt/SyntheticMBPP2) for future use.")
    gradio.Markdown("# ❗️ Each successful interaction is saved [here](https://huggingface.co/datasets/xqt/SyntheticMBPP2/discussions/1).")
    gradio.Markdown("# ❗️ Feel free to use your own API key if the key here is rate limited. API Key is never stored in the repository.")
    gradio.Markdown("# ❗️ If you want to use a passcode, please text me.")

    # Step 0: optional API key / passcode entry.
    gradio.Markdown("# Step 0: Use your own API Key/Passcode")
    with gradio.Row():
        with gradio.Column():
            __secret_textbox = gradio.Textbox(label = "API Key/Passcode", placeholder = "Enter your API Key/Passcode here", type = "password", interactive = True)
        with gradio.Column():
            __passcode_authenticate = gradio.Button("Authenticate", scale = 2)

    # Step 1: category generation controls.
    gradio.Markdown("# Step 1: How many categories do you want to generate?")
    with gradio.Row(equal_height = True):
        with gradio.Column(scale = 2):
            __categories_count = gradio.Slider(minimum = 1, maximum = 20, step = 1, value = 10, label = "Number of Categories", interactive = True)
        with gradio.Column():
            __categories_generate = gradio.Button("Generate Categories", scale = 2)
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __categories_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __categories_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __categories_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __categories_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)

    # Step 2: shot generation controls and the table that shows the shots.
    gradio.Markdown("# Step 2: Select a category to generate shots for and select the number of shots to generate")
    with gradio.Row():
        with gradio.Column(scale = 2):
            __shots_category = gradio.Dropdown(choices = [], label = "Select Category", interactive = True)
            __shots_count = gradio.Slider(minimum = 2, maximum = 5, step = 1, value = 2, label = "Number of Shots", interactive = True)
        with gradio.Column():
            __shots_generate = gradio.Button("Generate Shots", scale = 2)
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __shots_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __shots_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __shots_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __shots_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)
    __generated_shots = gradio.DataFrame(value = [], col_count = 1, type = "array", label = "Generated Shots", interactive = False, headers = ["Shots"])

    # Step 3: question generation controls and the table that shows the questions.
    gradio.Markdown("# Step 3: Generate Python Programming Questions for the generated shots")
    with gradio.Row():
        with gradio.Column(scale = 2):
            __questions_count = gradio.Slider(minimum = 1, maximum = 30, step = 1, value = 10, label = "Number of Questions", interactive = True)
        with gradio.Column():
            __questions_generate = gradio.Button("Generate Questions", scale = 2)
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __questions_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __questions_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __questions_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __questions_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)
    __generated_questions = gradio.DataFrame(value = [], col_count = 1, type = "array", label = "Generated Questions", interactive = False, headers = ["Questions"])

    # Step 4: function signature generation; the three read-only textboxes
    # hold the result and feed Step 5.
    gradio.Markdown("# Step 4: Generate a function name, input parameters, and return type for the generated questions")
    with gradio.Row():
        with gradio.Column(scale = 2):
            __function_question_dropdown = gradio.Dropdown(choices = [], label = "Select a Question", interactive = True, scale = 2)
        with gradio.Column():
            __function_generate = gradio.Button("Generate Function", scale = 2)
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __function_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __function_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __function_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __function_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)
    with gradio.Row():
        with gradio.Column():
            __function_name = gradio.Textbox(label = "Function Name", placeholder = "dummy_foo", interactive = False)
        with gradio.Column():
            # NOTE(review): this placeholder has unbalanced quotes and is not a
            # valid Python literal, although generate_answers_and_tests parses
            # this field with ast.literal_eval — confirm the intended example.
            __function_parameters = gradio.Textbox(label = "Input Parameters", placeholder = "['input_dict: dict, 'a': int]", interactive = False)
        with gradio.Column():
            __function_return = gradio.Textbox(label = "Return Type", placeholder = "str", interactive = False)

    # Step 5: code generation controls and the rendered result.
    gradio.Markdown("# 🚀 Step 5: Generate a code.")
    __code_generate = gradio.Button("Generate Code", scale = 2)
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __code_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __code_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __code_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __code_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)
    __code = gradio.Markdown("🚀 Code will be generated here...", show_copy_button = True)

    # Event wiring. The order of each inputs list must match the positional
    # parameter order of the handler it is bound to.
    __passcode_authenticate.click(authenticate,
                                    inputs = [__secret_textbox],
                                    outputs = []
                                    )

    __categories_generate.click(generate_categories, 
                                inputs = [__categories_count, __categories_seed, __categories_temperature, __categories_top_p, __categories_frequency_penalty],
                                outputs = [__shots_category]
                                )
    __shots_generate.click(generate_shots,
                            inputs = [__shots_category, __shots_count, __shots_seed, __shots_temperature, __shots_top_p, __shots_frequency_penalty],
                            outputs = [__generated_shots]
                            )

    __questions_generate.click(generate_questions,
                                inputs = [__questions_count, __shots_category, __generated_shots, __questions_seed, __questions_temperature, __questions_top_p, __questions_frequency_penalty],
                                outputs = [__generated_questions, __function_question_dropdown]
                                )

    __function_generate.click(generate_function,
                                inputs = [__function_question_dropdown, __function_temperature, __function_top_p, __function_frequency_penalty, __function_seed],
                                outputs = [__function_name, __function_parameters, __function_return]
                                )

    __code_generate.click(generate_answers_and_tests,
                            inputs = [__function_question_dropdown, __function_name, __function_parameters, __function_return, __code_temperature, __code_top_p, __code_frequency_penalty, __code_seed],
                            outputs = [__code]
                            )
                           
                                
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    base_app.launch()