# NOTE: scraped file-viewer residue was collapsed into this comment: a
# "File size: 9,207 Bytes" banner, gutter hashes 5446331 / 7a4858f, and a
# 1-167 line-number column. None of it was part of the program.
import gradio
import LlamaManager
import os
import huggingface_hub

# Shared Hugging Face Hub client; store_generated_data() uses it to post comments.
HF_API = huggingface_hub.HfApi()
# Module-wide Llama manager, built at import time from the HF_KEY_2 environment
# variable. authenticate() rebinds this global, so a replacement applies to every
# later generate_* call in this process.
# NOTE(review): the meaning of the second positional argument (True) is defined by
# LlamaManager, which is not visible in this file -- confirm against its signature.
LLAMAMANAGER = LlamaManager.LlamaManager(os.environ.get("HF_KEY_2"), True)

def store_generated_data(data):
    """Post *data*, rendered as text, as a comment on the dataset discussion.

    The comment is added to discussion 1 of the ``xqt/SyntheticMBPP2`` dataset
    repository, using the token read from the ``HF_BOT`` environment variable.
    """
    HF_API.comment_discussion(
        "xqt/SyntheticMBPP2",
        1,
        f"{data}",
        repo_type = "dataset",
        token = os.environ.get("HF_BOT"),
    )


def authenticate(secret_textbox):
    """Rebuild the shared Llama manager from a passcode or a user-supplied API key.

    If the submitted text matches one of the colon-separated entries of the
    ``PASSWORD_LIST`` environment variable, the manager is rebuilt with the
    key stored in ``HF_KEY``; otherwise the submitted text itself is used as
    the API key.

    Args:
        secret_textbox: Text taken from the "API Key/Passcode" field.

    Note:
        ``LLAMAMANAGER`` is a module-level global, so the rebuilt manager is
        used by every later generate_* call in this process, not only by the
        caller who pressed "Authenticate".
    """
    global LLAMAMANAGER
    # Whitespace picked up while copy/pasting is never part of a key or passcode.
    secret = (secret_textbox or "").strip()
    # A missing PASSWORD_LIST means "no passcodes" rather than a crash on
    # None.split(). Empty entries (unset/empty variable, "a::b", trailing ":")
    # are dropped so a blank submission can never unlock the hosted key.
    raw_passcodes = os.environ.get("PASSWORD_LIST", "")
    passcodes = {entry.strip() for entry in raw_passcodes.split(":") if entry.strip()}

    api_key = os.environ.get("HF_KEY") if secret in passcodes else secret

    LLAMAMANAGER = LlamaManager.LlamaManager(api_key, True)


def generate_categories(categories_count, seed, temperature, top_p, frequency_penalty):
    """Generate question categories, log them, and return a refreshed dropdown.

    Args:
        categories_count: Number of categories to request.
        seed: Sampling seed forwarded to the Llama manager.
        temperature: Sampling temperature forwarded to the Llama manager.
        top_p: Nucleus-sampling value forwarded to the Llama manager.
        frequency_penalty: Frequency penalty forwarded to the Llama manager.

    Returns:
        A ``gradio.Dropdown`` whose choices are the generated categories. The
        first category is preselected; when nothing was generated the dropdown
        is returned without a selection.
    """
    categories = LLAMAMANAGER.auto_generate_questions_categories(
        count = categories_count,
        seed = seed,
        temperature = temperature,
        top_p = top_p,
        frequency_penalty = frequency_penalty
    )
    # Record both the output and the settings that produced it.
    data = {
        "type": "generate_categories",
        "categories": categories,
        "count": categories_count,
        "seed": seed,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty
    }
    store_generated_data(data)
    # categories[0] on an empty result would raise IndexError inside the click
    # handler; fall back to "no selection" instead.
    default_category = categories[0] if categories else None
    return gradio.Dropdown(choices = categories, value = default_category, label = "Select Category", interactive = True)


def generate_shots(category, shots_count, seed, temperature, top_p, frequency_penalty):
    """Generate example shots for *category*, log them, and return them as a table.

    The sampling settings (seed, temperature, top_p, frequency_penalty) are
    forwarded unchanged to the Llama manager. Each generated shot becomes one
    single-cell row of the returned ``gradio.DataFrame``.
    """
    sampling_args = (seed, temperature, top_p, frequency_penalty)
    generated = LLAMAMANAGER.auto_generate_shots_for_category(category, shots_count, *sampling_args)

    # Wrap every shot in its own list so it forms one row of a one-column table.
    rows = []
    for shot in generated:
        rows.append([shot])

    store_generated_data({
        "type": "generate_shots",
        "category": category,
        "shots": rows,
        "count": shots_count,
        "seed": seed,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
    })
    return gradio.DataFrame(
        value = rows,
        type = "array",
        label = "Generated Shots",
        interactive = False,
        headers = None,
    )


def generate_questions(questions_count, category, shots, seed, temperature, top_p, frequency_penalty):
    """Generate questions from the given shots, log them, and return them as a table.

    Args:
        questions_count: Number of questions to request.
        category: Category the questions belong to.
        shots: Example shots, passed through to the Llama manager unchanged.
        seed: Sampling seed forwarded to the Llama manager.
        temperature: Sampling temperature forwarded to the Llama manager.
        top_p: Nucleus-sampling value forwarded to the Llama manager.
        frequency_penalty: Frequency penalty forwarded to the Llama manager.

    Returns:
        A one-column ``gradio.DataFrame`` with one generated question per row.
    """
    questions = LLAMAMANAGER.auto_generate_questions_from_shots(questions_count, category, shots, seed, temperature, top_p, frequency_penalty)
    # One single-cell row per question so the result renders as a one-column table.
    questions = [[question] for question in questions]
    data = {
        "type": "generate_questions",
        "questions": questions,
        "count": questions_count,
        "category": category,
        "shots": shots,
        "seed": seed,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty
    }
    store_generated_data(data)
    # This table holds questions; the previous "Generated Shots" label was a
    # copy-paste leftover from generate_shots.
    return gradio.DataFrame(value = questions, type = "array", label = "Generated Questions", interactive = False, headers = None)


# UI definition: three generation steps (categories -> shots -> questions) plus
# an optional authentication step, followed by the event wiring.
with gradio.Blocks(fill_height=True) as base_app:
    # Heading emoji restored from mis-decoded UTF-8 bytes.
    gradio.Markdown("# Synthetic Python Programming Data Generation ⚙️")
    gradio.Markdown("# ❗️ Note: The data generated here by Llama3 and the settings used to generate it will be stored in the repository [here](https://huggingface.co/datasets/xqt/SyntheticMBPP2) for future use.")
    gradio.Markdown("# ❗️ Each successful interaction is saved [here](https://huggingface.co/datasets/xqt/SyntheticMBPP2/discussions/1)")
    gradio.Markdown("# ❗️ Feel free to use your own API key if the key here is rate limited. API Key is never stored in the repository.")
    gradio.Markdown("# ❗️ If you want to use a passcode, please text me.")
    gradio.Markdown("# Step 0: Use your own API Key/Passcode")
    with gradio.Row():
        with gradio.Column():
            __secret_textbox = gradio.Textbox(label = "API Key/Passcode", placeholder = "Enter your API Key/Passcode here", type = "password", interactive = True)
        with gradio.Column():
            __passcode_authenticate = gradio.Button("Authenticate", scale = 2)

    gradio.Markdown("# Step 1: How many categories do you want to generate?")
    with gradio.Row(equal_height = True):
        with gradio.Column(scale = 2):
            __categories_count = gradio.Slider(minimum = 1, maximum = 20, step = 1, value = 10, label = "Number of Categories", interactive = True)
        with gradio.Column():
            __categories_generate = gradio.Button("Generate Categories", scale = 2)
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __categories_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __categories_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __categories_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __categories_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)

    gradio.Markdown("# Step 2: Select a category to generate shots for and select the number of shots to generate")
    with gradio.Row():
        with gradio.Column(scale = 2):
            # Choices start empty; generate_categories fills them in.
            __shots_category = gradio.Dropdown(choices = [], label = "Select Category", interactive = True)
            __shots_count = gradio.Slider(minimum = 2, maximum = 5, step = 1, value = 2, label = "Number of Shots", interactive = True)
        with gradio.Column():
            __shots_generate = gradio.Button("Generate Shots", scale = 2)
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __shots_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __shots_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __shots_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __shots_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)
    __generated_shots = gradio.DataFrame(value = [], col_count = 1, type = "array", label = "Generated Shots", interactive = False, headers = None)

    gradio.Markdown("# Step 3: Generate Python Programming Questions for the generated shots")
    with gradio.Row():
        with gradio.Column(scale = 2):
            __questions_count = gradio.Slider(minimum = 1, maximum = 30, step = 1, value = 10, label = "Number of Questions", interactive = True)
        with gradio.Column():
            __questions_generate = gradio.Button("Generate Questions", scale = 2)
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __questions_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __questions_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __questions_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __questions_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)
    # This table shows questions, so it is labelled accordingly (it previously
    # reused the "Generated Shots" label of the table above).
    __generated_questions = gradio.DataFrame(value = [], col_count = 1, type = "array", label = "Generated Questions", interactive = False, headers = None)


    # Event wiring. The order of each `inputs` list must match the parameter
    # order of the handler it feeds.
    __passcode_authenticate.click(authenticate,
                                    inputs = [__secret_textbox],
                                    outputs = []
                                    )

    __categories_generate.click(generate_categories,
                                inputs = [__categories_count, __categories_seed, __categories_temperature, __categories_top_p, __categories_frequency_penalty],
                                outputs = [__shots_category]
                                )
    __shots_generate.click(generate_shots,
                            inputs = [__shots_category, __shots_count, __shots_seed, __shots_temperature, __shots_top_p, __shots_frequency_penalty],
                            outputs = [__generated_shots]
                            )

    # The questions step reuses the category chosen in step 2 and the shots
    # table produced there.
    __questions_generate.click(generate_questions,
                                inputs = [__questions_count, __shots_category, __generated_shots, __questions_seed, __questions_temperature, __questions_top_p, __questions_frequency_penalty],
                                outputs = [__generated_questions]
                                )
                           
                                
# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    base_app.launch()