Spaces:

xqt
/

Synthetic-Python-Programing-Data-Generator

Sleeping

Synthetic-Python-Programing-Data-Generator

File size: 14,472 Bytes

import gradio
import LlamaManager
import os
import huggingface_hub
import random
import ast

# Hugging Face Hub client shared by the module; store_generated_data() uses it
# to post generation records.
HF_API = huggingface_hub.HfApi()

# Default manager, built from the HF_KEY_2 environment variable. authenticate()
# rebinds this global when a user supplies a passcode or their own key.
# NOTE(review): the meaning of the second positional argument (True) is defined
# by LlamaManager and is not visible in this file.
LLAMAMANAGER = LlamaManager.LlamaManager(os.getenv("HF_KEY_2"), True)

def store_generated_data(data):
    """Post *data* as a comment on discussion 1 of the xqt/SyntheticMBPP2 dataset repo.

    The value is rendered to text with an f-string before posting, and the
    request is authenticated with the token held in the HF_BOT environment
    variable.
    """
    HF_API.comment_discussion(
        "xqt/SyntheticMBPP2",
        1,
        f"{data}",
        repo_type = "dataset",
        token = os.environ.get("HF_BOT"),
    )


def authenticate(secret_textbox):
    """Rebind the module-level LLAMAMANAGER using a passcode or a user-supplied API key.

    If ``secret_textbox`` equals one of the colon-separated entries of the
    PASSWORD_LIST environment variable, the manager is rebuilt with the key in
    the HF_KEY environment variable. Otherwise the submitted text itself is
    used as the API key.

    Args:
        secret_textbox: Text entered in the "API Key/Passcode" field.

    NOTE(review): LLAMAMANAGER is a process-wide global read by every handler
    in this module, so a key entered here replaces the one in use for all
    subsequent calls, not only for the submitting user.
    """
    global LLAMAMANAGER

    # Tolerate PASSWORD_LIST being unset (previously an AttributeError on None)
    # and drop empty entries: an empty or trailing-colon list must never let a
    # blank submission match a passcode and unlock the server-side key.
    raw_passcodes = os.environ.get("PASSWORD_LIST") or ""
    passcodes = {entry for entry in raw_passcodes.split(":") if entry}

    if secret_textbox in passcodes:
        api_key = os.environ.get("HF_KEY")
    else:
        api_key = secret_textbox

    LLAMAMANAGER = LlamaManager.LlamaManager(api_key, True)


def generate_categories(categories_count, seed, temperature, top_p, frequency_penalty):
    """Generate question categories and return an updated "Select Category" dropdown.

    The arguments are forwarded unchanged to
    ``LLAMAMANAGER.auto_generate_questions_categories``. The result and the
    settings used are recorded through ``store_generated_data`` before the
    dropdown is built.

    Args:
        categories_count: Number of categories requested.
        seed, temperature, top_p, frequency_penalty: Sampling settings passed
            through to the manager.

    Returns:
        A ``gradio.Dropdown`` whose choices are the generated categories, with
        one of them preselected at random (or no selection when none were
        generated).
    """
    categories = LLAMAMANAGER.auto_generate_questions_categories(
        count = categories_count,
        seed = seed,
        temperature = temperature,
        top_p = top_p,
        frequency_penalty = frequency_penalty
    )
    store_generated_data({
        "type": "generate_categories",
        "categories": categories,
        "count": categories_count,
        "seed": seed,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty
    })
    # random.choice raises IndexError on an empty sequence; leave the dropdown
    # without a selection instead of failing after the record was stored.
    preselected = random.choice(categories) if categories else None
    return gradio.Dropdown(choices = categories, value = preselected, label = "Select Category", interactive = True)


def generate_shots(category, shots_count, seed, temperature, top_p, frequency_penalty):
    """Generate example shots for *category* and return them as a one-column table.

    All arguments are forwarded positionally to
    ``LLAMAMANAGER.auto_generate_shots_for_category``. Each returned shot is
    wrapped in its own row, the rows and settings are recorded through
    ``store_generated_data``, and a read-only ``gradio.DataFrame`` with the
    single header "Shots" is returned.
    """
    generated = LLAMAMANAGER.auto_generate_shots_for_category(
        category, shots_count, seed, temperature, top_p, frequency_penalty
    )
    # One single-cell row per shot, as the one-column table expects.
    shot_rows = [[entry] for entry in generated]

    sampling = {
        "seed": seed,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty
    }
    store_generated_data({
        "type": "generate_shots",
        "category": category,
        "shots": shot_rows,
        "count": shots_count,
        **sampling
    })

    return gradio.DataFrame(
        value = shot_rows,
        type = "array",
        label = "Generated Shots",
        interactive = False,
        headers = ["Shots"]
    )


def generate_questions(questions_count, category, shots, seed, temperature, top_p, frequency_penalty):
    """Generate questions from the shots and return the table and dropdown updates.

    All arguments are forwarded positionally to
    ``LLAMAMANAGER.auto_generate_questions_from_shots``. The questions and the
    settings used are recorded through ``store_generated_data``.

    Args:
        questions_count: Number of questions requested.
        category: Category selected in Step 2.
        shots: Current value of the "Generated Shots" table.
            NOTE(review): that table uses ``type = "array"``, so this is
            presumably a list of single-cell rows; confirm the manager expects
            that shape.
        seed, temperature, top_p, frequency_penalty: Sampling settings passed
            through to the manager.

    Returns:
        A 2-tuple: a read-only one-column ``gradio.DataFrame`` of the questions
        and a ``gradio.Dropdown`` offering them, with one preselected at random
        (or no selection when none were generated).
    """
    questions = LLAMAMANAGER.auto_generate_questions_from_shots(questions_count, category, shots, seed, temperature, top_p, frequency_penalty)
    questions_for_dataframe = [[question] for question in questions]
    store_generated_data({
        "type": "generate_questions",
        "questions": questions_for_dataframe,
        "count": questions_count,
        "category": category,
        "shots": shots,
        "seed": seed,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty
    })

    # The label must match the questions table this update is routed to; it was
    # previously the copy-pasted "Generated Shots".
    questions_table = gradio.DataFrame(
        value = questions_for_dataframe,
        type = "array",
        label = "Generated Questions",
        interactive = False,
        headers = ["Questions"]
    )
    # random.choice raises IndexError on an empty sequence; leave the dropdown
    # without a selection instead of failing after the record was stored.
    preselected = random.choice(questions) if questions else None
    question_picker = gradio.Dropdown(choices = questions, value = preselected, label = "Select a Question", interactive = True)
    return questions_table, question_picker
    

def generate_function(question, temperature, top_p, frequency_penalty, seed):
    """Generate a function signature for *question* and return its three parts.

    Calls ``LLAMAMANAGER.auto_generate_function_signature_from_question`` with
    the arguments reordered to (question, seed, temperature, top_p,
    frequency_penalty), records the result and settings through
    ``store_generated_data``, and returns
    ``(function_name, function_parameters, function_return)``.
    """
    signature = LLAMAMANAGER.auto_generate_function_signature_from_question(
        question, seed, temperature, top_p, frequency_penalty
    )
    # The manager result must unpack into exactly three parts.
    function_name, function_parameters, function_return = signature

    record = {
        "type": "generate_function",
        "question": question,
        "function_name": function_name,
        "function_parameters": function_parameters,
        "function_return": function_return,
    }
    record.update({
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
        "seed": seed
    })
    store_generated_data(record)

    return function_name, function_parameters, function_return


def generate_answers_and_tests(question, function_name, function_parameters, function_return, temperature, top_p, frequency_penalty, seed):
    """Generate code and tests for a question and return them as one Markdown block.

    ``function_parameters`` arrives as the text of the "Input Parameters"
    textbox and is parsed back into a Python literal before being handed to
    ``LLAMAMANAGER.auto_generate_answers_and_tests``. The generated code,
    tests and settings are recorded through ``store_generated_data``.

    Args:
        question: The selected question.
        function_name: Text of the "Function Name" textbox.
        function_parameters: Text of the "Input Parameters" textbox; must be a
            Python literal.
        function_return: Text of the "Return Type" textbox.
        temperature, top_p, frequency_penalty, seed: Sampling settings passed
            through to the manager.

    Returns:
        A ``gradio.Markdown`` holding a fenced ``python`` block with the code
        followed by each test on its own line.

    Raises:
        gradio.Error: If ``function_parameters`` is empty or not a valid
            Python literal, e.g. when Step 5 is run before Step 4.
    """
    try:
        # literal_eval only accepts literals, so textbox text cannot execute code.
        function_parameters = ast.literal_eval(function_parameters)
    except (ValueError, SyntaxError, TypeError) as err:
        # Surface a readable message in the UI instead of a raw parser error.
        raise gradio.Error(
            "Input Parameters could not be parsed. Generate a function in Step 4 before generating code."
        ) from err

    code, tests = LLAMAMANAGER.auto_generate_answers_and_tests(
        question, function_name, function_parameters, function_return, seed, temperature, top_p, frequency_penalty
    )
    # The stored record keeps the code and tests separate; they are only
    # merged for display below.
    store_generated_data({
        "type": "generate_answers_and_test",
        "question": question,
        "function_name": function_name,
        "function_parameters": function_parameters,
        "function_return": function_return,
        "code": code,
        "tests": tests,
        "temperature": temperature,
        "top_p": top_p,
        "frequency_penalty": frequency_penalty,
        "seed": seed
    })

    code_with_tests = code + "".join(f"\n{test}" for test in tests)
    return gradio.Markdown(f"\n```python\n{code_with_tests}\n```", show_copy_button = True)


# Build the whole UI. Components are laid out in the order they are created
# inside the nested Row/Column/Accordion contexts, so statement order here is
# the page layout. Event handlers are wired at the bottom of the block.
with gradio.Blocks(fill_height=True) as base_app:
    # Page title and usage notices.
    gradio.Markdown("# Synthetic Python Programming Data Generation ⚙️")
    gradio.Markdown("# ❗️ Note: The data generated here by Llama3 and the settings used to generate it will be stored in the repository [here](https://huggingface.co/datasets/xqt/SyntheticMBPP2) for future use.")
    gradio.Markdown("# ❗️ Each successful interaction is saved [here](https://huggingface.co/datasets/xqt/SyntheticMBPP2/discussions/1).")
    gradio.Markdown("# ❗️ Feel free to use your own API key if the key here is rate limited. API Key is never stored in the repository.")
    gradio.Markdown("# ❗️ If you want to use a passcode, please text me.")

    # --- Step 0: passcode / API key entry (handled by authenticate) ---
    gradio.Markdown("# Step 0: Use your own API Key/Passcode")
    with gradio.Row():
        with gradio.Column():
            __secret_textbox = gradio.Textbox(label = "API Key/Passcode", placeholder = "Enter your API Key/Passcode here", type = "password", interactive = True)
        with gradio.Column():
            __passcode_authenticate = gradio.Button("Authenticate", scale = 2)
    
    # --- Step 1: category generation controls ---
    gradio.Markdown("# Step 1: How many categories do you want to generate?")
    with gradio.Row(equal_height = True):
        with gradio.Column(scale = 2):
            __categories_count = gradio.Slider(minimum = 1, maximum = 20, step = 1, value = 10, label = "Number of Categories", interactive = True)
        with gradio.Column():
            __categories_generate = gradio.Button("Generate Categories", scale = 2)
    # Each step has its own collapsed set of sampling sliders with the same
    # ranges and defaults (temperature 1.0, top_p 0.9, frequency penalty 0.0, seed 123).
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __categories_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __categories_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __categories_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __categories_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)
    
    # --- Step 2: shot generation; the category dropdown is filled by Step 1 ---
    gradio.Markdown("# Step 2: Select a category to generate shots for and select the number of shots to generate")
    with gradio.Row():
        with gradio.Column(scale = 2):
            __shots_category = gradio.Dropdown(choices = [], label = "Select Category", interactive = True)
            __shots_count = gradio.Slider(minimum = 2, maximum = 5, step = 1, value = 2, label = "Number of Shots", interactive = True)
        with gradio.Column():
            __shots_generate = gradio.Button("Generate Shots", scale = 2)
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __shots_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __shots_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __shots_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __shots_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)
    # Read-only table of generated shots; also fed back in as an input to Step 3.
    __generated_shots = gradio.DataFrame(value = [], col_count = 1, type = "array", label = "Generated Shots", interactive = False, headers = ["Shots"])
    
    # --- Step 3: question generation from the shots above ---
    gradio.Markdown("# Step 3: Generate Python Programming Questions for the generated shots")
    with gradio.Row():
        with gradio.Column(scale = 2):
            __questions_count = gradio.Slider(minimum = 1, maximum = 30, step = 1, value = 10, label = "Number of Questions", interactive = True)
        with gradio.Column():
            __questions_generate = gradio.Button("Generate Questions", scale = 2)
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __questions_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __questions_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __questions_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __questions_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)
    __generated_questions = gradio.DataFrame(value = [], col_count = 1, type = "array", label = "Generated Questions", interactive = False, headers = ["Questions"])

    # --- Step 4: function signature generation for one selected question ---
    gradio.Markdown("# Step 4: Generate a function name, input parameters, and return type for the generated questions")
    with gradio.Row():
        with gradio.Column(scale = 2):
            __function_question_dropdown = gradio.Dropdown(choices = [], label = "Select a Question", interactive = True, scale = 2)
        with gradio.Column():
            __function_generate = gradio.Button("Generate Function", scale = 2)
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __function_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __function_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __function_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __function_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)
    # Non-editable textboxes holding the generated signature. Step 5 reads them
    # back as text, and generate_answers_and_tests parses the parameters text
    # with ast.literal_eval.
    with gradio.Row():
        with gradio.Column():
            __function_name = gradio.Textbox(label = "Function Name", placeholder = "dummy_foo", interactive = False)
        with gradio.Column():
            # NOTE(review): the placeholder example has unbalanced quotes and is
            # not a valid Python literal; consider correcting the sample text.
            __function_parameters = gradio.Textbox(label = "Input Parameters", placeholder = "['input_dict: dict, 'a': int]", interactive = False)
        with gradio.Column():
            __function_return = gradio.Textbox(label = "Return Type", placeholder = "str", interactive = False)
    
    # --- Step 5: code and test generation for the signature above ---
    gradio.Markdown("# 🚀 Step 5: Generate a code.")
    __code_generate = gradio.Button("Generate Code", scale = 2)
    with gradio.Accordion("Advanced Settings", open = False):
        with gradio.Row():
            with gradio.Column():
                __code_temperature = gradio.Slider(minimum = 0.1, maximum = 2.0, step = 0.01, value = 1.0, label = "Temperature", interactive = True)
                __code_top_p = gradio.Slider(minimum = 0.1, maximum = 0.99, step = 0.01, value = 0.9, label = "Top P", interactive = True)
            with gradio.Column():
                __code_frequency_penalty = gradio.Slider(minimum = -2.0, maximum = 2.0, step = 0.01, value = 0.0, label = "Frequency Penalty", interactive = True)
                __code_seed = gradio.Slider(minimum = 0, maximum = 1000, step = 1, value = 123, label = "Seed", interactive = True)
    __code = gradio.Markdown("🚀 Code will be generated here...", show_copy_button = True)
    
    # --- Event wiring ---
    # The order of each `inputs` list must match the positional parameter order
    # of the handler it feeds. Note that the first three handlers take seed
    # before temperature, while generate_function and
    # generate_answers_and_tests take seed last.

    # authenticate only rebinds the module-level manager; it updates no component.
    __passcode_authenticate.click(authenticate,
                                    inputs = [__secret_textbox],
                                    outputs = []
                                    )
    
    # Step 1 result replaces the Step 2 category dropdown.
    __categories_generate.click(generate_categories, 
                                inputs = [__categories_count, __categories_seed, __categories_temperature, __categories_top_p, __categories_frequency_penalty],
                                outputs = [__shots_category]
                                )
    __shots_generate.click(generate_shots,
                            inputs = [__shots_category, __shots_count, __shots_seed, __shots_temperature, __shots_top_p, __shots_frequency_penalty],
                            outputs = [__generated_shots]
                            )
    
    # Step 3 reuses the Step 2 category and shots table, and fills both the
    # questions table and the Step 4 question dropdown.
    __questions_generate.click(generate_questions,
                                inputs = [__questions_count, __shots_category, __generated_shots, __questions_seed, __questions_temperature, __questions_top_p, __questions_frequency_penalty],
                                outputs = [__generated_questions, __function_question_dropdown]
                                )

    __function_generate.click(generate_function,
                                inputs = [__function_question_dropdown, __function_temperature, __function_top_p, __function_frequency_penalty, __function_seed],
                                outputs = [__function_name, __function_parameters, __function_return]
                                )
    
    # Step 5 reads the question and the three signature textboxes from Step 4.
    __code_generate.click(generate_answers_and_tests,
                            inputs = [__function_question_dropdown, __function_name, __function_parameters, __function_return, __code_temperature, __code_top_p, __code_frequency_penalty, __code_seed],
                            outputs = [__code]
                            )
                           
                                
# Start the Gradio server only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    base_app.launch()