File size: 7,875 Bytes
7b28e1c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
import gradio 
import re
import spaces
import torch
import transformers

# (Hugging Face repo id, label offered in the model dropdown) pairs, in the
# order they are presented to the user.
_MODEL_CHOICES = (
    ("NousResearch/Llama-2-7b-chat-hf", "Llama 2 7B Chat"),
    ("xqt/llama_2_7b_chat_mbpp_base", "Llama 2 7B Chat fine tuned with Base MBPP"),
    ("xqt/llama_2_7b_chat_mbpp_synthetic", "Llama 2 7B Chat fine tuned with Synthetic MBPP"),
    ("xqt/llama_2_7b_chat_mbpp_mixed", "Llama 2 7B Chat fine tuned with Base and Synthetic MBPP"),
)

# Lookup table: repo id -> display label.
MODEL_DICT = dict(_MODEL_CHOICES)

def generate_prompt(sample):
    """Wrap a task description in the Llama 2 chat prompt used by this app.

    The prompt consists of a system message describing the ``[PYTHON]`` /
    ``[TEST]`` tag conventions, the task text, an empty ``[TEST][/TEST]``
    pair, and an opening ``[PYTHON]`` tag followed by a newline.

    Args:
        sample: The task description; it is interpolated with normal
            string formatting.

    Returns:
        The complete prompt string.
    """
    prompt_lines = (
        "<s>[INST] <<SYS>>",
        "You are a python programming assistant that obeys the constraints and passes the example test case.",
        "You wrap the code answer without any comments between [PYTHON] and [/PYTHON] tags.",
        "In case a test case is available, it is written inside [TEST] and [/TEST] tags.",
        "<</SYS>>",
        f"{sample}",
        "[TEST][/TEST]",
        "[/INST]",
        "[PYTHON]",
        "",  # yields the newline that follows the opening [PYTHON] tag
    )
    return "\n".join(prompt_lines)

def extract_text_between_tags(input_string, tag1, tag2):
    """Return every span of text enclosed by ``tag1`` ... ``tag2``.

    ``tag1`` and ``tag2`` are spliced into the regular expression verbatim,
    so the caller must escape any regex metacharacters they contain. The
    match between the tags is non-greedy and may span multiple lines.

    Args:
        input_string: Text to search.
        tag1: Regex fragment matching the opening tag.
        tag2: Regex fragment matching the closing tag.

    Returns:
        A list of the captured strings, in order of appearance (empty when
        nothing matches).
    """
    matcher = re.compile("".join((tag1, "(.*?)", tag2)), re.DOTALL)
    return matcher.findall(input_string)


def load_model(name):
    """Load the 4-bit quantized model selected in the UI, plus its tokenizer.

    Args:
        name: Display label of the model, i.e. one of the values of
            ``MODEL_DICT``.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        gradio.Error: If ``name`` is not a known display label, or if
            downloading / instantiating the model or tokenizer fails.
    """
    gradio.Info(f"Loading Model {name} 🤗", duration = 5)

    # MODEL_DICT maps repo id -> display label; we are given the label and
    # need the repo id, so search the items in reverse direction.
    current_key = next(
        (model_key for model_key, label in MODEL_DICT.items() if label == name),
        None,
    )

    if current_key is None:
        raise gradio.Error(f"Model {name} could not be found 😭", duration = 5)

    try:
        nf4_config = transformers.BitsAndBytesConfig(
            load_in_4bit = True,
            bnb_4bit_quant_type = "nf4",
            bnb_4bit_use_double_quant = True,
            bnb_4bit_compute_dtype = torch.bfloat16
        )

        model = transformers.AutoModelForCausalLM.from_pretrained(
            current_key,
            quantization_config = nf4_config,
            device_map = {"": 0},  # place the whole model on device 0
            use_cache = True
        )
        model.config.pretraining_tp = 1

        # The tokenizer is always taken from the base chat repo, whichever
        # model was selected.
        # NOTE(review): presumably the fine-tunes share this tokenizer - confirm.
        tokenizer = transformers.AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf", trust_remote_code=True)
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "right"
    except Exception as e:
        # Surface any loading failure in the UI, keeping the original cause
        # attached for the server-side traceback.
        raise gradio.Error(f"Encountered a problem 🥺: {e}") from e

    gradio.Info(f"Loaded Model {name} from {current_key} successfully 🔥.", duration = 5)

    return model, tokenizer

@spaces.GPU(duration = 120)
def respond(model, message, chat_history):
    """Generate Python code for ``message`` and append the exchange to the chat.

    Generation runs in short rounds: each round continues from the text
    produced so far and stops as soon as a closing ``[/PYTHON]`` tag shows up,
    or once the overall token budget is used up.

    Args:
        model: Display label of the model chosen in the dropdown.
        message: The user's question.
        chat_history: List of ``(user_message, bot_response)`` tuples; a new
            pair is appended to it.

    Returns:
        A ``("", chat_history)`` tuple; the empty string is written back to
        the message box, clearing it.

    Raises:
        gradio.Error: If the model or tokenizer could not be loaded.
    """
    model, tokenizer = load_model(model)

    if model is None or tokenizer is None:
        raise gradio.Error("Could not load model 😔", duration = 5)

    if chat_history is None:
        chat_history = []

    # Total generation budget and the slice of it spent per round. Checking
    # for the closing tag between rounds lets us stop shortly after the
    # answer is complete instead of always generating the full budget.
    token_budget = 500
    tokens_per_round = 20

    text = generate_prompt(message)
    code = ""
    for _ in range(token_budget // tokens_per_round):
        encoded = tokenizer([text], return_tensors = "pt", padding = True).to(model.device)
        output_sequences = model.generate(**encoded, max_new_tokens = tokens_per_round, do_sample = True, top_p = 0.9)
        # The decoded text is fed back in as the prompt of the next round.
        text = tokenizer.batch_decode(output_sequences, skip_special_tokens = True)[0]

        matches = extract_text_between_tags(text, r"\[PYTHON\]", r"\[/PYTHON\]")
        # Index 1, not 0: the instructions produced by generate_prompt
        # themselves contain "[PYTHON] and [/PYTHON]", so the first match is
        # expected to be that " and " rather than generated code.
        if len(matches) > 1:
            code = matches[1].strip("\n")
            break

    if code.strip():
        response = f"""Here is what I could write 💭
```python
{code}
```
"""
    else:
        response = "Could not generate the code with the following configurations 😦."

    chat_history.append((message, response))
    return "", chat_history

# Build the Gradio UI: model selector, chat window, input row, clickable
# examples and a collapsible documentation section.
with gradio.Blocks() as base_app:
    header = gradio.Markdown("""
# 🧑‍💻 Python Code Generation Assistant

Welcome to the **Python Code Generation Assistant** powered by **Llama 2** models! This application helps generate Python code solutions by leveraging fine-tuned large language models (LLMs) on benchmark and synthetic datasets. Whether you need help solving basic Python problems or want to explore code generation from AI, this app has you covered. Use the documentation below for help.
    """)
    model_choice_dropdown = gradio.Dropdown(
        # Pass a real list rather than a dict view.
        choices = list(MODEL_DICT.values()),
        value = "Llama 2 7B Chat fine tuned with Base MBPP",
        interactive = True
    )
    chatbot = gradio.Chatbot()
    with gradio.Row():
        with gradio.Column():
            message_box = gradio.Textbox(placeholder = "Write a python programming question you need the code for.")
        
        with gradio.Column():
            # Labelled explicitly so the button matches the "Send" wording
            # used in the documentation below.
            send_button = gradio.Button("Send")
            clear_button = gradio.ClearButton([message_box, chatbot])

    # Clicking the button and pressing Enter in the text box both run
    # respond(); its outputs clear the text box and refresh the chat.
    send_button.click(respond, [model_choice_dropdown, message_box, chatbot], [message_box, chatbot])
    message_box.submit(respond, [model_choice_dropdown, message_box, chatbot], [message_box, chatbot])

    example_dataset = gradio.Dataset(components = [message_box], samples = [
        ["Write a function to find sequences of lowercase letters joined with an underscore."],
        ["Write a python function to count hexadecimal numbers for a given range."],
        ["Write a function to perform the concatenation of two string tuples."]
    ])

    # Copy the first (only) field of the selected sample into the text box.
    example_dataset.select(lambda x: x[0], [example_dataset], [message_box])

    with gradio.Accordion("Documentation", open = False):
        documentation = gradio.Markdown("""
## 🚀 Features

- **Model Selection**: Choose from multiple fine-tuned models:
  - **Llama 2 7B Chat**: Standard pre-trained model.
  - **Llama 2 7B Chat (Base MBPP)**: Fine-tuned on the MBPP benchmark dataset.
  - **Llama 2 7B Chat (Synthetic MBPP)**: Fine-tuned on synthetic data generated from MBPP.
  - **Llama 2 7B Chat (Mixed)**: Fine-tuned on both base and synthetic MBPP data.

- **Automatic Python Code Generation**: Generate Python code for your problem.

## 🎯 How to Use the App

1. **Select a Model**:
   - Use the dropdown menu to choose the model you'd like to use for code generation.
   - By default, the app selects **Llama 2 7B Chat fine-tuned with Base MBPP**.

2. **Ask a Python Question**:
   - Type a Python-related question or problem in the text box.
   - Example: `"Write a function to find sequences of lowercase letters joined with an underscore."`

3. **Generate Python Code**:
   - Press the **Send** button or hit **Enter** to generate the code.
   - The model will respond with Python code wrapped in ```python blocks.

4. **Explore Example Questions**:
   - You can try out some preloaded examples from the **Examples Dataset** at the bottom. Just click on one to automatically populate the input.

5. **Clear the Chat**:
   - Use the **Clear** button to reset the chat and start fresh.

## 📊 Models

| Model Name | Description |
|------------|-------------|
| **Llama 2 7B Chat** | A pre-trained model for general Python code generation. |
| **Base MBPP** | Fine-tuned on the **MBPP** (Mostly Basic Python Problems) dataset. |
| **Synthetic MBPP** | Fine-tuned on a synthetic dataset generated from MBPP. |
| **Mixed MBPP** | Fine-tuned on both base and synthetic MBPP datasets. |

## 🛠️ Troubleshooting

If you encounter issues:
- Ensure you're selecting the correct model.
- If the code isn't generating as expected, try reformulating the question.
- For further debugging, error messages will be displayed if something goes wrong.

## 📜 Example Prompts
- `"Write a Python function to count hexadecimal numbers for a given range."`
- `"Write a function to perform the concatenation of two string tuples."`
- `"Generate a Python program to reverse a string."`

## 🖥️ About the Technology

This app uses a **4-bit quantized version of Llama 2 7B** models to enhance performance while minimizing resource consumption. These models have been fine-tuned on **MBPP** and **synthetic datasets** to provide optimized code generation for Python programming tasks.

---

Happy Coding! 😄✨
""")

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    base_app.launch()