Spaces:

xqt
/

Code-with-Llama2

Sleeping

File size: 7,875 Bytes

7b28e1c

import gradio 
import re
import spaces
import torch
import transformers

# Maps each model identifier (key, passed to `from_pretrained` by load_model)
# to the human-readable label (value) offered in the model dropdown.
# load_model() receives a label and resolves it back to its identifier
# through this table, so labels are expected to be unique.
MODEL_DICT = {
    "NousResearch/Llama-2-7b-chat-hf": "Llama 2 7B Chat",
    "xqt/llama_2_7b_chat_mbpp_base": "Llama 2 7B Chat fine tuned with Base MBPP",
    "xqt/llama_2_7b_chat_mbpp_synthetic": "Llama 2 7B Chat fine tuned with Synthetic MBPP",
    "xqt/llama_2_7b_chat_mbpp_mixed": "Llama 2 7B Chat fine tuned with Base and Synthetic MBPP"
}

def generate_prompt(sample):
    """Wrap a user question in the Llama 2 chat prompt used by this app.

    The prompt consists of a fixed system section, the question itself, an
    empty [TEST][/TEST] pair, the closing [/INST] marker and an opening
    [PYTHON] tag that the model is expected to continue after.

    Args:
        sample: The question text to embed in the prompt.

    Returns:
        The complete prompt string, ending with "[PYTHON]" and a newline.
    """
    system_section = (
        "<s>[INST] <<SYS>>\n"
        "You are a python programming assistant that obeys the constraints and passes the example test case.\n"
        "You wrap the code answer without any comments between [PYTHON] and [/PYTHON] tags.\n"
        "In case a test case is available, it is written inside [TEST] and [/TEST] tags.\n"
        "<</SYS>>\n"
    )
    # No test case is supplied by the UI, so the TEST pair is left empty.
    closing_section = "[TEST][/TEST]\n[/INST]\n[PYTHON]\n"
    return f"{system_section}{sample}\n{closing_section}"

def extract_text_between_tags(input_string, tag1, tag2):
    """Return every substring of `input_string` enclosed by two tags.

    Args:
        input_string: Text to search.
        tag1: Regular-expression source for the opening tag (callers must
            escape literal metacharacters themselves).
        tag2: Regular-expression source for the closing tag.

    Returns:
        A list with the non-greedy matches found between `tag1` and `tag2`,
        in order of appearance; matches may span multiple lines.
    """
    tag_regex = re.compile("".join((tag1, "(.*?)", tag2)), re.DOTALL)
    return tag_regex.findall(input_string)


def load_model(name):
    """Load the 4-bit quantized model and its tokenizer for a dropdown label.

    Args:
        name: Display label of the model, i.e. one of the values of
            MODEL_DICT.

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        gradio.Error: If `name` matches no entry of MODEL_DICT, or if
            loading the model or the tokenizer fails for any reason.
    """
    gradio.Info(f"Loading Model {name} 🤗", duration = 5)

    # Reverse lookup: display label -> model identifier.
    label_to_key = {label: model_key for model_key, label in MODEL_DICT.items()}
    current_key = label_to_key.get(name)

    if current_key is None:
        raise gradio.Error(f"Model {name} could not be found 😭", duration = 5)

    try:
        nf4_config = transformers.BitsAndBytesConfig(
            load_in_4bit = True,
            bnb_4bit_quant_type = "nf4",
            bnb_4bit_use_double_quant = True,
            bnb_4bit_compute_dtype = torch.bfloat16
        )

        model = transformers.AutoModelForCausalLM.from_pretrained(
            current_key,
            quantization_config = nf4_config,
            device_map = {"": 0},  # place the whole model on device 0
            use_cache = True
        )
        model.config.pretraining_tp = 1

        # The tokenizer is always taken from the base chat repository,
        # whichever model variant was selected.
        tokenizer = transformers.AutoTokenizer.from_pretrained("NousResearch/Llama-2-7b-chat-hf", trust_remote_code=True)
        # Reuse EOS as the padding token so inputs can be tokenized with padding.
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "right"

        gradio.Info(f"Loaded Model {name} from {current_key} successfully 🔥.", duration = 5)

        return model, tokenizer
    except Exception as e:
        # Surface any loading failure in the UI while keeping the original
        # exception chained for the server logs.
        raise gradio.Error(f"Encountered a problem 🥺: {e}") from e

@spaces.GPU(duration = 120)
def respond(model, message, chat_history):
    """Generate Python code for `message` and append the exchange to the chat.

    Generation proceeds in rounds with a growing token budget (20, 40, ...
    up to 500 new tokens). Each round continues from the previous round's
    decoded output and stops as soon as a closed [PYTHON]...[/PYTHON] block
    produced by the model is found.

    Args:
        model: Display label of the model selected in the dropdown.
        message: The user's question.
        chat_history: List of ``(user_message, bot_response)`` tuples; the new
            exchange is appended to it in place.

    Returns:
        A ``("", chat_history)`` tuple: the empty string clears the message
        box and the updated history refreshes the chatbot.

    Raises:
        gradio.Error: If the model or tokenizer could not be loaded.
    """
    model, tokenizer = load_model(model)

    if model is None or tokenizer is None:
        raise gradio.Error("Could not load model 😔", duration = 5)

    # Text fed to the model. After an unsuccessful round it is replaced by
    # the decoded output so the next round continues where this one stopped.
    text = generate_prompt(message)
    max_new_tokens = 20
    code = ""

    while max_new_tokens <= 500:
        encoded = tokenizer([text], return_tensors = "pt", padding = True).to(model.device)
        # Use the current round's budget; previously a fixed 500 was passed
        # and the growing budget was never applied.
        output_sequences = model.generate(**encoded, max_new_tokens = max_new_tokens, do_sample = True, top_p = 0.9)
        output = tokenizer.batch_decode(output_sequences, skip_special_tokens = True)[0]

        # The decoded output is expected to include the prompt, whose system
        # text already contains one literal "[PYTHON] and [/PYTHON]" pair, so
        # the generated code is the second match, not the first.
        matches = extract_text_between_tags(output, r"\[PYTHON\]", r"\[/PYTHON\]")
        if len(matches) > 1:
            code = matches[1]
            break

        text = output
        max_new_tokens += 20

    if code:
        response = f"""Here is what I could write 💭
```python
{code}
```
"""
    else:
        response = "Could not generate the code with the following configurations 😦."

    chat_history.append((message, response))
    return "", chat_history

# Build the Gradio UI: header, model selector, chat area, input row,
# clickable example questions and a collapsible documentation panel.
with gradio.Blocks() as base_app:
    header = gradio.Markdown("""
# 🧑‍💻 Python Code Generation Assistant

Welcome to the **Python Code Generation Assistant** powered by **Llama 2** models! This application helps generate Python code solutions by leveraging fine-tuned large language models (LLMs) on benchmark and synthetic datasets. Whether you need help solving basic Python problems or want to explore code generation from AI, this app has you covered. Use the documentation below for help.
    """)
    # The dropdown offers the display labels; respond() hands the selected
    # label to load_model(), which maps it back to a model identifier.
    model_choice_dropdown = gradio.Dropdown(
        choices = list(MODEL_DICT.values()),
        value = "Llama 2 7B Chat fine tuned with Base MBPP",
        interactive = True
    )
    chatbot = gradio.Chatbot()
    with gradio.Row():
        with gradio.Column():
            message_box = gradio.Textbox(placeholder = "Write a python programming question you need the code for.")
        
        with gradio.Column():
            # Labelled "Send" to match the in-app documentation, which tells
            # users to press the Send button.
            send_button = gradio.Button(value = "Send")
            clear_button = gradio.ClearButton([message_box, chatbot])

    # Clicking the button and pressing Enter in the textbox both run
    # respond(); its outputs clear the textbox and update the chat.
    send_button.click(respond, [model_choice_dropdown, message_box, chatbot], [message_box, chatbot])
    message_box.submit(respond, [model_choice_dropdown, message_box, chatbot], [message_box, chatbot])

    example_dataset = gradio.Dataset(components = [message_box], samples = [
        ["Write a function to find sequences of lowercase letters joined with an underscore."],
        ["Write a python function to count hexadecimal numbers for a given range."],
        ["Write a function to perform the concatenation of two string tuples."]
    ])

    # Selecting an example copies its single field into the message box.
    example_dataset.select(lambda x: x[0], [example_dataset], [message_box])

    with gradio.Accordion("Documentation", open = False):
        documentation = gradio.Markdown("""
## 🚀 Features

- **Model Selection**: Choose from multiple fine-tuned models:
  - **Llama 2 7B Chat**: Standard pre-trained model.
  - **Llama 2 7B Chat (Base MBPP)**: Fine-tuned on the MBPP benchmark dataset.
  - **Llama 2 7B Chat (Synthetic MBPP)**: Fine-tuned on synthetic data generated from MBPP.
  - **Llama 2 7B Chat (Mixed)**: Fine-tuned on both base and synthetic MBPP data.

- **Automatic Python Code Generation**: Generate Python code for your problem.

## 🎯 How to Use the App

1. **Select a Model**:
   - Use the dropdown menu to choose the model you'd like to use for code generation.
   - By default, the app selects **Llama 2 7B Chat fine-tuned with Base MBPP**.

2. **Ask a Python Question**:
   - Type a Python-related question or problem in the text box.
   - Example: `"Write a function to find sequences of lowercase letters joined with an underscore."`

3. **Generate Python Code**:
   - Press the **Send** button or hit **Enter** to generate the code.
   - The model will respond with Python code wrapped in ```python blocks.

4. **Explore Example Questions**:
   - You can try out some preloaded examples from the **Examples Dataset** at the bottom. Just click on one to automatically populate the input.

5. **Clear the Chat**:
   - Use the **Clear** button to reset the chat and start fresh.

## 📊 Models

| Model Name | Description |
|------------|-------------|
| **Llama 2 7B Chat** | A pre-trained model for general Python code generation. |
| **Base MBPP** | Fine-tuned on the **MBPP** (Most Basic Python Problems) dataset. |
| **Synthetic MBPP** | Fine-tuned on a synthetic dataset generated from MBPP. |
| **Mixed MBPP** | Fine-tuned on both base and synthetic MBPP datasets. |

## 🛠️ Troubleshooting

If you encounter issues:
- Ensure you're selecting the correct model.
- If the code isn't generating as expected, try reformulating the question.
- For further debugging, error messages will be displayed if something goes wrong.

## 📜 Example Prompts
- `"Write a Python function to count hexadecimal numbers for a given range."`
- `"Write a function to perform the concatenation of two string tuples."`
- `"Generate a Python program to reverse a string."`

## 🖥️ About the Technology

This app uses a **4-bit quantized version of Llama 2 7B** models to enhance performance while minimizing resource consumption. These models have been fine-tuned on **MBPP** and **synthetic datasets** to provide optimized code generation for Python programming tasks.

---

Happy Coding! 😄✨
""")

# Launch the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__=="__main__":
    base_app.launch()