Dhenu2-Climate-Exp

Sleeping

File size: 4,884 Bytes

cc5b602
6f619d7
ae90620
6386510
677d853
51a7d9e
652620b
6386510
9660cc6
51a7d9e
652620b
e6367a7
c3b4085
51a7d9e
6386510
bd34f0b
38f4825
bd34f0b
 
51a7d9e
6386510
51a7d9e
 
bd34f0b
 
 
 
 
 
 
51a7d9e
 
da59244
652620b
 
7cb9567
 
 
 
 
 
652620b
0486bff
 
b179e70
6b67af9
677d853
f77fb99
0486bff
4ed884e
 
3d7390f
 
4ed884e
 
 
 
652620b
4ed884e
 
 
652620b
3d7390f
 
 
652620b
 
 
 
 
 
 
 
ce84a62
652620b
 
 
 
 
c4592e6
4ed884e
c4592e6
 
 
f77fb99
652620b
 
27dc368
652620b
 
 
 
 
 
 
 
51a7d9e
652620b
6386510
51a7d9e
fed0852
51a7d9e
 
 
 
 
0486bff
51a7d9e
3d7390f
0ee4a44
3d7390f
 
 
51a7d9e
 
 
 
 
 
 
 
 
 
4ed884e
51a7d9e
 
652620b
51a7d9e
 
bd34f0b
 
 
 
4ed884e
bd34f0b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4ed884e
bd34f0b
 
 
51a7d9e
 
268e4f5
 
 
 
0ee4a44
51a7d9e
 
 
 
 
 
652620b

import os
import time
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, BitsAndBytesConfig
import gradio as gr
from threading import Thread

# Model checkpoints associated with this demo; the first entry is the default.
MODEL_LIST = ["chheplo/Dhenu2-In-Llama3.1-8B-FFT"]
# Optional Hugging Face access token taken from the environment.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Checkpoint to load. MODEL_ID overrides the default; when the variable is
# unset or empty we fall back to MODEL_LIST[0] so the loaders are never
# handed None.
MODEL = os.environ.get("MODEL_ID") or MODEL_LIST[0]

# Page heading rendered as raw HTML.
TITLE = "<h1><center>KissanAI - Dhenu2 India - Llama-3.1-8b-instruct</center></h1>"

# HTML passed to the chatbot component as its placeholder.
PLACEHOLDER = """
<center>
<p>Hi, I'm Dhenu. Ask me anything about Agriculture in India.</p>
</center>
"""


# Custom stylesheet handed to gr.Blocks.
CSS = """
.duplicate-button {
    margin: auto !important;
    color: white !important;
    background: black !important;
    border-radius: 100vh !important;
}
h3 {
    text-align: center;
}
"""

# Device label: "cuda" for GPU usage or "cpu" for CPU usage. Placement of the
# model itself is delegated to device_map="auto" below.
device = "cuda"

# 4-bit NF4 weights with double quantization; compute is done in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(MODEL)

# Options for loading the causal LM, gathered in one place for readability.
_model_load_options = {
    "torch_dtype": torch.bfloat16,
    "device_map": "auto",
    "quantization_config": quantization_config,
}
model = AutoModelForCausalLM.from_pretrained(MODEL, **_model_load_options)

def _build_conversation(message, history, system_prompt):
    """Return the chat-template message list for one request.

    The list starts with the system prompt, replays every (user, assistant)
    pair from ``history`` in order, and ends with the new user ``message``.
    """
    conversation = [{"role": "system", "content": system_prompt}]
    for prompt, answer in history:
        conversation.append({"role": "user", "content": prompt})
        conversation.append({"role": "assistant", "content": answer})
    conversation.append({"role": "user", "content": message})
    return conversation


@spaces.GPU()
def stream_chat(
    message: str, 
    history: list,
    system_prompt: str,
    temperature: float = 0.8, 
    max_new_tokens: int = 1024, 
    top_p: float = 1.0, 
    top_k: int = 20, 
    penalty: float = 1.2,
):
    """Stream the model's reply to ``message`` as progressively longer text.

    Args:
        message: The new user message.
        history: Previous turns as an iterable of (user, assistant) pairs.
        system_prompt: Content of the leading system message.
        temperature: Sampling temperature; ``0`` switches to greedy decoding.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling threshold (used only when sampling).
        top_k: Top-k cutoff (used only when sampling).
        penalty: Repetition penalty forwarded to ``generate``. Values that
            are not strictly positive are skipped.

    Yields:
        The accumulated response text after each chunk the streamer emits.
    """
    print(f'message: {message}')
    print(f'history: {history}')

    conversation = _build_conversation(message, history, system_prompt)

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)

    do_sample = temperature != 0
    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=do_sample,
        # Hard-coded stop token ids; presumably the Llama 3.1 end-of-text /
        # end-of-message / end-of-turn ids -- confirm against the tokenizer.
        eos_token_id=[128001,128008,128009],
        streamer=streamer,
    )
    if do_sample:
        # Sampling-only knobs are passed only when sampling is enabled, so a
        # zero temperature is never handed to generate() in greedy mode.
        generate_kwargs.update(temperature=temperature, top_p=top_p, top_k=top_k)
    if penalty > 0:
        # Previously the penalty argument was accepted but silently ignored.
        # The UI slider starts at 0.0, which is skipped here because the
        # repetition penalty must be strictly positive.
        generate_kwargs["repetition_penalty"] = penalty

    # Generation runs in a worker thread so this generator can consume the
    # streamer concurrently. No torch.no_grad() wrapper around start(): grad
    # mode is thread-local, so it would not reach the worker thread anyway.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer

            
# Chat display component; handed to ChatInterface below.
chatbot = gr.Chatbot(placeholder=PLACEHOLDER, height=600)

# Example questions offered in the UI (one question per example row).
_EXAMPLE_QUESTIONS = (
    "Which type of mango orchards in India require more frequent fruit thinning?",
    "How to control black pepper disease through the use of biological control agents in India?",
    "What are the different methods of hybrid seed production and their suitability for small-scale farmers in India?",
    "What are the government subsidies available for pulses farmers in Telangana?",
    "Suggested paddy varieties in Kerala and Karnataka.",
)

with gr.Blocks(theme="gradio/soft", css=CSS) as demo:
    gr.HTML(TITLE)

    # All parameter widgets are created with render=False and passed to
    # ChatInterface, which receives them as its additional inputs.
    parameters_accordion = gr.Accordion(label="⚙️ Parameters", open=False, render=False)

    system_prompt_box = gr.Textbox(
        label="System Prompt",
        value="You are an agriculture assistant in the context of India. Provide precise and actionable response in proper markdown format.",
        render=False,
    )
    temperature_slider = gr.Slider(
        label="Temperature",
        minimum=0,
        maximum=1,
        step=0.1,
        value=0.8,
        render=False,
    )
    max_new_tokens_slider = gr.Slider(
        label="Max new tokens",
        minimum=128,
        maximum=8192,
        step=1,
        value=1024,
        render=False,
    )
    top_p_slider = gr.Slider(
        label="top_p",
        minimum=0.0,
        maximum=1.0,
        step=0.1,
        value=1.0,
        render=False,
    )
    top_k_slider = gr.Slider(
        label="top_k",
        minimum=1,
        maximum=20,
        step=1,
        value=20,
        render=False,
    )
    repetition_penalty_slider = gr.Slider(
        label="Repetition penalty",
        minimum=0.0,
        maximum=2.0,
        step=0.1,
        value=1.2,
        render=False,
    )

    gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
        additional_inputs_accordion=parameters_accordion,
        # Order mirrors stream_chat's parameters after (message, history).
        additional_inputs=[
            system_prompt_box,
            temperature_slider,
            max_new_tokens_slider,
            top_p_slider,
            top_k_slider,
            repetition_penalty_slider,
        ],
        examples=[[question] for question in _EXAMPLE_QUESTIONS],
        cache_examples=False,
    )


# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()