# Gradio demo app for the Latin language model (LLM).
# Use the `retr` environment to run it.
# Run the script and open the link it prints in the browser.

import os
import json
import pandas as pd
import datasets
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Checkpoint trained from scratch with the latbert tokenizer.
# NOTE(review): an un-prefixed 'scratch_2-nodes_tokenizer_latbert-original_packing_fcocchi/'
# path used to be assigned first and was immediately overridden by the value below.
CHECKPOINT_PATH = 'itserr/scratch_2-nodes_tokenizer_latbert-original_packing_fcocchi'

print(f"Loading model from: {CHECKPOINT_PATH}")

# Read-only access token, looked up once and shared by the three downloads below.
hf_read_token = os.environ['HF_TOKEN_READ']
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_PATH, token=hf_read_token)
model = AutoModelForCausalLM.from_pretrained(CHECKPOINT_PATH, token=hf_read_token)

# Preference dataset: always re-downloaded at start-up, then kept in memory as a
# pandas DataFrame (module-level `dataset_hf`) that handle_preference() appends to.
preference_dataset_name = "itserr/latin_gpt_preferences"
dataset_hf = datasets.load_dataset(
    preference_dataset_name,
    token=hf_read_token,
    download_mode='force_redownload',
)['train'].to_pandas()
print(dataset_hf.shape)

# --- Static texts and assets shown in the demo page ---

# Page heading (rendered as HTML inside a Markdown component).
title = "(L<sup>2</sup>) - Latin Language Model"

# Introductory Markdown shown under the heading.
description = """
This is a Latin Language Model (LLM) based on GPT-2 and it was trained on a large corpus of Latin texts and can generate text in Latin. \n
Demo instructions:
- Enter a prompt in Latin in the Input Text box.
- Select the temperature value to control the randomness of the generated text (higher value produce a more creative and unstable answer).
- Click the 'Generate Text' button to trigger model generation.
- (Optional) insert a Feedback text in the box.
- Click the 'Like' or 'Dislike' button to judge the generation correctness. 
"""

# Footer text; its gr.Markdown call is currently commented out in the layout.
article = "hello world ..."

# Clickable example prompts offered below the input box.
examples = [
    'Accidere ex una scintilla',
    'Audacter calumniare,',
    'Consolatium misero comites',
]

# Absolute path of the logo image displayed at the top of the page.
logo_image = '/work/pnrr_itserr/latin_model/demo_gpt/ITSERR_row_logo.png'

# Lazily-built text-generation pipeline, shared by every request.
_TEXT_GENERATOR = None


def _get_text_generator():
    """Return the shared text-generation pipeline, building it on first use."""
    global _TEXT_GENERATOR
    if _TEXT_GENERATOR is None:
        if torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")
            print("No GPU available")
        # Built once: re-creating the pipeline on every click repeats the
        # device placement and setup work for no benefit.
        _TEXT_GENERATOR = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
    return _TEXT_GENERATOR


def generate_text(prompt, slider):
    """Generate a continuation of ``prompt`` with the loaded language model.

    Args:
        prompt: Text typed by the user in the input box.
        slider: Sampling temperature selected in the UI.

    Returns:
        The ``'generated_text'`` field of the first result returned by the
        text-generation pipeline.
    """
    print("***** Generate *****")
    text_generator = _get_text_generator()
    # NOTE(review): max_length presumably also counts the prompt tokens, so a
    # long prompt leaves little room for new text -- confirm against the
    # transformers generation docs before changing it.
    generated_text = text_generator(
        prompt,
        max_length=50,
        do_sample=True,
        temperature=slider,
        repetition_penalty=2.0,
        truncation=True,
    )
    return generated_text[0]['generated_text']

# Function to handle user preferences
def handle_preference(preference, input, output, feedback, temp_value):
    """Record a like/dislike vote and push the updated preference dataset.

    One row is appended to the module-level ``dataset_hf`` DataFrame with the
    columns ``input_text, generated_text, feedback, temperature, like,
    dislike, count_like, count_dislike``; the whole table is then pushed to
    ``preference_dataset_name`` as the ``train`` split.

    Args:
        preference: Either ``"like"`` or ``"dislike"``.
        input: Prompt currently in the input box (the name shadows the
            builtin but is kept so existing callers keep working).
        output: Text currently in the output box.
        feedback: Optional free-text feedback from the user.
        temp_value: Temperature used for the generation; stored as a float.

    Returns:
        A confirmation message to display to the user.

    Raises:
        ValueError: If ``preference`` is neither ``"like"`` nor ``"dislike"``.
    """
    global dataset_hf
    if preference not in ("like", "dislike"):
        # Fail early with a clear message instead of an UnboundLocalError later.
        raise ValueError(f"preference must be 'like' or 'dislike', got {preference!r}")

    if input == output:
        # Output box holds nothing beyond the prompt: store an empty pair.
        inp_text, out_text = "", ""
    elif input:
        # Keep everything after the FIRST occurrence of the prompt, so text is
        # not lost when the model repeats the prompt inside its continuation.
        # If the prompt is not found, the whole output is kept.
        inp_text, out_text = input, output.split(input, 1)[-1]
    else:
        # Empty prompt: str.split("") would raise ValueError (empty separator).
        inp_text, out_text = input, output

    # Running totals are carried in the last stored row; start from zero when
    # the dataset has no rows yet.
    if dataset_hf.empty:
        count_like = count_dislike = 0
    else:
        last_row = dataset_hf.iloc[-1]
        count_like = last_row['count_like']
        count_dislike = last_row['count_dislike']

    like = 1 if preference == "like" else 0
    dislike = 1 - like
    # A vote only moves the running totals when there is generated text to judge.
    if out_text != "":
        if like:
            count_like = count_like + 1
        else:
            count_dislike = count_dislike + 1

    new_data = pd.DataFrame(
        {
            'input_text': inp_text,
            'generated_text': out_text,
            'feedback': feedback,
            'temperature': float(temp_value),
            'like': like,
            'dislike': dislike,
            'count_like': count_like,
            'count_dislike': count_dislike,
        },
        index=[0],
    )
    dataset_hf = pd.concat([dataset_hf, new_data], ignore_index=True)
    hf_dataset = datasets.Dataset.from_pandas(dataset_hf)
    dataset_dict = datasets.DatasetDict({"train": hf_dataset})
    dataset_dict.push_to_hub(preference_dataset_name, token=os.environ['HF_TOKEN_WRITE'])

    # print dataset statistics
    print(f"Admin log: like: {count_like} and dislike: {count_dislike}")
    return f"You select '{preference}' as answer of the model generation. Thank you for your time!"

# Stylesheet handed to gr.Blocks; it targets the element with elem_id="logo".
custom_css = """
#logo {
    display: block;
    margin-left: auto;
    margin-right: auto;
    width: 280px;
    height: 140px;
}
"""

with gr.Blocks(css=custom_css) as demo:
    # ---- Layout (components are created in the order they should appear) ----
    gr.Image(logo_image, elem_id="logo")
    gr.Markdown(f"<h1 style='text-align: center;'>{title}</h1>")
    gr.Markdown(description)

    # Prompt and generation boxes, side by side.
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                lines=5,
                placeholder="Enter latin text here...",
                label="Input Text",
            )
        with gr.Column():
            output_text = gr.Textbox(
                lines=5,
                placeholder="Output text will appear here...",
                label="Output Text",
            )

    gr.Examples(examples=examples, inputs=input_text)
    temperature_slider = gr.Slider(
        minimum=0.1,
        maximum=5.0,
        step=0.1,
        value=1.0,
        label="Temperature",
    )

    generate_button = gr.Button("Generate Text")
    feedback_output = gr.Textbox(
        lines=1,
        placeholder="If you want to provide a feedback, please fill this box ...",
        label="Feedback",
    )

    with gr.Row():
        like_button = gr.Button("Like")
        dislike_button = gr.Button("Dislike")

    button_output = gr.Textbox(
        lines=1,
        placeholder="Please submit your choice",
        label="Latin Language Model Demo",
    )

    # ---- Event wiring ----
    generate_button.click(
        fn=generate_text,
        inputs=[input_text, temperature_slider],
        outputs=output_text,
    )

    # Both vote buttons send the same four values to handle_preference.
    preference_inputs = [input_text, output_text, feedback_output, temperature_slider]
    like_button.click(
        fn=lambda prompt, generated, note, temp: handle_preference("like", prompt, generated, note, temp),
        inputs=preference_inputs,
        outputs=button_output,
    )
    dislike_button.click(
        fn=lambda prompt, generated, note, temp: handle_preference("dislike", prompt, generated, note, temp),
        inputs=preference_inputs,
        outputs=button_output,
    )
    #gr.Markdown(article)

# Start the app with a shareable link and debug mode enabled.
demo.launch(share=True, debug=True)