# Gradio app for the LLM model --> use the `retr` environment.
# Run the script and open the link in the browser.
import os
import json

import pandas as pd
import datasets
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Model trained from scratch with the latbert tokenizer.
# An earlier revision pointed at the local directory
# 'scratch_2-nodes_tokenizer_latbert-original_packing_fcocchi/'; that value was
# immediately overwritten, so only the Hub repository id below is kept.
CHECKPOINT_PATH = 'itserr/scratch_2-nodes_tokenizer_latbert-original_packing_fcocchi'

# Read the token once; a missing variable still raises KeyError at start-up,
# exactly as the repeated os.environ[...] lookups did.
_hf_read_token = os.environ['HF_TOKEN_READ']

print(f"Loading model from: {CHECKPOINT_PATH}")
tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_PATH, token=_hf_read_token)
model = AutoModelForCausalLM.from_pretrained(CHECKPOINT_PATH, token=_hf_read_token)

# Hub dataset that stores the like/dislike feedback collected by the demo.
preference_dataset_name = "itserr/latin_gpt_preferences"

# Module-level table of all recorded preferences. It is rebound by
# handle_preference(); no `global` statement is needed at module scope.
# 'force_redownload' bypasses the local cache when loading the dataset.
dataset_hf = datasets.load_dataset(
    preference_dataset_name,
    token=_hf_read_token,
    download_mode='force_redownload',
)
dataset_hf = dataset_hf['train'].to_pandas()
print(dataset_hf.shape)

# Markdown shown under the logo. The "\n" escape is kept from the original
# text; list items are placed on their own lines so Markdown renders a list.
description = """
This is a Latin Language Model (LLM) based on GPT-2 and it was trained on a large corpus of Latin texts and can generate text in Latin. \n
Demo instructions:
- Enter a prompt in Latin in the Input Text box.
- Select the temperature value to control the randomness of the generated text (higher value produce a more creative and unstable answer).
- Click the 'Generate Text' button to trigger model generation.
- (Optional) insert a Feedback text in the box.
- Click the 'Like' or 'Dislike' button to judge the generation correctness.
"""
title = "(L2) - Latin Language Model"
article = "hello world ..."
# Prompts offered as one-click examples under the input box.
examples = ['Accidere ex una scintilla', 'Audacter calumniare,', 'Consolatium misero comites']
logo_image = '/work/pnrr_itserr/latin_model/demo_gpt/ITSERR_row_logo.png'

# Text-generation pipeline shared by all requests; built on first use.
_text_generator = None


def _get_text_generator():
    """Return the shared text-generation pipeline, creating it on first call."""
    global _text_generator
    if _text_generator is None:
        if torch.cuda.is_available():
            device = torch.device("cuda")
        else:
            device = torch.device("cpu")
            print("No GPU available")
        _text_generator = pipeline(
            "text-generation", model=model, tokenizer=tokenizer, device=device
        )
    return _text_generator


def generate_text(prompt, slider):
    """Generate a continuation of *prompt* with the loaded model.

    Args:
        prompt: Text entered in the "Input Text" box.
        slider: Sampling temperature selected in the UI.

    Returns:
        The 'generated_text' field of the first pipeline result.
    """
    print("***** Generate *****")
    text_generator = _get_text_generator()
    generated_text = text_generator(
        prompt,
        max_length=50,
        do_sample=True,
        # Cast like handle_preference() does, so a whole-number slider value
        # delivered as an int is still passed on as a float.
        temperature=float(slider),
        repetition_penalty=2.0,
        truncation=True,
    )
    return generated_text[0]['generated_text']


def _split_generation(prompt, generated):
    """Return (prompt, continuation) for one prompt/output pair.

    When both texts are equal nothing was generated and ("", "") is returned.
    Otherwise the continuation is whatever follows the first occurrence of the
    prompt in the output, or the whole output when the prompt is empty or not
    contained in it.
    """
    if prompt == generated:
        return "", ""
    if prompt and prompt in generated:
        # partition() splits on the FIRST occurrence, so a prompt that recurs
        # later in the generation no longer truncates the stored text.
        return prompt, generated.partition(prompt)[2]
    return prompt, generated


# Function to handle user preferences
def handle_preference(preference, input, output, feedback, temp_value):
    """Record a like/dislike vote and push the updated table to the Hub.

    Format of the values stored in preferences:
    - input text
    - output generated text
    - user feedback
    - float temperature value

    Row columns: input_text, generated_text, feedback, temperature, like,
    dislike, count_like, count_dislike. The running counters are taken from
    the last stored row and only incremented when there is generated text.

    Args:
        preference: Either "like" or "dislike".
        input: Prompt text from the input box.
        output: Text currently shown in the output box.
        feedback: Optional free-text feedback.
        temp_value: Temperature slider value; stored as float.

    Returns:
        Confirmation message displayed in the UI.

    Raises:
        ValueError: If *preference* is neither "like" nor "dislike".
    """
    global dataset_hf

    if preference not in ("like", "dislike"):
        raise ValueError(f"Unknown preference: {preference!r}")

    inp_text, out_text = _split_generation(input, output)

    like = 1 if preference == "like" else 0
    dislike = 1 - like

    # Continue the running totals from the last row; start at zero when the
    # table has no rows yet.
    if len(dataset_hf) > 0:
        last_row = dataset_hf.iloc[-1]
        count_like = last_row['count_like']
        count_dislike = last_row['count_dislike']
    else:
        count_like = 0
        count_dislike = 0

    # Votes cast without any generated text are stored but not counted.
    if out_text != "":
        if like:
            count_like = count_like + 1
        else:
            count_dislike = count_dislike + 1

    new_data = pd.DataFrame(
        {
            'input_text': inp_text,
            'generated_text': out_text,
            'feedback': feedback,
            'temperature': float(temp_value),
            'like': like,
            'dislike': dislike,
            'count_like': count_like,
            'count_dislike': count_dislike,
        },
        index=[0],
    )
    dataset_hf = pd.concat([dataset_hf, new_data], ignore_index=True)

    # Upload the whole table as the "train" split of the preference dataset.
    hf_dataset = datasets.Dataset.from_pandas(dataset_hf)
    dataset_dict = datasets.DatasetDict({"train": hf_dataset})
    dataset_dict.push_to_hub(preference_dataset_name, token=os.environ['HF_TOKEN_WRITE'])

    # print dataset statistics
    print(f"Admin log: like: {count_like} and dislike: {count_dislike}")
    return f"You select '{preference}' as answer of the model generation. Thank you for your time!"


# Centre the logo image and give it a fixed size.
custom_css = """
#logo {
    display: block;
    margin-left: auto;
    margin-right: auto;
    width: 280px;
    height: 140px;
}
"""

with gr.Blocks(css=custom_css) as demo:
    gr.Image(logo_image, elem_id="logo")
    # NOTE(review): the title literal spanned several lines with only the
    # {title} placeholder left; heading markup was presumably lost. Restore
    # the intended tags here if known.
    gr.Markdown(f"\n\n{title}\n\n")
    gr.Markdown(description)

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(lines=5, placeholder="Enter latin text here...", label="Input Text")
        with gr.Column():
            output_text = gr.Textbox(lines=5, placeholder="Output text will appear here...", label="Output Text")

    gr.Examples(examples=examples, inputs=input_text)
    temperature_slider = gr.Slider(minimum=0.1, maximum=5.0, step=0.1, value=1.0, label="Temperature")

    clean_button = gr.Button("Generate Text")
    clean_button.click(fn=generate_text, inputs=[input_text, temperature_slider], outputs=output_text)

    feedback_output = gr.Textbox(lines=1, placeholder="If you want to provide a feedback, please fill this box ...", label="Feedback")

    with gr.Row():
        like_button = gr.Button("Like")
        dislike_button = gr.Button("Dislike")

    button_output = gr.Textbox(lines=1, placeholder="Please submit your choice", label="Latin Language Model Demo")

    # Both buttons share one handler; the lambdas only fix the vote label.
    like_button.click(
        fn=lambda x, y, z, v: handle_preference("like", x, y, z, v),
        inputs=[input_text, output_text, feedback_output, temperature_slider],
        outputs=button_output,
    )
    dislike_button.click(
        fn=lambda x, y, z, v: handle_preference("dislike", x, y, z, v),
        inputs=[input_text, output_text, feedback_output, temperature_slider],
        outputs=button_output,
    )
    #gr.Markdown(article)

demo.launch(share=True, debug=True)