File size: 3,994 Bytes
30ef1d4
 
1557b00
3c739a1
 
75c3f8c
 
3c739a1
 
 
 
1557b00
 
 
30ef1d4
 
e6ae8f1
30ef1d4
348a268
 
30ef1d4
3c73224
 
 
 
7a9fdcc
 
 
 
 
 
 
 
 
 
 
 
75c3f8c
 
 
 
3c739a1
7a9fdcc
30ef1d4
 
 
 
 
 
 
 
7a9fdcc
30ef1d4
 
3c73224
 
30ef1d4
75c3f8c
 
 
 
 
30ef1d4
75c3f8c
 
 
 
 
 
 
30ef1d4
33cf987
 
 
 
 
 
01090a1
 
 
33cf987
 
 
 
 
 
 
 
 
75c3f8c
 
 
 
 
 
 
 
 
 
d56d00d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import json
import os
import uuid
from pathlib import Path

import gradio as gr
import spaces
import torch
from datasets import load_dataset
from huggingface_hub import CommitScheduler
from transformers import AutoModelForCausalLM, AutoTokenizer


device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(f'[INFO] Using device: {device}')

# token
token = os.environ['TOKEN']

# Load the pretrained model and tokenizer
MODEL_NAME = "atlasia/Al-Atlas-0.5B" # "atlasia/Al-Atlas-LLM-mid-training" # "BounharAbdelaziz/Al-Atlas-LLM-0.5B" #"atlasia/Al-Atlas-LLM"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME,token=token) # , token=token
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME,token=token).to(device)

# Fix tokenizer padding
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # Set pad token

# Predefined examples
examples = [
    ["ุงู„ุฐูƒุงุก ุงู„ุงุตุทู†ุงุนูŠ ู‡ูˆ ูุฑุน ู…ู† ุนู„ูˆู… ุงู„ูƒู…ุจูŠูˆุชุฑ ุงู„ู„ูŠ ูƒูŠุฑูƒุฒ"
     , 256, 0.7, 0.9, 150, 8, 1.5],
    ["ุงู„ู…ุณุชู‚ุจู„ ุฏูŠุงู„ ุงู„ุฐูƒุงุก ุงู„ุตู†ุงุนูŠ ูุงู„ู…ุบุฑุจ"
     , 256, 0.7, 0.9, 150, 8, 1.5],
    [" ุงู„ู…ุทุจุฎ ุงู„ู…ุบุฑุจูŠ"
     , 256, 0.7, 0.9, 150, 8, 1.5],
    ["ุงู„ู…ุงูƒู„ุฉ ุงู„ู…ุบุฑุจูŠุฉ ูƒุชุนุชุจุฑ ู…ู† ุฃุญุณู† ุงู„ู…ุงูƒู„ุงุช ูุงู„ุนุงู„ู…"
     , 256, 0.7, 0.9, 150, 8, 1.5],
]

#inf_dataset=load_dataset("atlasia/atlaset_inference_ds",token=token,split="test",name="llm")
detected_commit=False
submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"

@spaces.GPU
def generate_text(prompt, max_length=256, temperature=0.7, top_p=0.9, top_k=150, num_beams=8, repetition_penalty=1.5):
    """Generate Moroccan Darija text from ``prompt`` with the loaded causal LM.

    Args:
        prompt: Input text (Darija) to continue.
        max_length: Total token budget for the output, prompt included.
        temperature: Sampling temperature (higher = more random).
        top_p: Nucleus-sampling probability mass.
        top_k: Keep only the ``top_k`` most likely tokens at each step.
        num_beams: Beam-search width (combined here with sampling).
        repetition_penalty: Penalty > 1.0 discourages repeated tokens.

    Returns:
        The decoded generation (special tokens stripped); the prompt/result
        pair is also persisted via :func:`save_feedback` as a side effect.
    """
    # Without this declaration the assignment below created a dead local
    # variable and the module-level flag was never updated.
    global detected_commit

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_length=max_length,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        do_sample=True,
        num_beams=num_beams,
        repetition_penalty=repetition_penalty,
        early_stopping=True,
        # Explicit pad/eos ids avoid warnings and mis-padding when the
        # tokenizer's pad token was patched to EOS at load time.
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )
    result = tokenizer.decode(output[0], skip_special_tokens=True)
    # Persist the interaction so the CommitScheduler can push it to the dataset repo.
    save_feedback(prompt, result, f"{max_length},{temperature},{top_p},{top_k},{num_beams},{repetition_penalty}")
    detected_commit = True
    return result

def save_feedback(input, output, params) -> None:
    """Append one generation record as a JSON line to ``submit_file``.

    Args:
        input: The user prompt.
        output: The generated text.
        params: Comma-joined generation parameters (free-form string).

    The write happens under ``scheduler.lock`` so the CommitScheduler never
    uploads a half-written line.
    """
    # The original assigned a dead local; the global declaration makes the
    # module-level flag actually flip.
    global detected_commit

    # The original referenced an undefined name `feedback_file`; the module
    # defines `submit_file`, and its parent dir may not exist yet.
    submit_file.parent.mkdir(parents=True, exist_ok=True)
    with scheduler.lock:
        with submit_file.open("a", encoding="utf-8") as f:
            # ensure_ascii=False keeps the Darija text human-readable on disk.
            f.write(json.dumps({"input": input, "output": output, "params": params}, ensure_ascii=False))
            f.write("\n")
    detected_commit = True
    
if __name__ == "__main__":
    # The scheduler must exist BEFORE the app serves any request: save_feedback()
    # acquires scheduler.lock. The original created it only `if detected_commit:`,
    # which is always False at startup, so `scheduler` was never defined and the
    # first generation crashed with a NameError.
    print("[INFO] CommitScheduler...")
    scheduler = CommitScheduler(
        repo_id="atlasia/atlaset_inference_ds",
        repo_type="dataset",
        # CommitScheduler syncs a *folder*; the original passed the single
        # file path, which is not what the API expects.
        folder_path=submit_file.parent,
        every=5,
        token=token,
    )

    # Create the Gradio interface
    with gr.Blocks() as app:
        gr.Interface(
            fn=generate_text,
            inputs=[
                gr.Textbox(label="Prompt: ุฏุฎู„ ุงู„ู†ุต ุจุงู„ุฏุงุฑุฌุฉ"),
                gr.Slider(8, 4096, value=256, label="Max Length"),
                gr.Slider(0.0, 2, value=0.7, label="Temperature"),
                gr.Slider(0.0, 1.0, value=0.9, label="Top-p"),
                gr.Slider(1, 10000, value=150, label="Top-k"),
                gr.Slider(1, 20, value=8, label="Number of Beams"),
                gr.Slider(0.0, 100.0, value=1.5, label="Repetition Penalty"),
            ],
            outputs=gr.Textbox(label="Generated Text in Moroccan Darija"),
            title="Moroccan Darija LLM",
            description="Enter a prompt and get AI-generated text using our pretrained LLM on Moroccan Darija.",
            examples=examples,
        )
    app.launch()