File size: 2,206 Bytes
3f29b1c
1788430
 
9a10325
1788430
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab36d60
 
 
 
 
1788430
ab36d60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9ae4d82
ab36d60
 
 
 
 
 
 
 
 
9ae4d82
 
 
 
 
 
ab36d60
 
1788430
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import time
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer

def run_LLM(model, tokenizer, streamer, prompt):
    """Generate one sampled completion for ``prompt`` and report its length.

    Args:
        model: Object providing ``generate(...)`` and a ``device`` attribute.
        tokenizer: Object providing ``encode(...)`` and ``decode(...)``.
        streamer: Not used by this function; kept so existing callers that
            pass it keep working.
        prompt: Text that is encoded and handed to the model.

    Returns:
        A ``(output_text, n_tokens)`` tuple: the decoded first sequence
        returned by ``model.generate`` and the number of token ids in it.

    NOTE(review): the count is the length of the whole returned sequence;
    for causal LMs this presumably includes the prompt tokens as well as the
    newly generated ones -- confirm before reading it as "generated tokens".
    """
    # Encode as a PyTorch tensor and place it on the same device as the model.
    prompt_ids = tokenizer.encode(prompt, return_tensors="pt").to(model.device)

    generation_kwargs = {
        "input_ids": prompt_ids,
        # Effectively no cap on the number of new tokens.
        "max_new_tokens": 3000000,
        "do_sample": True,
        "temperature": 0.8,
    }
    first_sequence = model.generate(**generation_kwargs)[0]

    return (tokenizer.decode(first_sequence), len(first_sequence))

def display_message():
    """Run a fixed prompt through the model ten times and report timings.

    Every call loads the ``cyberagent/calm2-7b-chat`` model (on ``cuda``) and
    its tokenizer, then invokes ``run_LLM`` ten times with the same prompt.
    Model loading happens before the timer starts, so it is not part of the
    measured laps.

    For each run, eight values are recorded, in this order: run number, lap
    time, mean lap time, tokens per second for this run, tokens per second
    overall, tokens in this run, tokens so far, and total elapsed time
    (times are ``time.perf_counter`` differences, i.e. seconds). The values
    are appended to ``log.csv`` as one tab-separated row and also formatted
    into one line of the returned report.

    Returns:
        str: The report, one line per run, each line followed by ``<br>``.
    """
    model_name = "cyberagent/calm2-7b-chat"
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="cuda",
        torch_dtype="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # The second prompt line carries four leading spaces; they are part of
    # the text sent to the model.
    prompt = (
        "わが国の経済について今後の予想を教えてください。\n"
        "    ASSISTANT: "
    )

    line_format = "%d    %f    %f    %f    %f    %d    %d    %f"
    report_lines = []
    elapsed = 0.0
    tokens_so_far = 0
    last_mark = time.perf_counter()

    for run_no in range(1, 11):
        _text, run_tokens = run_LLM(model, tokenizer, streamer, prompt)
        tokens_so_far += run_tokens

        # Lap time is measured from the end of the previous run, so it also
        # covers the bookkeeping and file write done between runs.
        now = time.perf_counter()
        lap = now - last_mark
        last_mark = now
        elapsed += lap

        row = (
            run_no,
            lap,
            elapsed / float(run_no),
            run_tokens / lap,
            tokens_so_far / elapsed,
            run_tokens,
            tokens_so_far,
            elapsed,
        )

        # Append right away so finished runs are on disk even if a later
        # run fails.
        with open("log.csv", "a") as ofile:
            print(*row, sep="\t", file=ofile)

        report_lines.append(line_format % row + "<br>")

    return "".join(report_lines)


if __name__ == '__main__':
    # display_message() separates its report rows with "<br>" tags, so the
    # result has to be rendered as HTML; a plain "text" output would show
    # the tags literally on a single line.
    iface = gr.Interface(fn=display_message, inputs=None, outputs="html")
    iface.launch()