#from huggingface_hub import InferenceClient
import gradio as gr

#client = InferenceClient("K00B404/BagOMistral_14X_Coders-ties-7B")
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

#model_id = 'EleutherAI/pythia-1b'  # 16 layers
model_id = 'EleutherAI/gpt-neo-2.7B'  # 32 layers
#gr.load(f"models/{model_id}").launch()

# Load the tokenizer and model weights once at startup.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
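# (Optional, an assumption not in the original: on constrained hardware the
# 2.7B checkpoint can be loaded in half precision to roughly halve memory use:
#   model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
# )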

text = "TinyPixel/Llama-2-7B-bf16-sharded"
inputs = tokenizer(text, return_tensors="pt")

outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


"""
def format_prompt(message, history):
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    # Append the new user message once, after the full history
    # (not on every loop pass).
    prompt += f"[INST] {message} [/INST]"
    return prompt

def generate(prompt, history, temperature=0.2, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt(prompt, history)

    # NOTE: requires the InferenceClient that is commented out at the top of the file.
    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""

    for response in stream:
        output += response.token.text
        yield output

    
mychatbot = gr.Chatbot(avatar_images=["./user.png", "./botm.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True)

demo = gr.ChatInterface(fn=generate, 
                        chatbot=mychatbot,
                        title="K00B404's Merged Models Test Chat",
                        retry_btn=None,
                        undo_btn=None
                       )

demo.queue().launch(show_api=False)
"""