#from huggingface_hub import InferenceClient
import gradio as gr
#client = InferenceClient("""K00B404/BagOMistral_14X_Coders-ties-7B""")
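# The hosted InferenceClient above is commented out; the code below instead
# loads a local model with transformers.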
from transformers import AutoModelForCausalLM, AutoTokenizer
# model_id = 'EleutherAI/pythia-1b'  # 16 layers
model_id = 'EleutherAI/gpt-neo-2.7B'  # 32 layers
#gr.load(f"models/{model_id}").launch()
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
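
# Smoke test: generate a short continuation to confirm the model loads and runs.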
text = "TinyPixel/Llama-2-7B-bf16-sharded"
inputs = tokenizer(text, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
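
# The Gradio chat UI below is kept disabled inside a docstring; it depends on
# the commented-out InferenceClient above and will not run as-is.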
"""
def format_prompt(message, history):
    # Build a Mistral-style [INST] prompt from the chat history.
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt

def generate(prompt, history, temperature=0.2, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0):
    # Clamp temperature away from zero so sampling stays valid.
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    formatted_prompt = format_prompt(prompt, history)
    # Stream tokens from the Inference API and yield the growing reply.
    stream = client.text_generation(formatted_prompt, **generate_kwargs,
                                    stream=True, details=True, return_full_text=False)
    output = ""
    for response in stream:
        output += response.token.text
        yield output
    return output
mychatbot = gr.Chatbot(
    avatar_images=["./user.png", "./botm.png"],
    bubble_full_width=False,
    show_label=False,
    show_copy_button=True,
    likeable=True,
)

demo = gr.ChatInterface(
    fn=generate,
    chatbot=mychatbot,
    title="K00B404's Merged Models Test Chat",
    retry_btn=None,
    undo_btn=None,
)

demo.queue().launch(show_api=False)
"""