import streamlit as st
from transformers import AutoTokenizer
from petals import AutoDistributedModelForCausalLM

# Choose any model available at https://health.petals.dev
model_name = "petals-team/StableBeluga2"  # This one is a fine-tuned Llama 2 (70B)
# Cache the tokenizer and model so Streamlit does not reconnect to the swarm
# on every rerun (Streamlit re-executes this whole script after each message)
@st.cache_resource
def load_model(name):
    tokenizer = AutoTokenizer.from_pretrained(name)
    # Connect to a distributed network hosting model layers
    model = AutoDistributedModelForCausalLM.from_pretrained(name)
    return tokenizer, model

tokenizer, model = load_model(model_name)
system_prompt = (
    "### System:\nYou are Stable Beluga, an AI that is very precise and creative. "
    "Be as creative and accurate as you can.\n\n"
)
conv = system_prompt
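# NOTE (hedged sketch): because Streamlit reruns the script on every
# interaction, `conv` above is rebuilt from scratch for each message, so the
# chat keeps no history between turns. One way to persist it across reruns
# with Streamlit's st.session_state (key name "conv" is illustrative):
#
#     if "conv" not in st.session_state:
#         st.session_state["conv"] = system_prompt
#     conv = st.session_state["conv"]
#     # ...and after appending the new prompt: st.session_state["conv"] = conv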
message = st.chat_input("Message")
if message:
    prompt = f"### User: {message}\n\n### Assistant:\n"
    conv += prompt
    print(conv)  # Debug: log the running prompt to the server console
    # Run the model as if it were on your computer
    inputs = tokenizer(conv, return_tensors="pt")["input_ids"]
    outputs = model.generate(inputs, max_new_tokens=256)
    # Show only the newly generated tokens; skip_special_tokens replaces the
    # manual "[3:-4]" slice that trimmed "<s>" and "</s>" off the raw string
    st.write(tokenizer.decode(outputs[0][inputs.shape[1]:], skip_special_tokens=True))
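# Hedged sketch: for multi-turn chats, Petals also exposes an inference
# session so the swarm can reuse attention caches instead of re-processing
# the whole conversation on every turn. A minimal sketch, assuming the
# `inference_session` API shown in the Petals README:
#
#     with model.inference_session(max_length=512) as sess:
#         inputs = tokenizer(conv, return_tensors="pt")["input_ids"]
#         outputs = model.generate(inputs, session=sess, max_new_tokens=256)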