# app.py — Gradio chat demo (Hugging Face Space) for rasyosef/phi-2-instruct-v0.1
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, pipeline
from threading import Thread
import gradio as gr
# Pick the compute device: prefer CUDA when a GPU is available, else CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# The huggingface model id for phi-2 instruct model
checkpoint = "rasyosef/phi-2-instruct-v0.1"
# Download and load model and tokenizer.
# NOTE(review): weights are loaded in float32 even on CUDA — float16 would halve
# GPU memory; confirm float32 is intentional (e.g. for CPU-only Spaces hardware).
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
checkpoint,
torch_dtype=torch.float32,
device_map=DEVICE
)
# Text generation pipeline wrapping the model + tokenizer.
# pad_token_id is set to the EOS token id (presumably because the model defines
# no dedicated pad token — verify against the tokenizer config).
phi2 = pipeline(
"text-generation",
tokenizer=tokenizer,
model=model,
pad_token_id=tokenizer.eos_token_id,
eos_token_id=[tokenizer.eos_token_id],
device_map=DEVICE
)
# Function that accepts a prompt and generates text using the phi2 pipeline
def generate(message, chat_history, max_new_tokens=64):
    """Stream a chat completion from the phi2 pipeline.

    Args:
        message: The latest user turn.
        chat_history: List of (user, assistant) message pairs from prior turns.
        max_new_tokens: Generation budget forwarded to the pipeline.

    Yields:
        Progressively longer partial responses as tokens arrive, or a
        single warning string when the templated prompt is too long.
    """
    history = [
        {"role": "system", "content": "You are Phi, a helpful AI assistant made by Microsoft and RasYosef. User will you give you a task. Your goal is to complete the task as faithfully as you can."}
    ]
    for user_turn, assistant_turn in chat_history:
        history.extend([
            {"role": "user", "content": user_turn},
            {"role": "assistant", "content": assistant_turn},
        ])
    history.append({"role": "user", "content": message})

    # Guard: refuse to generate once the templated prompt exceeds 512 tokens.
    if len(tokenizer.apply_chat_template(history)) > 512:
        yield "chat history is too long"
        return

    # Run generation on a worker thread; the streamer feeds decoded text
    # fragments back to this generator as they are produced.
    streamer = TextIteratorStreamer(
        tokenizer=tokenizer,
        skip_prompt=True,
        skip_special_tokens=True,
        timeout=300.0,
    )
    worker = Thread(
        target=phi2,
        kwargs={
            "text_inputs": history,
            "max_new_tokens": max_new_tokens,
            "streamer": streamer,
        },
    )
    worker.start()

    partial = ""
    for fragment in streamer:
        partial += fragment
        yield partial.strip()
# Chat interface with gradio.
# Fixes vs. original: removed a trailing " |" scrape artifact after launch()
# that made the file a syntax error, corrected the model-card link (the text
# said Phi-2-Instruct-v0.1 but the URL pointed at Phi-1_5-Instruct-v0.1),
# and fixed the "has underwent" grammar in the displayed description.
with gr.Blocks() as demo:
    # Markdown content is kept at column 0 so headings render regardless of
    # whether the frontend dedents the string.
    gr.Markdown("""
# Phi-2 Chatbot Demo
This chatbot was created using a finetuned version of Microsoft's 2.7 billion parameter Phi 2 transformer model, [Phi-2-Instruct-v0.1](https://huggingface.co/rasyosef/phi-2-instruct-v0.1), which underwent a post-training process that incorporates both **supervised fine-tuning** and **direct preference optimization** for instruction following.
""")
    # Slider controlling the generation budget; passed to generate() as
    # its max_new_tokens argument via additional_inputs.
    tokens_slider = gr.Slider(
        8, 256, value=64,
        label="Maximum new tokens",
        info="A larger `max_new_tokens` parameter value gives you longer text responses but at the cost of a slower response time.",
    )
    chatbot = gr.ChatInterface(
        chatbot=gr.Chatbot(height=400),
        fn=generate,
        additional_inputs=[tokens_slider],
        stop_btn=None,
        examples=[
            ["Hi"],
            ["What's the German word for 'car'?"],
            ["Molly and Abigail want to attend a beauty and modeling contest. They both want to buy new pairs of shoes and dresses. Molly buys a pair of shoes which costs $40 and a dress which costs $160. How much should Abigail budget if she wants to spend half of what Molly spent on the pair of shoes and dress?"],
        ],
    )
# queue() enables streaming responses; debug=True surfaces server logs.
demo.queue().launch(debug=True)