Spaces:

rasyosef
/

phi-2-chat

Running

App Files Files Community

rasyosef committed on Aug 11

Commit

49d2457

•

1 Parent(s): ad93906

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -44

app.py CHANGED Viewed

@@ -1,76 +1,82 @@
-import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, pipeline
 from threading import Thread
-# The huggingface model id for Microsoft's phi-2 model
-checkpoint = "microsoft/phi-2"
 # Download and load model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
-model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True)
 # Text generation pipeline
 phi2 = pipeline(
-    "text-generation",
-    tokenizer=tokenizer,
-    model=model,
     pad_token_id=tokenizer.eos_token_id,
-    eos_token_id=tokenizer.eos_token_id,
-    device_map="cpu"
 )
 # Function that accepts a prompt and generates text using the phi2 pipeline
-def generate(message, chat_history, max_new_tokens=21):
-  instruction = "You are a helpful assistant to 'User'. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'."
-  final_prompt = f"Instruction: {instruction}\n"
   for sent, received in chat_history:
-    final_prompt += "User: " + sent + "\n"
-    final_prompt += "Assistant: " + received + "\n"
-  final_prompt += "User: " + message + "\n"
-  final_prompt += "Output:"
-  if len(tokenizer.tokenize(final_prompt)) >= 512:
-    final_prompt = "Instruction: Say 'Input exceeded context size, please clear the chat history and retry!' Output:"
-  # Streamer
-  streamer = TextIteratorStreamer(tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0)
-  thread = Thread(target=phi2, kwargs={"text_inputs":final_prompt, "max_new_tokens":max_new_tokens, "streamer":streamer})
-  thread.start()
-  generated_text = ""
-  for word in streamer:
-    generated_text += word
-    response = generated_text.strip()
-    if "User:" in response:
-      response = response.split("User:")[0].strip()
-    if "Assistant:" in response:
-      response = response.split("Assistant:")[1].strip()
-    yield response
 # Chat interface with gradio
 with gr.Blocks() as demo:
   gr.Markdown("""
   # Phi-2 Chatbot Demo
-  This chatbot was created using Microsoft's 2.7 billion parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model.
-  In order to reduce the response time on this hardware, `max_new_tokens` has been set to `21` in the text generation pipeline. With this default configuration, it takes approximately `60 seconds` for the response to start being generated, and streamed one word at a time. Use the slider below to increase or decrease the length of the generated text.
   """)
-  tokens_slider = gr.Slider(8, 128, value=21, render=True, label="Maximum new tokens", info="A larger `max_new_tokens` parameter value gives you longer text responses but at the cost of a slower response time.")
   chatbot = gr.ChatInterface(
     fn=generate,
     additional_inputs=[tokens_slider],
     stop_btn=None,
-    examples=[["Who is Leonhard Euler?"]]
   )
-demo.queue().launch()

 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, pipeline
 from threading import Thread
+import gradio as gr
+DEVICE = "cpu"
+if torch.cuda.is_available():
+  DEVICE = "cuda"
+# The Hugging Face model id for the phi-2 instruct model
+checkpoint = "rasyosef/phi-2-instruct-v0.1"
 # Download and load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = AutoModelForCausalLM.from_pretrained(
+    checkpoint,
+    torch_dtype=torch.float16,
+    device_map=DEVICE
+  )
 # Text generation pipeline
 phi2 = pipeline(
+    "text-generation",
+    tokenizer=tokenizer,
+    model=model,
     pad_token_id=tokenizer.eos_token_id,
+    eos_token_id=[tokenizer.eos_token_id],
+    device_map=DEVICE
 )
 # Function that accepts a prompt and generates text using the phi2 pipeline
+def generate(message, chat_history, max_new_tokens=64):
+  history = [
+      {"role": "system", "content": "You are Phi, a helpful AI assistant made by Microsoft and RasYosef. User will you give you a task. Your goal is to complete the task as faithfully as you can."}
+  ]
   for sent, received in chat_history:
+    history.append({"role": "user", "content": sent})
+    history.append({"role": "assistant", "content": received})
+  history.append({"role": "user", "content": message})
+  #print(history)
+  if len(tokenizer.apply_chat_template(history)) > 512:
+    yield "chat history is too long"
+  else:
+    # Streamer
+    streamer = TextIteratorStreamer(tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0)
+    thread = Thread(target=phi2, kwargs={"text_inputs":history, "max_new_tokens":max_new_tokens, "streamer":streamer})
+    thread.start()
+    generated_text = ""
+    for word in streamer:
+      generated_text += word
+      response = generated_text.strip()
+      yield response
 # Chat interface with gradio
 with gr.Blocks() as demo:
   gr.Markdown("""
   # Phi-2 Chatbot Demo
+  This chatbot was created using a finetuned version of Microsoft's 2.7 billion parameter Phi 2 transformer model, [Phi-2-Instruct-v0.1](https://huggingface.co/rasyosef/phi-2-instruct-v0.1), which has undergone a post-training process that incorporates both **supervised fine-tuning** and **direct preference optimization** for instruction following.
   """)
+  tokens_slider = gr.Slider(8, 256, value=64, label="Maximum new tokens", info="A larger `max_new_tokens` parameter value gives you longer text responses but at the cost of a slower response time.")
   chatbot = gr.ChatInterface(
+    chatbot=gr.Chatbot(height=400),
     fn=generate,
     additional_inputs=[tokens_slider],
     stop_btn=None,
+    examples=[
+        ["Hi"],
+        ["What's the German word for car?"],
+        ["Molly and Abigail want to attend a beauty and modeling contest. They both want to buy new pairs of shoes and dresses. Molly buys a pair of shoes which costs $40 and a dress which costs $160. How much should Abigail budget if she wants to spend half of what Molly spent on the pair of shoes and dress?"],
+      ]
   )
+demo.queue().launch(debug=True)