rasyosef committed on
Commit
49d2457
1 Parent(s): ad93906

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -44
app.py CHANGED
@@ -1,76 +1,82 @@
1
- import gradio as gr
2
-
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, pipeline
5
  from threading import Thread
 
6
 
7
- # The huggingface model id for Microsoft's phi-2 model
8
- checkpoint = "microsoft/phi-2"
 
 
 
 
9
 
10
  # Download and load model and tokenizer
11
- tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
12
- model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.float32, device_map="cpu", trust_remote_code=True)
 
 
 
 
 
13
 
14
  # Text generation pipeline
15
  phi2 = pipeline(
16
- "text-generation",
17
- tokenizer=tokenizer,
18
- model=model,
19
  pad_token_id=tokenizer.eos_token_id,
20
- eos_token_id=tokenizer.eos_token_id,
21
- device_map="cpu"
22
  )
23
 
24
  # Function that accepts a prompt and generates text using the phi2 pipeline
25
- def generate(message, chat_history, max_new_tokens=21):
26
 
27
- instruction = "You are a helpful assistant to 'User'. You do not respond as 'User' or pretend to be 'User'. You only respond once as 'Assistant'."
28
- final_prompt = f"Instruction: {instruction}\n"
 
29
 
30
  for sent, received in chat_history:
31
- final_prompt += "User: " + sent + "\n"
32
- final_prompt += "Assistant: " + received + "\n"
33
-
34
- final_prompt += "User: " + message + "\n"
35
- final_prompt += "Output:"
36
-
37
- if len(tokenizer.tokenize(final_prompt)) >= 512:
38
- final_prompt = "Instruction: Say 'Input exceeded context size, please clear the chat history and retry!' Output:"
39
-
40
- # Streamer
41
- streamer = TextIteratorStreamer(tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0)
42
- thread = Thread(target=phi2, kwargs={"text_inputs":final_prompt, "max_new_tokens":max_new_tokens, "streamer":streamer})
43
- thread.start()
44
 
45
- generated_text = ""
46
- for word in streamer:
47
- generated_text += word
48
- response = generated_text.strip()
49
 
50
- if "User:" in response:
51
- response = response.split("User:")[0].strip()
 
 
 
 
 
52
 
53
- if "Assistant:" in response:
54
- response = response.split("Assistant:")[1].strip()
 
 
55
 
56
- yield response
57
 
58
  # Chat interface with gradio
59
  with gr.Blocks() as demo:
60
  gr.Markdown("""
61
  # Phi-2 Chatbot Demo
62
- This chatbot was created using Microsoft's 2.7 billion parameter [phi-2](https://huggingface.co/microsoft/phi-2) Transformer model.
63
-
64
- In order to reduce the response time on this hardware, `max_new_tokens` has been set to `21` in the text generation pipeline. With this default configuration, it takes approximately `60 seconds` for the response to start being generated, and streamed one word at a time. Use the slider below to increase or decrease the length of the generated text.
65
  """)
66
 
67
- tokens_slider = gr.Slider(8, 128, value=21, render=True, label="Maximum new tokens", info="A larger `max_new_tokens` parameter value gives you longer text responses but at the cost of a slower response time.")
68
-
69
  chatbot = gr.ChatInterface(
 
70
  fn=generate,
71
  additional_inputs=[tokens_slider],
72
  stop_btn=None,
73
- examples=[["Who is Leonhard Euler?"]]
 
 
 
 
74
  )
75
-
76
- demo.queue().launch()
 
 
 
1
  import torch
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer, pipeline
3
  from threading import Thread
4
+ import gradio as gr
5
 
6
+ DEVICE = "cpu"
7
+ if torch.cuda.is_available():
8
+ DEVICE = "cuda"
9
+
10
+ # The huggingface model id for phi-2 instruct model
11
+ checkpoint = "rasyosef/phi-2-instruct-v0.1"
12
 
13
  # Download and load model and tokenizer
14
+ tokenizer = AutoTokenizer.from_pretrained(checkpoint)
15
+ model = AutoModelForCausalLM.from_pretrained(
16
+ checkpoint,
17
+ torch_dtype=torch.float16,
18
+ device_map=DEVICE
19
+ )
20
+
21
 
22
  # Text generation pipeline
23
  phi2 = pipeline(
24
+ "text-generation",
25
+ tokenizer=tokenizer,
26
+ model=model,
27
  pad_token_id=tokenizer.eos_token_id,
28
+ eos_token_id=[tokenizer.eos_token_id],
29
+ device_map=DEVICE
30
  )
31
 
32
  # Function that accepts a prompt and generates text using the phi2 pipeline
33
+ def generate(message, chat_history, max_new_tokens=64):
34
 
35
+ history = [
36
+ {"role": "system", "content": "You are Phi, a helpful AI assistant made by Microsoft and RasYosef. User will you give you a task. Your goal is to complete the task as faithfully as you can."}
37
+ ]
38
 
39
  for sent, received in chat_history:
40
+ history.append({"role": "user", "content": sent})
41
+ history.append({"role": "assistant", "content": received})
 
 
 
 
 
 
 
 
 
 
 
42
 
43
+ history.append({"role": "user", "content": message})
44
+ #print(history)
 
 
45
 
46
+ if len(tokenizer.apply_chat_template(history)) > 512:
47
+ yield "chat history is too long"
48
+ else:
49
+ # Streamer
50
+ streamer = TextIteratorStreamer(tokenizer=tokenizer, skip_prompt=True, skip_special_tokens=True, timeout=300.0)
51
+ thread = Thread(target=phi2, kwargs={"text_inputs":history, "max_new_tokens":max_new_tokens, "streamer":streamer})
52
+ thread.start()
53
 
54
+ generated_text = ""
55
+ for word in streamer:
56
+ generated_text += word
57
+ response = generated_text.strip()
58
 
59
+ yield response
60
 
61
  # Chat interface with gradio
62
  with gr.Blocks() as demo:
63
  gr.Markdown("""
64
  # Phi-2 Chatbot Demo
65
+ This chatbot was created using a finetuned version of Microsoft's 2.7 billion parameter Phi 2 transformer model, [Phi-2-Instruct-v0.1](https://huggingface.co/rasyosef/phi-2-instruct-v0.1), which has undergone a post-training process that incorporates both **supervised fine-tuning** and **direct preference optimization** for instruction following.
 
 
66
  """)
67
 
68
+ tokens_slider = gr.Slider(8, 256, value=64, label="Maximum new tokens", info="A larger `max_new_tokens` parameter value gives you longer text responses but at the cost of a slower response time.")
69
+
70
  chatbot = gr.ChatInterface(
71
+ chatbot=gr.Chatbot(height=400),
72
  fn=generate,
73
  additional_inputs=[tokens_slider],
74
  stop_btn=None,
75
+ examples=[
76
+ ["Hi"],
77
+ ["What's the German word for car?"],
78
+ ["Molly and Abigail want to attend a beauty and modeling contest. They both want to buy new pairs of shoes and dresses. Molly buys a pair of shoes which costs $40 and a dress which costs $160. How much should Abigail budget if she wants to spend half of what Molly spent on the pair of shoes and dress?"],
79
+ ]
80
  )
81
+
82
+ demo.queue().launch(debug=True)