akhaliq and radames committed
Commit d14c800
1 parent: e9f9901

fix queue (#6)


- fix queue (9431ee6c6359396371b5551bbcb2e678cfa4b060)


Co-authored-by: Radamés Ajna <[email protected]>
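In outline, the diff below keeps the conversation in a `gr.State` instead of reading it back out of the Chatbot component, runs the lightweight `user` step with `queue=False` so the textbox clears immediately, and queues only the slow `bot` step that calls the model. A minimal sketch of that wiring, assuming the Gradio 3.x API this Space uses; the echo reply is a hypothetical stand-in for the real `generate` call:

```python
import gradio as gr

def user(user_message, history):
    # Cheap step: append the new turn and clear the textbox right away.
    history = history + [[user_message, ""]]
    return "", history, history  # textbox, visible chat, server-side state

def bot(history):
    # Expensive step (stand-in for model generation); this one is queued.
    history[-1][1] = "echo: " + history[-1][0]
    return history, history

with gr.Blocks() as demo:
    history = gr.State([])   # chat history lives here, per session
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    msg.submit(user, [msg, history], [msg, chatbot, history], queue=False).then(
        bot, [chatbot], [chatbot, history], queue=True)
    demo.queue(concurrency_count=5)
demo.launch()
```

Returning the history twice from each callback keeps the visible Chatbot and the `gr.State` copy in sync, which is also what lets the Clear button in the diff reset both in a single handler.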

Files changed (1)
app.py +37 -24
app.py CHANGED
@@ -4,11 +4,12 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, Stopping
 import time
 import numpy as np
 from torch.nn import functional as F
-import os
-auth_key = os.environ["HF_ACCESS_TOKEN"]
+import os
+# auth_key = os.environ["HF_ACCESS_TOKEN"]
 print(f"Starting to load the model to memory")
-m = AutoModelForCausalLM.from_pretrained("stabilityai/stablelm-tuned-alpha-7b",use_auth_token=auth_key, torch_dtype=torch.float16).cuda()
-tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-7b",use_auth_token=auth_key)
+m = AutoModelForCausalLM.from_pretrained(
+    "stabilityai/stablelm-tuned-alpha-7b", torch_dtype=torch.float16).cuda()
+tok = AutoTokenizer.from_pretrained("stabilityai/stablelm-tuned-alpha-7b")
 generator = pipeline('text-generation', model=m, tokenizer=tok, device=0)
 print(f"Sucessfully loaded the model to the memory")
 
@@ -30,8 +31,10 @@ class StopOnTokens(StoppingCriteria):
 
 def contrastive_generate(text, bad_text):
     with torch.no_grad():
-        tokens = tok(text, return_tensors="pt")['input_ids'].cuda()[:,:4096-1024]
-        bad_tokens = tok(bad_text, return_tensors="pt")['input_ids'].cuda()[:,:4096-1024]
+        tokens = tok(text, return_tensors="pt")[
+            'input_ids'].cuda()[:, :4096-1024]
+        bad_tokens = tok(bad_text, return_tensors="pt")[
+            'input_ids'].cuda()[:, :4096-1024]
         history = None
         bad_history = None
         curr_output = list()
@@ -39,7 +42,8 @@ def contrastive_generate(text, bad_text):
             out = m(tokens, past_key_values=history, use_cache=True)
             logits = out.logits
             history = out.past_key_values
-            bad_out = m(bad_tokens, past_key_values=bad_history, use_cache=True)
+            bad_out = m(bad_tokens, past_key_values=bad_history,
+                        use_cache=True)
             bad_logits = bad_out.logits
             bad_history = bad_out.past_key_values
             probs = F.softmax(logits.float(), dim=-1)[0][-1].cpu()
@@ -60,39 +64,48 @@ def contrastive_generate(text, bad_text):
                 tokens.device)
         return tok.decode(curr_output)
 
+
 def generate(text, bad_text=None):
     stop = StopOnTokens()
-    result = generator(text, max_new_tokens=1024, num_return_sequences=1, num_beams=1, do_sample=True, temperature=1.0, top_p=0.95, top_k=1000, stopping_criteria=StoppingCriteriaList([stop]))
+    result = generator(text, max_new_tokens=1024, num_return_sequences=1, num_beams=1, do_sample=True,
+                       temperature=1.0, top_p=0.95, top_k=1000, stopping_criteria=StoppingCriteriaList([stop]))
     return result[0]["generated_text"].replace(text, "")
 
 
 def user(user_message, history):
-    return "", history + [[user_message, ""]]
+    history = history + [[user_message, ""]]
+    return "", history, history
 
 
 def bot(history, curr_system_message):
-    messages = curr_system_message + "".join(["".join(["<|USER|>"+item[0], "<|ASSISTANT|>"+item[1]]) for item in history])
+    messages = curr_system_message + \
+        "".join(["".join(["<|USER|>"+item[0], "<|ASSISTANT|>"+item[1]])
+                 for item in history])
    output = generate(messages)
     history[-1][1] = output
     time.sleep(1)
-    return history
-
-
+    return history, history
 
 
 with gr.Blocks() as demo:
-    num = gr.State(value=0)
+    history = gr.State([])
     gr.Markdown("## StableLM-Tuned-Alpha-7b Chat")
     gr.HTML('''<center><a href="https://huggingface.co/spaces/stabilityai/stablelm-tuned-alpha-chat?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate the Space to skip the queue and run in a private space</center>''')
-    chatbot = gr.Chatbot([])
-    clear = gr.Button("Clear Chat History")
-    system_msg = gr.Textbox(start_message, label="System Message", interactive=False,visible=False)
-    #system_msg = start_message
-    msg = gr.Textbox(label="Chat Message Box")
+    chatbot = gr.Chatbot().style(height=500)
+    with gr.Row():
+        with gr.Column(scale=0.70):
+            msg = gr.Textbox(label="", placeholder="Chat Message Box")
+        with gr.Column(scale=0.30, min_width=0):
+            with gr.Row():
+                submit = gr.Button("Submit")
+                clear = gr.Button("Clear")
+    system_msg = gr.Textbox(
+        start_message, label="System Message", interactive=False, visible=False)
 
-    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=True).then(
-        bot, [chatbot, system_msg], chatbot
-    )
-    clear.click(lambda: None, None, chatbot, queue=True)
+    msg.submit(fn=user, inputs=[msg, history], outputs=[msg, chatbot, history], queue=False).then(
+        fn=bot, inputs=[chatbot, system_msg], outputs=[chatbot, history], queue=True)
+    submit.click(fn=user, inputs=[msg, history], outputs=[msg, chatbot, history], queue=False).then(
+        fn=bot, inputs=[chatbot, system_msg], outputs=[chatbot, history], queue=True)
+    clear.click(lambda: [None, []], None, [chatbot, history], queue=False)
     demo.queue(concurrency_count=5)
-demo.launch()
+demo.launch()
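For reference, the prompt that `bot` assembles before calling `generate` flattens those history pairs into StableLM's chat format. A standalone sketch of just that formatting step (pure Python; `format_prompt` and the example system message are illustrative, not identifiers from the Space):

```python
def format_prompt(system_message, history):
    # history holds [user_text, assistant_text] pairs, the same structure
    # kept in the gr.State above; the last assistant slot is still ""
    # when bot runs, so the model continues from the trailing tag.
    return system_message + "".join(
        "<|USER|>" + user_text + "<|ASSISTANT|>" + assistant_text
        for user_text, assistant_text in history
    )

print(format_prompt("<|SYSTEM|>You are a helpful assistant.",
                    [["Hi!", "Hello."], ["What is StableLM?", ""]]))
# -> <|SYSTEM|>...<|USER|>Hi!<|ASSISTANT|>Hello.<|USER|>What is StableLM?<|ASSISTANT|>
```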