vilarin committed on
Commit
3fb77c6
·
verified ·
1 Parent(s): 9746484

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -4
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import os
2
  import time
 
3
  import torch
4
  from transformers import AutoModelForCausalLM, AutoTokenizer
5
  import gradio as gr
@@ -9,14 +10,14 @@ HF_TOKEN = os.environ.get("HF_TOKEN", None)
9
  MODEL_ID = os.environ.get("MODEL_ID", None)
10
  MODEL_NAME = MODEL_ID.split("/")[-1]
11
 
12
- TITLE = "<h1><center>MiniCPM-1B-chat</center></h1>"
13
 
14
  DESCRIPTION = f"""
15
  <h3>MODEL NOW: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></h3>
16
  """
17
  PLACEHOLDER = """
18
  <center>
19
- <p>MiniCPM is an End-Size LLM developed by ModelBest Inc. and TsinghuaNLP, with only 1.2B parameters excluding embeddings.</p>
20
  </center>
21
  """
22
 
@@ -36,11 +37,12 @@ h3 {
36
  model = AutoModelForCausalLM.from_pretrained(
37
  MODEL_ID,
38
  torch_dtype=torch.bfloat16,
 
39
  low_cpu_mem_usage=True,
40
  trust_remote_code=True)
41
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
42
 
43
-
44
  def stream_chat(
45
  message: str,
46
  history: list,
@@ -52,6 +54,7 @@ def stream_chat(
52
  ):
53
  print(f'message: {message}')
54
  print(f'history: {history}')
 
55
  resp, history = model.chat(
56
  tokenizer,
57
  query = message,
@@ -124,7 +127,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
124
  ["Tell me a random fun fact about the Roman Empire."],
125
  ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
126
  ],
127
- cache_examples="lazy",
128
  )
129
 
130
 
 
1
  import os
2
  import time
3
+ import spaces
4
  import torch
5
  from transformers import AutoModelForCausalLM, AutoTokenizer
6
  import gradio as gr
 
10
  MODEL_ID = os.environ.get("MODEL_ID", None)
11
  MODEL_NAME = MODEL_ID.split("/")[-1]
12
 
13
+ TITLE = "<h1><center>MiniCPM-S-1B-chat</center></h1>"
14
 
15
  DESCRIPTION = f"""
16
  <h3>MODEL NOW: <a href="https://hf.co/{MODEL_ID}">{MODEL_NAME}</a></h3>
17
  """
18
  PLACEHOLDER = """
19
  <center>
20
+ <p>MiniCPM is an End-Size LLM with only 1.2B parameters excluding embeddings.</p>
21
  </center>
22
  """
23
 
 
37
  model = AutoModelForCausalLM.from_pretrained(
38
  MODEL_ID,
39
  torch_dtype=torch.bfloat16,
40
+ device_map='auto',
41
  low_cpu_mem_usage=True,
42
  trust_remote_code=True)
43
  tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, torch_dtype=torch.bfloat16)
44
 
45
+ @spaces.GPU()
46
  def stream_chat(
47
  message: str,
48
  history: list,
 
54
  ):
55
  print(f'message: {message}')
56
  print(f'history: {history}')
57
+ torch.manual_seed(0)
58
  resp, history = model.chat(
59
  tokenizer,
60
  query = message,
 
127
  ["Tell me a random fun fact about the Roman Empire."],
128
  ["Show me a code snippet of a website's sticky header in CSS and JavaScript."],
129
  ],
130
+ cache_examples=False,
131
  )
132
 
133