vilarin committed (verified)
Commit f4eb23f · Parent(s): d992f1a

Update app.py

Files changed (1)
  1. app.py +17 -17
app.py CHANGED
@@ -1,26 +1,26 @@
 import spaces
 import os
-import subprocess
-import shlex
-if os.getenv('SYSTEM') == 'spaces':
-    git_repo = "https://github.com/huggingface/transformers.git"
-    subprocess.call(shlex.split(f'pip install git+{git_repo}'))
+# import subprocess
+# import shlex
+# if os.getenv('SYSTEM') == 'spaces':
+#     git_repo = "https://github.com/huggingface/transformers.git"
+#     subprocess.call(shlex.split(f'pip install git+{git_repo}'))
 
 import time
 import torch
-from transformers import OlmoeForCausalLM, AutoTokenizer, TextIteratorStreamer
+from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import gradio as gr
 from threading import Thread
 
-MODEL_LIST = ["allenai/OLMoE-1B-7B-0924-Instruct"]
+MODEL_LIST = ["openbmb/MiniCPM3-4B"]
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
 MODEL = os.environ.get("MODEL_ID")
 
-TITLE = "<h1><center>OLMoE</center></h1>"
+TITLE = "<h1><center>MiniCPM3-4B</center></h1>"
 
 PLACEHOLDER = """
 <center>
-<p>Fully open, state-of-the-art Mixture of Expert model with 1.3 billion active and 6.9 billion total parameters.</p>
+<p>MiniCPM3-4B is the 3rd generation of MiniCPM series.</p>
 </center>
 """
 
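This first hunk retires a common Spaces workaround: installing transformers from GitHub at startup because OlmoeForCausalLM was not yet in a release build. MiniCPM3-4B instead ships its modeling code inside the model repository, so the generic Auto classes can load it from a stock transformers install. A minimal sketch of that loading path, assuming only what this commit shows (the Space itself reads the repo id from the MODEL_ID environment variable rather than hard-coding it):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo = "openbmb/MiniCPM3-4B"  # the Space takes this from MODEL_ID instead
tokenizer = AutoTokenizer.from_pretrained(repo, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    repo,
    torch_dtype=torch.bfloat16,   # half-precision weights, as in the diff
    device_map="auto",            # place layers on the available GPU(s)
    trust_remote_code=True,       # run the custom code bundled in the repo
)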
@@ -39,19 +39,19 @@ h3 {
 
 device = "cuda" # for GPU usage or "cpu" for CPU usage
 
-tokenizer = AutoTokenizer.from_pretrained(MODEL)
-model = OlmoeForCausalLM.from_pretrained(
+tokenizer = AutoTokenizer.from_pretrained(MODEL, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
     MODEL,
     torch_dtype=torch.bfloat16,
     device_map="auto",
-    ignore_mismatched_sizes=True)
+    trust_remote_code=True)
 
 @spaces.GPU()
 def stream_chat(
     message: str,
     history: list,
-    temperature: float = 0.5,
-    max_new_tokens: int = 256,
+    temperature: float = 0.7,
+    max_new_tokens: int = 1024,
     top_p: float = 1.0,
     top_k: int = 20,
     penalty: float = 1.2,
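The body of stream_chat is unchanged and outside this diff, but the imports (TextIteratorStreamer, Thread) point at the standard transformers streaming pattern: generate() runs in a background thread while the handler yields partial text to Gradio. A sketch of that pattern under those assumptions, reusing the tokenizer and model loaded above; the history shape and the chat-template call are guesses, not code from this commit:

from threading import Thread
from transformers import TextIteratorStreamer

def stream_chat(message: str, history: list, temperature: float = 0.7,
                max_new_tokens: int = 1024, top_p: float = 1.0,
                top_k: int = 20, penalty: float = 1.2):
    # Assumed history shape: list of (user, assistant) pairs from Gradio.
    conversation = []
    for user, assistant in history:
        conversation.append({"role": "user", "content": user})
        conversation.append({"role": "assistant", "content": assistant})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # generate() blocks, so run it in a thread and drain the streamer here.
    Thread(target=model.generate, kwargs=dict(
        input_ids=input_ids,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=temperature > 0,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        repetition_penalty=penalty,
    )).start()

    buffer = ""
    for new_text in streamer:
        buffer += new_text
        yield buffer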
@@ -110,15 +110,15 @@ with gr.Blocks(css=CSS, theme="Nymbo/Nymbo_Theme") as demo:
             minimum=0,
             maximum=1,
             step=0.1,
-            value=0.5,
+            value=0.7,
             label="Temperature",
             render=False,
         ),
         gr.Slider(
             minimum=128,
-            maximum=4096,
+            maximum=32768,
             step=1,
-            value=256,
+            value=1024,
             label="Max new tokens",
             render=False,
         ),
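The sliders sit inside the gr.Blocks context visible in the hunk header, declared with render=False, which in Gradio means they are created but not placed until another component renders them; the usual consumer is gr.ChatInterface via additional_inputs, whose values arrive as the extra parameters of stream_chat. A sketch of that assumed wiring; the ChatInterface call itself is not part of this diff:

import gradio as gr

with gr.Blocks(css=CSS, theme="Nymbo/Nymbo_Theme") as demo:
    gr.HTML(TITLE)
    gr.ChatInterface(
        fn=stream_chat,
        additional_inputs=[
            gr.Slider(minimum=0, maximum=1, step=0.1, value=0.7,
                      label="Temperature", render=False),
            gr.Slider(minimum=128, maximum=32768, step=1, value=1024,
                      label="Max new tokens", render=False),
            # top_p / top_k / penalty sliders omitted for brevity
        ],
    )

demo.launch()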
 