ruihanglai committed on
Commit
d2b253f
·
1 Parent(s): f5b3331

initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
README.md ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: mlc-llm
3
+ base_model: meta-llama/Meta-Llama-3-70B-Instruct
4
+ tags:
5
+ - mlc-llm
6
+ ---
7
+
8
+ # Llama-3-70B-Instruct-fp8-MLC
9
+
10
+ This is the [Llama-3-70B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-70B-Instruct) model in MLC format `e4m3_e4m3_f16` (FP8 quantization).
11
+ The model can be used with the [MLC-LLM](https://github.com/mlc-ai/mlc-llm) project.
12
+
13
+ ## Example Usage
14
+
15
+ Here are some examples of using this model in MLC LLM.
16
+ Before running the examples, please install MLC LLM by following the [installation documentation](https://llm.mlc.ai/docs/install/mlc_llm.html#install-mlc-packages).
17
+
18
+ ### Chat
19
+
20
+ On the command line, run
21
+ ```bash
22
+ mlc_llm chat HF://mlc-ai/Llama-3-70B-Instruct-fp8-MLC
23
+ ```
24
+
25
+ ### REST Server
26
+
27
+ On the command line, run
28
+ ```bash
29
+ mlc_llm serve HF://mlc-ai/Llama-3-70B-Instruct-fp8-MLC
30
+ ```
31
+
32
+ ### Python API
33
+
34
+ ```python
35
+ from mlc_llm import MLCEngine
36
+
37
+ # Create engine
38
+ model = "HF://mlc-ai/Llama-3-70B-Instruct-fp8-MLC"
39
+ engine = MLCEngine(model)
40
+
41
+ # Run chat completion using the OpenAI-style API.
42
+ for response in engine.chat.completions.create(
43
+     messages=[{"role": "user", "content": "What is the meaning of life?"}],
44
+     model=model,
45
+     stream=True,
46
+ ):
47
+     for choice in response.choices:
48
+         print(choice.delta.content, end="", flush=True)
49
+ print("\n")
50
+
51
+ engine.terminate()
52
+ ```
53
+
54
+ ## Documentation
55
+
56
+ For more information on the MLC LLM project, please visit our [documentation](https://llm.mlc.ai/docs/) and [GitHub repo](https://github.com/mlc-ai/mlc-llm).
mlc-chat-config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "llama",
4
+ "quantization": "e4m3_e4m3_f16",
5
+ "model_config": {
6
+ "hidden_size": 8192,
7
+ "intermediate_size": 28672,
8
+ "num_attention_heads": 64,
9
+ "num_hidden_layers": 80,
10
+ "rms_norm_eps": 1e-05,
11
+ "vocab_size": 128256,
12
+ "tie_word_embeddings": false,
13
+ "position_embedding_base": 500000.0,
14
+ "rope_scaling": null,
15
+ "context_window_size": 8192,
16
+ "prefill_chunk_size": 8192,
17
+ "num_key_value_heads": 8,
18
+ "head_dim": 128,
19
+ "tensor_parallel_shards": 1,
20
+ "pipeline_parallel_stages": 1,
21
+ "max_batch_size": 128
22
+ },
23
+ "vocab_size": 128256,
24
+ "context_window_size": 8192,
25
+ "sliding_window_size": -1,
26
+ "prefill_chunk_size": 8192,
27
+ "attention_sink_size": -1,
28
+ "tensor_parallel_shards": 1,
29
+ "pipeline_parallel_stages": 1,
30
+ "temperature": 0.6,
31
+ "presence_penalty": 0.0,
32
+ "frequency_penalty": 0.0,
33
+ "repetition_penalty": 1.0,
34
+ "top_p": 0.9,
35
+ "tokenizer_files": [
36
+ "tokenizer.json",
37
+ "tokenizer_config.json"
38
+ ],
39
+ "tokenizer_info": {
40
+ "token_postproc_method": "byte_level",
41
+ "prepend_space_in_encode": false,
42
+ "strip_space_in_decode": false
43
+ },
44
+ "conv_template": {
45
+ "name": "llama-3",
46
+ "system_template": "<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>",
47
+ "system_message": "You are a helpful, respectful and honest assistant.",
48
+ "system_prefix_token_ids": [
49
+ 128000
50
+ ],
51
+ "add_role_after_system_message": true,
52
+ "roles": {
53
+ "user": "<|start_header_id|>user",
54
+ "assistant": "<|start_header_id|>assistant"
55
+ },
56
+ "role_templates": {
57
+ "user": "{user_message}",
58
+ "assistant": "{assistant_message}",
59
+ "tool": "{tool_message}"
60
+ },
61
+ "messages": [],
62
+ "seps": [
63
+ "<|eot_id|>"
64
+ ],
65
+ "role_content_sep": "<|end_header_id|>\n\n",
66
+ "role_empty_sep": "<|end_header_id|>\n\n",
67
+ "stop_str": [
68
+ "<|end_of_text|>",
69
+ "<|eot_id|>"
70
+ ],
71
+ "stop_token_ids": [
72
+ 128001,
73
+ 128009
74
+ ],
75
+ "function_string": "",
76
+ "use_function_calling": false
77
+ },
78
+ "pad_token_id": 0,
79
+ "bos_token_id": 128000,
80
+ "eos_token_id": [
81
+ 128001,
82
+ 128009
83
+ ]
84
+ }
ndarray-cache-b16.json ADDED
The diff for this file is too large to render. See raw diff
 
ndarray-cache.json ADDED
The diff for this file is too large to render. See raw diff
 
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16a2220dabf07e6433049fe65f6780f9c8c0e9ec66a30f4f48929354f128fe57
3
+ size 2101346304
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31ef9d9273cdcd68bb66f999cd45a8c169225fd659c2330dd60c547623a0d7c4
3
+ size 2101346304
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25039c597772f9d8fe000552c90c82cafddd1c45e8244022d128ce543383620e
3
+ size 234881024
params_shard_100.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95da9c13ce65efe891e407f89114c614845d2458c3e74135e275b5252812b36e
3
+ size 234881024
params_shard_101.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73d6803c1c3a8800180501493976f63cf0e46eed4e475f0efc27b989f4ce9d21
3
+ size 469762048
params_shard_102.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf340ecd5a0ff7d5e5b037d9e525e37dc7fd345a106c25902f826d1b31ea309f
3
+ size 83886080
params_shard_103.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6968e2e7c28494163f85f8aca09effc25b28159ac3674190252699f756d2bd1
3
+ size 67108864
params_shard_104.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db3261b6c60edf19c0acaefc8779a4145c035de974765815280f803bd5f0bef2
3
+ size 234881024
params_shard_105.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdaa6a1103d44eeab187fb973350a046a9bdf5f2c976a42cf8f11cd531c3ee34
3
+ size 469762048
params_shard_106.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cfc20e258564579634c3c5e8e1e7b42e080259e7aacd6224af3d14cd223fbaf
3
+ size 83886080
params_shard_107.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41c380a79932dd2d7fd29c975be76fd30b352e76a32a166d1f597059b4fcc4ff
3
+ size 67108864
params_shard_108.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fdca6d20c1c74a1c766cc3667371899fbf2e66b4e87e06a163833b292ca537a
3
+ size 83886080
params_shard_109.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc6654b37a793e0e99bd52eee47a3cbf83bcede336d6f03d874cd37e3442e110
3
+ size 67108864
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1935062d373785fdde68bb80adb1d4ce6bd468ea1bb2dbca1f3acfae6654339
3
+ size 469762048
params_shard_110.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9ec6808472226f17ab7a2804509a32c011655a404bd2b6907eff2db4ba1f93e
3
+ size 234881024
params_shard_111.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a868cc94b5d7cb263703980238fa326969451c9c814d05f42dbee9e5ac448f7
3
+ size 469762048
params_shard_112.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40bff82113c5e6635eb0996d101cd6666ad6c10ab3ba2e048f795708e03df484
3
+ size 234881024
params_shard_113.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8596b78ac2e6eb258b8f6094478aafaf11742aa731bde87cb42b20b78457888e
3
+ size 469762048
params_shard_114.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b53a78e8bfc0dae1e40592db0c59c6e32066dbf5ecaf691f80f213860ddf2c
3
+ size 83886080
params_shard_115.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb4c8839ecccbe09f8536ac0c87435dee5a61691bf117d2a9583003c2d4466aa
3
+ size 67108864
params_shard_116.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7e73cd3a20b42b8979f6a2ae0a5b5597af7294bc6644e49b2d44648e5eff10c
3
+ size 234881024
params_shard_117.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31ef74d3758b01c1af0f9d34013d5f65bc6173736f12ff2f887031555e211639
3
+ size 469762048
params_shard_118.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c29a12cec0a9ae0f2b918b3a102c22a1f1c146218ac8c1028670a6c67529b26e
3
+ size 83886080
params_shard_119.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ae8f18045e550902102413c89c169aea62c577e57b48293d84233d4315b3a1b
3
+ size 67108864
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:350dae385ece4156b280abda346128e7abc3e4fc4dfe45dac10ac1a170e827f9
3
+ size 83886080
params_shard_120.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a939a01c8a4c7312d85b5b5513b1246459afd3901d8d36aaf2c60058cd625a9
3
+ size 83886080
params_shard_121.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbe9bf77835cdcbe3b6dbc3d880bf762373f24dc560154743afbc2676ebfab8b
3
+ size 234881024
params_shard_122.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea007d714e95db5966865d901cb0f9bfefd6a5685aa17b6b7f92674873384fa8
3
+ size 469762048
params_shard_123.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6adce46602aa9171a5b878306ae754cf18912a45500d9d1a4d4ca8d67e1d023a
3
+ size 67108864
params_shard_124.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cd70af7e18273a852b68a7c4baae11dd53a6494cad118e6eaa2e4cce827f75c
3
+ size 234881024
params_shard_125.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7ea158cbf60a75df2a11ae59a49e0ef3b2707ef45b7db389005295393c94e16
3
+ size 469762048
params_shard_126.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34fced621f9866da6888f413a055575103905f64218bde8071677db5cc8daf06
3
+ size 83886080
params_shard_127.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d09a6820598df3e2d7724c823c83b566efb672c80518fc366bb5c9caa01cab8
3
+ size 67108864
params_shard_128.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d330c9ab5611573a9d9646bc5906d97823bad37e2f007f2de8bb2d18a498e32b
3
+ size 234881024
params_shard_129.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:117051dd52c1f1e5a20cfdcc21660aa3fc0d7c9ac10d957c426119642fbc46a2
3
+ size 469762048
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73b4b95cdaab52a440321a3bab7481d42154e0f2eacdff61ec2487660c8b677b
3
+ size 67108864
params_shard_130.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bc8461d18bb6b98e20060e60f7b2e47302437ac9a8eeccd023ed0af01e9918e
3
+ size 83886080
params_shard_131.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7727005f277524fbb4287d83dca0def9e4ac17b3da46cb70d1a86744055693f9
3
+ size 67108864
params_shard_132.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:054ff3a08d05883386d328b4214945961fb46d98a24d652d855f3f2f8e9007fb
3
+ size 234881024
params_shard_133.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3560d089eea3d440b0330890003068b104c0f7e44e4ffd46dc80e0a3efe89623
3
+ size 469762048
params_shard_134.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3cdee4a81a1838d1e542f496a4da744e3e153a1f2e7569372873ed9e46ce2a2
3
+ size 83886080
params_shard_135.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:810f9ae423514fe4841c51808eb88bacefa821f79d9a40f2c7d2396ddc62f2f9
3
+ size 67108864
params_shard_136.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a05af365acfa12a3e23978151319718cdd7779b4817813b2f1853dd60e96a9eb
3
+ size 234881024
params_shard_137.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2170bc7cc967a3483f4085a3c3a64992351ada017a46f8483ba88c849f41cab
3
+ size 469762048
params_shard_138.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d3abde3e2c3fd274ceeb047358b2eb7d7bb2f988b61292e010e85330c749098
3
+ size 83886080
params_shard_139.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45c792f801077820a8441d0831cd52f3a7da3b8a1a127302745a44f092161d17
3
+ size 67108864