Upload folder using huggingface_hub

Files changed (6) hide show

README.md CHANGED Viewed

@@ -9,5 +9,42 @@ tags:
 # chatglm2-6b-MNN
 ## Introduction
-This model is a 4-bit quantized version of the MNN model exported from chatglm2-6b using [llm-export](https://github.com/wangzhaode/llm-export).

 # chatglm2-6b-MNN
 ## Introduction
+This model is a 4-bit quantized version of the MNN model exported from [chatglm2-6b](https://modelscope.cn/models/ZhipuAI/chatglm2-6b/summary) using [llmexport](https://github.com/alibaba/MNN/tree/master/transformers/llm/export).
+## Download
+```bash
+# install huggingface_hub (provides the huggingface-cli command)
+pip install -U huggingface_hub
+```
+```bash
+# shell download
+huggingface-cli download taobao-mnn/chatglm2-6b-MNN --local-dir path/to/dir
+```
+```python
+# SDK download
+from huggingface_hub import snapshot_download
+model_dir = snapshot_download('taobao-mnn/chatglm2-6b-MNN')
+```
+```bash
+# git clone (requires git-lfs to fetch the model and weight files)
+git clone https://huggingface.co/taobao-mnn/chatglm2-6b-MNN
+```
+## Usage
+```bash
+# clone MNN source
+git clone https://github.com/alibaba/MNN.git
+# compile
+cd MNN
+mkdir build && cd build
+cmake .. -DMNN_LOW_MEMORY=true -DMNN_CPU_WEIGHT_DEQUANT_GEMM=true -DMNN_BUILD_LLM=true -DMNN_SUPPORT_TRANSFORMER_FUSE=true
+make -j
+# run
+./llm_demo /path/to/chatglm2-6b-MNN/config.json prompt.txt
+```
+## Documentation
+[MNN-LLM](https://mnn-docs.readthedocs.io/en/latest/transformers/llm.html#)

config.json CHANGED Viewed

@@ -1,9 +1,8 @@
 {
-    "llm_model": "chatglm2-6b-int4.mnn",
-    "llm_weight": "chatglm2-6b-int4.mnn.weight",
     "backend_type": "cpu",
     "thread_num": 4,
     "precision": "low",
     "memory": "low"
-}

 {
+    "llm_model": "llm.mnn",
+    "llm_weight": "llm.mnn.weight",
     "backend_type": "cpu",
     "thread_num": 4,
     "precision": "low",
     "memory": "low"
+}

llm.mnn ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c3deb1d93a58fdefb9a2b5752ecedbedb095bbfb5fa80cf715a1e71c2456af6
+size 2722368

llm.mnn.json ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:1fd14bee2a4a93615d519bf607ccd87f22a6d0f45c4dcd883b8844224c39446a
+size 16353637

llm.mnn.weight ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a56e478747e18bba08d260d3dcc19f6073eb0a68445a0364da4ddb595ede5c5
+size 3362516614

llm_config.json CHANGED Viewed

@@ -1,15 +1,14 @@
 {
     "hidden_size": 4096,
     "layer_nums": 28,
-    "attention_mask": "glm2",
     "key_value_shape": [
         2,
-        0,
         1,
         2,
         128
     ],
     "prompt_template": "[Round 1]\n\n问：%s\n\n答：",
-    "is_visual": false,
-    "is_single": true
 }

 {
     "hidden_size": 4096,
     "layer_nums": 28,
+    "attention_mask": "float",
     "key_value_shape": [
         2,
         1,
+        0,
         2,
         128
     ],
     "prompt_template": "[Round 1]\n\n问：%s\n\n答：",
+    "is_visual": false
 }