doberst committed
Commit 781bbf7
Parent: 0ccc93d

Upload 2 files

Files changed (2)
  1. README.md +39 -3
  2. config.json +28 -0
README.md CHANGED
@@ -1,3 +1,39 @@
- ---
- license: apache-2.0
- ---
+ ---
+ license: apache-2.0
+ inference: false
+ tags:
+ - green
+ - llmware-chat
+ - p1
+ - ov
+ - emerald
+ ---
+
+ # tiny-llama-chat-ov
+
+ **tiny-llama-chat-ov** is an OpenVINO int4 quantized version of TinyLlama-Chat, providing a fast, small-footprint inference implementation optimized for AI PCs using Intel GPU, CPU, and NPU.
+
+ [**tiny-llama-chat**](https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0) is the official chat fine-tuned version of TinyLlama.
+
+
+ ### Model Description
+
+ - **Developed by:** TinyLlama
+ - **Quantized by:** llmware
+ - **Model type:** llama
+ - **Parameters:** 1.1 billion
+ - **Model Parent:** TinyLlama-1.1B-Chat-v1.0
+ - **Language(s) (NLP):** English
+ - **License:** Apache 2.0
+ - **Uses:** Chat and general-purpose LLM
+ - **RAG Benchmark Accuracy Score:** NA
+ - **Quantization:** int4
+
+
+ ## Model Card Contact
+
+ [llmware on github](https://www.github.com/llmware-ai/llmware)
+
+ [llmware on hf](https://www.huggingface.co/llmware)
+
+ [llmware website](https://www.llmware.ai)
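
The card itself carries no usage snippet. Below is a minimal sketch (not part of the commit) of loading an OpenVINO-packaged checkpoint through optimum-intel; the repo id `llmware/tiny-llama-chat-ov` and the presence of TinyLlama-Chat's chat template in the uploaded tokenizer are assumptions:

```python
# Sketch: run tiny-llama-chat-ov with optimum-intel's OpenVINO backend.
# Assumes `pip install optimum[openvino] transformers` and that the repo id
# below is correct -- neither is confirmed by the commit itself.
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer

model_id = "llmware/tiny-llama-chat-ov"  # assumed repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)

# device may be "CPU", "GPU", or "NPU", matching the Intel hardware
# the card says the quantization targets
model = OVModelForCausalLM.from_pretrained(model_id, device="CPU")

messages = [{"role": "user", "content": "Explain int4 quantization in one sentence."}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
)
output = model.generate(input_ids, max_new_tokens=100)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```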
config.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "_name_or_path": "tiny-llama-chat-ov",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 2048,
+   "initializer_range": 0.02,
+   "intermediate_size": 5632,
+   "max_position_embeddings": 2048,
+   "mlp_bias": false,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 22,
+   "num_key_value_heads": 4,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "transformers_version": "4.41.2",
+   "use_cache": true,
+   "vocab_size": 32000
+ }
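
The 1.1 billion parameter figure in the card can be re-derived from these config fields. A short sanity-check sketch, assuming the standard Llama layout (grouped-query attention, SwiGLU MLP, and untied embeddings, since `tie_word_embeddings` is false):

```python
# Sketch: recompute the parameter count from config.json.
import json

with open("config.json") as f:
    cfg = json.load(f)

h = cfg["hidden_size"]                      # 2048
inter = cfg["intermediate_size"]            # 5632
layers = cfg["num_hidden_layers"]           # 22
head_dim = h // cfg["num_attention_heads"]  # 2048 / 32 = 64
kv_dim = cfg["num_key_value_heads"] * head_dim  # 4 * 64 = 256
vocab = cfg["vocab_size"]                   # 32000

attn = 2 * h * h + 2 * h * kv_dim  # q/o projections plus grouped k/v
mlp = 3 * h * inter                # gate, up, down projections
norms = 2 * h                      # two RMSNorm weight vectors per layer
per_layer = attn + mlp + norms     # 44,044,288

total = vocab * h                  # input embeddings
total += layers * per_layer        # 22 transformer blocks
total += h                         # final RMSNorm
total += vocab * h                 # lm_head (embeddings are untied)

print(f"{total:,}")  # 1,100,048,384 -- about 1.1B, matching the card
```

Note that int4 quantization shrinks the stored weight size, not this logical parameter count.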