init commit
- README.md +101 -0
- .gitattributes +35 -0
README.md
ADDED
@@ -0,0 +1,101 @@
---
license: llama3
language:
- en
pipeline_tag: text-generation
tags:
- chatqa-1.5
- llama-3
- pytorch
---

## Model Details
Llama3-ChatQA-1.5 excels at conversational question answering (QA) and retrieval-augmented generation (RAG).

## Prompt Format

**We highly recommend that you use the prompt format we provide, as follows:**

### When context is available
<pre>
System: {System}

{Context}

User: {Question}

Assistant: {Response}

User: {Question}

Assistant:
</pre>

### When context is not available
<pre>
System: {System}

User: {Question}

Assistant: {Response}

User: {Question}

Assistant:
</pre>

**The content of the system's turn (i.e., {System}) for both scenarios is as follows:**
<pre>
This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions based on the context. The assistant should also indicate when the answer cannot be found in the context.
</pre>
**Note that our ChatQA-1.5 models are optimized for conversational QA with context, e.g., over documents or retrieved passages.**
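For illustration, here is a minimal sketch of assembling the no-context template above by hand; the helper name `build_prompt` is ours and is not part of the official usage code:

```python
# A minimal sketch (our own helper, not from the card) showing how the
# no-context template above can be assembled into a single prompt string.
SYSTEM = ("System: This is a chat between a user and an artificial intelligence assistant. "
          "The assistant gives helpful, detailed, and polite answers to the user's questions "
          "based on the context. The assistant should also indicate when the answer cannot "
          "be found in the context.")

def build_prompt(turns):
    # turns: list of (role, text) pairs such as [("User", "..."), ("Assistant", "...")]
    body = "\n\n".join(f"{role}: {text}" for role, text in turns)
    # End with a bare "Assistant:" so the model generates the next response.
    return SYSTEM + "\n\n" + body + "\n\nAssistant:"

print(build_prompt([("User", "Who are you?")]))
```
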
## How to use

### Take the whole document as context
This applies when the whole document fits into the model's context window, so there is no need to run retrieval over it.
```python
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

model_id = "cminja/llama3-chatqa-1.5-8b"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")

messages = [
    {"role": "user", "content": "what is the percentage change of the net income from Q4 FY23 to Q4 FY24?"}
]

document = """NVIDIA (NASDAQ: NVDA) today reported revenue for the fourth quarter ended January 28, 2024, of $22.1 billion, up 22% from the previous quarter and up 265% from a year ago.\nFor the quarter, GAAP earnings per diluted share was $4.93, up 33% from the previous quarter and up 765% from a year ago. Non-GAAP earnings per diluted share was $5.16, up 28% from the previous quarter and up 486% from a year ago.\nQ4 Fiscal 2024 Summary\nGAAP\n| $ in millions, except earnings per share | Q4 FY24 | Q3 FY24 | Q4 FY23 | Q/Q | Y/Y |\n| Revenue | $22,103 | $18,120 | $6,051 | Up 22% | Up 265% |\n| Gross margin | 76.0% | 74.0% | 63.3% | Up 2.0 pts | Up 12.7 pts |\n| Operating expenses | $3,176 | $2,983 | $2,576 | Up 6% | Up 23% |\n| Operating income | $13,615 | $10,417 | $1,257 | Up 31% | Up 983% |\n| Net income | $12,285 | $9,243 | $1,414 | Up 33% | Up 769% |\n| Diluted earnings per share | $4.93 | $3.71 | $0.57 | Up 33% | Up 765% |"""

def get_formatted_input(messages, context):
    system = "System: This is a chat between a user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions based on the context. The assistant should also indicate when the answer cannot be found in the context."
    instruction = "Please give a full and complete answer for the question."

    for item in messages:
        if item['role'] == "user":
            ## only apply this instruction for the first user turn
            item['content'] = instruction + " " + item['content']
            break

    conversation = '\n\n'.join(["User: " + item["content"] if item["role"] == "user" else "Assistant: " + item["content"] for item in messages]) + "\n\nAssistant:"
    formatted_input = system + "\n\n" + context + "\n\n" + conversation

    return formatted_input

formatted_input = get_formatted_input(messages, document)
tokenized_prompt = tokenizer(tokenizer.bos_token + formatted_input, return_tensors="pt").to(model.device)

## stop on either the tokenizer's EOS token or Llama 3's end-of-turn token
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

outputs = model.generate(input_ids=tokenized_prompt.input_ids, attention_mask=tokenized_prompt.attention_mask, max_new_tokens=128, eos_token_id=terminators)

## strip the prompt tokens and decode only the newly generated answer
response = outputs[0][tokenized_prompt.input_ids.shape[-1]:]
print(tokenizer.decode(response, skip_special_tokens=True))
```
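As a quick sanity check on the expected answer (our own addition, not from the card): the table in `document` gives Q4 FY23 net income of $1,414M and Q4 FY24 net income of $12,285M, which works out to roughly a 769% increase:

```python
# Percentage change of net income, from the table in `document` above.
q4_fy23 = 1414   # $ millions
q4_fy24 = 12285  # $ millions
print(f"{(q4_fy24 - q4_fy23) / q4_fy23:.1%}")  # 768.8% -> matches "Up 769%"
```
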
## License
The use of this model is governed by the [META LLAMA 3 COMMUNITY LICENSE AGREEMENT](https://llama.meta.com/llama3/license/).
.gitattributes
ADDED
@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
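These patterns are the standard Hugging Face Hub defaults: each one routes matching files (model weights, archives, serialized tensors, and similar large binaries) through Git LFS, so the repository stores lightweight pointers instead of the blobs themselves.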