smartinez1
committed on
Upload model_usage.ipynb
model_usage.ipynb
ADDED
@@ -0,0 +1,276 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nvidia-smi output:\n",
      "\n",
      "Fri Nov 8 21:11:36 2024 \n",
      "+-----------------------------------------------------------------------------------------+\n",
      "| NVIDIA-SMI 560.35.03 Driver Version: 560.35.03 CUDA Version: 12.6 |\n",
      "|-----------------------------------------+------------------------+----------------------+\n",
      "| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
      "| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
      "| | | MIG M. |\n",
      "|=========================================+========================+======================|\n",
      "| 0 NVIDIA A40 On | 00000000:17:00.0 Off | 0 |\n",
      "| 0% 53C P0 85W / 300W | 3717MiB / 46068MiB | 0% Default |\n",
      "| | | N/A |\n",
      "+-----------------------------------------+------------------------+----------------------+\n",
      "| 1 NVIDIA A40 On | 00000000:65:00.0 Off | 0 |\n",
      "| 0% 55C P0 86W / 300W | 3195MiB / 46068MiB | 0% Default |\n",
      "| | | N/A |\n",
      "+-----------------------------------------+------------------------+----------------------+\n",
      "| 2 NVIDIA A40 On | 00000000:CA:00.0 Off | 0 |\n",
      "| 0% 45C P0 65W / 300W | 4MiB / 46068MiB | 0% Default |\n",
      "| | | N/A |\n",
      "+-----------------------------------------+------------------------+----------------------+\n",
      "| 3 NVIDIA A40 On | 00000000:E3:00.0 Off | 0 |\n",
      "| 0% 38C P8 25W / 300W | 4MiB / 46068MiB | 0% Default |\n",
      "| | | N/A |\n",
      "+-----------------------------------------+------------------------+----------------------+\n",
      " \n",
      "+-----------------------------------------------------------------------------------------+\n",
      "| Processes: |\n",
      "| GPU GI CI PID Type Process name GPU Memory |\n",
      "| ID ID Usage |\n",
      "|=========================================================================================|\n",
      "| 0 N/A N/A 96617 C ...10/models_citation/myenv/bin/python 3708MiB |\n",
      "| 1 N/A N/A 1230721 C /home/historynlp/venv/bin/python3 1414MiB |\n",
      "| 1 N/A N/A 2212129 C /home/historynlp/venv/bin/python3 1766MiB |\n",
      "+-----------------------------------------------------------------------------------------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "import subprocess\n",
    "\n",
    "def run_nvidia_smi():\n",
    "    try:\n",
    "        # Run the nvidia-smi command\n",
    "        result = subprocess.run(['nvidia-smi'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)\n",
    "\n",
    "        # Check if the command was successful\n",
    "        if result.returncode == 0:\n",
    "            print(\"nvidia-smi output:\\n\")\n",
    "            print(result.stdout)\n",
    "        else:\n",
    "            print(\"Error running nvidia-smi:\\n\")\n",
    "            print(result.stderr)\n",
    "\n",
    "    except Exception as e:\n",
    "        print(f\"An error occurred: {e}\")\n",
    "\n",
    "# Call the function to run nvidia-smi\n",
    "run_nvidia_smi()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/smartinez1/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n",
      "/home/smartinez1/.venv/lib/python3.10/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: libtorch_cuda_cu.so: cannot open shared object file: No such file or directory\n",
      "  warn(f\"Failed to load image Python extension: {e}\")\n",
      "2024-11-08 21:11:38.912317: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA\n",
      "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
      "2024-11-08 21:11:39.068802: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.\n",
      "2024-11-08 21:11:39.677787: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory\n",
      "2024-11-08 21:11:39.677842: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory\n",
      "2024-11-08 21:11:39.677848: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "gpu_index = 2\n",
    "# Select the GPU before importing torch, so only that device is visible to CUDA\n",
    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = f\"{gpu_index}\"\n",
    "from huggingface_hub import login\n",
    "from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer, pipeline\n",
    "import torch\n",
    "from peft import PeftModel, PeftConfig\n",
    "import logging\n",
    "# Silence transformers warnings (a spurious warning otherwise appears during inference)\n",
    "logging.getLogger(\"transformers\").setLevel(logging.CRITICAL)"
   ]
  },
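  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: `login` is imported above but never called. The `meta-llama/Llama-3.1-8B` base model is gated on the Hugging Face Hub, so authentication may be required before it can be downloaded. The cell below is a minimal sketch of that step, left commented out; the token shown is a placeholder, not a real credential."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional: authenticate to the Hugging Face Hub (needed for gated models such as\n",
    "# meta-llama/Llama-3.1-8B). Either call login() for an interactive prompt, or pass\n",
    "# a token explicitly; the value below is a placeholder.\n",
    "# login()\n",
    "# login(token=\"hf_...\")  # alternatively: run `huggingface-cli login` in a terminal"
   ]
  },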
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Number of available CUDA devices: 1\n",
      "\n",
      "Device 0: NVIDIA A40\n"
     ]
    }
   ],
   "source": [
    "# Check if CUDA is available\n",
    "if torch.cuda.is_available():\n",
    "    num_devices = torch.cuda.device_count()\n",
    "    print(f\"Number of available CUDA devices: {num_devices}\")\n",
    "\n",
    "    for i in range(num_devices):\n",
    "        device_name = torch.cuda.get_device_name(i)\n",
    "        print(f\"\\nDevice {i}: {device_name}\")\n",
    "else:\n",
    "    print(\"CUDA is not available.\")\n",
    "# Specify the device (0 for GPU or -1 for CPU)\n",
    "device = 0 if torch.cuda.is_available() else -1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading checkpoint shards: 100%|██████████████████| 4/4 [00:02<00:00, 1.55it/s]\n"
     ]
    }
   ],
   "source": [
    "config = PeftConfig.from_pretrained(\"smartinez1/Llama-3.1-8B-FINLLM\")\n",
    "base_model = AutoModelForCausalLM.from_pretrained(\"meta-llama/Llama-3.1-8B\")\n",
    "model = PeftModel.from_pretrained(base_model, \"smartinez1/Llama-3.1-8B-FINLLM\")\n",
    "# Load the tokenizer associated with the base model\n",
    "tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Llama-3.1-8B\")\n",
    "# Define the unique padding token used during fine-tuning\n",
    "custom_pad_token = \"<|finetune_right_pad_id|>\"\n",
    "tokenizer.add_special_tokens({'pad_token': custom_pad_token})\n",
    "pad_token_id = tokenizer.pad_token_id"
   ]
  },
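  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`BitsAndBytesConfig` is imported above but not used: the cell above loads the base model in full precision. If the model does not fit in GPU memory, a 4-bit quantized load is one alternative. The cell below is a minimal sketch of that option (it assumes the `bitsandbytes` package is installed), not the configuration used to produce the outputs in this notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Alternative, memory-saving load path (assumes the bitsandbytes package is installed)\n",
    "bnb_config = BitsAndBytesConfig(\n",
    "    load_in_4bit=True,\n",
    "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
    ")\n",
    "# Loading the 8B model a second time is expensive, so the quantized load is left commented out:\n",
    "# base_model_4bit = AutoModelForCausalLM.from_pretrained(\n",
    "#     \"meta-llama/Llama-3.1-8B\", quantization_config=bnb_config, device_map=\"auto\"\n",
    "# )\n",
    "# model_4bit = PeftModel.from_pretrained(base_model_4bit, \"smartinez1/Llama-3.1-8B-FINLLM\")"
   ]
  },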
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Inputs should be given as presented on the Challenge website."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "User: Provide a link for Regulation A (Extensions of Credit by Federal Reserve Banks) law\n",
      "AI: Regulation A (Extensions of Credit by Federal Reserve Banks): https://www.federalreserve.gov/newsevents/pressreleases/files/bcreg20240705c1.pdf\n",
      "--------------------------------------------------\n",
      "User: Define the following term: Insurance Scores.\n",
      "AI: A credit score based on an insurance underwriting model that evaluates the risk of an individual for insurance purposes.\n",
      "--------------------------------------------------\n",
      "User: Expand the following acronym into its full form: ESCB.\n",
      "AI: European System of Central Banks\n",
      "--------------------------------------------------\n",
      "User: Provide a concise answer to the following question: Which countries currently have bilateral FTAs in effect with the U.S.?\n",
      "AI: As of June 2023, the U.S. has bilateral FTAs in effect with 20 countries: Australia, Bahrain, Canada, Chile, Colombia, Costa Rica, Dominican Republic, El Salvador, Guatemala, Honduras, Israel, Jordan, Mexico, Morocco, Nicaragua, Oman, Panama, Peru, Singapore, and South Korea.\n",
      "--------------------------------------------------\n",
      "User: Given the following text, only list the following for each: specific Organizations, Legislations, Dates, Monetary Values, and Statistics When can counterparties start notifying the national competent authorities (NCAs) of their intention to apply the reporting exemption in accordance with Article 9(1) EMIR, as amended by Regulation 2019/834?\n",
      "AI: From 1 January 2021\n",
      "--------------------------------------------------\n",
      "User: Provide a concise answer to the following question: What type of license is the Apache License, Version 2.0?\n",
      "AI: The Apache License, Version 2.0 is a permissive free software license.\n",
      "--------------------------------------------------\n"
     ]
    }
   ],
   "source": [
    "# Set up the text generation pipeline with the PEFT model, specifying the device\n",
    "generator = pipeline(\"text-generation\", model=model, tokenizer=tokenizer, device=device)\n",
    "\n",
    "# List of user inputs\n",
    "user_inputs = [\n",
    "    \"Provide a link for Regulation A (Extensions of Credit by Federal Reserve Banks) law\",\n",
    "    \"Define the following term: Insurance Scores.\",\n",
    "    \"Expand the following acronym into its full form: ESCB.\",\n",
    "    \"Provide a concise answer to the following question: Which countries currently have bilateral FTAs in effect with the U.S.?\",\n",
    "    \"\"\"Given the following text, only list the following for each: specific Organizations, Legislations, Dates, Monetary Values, \n",
    "    and Statistics When can counterparties start notifying the national competent authorities (NCAs) of their intention to apply \n",
    "    the reporting exemption in accordance with Article 9(1) EMIR, as amended by Regulation 2019/834?\"\"\",\n",
    "    \"Provide a concise answer to the following question: What type of license is the Apache License, Version 2.0?\"\n",
    "]\n",
    "\n",
    "# Define the prompt template\n",
    "prompt_template = \"\"\"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n",
    "\n",
    "### Instruction:\n",
    "{}\n",
    "\n",
    "### Answer:\n",
    "\"\"\"\n",
    "\n",
    "# Loop over each user input and generate a response\n",
    "for user_input in user_inputs:\n",
    "    # Format the user input into the prompt\n",
    "    prompt = prompt_template.format(user_input)\n",
    "\n",
    "    # Generate a response from the model\n",
    "    response = generator(prompt, max_length=200, num_return_sequences=1, do_sample=True)\n",
    "\n",
    "    # Extract and clean up the AI's response\n",
    "    response_str = response[0]['generated_text'].split('### Answer:')[1].strip()\n",
    "    cut_ind = response_str.find(\"#\")  # Remove extra information after the response\n",
    "    response_str = response_str[:cut_ind].strip() if cut_ind != -1 else response_str\n",
    "\n",
    "    # Display the AI's response\n",
    "    print(f\"User: {user_input}\")\n",
    "    print(f\"AI: {response_str}\")\n",
    "    print(\"-\" * 50)  # Separator for clarity\n"
   ]
  },
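  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The loop above samples (`do_sample=True`), so answers vary from run to run, and `max_length` counts the prompt tokens as well as the generated ones. The cell below is a minimal sketch of a more predictable variant: greedy decoding with `max_new_tokens`, which bounds only the generated continuation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sketch: greedy decoding with a bound on new tokens only, for repeatable answers\n",
    "prompt = prompt_template.format(\"Expand the following acronym into its full form: ESCB.\")\n",
    "out = generator(prompt, max_new_tokens=64, do_sample=False, num_return_sequences=1)\n",
    "answer = out[0][\"generated_text\"].split(\"### Answer:\")[1].strip()\n",
    "print(answer)"
   ]
  },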
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}