In [1]:
import subprocess

def run_nvidia_smi():
    """Run ``nvidia-smi`` and print what it reported.

    On exit status 0 the command's standard output is printed under an
    "output" header; on any other status its standard error is printed under
    an "error" header. If anything raises while doing so (for example the
    executable cannot be launched), a one-line message with the exception is
    printed instead. Returns None.
    """
    try:
        # capture_output=True pipes both stdout and stderr; text=True decodes them to str.
        completed = subprocess.run(['nvidia-smi'], capture_output=True, text=True)

        succeeded = completed.returncode == 0
        header = "nvidia-smi output:\n" if succeeded else "Error running nvidia-smi:\n"
        body = completed.stdout if succeeded else completed.stderr

        print(header)
        print(body)
    except Exception as err:
        print(f"An error occurred: {err}")

# Show the GPU status at the top of the notebook
run_nvidia_smi()

nvidia-smi output:

Fri Nov  8 21:11:36 2024       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 560.35.03              Driver Version: 560.35.03      CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA A40                     On  |   00000000:17:00.0 Off |                    0 |
|  0%   53C    P0             85W /  300W |    3717MiB /  46068MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
|   1  NVIDIA A40           

In [None]:
import os
# Index of the GPU this notebook is pinned to.
gpu_index = 2
# Set before `torch` is imported further down; the CUDA check later in the
# notebook then reports a single device, addressed as index 0.
os.environ["CUDA_VISIBLE_DEVICES"] = f"{gpu_index}"
from huggingface_hub import login
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel, PeftConfig
# NOTE(review): AutoModelForCausalLM is already imported above; only `pipeline` is new here.
from transformers import AutoModelForCausalLM, pipeline
import logging
# Raise the "transformers" logger threshold to CRITICAL so its lower-severity
# log messages are not shown (this affects that logger only, not Python warnings in general).
logging.getLogger("transformers").setLevel(logging.CRITICAL)  # hides a spurious warning emitted when using the model for inference

  from .autonotebook import tqdm as notebook_tqdm
  warn(f"Failed to load image Python extension: {e}")
2024-11-08 21:11:38.912317: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-11-08 21:11:39.068802: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-08 21:11:39.677787: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2

In [3]:
# Report the CUDA devices visible to this process
cuda_ready = torch.cuda.is_available()
if not cuda_ready:
    print("CUDA is not available.")
else:
    num_devices = torch.cuda.device_count()
    print(f"Number of available CUDA devices: {num_devices}")

    for i in range(num_devices):
        device_name = torch.cuda.get_device_name(i)
        print(f"\nDevice {i}: {device_name}")
# Device index handed to the pipeline: 0 selects the first visible GPU, -1 the CPU
device = 0 if cuda_ready else -1

Number of available CUDA devices: 1

Device 0: NVIDIA A40


In [4]:
# Hub repositories: the PEFT adapter and the base checkpoint it is applied to
ADAPTER_REPO = "smartinez1/Llama-3.1-8B-FINLLM"
BASE_MODEL_REPO = "meta-llama/Llama-3.1-8B"

# Adapter configuration, base weights, then the adapter wrapped around the base model
config = PeftConfig.from_pretrained(ADAPTER_REPO)
base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_REPO)
model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)

# Tokenizer that belongs to the base model
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_REPO)

# Register the dedicated fine-tuning padding token and remember its id
custom_pad_token = "<|finetune_right_pad_id|>"
tokenizer.add_special_tokens(dict(pad_token=custom_pad_token))
pad_token_id = tokenizer.pad_token_id

Loading checkpoint shards: 100%|██████████████████| 4/4 [00:02<00:00,  1.55it/s]


Inputs should be formatted exactly as they are presented on the Challenge website.

In [None]:
# Text-generation pipeline wrapping the PEFT model and its tokenizer.
# `device` comes from the CUDA check cell (0 for the GPU, -1 for the CPU).
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=device,
)

# List of user inputs
# Each entry is one complete instruction; the generation loop below substitutes
# it into the prompt template and sends it to the model as-is.
user_inputs = [
    "Provide a link for Regulation A (Extensions of Credit by Federal Reserve Banks) law",
    "Define the following term: Insurance Scores.",
    "Expand the following acronym into its full form: ESCB.",
    "Provide a concise answer to the following question: Which countries currently have bilateral FTAs in effect with the U.S.?",
    # Multi-line literal: the line breaks and the leading spaces on its continuation
    # lines are part of the text sent to the model.
    # NOTE(review): there is no punctuation between "Statistics" and "When" — confirm this matches the intended input.
    """Given the following text, only list the following for each: specific Organizations, Legislations, Dates, Monetary Values, 
    and Statistics When can counterparties start notifying the national competent authorities (NCAs) of their intention to apply 
    the reporting exemption in accordance with Article 9(1) EMIR, as amended by Regulation 2019/834?""",
    "Provide a concise answer to the following question: What type of license is the Apache License, Version 2.0?"
]

# Prompt template: a fixed preamble, the instruction slot ("{}"), and an
# "### Answer:" header after which the model is expected to continue.
prompt_template = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Answer:\n"
)

# Generate and print one answer per user input.

# Budget for the answer itself. `max_new_tokens` counts only generated tokens,
# whereas `max_length` also counts the prompt, so long instructions would
# otherwise leave fewer tokens for the answer.
MAX_NEW_TOKENS = 200

# Every section header of the prompt template starts with this marker. If the
# model keeps going and opens a new section, everything from the marker on is
# dropped. A lone "#" (e.g. a URL fragment in a link answer) is kept.
SECTION_MARKER = "###"

for user_input in user_inputs:
    # Format the user input into the prompt
    prompt = prompt_template.format(user_input)

    # return_full_text=False makes the pipeline return only the continuation,
    # so the answer does not have to be recovered by searching for
    # "### Answer:" (which breaks if the instruction itself contains it).
    response = generator(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        num_return_sequences=1,
        do_sample=True,  # sampling: answers differ between runs unless a seed is set
        return_full_text=False,
    )

    # Keep the text before any run-on section header and trim whitespace
    response_str = response[0]['generated_text'].partition(SECTION_MARKER)[0].strip()

    # Display the AI's response
    print(f"User: {user_input}")
    print(f"AI: {response_str}")
    print("-" * 50)  # Separator for clarity


User: Provide a link for Regulation A (Extensions of Credit by Federal Reserve Banks) law
AI: Regulation A (Extensions of Credit by Federal Reserve Banks): https://www.federalreserve.gov/newsevents/pressreleases/files/bcreg20240705c1.pdf
--------------------------------------------------
User: Define the following term: Insurance Scores.
AI: A credit score based on an insurance underwriting model that evaluates the risk of an individual for insurance purposes.
--------------------------------------------------
User: Expand the following acronym into its full form: ESCB.
AI: European System of Central Banks
--------------------------------------------------
User: Provide a concise answer to the following question: Which countries currently have bilateral FTAs in effect with the U.S.?
AI: As of June 2023, the U.S. has bilateral FTAs in effect with 20 countries: Australia, Bahrain, Canada, Chile, Colombia, Costa Rica, Dominican Republic, El Salvador, Guatemala, Honduras, Israel, Jordan, M