from typing import Any, Dict, List, Tuple

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


def load_model(
    model_name: str,
    dtype: torch.dtype = torch.float32,
) -> Tuple[AutoModelForCausalLM, Any]:
    """
    Load and initialize the language model for CPU-only inference.

    Args:
        model_name (str): Name or path of the pre-trained model to load
        dtype (torch.dtype): Data type for model weights (default: torch.float32)

    Returns:
        Tuple[AutoModelForCausalLM, Any]: The model and its tokenizer
    """
    kwargs = {
        "device_map": "cpu",        # Explicitly set to CPU
        "torch_dtype": dtype,
        "low_cpu_mem_usage": True,  # Optimize memory usage for CPU
    }

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Load the model
    model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_name,
        **kwargs,
    )
    model.eval()  # Set model to evaluation mode
    return model, tokenizer


def prepare_input(
    messages: List[Dict[str, str]],
    tokenizer: Any,
) -> torch.Tensor:
    """
    Prepare input for the model by concatenating message contents and tokenizing.

    Args:
        messages (List[Dict[str, str]]): List of message dictionaries
        tokenizer: The tokenizer instance

    Returns:
        torch.Tensor: Prepared input_ids tensor
    """
    # Combine messages into a single string (simple concatenation for this example)
    input_text = " ".join([msg["content"] for msg in messages])

    # Tokenize the input and keep only the input_ids tensor
    return tokenizer(
        input_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )["input_ids"]


def generate_response(
    model: AutoModelForCausalLM,
    inputs: torch.Tensor,
    tokenizer: Any,
    max_new_tokens: int = 200,
) -> str:
    """
    Generate a response using the model.

    Args:
        model (AutoModelForCausalLM): The language model
        inputs (torch.Tensor): Prepared input_ids tensor
        tokenizer: The tokenizer instance
        max_new_tokens (int): Maximum number of tokens to generate

    Returns:
        str: Generated response
    """
    outputs = model.generate(
        inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,  # Deterministic generation for reproducibility
    )

    # Decode the generated tokens (the prompt tokens are included in the output)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
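

# Hedged follow-up (assumption, not in the original script): generate() returns the
# prompt tokens followed by the newly generated ones, so the string returned above
# still starts with the prompt. The hypothetical helper below slices the prompt off
# before decoding; it would need access to the raw outputs and inputs tensors.
def decode_new_tokens_only(
    outputs: torch.Tensor,
    inputs: torch.Tensor,
    tokenizer: Any,
) -> str:
    """Sketch: decode only the tokens produced after the prompt."""
    generated_tokens = outputs[0][inputs.shape[-1]:]  # drop the prompt tokens
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)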


def main(
    USER_INPUT_CODE: str,
    USER_INPUT_EXPLANATION: str,
    MODEL_PATH: str,
):
    """
    Main function to demonstrate the inference pipeline.
    """
    # Example messages
    messages = [
        {
            "role": "user",
            "content": f"[Fortran Code]\n{USER_INPUT_CODE}\n[Fortran Code Explain]\n{USER_INPUT_EXPLANATION}"
        }
    ]

    # Load model
    model, tokenizer = load_model(MODEL_PATH)

    # Prepare input
    inputs = prepare_input(messages, tokenizer)

    # Generate response
    response = generate_response(model, inputs, tokenizer)
    print("Generated Response:\n", response)


if __name__ == "__main__":
    # Define your Fortran code and explanation
    USER_INPUT_CODE = """
program sum_of_numbers
    implicit none
    integer :: n, i, sum

    ! Initialize variables
    sum = 0

    ! Get user input
    print *, "Enter a positive integer:"
    read *, n

    ! Calculate the sum of numbers from 1 to n
    do i = 1, n
        sum = sum + i
    end do

    ! Print the result
    print *, "The sum of numbers from 1 to", n, "is", sum
end program sum_of_numbers
"""

    USER_INPUT_EXPLANATION = """
The provided Fortran code snippet is a program that calculates the sum of integers from 1 to n, where n is provided by the user.
It uses a simple procedural approach, including variable declarations, input handling, and a loop for the summation.
The program starts by initializing variables and prompting the user for input.
It then calculates the sum using a do loop, iterating from 1 to n, and accumulating the result in a variable.
Finally, it prints the computed sum to the console.
This program demonstrates a straightforward application of Fortran's capabilities for handling loops and basic arithmetic operations.
"""

    # Path to the model: the hosted Hugging Face repository for this project
    MODEL_PATH = "CodeTranslatorLLM/LinguistLLM"

    # Run the main function
    main(USER_INPUT_CODE, USER_INPUT_EXPLANATION, MODEL_PATH)