from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import Tuple, List, Dict
import torch

def load_model(
    model_name: str,
    dtype: torch.dtype = torch.float32,
) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
    """
    Load and initialize the language model for CPU-only inference.
    
    Args:
        model_name (str): Hugging Face repo id or local path of the model to load
        dtype (torch.dtype): Data type for model weights (default: torch.float32)

    Returns:
        Tuple[AutoModelForCausalLM, AutoTokenizer]: Tuple containing the model and tokenizer
    """
    kwargs = {
        "device_map": "cpu",  # Explicitly set to CPU
        "torch_dtype": dtype,
        "low_cpu_mem_usage": True,  # Optimize memory usage for CPU
    }

    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # Load the model
    model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_name,
        **kwargs
    )

    model.eval()  # Set model to evaluation mode
    return model, tokenizer
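
# A minimal usage sketch for load_model (shown as a comment so the script stays
# unchanged; "CodeTranslatorLLM/LinguistLLM" is the repository this script
# targets, and a local path such as "lora_model" would work too):
#
#   model, tokenizer = load_model("CodeTranslatorLLM/LinguistLLM")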

def prepare_input(
    messages: List[Dict[str, str]],
    tokenizer: AutoTokenizer,
) -> torch.Tensor:
    """
    Prepare input for the model by concatenating message contents and tokenizing the result.
    
    Args:
        messages (List[Dict[str, str]]): List of message dictionaries
        tokenizer: The tokenizer instance
    
    Returns:
        torch.Tensor: Prepared input tensor
    """
    # Combine messages into a single string (simple concatenation for this example)
    input_text = " ".join([msg["content"] for msg in messages])
    # Tokenize the input (a single sequence, so no padding is needed)
    return tokenizer(
        input_text,
        return_tensors="pt",
        truncation=True,
    )["input_ids"]

def generate_response(
    model: AutoModelForCausalLM,
    inputs: torch.Tensor,
    tokenizer: AutoTokenizer,
    max_new_tokens: int = 200,
) -> str:
    """
    Generate response using the model.
    
    Args:
        model (AutoModelForCausalLM): The language model
        inputs (torch.Tensor): Prepared input tensor
        tokenizer: The tokenizer instance
        max_new_tokens (int): Maximum number of tokens to generate
    
    Returns:
        str: Generated response
    """
    outputs = model.generate(
        inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,  # Deterministic generation for reproducibility
    )
    # Decode the generated tokens
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
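
# Greedy decoding (do_sample=False) is used above for reproducibility. For more
# varied output, sampling could be enabled instead, e.g. (a sketch, not part of
# the original pipeline):
#
#   outputs = model.generate(
#       inputs, max_new_tokens=200, do_sample=True, temperature=0.7, top_p=0.9,
#   )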

def main(
    USER_INPUT_CODE: str,
    USER_INPUT_EXPLANATION: str,
    MODEL_PATH: str,
):
    """
    Main function to demonstrate the inference pipeline.
    """
    # Example messages
    messages = [
        {
            "role": "user",
            "content": f"[Fortran Code]\n{USER_INPUT_CODE}\n[Fortran Code Explain]\n{USER_INPUT_EXPLANATION}"
        }
    ]
    
    # Load model
    model, tokenizer = load_model(MODEL_PATH)
    
    # Prepare input
    inputs = prepare_input(messages, tokenizer)
    
    # Generate response
    response = generate_response(model, inputs, tokenizer)
    print("Generated Response:\n", response)

if __name__ == "__main__":
    # Define your Fortran code and explanation
    USER_INPUT_CODE = """
    program sum_of_numbers
        implicit none
        integer :: n, i, sum

        ! Initialize variables
        sum = 0

        ! Get user input
        print *, "Enter a positive integer:"
        read *, n

        ! Calculate the sum of numbers from 1 to n
        do i = 1, n
            sum = sum + i
        end do

        ! Print the result
        print *, "The sum of numbers from 1 to", n, "is", sum
    end program sum_of_numbers
    """
    USER_INPUT_EXPLANATION = """
    The provided Fortran code snippet is a program that calculates the sum of integers from 1 to n, where n is provided by the user. 
    It uses a simple procedural approach, including variable declarations, input handling, and a loop for the summation.

    The program starts by initializing variables and prompting the user for input. 
    It then calculates the sum using a do loop, iterating from 1 to n, and accumulating the result in a variable. 
    Finally, it prints the computed sum to the console.

    This program demonstrates a straightforward application of Fortran's capabilities for handling loops and basic arithmetic operations.
    """
    # Path to your model
    MODEL_PATH = "lora_model"

    # Run the main function
    main(USER_INPUT_CODE, USER_INPUT_EXPLANATION, MODEL_PATH)
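
# To run this example end to end (a sketch: the filename inference.py is an
# assumption; MODEL_PATH must point at an existing model directory or Hugging
# Face repository):
#
#   python inference.py
#
# The generated translation is printed after "Generated Response:".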