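"""Inference pipeline for a LoRA fine-tuned causal language model.

Loads the model and tokenizer with Hugging Face transformers, builds a chat
prompt from a Fortran code snippet plus its natural-language explanation, and
generates a response on CPU.
"""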
from transformers import AutoModelForCausalLM, AutoTokenizer
from typing import Any, Dict, List, Tuple
import torch
# The script was originally written against unsloth's FastLanguageModel; plain
# transformers is used here so it also runs on CPU-only machines.
# from unsloth import FastLanguageModel

def load_model(
    model_name: str,
    max_seq_length: int,
    dtype: torch.dtype,
    load_in_4bit: bool
) -> Tuple[AutoModelForCausalLM, Any]:
    """
    Load and initialize the language model for inference.
    
    Args:
        model_name (str): Name of the pre-trained model to load
        max_seq_length (int): Maximum sequence length for the model
        dtype (torch.dtype): Data type for model weights
        load_in_4bit (bool): Whether to load model in 4-bit quantization
    
    Returns:
        Tuple[AutoModelForCausalLM, Any]: Tuple containing the model and tokenizer
    """

    # Build a quantization config only when 4-bit loading is requested.
    # Note: bitsandbytes 4-bit quantization requires a CUDA device.
    bnb_config = None
    if load_in_4bit:
        try:
            from transformers import BitsAndBytesConfig
            bnb_config = BitsAndBytesConfig(load_in_4bit=True)
        except ImportError:
            bnb_config = None

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.model_max_length = max_seq_length  # Cap the tokenizer at the configured context length

    model = AutoModelForCausalLM.from_pretrained(
        pretrained_model_name_or_path=model_name,
        device_map="cpu",
        torch_dtype=dtype if dtype is not None else torch.float32,  # Fall back to float32 for CPU
        low_cpu_mem_usage=True,  # Helps with memory efficiency
        quantization_config=bnb_config
    )

    model.eval()  # Set model to evaluation mode

    return model, tokenizer

def prepare_input(
    messages: List[Dict[str, str]],
    tokenizer: Any,
    device: str = "cpu"
) -> torch.Tensor:
    """
    Prepare input for the model by applying chat template and tokenization.
    
    Args:
        messages (List[Dict[str, str]]): List of message dictionaries
        tokenizer: The tokenizer instance
        device (str): Device to load tensors to ("cuda" or "cpu")
    
    Returns:
        torch.Tensor: Prepared input tensor
    """
    return tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(device)
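
# The chat template above yields token ids for messages in the standard
# transformers chat format, e.g. (illustrative content only):
#     [{"role": "user", "content": "[Fortran Code] ...code... [Fortran Code Explain] ...explanation..."}]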

def generate_response(
    model: AutoModelForCausalLM,
    inputs: torch.Tensor,
    tokenizer: Any,
    max_new_tokens: int = 2000,
    temperature: float = 1.5,
    min_p: float = 0.1,
    skip_prompt: bool = True
) -> str:
    """
    Generate response using the model.
    
    Args:
        model (AutoModelForCausalLM): The language model
        inputs (torch.Tensor): Prepared input tensor
        tokenizer: The tokenizer instance
        max_new_tokens (int): Maximum number of tokens to generate
        temperature (float): Sampling temperature
        min_p (float): Minimum token probability threshold for min-p sampling
        skip_prompt (bool): Whether to exclude the prompt tokens from the returned text
    
    Returns:
        str: Generated response
    """

    device = torch.device("cpu")

    # text_streamer = TextStreamer(tokenizer, skip_prompt=skip_prompt)
    inputs = tokenizer(inputs, return_tensors="pt").to(device)
    outputs = model.generate(
        inputs,
        max_length=2000,
        do_sample=False  # Deterministic generation
        # num_return_sequences=1,
        # streamer=text_streamer,
        # max_new_tokens=max_new_tokens,
        # use_cache=True,
        # temperature=temperature,
        # min_p=min_p
    )
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text

def main(
        USER_INPUT_CODE = "program sum_of_numbers\n    implicit none\n    integer :: n, i, sum\n\n    ! Initialize variables\n    sum = 0\n\n    ! Get user input\n    print *, \"Enter a positive integer:\"\n    read *, n\n\n    ! Calculate the sum of numbers from 1 to n\n    do i = 1, n\n        sum = sum + i\n    end do\n\n    ! Print the result\n    print *, \"The sum of numbers from 1 to\", n, \"is\", sum\nend program sum_of_numbers",
        USER_INPUT_EXPLANATION = "The provided Fortran code snippet is a program that calculates the sum of integers from 1 to n, where n is provided by the user. It uses a simple procedural approach, including variable declarations, input handling, and a loop for the summation.\n\nThe functionality of the program is explained in detail in the elaboration. The program starts by initializing variables and prompting the user for input. It then calculates the sum using a do loop, iterating from 1 to n, and accumulating the result in a variable. Finally, it prints the computed sum to the console.\n\nThis program demonstrates a straightforward application of Fortran's capabilities for handling loops and basic arithmetic operations. It is a clear example of how Fortran can be used to solve mathematical problems involving user interaction and iterative computations.",
        MODEL_PATH  = "lora_model"
        ):
    """
    Main function to demonstrate the inference pipeline.
    """
    # Import configuration
    from config import max_seq_length, dtype, load_in_4bit
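    # `config` is assumed to be a sibling config.py; illustrative values only:
    #     max_seq_length = 2048
    #     dtype = None          # let transformers pick; this script falls back to float32 on CPU
    #     load_in_4bit = False  # 4-bit loading requires a CUDA device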
    
    # Example messages
    messages = [
        {
            "role": "user",
            "content": str("[Fortran Code]") + str(USER_INPUT_CODE) + str("[Fortran Code Explain]") + str(USER_INPUT_EXPLANATION)
        }
    ]
    
    # Load model
    model, tokenizer = load_model(
        model_name=MODEL_PATH,
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit
    )
    
    # Prepare input
    inputs = prepare_input(messages, tokenizer)
    
    # Generate response
    return generate_response(model, inputs, tokenizer)

if __name__ == "__main__":
    # YOUR_FORTRAN_CODE_HERE
    USER_INPUT_CODE = "program sum_of_numbers\n    implicit none\n    integer :: n, i, sum\n\n    ! Initialize variables\n    sum = 0\n\n    ! Get user input\n    print *, \"Enter a positive integer:\"\n    read *, n\n\n    ! Calculate the sum of numbers from 1 to n\n    do i = 1, n\n        sum = sum + i\n    end do\n\n    ! Print the result\n    print *, \"The sum of numbers from 1 to\", n, \"is\", sum\nend program sum_of_numbers" 
    
    # YOUR_EXPLANATION_HERE
    USER_INPUT_EXPLANATION = "The provided Fortran code snippet is a program that calculates the sum of integers from 1 to n, where n is provided by the user. It uses a simple procedural approach, including variable declarations, input handling, and a loop for the summation.\n\nThe functionality of the program is explained in detail in the elaboration. The program starts by initializing variables and prompting the user for input. It then calculates the sum using a do loop, iterating from 1 to n, and accumulating the result in a variable. Finally, it prints the computed sum to the console.\n\nThis program demonstrates a straightforward application of Fortran's capabilities for handling loops and basic arithmetic operations. It is a clear example of how Fortran can be used to solve mathematical problems involving user interaction and iterative computations."
    
    # YOUR_MODEL_PATH_HERE
    MODEL_PATH = "lora_model"
    
    main(USER_INPUT_CODE, USER_INPUT_EXPLANATION, MODEL_PATH)