Caslow committed
Commit 1903078 · Parent: 5b7d699

ChatGPT code

Files changed (1):
  1. inference.py +63 -65
inference.py CHANGED
@@ -1,50 +1,42 @@
-from transformers import TextStreamer, AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from typing import Tuple, List, Dict
 import torch
-# from unsloth import FastLanguageModel
 
 def load_model(
     model_name: str,
-    max_seq_length: int = 2048,
     dtype: torch.dtype = torch.float32,
-    load_in_4bit: bool = False
 ) -> Tuple[AutoModelForCausalLM, any]:
     """
-    Load and initialize the language model for inference.
+    Load and initialize the language model for CPU-only inference.
 
     Args:
         model_name (str): Name of the pre-trained model to load
-        max_seq_length (int): Maximum sequence length for the model
-        dtype (torch.dtype): Data type for model weights
-        load_in_4bit (bool): Whether to load model in 4-bit quantization
+        dtype (torch.dtype): Data type for model weights (default: torch.float32)
 
     Returns:
-        Tuple[FastLanguageModel, any]: Tuple containing the model and tokenizer
+        Tuple[AutoModelForCausalLM, any]: Tuple containing the model and tokenizer
     """
-
     kwargs = {
-        "device_map": "cpu",
+        "device_map": "cpu",  # Explicitly set to CPU
         "torch_dtype": dtype,
-        "low_cpu_mem_usage": True,
-        "_from_auto": False,  # Prevent automatic quantization detection
-        "quantization_config": None  # Explicitly set no quantization
+        "low_cpu_mem_usage": True,  # Optimize memory usage for CPU
     }
 
+    # Load the tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+    # Load the model
     model = AutoModelForCausalLM.from_pretrained(
         pretrained_model_name_or_path=model_name,
         **kwargs
     )
 
-    model.eval()  # Set model to evaluation mode
-
+    model.eval()  # Set model to evaluation mode
     return model, tokenizer
 
 def prepare_input(
     messages: List[Dict[str, str]],
     tokenizer: any,
-    device: str = "cpu"
 ) -> torch.Tensor:
     """
     Prepare input for the model by applying chat template and tokenization.
@@ -52,15 +44,15 @@ def prepare_input(
     Args:
         messages (List[Dict[str, str]]): List of message dictionaries
         tokenizer: The tokenizer instance
-        device (str): Device to load tensors to ("cuda" or "cpu")
 
     Returns:
         torch.Tensor: Prepared input tensor
     """
+    # Combine messages into a single string (simple concatenation for this example)
+    input_text = " ".join([msg["content"] for msg in messages])
+    # Tokenize the input
     return tokenizer(
-        messages,
-        # tokenize=True,
-        # add_generation_prompt=True,
+        input_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
@@ -70,83 +62,89 @@ def generate_response(
     model: AutoModelForCausalLM,
     inputs: torch.Tensor,
     tokenizer: any,
-    max_new_tokens: int = 2000,
-    temperature: float = 1.5,
-    min_p: float = 0.1,
-    skip_prompt: bool = True
+    max_new_tokens: int = 200,
 ) -> str:
     """
     Generate response using the model.
 
     Args:
-        model (FastLanguageModel): The language model
+        model (AutoModelForCausalLM): The language model
         inputs (torch.Tensor): Prepared input tensor
         tokenizer: The tokenizer instance
         max_new_tokens (int): Maximum number of tokens to generate
-        temperature (float): Sampling temperature
-        min_p (float): Minimum probability for nucleus sampling
-        skip_prompt (bool): Whether to skip prompt in output
 
     Returns:
         str: Generated response
     """
-
-    device = torch.device("cpu")
-
-    # text_streamer = TextStreamer(tokenizer, skip_prompt=skip_prompt)
-    inputs = tokenizer(inputs, return_tensors="pt").to(device)
     outputs = model.generate(
         inputs,
-        max_length=2000,
-        do_sample=False  # Deterministic generation
-        # num_return_sequences=1,
-        # streamer=text_streamer,
-        # max_new_tokens=max_new_tokens,
-        # use_cache=True,
-        # temperature=temperature,
-        # min_p=min_p
+        max_new_tokens=max_new_tokens,
+        do_sample=False,  # Deterministic generation for reproducibility
     )
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return generated_text
+    # Decode the generated tokens
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 def main(
-    USER_INPUT_CODE = "program sum_of_numbers\n implicit none\n integer :: n, i, sum\n\n ! Initialize variables\n sum = 0\n\n ! Get user input\n print *, \"Enter a positive integer:\"\n read *, n\n\n ! Calculate the sum of numbers from 1 to n\n do i = 1, n\n sum = sum + i\n end do\n\n ! Print the result\n print *, \"The sum of numbers from 1 to\", n, \"is\", sum\nend program sum_of_numbers",
-    USER_INPUT_EXPLANATION = "The provided Fortran code snippet is a program that calculates the sum of integers from 1 to n, where n is provided by the user. It uses a simple procedural approach, including variable declarations, input handling, and a loop for the summation.\n\nThe functionality of the program is explained in detail in the elaboration. The program starts by initializing variables and prompting the user for input. It then calculates the sum using a do loop, iterating from 1 to n, and accumulating the result in a variable. Finally, it prints the computed sum to the console.\n\nThis program demonstrates a straightforward application of Fortran's capabilities for handling loops and basic arithmetic operations. It is a clear example of how Fortran can be used to solve mathematical problems involving user interaction and iterative computations.",
-    MODEL_PATH = "lora_model"
-):
+    USER_INPUT_CODE: str,
+    USER_INPUT_EXPLANATION: str,
+    MODEL_PATH: str,
+):
     """
     Main function to demonstrate the inference pipeline.
     """
-    # Import configuration
-    from config import max_seq_length, dtype, load_in_4bit
-
     # Example messages
     messages = [
         {
             "role": "user",
-            "content": str("[Fortran Code]") + str(USER_INPUT_CODE) + str("[Fortran Code Explain]") + str(USER_INPUT_EXPLANATION)
+            "content": f"[Fortran Code]\n{USER_INPUT_CODE}\n[Fortran Code Explain]\n{USER_INPUT_EXPLANATION}"
         }
     ]
 
     # Load model
-    model, tokenizer = load_model(
-        model_name=MODEL_PATH
-    )
+    model, tokenizer = load_model(MODEL_PATH)
 
     # Prepare input
     inputs = prepare_input(messages, tokenizer)
 
     # Generate response
-    return generate_response(model, inputs, tokenizer)
+    response = generate_response(model, inputs, tokenizer)
+    print("Generated Response:\n", response)
 
 if __name__ == "__main__":
-    # YOUR_FORTRAN_CODE_HERE
-    USER_INPUT_CODE = "program sum_of_numbers\n implicit none\n integer :: n, i, sum\n\n ! Initialize variables\n sum = 0\n\n ! Get user input\n print *, \"Enter a positive integer:\"\n read *, n\n\n ! Calculate the sum of numbers from 1 to n\n do i = 1, n\n sum = sum + i\n end do\n\n ! Print the result\n print *, \"The sum of numbers from 1 to\", n, \"is\", sum\nend program sum_of_numbers"
-
-    # YOUR_EXPLANATION_HERE
-    USER_INPUT_EXPLANATION = "The provided Fortran code snippet is a program that calculates the sum of integers from 1 to n, where n is provided by the user. It uses a simple procedural approach, including variable declarations, input handling, and a loop for the summation.\n\nThe functionality of the program is explained in detail in the elaboration. The program starts by initializing variables and prompting the user for input. It then calculates the sum using a do loop, iterating from 1 to n, and accumulating the result in a variable. Finally, it prints the computed sum to the console.\n\nThis program demonstrates a straightforward application of Fortran's capabilities for handling loops and basic arithmetic operations. It is a clear example of how Fortran can be used to solve mathematical problems involving user interaction and iterative computations."
-
-    # YOUR_MODEL_PATH_HERE
+    # Define your Fortran code and explanation
+    USER_INPUT_CODE = """
+    program sum_of_numbers
+        implicit none
+        integer :: n, i, sum
+
+        ! Initialize variables
+        sum = 0
+
+        ! Get user input
+        print *, "Enter a positive integer:"
+        read *, n
+
+        ! Calculate the sum of numbers from 1 to n
+        do i = 1, n
+            sum = sum + i
+        end do
+
+        ! Print the result
+        print *, "The sum of numbers from 1 to", n, "is", sum
+    end program sum_of_numbers
+    """
+    USER_INPUT_EXPLANATION = """
+    The provided Fortran code snippet is a program that calculates the sum of integers from 1 to n, where n is provided by the user.
+    It uses a simple procedural approach, including variable declarations, input handling, and a loop for the summation.
+
+    The program starts by initializing variables and prompting the user for input.
+    It then calculates the sum using a do loop, iterating from 1 to n, and accumulating the result in a variable.
+    Finally, it prints the computed sum to the console.
+
+    This program demonstrates a straightforward application of Fortran's capabilities for handling loops and basic arithmetic operations.
+    """
+    # Path to your model
     MODEL_PATH = "lora_model"
-
-    main(USER_INPUT_CODE, USER_INPUT_EXPLANATION, MODEL_PATH)
+
+    # Run the main function
+    main(USER_INPUT_CODE, USER_INPUT_EXPLANATION, MODEL_PATH)
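Two details of the new version are worth flagging for anyone reusing it. First, the docstring of prepare_input still says it applies a chat template, but the committed body plainly concatenates message contents; tokenizer.apply_chat_template is the Transformers API that actually does the former. Second, prepare_input returns a full tokenizer encoding (a BatchEncoding), while model.generate expects token IDs, so the encoding is normally indexed or unpacked rather than passed positionally the way generate_response does. The following is a minimal sketch under those assumptions, not the committed code: the "lora_model" path comes from the script, and the prompt content is a placeholder.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "lora_model"  # path used in the committed script; swap in your own

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="cpu",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)
model.eval()

messages = [{
    "role": "user",
    "content": "[Fortran Code]\n<your code>\n[Fortran Code Explain]\n<your explanation>",
}]

# Prefer the tokenizer's chat template when one is defined, matching the
# docstring's intent; otherwise fall back to the commit's plain concatenation.
if tokenizer.chat_template is not None:
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
else:
    text = " ".join(msg["content"] for msg in messages)
    input_ids = tokenizer(text, return_tensors="pt").input_ids

with torch.no_grad():
    # Pass token IDs, not the BatchEncoding, to generate()
    outputs = model.generate(input_ids, max_new_tokens=200, do_sample=False)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))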