Caslow committed
Commit 1903078 · Parent: 5b7d699

ChatGPT code

Files changed (1):
  1. inference.py +63 -65
inference.py CHANGED
@@ -1,50 +1,42 @@
-from transformers import TextStreamer, AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoModelForCausalLM, AutoTokenizer
 from typing import Tuple, List, Dict
 import torch
-# from unsloth import FastLanguageModel
 
 def load_model(
     model_name: str,
-    max_seq_length: int = 2048,
     dtype: torch.dtype = torch.float32,
-    load_in_4bit: bool = False
 ) -> Tuple[AutoModelForCausalLM, any]:
     """
-    Load and initialize the language model for inference.
+    Load and initialize the language model for CPU-only inference.
 
     Args:
         model_name (str): Name of the pre-trained model to load
-        max_seq_length (int): Maximum sequence length for the model
-        dtype (torch.dtype): Data type for model weights
-        load_in_4bit (bool): Whether to load model in 4-bit quantization
+        dtype (torch.dtype): Data type for model weights (default: torch.float32)
 
     Returns:
-        Tuple[FastLanguageModel, any]: Tuple containing the model and tokenizer
+        Tuple[AutoModelForCausalLM, any]: Tuple containing the model and tokenizer
     """
-
     kwargs = {
-        "device_map": "cpu",
+        "device_map": "cpu",  # Explicitly set to CPU
         "torch_dtype": dtype,
-        "low_cpu_mem_usage": True,
-        "_from_auto": False,  # Prevent automatic quantization detection
-        "quantization_config": None  # Explicitly set no quantization
+        "low_cpu_mem_usage": True,  # Optimize memory usage for CPU
     }
 
+    # Load the tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+    # Load the model
     model = AutoModelForCausalLM.from_pretrained(
         pretrained_model_name_or_path=model_name,
         **kwargs
     )
 
-    model.eval()  # Set model to evaluation mode
-
+    model.eval()  # Set model to evaluation mode
     return model, tokenizer
 
 def prepare_input(
     messages: List[Dict[str, str]],
     tokenizer: any,
-    device: str = "cpu"
 ) -> torch.Tensor:
     """
     Prepare input for the model by applying chat template and tokenization.
@@ -52,15 +44,15 @@ def prepare_input(
     Args:
         messages (List[Dict[str, str]]): List of message dictionaries
         tokenizer: The tokenizer instance
-        device (str): Device to load tensors to ("cuda" or "cpu")
 
     Returns:
         torch.Tensor: Prepared input tensor
     """
+    # Combine messages into a single string (simple concatenation for this example)
+    input_text = " ".join([msg["content"] for msg in messages])
+    # Tokenize the input
     return tokenizer(
-        messages,
-        # tokenize=True,
-        # add_generation_prompt=True,
+        input_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
@@ -70,83 +62,89 @@ def generate_response(
     model: AutoModelForCausalLM,
     inputs: torch.Tensor,
     tokenizer: any,
-    max_new_tokens: int = 2000,
-    temperature: float = 1.5,
-    min_p: float = 0.1,
-    skip_prompt: bool = True
+    max_new_tokens: int = 200,
 ) -> str:
     """
     Generate response using the model.
 
     Args:
-        model (FastLanguageModel): The language model
+        model (AutoModelForCausalLM): The language model
         inputs (torch.Tensor): Prepared input tensor
         tokenizer: The tokenizer instance
         max_new_tokens (int): Maximum number of tokens to generate
-        temperature (float): Sampling temperature
-        min_p (float): Minimum probability for nucleus sampling
-        skip_prompt (bool): Whether to skip prompt in output
 
     Returns:
         str: Generated response
     """
-
-    device = torch.device("cpu")
-
-    # text_streamer = TextStreamer(tokenizer, skip_prompt=skip_prompt)
-    inputs = tokenizer(inputs, return_tensors="pt").to(device)
     outputs = model.generate(
         inputs,
-        max_length=2000,
-        do_sample=False  # Deterministic generation
-        # num_return_sequences=1,
-        # streamer=text_streamer,
-        # max_new_tokens=max_new_tokens,
-        # use_cache=True,
-        # temperature=temperature,
-        # min_p=min_p
+        max_new_tokens=max_new_tokens,
+        do_sample=False,  # Deterministic generation for reproducibility
     )
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    return generated_text
+    # Decode the generated tokens
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
 def main(
-    USER_INPUT_CODE = "program sum_of_numbers\n implicit none\n integer :: n, i, sum\n\n ! Initialize variables\n sum = 0\n\n ! Get user input\n print *, \"Enter a positive integer:\"\n read *, n\n\n ! Calculate the sum of numbers from 1 to n\n do i = 1, n\n sum = sum + i\n end do\n\n ! Print the result\n print *, \"The sum of numbers from 1 to\", n, \"is\", sum\nend program sum_of_numbers",
-    USER_INPUT_EXPLANATION = "The provided Fortran code snippet is a program that calculates the sum of integers from 1 to n, where n is provided by the user. It uses a simple procedural approach, including variable declarations, input handling, and a loop for the summation.\n\nThe functionality of the program is explained in detail in the elaboration. The program starts by initializing variables and prompting the user for input. It then calculates the sum using a do loop, iterating from 1 to n, and accumulating the result in a variable. Finally, it prints the computed sum to the console.\n\nThis program demonstrates a straightforward application of Fortran's capabilities for handling loops and basic arithmetic operations. It is a clear example of how Fortran can be used to solve mathematical problems involving user interaction and iterative computations.",
-    MODEL_PATH = "lora_model"
-):
+    USER_INPUT_CODE: str,
+    USER_INPUT_EXPLANATION: str,
+    MODEL_PATH: str,
+):
     """
     Main function to demonstrate the inference pipeline.
     """
-    # Import configuration
-    from config import max_seq_length, dtype, load_in_4bit
-
     # Example messages
     messages = [
         {
             "role": "user",
-            "content": str("[Fortran Code]") + str(USER_INPUT_CODE) + str("[Fortran Code Explain]") + str(USER_INPUT_EXPLANATION)
+            "content": f"[Fortran Code]\n{USER_INPUT_CODE}\n[Fortran Code Explain]\n{USER_INPUT_EXPLANATION}"
         }
     ]
 
     # Load model
-    model, tokenizer = load_model(
-        model_name=MODEL_PATH
-    )
+    model, tokenizer = load_model(MODEL_PATH)
 
     # Prepare input
     inputs = prepare_input(messages, tokenizer)
 
     # Generate response
-    return generate_response(model, inputs, tokenizer)
+    response = generate_response(model, inputs, tokenizer)
+    print("Generated Response:\n", response)
 
 if __name__ == "__main__":
-    # YOUR_FORTRAN_CODE_HERE
-    USER_INPUT_CODE = "program sum_of_numbers\n implicit none\n integer :: n, i, sum\n\n ! Initialize variables\n sum = 0\n\n ! Get user input\n print *, \"Enter a positive integer:\"\n read *, n\n\n ! Calculate the sum of numbers from 1 to n\n do i = 1, n\n sum = sum + i\n end do\n\n ! Print the result\n print *, \"The sum of numbers from 1 to\", n, \"is\", sum\nend program sum_of_numbers"
-
-    # YOUR_EXPLANATION_HERE
-    USER_INPUT_EXPLANATION = "The provided Fortran code snippet is a program that calculates the sum of integers from 1 to n, where n is provided by the user. It uses a simple procedural approach, including variable declarations, input handling, and a loop for the summation.\n\nThe functionality of the program is explained in detail in the elaboration. The program starts by initializing variables and prompting the user for input. It then calculates the sum using a do loop, iterating from 1 to n, and accumulating the result in a variable. Finally, it prints the computed sum to the console.\n\nThis program demonstrates a straightforward application of Fortran's capabilities for handling loops and basic arithmetic operations. It is a clear example of how Fortran can be used to solve mathematical problems involving user interaction and iterative computations."
-
-    # YOUR_MODEL_PATH_HERE
+    # Define your Fortran code and explanation
+    USER_INPUT_CODE = """
+    program sum_of_numbers
+        implicit none
+        integer :: n, i, sum
+
+        ! Initialize variables
+        sum = 0
+
+        ! Get user input
+        print *, "Enter a positive integer:"
+        read *, n
+
+        ! Calculate the sum of numbers from 1 to n
+        do i = 1, n
+            sum = sum + i
+        end do
+
+        ! Print the result
+        print *, "The sum of numbers from 1 to", n, "is", sum
+    end program sum_of_numbers
+    """
+    USER_INPUT_EXPLANATION = """
+    The provided Fortran code snippet is a program that calculates the sum of integers from 1 to n, where n is provided by the user.
+    It uses a simple procedural approach, including variable declarations, input handling, and a loop for the summation.
+
+    The program starts by initializing variables and prompting the user for input.
+    It then calculates the sum using a do loop, iterating from 1 to n, and accumulating the result in a variable.
+    Finally, it prints the computed sum to the console.
+
+    This program demonstrates a straightforward application of Fortran's capabilities for handling loops and basic arithmetic operations.
+    """
+    # Path to your model
     MODEL_PATH = "lora_model"
-
-    main(USER_INPUT_CODE, USER_INPUT_EXPLANATION, MODEL_PATH)
+
+    # Run the main function
+    main(USER_INPUT_CODE, USER_INPUT_EXPLANATION, MODEL_PATH)
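Two details of the new version are worth flagging for anyone reusing it. First, the docstring of prepare_input still says it applies a chat template, but the committed body plainly concatenates message contents; tokenizer.apply_chat_template is the Transformers API that actually does the former. Second, prepare_input returns a full tokenizer encoding (a BatchEncoding), while model.generate expects token IDs, so the encoding is normally indexed or unpacked rather than passed positionally the way generate_response does. The following is a minimal sketch under those assumptions, not the committed code: the "lora_model" path comes from the script, and the prompt content is a placeholder.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_PATH = "lora_model"  # path used in the committed script; swap in your own

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    device_map="cpu",
    torch_dtype=torch.float32,
    low_cpu_mem_usage=True,
)
model.eval()

messages = [{
    "role": "user",
    "content": "[Fortran Code]\n<your code>\n[Fortran Code Explain]\n<your explanation>",
}]

# Prefer the tokenizer's chat template when one is defined, matching the
# docstring's intent; otherwise fall back to the commit's plain concatenation.
if tokenizer.chat_template is not None:
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    )
else:
    text = " ".join(msg["content"] for msg in messages)
    input_ids = tokenizer(text, return_tensors="pt").input_ids

with torch.no_grad():
    # Pass token IDs, not the BatchEncoding, to generate()
    outputs = model.generate(input_ids, max_new_tokens=200, do_sample=False)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))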