IST199655 committed on
Commit ef4866e · 1 Parent(s): 474adaa
Files changed (2)
  1. app.py +8 -3
  2. requirements.txt +2 -1
app.py CHANGED
@@ -9,13 +9,18 @@ from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
 import torch
 
 # Load model and tokenizer globally to avoid reloading for every request
-model_path = "llama_lora_model_1"
+model_path = "Heit39/llama_lora_model_1"
 
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, legacy=False)
 
-# Load model
-model = AutoModelForCausalLM.from_pretrained("Heit39/llama_lora_model_1")
+# Load the base model (e.g., LLaMA)
+base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-3B-Instruct")
+
+# Load LoRA adapter
+from peft import PeftModel
+model = PeftModel.from_pretrained(base_model, model_path)
+
 
 # Define the response function
 def respond(
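
Taken together, the new loading logic applies the LoRA adapter repo ("Heit39/llama_lora_model_1") on top of the base model ("unsloth/Llama-3.2-3B-Instruct") via PEFT, instead of loading the adapter repo directly as a standalone causal LM. A minimal, self-contained sketch of how this loading path could be smoke-tested; the prompt and generation parameters are assumptions, since the body of respond() is not shown in this diff:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

model_path = "Heit39/llama_lora_model_1"  # LoRA adapter repo, as in the commit
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, legacy=False)

# Load the base model, then wrap it with the adapter weights
base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-3B-Instruct")
model = PeftModel.from_pretrained(base_model, model_path)
model.eval()

# Hypothetical smoke test (not part of the commit)
inputs = tokenizer("Hello!", return_tensors="pt")
with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))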
requirements.txt CHANGED
@@ -1,4 +1,5 @@
 huggingface_hub==0.25.2
 
 transformers
-accelerate
+accelerate
+peft
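
The added peft dependency is what provides PeftModel in app.py. If per-request adapter overhead ever matters, one option (an assumption, not something this commit does) is to fold the LoRA weights into the base model once at startup:

from transformers import AutoModelForCausalLM
from peft import PeftModel

base_model = AutoModelForCausalLM.from_pretrained("unsloth/Llama-3.2-3B-Instruct")
model = PeftModel.from_pretrained(base_model, "Heit39/llama_lora_model_1")

# merge_and_unload() folds the LoRA deltas into the base weights and
# returns a plain transformers model, removing the PEFT wrapper overhead
model = model.merge_and_unload()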