Ozaii committed
Commit 696e8bc · verified · Parent: 23cf608

Update app.py

Files changed (1): app.py (+30 −27)
app.py CHANGED
@@ -4,38 +4,41 @@ from peft import PeftConfig
 from transformers import AutoTokenizer, TextIteratorStreamer
 from threading import Thread
 import gradio as gr
-from huggingface import spaces
+import spaces
 
 MODEL_PATH = "Ozaii/zephyr-bae"
+BASE_MODEL = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
 max_seq_length = 2048
 
-
 print("Attempting to load Zephyr... Cross your fingers! 🤞")
-try:
-    @spaces.GPU
-    peft_config = PeftConfig.from_pretrained(MODEL_PATH)
-    base_model_name = peft_config.base_model_name_or_path
-    print(f"Loading base model: {base_model_name}")
-
-    model, tokenizer = FastLanguageModel.from_pretrained(
-        model_name=base_model_name,
-        max_seq_length=max_seq_length,
-        dtype=None,  # Auto-detect
-        load_in_4bit=True,
-    )
-
-    model = FastLanguageModel.get_peft_model(
-        model,
-        peft_config=peft_config,
-        adapter_name="default",
-        use_gradient_checkpointing=True,
-    )
-
-    FastLanguageModel.for_inference(model)
-    print("Zephyr loaded successfully! Time to charm!")
-except Exception as e:
-    print(f"Oops! Zephyr seems to be playing hide and seek. Error: {str(e)}")
-    raise
+
+@spaces.GPU
+def load_model():
+    try:
+        peft_config = PeftConfig.from_pretrained(MODEL_PATH)
+
+        model, tokenizer = FastLanguageModel.from_pretrained(
+            model_name=BASE_MODEL,
+            max_seq_length=max_seq_length,
+            dtype=None,  # Auto-detect
+            load_in_4bit=True,
+        )
+
+        model = FastLanguageModel.get_peft_model(
+            model,
+            peft_config=peft_config,
+            adapter_name="default",
+            use_gradient_checkpointing=True,
+        )
+
+        FastLanguageModel.for_inference(model)
+        print("Zephyr loaded successfully! Time to charm!")
+        return model, tokenizer
+    except Exception as e:
+        print(f"Oops! Zephyr seems to be playing hide and seek. Error: {str(e)}")
+        raise
+
+model, tokenizer = load_model()
 
 @spaces.GPU
 def generate_response(prompt, max_new_tokens=128):
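
Why the restructuring: in the old code, @spaces.GPU sat above a bare assignment inside the try block, which is invalid Python (a decorator must be followed by a def or class) and fails at parse time, before the try can catch anything; the old `from huggingface import spaces` is also wrong, since the ZeroGPU helper ships as the top-level `spaces` package. The commit therefore fixes the import and wraps loading in a decorated load_model() function. A minimal sketch of the ZeroGPU pattern this relies on (the function below is illustrative, not from the repo):

import spaces
import torch

@spaces.GPU
def describe_device() -> str:
    # On ZeroGPU Spaces, a CUDA device is attached only while a
    # @spaces.GPU-decorated call runs, so GPU work belongs in functions,
    # not in module-level statements.
    return torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"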
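
The hunk ends at the generate_response signature, so its body is not shown. Given the TextIteratorStreamer and Thread imports, it presumably streams tokens into the Gradio UI; a hypothetical sketch of that standard pattern follows (the file's real body may differ; model and tokenizer are the module-level objects returned by load_model()):

from threading import Thread
from transformers import TextIteratorStreamer

@spaces.GPU
def generate_response(prompt, max_new_tokens=128):
    # Run generate() on a worker thread so decoded text can be yielded
    # to Gradio as it is produced.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=max_new_tokens),
    ).start()
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial  # each partial string re-renders the Gradio output as it grows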