jedeland commited on
Commit
7ca3ebf
·
1 Parent(s): 59b7c26
Files changed (2) hide show
  1. app.py +8 -12
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,11 +1,12 @@
1
  import gradio as gr
2
 
3
- # Load model directly
4
- from transformers import AutoModel, AutoTokenizer
 
 
 
 
5
 
6
- # Load the LoRA model and tokenizer
7
- tokenizer = AutoTokenizer.from_pretrained("ID2223JR/lora_model")
8
- model = AutoModel.from_pretrained("ID2223JR/lora_model")
9
 
10
  # Data storage
11
  ingredients_list = []
@@ -39,13 +40,8 @@ def submit_to_model():
39
  ingredients_list
40
  )
41
 
42
- # Tokenize and pass the prompt to the model
43
- inputs = tokenizer(prompt, return_tensors="pt")
44
- outputs = model.generate(**inputs, max_new_tokens=100)
45
-
46
- # Decode the model output
47
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
48
- return response
49
 
50
 
51
  # App
 
1
  import gradio as gr
2
 
3
+ from llama_cpp import Llama
4
+
5
+ llm = Llama.from_pretrained(
6
+ repo_id="ID2223JR/gguf_model",
7
+ filename="GGUF_FILE",
8
+ )
9
 
 
 
 
10
 
11
  # Data storage
12
  ingredients_list = []
 
40
  ingredients_list
41
  )
42
 
43
+ response = llm.create_chat_completion(messages=prompt)
44
+ return response["choices"][0]["message"]["content"]
 
 
 
 
 
45
 
46
 
47
  # App
requirements.txt CHANGED
@@ -1,3 +1,2 @@
1
- transformers==4.46.3
2
  gradio==5.1.0
3
- torch==2.5.1
 
 
1
  gradio==5.1.0
2
+ llama-cpp-python==0.2.24