vislupus committed on
Commit
b0e3bef
·
verified ·
1 Parent(s): ae13704

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -17
app.py CHANGED
@@ -25,20 +25,11 @@ hf_hub_download(
25
  if not os.path.exists(model_path):
26
  raise FileNotFoundError(f"Model file not found at {model_path}")
27
 
28
- print("Loading the model...")
29
  llm = Llama(model_path=model_path)
30
- print("Model loaded successfully!")
31
 
32
  def generate_response(message, history, temperature=0.7, top_p=1.0, max_tokens=256):
33
- try:
34
- conversation = ""
35
- for msg in history:
36
- role, content = msg
37
- conversation += f"<start_of_turn>{role}\n{content}\n<end_of_turn>"
38
-
39
- conversation += f"<start_of_turn>user\n{message}\n<end_of_turn>"
40
-
41
- response = llm(conversation, max_tokens=max_tokens, temperature=temperature, top_p=top_p)
42
  return response["choices"][0]["text"].strip()
43
 
44
  except Exception as e:
@@ -52,9 +43,3 @@ if __name__ == "__main__":
52
  )
53
 
54
  gguf_demo.launch(share=True)
55
-
56
- try:
57
- if llm:
58
- llm.close()
59
- except Exception as e:
60
- print(f"Error closing model: {e}")
 
25
  if not os.path.exists(model_path):
26
  raise FileNotFoundError(f"Model file not found at {model_path}")
27
 
 
28
  llm = Llama(model_path=model_path)
 
29
 
30
  def generate_response(message, history, temperature=0.7, top_p=1.0, max_tokens=256):
31
+ try:
32
+ response = llm(message, max_tokens=max_tokens, temperature=temperature, top_p=top_p)
 
 
 
 
 
 
 
33
  return response["choices"][0]["text"].strip()
34
 
35
  except Exception as e:
 
43
  )
44
 
45
  gguf_demo.launch(share=True)