jerukperas committed on
Commit
8da7e7b
·
verified ·
1 Parent(s): dd1d1c6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -15
app.py CHANGED
@@ -3,27 +3,18 @@ from llama_cpp import Llama
3
 
4
 
5
  llm = Llama.from_pretrained(
6
- repo_id="bartowski/Phi-3.5-mini-instruct-GGUF",
7
- filename="Phi-3.5-mini-instruct-Q4_K_M.gguf",
8
  numa=True,
9
- use_mmap=False,
10
- use_mlock=True,
11
  seed=-1,
12
- # flash_attn=True,
13
- # n_gpu_layers=-1,
14
  n_batch=1024,
15
  n_ctx=4095,
16
  )
17
 
18
  def respond(prompt: str):
19
- stream = llm.create_chat_completion(stream=True, messages=[{"role": "user", "content": prompt}])
 
20
 
21
- response = ""
22
- for chunk in stream:
23
- if "content" in chunk["choices"][0]["delta"]:
24
- response += chunk["choices"][0]["delta"]["content"]
25
- yield response
26
-
27
-
28
- demo = gr.Interface(fn=respond, inputs=[gr.TextArea("What is the capital of France?")], outputs=[gr.TextArea()])
29
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
3
 
4
 
5
  llm = Llama.from_pretrained(
6
+ repo_id="maddes8cht/smallcloudai-Refact-1_6B-fim-gguf",
7
+ filename="smallcloudai-Refact-1_6B-fim-Q4_K_M.gguf",
8
  numa=True,
 
 
9
  seed=-1,
 
 
10
  n_batch=1024,
11
  n_ctx=4095,
12
  )
13
 
14
  def respond(prompt: str):
15
+ print(llm(prompt, max_tokens=64))
16
+ return prompt
17
 
18
+ prompt = "<fim_prefix>import socket\n\ndef ping_exponential_backoff():\n <fim_suffix>\n\nif __name__ == \"main\":\n ping_exponential_backoff()<fim_middle>"
19
+ demo = gr.Interface(fn=respond, inputs=[gr.TextArea(prompt)], outputs=[gr.TextArea()])
 
 
 
 
 
 
20
  demo.launch(server_name="0.0.0.0", server_port=7860)