MrOvkill committed on
Commit
afd4afc
·
1 Parent(s): 116b82a
Files changed (2) hide show
  1. app.py +33 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from llama_cpp import Llama
2
+ from huggingface_hub import hf_hub_download
3
+ from flask import Flask, request, jsonify
4
+
5
# Flask application object; the route handlers in this file register on it.
app = Flask(__name__)

# Fetch the quantized phi-2 weights into the working directory and load the
# model from the path that hf_hub_download returns, so the filename is stated
# once instead of being repeated in a separate hard-coded model_path.
model_path = hf_hub_download("TheBloke/phi-2-GGUF", "phi-2.Q8_0.gguf", local_dir="./")

# n_ctx sets the context window; n_gpu_layers=999 requests that as many layers
# as possible be offloaded to the GPU.
phi = Llama(model_path=model_path, n_ctx=2048, n_gpu_layers=999)
9
+
10
# The leading "@" is required: a bare `app.route(...)` call builds the
# decorator and throws it away, leaving "/" unregistered.
@app.route("/", methods=["GET"])
def index():
    """Return a small HTML page that points callers at the /completion API."""
    return "<html><body><h1>Use API</h1><p>Use /completion as POST with a prompt in a JSON query.</p></body></html>"
13
+
14
@app.route("/completion", methods=["POST"])
def completion():
    """Generate two completions for the prompt in the POSTed JSON body.

    The request body must be a JSON object with a string ``"prompt"`` field.

    Returns:
        A JSON response ``{"responses": [text, text]}`` on success, or a
        JSON error object with HTTP status 400 when the body is not a JSON
        object or ``"prompt"`` is missing or not a string.
    """
    # silent=True yields None for a missing or malformed JSON body, so the
    # explicit 400 below is returned instead of an unhandled exception.
    payload = request.get_json(silent=True)
    prompt = payload.get("prompt") if isinstance(payload, dict) else None
    if not isinstance(prompt, str):
        return jsonify({"error": 'Request body must be JSON with a string "prompt" field.'}), 400

    # The model call takes no `num_completions` keyword and produces a single
    # choice per call, so sample once for each response we want to return.
    responses = []
    for _ in range(2):
        res = phi(
            prompt,
            temperature=0.33,
            top_p=0.95,
            top_k=42,
            max_tokens=1024,
        )
        responses.append(res["choices"][0]["text"])

    return jsonify({"responses": responses})
31
+
32
# Start the Flask development server only when this file is run as a script.
if __name__ == "__main__":
    app.run(
        host="0.0.0.0",  # listen on all interfaces
        port=7860,
    )
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ llama-cpp-python
2
+ huggingface-hub
3
+ flask