Demo1
Browse files- app.py +33 -0
- requirements.txt +3 -0
app.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
from flask import Flask, request, jsonify

# Flask app exposing a minimal text-completion API around a local GGUF model.
app = Flask(__name__)

# Fetch the quantized Phi-2 weights into the working directory at startup.
# NOTE(review): this runs on every import; hf_hub_download re-uses an existing
# local copy rather than re-downloading — confirm for the local_dir code path.
hf_hub_download("TheBloke/phi-2-GGUF", "phi-2.Q8_0.gguf", local_dir="./")
# n_ctx=2048 matches Phi-2's context window; n_gpu_layers=999 requests that
# all layers be offloaded to GPU when a GPU build of llama.cpp is available.
phi = Llama(model_path="./phi-2.Q8_0.gguf", n_ctx=2048, n_gpu_layers=999)
|
9 |
+
|
10 |
+
# BUG FIX: the original read `app.route("/", methods=["GET"])` on its own
# line — calling route() without applying it as a decorator — so the "/"
# route was never registered and index() was unreachable dead code.
@app.route("/", methods=["GET"])
def index():
    """Landing page directing callers to the POST /completion endpoint."""
    return "<html><body><h1>Use API</h1><p>Use /completion as POST with a prompt in a JSON query.</p></body></html>"
|
13 |
+
|
14 |
+
@app.route("/completion", methods=["POST"])
def completion():
    """Generate two completions for the prompt in the POSTed JSON body.

    Expects a JSON body of the form {"prompt": "..."}.
    Returns JSON {"responses": [text_1, text_2]} on success, or a 400
    error object when the body is missing or has no "prompt" field.
    """
    # Robustness: request.json raises (→ HTTP 500) on a missing/invalid
    # JSON body; validate explicitly and answer with a clear 400 instead.
    payload = request.get_json(silent=True)
    if not payload or "prompt" not in payload:
        return jsonify({"error": "POST a JSON body with a 'prompt' field"}), 400
    prompt = payload["prompt"]

    # BUG FIX: llama-cpp-python's Llama.__call__/create_completion does not
    # accept a `num_completions` argument (the original call raised
    # TypeError), and a single call returns only one choice, so indexing
    # res["choices"][1] could never succeed. Sample twice to produce the
    # two responses the endpoint promises.
    responses = []
    for _ in range(2):
        res = phi(
            prompt,
            temperature=0.33,
            top_p=0.95,
            top_k=42,
            max_tokens=1024,
        )
        responses.append(res["choices"][0]["text"])

    return jsonify({"responses": responses})
|
31 |
+
|
32 |
+
# Run the Flask development server on all interfaces; port 7860 is the
# conventional port for Hugging Face Spaces apps.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
llama-cpp-python
|
2 |
+
huggingface-hub
|
3 |
+
flask
|