"""
You call this ENDPOINT and it returns you a JSON which is of this format:
POST FORMAT:
{
"query": "????",
"llm": "llama70b-whatever",
"knn": "3",
"stream": False
}
RESPONSE FORMAT:
{
"response": "blabla",
"references": ["1", "2", "3", ...]
}
"""
# TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING)
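
# A rough sketch of what the async port could look like (names and schema are
# assumptions kept close to the current payload, not the final implementation):
#
#   from fastapi import FastAPI
#   from pydantic import BaseModel
#
#   class GenerateRequest(BaseModel):
#       query: str
#       llm: str
#       knn: int
#       stream: bool = False
#
#   app = FastAPI()
#
#   @app.post("/api/generate")
#   async def completion(body: GenerateRequest):
#       ...  # same pipeline: embed -> KNN -> LLM, but with aiohttp calls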
import ast
import json

from flask import Flask, request
import requests  # NOTE: SWITCH TO AIOHTTP WITH THE ASYNC PORT, THIS IS FOR RIGHT NOW ONLY

app = Flask(__name__)
def llm_output(question: str, db_knn: dict, llm: str, stream: bool) -> tuple[str, str]:
    """
    Returns the LLM output for the given user question and the retrieved context,
    along with a newline-separated block of references.
    """
    URL_LLM = 'https://robinroy03-fury-bot.hf.space'
    # URL_LLM = 'http://localhost:11434'  # NOTE: FOR TESTING

    context = ""
    references = ""
    for match in db_knn['matches']:
        data = match['metadata']['data']
        context += (data + "\n")
        data = ast.literal_eval(data)
        # Strip the local absolute path prefix so the link points into the repo.
        references += ("<https://github.com/fury-gl/fury/tree/master/" + data['path'] + ">").replace("//home/robin/Desktop/l/fury", "")
        if data.get("function_name"):
            references += f"\tFunction Name: {data.get('function_name')}"
        elif data.get("class_name"):
            references += f"\tClass Name: {data.get('class_name')}"
        elif data['type'] in ('rst', 'documentation_examples'):
            references += f"\tDocumentation: {data['path'].split('/')[-1]}"
        references += "\n"

    prompt = f"""
    You are a senior developer. Answer the user's question based on the context provided.
    Question: {question}
    Context: {context}
    """
    obj = {
        'model': llm,
        'prompt': prompt,
        'stream': stream
    }
    response = requests.post(URL_LLM + "/api/generate", json=obj)
    response_json = response.json()
    return (response_json['choices'][0]['message']['content'], references)
def embedding_output(message: str) -> list:
    """
    Returns the embedding for the given message.
    rtype: list of floats. Length depends on the model.
    """
    URL_EMBEDDING = 'https://robinroy03-fury-embeddings-endpoint.hf.space'

    response = requests.post(URL_EMBEDDING + "/embedding", json={"text": message})
    response_json = response.json()
    return response_json['output']
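
# Example (illustrative values only):
#   embedding_output("create a sphere")  ->  [0.0123, -0.0456, ...]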
def db_output(embedding: list, knn: int) -> dict:
    """
    Returns the KNN results for the given embedding.
    rtype: JSON
    """
    URL_DB = 'https://robinroy03-fury-db-endpoint.hf.space'

    response = requests.post(URL_DB + "/query", json={"embeddings": embedding, "knn": knn})
    response_json = response.json()
    return response_json
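
# For reference, llm_output() assumes each entry of db_knn["matches"] looks
# roughly like this (shape inferred from the parsing code above, not from the
# DB endpoint's docs):
#
#   {"metadata": {"data": "{'path': '...', 'type': 'rst', 'function_name': '...'}"}}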
@app.route("/api/generate", methods=['POST'])
def completion():
    message = request.get_json()
    query: str = message['query']
    llm: str = message['llm']
    knn: int = int(message['knn'])
    stream: bool = bool(message['stream'])

    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    output, references = llm_output(query, db_knn, llm, stream)

    return {
        "response": output,
        "references": references
    }
"""
curl -X POST http://localhost:8000/api/generate -H "Content-Type: application/json" -d '{
"query": "How do I create a sphere in FURY?",
"llm": "llama3-70b-8192",
"knn": "3",
"stream": false
}'
"""