# (Removed: "Spaces: Sleeping" — a Hugging Face Spaces status banner captured
#  when this file was scraped; it was never part of the program.)
""" | |
You call this ENDPOINT and it returns you a JSON which is of this format: | |
POST FORMAT: | |
{ | |
"query": "????", | |
"llm": "llama70b-whatever", | |
"knn": "3", | |
"stream": False | |
} | |
RESPONSE FORMAT: | |
{ | |
"response": "blabla", | |
"references": ["1", "2", "3", ...] | |
} | |
""" | |
# TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING)
import ast
import json

import aiohttp  # NOTE: unused for now; kept for the planned async port
import requests  # NOTE: AIOHTTP, THIS IS FOR RIGHT NOW ONLY
from flask import Flask, request

app = Flask(__name__)
def llm_output(question: str, db_knn: dict, llm: str, stream: bool) -> tuple[str, str]:
    """
    Return the LLM answer and a formatted reference list for a user question.

    :param question: raw user question.
    :param db_knn: KNN response from the vector DB; each
        db_knn['matches'][i]['metadata']['data'] is expected to be a Python
        literal (repr of a dict) with at least 'path' and 'type' keys.
    :param llm: model name forwarded to the generation endpoint.
    :param stream: forwarded to the endpoint. NOTE: the response is parsed as
        one JSON document, so callers must pass stream=False for this to work.
    :returns: (answer_text, references) — references is one tab-annotated,
        newline-terminated line per retrieved match.
    """
    URL_LLM = 'https://robinroy03-fury-bot.hf.space'
    # URL_LLM = 'http://localhost:11434'  # NOTE: FOR TESTING

    context_parts: list[str] = []
    reference_lines: list[str] = []
    for match in db_knn['matches']:
        raw = match['metadata']['data']
        context_parts.append(raw + "\n")
        # Stored metadata is a repr() of a dict, not JSON — hence literal_eval.
        data = ast.literal_eval(raw)
        # Strip the absolute build path that leaked into some stored paths.
        line = ("<https://github.com/fury-gl/fury/tree/master/"
                + data['path'] + ">").replace("//home/robin/Desktop/l/fury", "")
        if data.get("function_name"):
            line += f"\tFunction Name: {data.get('function_name')}"
        elif data.get("class_name"):
            line += f"\tClass Name: {data.get('class_name')}"
        elif data['type'] in ('rst', 'documentation_examples'):
            # BUG FIX: the original reused double quotes inside an f-string
            # (data['path'].split("/")[-1]) — a SyntaxError before Python 3.12.
            # Compute outside the f-string; also merges two identical branches.
            doc_name = data['path'].split("/")[-1]
            line += f"\tDocumentation: {doc_name}"
        reference_lines.append(line + "\n")

    # join() instead of quadratic += string building.
    context = "".join(context_parts)
    references = "".join(reference_lines)

    prompt = f"""
    You are a senior developer. Answer the users question based on the context provided.
    Question: {question}
    Context: {context}
    """

    payload = {
        'model': llm,
        'prompt': prompt,
        'stream': stream
    }
    # timeout added so a hung upstream doesn't wedge the worker;
    # requests is a stopgap until the aiohttp/FastAPI port (see TODO above).
    response = requests.post(URL_LLM + "/api/generate", json=payload, timeout=60)
    response_json = response.json()
    return (response_json['choices'][0]['message']['content'], references)
def embedding_output(message: str) -> list:
    """
    Return the embedding vector for *message*.

    Calls the remote embedding endpoint; the vector length depends on the
    model served there.

    :param message: text to embed.
    :returns: list of embedding values.
    :raises requests.HTTPError: if the endpoint returns an error status.
    """
    URL_EMBEDDING = 'https://robinroy03-fury-embeddings-endpoint.hf.space'

    # response.json() replaces json.loads(response.text); raise_for_status
    # turns an HTTP error into a clear exception instead of a downstream
    # JSONDecodeError; timeout keeps a hung upstream from wedging the worker.
    response = requests.post(URL_EMBEDDING + "/embedding",
                             json={"text": message}, timeout=60)
    response.raise_for_status()
    return response.json()['output']
def db_output(embedding: list, knn: int) -> dict:
    """
    Return the KNN results for *embedding* from the vector-DB endpoint.

    :param embedding: embedding vector (as produced by embedding_output).
    :param knn: number of nearest neighbours to retrieve.
    :returns: parsed JSON response (dict with a 'matches' list).
    :raises requests.HTTPError: if the endpoint returns an error status.
    """
    URL_DB = 'https://robinroy03-fury-db-endpoint.hf.space'

    # Same hardening as embedding_output: explicit timeout, HTTP-error check,
    # and response.json() instead of json.loads(response.text).
    response = requests.post(URL_DB + "/query",
                             json={"embeddings": embedding, "knn": knn},
                             timeout=60)
    response.raise_for_status()
    return response.json()
@app.route("/api/generate", methods=["POST"])
def completion():
    """
    RAG endpoint: embed the query, fetch KNN context, ask the LLM.

    Expects a JSON body {"query", "llm", "knn", "stream"} and returns
    {"response": <answer>, "references": <newline-joined reference lines>}.

    BUG FIX: the original function was never registered with Flask — no
    @app.route decorator — so the /api/generate endpoint shown in the curl
    example at the bottom of this file was unreachable.
    """
    message = request.get_json()
    query: str = message['query']
    llm: str = message['llm']
    knn: int = int(message['knn'])
    # NOTE: bool("false") is True — clients must send a JSON boolean here,
    # not a string, for stream to be interpreted correctly.
    stream: bool = bool(message['stream'])

    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    output, references = llm_output(query, db_knn, llm, stream)

    # Flask serializes a returned dict to a JSON response automatically.
    return {
        "response": output,
        "references": references
    }
""" | |
curl -X POST http://localhost:8000/api/generate -H "Content-Type: application/json" -d '{ | |
"query": "How do I create a sphere in FURY?", | |
"llm": "llama3-70b-8192", | |
"knn": "3", | |
"stream": false | |
}' | |
""" | |