"""HTTP endpoint that answers a question with an LLM plus KNN-retrieved context.

You call this ENDPOINT with a POST to ``/api/generate`` and it returns a JSON
document.

POST FORMAT:
    {
        "query": "????",
        "llm": "llama70b-whatever",
        "knn": "3",
        "stream": False
    }

RESPONSE FORMAT:
    {
        "response": "blabla",
        "references": "<one tab-indented line per retrieved match>"
    }

Note that ``references`` is a single newline-separated string, not a list.
"""

# TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING)

import ast
import json

import aiohttp
from flask import Flask
from flask import request
import requests  # NOTE: AIOHTTP, THIS IS FOR RIGHT NOW ONLY

app = Flask(__name__)

URL_LLM = 'https://robinroy03-fury-bot.hf.space'
# URL_LLM = 'http://localhost:11434'  # NOTE: FOR TESTING
URL_EMBEDDING = 'https://robinroy03-fury-embeddings-endpoint.hf.space'
URL_DB = 'https://robinroy03-fury-db-endpoint.hf.space'

# Upper bound (seconds) for each upstream call, so a stalled service cannot
# block a worker forever. Kept generous because LLM generation can be slow.
REQUEST_TIMEOUT_SECONDS = 120

# Metadata ``type`` values that are reported as a "Documentation" reference.
_DOCUMENTATION_TYPES = ('rst', 'documentation_examples')

# String spellings of ``stream`` that must be read as False.
_FALSE_STRINGS = frozenset({'', '0', 'false', 'no', 'off'})


def _post_json(url: str, payload: dict):
    """POST *payload* as JSON to *url* and return the decoded JSON reply.

    Raises:
        requests.HTTPError: if the upstream service answers with a 4xx/5xx status.
        requests.Timeout: if no reply arrives within ``REQUEST_TIMEOUT_SECONDS``.
    """
    response = requests.post(url, json=payload, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail with the real HTTP error instead of a confusing JSON/KeyError later.
    response.raise_for_status()
    return response.json()


def _as_bool(value) -> bool:
    """Interpret a JSON value as a boolean.

    Real booleans pass through unchanged. Strings are accepted as well because
    clients already send ``knn`` as a string; ``bool("false")`` would be True,
    so the explicit false spellings are recognised here.
    """
    if isinstance(value, str):
        return value.strip().lower() not in _FALSE_STRINGS
    return bool(value)


def _format_reference(data: dict) -> str:
    """Build the tab-indented reference text for one retrieved metadata dict.

    Returns an empty string when the metadata names neither a function, a
    class, nor a documentation file.
    """
    # NOTE(review): the original code also appended
    #     ("").replace("//home/robin/Desktop/l/fury", "")
    # here, which is always the empty string. Presumably a source link or path
    # with that local prefix stripped was meant to be emitted -- confirm and
    # restore it if so.
    if data.get("function_name"):
        return f"\tFunction Name: {data['function_name']}"
    if data.get("class_name"):
        return f"\tClass Name: {data['class_name']}"
    if data.get("type") in _DOCUMENTATION_TYPES:
        # Computed outside the f-string: reusing the same quote character
        # inside an f-string is a SyntaxError before Python 3.12.
        file_name = data['path'].split("/")[-1]
        return f"\tDocumentation: {file_name}"
    return ""


def llm_output(question: str, db_knn: dict, llm: str, stream: bool) -> tuple[str, str]:
    """
    Returns output from the LLM using the given user-question and retrieved context

    Args:
        question: the user's question, inserted into the prompt.
        db_knn: KNN result; every item of ``db_knn['matches']`` must provide
            ``['metadata']['data']``, a string holding a Python dict literal.
        llm: model name forwarded to the LLM service as ``model``.
        stream: forwarded to the LLM service as ``stream``.

    Returns:
        ``(answer, references)`` where ``references`` holds one line per match.

    Raises:
        requests.RequestException: if the LLM service call fails or times out.
        KeyError: if ``db_knn`` or the LLM reply lacks an expected key.
        ValueError, SyntaxError: if a match's data is not a valid literal.
    """
    context_parts = []
    reference_lines = []
    for match in db_knn['matches']:
        raw_data = match['metadata']['data']
        context_parts.append(raw_data + "\n")
        # literal_eval only evaluates literals, so it is safe on stored text.
        reference_lines.append(_format_reference(ast.literal_eval(raw_data)) + "\n")

    context = "".join(context_parts)
    references = "".join(reference_lines)

    prompt = f"""
    You are a senior developer. Answer the users question based on the context provided.
    Question: {question}
    Context: {context}
    """

    obj = {
        'model': llm,
        'prompt': prompt,
        'stream': stream
    }

    # NOTE(review): the reply is parsed as one JSON document. If stream=True
    # makes the upstream send a chunked/event stream, this parse would
    # presumably fail -- confirm against the LLM service.
    response_json = _post_json(URL_LLM + "/api/generate", obj)
    return (response_json['choices'][0]['message']['content'], references)


def embedding_output(message: str) -> list:
    """
    Returns embeddings for the given message

    rtype: list of embeddings. Length depends on the model.

    Raises:
        requests.RequestException: if the embedding service call fails or times out.
        KeyError: if the reply has no ``output`` field.
    """
    response_json = _post_json(URL_EMBEDDING + "/embedding", {"text": message})
    return response_json['output']


def db_output(embedding: list, knn: int) -> dict:
    """
    Returns the KNN results.

    rtype: JSON

    Args:
        embedding: embedding vector sent to the DB service as ``embeddings``.
        knn: number of neighbours requested, sent as ``knn``.

    Raises:
        requests.RequestException: if the DB service call fails or times out.
    """
    return _post_json(URL_DB + "/query", {"embeddings": embedding, "knn": knn})


@app.route("/api/generate", methods=['POST'])
def completion():
    """Handle POST /api/generate: embed the query, fetch KNN context, ask the LLM.

    Reads ``query``, ``llm``, ``knn`` and ``stream`` from the JSON request body
    and returns a dict with the ``response`` text and the ``references`` string.
    """
    message = request.get_json()

    query: str = message['query']
    llm: str = message['llm']
    knn: int = int(message['knn'])  # accepted as a number or a numeric string
    stream: bool = _as_bool(message['stream'])

    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    output, references = llm_output(query, db_knn, llm, stream)

    return {
        "response": output,
        "references": references
    }


"""
curl -X POST http://localhost:8000/api/generate -H "Content-Type: application/json" -d '{
    "query": "How do I create a sphere in FURY?",
    "llm": "llama3-70b-8192",
    "knn": "3",
    "stream": false
}'
"""