""" You call this ENDPOINT and it returns you a JSON which is of this format: POST FORMAT: (/api/groq or api/google or /api/ollama ...) { "query": "????", "llm": "llama70b-whatever", "knn": "3", "stream": False } RESPONSE FORMAT: { "response": "blabla", "references": "1, 2, 3" } """ # TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING) from flask import Flask from flask import request from utils import embedding_output, db_output, groq_llm_output, ollama_llm_output, google_llm_output app = Flask(__name__) @app.route("/api/groq/generate", methods=['POST']) def groq_completion(): message = request.get_json() query: str = message['query'] llm: str = message['llm'] knn: int = int(message['knn']) stream: bool = bool(message['stream']) embedding_data = embedding_output(query) db_knn = db_output(embedding_data, knn) output, references = groq_llm_output(query, db_knn, llm, stream) return { "response": output, "references": references } @app.route("/api/ollama/generate", methods=['POST']) def ollama_completion(): message = request.get_json() query: str = message['query'] llm: str = message['llm'] knn: int = int(message['knn']) stream: bool = bool(message['stream']) embedding_data = embedding_output(query) db_knn = db_output(embedding_data, knn) response_json, references = ollama_llm_output(query, db_knn, llm, stream) if response_json.get("error"): print(response_json) return { "response": "An error occured, try again.", "references": "No references" } return { "response": response_json['response'], "references": references } @app.route("/api/google/generate", methods=['POST']) def google_completion(): message = request.get_json() query: str = message['query'] llm: str = message['llm'] knn: int = int(message['knn']) stream: bool = bool(message['stream']) embedding_data = embedding_output(query) db_knn = db_output(embedding_data, knn) response_json, references = google_llm_output(query, db_knn, llm, stream) return { "response": response_json, "references": references } """ curl -X POST http://localhost:8000/api/groq/generate -H "Content-Type: application/json" -d '{ "query": "How do I create a sphere in FURY?", "llm": "llama3-70b-8192", "knn": "3", "stream": false }' curl -X POST http://localhost:8000/api/ollama/generate -H "Content-Type: application/json" -d '{ "query": "How do I create a sphere in FURY?", "llm": "phi3", "knn": "3", "stream": false }' """