# "Spaces: Sleeping / Sleeping" — Hugging Face Spaces status banner captured
# during extraction; kept as a comment so the module stays importable.
""" | |
You call this ENDPOINT and it returns you a JSON which is of this format: | |
POST FORMAT: (/api/groq or api/google or /api/ollama ...) | |
{ | |
"query": "????", | |
"llm": "llama70b-whatever", | |
"knn": "3", | |
"stream": False | |
} | |
RESPONSE FORMAT: | |
{ | |
"response": "blabla", | |
"references": "1, 2, 3" | |
} | |
""" | |
# TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING)
from flask import Flask, request

from utils import embedding_output, db_output, groq_llm_output, ollama_llm_output, google_llm_output

app = Flask(__name__)
@app.route("/api/groq/generate", methods=["POST"])
def groq_completion():
    """Answer a query via the Groq-hosted LLM, grounded in KNN hits from the vector DB.

    Expects a JSON body: {"query": str, "llm": str, "knn": int-like, "stream": bool}.
    Returns a JSON-serializable dict: {"response": ..., "references": ...}.
    """
    # NOTE(review): the original function was never registered with Flask; the
    # route path follows the curl example at the bottom of this file.
    message = request.get_json()
    query: str = message['query']
    llm: str = message['llm']
    # "knn" arrives as a string in the documented payload ("3") - coerce to int.
    knn: int = int(message['knn'])
    # NOTE(review): bool("false") is True - this is only safe while clients send
    # a real JSON boolean for "stream", as the curl examples do.
    stream: bool = bool(message['stream'])
    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    output, references = groq_llm_output(query, db_knn, llm, stream)
    return {
        "response": output,
        "references": references
    }
@app.route("/api/ollama/generate", methods=["POST"])
def ollama_completion():
    """Answer a query via a local Ollama model, grounded in KNN hits from the vector DB.

    Expects a JSON body: {"query": str, "llm": str, "knn": int-like, "stream": bool}.
    Returns a JSON-serializable dict: {"response": ..., "references": ...}.
    On an upstream Ollama error, returns a generic error message instead of failing.
    """
    # NOTE(review): the original function was never registered with Flask; the
    # route path follows the curl example at the bottom of this file.
    message = request.get_json()
    query: str = message['query']
    llm: str = message['llm']
    # "knn" arrives as a string in the documented payload ("3") - coerce to int.
    knn: int = int(message['knn'])
    # NOTE(review): bool("false") is True - this is only safe while clients send
    # a real JSON boolean for "stream", as the curl examples do.
    stream: bool = bool(message['stream'])
    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    response_json, references = ollama_llm_output(query, db_knn, llm, stream)
    if response_json.get("error"):
        # Log the raw upstream payload for debugging, but do not leak it to the client.
        print(response_json)
        return {
            "response": "An error occurred, try again.",  # fixed typo: "occured"
            "references": "No references"
        }
    return {
        "response": response_json['response'],
        "references": references
    }
@app.route("/api/google/generate", methods=["POST"])
def google_completion():
    """Answer a query via the Google LLM API, grounded in KNN hits from the vector DB.

    Expects a JSON body: {"query": str, "llm": str, "knn": int-like, "stream": bool}.
    Returns a JSON-serializable dict: {"response": ..., "references": ...}.
    """
    # NOTE(review): the original function was never registered with Flask; the
    # route path mirrors the sibling /api/groq/generate and /api/ollama/generate
    # endpoints - confirm against the deployed clients.
    message = request.get_json()
    query: str = message['query']
    llm: str = message['llm']
    # "knn" arrives as a string in the documented payload ("3") - coerce to int.
    knn: int = int(message['knn'])
    # NOTE(review): bool("false") is True - this is only safe while clients send
    # a real JSON boolean for "stream", as the curl examples do.
    stream: bool = bool(message['stream'])
    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    response_json, references = google_llm_output(query, db_knn, llm, stream)
    return {
        "response": response_json,
        "references": references
    }
""" | |
curl -X POST http://localhost:8000/api/groq/generate -H "Content-Type: application/json" -d '{ | |
"query": "How do I create a sphere in FURY?", | |
"llm": "llama3-70b-8192", | |
"knn": "3", | |
"stream": false | |
}' | |
curl -X POST http://localhost:8000/api/ollama/generate -H "Content-Type: application/json" -d '{ | |
"query": "How do I create a sphere in FURY?", | |
"llm": "phi3", | |
"knn": "3", | |
"stream": false | |
}' | |
""" | |