"""
You call this ENDPOINT and it returns you a JSON which is of this format:
POST FORMAT:
{
"query": "????",
"llm": "llama70b-whatever",
"knn": "3",
"stream": False
}
RESPONSE FORMAT:
{
"response": "blabla",
"references": ["1", "2", "3", ...]
}
"""
# TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING)
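
# A rough sketch of what the async port could look like (names and schema are
# assumptions kept close to the current payload, not the final implementation):
#
#   from fastapi import FastAPI
#   from pydantic import BaseModel
#
#   class GenerateRequest(BaseModel):
#       query: str
#       llm: str
#       knn: int
#       stream: bool = False
#
#   app = FastAPI()
#
#   @app.post("/api/generate")
#   async def completion(body: GenerateRequest):
#       ...  # same pipeline: embed -> KNN -> LLM, but with aiohttp calls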
import ast
import json

from flask import Flask, request
import requests  # NOTE: SWITCH TO AIOHTTP WITH THE ASYNC PORT, THIS IS FOR RIGHT NOW ONLY

app = Flask(__name__)
def llm_output(question: str, db_knn: dict, llm: str, stream: bool) -> tuple[str, str]:
    """
    Returns the LLM output for the given user question and the retrieved context,
    along with a newline-separated block of references.
    """
    URL_LLM = 'https://robinroy03-fury-bot.hf.space'
    # URL_LLM = 'http://localhost:11434'  # NOTE: FOR TESTING

    context = ""
    references = ""
    for match in db_knn['matches']:
        data = match['metadata']['data']
        context += (data + "\n")
        data = ast.literal_eval(data)
        # Strip the local absolute path prefix so the link points into the repo.
        references += ("<https://github.com/fury-gl/fury/tree/master/" + data['path'] + ">").replace("//home/robin/Desktop/l/fury", "")
        if data.get("function_name"):
            references += f"\tFunction Name: {data.get('function_name')}"
        elif data.get("class_name"):
            references += f"\tClass Name: {data.get('class_name')}"
        elif data['type'] in ('rst', 'documentation_examples'):
            references += f"\tDocumentation: {data['path'].split('/')[-1]}"
        references += "\n"

    prompt = f"""
    You are a senior developer. Answer the user's question based on the context provided.
    Question: {question}
    Context: {context}
    """
    obj = {
        'model': llm,
        'prompt': prompt,
        'stream': stream
    }
    response = requests.post(URL_LLM + "/api/generate", json=obj)
    response_json = response.json()
    return (response_json['choices'][0]['message']['content'], references)
def embedding_output(message: str) -> list:
    """
    Returns the embedding for the given message.
    rtype: list of floats. Length depends on the model.
    """
    URL_EMBEDDING = 'https://robinroy03-fury-embeddings-endpoint.hf.space'

    response = requests.post(URL_EMBEDDING + "/embedding", json={"text": message})
    response_json = response.json()
    return response_json['output']
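
# Example (illustrative values only):
#   embedding_output("create a sphere")  ->  [0.0123, -0.0456, ...]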
def db_output(embedding: list, knn: int) -> dict:
    """
    Returns the KNN results for the given embedding.
    rtype: JSON
    """
    URL_DB = 'https://robinroy03-fury-db-endpoint.hf.space'

    response = requests.post(URL_DB + "/query", json={"embeddings": embedding, "knn": knn})
    response_json = response.json()
    return response_json
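
# For reference, llm_output() assumes each entry of db_knn["matches"] looks
# roughly like this (shape inferred from the parsing code above, not from the
# DB endpoint's docs):
#
#   {"metadata": {"data": "{'path': '...', 'type': 'rst', 'function_name': '...'}"}}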
@app.route("/api/generate", methods=['POST'])
def completion():
    message = request.get_json()
    query: str = message['query']
    llm: str = message['llm']
    knn: int = int(message['knn'])
    stream: bool = bool(message['stream'])

    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    output, references = llm_output(query, db_knn, llm, stream)

    return {
        "response": output,
        "references": references
    }
"""
curl -X POST http://localhost:8000/api/generate -H "Content-Type: application/json" -d '{
"query": "How do I create a sphere in FURY?",
"llm": "llama3-70b-8192",
"knn": "3",
"stream": false
}'
"""