robinroy03 commited on
Commit
6cff55d
·
1 Parent(s): 759135b

the engine is here

Browse files
Files changed (4) hide show
  1. .gitignore +4 -0
  2. Dockerfile +16 -0
  3. app.py +121 -0
  4. requirements.txt +20 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ venv
2
+ .env
3
+ __pycache__
4
+ test.*
Dockerfile ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3
2
+
3
+ RUN useradd -m -u 1000 user
4
+ USER user
5
+ ENV HOME=/home/user \
6
+ PATH=/home/user/.local/bin:$PATH
7
+
8
+ COPY --chown=user . $HOME/engine
9
+
10
+ WORKDIR $HOME/engine
11
+
12
+ RUN mkdir $HOME/.cache
13
+
14
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
15
+
16
+ CMD ["gunicorn", "-w", "5", "-b", "0.0.0.0:7860","app:app"]
app.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ You call this ENDPOINT and it returns you a JSON which is of this format:
3
+
4
+ POST FORMAT:
5
+ {
6
+ "query": "????",
7
+ "llm": "llama70b-whatever",
8
+ "knn": "3",
9
+ "stream": False
10
+ }
11
+
12
+ RESPONSE FORMAT:
13
+ {
14
+ "response": "blabla",
15
+ "references": "reference 1\nreference 2\n..."  (newline-separated string, not a list)
16
+ }
17
+ """
18
+
19
+ # TODO: MOVE IT ALL TO ASYNC FASTAPI, FOR NOW THIS IS A QUICK SPIN UP (IMPORTANT FOR SCALING)
20
+
21
+ import ast
22
+ import json
23
+
24
+ import aiohttp
25
+ from flask import Flask
26
+ from flask import request
27
+ import requests # NOTE: AIOHTTP, THIS IS FOR RIGHT NOW ONLY
28
+
29
+ app = Flask(__name__)
30
+
31
def llm_output(question: str, db_knn: dict, llm: str, stream: bool) -> tuple[str, str]:
    """
    Return the LLM's answer for *question*, grounded in the retrieved context.

    Parameters
    ----------
    question : the user's question, inserted verbatim into the prompt.
    db_knn : KNN response from the vector DB; each entry of
        ``db_knn['matches']`` carries ``metadata.data``, a Python-literal
        string parsed with ``ast.literal_eval``.
    llm : model identifier forwarded to the generation endpoint.
    stream : forwarded as-is. NOTE(review): a streamed response is not
        line-parsed below, so decoding would fail on chunked output —
        callers should pass False until streaming is implemented.

    Returns
    -------
    (answer, references) : the model's answer text and a newline-separated
        string of GitHub reference links (one per retrieved match).
    """
    URL_LLM = 'https://robinroy03-fury-bot.hf.space'
    # URL_LLM = 'http://localhost:11434'  # NOTE: FOR TESTING

    context = ""
    references = ""
    for match in db_knn['matches']:
        raw = match['metadata']['data']
        context += raw + "\n"
        data = ast.literal_eval(raw)
        # The index was built from a local checkout; strip that absolute
        # prefix so the link points inside the upstream repository.
        references += ("<https://github.com/fury-gl/fury/tree/master/" + data['path'] + ">").replace("//home/robin/Desktop/l/fury", "")
        if data.get("function_name"):
            references += f"\tFunction Name: {data.get('function_name')}"
        elif data.get("class_name"):
            references += f"\tClass Name: {data.get('class_name')}"
        elif data['type'] in ('rst', 'documentation_examples'):
            # BUGFIX: the original nested double quotes inside a
            # double-quoted f-string (split("/")), a SyntaxError before
            # Python 3.12; both branches also produced identical output,
            # so they are merged here.
            references += "\tDocumentation: " + data['path'].split("/")[-1]
        references += "\n"

    prompt = f"""
    You are a senior developer. Answer the users question based on the context provided.
    Question: {question}
    Context: {context}
    """
    obj = {
        'model': llm,
        'prompt': prompt,
        'stream': stream
    }
    response = requests.post(URL_LLM + "/api/generate", json=obj)
    # .json() decodes the body directly; equivalent to json.loads(response.text).
    response_json = response.json()
    return (response_json['choices'][0]['message']['content'], references)
69
+
70
+
71
def embedding_output(message: str) -> list:
    """
    Return the embedding vector for *message*.

    Calls the external embedding endpoint; the length of the returned list
    depends on the model served there.

    :param message: text to embed.
    :return: list of floats under the endpoint's ``output`` key.
    """
    URL_EMBEDDING = 'https://robinroy03-fury-embeddings-endpoint.hf.space'
    response = requests.post(URL_EMBEDDING + "/embedding", json={"text": message})
    # Idiom: Response.json() replaces json.loads(response.text).
    return response.json()['output']
81
+
82
+
83
def db_output(embedding: list, knn: int) -> dict:
    """
    Return the K nearest neighbours for *embedding* from the vector DB.

    :param embedding: query embedding vector.
    :param knn: number of neighbours to retrieve.
    :return: decoded JSON response (expected to contain a ``matches`` list).
    """
    URL_DB = 'https://robinroy03-fury-db-endpoint.hf.space'
    response = requests.post(URL_DB + "/query", json={"embeddings": embedding, "knn": knn})
    # Idiom: Response.json() replaces json.loads(response.text).
    return response.json()
93
+
94
+
95
@app.route("/api/generate", methods=['POST'])
def completion():
    """
    RAG endpoint: embed the query, retrieve KNN context, ask the LLM.

    Expects JSON ``{"query": str, "llm": str, "knn": int|str, "stream": bool}``
    and returns ``{"response": str, "references": str}`` where *references*
    is a newline-separated string of links.
    """
    message = request.get_json()

    try:
        query: str = message['query']
        llm: str = message['llm']
        knn: int = int(message['knn'])
        stream: bool = bool(message['stream'])
    except (KeyError, TypeError, ValueError) as exc:
        # ROBUSTNESS: malformed payloads previously surfaced as an opaque
        # HTTP 500 (KeyError/ValueError inside Flask); report them as 400.
        return {"error": f"bad request: {exc}"}, 400

    embedding_data = embedding_output(query)
    db_knn = db_output(embedding_data, knn)
    output, references = llm_output(query, db_knn, llm, stream)

    return {
        "response": output,
        "references": references
    }
112
+
113
+
114
+ """
115
+ curl -X POST http://localhost:8000/api/generate -H "Content-Type: application/json" -d '{
116
+ "query": "How do I create a sphere in FURY?",
117
+ "llm": "llama3-70b-8192",
118
+ "knn": "3",
119
+ "stream": false
120
+ }'
121
+ """
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiohttp==3.9.5
2
+ aiosignal==1.3.1
3
+ attrs==23.2.0
4
+ blinker==1.8.2
5
+ certifi==2024.6.2
6
+ charset-normalizer==3.3.2
7
+ click==8.1.7
8
+ Flask==3.0.3
9
+ frozenlist==1.4.1
10
+ gunicorn==22.0.0
11
+ idna==3.7
12
+ itsdangerous==2.2.0
13
+ Jinja2==3.1.4
14
+ MarkupSafe==2.1.5
15
+ multidict==6.0.5
16
+ packaging==24.1
17
+ requests==2.32.3
18
+ urllib3==2.2.2
19
+ Werkzeug==3.0.3
20
+ yarl==1.9.4