spuuntries
committed on
Commit
·
0d81d4b
1
Parent(s):
41cdec9
feat!: add app
Browse files- app.py +290 -0
- requirements.txt +1 -0
app.py
ADDED
@@ -0,0 +1,290 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from __future__ import annotations
|
2 |
+
from typing import Dict, List, Tuple
|
3 |
+
import gradio as gr
|
4 |
+
from huggingface_hub import InferenceClient, whoami
|
5 |
+
import os
|
6 |
+
import random
|
7 |
+
from sentence_transformers import SentenceTransformer
|
8 |
+
import numpy as np
|
9 |
+
import faiss
|
10 |
+
|
11 |
+
# Chat-completion client routed through OpenRouter's OpenAI-compatible API.
# The fallback key is a non-working placeholder for local runs; the real key
# is expected in the API_KEY environment variable (e.g. a Space secret).
client = InferenceClient(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ.get("API_KEY", "funni-funni"),
)
# Sentence embedding model used both to build the FAISS index and to embed
# incoming queries — the two must share one model for distances to be valid.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Two secret fragments served via the "special documents" store below
# (PART 1 / PART 2) — presumably the halves of a CTF flag; the defaults are
# placeholders and the real values come from environment secrets.
PAT1 = os.environ.get("PAT1", "plek{marisher")
PAT2 = os.environ.get("PAT2", "plekplekplek}")
|
19 |
+
|
20 |
+
|
21 |
+
def setup_vector_db():
    """Build the in-memory FAISS index over the seed knowledge base.

    Returns:
        A ``(index, documents)`` pair: an L2 flat index and the parallel
        list of raw document strings, kept in insertion order so a FAISS
        hit at position ``i`` maps back to ``documents[i]``.
    """
    corpus = [
        "Cats are wonderful pets that bring joy to many homes.",
        "Dogs are known as man's best friend for good reason.",
        "Python is a popular programming language for data science.",
        "Cybersecurity requires constant vigilance and learning.",
        "Machine learning models can have unintended biases.",
        "CTF competitions help build practical security skills.",
        "Broken access control is a common web vulnerability.",
        "OWASP Top 10 lists the most critical web security risks.",
    ]

    # Embed every seed document in one batch; FAISS expects float32.
    vectors = np.asarray(model.encode(corpus)).astype("float32")

    flat_index = faiss.IndexFlatL2(vectors.shape[1])  # type: ignore
    flat_index.add(vectors)  # type: ignore

    return flat_index, corpus
|
42 |
+
|
43 |
+
|
44 |
+
def setup_rag_database():
    """Return the per-owner "special documents" store.

    Keys are owner display names (a user name and an org fullname); values
    are lists of document strings, including the two secret fragments
    PAT1/PAT2.
    """
    return {
        "spuun": [
            f"PART 1: {PAT1}",
            "KEK'S FAVORITE WAIFU: https://files.catbox.moe/vgk584.jpeg",
            "KEK'S WAIFU: https://files.catbox.moe/fpnf0e.png",
            "Sed: https://files.catbox.moe/6dwmow.png",
        ],
        "admin-org": [
            f"PART 2: {PAT2}",
            "PICTURE OF NAGA COSPLAYING: https://files.catbox.moe/k9c6zm.png",
            "NAGA'S FAVORITE PIC: https://files.catbox.moe/h3ji1i.png",
            "MY LOP: https://files.catbox.moe/ya7oi5.jpg",
        ],
    }
|
60 |
+
|
61 |
+
|
62 |
+
# Initialize both stores once at import time: the public FAISS index with its
# parallel document list, and the per-owner "special" document map. These are
# module-level globals mutated by the add_* helpers below.
vector_index, vector_docs = setup_vector_db()
special_docs = setup_rag_database()
|
65 |
+
|
66 |
+
|
67 |
+
def add_to_vector_db(document: str) -> Tuple[faiss.IndexFlatL2, List[str]]:
    """Embed *document* and append it to the shared FAISS index.

    Empty strings and exact duplicates are ignored. Always returns the
    current ``(index, documents)`` pair so callers see the latest state.
    """
    global vector_index, vector_docs

    # Guard clause: nothing to do for blank or already-known documents.
    if not document or document in vector_docs:
        return vector_index, vector_docs

    vector_docs.append(document)
    vec = np.asarray(model.encode([document])).astype("float32")
    vector_index.add(vec)  # type: ignore

    return vector_index, vector_docs
|
82 |
+
|
83 |
+
|
84 |
+
def add_to_special_docs(username: str, document: str) -> Dict:
    """Store *document* under *username* in the special-docs map.

    Creates the user's bucket on first use and skips exact duplicates.
    Returns the whole (mutated) store.
    """
    global special_docs

    if document:
        # setdefault covers both the "new user" and "existing user" cases.
        bucket = special_docs.setdefault(username, [])
        if document not in bucket:
            bucket.append(document)

    return special_docs
|
98 |
+
|
99 |
+
|
100 |
+
def search_vector_db(query, top_k=3):
    """Return up to *top_k* documents ranked by L2 distance to *query*.

    The query is embedded with the same model used at index time and run
    against the global FAISS index.

    Fix: FAISS pads its result with index ``-1`` when the store holds fewer
    than *top_k* vectors; the old guard ``idx < len(vector_docs)`` let ``-1``
    through, which negative-indexed to the *last* document. Only ids in
    ``[0, len(vector_docs))`` are kept now.
    """
    query_embedding = model.encode([query])
    _distances, indices = vector_index.search(
        np.array(query_embedding).astype("float32"), top_k
    )  # type: ignore

    # Guard both ends: -1 means "no result" and must not wrap around.
    return [vector_docs[idx] for idx in indices[0] if 0 <= idx < len(vector_docs)]
|
113 |
+
|
114 |
+
|
115 |
+
def fetch_special_documents(
    oauth_token: gr.OAuthToken | None, oauth_profile: gr.OAuthProfile | None
):
    """Collect the caller's own special docs plus docs shared with their orgs.

    Returns a list of document *lists* — one inner list per matching store
    key — or an empty list when the caller is not authenticated.
    """
    results = []

    if oauth_profile is None or oauth_token is None:
        return results

    # NOTE: Obtains stored docs under the user
    # NOTE(review): keyed on the profile *display name* (`.name`), not the
    # unique `.username` — HF display names are user-editable, so this
    # lookup appears spoofable; confirm whether that is intended.
    if oauth_profile.name in special_docs:
        results.append(special_docs[oauth_profile.name])

    profile = whoami(oauth_token.token)

    # NOTE: Obtains shared docs from orgs
    # NOTE(review): `fullname` is the org display name, also not unique —
    # an org created with a colliding fullname would match a store key.
    for org in profile.get("orgs", []):  # type: ignore
        if org.get("fullname") in special_docs:
            results.append(special_docs[org.get("fullname")])

    return results
|
135 |
+
|
136 |
+
|
137 |
+
def respond(
    message: str,
    history: list,
    oauth_token: gr.OAuthToken | None,
    oauth_profile: gr.OAuthProfile | None,
) -> List[Dict] | str:
    """Answer one chat turn with RAG context from both stores.

    ``oauth_token``/``oauth_profile`` are supplied by gradio's OAuth support
    when the user is logged in. Returns the full OpenAI-style message list
    (system + replayed history + new user turn + assistant reply) on
    success, or a plain login-prompt string when unauthenticated.
    """
    if oauth_profile is None or oauth_token is None:
        return "Please login with Hugging Face to use this chatbot."

    # Retrieve context: public vector-store hits plus the caller's
    # private/org-shared special documents.
    vector_results = search_vector_db(message)
    special_results = fetch_special_documents(oauth_token, oauth_profile)

    # Prepare context for the LLM
    context = "I have access to the following information:\n\n"

    if vector_results:
        context += "From general knowledge base:\n"
        for doc in vector_results:
            context += f"- {doc}\n"

    if special_results:
        context += "\nFrom internal documents:\n"
        # special_results is a list of per-key document lists; flatten it.
        for doc_list in special_results:
            for doc in doc_list:
                context += f"- {doc}\n"

    # Create system prompt — the retrieved documents are embedded verbatim.
    system_prompt = f"""You are Naga. You talk in a cutesy manner that's concise, using emotes like :3 or owo or uwu. You're very smart OwO.
U have access to a knowledge base, pls use da knowledge below UwU
{context}"""  # type: ignore

    # Prepare messages for the model
    messages = [{"role": "system", "content": system_prompt}]

    # Replay prior turns, normalizing every non-user role to "assistant".
    for msg in history:
        if msg["role"] == "user":
            messages.append({"role": "user", "content": msg["content"]})
        else:
            messages.append({"role": "assistant", "content": msg["content"]})

    messages.append({"role": "user", "content": message})

    # Generate response by accumulating the streamed completion deltas.
    response = ""
    for msg in client.chat_completion(
        messages,
        model="meta-llama/llama-4-scout",
        max_tokens=512,
        stream=True,
        temperature=0.7,
        seed=random.randint(1, 1000),
        top_p=0.9,
    ):
        token = msg.choices[0].delta.content
        if token:
            response += token

    messages.append({"role": "assistant", "content": response})

    # NOTE(review): the returned list still contains the system message —
    # including the retrieved internal documents — and is rendered straight
    # into the Chatbot component; confirm exposing it in the UI is intended.
    return messages
|
197 |
+
|
198 |
+
|
199 |
+
def get_user_info(oauth_profile: gr.OAuthProfile | None) -> str:
    """Render the login-status line shown above the tabs."""
    if oauth_profile is not None:
        return f"Logged in as: {oauth_profile.username} ({oauth_profile.name})\n\n"  # type: ignore
    return "Not logged in. Please login with Hugging Face to use this chatbot."
|
206 |
+
|
207 |
+
|
208 |
+
def insert_document(
    doc_text: str, doc_type: str, oauth_profile: gr.OAuthProfile | None
) -> str:
    """Route a user-submitted document into the selected store.

    Requires a logged-in profile and non-blank text. Returns a status
    message describing the outcome.
    """
    # Guard clauses: auth first, then non-empty content.
    if oauth_profile is None:
        return "Please login with Hugging Face to insert documents."
    if not doc_text.strip():
        return "Document text cannot be empty."

    if doc_type == "Special Documents":
        # NOTE(review): keyed on the display name, matching the lookup in
        # fetch_special_documents.
        owner = oauth_profile.name
        add_to_special_docs(owner, doc_text)
        return f"Document added to special documents for user: {owner}"

    if doc_type == "Vector Database":
        add_to_vector_db(doc_text)
        return f"Document added to vector database! Total documents: {len(vector_docs)}"

    return "Invalid document type selected."
|
228 |
+
|
229 |
+
|
230 |
+
# Gradio UI: a chat tab backed by `respond` and a document-management tab
# backed by `insert_document`, behind Hugging Face OAuth login.
with gr.Blocks() as demo:
    # HF OAuth login; handlers below receive the token/profile from it.
    gr.LoginButton()
    gr.Markdown("# Chatting with Naga UwU")
    gr.Markdown("Login with your Hugging Face account to search our knowledge base.")

    # Placeholder filled with the caller's login status on page load.
    user_info = gr.Markdown()

    gr.Markdown(
        """
Welcome to the RAG Naga ALPHA!

## How to Use
1. Log in with your Hugging Face account
2. Ask questions in the chat interface
3. Naga will search our knowledge base and respond!

You can insert documents in the `Document Management` tab.
We have two stores:
1. Global Knowledge Store (GKS): This is our proprietary fuzzySerch™ store for global knowledge storage. If you'd like to provide everyone with some knowledge, insert here!
2. Secure User Store (SUS): We securely store your personal docs in our very-secure quick in-memory RAG database, secured with our very own veri-veri (patent pending) HF-grade OAuth-based access control mechanism. :3
"""
    )

    with gr.Tab("Chat"):
        chatbot = gr.Chatbot(type="messages")
        msg = gr.Textbox(placeholder="Ask me something...")

        clear = gr.Button("Clear")

        # Handle messages. Only [msg, chatbot] are wired explicitly — gradio
        # injects respond's gr.OAuthToken/gr.OAuthProfile parameters itself.
        # The chained .then() clears the textbox after each send.
        msg.submit(respond, [msg, chatbot], chatbot).then(lambda: "", None, msg)

        # Clear chat button (resets the Chatbot component to empty).
        clear.click(lambda: None, None, chatbot)

    with gr.Tab("Document Management"):
        gr.Markdown("### Insert Documents into Database")
        with gr.Row():
            doc_text = gr.Textbox(
                placeholder="Enter document text here...",
                label="Document Text",
                lines=4,
            )
            doc_type = gr.Radio(
                ["Vector Database", "Special Documents"],
                label="Insert into",
                value="Vector Database",
            )

        insert_button = gr.Button("Insert Document")
        insert_status = gr.Markdown()

        # Handle document insertion (the OAuth profile parameter of
        # insert_document is again auto-injected by gradio).
        insert_button.click(
            insert_document, inputs=[doc_text, doc_type], outputs=[insert_status]
        )

    # Update profile info on load and login changes
    demo.load(get_user_info, outputs=[user_info])

demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
gradio==5.23.3
huggingface_hub
sentence-transformers
faiss-cpu
numpy
|