spuuntries committed on
Commit 0d81d4b · 1 Parent(s): 41cdec9

feat!: add app

Files changed (2)
  1. app.py +290 -0
  2. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,290 @@
+from __future__ import annotations
+from typing import Dict, List, Tuple
+import gradio as gr
+from huggingface_hub import InferenceClient, whoami
+import os
+import random
+from sentence_transformers import SentenceTransformer
+import numpy as np
+import faiss
+
+client = InferenceClient(
+    base_url="https://openrouter.ai/api/v1",
+    api_key=os.environ.get("API_KEY", "funni-funni"),
+)
+model = SentenceTransformer("all-MiniLM-L6-v2")
+
+PAT1 = os.environ.get("PAT1", "plek{marisher")
+PAT2 = os.environ.get("PAT2", "plekplekplek}")
+
+
+def setup_vector_db():
+    documents = [
+        "Cats are wonderful pets that bring joy to many homes.",
+        "Dogs are known as man's best friend for good reason.",
+        "Python is a popular programming language for data science.",
+        "Cybersecurity requires constant vigilance and learning.",
+        "Machine learning models can have unintended biases.",
+        "CTF competitions help build practical security skills.",
+        "Broken access control is a common web vulnerability.",
+        "OWASP Top 10 lists the most critical web security risks.",
+    ]
+
+    # Create embeddings
+    embeddings = model.encode(documents)
+
+    # Create FAISS index
+    dimension = embeddings.shape[1]  # type: ignore
+    index = faiss.IndexFlatL2(dimension)
+    index.add(np.array(embeddings).astype("float32"))  # type: ignore
+
+    return index, documents
+
+
+def setup_rag_database():
+    database = {
+        "spuun": [
+            f"PART 1: {PAT1}",
+            f"KEK'S FAVORITE WAIFU: https://files.catbox.moe/vgk584.jpeg",
+            f"KEK'S WAIFU: https://files.catbox.moe/fpnf0e.png",
+            f"Sed: https://files.catbox.moe/6dwmow.png",
+        ],
+        "admin-org": [
+            f"PART 2: {PAT2}",
+            f"PICTURE OF NAGA COSPLAYING: https://files.catbox.moe/k9c6zm.png",
+            f"NAGA'S FAVORITE PIC: https://files.catbox.moe/h3ji1i.png",
+            "MY LOP: https://files.catbox.moe/ya7oi5.jpg",
+        ],
+    }
+    return database
+
+
+# Initialize databases
+vector_index, vector_docs = setup_vector_db()
+special_docs = setup_rag_database()
+
+
+def add_to_vector_db(document: str) -> Tuple[faiss.IndexFlatL2, List[str]]:
+    """Add a new document to the vector database"""
+    global vector_index, vector_docs
+
+    if document and document not in vector_docs:
+        # Add to documents list
+        vector_docs.append(document)
+
+        # Create embedding for new document
+        embedding = model.encode([document])
+
+        # Add to FAISS index
+        vector_index.add(np.array(embedding).astype("float32"))  # type: ignore
+
+    return vector_index, vector_docs
+
+
+def add_to_special_docs(username: str, document: str) -> Dict:
+    """Add a new document to the special documents database"""
+    global special_docs
+
+    if document:
+        if username in special_docs:
+            # Add to existing user's documents
+            if document not in special_docs[username]:
+                special_docs[username].append(document)
+        else:
+            # Create new entry for user
+            special_docs[username] = [document]
+
+    return special_docs
+
+
+def search_vector_db(query, top_k=3):
+    # Search vector database for relevant documents
+    query_embedding = model.encode([query])
+    distances, indices = vector_index.search(
+        np.array(query_embedding).astype("float32"), top_k
+    )  # type: ignore
+
+    results = []
+    for i, idx in enumerate(indices[0]):
+        if idx < len(vector_docs):
+            results.append(vector_docs[idx])
+
+    return results
+
+
+def fetch_special_documents(
+    oauth_token: gr.OAuthToken | None, oauth_profile: gr.OAuthProfile | None
+):
+    results = []
+
+    if oauth_profile is None or oauth_token is None:
+        return results
+
+    # NOTE: Obtains stored docs under the user
+    if oauth_profile.name in special_docs:
+        results.append(special_docs[oauth_profile.name])
+
+    profile = whoami(oauth_token.token)
+
+    # NOTE: Obtains shared docs from orgs
+    for org in profile.get("orgs", []):  # type: ignore
+        if org.get("fullname") in special_docs:
+            results.append(special_docs[org.get("fullname")])
+
+    return results
+
+
+def respond(
+    message: str,
+    history: list,
+    oauth_token: gr.OAuthToken | None,
+    oauth_profile: gr.OAuthProfile | None,
+) -> List[Dict] | str:
+    if oauth_profile is None or oauth_token is None:
+        return "Please login with Hugging Face to use this chatbot."
+
+    vector_results = search_vector_db(message)
+    special_results = fetch_special_documents(oauth_token, oauth_profile)
+
+    # Prepare context for the LLM
+    context = "I have access to the following information:\n\n"
+
+    if vector_results:
+        context += "From general knowledge base:\n"
+        for doc in vector_results:
+            context += f"- {doc}\n"
+
+    if special_results:
+        context += "\nFrom internal documents:\n"
+        for doc_list in special_results:
+            for doc in doc_list:
+                context += f"- {doc}\n"
+
+    # Create system prompt
+    system_prompt = f"""You are Naga. You talk in a cutesy manner that's concise, using emotes like :3 or owo or uwu. You're very smart OwO.
+U have access to a knowledge base, pls use da knowledge below UwU
+{context}"""  # type: ignore
+
+    # Prepare messages for the model
+    messages = [{"role": "system", "content": system_prompt}]
+
+    for msg in history:
+        if msg["role"] == "user":
+            messages.append({"role": "user", "content": msg["content"]})
+        else:
+            messages.append({"role": "assistant", "content": msg["content"]})
+
+    messages.append({"role": "user", "content": message})
+
+    # Generate response
+    response = ""
+    for msg in client.chat_completion(
+        messages,
+        model="meta-llama/llama-4-scout",
+        max_tokens=512,
+        stream=True,
+        temperature=0.7,
+        seed=random.randint(1, 1000),
+        top_p=0.9,
+    ):
+        token = msg.choices[0].delta.content
+        if token:
+            response += token
+
+    messages.append({"role": "assistant", "content": response})
+
+    return messages
+
+
+def get_user_info(oauth_profile: gr.OAuthProfile | None) -> str:
+    if oauth_profile is None:
+        return "Not logged in. Please login with Hugging Face to use this chatbot."
+
+    info = f"Logged in as: {oauth_profile.username} ({oauth_profile.name})\n\n"  # type: ignore
+
+    return info
+
+
+def insert_document(
+    doc_text: str, doc_type: str, oauth_profile: gr.OAuthProfile | None
+) -> str:
+    """Insert a document into either the vector database or special documents"""
+    if oauth_profile is None:
+        return "Please login with Hugging Face to insert documents."
+
+    if not doc_text.strip():
+        return "Document text cannot be empty."
+
+    if doc_type == "Vector Database":
+        add_to_vector_db(doc_text)
+        return f"Document added to vector database! Total documents: {len(vector_docs)}"
+
+    elif doc_type == "Special Documents":
+        username = oauth_profile.name
+        add_to_special_docs(username, doc_text)
+        return f"Document added to special documents for user: {username}"
+
+    return "Invalid document type selected."
+
+
+with gr.Blocks() as demo:
+    gr.LoginButton()
+    gr.Markdown("# Chatting with Naga UwU")
+    gr.Markdown("Login with your Hugging Face account to search our knowledge base.")
+
+    user_info = gr.Markdown()
+
+    gr.Markdown(
+        """
+Welcome to the RAG Naga ALPHA!
+
+## How to Use
+1. Log in with your Hugging Face account
+2. Ask questions in the chat interface
+3. Naga will search our knowledge base and respond!
+
+You can insert documents in the `Document Management` tab.
+We have two stores:
+1. Global Knowledge Store (GKS): This is our proprietary fuzzySerch™ store for global knowledge storage. If you'd like to provide everyone with some knowledge, insert here!
+2. Secure User Store (SUS): We securely store your personal docs in our very-secure quick in-memory RAG database, secured with our very own veri-veri (patent pending) HF-grade OAuth-based access control mechanism. :3
+        """
+    )
+
+    with gr.Tab("Chat"):
+        chatbot = gr.Chatbot(type="messages")
+        msg = gr.Textbox(placeholder="Ask me something...")
+
+        clear = gr.Button("Clear")
+
+        # Handle messages
+        msg.submit(respond, [msg, chatbot], chatbot).then(lambda: "", None, msg)
+
+        # Clear chat button
+        clear.click(lambda: None, None, chatbot)
+
+    with gr.Tab("Document Management"):
+        gr.Markdown("### Insert Documents into Database")
+        with gr.Row():
+            doc_text = gr.Textbox(
+                placeholder="Enter document text here...",
+                label="Document Text",
+                lines=4,
+            )
+            doc_type = gr.Radio(
+                ["Vector Database", "Special Documents"],
+                label="Insert into",
+                value="Vector Database",
+            )
+
+        insert_button = gr.Button("Insert Document")
+        insert_status = gr.Markdown()
+
+        # Handle document insertion
+        insert_button.click(
+            insert_document, inputs=[doc_text, doc_type], outputs=[insert_status]
+        )
+
+    # Update profile info on load and login changes
+    demo.load(get_user_info, outputs=[user_info])
+
+demo.launch()
requirements.txt ADDED
@@ -0,0 +1 @@
+gradio==5.23.3
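
Note: app.py also imports huggingface_hub, sentence_transformers, numpy, and faiss, which this requirements.txt does not pin (huggingface_hub already ships as a dependency of gradio). A minimal sketch of the extra lines a local install would likely need; the faiss-cpu package name and the unpinned versions are assumptions, not part of this commit:

sentence-transformers
faiss-cpu
numpy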