Commit 3c7472d
Parent(s): none (initial commit)
Initial commit with core files

Files changed:
- .gitignore +0 -0
- README.md +24 -0
- app_hf.py +68 -0
- queryrun.py +422 -0
- requirements_hf.txt +13 -0
.gitignore
ADDED
Binary file (186 Bytes)
README.md
ADDED
@@ -0,0 +1,24 @@
# RAG API with Hugging Face Spaces

This is a RAG (Retrieval-Augmented Generation) API deployed on Hugging Face Spaces.

## API Endpoints

### Query Endpoint
- URL: `https://[your-space-name].hf.space/api/query`
- Method: `POST`
- Body:
```json
{
  "query": "Your question here"
}
```
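
For reference, the endpoint can be called from Python as in the minimal sketch below; the Space URL is a placeholder and the `requests` package is assumed to be installed:

```python
import requests

# Placeholder URL — substitute your actual Space name.
url = "https://your-space-name.hf.space/api/query"

resp = requests.post(url, json={"query": "Your question here"}, timeout=60)
resp.raise_for_status()

data = resp.json()
print(data["response"])        # generated answer text
for src in data["sources"]:    # retrieved chunks with scores and metadata
    print(src["id"], src["score"], src["metadata"])
```

The `response`/`sources` shape matches what `handle_query` in `app_hf.py` returns.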

## Local Development
1. Clone this repository
2. Install dependencies: `pip install -r requirements_hf.txt`
3. Set up environment variables in `.env` (see the example below)
4. Run: `python app_hf.py`
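
The keys the code actually reads are `COHEREAPIKEY` and `DEEPKEY` (see `queryrun.py`), so a local `.env` might look like the following; the values are placeholders, and real keys should stay out of version control:

```
COHEREAPIKEY=your-cohere-api-key
DEEPKEY=your-deepseek-api-key
```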
## Deployment
This application is deployed on Hugging Face Spaces.
app_hf.py
ADDED
@@ -0,0 +1,68 @@
from flask import Flask, request, jsonify
from flask_cors import CORS
import traceback
import os
from queryrun import FAISSQuerySystem
from huggingface_hub import HfFolder

# --- Flask App Setup ---
app = Flask(__name__)
CORS(app)

# --- Initialize the RAG system ---
query_system = None
try:
    print("Initializing RAG system...")
    query_system = FAISSQuerySystem()
    print("RAG system ready for queries.")
except Exception as e:
    print("--- APPLICATION FAILED TO START: RAG SYSTEM INITIALIZATION ERROR ---")
    print(f"Error: {str(e)}")

@app.route('/')
def home():
    return jsonify({
        "status": "ok",
        "message": "RAG API is running",
        "endpoints": {
            "/api/query": "POST - Send queries to the RAG system"
        }
    })

@app.route('/api/query', methods=['POST'])
def handle_query():
    if query_system is None:
        return jsonify({"error": "RAG system is not initialized"}), 500

    try:
        data = request.get_json()
        if not data or 'query' not in data:
            return jsonify({"error": "No query provided"}), 400

        query = str(data['query'])
        print(f"Received query: {query}")

        search_results = query_system.search(query, k=5)
        response_text = query_system.generate_response(query, search_results)

        sources_for_response = [
            {
                "id": i + 1,
                "score": round(doc['score'], 4),
                "metadata": doc.get('metadata', {})
            }
            for i, doc in enumerate(search_results)
        ]

        return jsonify({
            "response": response_text,
            "sources": sources_for_response
        })

    except Exception as e:
        print(f"Error processing query: {str(e)}")
        traceback.print_exc()
        return jsonify({"error": "An internal error occurred"}), 500

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)  # Hugging Face Spaces uses port 7860
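
For quick local verification without deploying, the route can also be exercised in-process with Flask's test client. This is a sketch, not part of the commit, and assumes the FAISS index files are present so initialization succeeds:

```python
# Hypothetical local smoke test for the Flask app above.
from app_hf import app

with app.test_client() as client:
    r = client.post("/api/query", json={"query": "What topics are covered?"})
    print(r.status_code)   # 200 on success, 500 if the RAG system failed to initialize
    print(r.get_json())    # {"response": ..., "sources": [...]} on success
```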
queryrun.py
ADDED
@@ -0,0 +1,422 @@
import cohere
import numpy as np
import faiss
import pickle
import os
import traceback  # Import traceback for detailed error printing
from dotenv import load_dotenv
from langchain_community.docstore.document import Document
# Corrected import based on the deprecation warning
from langchain_community.docstore.in_memory import InMemoryDocstore
from openai import OpenAI

# Load environment variables
load_dotenv()
cohere_api_key = os.getenv("COHEREAPIKEY")
api_key = os.getenv("DEEPKEY")

# Initialize OpenAI client with minimal configuration (pointed at the DeepSeek API)
client = OpenAI(
    api_key=api_key,
    base_url="https://api.deepseek.com/v1"
)

# Initialize Cohere client
if not cohere_api_key:
    raise ValueError("COHEREAPIKEY not found in environment variables")
co = cohere.Client(cohere_api_key)

# --- Custom Cohere Embeddings Class (for query embedding) ---
class CohereEmbeddingsForQuery:
    def __init__(self, client):
        self.client = client
        self.embed_dim = self._get_embed_dim()

    def _get_embed_dim(self):
        try:
            response = self.client.embed(
                texts=["test"], model="embed-english-v3.0", input_type="search_query"
            )
            return len(response.embeddings[0])
        except Exception as e:
            print(f"Warning: Could not determine embedding dimension automatically: {e}. Defaulting to 4096.")
            return 4096

    def embed_query(self, text):
        try:
            # Ensure text is properly encoded as a string
            if not isinstance(text, str):
                try:
                    text = str(text)
                except UnicodeEncodeError:
                    # If there's an encoding error, try to normalize the text
                    import unicodedata
                    text = unicodedata.normalize('NFKD', str(text))

            response = self.client.embed(
                texts=[text],
                model="embed-english-v3.0",
                input_type="search_query"
            )
            if hasattr(response, 'embeddings') and len(response.embeddings) > 0:
                return np.array(response.embeddings[0]).astype('float32')
            else:
                print("Warning: No query embedding found in the response. Returning zero vector.")
                return np.zeros(self.embed_dim, dtype=np.float32)
        except Exception as e:
            print(f"Query embedding error: {e}")
            return np.zeros(self.embed_dim, dtype=np.float32)

# --- FAISS Query System ---
class FAISSQuerySystem:
    def __init__(self, persist_dir='docs/faiss/'):
        self.persist_dir = persist_dir
        self.index = None
        self.documents = []  # List to hold LangChain Document objects
        self.metadata_list = []  # List to hold metadata dictionaries
        self.embedding_function = CohereEmbeddingsForQuery(co)  # Use the query-specific class
        self.load_index()

    def stream_chat_completions(self, input_text):
        # Ensure input_text is properly encoded as a string
        if not isinstance(input_text, str):
            try:
                input_text = str(input_text)
            except UnicodeEncodeError:
                # If there's an encoding error, try to normalize the text
                import unicodedata
                input_text = unicodedata.normalize('NFKD', str(input_text))

        response = client.chat.completions.create(
            model="deepseek-chat",
            messages=[
                {"role": "system", "content": "Your job is to make text more appealing by adding emojis, formatting, and other enhancements. Do not include any awkward markup though."},
                {"role": "user", "content": input_text},
            ],
            stream=False
        )
        try:
            resp = response.choices[0].message.content.split("\n---")[1]
        except Exception:
            resp = response.choices[0].message.content
        # Extract just the core content without the extra sections
        resp = resp.replace('**', '')  # Remove bold formatting
        resp = resp.replace('*', '')
        return resp

    def load_index(self):
        """Load the FAISS index and associated document/metadata files"""
        faiss_index_path = os.path.join(self.persist_dir, "index.faiss")
        pkl_path = os.path.join(self.persist_dir, "index.pkl")
        metadata_path = os.path.join(self.persist_dir, "metadata.pkl")

        print(f"Loading FAISS index from: {faiss_index_path}")
        print(f"Loading docstore info from: {pkl_path}")
        print(f"Loading separate metadata from: {metadata_path}")

        if not os.path.exists(faiss_index_path) or not os.path.exists(pkl_path):
            raise FileNotFoundError(f"Required index files (index.faiss, index.pkl) not found in {self.persist_dir}")

        try:
            # 1. Load FAISS index
            self.index = faiss.read_index(faiss_index_path)
            print(f"FAISS index loaded successfully with {self.index.ntotal} vectors.")

            # 2. Load LangChain docstore pickle file
            with open(pkl_path, 'rb') as f:
                try:
                    docstore, index_to_docstore_id = pickle.load(f)
                except (KeyError, AttributeError) as e:
                    print(f"Error loading pickle file: {str(e)}")
                    print("This might be due to a Pydantic version mismatch.")
                    print("Attempting to recreate the index...")
                    # Delete the incompatible files
                    if os.path.exists(faiss_index_path):
                        os.remove(faiss_index_path)
                    if os.path.exists(pkl_path):
                        os.remove(pkl_path)
                    if os.path.exists(metadata_path):
                        os.remove(metadata_path)
                    # Recreate the index
                    from test import main as recreate_index
                    recreate_index()
                    # Try loading again
                    with open(pkl_path, 'rb') as f2:
                        docstore, index_to_docstore_id = pickle.load(f2)
                except UnicodeDecodeError:
                    print("Unicode decode error when loading pickle file. Attempting to handle special characters...")
                    # Try to handle the Unicode decode error
                    import codecs
                    with codecs.open(pkl_path, 'rb', encoding='utf-8', errors='replace') as f2:
                        docstore, index_to_docstore_id = pickle.load(f2)

            # Verify the types after loading
            print(f"Docstore object loaded. Type: {type(docstore)}")
            print(f"Index-to-ID mapping loaded. Type: {type(index_to_docstore_id)}")

            # Now this line should work
            if isinstance(index_to_docstore_id, dict):
                print(f"Mapping contains {len(index_to_docstore_id)} entries.")
            else:
                # This case should ideally not happen now, but good to have a check
                raise TypeError(f"Expected index_to_docstore_id to be a dict, but got {type(index_to_docstore_id)}")

            if not isinstance(docstore, InMemoryDocstore):
                # Add a check for the docstore type too
                print(f"Warning: Expected docstore to be InMemoryDocstore, but got {type(docstore)}")

            # 3. Reconstruct the list of documents in FAISS index order
            self.documents = []
            num_vectors = self.index.ntotal

            # Verify consistency
            if num_vectors != len(index_to_docstore_id):
                print(f"Warning: FAISS index size ({num_vectors}) does not match mapping size ({len(index_to_docstore_id)}). Reconstruction might be incomplete.")

            print("Reconstructing document list...")
            reconstructed_count = 0
            missing_in_mapping = 0
            missing_in_docstore = 0
            # Ensure docstore has the 'search' method needed.
            if not hasattr(docstore, 'search'):
                raise AttributeError(f"Loaded docstore object (type: {type(docstore)}) does not have a 'search' method.")

            for i in range(num_vectors):
                docstore_id = index_to_docstore_id.get(i)
                if docstore_id:
                    # Use the correct method for InMemoryDocstore to retrieve by ID
                    doc = docstore.search(docstore_id)
                    if doc:
                        self.documents.append(doc)
                        reconstructed_count += 1
                    else:
                        print(f"Warning: Document with ID '{docstore_id}' (for FAISS index {i}) not found in the loaded docstore.")
                        missing_in_docstore += 1
                else:
                    print(f"Warning: No docstore ID found in mapping for FAISS index {i}.")
                    missing_in_mapping += 1

            print(f"Successfully reconstructed {reconstructed_count} documents.")
            if missing_in_mapping > 0: print(f"Could not find mapping for {missing_in_mapping} indices.")
            if missing_in_docstore > 0: print(f"Could not find {missing_in_docstore} documents in docstore despite having mapping.")

            # 4. Load the separate metadata list
            if os.path.exists(metadata_path):
                with open(metadata_path, 'rb') as f:
                    self.metadata_list = pickle.load(f)
                print(f"Loaded separate metadata list with {len(self.metadata_list)} entries.")

                if len(self.metadata_list) != len(self.documents):
                    print(f"Warning: Mismatch between reconstructed documents ({len(self.documents)}) and loaded metadata list ({len(self.metadata_list)}).")
                    print("Falling back to using metadata attached to Document objects if available.")
                    self.metadata_list = [getattr(doc, 'metadata', {}) for doc in self.documents]
                elif not self.documents and self.metadata_list:
                    print("Warning: Loaded metadata but no documents were reconstructed. Discarding metadata.")
                    self.metadata_list = []

            else:
                print("Warning: Separate metadata file (metadata.pkl) not found.")
                print("Attempting to use metadata attached to Document objects.")
                self.metadata_list = [getattr(doc, 'metadata', {}) for doc in self.documents]

            print(f"Final document count: {len(self.documents)}")
            print(f"Final metadata count: {len(self.metadata_list)}")

        except FileNotFoundError as e:
            print(f"Error loading index files: {e}")
            raise
        except Exception as e:
            print(f"An unexpected error occurred during index loading: {e}")
            traceback.print_exc()
            raise

    def search(self, query, k=3):
        """Search the index and return relevant documents with metadata and scores"""
        if not self.index or self.index.ntotal == 0:
            print("Warning: FAISS index is not loaded or is empty.")
            return []
        if not self.documents:
            print("Warning: No documents were successfully loaded.")
            return []

        actual_k = min(k, len(self.documents))
        if actual_k == 0:
            return []

        # Ensure query is properly encoded as a string
        if not isinstance(query, str):
            try:
                query = str(query)
            except UnicodeEncodeError:
                # If there's an encoding error, try to normalize the text
                import unicodedata
                query = unicodedata.normalize('NFKD', str(query))

        query_embedding = self.embedding_function.embed_query(query)
        if np.all(query_embedding == 0):
            print("Warning: Query embedding failed, search may be ineffective.")

        query_embedding_batch = np.array([query_embedding])
        distances, indices = self.index.search(query_embedding_batch, actual_k)
        results = []
        retrieved_indices = indices[0]

        for i, idx in enumerate(retrieved_indices):
            if idx == -1:
                continue

            if idx < len(self.documents):
                doc = self.documents[idx]
                metadata = self.metadata_list[idx] if idx < len(self.metadata_list) else getattr(doc, 'metadata', {})
                distance = distances[0][i]
                similarity_score = 1.0 / (1.0 + distance)  # Basic L2 -> Similarity

                # Ensure content is properly encoded as a string
                content = getattr(doc, 'page_content', str(doc))
                if not isinstance(content, str):
                    try:
                        content = str(content)
                    except UnicodeEncodeError:
                        # If there's an encoding error, try to normalize the text
                        import unicodedata
                        content = unicodedata.normalize('NFKD', str(content))

                results.append({
                    "content": content,
                    "metadata": metadata,
                    "score": float(similarity_score)
                })
            else:
                print(f"Warning: Search returned index {idx} which is out of bounds for loaded documents ({len(self.documents)}).")

        results.sort(key=lambda x: x['score'], reverse=True)
        return results

    def generate_response(self, query, context_docs):
        """Generate RAG response using Cohere's chat API"""
        if not context_docs:
            print("No context documents provided to generate_response.")
            try:
                response = co.chat(
                    message=f"I could not find relevant documents in my knowledge base to answer your question: '{query}'. Please try rephrasing or asking about topics covered in the source material.",
                    model="command-r-plus",
                    temperature=0.3,
                    preamble="You are an AI assistant explaining limitations."
                )
                return response.text
            except Exception as e:
                print(f"Error calling Cohere even without documents: {e}")
                return "I could not find relevant documents and encountered an error trying to respond."

        formatted_docs = []
        # Process documents in batches to reduce memory usage
        batch_size = 3
        for i in range(0, len(context_docs), batch_size):
            batch_end = min(i + batch_size, len(context_docs))
            for j in range(i, batch_end):
                doc = context_docs[j]
                # Ensure content is properly encoded as a string
                content = doc['content']
                if not isinstance(content, str):
                    try:
                        content = str(content)
                    except UnicodeEncodeError:
                        # If there's an encoding error, try to normalize the text
                        import unicodedata
                        content = unicodedata.normalize('NFKD', str(content))

                content_preview = content[:3000]
                doc_info = f"Source: {doc['metadata'].get('source', 'Unknown')}\n"
                doc_info += f"Type: {doc['metadata'].get('type', 'Unknown')}\n"
                doc_info += f"Content Snippet: {content_preview}"
                formatted_docs.append({"title": f"Document {j+1} (Source: {doc['metadata'].get('source', 'Unknown')})", "snippet": doc_info})

            # Force garbage collection after each batch
            import gc
            gc.collect()

        try:
            response = co.chat(
                message=query,
                documents=formatted_docs,
                model="command-r-plus",
                temperature=0.3,
                prompt_truncation='AUTO',
                preamble="You are an expert AI assistant. Answer the user's question based *only* on the provided document snippets. Cite the source document number (e.g., [Document 1]) when using information from it. If the answer isn't in the documents, state that clearly."
            )
            return self.stream_chat_completions(response.text)
        except Exception as e:
            print(f"Error during Cohere chat API call: {e}")
            traceback.print_exc()
            return "Sorry, I encountered an error while trying to generate a response using the retrieved documents."

def main():
    try:
        # Initialize query system
        query_system = FAISSQuerySystem()  # Defaults to 'docs/faiss/'

        # Interactive query loop
        print("\n--- FAISS RAG Query System ---")
        print("Ask questions about the content indexed from web, PDFs, and audio.")
        print("Type 'exit' or 'quit' to stop.")

        while True:
            query = input("\nYour question: ")
            if query.lower() in ('exit', 'quit'):
                print("Exiting...")
                break
            if not query:
                continue

            try:
                # 1. Search for relevant documents
                print("Searching for relevant documents...")
                docs = query_system.search(query, k=5)  # Get top 5 results

                if not docs:
                    print("Could not find relevant documents in the knowledge base.")
                    response = query_system.generate_response(query, [])
                    print("\nResponse:")
                    print("-" * 50)
                    print(response)
                    print("-" * 50)
                    continue

                print(f"Found {len(docs)} relevant document chunks.")

                # 2. Generate and display response using RAG
                print("Generating response based on documents...")
                response = query_system.generate_response(query, docs)
                print("\nResponse:")
                print("-" * 50)
                print(response)
                print("-" * 50)

                # 3. Show sources (optional)
                print("\nRetrieved Sources (Snippets):")
                for i, doc in enumerate(docs, 1):
                    print(f"\n--- Source {i} ---")
                    print(f"  Score: {doc['score']:.4f}")
                    print(f"  Source File: {doc['metadata'].get('source', 'Unknown')}")
                    print(f"  Type: {doc['metadata'].get('type', 'Unknown')}")
                    if 'page' in doc['metadata']:
                        print(f"  Page (PDF): {doc['metadata']['page']}")
                    print(f"  Content: {doc['content'][:250]}...")

            except Exception as e:
                print(f"\nAn error occurred while processing your query: {e}")
                traceback.print_exc()

    except FileNotFoundError as e:
        print("\nInitialization Error: Could not find necessary index files.")
        print(f"Details: {e}")
        print("Please ensure you have run the indexing script first and the 'docs/faiss/' directory contains 'index.faiss' and 'index.pkl'.")
    except Exception as e:
        print(f"\nA critical initialization error occurred: {e}")
        traceback.print_exc()

if __name__ == "__main__":
    main()
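
Note that `queryrun.py` only reads the index; the script that writes `docs/faiss/index.faiss`, `index.pkl`, and `metadata.pkl` is not part of this commit (`load_index` falls back to `from test import main` to regenerate it). The following is a minimal sketch of such a builder, assuming only the on-disk format that `load_index` unpickles: a `(docstore, index_to_docstore_id)` tuple plus a separate metadata list. The chunk contents and helper names here are placeholders.

```python
# build_index.py — hypothetical sketch of an indexing script producing the three
# artifacts FAISSQuerySystem.load_index() expects. Not part of this commit.
import os
import pickle

import cohere
import faiss
import numpy as np
from dotenv import load_dotenv
from langchain_community.docstore.document import Document
from langchain_community.docstore.in_memory import InMemoryDocstore

load_dotenv()
co = cohere.Client(os.getenv("COHEREAPIKEY"))

persist_dir = "docs/faiss/"
os.makedirs(persist_dir, exist_ok=True)

# Placeholder chunks — in the real pipeline these would come from web/PDF/audio sources.
chunks = [
    {"text": "Example chunk one.", "metadata": {"source": "example.pdf", "type": "pdf", "page": 1}},
    {"text": "Example chunk two.", "metadata": {"source": "example.html", "type": "web"}},
]

# Embed documents with the same model family used for queries (input_type differs).
resp = co.embed(
    texts=[c["text"] for c in chunks],
    model="embed-english-v3.0",
    input_type="search_document",
)
vectors = np.array(resp.embeddings, dtype="float32")

# 1. FAISS index (L2 metric, matching the distance-to-similarity conversion in search()).
index = faiss.IndexFlatL2(vectors.shape[1])
index.add(vectors)
faiss.write_index(index, os.path.join(persist_dir, "index.faiss"))

# 2. Docstore + index-to-ID mapping, pickled as a tuple (what load_index unpickles).
docstore = InMemoryDocstore({
    str(i): Document(page_content=c["text"], metadata=c["metadata"])
    for i, c in enumerate(chunks)
})
index_to_docstore_id = {i: str(i) for i in range(len(chunks))}
with open(os.path.join(persist_dir, "index.pkl"), "wb") as f:
    pickle.dump((docstore, index_to_docstore_id), f)

# 3. Separate metadata list, in FAISS index order.
with open(os.path.join(persist_dir, "metadata.pkl"), "wb") as f:
    pickle.dump([c["metadata"] for c in chunks], f)
```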
requirements_hf.txt
ADDED
@@ -0,0 +1,13 @@
flask==3.0.2
flask-cors==4.0.0
cohere==4.48
faiss-cpu==1.7.4
numpy==1.26.4
python-dotenv==1.0.1
openai==1.1.1
httpx==0.24.1
langchain-community==0.3.14
pydantic>=2.0.0
faster-whisper==1.1.1
langchain==0.3.14
huggingface-hub>=0.19.0