no message
Browse files
- config.py +1 -1
- model/retriever.py +3 -0
config.py
CHANGED
@@ -5,5 +5,5 @@ CONFIG = {
|
|
5 |
"CHUNK_SIZE": 200,
|
6 |
"OPENAI_ENGINE": "gpt-4o-mini",
|
7 |
"MAX_TOKENS": 500,
|
8 |
-
"TOP_DOCS":
|
9 |
}
|
|
|
5 |
"CHUNK_SIZE": 200,
|
6 |
"OPENAI_ENGINE": "gpt-4o-mini",
|
7 |
"MAX_TOKENS": 500,
|
8 |
+
"TOP_DOCS": 5
|
9 |
}
|
model/retriever.py
CHANGED
@@ -23,6 +23,9 @@ class Retriever:
|
|
23 |
|
24 |
def compute_embeddings(self):
|
25 |
self.model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
|
|
|
|
26 |
self.chunk_embeddings = self.model.encode(self.corpus, convert_to_tensor=True)
|
27 |
|
28 |
def chunk_text(self, text, chunk_size=CONFIG['CHUNK_SIZE']):
|
|
|
23 |
|
24 |
def compute_embeddings(self):
|
25 |
self.model = SentenceTransformer('all-MiniLM-L6-v2')
|
26 |
+
# tokenizer = self.model._first_module().tokenizer
|
27 |
+
# if tokenizer.pad_token is None:
|
28 |
+
# tokenizer.pad_token = tokenizer.eos_token
|
29 |
self.chunk_embeddings = self.model.encode(self.corpus, convert_to_tensor=True)
|
30 |
|
31 |
def chunk_text(self, text, chunk_size=CONFIG['CHUNK_SIZE']):
|