droushb committed on
Commit
80ff195
1 Parent(s): 8b52ce3

no message

Browse files
Files changed (2) hide show
  1. config.py +1 -1
  2. model/retriever.py +3 -0
config.py CHANGED
@@ -5,5 +5,5 @@ CONFIG = {
5
  "CHUNK_SIZE": 200,
6
  "OPENAI_ENGINE": "gpt-4o-mini",
7
  "MAX_TOKENS": 500,
8
- "TOP_DOCS": 3
9
  }
 
5
  "CHUNK_SIZE": 200,
6
  "OPENAI_ENGINE": "gpt-4o-mini",
7
  "MAX_TOKENS": 500,
8
+ "TOP_DOCS": 5
9
  }
model/retriever.py CHANGED
@@ -23,6 +23,9 @@ class Retriever:
23
 
24
  def compute_embeddings(self):
25
  self.model = SentenceTransformer('all-MiniLM-L6-v2')
 
 
 
26
  self.chunk_embeddings = self.model.encode(self.corpus, convert_to_tensor=True)
27
 
28
  def chunk_text(self, text, chunk_size=CONFIG['CHUNK_SIZE']):
 
23
 
24
  def compute_embeddings(self):
25
  self.model = SentenceTransformer('all-MiniLM-L6-v2')
26
+ # tokenizer = self.model._first_module().tokenizer
27
+ # if tokenizer.pad_token is None:
28
+ # tokenizer.pad_token = tokenizer.eos_token
29
  self.chunk_embeddings = self.model.encode(self.corpus, convert_to_tensor=True)
30
 
31
  def chunk_text(self, text, chunk_size=CONFIG['CHUNK_SIZE']):