unhcr / tools.py
elyxlz
update prompt
15524d6
import faiss
import pickle
import os
from PyPDF2 import PdfReader
import glob
from pathlib import Path
import re
import requests
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain import PromptTemplate
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
import dotenv
dotenv.load_dotenv()
class SemanticSearch:
    """Callable that queries a pickled vector store and returns the hit text.

    Construction unpickles the store from ``store_path``. Calling the instance
    runs ``similarity_search_with_score`` on that store and joins the
    ``page_content`` of every hit into a single string.
    """

    def __init__(
        self,
        threshold: float,
        with_source=False,
        k=5,
        store_path='./data/store.pkl',
    ):
        """Load the store and remember the search settings.

        Args:
            threshold: Kept on the instance as ``self.threshold``.
                NOTE(review): nothing in this class reads it, so hits are
                currently NOT filtered by score -- confirm whether a score
                cut-off was intended (and in which direction) before adding one.
            with_source: When true, each hit is followed by a ``Source:`` line
                holding the base name of ``metadata['source']``.
            k: Passed as ``k`` to ``similarity_search_with_score``.
            store_path: Path of the pickle file to load. Defaults to the
                location that used to be hard-coded.

        Raises:
            OSError: If ``store_path`` cannot be opened.
        """
        self.threshold = threshold
        self.with_source = with_source
        self.k = k
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file. Only point store_path at files produced by a trusted process.
        with open(store_path, 'rb') as f:
            self.db = pickle.load(f)

    def __call__(self, query):
        """Return the text of the hits for ``query``, or ``None`` if there are none.

        Hits are separated by three newlines. With ``with_source`` enabled each
        hit has the form ``<page_content>\\n\\nSource:<basename>\\n``.
        """
        hits = self.db.similarity_search_with_score(query, k=self.k)
        if not hits:
            return None

        # Each hit is indexed as (document, score); only the document is used.
        docs = [hit[0] for hit in hits]
        if not self.with_source:
            return '\n\n\n'.join(doc.page_content for doc in docs)

        return '\n\n\n'.join(
            doc.page_content
            + '\n\nSource:'
            + os.path.basename(str(doc.metadata['source']))
            + '\n'
            for doc in docs
        )
class ContentSearch():
    """Callable that feeds search results into a prompt template.

    The query is handed to ``semantic_search``; whatever it returns is
    inserted into ``prompt_template`` through ``format(content=...)``.
    """

    def __init__(
        self,
        semantic_search,
        prompt_template,
    ):
        """Keep the collaborators used on each call.

        Args:
            semantic_search: Callable taking the query and returning the
                retrieved content, or ``None`` when nothing was found.
            prompt_template: Object exposing ``format(content=...)``
                (presumably a LangChain ``PromptTemplate`` -- TODO confirm).
        """
        self.prompt_template = prompt_template
        self.semantic_search = semantic_search

    def __call__(self, query):
        """Return the formatted prompt, or ``"No results found"`` on no content."""
        retrieved = self.semantic_search(query)
        # Only None means "no results"; an empty string is still formatted.
        if retrieved is None:
            return "No results found"
        return self.prompt_template.format(content=retrieved)