|
import faiss |
|
import pickle |
|
import os |
|
from PyPDF2 import PdfReader |
|
import glob |
|
from pathlib import Path |
|
import re |
|
import requests |
|
|
|
from langchain.chains import LLMChain |
|
from langchain.llms import OpenAI |
|
from langchain import PromptTemplate |
|
|
|
from langchain.vectorstores import FAISS |
|
from langchain.embeddings import OpenAIEmbeddings |
|
|
|
import dotenv |
|
dotenv.load_dotenv() |
|
|
|
|
|
class SemanticSearch:
    """Look up the passages closest to a query in a pickled vector store.

    The store is unpickled once at construction time and kept on ``self.db``.
    Calling the instance runs ``similarity_search_with_score`` on that store
    and returns the matching passages joined into a single string.
    """

    def __init__(
        self,
        threshold: float,
        with_source=False,
        k=5,
        store_path='./data/store.pkl',
    ):
        """Load the vector store and remember the search settings.

        Args:
            threshold: Stored on the instance as ``self.threshold``.
                NOTE(review): no code in this class reads it, so results are
                not filtered by score — confirm the intended comparison
                direction before wiring it in.
            with_source: When true, each passage is followed by a
                ``Source:`` line holding the base name of its
                ``metadata['source']`` value.
            k: Value forwarded as ``k`` to ``similarity_search_with_score``.
            store_path: Location of the pickled store. Defaults to the path
                that used to be hard-coded, so existing callers are
                unaffected.

        Raises:
            OSError: If ``store_path`` cannot be opened for reading.
        """
        self.threshold = threshold
        self.with_source = with_source
        self.k = k

        # SECURITY: unpickling runs whatever code the file embeds. Only point
        # ``store_path`` at a store produced by a trusted process.
        with open(store_path, 'rb') as f:
            self.db = pickle.load(f)

    def __call__(self, query):
        """Return the passages matching ``query`` as one string.

        Args:
            query: Passed unchanged to the store's
                ``similarity_search_with_score``.

        Returns:
            The passages separated by three newlines, or ``None`` when the
            store returns no hits.
        """
        hits = self.db.similarity_search_with_score(query, k=self.k)
        if not hits:
            return None

        # Each hit is indexed as hit[0] for the document; the remaining
        # element(s) are presumably the score and are not used here.
        if not self.with_source:
            passages = [hit[0].page_content for hit in hits]
        else:
            passages = [
                hit[0].page_content
                + '\n\nSource:'
                + os.path.basename(str(hit[0].metadata['source']))
                + '\n'
                for hit in hits
            ]

        return '\n\n\n'.join(passages)
|
|
|
|
|
class ContentSearch:
    """Wrap a search callable and render its result through a template."""

    def __init__(self, semantic_search, prompt_template):
        """Keep the collaborators used when the instance is called.

        Args:
            semantic_search: Callable invoked with the query; a ``None``
                return value is treated as "nothing found".
            prompt_template: Object whose ``format(content=...)`` method
                produces the final text.
        """
        self.prompt_template = prompt_template
        self.semantic_search = semantic_search

    def __call__(self, query):
        """Search for ``query`` and return the formatted prompt.

        Returns:
            The template formatted with the retrieved content, or the
            literal string ``"No results found"`` when the search callable
            returns ``None``.
        """
        retrieved = self.semantic_search(query)
        if retrieved is not None:
            return self.prompt_template.format(content=retrieved)
        return "No results found"
|
|