Spaces:
Runtime error
Runtime error
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@author:quincy qiang
@license: Apache License
@file: search.py
@time: 2023/04/17
@contact: [email protected]
@software: PyCharm
@description: Local knowledge-base vector store (FAISS) and DuckDuckGo web search service.
"""
import os | |
from duckduckgo_search import ddg | |
from duckduckgo_search.utils import SESSION | |
from langchain.document_loaders import UnstructuredFileLoader | |
from langchain.embeddings.huggingface import HuggingFaceEmbeddings | |
from langchain.vectorstores import FAISS | |
class SourceService:
    """Knowledge sources for retrieval: a local FAISS vector store and web search.

    The vector store is built from the ``.txt`` files found in ``docs_path``
    and is written to ``vector_store_path`` via ``save_local``. Web search
    goes through DuckDuckGo (``ddg``).
    """

    # Proxy applied to the shared duckduckgo_search SESSION when the caller
    # of ``search_web`` does not supply one.
    DEFAULT_PROXY = "socks5h://localhost:7890"

    def __init__(self, config):
        """Create the embedding model and remember the configured paths.

        Args:
            config: Settings object exposing ``embedding_model_name``,
                ``docs_path`` and ``vector_store_path``.
        """
        # Stays None until init_source_vector() or load_vector_store() runs.
        self.vector_store = None
        self.config = config
        self.embeddings = HuggingFaceEmbeddings(model_name=self.config.embedding_model_name)
        self.docs_path = self.config.docs_path
        self.vector_store_path = self.config.vector_store_path

    def init_source_vector(self):
        """Initialize the local knowledge-base vector store.

        Loads every ``.txt`` file directly inside ``docs_path`` (printing each
        file name as it is processed), builds a FAISS index from the loaded
        documents and saves it to ``vector_store_path``.
        """
        docs = []
        for file_name in os.listdir(self.docs_path):
            if not file_name.endswith('.txt'):
                continue
            print(file_name)
            loader = UnstructuredFileLoader(
                os.path.join(self.docs_path, file_name), mode="elements"
            )
            docs.extend(loader.load())
        self.vector_store = FAISS.from_documents(docs, self.embeddings)
        self.vector_store.save_local(self.vector_store_path)

    def add_document(self, document_path):
        """Add one document to the current vector store and save the store.

        ``self.vector_store`` must already be set (by ``init_source_vector``
        or ``load_vector_store``); it is ``None`` right after construction.

        Args:
            document_path: Path of the file to load and index.
        """
        loader = UnstructuredFileLoader(document_path, mode="elements")
        new_docs = loader.load()
        self.vector_store.add_documents(new_docs)
        self.vector_store.save_local(self.vector_store_path)

    def load_vector_store(self, path=None):
        """Load a saved FAISS store and make it the current vector store.

        Args:
            path: Location of the saved store. When ``None`` (the default),
                ``self.vector_store_path`` is used.

        Returns:
            The loaded vector store (also kept in ``self.vector_store``).
        """
        store_path = self.vector_store_path if path is None else path
        self.vector_store = FAISS.load_local(store_path, self.embeddings)
        return self.vector_store

    def search_web(self, query, proxy=None):
        """Search DuckDuckGo and return the concatenated result bodies.

        Args:
            query: Search query passed to ``ddg``.
            proxy: Proxy URL used for both HTTP and HTTPS requests. When
                ``None`` (the default), ``DEFAULT_PROXY`` is used.

        Returns:
            The ``body`` text of every result joined together with no
            separator, or ``''`` when the search returns nothing.
        """
        proxy_url = self.DEFAULT_PROXY if proxy is None else proxy
        # NOTE: SESSION is shared module-level state in duckduckgo_search, so
        # this assignment affects every later search made through it.
        SESSION.proxies = {
            "http": proxy_url,
            "https": proxy_url,
        }
        results = ddg(query)
        if not results:
            return ''
        # Tolerate results whose 'body' is missing or None instead of raising.
        return ''.join(result.get('body') or '' for result in results)
# Example usage (manual smoke test, kept for reference):
# if __name__ == '__main__':
#     config = LangChainCFG()
#     source_service = SourceService(config)
#     source_service.init_source_vector()
#     search_result = source_service.vector_store.similarity_search_with_score('科比')
#     print(search_result)
#
#     source_service.add_document('/home/searchgpt/yq/Knowledge-ChatGLM/docs/added/科比.txt')
#     search_result = source_service.vector_store.similarity_search_with_score('科比')
#     print(search_result)
#
#     vector_store = source_service.load_vector_store()
#     search_result = source_service.vector_store.similarity_search_with_score('科比')
#     print(search_result)