Spaces:
Runtime error
Runtime error
File size: 2,964 Bytes
96cd96f 3c24b5a 96cd96f a10fbed 96cd96f a10fbed 96cd96f 3c24b5a 2a7b7b1 65d97b1 96cd96f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author:quincy qiang
@license: Apache Licence
@file: search.py
@time: 2023/04/17
@contact: [email protected]
@software: PyCharm
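@description: Knowledge-source service: builds and queries a local FAISS vector store and fetches web search snippets via DuckDuckGo.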
"""
import os
from duckduckgo_search import ddg
from langchain.document_loaders import UnstructuredFileLoader
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
class SourceService(object):
    """Knowledge sources for retrieval: a local FAISS vector store and web search.

    The vector store is built from ``.txt`` files found in ``config.docs_path``
    and saved to ``config.vector_store_path``. Web search goes through the
    ``ddg`` function of ``duckduckgo_search``.
    """

    def __init__(self, config):
        """Create the service from a configuration object.

        :param config: object exposing the attributes ``embedding_model_name``,
            ``docs_path`` and ``vector_store_path``.
        """
        # Stays None until init_source_vector() or load_vector_store() runs.
        self.vector_store = None
        self.config = config
        self.embeddings = HuggingFaceEmbeddings(model_name=self.config.embedding_model_name)
        self.docs_path = self.config.docs_path
        self.vector_store_path = self.config.vector_store_path

    def init_source_vector(self):
        """Initialize the local knowledge-base vectors.

        Loads every ``.txt`` file directly inside ``self.docs_path``, builds a
        FAISS store from the loaded documents and saves it to
        ``self.vector_store_path``.

        :return: None
        """
        docs = []
        for file_name in os.listdir(self.docs_path):
            if not file_name.endswith('.txt'):
                continue
            print(file_name)
            file_path = os.path.join(self.docs_path, file_name)
            loader = UnstructuredFileLoader(file_path, mode="elements")
            # Separate name for the loaded documents so the loop variable
            # (the file name) is not rebound to a list.
            docs.extend(loader.load())
        self.vector_store = FAISS.from_documents(docs, self.embeddings)
        self.vector_store.save_local(self.vector_store_path)

    def add_document(self, document_path):
        """Add one document to the current vector store and save the store.

        ``self.vector_store`` must already be set, i.e. ``init_source_vector``
        or ``load_vector_store`` has to be called first.

        :param document_path: path of the file to load and index.
        :return: None
        """
        loader = UnstructuredFileLoader(document_path, mode="elements")
        new_docs = loader.load()
        self.vector_store.add_documents(new_docs)
        self.vector_store.save_local(self.vector_store_path)

    def load_vector_store(self, path=None):
        """Load a saved FAISS store and make it the current vector store.

        :param path: location of the saved store; when ``None`` (the default)
            ``self.vector_store_path`` is used.
        :return: the loaded vector store, also kept in ``self.vector_store``.
        """
        store_path = self.vector_store_path if path is None else path
        self.vector_store = FAISS.load_local(store_path, self.embeddings)
        return self.vector_store

    @staticmethod
    def _join_result_bodies(results):
        """Concatenate the ``'body'`` text of search results into one string.

        :param results: iterable of result dicts, or a falsy value when the
            search produced nothing.
        :return: the bodies joined without a separator; ``''`` when there are
            no results. Entries with a missing or empty body are skipped.
        """
        if not results:
            return ''
        return ''.join(result.get('body') or '' for result in results)

    def search_web(self, query):
        """Search the web for ``query`` and return the concatenated snippets.

        This is a best-effort lookup: any failure is reported on stdout and an
        empty string is returned instead of raising.

        :param query: search string passed to ``ddg``.
        :return: concatenated ``'body'`` text of all results, or ``''``.
        """
        # SESSION.proxies = {
        #     "http": f"socks5h://localhost:7890",
        #     "https": f"socks5h://localhost:7890"
        # }
        try:
            return self._join_result_bodies(ddg(query))
        except Exception:
            # Broad on purpose: this method must never raise.
            print(f"网络检索异常:{query}")
            return ''
# if __name__ == '__main__':
# config = LangChainCFG()
# source_service = SourceService(config)
# source_service.init_source_vector()
# search_result = source_service.vector_store.similarity_search_with_score('科比')
# print(search_result)
#
# source_service.add_document('/home/searchgpt/yq/Knowledge-ChatGLM/docs/added/科比.txt')
# search_result = source_service.vector_store.similarity_search_with_score('科比')
# print(search_result)
#
# vector_store=source_service.load_vector_store()
# search_result = source_service.vector_store.similarity_search_with_score('科比')
# print(search_result)
|