Spaces:
Runtime error
Runtime error
File size: 1,002 Bytes
bd111f7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
#!/usr/bin/env python
# -*- coding:utf-8 _*-
"""
@author:quincy qiang
@license: Apache Licence
@file: create_knowledge.py
@time: 2023/04/18
@contact: [email protected]
@software: PyCharm
@description: coding..
"""
from langchain.docstore.document import Document
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from tqdm import tqdm
# 中文Wikipedia数据导入示例:
embedding_model_name = '/home/searchgpt/pretrained_models/ernie-gram-zh'
docs_path = '/home/searchgpt/yq/Knowledge-ChatGLM/docs'
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
docs = []
with open('docs/zh_wikipedia/zhwiki.sim.utf8', 'r', encoding='utf-8') as f:
for idx, line in tqdm(enumerate(f.readlines())):
metadata = {"source": f'doc_id_{idx}'}
docs.append(Document(page_content=line.strip(), metadata=metadata))
vector_store = FAISS.from_documents(docs, embeddings)
vector_store.save_local('cache/zh_wikipedia/')
|