Spaces:
Runtime error
Runtime error
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
@author: quincy qiang
@license: Apache Licence
@file: create_knowledge.py
@time: 2023/04/18
@contact: [email protected]
@software: PyCharm
@description: build a FAISS vector store from a Chinese Wikipedia text dump (one document per line)
"""
from langchain.docstore.document import Document
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from tqdm import tqdm

# Example: importing Chinese Wikipedia data into a FAISS vector store.
#
# Pipeline: read the corpus one line at a time, wrap each stripped line in a
# Document tagged with a positional id, embed all documents with a local
# HuggingFace model, and save the resulting FAISS index to disk.

# Local filesystem path handed to HuggingFaceEmbeddings as the model name.
embedding_model_name = '/home/searchgpt/pretrained_models/ernie-gram-zh'
# NOTE(review): docs_path is not read anywhere in the visible script; the
# corpus below is opened through a path relative to the working directory.
docs_path = '/home/searchgpt/yq/Knowledge-ChatGLM/docs'
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)

docs = []
with open('docs/zh_wikipedia/zhwiki.sim.utf8', 'r', encoding='utf-8') as f:
    # Iterate the file object directly rather than f.readlines(): the lines
    # are identical, but the whole dump is no longer materialised as a
    # throwaway list next to `docs`.
    for idx, line in tqdm(enumerate(f)):
        # The "source" metadata is the zero-based line index in the corpus.
        metadata = {"source": f'doc_id_{idx}'}
        docs.append(Document(page_content=line.strip(), metadata=metadata))

# Embed every document and build the index, then write it to the cache dir.
vector_store = FAISS.from_documents(docs, embeddings)
vector_store.save_local('cache/zh_wikipedia/')