import pickle

from langchain.schema import Document
from autorag.data.corpus import langchain_documents_to_parquet

# Build an evaluation corpus: read pickled case batches, wrap each sentence of
# the first batches in a langchain Document, and write them out as parquet.

# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only run this against a trusted, locally produced cases.pkl.
# NOTE(review): the directory name in this path looks mis-encoded (possibly a
# non-ASCII folder name damaged in transit) -- confirm against the real
# location on disk before running.
with open("/Users/anpigon/Documents/Embed/αα ₯αΈαα ―α«αα ‘α«α α ¨/cases.pkl", "rb") as file:
    data = pickle.load(file)

print(len(data))  # original note: 2736 batches in total

docs = []
# Only the first 100 batches are used. Bounding by len(data) avoids an
# IndexError when the pickle holds fewer than 100 entries.
for i in range(min(100, len(data))):
    # Element 1 of each batch is iterated as that batch's sentences.
    for sentence in data[i][1]:
        print(sentence)
        doc = Document(page_content=sentence)
        docs.append(doc)

langchain_documents_to_parquet(docs, "evaluation/data/corpus.parquet")