# Imported for its side effect only: needed by SpacyTextSplitter when using
# the en_core_web_trf pipeline.
import spacy_transformers  # noqa: F401
import spacy
from typing import Iterable, Iterator, List

from langchain.docstore.document import Document
from langchain.text_splitter import SpacyTextSplitter


class SpacySplitter:
    """Split LangChain documents into chunks with a spaCy pipeline.

    Thin wrapper around ``SpacyTextSplitter`` that also asks spaCy to use a
    GPU (when one is available) before the splitter is constructed.
    """

    def __init__(
        self,
        chunk_size: int = 1000,
        pipeline: str = "en_core_web_trf",
        gpu_id: int = 1,
    ) -> None:
        """Select the GPU and build the underlying splitter.

        Args:
            chunk_size: Forwarded to ``SpacyTextSplitter`` as ``chunk_size``.
            pipeline: Name of the spaCy pipeline the splitter should use.
            gpu_id: Device index handed to ``spacy.prefer_gpu``.
        """
        # GPU selection has to happen *before* the splitter (and therefore
        # the spaCy pipeline) is created; requesting the GPU afterwards does
        # not move an already-loaded pipeline onto it. prefer_gpu falls back
        # to CPU instead of raising, and reports which one was chosen.
        self.gpu_enabled: bool = spacy.prefer_gpu(gpu_id=gpu_id)
        self.splitter = SpacyTextSplitter(chunk_size=chunk_size, pipeline=pipeline)

    def split_documents(self, docs: Iterable[Document]) -> List[Document]:
        """Split ``docs`` into chunks using the configured splitter.

        Args:
            docs: Documents to split.

        Returns:
            Whatever ``SpacyTextSplitter.split_documents`` returns for
            ``docs`` (a list of chunk documents).
        """
        return self.splitter.split_documents(docs)