from tasks.base_task import BaseTask
from utils.llama_index_utils import setup_directories
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, StorageContext


class DataIngestionTask(BaseTask):
    """Task that builds a vector index from a directory of documents.

    The task locates its own input: the data and persistence directories
    are obtained from ``setup_directories()`` rather than from the caller.
    """

    def load_input(self, input_data):
        """Resolve the directories this task works with.

        Args:
            input_data: Ignored. The task handles its own input (documents
                in the data directory).
        """
        # No input data required; the directories come from the shared helper.
        self.data_dir, self.persist_dir = setup_directories()

    def process(self):
        """Read the documents, build the index, and persist it.

        Reads every document under ``self.data_dir``, builds a
        ``VectorStoreIndex`` from them (stored on ``self.index``), and
        persists the index's storage context to ``self.persist_dir``.
        ``load_input`` must have been called first so both directory
        attributes exist.
        """
        documents = SimpleDirectoryReader(self.data_dir).load_data()

        # Hand the explicitly created storage context to the index so the
        # context we build is the one that gets persisted below (previously
        # it was created and then silently discarded).
        storage_context = StorageContext.from_defaults()
        self.index = VectorStoreIndex.from_documents(
            documents,
            storage_context=storage_context,
        )
        self.index.storage_context.persist(persist_dir=self.persist_dir)

    def save_output(self, result):
        """Do nothing; this task has no output to return.

        Args:
            result: Ignored.
        """
        # The index was already persisted to disk in process().
        pass