Spaces:
Runtime error
Runtime error
| from sklearn.datasets import fetch_20newsgroups | |
class DocumentRetriever:
    """Keyword-based retriever over an in-memory list of text documents.

    Documents are held in ``self.documents``; the list starts empty and is
    filled by :meth:`load_documents`. Matching in :meth:`retrieve` is a
    plain case-insensitive substring test.
    """

    def __init__(self):
        # Raw document texts; empty until load_documents() is called.
        self.documents = []

    def load_documents(self, subset_size=500):
        """Load a subset of the 20 Newsgroups dataset.

        Args:
            subset_size: Number of leading documents to keep from the
                fetched dataset (applied as a slice bound).

        Side effects:
            Replaces ``self.documents`` and prints how many were loaded.
        """
        newsgroups_data = fetch_20newsgroups(subset='all')
        # Keep only the first `subset_size` documents.
        self.documents = newsgroups_data.data[:subset_size]
        print(f"Loaded {len(self.documents)} documents.")

    def retrieve(self, query):
        """Return the documents that contain ``query``, ignoring case.

        Args:
            query: Text to look for inside each document.

        Returns:
            A list of the matching documents in their stored order. If no
            documents have been loaded, a single-element list holding the
            message ``"Document retrieval is not initialized."`` is
            returned instead.
        """
        if not self.documents:
            return ["Document retrieval is not initialized."]

        # Lower-case the query once instead of once per document.
        needle = query.lower()
        return [doc for doc in self.documents if needle in doc.lower()]