Spaces:

Sunbird
/

acres

Sleeping

App Files Files Community

Patrick Walukagga commited on Sep 11, 2024

Commit

7d19cfc

1 Parent(s): 59d3a91

Adding zotero manager

Browse files

Files changed (8) hide show

app.py +39 -1
config.py +4 -5
data/zotero-collection-pastan_zotero_items.json +0 -0
requirements.txt +5 -1
study_files.json +6 -0
utils/helpers.py +88 -0
utils/prompts.py +7 -2
utils/zotero_manager.py +644 -0

app.py CHANGED Viewed

@@ -1,11 +1,14 @@
 import json
 from typing import List, Tuple
 import gradio as gr
 from config import STUDY_FILES
 from rag.rag_pipeline import RAGPipeline
-from utils.helpers import generate_follow_up_questions
 from utils.prompts import (
     highlight_prompt,
     evidence_based_prompt,
@@ -14,12 +17,45 @@ from utils.prompts import (
 import openai
 from config import STUDY_FILES, OPENAI_API_KEY
 openai.api_key = OPENAI_API_KEY
 # Cache for RAG pipelines
 rag_cache = {}
 def get_rag_pipeline(study_name: str) -> RAGPipeline:
     """Get or create a RAGPipeline instance for the given study."""
@@ -66,6 +102,8 @@ def update_interface(study_name: str) -> Tuple[str, gr.update, gr.update, gr.upd
     study_info = get_study_info(study_name)
     questions = sample_questions.get(study_name, [])[:3]
     visible_questions = [gr.update(visible=True, value=q) for q in questions]
     hidden_questions = [gr.update(visible=False) for _ in range(3 - len(questions))]
     return (study_info, *visible_questions, *hidden_questions)

 import json
 from typing import List, Tuple
+import os
 import gradio as gr
+from dotenv import load_dotenv
+from slugify import slugify
 from config import STUDY_FILES
 from rag.rag_pipeline import RAGPipeline
+from utils.helpers import generate_follow_up_questions, append_to_study_files
 from utils.prompts import (
     highlight_prompt,
     evidence_based_prompt,
 import openai
 from config import STUDY_FILES, OPENAI_API_KEY
+from utils.zotero_manager import ZoteroManager
+load_dotenv()
 openai.api_key = OPENAI_API_KEY
 # Cache for RAG pipelines
 rag_cache = {}
+zotero_library_id = os.getenv("ZOTERO_LIBRARY_ID")
+zotero_library_type = "user"  # or "group"
+zotero_api_access_key = os.getenv("ZOTERO_API_ACCESS_KEY")
+zotero_manager = ZoteroManager(
+    zotero_library_id, zotero_library_type, zotero_api_access_key
+)
+zotero_collections = zotero_manager.get_collections()
+zotero_collection_lists = zotero_manager.list_zotero_collections(zotero_collections)
+filtered_zotero_collection_lists = (
+    zotero_manager.filter_and_return_collections_with_items(zotero_collection_lists)
+)
+for collection in filtered_zotero_collection_lists:
+    collection_name = collection.get("name")
+    if collection_name not in STUDY_FILES:
+        collection_key = collection.get("key")
+        collection_items = zotero_manager.get_collection_items(collection_key)
+        zotero_collection_items = (
+            zotero_manager.get_collection_zotero_items_by_key(collection_key)
+        )
+        #### Export zotero collection items to json ####
+        zotero_items_json = zotero_manager.zotero_items_to_json(zotero_collection_items)
+        export_file = f"{slugify(collection_name)}_zotero_items.json"
+        zotero_manager.write_zotero_items_to_json_file(
+            zotero_items_json, f"data/{export_file}"
+        )
+        append_to_study_files("study_files.json", collection_name, f"data/{export_file}")
 def get_rag_pipeline(study_name: str) -> RAGPipeline:
     """Get or create a RAGPipeline instance for the given study."""
     study_info = get_study_info(study_name)
     questions = sample_questions.get(study_name, [])[:3]
+    if not questions:
+        questions = sample_questions.get("General", [])[:3]
     visible_questions = [gr.update(visible=True, value=q) for q in questions]
     hidden_questions = [gr.update(visible=False) for _ in range(3 - len(questions))]
     return (study_info, *visible_questions, *hidden_questions)

config.py CHANGED Viewed

@@ -2,12 +2,11 @@ import os
 from dotenv import load_dotenv
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-STUDY_FILES = {
-    "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
-    "Ebola Virus": "data/ebola_virus_zotero_items.json",
-    "Gene Xpert": "data/gene_xpert_zotero_items.json",
-}

 from dotenv import load_dotenv
+from utils.helpers import read_study_files
 load_dotenv()
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
+STUDY_FILES = read_study_files(("study_files.json"))

data/zotero-collection-pastan_zotero_items.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

@@ -1,7 +1,11 @@
 fastapi==0.112.2
 gradio
 llama-index
 openai
 pandas
 pydantic
-python-dotenv

+chromadb==0.5.5
 fastapi==0.112.2
 gradio
 llama-index
+nest-asyncio==1.6.0
 openai
 pandas
 pydantic
+python-dotenv
+pyzotero
+python-slugify

study_files.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+    "Vaccine coverage": "data/vaccine_coverage_zotero_items.json",
+    "Ebola Virus": "data/ebola_virus_zotero_items.json",
+    "GeneXpert": "data/gene_xpert_zotero_items.json",
+    "Zotero Collection Pastan": "data/zotero-collection-pastan_zotero_items.json"
+}

utils/helpers.py CHANGED Viewed

@@ -7,6 +7,94 @@ from utils.prompts import (
     VaccineCoverageVariables,
     StudyCharacteristics,
 )
 def generate_follow_up_questions(

     VaccineCoverageVariables,
     StudyCharacteristics,
 )
+import json
+def read_study_files(file_path):
+    """
+    Reads a JSON file and returns the parsed JSON data.
+    Args:
+        file_path (str): The path to the JSON file to be read.
+    Returns:
+        dict: The data from the JSON file as a Python dictionary.
+    Raises:
+        FileNotFoundError: If the file is not found at the provided path.
+        json.JSONDecodeError: If the file contents are not valid JSON.
+    Example:
+        Given a JSON file 'study_files.json' with content like:
+        {
+            "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
+            "Ebola Virus": "data/ebola_virus_zotero_items.json",
+            "Gene Xpert": "data/gene_xpert_zotero_items.json"
+        }
+        Calling `read_json_file("study_files.json")` will return:
+        {
+            "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
+            "Ebola Virus": "data/ebola_virus_zotero_items.json",
+            "Gene Xpert": "data/gene_xpert_zotero_items.json"
+        }
+    """
+    try:
+        with open(file_path, 'r') as file:
+            data = json.load(file)
+        return data
+    except FileNotFoundError as e:
+        raise FileNotFoundError(f"The file at path {file_path} was not found.") from e
+    except json.JSONDecodeError as e:
+        raise ValueError(f"The file at path {file_path} does not contain valid JSON.") from e
+def append_to_study_files(file_path, new_key, new_value):
+    """
+    Appends a new key-value entry to an existing JSON file.
+    Args:
+        file_path (str): The path to the JSON file.
+        new_key (str): The new key to add to the JSON file.
+        new_value (any): The value associated with the new key (can be any valid JSON data type).
+    Raises:
+        FileNotFoundError: If the file is not found at the provided path.
+        json.JSONDecodeError: If the file contents are not valid JSON.
+        IOError: If the file cannot be written.
+    Example:
+        If the file 'study_files.json' initially contains:
+        {
+            "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
+            "Ebola Virus": "data/ebola_virus_zotero_items.json"
+        }
+        Calling `append_to_json_file("study_files.json", "Gene Xpert", "data/gene_xpert_zotero_items.json")`
+        will modify the file to:
+        {
+            "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
+            "Ebola Virus": "data/ebola_virus_zotero_items.json",
+            "Gene Xpert": "data/gene_xpert_zotero_items.json"
+        }
+    """
+    try:
+        # Read the existing data from the file
+        with open(file_path, 'r') as file:
+            data = json.load(file)
+        # Append the new key-value pair to the dictionary
+        data[new_key] = new_value
+        # Write the updated data back to the file
+        with open(file_path, 'w') as file:
+            json.dump(data, file, indent=4)  # indent for pretty printing
+    except FileNotFoundError as e:
+        raise FileNotFoundError(f"The file at path {file_path} was not found.") from e
+    except json.JSONDecodeError as e:
+        raise ValueError(f"The file at path {file_path} does not contain valid JSON.") from e
+    except IOError as e:
+        raise IOError(f"Failed to write to the file at {file_path}.") from e
 def generate_follow_up_questions(

utils/prompts.py CHANGED Viewed

@@ -71,7 +71,7 @@ vaccine_coverage_prompt = PromptTemplate(
 )
 sample_questions = {
-    "Vaccine Coverage": [
         "What are the vaccine coverage rates reported in the study?",
         "Are there any reported adverse events following immunization (AEFI)?",
         "How does the study account for different vaccine types or schedules?",
@@ -84,12 +84,17 @@ sample_questions = {
         "Were there any ethical considerations or challenges reported?",
         "Create a structured table for each Ebola virus study, including the following information: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, SAMPLE_SIZE, PLASMA_TYPE, DOSAGE, FREQUENCY, SIDE_EFFECTS, VIRAL_LOAD_CHANGE, SURVIVAL_RATE, INCLUSION_CRITERIA, EXCLUSION_CRITERIA, SUBGROUP_ANALYSES, FOLLOW_UP_DURATION, LONG_TERM_OUTCOMES, DISEASE_SEVERITY_ASSESSMENT, BIOSAFETY_MEASURES, ETHICAL_CONSIDERATIONS, and STUDY_COMMENTS.",
     ],
-    "Gene Xpert": [
         "What is the main objective of the study?",
         "What is the study design?",
         "What disease condition is being studied?",
         "Extract and present in a tabular format the following variables for each Gene Xpert study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, DISEASE_CONDITION, OBJECTIVE, OUTCOME_MEASURES, SENSITIVITY, SPECIFICITY, COST_COMPARISON, TURNAROUND_TIME, IMPLEMENTATION_CHALLENGES, PERFORMANCE_VARIATIONS, QUALITY_CONTROL, EQUIPMENT_ISSUES, PATIENT_OUTCOME_IMPACT, TRAINING_REQUIREMENTS, SCALABILITY_CONSIDERATIONS, and STUDY_COMMENTS.",
     ],
 }

 )
 sample_questions = {
+    "Vaccine coverage": [
         "What are the vaccine coverage rates reported in the study?",
         "Are there any reported adverse events following immunization (AEFI)?",
         "How does the study account for different vaccine types or schedules?",
         "Were there any ethical considerations or challenges reported?",
         "Create a structured table for each Ebola virus study, including the following information: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, SAMPLE_SIZE, PLASMA_TYPE, DOSAGE, FREQUENCY, SIDE_EFFECTS, VIRAL_LOAD_CHANGE, SURVIVAL_RATE, INCLUSION_CRITERIA, EXCLUSION_CRITERIA, SUBGROUP_ANALYSES, FOLLOW_UP_DURATION, LONG_TERM_OUTCOMES, DISEASE_SEVERITY_ASSESSMENT, BIOSAFETY_MEASURES, ETHICAL_CONSIDERATIONS, and STUDY_COMMENTS.",
     ],
+    "GeneXpert": [
         "What is the main objective of the study?",
         "What is the study design?",
         "What disease condition is being studied?",
         "Extract and present in a tabular format the following variables for each Gene Xpert study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, DISEASE_CONDITION, OBJECTIVE, OUTCOME_MEASURES, SENSITIVITY, SPECIFICITY, COST_COMPARISON, TURNAROUND_TIME, IMPLEMENTATION_CHALLENGES, PERFORMANCE_VARIATIONS, QUALITY_CONTROL, EQUIPMENT_ISSUES, PATIENT_OUTCOME_IMPACT, TRAINING_REQUIREMENTS, SCALABILITY_CONSIDERATIONS, and STUDY_COMMENTS.",
     ],
+    "General": [
+        "What is the main objective of the study?",
+        "What is the study design?",
+        "Extract and present in a tabular format the following variables for each study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, OBJECTIVE, and STUDY_COMMENTS.",
+    ],
 }

utils/zotero_manager.py ADDED Viewed

	@@ -0,0 +1,644 @@

+import json
+import os
+from typing import Any, Dict, List, Optional
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+from pyzotero import zotero
+from slugify import slugify
+load_dotenv()
+class ZoteroItem(BaseModel):
+    """
+    Pydantic model holding the metadata kept for one Zotero item.
+    Only ``key`` and ``title`` are required; every other field defaults to None.
+    """
+    key: str = Field(..., description="Unique key of the item")
+    title: str = Field(..., description="Title of the item")
+    abstract: Optional[str] = Field(None, description="Abstract or note of the item")
+    full_text: Optional[str] = Field(None, description="Full text of the item")
+    authors: Optional[List[str]] = Field(
+        None, description="List of authors"
+    )  # optional; defaults to None
+    doi: Optional[str] = Field(None, description="Digital Object Identifier (DOI)")
+    year: Optional[int] = Field(None, description="Publication year")
+    item_type: Optional[str] = Field(
+        None, description="Type of the item (e.g., journalArticle)"
+    )  # optional; defaults to None
+    url: Optional[str] = Field(None, description="URL of the item")
+class ZoteroCollection(BaseModel):
+    """
+    Pydantic model summarising one Zotero collection: its key, its display
+    name and how many items it contains. All three fields are required.
+    """
+    key: str = Field(..., description="Unique identifier for the collection.")
+    name: str = Field(..., description="Name of the collection.")
+    number_of_items: int = Field(
+        ..., description="Number of items contained in the collection."
+    )
+class ZoteroManager:
+    '''
+    #### Example Usage ####
+    zotero_library_id = os.getenv("ZOTERO_LIBRARY_ID")
+    zotero_library_type = "user"  # or "group"
+    zotero_api_access_key = os.getenv("ZOTERO_API_ACCESS_KEY")
+    zotero_manager = ZoteroManager(zotero_library_id, zotero_library_type, zotero_api_access_key)
+    #### GET Zotero topics (Collections) ####
+    zotero_collections = zotero_manager.get_collections()
+    # print(zotero_collections)
+    #### Zotero collections parsed with pydantic ####
+    zotero_collection_lists = zotero_manager.list_zotero_collections(zotero_collections)
+    # print(zotero_collection_lists)
+    """
+    [
+        ZoteroCollection(key='IXU5ZWRM', name='RR 10', number_of_items=0),
+        ZoteroCollection(key='G6AZZGPQ', name='RR 9', number_of_items=0),
+        ZoteroCollection(key='DZ45SJHF', name='RR 8', number_of_items=0),
+        ZoteroCollection(key='DM5FVG74', name='RR 7', number_of_items=0),
+        ZoteroCollection(key='43N5CI48', name='RR 6', number_of_items=0),
+        ZoteroCollection(key='2TCX6JC2', name='RR 5', number_of_items=0),
+        ZoteroCollection(key='QVSNAJWV', name='RR 4', number_of_items=0),
+        ZoteroCollection(key='96UJANPP', name='Ebola Virus', number_of_items=17),
+        ZoteroCollection(key='UB7AEMB6', name='GeneXpert', number_of_items=31),
+        ZoteroCollection(key='UDQ9JSD9', name='Vaccine coverage', number_of_items=22),
+        ZoteroCollection(key='SGNLNIAT', name='Zotero Collection Pastan', number_of_items=227)
+    ]
+    """
+    #### Collections with items ####
+    filtered_zotero_collection_lists = zotero_manager.filter_and_return_collections_with_items(zotero_collection_lists)
+    # print(filtered_zotero_collection_lists)
+    """
+    [
+        {'key': '96UJANPP', 'name': 'Ebola Virus', 'number_of_items': 17},
+        {'key': 'UB7AEMB6', 'name': 'GeneXpert', 'number_of_items': 31},
+        {'key': 'UDQ9JSD9', 'name': 'Vaccine coverage', 'number_of_items': 22},
+        {'key': 'SGNLNIAT',
+        'name': 'Zotero Collection Pastan',
+        'number_of_items': 227}
+    ]
+    """
+    #### Collection by name from a list of zotero collections
+    ebola_virus_collection = zotero_manager.find_zotero_collection_by_name(zotero_collection_lists, "Ebola Virus")
+    # print(ebola_virus_collection)
+    """ZoteroCollection(key='96UJANPP', name='Ebola Virus', number_of_items=17)"""
+    # print(ebola_virus_collection.model_dump())
+    """{'key': '96UJANPP', 'name': 'Ebola Virus', 'number_of_items': 17}"""
+    #### Get single collection by key ####
+    ebola_virus_collection_key = "96UJANPP" # Ebola Virus
+    ebola_virus_collection = zotero_manager.get_collection_by_key(ebola_virus_collection_key)
+    # print(ebola_virus_collection)
+    """
+    {
+        'key': '96UJANPP',
+        'version': 72,
+        'library': {'type': 'user',
+        'id': 11201324,
+        'name': 'pjlus',
+        'links': {'alternate': {'href': 'https://www.zotero.org/pjlus',
+            'type': 'text/html'}}},
+        'links': {'self': {'href': 'https://api.zotero.org/users/11201324/collections/96UJANPP',
+        'type': 'application/json'},
+        'alternate': {'href': 'https://www.zotero.org/pjlus/collections/96UJANPP',
+        'type': 'text/html'}},
+        'meta': {'numCollections': 0, 'numItems': 17},
+        'data': {'key': '96UJANPP',
+        'version': 72,
+        'name': 'Ebola Virus',
+        'parentCollection': False,
+        'relations': {}}
+    }
+    """
+    #### Get collection items by collection key ####
+    ebora_virus_collection_items = zotero_manager.get_collection_items(ebola_virus_collection_key)
+    print(len(ebora_virus_collection_items))
+    # print(ebora_virus_collection_items[:2])
+    #### Getting zotero collection items and full text
+    # Here the collections have been parsed using the zotero item pydantic model defined in the zotero manager.
+    ####
+    ebora_virus_zotero_collection_items = zotero_manager.get_collection_zotero_items_by_key(ebola_virus_collection_key)
+    # print(len(ebora_virus_zotero_collection_items))
+    # print(ebora_virus_zotero_collection_items[0])
+    #### Get item children (attachments)
+    # Listed items in zotero are items together with their attachments (pdf content)
+    ####
+    zotero_manager.get_item_children("2Q7HFERL")
+    #### Get an item full text ####
+    zotero_manager.get_item_full_text("BMYMEW76")["content"]
+    #### Save the item pdf content to disc ####
+    ## Function to save a pdf file
+    zotero_manager.save_item_file("BMYMEW76")
+    #### Export zotero collection items to json ####
+    ebora_virus_zotero_items_json = zotero_manager.zotero_items_to_json(ebora_virus_zotero_collection_items)
+    print(len(ebora_virus_zotero_items_json))
+    # print(ebora_virus_zotero_items_json[0])
+    ## Save to disc
+    zotero_manager.write_zotero_items_to_json_file(ebora_virus_zotero_items_json, "zotero_data/ebora_virus_zotero_items.json")
+    '''
+    def __init__(self, library_id: str, library_type: str, api_key: str):
+        self.zot = zotero.Zotero(library_id, library_type, api_key)
+    def create_zotero_item_from_json(self, json_obj: Dict[str, Any]) -> ZoteroItem:
+        """
+        Creates a ZoteroItem instance from a JSON object.
+        Args:
+            json_obj (Dict[str, Any]): A JSON object containing the Zotero item data.
+                The JSON structure is expected to have a 'data' field which includes
+                the metadata for the Zotero item.
+        Returns:
+            ZoteroItem: An instance of ZoteroItem populated with the data extracted
+                from the JSON object. The fields include key, title, abstract, authors,
+                doi, year, item_type, and url.
+        """
+        data = json_obj.get("data", {})
+        # Extract item full text from it's attachement
+        key = data.get("key")
+        full_text = self.get_full_text_from_children(key)
+        # Extract the list of authors
+        authors = [
+            f"{creator.get('name', '')} {creator.get('firstName', '')} {creator.get('lastName', '')}".strip()
+            for creator in data.get("creators", [])
+            if creator.get("creatorType") == "author"
+        ]
+        # Create the ZoteroItem instance
+        zotero_item = ZoteroItem(
+            key=data.get("key"),
+            title=data.get("title"),
+            abstract=data.get("abstractNote"),
+            full_text=full_text,
+            authors=authors,
+            doi=data.get("DOI"),
+            # year=int(data.get('date', '').split('-')[0]) if data.get('date') else None,
+            item_type=data.get("itemType"),
+            url=data.get("url"),
+        )
+        return zotero_item
+    def create_zotero_collection(
+        self, collection_dict: Dict[str, Any]
+    ) -> ZoteroCollection:
+        """
+        Converts a dictionary representing a Zotero collection into a ZoteroCollection instance.
+        Args:
+            collection_dict (Dict[str, Any]): A dictionary containing data for a Zotero collection.
+                The expected structure includes keys 'data' and 'meta' from which relevant fields
+                such as 'key', 'name', and 'numItems' are extracted.
+        Returns:
+            ZoteroCollection: An instance of ZoteroCollection populated with the data extracted
+                from the input dictionary.
+        """
+        data = collection_dict.get("data", {})
+        meta = collection_dict.get("meta", {})
+        zotero_collection = ZoteroCollection(
+            key=data.get("key"),
+            name=data.get("name"),
+            number_of_items=meta.get("numItems", 0),
+        )
+        return zotero_collection
+    def list_zotero_collections(
+        self, collection_items: List[Dict[str, Any]]
+    ) -> List[ZoteroCollection]:
+        """
+        Converts a list of dictionaries representing Zotero collections into a list of ZoteroCollection instances.
+        Args:
+            collection_items (List[Dict[str, Any]]): A list of collection items, each containing data for a Zotero collection.
+                Each dictionary is expected to have a 'data' key with nested 'key' and 'name' fields, and a 'meta' key
+                with a 'numItems' field.
+        Returns:
+            List[ZoteroCollection]: A list of ZoteroCollection instances populated with the data extracted
+                from the input dictionaries.
+        """
+        collections = [
+            self.create_zotero_collection(collection_item)
+            for collection_item in collection_items
+        ]
+        return collections
+    def list_all_papers(self) -> List[ZoteroItem]:
+        """
+        Lists all papers (journal articles) in your Zotero library.
+        Returns:
+            List of ZoteroItem objects representing the papers in your library.
+        """
+        # print(self.zot.items())
+        results = self.zot.items(itemType="journalArticle")
+        # print(f"results: {results}")
+        papers = []
+        for item in results:
+            zotero_item = self.create_zotero_item_from_json(item)
+            papers.append(zotero_item)
+        return papers
+    def list_items(self, limit: int = 5):
+        return self.zot.items(limit=limit)
+    def query_items(self, query: str, limit: int = 10) -> List[ZoteroItem]:
+        """
+        Queries Zotero for items matching the given query.
+        Args:
+            query: The search query.
+            limit: Maximum number of items to return.
+        Returns:
+            List of ZoteroItem objects representing the search results.
+        """
+        results = self.zot.items(q=query, limit=limit)
+        return [
+            self.create_zotero_item_from_json(item) for item in results
+        ]  # Use ** to unpack the dictionary
+    def get_item_by_key(self, key: str) -> ZoteroItem:
+        """
+        Retrieves a Zotero item by its key.
+        Args:
+            key: The unique key of the item.
+        Returns:
+            ZoteroItem object representing the retrieved item.
+        """
+        item = self.zot.item(key)
+        return self.create_zotero_item_from_json(item)
+    def get_item_by_doi(self, doi: str) -> Optional[ZoteroItem]:
+        """
+        Searches for a Zotero item by its DOI.
+        Args:
+            doi: The DOI of the item.
+        Returns:
+            ZoteroItem object if found, otherwise None.
+        """
+        results = self.zot.items(q=doi)
+        for item in results:
+            if item["data"].get("DOI") == doi:
+                self.create_zotero_item_from_json(item)
+        return None
+    def get_item_tags(self, item_key: str) -> List[str]:
+        """
+        Retrieves the tags associated with a Zotero item.
+        Args:
+            item_key: The unique key of the item.
+        Returns:
+            List of strings representing the tags associated with the item.
+        """
+        return self.zot.item_tags(item_key)
+    def get_collections(self) -> List[Dict[str, Any]]:
+        """
+        Retrieves the list of collections in your Zotero library.
+        Returns:
+            List of dictionaries representing the collections.
+        """
+        return self.zot.collections()
+    def get_collection_by_key(self, collection_key: str) -> Dict[str, Any]:
+        """
+        Retrieves a collection by its key.
+        Args:
+            collection_key: The unique key of the collection.
+        Returns:
+            Dictionary representing the collection.
+        """
+        return self.zot.collection(collection_key)
+    def get_collection_items(self, collection_key: str) -> List[Dict[str, Any]]:
+        """
+        Retrieves the items in a collection.
+        Args:
+            collection_key: The unique key of the collection.
+        Returns:
+            List of dictionaries representing the items in the collection.
+        """
+        return self.zot.collection_items(collection_key, itemType="journalArticle")
+    def get_item_children(self, item_key: str) -> List[Dict[str, Any]]:
+        """
+        Retrieves the children of a Zotero item.
+        Args:
+            item_key: The unique key of the item.
+        Returns:
+            List of dictionaries representing the children of the item.
+        """
+        return self.zot.children(item_key)
+    def get_collection_zotero_items_by_key(
+        self, collection_key: str
+    ) -> List[ZoteroItem]:
+        """
+        Retrieves the items in a collection.
+        Args:
+            collection_key: The unique key of the collection.
+        Returns:
+            List of ZoteroItem objects representing the items in the collection.
+        """
+        items = self.zot.collection_items(collection_key, itemType="journalArticle")
+        return [self.create_zotero_item_from_json(item) for item in items]
+    def filter_and_return_collections_with_items(
+        self, zotero_collections: List[ZoteroCollection]
+    ) -> List[Dict[str, Any]]:
+        """
+        Filters a list of ZoteroCollection instances to return only those with more than one item,
+        and returns them as a list of dictionaries.
+        Args:
+          zotero_collections (List[CollectionModel]): A list of CollectionModel instances.
+        Returns:
+            List[Dict[str, Any]]: A list of dictionaries representing collections with more than one item.
+        """
+        filtered_collections = [
+            collection.model_dump()
+            for collection in zotero_collections
+            if collection.number_of_items > 0
+        ]
+        return filtered_collections
+    def find_zotero_collection_by_name(
+        self, zotero_collections: List[ZoteroCollection], name: str
+    ) -> ZoteroCollection:
+        """
+        Finds and returns a ZoteroCollection instance by its name.
+        Args:
+            zotero_collections (List[CollectionModel]): A list of CollectionModel instances.
+            name (str): The name of the collection to find.
+        Returns:
+            ZoteroCollection: The ZoteroCollection instance that matches the given name.
+        Raises:
+            ValueError: If no collection with the given name is found.
+        """
+        for collection in zotero_collections:
+            if collection.name == name:
+                return collection
+        raise ValueError(f"Collection with name '{name}' not found.")
+    def zotero_items_to_json(
+        self, zotero_items: List[ZoteroItem]
+    ) -> List[Dict[str, Any]]:
+        """
+        Converts a list of ZoteroItem instances into a JSON-compatible list of dictionaries.
+        Args:
+            zotero_items (List[ZoteroItem]): A list of ZoteroItem instances.
+        Returns:
+            List[Dict[str, Any]]: A list of dictionaries representing the Zotero items.
+                Each dictionary is a JSON-compatible representation of a ZoteroItem.
+        """
+        items = [item for item in zotero_items if item.abstract or item.full_text]
+        return [item.model_dump() for item in items]
+    def write_zotero_items_to_json_file(
+        self, zotero_items_json: List[Dict[str, Any]], file_path: str
+    ) -> None:
+        """
+        Writes a JSON object of Zotero items to a JSON file.
+        Args:
+            zotero_items_json (List[Dict[str, Any]]): A JSON-compatible list of dictionaries
+                representing Zotero items.
+            file_path (str): The file path where the JSON file should be written.
+        Returns:
+            None
+        """
+        with open(file_path, "w") as json_file:
+            json.dump(zotero_items_json, json_file, indent=2)
+    def get_item_full_text(self, key: str) -> Optional[dict]:
+        """
+        Retrieves an item by its key and dumps it file.
+        Args:
+              key: The unique key of the item.
+        Returns:
+              A dictionary containing the metadata for full text:
+        """
+        try:
+            return self.zot.fulltext_item(key)
+        except Exception as e:
+            print(f"Error: {str(e)}")
+            return None
+    def get_full_text_from_children(self, key: str) -> Optional[str]:
+        """
+        Retrieves an item by its key and dumps it file.
+        Args:
+              key: The unique key of the item.
+        Returns:
+              A text containing the metadata for full text:
+        """
+        children_items = self.get_item_children(key)
+        full_text = ""
+        if children_items:
+            for item in children_items:
+                if item.get("data", {}).get("itemType") == "attachment":
+                    content_dict = self.get_item_full_text(
+                        item.get("data", {}).get("key", "")
+                    )
+                    if content_dict is not None:
+                        content = content_dict.get("content", "")
+                        full_text += content + "\n"
+        return full_text
+    def save_item_file(self, key: str) -> None:
+        """
+        Retrieves an item by its key and dumps it file.
+        Args:
+              key: The unique key of the item.
+        """
+        item = self.zot.item(key)
+        zotero_item = self.create_zotero_item_from_json(item)
+        item_title = slugify(zotero_item.title)
+        try:
+            self.zot.dump(key, f"{item_title}.pdf", "zotero_data")
+        except Exception as e:
+            print(f"Error: {str(e)}")
+if __name__ == "__main__":
+    """Sample driver code"""
+    zotero_library_id = os.getenv("ZOTERO_LIBRARY_ID")
+    zotero_library_type = "user"  # or "group"
+    zotero_api_access_key = os.getenv("ZOTERO_API_ACCESS_KEY")
+    zotero_manager = ZoteroManager(
+        zotero_library_id, zotero_library_type, zotero_api_access_key
+    )
+    #### GET Zotero topics (Collections) ####
+    zotero_collections = zotero_manager.get_collections()
+    # print(zotero_collections)
+    #### Zotero collections parsed with pydantic ####
+    zotero_collection_lists = zotero_manager.list_zotero_collections(zotero_collections)
+    # print(zotero_collection_lists)
+    """
+    [
+        ZoteroCollection(key='IXU5ZWRM', name='RR 10', number_of_items=0),
+        ZoteroCollection(key='G6AZZGPQ', name='RR 9', number_of_items=0),
+        ZoteroCollection(key='DZ45SJHF', name='RR 8', number_of_items=0),
+        ZoteroCollection(key='DM5FVG74', name='RR 7', number_of_items=0),
+        ZoteroCollection(key='43N5CI48', name='RR 6', number_of_items=0),
+        ZoteroCollection(key='2TCX6JC2', name='RR 5', number_of_items=0),
+        ZoteroCollection(key='QVSNAJWV', name='RR 4', number_of_items=0),
+        ZoteroCollection(key='96UJANPP', name='Ebola Virus', number_of_items=17),
+        ZoteroCollection(key='UB7AEMB6', name='GeneXpert', number_of_items=31),
+        ZoteroCollection(key='UDQ9JSD9', name='Vaccine coverage', number_of_items=22),
+        ZoteroCollection(key='SGNLNIAT', name='Zotero Collection Pastan', number_of_items=227)
+    ]
+    """
+    #### Collections with items ####
+    filtered_zotero_collection_lists = (
+        zotero_manager.filter_and_return_collections_with_items(zotero_collection_lists)
+    )
+    # print(filtered_zotero_collection_lists)
+    """
+    [
+        {'key': '96UJANPP', 'name': 'Ebola Virus', 'number_of_items': 17},
+        {'key': 'UB7AEMB6', 'name': 'GeneXpert', 'number_of_items': 31},
+        {'key': 'UDQ9JSD9', 'name': 'Vaccine coverage', 'number_of_items': 22},
+        {'key': 'SGNLNIAT',
+        'name': 'Zotero Collection Pastan',
+        'number_of_items': 227}
+    ]
+    """
+    #### Collection by name from a list of zotero collections
+    ebola_virus_collection = zotero_manager.find_zotero_collection_by_name(
+        zotero_collection_lists, "Ebola Virus"
+    )
+    # print(ebola_virus_collection)
+    """ZoteroCollection(key='96UJANPP', name='Ebola Virus', number_of_items=17)"""
+    # print(ebola_virus_collection.model_dump())
+    """{'key': '96UJANPP', 'name': 'Ebola Virus', 'number_of_items': 17}"""
+    #### Get single collection by key ####
+    ebola_virus_collection_key = "96UJANPP"  # Ebola Virus
+    ebola_virus_collection = zotero_manager.get_collection_by_key(
+        ebola_virus_collection_key
+    )
+    # print(ebola_virus_collection)
+    """
+    {
+        'key': '96UJANPP',
+        'version': 72,
+        'library': {'type': 'user',
+        'id': 11201324,
+        'name': 'pjlus',
+        'links': {'alternate': {'href': 'https://www.zotero.org/pjlus',
+            'type': 'text/html'}}},
+        'links': {'self': {'href': 'https://api.zotero.org/users/11201324/collections/96UJANPP',
+        'type': 'application/json'},
+        'alternate': {'href': 'https://www.zotero.org/pjlus/collections/96UJANPP',
+        'type': 'text/html'}},
+        'meta': {'numCollections': 0, 'numItems': 17},
+        'data': {'key': '96UJANPP',
+        'version': 72,
+        'name': 'Ebola Virus',
+        'parentCollection': False,
+        'relations': {}}
+    }
+    """
+    #### Get collection items by collection key ####
+    ebora_virus_collection_items = zotero_manager.get_collection_items(
+        ebola_virus_collection_key
+    )
+    print(len(ebora_virus_collection_items))
+    # print(ebora_virus_collection_items[:2])
+    #### Getting zotero collection items and full text
+    # Here the collections have been parsed using the zotero item pydantic model defined in the zotero manager.
+    ####
+    ebora_virus_zotero_collection_items = (
+        zotero_manager.get_collection_zotero_items_by_key(ebola_virus_collection_key)
+    )
+    # print(len(ebora_virus_zotero_collection_items))
+    # print(ebora_virus_zotero_collection_items[0])
+    #### Get item children (attachments)
+    # Listed items in zotero are items together with their attachments (pdf content)
+    ####
+    zotero_manager.get_item_children("2Q7HFERL")
+    #### Get an item full text ####
+    zotero_manager.get_item_full_text("BMYMEW76")["content"]
+    #### Save the item pdf content to disc ####
+    ## Function to save a pdf file
+    zotero_manager.save_item_file("BMYMEW76")
+    #### Export zotero collection items to json ####
+    ebora_virus_zotero_items_json = zotero_manager.zotero_items_to_json(
+        ebora_virus_zotero_collection_items
+    )
+    print(len(ebora_virus_zotero_items_json))
+    # print(ebora_virus_zotero_items_json[0])
+    ## Save to disc
+    zotero_manager.write_zotero_items_to_json_file(
+        ebora_virus_zotero_items_json, "zotero_data/ebora_virus_zotero_items.json"
+    )