Patrick Walukagga committed on
Commit
7d19cfc
·
1 Parent(s): 59d3a91

Adding zotero manager

Browse files
app.py CHANGED
@@ -1,11 +1,14 @@
1
  import json
2
  from typing import List, Tuple
 
3
 
4
  import gradio as gr
 
 
5
 
6
  from config import STUDY_FILES
7
  from rag.rag_pipeline import RAGPipeline
8
- from utils.helpers import generate_follow_up_questions
9
  from utils.prompts import (
10
  highlight_prompt,
11
  evidence_based_prompt,
@@ -14,12 +17,45 @@ from utils.prompts import (
14
  import openai
15
 
16
  from config import STUDY_FILES, OPENAI_API_KEY
 
 
 
17
 
18
  openai.api_key = OPENAI_API_KEY
19
 
20
  # Cache for RAG pipelines
21
  rag_cache = {}
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  def get_rag_pipeline(study_name: str) -> RAGPipeline:
25
  """Get or create a RAGPipeline instance for the given study."""
@@ -66,6 +102,8 @@ def update_interface(study_name: str) -> Tuple[str, gr.update, gr.update, gr.upd
66
 
67
  study_info = get_study_info(study_name)
68
  questions = sample_questions.get(study_name, [])[:3]
 
 
69
  visible_questions = [gr.update(visible=True, value=q) for q in questions]
70
  hidden_questions = [gr.update(visible=False) for _ in range(3 - len(questions))]
71
  return (study_info, *visible_questions, *hidden_questions)
 
1
  import json
2
  from typing import List, Tuple
3
+ import os
4
 
5
  import gradio as gr
6
+ from dotenv import load_dotenv
7
+ from slugify import slugify
8
 
9
  from config import STUDY_FILES
10
  from rag.rag_pipeline import RAGPipeline
11
+ from utils.helpers import generate_follow_up_questions, append_to_study_files
12
  from utils.prompts import (
13
  highlight_prompt,
14
  evidence_based_prompt,
 
17
  import openai
18
 
19
  from config import STUDY_FILES, OPENAI_API_KEY
20
+ from utils.zotero_manager import ZoteroManager
21
+
22
+ load_dotenv()
23
 
24
  openai.api_key = OPENAI_API_KEY
25
 
26
  # Cache for RAG pipelines
27
  rag_cache = {}
28
 
29
# --- Zotero bootstrap -------------------------------------------------------
# Credentials are read from the environment (populated by load_dotenv() above).
zotero_library_id = os.getenv("ZOTERO_LIBRARY_ID")
zotero_library_type = "user"  # or "group"
zotero_api_access_key = os.getenv("ZOTERO_API_ACCESS_KEY")

zotero_manager = ZoteroManager(
    zotero_library_id, zotero_library_type, zotero_api_access_key
)

# Fetch the library's collections and keep only those that contain items.
zotero_collections = zotero_manager.get_collections()
zotero_collection_lists = zotero_manager.list_zotero_collections(zotero_collections)
filtered_zotero_collection_lists = (
    zotero_manager.filter_and_return_collections_with_items(zotero_collection_lists)
)

# The export below writes into data/; make sure it exists on a fresh checkout.
os.makedirs("data", exist_ok=True)

# Export every collection that is not yet registered as a study.
for collection in filtered_zotero_collection_lists:
    collection_name = collection.get("name")
    if collection_name in STUDY_FILES:
        continue

    collection_key = collection.get("key")
    # Only the parsed items are needed here; the raw collection items were
    # previously fetched as well but never used (one wasted API round trip).
    zotero_collection_items = zotero_manager.get_collection_zotero_items_by_key(
        collection_key
    )

    #### Export zotero collection items to json ####
    zotero_items_json = zotero_manager.zotero_items_to_json(zotero_collection_items)
    export_file = f"{slugify(collection_name)}_zotero_items.json"
    zotero_manager.write_zotero_items_to_json_file(
        zotero_items_json, f"data/{export_file}"
    )
    append_to_study_files("study_files.json", collection_name, f"data/{export_file}")
    # STUDY_FILES was loaded before this export ran; mirror the new entry in
    # memory so the study is usable in this process without a restart.
    STUDY_FILES[collection_name] = f"data/{export_file}"
58
+
59
 
60
  def get_rag_pipeline(study_name: str) -> RAGPipeline:
61
  """Get or create a RAGPipeline instance for the given study."""
 
102
 
103
  study_info = get_study_info(study_name)
104
  questions = sample_questions.get(study_name, [])[:3]
105
+ if not questions:
106
+ questions = sample_questions.get("General", [])[:3]
107
  visible_questions = [gr.update(visible=True, value=q) for q in questions]
108
  hidden_questions = [gr.update(visible=False) for _ in range(3 - len(questions))]
109
  return (study_info, *visible_questions, *hidden_questions)
config.py CHANGED
@@ -2,12 +2,11 @@ import os
2
 
3
  from dotenv import load_dotenv
4
 
 
 
5
  load_dotenv()
6
 
7
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
8
 
9
- STUDY_FILES = {
10
- "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
11
- "Ebola Virus": "data/ebola_virus_zotero_items.json",
12
- "Gene Xpert": "data/gene_xpert_zotero_items.json",
13
- }
 
2
 
3
  from dotenv import load_dotenv
4
 
5
+ from utils.helpers import read_study_files
6
+
7
  load_dotenv()
8
 
9
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
10
 
11
+
12
# Mapping of study name -> path of its exported Zotero items JSON file,
# loaded from the registry file (path is relative to the working directory).
STUDY_FILES = read_study_files("study_files.json")
 
 
 
data/zotero-collection-pastan_zotero_items.json ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt CHANGED
@@ -1,7 +1,11 @@
 
1
  fastapi==0.112.2
2
  gradio
3
  llama-index
 
4
  openai
5
  pandas
6
  pydantic
7
- python-dotenv
 
 
 
1
+ chromadb==0.5.5
2
  fastapi==0.112.2
3
  gradio
4
  llama-index
5
+ nest-asyncio==1.6.0
6
  openai
7
  pandas
8
  pydantic
9
+ python-dotenv
10
+ pyzotero
11
+ python-slugify
study_files.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "Vaccine coverage": "data/vaccine_coverage_zotero_items.json",
3
+ "Ebola Virus": "data/ebola_virus_zotero_items.json",
4
+ "GeneXpert": "data/gene_xpert_zotero_items.json",
5
+ "Zotero Collection Pastan": "data/zotero-collection-pastan_zotero_items.json"
6
+ }
utils/helpers.py CHANGED
@@ -7,6 +7,94 @@ from utils.prompts import (
7
  VaccineCoverageVariables,
8
  StudyCharacteristics,
9
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
 
12
  def generate_follow_up_questions(
 
7
  VaccineCoverageVariables,
8
  StudyCharacteristics,
9
  )
10
+ import json
11
+
12
def read_study_files(file_path):
    """
    Reads a JSON file and returns the parsed JSON data.

    Args:
        file_path (str): The path to the JSON file to be read.

    Returns:
        dict: The data from the JSON file as a Python dictionary.

    Raises:
        FileNotFoundError: If the file is not found at the provided path.
        ValueError: If the file contents are not valid JSON (the original
            json.JSONDecodeError is attached as the cause).

    Example:
        Given a JSON file 'study_files.json' with content like:
        {
            "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
            "Ebola Virus": "data/ebola_virus_zotero_items.json",
            "Gene Xpert": "data/gene_xpert_zotero_items.json"
        }

        Calling `read_study_files("study_files.json")` will return:
        {
            "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
            "Ebola Virus": "data/ebola_virus_zotero_items.json",
            "Gene Xpert": "data/gene_xpert_zotero_items.json"
        }
    """
    try:
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default locale encoding.
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError as e:
        raise FileNotFoundError(f"The file at path {file_path} was not found.") from e
    except json.JSONDecodeError as e:
        raise ValueError(f"The file at path {file_path} does not contain valid JSON.") from e
49
+
50
+
51
def append_to_study_files(file_path, new_key, new_value):
    """
    Appends a new key-value entry to an existing JSON file.

    If `new_key` already exists its value is overwritten.

    Args:
        file_path (str): The path to the JSON file.
        new_key (str): The new key to add to the JSON file.
        new_value (any): The value associated with the new key (can be any valid JSON data type).

    Raises:
        FileNotFoundError: If the file is not found at the provided path.
        ValueError: If the file contents are not valid JSON.
        TypeError: If `new_value` cannot be serialized to JSON; the file is
            left untouched in that case.
        IOError: If the file cannot be written.

    Example:
        If the file 'study_files.json' initially contains:
        {
            "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
            "Ebola Virus": "data/ebola_virus_zotero_items.json"
        }

        Calling `append_to_study_files("study_files.json", "Gene Xpert", "data/gene_xpert_zotero_items.json")`
        will modify the file to:
        {
            "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
            "Ebola Virus": "data/ebola_virus_zotero_items.json",
            "Gene Xpert": "data/gene_xpert_zotero_items.json"
        }
    """
    try:
        # Read the existing data from the file
        with open(file_path, "r", encoding="utf-8") as file:
            data = json.load(file)

        # Append the new key-value pair to the dictionary
        data[new_key] = new_value

        # Serialize BEFORE opening the file for writing: opening with "w"
        # truncates immediately, so a serialization failure after that point
        # would destroy the existing registry.
        serialized = json.dumps(data, indent=4)  # indent for pretty printing

        # Write the updated data back to the file
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(serialized)

    except FileNotFoundError as e:
        raise FileNotFoundError(f"The file at path {file_path} was not found.") from e
    except json.JSONDecodeError as e:
        raise ValueError(f"The file at path {file_path} does not contain valid JSON.") from e
    except IOError as e:
        raise IOError(f"Failed to write to the file at {file_path}.") from e
98
 
99
 
100
  def generate_follow_up_questions(
utils/prompts.py CHANGED
@@ -71,7 +71,7 @@ vaccine_coverage_prompt = PromptTemplate(
71
  )
72
 
73
  sample_questions = {
74
- "Vaccine Coverage": [
75
  "What are the vaccine coverage rates reported in the study?",
76
  "Are there any reported adverse events following immunization (AEFI)?",
77
  "How does the study account for different vaccine types or schedules?",
@@ -84,12 +84,17 @@ sample_questions = {
84
  "Were there any ethical considerations or challenges reported?",
85
  "Create a structured table for each Ebola virus study, including the following information: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, SAMPLE_SIZE, PLASMA_TYPE, DOSAGE, FREQUENCY, SIDE_EFFECTS, VIRAL_LOAD_CHANGE, SURVIVAL_RATE, INCLUSION_CRITERIA, EXCLUSION_CRITERIA, SUBGROUP_ANALYSES, FOLLOW_UP_DURATION, LONG_TERM_OUTCOMES, DISEASE_SEVERITY_ASSESSMENT, BIOSAFETY_MEASURES, ETHICAL_CONSIDERATIONS, and STUDY_COMMENTS.",
86
  ],
87
- "Gene Xpert": [
88
  "What is the main objective of the study?",
89
  "What is the study design?",
90
  "What disease condition is being studied?",
91
  "Extract and present in a tabular format the following variables for each Gene Xpert study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, DISEASE_CONDITION, OBJECTIVE, OUTCOME_MEASURES, SENSITIVITY, SPECIFICITY, COST_COMPARISON, TURNAROUND_TIME, IMPLEMENTATION_CHALLENGES, PERFORMANCE_VARIATIONS, QUALITY_CONTROL, EQUIPMENT_ISSUES, PATIENT_OUTCOME_IMPACT, TRAINING_REQUIREMENTS, SCALABILITY_CONSIDERATIONS, and STUDY_COMMENTS.",
92
  ],
 
 
 
 
 
93
  }
94
 
95
 
 
71
  )
72
 
73
  sample_questions = {
74
+ "Vaccine coverage": [
75
  "What are the vaccine coverage rates reported in the study?",
76
  "Are there any reported adverse events following immunization (AEFI)?",
77
  "How does the study account for different vaccine types or schedules?",
 
84
  "Were there any ethical considerations or challenges reported?",
85
  "Create a structured table for each Ebola virus study, including the following information: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, SAMPLE_SIZE, PLASMA_TYPE, DOSAGE, FREQUENCY, SIDE_EFFECTS, VIRAL_LOAD_CHANGE, SURVIVAL_RATE, INCLUSION_CRITERIA, EXCLUSION_CRITERIA, SUBGROUP_ANALYSES, FOLLOW_UP_DURATION, LONG_TERM_OUTCOMES, DISEASE_SEVERITY_ASSESSMENT, BIOSAFETY_MEASURES, ETHICAL_CONSIDERATIONS, and STUDY_COMMENTS.",
86
  ],
87
+ "GeneXpert": [
88
  "What is the main objective of the study?",
89
  "What is the study design?",
90
  "What disease condition is being studied?",
91
  "Extract and present in a tabular format the following variables for each Gene Xpert study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, DISEASE_CONDITION, OBJECTIVE, OUTCOME_MEASURES, SENSITIVITY, SPECIFICITY, COST_COMPARISON, TURNAROUND_TIME, IMPLEMENTATION_CHALLENGES, PERFORMANCE_VARIATIONS, QUALITY_CONTROL, EQUIPMENT_ISSUES, PATIENT_OUTCOME_IMPACT, TRAINING_REQUIREMENTS, SCALABILITY_CONSIDERATIONS, and STUDY_COMMENTS.",
92
  ],
93
+ "General": [
94
+ "What is the main objective of the study?",
95
+ "What is the study design?",
96
+ "Extract and present in a tabular format the following variables for each study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, OBJECTIVE, and STUDY_COMMENTS.",
97
+ ],
98
  }
99
 
100
 
utils/zotero_manager.py ADDED
@@ -0,0 +1,644 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from typing import Any, Dict, List, Optional
4
+
5
+ from dotenv import load_dotenv
6
+ from pydantic import BaseModel, Field
7
+ from pyzotero import zotero
8
+ from slugify import slugify
9
+
10
+ load_dotenv()
11
+
12
+
13
class ZoteroItem(BaseModel):
    """
    Pydantic model holding the metadata of one Zotero library item.

    `key` and `title` are required; every other field defaults to None.
    """

    # Required identification fields.
    key: str = Field(default=..., description="Unique key of the item")
    title: str = Field(default=..., description="Title of the item")

    # Textual content.
    abstract: Optional[str] = Field(
        default=None, description="Abstract or note of the item"
    )
    full_text: Optional[str] = Field(
        default=None, description="Full text of the item"
    )

    # Bibliographic details.
    authors: Optional[List[str]] = Field(default=None, description="List of authors")
    doi: Optional[str] = Field(
        default=None, description="Digital Object Identifier (DOI)"
    )
    year: Optional[int] = Field(default=None, description="Publication year")
    item_type: Optional[str] = Field(
        default=None, description="Type of the item (e.g., journalArticle)"
    )
    url: Optional[str] = Field(default=None, description="URL of the item")
31
+
32
+
33
class ZoteroCollection(BaseModel):
    """
    Pydantic model describing a Zotero collection: its key, its display name
    and how many items it contains. All three fields are required.
    """

    key: str = Field(default=..., description="Unique identifier for the collection.")
    name: str = Field(default=..., description="Name of the collection.")
    number_of_items: int = Field(
        default=..., description="Number of items contained in the collection."
    )
43
+
44
+
45
+ class ZoteroManager:
46
+ '''
47
+ #### Example Usage ####
48
+
49
+ zotero_library_id = os.getenv("ZOTERO_LIBRARY_ID")
50
+ zotero_library_type = "user" # or "group"
51
+ zotero_api_access_key = os.getenv("ZOTERO_API_ACCESS_KEY")
52
+
53
+ zotero_manager = ZoteroManager(zotero_library_id, zotero_library_type, zotero_api_access_key)
54
+
55
+ #### GET Zotero topics (Collections) ####
56
+ zotero_collections = zotero_manager.get_collections()
57
+ # print(zotero_collections)
58
+
59
+ #### Zotero collections parsed with pydantic ####
60
+ zotero_collection_lists = zotero_manager.list_zotero_collections(zotero_collections)
61
+ # print(zotero_collection_lists)
62
+ """
63
+ [
64
+ ZoteroCollection(key='IXU5ZWRM', name='RR 10', number_of_items=0),
65
+ ZoteroCollection(key='G6AZZGPQ', name='RR 9', number_of_items=0),
66
+ ZoteroCollection(key='DZ45SJHF', name='RR 8', number_of_items=0),
67
+ ZoteroCollection(key='DM5FVG74', name='RR 7', number_of_items=0),
68
+ ZoteroCollection(key='43N5CI48', name='RR 6', number_of_items=0),
69
+ ZoteroCollection(key='2TCX6JC2', name='RR 5', number_of_items=0),
70
+ ZoteroCollection(key='QVSNAJWV', name='RR 4', number_of_items=0),
71
+ ZoteroCollection(key='96UJANPP', name='Ebola Virus', number_of_items=17),
72
+ ZoteroCollection(key='UB7AEMB6', name='GeneXpert', number_of_items=31),
73
+ ZoteroCollection(key='UDQ9JSD9', name='Vaccine coverage', number_of_items=22),
74
+ ZoteroCollection(key='SGNLNIAT', name='Zotero Collection Pastan', number_of_items=227)
75
+ ]
76
+ """
77
+
78
+ #### Collections with items ####
79
+ filtered_zotero_collection_lists = zotero_manager.filter_and_return_collections_with_items(zotero_collection_lists)
80
+ # print(filtered_zotero_collection_lists)
81
+ """
82
+ [
83
+ {'key': '96UJANPP', 'name': 'Ebola Virus', 'number_of_items': 17},
84
+ {'key': 'UB7AEMB6', 'name': 'GeneXpert', 'number_of_items': 31},
85
+ {'key': 'UDQ9JSD9', 'name': 'Vaccine coverage', 'number_of_items': 22},
86
+ {'key': 'SGNLNIAT',
87
+ 'name': 'Zotero Collection Pastan',
88
+ 'number_of_items': 227}
89
+ ]
90
+ """
91
+
92
+ #### Collection by name from a list of zotero collections
93
+ ebola_virus_collection = zotero_manager.find_zotero_collection_by_name(zotero_collection_lists, "Ebola Virus")
94
+ # print(ebola_virus_collection)
95
+ """ZoteroCollection(key='96UJANPP', name='Ebola Virus', number_of_items=17)"""
96
+ # print(ebola_virus_collection.model_dump())
97
+ """{'key': '96UJANPP', 'name': 'Ebola Virus', 'number_of_items': 17}"""
98
+
99
+ #### Get single collection by key ####
100
+ ebola_virus_collection_key = "96UJANPP" # Ebola Virus
101
+ ebola_virus_collection = zotero_manager.get_collection_by_key(ebola_virus_collection_key)
102
+ # print(ebola_virus_collection)
103
+ """
104
+ {
105
+ 'key': '96UJANPP',
106
+ 'version': 72,
107
+ 'library': {'type': 'user',
108
+ 'id': 11201324,
109
+ 'name': 'pjlus',
110
+ 'links': {'alternate': {'href': 'https://www.zotero.org/pjlus',
111
+ 'type': 'text/html'}}},
112
+ 'links': {'self': {'href': 'https://api.zotero.org/users/11201324/collections/96UJANPP',
113
+ 'type': 'application/json'},
114
+ 'alternate': {'href': 'https://www.zotero.org/pjlus/collections/96UJANPP',
115
+ 'type': 'text/html'}},
116
+ 'meta': {'numCollections': 0, 'numItems': 17},
117
+ 'data': {'key': '96UJANPP',
118
+ 'version': 72,
119
+ 'name': 'Ebola Virus',
120
+ 'parentCollection': False,
121
+ 'relations': {}}
122
+ }
123
+ """
124
+
125
+ #### Get collection items by collection key ####
126
+ ebora_virus_collection_items = zotero_manager.get_collection_items(ebola_virus_collection_key)
127
+ print(len(ebora_virus_collection_items))
128
+ # print(ebora_virus_collection_items[:2])
129
+
130
+ #### Getting zotero collection items and full text
131
+ # Here the collections have been parsed using the zotero item pydantic model defined in the zotero manager.
132
+ ####
133
+ ebora_virus_zotero_collection_items = zotero_manager.get_collection_zotero_items_by_key(ebola_virus_collection_key)
134
+ # print(len(ebora_virus_zotero_collection_items))
135
+ # print(ebora_virus_zotero_collection_items[0])
136
+
137
+ #### Get item children (attachments)
138
+ # Listed items in zotero are items together with their attachments (pdf content)
139
+ ####
140
+ zotero_manager.get_item_children("2Q7HFERL")
141
+
142
+ #### Get an item full text ####
143
+ zotero_manager.get_item_full_text("BMYMEW76")["content"]
144
+
145
+ #### Save the item pdf content to disc ####
146
+ ## Function to save a pdf file
147
+ zotero_manager.save_item_file("BMYMEW76")
148
+
149
+ #### Export zotero collection items to json ####
150
+ ebora_virus_zotero_items_json = zotero_manager.zotero_items_to_json(ebora_virus_zotero_collection_items)
151
+ print(len(ebora_virus_zotero_items_json))
152
+ # print(ebora_virus_zotero_items_json[0])
153
+ ## Save to disc
154
+ zotero_manager.write_zotero_items_to_json_file(ebora_virus_zotero_items_json, "zotero_data/ebora_virus_zotero_items.json")
155
+ '''
156
+
157
def __init__(self, library_id: str, library_type: str, api_key: str):
    """
    Create the pyzotero client used by every other method.

    Args:
        library_id: Identifier of the Zotero library.
        library_type: Library kind passed to pyzotero ("user" or "group").
        api_key: Zotero API access key.
    """
    client = zotero.Zotero(library_id, library_type, api_key)
    self.zot = client
159
+
160
def create_zotero_item_from_json(self, json_obj: Dict[str, Any]) -> "ZoteroItem":
    """
    Creates a ZoteroItem instance from a JSON object.

    Args:
        json_obj (Dict[str, Any]): A JSON object containing the Zotero item data.
            The JSON structure is expected to have a 'data' field which includes
            the metadata for the Zotero item.

    Returns:
        ZoteroItem: An instance of ZoteroItem populated with the data extracted
            from the JSON object. The fields include key, title, abstract,
            full_text, authors, doi, year, item_type, and url. `year` is None
            when the item's 'date' value contains no four-digit year.
    """
    import re  # local import: only needed for the year extraction below

    data = json_obj.get("data", {})

    # Extract the item's full text from its attachments
    key = data.get("key")
    full_text = self.get_full_text_from_children(key)

    # Extract the list of authors. Creators carry either a single 'name' or a
    # 'firstName'/'lastName' pair; join only the parts that are present so no
    # stray double spaces end up inside a name.
    authors = [
        " ".join(
            part
            for part in (
                creator.get("name"),
                creator.get("firstName"),
                creator.get("lastName"),
            )
            if part
        )
        for creator in data.get("creators", [])
        if creator.get("creatorType") == "author"
    ]

    # The 'date' value is not assumed to be ISO formatted (a plain
    # int(date.split('-')[0]) breaks on anything else), so take the first
    # standalone four-digit number as the publication year.
    year_match = re.search(r"(?<!\d)\d{4}(?!\d)", data.get("date") or "")
    year = int(year_match.group()) if year_match else None

    # Create the ZoteroItem instance
    zotero_item = ZoteroItem(
        key=key,
        title=data.get("title"),
        abstract=data.get("abstractNote"),
        full_text=full_text,
        authors=authors,
        doi=data.get("DOI"),
        year=year,
        item_type=data.get("itemType"),
        url=data.get("url"),
    )

    return zotero_item
201
+
202
def create_zotero_collection(
    self, collection_dict: Dict[str, Any]
) -> "ZoteroCollection":
    """
    Build a ZoteroCollection from the raw dictionary of one collection.

    Args:
        collection_dict (Dict[str, Any]): Raw collection dictionary. 'key' and
            'name' are read from its 'data' entry and 'numItems' from its
            'meta' entry; a missing 'numItems' is treated as 0.

    Returns:
        ZoteroCollection: The collection populated with key, name and number
            of items.
    """
    collection_data = collection_dict.get("data", {})
    item_count = collection_dict.get("meta", {}).get("numItems", 0)

    return ZoteroCollection(
        key=collection_data.get("key"),
        name=collection_data.get("name"),
        number_of_items=item_count,
    )
227
+
228
def list_zotero_collections(
    self, collection_items: List[Dict[str, Any]]
) -> "List[ZoteroCollection]":
    """
    Convert raw collection dictionaries into ZoteroCollection instances.

    Args:
        collection_items (List[Dict[str, Any]]): Raw collection dictionaries,
            each in the shape accepted by `create_zotero_collection`.

    Returns:
        List[ZoteroCollection]: One ZoteroCollection per input dictionary, in
            the same order.
    """
    parsed = []
    for raw_collection in collection_items:
        parsed.append(self.create_zotero_collection(raw_collection))
    return parsed
248
+
249
def list_all_papers(self) -> "List[ZoteroItem]":
    """
    Lists all papers (journal articles) in your Zotero library.

    Returns:
        List of ZoteroItem objects representing the papers in your library.
    """
    # A plain items() call returns only one page of results; everything()
    # follows the pagination links so the list really contains all papers.
    results = self.zot.everything(self.zot.items(itemType="journalArticle"))
    return [self.create_zotero_item_from_json(item) for item in results]
267
+
268
def list_items(self, limit: int = 5):
    """
    Return raw library items as delivered by the Zotero client.

    Args:
        limit: Maximum number of items to request.

    Returns:
        Whatever `self.zot.items(limit=limit)` returns.
    """
    raw_items = self.zot.items(limit=limit)
    return raw_items
270
+
271
def query_items(self, query: str, limit: int = 10) -> "List[ZoteroItem]":
    """
    Search the Zotero library and parse the matching items.

    Args:
        query: The search query.
        limit: Maximum number of items to return.

    Returns:
        List of ZoteroItem objects, one per search result.
    """
    matches = []
    for raw_item in self.zot.items(q=query, limit=limit):
        matches.append(self.create_zotero_item_from_json(raw_item))
    return matches
287
+
288
def get_item_by_key(self, key: str) -> "ZoteroItem":
    """
    Fetch a single Zotero item and parse it into a ZoteroItem.

    Args:
        key: The unique key of the item.

    Returns:
        ZoteroItem built from the fetched item.
    """
    return self.create_zotero_item_from_json(self.zot.item(key))
300
+
301
def get_item_by_doi(self, doi: str) -> "Optional[ZoteroItem]":
    """
    Searches for a Zotero item by its DOI.

    Args:
        doi: The DOI of the item.

    Returns:
        ZoteroItem object if found, otherwise None.
    """
    # DOI names are case-insensitive, so compare in a normalized form.
    wanted = doi.strip().casefold()

    # NOTE(review): the quick search is used only to narrow down candidates;
    # confirm that the API's default search mode covers the DOI field.
    results = self.zot.items(q=doi)
    for item in results:
        candidate = item.get("data", {}).get("DOI") or ""
        if candidate.strip().casefold() == wanted:
            # The match was previously built but never returned, so this
            # method always yielded None.
            return self.create_zotero_item_from_json(item)
    return None
316
+
317
def get_item_tags(self, item_key: str) -> List[str]:
    """
    Fetch the tags attached to a Zotero item.

    Args:
        item_key: The unique key of the item.

    Returns:
        The tags of the item, as returned by the Zotero client.
    """
    tags = self.zot.item_tags(item_key)
    return tags
328
+
329
def get_collections(self) -> List[Dict[str, Any]]:
    """
    Retrieves the list of collections in your Zotero library.

    Returns:
        List of dictionaries representing the collections.
    """
    # A plain collections() call returns only one page of results;
    # everything() follows the pagination links so no collection is dropped.
    return self.zot.everything(self.zot.collections())
337
+
338
def get_collection_by_key(self, collection_key: str) -> Dict[str, Any]:
    """
    Fetch a single collection from the Zotero library.

    Args:
        collection_key: The unique key of the collection.

    Returns:
        Dictionary describing the collection, as returned by the Zotero client.
    """
    collection = self.zot.collection(collection_key)
    return collection
349
+
350
def get_collection_items(self, collection_key: str) -> List[Dict[str, Any]]:
    """
    Retrieves the journal-article items in a collection.

    Args:
        collection_key: The unique key of the collection.

    Returns:
        List of dictionaries representing the journal articles in the
        collection.
    """
    # A plain collection_items() call returns only one page of results;
    # everything() follows the pagination links so large collections are
    # returned completely.
    first_page = self.zot.collection_items(collection_key, itemType="journalArticle")
    return self.zot.everything(first_page)
361
+
362
def get_item_children(self, item_key: str) -> List[Dict[str, Any]]:
    """
    Fetch the child items of a Zotero item.

    Args:
        item_key: The unique key of the parent item.

    Returns:
        List of dictionaries, one per child item, as returned by the Zotero
        client.
    """
    children = self.zot.children(item_key)
    return children
373
+
374
def get_collection_zotero_items_by_key(
    self, collection_key: str
) -> "List[ZoteroItem]":
    """
    Retrieves the journal-article items in a collection, parsed as ZoteroItem.

    Args:
        collection_key: The unique key of the collection.

    Returns:
        List of ZoteroItem objects representing the items in the collection.
    """
    # A plain collection_items() call returns only one page of results;
    # everything() follows the pagination links so the export of a large
    # collection is not silently truncated.
    items = self.zot.everything(
        self.zot.collection_items(collection_key, itemType="journalArticle")
    )
    return [self.create_zotero_item_from_json(item) for item in items]
388
+
389
def filter_and_return_collections_with_items(
    self, zotero_collections: "List[ZoteroCollection]"
) -> List[Dict[str, Any]]:
    """
    Keep only the collections that contain at least one item and return them
    as plain dictionaries.

    Args:
        zotero_collections (List[ZoteroCollection]): Collections to filter.

    Returns:
        List[Dict[str, Any]]: `model_dump()` of every collection whose
            `number_of_items` is greater than zero, in input order.
    """
    non_empty = []
    for candidate in zotero_collections:
        if candidate.number_of_items > 0:
            non_empty.append(candidate.model_dump())
    return non_empty
408
+
409
def find_zotero_collection_by_name(
    self, zotero_collections: "List[ZoteroCollection]", name: str
) -> "ZoteroCollection":
    """
    Look up a collection by its exact name.

    Args:
        zotero_collections (List[ZoteroCollection]): Collections to search.
        name (str): The name of the collection to find.

    Returns:
        ZoteroCollection: The first collection whose name equals `name`.

    Raises:
        ValueError: If no collection with the given name is found.
    """
    match = next(
        (candidate for candidate in zotero_collections if candidate.name == name),
        None,
    )
    if match is None:
        raise ValueError(f"Collection with name '{name}' not found.")
    return match
429
+
430
def zotero_items_to_json(
    self, zotero_items: "List[ZoteroItem]"
) -> List[Dict[str, Any]]:
    """
    Convert ZoteroItem instances into JSON-compatible dictionaries.

    Items that have neither an abstract nor full text are left out.

    Args:
        zotero_items (List[ZoteroItem]): The items to convert.

    Returns:
        List[Dict[str, Any]]: `model_dump()` of every item that has an
            abstract or full text, in input order.
    """
    return [
        entry.model_dump()
        for entry in zotero_items
        if entry.abstract or entry.full_text
    ]
446
+
447
def write_zotero_items_to_json_file(
    self, zotero_items_json: List[Dict[str, Any]], file_path: str
) -> None:
    """
    Writes a JSON object of Zotero items to a JSON file.

    The parent directory of `file_path` is created if it does not exist yet.
    An existing file at `file_path` is overwritten.

    Args:
        zotero_items_json (List[Dict[str, Any]]): A JSON-compatible list of dictionaries
            representing Zotero items.
        file_path (str): The file path where the JSON file should be written.

    Returns:
        None
    """
    # Exports go into sub-directories such as "data/"; create the directory
    # up front instead of failing with FileNotFoundError on a fresh checkout.
    parent_dir = os.path.dirname(file_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(zotero_items_json, json_file, indent=2)
463
+
464
def get_item_full_text(self, key: str) -> Optional[dict]:
    """
    Fetch the full-text record of an item on a best-effort basis.

    Args:
        key: The unique key of the item.

    Returns:
        The dictionary returned by the Zotero client for the item's full text,
        or None when the lookup raises (the error is printed, not re-raised).
    """
    try:
        payload = self.zot.fulltext_item(key)
    except Exception as e:
        # Best effort: report the failure and let the caller skip this item.
        print(f"Error: {str(e)}")
        return None
    return payload
480
+
481
def get_full_text_from_children(self, key: str) -> Optional[str]:
    """
    Collect the full text of all attachment children of an item.

    Args:
        key: The unique key of the parent item.

    Returns:
        The 'content' of every attachment child for which a full-text record
        is available, each followed by a newline and concatenated in child
        order. An empty string when there is nothing to collect.
    """
    pieces = []
    for child in self.get_item_children(key) or []:
        child_data = child.get("data", {})
        if child_data.get("itemType") != "attachment":
            continue
        record = self.get_item_full_text(child_data.get("key", ""))
        if record is None:
            continue
        pieces.append(record.get("content", "") + "\n")
    return "".join(pieces)
504
+
505
def save_item_file(self, key: str) -> None:
    """
    Downloads the file of an item into the "zotero_data" directory.

    The file is named after the slugified item title (falling back to the
    item key when the item has no title) with a ".pdf" extension. Errors
    while creating the directory or dumping the file are printed, not raised.

    Args:
        key: The unique key of the item.
    """
    item = self.zot.item(key)
    # Only the title is needed for the file name. Building a full ZoteroItem
    # here would fetch the item's children and their full text as well, which
    # costs several extra API calls for nothing.
    title = item.get("data", {}).get("title") or key
    item_title = slugify(title)
    try:
        # The target directory may not exist yet on a fresh checkout.
        os.makedirs("zotero_data", exist_ok=True)
        self.zot.dump(key, f"{item_title}.pdf", "zotero_data")
    except Exception as e:
        print(f"Error: {str(e)}")
519
+
520
+
521
if __name__ == "__main__":
    # Sample driver code: walks through the ZoteroManager API step by step.
    # The library id and API key are read from the ZOTERO_LIBRARY_ID and
    # ZOTERO_API_ACCESS_KEY environment variables.
    zotero_library_id = os.getenv("ZOTERO_LIBRARY_ID")
    zotero_library_type = "user"  # or "group"
    zotero_api_access_key = os.getenv("ZOTERO_API_ACCESS_KEY")

    zotero_manager = ZoteroManager(
        zotero_library_id, zotero_library_type, zotero_api_access_key
    )

    #### GET Zotero topics (Collections) ####
    zotero_collections = zotero_manager.get_collections()
    # print(zotero_collections)

    #### Zotero collections parsed with pydantic ####
    zotero_collection_lists = zotero_manager.list_zotero_collections(zotero_collections)
    # print(zotero_collection_lists)
    # Sample output:
    # [
    #     ZoteroCollection(key='IXU5ZWRM', name='RR 10', number_of_items=0),
    #     ZoteroCollection(key='G6AZZGPQ', name='RR 9', number_of_items=0),
    #     ZoteroCollection(key='DZ45SJHF', name='RR 8', number_of_items=0),
    #     ZoteroCollection(key='DM5FVG74', name='RR 7', number_of_items=0),
    #     ZoteroCollection(key='43N5CI48', name='RR 6', number_of_items=0),
    #     ZoteroCollection(key='2TCX6JC2', name='RR 5', number_of_items=0),
    #     ZoteroCollection(key='QVSNAJWV', name='RR 4', number_of_items=0),
    #     ZoteroCollection(key='96UJANPP', name='Ebola Virus', number_of_items=17),
    #     ZoteroCollection(key='UB7AEMB6', name='GeneXpert', number_of_items=31),
    #     ZoteroCollection(key='UDQ9JSD9', name='Vaccine coverage', number_of_items=22),
    #     ZoteroCollection(key='SGNLNIAT', name='Zotero Collection Pastan', number_of_items=227)
    # ]

    #### Collections with items ####
    filtered_zotero_collection_lists = (
        zotero_manager.filter_and_return_collections_with_items(zotero_collection_lists)
    )
    # print(filtered_zotero_collection_lists)
    # Sample output:
    # [
    #     {'key': '96UJANPP', 'name': 'Ebola Virus', 'number_of_items': 17},
    #     {'key': 'UB7AEMB6', 'name': 'GeneXpert', 'number_of_items': 31},
    #     {'key': 'UDQ9JSD9', 'name': 'Vaccine coverage', 'number_of_items': 22},
    #     {'key': 'SGNLNIAT',
    #      'name': 'Zotero Collection Pastan',
    #      'number_of_items': 227}
    # ]

    #### Collection by name from a list of zotero collections ####
    ebola_virus_collection = zotero_manager.find_zotero_collection_by_name(
        zotero_collection_lists, "Ebola Virus"
    )
    # print(ebola_virus_collection)
    # ZoteroCollection(key='96UJANPP', name='Ebola Virus', number_of_items=17)
    # print(ebola_virus_collection.model_dump())
    # {'key': '96UJANPP', 'name': 'Ebola Virus', 'number_of_items': 17}

    #### Get single collection by key ####
    ebola_virus_collection_key = "96UJANPP"  # Ebola Virus
    ebola_virus_collection = zotero_manager.get_collection_by_key(
        ebola_virus_collection_key
    )
    # print(ebola_virus_collection)
    # Sample output:
    # {
    #     'key': '96UJANPP',
    #     'version': 72,
    #     'library': {'type': 'user',
    #                 'id': 11201324,
    #                 'name': 'pjlus',
    #                 'links': {'alternate': {'href': 'https://www.zotero.org/pjlus',
    #                                         'type': 'text/html'}}},
    #     'links': {'self': {'href': 'https://api.zotero.org/users/11201324/collections/96UJANPP',
    #                        'type': 'application/json'},
    #               'alternate': {'href': 'https://www.zotero.org/pjlus/collections/96UJANPP',
    #                             'type': 'text/html'}},
    #     'meta': {'numCollections': 0, 'numItems': 17},
    #     'data': {'key': '96UJANPP',
    #              'version': 72,
    #              'name': 'Ebola Virus',
    #              'parentCollection': False,
    #              'relations': {}}
    # }

    #### Get collection items by collection key ####
    ebola_virus_collection_items = zotero_manager.get_collection_items(
        ebola_virus_collection_key
    )
    print(len(ebola_virus_collection_items))
    # print(ebola_virus_collection_items[:2])

    #### Getting zotero collection items and full text ####
    # Here the collection items are parsed using the zotero item pydantic
    # model defined in the zotero manager.
    ebola_virus_zotero_collection_items = (
        zotero_manager.get_collection_zotero_items_by_key(ebola_virus_collection_key)
    )
    # print(len(ebola_virus_zotero_collection_items))
    # print(ebola_virus_zotero_collection_items[0])

    #### Get item children (attachments) ####
    # Listed items in zotero are items together with their attachments
    # (pdf content).
    zotero_manager.get_item_children("2Q7HFERL")

    #### Get an item full text ####
    # get_item_full_text returns None when the lookup fails, so guard before
    # indexing instead of subscripting the result directly.
    item_full_text = zotero_manager.get_item_full_text("BMYMEW76")
    item_content = (
        item_full_text["content"] if item_full_text is not None else None
    )

    #### Save the item pdf content to disk ####
    # Both the PDF dump and the JSON export below write into "zotero_data",
    # so make sure the directory exists first.
    os.makedirs("zotero_data", exist_ok=True)
    zotero_manager.save_item_file("BMYMEW76")

    #### Export zotero collection items to json ####
    ebola_virus_zotero_items_json = zotero_manager.zotero_items_to_json(
        ebola_virus_zotero_collection_items
    )
    print(len(ebola_virus_zotero_items_json))
    # print(ebola_virus_zotero_items_json[0])
    ## Save to disk
    # NOTE(review): the "ebora" spelling in the file name is kept unchanged
    # in case other code reads this exact path - confirm before renaming.
    zotero_manager.write_zotero_items_to_json_file(
        ebola_virus_zotero_items_json, "zotero_data/ebora_virus_zotero_items.json"
    )