Spaces:
Sleeping
Sleeping
Patrick Walukagga
committed on
Commit
·
7d19cfc
1
Parent(s):
59d3a91
Adding zotero manager
Browse files- app.py +39 -1
- config.py +4 -5
- data/zotero-collection-pastan_zotero_items.json +0 -0
- requirements.txt +5 -1
- study_files.json +6 -0
- utils/helpers.py +88 -0
- utils/prompts.py +7 -2
- utils/zotero_manager.py +644 -0
app.py
CHANGED
@@ -1,11 +1,14 @@
|
|
1 |
import json
|
2 |
from typing import List, Tuple
|
|
|
3 |
|
4 |
import gradio as gr
|
|
|
|
|
5 |
|
6 |
from config import STUDY_FILES
|
7 |
from rag.rag_pipeline import RAGPipeline
|
8 |
-
from utils.helpers import generate_follow_up_questions
|
9 |
from utils.prompts import (
|
10 |
highlight_prompt,
|
11 |
evidence_based_prompt,
|
@@ -14,12 +17,45 @@ from utils.prompts import (
|
|
14 |
import openai
|
15 |
|
16 |
from config import STUDY_FILES, OPENAI_API_KEY
|
|
|
|
|
|
|
17 |
|
18 |
openai.api_key = OPENAI_API_KEY
|
19 |
|
20 |
# Cache for RAG pipelines
|
21 |
rag_cache = {}
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
def get_rag_pipeline(study_name: str) -> RAGPipeline:
|
25 |
"""Get or create a RAGPipeline instance for the given study."""
|
@@ -66,6 +102,8 @@ def update_interface(study_name: str) -> Tuple[str, gr.update, gr.update, gr.upd
|
|
66 |
|
67 |
study_info = get_study_info(study_name)
|
68 |
questions = sample_questions.get(study_name, [])[:3]
|
|
|
|
|
69 |
visible_questions = [gr.update(visible=True, value=q) for q in questions]
|
70 |
hidden_questions = [gr.update(visible=False) for _ in range(3 - len(questions))]
|
71 |
return (study_info, *visible_questions, *hidden_questions)
|
|
|
1 |
import json
|
2 |
from typing import List, Tuple
|
3 |
+
import os
|
4 |
|
5 |
import gradio as gr
|
6 |
+
from dotenv import load_dotenv
|
7 |
+
from slugify import slugify
|
8 |
|
9 |
from config import STUDY_FILES
|
10 |
from rag.rag_pipeline import RAGPipeline
|
11 |
+
from utils.helpers import generate_follow_up_questions, append_to_study_files
|
12 |
from utils.prompts import (
|
13 |
highlight_prompt,
|
14 |
evidence_based_prompt,
|
|
|
17 |
import openai
|
18 |
|
19 |
from config import STUDY_FILES, OPENAI_API_KEY
|
20 |
+
from utils.zotero_manager import ZoteroManager
|
21 |
+
|
22 |
+
load_dotenv()
|
23 |
|
24 |
openai.api_key = OPENAI_API_KEY
|
25 |
|
26 |
# Cache for RAG pipelines
|
27 |
rag_cache = {}
|
28 |
|
29 |
+
# --- Zotero bootstrap -------------------------------------------------------
# Credentials are read from the environment (populated by load_dotenv() above).
zotero_library_id = os.getenv("ZOTERO_LIBRARY_ID")
zotero_library_type = "user"  # or "group"
zotero_api_access_key = os.getenv("ZOTERO_API_ACCESS_KEY")

zotero_manager = ZoteroManager(
    zotero_library_id, zotero_library_type, zotero_api_access_key
)

# Fetch the library's collections and keep only the ones that contain items.
zotero_collections = zotero_manager.get_collections()
zotero_collection_lists = zotero_manager.list_zotero_collections(zotero_collections)
filtered_zotero_collection_lists = (
    zotero_manager.filter_and_return_collections_with_items(zotero_collection_lists)
)

# Export every collection that is not yet registered as a study.
for collection in filtered_zotero_collection_lists:
    collection_name = collection.get("name")
    if collection_name in STUDY_FILES:
        continue  # already exported on a previous run

    collection_key = collection.get("key")
    # Only the parsed items are needed; the raw item listing that used to be
    # fetched here was never used and cost an extra API round trip.
    zotero_collection_items = zotero_manager.get_collection_zotero_items_by_key(
        collection_key
    )

    #### Export zotero collection items to json ####
    zotero_items_json = zotero_manager.zotero_items_to_json(zotero_collection_items)
    export_file = f"{slugify(collection_name)}_zotero_items.json"
    zotero_manager.write_zotero_items_to_json_file(
        zotero_items_json, f"data/{export_file}"
    )
    append_to_study_files("study_files.json", collection_name, f"data/{export_file}")
    # Mirror the registration in memory: STUDY_FILES was loaded before this
    # loop ran, so without this the new study only shows up after a restart.
    STUDY_FILES[collection_name] = f"data/{export_file}"
|
58 |
+
|
59 |
|
60 |
def get_rag_pipeline(study_name: str) -> RAGPipeline:
|
61 |
"""Get or create a RAGPipeline instance for the given study."""
|
|
|
102 |
|
103 |
study_info = get_study_info(study_name)
|
104 |
questions = sample_questions.get(study_name, [])[:3]
|
105 |
+
if not questions:
|
106 |
+
questions = sample_questions.get("General", [])[:3]
|
107 |
visible_questions = [gr.update(visible=True, value=q) for q in questions]
|
108 |
hidden_questions = [gr.update(visible=False) for _ in range(3 - len(questions))]
|
109 |
return (study_info, *visible_questions, *hidden_questions)
|
config.py
CHANGED
@@ -2,12 +2,11 @@ import os
|
|
2 |
|
3 |
from dotenv import load_dotenv
|
4 |
|
|
|
|
|
5 |
load_dotenv()
|
6 |
|
7 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
8 |
|
9 |
-
|
10 |
-
|
11 |
-
"Ebola Virus": "data/ebola_virus_zotero_items.json",
|
12 |
-
"Gene Xpert": "data/gene_xpert_zotero_items.json",
|
13 |
-
}
|
|
|
2 |
|
3 |
from dotenv import load_dotenv
|
4 |
|
5 |
+
from utils.helpers import read_study_files
|
6 |
+
|
7 |
load_dotenv()
|
8 |
|
9 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
10 |
|
11 |
+
|
12 |
+
# Mapping of study name -> exported Zotero items file, loaded once at import.
STUDY_FILES = read_study_files("study_files.json")
|
|
|
|
|
|
data/zotero-collection-pastan_zotero_items.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
CHANGED
@@ -1,7 +1,11 @@
|
|
|
|
1 |
fastapi==0.112.2
|
2 |
gradio
|
3 |
llama-index
|
|
|
4 |
openai
|
5 |
pandas
|
6 |
pydantic
|
7 |
-
python-dotenv
|
|
|
|
|
|
1 |
+
chromadb==0.5.5
|
2 |
fastapi==0.112.2
|
3 |
gradio
|
4 |
llama-index
|
5 |
+
nest-asyncio==1.6.0
|
6 |
openai
|
7 |
pandas
|
8 |
pydantic
|
9 |
+
python-dotenv
|
10 |
+
pyzotero
|
11 |
+
python-slugify
|
study_files.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"Vaccine coverage": "data/vaccine_coverage_zotero_items.json",
|
3 |
+
"Ebola Virus": "data/ebola_virus_zotero_items.json",
|
4 |
+
"GeneXpert": "data/gene_xpert_zotero_items.json",
|
5 |
+
"Zotero Collection Pastan": "data/zotero-collection-pastan_zotero_items.json"
|
6 |
+
}
|
utils/helpers.py
CHANGED
@@ -7,6 +7,94 @@ from utils.prompts import (
|
|
7 |
VaccineCoverageVariables,
|
8 |
StudyCharacteristics,
|
9 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
|
11 |
|
12 |
def generate_follow_up_questions(
|
|
|
7 |
VaccineCoverageVariables,
|
8 |
StudyCharacteristics,
|
9 |
)
|
10 |
+
import json
|
11 |
+
|
12 |
+
def read_study_files(file_path):
    """
    Reads a JSON file and returns the parsed JSON data.

    Args:
        file_path (str): The path to the JSON file to be read.

    Returns:
        dict: The data from the JSON file as a Python dictionary.

    Raises:
        FileNotFoundError: If the file is not found at the provided path.
        ValueError: If the file contents are not valid JSON (chained from the
            underlying json.JSONDecodeError).

    Example:
        Given a JSON file 'study_files.json' with content like:
        {
            "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
            "Ebola Virus": "data/ebola_virus_zotero_items.json",
            "Gene Xpert": "data/gene_xpert_zotero_items.json"
        }

        Calling `read_study_files("study_files.json")` will return:
        {
            "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
            "Ebola Virus": "data/ebola_virus_zotero_items.json",
            "Gene Xpert": "data/gene_xpert_zotero_items.json"
        }
    """
    try:
        # Read as UTF-8 explicitly so non-ASCII study names do not depend on
        # the platform's default encoding.
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError as e:
        raise FileNotFoundError(f"The file at path {file_path} was not found.") from e
    except json.JSONDecodeError as e:
        raise ValueError(f"The file at path {file_path} does not contain valid JSON.") from e
|
49 |
+
|
50 |
+
|
51 |
+
def append_to_study_files(file_path, new_key, new_value):
    """
    Appends a new key-value entry to an existing JSON file.

    If `new_key` already exists its value is overwritten.

    Args:
        file_path (str): The path to the JSON file.
        new_key (str): The new key to add to the JSON file.
        new_value (any): The value associated with the new key (can be any valid JSON data type).

    Raises:
        FileNotFoundError: If the file is not found at the provided path.
        ValueError: If the file contents are not valid JSON (chained from the
            underlying json.JSONDecodeError).
        IOError: If the file cannot be read or written.
        TypeError: If `new_value` is not JSON serializable; the file is left
            untouched in that case.

    Example:
        If the file 'study_files.json' initially contains:
        {
            "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
            "Ebola Virus": "data/ebola_virus_zotero_items.json"
        }

        Calling `append_to_study_files("study_files.json", "Gene Xpert", "data/gene_xpert_zotero_items.json")`
        will modify the file to:
        {
            "Vaccine Coverage": "data/vaccine_coverage_zotero_items.json",
            "Ebola Virus": "data/ebola_virus_zotero_items.json",
            "Gene Xpert": "data/gene_xpert_zotero_items.json"
        }
    """
    try:
        # Read the existing data from the file
        with open(file_path, "r", encoding="utf-8") as file:
            data = json.load(file)

        # Add (or overwrite) the key-value pair
        data[new_key] = new_value

        # Serialize BEFORE reopening the file: mode "w" truncates immediately,
        # so a non-serializable value must fail while the old content is intact.
        serialized = json.dumps(data, indent=4)  # indent for pretty printing

        # Write the updated data back to the file
        with open(file_path, "w", encoding="utf-8") as file:
            file.write(serialized)

    except FileNotFoundError as e:
        raise FileNotFoundError(f"The file at path {file_path} was not found.") from e
    except json.JSONDecodeError as e:
        raise ValueError(f"The file at path {file_path} does not contain valid JSON.") from e
    except IOError as e:
        raise IOError(f"Failed to write to the file at {file_path}.") from e
|
98 |
|
99 |
|
100 |
def generate_follow_up_questions(
|
utils/prompts.py
CHANGED
@@ -71,7 +71,7 @@ vaccine_coverage_prompt = PromptTemplate(
|
|
71 |
)
|
72 |
|
73 |
sample_questions = {
|
74 |
-
"Vaccine
|
75 |
"What are the vaccine coverage rates reported in the study?",
|
76 |
"Are there any reported adverse events following immunization (AEFI)?",
|
77 |
"How does the study account for different vaccine types or schedules?",
|
@@ -84,12 +84,17 @@ sample_questions = {
|
|
84 |
"Were there any ethical considerations or challenges reported?",
|
85 |
"Create a structured table for each Ebola virus study, including the following information: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, SAMPLE_SIZE, PLASMA_TYPE, DOSAGE, FREQUENCY, SIDE_EFFECTS, VIRAL_LOAD_CHANGE, SURVIVAL_RATE, INCLUSION_CRITERIA, EXCLUSION_CRITERIA, SUBGROUP_ANALYSES, FOLLOW_UP_DURATION, LONG_TERM_OUTCOMES, DISEASE_SEVERITY_ASSESSMENT, BIOSAFETY_MEASURES, ETHICAL_CONSIDERATIONS, and STUDY_COMMENTS.",
|
86 |
],
|
87 |
-
"
|
88 |
"What is the main objective of the study?",
|
89 |
"What is the study design?",
|
90 |
"What disease condition is being studied?",
|
91 |
"Extract and present in a tabular format the following variables for each Gene Xpert study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, DISEASE_CONDITION, OBJECTIVE, OUTCOME_MEASURES, SENSITIVITY, SPECIFICITY, COST_COMPARISON, TURNAROUND_TIME, IMPLEMENTATION_CHALLENGES, PERFORMANCE_VARIATIONS, QUALITY_CONTROL, EQUIPMENT_ISSUES, PATIENT_OUTCOME_IMPACT, TRAINING_REQUIREMENTS, SCALABILITY_CONSIDERATIONS, and STUDY_COMMENTS.",
|
92 |
],
|
|
|
|
|
|
|
|
|
|
|
93 |
}
|
94 |
|
95 |
|
|
|
71 |
)
|
72 |
|
73 |
sample_questions = {
|
74 |
+
"Vaccine coverage": [
|
75 |
"What are the vaccine coverage rates reported in the study?",
|
76 |
"Are there any reported adverse events following immunization (AEFI)?",
|
77 |
"How does the study account for different vaccine types or schedules?",
|
|
|
84 |
"Were there any ethical considerations or challenges reported?",
|
85 |
"Create a structured table for each Ebola virus study, including the following information: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, SAMPLE_SIZE, PLASMA_TYPE, DOSAGE, FREQUENCY, SIDE_EFFECTS, VIRAL_LOAD_CHANGE, SURVIVAL_RATE, INCLUSION_CRITERIA, EXCLUSION_CRITERIA, SUBGROUP_ANALYSES, FOLLOW_UP_DURATION, LONG_TERM_OUTCOMES, DISEASE_SEVERITY_ASSESSMENT, BIOSAFETY_MEASURES, ETHICAL_CONSIDERATIONS, and STUDY_COMMENTS.",
|
86 |
],
|
87 |
+
"GeneXpert": [
|
88 |
"What is the main objective of the study?",
|
89 |
"What is the study design?",
|
90 |
"What disease condition is being studied?",
|
91 |
"Extract and present in a tabular format the following variables for each Gene Xpert study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, DISEASE_CONDITION, OBJECTIVE, OUTCOME_MEASURES, SENSITIVITY, SPECIFICITY, COST_COMPARISON, TURNAROUND_TIME, IMPLEMENTATION_CHALLENGES, PERFORMANCE_VARIATIONS, QUALITY_CONTROL, EQUIPMENT_ISSUES, PATIENT_OUTCOME_IMPACT, TRAINING_REQUIREMENTS, SCALABILITY_CONSIDERATIONS, and STUDY_COMMENTS.",
|
92 |
],
|
93 |
+
"General": [
|
94 |
+
"What is the main objective of the study?",
|
95 |
+
"What is the study design?",
|
96 |
+
"Extract and present in a tabular format the following variables for each study: STUDYID, AUTHOR, YEAR, TITLE, PUBLICATION_TYPE, STUDY_DESIGN, STUDY_AREA_REGION, STUDY_POPULATION, OBJECTIVE, and STUDY_COMMENTS.",
|
97 |
+
],
|
98 |
}
|
99 |
|
100 |
|
utils/zotero_manager.py
ADDED
@@ -0,0 +1,644 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
from typing import Any, Dict, List, Optional
|
4 |
+
|
5 |
+
from dotenv import load_dotenv
|
6 |
+
from pydantic import BaseModel, Field
|
7 |
+
from pyzotero import zotero
|
8 |
+
from slugify import slugify
|
9 |
+
|
10 |
+
load_dotenv()
|
11 |
+
|
12 |
+
|
13 |
+
class ZoteroItem(BaseModel):
    """
    Represents metadata about a Zotero item.
    """

    # Required identifiers.
    key: str = Field(..., description="Unique key of the item")
    title: str = Field(..., description="Title of the item")

    # Optional content; each defaults to None when not supplied.
    abstract: Optional[str] = Field(None, description="Abstract or note of the item")
    full_text: Optional[str] = Field(None, description="Full text of the item")

    # Optional bibliographic metadata.
    authors: Optional[List[str]] = Field(None, description="List of authors")
    doi: Optional[str] = Field(None, description="Digital Object Identifier (DOI)")
    year: Optional[int] = Field(None, description="Publication year")
    item_type: Optional[str] = Field(
        None, description="Type of the item (e.g., journalArticle)"
    )
    url: Optional[str] = Field(None, description="URL of the item")
|
31 |
+
|
32 |
+
|
33 |
+
class ZoteroCollection(BaseModel):
    """
    Represents a Zotero collection with metadata.
    """

    # All three fields are required.
    key: str = Field(..., description="Unique identifier for the collection.")
    name: str = Field(..., description="Name of the collection.")
    number_of_items: int = Field(
        ...,
        description="Number of items contained in the collection.",
    )
|
43 |
+
|
44 |
+
|
45 |
+
class ZoteroManager:
|
46 |
+
'''
|
47 |
+
#### Example Usage ####
|
48 |
+
|
49 |
+
zotero_library_id = os.getenv("ZOTERO_LIBRARY_ID")
|
50 |
+
zotero_library_type = "user" # or "group"
|
51 |
+
zotero_api_access_key = os.getenv("ZOTERO_API_ACCESS_KEY")
|
52 |
+
|
53 |
+
zotero_manager = ZoteroManager(zotero_library_id, zotero_library_type, zotero_api_access_key)
|
54 |
+
|
55 |
+
#### GET Zotero topics (Collections) ####
|
56 |
+
zotero_collections = zotero_manager.get_collections()
|
57 |
+
# print(zotero_collections)
|
58 |
+
|
59 |
+
#### Zotero collections parsed with pydantic ####
|
60 |
+
zotero_collection_lists = zotero_manager.list_zotero_collections(zotero_collections)
|
61 |
+
# print(zotero_collection_lists)
|
62 |
+
"""
|
63 |
+
[
|
64 |
+
ZoteroCollection(key='IXU5ZWRM', name='RR 10', number_of_items=0),
|
65 |
+
ZoteroCollection(key='G6AZZGPQ', name='RR 9', number_of_items=0),
|
66 |
+
ZoteroCollection(key='DZ45SJHF', name='RR 8', number_of_items=0),
|
67 |
+
ZoteroCollection(key='DM5FVG74', name='RR 7', number_of_items=0),
|
68 |
+
ZoteroCollection(key='43N5CI48', name='RR 6', number_of_items=0),
|
69 |
+
ZoteroCollection(key='2TCX6JC2', name='RR 5', number_of_items=0),
|
70 |
+
ZoteroCollection(key='QVSNAJWV', name='RR 4', number_of_items=0),
|
71 |
+
ZoteroCollection(key='96UJANPP', name='Ebola Virus', number_of_items=17),
|
72 |
+
ZoteroCollection(key='UB7AEMB6', name='GeneXpert', number_of_items=31),
|
73 |
+
ZoteroCollection(key='UDQ9JSD9', name='Vaccine coverage', number_of_items=22),
|
74 |
+
ZoteroCollection(key='SGNLNIAT', name='Zotero Collection Pastan', number_of_items=227)
|
75 |
+
]
|
76 |
+
"""
|
77 |
+
|
78 |
+
#### Collections with items ####
|
79 |
+
filtered_zotero_collection_lists = zotero_manager.filter_and_return_collections_with_items(zotero_collection_lists)
|
80 |
+
# print(filtered_zotero_collection_lists)
|
81 |
+
"""
|
82 |
+
[
|
83 |
+
{'key': '96UJANPP', 'name': 'Ebola Virus', 'number_of_items': 17},
|
84 |
+
{'key': 'UB7AEMB6', 'name': 'GeneXpert', 'number_of_items': 31},
|
85 |
+
{'key': 'UDQ9JSD9', 'name': 'Vaccine coverage', 'number_of_items': 22},
|
86 |
+
{'key': 'SGNLNIAT',
|
87 |
+
'name': 'Zotero Collection Pastan',
|
88 |
+
'number_of_items': 227}
|
89 |
+
]
|
90 |
+
"""
|
91 |
+
|
92 |
+
#### Collection by name from a list of zotero collections
|
93 |
+
ebola_virus_collection = zotero_manager.find_zotero_collection_by_name(zotero_collection_lists, "Ebola Virus")
|
94 |
+
# print(ebola_virus_collection)
|
95 |
+
"""ZoteroCollection(key='96UJANPP', name='Ebola Virus', number_of_items=17)"""
|
96 |
+
# print(ebola_virus_collection.model_dump())
|
97 |
+
"""{'key': '96UJANPP', 'name': 'Ebola Virus', 'number_of_items': 17}"""
|
98 |
+
|
99 |
+
#### Get single collection by key ####
|
100 |
+
ebola_virus_collection_key = "96UJANPP" # Ebola Virus
|
101 |
+
ebola_virus_collection = zotero_manager.get_collection_by_key(ebola_virus_collection_key)
|
102 |
+
# print(ebola_virus_collection)
|
103 |
+
"""
|
104 |
+
{
|
105 |
+
'key': '96UJANPP',
|
106 |
+
'version': 72,
|
107 |
+
'library': {'type': 'user',
|
108 |
+
'id': 11201324,
|
109 |
+
'name': 'pjlus',
|
110 |
+
'links': {'alternate': {'href': 'https://www.zotero.org/pjlus',
|
111 |
+
'type': 'text/html'}}},
|
112 |
+
'links': {'self': {'href': 'https://api.zotero.org/users/11201324/collections/96UJANPP',
|
113 |
+
'type': 'application/json'},
|
114 |
+
'alternate': {'href': 'https://www.zotero.org/pjlus/collections/96UJANPP',
|
115 |
+
'type': 'text/html'}},
|
116 |
+
'meta': {'numCollections': 0, 'numItems': 17},
|
117 |
+
'data': {'key': '96UJANPP',
|
118 |
+
'version': 72,
|
119 |
+
'name': 'Ebola Virus',
|
120 |
+
'parentCollection': False,
|
121 |
+
'relations': {}}
|
122 |
+
}
|
123 |
+
"""
|
124 |
+
|
125 |
+
#### Get collection items by collection key ####
|
126 |
+
ebora_virus_collection_items = zotero_manager.get_collection_items(ebola_virus_collection_key)
|
127 |
+
print(len(ebora_virus_collection_items))
|
128 |
+
# print(ebora_virus_collection_items[:2])
|
129 |
+
|
130 |
+
#### Getting zotero collection items and full text
|
131 |
+
# Here the collections have been parsed using the zotero item pydantic model defined in the zotero manager.
|
132 |
+
####
|
133 |
+
ebora_virus_zotero_collection_items = zotero_manager.get_collection_zotero_items_by_key(ebola_virus_collection_key)
|
134 |
+
# print(len(ebora_virus_zotero_collection_items))
|
135 |
+
# print(ebora_virus_zotero_collection_items[0])
|
136 |
+
|
137 |
+
#### Get item children (attachments)
|
138 |
+
# Listed items in zotero are items together with their attachments (pdf content)
|
139 |
+
####
|
140 |
+
zotero_manager.get_item_children("2Q7HFERL")
|
141 |
+
|
142 |
+
#### Get an item full text ####
|
143 |
+
zotero_manager.get_item_full_text("BMYMEW76")["content"]
|
144 |
+
|
145 |
+
#### Save the item pdf content to disc ####
|
146 |
+
## Function to save a pdf file
|
147 |
+
zotero_manager.save_item_file("BMYMEW76")
|
148 |
+
|
149 |
+
#### Export zotero collection items to json ####
|
150 |
+
ebora_virus_zotero_items_json = zotero_manager.zotero_items_to_json(ebora_virus_zotero_collection_items)
|
151 |
+
print(len(ebora_virus_zotero_items_json))
|
152 |
+
# print(ebora_virus_zotero_items_json[0])
|
153 |
+
## Save to disc
|
154 |
+
zotero_manager.write_zotero_items_to_json_file(ebora_virus_zotero_items_json, "zotero_data/ebora_virus_zotero_items.json")
|
155 |
+
'''
|
156 |
+
|
157 |
+
def __init__(self, library_id: str, library_type: str, api_key: str):
    """
    Creates the pyzotero client that the other methods use via `self.zot`.

    Args:
        library_id: Identifier of the Zotero library.
        library_type: Library type, e.g. "user" or "group".
        api_key: Zotero API access key.
    """
    client = zotero.Zotero(library_id, library_type, api_key)
    self.zot = client
|
159 |
+
|
160 |
+
def create_zotero_item_from_json(self, json_obj: Dict[str, Any]) -> ZoteroItem:
    """
    Creates a ZoteroItem instance from a JSON object.

    Args:
        json_obj (Dict[str, Any]): A JSON object containing the Zotero item data.
            The JSON structure is expected to have a 'data' field which includes
            the metadata for the Zotero item.

    Returns:
        ZoteroItem: An instance of ZoteroItem populated with the data extracted
            from the JSON object. The fields include key, title, abstract,
            full_text, authors, doi, year, item_type, and url. `year` is None
            when the item's 'date' contains no four-digit year.
    """
    import re  # local import: only needed here, for year extraction

    data = json_obj.get("data", {})
    key = data.get("key")

    # Extract the item's full text from its attachment
    full_text = self.get_full_text_from_children(key)

    # Build one display name per author from whichever name parts are present,
    # joining only non-empty parts so no double spaces appear.
    authors = [
        " ".join(
            part
            for part in (
                creator.get("name"),
                creator.get("firstName"),
                creator.get("lastName"),
            )
            if part
        ).strip()
        for creator in data.get("creators", [])
        if creator.get("creatorType") == "author"
    ]

    # The 'date' field is not guaranteed to be ISO formatted, so instead of
    # int(date.split('-')[0]) take the first standalone four-digit number.
    year_match = re.search(r"\b(\d{4})\b", data.get("date") or "")
    year = int(year_match.group(1)) if year_match else None

    # Create the ZoteroItem instance
    return ZoteroItem(
        key=key,
        title=data.get("title"),
        abstract=data.get("abstractNote"),
        full_text=full_text,
        authors=authors,
        doi=data.get("DOI"),
        year=year,
        item_type=data.get("itemType"),
        url=data.get("url"),
    )
|
201 |
+
|
202 |
+
def create_zotero_collection(
    self, collection_dict: Dict[str, Any]
) -> ZoteroCollection:
    """
    Builds a ZoteroCollection from a raw collection dictionary.

    Args:
        collection_dict (Dict[str, Any]): Raw collection data. 'key' and 'name'
            are read from its 'data' entry and 'numItems' from its 'meta' entry.

    Returns:
        ZoteroCollection: The parsed collection; `number_of_items` falls back
            to 0 when 'numItems' is absent.
    """
    payload = collection_dict.get("data", {})
    item_count = collection_dict.get("meta", {}).get("numItems", 0)

    return ZoteroCollection(
        key=payload.get("key"),
        name=payload.get("name"),
        number_of_items=item_count,
    )
|
227 |
+
|
228 |
+
def list_zotero_collections(
    self, collection_items: List[Dict[str, Any]]
) -> List["ZoteroCollection"]:
    """
    Parses raw collection dictionaries into ZoteroCollection instances.

    Args:
        collection_items (List[Dict[str, Any]]): Raw collection dictionaries;
            each one is passed to `create_zotero_collection`.

    Returns:
        List[ZoteroCollection]: One parsed collection per input dictionary,
            in the same order.
    """
    parsed = []
    for raw_collection in collection_items:
        parsed.append(self.create_zotero_collection(raw_collection))
    return parsed
|
248 |
+
|
249 |
+
def list_all_papers(self) -> List["ZoteroItem"]:
    """
    Lists all papers (journal articles) in your Zotero library.

    Returns:
        List of ZoteroItem objects representing the papers in your library.
    """
    # A single items() call yields only one page of results; everything()
    # follows the pagination so larger libraries are returned in full.
    first_page = self.zot.items(itemType="journalArticle")
    results = self.zot.everything(first_page)

    return [self.create_zotero_item_from_json(item) for item in results]
|
267 |
+
|
268 |
+
def list_items(self, limit: int = 5):
    """
    Returns raw library items as provided by the Zotero client.

    Args:
        limit: Value forwarded as the client's `limit` argument.

    Returns:
        Whatever `self.zot.items(limit=limit)` returns.
    """
    raw_items = self.zot.items(limit=limit)
    return raw_items
|
270 |
+
|
271 |
+
def query_items(self, query: str, limit: int = 10) -> List["ZoteroItem"]:
    """
    Queries Zotero for items matching the given query.

    Args:
        query: The search query.
        limit: Maximum number of items to return.

    Returns:
        List of ZoteroItem objects, one per search result.
    """
    matches = []
    for raw_item in self.zot.items(q=query, limit=limit):
        matches.append(self.create_zotero_item_from_json(raw_item))
    return matches
|
287 |
+
|
288 |
+
def get_item_by_key(self, key: str) -> "ZoteroItem":
    """
    Retrieves a Zotero item by its key.

    Args:
        key: The unique key of the item.

    Returns:
        ZoteroItem built from the raw item returned by the client.
    """
    return self.create_zotero_item_from_json(self.zot.item(key))
|
300 |
+
|
301 |
+
def get_item_by_doi(self, doi: str) -> Optional["ZoteroItem"]:
    """
    Searches for a Zotero item by its DOI.

    Args:
        doi: The DOI of the item.

    Returns:
        ZoteroItem object for the first search result whose 'DOI' field equals
        `doi` exactly, otherwise None.
    """
    # Quick search covers only title/creator/year by default; "everything"
    # mode is needed for the DOI field to be searched at all.
    results = self.zot.items(q=doi, qmode="everything")
    for item in results:
        # The search is fuzzy, so confirm the DOI matches before accepting.
        if item.get("data", {}).get("DOI") == doi:
            # This result used to be built and then discarded, so the
            # function always returned None.
            return self.create_zotero_item_from_json(item)
    return None
|
316 |
+
|
317 |
+
def get_item_tags(self, item_key: str) -> List[str]:
    """
    Retrieves the tags associated with a Zotero item.

    Args:
        item_key: The unique key of the item.

    Returns:
        The tags reported by the client for that item.
    """
    tags = self.zot.item_tags(item_key)
    return tags
|
328 |
+
|
329 |
+
def get_collections(self) -> List[Dict[str, Any]]:
    """
    Retrieves the collections of the Zotero library.

    Returns:
        The raw collection dictionaries returned by the client.
    """
    raw_collections = self.zot.collections()
    return raw_collections
|
337 |
+
|
338 |
+
def get_collection_by_key(self, collection_key: str) -> Dict[str, Any]:
    """
    Retrieves a collection by its key.

    Args:
        collection_key: The unique key of the collection.

    Returns:
        The raw collection dictionary returned by the client.
    """
    raw_collection = self.zot.collection(collection_key)
    return raw_collection
|
349 |
+
|
350 |
+
def get_collection_items(self, collection_key: str) -> List[Dict[str, Any]]:
    """
    Retrieves the journal-article items in a collection.

    Args:
        collection_key: The unique key of the collection.

    Returns:
        List of dictionaries representing all journal-article items in the
        collection.
    """
    # collection_items() alone yields a single page of results; everything()
    # follows the pagination so large collections are not silently truncated.
    first_page = self.zot.collection_items(collection_key, itemType="journalArticle")
    return self.zot.everything(first_page)
|
361 |
+
|
362 |
+
def get_item_children(self, item_key: str) -> List[Dict[str, Any]]:
    """
    Retrieves the children of a Zotero item.

    Args:
        item_key: The unique key of the parent item.

    Returns:
        The raw child dictionaries returned by the client.
    """
    child_items = self.zot.children(item_key)
    return child_items
|
373 |
+
|
374 |
+
def get_collection_zotero_items_by_key(
    self, collection_key: str
) -> List["ZoteroItem"]:
    """
    Retrieves the journal-article items in a collection as ZoteroItem objects.

    Args:
        collection_key: The unique key of the collection.

    Returns:
        List of ZoteroItem objects representing all journal-article items in
        the collection.
    """
    # collection_items() alone yields a single page of results; everything()
    # follows the pagination so large collections are exported completely.
    first_page = self.zot.collection_items(collection_key, itemType="journalArticle")
    items = self.zot.everything(first_page)
    return [self.create_zotero_item_from_json(item) for item in items]
|
388 |
+
|
389 |
+
def filter_and_return_collections_with_items(
    self, zotero_collections: List[ZoteroCollection]
) -> List[Dict[str, Any]]:
    """
    Keeps only the collections that contain at least one item and returns
    them as plain dictionaries.

    Args:
        zotero_collections (List[ZoteroCollection]): The collections to filter.

    Returns:
        List[Dict[str, Any]]: ``model_dump()`` of every collection whose
        ``number_of_items`` is greater than zero, in the original order.
    """
    non_empty: List[Dict[str, Any]] = []
    for candidate in zotero_collections:
        if candidate.number_of_items > 0:
            non_empty.append(candidate.model_dump())
    return non_empty
|
408 |
+
|
409 |
+
def find_zotero_collection_by_name(
    self, zotero_collections: List[ZoteroCollection], name: str
) -> ZoteroCollection:
    """
    Looks up a collection by its exact name.

    Args:
        zotero_collections (List[ZoteroCollection]): The collections to search.
        name (str): The name of the collection to find.

    Returns:
        ZoteroCollection: The first collection whose ``name`` equals ``name``.

    Raises:
        ValueError: If no collection with the given name is found.
    """
    match = next(
        (candidate for candidate in zotero_collections if candidate.name == name),
        None,
    )
    if match is None:
        raise ValueError(f"Collection with name '{name}' not found.")
    return match
|
429 |
+
|
430 |
+
def zotero_items_to_json(
    self, zotero_items: List[ZoteroItem]
) -> List[Dict[str, Any]]:
    """
    Serialises ZoteroItem instances into JSON-compatible dictionaries.

    Items that have neither an abstract nor full text are left out of the
    result.

    Args:
        zotero_items (List[ZoteroItem]): The items to convert.

    Returns:
        List[Dict[str, Any]]: ``model_dump()`` of every item that has a truthy
        ``abstract`` or ``full_text``, in the original order.
    """
    serialised: List[Dict[str, Any]] = []
    for entry in zotero_items:
        # Skip entries that carry no text at all.
        if not (entry.abstract or entry.full_text):
            continue
        serialised.append(entry.model_dump())
    return serialised
|
446 |
+
|
447 |
+
def write_zotero_items_to_json_file(
    self, zotero_items_json: List[Dict[str, Any]], file_path: str
) -> None:
    """
    Writes a JSON-compatible list of Zotero items to a JSON file.

    The parent directory of ``file_path`` is created when it does not exist
    yet, so exporting into a fresh output folder does not fail.

    Args:
        zotero_items_json (List[Dict[str, Any]]): A JSON-compatible list of
            dictionaries representing Zotero items.
        file_path (str): The file path where the JSON file should be written.

    Returns:
        None
    """
    parent_dir = os.path.dirname(file_path)
    # dirname is "" for a bare file name; makedirs("") would raise.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(zotero_items_json, json_file, indent=2)
|
463 |
+
|
464 |
+
def get_item_full_text(self, key: str) -> Optional[dict]:
    """
    Fetches the full-text record of an item, on a best-effort basis.

    Args:
        key: The unique key of the item.

    Returns:
        The value returned by ``self.zot.fulltext_item`` for that key, or
        ``None`` when the call raises. The error is printed, not re-raised.
    """
    try:
        full_text_record = self.zot.fulltext_item(key)
    except Exception as e:
        print(f"Error: {str(e)}")
        return None
    return full_text_record
|
480 |
+
|
481 |
+
def get_full_text_from_children(self, key: str) -> Optional[str]:
    """
    Collects the full text of every attachment that is a child of an item.

    Args:
        key: The unique key of the parent item.

    Returns:
        The ``content`` of each attachment child whose full text could be
        retrieved, each followed by a newline, concatenated in child order.
        An empty string when there are no children or no text was found.
    """
    attachment_texts = []
    for child in self.get_item_children(key) or []:
        if child.get("data", {}).get("itemType") != "attachment":
            continue
        content_dict = self.get_item_full_text(child.get("data", {}).get("key", ""))
        # None means the full text could not be retrieved for this attachment.
        if content_dict is None:
            continue
        attachment_texts.append(content_dict.get("content", "") + "\n")
    return "".join(attachment_texts)
|
504 |
+
|
505 |
+
def save_item_file(self, key: str) -> None:
    """
    Downloads the file of an item into the ``zotero_data`` directory.

    The file is saved as ``<slugified item title>.pdf``. The download is
    best-effort: any error raised while creating the directory or dumping
    the file is printed, not re-raised.

    Args:
        key: The unique key of the item.
    """
    item = self.zot.item(key)
    zotero_item = self.create_zotero_item_from_json(item)
    item_title = slugify(zotero_item.title)
    output_dir = "zotero_data"
    try:
        # The dump target directory is not guaranteed to exist yet; without
        # this the download fails with only a printed "No such file" error.
        os.makedirs(output_dir, exist_ok=True)
        self.zot.dump(key, f"{item_title}.pdf", output_dir)
    except Exception as e:
        print(f"Error: {str(e)}")
|
519 |
+
|
520 |
+
|
521 |
+
if __name__ == "__main__":
    # Sample driver code: walks through the main ZoteroManager operations
    # using the library configured through environment variables.
    zotero_library_id = os.getenv("ZOTERO_LIBRARY_ID")
    zotero_library_type = "user"  # or "group"
    zotero_api_access_key = os.getenv("ZOTERO_API_ACCESS_KEY")

    zotero_manager = ZoteroManager(
        zotero_library_id, zotero_library_type, zotero_api_access_key
    )

    # The PDF dump and the JSON export below both write into this directory.
    os.makedirs("zotero_data", exist_ok=True)

    #### GET Zotero topics (Collections) ####
    zotero_collections = zotero_manager.get_collections()
    # print(zotero_collections)

    #### Zotero collections parsed with pydantic ####
    zotero_collection_lists = zotero_manager.list_zotero_collections(zotero_collections)
    # print(zotero_collection_lists)
    # Sample output:
    # [
    #     ZoteroCollection(key='IXU5ZWRM', name='RR 10', number_of_items=0),
    #     ZoteroCollection(key='G6AZZGPQ', name='RR 9', number_of_items=0),
    #     ZoteroCollection(key='DZ45SJHF', name='RR 8', number_of_items=0),
    #     ZoteroCollection(key='DM5FVG74', name='RR 7', number_of_items=0),
    #     ZoteroCollection(key='43N5CI48', name='RR 6', number_of_items=0),
    #     ZoteroCollection(key='2TCX6JC2', name='RR 5', number_of_items=0),
    #     ZoteroCollection(key='QVSNAJWV', name='RR 4', number_of_items=0),
    #     ZoteroCollection(key='96UJANPP', name='Ebola Virus', number_of_items=17),
    #     ZoteroCollection(key='UB7AEMB6', name='GeneXpert', number_of_items=31),
    #     ZoteroCollection(key='UDQ9JSD9', name='Vaccine coverage', number_of_items=22),
    #     ZoteroCollection(key='SGNLNIAT', name='Zotero Collection Pastan', number_of_items=227)
    # ]

    #### Collections with items ####
    filtered_zotero_collection_lists = (
        zotero_manager.filter_and_return_collections_with_items(zotero_collection_lists)
    )
    # print(filtered_zotero_collection_lists)
    # Sample output:
    # [
    #     {'key': '96UJANPP', 'name': 'Ebola Virus', 'number_of_items': 17},
    #     {'key': 'UB7AEMB6', 'name': 'GeneXpert', 'number_of_items': 31},
    #     {'key': 'UDQ9JSD9', 'name': 'Vaccine coverage', 'number_of_items': 22},
    #     {'key': 'SGNLNIAT',
    #      'name': 'Zotero Collection Pastan',
    #      'number_of_items': 227}
    # ]

    #### Collection by name from a list of zotero collections
    ebola_virus_collection = zotero_manager.find_zotero_collection_by_name(
        zotero_collection_lists, "Ebola Virus"
    )
    # print(ebola_virus_collection)
    # ZoteroCollection(key='96UJANPP', name='Ebola Virus', number_of_items=17)
    # print(ebola_virus_collection.model_dump())
    # {'key': '96UJANPP', 'name': 'Ebola Virus', 'number_of_items': 17}

    #### Get single collection by key ####
    ebola_virus_collection_key = "96UJANPP"  # Ebola Virus
    ebola_virus_collection = zotero_manager.get_collection_by_key(
        ebola_virus_collection_key
    )
    # print(ebola_virus_collection)
    # Sample output:
    # {
    #     'key': '96UJANPP',
    #     'version': 72,
    #     'library': {'type': 'user',
    #                 'id': 11201324,
    #                 'name': 'pjlus',
    #                 'links': {'alternate': {'href': 'https://www.zotero.org/pjlus',
    #                                         'type': 'text/html'}}},
    #     'links': {'self': {'href': 'https://api.zotero.org/users/11201324/collections/96UJANPP',
    #                        'type': 'application/json'},
    #               'alternate': {'href': 'https://www.zotero.org/pjlus/collections/96UJANPP',
    #                             'type': 'text/html'}},
    #     'meta': {'numCollections': 0, 'numItems': 17},
    #     'data': {'key': '96UJANPP',
    #              'version': 72,
    #              'name': 'Ebola Virus',
    #              'parentCollection': False,
    #              'relations': {}}
    # }

    #### Get collection items by collection key ####
    ebola_virus_collection_items = zotero_manager.get_collection_items(
        ebola_virus_collection_key
    )
    print(len(ebola_virus_collection_items))
    # print(ebola_virus_collection_items[:2])

    #### Getting zotero collection items and full text
    # Here the collections have been parsed using the zotero item pydantic model defined in the zotero manager.
    ####
    ebola_virus_zotero_collection_items = (
        zotero_manager.get_collection_zotero_items_by_key(ebola_virus_collection_key)
    )
    # print(len(ebola_virus_zotero_collection_items))
    # print(ebola_virus_zotero_collection_items[0])

    #### Get item children (attachments)
    # Listed items in zotero are items together with their attachments (pdf content)
    ####
    zotero_manager.get_item_children("2Q7HFERL")

    #### Get an item full text ####
    # get_item_full_text returns None when retrieval fails, so the result
    # must be checked before it is subscripted.
    item_full_text = zotero_manager.get_item_full_text("BMYMEW76")
    if item_full_text is not None:
        item_full_text["content"]

    #### Save the item pdf content to disc ####
    ## Function to save a pdf file
    zotero_manager.save_item_file("BMYMEW76")

    #### Export zotero collection items to json ####
    ebola_virus_zotero_items_json = zotero_manager.zotero_items_to_json(
        ebola_virus_zotero_collection_items
    )
    print(len(ebola_virus_zotero_items_json))
    # print(ebola_virus_zotero_items_json[0])
    ## Save to disc
    # NOTE(review): the file name keeps its original "ebora" spelling so that
    # existing consumers of this path are not broken.
    zotero_manager.write_zotero_items_to_json_file(
        ebola_virus_zotero_items_json, "zotero_data/ebora_virus_zotero_items.json"
    )
|