acres / app.py
Patrick Walukagga
chroma integration
af11e83
raw
history blame
12.5 kB
import json
from typing import List, Tuple
import os
import logging
import gradio as gr
from dotenv import load_dotenv
from slugify import slugify
from rag.rag_pipeline import RAGPipeline
from utils.helpers import generate_follow_up_questions, append_to_study_files, add_study_files_to_chromadb, chromadb_client
from utils.prompts import (
highlight_prompt,
evidence_based_prompt,
sample_questions,
)
import openai
from config import STUDY_FILES, OPENAI_API_KEY
from utils.zotero_manager import ZoteroManager
# Load environment variables (python-dotenv) before anything reads configuration.
load_dotenv()
logging.basicConfig(level=logging.INFO)
openai.api_key = OPENAI_API_KEY
# Runs once at import time: sync the entries recorded in study_files.json into
# the "study_files_collection" ChromaDB collection, so the collection is
# populated before create_gr_interface() reads it further down this module.
# NOTE(review): exact semantics live in utils.helpers — confirm it is idempotent.
add_study_files_to_chromadb("study_files.json", "study_files_collection")
# Cache for RAG pipelines, keyed by study name (filled by get_rag_pipeline)
rag_cache = {}
def process_zotero_library_items(zotero_library_id: str, zotero_api_access_key: str) -> str:
    """Export not-yet-known Zotero collections to JSON and register them as studies.

    For every non-empty collection in the user's Zotero library whose name is
    not already a key of ``STUDY_FILES``, the collection items are written to
    ``data/<slug>_zotero_items.json``, recorded in ``study_files.json`` and in
    the in-memory ``STUDY_FILES`` mapping. After the loop the study files are
    pushed to the ``study_files_collection`` ChromaDB collection.

    Args:
        zotero_library_id: Zotero library identifier entered by the user.
        zotero_api_access_key: Zotero API key entered by the user.

    Returns:
        A human-readable status message. Errors are reported in the message
        rather than raised, so the UI callback never fails.
    """
    if not zotero_library_id or not zotero_api_access_key:
        return "Please enter your zotero library Id and API Access Key"

    zotero_library_type = "user"  # or "group"

    try:
        zotero_manager = ZoteroManager(
            zotero_library_id, zotero_library_type, zotero_api_access_key
        )
        zotero_collections = zotero_manager.get_collections()
        zotero_collection_lists = zotero_manager.list_zotero_collections(zotero_collections)
        filtered_zotero_collection_lists = (
            zotero_manager.filter_and_return_collections_with_items(zotero_collection_lists)
        )

        for collection in filtered_zotero_collection_lists:
            collection_name = collection.get("name")
            if collection_name in STUDY_FILES:
                # Already exported in a previous run or session.
                continue

            collection_key = collection.get("key")
            # NOTE(review): the original bound this result to a local that was
            # never read. The call is kept in case it has side effects inside
            # ZoteroManager — confirm and drop it if it is a pure fetch.
            zotero_manager.get_collection_items(collection_key)
            zotero_collection_items = (
                zotero_manager.get_collection_zotero_items_by_key(collection_key)
            )

            # Export zotero collection items to json
            zotero_items_json = zotero_manager.zotero_items_to_json(zotero_collection_items)
            export_path = f"data/{slugify(collection_name)}_zotero_items.json"
            zotero_manager.write_zotero_items_to_json_file(zotero_items_json, export_path)
            append_to_study_files("study_files.json", collection_name, export_path)

            # Update in-memory STUDY_FILES for reference in current session
            STUDY_FILES.update({collection_name: export_path})
            logging.info(f"STUDY_FILES: {STUDY_FILES}")

        # After loop, add all collected data to ChromaDB
        add_study_files_to_chromadb("study_files.json", "study_files_collection")
        message = "Successfully processed items in your zotero library"
    except Exception as e:
        # Top-level UI boundary: keep the best-effort message for the user but
        # record the traceback so the failure can be diagnosed from the logs.
        logging.exception("Failed to process the Zotero library")
        message = f"Error process your zotero library: {str(e)}"

    return message
def get_rag_pipeline(study_name: str) -> RAGPipeline:
    """Return the RAGPipeline for ``study_name``, building and caching it on first use.

    The study's file path is read from the ``file_path`` metadata of the
    ChromaDB record whose ID equals ``study_name``.

    Raises:
        ValueError: If no record exists for ``study_name`` or the record has
            no ``file_path`` metadata.
    """
    # Fast path: a pipeline was already built for this study.
    if study_name in rag_cache:
        return rag_cache[study_name]

    # Look the study up by ID in the study-files collection.
    studies = chromadb_client.get_or_create_collection("study_files_collection")
    lookup = studies.get(ids=[study_name])

    if not lookup or len(lookup['metadatas']) == 0:
        raise ValueError(f"Invalid study name: {study_name}")

    file_path = lookup['metadatas'][0].get("file_path")
    if not file_path:
        raise ValueError(f"File path not found for study name: {study_name}")

    # Build once, remember for subsequent calls.
    pipeline = RAGPipeline(file_path)
    rag_cache[study_name] = pipeline
    return rag_cache[study_name]
def chat_function(
    message: str, study_name: str, prompt_type: str
) -> str:
    """Answer ``message`` against the selected study using its RAG pipeline.

    ``prompt_type`` selects the prompt template: "Highlight" and
    "Evidence-based" map to their dedicated prompts; any other value passes
    ``None`` as the template to the pipeline.
    """
    if message.strip() == "":
        return "Please enter a valid query."

    pipeline = get_rag_pipeline(study_name)
    logging.info(f"rag: ==> {pipeline}")

    # Resolve the template explicitly; unknown prompt types fall through to None.
    if prompt_type == "Highlight":
        template = highlight_prompt
    elif prompt_type == "Evidence-based":
        template = evidence_based_prompt
    else:
        template = None

    answer = pipeline.query(message, prompt_template=template)
    return answer.response
def get_study_info(study_name: str) -> str:
    """Return a Markdown summary (document count) for the given study.

    The study's JSON file path is read from the ``file_path`` metadata of the
    ChromaDB record whose ID equals ``study_name``; the file is then loaded
    and the number of top-level entries reported.

    Args:
        study_name: ID of the study record in ``study_files_collection``.

    Returns:
        A Markdown heading of the form ``### Number of documents: N``.

    Raises:
        ValueError: If no record exists for ``study_name`` or the record has
            no ``file_path`` metadata.
    """
    collection = chromadb_client.get_or_create_collection("study_files_collection")
    result = collection.get(ids=[study_name])  # Query by study name (as a list)
    logging.info(f"Result: ======> {result}")

    # A missing record yields an empty (or absent) metadata list.
    metadatas = result.get("metadatas") if result else None
    if not metadatas:
        raise ValueError(f"Invalid study name: {study_name}")

    # A record stored without metadata comes back as None; treat it like a
    # missing file path instead of failing with AttributeError.
    study_file = (metadatas[0] or {}).get("file_path")
    logging.info(f"study_file: =======> {study_file}")
    if not study_file:
        raise ValueError(f"File path not found for study name: {study_name}")

    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(study_file, "r", encoding="utf-8") as f:
        data = json.load(f)

    return f"### Number of documents: {len(data)}"
def update_interface(study_name: str) -> Tuple[str, gr.update, gr.update, gr.update]:
    """Build the study summary plus three sample-question button updates.

    Up to three questions are taken from ``sample_questions[study_name]``,
    falling back to the "General" entry when the study has none. Buttons
    without a question are returned hidden so the tuple always has four items.
    """
    details = get_study_info(study_name)

    picked = sample_questions.get(study_name, [])[:3] or sample_questions.get("General", [])[:3]

    # Visible buttons first, then pad with hidden ones up to three slots.
    button_updates = [gr.update(visible=True, value=text) for text in picked]
    button_updates.extend(gr.update(visible=False) for _ in range(3 - len(picked)))

    return (details, *button_updates)
def set_question(question: str) -> str:
    """Return ``question`` without its leading sparkle/space decoration.

    Every leading "✨" or space character is removed; the rest of the text,
    including any later sparkles, is left untouched.
    """
    decoration_chars = "✨ "
    cleaned = question.lstrip(decoration_chars)
    return cleaned
def process_multi_input(text, study_name, prompt_type):
    """Ask the RAG pipeline to tabulate the comma-separated variables in ``text``.

    Args:
        text: Comma-separated variable names typed by the user.
        study_name: Study selected in the dropdown.
        prompt_type: Prompt type selected in the settings.

    Returns:
        The pipeline's answer, or a short hint when ``text`` contains no
        variable names (empty input or only commas/whitespace).
    """
    # Split on commas, normalise, and drop blanks left by stray or trailing commas.
    variable_list = [
        name.strip().upper() for name in (text or "").split(",") if name.strip()
    ]
    if not variable_list:
        # Nothing to extract: avoid sending a meaningless query to the pipeline.
        return "Please enter at least one variable."

    user_message = f"Extract and present in a tabular format the following variables for each {study_name} study: {', '.join(variable_list)}"
    logging.info(f"User message: ==> {user_message}")
    return chat_function(user_message, study_name, prompt_type)
def _list_study_names() -> List[str]:
    """Return the IDs of all records in the ``study_files_collection`` collection."""
    collection = chromadb_client.get_or_create_collection("study_files_collection")
    # get() without filters returns every record as a flat list of IDs; this
    # avoids running a similarity query on an empty string with an arbitrary
    # n_results cap just to enumerate the collection.
    stored = collection.get()
    return list(stored.get("ids") or []) if stored else []


def create_gr_interface() -> gr.Blocks:
    """
    Create and configure the Gradio interface for the RAG platform.

    This function sets up the user interface, including:
    - Zotero credential inputs and a button to process the library
    - Study selection dropdown (choices read from ChromaDB) and study details
    - Prompt type selection
    - Study variables input with a submit button and answer display
    - Event handlers for user interactions

    Returns:
        gr.Blocks: The configured Gradio interface ready for launching.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# ACRES RAG Platform")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("### Zotero Credentials")
                zotero_library_id = gr.Textbox(
                    label="Zotero Library ID",
                    type="password",
                    placeholder="Enter Your Zotero Library ID here...",
                )
                zotero_api_access_key = gr.Textbox(
                    label="Zotero API Access Key",
                    type="password",
                    placeholder="Enter Your Zotero API Access Key...",
                )
                process_zotero_btn = gr.Button("Process your Zotero Library")
                zotero_output = gr.Markdown(label="Zotero")

                gr.Markdown("### Study Information")

                # Study names are the document IDs stored in ChromaDB.
                study_choices = _list_study_names()
                logging.info(f"study_choices: ======> {study_choices}")

                study_dropdown = gr.Dropdown(
                    choices=study_choices,
                    label="Select Study",
                    # First choice is the default, if any study is available.
                    value=study_choices[0] if study_choices else None,
                )

                study_info = gr.Markdown(label="Study Details")

                gr.Markdown("### Settings")
                prompt_type = gr.Radio(
                    ["Default", "Highlight", "Evidence-based"],
                    label="Prompt Type",
                    value="Default",
                )
                # clear = gr.Button("Clear Chat")

            with gr.Column(scale=3):
                gr.Markdown("### Study Variables")
                with gr.Row():
                    study_variables = gr.Textbox(
                        show_label=False,
                        placeholder="Type your variables separated by commas e.g (Study ID, Study Title, Authors etc)",
                        scale=4,
                        lines=1,
                        autofocus=True,
                    )
                    submit_btn = gr.Button("Submit", scale=1)
                answer_output = gr.Markdown(label="Answer")

        # NOTE: `user` and `bot` belong to the chat workflow whose wiring is
        # commented out below; they are currently not attached to any event.
        def user(
            user_message: str, history: List[List[str]]
        ) -> Tuple[str, List[List[str]]]:
            """Clear the input box and append a non-blank message to the history."""
            return "", (
                history + [[user_message, None]] if user_message.strip() else history
            )

        def bot(
            history: List[List[str]], study_name: str, prompt_type: str
        ) -> List[List[str]]:
            """
            Fill in the bot reply for the latest user message.

            Args:
                history (List[List[str]]): The current chat history.
                study_name (str): The name of the current study.
                prompt_type (str): The type of prompt being used.

            Returns:
                List[List[str]]: The chat history with the last entry answered.
            """
            if not history:
                # Same return shape as the normal path.
                return history

            user_message = history[-1][0]
            # chat_function takes (message, study_name, prompt_type).
            bot_message = chat_function(user_message, study_name, prompt_type)
            history[-1][1] = bot_message

            return history

        # Disabled chat wiring: msg, chatbot, send_btn, follow_up_btns,
        # sample_btns and clear are not created in this layout.
        # msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        #     bot,
        #     [chatbot, study_dropdown, prompt_type],
        #     [chatbot, *follow_up_btns],
        # )
        # send_btn.click(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        #     bot,
        #     [chatbot, study_dropdown, prompt_type],
        #     [chatbot, *follow_up_btns],
        # )
        # for btn in follow_up_btns + sample_btns:
        #     btn.click(set_question, inputs=[btn], outputs=[msg])
        # clear.click(lambda: None, None, chatbot, queue=False)

        study_dropdown.change(
            fn=get_study_info,
            inputs=study_dropdown,
            outputs=[study_info],
        )
        process_zotero_btn.click(
            process_zotero_library_items,
            inputs=[zotero_library_id, zotero_api_access_key],
            outputs=[zotero_output],
            queue=False,
        )
        submit_btn.click(
            process_multi_input,
            inputs=[study_variables, study_dropdown, prompt_type],
            outputs=[answer_output],
            queue=False,
        )

    return demo
# Built at import time so `demo` exists as a module attribute even when this
# file is imported rather than executed.
# NOTE(review): presumably relied on by the hosting platform — confirm.
demo = create_gr_interface()
if __name__ == "__main__":
    # demo = create_gr_interface()
    # NOTE(review): share=True asks Gradio for a public share link and
    # debug=True enables debug mode — confirm both are intended outside
    # local development.
    demo.launch(share=True, debug=True)