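# EDS Research Agent: a Streamlit app that builds llama-index vector and
# summary tools over a folder of Ehlers-Danlos PDFs and answers questions
# with a function-calling agent.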
import os
import glob
from pathlib import Path
import nest_asyncio
import dill as pickle
import streamlit as st
# Allow nested asyncio event loops (recommended for llama-index async calls)
nest_asyncio.apply()
# Load the OpenAI API key via the helper function
from helper import get_openai_api_key
OPENAI_API_KEY = get_openai_api_key()
# Define the path to the directory containing the PDF files
folder_path = 'Ehlers-Danlos-1'
# Get the list of all PDF files in the directory
pdf_files = glob.glob(os.path.join(folder_path, '*.pdf'))
print(pdf_files)
# Extract just the filenames (optional)
pdf_filenames = [os.path.basename(pdf) for pdf in pdf_files]
print(pdf_filenames)
# Import utilities
from utils import get_doc_tools
# Truncate tool/function names if necessary (OpenAI limits function names
# to 64 characters)
def truncate_function_name(name, max_length=64):
    return name if len(name) <= max_length else name[:max_length]
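# Example: a name longer than 64 characters is cut to its first 64, e.g.
# truncate_function_name("x" * 70) == "x" * 64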
# Path to save/load serialized tools
tools_cache_path = 'tools_cache.pkl'
# Initialize paper_to_tools_dict
paper_to_tools_dict = {}
# Check if the cache file exists and is not empty
if os.path.exists(tools_cache_path) and os.path.getsize(tools_cache_path) > 0:
    try:
        with open(tools_cache_path, 'rb') as f:
            paper_to_tools_dict = pickle.load(f)
    except EOFError:
        print("Cache file is corrupted. Recreating tools.")
        paper_to_tools_dict = {}
else:
    print("Cache file does not exist or is empty. Recreating tools.")
# Create tools for each PDF if not loaded from cache
if not paper_to_tools_dict:
    for pdf in pdf_files:
        print(f"Getting tools for paper: {pdf}")
        vector_tool, summary_tool = get_doc_tools(pdf, Path(pdf).stem)
        paper_to_tools_dict[pdf] = [vector_tool, summary_tool]
    # Save tools to cache
    with open(tools_cache_path, 'wb') as f:
        pickle.dump(paper_to_tools_dict, f)
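# Note: delete tools_cache.pkl to force the tools to be rebuilt on the next run.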
# Combine all tools into a single list
all_tools = [t for pdf in pdf_files for t in paper_to_tools_dict[pdf]]
# Define an object index and retriever over these tools
from llama_index.core import VectorStoreIndex
from llama_index.core.objects import ObjectIndex
obj_index = ObjectIndex.from_objects(
    all_tools,
    index_cls=VectorStoreIndex,
)
obj_retriever = obj_index.as_retriever(similarity_top_k=3)
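# Quick sanity check (hypothetical query; uncomment to inspect which tools
# the retriever picks for a question):
# retrieved = obj_retriever.retrieve("What are the diagnostic criteria for EDS?")
# print([t.metadata.name for t in retrieved])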
# Initialize the OpenAI LLM
from llama_index.llms.openai import OpenAI
llm = OpenAI(model="gpt-3.5-turbo")
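# gpt-3.5-turbo supports OpenAI function calling, which the
# FunctionCallingAgentWorker below relies on.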
# Set up the agent
from llama_index.core.agent import FunctionCallingAgentWorker, AgentRunner
agent_worker = FunctionCallingAgentWorker.from_tools(
    tool_retriever=obj_retriever,
    llm=llm,
    verbose=True,
)
agent = AgentRunner(agent_worker)
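# Smoke test (hypothetical question; uncomment to try outside the UI):
# print(agent.query("Which papers discuss vascular EDS?"))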
# Define the function to query the agent
def ask_agent(question):
    response = agent.query(question)
    return str(response)
# Streamlit interface
st.title("EDS Research Agent")
question = st.text_input("Ask a question:")
if question:
    answer = ask_agent(question)
    st.write(answer)