from llama_index import SimpleDirectoryReader, LLMPredictor, PromptHelper, StorageContext, ServiceContext, GPTVectorStoreIndex, load_index_from_storage
from langchain.chat_models import ChatOpenAI
import gradio as gr
import sys
import os
import openai
from ratelimit import limits, sleep_and_retry
# Bug fixes applied:
# 1. OpenAI key handling: https://stackoverflow.com/questions/76425556/tenacity-retryerror-retryerrorfuture-at-0x7f89bc35eb90-state-finished-raised
# 2. Rate-limit error in the default langchain version; pin langchain==0.0.188. https://github.com/jerryjliu/llama_index/issues/924
# 3. Set the Config flag to True in langchain's pydantic model: https://github.com/pydantic/pydantic/issues/3320
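# To reproduce fix (2), pin the version noted above (e.g. in requirements.txt):
#   langchain==0.0.188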
# Fail fast if the key is missing: os.environ.get would return None, and
# assigning None into os.environ raises a confusing TypeError.
if not os.environ.get("openai_key"):
    sys.exit("Set the 'openai_key' environment variable before launching.")
os.environ["OPENAI_API_KEY"] = os.environ["openai_key"]
openai.api_key = os.environ["OPENAI_API_KEY"]
# Define the rate limit for API calls (requests per second)
RATE_LIMIT = 3
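# How the two decorators below interact: @limits raises ratelimit's
# RateLimitException once more than RATE_LIMIT calls arrive within `period`
# seconds, and @sleep_and_retry catches that exception, sleeps out the rest of
# the period, and retries, so excess calls block instead of failing.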
# Apply the rate-limiting decorators
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=1)
def create_service_context():
    # Constraint parameters
    max_input_size = 4096
    num_outputs = 512
    max_chunk_overlap = 20
    chunk_size_limit = 600
    # Allows the user to explicitly set certain constraint parameters
    prompt_helper = PromptHelper(max_input_size, num_outputs, max_chunk_overlap, chunk_size_limit=chunk_size_limit)
    # LLMPredictor is a wrapper class around LangChain's LLMChain that allows easy integration into LlamaIndex
    llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.5, model_name="gpt-3.5-turbo", max_tokens=num_outputs))
    # Constructs the service context
    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    return service_context
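# Optional sketch (an assumption, not wired into the app): the constraint
# parameters above are constants, so the service context could be built once
# at module load and reused instead of being reconstructed on every call:
#   SERVICE_CONTEXT = create_service_context()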
# Apply the rate-limiting decorators
@sleep_and_retry
@limits(calls=RATE_LIMIT, period=1)
def data_ingestion_indexing(directory_path):
    # Loads data from the specified directory path
    documents = SimpleDirectoryReader(directory_path).load_data()
    # When first building the index
    index = GPTVectorStoreIndex.from_documents(
        documents, service_context=create_service_context()
    )
    # Persist index to disk, default "storage" folder
    index.storage_context.persist()
    return index
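# Note: persist() with no arguments writes to the default "./storage" folder,
# which is the same directory data_querying() reloads from below; a different
# location could presumably be passed via persist(persist_dir=...).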
def data_querying(input_text):
    # Rebuild the storage context
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    # Loads the index from storage
    index = load_index_from_storage(storage_context, service_context=create_service_context())
    # Queries the index with the input text
    response = index.as_query_engine().query(input_text)
    return response.response
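# Hypothetical convenience helper (an assumption, not called below): it mirrors
# the commented-out guard near the bottom of this file by rebuilding the index
# only when no persisted copy exists yet.
def ensure_index(directory_path="longevity_books", persist_dir="./storage"):
    # Skip the (slow, API-billed) ingestion step if an index is already on disk
    if not os.path.isdir(persist_dir):
        data_ingestion_indexing(directory_path)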
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def respond(message, chat_history):
        bot_message = data_querying(message)
        chat_history.append((message, bot_message))
        # time.sleep(1)
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
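    # msg.submit wires the Enter key on the textbox to respond(); because the
    # outputs are [msg, chatbot], returning ("", chat_history) clears the input
    # box and refreshes the chat window in one step.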
# Earlier single-textbox UI, kept for reference:
# iface = gr.Interface(fn=data_querying,
#                      inputs=gr.components.Textbox(lines=7, label="Enter your question"),
#                      outputs="text",
#                      title="Longevity GPT 0.1 pre alpha")

# Passes in the data directory; the commented guard would skip re-indexing
# when a persisted index already exists:
# if not os.path.isdir("storage"):
index = data_ingestion_indexing("longevity_books")

# iface.launch(inline=True)
demo.launch()