import qdrant_client
import gradio as gr

from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    completion_to_prompt,
    messages_to_prompt,
)
from llama_index.llms.ollama import Ollama
from llama_index.vector_stores.qdrant import QdrantVectorStore

DOC_PATH = './data/pdf_esg'
INDEX_PATH = './storage'

llm = LlamaCPP(
    # Pass the URL of a GGUF model to download it automatically
    # model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF/resolve/main/mistral-7b-instruct-v0.1.Q4_K_M.gguf',
    model_url='https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF/resolve/main/mistral-7b-instruct-v0.2.Q4_K_M.gguf',
    # Optionally, set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # Context window for the prompt; Mistral-7B-Instruct-v0.2 supports more,
    # but 4096 keeps memory usage modest
    context_window=4096,
    # kwargs passed to __call__()
    generate_kwargs={},
    # kwargs passed to __init__(); n_gpu_layers=-1 offloads all layers to the GPU,
    # set it to 0 for CPU-only inference
    model_kwargs={"n_gpu_layers": -1},
    # Transform inputs into the Llama-2-style instruct prompt format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

# Global defaults: Settings replaces the deprecated ServiceContext /
# set_global_service_context API
# Settings.llm = Ollama(model="mistral")
Settings.llm = llm
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Local, file-based Qdrant instance holding the "esg" collection of embeddings
client = qdrant_client.QdrantClient(path="./qdrant_data")
vector_store = QdrantVectorStore(client=client, collection_name="esg")


def construct_index(doc_path=DOC_PATH, index_store=INDEX_PATH, use_cache=False):
    if use_cache:
        # Rebuild the storage context from the persisted index instead of re-embedding
        storage_context = StorageContext.from_defaults(
            vector_store=vector_store, persist_dir=index_store
        )
        index = load_index_from_storage(storage_context)
    else:
        # Read the ESG PDFs, embed them into the Qdrant collection,
        # and persist the index metadata to disk
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        documents = SimpleDirectoryReader(input_dir=doc_path).load_data()
        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
        index.storage_context.persist(persist_dir=index_store)
    return index


def qabot(input_text, index_store=INDEX_PATH):
    # Load the persisted index (re-attaching the Qdrant vector store) and answer the query
    storage_context = StorageContext.from_defaults(
        vector_store=vector_store, persist_dir=index_store
    )
    index = load_index_from_storage(storage_context)
    query_engine = index.as_query_engine()
    response = query_engine.query(input_text)
    return response.response


if __name__ == "__main__":
    # Run construct_index() once to build the index before serving queries
    # construct_index(DOC_PATH, use_cache=False)
    iface = gr.Interface(
        fn=qabot,
        inputs=gr.Textbox(lines=7, label='Enter your query'),
        outputs="text",
        title="ESG Chatbot",
    )
    iface.launch(inline=False)
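# Usage sketch (assumptions: this file is saved as app.py and the ESG PDFs live
# under ./data/pdf_esg). On the first run, build the index once before launching
# the chatbot, e.g. from a Python shell:
#
#     from app import construct_index
#     construct_index(use_cache=False)   # embeds the PDFs into ./qdrant_data and persists to ./storage
#
# After that, running `python app.py` starts the Gradio UI and qabot() answers
# queries against the persisted index.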