Commit 9fb0f7d
Zwea Htet committed · 1 Parent(s): 0a665f4

integrated pinecone with llama index to store vector embeddings

Files changed:
- models/vector_database.py +41 -2
- pages/llama_custom_demo.py +6 -23
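For context, the hunks below refer to a Pinecone client pc, an index name pc_index_name, and an index_exists helper defined in the unchanged part of models/vector_database.py. As a point of reference only, a minimal sketch of that setup with the v3 Pinecone client might look like the following; the index name, dimension, cloud, and region are assumptions, not values taken from the repository:

import os
from dotenv import load_dotenv
from pinecone import Pinecone, ServerlessSpec

load_dotenv()

# Assumed setup: the real values live in the unchanged part of the file.
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
pc_index_name = "demo-index"  # hypothetical name


def index_exists(name: str) -> bool:
    # Check whether the named index already exists in this Pinecone project.
    return name in pc.list_indexes().names()


if not index_exists(pc_index_name):
    pc.create_index(
        name=pc_index_name,
        dimension=1536,  # assumed embedding dimension (e.g. OpenAI ada-002)
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),  # assumed cloud/region
    )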
models/vector_database.py CHANGED

@@ -1,6 +1,14 @@
+from typing import List
 from pinecone import Pinecone, ServerlessSpec
 from llama_index.vector_stores.pinecone import PineconeVectorStore
 from dotenv import load_dotenv
+from llama_index.core import (
+    SimpleDirectoryReader,
+    Document,
+    VectorStoreIndex,
+    StorageContext,
+)
+from huggingface_hub import HfFileSystem
 
 import os
 

@@ -30,5 +38,36 @@ if not index_exists(pc_index_name):
 # Initialize your index
 pinecone_index = pc.Index(pc_index_name)
 
-#
-
+# print("Deleting all vectors in the pinecone index: ", pinecone_index.delete(delete_all=True))
+# print("Deleting all vectors with the namespace 'calregs_pdf': ", pinecone_index.delete(namespace="calregs_pdf"))
+
+SAVE_DIR = "uploaded_files"
+
+
+def _namespace_exists(namespace: str):
+    namespaces = pinecone_index.describe_index_stats()["namespaces"]
+    return namespace in namespaces
+
+
+def get_pinecone_index(filename: str) -> VectorStoreIndex:
+    """This function loads the index from Pinecone if it exists, otherwise it creates a new index from the document."""
+    namespace = filename.replace(".", "_").replace(" ", "_")
+    pinecone_vector_store = PineconeVectorStore(
+        pinecone_index=pinecone_index,
+        namespace=namespace,
+    )
+    index = None
+    if _namespace_exists(namespace=namespace):
+        print(f"Namespace {namespace} exists.")
+        index = VectorStoreIndex.from_vector_store(vector_store=pinecone_vector_store)
+    else:
+        reader = SimpleDirectoryReader(input_files=[f"{SAVE_DIR}/{filename}"])
+        docs = reader.load_data(show_progress=True)
+        storage_context = StorageContext.from_defaults(
+            vector_store=pinecone_vector_store
+        )
+        index = VectorStoreIndex.from_documents(
+            documents=docs, show_progress=True, storage_context=storage_context
+        )
+
+    return index
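A hypothetical caller of the new get_pinecone_index helper, assuming PINECONE_API_KEY is set in .env, the LLM and embedding model are configured via Settings as in the demo page, and a file has already been saved under uploaded_files/; the file name and question below are illustrative only:

from models.vector_database import get_pinecone_index

# "report.pdf" is a placeholder; it maps to the Pinecone namespace "report_pdf".
index = get_pinecone_index("report.pdf")

# Query the Pinecone-backed index; similarity_top_k is an illustrative choice.
query_engine = index.as_query_engine(similarity_top_k=3)
print(query_engine.query("What topics does this document cover?"))

Keying the namespace off the sanitized file name keeps each upload's vectors isolated within the shared Pinecone index, so _namespace_exists is enough to decide whether to reuse stored embeddings or re-read and re-embed the document.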
pages/llama_custom_demo.py CHANGED

@@ -5,11 +5,11 @@ from typing import List
 
 # local imports
 from models.llms import load_llm, integrated_llms
-from models.embeddings import
+from models.embeddings import openai_embed_model
 from models.llamaCustom import LlamaCustom
-from models.llamaCustomV2 import LlamaCustomV2
+# from models.llamaCustomV2 import LlamaCustomV2
 
-
+from models.vector_database import get_pinecone_index
 from utils.chatbox import show_previous_messages, show_chat_input
 from utils.util import validate_openai_api_key
 

@@ -22,6 +22,7 @@ from llama_index.core import (
     Settings,
     load_index_from_storage,
 )
+from llama_index.vector_stores.pinecone import PineconeVectorStore
 from llama_index.core.memory import ChatMemoryBuffer
 from llama_index.core.base.llms.types import ChatMessage
 

@@ -93,24 +94,6 @@ def get_index(
         raise e
     return index
 
-
-# def get_pinecone_index(filename: str) -> VectorStoreIndex:
-#     """Thie function loads the index from Pinecone if it exists, otherwise it creates a new index from the document."""
-#     reader = SimpleDirectoryReader(input_files=[f"{SAVE_DIR}/{filename}"])
-#     docs = reader.load_data(show_progress=True)
-#     storage_context = StorageContext.from_defaults(vector_store=pinecone_vector_store)
-#     index = VectorStoreIndex.from_documents(
-#         documents=docs, show_progress=True, storage_context=storage_context
-#     )
-
-#     return index
-
-
-def get_chroma_index(filename: str) -> VectorStoreIndex:
-    """This function loads the index from Chroma if it exists, otherwise it creates a new index from the document."""
-    pass
-
-
 def check_api_key(model_name: str, source: str):
     if source.startswith("openai"):
         if not st.session_state.openai_api_key:

@@ -205,8 +188,8 @@ with tab1:
         Settings.llm = llama_llm
 
         st.write("Processing Data ...")
-        index = get_index(selected_file)
-
+        # index = get_index(selected_file)
+        index = get_pinecone_index(selected_file)
 
         st.write("Finishing Up ...")
         llama_custom = LlamaCustom(model_name=selected_llm_name, index=index)
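The chat logic itself lives in models/llamaCustom.py and is not shown in this commit. Purely as a sketch, the Pinecone-backed index returned above could be wired into a chat loop with the ChatMemoryBuffer imported in this file; the chat mode and token limit here are assumptions, not values from the repository:

from llama_index.core.memory import ChatMemoryBuffer

# Hypothetical wiring; the real behavior is defined by LlamaCustom.
memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
chat_engine = index.as_chat_engine(chat_mode="context", memory=memory)

answer = chat_engine.chat("Summarize the uploaded file.")
print(answer.response)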