Spaces:

DexterSptizu
/

langchain-vector-stores

Running

App Files Files Community

DexterSptizu committed about 17 hours ago

Commit

aacc0d9

•

1 Parent(s): 5eadd9a

Create app.py

Browse files

Files changed (1) hide show

app.py +215 -0

app.py ADDED Viewed

	@@ -0,0 +1,215 @@

+import streamlit as st
+from langchain_community.vectorstores import FAISS, Chroma, Pinecone
+from langchain_community.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
+from langchain.text_splitter import CharacterTextSplitter
+from langchain_community.document_loaders import TextLoader, PyPDFLoader
+import tempfile
+import os
+import torch
+# Initialize session state variables
+if 'vectorstore' not in st.session_state:
+    st.session_state.vectorstore = None
+if 'documents' not in st.session_state:
+    st.session_state.documents = None
+st.set_page_config(page_title="🗃️ Vector Store Explorer", layout="wide")
+st.title("🗃️ Vector Store Explorer")
+st.markdown("""
+Explore different vector stores and embeddings in LangChain. Upload documents, create embeddings,
+and perform semantic search!
+""")
+# Main tabs
+main_tab1, main_tab2, main_tab3 = st.tabs(["📚 Document Processing", "🔍 Vector Store Operations", "📖 Learning Center"])
+with main_tab1:
+    col1, col2 = st.columns([1, 1])
+    with col1:
+        st.header("Document Upload")
+        file_type = st.selectbox("Select File Type", ["Text", "PDF"])
+        uploaded_file = st.file_uploader(
+            "Upload your document",
+            type=["txt", "pdf"],
+            help="Upload a document to create vector embeddings"
+        )
+        if uploaded_file:
+            try:
+                with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file_type.lower()}") as tmp_file:
+                    tmp_file.write(uploaded_file.getvalue())
+                    tmp_file_path = tmp_file.name
+                loader = TextLoader(tmp_file_path) if file_type == "Text" else PyPDFLoader(tmp_file_path)
+                st.session_state.documents = loader.load()
+                st.success("Document loaded successfully!")
+                # Clean up temp file
+                os.unlink(tmp_file_path)
+            except Exception as e:
+                st.error(f"Error loading document: {str(e)}")
+    with col2:
+        st.header("Text Processing")
+        if st.session_state.documents:
+            chunk_size = st.slider("Chunk Size", 100, 2000, 500)
+            chunk_overlap = st.slider("Chunk Overlap", 0, 200, 50)
+            if st.button("Process Text"):
+                text_splitter = CharacterTextSplitter(
+                    chunk_size=chunk_size,
+                    chunk_overlap=chunk_overlap
+                )
+                st.session_state.splits = text_splitter.split_documents(st.session_state.documents)
+                st.success(f"Created {len(st.session_state.splits)} text chunks!")
+                with st.expander("Preview Chunks"):
+                    for i, chunk in enumerate(st.session_state.splits[:3]):
+                        st.markdown(f"**Chunk {i+1}**")
+                        st.write(chunk.page_content)
+                        st.markdown("---")
+with main_tab2:
+    if 'splits' in st.session_state:
+        col1, col2 = st.columns([1, 1])
+        with col1:
+            st.header("Vector Store Configuration")
+            vectorstore_type = st.selectbox(
+                "Select Vector Store",
+                ["FAISS", "Chroma"],
+                help="Choose the vector store implementation"
+            )
+            embedding_type = st.selectbox(
+                "Select Embeddings",
+                ["OpenAI", "HuggingFace"],
+                help="Choose the embedding model"
+            )
+            if embedding_type == "OpenAI":
+                api_key = st.text_input("OpenAI API Key", type="password")
+                if api_key:
+                    os.environ["OPENAI_API_KEY"] = api_key
+                    embeddings = OpenAIEmbeddings()
+            else:
+                model_name = st.selectbox(
+                    "Select HuggingFace Model",
+                    ["sentence-transformers/all-mpnet-base-v2",
+                     "sentence-transformers/all-MiniLM-L6-v2"]
+                )
+                embeddings = HuggingFaceEmbeddings(model_name=model_name)
+            if st.button("Create Vector Store"):
+                try:
+                    with st.spinner("Creating vector store..."):
+                        if vectorstore_type == "FAISS":
+                            st.session_state.vectorstore = FAISS.from_documents(
+                                st.session_state.splits,
+                                embeddings
+                            )
+                        else:
+                            st.session_state.vectorstore = Chroma.from_documents(
+                                st.session_state.splits,
+                                embeddings
+                            )
+                    st.success("Vector store created successfully!")
+                except Exception as e:
+                    st.error(f"Error creating vector store: {str(e)}")
+        with col2:
+            st.header("Semantic Search")
+            if st.session_state.vectorstore:
+                query = st.text_input("Enter your search query")
+                k = st.slider("Number of results", 1, 10, 3)
+                if query:
+                    with st.spinner("Searching..."):
+                        results = st.session_state.vectorstore.similarity_search(query, k=k)
+                        st.subheader("Search Results")
+                        for i, doc in enumerate(results):
+                            with st.expander(f"Result {i+1}"):
+                                st.write(doc.page_content)
+                                st.markdown("**Metadata:**")
+                                st.json(doc.metadata)
+with main_tab3:
+    learn_tab1, learn_tab2, learn_tab3 = st.tabs(["Vector Stores", "Embeddings", "Best Practices"])
+    with learn_tab1:
+        st.markdown("""
+        ### What are Vector Stores?
+        Vector stores are specialized databases that store and retrieve vector embeddings efficiently. They enable:
+        - Semantic search capabilities
+        - Similarity matching
+        - Efficient nearest neighbor search
+        ### Available Vector Stores
+        | Store | Description | Best For |
+        |-------|-------------|----------|
+        | FAISS | In-memory, efficient similarity search | Local development, small-medium datasets |
+        | Chroma | Simple, persistent vector store | Local development, getting started |
+        | Pinecone | Managed vector database service | Production, large-scale deployments |
+        """)
+    with learn_tab2:
+        st.markdown("""
+        ### Understanding Embeddings
+        Embeddings are numerical representations of text that capture semantic meaning. They:
+        - Convert text to dense vectors
+        - Enable semantic similarity comparison
+        - Form the basis for vector search
+        ### Embedding Models
+        - **OpenAI**: High quality, but requires API key and costs money
+        - **HuggingFace**: Free, open-source alternatives
+            - all-mpnet-base-v2: High quality, slower
+            - all-MiniLM-L6-v2: Good quality, faster
+        """)
+    with learn_tab3:
+        st.markdown("""
+        ### Vector Store Best Practices
+        1. **Chunk Size Selection**
+           - Smaller chunks for precise retrieval
+           - Larger chunks for more context
+        2. **Embedding Model Selection**
+           - Consider cost vs. quality tradeoff
+           - Test different models for your use case
+        3. **Performance Optimization**
+           - Use appropriate batch sizes
+           - Consider hardware limitations
+           - Monitor memory usage
+        4. **Search Optimization**
+           - Experiment with different k values
+           - Use metadata filtering when available
+           - Consider hybrid search approaches
+        """)
+# Sidebar
+st.sidebar.header("📋 Instructions")
+st.sidebar.markdown("""
+1. **Upload Document**
+   - Select file type
+   - Upload your document
+   - Process into chunks
+2. **Create Vector Store**
+   - Choose vector store type
+   - Select embedding model
+   - Configure settings
+3. **Search**
+   - Enter search query
+   - Adjust number of results
+   - Explore similar documents
+""")