import streamlit as st
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.chat_models import ChatOllama
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
import tempfile
import os

# st.set_page_config must be the first Streamlit call in the script
st.set_page_config(page_title="🤖 RAG Explorer", layout="wide")

# Initialize session state defaults
if 'processed_data' not in st.session_state:
    st.session_state.processed_data = False
if 'vectorstore' not in st.session_state:
    st.session_state.vectorstore = None
if 'retriever' not in st.session_state:
    st.session_state.retriever = None
if 'chain' not in st.session_state:
    st.session_state.chain = None
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []

st.title("🤖 Retrieval Augmented Generation Explorer")
st.markdown("""
Explore how RAG works by uploading documents, configuring the pipeline, and asking questions!
""")

setup_tab, chat_tab, learn_tab = st.tabs(["🛠️ Setup RAG Pipeline", "💬 Chat Interface", "📚 Learning Center"])

with setup_tab:
    st.header("RAG Pipeline Configuration")

    doc_col, process_col = st.columns([1, 1])

    with doc_col:
        st.subheader("1️⃣ Document Upload")
        file_type = st.selectbox("Select File Type", ["PDF", "Text"])
        uploaded_file = st.file_uploader(
            "Upload your document",
            type=["pdf", "txt"],
            help="Upload a document to create the knowledge base"
        )

        if uploaded_file:
            try:
                # Persist the upload to a temp file so the loaders can read it from disk
                suffix = ".pdf" if file_type == "PDF" else ".txt"
                with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    tmp_file_path = tmp_file.name

                loader = PyPDFLoader(tmp_file_path) if file_type == "PDF" else TextLoader(tmp_file_path)
                documents = loader.load()
                st.success("Document loaded successfully!")

                st.subheader("2️⃣ Text Splitting")
                chunk_size = st.slider("Chunk Size", 100, 2000, 500)
                chunk_overlap = st.slider("Chunk Overlap", 0, 200, 50)

                text_splitter = RecursiveCharacterTextSplitter(
                    chunk_size=chunk_size,
                    chunk_overlap=chunk_overlap
                )
                splits = text_splitter.split_documents(documents)

                # Remove the temp file once its contents are loaded and split
                os.unlink(tmp_file_path)

                with st.expander("Preview Text Chunks"):
                    for i, chunk in enumerate(splits[:3]):
                        st.markdown(f"**Chunk {i+1}**")
                        st.write(chunk.page_content)
                        st.markdown("---")

                st.session_state.splits = splits

            except Exception as e:
                st.error(f"Error processing document: {str(e)}")

    with process_col:
        st.subheader("3️⃣ Embedding Configuration")
        embedding_type = st.selectbox(
            "Select Embeddings",
            ["OpenAI", "HuggingFace"],
            help="Choose the embedding model"
        )

        # Initialize explicitly so neither name is referenced before assignment
        embeddings = None
        api_key = os.environ.get("OPENAI_API_KEY", "")
        if embedding_type == "OpenAI":
            api_key = st.text_input("OpenAI API Key", type="password", value=api_key)
            if api_key:
                os.environ["OPENAI_API_KEY"] = api_key
                embeddings = OpenAIEmbeddings()
        else:
            model_name = st.selectbox(
                "Select HuggingFace Model",
                ["sentence-transformers/all-mpnet-base-v2",
                 "sentence-transformers/all-MiniLM-L6-v2"]
            )
            embeddings = HuggingFaceEmbeddings(model_name=model_name)

        st.subheader("4️⃣ LLM Configuration")
        llm_type = st.selectbox(
            "Select Language Model",
            ["OpenAI", "Ollama"],
            help="Choose the Large Language Model"
        )

        llm = None
        if llm_type == "OpenAI":
            model_name = st.selectbox("Select Model", ["gpt-3.5-turbo", "gpt-4"])
            temperature = st.slider("Temperature", 0.0, 1.0, 0.7)
            # A key is still needed here when HuggingFace embeddings were chosen above
            if not api_key:
                api_key = st.text_input("OpenAI API Key", type="password", key="llm_api_key")
                if api_key:
                    os.environ["OPENAI_API_KEY"] = api_key
            if api_key:
                llm = ChatOpenAI(model_name=model_name, temperature=temperature)
        else:
            model_name = st.selectbox("Select Model", ["llama2", "mistral"])
            temperature = st.slider("Temperature", 0.0, 1.0, 0.7)
            llm = ChatOllama(model=model_name, temperature=temperature)

        if 'splits' in st.session_state:
            if st.button("Create RAG Pipeline"):
                if embeddings is None or llm is None:
                    st.error("Finish the embedding and LLM configuration (including the API key) first.")
                else:
                    with st.spinner("Creating vector store and RAG pipeline..."):
                        # Embed the chunks and index them for similarity search
                        vectorstore = FAISS.from_documents(
                            st.session_state.splits,
                            embeddings
                        )
                        retriever = vectorstore.as_retriever(
                            search_type="similarity",
                            search_kwargs={"k": 3}
                        )

                        template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer: """

                        QA_CHAIN_PROMPT = PromptTemplate(
                            input_variables=["context", "question"],
                            template=template,
                        )

                        chain = RetrievalQA.from_chain_type(
                            llm=llm,
                            chain_type="stuff",
                            retriever=retriever,
                            chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
                        )

                        st.session_state.vectorstore = vectorstore
                        st.session_state.retriever = retriever
                        st.session_state.chain = chain
                        st.session_state.processed_data = True
                        st.success("RAG pipeline created successfully!")

with chat_tab:
    st.header("Chat with your Documents")

    if not st.session_state.processed_data:
        st.warning("Please set up the RAG pipeline first in the Setup tab!")
    else:
        st.markdown("### Ask questions about your documents")

        query = st.text_input("Enter your question:")

        if query:
            with st.spinner("Generating response..."):
                try:
                    # RetrievalQA expects its input under the "query" key
                    response = st.session_state.chain.invoke({"query": query})

                    st.session_state.chat_history.append(("user", query))
                    st.session_state.chat_history.append(("assistant", response['result']))
                except Exception as e:
                    st.error(f"Error generating response: {str(e)}")

        st.markdown("### Chat History")
        for role, message in st.session_state.chat_history:
            if role == "user":
                st.markdown(f"**You:** {message}")
            else:
                st.markdown(f"**Assistant:** {message}")
                st.markdown("---")

with learn_tab:
    concept_tab, architecture_tab, tips_tab = st.tabs(["Core Concepts", "RAG Architecture", "Best Practices"])

    with concept_tab:
        st.markdown("""
### What is RAG?

Retrieval Augmented Generation (RAG) is a technique that enhances Large Language Models by:
1. Retrieving relevant information from a knowledge base
2. Augmenting the prompt with this information
3. Generating responses based on both the question and retrieved context

### Key Components

1. **Document Loader**
   - Imports documents into the system
   - Supports various file formats

2. **Text Splitter**
   - Breaks documents into manageable chunks
   - Maintains context while splitting

3. **Embeddings**
   - Converts text into vector representations
   - Enables semantic search

4. **Vector Store**
   - Stores and indexes embeddings
   - Enables efficient retrieval

5. **Language Model**
   - Generates responses using retrieved context
   - Ensures accurate and relevant answers
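
Putting these components together outside this app, a minimal end-to-end sketch might look like
this (illustrative only — it assumes an `OPENAI_API_KEY` in the environment and a placeholder
local file `notes.txt`):

```python
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA

docs = TextLoader("notes.txt").load()                      # 1. load the document
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(docs)                    # 2. split into chunks
store = FAISS.from_documents(chunks, OpenAIEmbeddings())   # 3-4. embed and index
chain = RetrievalQA.from_chain_type(                       # 5. retrieve + generate
    llm=ChatOpenAI(model_name="gpt-3.5-turbo"),
    retriever=store.as_retriever(search_kwargs={"k": 3}),
)
print(chain.invoke({"query": "What is this document about?"})["result"])
```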
""")

    with architecture_tab:
        st.markdown("""
### RAG Pipeline Architecture

```mermaid
graph LR
    A[Document] --> B[Text Splitter]
    B --> C[Embeddings]
    C --> D[Vector Store]
    E[Query] --> F[Embedding]
    F --> G[Retriever]
    D --> G
    G --> H[Context]
    H --> I[LLM]
    E --> I
    I --> J[Response]
```

### Data Flow

1. **Document Processing**
   - Document → Chunks → Embeddings → Vector Store

2. **Query Processing**
   - Query → Embedding → Similarity Search → Retrieved Context

3. **Response Generation**
   - Context + Query → LLM → Generated Response
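
In code, the query-time half of the diagram reduces to a few calls. A sketch, assuming the
`store` FAISS index from the Core Concepts example is already built:

```python
from langchain_openai import ChatOpenAI

question = "What is RAG?"
# Query → Embedding → Similarity Search → Retrieved Context
context_docs = store.similarity_search(question, k=3)
context = " ".join(doc.page_content for doc in context_docs)
# Context + Query → LLM → Generated Response
prompt = f"Use this context to answer. Context: {context} Question: {question}"
print(ChatOpenAI(model_name="gpt-3.5-turbo").invoke(prompt).content)
```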
""")

    with tips_tab:
        st.markdown("""
### RAG Best Practices

1. **Document Processing**
   - Choose appropriate chunk sizes
   - Ensure sufficient chunk overlap
   - Maintain document metadata

2. **Retrieval Strategy**
   - Tune the number of retrieved chunks (see the sketch below)
   - Consider hybrid search approaches
   - Implement relevance filtering

3. **Prompt Engineering**
   - Design clear and specific prompts
   - Include system instructions
   - Handle edge cases gracefully

4. **Performance Optimization**
   - Cache frequent queries
   - Batch process documents
   - Monitor resource usage

5. **Quality Control**
   - Implement answer validation
   - Track retrieval quality
   - Monitor LLM output
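
A sketch of two standard LangChain retriever settings for tuning retrieval (the parameter
values are illustrative, not recommendations; `store` is a vector store as built earlier):

```python
# Diversity-aware retrieval: MMR re-ranks a larger candidate pool
retriever = store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5, "fetch_k": 20},
)

# Or keep only chunks above a relevance cutoff
retriever = store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.5, "k": 5},
)
```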
""")

st.sidebar.header("📚 Quick Guide")
st.sidebar.markdown("""
1. **Setup Pipeline**
   - Upload document
   - Configure text splitting
   - Set up embeddings
   - Choose LLM

2. **Ask Questions**
   - Switch to Chat tab
   - Enter your question
   - Review responses

3. **Learn More**
   - Explore concepts
   - Understand architecture
   - Review best practices
""")

st.sidebar.markdown("---")
st.sidebar.markdown("Made with ❤️ using LangChain 0.3")