rchrdgwr committed
Commit
278ff72
1 Parent(s): 53acec5

Add .gitignore file

.chainlit/config.toml ADDED
@@ -0,0 +1,81 @@
+ [project]
+ # Whether to enable telemetry (default: true). No personal data is collected.
+ enable_telemetry = true
+
+ # List of environment variables to be provided by each user to use the app.
+ user_env = []
+
+ # Duration (in seconds) during which the session is saved when the connection is lost
+ session_timeout = 3600
+
+ # Enable third-party caching (e.g. LangChain cache)
+ cache = false
+
+ # Follow symlinks for asset mounts (see https://github.com/Chainlit/chainlit/issues/317)
+ # follow_symlink = false
+
+ [features]
+ # Show the prompt playground
+ prompt_playground = true
+
+ # Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
+ unsafe_allow_html = false
+
+ # Process and display mathematical expressions. This can clash with "$" characters in messages.
+ latex = false
+
+ # Authorize users to upload files with messages
+ multi_modal = true
+
+ # Allow users to use speech-to-text
+ [features.speech_to_text]
+ enabled = false
+ # See all languages here: https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string
+ # language = "en-US"
+
+ [UI]
+ # Name of the app and chatbot.
+ name = "Chatbot"
+
+ # Show the readme while the conversation is empty.
+ show_readme_as_default = true
+
+ # Description of the app and chatbot. This is used for HTML tags.
+ # description = ""
+
+ # Large content is collapsed by default for a cleaner UI
+ default_collapse_content = true
+
+ # The default value for the expand messages setting.
+ default_expand_messages = false
+
+ # Hide the chain-of-thought details from the user in the UI.
+ hide_cot = false
+
+ # Link to your GitHub repo. This will add a GitHub button in the UI's header.
+ # github = ""
+
+ # Specify a CSS file that can be used to customize the user interface.
+ # The CSS file can be served from the public directory or via an external link.
+ # custom_css = "/public/test.css"
+ custom_css = "/public/custom_styles.css"
+ # Override the default MUI light theme. (Check theme.ts)
+ [UI.theme.light]
+ background = "#E0F7FA" # Light Cyan for a refreshing background
+ paper = "#FFFFFF" # Keep the paper white for contrast
+
+ [UI.theme.light.primary]
+ main = "#0288D1" # A vibrant blue as the primary color
+ dark = "#01579B" # A deeper blue for darker elements
+ light = "#B3E5FC" # A light blue for accents and highlights
+ [UI.theme.dark]
+ background = "#1E3A5F" # A deep, rich blue for the background
+ paper = "#2C3E50" # Slightly lighter for paper elements
+
+ [UI.theme.dark.primary]
+ main = "#0288D1" # Same vibrant blue for consistency
+ dark = "#01579B" # A rich dark blue
+ light = "#4FC3F7" # A lighter blue for accents
+
+ [meta]
+ generated_by = "0.7.700"
.env ADDED
@@ -0,0 +1 @@
+ OPENAI_API_KEY=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
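The key above is read at runtime via python-dotenv; a minimal sketch of the pattern app.py itself uses, assuming the .env file sits in the working directory. Note that although `.env` is listed in the `.gitignore` added below, gitignore entries do not untrack already-committed files, so removing it would additionally require `git rm --cached .env` (and rotating the key).

```python
# Sketch of how app.py reads the key committed above.
# Assumes python-dotenv is installed and .env is in the working directory.
import os

from dotenv import load_dotenv

load_dotenv()  # copies KEY=value pairs from .env into the process environment
openai_api_key = os.getenv("OPENAI_API_KEY")
```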
.gitignore ADDED
@@ -0,0 +1,25 @@
+ # Ignore Python compiled files
+ *.pyc
+ *.pyo
+ __pycache__/
+
+ # Ignore environment variables and sensitive files
+ .env
+ .secret
+
+ # Ignore IDE-specific files (for example, VSCode)
+ .vscode/
+ .idea/
+
+ # Ignore log files
+ *.log
+
+ # Ignore system-specific files
+ .DS_Store
+ Thumbs.db
+
+ # Ignore the data folder
+ /data/
+
+ # Ignore the cache folder
+ /cache/
Dockerfile ADDED
@@ -0,0 +1,11 @@
+ FROM python:3.9
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:$PATH
+ WORKDIR $HOME/app
+ # Install dependencies first so the pip layer is cached across code changes
+ COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
+ RUN pip install -r requirements.txt
+ COPY --chown=user . $HOME/app
+ CMD ["chainlit", "run", "app.py", "--port", "7860"]
__pycache__/app.cpython-311.pyc ADDED
Binary file (5.42 kB).
 
__pycache__/app.cpython-39.pyc ADDED
Binary file (729 Bytes).
 
app.py ADDED
@@ -0,0 +1,101 @@
+ ### Import Section ###
+ import chainlit as cl
+ import os
+ from dotenv import load_dotenv
+ from langchain_openai import ChatOpenAI
+ from langchain_core.runnables.config import RunnableConfig
+ from utilities.all_utilities import process_file
+ from utilities.prompts import get_opening_content
+
+ ################
+ # General code
+ ################
+
+ load_dotenv()
+ openai_api_key = os.getenv("OPENAI_API_KEY")
+
+ # Language selection actions
+
+ @cl.action_callback("icelandic")
+ async def on_action_icelandic(action):
+     cl.user_session.set("language", "icelandic")
+     await cl.Message(content=f"Changing to {action.name}").send()
+     # Optionally remove the action button from the chatbot user interface
+     # await action.remove()
+
+ @cl.action_callback("english")
+ async def on_action_english(action):
+     cl.user_session.set("language", "english")
+     await cl.Message(content=f"Changing to {action.name}").send()
+     # Optionally remove the action button from the chatbot user interface
+     # await action.remove()
+
+ #############################################
+ ### On Chat Start (Session Start) Section ###
+ #############################################
+ @cl.on_chat_start
+ async def on_chat_start():
+
+     actions = [
+         cl.Action(name="icelandic", value="icelandic", description="Switch to Icelandic"),
+         cl.Action(name="english", value="english", description="Switch to English")
+     ]
+
+
+     await cl.Message(content="Languages", actions=actions).send()
+
+     await cl.Message(content=get_opening_content()).send()
+
+     prompt_cache_input = await cl.AskActionMessage(
+         content="Do you want to use Prompt Cache?",
+         actions=[
+             cl.Action(name="yes", value="yes", label="✅ Yes"),
+             cl.Action(name="no", value="no", label="❌ No"),
+         ],
+     ).send()
+     prompt_cache = prompt_cache_input.get("value")
+     files = None
+     # Wait for the user to upload a file
+     while not files:
+         files = await cl.AskFileMessage(
+             content="Please upload a .pdf file to begin processing!",
+             accept=["application/pdf"],
+             max_size_mb=20,
+             timeout=180,
+         ).send()
+
+     file = files[0]
+
+     msg = cl.Message(
+         content=f"Processing `{file.name}`...", disable_human_feedback=True
+     )
+     await msg.send()
+     response = process_file(file, prompt_cache)
+     rag_chain = response["chain"]
+     retriever = response["retriever"]
+
+     msg.content = f"Processing `{file.name}` is complete."
+     await msg.update()
+     msg.content = f"You can now ask questions about `{file.name}`."
+     await msg.update()
+     cl.user_session.set("chain", rag_chain)
+     cl.user_session.set("retriever", retriever)
+
+ ##########################
+ ### On Message Section ###
+ ##########################
+ @cl.on_message
+ async def main(message: cl.Message):
+     # Pull the session's chain and language preference
+     chain = cl.user_session.get("chain")
+     language = cl.user_session.get("language", "english")
+     msg = cl.Message(content="")
+     question = message.content
+
+     async for chunk in chain.astream(
+         {"question": question, "language": language},
+         config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
+     ):
+         await msg.stream_token(chunk.content)
+
+     await msg.send()
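A minimal sketch of how the language value set by the action callbacks above reaches the model, using simplified inline versions of the templates from utilities/prompts.py (the question and context strings are placeholders):

```python
# Sketch of the {language} plumbing through the prompt, outside the full chain.
from langchain_core.prompts import ChatPromptTemplate

chat_prompt = ChatPromptTemplate.from_messages([
    ("system", "Respond in the language provided below."),
    ("human", "Question:\n{question}\n\nLanguage:\n{language}\n\nContext:\n{context}"),
])

messages = chat_prompt.format_messages(
    question="What is agile?",    # placeholder question
    language="icelandic",         # value stored by the action callback
    context="(retrieved chunks)", # placeholder for retriever output
)
print(messages)
```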
app_generic.py ADDED
@@ -0,0 +1,147 @@
+ import os
+ from typing import List
+ from langchain_openai.embeddings import OpenAIEmbeddings
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from langchain_qdrant import QdrantVectorStore
+ from langchain_community.document_loaders import PyMuPDFLoader
+ from langchain_openai import ChatOpenAI
+ from langchain.storage import LocalFileStore
+ from chainlit.types import AskFileResponse
+ from langchain.embeddings import CacheBackedEmbeddings
+ from qdrant_client.http.models import Distance, VectorParams
+ from qdrant_client import QdrantClient
+ import chainlit as cl
+ from operator import itemgetter
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.runnables.passthrough import RunnablePassthrough
+ from langchain_core.runnables.config import RunnableConfig
+ from dotenv import load_dotenv
+ import uuid
+
+ load_dotenv()
+
+
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+
+ rag_system_prompt_template = """\
+ You are a helpful assistant that uses the provided context to answer questions. Never reference this prompt, or the existence of context.
+ """
+
+ rag_message_list = [
+     {"role": "system", "content": rag_system_prompt_template},
+ ]
+
+ rag_user_prompt_template = """\
+ Question:
+ {question}
+ Context:
+ {context}
+ """
+
+ chat_prompt = ChatPromptTemplate.from_messages([
+     ("system", rag_system_prompt_template),
+     ("human", rag_user_prompt_template)
+ ])
+
+ chat_model = ChatOpenAI(model="gpt-4o-mini")
+
+ def process_file(file: AskFileResponse):
+     import tempfile
+
+     # Write the uploaded bytes to a temporary file PyMuPDF can open
+     with tempfile.NamedTemporaryFile(mode="wb", delete=False) as temp_file:
+         temp_file.write(file.content)
+
+     Loader = PyMuPDFLoader
+
+     loader = Loader(temp_file.name)
+     documents = loader.load()
+     docs = text_splitter.split_documents(documents)
+     for i, doc in enumerate(docs):
+         doc.metadata["source"] = f"source_{i}"
+     return docs
+
+ # Decorator: This is a Chainlit decorator that marks a function to be executed when a chat session starts
+ @cl.on_chat_start
+ async def on_chat_start():
+     files = None
+
+     # Wait for the user to upload a file
+     while files is None:
+         # Async method: This allows the function to pause execution while waiting for the user to upload a file,
+         # without blocking the entire application. It improves responsiveness and scalability.
+         files = await cl.AskFileMessage(
+             content="Please upload a PDF file to begin!",
+             accept=["application/pdf"],
+             max_size_mb=20,
+             timeout=180,
+         ).send()
+
+     file = files[0]
+
+     msg = cl.Message(
+         content=f"Processing `{file.name}`...",
+     )
+     await msg.send()
+
+     # Load and split the file
+     docs = process_file(file)
+
+     # Create a Qdrant vector store with cache-backed embeddings
+     collection_name = f"pdf_to_parse_{uuid.uuid4()}"
+     client = QdrantClient(":memory:")
+     client.create_collection(
+         collection_name=collection_name,
+         vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
+     )
+     core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+     store = LocalFileStore("./cache/")
+     # Caching: CacheBackedEmbeddings improves performance by storing and reusing
+     # previously computed embeddings, reducing API calls and processing time.
+     cached_embedder = CacheBackedEmbeddings.from_bytes_store(
+         core_embeddings, store, namespace=core_embeddings.model
+     )
+     vectorstore = QdrantVectorStore(
+         client=client,
+         collection_name=collection_name,
+         embedding=cached_embedder)
+     vectorstore.add_documents(docs)
+     retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
+
+     # Create a chain that uses the Qdrant vector store.
+     # Parallelization: the dict step below runs its branches in parallel,
+     # so retrieval and the question passthrough execute simultaneously.
+     retrieval_augmented_qa_chain = (
+         {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
+         | RunnablePassthrough.assign(context=itemgetter("context"))
+         | chat_prompt | chat_model
+     )
+
+     # Let the user know that the system is ready
+     msg.content = f"Processing `{file.name}` done. You can now ask questions!"
+     await msg.update()
+
+     cl.user_session.set("chain", retrieval_augmented_qa_chain)
+
+ # Decorator: This Chainlit decorator renames the authors of messages in the chat interface
+ @cl.author_rename
+ def rename(orig_author: str):
+     rename_dict = {"ChatOpenAI": "the Generator...", "VectorStoreRetriever": "the Retriever..."}
+     return rename_dict.get(orig_author, orig_author)
+
+ # Decorator: This Chainlit decorator marks a function to be executed when a new message is received in the chat
+ @cl.on_message
+ async def main(message: cl.Message):
+     runnable = cl.user_session.get("chain")
+
+     msg = cl.Message(content="")
+
+     # Async method: astream streams the response asynchronously, improving
+     # responsiveness by showing partial results as they become available.
+     async for chunk in runnable.astream(
+         {"question": message.content},
+         config=RunnableConfig(callbacks=[cl.LangchainCallbackHandler()]),
+     ):
+         await msg.stream_token(chunk.content)
+
+     await msg.send()
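A minimal sketch isolating the cache-backed embedding behavior described in the comments above, assuming OPENAI_API_KEY is set and using a placeholder input string: the first call pays for an API round-trip and persists the vectors under ./cache/, the second identical call is answered from the byte store.

```python
# Sketch of CacheBackedEmbeddings reuse, mirroring the setup in on_chat_start().
# Assumes OPENAI_API_KEY is set; "hello world" is a placeholder document.
from langchain.embeddings import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_openai.embeddings import OpenAIEmbeddings

core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
store = LocalFileStore("./cache/")
cached_embedder = CacheBackedEmbeddings.from_bytes_store(
    core_embeddings, store, namespace=core_embeddings.model
)

first = cached_embedder.embed_documents(["hello world"])   # API call; vectors cached on disk
second = cached_embedder.embed_documents(["hello world"])  # same input: served from ./cache/
assert first == second
```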
chainlit.md ADDED
@@ -0,0 +1,8 @@
+ # Welcome to AI Engineering Bootcamp Cohort 4
+
+ Upload a document
+
+ Ask a question
+
+
+
public/custom_styles.css ADDED
@@ -0,0 +1,8 @@
+ .message {
+     background-color: #E3F2FD !important; /* Light Blue background */
+     color: #1A237E !important; /* Dark Indigo text */
+ }
+
+ .MuiToolbar-root {
+     background-color: #b7dcf1 !important; /* Medium Blue background */
+ }
requirements.txt ADDED
@@ -0,0 +1,99 @@
+ aiofiles==23.2.1
+ aiohappyeyeballs==2.4.3
+ aiohttp==3.10.8
+ aiosignal==1.3.1
+ annotated-types==0.7.0
+ anyio==3.7.1
+ async-timeout==4.0.3
+ asyncer==0.0.2
+ attrs==24.2.0
+ bidict==0.23.1
+ certifi==2024.8.30
+ chainlit==0.7.700
+ charset-normalizer==3.3.2
+ click==8.1.7
+ dataclasses-json==0.5.14
+ Deprecated==1.2.14
+ distro==1.9.0
+ exceptiongroup==1.2.2
+ fastapi==0.100.1
+ fastapi-socketio==0.0.10
+ filetype==1.2.0
+ frozenlist==1.4.1
+ googleapis-common-protos==1.65.0
+ greenlet==3.1.1
+ grpcio==1.66.2
+ grpcio-tools==1.62.3
+ h11==0.14.0
+ h2==4.1.0
+ hpack==4.0.0
+ httpcore==0.17.3
+ httpx==0.24.1
+ hyperframe==6.0.1
+ idna==3.10
+ importlib_metadata==8.4.0
+ jiter==0.5.0
+ jsonpatch==1.33
+ jsonpointer==3.0.0
+ langchain==0.3.0
+ langchain-community==0.3.0
+ langchain-core==0.3.1
+ langchain-openai==0.2.0
+ langchain-qdrant==0.1.4
+ langchain-text-splitters==0.3.0
+ langsmith==0.1.121
+ Lazify==0.4.0
+ marshmallow==3.22.0
+ multidict==6.1.0
+ mypy-extensions==1.0.0
+ nest-asyncio==1.6.0
+ numpy==1.26.4
+ openai==1.51.0
+ opentelemetry-api==1.27.0
+ opentelemetry-exporter-otlp==1.27.0
+ opentelemetry-exporter-otlp-proto-common==1.27.0
+ opentelemetry-exporter-otlp-proto-grpc==1.27.0
+ opentelemetry-exporter-otlp-proto-http==1.27.0
+ opentelemetry-instrumentation==0.48b0
+ opentelemetry-proto==1.27.0
+ opentelemetry-sdk==1.27.0
+ opentelemetry-semantic-conventions==0.48b0
+ orjson==3.10.7
+ packaging==23.2
+ portalocker==2.10.1
+ protobuf==4.25.5
+ pydantic==2.9.2
+ pydantic-settings==2.5.2
+ pydantic_core==2.23.4
+ PyJWT==2.9.0
+ PyMuPDF==1.24.10
+ PyMuPDFb==1.24.10
+ python-dotenv==1.0.1
+ python-engineio==4.9.1
+ python-graphql-client==0.4.3
+ python-multipart==0.0.6
+ python-socketio==5.11.4
+ PyYAML==6.0.2
+ qdrant-client==1.11.2
+ regex==2024.9.11
+ requests==2.32.3
+ simple-websocket==1.0.0
+ sniffio==1.3.1
+ SQLAlchemy==2.0.35
+ starlette==0.27.0
+ syncer==2.0.3
+ tenacity==8.5.0
+ tiktoken==0.7.0
+ tomli==2.0.1
+ tqdm==4.66.5
+ typing-inspect==0.9.0
+ typing_extensions==4.12.2
+ uptrace==1.26.0
+ urllib3==2.2.3
+ uvicorn==0.23.2
+ watchfiles==0.20.0
+ websockets==13.1
+ wrapt==1.16.0
+ wsproto==1.2.0
+ yarl==1.13.1
+ zipp==3.20.2
requirements_1.txt ADDED
@@ -0,0 +1,15 @@
+ numpy==1.26.4
+ chainlit==0.7.700 # 1.1.402
+ openai>=1.26.0
+ pymupdf==1.24.10
+ qdrant-client==1.11.0
+ langchain-text-splitters
+ langchain-core==0.2.27
+ langchain-community==0.2.10
+ langchain-experimental==0.0.64
+ langgraph-checkpoint==1.0.6
+ langgraph==0.2.16
+ langchain-qdrant==0.1.3
+ langchain-openai==0.1.9
+ pdfplumber==0.11.4
+ sentence-transformers==3.1.1
test.py ADDED
@@ -0,0 +1,8 @@
+ from types import SimpleNamespace
+
+ from utilities.all_utilities import process_file
+
+ # process_file() expects an object with a .content bytes attribute
+ # (like chainlit's AskFileResponse) plus a prompt_cache flag
+ with open("data/Starting_Agile-Mark_Shead.pdf", "rb") as f:
+     c = process_file(SimpleNamespace(content=f.read()), prompt_cache="no")
utilities/__pycache__/all_utilities.cpython-311.pyc ADDED
Binary file (5.46 kB).
 
utilities/__pycache__/file_utilities.cpython-311.pyc ADDED
Binary file (3.14 kB).
 
utilities/__pycache__/prompts.cpython-311.pyc ADDED
Binary file (1.45 kB).
 
utilities/all_utilities.py ADDED
@@ -0,0 +1,91 @@
+ import os
+ import tempfile
+
+ from chainlit.types import AskFileResponse
+ from langchain_community.document_loaders import PyMuPDFLoader
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
+ from qdrant_client import QdrantClient
+ from qdrant_client.http.models import Distance, VectorParams
+ from langchain_openai.embeddings import OpenAIEmbeddings
+ from langchain.storage import LocalFileStore
+ from langchain_qdrant import QdrantVectorStore
+ from langchain.embeddings import CacheBackedEmbeddings
+ from langchain_core.prompts import ChatPromptTemplate
+ from langchain_core.globals import set_llm_cache
+ from langchain_openai import ChatOpenAI
+ from langchain_core.caches import InMemoryCache
+ from langchain_core.runnables.passthrough import RunnablePassthrough
+ from uuid import uuid4
+ from utilities.prompts import get_system_template, get_user_template
+
+
+ def load_file(file: AskFileResponse, chunk_size=1000, chunk_overlap=100):
+     # Write the uploaded bytes to a temporary file PyMuPDF can open
+     # (uses the module-level tempfile import; don't shadow it)
+     with tempfile.NamedTemporaryFile(mode="wb", delete=False) as temp_file:
+         temp_file.write(file.content)
+
+     Loader = PyMuPDFLoader
+
+     loader = Loader(temp_file.name)
+     documents = loader.load()
+     text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
+     docs = text_splitter.split_documents(documents)
+     for i, doc in enumerate(docs):
+         doc.metadata["source"] = f"source_{i}"
+     return docs
+
+
+ def process_embeddings(docs):
+     core_embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+
+     collection_name = f"pdf_to_parse_{uuid4()}"
+     client = QdrantClient(":memory:")
+     client.create_collection(
+         collection_name=collection_name,
+         vectors_config=VectorParams(size=1536, distance=Distance.COSINE),
+     )
+     # Cache embeddings on disk so repeated chunks skip the API
+     store = LocalFileStore("./cache/")
+     cached_embedder = CacheBackedEmbeddings.from_bytes_store(
+         core_embeddings, store, namespace=core_embeddings.model
+     )
+     # Typical Qdrant vector store set-up
+     vectorstore = QdrantVectorStore(
+         client=client,
+         collection_name=collection_name,
+         embedding=cached_embedder)
+     vectorstore.add_documents(docs)
+     retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3})
+     return retriever
+
+
+ def prepare_rag_chain(retriever, prompt_cache="yes"):
+     # prompt_cache is "yes" or "no", taken from the user's choice in the UI
+     system_template = get_system_template()
+     user_template = get_user_template()
+
+     chat_prompt = ChatPromptTemplate.from_messages([
+         ("system", system_template),
+         ("human", user_template)
+     ])
+
+     chat_model = ChatOpenAI(model="gpt-4o-mini")
+
+     if prompt_cache == "yes":
+         set_llm_cache(InMemoryCache())
+
+     from operator import itemgetter
+
+     rag_qa_chain = (
+         {"context": itemgetter("question") | retriever, "question": itemgetter("question"), "language": itemgetter("language")}
+         | RunnablePassthrough.assign(context=itemgetter("context"), language=itemgetter("language"))
+         | chat_prompt | chat_model
+     )
+     return rag_qa_chain
+
+ def process_file(file, prompt_cache):
+     docs = load_file(file)
+     retriever = process_embeddings(docs)
+     rag_chain = prepare_rag_chain(retriever, prompt_cache)
+     return {"chain": rag_chain, "retriever": retriever}
utilities/prompts.py ADDED
@@ -0,0 +1,43 @@
+
+ def get_system_template():
+     st = """
+     You are a helpful assistant who always speaks in a pleasant tone!
+     Use the provided context to answer the question.
+     Think through your answers carefully and ensure they are correct based on the provided context.
+     Do not reference this prompt or the context in your response.
+     Respond in the language provided below. If none is provided, use Italian.
+     """
+     return st
+
+
+ def get_user_template():
+     ut = """
+     Question:
+     {question}
+
+     Language:
+     {language}
+
+     Context:
+     {context}
+     """
+     return ut
+
+ def get_opening_content():
+     oc = """
+     Welcome!
+
+     I am the Assignment 14 Chatbot.
+
+     My goal is to demonstrate an MVP app.
+
+     I have the following enabled:
+     - embedding cache
+     - prompt cache
+     - async processing
+     - user sessions
+     - scalable tooling
+
+     Upload a PDF document and ask some questions about it.
+     """
+     return oc
+ return oc