PropSentinelv1

Sleeping

App Files Files Community

Cheselle committed on Sep 24

Commit

1e61831

•

1 Parent(s): c75b207

Added Base RAG

Browse files

Files changed (3) hide show

Dockerfile +11 -0
app.py +107 -0
requirements.txt +96 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,11 @@

+# Official CPython 3.9 base image.
+FROM python:3.9
+# Create and switch to an unprivileged user (UID 1000) so the app does not run as root.
+RUN useradd -m -u 1000 user
+USER user
+# Put the user's ~/.local/bin on PATH so console scripts installed by pip for
+# this user (e.g. `chainlit`) can be found by CMD.
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+# Copy only the requirements file first so the dependency layer below is reused
+# from cache when application code changes. The destination is spelled with
+# $HOME because Docker does not expand "~" in COPY paths.
+COPY --chown=user ./requirements.txt $HOME/app/requirements.txt
+RUN pip install -r requirements.txt
+# Copy the application last; --chown keeps the files owned by the runtime user
+# (COPY creates files owned by root unless --chown is given).
+COPY --chown=user . $HOME/app
+CMD ["chainlit", "run", "app.py", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,107 @@

+import re
+from langchain_openai import OpenAIEmbeddings
+from langchain_openai import ChatOpenAI
+from langchain_openai.embeddings import OpenAIEmbeddings
+from langchain.prompts import ChatPromptTemplate
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.schema import StrOutputParser
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain_community.vectorstores import Qdrant
+from langchain_core.runnables import RunnablePassthrough, RunnableParallel
+from langchain_core.documents import Document
+from operator import itemgetter
+import os
+from dotenv import load_dotenv
+import chainlit as cl
+# Load environment variables from a .env file, if one is present.
+# NOTE(review): presumably this is where OPENAI_API_KEY comes from locally — confirm.
+load_dotenv()
+# Fetch and parse both source PDFs at import time. These calls hit the network,
+# so the module cannot be imported (and the app cannot start) if either URL is
+# unreachable.
+ai_framework_document = PyMuPDFLoader(file_path="https://nvlpubs.nist.gov/nistpubs/ai/NIST.AI.600-1.pdf").load()
+ai_blueprint_document = PyMuPDFLoader(file_path="https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf").load()
+def metadata_generator(document, name):
+    fixed_text_splitter = RecursiveCharacterTextSplitter(
+        chunk_size=500,
+        chunk_overlap=100,
+        separators=["\n\n", "\n", ".", "!", "?"]
+    )
+    collection = fixed_text_splitter.split_documents(document)
+    for doc in collection:
+        doc.metadata["source"] = name
+    return collection
+# Chunk both documents, label each chunk with a short source name, and merge
+# them into a single corpus.
+recursive_framework_document = metadata_generator(ai_framework_document, "AI Framework")
+recursive_blueprint_document = metadata_generator(ai_blueprint_document, "AI Blueprint")
+combined_documents = recursive_framework_document + recursive_blueprint_document
+# Embed every chunk and index it in a Qdrant collection held in memory
+# (location=":memory:"), so the index is rebuilt on every process start.
+embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
+vectorstore = Qdrant.from_documents(
+    documents=combined_documents,
+    embedding=embeddings,
+    location=":memory:",
+    collection_name="ai_policy"
+)
+# Retriever over the vector store, using the library's default search settings.
+alt_retriever = vectorstore.as_retriever()
+## Generation LLM
+llm = ChatOpenAI(model="gpt-4o-mini")
+# Prompt template; the {context} and {question} placeholders are filled in by
+# the chain defined below.
+RAG_PROMPT = """\
+You are an AI Policy Expert.
+Given a provided context and question, you must answer the question based only on context.
+Think through your answer carefully and step by step.
+Context: {context}
+Question: {question}
+"""
+rag_prompt = ChatPromptTemplate.from_template(RAG_PROMPT)
+retrieval_augmented_qa_chain = (
+    # INVOKE CHAIN WITH: {"question" : "<<SOME USER QUESTION>>"}
+    # "question" : populated by getting the value of the "question" key
+    # "context"  : populated by getting the value of the "question" key and piping it into alt_retriever
+    {"context": itemgetter("question") | alt_retriever, "question": itemgetter("question")}
+    # Passes the dict through unchanged: "context" is re-assigned to its own value from the
+    # previous step, so this step does not alter the data.
+    | RunnablePassthrough.assign(context=itemgetter("context"))
+    # "response" : the "context" and "question" values are used to format the prompt, which is then
+    #              piped into the LLM; the LLM output is stored under the "response" key
+    # "context"  : populated by getting the value of the "context" key from the previous step
+    | {"response": rag_prompt | llm, "context": itemgetter("context")}
+)
+# Example: retrieval_augmented_qa_chain.invoke({"question" : "What is the AI framework all about?"})
+@cl.on_message
+async def handle_message(message):
+    try:
+        # Process the incoming question using the RAG chain
+        result = retrieval_augmented_qa_chain.invoke({"question": message.content})
+        # Create a new message for the response
+        response_message = cl.Message(content=result["response"].content)
+        # Send the response back to the user
+        await response_message.send()
+    except Exception as e:
+        # Handle any exception and log it or send a response back to the user
+        error_message = cl.Message(content=f"An error occurred: {str(e)}")
+        await error_message.send()
+        print(f"Error occurred: {e}")
+# Run the ChainLit server when this file is executed directly.
+# NOTE(review): the Dockerfile launches the app with `chainlit run app.py`, in
+# which case this guard is presumably not taken — confirm. Also confirm that
+# `cl.run` exists in the pinned chainlit version; if it does not, the resulting
+# error is caught below and only printed.
+if __name__ == "__main__":
+    try:
+        cl.run()
+    except Exception as e:
+        print(f"Server error occurred: {e}")

requirements.txt ADDED Viewed

	@@ -0,0 +1,96 @@

+aiofiles==23.2.1
+aiohappyeyeballs==2.4.0
+aiohttp==3.10.5
+aiosignal==1.3.1
+annotated-types==0.7.0
+anyio==3.7.1
+asyncer==0.0.2
+attrs==24.2.0
+bidict==0.23.1
+certifi==2024.8.30
+chainlit==0.7.700
+charset-normalizer==3.3.2
+click==8.1.7
+dataclasses-json==0.5.14
+Deprecated==1.2.14
+distro==1.9.0
+fastapi==0.100.1
+fastapi-socketio==0.0.10
+filetype==1.2.0
+frozenlist==1.4.1
+googleapis-common-protos==1.65.0
+grpcio==1.66.1
+grpcio-tools==1.62.3
+h11==0.14.0
+h2==4.1.0
+hpack==4.0.0
+httpcore==0.17.3
+httpx==0.24.1
+hyperframe==6.0.1
+idna==3.10
+importlib_metadata==8.4.0
+jiter==0.5.0
+jsonpatch==1.33
+jsonpointer==3.0.0
+langchain==0.2.16
+langchain-community==0.2.17
+langchain-core==0.2.41
+langchain-experimental==0.0.65
+langchain-openai==0.1.25
+langchain-qdrant==0.1.4
+langchain-text-splitters==0.2.4
+langsmith==0.1.125
+Lazify==0.4.0
+marshmallow==3.22.0
+multidict==6.1.0
+mypy-extensions==1.0.0
+nest-asyncio==1.6.0
+numpy==1.26.4
+openai==1.46.1
+opentelemetry-api==1.27.0
+opentelemetry-exporter-otlp==1.27.0
+opentelemetry-exporter-otlp-proto-common==1.27.0
+opentelemetry-exporter-otlp-proto-grpc==1.27.0
+opentelemetry-exporter-otlp-proto-http==1.27.0
+opentelemetry-instrumentation==0.48b0
+opentelemetry-proto==1.27.0
+opentelemetry-sdk==1.27.0
+opentelemetry-semantic-conventions==0.48b0
+orjson==3.10.7
+packaging==23.2
+portalocker==2.10.1
+protobuf==4.25.5
+pydantic==2.9.2
+pydantic_core==2.23.4
+PyJWT==2.9.0
+PyMuPDF==1.24.10
+PyMuPDFb==1.24.10
+python-dotenv==1.0.1
+python-engineio==4.9.1
+python-graphql-client==0.4.3
+python-multipart==0.0.6
+python-socketio==5.11.4
+PyYAML==6.0.2
+qdrant-client==1.11.2
+regex==2024.9.11
+requests==2.32.3
+simple-websocket==1.0.0
+sniffio==1.3.1
+SQLAlchemy==2.0.35
+starlette==0.27.0
+syncer==2.0.3
+tenacity==8.5.0
+tiktoken==0.7.0
+tomli==2.0.1
+tqdm==4.66.5
+typing-inspect==0.9.0
+typing_extensions==4.12.2
+uptrace==1.26.0
+urllib3==2.2.3
+uvicorn==0.23.2
+watchfiles==0.20.0
+websockets==13.0.1
+wrapt==1.16.0
+wsproto==1.2.0
+yarl==1.11.1
+zipp==3.20.2