Rohan Kataria committed on
Commit dc0d6c9 · 1 Parent(s): 793e2fe

dockerfile with app

.gitattributes DELETED
@@ -1,35 +0,0 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
.history/Dockerfile_20240310012154 ADDED
@@ -0,0 +1,22 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.9
+
+ # Set the working directory in the container
+ WORKDIR /usr/src/app
+
+ # Copy the current directory contents into the container at /usr/src/app
+ COPY requirements.txt ./
+ COPY app.py ./
+ COPY src ./src
+
+ # Install any needed packages specified in requirements.txt
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Make port 8501 available to the world outside this container
+ EXPOSE 8501
+
+ # Define environment variable
+ ENV NAME Chat-w-git
+
+ # Run app.py when the container launches
+ CMD ["streamlit", "run", "app.py"]
.history/Dockerfile_20240310012228 ADDED
@@ -0,0 +1,19 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.9
+
+ # Set the working directory in the container
+ WORKDIR /usr/src/app
+
+ # Copy the current directory contents into the container at /usr/src/app
+ COPY requirements.txt ./
+ COPY app.py ./
+ COPY src ./src
+
+ # Install any needed packages specified in requirements.txt
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Make port 8501 available to the world outside this container
+ EXPOSE 8501
+
+ # Run app.py when the container launches
+ CMD ["streamlit", "run", "app.py"]
Dockerfile ADDED
@@ -0,0 +1,19 @@
+ # Use an official Python runtime as a parent image
+ FROM python:3.9
+
+ # Set the working directory in the container
+ WORKDIR /usr/src/app
+
+ # Copy the current directory contents into the container at /usr/src/app
+ COPY requirements.txt ./
+ COPY app.py ./
+ COPY src ./src
+
+ # Install any needed packages specified in requirements.txt
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Make port 8501 available to the world outside this container
+ EXPOSE 8501
+
+ # Run app.py when the container launches
+ CMD ["streamlit", "run", "app.py"]
app.py ADDED
@@ -0,0 +1,67 @@
+ import streamlit as st
+ from src.main import ConversationalResponse
+ import os
+
+ # Constants
+ ROLE_USER = "user"
+ ROLE_ASSISTANT = "assistant"
+ MAX_MESSAGES = 5
+
+ st.set_page_config(page_title="Chat with Git", page_icon="🦜")
+ st.title("Chat with Git 🤖📚")
+ st.markdown("by [Rohan Kataria](https://www.linkedin.com/in/imrohan/) view more at [VEW.AI](https://vew.ai/)")
+ st.markdown("This app allows you to chat with Git code files. You can paste a link to a Git repository and ask questions about it. Under the hood it uses LangChain's GitLoader and ConversationalRetrievalChain, with Streamlit for the UI.")
+
+ @st.cache_resource(ttl="1h")
+ def load_agent(url, branch, file_filter):
+     with st.spinner('Loading Git documents...'):
+         agent = ConversationalResponse(url, branch, file_filter)
+     st.success("Git Loaded Successfully")
+     return agent
+
+ def main():
+
+     git_link = st.sidebar.text_input("Enter your Git Link")
+     branch = st.sidebar.text_input("Enter your Git Branch")
+     file_filter = st.sidebar.text_input("Enter the Extension of Files to Load eg. py,sql,r (no spaces)")
+
+     if "agent" not in st.session_state:
+         st.session_state["agent"] = None
+         st.session_state["user_message_count"] = 0
+
+     if st.sidebar.button("Load Agent"):
+         if git_link and branch and file_filter:
+             try:
+                 st.session_state["agent"] = load_agent(git_link, branch, file_filter)
+                 st.session_state["messages"] = [{"role": ROLE_ASSISTANT, "content": "How can I help you?"}]
+                 st.session_state["user_message_count"] = 0
+             except Exception as e:
+                 st.sidebar.error(f"Error loading Git repository: {str(e)}")
+                 return
+
+     if st.session_state["agent"]:  # Chat will only appear if the agent is loaded
+         for msg in st.session_state.messages:
+             st.chat_message(msg["role"]).write(msg["content"])
+
+         if st.session_state["user_message_count"] < MAX_MESSAGES:
+             user_query = st.chat_input(placeholder="Ask me anything!")
+
+             if user_query:
+                 st.session_state.messages.append({"role": ROLE_USER, "content": user_query})
+                 st.chat_message(ROLE_USER).write(user_query)
+                 st.session_state["user_message_count"] += 1
+
+                 # Generate the response
+                 with st.spinner("Generating response"):
+                     response = st.session_state["agent"](user_query)
+
+                 # Display the response immediately
+                 st.chat_message(ROLE_ASSISTANT).write(response)
+
+                 # Add the response to the message history
+                 st.session_state.messages.append({"role": ROLE_ASSISTANT, "content": response})
+         else:
+             st.warning("You have reached your message limit. Contact [Rohan Kataria](https://www.linkedin.com/in/imrohan/) to increase the limit.")
+
+ if __name__ == "__main__":
+     main()
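
The sidebar asks for extensions as a comma-separated string (e.g. `py,sql,r`); in `src/main.py` that string is turned into an `endswith` predicate that `GitLoader` applies to each file path. A minimal standalone sketch of that filtering logic, using hypothetical file paths:

```python
# Sketch of the file_filter handling used in src/main.py (paths below are hypothetical).
def make_file_filter(extensions: str):
    # "py,sql,r" -> predicate keeping paths that end with any of the listed extensions
    suffixes = tuple(extensions.split(","))
    return lambda file_path: file_path.endswith(suffixes)

keep = make_file_filter("py,sql,r")
print(keep("src/main.py"))          # True
print(keep("queries/report.sql"))   # True
print(keep("README.md"))            # False
```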
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ streamlit
+ langchain==0.0.305
+ langchain[docarray]
+ tiktoken
+ openai
src/__pycache__/main.cpython-312.pyc ADDED
Binary file (5.66 kB)
 
src/main.py ADDED
@@ -0,0 +1,143 @@
+ import os
+ import openai
+ import sys
+ sys.path.append('../..')
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
+ from langchain.vectorstores import DocArrayInMemorySearch
+ from langchain.document_loaders import TextLoader
+ from langchain.chains import RetrievalQA, ConversationalRetrievalChain
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chat_models import ChatOpenAI
+ from langchain.document_loaders import TextLoader
+ from langchain.document_loaders import GitLoader
+ from langchain.llms import OpenAI
+ from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
+ from langchain.vectorstores import Chroma
+ from langchain.embeddings.openai import OpenAIEmbeddings
+ from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate, ChatPromptTemplate
+ import datetime
+ import shutil
+
+
+ # Function to load the data from GitHub using LangChain's GitLoader, given a repo URL, branch, and file_filter string
+ def loader(url: str, branch: str, file_filter: str):
+     repo_path = "./github_repo"
+     if os.path.exists(repo_path):
+         shutil.rmtree(repo_path)
+
+     loader = GitLoader(
+         clone_url=url,
+         repo_path="./github_repo/",
+         branch=branch,
+         file_filter=lambda file_path: file_path.endswith(tuple(file_filter.split(',')))  # Only keep files with the requested extensions; the whole repo is still cloned
+     )
+
+     data = loader.load()
+     return data
+
+
+ # Function to split the data into chunks using a recursive character text splitter
+ def split_data(data):
+     splitter = RecursiveCharacterTextSplitter(
+         chunk_size=1000,
+         chunk_overlap=150,
+         length_function=len,  # Function to measure the length of chunks while splitting
+         add_start_index=True  # Include the starting position of each chunk in metadata
+     )
+     chunks = splitter.split_documents(data)
+     return chunks
+
+ # Function to ingest the chunks into an in-memory document vector store
+ def ingest_chunks(chunks):
+     embedding = OpenAIEmbeddings(
+         # deployment="your-embeddings-deployment-name",
+         model="codellama",
+         openai_api_base="https://thewise-ollama-server.hf.space",
+         # openai_api_type="azure",
+         openai_api_key='nothing'
+     )
+     vector_store = DocArrayInMemorySearch.from_documents(chunks, embedding)
+
+     repo_path = "./github_repo"
+     if os.path.exists(repo_path):
+         shutil.rmtree(repo_path)
+
+     return vector_store
+
+ # Retrieval function to query the vector store and answer the user
+ def retreival(vector_store, k):
+     # Creating the LLM
+     llm = ChatOpenAI(model='codellama', temperature=0, openai_api_base='https://thewise-ollama-server.hf.space', openai_api_key='nothing')
+
+     # Define the system message template
+     # CHAT HISTORY is added to the system template explicitly: by default the chat history only condenses the human question in the background, while the system template goes straight to the LLM chain
+     # Explicitly adding chat history lets the model access previous turns and answer questions like "what was my previous question?"
+     # This also sends the chat history to the LLM along with the context and question
+     system_template = """You're a code summarisation assistant. Given the following extracted parts of a long document as "CONTEXT" create a final answer.
+     If you don't know the answer, just say that you don't know. Don't try to make up an answer.
+     Only If asked to create a "DIAGRAM" for code use "MERMAID SYNTAX LANGUAGE" in your answer from "CONTEXT" and "CHAT HISTORY" with a short explanation of diagram.
+     CONTEXT: {context}
+     =======
+     CHAT HISTORY: {chat_history}
+     =======
+     FINAL ANSWER:"""
+
+     human_template = """{question}"""
+
+     # ai_template = """
+     # FINAL ANSWER:"""
+
+     # Create the chat prompt templates
+     messages = [
+         SystemMessagePromptTemplate.from_template(system_template),
+         HumanMessagePromptTemplate.from_template(human_template)
+         # AIMessagePromptTemplate.from_template(ai_template)
+     ]
+
+     PROMPT = ChatPromptTemplate.from_messages(messages)
+
+     # Creating memory
+     # memory = ConversationBufferMemory(
+     #     memory_key="chat_history",
+     #     input_key="question",
+     #     output_key="answer",
+     #     return_messages=True)
+
+     memory = ConversationBufferWindowMemory(
+         memory_key="chat_history",
+         input_key="question",
+         output_key="answer",
+         return_messages=True,
+         k=5)
+
+     # Creating the retriever; this could also be a contextual compression retriever
+     retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": k})  # search_type can be "similarity" or "mmr"
+
+     chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         chain_type="stuff",  # chain type can be refine, stuff, map_reduce
+         retriever=retriever,
+         memory=memory,
+         return_source_documents=True,  # Returning source documents adds extra output keys, so the memory needs explicit input and output keys to work
+         combine_docs_chain_kwargs=dict({"prompt": PROMPT})
+     )
+
+     return chain
+
+ # Class using all the components above to create the QA system
+ class ConversationalResponse:
+     def __init__(self, url, branch, file_filter):
+         self.url = url
+         self.branch = branch
+         self.file_filter = file_filter
+         self.data = loader(self.url, self.branch, self.file_filter)
+         self.chunks = split_data(self.data)
+         self.vector_store = ingest_chunks(self.chunks)
+         self.chain_type = "stuff"
+         self.k = 10
+         self.chain = retreival(self.vector_store, self.k)
+
+     def __call__(self, question):
+         agent = self.chain(question)
+         return agent['answer']
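
Outside the Streamlit app, `ConversationalResponse` can be exercised directly as a quick smoke test. A minimal sketch, assuming the hosted model endpoint hardcoded in `src/main.py` is reachable; the repository URL and branch below are placeholders:

```python
# Hypothetical headless use of src/main.py; the repo URL and branch are placeholders.
from src.main import ConversationalResponse

agent = ConversationalResponse(
    url="https://github.com/example/example-repo",  # placeholder repository
    branch="main",
    file_filter="py",  # comma-separated extensions, as entered in the sidebar
)

# __call__ runs the ConversationalRetrievalChain and returns its 'answer' field.
print(agent("What does this repository do?"))
```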