Spaces · Running

Rohan Kataria committed · commit dc0d6c9 · Parent(s): 793e2fe

dockerfile with app
Files changed:

- .gitattributes                        +0   -35
- .history/Dockerfile_20240310012154    +22  -0
- .history/Dockerfile_20240310012228    +19  -0
- Dockerfile                            +19  -0
- app.py                                +67  -0
- requirements.txt                      +5   -0
- src/__pycache__/main.cpython-312.pyc  +0   -0
- src/main.py                           +143 -0
.gitattributes
DELETED
@@ -1,35 +0,0 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text
.history/Dockerfile_20240310012154
ADDED
@@ -0,0 +1,22 @@
+# Use an official Python runtime as a parent image
+FROM python:3.9
+
+# Set the working directory in the container
+WORKDIR /usr/src/app
+
+# Copy the current directory contents into the container at /usr/src/app
+COPY requirements.txt ./
+COPY app.py ./
+COPY src ./src
+
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Make port 8501 available to the world outside this container
+EXPOSE 8501
+
+# Define environment variable
+ENV NAME Chat-w-git
+
+# Run app.py when the container launches
+CMD ["streamlit", "run", "app.py"]
.history/Dockerfile_20240310012228
ADDED
@@ -0,0 +1,19 @@
+# Use an official Python runtime as a parent image
+FROM python:3.9
+
+# Set the working directory in the container
+WORKDIR /usr/src/app
+
+# Copy the current directory contents into the container at /usr/src/app
+COPY requirements.txt ./
+COPY app.py ./
+COPY src ./src
+
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Make port 8501 available to the world outside this container
+EXPOSE 8501
+
+# Run app.py when the container launches
+CMD ["streamlit", "run", "app.py"]
Dockerfile
ADDED
@@ -0,0 +1,19 @@
+# Use an official Python runtime as a parent image
+FROM python:3.9
+
+# Set the working directory in the container
+WORKDIR /usr/src/app
+
+# Copy the current directory contents into the container at /usr/src/app
+COPY requirements.txt ./
+COPY app.py ./
+COPY src ./src
+
+# Install any needed packages specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Make port 8501 available to the world outside this container
+EXPOSE 8501
+
+# Run app.py when the container launches
+CMD ["streamlit", "run", "app.py"]
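Note: the two .history snapshots and the final Dockerfile are identical except for the ENV NAME line (and its comment) kept only in the first snapshot; in all three the image installs requirements.txt, copies app.py and src/, exposes port 8501 and starts Streamlit. A quick local smoke test is sketched below, assuming the image has been built and started with port 8501 published; the image name and the /_stcore/health probe path are assumptions (requirements.txt does not pin a Streamlit version), not part of this commit.

# Hypothetical smoke test against a locally running container, e.g. after
#   docker build -t chat-w-git . && docker run -p 8501:8501 chat-w-git
# Recent Streamlit releases answer a health probe at /_stcore/health; treat that
# path, like the image name above, as an assumption.
import urllib.request

with urllib.request.urlopen("http://localhost:8501/_stcore/health", timeout=5) as resp:
    print(resp.status, resp.read().decode())  # expect 200 and "ok" once the app is serving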
app.py
ADDED
@@ -0,0 +1,67 @@
+import streamlit as st
+from src.main import ConversationalResponse
+import os
+
+# Constants
+ROLE_USER = "user"
+ROLE_ASSISTANT = "assistant"
+MAX_MESSAGES = 5
+
+st.set_page_config(page_title="Chat with Git", page_icon="🦜")
+st.title("Chat with Git 🤖📚")
+st.markdown("by [Rohan Kataria](https://www.linkedin.com/in/imrohan/) view more at [VEW.AI](https://vew.ai/)")
+st.markdown("This app allows you to chat with Git code files. You can paste link to the Git repository and ask questions about it. In the background uses the Git Loader and ConversationalRetrieval chain from langchain, Streamlit for UI.")
+
+@st.cache_resource(ttl="1h")
+def load_agent(url, branch, file_filter):
+    with st.spinner('Loading Git documents...'):
+        agent = ConversationalResponse(url, branch, file_filter)
+    st.success("Git Loaded Successfully")
+    return agent
+
+def main():
+
+    git_link = st.sidebar.text_input("Enter your Git Link")
+    branch = st.sidebar.text_input("Enter your Git Branch")
+    file_filter = st.sidebar.text_input("Enter the Extension of Files to Load eg. py,sql,r (no spaces)")
+
+    if "agent" not in st.session_state:
+        st.session_state["agent"] = None
+        st.session_state["user_message_count"] = 0
+
+    if st.sidebar.button("Load Agent"):
+        if git_link and branch and file_filter:
+            try:
+                st.session_state["agent"] = load_agent(git_link, branch, file_filter)
+                st.session_state["messages"] = [{"role": ROLE_ASSISTANT, "content": "How can I help you?"}]
+                st.session_state["user_message_count"] = 0
+            except Exception as e:
+                st.sidebar.error(f"Error loading Git repository: {str(e)}")
+                return
+
+    if st.session_state["agent"]:  # Chat will only appear if the agent is loaded
+        for msg in st.session_state.messages:
+            st.chat_message(msg["role"]).write(msg["content"])
+
+        if st.session_state["user_message_count"] < MAX_MESSAGES:
+            user_query = st.chat_input(placeholder="Ask me anything!")
+
+            if user_query:
+                st.session_state.messages.append({"role": ROLE_USER, "content": user_query})
+                st.chat_message(ROLE_USER).write(user_query)
+                st.session_state["user_message_count"] += 1
+
+                # Generate the response
+                with st.spinner("Generating response"):
+                    response = st.session_state["agent"](user_query)
+
+                # Display the response immediately
+                st.chat_message(ROLE_ASSISTANT).write(response)
+
+                # Add the response to the message history
+                st.session_state.messages.append({"role": ROLE_ASSISTANT, "content": response})
+        else:
+            st.warning("Your message limit is over. Contact [Rohan Kataria](https://www.linkedin.com/in/imrohan/) to increase the limit.")
+
+if __name__ == "__main__":
+    main()
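The Streamlit flow above boils down to constructing one ConversationalResponse and calling it with the user's question. Below is a minimal command-line sketch of the same flow, assuming src/ is importable and the embedding/LLM endpoints hard-coded in src/main.py are reachable; the repository URL, branch and extension filter are illustrative placeholders, not values from the commit.

# Minimal CLI sketch of what app.py drives through the Streamlit UI.
# The arguments below are placeholders, not values taken from the commit.
from src.main import ConversationalResponse

if __name__ == "__main__":
    agent = ConversationalResponse(
        "https://github.com/example/repo",  # hypothetical Git clone URL
        "main",                             # branch to load
        "py",                               # comma-separated extensions, as in the sidebar input
    )
    print(agent("What does this repository do?"))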
requirements.txt
ADDED
@@ -0,0 +1,5 @@
+streamlit
+langchain==0.0.305
+langchain[docarray]
+tiktoken
+openai
src/__pycache__/main.cpython-312.pyc
ADDED
Binary file (5.66 kB)
src/main.py
ADDED
@@ -0,0 +1,143 @@
+import os
+import openai
+import sys
+sys.path.append('../..')
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
+from langchain.vectorstores import DocArrayInMemorySearch
+from langchain.document_loaders import TextLoader
+from langchain.chains import RetrievalQA, ConversationalRetrievalChain
+from langchain.memory import ConversationBufferMemory
+from langchain.chat_models import ChatOpenAI
+from langchain.document_loaders import TextLoader
+from langchain.document_loaders import GitLoader
+from langchain.llms import OpenAI
+from langchain.memory import ConversationBufferMemory, ConversationBufferWindowMemory
+from langchain.vectorstores import Chroma
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.prompts import PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate, AIMessagePromptTemplate, ChatPromptTemplate
+import datetime
+import shutil
+
+
+# Function to load the data from github using langchain with string type url, string type branch, string type file_filter
+def loader(url: str, branch: str, file_filter: str):
+    repo_path = "./github_repo"
+    if os.path.exists(repo_path):
+        shutil.rmtree(repo_path)
+
+    loader = GitLoader(
+        clone_url= url,
+        repo_path="./github_repo/",
+        branch=branch,
+        file_filter=lambda file_path: file_path.endswith(tuple(file_filter.split(','))) # Filter out files in Data but whole repo is cloned
+    )
+
+    data = loader.load()
+    return data
+
+
+#Function to split the data into chunks using recursive character text splitter
+def split_data(data):
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=1000,
+        chunk_overlap=150,
+        length_function=len, # Function to measure the length of chunks while splitting
+        add_start_index=True # Include the starting position of each chunk in metadata
+    )
+    chunks = splitter.split_documents(data)
+    return chunks
+
+#Function to ingest the chunks into a vectorstore of doc
+def ingest_chunks(chunks):
+    embedding = OpenAIEmbeddings(
+        # deployment="your-embeddings-deployment-name",
+        model="codellama",
+        openai_api_base="https://thewise-ollama-server.hf.space",
+        # openai_api_type="azure",
+        openai_api_key='nothing'
+    )
+    vector_store = DocArrayInMemorySearch.from_documents(chunks, embedding)
+
+    repo_path = "./github_repo"
+    if os.path.exists(repo_path):
+        shutil.rmtree(repo_path)
+
+    return vector_store
+
+#Retreival function to get the data from the database and reply to the user
+def retreival(vector_store, k):
+    #Creating LLM
+    llm = ChatOpenAI(model='codellama', temperature=0, openai_api_base='https://thewise-ollama-server.hf.space', openai_api_key='nothing')
+
+    # Define the system message template
+    #Adding CHAT HISTORY to the System template explicitly because mainly Chat history goes to Condense the Human Question with Backround (Not template), but System template goes straight the LLM Chain
+    #Explicitly adding chat history to access previous chats and answer "what is my previous question?"
+    #Great thing this also sends the chat history to the LLM Model along with the context and question
+    system_template = """You're a code summarisation assistant. Given the following extracted parts of a long document as "CONTEXT" create a final answer.
+    If you don't know the answer, just say that you don't know. Don't try to make up an answer.
+    Only If asked to create a "DIAGRAM" for code use "MERMAID SYNTAX LANGUAGE" in your answer from "CONTEXT" and "CHAT HISTORY" with a short explanation of diagram.
+    CONTEXT: {context}
+    =======
+    CHAT HISTORY: {chat_history}
+    =======
+    FINAL ANSWER:"""
+
+    human_template = """{question}"""
+
+    # ai_template = """
+    # FINAL ANSWER:"""
+
+    # Create the chat prompt templates
+    messages = [
+        SystemMessagePromptTemplate.from_template(system_template),
+        HumanMessagePromptTemplate.from_template(human_template)
+        # AIMessagePromptTemplate.from_template(ai_template)
+    ]
+
+    PROMPT = ChatPromptTemplate.from_messages(messages)
+
+    #Creating memory
+    # memory = ConversationBufferMemory(
+    #     memory_key="chat_history",
+    #     input_key="question",
+    #     output_key="answer",
+    #     return_messages=True)
+
+    memory = ConversationBufferWindowMemory(
+        memory_key="chat_history",
+        input_key="question",
+        output_key="answer",
+        return_messages=True,
+        k=5)
+
+    #Creating the retriever, this can also be a contextual compressed retriever
+    retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": k}) #search_type can be "similarity" or "mmr"
+
+    chain = ConversationalRetrievalChain.from_llm(
+        llm=llm,
+        chain_type="stuff", #chain type can be refine, stuff, map_reduce
+        retriever=retriever,
+        memory=memory,
+        return_source_documents=True, #When used these 2 properties, the output gets 3 properties: answer, source_document, source_document_score and then have to speocify input and output key in memory for it to work
+        combine_docs_chain_kwargs=dict({"prompt": PROMPT})
+    )
+
+    return chain
+
+#Class using all above components to create QA system
+class ConversationalResponse:
+    def __init__(self, url, branch, file_filter):
+        self.url = url
+        self.branch = branch
+        self.file_filter = file_filter
+        self.data = loader(self.url, self.branch, self.file_filter)
+        self.chunks = split_data(self.data)
+        self.vector_store = ingest_chunks(self.chunks)
+        self.chain_type = "stuff"
+        self.k = 10
+        self.chain = retreival(self.vector_store, self.k)
+
+    def __call__(self, question):
+        agent = self.chain(question)
+        return agent['answer']
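The class at the end simply wires the module's four stages together: loader clones the repository and loads files matching the extension filter, split_data chunks them (1000 characters with 150 overlap), ingest_chunks embeds them into a DocArrayInMemorySearch store, and retreival builds a ConversationalRetrievalChain with windowed memory and the custom prompt. A step-by-step sketch of that same pipeline follows, assuming the langchain==0.0.305 pin from requirements.txt; the repository URL, branch and extension filter are placeholders.

# Step-by-step sketch of the pipeline ConversationalResponse composes.
# URL, branch and extension filter are illustrative placeholders.
from src.main import loader, split_data, ingest_chunks, retreival

docs = loader("https://github.com/example/repo", "main", "py")  # clone and load matching files
chunks = split_data(docs)                                       # 1000-char chunks, 150-char overlap
store = ingest_chunks(chunks)                                   # embed into DocArrayInMemorySearch
chain = retreival(store, 10)                                    # retriever + windowed memory + prompt
result = chain("Summarise the repository structure")            # the chain returns a dict
print(result["answer"])                                         # retrieved docs ride along under "source_documents"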