kidwaiaun committed on
Commit
24a8f30
·
verified ·
1 Parent(s): 9a4b835

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -0
app.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import time
3
+ import os
4
+ from langchain.document_loaders import TextLoader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.vectorstores import SKLearnVectorStore
7
+ from langchain.embeddings.openai import OpenAIEmbeddings
8
+ from langchain.prompts import PromptTemplate
9
+ from langchain.llms import ChatOllama
10
+ from langchain.schema.output_parser import StrOutputParser
11
+
12
+ # Define the RAG application class
13
class RAGApplication:
    """Answer questions with retrieval-augmented generation and a chat memory.

    The collaborators are duck-typed:

    * ``retriever`` exposes ``invoke(question)`` and returns an iterable of
      objects that have a ``page_content`` attribute.
    * ``rag_chain`` exposes ``invoke(mapping)`` and returns the answer.
    * ``memory`` is a mutable list of ``{"role": ..., "content": ...}`` dicts
      that :meth:`run` extends after every successful exchange.
    """

    def __init__(self, retriever, rag_chain, memory):
        self.retriever = retriever
        self.rag_chain = rag_chain
        self.memory = memory
        # Metadata placeholders; this class itself only fills 'Response Time'.
        self.metadata = {
            'File Name': None,
            'File Size': None,
            'Document Processing Time': None,
            'Response Time': None,
        }

    def run(self, question):
        """Retrieve context for *question*, query the chain and return its answer.

        The chain receives a dict with the keys ``question``, ``documents``
        (retrieved page contents joined by newlines) and ``history`` (previous
        turns rendered as ``[ROLE]: content`` lines).

        Memory is only updated once the chain has answered, so an exception
        from the retriever or the chain propagates without leaving a dangling
        user turn behind.
        """
        # perf_counter is monotonic, so the measured latency cannot go negative
        # or jump when the system clock is adjusted.
        started = time.perf_counter()

        retrieved = self.retriever.invoke(question)
        doc_texts = "\n".join(doc.page_content for doc in retrieved)

        # Render history BEFORE recording the current question: the question is
        # passed separately, and including it here would show it to the model
        # twice.
        conversation_history = "\n".join(
            f"[{entry['role'].upper()}]: {entry['content']}"
            for entry in self.memory
        )

        answer = self.rag_chain.invoke({
            "question": question,
            "documents": doc_texts,
            "history": conversation_history,
        })

        elapsed = time.perf_counter() - started
        self.metadata['Response Time'] = f"{round(elapsed, 2)} seconds"

        # Commit both turns together so memory always holds complete exchanges.
        self.memory.append({"role": "user", "content": question})
        self.memory.append({"role": "assistant", "content": answer})
        return answer
57
+
58
+
59
def load_data(json_file_path: str):
    """Read the UTF-8 JSON file at *json_file_path* and return the parsed value.

    The previous ``@st.cache_data`` decorator was removed: ``st`` (Streamlit)
    is never imported in this file, so evaluating the decorator raised
    ``NameError`` as soon as the module was imported. Every call now re-reads
    the file and returns a fresh object.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the file does not contain valid JSON.
    """
    # Imported locally because the file's import block does not provide json.
    import json

    with open(json_file_path, "r", encoding="utf-8") as handle:
        return json.load(handle)
63
+
64
+
65
def make_string(obj: dict) -> str:
    """Render the string and list entries of *obj* as ``"Label": value`` lines.

    Each key is turned into a label by capitalizing it and replacing
    underscores with spaces. String values are emitted as-is; list values are
    joined with ``", "``. Entries whose value is neither a ``str`` nor a
    ``list`` are skipped. Every emitted line ends with a newline, and an empty
    mapping yields an empty string.
    """
    lines = []
    for key, value in obj.items():
        if isinstance(value, str):
            rendered = value
        elif isinstance(value, list):
            # str() each item so lists holding numbers or other non-string
            # values no longer make str.join raise TypeError.
            rendered = ", ".join(str(item) for item in value)
        else:
            continue
        label = key.capitalize().replace("_", " ")
        lines.append(f'"{label}": {rendered}\n')
    return "".join(lines)
75
+
76
+
77
def _format_chat_history(history):
    """Render Gradio (user, assistant) message pairs as prompt text."""
    return "\n".join(
        f"User: {user_msg}\nAssistant: {bot_msg}"
        for user_msg, bot_msg in (history or [])
    )


def _require_openai_api_key():
    """Return the OpenAI API key from the environment, failing loudly if unset."""
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENAI_API_KEY is not set; export it before launching the app."
        )
    return api_key


def main():
    """Index ``file.md``, build the RAG chain and launch the Gradio chat UI.

    Raises:
        RuntimeError: if the ``OPENAI_API_KEY`` environment variable is unset.
    """
    # Load the Markdown file
    markdown_file_path = "file.md"
    loader = TextLoader(markdown_file_path, encoding='utf-8')

    # Time the document processing with a monotonic clock
    started = time.perf_counter()
    documents = loader.load()
    file_processing_time = round(time.perf_counter() - started, 2)

    # Get file size
    file_size = os.path.getsize(markdown_file_path) if os.path.exists(markdown_file_path) else 0

    # Split the text into chunks
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,   # Adjust to needs
        chunk_overlap=100,  # Overlap to maintain context
    )
    split_docs = text_splitter.split_documents(documents)

    # Create embeddings and vector store.
    # SECURITY: the API key used to be hard-coded here. Secrets must never be
    # committed; the key is now read from the environment, and the previously
    # committed key should be revoked.
    vectorstore = SKLearnVectorStore.from_documents(
        documents=split_docs,
        embedding=OpenAIEmbeddings(openai_api_key=_require_openai_api_key()),
    )
    # The number of results is a search parameter and goes in search_kwargs.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # Define the prompt template
    prompt = PromptTemplate(
        template="""
        You are an AI assistant specialized in providing information about HR policies and guidelines.
        You have access to HR documents containing policies, guidelines, processes, and other related data.
        Here is the conversation history so far:
        {history}
        Use the provided documents to answer the user’s question about HR matters in a concise and precise manner.
        If you don't know the answer, just say that you don't know.
        Use three sentences maximum.

        Question: {question}
        Documents: {documents}
        Answer:""",
        input_variables=["history", "question", "documents"],
    )

    # Initialize the LLM
    # NOTE(review): ChatOllama is imported at the top of the file from
    # langchain.llms; confirm that import path resolves for the installed
    # LangChain version.
    llm = ChatOllama(
        model="llama3.3:7b",
        temperature=0.5,
    )

    # Create a chain combining the prompt and LLM
    rag_chain = prompt | llm | StrOutputParser()

    def chat_interface(user_input, history):
        """Answer *user_input* and return ``("", updated_history)`` for the UI.

        ``history`` is the Chatbot value: a list of (user, assistant) pairs.
        It is per-session state owned by Gradio, so no process-wide
        conversation list is kept (that would mix different users' chats).
        """
        turns = list(history or [])
        if not user_input or not user_input.strip():
            # Nothing to ask: leave the conversation untouched.
            return "", turns

        # Ground the answer in the most relevant chunks rather than the whole
        # file, which is what the retriever was built for.
        retrieved = retriever.invoke(user_input)
        context = "\n".join(doc.page_content for doc in retrieved)

        output = rag_chain.invoke({
            "history": _format_chat_history(turns),
            "question": user_input,
            "documents": context,
        })

        # The Chatbot component renders the full message list, not a bare
        # string, so append the new exchange and hand the list back.
        # NOTE(review): this uses (user, assistant) pairs, matching how history
        # was already read; confirm the installed Gradio version accepts them.
        turns.append((user_input, output))
        return "", turns

    # Create Gradio Interface
    with gr.Blocks() as iface:
        gr.Markdown("# HR Talk - AI Assistant")
        with gr.Row():
            with gr.Column():
                chat = gr.Chatbot()
                query = gr.Textbox(label="Ask a question about HR policies...")
                submit = gr.Button("Submit")

        with gr.Accordion("Metadata", open=False):
            gr.Markdown(f"**File Name:** {markdown_file_path}\n")
            gr.Markdown(f"**File Size:** {file_size} bytes\n")
            gr.Markdown(f"**Processing Time:** {file_processing_time} seconds\n")

        # Clear the textbox and refresh the chat window after each question.
        submit.click(chat_interface, inputs=[query, chat], outputs=[query, chat])

    # Launch app
    iface.launch()


if __name__ == "__main__":
    main()