flutterbasit committed on
Commit
901311a
·
verified ·
1 Parent(s): 86a675a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -99
app.py CHANGED
@@ -6,7 +6,7 @@ from sentence_transformers import SentenceTransformer
6
  import faiss
7
  from PyPDF2 import PdfReader
8
  from docx import Document
9
- from transformers import pipeline # Hugging Face for summarization
10
 
11
  # Initialize Sentence Transformer for embeddings
12
  model = SentenceTransformer('all-MiniLM-L6-v2')
@@ -52,11 +52,10 @@ def chunk_text(text, chunk_size=500, overlap=50):
52
  # Function to create embeddings and populate FAISS index
53
  def create_embeddings_and_store(chunks):
54
  global index
55
- # Reset the FAISS index before adding new embeddings
56
  index = faiss.IndexFlatL2(dimension)
57
  for chunk in chunks:
58
  embedding = model.encode([chunk])
59
- embedding = embedding.astype('float32') # Ensure embedding is in correct format
60
  index.add(embedding)
61
 
62
  # Function for summarizing the text before sending
@@ -65,9 +64,9 @@ def summarize_text(text):
65
  return summary[0]['summary_text']
66
 
67
  # Function to dynamically truncate context to fit the Groq API's token limit
68
- def truncate_context(context, max_tokens=4000): # Adjust max_tokens based on Groq's limits
69
  if len(context) > max_tokens:
70
- context = context[:max_tokens] # Truncate context to fit within the token limit
71
  return context
72
 
73
  # Function to query Groq with context and question
@@ -78,11 +77,9 @@ def query_groq(question, context):
78
  if not context.strip():
79
  return "Error: No context available from the uploaded documents."
80
 
81
- # Dynamically truncate context to fit within the token limit
82
- max_context_tokens = 4000 # Groq's token limit for context
83
  context = truncate_context(context, max_tokens=max_context_tokens)
84
 
85
- # Query Groq API with the truncated context
86
  chat_completion = client.chat.completions.create(
87
  messages=[{"role": "system", "content": "You are a helpful assistant. Use the context provided to answer the question."},
88
  {"role": "assistant", "content": context},
@@ -101,131 +98,89 @@ def rag_pipeline(files, question, summarize_before_sending=False):
101
  if not files:
102
  return "Error: No files uploaded. Please upload at least one document."
103
 
104
- # Process uploaded files
105
  texts = process_files(files)
106
  if not texts:
107
  return "Error: Could not extract text from the uploaded files."
108
 
109
- # Combine all extracted text into a single context
110
  combined_text = " ".join(texts)
111
 
112
  if summarize_before_sending:
113
- # Summarize the text to reduce token count
114
  combined_text = summarize_text(combined_text)
115
 
116
- # Ensure the combined text is within Groq's token limit
117
- max_text_size = 4000 # Adjust based on Groq's token limits
118
  combined_text = truncate_context(combined_text, max_tokens=max_text_size)
119
 
120
- # Chunk and create embeddings
121
  chunks = chunk_text(combined_text)
122
  create_embeddings_and_store(chunks)
123
 
124
- # Query Groq LLM with context and question
125
  answer = query_groq(question, combined_text)
126
  return answer
127
  except Exception as e:
128
  return f"Error: {str(e)}"
129
 
130
- # # Enhanced UI with modern and clean style
131
- # with gr.Blocks() as app:
132
- # with gr.Row():
133
- # # Left Column for instructions
134
- # with gr.Column(scale=1, min_width=250):
135
- # gr.Markdown("""
136
- # <div style="background: linear-gradient(145deg, #6e7dff, #1c2b58); padding: 30px; border-radius: 12px; box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1); font-family: 'Roboto', sans-serif;">
137
- # <h2 style="color: #fff; font-size: 32px; font-weight: bold;">DocAI: Document Assistant</h2>
138
- # <p style="color: #ddd; font-size: 18px;">Welcome to DocAI! Upload your documents and get intelligent answers based on their content.</p>
139
- # <p style="color: #ddd; font-size: 16px; line-height: 1.6;"><strong>Steps to use:</strong></p>
140
- # <ul style="color: #ddd; font-size: 16px; line-height: 1.6;">
141
- # <li>Upload your PDF or DOCX files.</li>
142
- # <li>Ask questions related to the document.</li>
143
- # <li>Enable "Summarize Before Sending" for a brief summary of the document.</li>
144
- # <li>Click "Submit" to get your answers.</li>
145
- # </ul>
146
- # <p style="color: #ddd; font-size: 16px; line-height: 1.6;">Upload multiple files and get answers based on their contents.</p>
147
- # </div>
148
- # """)
149
-
150
- # # Right Column for the main application content
151
- # with gr.Column(scale=2, min_width=600):
152
- # gr.Markdown("""
153
- # <div style="background: linear-gradient(135deg, #6e7dff, #1c2b58); padding: 20px; border-radius: 15px; box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2); font-family: 'Roboto', sans-serif;">
154
- # <h2 style="color: #fff; font-size: 36px; font-weight: bold; text-align: center; letter-spacing: 2px; text-transform: uppercase;">
155
- # Ask Your Document
156
- # </h2>
157
- # <p style="color: #ddd; font-size: 18px; text-align: center; line-height: 1.6;">
158
- # Get intelligent answers based on the content of your uploaded documents. Just ask a question!
159
- # </p>
160
- # </div>
161
- # """)
162
-
163
- # # File input
164
- # file_input = gr.File(
165
- # label="Upload Documents (PDF/DOCX)",
166
- # file_types=[".pdf", ".docx"],
167
- # file_count="multiple",
168
- # interactive=True
169
- # )
170
-
171
- # # Question input
172
- # question_input = gr.Textbox(
173
- # label="Ask a question",
174
- # placeholder="Type your question here...",
175
- # interactive=True,
176
- # lines=2,
177
- # max_lines=4
178
- # )
179
-
180
- # # Summarize before sending checkbox
181
- # summarize_before_input = gr.Checkbox(
182
- # label="Summarize Before Sending",
183
- # value=False
184
- # )
185
-
186
- # # Output text box with enhanced styling
187
- # output = gr.Textbox(
188
- # label="Answer from LLM",
189
- # interactive=False,
190
- # lines=4,
191
- # max_lines=6
192
- # )
193
-
194
- # # Submit button with icon and modern styling
195
- # submit_button = gr.Button("Submit", icon="send")
196
-
197
- # # Loading spinner
198
- # with gr.Row():
199
- # with gr.Column(scale=1, min_width=250):
200
- # gr.Markdown("<div style='font-size: 14px; color: #555;'>Your answer will appear here...</div>")
201
-
202
- # # Apply the logic for the button to trigger the RAG pipeline
203
- # submit_button.click(rag_pipeline, inputs=[file_input, question_input, summarize_before_input], outputs=output)
204
-
205
- # Launch the app
206
- # app.launch()
207
  with gr.Blocks() as app:
208
  with gr.Row():
 
209
  with gr.Column(scale=1, min_width=250):
210
  gr.Markdown("""
211
- <h2>Welcome to DocAI</h2>
212
- <p>Upload your documents and get intelligent answers.</p>
 
 
 
 
 
 
 
 
 
 
213
  """)
214
 
 
215
  with gr.Column(scale=2, min_width=600):
216
  gr.Markdown("""
217
- <h2>Ask Your Document</h2>
218
- <p>Get intelligent answers based on the content of your uploaded documents. Just ask a question!</p>
 
 
 
 
 
 
219
  """)
220
 
221
  # File input
222
- file_input = gr.File(label="Upload Documents (PDF/DOCX)", file_types=[".pdf", ".docx"], file_count="multiple")
 
 
 
 
 
223
 
224
  # Question input
225
- question_input = gr.Textbox(label="Ask a question", placeholder="Type your question here...")
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
  # Output text box
228
- output = gr.Textbox(label="Answer from LLM")
 
 
 
 
 
229
 
230
  # Submit button
231
  submit_button = gr.Button("Submit", icon="send")
 
6
  import faiss
7
  from PyPDF2 import PdfReader
8
  from docx import Document
9
+ from transformers import pipeline
10
 
11
  # Initialize Sentence Transformer for embeddings
12
  model = SentenceTransformer('all-MiniLM-L6-v2')
 
52
  # Function to create embeddings and populate FAISS index
53
  def create_embeddings_and_store(chunks):
54
  global index
 
55
  index = faiss.IndexFlatL2(dimension)
56
  for chunk in chunks:
57
  embedding = model.encode([chunk])
58
+ embedding = embedding.astype('float32')
59
  index.add(embedding)
60
 
61
  # Function for summarizing the text before sending
 
64
  return summary[0]['summary_text']
65
 
66
  # Function to dynamically truncate context to fit the Groq API's token limit
67
+ def truncate_context(context, max_tokens=4000):
68
  if len(context) > max_tokens:
69
+ context = context[:max_tokens]
70
  return context
71
 
72
  # Function to query Groq with context and question
 
77
  if not context.strip():
78
  return "Error: No context available from the uploaded documents."
79
 
80
+ max_context_tokens = 4000
 
81
  context = truncate_context(context, max_tokens=max_context_tokens)
82
 
 
83
  chat_completion = client.chat.completions.create(
84
  messages=[{"role": "system", "content": "You are a helpful assistant. Use the context provided to answer the question."},
85
  {"role": "assistant", "content": context},
 
98
  if not files:
99
  return "Error: No files uploaded. Please upload at least one document."
100
 
 
101
  texts = process_files(files)
102
  if not texts:
103
  return "Error: Could not extract text from the uploaded files."
104
 
 
105
  combined_text = " ".join(texts)
106
 
107
  if summarize_before_sending:
 
108
  combined_text = summarize_text(combined_text)
109
 
110
+ max_text_size = 4000
 
111
  combined_text = truncate_context(combined_text, max_tokens=max_text_size)
112
 
 
113
  chunks = chunk_text(combined_text)
114
  create_embeddings_and_store(chunks)
115
 
 
116
  answer = query_groq(question, combined_text)
117
  return answer
118
  except Exception as e:
119
  return f"Error: {str(e)}"
120
 
121
+ # Enhanced UI with modern and clean style
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  with gr.Blocks() as app:
123
  with gr.Row():
124
+ # Left Column for instructions
125
  with gr.Column(scale=1, min_width=250):
126
  gr.Markdown("""
127
+ <div style="background: linear-gradient(145deg, #6e7dff, #1c2b58); padding: 30px; border-radius: 12px; box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1); font-family: 'Roboto', sans-serif;">
128
+ <h2 style="color: #fff; font-size: 32px; font-weight: bold;">DocAI: Document Assistant</h2>
129
+ <p style="color: #ddd; font-size: 18px;">Welcome to DocAI! Upload your documents and get intelligent answers based on their content.</p>
130
+ <p style="color: #ddd; font-size: 16px; line-height: 1.6;"><strong>Steps to use:</strong></p>
131
+ <ul style="color: #ddd; font-size: 16px; line-height: 1.6;">
132
+ <li>Upload your PDF or DOCX files.</li>
133
+ <li>Ask questions related to the document.</li>
134
+ <li>Enable "Summarize Before Sending" for a brief summary of the document.</li>
135
+ <li>Click "Submit" to get your answers.</li>
136
+ </ul>
137
+ <p style="color: #ddd; font-size: 16px; line-height: 1.6;">Upload multiple files and get answers based on their contents.</p>
138
+ </div>
139
  """)
140
 
141
+ # Right Column for the main application content
142
  with gr.Column(scale=2, min_width=600):
143
  gr.Markdown("""
144
+ <div style="background: linear-gradient(135deg, #6e7dff, #1c2b58); padding: 20px; border-radius: 15px; box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2); font-family: 'Roboto', sans-serif;">
145
+ <h2 style="color: #fff; font-size: 36px; font-weight: bold; text-align: center; letter-spacing: 2px; text-transform: uppercase;">
146
+ Ask Your Document
147
+ </h2>
148
+ <p style="color: #ddd; font-size: 18px; text-align: center; line-height: 1.6;">
149
+ Get intelligent answers based on the content of your uploaded documents. Just ask a question!
150
+ </p>
151
+ </div>
152
  """)
153
 
154
  # File input
155
+ file_input = gr.File(
156
+ label="Upload Documents (PDF/DOCX)",
157
+ file_types=[".pdf", ".docx"],
158
+ file_count="multiple",
159
+ interactive=True
160
+ )
161
 
162
  # Question input
163
+ question_input = gr.Textbox(
164
+ label="Ask a question",
165
+ placeholder="Type your question here...",
166
+ interactive=True,
167
+ lines=2,
168
+ max_lines=4
169
+ )
170
+
171
+ # Summarize before sending checkbox
172
+ summarize_before_input = gr.Checkbox(
173
+ label="Summarize Before Sending",
174
+ value=False
175
+ )
176
 
177
  # Output text box
178
+ output = gr.Textbox(
179
+ label="Answer from LLM",
180
+ interactive=False,
181
+ lines=4,
182
+ max_lines=6
183
+ )
184
 
185
  # Submit button
186
  submit_button = gr.Button("Submit", icon="send")