flutterbasit committed on
Commit
c9aca66
·
verified ·
1 Parent(s): 273c4fc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +187 -39
app.py CHANGED
@@ -1,44 +1,192 @@
1
  import gradio as gr
 
 
 
 
 
 
 
 
2
 
3
def greet(name, intensity):
    """Return the greeting 'Hello <NAME>!' repeated *intensity* times."""
    greeting = f"Hello {name.upper()}!"
    return intensity * greeting
 
 
 
 
5
 
6
# Legacy greeting UI (the version removed by this commit): a two-column
# Gradio app that wires the greet() callback to a name/intensity input pair.
with gr.Blocks(theme=gr.themes.Soft()) as demo:  # Use a built-in theme
    gr.Markdown(
        """
        # Welcome to My Colorful Gradio App! 👋
        This is a simple example demonstrating how to create a visually appealing Gradio interface.
        """
    )

    with gr.Row():
        with gr.Column():
            # Input widgets: name text, repetition slider, and trigger button.
            name = gr.Textbox(label="Enter your name", placeholder="Your name here")
            intensity = gr.Slider(minimum=1, maximum=10, value=1, label="Intensity")
            greet_btn = gr.Button("Greet Me!", variant="primary")  # Use a primary button

        with gr.Column():
            output = gr.Textbox(label="Greeting Output", lines=4)

    # Clicking the button runs greet(name, intensity) and shows the result.
    greet_btn.click(greet, inputs=[name, intensity], outputs=output)

    # Clickable sample inputs that pre-fill the form.
    gr.Examples(
        examples=[
            ["John Doe", 3],
            ["Jane Smith", 1],
            ["A Very Long Name", 5],
        ],
        inputs=[name, intensity],
        outputs=output,
        label="Try these examples:",
    )

    gr.Markdown(
        """
        ---
        Created with ❤️ using Gradio.
        """
    )


demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import os
3
+ from groq import Groq
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from sentence_transformers import SentenceTransformer
6
+ import faiss
7
+ from PyPDF2 import PdfReader
8
+ from docx import Document
9
+ from transformers import pipeline
10
 
11
# Initialize Sentence Transformer for embeddings
model = SentenceTransformer('all-MiniLM-L6-v2')
# Groq API client; the key is read from the environment, never hard-coded.
client = Groq(api_key=os.getenv("groq_api_key"))
# Vector Store (FAISS)
dimension = 384  # Embedding size of all-MiniLM-L6-v2
index = faiss.IndexFlatL2(dimension)

# Initialize Hugging Face summarization model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 
 
 
 
 
20
 
21
# Function to extract text from PDFs
def extract_text_from_pdf(file_path):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        file_path: Path to the PDF file on disk.

    Returns:
        The concatenated page text (may be empty for image-only PDFs).
    """
    reader = PdfReader(file_path)
    text = ""
    for page in reader.pages:
        # extract_text() can return None for pages with no extractable text
        # (e.g. scanned images); guard so "" is appended instead of raising
        # TypeError on str + None.
        text += page.extract_text() or ""
    return text
28
+
29
# Function to extract text from DOCX
def extract_text_from_docx(file_path):
    """Return the text of every paragraph in a DOCX file, one per line."""
    document = Document(file_path)
    # Each paragraph contributes its text followed by a newline.
    return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)
36
+
37
# Function to process files
def process_files(files):
    """Extract text from each uploaded file.

    Args:
        files: Uploaded file objects; each must expose a ``.name`` path.

    Returns:
        A list with one extracted-text string per supported file.
        Files with unsupported extensions are skipped.
    """
    texts = []
    for file in files:
        # Compare extensions case-insensitively so e.g. "report.PDF"
        # uploaded from Windows is not silently dropped.
        lower_name = file.name.lower()
        if lower_name.endswith('.pdf'):
            texts.append(extract_text_from_pdf(file.name))
        elif lower_name.endswith('.docx'):
            texts.append(extract_text_from_docx(file.name))
    return texts
46
+
47
# Function to tokenize and chunk text
def chunk_text(text, chunk_size=500, overlap=50):
    """Split *text* into overlapping character chunks for embedding."""
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap)
    chunks = splitter.split_text(text)
    return chunks
51
+
52
# Function to create embeddings and populate FAISS index
def create_embeddings_and_store(chunks):
    """Rebuild the global FAISS index from the given text chunks."""
    global index
    # Start from a fresh index so vectors from a previous upload never
    # leak into the current search space.
    index = faiss.IndexFlatL2(dimension)
    for piece in chunks:
        # FAISS expects float32 row vectors.
        vector = model.encode([piece]).astype('float32')
        index.add(vector)
60
+
61
# Function for summarizing the text before sending
def summarize_text(text):
    """Summarize *text* deterministically with the BART summarizer."""
    result = summarizer(text, max_length=300, min_length=100, do_sample=False)
    return result[0]['summary_text']
65
+
66
# Function to dynamically truncate context to fit the Groq API's token limit
def truncate_context(context, max_tokens=4000):
    """Clamp *context* to at most *max_tokens* characters.

    Note: despite the name, the limit is applied per character, not per
    model token.
    """
    # Slicing is a no-op when the text is already short enough.
    return context[:max_tokens]
71
+
72
# Function to query Groq with context and question
def query_groq(question, context):
    """Ask the Groq chat model *question* grounded in *context*.

    Returns the model's answer, or an "Error: ..." string on any failure
    (blank inputs, API errors) rather than raising.
    """
    try:
        # Guard clauses: refuse blank questions or an empty context.
        if not question.strip():
            return "Error: Question is empty or invalid."
        if not context.strip():
            return "Error: No context available from the uploaded documents."

        # Keep the prompt within the model's context budget.
        context = truncate_context(context, max_tokens=4000)

        messages = [
            {"role": "system", "content": "You are a helpful assistant. Use the context provided to answer the question."},
            {"role": "assistant", "content": context},
            {"role": "user", "content": question},
        ]
        response = client.chat.completions.create(
            messages=messages, model="llama3-8b-8192", stream=False)

        if response and response.choices:
            return response.choices[0].message.content
        return "Error: Received an unexpected response from Groq API."
    except Exception as e:
        return f"Error: {str(e)}"
94
+
95
# Function to handle RAG pipeline
def rag_pipeline(files, question, summarize_before_sending=False):
    """Run the end-to-end document Q&A flow.

    Args:
        files: Uploaded PDF/DOCX file objects from the Gradio File input.
        question: The user's question about the documents.
        summarize_before_sending: When True, summarize the combined text
            before querying the LLM.

    Returns:
        The LLM answer, or an "Error: ..." string describing the failure.
    """
    try:
        if not files:
            return "Error: No files uploaded. Please upload at least one document."

        texts = process_files(files)
        if not texts:
            return "Error: Could not extract text from the uploaded files."

        combined_text = " ".join(texts)

        if summarize_before_sending:
            combined_text = summarize_text(combined_text)

        # Character-based truncation to stay under the Groq context limit.
        max_text_size = 4000
        combined_text = truncate_context(combined_text, max_tokens=max_text_size)

        chunks = chunk_text(combined_text)
        create_embeddings_and_store(chunks)
        # NOTE(review): the FAISS index built above is never queried — the
        # full (truncated) text is sent to the LLM instead of the top-k
        # retrieved chunks, so this is not yet true RAG retrieval. Confirm
        # whether a similarity search step was intended here.

        answer = query_groq(question, combined_text)
        return answer
    except Exception as e:
        return f"Error: {str(e)}"
120
+
121
# Enhanced UI with modern and clean style
# Two-column layout: styled instructions on the left, the upload/question
# form on the right, wired to rag_pipeline() via the Submit button.
with gr.Blocks() as app:
    with gr.Row():
        # Left Column for instructions
        with gr.Column(scale=1, min_width=250):
            gr.Markdown("""
            <div style="background: linear-gradient(145deg, #6e7dff, #1c2b58); padding: 30px; border-radius: 12px; box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1); font-family: 'Roboto', sans-serif;">
            <h2 style="color: #fff; font-size: 32px; font-weight: bold;">DocAI: Document Assistant</h2>
            <p style="color: #ddd; font-size: 18px;">Welcome to DocAI! Upload your documents and get intelligent answers based on their content.</p>
            <p style="color: #ddd; font-size: 16px; line-height: 1.6;"><strong>Steps to use:</strong></p>
            <ul style="color: #ddd; font-size: 16px; line-height: 1.6;">
            <li>Upload your PDF or DOCX files.</li>
            <li>Ask questions related to the document.</li>
            <li>Enable "Summarize Before Sending" for a brief summary of the document.</li>
            <li>Click "Submit" to get your answers.</li>
            </ul>
            <p style="color: #ddd; font-size: 16px; line-height: 1.6;">Upload multiple files and get answers based on their contents.</p>
            </div>
            """)

        # Right Column for the main application content
        with gr.Column(scale=2, min_width=600):
            gr.Markdown("""
            <div style="background: linear-gradient(135deg, #6e7dff, #1c2b58); padding: 20px; border-radius: 15px; box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2); font-family: 'Roboto', sans-serif;">
            <h2 style="color: #fff; font-size: 36px; font-weight: bold; text-align: center; letter-spacing: 2px; text-transform: uppercase;">
            Ask Your Document
            </h2>
            <p style="color: #ddd; font-size: 18px; text-align: center; line-height: 1.6;">
            Get intelligent answers based on the content of your uploaded documents. Just ask a question!
            </p>
            </div>
            """)

            # File input — restricted to the formats process_files() handles.
            file_input = gr.File(
                label="Upload Documents (PDF/DOCX)",
                file_types=[".pdf", ".docx"],
                file_count="multiple",
                interactive=True
            )

            # Question input
            question_input = gr.Textbox(
                label="Ask a question",
                placeholder="Type your question here...",
                interactive=True,
                lines=2,
                max_lines=4
            )

            # Summarize before sending checkbox
            summarize_before_input = gr.Checkbox(
                label="Summarize Before Sending",
                value=False
            )

            # Output text box (read-only; filled by the pipeline result)
            output = gr.Textbox(
                label="Answer from LLM",
                interactive=False,
                lines=4,
                max_lines=6
            )

            # Submit button
            submit_button = gr.Button("Submit", icon="send")

            # Apply the logic for the button to trigger the RAG pipeline
            submit_button.click(rag_pipeline, inputs=[file_input, question_input, summarize_before_input], outputs=output)

# Launch the app
app.launch()