flutterbasit committed on
Commit
273c4fc
·
verified ·
1 Parent(s): 901311a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -187
app.py CHANGED
@@ -1,192 +1,44 @@
1
  import gradio as gr
2
- import os
3
- from groq import Groq
4
- from langchain.text_splitter import CharacterTextSplitter
5
- from sentence_transformers import SentenceTransformer
6
- import faiss
7
- from PyPDF2 import PdfReader
8
- from docx import Document
9
- from transformers import pipeline
10
 
11
- # Initialize Sentence Transformer for embeddings
12
- model = SentenceTransformer('all-MiniLM-L6-v2')
13
- client = Groq(api_key=os.getenv("groq_api_key"))
14
- # Vector Store (FAISS)
15
- dimension = 384 # Embedding size
16
- index = faiss.IndexFlatL2(dimension)
17
 
18
- # Initialize Hugging Face summarization model
19
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 
 
 
 
 
20
 
21
- # Function to extract text from PDFs
22
- def extract_text_from_pdf(file_path):
23
- reader = PdfReader(file_path)
24
- text = ""
25
- for page in reader.pages:
26
- text += page.extract_text()
27
- return text
28
-
29
- # Function to extract text from DOCX
30
- def extract_text_from_docx(file_path):
31
- doc = Document(file_path)
32
- text = ""
33
- for paragraph in doc.paragraphs:
34
- text += paragraph.text + "\n"
35
- return text
36
-
37
- # Function to process files
38
- def process_files(files):
39
- texts = []
40
- for file in files:
41
- if file.name.endswith('.pdf'):
42
- texts.append(extract_text_from_pdf(file.name))
43
- elif file.name.endswith('.docx'):
44
- texts.append(extract_text_from_docx(file.name))
45
- return texts
46
-
47
- # Function to tokenize and chunk text
48
- def chunk_text(text, chunk_size=500, overlap=50):
49
- text_splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap)
50
- return text_splitter.split_text(text)
51
-
52
- # Function to create embeddings and populate FAISS index
53
- def create_embeddings_and_store(chunks):
54
- global index
55
- index = faiss.IndexFlatL2(dimension)
56
- for chunk in chunks:
57
- embedding = model.encode([chunk])
58
- embedding = embedding.astype('float32')
59
- index.add(embedding)
60
-
61
- # Function for summarizing the text before sending
62
- def summarize_text(text):
63
- summary = summarizer(text, max_length=300, min_length=100, do_sample=False)
64
- return summary[0]['summary_text']
65
-
66
- # Function to dynamically truncate context to fit the Groq API's token limit
67
- def truncate_context(context, max_tokens=4000):
68
- if len(context) > max_tokens:
69
- context = context[:max_tokens]
70
- return context
71
-
72
- # Function to query Groq with context and question
73
- def query_groq(question, context):
74
- try:
75
- if not question.strip():
76
- return "Error: Question is empty or invalid."
77
- if not context.strip():
78
- return "Error: No context available from the uploaded documents."
79
-
80
- max_context_tokens = 4000
81
- context = truncate_context(context, max_tokens=max_context_tokens)
82
-
83
- chat_completion = client.chat.completions.create(
84
- messages=[{"role": "system", "content": "You are a helpful assistant. Use the context provided to answer the question."},
85
- {"role": "assistant", "content": context},
86
- {"role": "user", "content": question}],
87
- model="llama3-8b-8192", stream=False)
88
- if chat_completion and chat_completion.choices:
89
- return chat_completion.choices[0].message.content
90
- else:
91
- return "Error: Received an unexpected response from Groq API."
92
- except Exception as e:
93
- return f"Error: {str(e)}"
94
-
95
- # Function to handle RAG pipeline
96
- def rag_pipeline(files, question, summarize_before_sending=False):
97
- try:
98
- if not files:
99
- return "Error: No files uploaded. Please upload at least one document."
100
-
101
- texts = process_files(files)
102
- if not texts:
103
- return "Error: Could not extract text from the uploaded files."
104
-
105
- combined_text = " ".join(texts)
106
-
107
- if summarize_before_sending:
108
- combined_text = summarize_text(combined_text)
109
-
110
- max_text_size = 4000
111
- combined_text = truncate_context(combined_text, max_tokens=max_text_size)
112
-
113
- chunks = chunk_text(combined_text)
114
- create_embeddings_and_store(chunks)
115
-
116
- answer = query_groq(question, combined_text)
117
- return answer
118
- except Exception as e:
119
- return f"Error: {str(e)}"
120
-
121
- # Enhanced UI with modern and clean style
122
- with gr.Blocks() as app:
123
  with gr.Row():
124
- # Left Column for instructions
125
- with gr.Column(scale=1, min_width=250):
126
- gr.Markdown("""
127
- <div style="background: linear-gradient(145deg, #6e7dff, #1c2b58); padding: 30px; border-radius: 12px; box-shadow: 0 5px 15px rgba(0, 0, 0, 0.1); font-family: 'Roboto', sans-serif;">
128
- <h2 style="color: #fff; font-size: 32px; font-weight: bold;">DocAI: Document Assistant</h2>
129
- <p style="color: #ddd; font-size: 18px;">Welcome to DocAI! Upload your documents and get intelligent answers based on their content.</p>
130
- <p style="color: #ddd; font-size: 16px; line-height: 1.6;"><strong>Steps to use:</strong></p>
131
- <ul style="color: #ddd; font-size: 16px; line-height: 1.6;">
132
- <li>Upload your PDF or DOCX files.</li>
133
- <li>Ask questions related to the document.</li>
134
- <li>Enable "Summarize Before Sending" for a brief summary of the document.</li>
135
- <li>Click "Submit" to get your answers.</li>
136
- </ul>
137
- <p style="color: #ddd; font-size: 16px; line-height: 1.6;">Upload multiple files and get answers based on their contents.</p>
138
- </div>
139
- """)
140
-
141
- # Right Column for the main application content
142
- with gr.Column(scale=2, min_width=600):
143
- gr.Markdown("""
144
- <div style="background: linear-gradient(135deg, #6e7dff, #1c2b58); padding: 20px; border-radius: 15px; box-shadow: 0 5px 15px rgba(0, 0, 0, 0.2); font-family: 'Roboto', sans-serif;">
145
- <h2 style="color: #fff; font-size: 36px; font-weight: bold; text-align: center; letter-spacing: 2px; text-transform: uppercase;">
146
- Ask Your Document
147
- </h2>
148
- <p style="color: #ddd; font-size: 18px; text-align: center; line-height: 1.6;">
149
- Get intelligent answers based on the content of your uploaded documents. Just ask a question!
150
- </p>
151
- </div>
152
- """)
153
-
154
- # File input
155
- file_input = gr.File(
156
- label="Upload Documents (PDF/DOCX)",
157
- file_types=[".pdf", ".docx"],
158
- file_count="multiple",
159
- interactive=True
160
- )
161
-
162
- # Question input
163
- question_input = gr.Textbox(
164
- label="Ask a question",
165
- placeholder="Type your question here...",
166
- interactive=True,
167
- lines=2,
168
- max_lines=4
169
- )
170
-
171
- # Summarize before sending checkbox
172
- summarize_before_input = gr.Checkbox(
173
- label="Summarize Before Sending",
174
- value=False
175
- )
176
-
177
- # Output text box
178
- output = gr.Textbox(
179
- label="Answer from LLM",
180
- interactive=False,
181
- lines=4,
182
- max_lines=6
183
- )
184
-
185
- # Submit button
186
- submit_button = gr.Button("Submit", icon="send")
187
-
188
- # Apply the logic for the button to trigger the RAG pipeline
189
- submit_button.click(rag_pipeline, inputs=[file_input, question_input], outputs=output)
190
-
191
- # Launch the app
192
- app.launch()
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
2
 
3
def greet(name, intensity):
    """Build a shouted greeting repeated ``intensity`` times.

    Args:
        name: Text to greet; it is upper-cased in the output.
        intensity: Repeat count. It is truncated to an integer because a
            slider widget may deliver a float (e.g. ``3.0``), and
            ``str * float`` raises ``TypeError``.

    Returns:
        ``"Hello NAME!"`` concatenated ``int(intensity)`` times; an empty
        string when the truncated count is zero or negative.
    """
    # Sequence repetition only accepts integers, so coerce before multiplying.
    return f"Hello {name.upper()}!" * int(intensity)
 
 
 
 
5
 
6
+ with gr.Blocks(theme=gr.themes.Soft()) as demo: # Use a built-in theme
7
+ gr.Markdown(
8
+ """
9
+ # Welcome to My Colorful Gradio App! 👋
10
+ This is a simple example demonstrating how to create a visually appealing Gradio interface.
11
+ """
12
+ )
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  with gr.Row():
15
+ with gr.Column():
16
+ name = gr.Textbox(label="Enter your name", placeholder="Your name here")
17
+ intensity = gr.Slider(minimum=1, maximum=10, value=1, label="Intensity")
18
+ greet_btn = gr.Button("Greet Me!", variant="primary") # Use a primary button
19
+
20
+ with gr.Column():
21
+ output = gr.Textbox(label="Greeting Output", lines=4)
22
+
23
+ greet_btn.click(greet, inputs=[name, intensity], outputs=output)
24
+
25
+ gr.Examples(
26
+ examples=[
27
+ ["John Doe", 3],
28
+ ["Jane Smith", 1],
29
+ ["A Very Long Name", 5],
30
+ ],
31
+ inputs=[name, intensity],
32
+ outputs=output,
33
+ label="Try these examples:",
34
+ )
35
+
36
+ gr.Markdown(
37
+ """
38
+ ---
39
+ Created with ❤️ using Gradio.
40
+ """
41
+ )
42
+
43
+
44
+ demo.launch()