cuneytkaya committed on
Commit
05c845a
·
verified ·
1 Parent(s): 6588a85

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +440 -0
app.py ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import gradio as gr
4
+ import google.generativeai as genai
5
+ from PIL import Image
6
+ import numpy as np
7
+ from huggingface_hub import HfFolder
8
+ from dotenv import load_dotenv
9
+ import traceback
10
+ import pytesseract
11
+ import cv2
12
+ import time
13
+
14
# Load environment variables (for example from a local .env file)
load_dotenv()

# Set API key for Gemini.
# The key is read from the environment only. HfFolder.get_token() accepts no
# arguments and returns the Hugging Face Hub access token, so the previous
# fallback raised TypeError whenever the variable was unset and could never
# have produced a Gemini key.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    raise ValueError("Gemini API key not found. Please set the GEMINI_API_KEY environment variable.")
genai.configure(api_key=GEMINI_API_KEY)

# Define model names - using latest models
CLASSIFICATION_MODEL = "gemini-1.5-flash"  # For classification
SOLUTION_MODEL = "gemini-1.5-pro-latest"  # For solution generation
EXPLANATION_MODEL = "gemini-1.5-pro-latest"  # For explanation generation
SIMILAR_MODEL = "gemini-1.5-pro-latest"  # For similar problems generation

print(f"Using models: Classification: {CLASSIFICATION_MODEL}, Solution: {SOLUTION_MODEL}, Explanation: {EXPLANATION_MODEL}, Similar: {SIMILAR_MODEL}")

# Set up Gemini for image analysis
MODEL_IMAGE = "gemini-1.5-pro-latest"  # Use Gemini for OCR as well

# Set Tesseract path.
# Priority: an explicit TESSERACT_CMD environment variable, then the macOS
# Homebrew location if that binary actually exists. Otherwise pytesseract's
# own default is left untouched so the binary is resolved from PATH.
_HOMEBREW_TESSERACT = '/opt/homebrew/bin/tesseract'
_tesseract_cmd = os.getenv("TESSERACT_CMD")
if not _tesseract_cmd and os.path.isfile(_HOMEBREW_TESSERACT):
    _tesseract_cmd = _HOMEBREW_TESSERACT
if _tesseract_cmd:
    pytesseract.pytesseract.tesseract_cmd = _tesseract_cmd
36
+
37
+ # Extract text using Gemini directly (with Tesseract as fallback)
38
_TESSERACT_CONFIG = r'--oem 1 --psm 6'


def _ocr_with_tesseract(image):
    """Run Tesseract on a PIL image or NumPy array and return the stripped text."""
    pixels = np.array(image) if isinstance(image, Image.Image) else image
    # A three-dimensional array carries colour channels; convert it to
    # grayscale before handing it to Tesseract.
    if len(pixels.shape) == 3:
        pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2GRAY)
    return pytesseract.image_to_string(pixels, config=_TESSERACT_CONFIG).strip()


def extract_text_with_gemini(image):
    """Extract text from an image with Gemini, using Tesseract as a fallback.

    Args:
        image: A PIL image or a NumPy array. Arrays are converted to PIL
            images before being sent to the model.

    Returns:
        The extracted text. If the Gemini call raises, the Tesseract result
        is returned instead; if that also raises, a string starting with
        "Error extracting text:" describing the Gemini failure is returned.
        The function itself does not raise.
    """
    try:
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)

        model = genai.GenerativeModel(MODEL_IMAGE)
        prompt = """
        Extract ALL text, numbers, and mathematical equations from this image precisely.
        Include ALL symbols, numbers, letters, and mathematical notation exactly as they appear.
        Format any equations properly and maintain their layout.
        Don't explain the content, just extract the text verbatim.
        """

        response = model.generate_content([prompt, image])
        extracted_text = response.text.strip()

        # If Gemini returns a very short result, try Tesseract as fallback
        if len(extracted_text) < 10:
            print("Gemini returned limited text, trying Tesseract as fallback")
            try:
                tesseract_text = _ocr_with_tesseract(image)
            except Exception as ocr_error:
                # The second opinion is optional: a missing or failing
                # Tesseract must not throw away a short but valid Gemini
                # result such as "2+2=?".
                print(f"Fallback OCR Error: {ocr_error}")
                tesseract_text = ""

            # Both strings are stripped, so padding whitespace from Tesseract
            # can no longer win this length comparison.
            if len(tesseract_text) > len(extracted_text):
                extracted_text = tesseract_text

        print(f"Extracted text: {extracted_text[:100]}...")
        return extracted_text

    except Exception as e:
        print(f"Extraction Error: {e}")
        print(traceback.format_exc())
        try:
            return _ocr_with_tesseract(image)
        except Exception as e2:
            print(f"Fallback OCR Error: {e2}")
            return f"Error extracting text: {str(e)}"
95
+
96
+ # Classify the math problem using Gemini 1.5 Flash
97
def _unknown_classification():
    """Return the placeholder classification used when the model reply is unusable."""
    return {
        "category": "Unknown",
        "subtopic": "Unknown",
        "difficulty": "Unknown",
        "key_concepts": ["Unknown"]
    }


def classify_with_gemini_flash(math_problem):
    """Classify a math problem with the classification Gemini model.

    Args:
        math_problem: Problem statement text inserted into the prompt.

    Returns:
        A dict. On success it is the JSON object parsed from the model
        reply (requested fields: "category", "subtopic", "difficulty",
        "key_concepts"). If the reply is not valid JSON, or is JSON but not
        an object, every field is "Unknown". If anything else raises, the
        first three fields are "Error" and "key_concepts" holds the error
        message. The function itself does not raise.
    """
    try:
        model = genai.GenerativeModel(
            model_name=CLASSIFICATION_MODEL,
            generation_config={
                "temperature": 0.1,
                "top_p": 0.95,
                "max_output_tokens": 150,
                "response_mime_type": "application/json",
            }
        )

        prompt = f"""
        Task: Classify the following math problem.

        PROBLEM: {math_problem}

        Classify this math problem according to:
        1. Primary category (e.g., Algebra, Calculus, Geometry, Trigonometry, Statistics, Number Theory)
        2. Specific subtopic (e.g., Linear Equations, Derivatives, Integrals, Probability)
        3. Difficulty level (Basic, Intermediate, Advanced)
        4. Key concepts involved

        Format the response as a JSON object with the fields: "category", "subtopic", "difficulty", "key_concepts".
        """

        response = model.generate_content(prompt)
        raw_reply = response.text
        try:
            classification = json.loads(raw_reply)
        except json.JSONDecodeError:
            print(f"JSON Decode Error: Unable to parse response: {raw_reply}")
            return _unknown_classification()

        # A reply can be valid JSON without being an object. Unwrap the
        # common single-element list form and reject anything else so that
        # callers always receive a dict.
        if isinstance(classification, list) and len(classification) == 1:
            classification = classification[0]
        if not isinstance(classification, dict):
            print(f"Classification Error: expected a JSON object, got: {raw_reply}")
            return _unknown_classification()
        return classification
    except Exception as e:
        print(f"Classification Error: {e}")
        print(traceback.format_exc())
        return {
            "category": "Error",
            "subtopic": "Error",
            "difficulty": "Error",
            "key_concepts": [f"Error: {str(e)}"]
        }
145
+
146
+ # Solve the math problem using Gemini model
147
def _normalized_classification(classification):
    """Return a copy of *classification* with every field the prompt needs filled in."""
    # Work on a shallow copy so the caller's dict is never modified.
    normalized = dict(classification) if isinstance(classification, dict) else {}
    for field in ("category", "subtopic", "difficulty"):
        if not normalized.get(field):
            normalized[field] = "Unknown"
    if not normalized.get("key_concepts"):
        normalized["key_concepts"] = ["Unknown"]
    return normalized


def solve_with_gemini_pro(math_problem, classification):
    """Generate a step-by-step solution with the solution Gemini model.

    Args:
        math_problem: Problem statement text inserted into the prompt.
        classification: Classification of the problem, normally a dict with
            "category", "subtopic", "difficulty" and "key_concepts". Missing
            or empty fields, or a non-dict value, are replaced by "Unknown"
            placeholders. The argument is not modified.

    Returns:
        The model's reply text, or a string starting with
        "Error generating solution:" if anything raises.
    """
    try:
        details = _normalized_classification(classification)

        # Format key concepts as a string. Items are converted with str()
        # because the model may return non-string entries, which a plain
        # join would reject.
        concepts = details["key_concepts"]
        if isinstance(concepts, (list, tuple)):
            key_concepts = ", ".join(str(concept) for concept in concepts)
        else:
            key_concepts = str(concepts)

        model = genai.GenerativeModel(
            model_name=SOLUTION_MODEL,
            generation_config={
                "temperature": 0.2,
                "top_p": 0.9,
                "max_output_tokens": 1000,
            }
        )

        prompt = f"""
        Task: Solve the following math problem with clear step-by-step explanations.

        PROBLEM: {math_problem}

        CLASSIFICATION:
        - Category: {details["category"]}
        - Subtopic: {details["subtopic"]}
        - Difficulty: {details["difficulty"]}
        - Key Concepts: {key_concepts}

        Provide a complete solution following these guidelines:
        1. Start with an overview of the approach
        2. Break down the problem into clear, logical steps
        3. Explain each step thoroughly, mentioning the mathematical principles applied
        4. Show all work and calculations
        5. Verify the answer if possible
        6. Summarize the key takeaway from this problem

        Format the solution to be readable on a mobile device, with appropriate spacing between steps.
        """

        response = model.generate_content(prompt)
        return response.text
    except Exception as e:
        print(f"Solution Error: {e}")
        print(traceback.format_exc())
        return f"Error generating solution: {str(e)}"
209
+
210
+ # Explain the solution in more detail
211
def explain_solution(math_problem, solution):
    """Ask the explanation model for a deeper walkthrough of an existing solution.

    Args:
        math_problem: Problem statement text inserted into the prompt.
        solution: Previously generated solution text inserted into the prompt.

    Returns:
        The model's reply text, or a string starting with
        "Error generating explanation:" if anything raises.
    """
    try:
        print("Generating detailed explanation...")

        generation_settings = {
            "temperature": 0.3,
            "top_p": 0.95,
            "max_output_tokens": 1500,
        }
        explainer = genai.GenerativeModel(
            model_name=EXPLANATION_MODEL,
            generation_config=generation_settings,
        )

        request_text = f"""
        Task: Provide a more detailed explanation of the solution to this math problem.

        PROBLEM: {math_problem}
        SOLUTION: {solution}

        Provide a more comprehensive explanation that:
        1. Breaks down complex steps into simpler components
        2. Explains the underlying mathematical principles in depth
        3. Connects this problem to fundamental concepts
        4. Offers visual or intuitive ways to understand the concepts
        5. Highlights common mistakes students make with this type of problem
        6. Suggests alternative solution approaches if applicable

        Make the explanation accessible to a student who is struggling with this topic.
        """

        reply = explainer.generate_content(request_text)
        return reply.text
    except Exception as error:
        print(f"Explanation Error: {error}")
        print(traceback.format_exc())
        return f"Error generating explanation: {str(error)}"
248
+
249
+ # Generate similar practice problems
250
def generate_similar_problems(math_problem, classification):
    """Ask the similar-problems model for three practice problems.

    Args:
        math_problem: Original problem statement inserted into the prompt.
        classification: Classification data; it is serialised with
            json.dumps and inserted into the prompt.

    Returns:
        The model's reply text, or a string starting with
        "Error generating similar problems:" if anything raises.
    """
    try:
        print("Generating similar problems...")

        generation_settings = {
            "temperature": 0.7,
            "top_p": 0.95,
            "max_output_tokens": 1000,
        }
        generator = genai.GenerativeModel(
            model_name=SIMILAR_MODEL,
            generation_config=generation_settings,
        )

        # Pretty-printed JSON form of the classification for the prompt
        classification_str = json.dumps(classification, indent=2)

        request_text = f"""
        Task: Generate similar practice math problems based on the following problem.

        ORIGINAL PROBLEM: {math_problem}
        CLASSIFICATION: {classification_str}

        Generate 3 similar practice problems that:
        1. Cover the same mathematical concepts and principles
        2. Vary in difficulty (one easier, one similar, one harder)
        3. Use different numerical values or variables
        4. Test the same underlying skills

        For each problem:
        - Provide the complete problem statement
        - Include a brief hint for solving it
        - Provide the correct answer (but not the full solution)

        Format as three separate problems with clear numbering.
        """

        reply = generator.generate_content(request_text)
        return reply.text
    except Exception as error:
        print(f"Similar Problems Error: {error}")
        print(traceback.format_exc())
        return f"Error generating similar problems: {str(error)}"
293
+
294
+ # Main function for processing images
295
# Prefix of the message extract_text_with_gemini returns when every
# extraction path has failed.
_EXTRACTION_ERROR_PREFIX = "Error extracting text:"


def process_image(image, progress=gr.Progress()):
    """Run the extract -> classify -> solve pipeline for one uploaded image.

    Args:
        image: The uploaded image, or None when nothing was uploaded.
        progress: Gradio progress tracker used to report pipeline stages.

    Returns:
        A 5-tuple matching the Gradio outputs wired to the process button:
        (image to display, extracted text, classification as a JSON string,
        solution text, problem text to keep in state). When the pipeline
        stops early, the text slots carry a user-facing message instead.
    """
    try:
        if image is None:
            return None, "No image uploaded", "No image uploaded", "No image uploaded", "No image uploaded"

        progress(0, desc="Starting processing...")

        # Step 1: Extract text with Gemini model
        progress(0.4, desc="Extracting text with Gemini Pro Vision...")
        extracted_text = extract_text_with_gemini(image)

        if not extracted_text or not extracted_text.strip():
            return image, "No text was extracted from the image. Please try a clearer image.", "No text extracted", "No text was extracted from the image.", ""

        # The extractor reports total failure as a message string. Stop here
        # so that message is shown to the user instead of being classified
        # and "solved" as if it were the math problem; the state slot stays
        # empty so the follow-up buttons ask for a new image.
        if extracted_text.startswith(_EXTRACTION_ERROR_PREFIX):
            return image, extracted_text, "Error", "Error", ""

        # Step 2: Classify with Gemini model
        progress(0.6, desc=f"Classifying problem with {CLASSIFICATION_MODEL}...")
        classification = classify_with_gemini_flash(extracted_text)
        classification_json = json.dumps(classification, indent=2)

        # Step 3: Solve with Gemini model
        progress(0.8, desc=f"Solving problem with {SOLUTION_MODEL}...")
        solution = solve_with_gemini_pro(extracted_text, classification)

        # Complete
        progress(1.0, desc="Processing complete")

        return image, extracted_text, classification_json, solution, extracted_text

    except Exception as e:
        print(f"Process Image Error: {e}")
        print(traceback.format_exc())
        return None, f"Error processing image: {str(e)}", "Error", "Error", ""
328
+
329
+ # Create the Gradio interface
330
with gr.Blocks(title="NerdAI Math Problem Solver") as demo:
    gr.Markdown("# NerdAI Math Problem Solver")
    gr.Markdown("Upload an image of a math problem to get a step-by-step solution")

    # Store state variables: the problem text of the last processed image
    extracted_text_state = gr.State("")

    with gr.Row():
        with gr.Column(scale=1):
            # Input section
            input_image = gr.Image(label="Upload Math Problem Image", type="pil")
            process_btn = gr.Button("Process Image", variant="primary")

        with gr.Column(scale=1):
            # Processed image output
            processed_image = gr.Image(label="Processed Image")

    with gr.Row():
        # Text extraction output
        extracted_text = gr.Textbox(label="Extracted Text", lines=3)

    with gr.Row():
        # Classification output
        classification = gr.Textbox(label="Problem Classification", lines=6)

    with gr.Row():
        # Solution output
        solution = gr.Markdown(label="Solution")

    with gr.Row():
        explain_btn = gr.Button("Explain It", variant="secondary")
        similar_btn = gr.Button("Similar Questions", variant="secondary")

    with gr.Row():
        # Additional outputs
        with gr.Tabs():
            with gr.TabItem("Detailed Explanation"):
                explanation = gr.Markdown()
            with gr.TabItem("Similar Practice Problems"):
                similar_problems = gr.Markdown()

    # Event handlers for the buttons
    def explain_button_handler(math_problem, solution_text):
        """Handler for Explain It button.

        Returns a prompt to process an image first when no problem text is
        stored, otherwise the result of explain_solution.
        """
        print("Explain button clicked")
        if not math_problem or math_problem == "No image uploaded":
            return "Please process an image first"
        return explain_solution(math_problem, solution_text)

    def similar_button_handler(math_problem, classification_json):
        """Handler for Similar Questions button.

        Parses the classification text box content, fills in "Unknown" for
        anything missing or unparsable, and returns the result of
        generate_similar_problems (or an error message if that step raises).
        """
        print("Similar button clicked")
        if not math_problem or math_problem == "No image uploaded":
            return "Please process an image first"
        try:
            # Parse classification JSON. Only parse failures are absorbed:
            # ValueError covers json.JSONDecodeError, TypeError covers a
            # non-string value. A bare except would also swallow
            # KeyboardInterrupt and SystemExit.
            try:
                parsed = json.loads(classification_json)
            except (TypeError, ValueError):
                parsed = None

            # Validate classification: anything but a JSON object is
            # replaced by an empty dict and filled with placeholders below.
            if not isinstance(parsed, dict):
                parsed = {}

            # Ensure fields exist
            for field in ("category", "subtopic", "difficulty"):
                if not parsed.get(field):
                    parsed[field] = "Unknown"

            if not parsed.get("key_concepts"):
                parsed["key_concepts"] = ["Unknown"]

            return generate_similar_problems(math_problem, parsed)
        except Exception as e:
            print(f"Error in similar_button_handler: {e}")
            print(traceback.format_exc())
            return f"Error generating similar problems: {str(e)}"

    # Set up event handlers
    process_btn.click(
        fn=process_image,
        inputs=[input_image],
        outputs=[processed_image, extracted_text, classification, solution, extracted_text_state]
    )

    explain_btn.click(
        fn=explain_button_handler,
        inputs=[extracted_text_state, solution],
        outputs=explanation
    )

    similar_btn.click(
        fn=similar_button_handler,
        inputs=[extracted_text_state, classification],
        outputs=similar_problems
    )

# Launch the app
if __name__ == "__main__":
    demo.launch()