# Hugging Face Spaces status when this file was captured: Sleeping
import os | |
import json | |
import gradio as gr | |
import gradio.themes as gr_themes | |
import google.generativeai as genai | |
from PIL import Image | |
import numpy as np | |
from huggingface_hub import HfFolder | |
from dotenv import load_dotenv | |
import traceback | |
import time | |
# Load variables from a local .env file (if one exists) into the process
# environment before the key is looked up.
load_dotenv()

# The key is read from the environment. The previous fallback,
# ``HfFolder.get_token("GEMINI_API_KEY")``, was removed: ``get_token`` accepts
# no arguments (so the call raised TypeError whenever the variable was unset),
# and it returns the Hugging Face Hub token, which must not be handed to the
# Gemini client as an API key.
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
if not GEMINI_API_KEY:
    try:
        # Last resort: a ``secrets`` module exposing GEMINI_API_KEY.
        # NOTE(review): ``secrets`` is also a standard-library module, so a
        # project-local secrets.py is only found when it shadows the stdlib
        # one on sys.path; otherwise the AttributeError branch below fires.
        import secrets
        GEMINI_API_KEY = secrets.GEMINI_API_KEY
    except (ImportError, AttributeError) as exc:
        raise ValueError("Gemini API key not found. Please set the GEMINI_API_KEY environment variable or add it as a Secret if running on Hugging Face Spaces.") from exc
# Covers the case where the fallback module defines an empty key.
if not GEMINI_API_KEY:
    raise ValueError("Gemini API key not found. Please set the GEMINI_API_KEY environment variable.")
genai.configure(api_key=GEMINI_API_KEY)
# Model used for the classification step.
CLASSIFICATION_MODEL = "gemini-1.5-flash"
# Solving, explaining, generating similar problems and reading the image all
# use the same model name.
SOLUTION_MODEL = EXPLANATION_MODEL = SIMILAR_MODEL = MODEL_IMAGE = "gemini-1.5-pro-latest"

# Startup banner listing which model serves each step.
print(
    "Using models: "
    f"Classification: {CLASSIFICATION_MODEL}, "
    f"Solution: {SOLUTION_MODEL}, "
    f"Explanation: {EXPLANATION_MODEL}, "
    f"Similar: {SIMILAR_MODEL}, "
    f"Image Analysis: {MODEL_IMAGE}"
)
def extract_text_with_gemini(image):
    """Extract text and math notation from an image with the MODEL_IMAGE Gemini model.

    Args:
        image: A PIL ``Image.Image`` or a NumPy array; arrays are converted
            with ``Image.fromarray``.

    Returns:
        The stripped text returned by the model. On any failure (unsupported
        input type, API error, empty model output) a message that starts with
        ``"Error:"`` is returned instead, so callers that test
        ``startswith("Error:")`` reliably detect the failure.
    """
    try:
        if isinstance(image, np.ndarray):
            image = Image.fromarray(image)
        elif not isinstance(image, Image.Image):
            raise TypeError("Input must be a PIL Image or NumPy array")
        print("Attempting text extraction with Gemini Pro Vision...")
        model = genai.GenerativeModel(MODEL_IMAGE)
        # The prompt text is kept flush-left so no source indentation leaks
        # into the string sent to the model.
        prompt = """
Extract ALL text, numbers, and mathematical equations from this image precisely.
Include ALL symbols, numbers, letters, and mathematical notation exactly as they appear.
Format any equations properly and maintain their layout.
Don't explain the content, just extract the text verbatim.
"""
        response = model.generate_content([prompt, image], request_options={'timeout': 120})
        extracted_text = response.text.strip()
        if not extracted_text:
            print("Gemini returned empty text.")
            return "Error: Gemini could not extract any text from the image."
        print(f"Gemini extracted text (first 100 chars): {extracted_text[:100]}...")
        return extracted_text
    except Exception as e:
        print(f"Gemini Extraction Error: {e}")
        print(traceback.format_exc())
        # Must begin with "Error:"; the previous "Error during ..." wording was
        # not recognised by the pipeline's failure checks, so the error text
        # itself was classified and "solved" as if it were a math problem.
        return f"Error: Text extraction with Gemini failed: {e}"
def classify_with_gemini_flash(math_problem):
    """Ask the CLASSIFICATION_MODEL Gemini model to classify a math problem.

    Args:
        math_problem: Problem text. Empty input or text starting with
            ``"Error:"`` is rejected without calling the model.

    Returns:
        A dict with the keys ``category``, ``subtopic``, ``difficulty`` and
        ``key_concepts`` (always a list). Invalid input, an unparseable model
        reply and API failures each yield a placeholder dict describing the
        problem instead of raising.
    """
    if not math_problem or math_problem.startswith("Error:"):
        print("Skipping classification due to invalid input text.")
        return { "category": "Error", "subtopic": "Invalid Input", "difficulty": "N/A", "key_concepts": ["Text extraction failed or missing"] }

    try:
        generation_settings = {
            "temperature": 0.1,
            "top_p": 0.95,
            "max_output_tokens": 150,
            "response_mime_type": "application/json",
        }
        model = genai.GenerativeModel(
            model_name=CLASSIFICATION_MODEL,
            generation_config=generation_settings,
        )
        # Prompt text is kept flush-left so no indentation leaks into it.
        prompt = f"""
Task: Classify the following math problem.
PROBLEM: {math_problem}
Classify this math problem according to:
1. Primary category (e.g., Algebra, Calculus, Geometry, Trigonometry, Statistics, Number Theory)
2. Specific subtopic (e.g., Linear Equations, Derivatives, Integrals, Probability)
3. Difficulty level (Basic, Intermediate, Advanced)
4. Key concepts involved
Format the response as a JSON object with the fields: "category", "subtopic", "difficulty", "key_concepts".
"""
        response = model.generate_content(prompt, request_options={'timeout': 60})
        try:
            # Remove any Markdown code fences wrapped around the JSON payload.
            raw_reply = response.text.strip()
            cleaned_text = raw_reply.replace("```json", "").replace("```", "").strip()
            classification = json.loads(cleaned_text)
            for required_key in ("category", "subtopic", "difficulty", "key_concepts"):
                classification.setdefault(required_key, "Unknown")
            concepts = classification.get("key_concepts")
            if not isinstance(concepts, list):
                # Normalise a scalar value into a one-element list.
                classification["key_concepts"] = [str(concepts)]
            return classification
        except (json.JSONDecodeError, AttributeError):
            # AttributeError also covers a reply that parsed to a non-dict.
            print(f"JSON Decode/Attribute Error: Unable to parse classification response: {response.text}")
            return { "category": "Unknown", "subtopic": "Parse Error", "difficulty": "Unknown", "key_concepts": ["Failed to parse classification"] }
    except Exception as e:
        print(f"Classification Error: {e}")
        print(traceback.format_exc())
        return { "category": "Error", "subtopic": "API Error", "difficulty": "Error", "key_concepts": [f"Classification failed: {str(e)}"] }
def solve_with_gemini_pro(math_problem, classification):
    """Generate a step-by-step solution with the SOLUTION_MODEL Gemini model.

    Args:
        math_problem: Problem text. Empty input or text starting with
            ``"Error:"`` is rejected without calling the model.
        classification: Dict with ``category``, ``subtopic``, ``difficulty``
            and ``key_concepts``. Missing or empty fields are replaced by
            ``"Unknown"`` and a non-dict value is treated as entirely unknown.
            The caller's object is never modified.

    Returns:
        The solution text from the model, or a message starting with
        ``"Error:"`` when the input is invalid, the model returns nothing or
        the API call fails. The uniform prefix lets the
        ``startswith("Error:")`` checks used elsewhere in this file catch
        every failure.
    """
    if not math_problem or math_problem.startswith("Error:"):
        return "Error: Cannot solve problem due to invalid input text."
    try:
        # Work on a shallow copy so defaults filled in here do not leak back
        # into the dict owned by the caller.
        details = dict(classification) if isinstance(classification, dict) else {}
        for field in ("category", "subtopic", "difficulty"):
            if not details.get(field):
                details[field] = "Unknown"
        key_concepts = details.get("key_concepts") or ["Unknown"]
        if isinstance(key_concepts, list):
            # str() each entry: model-produced JSON may contain non-string
            # items, which would make a plain join raise TypeError.
            key_concepts_str = ", ".join(str(concept) for concept in key_concepts)
        else:
            key_concepts_str = str(key_concepts)
        category = details["category"]
        subtopic = details["subtopic"]
        difficulty = details["difficulty"]

        model = genai.GenerativeModel(
            model_name=SOLUTION_MODEL,
            generation_config={
                "temperature": 0.2,
                "top_p": 0.9,
                "max_output_tokens": 1000,
            }
        )
        # Prompt text is kept flush-left so no indentation leaks into it.
        prompt = f"""
Task: Solve the following math problem with clear step-by-step explanations.
PROBLEM: {math_problem}
CLASSIFICATION:
- Category: {category}
- Subtopic: {subtopic}
- Difficulty: {difficulty}
- Key Concepts: {key_concepts_str}
Provide a complete solution following these guidelines:
1. Start with an overview of the approach
2. Break down the problem into clear, logical steps
3. Explain each step thoroughly, mentioning the mathematical principles applied
4. Show all work and calculations
5. Verify the answer if possible
6. Summarize the key takeaway from this problem
Format the solution to be readable on a mobile device, with appropriate spacing between steps.
"""
        response = model.generate_content(prompt, request_options={'timeout': 180})
        if not response.text:
            return "Error: Solution generation returned an empty response."
        return response.text
    except Exception as e:
        print(f"Solution Error: {e}")
        print(traceback.format_exc())
        # Must begin with "Error:"; the previous "Error generating solution"
        # wording slipped past the explain step's failure checks.
        return f"Error: Solution generation failed: {e}"
def explain_solution(math_problem, solution):
    """Request a more detailed explanation of an existing solution.

    Args:
        math_problem: Original problem text; rejected when empty or when it
            starts with ``"Error:"``.
        solution: Previously generated solution; rejected when empty, when it
            starts with ``"Error:"`` or when it contains the phrase
            ``"will appear here"``.

    Returns:
        The explanation text from the EXPLANATION_MODEL model, or an error
        message string when the inputs are unusable, the model returns
        nothing or the API call fails.
    """
    problem_is_invalid = not math_problem or math_problem.startswith("Error:")
    if problem_is_invalid:
        return "Error: Cannot explain problem due to invalid input text."

    solution_is_invalid = (
        not solution
        or solution.startswith("Error:")
        or "will appear here" in solution
    )
    if solution_is_invalid:
        return "Error: Cannot explain solution as it is missing or invalid."

    try:
        print("Generating detailed explanation...")
        generation_settings = {
            "temperature": 0.3,
            "top_p": 0.95,
            "max_output_tokens": 1500,
        }
        model = genai.GenerativeModel(
            model_name=EXPLANATION_MODEL,
            generation_config=generation_settings,
        )
        # Prompt text is kept flush-left so no indentation leaks into it.
        prompt = f"""
Task: Provide a more detailed explanation of the solution to this math problem.
PROBLEM: {math_problem}
SOLUTION: {solution}
Provide a more comprehensive explanation that:
1. Breaks down complex steps into simpler components
2. Explains the underlying mathematical principles in depth
3. Connects this problem to fundamental concepts
4. Offers visual or intuitive ways to understand the concepts
5. Highlights common mistakes students make with this type of problem
6. Suggests alternative solution approaches if applicable
Make the explanation accessible to a student who is struggling with this topic.
"""
        response = model.generate_content(prompt, request_options={'timeout': 180})
        if response.text:
            return response.text
        return "Error: Explanation generation returned an empty response."
    except Exception as e:
        print(f"Explanation Error: {e}")
        print(traceback.format_exc())
        return f"Error generating explanation: {str(e)}"
def generate_similar_problems(math_problem, classification):
    """Generate three practice problems similar to the given one.

    Args:
        math_problem: Original problem text; rejected when empty or when it
            starts with ``"Error:"``.
        classification: Classification dict for the problem; rejected when it
            is not a dict or when its ``category`` is ``"Error"``.

    Returns:
        The text produced by the SIMILAR_MODEL model, or an error message
        string when the inputs are unusable, the model returns nothing or the
        API call fails.
    """
    if not math_problem or math_problem.startswith("Error:"):
        return "Error: Cannot generate problems due to invalid input text."

    classification_usable = (
        isinstance(classification, dict)
        and classification.get("category") != "Error"
    )
    if not classification_usable:
        return "Error: Cannot generate problems due to invalid classification data."

    try:
        print("Generating similar problems...")
        generation_settings = {
            "temperature": 0.7,
            "top_p": 0.95,
            "max_output_tokens": 1000,
        }
        model = genai.GenerativeModel(
            model_name=SIMILAR_MODEL,
            generation_config=generation_settings,
        )
        # The classification is embedded in the prompt as pretty-printed JSON.
        classification_str = json.dumps(classification, indent=2)
        # Prompt text is kept flush-left so no indentation leaks into it.
        prompt = f"""
Task: Generate similar practice math problems based on the following problem.
ORIGINAL PROBLEM: {math_problem}
CLASSIFICATION: {classification_str}
Generate 3 similar practice problems that:
1. Cover the same mathematical concepts and principles
2. Vary in difficulty (one easier, one similar, one harder)
3. Use different numerical values or variables
4. Test the same underlying skills
For each problem:
- Provide the complete problem statement
- Include a brief hint for solving it
- Provide the correct answer (but not the full solution)
Format as three separate problems with clear numbering.
"""
        response = model.generate_content(prompt, request_options={'timeout': 180})
        if response.text:
            return response.text
        return "Error: Similar problems generation returned an empty response."
    except Exception as e:
        print(f"Similar Problems Error: {e}")
        print(traceback.format_exc())
        return f"Error generating similar problems: {str(e)}"
def process_image(image):
    """Run the extract -> classify -> solve pipeline for an uploaded image.

    Args:
        image: The uploaded image (PIL image or array-like), or ``None`` when
            nothing was uploaded.

    Returns:
        A 6-tuple ``(display_image, extracted_text, classification_json,
        solution, extracted_text_state, classification_state)``. On failure
        the text and solution slots both carry the error message, the JSON
        slots are ``"{}"`` and the extracted-text state is ``""``.
    """
    # NOTE(review): the leading characters of the progress messages below look
    # like mis-encoded emoji; they are reproduced exactly as found.

    def _as_display_image(candidate):
        # Pass PIL images through; convert anything else with Image.fromarray.
        if isinstance(candidate, Image.Image):
            return candidate
        return Image.fromarray(candidate)

    def _failure(message):
        # Build the failure tuple; showing the image is best-effort only.
        preview = None
        if image is not None:
            try:
                preview = _as_display_image(image)
            except Exception:
                pass
        return preview, message, "{}", message, "", "{}"

    started_at = time.time()
    try:
        if image is None:
            return None, "Error: No image uploaded.", "{}", "Error: No image uploaded.", "", "{}"

        print("๐ Starting processing...")
        print("๐ Extracting text with Gemini...")
        extracted_text = extract_text_with_gemini(image)
        if extracted_text.startswith("Error:"):
            print(f"Text extraction failed: {extracted_text}")
            return _failure(extracted_text)

        print(f"๐ Classifying problem ({CLASSIFICATION_MODEL})...")
        classification = classify_with_gemini_flash(extracted_text)
        classification_json = json.dumps(classification, indent=2)

        print(f"๐ก Solving problem ({SOLUTION_MODEL})...")
        solution = solve_with_gemini_pro(extracted_text, classification)

        finished_at = time.time()
        print(f"โ Done in {finished_at - started_at:.2f}s!")
        return (
            _as_display_image(image),
            extracted_text,
            classification_json,
            solution,
            extracted_text,
            classification_json,
        )
    except Exception as e:
        print(f"Process Image Error: {e}")
        print(traceback.format_exc())
        return _failure(f"An unexpected error occurred: {str(e)}")
# Custom stylesheet passed to gr.Blocks; selectors target the elem_id values
# assigned to the components of the interface.
css = """
body { font-family: 'Inter', sans-serif; }
.gradio-container { background-color: #f8f9fa; }
#title_markdown h1 { text-align: center; color: #4A90E2; font-weight: 600; margin-bottom: 0px; }
#subtitle_markdown p { text-align: center; color: #555; margin-top: 5px; margin-bottom: 20px; }
#input_col, #output_col { padding: 10px; }
#input_image, #processed_image { border-radius: 8px; border: 1px solid #dee2e6; overflow: hidden; height: 350px; object-fit: contain; }
#input_image div[data-testid="image"], #processed_image div[data-testid="image"] { height: 100%; }
#input_image img, #processed_image img { height: 100%; object-fit: contain; }
#process_button { margin-top: 15px; }
#results_group { border: 1px solid #e9ecef; border-radius: 8px; padding: 15px; background-color: #ffffff; box-shadow: 0 2px 4px rgba(0,0,0,0.05); margin-top: 20px; }
#extracted_text_output textarea, #classification_output textarea { background-color: #f1f3f4 !important; border-radius: 4px; margin-top: 10px; margin-bottom: 10px; }
#solution_output { margin-top: 15px; }
#action_buttons { margin-top: 15px; margin-bottom: 15px; }
.gradio-accordion > button { background-color: #eef2f6; border-radius: 5px 5px 0 0; font-weight: 500; }
.gradio-accordion .gradio-markdown { border: 1px solid #dee2e6; border-top: none; padding: 15px; border-radius: 0 0 5px 5px; background-color: #fff; }
footer { visibility: hidden }
"""

# Default Gradio theme with blue/sky hues, then button colours and block
# radius overridden to match the stylesheet above.
theme = gr_themes.Default(
    primary_hue=gr_themes.colors.blue,
    secondary_hue=gr_themes.colors.sky,
)
theme = theme.set(
    button_primary_background_fill="#4A90E2",
    button_primary_background_fill_hover="#357ABD",
    button_secondary_background_fill="#E1E8ED",
    button_secondary_background_fill_hover="#CED9E0",
    block_radius="8px",
)
# Interface definition: layout, per-session state and event wiring.
# NOTE(review): the source lost its indentation, so the nesting of the layout
# containers below is reconstructed from the CSS ids (two-column row, results
# group, action row, accordions) - confirm against the deployed layout.
# NOTE(review): the leading characters of several labels look like mis-encoded
# emoji; they are reproduced exactly as found.
with gr.Blocks(theme=theme, css=css, title="NerdAI Math Solver") as demo:
    # Values carried from the processing step to the follow-up buttons.
    extracted_text_state = gr.State("")
    classification_state = gr.State("{}")

    gr.Markdown("# ๐ง NerdAI Math Problem Solver", elem_id="title_markdown")
    gr.Markdown("Upload a clear image of a math problem. NerdAI will extract the text, classify it, solve it step-by-step, and offer further help!", elem_id="subtitle_markdown")

    with gr.Row():
        with gr.Column(scale=1, elem_id="input_col"):
            input_image = gr.Image(label="Upload Math Problem", type="pil", elem_id="input_image", height=350)
            process_btn = gr.Button("โจ Process Image and Solve", variant="primary", elem_id="process_button")
        with gr.Column(scale=1, elem_id="output_col"):
            processed_image = gr.Image(label="Processed Image", interactive=False, elem_id="processed_image", height=350)

    with gr.Group(elem_id="results_group"):
        gr.Markdown("### Results")
        extracted_text_output = gr.Textbox(label="๐ Extracted Text", lines=3, interactive=False, placeholder="Text from the image will appear here...", elem_id="extracted_text_output")
        classification_output = gr.Textbox(label=f"๐ Problem Classification ({CLASSIFICATION_MODEL})", lines=5, interactive=False, placeholder="Problem type analysis will appear here...", elem_id="classification_output")
        solution_output = gr.Markdown(label="โ Solution Steps", value="*Solution steps will appear here after processing...*", elem_id="solution_output")

    with gr.Row(elem_id="action_buttons"):
        explain_btn = gr.Button("๐ค Explain Further", variant="secondary")
        similar_btn = gr.Button("๐ Similar Questions", variant="secondary")

    with gr.Accordion("Detailed Explanation", open=False):
        explanation_output = gr.Markdown(value="*Click 'Explain Further' above to get a detailed breakdown.*")
    with gr.Accordion("Similar Practice Problems", open=False):
        similar_problems_output = gr.Markdown(value="*Click 'Similar Questions' above to generate practice problems.*")

    # Main pipeline: fills the visible outputs and both state holders.
    process_btn.click(
        fn=process_image,
        inputs=[input_image],
        outputs=[
            processed_image,
            extracted_text_output,
            classification_output,
            solution_output,
            extracted_text_state,
            classification_state,
        ],
    )

    def explain_button_handler(current_problem_text, current_solution_md):
        """Yield a status message, then the detailed explanation.

        Yields a short notice and stops when the stored problem text or the
        displayed solution is missing, is an error, or is still the
        placeholder text.
        """
        print("Explain button clicked.")
        if not current_problem_text or current_problem_text.startswith("Error:"):
            yield "Please process an image successfully first."
            return
        solution_unusable = (
            not current_solution_md
            or current_solution_md.startswith("Error:")
            or "will appear here" in current_solution_md
        )
        if solution_unusable:
            yield "A valid solution needs to be generated first."
            return
        # Interim message shown while the model call is running.
        yield "*Generating detailed explanation... please wait.*"
        yield explain_solution(current_problem_text, current_solution_md)

    explain_btn.click(
        fn=explain_button_handler,
        inputs=[extracted_text_state, solution_output],
        outputs=explanation_output,
    )

    def similar_button_handler(current_problem_text, current_classification_json):
        """Yield a status message, then similar practice problems.

        The stored classification may be a JSON string or a dict; anything
        else, invalid JSON, a non-dict payload or an ``"Error"`` category
        produces an error message instead of a model call.
        """
        print("Similar button clicked.")
        if not current_problem_text or current_problem_text.startswith("Error:"):
            yield "Please process an image successfully first."
            return
        yield "*Generating similar problems... please wait.*"
        try:
            if isinstance(current_classification_json, str) and current_classification_json.strip():
                parsed = json.loads(current_classification_json)
            elif isinstance(current_classification_json, dict):
                parsed = current_classification_json
            else:
                raise ValueError("Classification state is empty or not valid JSON/dict.")
            if not isinstance(parsed, dict):
                raise ValueError("Parsed classification is not a dictionary.")
            if parsed.get("category") == "Error":
                raise ValueError("Classification data indicates a previous error.")
        except (json.JSONDecodeError, ValueError, TypeError) as e:
            print(f"Error parsing/validating classification state: {e}")
            yield f"Error: Could not use classification data ({e}). Cannot generate similar problems."
            return
        yield generate_similar_problems(current_problem_text, parsed)

    similar_btn.click(
        fn=similar_button_handler,
        inputs=[extracted_text_state, classification_state],
        outputs=similar_problems_output,
    )
if __name__ == "__main__":
    # Make sure the examples directory exists, then create a small placeholder
    # image for each expected example file that is missing.
    examples_dir = "examples"
    if not os.path.exists(examples_dir):
        os.makedirs(examples_dir)

    for fname in ("algebra_problem.png", "calculus_problem.jpg", "geometry_problem.png"):
        fpath = os.path.join(examples_dir, fname)
        if os.path.exists(fpath):
            continue
        try:
            placeholder = Image.new('RGB', (200, 100), color=(73, 109, 137))
            from PIL import ImageDraw
            canvas = ImageDraw.Draw(placeholder)
            canvas.text((10, 10), f"Placeholder for\n{fname}", fill=(255, 255, 0))
            placeholder.save(fpath)
            print(f"Created placeholder example: {fpath}")
        except Exception as e:
            # A missing placeholder is not fatal; report it and carry on.
            print(f"Could not create placeholder image {fpath}: {e}")

    # Enable request queuing and start the app in debug mode.
    demo.queue().launch(debug=True)