import gradio as gr
import cv2
import numpy as np
import os
import json
from PIL import Image
import io
import base64
from openai import OpenAI
from ultralytics import YOLO

# Path to the Latex2Layout detection weights. It is a relative path, so it is
# resolved against the current working directory of the process.
model_path = "latex2layout_object_detection_yolov8.pt"

# Fail fast with an explicit message when the weights file is missing instead
# of surfacing a less obvious error from the model loader.
if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found at {model_path}")

# Load the model once at import time; every detection call reuses this
# module-level instance. The original exception is chained as the cause so the
# underlying loader failure stays visible in the traceback.
try:
    model = YOLO(model_path)
except Exception as e:
    raise RuntimeError(f"Failed to load Latex2Layout model: {e}") from e

# Qwen API configuration: the endpoint handed to the OpenAI client, plus the
# mapping from the display names offered in the model dropdown to the model
# identifiers sent in the API request.
# NOTE(review): confirm that every generated ID is actually served by the
# endpoint (the 14B variant in particular) -- not verifiable from this file.
QWEN_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"
QWEN_MODELS = {
    f"Qwen2.5-VL-{size}-Instruct": f"qwen2.5-vl-{size.lower()}-instruct"
    for size in ("3B", "7B", "14B")
}

# Default system prompt template. The literal "{layout_info}" is a placeholder
# that qa_about_layout replaces with the detection results before sending.
default_system_prompt = "\n".join(
    [
        "You are an assistant specialized in document layout analysis.",
        "The following layout elements were detected in the image (confidence >= 0.5):",
        "{layout_info}",
        "Use this information and the image to answer layout-related questions.",
    ]
)

def encode_image(image_array):
    """
    Convert a numpy array image to a base64-encoded PNG string.

    Args:
        image_array: Numpy array representing the image; it must be
            convertible by ``PIL.Image.fromarray``.

    Returns:
        str: Base64-encoded string of the PNG-serialized image.

    Raises:
        ValueError: If the array cannot be converted or serialized. The
            underlying exception is chained as ``__cause__``.
    """
    try:
        pil_image = Image.fromarray(image_array)
        # Serialize into an in-memory buffer; PNG keeps the pixels lossless.
        with io.BytesIO() as png_buffer:
            pil_image.save(png_buffer, format='PNG')
            png_bytes = png_buffer.getvalue()
    except Exception as e:
        # Chain the original error so the real failure stays in the traceback.
        raise ValueError(f"Failed to encode image: {e}") from e
    return base64.b64encode(png_bytes).decode("utf-8")

def detect_layout(image, confidence_threshold=0.5):
    """
    Detect layout elements in the uploaded image using the Latex2Layout model.

    Args:
        image: Uploaded image as a numpy array in RGB channel order (this is
            what the UI image component and detect_example_image pass in).
        confidence_threshold: Minimum confidence score to retain detections (default: 0.5).

    Returns:
        tuple: (annotated_image, layout_info_str)
            - annotated_image: Copy of the input with a bounding box and label
              drawn for every retained detection, or None on error.
            - layout_info_str: JSON string listing the retained detections
              (bbox, class, confidence); a "no elements" message when nothing
              passes the threshold; or an error message on failure.
    """
    if image is None or not isinstance(image, np.ndarray):
        return None, "Error: No image uploaded or invalid image format."

    try:
        # Callers provide RGB arrays, whereas Ultralytics interprets raw numpy
        # input as BGR. Reverse the channel axis for 3-channel images so the
        # model sees the colors it expects; other shapes pass through as-is.
        if image.ndim == 3 and image.shape[2] == 3:
            model_input = np.ascontiguousarray(image[..., ::-1])
        else:
            model_input = image

        # Forward the threshold to the model too: otherwise the model's own
        # default confidence cut-off silently drops detections that a lower
        # requested threshold should keep. Clamped to the valid [0, 1] range.
        model_conf = float(min(max(confidence_threshold, 0.0), 1.0))
        results = model(model_input, conf=model_conf)
        result = results[0]

        # Draw on a copy so the caller's array is left untouched.
        annotated_image = image.copy()
        layout_info = []
        class_colors = {}

        # Process detections
        for box in result.boxes:
            conf = float(box.conf[0])
            if conf < confidence_threshold:
                continue

            x1, y1, x2, y2 = map(int, box.xyxy[0].cpu().numpy())
            cls_id = int(box.cls[0])
            cls_name = result.names[cls_id]

            # One stable color per class (seeded by the class id) so boxes of
            # the same class look the same within and across runs.
            color = class_colors.get(cls_id)
            if color is None:
                rng = np.random.default_rng(cls_id)
                color = tuple(rng.integers(0, 256, 3).tolist())
                class_colors[cls_id] = color

            cv2.rectangle(annotated_image, (x1, y1), (x2, y2), color, 2)

            label = f"{cls_name} {conf:.2f}"
            (label_width, label_height), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)

            # The label normally sits just above the box. If that would fall
            # off the top of the image, place it just inside the box instead.
            label_top = y1 - label_height - 5
            if label_top < 0:
                label_top = y1
            label_bottom = label_top + label_height + 5
            cv2.rectangle(annotated_image, (x1, label_top), (x1 + label_width, label_bottom), color, -1)
            cv2.putText(annotated_image, label, (x1, label_bottom - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 1)

            layout_info.append({
                "bbox": [x1, y1, x2, y2],
                "class": cls_name,
                "confidence": conf
            })

        if layout_info:
            layout_info_str = json.dumps(layout_info, indent=2)
        else:
            # Report the threshold that was actually applied.
            layout_info_str = f"No layout elements detected with confidence >= {confidence_threshold}."
        return annotated_image, layout_info_str

    except Exception as e:
        return None, f"Error during layout detection: {str(e)}"

def detect_example_image(example_image_path="./image1.png"):
    """
    Load an example image from disk and detect its layout elements.

    Args:
        example_image_path: Path of the example image to load
            (default: ./image1.png, relative to the working directory).

    Returns:
        tuple: (example_image, annotated_image, layout_info_str)
            - example_image: Original example image in RGB order, or None if
              the file is missing or cannot be loaded.
            - annotated_image: Annotated example image, or None on failure.
            - layout_info_str: JSON string of layout detections, or an error
              message describing what went wrong.
    """
    if not os.path.exists(example_image_path):
        return None, None, "Error: Example image not found."

    try:
        # OpenCV loads images in BGR order and returns None (rather than
        # raising) when the file cannot be decoded.
        bgr_image = cv2.imread(example_image_path)
        if bgr_image is None:
            return None, None, "Error: Failed to load example image."

        # The rest of the app works with RGB arrays.
        rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

        # Run detection
        annotated_image, layout_info_str = detect_layout(rgb_image)
        return rgb_image, annotated_image, layout_info_str
    except Exception as e:
        return None, None, f"Error processing example image: {str(e)}"

def qa_about_layout(image, question, layout_info, api_key, model_name, system_prompt_template):
    """
    Answer layout-related questions using the Qwen API with an editable system prompt.

    Args:
        image: Uploaded image as a numpy array.
        question: User's question about the layout.
        layout_info: JSON string of layout detection results.
        api_key: User's Qwen API key.
        model_name: Selected Qwen model name (a key of QWEN_MODELS).
        system_prompt_template: Editable system prompt template; every
            occurrence of "{layout_info}" is replaced by ``layout_info``.

    Returns:
        str: Qwen's response to the question, or an "Error..." message when
        an input is missing/invalid or the API call fails.
    """
    if image is None or not isinstance(image, np.ndarray):
        return "Error: Please upload a valid image."

    # Treat whitespace-only input as missing; stray whitespace around a pasted
    # API key would otherwise be sent to the service as part of the key.
    question = question.strip() if isinstance(question, str) else question
    if not question:
        return "Error: Please enter a question."
    api_key = api_key.strip() if isinstance(api_key, str) else api_key
    if not api_key:
        return "Error: Please provide a Qwen API key."

    # A failed detection leaves an "Error..." message in the layout textbox;
    # that is not layout data and must not be forwarded to the model.
    if not layout_info or (isinstance(layout_info, str) and layout_info.startswith("Error")):
        return "Error: No layout information available. Detect layout first."

    try:
        # Resolve the model first so an invalid selection is rejected before
        # the image is PNG/base64 encoded.
        model_id = QWEN_MODELS.get(model_name)
        if not model_id:
            return "Error: Invalid Qwen model selected."

        # Encode image to base64 for embedding as a data URL
        base64_image = encode_image(image)

        # Replace placeholder in system prompt with layout info
        system_prompt = system_prompt_template.replace("{layout_info}", layout_info)

        # Initialize OpenAI client for Qwen API
        client = OpenAI(api_key=api_key, base_url=QWEN_BASE_URL)

        # Prepare API request messages: the system prompt carries the layout
        # data, the user turn carries the image followed by the question.
        messages = [
            {"role": "system", "content": [{"type": "text", "text": system_prompt}]},
            {
                "role": "user",
                "content": [
                    {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}},
                    {"type": "text", "text": question},
                ],
            },
        ]

        # Call Qwen API
        completion = client.chat.completions.create(model=model_id, messages=messages)
        return completion.choices[0].message.content

    except Exception as e:
        # Report any failure as text so the UI can display it in the answer box.
        return f"Error during QA: {str(e)}"

# Build the Gradio interface: a detection row (input image + results) followed
# by a question-answering row (API settings + answer). Components appear in
# the page in the order they are created inside each Row/Column.
with gr.Blocks(title="Latex2Layout QA System") as demo:
    gr.Markdown("# Latex2Layout QA System")
    gr.Markdown("Upload an image or use the example to detect layout elements and ask questions using Qwen models.")

    # Row 1: image input and detection results
    with gr.Row():
        with gr.Column(scale=1):
            # type="numpy" matches the np.ndarray check done by the handlers
            input_image = gr.Image(label="Upload Image", type="numpy")
            detect_btn = gr.Button("Detect Layout")
            example_btn = gr.Button("Detect Example Image")
            gr.Markdown("**Tip**: Use clear images for best results.")

        with gr.Column(scale=1):
            output_image = gr.Image(label="Detected Layout")
            # Read-only for the user; also reused below as an input to the QA handler
            layout_info = gr.Textbox(label="Layout Information", lines=10, interactive=False)

    # Row 2: Qwen settings, question entry and the answer
    with gr.Row():
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(
                label="Qwen API Key",
                placeholder="Enter your Qwen API key",
                type="password"
            )
            # Choices are the display names; qa_about_layout maps the selected
            # name to the API model ID via QWEN_MODELS.
            model_select = gr.Dropdown(
                label="Select Qwen Model",
                choices=list(QWEN_MODELS.keys()),
                value="Qwen2.5-VL-3B-Instruct"
            )
            gr.Markdown("**System Prompt Template**: Edit the prompt sent to Qwen. Include `{layout_info}` to insert detection results.")
            system_prompt_input = gr.Textbox(
                label="System Prompt Template",
                value=default_system_prompt,
                lines=5,
                placeholder="Edit the system prompt here. Keep {layout_info} to include detection results."
            )
            question_input = gr.Textbox(label="Ask About the Layout", placeholder="e.g., 'Where is the heading?'")
            qa_btn = gr.Button("Ask Question")

        with gr.Column(scale=1):
            answer_output = gr.Textbox(label="Answer", lines=5, interactive=False)

    # Event handlers
    # Only the image is passed, so detect_layout runs with its default
    # confidence threshold.
    detect_btn.click(
        fn=detect_layout,
        inputs=[input_image],
        outputs=[output_image, layout_info]
    )
    # Takes no inputs; its first output fills the input image component so the
    # example can then be used for question answering.
    example_btn.click(
        fn=detect_example_image,
        inputs=[],
        outputs=[input_image, output_image, layout_info]
    )
    # The order of inputs must match the positional parameters of
    # qa_about_layout.
    qa_btn.click(
        fn=qa_about_layout,
        inputs=[input_image, question_input, layout_info, api_key_input, model_select, system_prompt_input],
        outputs=[answer_output]
    )

# Launch the application only when this file is run as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    demo.launch()