from fastapi import FastAPI, File, UploadFile, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image, UnidentifiedImageError
import torch

# Initialize the FastAPI app
app = FastAPI()

# Allow any origin to call the API (development-friendly; tighten for production).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the moondream2 vision-language model and tokenizer once at startup.
# trust_remote_code=True executes code shipped with the model repo; the
# revision is pinned so that remote code cannot change underneath us.
model_id = "vikhyatk/moondream2"
revision = "2024-08-26"
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision
)
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)


@app.get("/")
def read_root():
    """Health-check endpoint confirming the service is running."""
    data = {"Scene": "Understanding", "Status": "Success"}
    return JSONResponse(content=data)


@app.post("/generate-text/")
async def generate_text(description: str = Form(...), file: UploadFile = File(...)):
    """Answer `description` (a question/prompt) about the uploaded image.

    Form fields:
        description: text prompt passed to the model.
        file: image upload; must be decodable by Pillow.

    Returns:
        {"generated_text": <model answer>} on success, or a 400 JSON error
        when the upload is not a valid image.
    """
    print("generate_text endpoint called with description:", description)

    # Decode the upload into an RGB PIL image. Close the upload's underlying
    # file afterwards (the original leaked it), and reject non-image uploads
    # with a 400 instead of letting Pillow's error surface as a 500.
    try:
        with Image.open(file.file) as img:
            image = img.convert("RGB")
    except (UnidentifiedImageError, OSError):
        return JSONResponse(
            status_code=400,
            content={"error": "Uploaded file is not a valid image."},
        )
    finally:
        file.file.close()
    print("Image uploaded and converted successfully")

    # encode_image / answer_question come from the moondream2 remote code
    # (trust_remote_code=True) — presumably they embed the image and run a
    # VQA-style decode; verify against the model card.
    # NOTE(review): this inference blocks the event loop inside an async
    # handler — consider run_in_executor if throughput matters.
    enc_image = model.encode_image(image)
    generated_text = model.answer_question(enc_image, description, tokenizer)
    print("Text generated successfully")

    return {"generated_text": generated_text}