from fastapi import FastAPI, File, UploadFile, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image, UnidentifiedImageError
import torch

# Initialize the FastAPI app
app = FastAPI()

# Allow any origin to call the API (development-friendly; tighten for production).
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the moondream2 vision-language model and tokenizer once at startup.
# trust_remote_code=True executes code shipped with the model repo; the
# revision is pinned so that remote code cannot change underneath us.
model_id = "vikhyatk/moondream2"
revision = "2024-08-26"
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision
)
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)


@app.get("/")
def read_root():
    """Health-check endpoint confirming the service is running."""
    data = {"Scene": "Understanding", "Status": "Success"}
    return JSONResponse(content=data)


@app.post("/generate-text/")
async def generate_text(description: str = Form(...), file: UploadFile = File(...)):
    """Answer `description` (a question/prompt) about the uploaded image.

    Form fields:
        description: text prompt passed to the model.
        file: image upload; must be decodable by Pillow.

    Returns:
        {"generated_text": <model answer>} on success, or a 400 JSON error
        when the upload is not a valid image.
    """
    print("generate_text endpoint called with description:", description)

    # Decode the upload into an RGB PIL image. Close the upload's underlying
    # file afterwards (the original leaked it), and reject non-image uploads
    # with a 400 instead of letting Pillow's error surface as a 500.
    try:
        with Image.open(file.file) as img:
            image = img.convert("RGB")
    except (UnidentifiedImageError, OSError):
        return JSONResponse(
            status_code=400,
            content={"error": "Uploaded file is not a valid image."},
        )
    finally:
        file.file.close()
    print("Image uploaded and converted successfully")

    # encode_image / answer_question come from the moondream2 remote code
    # (trust_remote_code=True) — presumably they embed the image and run a
    # VQA-style decode; verify against the model card.
    # NOTE(review): this inference blocks the event loop inside an async
    # handler — consider run_in_executor if throughput matters.
    enc_image = model.encode_image(image)
    generated_text = model.answer_question(enc_image, description, tokenizer)
    print("Text generated successfully")

    return {"generated_text": generated_text}