Working code. The FastAPI app below loads moondream2 (revision 2024-08-26) and exposes a /generate-text/ endpoint that accepts an uploaded image plus a text prompt:
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
# Initialize the FastAPI app
app = FastAPI()
# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Load the model and tokenizer
model_id = "vikhyatk/moondream2"
revision = "2024-08-26"
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision
)
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
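# Note: encode_image() and answer_question() used below are custom helpers shipped
# with the moondream2 remote code (enabled by trust_remote_code=True); they are
# not part of the generic transformers API.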
@app.get("/")
def read_root():
    data = {"Scene": "Understanding", "Status": "Success"}
    return JSONResponse(content=data)
@app.post("/generate-text/")
async def generate_text(description: str = Form(...), file: UploadFile = File(...)):
print("generate_text endpoint called with description:", description)
# Convert uploaded file to PIL image
image = Image.open(file.file).convert("RGB")
print("Image uploaded and converted successfully")
# Encode the image using the model
enc_image = model.encode_image(image)
# Answer the question using the model and tokenizer
generated_text = model.answer_question(enc_image, description, tokenizer)
print("Text generated successfully")
return {"generated_text": generated_text}