# FastAPI service wrapping the moondream2 vision-language model for scene understanding.
# NOTE(review): lines recovered from a Hugging Face Spaces page scrape; the
# "Spaces: / Sleeping" page-status banners were removed as non-code residue.
# Third-party imports (scrape artifacts " | |" removed; grouped per PEP 8).
import torch
from PIL import Image
from fastapi import FastAPI, File, Form, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from transformers import AutoModelForCausalLM, AutoTokenizer
# Initialize the FastAPI app and allow cross-origin requests from any host.
# NOTE(review): allow_origins=["*"] together with allow_credentials=True is
# permissive; fine for a demo Space, but lock origins down before production.
app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Load the moondream2 vision-language model and its tokenizer once at startup.
# The revision is pinned so behavior stays reproducible; trust_remote_code is
# required because moondream2 ships custom modeling code on the Hub (security
# note: this executes code from the model repo — keep the revision pinned).
model_id = "vikhyatk/moondream2"
revision = "2024-08-26"
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision
)
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
# NOTE(review): the route decorator was missing in the scraped source —
# without it this handler is unreachable. Restored as the root GET endpoint
# (conventional for a FastAPI health check named read_root); confirm path.
@app.get("/")
def read_root():
    """Health-check endpoint: report that the scene-understanding service is up.

    Returns:
        JSONResponse with a static status payload.
    """
    data = {"Scene": "Understanding", "Status": "Success"}
    return JSONResponse(content=data)
# NOTE(review): the route decorator was missing in the scraped source —
# restored as a POST endpoint (it accepts multipart Form + File data);
# confirm the original path against the Space's frontend.
@app.post("/generate_text")
async def generate_text(description: str = Form(...), file: UploadFile = File(...)):
    """Answer a question about an uploaded image using moondream2.

    Args:
        description: The question/prompt about the image (multipart form field).
        file: The uploaded image (any format Pillow can open).

    Returns:
        dict with key "generated_text" holding the model's answer.
    """
    print("generate_text endpoint called with description:", description)
    # Decode the upload into an RGB PIL image (moondream expects 3 channels).
    image = Image.open(file.file).convert("RGB")
    print("Image uploaded and converted successfully")
    # Encode the image once, then answer the question against the encoding.
    enc_image = model.encode_image(image)
    generated_text = model.answer_question(enc_image, description, tokenizer)
    print("Text generated successfully")
    # FastAPI closes the UploadFile after the request, so no explicit cleanup.
    return {"generated_text": generated_text}