Working code. The FastAPI app below loads moondream2 (revision 2024-08-26) and exposes a /generate-text/ endpoint that accepts an uploaded image plus a text prompt:
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import torch
# Initialize the FastAPI app
app = FastAPI()
# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Load the model and tokenizer
model_id = "vikhyatk/moondream2"
revision = "2024-08-26"
model = AutoModelForCausalLM.from_pretrained(
    model_id, trust_remote_code=True, revision=revision
)
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
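# Note: encode_image() and answer_question() used below are custom helpers shipped
# with the moondream2 remote code (enabled by trust_remote_code=True); they are
# not part of the generic transformers API.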
@app.get("/")
def read_root():
    data = {"Scene": "Understanding", "Status": "Success"}
    return JSONResponse(content=data)
@app.post("/generate-text/")
async def generate_text(description: str = Form(...), file: UploadFile = File(...)):
print("generate_text endpoint called with description:", description)
# Convert uploaded file to PIL image
image = Image.open(file.file).convert("RGB")
print("Image uploaded and converted successfully")
# Encode the image using the model
enc_image = model.encode_image(image)
# Answer the question using the model and tokenizer
generated_text = model.answer_question(enc_image, description, tokenizer)
print("Text generated successfully")
return {"generated_text": generated_text}