File size: 4,331 Bytes
c533a73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a9d9370
c533a73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125

from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import StreamingResponse, FileResponse
from fastapi.staticfiles import StaticFiles
import torch
import cv2
import numpy as np
import logging
from io import BytesIO
import tempfile
import os
from insightface.app import FaceAnalysis

# FastAPI application object; the upload route below is registered on it.
app = FastAPI()

# Toonification model, created lazily: it stays None until load_model()
# assigns it (nothing is loaded at import time).
model = None

def load_model():
    """Create the ``vtoonify_model.Model`` instance and store it in ``model``.

    ``vtoonify_model`` is imported inside the function, so that import is
    deferred until the model is first needed. The device is 'cuda' when
    ``torch.cuda.is_available()`` and 'cpu' otherwise.

    The module-level ``model`` is only assigned after
    ``load_model('cartoon4')`` has returned. If construction or loading
    raises, ``model`` keeps its previous value (``None`` on first use), so a
    later call retries instead of reusing a half-initialised instance.
    """
    global model
    from vtoonify_model import Model

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    candidate = Model(device=device)
    candidate.load_model('cartoon4')

    # Publish last, once loading has succeeded.
    model = candidate

# Initialize the InsightFace model for face detection. Only the 'detection'
# module is enabled. This runs at import time, so the detector is created as
# soon as the module is loaded.
# NOTE(review): ctx_id 0 is presumably the first GPU and -1 the CPU in
# InsightFace's convention — confirm against the installed version.
face_detector = FaceAnalysis(allowed_modules=['detection'])
face_detector.prepare(ctx_id=0 if torch.cuda.is_available() else -1, det_size=(640, 640))

# Configure logging: root logger at INFO level.
logging.basicConfig(level=logging.INFO)

def detect_and_crop_face(image, padding=0.6):
    """Crop the leftmost detected face, plus padding, out of ``image``.

    Detection runs on a copy of the image resized to 640x640 (the aspect
    ratio is not preserved). The resulting bounding box is mapped back to the
    original resolution, grown by ``padding`` on every side and clamped to
    the image borders.

    Args:
        image: Image array indexed as ``image[y, x]``; its first two
            dimensions are taken as height and width.
        padding: Fraction of the face box's width/height added on each side.

    Returns:
        The cropped region of ``image``, or ``None`` when no face is detected
        or the padded box is empty.
    """
    orig_h, orig_w = image.shape[:2]

    # Detect on a fixed-size copy; coordinates are rescaled below.
    faces = face_detector.get(cv2.resize(image, (640, 640)))
    if not faces:
        return None

    # Leftmost face = smallest x1 of the bounding box.
    leftmost = min(faces, key=lambda face: face.bbox[0])

    # Factors that map 640x640 detection coordinates to the original image.
    w_scale = orig_w / 640
    h_scale = orig_h / 640

    # Scale the raw box first and truncate afterwards, so the truncation
    # error is not multiplied by the scale factor.
    det_x1, det_y1, det_x2, det_y2 = leftmost.bbox
    x1 = int(det_x1 * w_scale)
    y1 = int(det_y1 * h_scale)
    x2 = int(det_x2 * w_scale)
    y2 = int(det_y2 * h_scale)

    # Grow the box by the padding fraction and clamp to the image borders.
    pad_x = int(padding * (x2 - x1))
    pad_y = int(padding * (y2 - y1))
    x1 = max(0, x1 - pad_x)
    y1 = max(0, y1 - pad_y)
    x2 = min(orig_w, x2 + pad_x)
    y2 = min(orig_h, y2 + pad_y)

    # A degenerate box would produce an empty array that callers cannot
    # write or process; report it the same way as "no face".
    if x2 <= x1 or y2 <= y1:
        return None

    return image[y1:y2, x1:x2]

@app.post("/upload/")
async def process_image(file: UploadFile = File(...), top: int = Form(...), bottom: int = Form(...), left: int = Form(...), right: int = Form(...)):
    """Toonify the leftmost face of an uploaded image and return it as JPEG.

    The model is loaded on first use. The upload is decoded, the leftmost
    face is cropped with ``detect_and_crop_face`` and written to a temporary
    JPEG file, because ``model.detect_and_align_image`` is called with a file
    path. The temporary file is always removed afterwards.

    Args:
        file: Uploaded image file.
        top, bottom, left, right: Integers forwarded unchanged to
            ``model.detect_and_align_image``.

    Returns:
        A ``StreamingResponse`` with media type ``image/jpeg`` on success,
        otherwise a dict of the form ``{"error": <message>}``.
    """
    if model is None:
        load_model()

    # Read the uploaded image file
    contents = await file.read()

    # Decode into a BGR array. An empty upload is treated as undecodable
    # up front, since cv2.imdecode may raise on an empty buffer instead of
    # returning None.
    frame_bgr = None
    if contents:
        frame_bgr = cv2.imdecode(np.frombuffer(contents, np.uint8), cv2.IMREAD_COLOR)

    if frame_bgr is None:
        logging.error("Failed to decode the image.")
        return {"error": "Failed to decode the image. Please ensure the file is a valid image format."}
    logging.info("Uploaded image shape: %s", frame_bgr.shape)

    # Detect and crop face
    cropped_face = detect_and_crop_face(frame_bgr)
    if cropped_face is None:
        return {"error": "No face detected or alignment failed."}

    # Reserve a temp file name only. The handle is closed before OpenCV
    # writes to the path, and the write happens inside the try block so the
    # file is removed even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
        temp_file_path = temp_file.name

    try:
        if not cv2.imwrite(temp_file_path, cropped_face):
            logging.error("Failed to write the cropped face to a temporary file.")
            return {"error": "Failed to store the cropped face for processing."}

        # Process the cropped face using the file path
        aligned_face, instyle, message = model.detect_and_align_image(temp_file_path, top, bottom, left, right)
        if aligned_face is None or instyle is None:
            logging.error("Failed to process the image: No face detected or alignment failed.")
            return {"error": message}

        processed_image, message = model.image_toonify(aligned_face, instyle, model.exstyle, style_degree=0.5, style_type='cartoon1')
        if processed_image is None:
            logging.error("Failed to toonify the image.")
            return {"error": message}

        # Swap the first and third channels before encoding.
        # NOTE(review): cv2.imencode interprets its input as BGR, so this is
        # only correct if image_toonify returns RGB — confirm.
        swapped = cv2.cvtColor(processed_image, cv2.COLOR_BGR2RGB)

        # Convert processed image to bytes
        encoded_ok, encoded_image = cv2.imencode('.jpg', swapped)
        if not encoded_ok:
            logging.error("Failed to encode the processed image.")
            return {"error": "Failed to encode the processed image."}

        # Return the processed image as a streaming response
        return StreamingResponse(BytesIO(encoded_image.tobytes()), media_type="image/jpeg")

    finally:
        # Clean up the temporary file
        os.remove(temp_file_path)