File size: 2,078 Bytes
08faac7
 
e13ec26
7cf86f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e13ec26
 
 
7cf86f8
e13ec26
 
 
 
 
 
 
7cf86f8
e13ec26
 
 
 
 
 
7cf86f8
e13ec26
7cf86f8
 
e13ec26
08faac7
 
7cf86f8
e13ec26
08faac7
7cf86f8
e13ec26
 
7cf86f8
08faac7
e13ec26
7cf86f8
e13ec26
7cf86f8
e13ec26
 
 
08faac7
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import tempfile
import time
from typing import Dict, Any

import numpy as np
from PIL import Image
from ultralytics import YOLO

from utils.predict_bounding_boxes import predict_bounding_boxes
from utils.manga_ocr_utils import get_text_from_image
from utils.translate_manga import translate_manga
from utils.process_contour import process_contour
from utils.write_text_on_image import add_text

# Location of the detection weights handed to YOLO below. The path is
# relative, so it resolves against the process's current working directory.
MODEL_PATH = "./model_creation/runs/detect/train5/weights/best.pt"
# Constructed once at import time; predict() passes this same instance to
# predict_bounding_boxes on every call.
object_detection_model = YOLO(MODEL_PATH)


def extract_text_from_regions(
    image: np.ndarray, target_lang: str, results: list
) -> None:
    """OCR, translate and re-letter every detected region of ``image``.

    Each region is cropped with basic slicing, so the crop is a view that
    shares memory with ``image``. That view is what gets passed to
    ``process_contour``; nothing is returned from this function, so the
    helpers presumably draw in place -- confirm against their definitions.

    Args:
        image: Page pixels indexed as ``[row, column]`` or
            ``[row, column, channel]``.
        target_lang: Language code forwarded to ``translate_manga``.
        results: Iterable of 6-item sequences. The first four items are read
            as ``x1, y1, x2, y2``; the last two are ignored.
    """
    for x1, y1, x2, y2, _, _ in results:
        region = image[int(y1) : int(y2), int(x1) : int(x2)]
        if region.size == 0:
            # Truncating the coordinates to int can collapse a thin box to
            # zero pixels; there is nothing to read or draw on, and PIL
            # cannot build an image from an empty array.
            continue
        # Drop the alpha channel. The ndim guard keeps a 2-D crop that is
        # exactly four pixels wide from being mistaken for RGBA.
        if region.ndim == 3 and region.shape[-1] == 4:
            region = region[:, :, :3]

        if np.issubdtype(region.dtype, np.integer):
            # Integer pixels are used as they are: multiplying uint8 values
            # by 255 wraps modulo 256 and would hand OCR a corrupted crop.
            ocr_pixels = region.astype(np.uint8, copy=False)
        else:
            # Non-integer pixels are scaled as before (assumes values in
            # [0, 1] -- TODO confirm with callers).
            ocr_pixels = np.uint8(region * 255)
        text = get_text_from_image(Image.fromarray(ocr_pixels))

        processed_image, cont = process_contour(region)
        translated_text = translate_manga(
            text, target_lang=target_lang, source_lang="ja-JP"
        )
        if translated_text is None:
            translated_text = "Translation failed"

        add_text(processed_image, translated_text, cont)


def predict(image: np.ndarray, target_lang: str):
    """Detect text regions in ``image`` and process each one for translation.

    The page is written to a scratch PNG because ``predict_bounding_boxes``
    is given a file path rather than pixel data.

    Args:
        image: Page pixels, in a form ``Image.fromarray`` accepts.
        target_lang: Language code forwarded to ``extract_text_from_regions``.

    Returns:
        The array rebuilt from the PIL image after
        ``extract_text_from_regions`` has run on it, or ``None`` when
        detection or region processing raises.
    """
    pil_image = Image.fromarray(image)

    # mkstemp creates the file under a name guaranteed to be unique, in the
    # system temp directory, so this does not depend on the working directory
    # being writable or on timestamps differing between calls.
    fd, temp_filename = tempfile.mkstemp(prefix="image_", suffix=".png")
    os.close(fd)  # Only the path is needed; PIL reopens the file itself.

    try:
        # A failed save still propagates to the caller, as it did before,
        # but the scratch file is now cleaned up in that case too.
        pil_image.save(temp_filename)

        try:
            np_image = np.array(pil_image)

            results = predict_bounding_boxes(object_detection_model, temp_filename)
            extract_text_from_regions(np_image, target_lang, results)

            return np_image

        except Exception as e:
            print(f"Error: {str(e)}")
            return None

    finally:
        # Clean up the temporary file; a failure here is reported, not raised.
        try:
            os.remove(temp_filename)
        except FileNotFoundError:
            pass
        except OSError as e:
            print(f"Warning: Could not remove temporary file {temp_filename}: {e}")