|
import cv2 |
|
import numpy as np |
|
from sklearn import svm |
|
import os |
|
import random |
|
from concurrent.futures import ThreadPoolExecutor |
|
from joblib import dump, load |
|
import pytesseract |
|
from tqdm import tqdm |
|
|
|
# Location of the Tesseract executable handed to pytesseract. The default is
# the Homebrew-style path this script was written against; set the
# TESSERACT_CMD environment variable to point at a different install.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    'TESSERACT_CMD', '/opt/homebrew/bin/tesseract'
)

# Class directories. Filled in by load_data(); training label i refers to
# dir_names[i].
dir_names = []
|
|
|
def load_data(batch_size=100):
    """Yield ``(X, y)`` training batches read from ``organized_spectrograms/``.

    Every sub-directory is one class. Images ending in ``.jpg`` or ``.png``
    are read as grayscale, resized with ``cv2.resize(..., (300, 700))`` and
    flattened into one row of ``X``; ``y`` holds the index of the class
    directory in the global ``dir_names``.

    This is a generator: ``dir_names`` is only (re)assigned once iteration
    starts. A batch is emitted after a whole directory has been read and at
    least ``batch_size`` samples are pending, so batches can be larger than
    ``batch_size``.

    Args:
        batch_size: Minimum number of pending samples that triggers a yield.

    Yields:
        Tuples ``(X, y)`` of numpy arrays with one row / one label per image.
    """
    global dir_names
    img_shape = (300, 700)
    root = 'organized_spectrograms'

    # Sorted so the label -> directory mapping is the same on every run
    # (os.listdir order is arbitrary). Plain files are skipped because the
    # loop below lists each entry as a directory.
    dir_names = [
        'organized_spectrograms/' + d
        for d in sorted(os.listdir(root))
        if d != ".DS_Store" and '.py' not in d
        and os.path.isdir(os.path.join(root, d))
    ]
    X, y = [], []

    def load_and_resize_image(path):
        # cv2.imread returns None instead of raising when a file is unreadable.
        img = cv2.imread(path, 0)
        if img is None:
            return None
        return cv2.resize(img, img_shape)

    for i, dir_name in tqdm(enumerate(dir_names), desc="Loading Directories", total=len(dir_names)):
        paths = [
            os.path.join(dir_name, f)
            for f in os.listdir(dir_name)
            if f.endswith(('.jpg', '.png'))
        ]
        with ThreadPoolExecutor(max_workers=20) as executor:
            loaded = list(executor.map(load_and_resize_image, paths))

        images = [img for img in loaded if img is not None]
        if len(images) < len(loaded):
            print(f'Warning: skipped {len(loaded) - len(images)} unreadable image(s) in {dir_name}')

        if not images:
            print(f'Error: No images found in {dir_name}')
            continue

        X.extend(img.flatten() for img in images)
        y.extend([i] * len(images))

        if len(X) >= batch_size:
            yield np.array(X), np.array(y)
            X, y = [], []

    # Flush whatever is left after the last directory.
    if X:
        yield np.array(X), np.array(y)


def train_classifier_in_batches(batch_generator, batch_size=100):
    """Train an SVC on all batches and save it to ``contour.joblib``.

    ``SVC.fit`` starts from scratch on every call, so fitting batch by batch
    would keep only the last batch (and fail on single-class batches). The
    batches are therefore collected and fitted in a single call.

    Args:
        batch_generator: Iterable of ``(X, y)`` numpy array pairs.
        batch_size: Unused; kept so existing calls keep working.

    Raises:
        ValueError: If the generator produced no non-empty batch.
    """
    clf = svm.SVC()
    X_parts, y_parts = [], []

    for X_batch, y_batch in tqdm(batch_generator, desc="Training SVM in Batches"):
        if X_batch.size > 0 and y_batch.size > 0:
            X_parts.append(X_batch)
            y_parts.append(y_batch)

    if not X_parts:
        raise ValueError('No training data was produced; cannot train the classifier')

    clf.fit(np.concatenate(X_parts), np.concatenate(y_parts))
    dump(clf, 'contour.joblib')


try:
    # NOTE: joblib.load unpickles the file; only load models you created.
    clf = load('contour.joblib')
except Exception as exc:
    # Missing or unreadable model file: leave clf unset until one is trained.
    print(f"Could not load 'contour.joblib': {exc}")
    clf = None
|
|
|
def _label_scores(clf, features):
    """Return ``(labels, scores)`` for one sample, aligned element by element.

    ``labels[j]`` is the training label whose score is ``scores[j]``.
    """
    raw = np.asarray(clf.decision_function(features))
    if raw.ndim == 1:
        # Two-class model: a single signed margin per sample, where a
        # positive value favours the second class.
        margin = raw[0]
        scores = np.array([-margin, margin])
    else:
        scores = raw[0]

    labels = getattr(clf, 'classes_', None)
    if labels is not None:
        labels = np.asarray(labels)
    if (
        labels is None
        or len(labels) != len(scores)
        or not np.issubdtype(labels.dtype, np.integer)
    ):
        # No usable label list: fall back to treating the column position
        # as the label.
        labels = np.arange(len(scores))
    return labels, scores


def process_image(clf, dir_names, min_size, max_size, image_path, num_classes_to_detect):
    """Classify contour regions of an image and draw a box around each one.

    The image is thresholded at 127, contours are extracted, and every
    contour whose bounding-box area lies in ``[min_size, max_size]`` is
    resized to the training shape and scored with ``clf``. Each scored
    region is outlined in a colour derived from its best class name and also
    written as a PNG into ``<best class directory>_negative/``.

    Scoring runs in a thread pool; accumulation, drawing and file writes
    happen in the calling thread so shared state is never touched
    concurrently, and exceptions raised while scoring propagate to the
    caller instead of being dropped.

    Args:
        clf: Fitted classifier exposing ``decision_function``.
        dir_names: Class directory names; training label ``i`` means
            ``dir_names[i]``.
        min_size: Smallest accepted bounding-box area (``w * h``).
        max_size: Largest accepted bounding-box area (``w * h``).
        image_path: Path of the image to process.
        num_classes_to_detect: How many top-scoring classes of each region
            contribute to the averaged scores.

    Returns:
        ``(screenshot, avg_scores, details)``: the annotated BGR image, the
        per-class score sums divided by the number of scored regions, and
        ``details``, which is currently always an empty list.

    Raises:
        FileNotFoundError: If ``image_path`` cannot be read as an image.
    """
    img_shape = (300, 700)

    screenshot = cv2.imread(image_path)
    if screenshot is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f'Could not read image: {image_path}')
    screenshot_gray = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)

    _, thresh = cv2.threshold(screenshot_gray, 127, 255, 0)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    avg_scores = np.zeros(len(dir_names))
    num_rois = 0
    details = []

    def score_contour(contour):
        """Score one contour; return None when its area is out of range."""
        x, y, w, h = cv2.boundingRect(contour)
        if not min_size <= w * h <= max_size:
            return None
        roi = screenshot_gray[y:y + h, x:x + w]
        features = cv2.resize(roi, img_shape).flatten().reshape(1, -1)
        labels, scores = _label_scores(clf, features)
        return (x, y, w, h), roi, labels, scores

    with ThreadPoolExecutor(max_workers=5) as executor:
        # Iterating the map result is what re-raises worker exceptions.
        for result in executor.map(score_contour, contours):
            if result is None:
                continue
            (x, y, w, h), roi, labels, scores = result

            for j in np.argsort(scores)[-num_classes_to_detect:]:
                avg_scores[labels[j]] += scores[j]
            num_rois += 1

            best_dir = dir_names[labels[np.argmax(scores)]]
            color_hash = hash(best_dir) & 0xffffff
            color_bgr = ((color_hash >> 16) & 0xff), ((color_hash >> 8) & 0xff), (color_hash & 0xff)
            cv2.rectangle(screenshot, (x, y), (x + w, y + h), color_bgr, 2)

            negative_dir_name = f"{best_dir}_negative"
            os.makedirs(negative_dir_name, exist_ok=True)
            cv2.imwrite(os.path.join(negative_dir_name, str(random.randint(1, 999999999)) + '.png'), roi)

    if num_rois > 0:
        avg_scores /= num_rois
    else:
        print('Warning: No ROIs were processed')

    return screenshot, avg_scores, details
|
|
|
def draw_menu(screenshot, detected_classes, max_menu_width=200, item_height=30, font_scale=0.5):
    """Return a copy of ``screenshot`` with a colour legend to its right.

    One row is drawn per class name: a filled swatch in the colour derived
    from ``hash(class_name)`` followed by the name in white text.

    Args:
        screenshot: Image array of shape ``(height, width, 3)``.
        detected_classes: Class names to list, one per row.
        max_menu_width: Upper bound for the legend width in pixels; the
            legend is never wider than a quarter of the screenshot.
        item_height: Height of one legend row in pixels.
        font_scale: Font scale passed to ``cv2.putText``.

    Returns:
        A new ``uint8`` image as wide as screenshot plus legend and as tall
        as the taller of the two; unused area is black.
    """
    screenshot_height, screenshot_width = screenshot.shape[:2]

    menu_height = len(detected_classes) * item_height + 20
    menu_width = min(max_menu_width, screenshot_width // 4)
    menu_img = np.zeros((menu_height, menu_width, 3), dtype=np.uint8)

    # A screenshot narrower than 4 px leaves no room for a legend at all.
    if menu_width > 0:
        for i, class_name in enumerate(detected_classes):
            row_top = i * item_height
            color_hash = hash(class_name) & 0xffffff
            color_bgr = ((color_hash >> 16) & 0xff), ((color_hash >> 8) & 0xff), (color_hash & 0xff)

            cv2.rectangle(menu_img, (10, row_top + 10), (40, row_top + item_height), color_bgr, -1)
            cv2.putText(menu_img, class_name, (50, row_top + item_height // 2 + 5), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1, cv2.LINE_AA)

    total_height = max(screenshot_height, menu_height)
    total_width = screenshot_width + menu_width
    new_image = np.zeros((total_height, total_width, 3), dtype=np.uint8)

    new_image[:screenshot_height, :screenshot_width] = screenshot
    # Index from the left edge of the legend: a negative start of
    # -menu_width would address the whole row when menu_width is 0.
    new_image[:menu_height, screenshot_width:] = menu_img

    return new_image
|
|
|
|
|
def save_image(image, file_path):
    """Save the given image to the specified file path and report the result.

    Args:
        image: Image array to write.
        file_path: Destination path; the extension selects the file format.
    """
    # cv2.imwrite returns False (rather than raising) when the write fails,
    # so success is only reported when it actually happened.
    if cv2.imwrite(file_path, image):
        print(f"Image saved to {file_path}")
    else:
        print(f"Failed to save image to {file_path}")
|
|
|
|
|
def display_image(image):
    """Save ``image`` as 'finalle.png', show it, and return the pressed key.

    Args:
        image: Image array to save and display.

    Returns:
        The value returned by ``cv2.waitKey(0)``.
    """
    output_path = 'finalle.png'
    save_image(image, output_path)

    window_title = 'Processed Image'
    cv2.imshow(window_title, image)

    # A delay of 0 waits for a key press with no timeout.
    return cv2.waitKey(0)
|
|
|
|
|
|
|
|
|
# Train on every run. Iterating load_data() is also what fills the global
# dir_names that the detection loop below indexes into.
batch_generator = load_data(batch_size=100)
train_classifier_in_batches(batch_generator)

# Use the model exactly as it was written to disk by the training step.
clf = load('contour.joblib')

while True:
    num_classes_to_detect = int(input('Enter the number of top classes to detect: '))
    min_size = int(input('Enter minimum size for ROIs (width*height): '))
    max_size = int(input('Enter maximum size for ROIs (width*height): '))
    image_path = input('Enter the path of the image you want to process: ')

    screenshot, avg_scores, details = process_image(clf, dir_names, min_size, max_size, image_path, num_classes_to_detect)

    # Highest averaged scores sit at the end of the argsort result.
    top_indices = np.argsort(avg_scores)[-num_classes_to_detect:]
    detected_classes = [dir_names[i] for i in top_indices]

    # draw_menu's parameters after the class list are max_menu_width and
    # item_height. Passing 10 for both squeezed the legend into a 10 px
    # column with 10 px rows, so rely on the function's own defaults.
    screenshot = draw_menu(screenshot, detected_classes)
    key = display_image(screenshot)

    # 'r' starts another round; any other key ends the session.
    if key != ord('r'):
        break
|
|