Ocillus committed on
Commit
76da613
·
verified ·
1 Parent(s): 4133ed9

Upload 7 files

Browse files
Files changed (7) hide show
  1. Celsia.py +186 -0
  2. Debris Wood 04 copy.png +0 -0
  3. clustering.py +31 -0
  4. contour.joblib +3 -0
  5. finalle.png +0 -0
  6. inference.py +80 -16
  7. spectrograms.zip +3 -0
Celsia.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2
2
+ import numpy as np
3
+ from sklearn import svm
4
+ import os
5
+ import random
6
+ from concurrent.futures import ThreadPoolExecutor
7
+ from joblib import dump, load
8
+ import pytesseract
9
+ from tqdm import tqdm
10
+
11
+ pytesseract.pytesseract.tesseract_cmd = '/opt/homebrew/bin/tesseract'
12
+ dir_names = []
13
+
14
+ try:
15
+ clf = load('contour.joblib')
16
+ except:
17
def load_data(batch_size=100):
    """Yield training batches of flattened grayscale spectrogram images.

    Every sub-directory of ``organized_spectrograms`` is treated as one class.
    The class label is the directory's index in the module-level ``dir_names``
    list, which this generator rebuilds as a side effect.

    Args:
        batch_size: Minimum number of samples to collect before a batch is
            yielded. Batches are only cut on directory boundaries, so a batch
            may hold more samples than this.

    Yields:
        ``(X, y)`` tuples: ``X`` is a 2-D array with one flattened image per
        row and ``y`` holds the matching integer class labels.
    """
    global dir_names
    img_shape = (300, 700)  # dsize handed to cv2.resize, i.e. (width, height)
    root = 'organized_spectrograms'

    # Sort so label indices do not depend on filesystem enumeration order, and
    # keep directories only: a stray file here would make os.listdir() below
    # raise NotADirectoryError.
    dir_names = [
        'organized_spectrograms/' + d
        for d in sorted(os.listdir(root))
        if d != ".DS_Store" and '.py' not in d and os.path.isdir(os.path.join(root, d))
    ]
    X, y = [], []

    def load_and_resize_image(path):
        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(path, 0)
        if img is None:
            return None
        return cv2.resize(img, img_shape)

    for i, dir_name in tqdm(enumerate(dir_names), desc="Loading Directories", total=len(dir_names)):
        paths = [
            os.path.join(dir_name, f)
            for f in os.listdir(dir_name)
            if f.endswith(('.jpg', '.png'))
        ]
        with ThreadPoolExecutor(max_workers=20) as executor:
            loaded = executor.map(load_and_resize_image, paths)
            # Drop files that could not be decoded rather than crash on them.
            images = [img for img in loaded if img is not None]

        if not images:
            print(f'Error: No images found in {dir_name}')
            continue

        X.extend(img.flatten() for img in images)
        y.extend([i] * len(images))

        if len(X) >= batch_size:
            yield np.array(X), np.array(y)
            X, y = [], []

    # Flush whatever is left after the last directory.
    if X:
        yield np.array(X), np.array(y)
43
+
44
def train_classifier_in_batches(batch_generator, batch_size=100):
    """Train an SVM on all batches from *batch_generator* and save it.

    ``svm.SVC`` has no incremental training: every ``fit`` call discards the
    previous fit. Fitting batch by batch would therefore keep only the last
    batch, and would raise on any batch holding a single class. The batches
    are gathered first and the classifier is fitted once on the full set.

    Args:
        batch_generator: Iterable of ``(X, y)`` array pairs.
        batch_size: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If no non-empty batch was produced, so that an unfitted
            model is never written to ``contour.joblib``.
    """
    X_parts, y_parts = [], []

    for X_batch, y_batch in tqdm(batch_generator, desc="Training SVM in Batches"):
        if X_batch.size > 0 and y_batch.size > 0:
            X_parts.append(X_batch)
            y_parts.append(y_batch)

    if not X_parts:
        raise ValueError('No training data: every batch was empty')

    clf = svm.SVC()
    clf.fit(np.concatenate(X_parts), np.concatenate(y_parts))

    dump(clf, 'contour.joblib')
52
+
53
def process_image(clf, dir_names, min_size, max_size, image_path, num_classes_to_detect):
    """Find, classify and annotate regions of interest in an image.

    The image is thresholded at 127 and its contours are extracted. Each
    contour whose bounding-box area lies in ``[min_size, max_size]`` is
    resized to the training shape and scored with ``clf.decision_function``.
    For each such region a rectangle is drawn on the image and the grayscale
    crop is saved under ``<predicted class>_negative/``.

    Args:
        clf: Fitted classifier exposing ``decision_function``.
        dir_names: Class names, indexed by class label.
        min_size: Smallest accepted bounding-box area (width * height).
        max_size: Largest accepted bounding-box area (width * height).
        image_path: Path of the image to analyse.
        num_classes_to_detect: How many top-scoring classes per region
            contribute to the averaged scores.

    Returns:
        ``(annotated_image, avg_scores, details)``. ``avg_scores`` has one
        entry per class. ``details`` has one dict per accepted region with
        keys ``x``, ``y``, ``w``, ``h`` and ``class``.

    Raises:
        OSError: If the image cannot be read.
    """
    img_shape = (300, 700)  # This is the shape used during training

    # cv2.imread returns None on failure; fail here with a clear message
    # instead of an opaque error from cvtColor.
    screenshot = cv2.imread(image_path)
    if screenshot is None:
        raise OSError(f'Could not read image (missing file or unsupported format): {image_path}')
    screenshot_gray = cv2.cvtColor(screenshot, cv2.COLOR_BGR2GRAY)

    _, thresh = cv2.threshold(screenshot_gray, 127, 255, 0)
    contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

    def score_contour(contour):
        # Runs in a worker thread: reads shared data but never writes it.
        x, y, w, h = cv2.boundingRect(contour)
        if not min_size <= w * h <= max_size:
            return None
        roi = screenshot_gray[y:y + h, x:x + w]
        # Resize the ROI to match the training shape
        roi_resized = cv2.resize(roi, img_shape).flatten().reshape(1, -1)
        return (x, y, w, h), roi, clf.decision_function(roi_resized)[0]

    # Consuming the map results re-raises worker exceptions here instead of
    # silently dropping them.
    with ThreadPoolExecutor(max_workers=5) as executor:
        scored = [hit for hit in executor.map(score_contour, contours) if hit is not None]

    # All shared state is updated on the calling thread only, which avoids
    # races on the counters and on the image being drawn.
    avg_scores = np.zeros(len(dir_names))
    details = []
    for (x, y, w, h), roi, scores in scored:
        # Accumulate only the top-N class scores of this region
        top_indices = np.argsort(scores)[-num_classes_to_detect:]
        for idx in top_indices:
            avg_scores[idx] += scores[idx]

        best_class = dir_names[np.argmax(scores)]
        # NOTE: str hashes are salted per process, so a class keeps its colour
        # within one run but not across runs.
        color_hash = hash(best_class) & 0xffffff
        color_bgr = ((color_hash >> 16) & 0xff), ((color_hash >> 8) & 0xff), (color_hash & 0xff)
        cv2.rectangle(screenshot, (x, y), (x + w, y + h), color_bgr, 2)
        details.append({'x': x, 'y': y, 'w': w, 'h': h, 'class': best_class})

        negative_dir_name = f"{best_class}_negative"
        os.makedirs(negative_dir_name, exist_ok=True)
        cv2.imwrite(os.path.join(negative_dir_name, str(random.randint(1, 999999999)) + '.png'), roi)

    num_rois = len(scored)
    if num_rois > 0:
        avg_scores /= num_rois
    else:
        print('Warning: No ROIs were processed')

    return screenshot, avg_scores, details
105
+
106
def draw_menu(screenshot, detected_classes, max_menu_width=200, item_height=30, font_scale=0.5):
    """Return a new image: *screenshot* with a class legend on its right.

    Args:
        screenshot: Image array; 3-channel, or 2-D grayscale (converted to
            3 channels before compositing).
        detected_classes: Class names to list, one legend row each.
        max_menu_width: Upper bound for the legend width in pixels; the
            legend never exceeds a quarter of the screenshot width.
        item_height: Height of one legend row in pixels.
        font_scale: Font scale passed to ``cv2.putText``.

    Returns:
        A new ``uint8`` image as tall as the taller of screenshot and legend,
        and as wide as both side by side.
    """
    # A 2-D image cannot be assigned into the 3-channel canvas below.
    if screenshot.ndim == 2:
        screenshot = cv2.cvtColor(screenshot, cv2.COLOR_GRAY2BGR)

    screenshot_height, screenshot_width = screenshot.shape[:2]

    # One row per class plus 20 px of padding
    menu_height = len(detected_classes) * item_height + 20
    menu_width = min(max_menu_width, screenshot_width // 4)

    menu_img = np.zeros((menu_height, menu_width, 3), dtype=np.uint8)

    for i, class_name in enumerate(detected_classes):
        # Derive the swatch colour from the class name's hash
        color_hash = hash(class_name) & 0xffffff
        color_bgr = ((color_hash >> 16) & 0xff), ((color_hash >> 8) & 0xff), (color_hash & 0xff)

        row_top = i * item_height
        cv2.rectangle(menu_img, (10, row_top + 10), (40, row_top + item_height), color_bgr, -1)
        cv2.putText(menu_img, class_name, (50, row_top + item_height // 2 + 5), cv2.FONT_HERSHEY_SIMPLEX, font_scale, (255, 255, 255), 1, cv2.LINE_AA)

    total_height = max(screenshot_height, menu_height)
    total_width = screenshot_width + menu_width
    new_image = np.zeros((total_height, total_width, 3), dtype=np.uint8)

    new_image[:screenshot_height, :screenshot_width] = screenshot

    # Start the legend at an explicit column. A "-menu_width:" slice would
    # select every column when menu_width is 0 and fail to broadcast.
    new_image[:menu_height, screenshot_width:] = menu_img

    return new_image
143
+
144
+
145
def save_image(image, file_path):
    """Write *image* to *file_path* and report whether it worked.

    ``cv2.imwrite`` reports failure through its return value, so the result
    is checked before a success message is printed.

    Returns:
        The truthy/falsy result of ``cv2.imwrite``.
    """
    saved = cv2.imwrite(file_path, image)
    if saved:
        print(f"Image saved to {file_path}")
    else:
        print(f"Error: could not save image to {file_path}")
    return saved
149
+
150
+
151
def display_image(image):
    """Save *image* as 'finalle.png', show it, and return the pressed key.

    Returns:
        The key code from ``cv2.waitKey(0)``.
    """
    save_image(image, 'finalle.png')
    cv2.imshow('Processed Image', image)
    return cv2.waitKey(0)
156
+
157
+
158
+
159
# Stream the training images in batches, then fit and persist the classifier.
batch_generator = load_data(batch_size=100)  # Adjust batch size as needed
train_classifier_in_batches(batch_generator)

# Read the persisted classifier back from disk.
clf = load('contour.joblib')

menu_height = 10
menu_width = 10

# Interactive loop: process one image per pass; pressing 'r' in the preview
# window starts another pass, any other key ends the program.
while True:
    num_classes_to_detect = int(input('Enter the number of top classes to detect: '))
    min_size = int(input('Enter minimum size for ROIs (width*height): '))
    max_size = int(input('Enter maximum size for ROIs (width*height): '))
    image_path = input('Enter the path of the image you want to process: ')

    screenshot, avg_scores, details = process_image(
        clf, dir_names, min_size, max_size, image_path, num_classes_to_detect
    )

    # Keep only the highest-scoring classes for the legend.
    top_indices = np.argsort(avg_scores)[-num_classes_to_detect:]
    detected_classes = [dir_names[i] for i in top_indices]

    # NOTE(review): these two values are passed positionally, so they bind to
    # draw_menu's max_menu_width and item_height parameters - confirm intent.
    screenshot = draw_menu(screenshot, detected_classes, menu_height, menu_width)
    key = display_image(screenshot)

    if key != ord('r'):
        break
Debris Wood 04 copy.png ADDED
clustering.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from shutil import copyfile
3
+ from tqdm import tqdm # Import tqdm for progress bars
4
+
5
def organize_images_by_class(input_folder, output_folder):
    """Copy spectrogram PNGs into one sub-folder of *output_folder* per class.

    The class name is the file name minus its last space-separated token,
    e.g. ``"Debris Wood 04.png"`` goes to ``"Debris Wood"``. A file name
    without spaces has no such token; its stem is used as the class so the
    file does not end up loose in *output_folder*.

    Args:
        input_folder: Folder holding the ``.png`` spectrograms.
        output_folder: Root folder for the per-class sub-folders; created
            if missing.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Get a list of all spectrogram images
    spectrogram_files = [f for f in os.listdir(input_folder) if f.endswith('.png')]

    print("Organizing files by class names...")
    for file in tqdm(spectrogram_files, desc="Copying Files"):
        # Everything before the last space; empty when there is no space,
        # in which case fall back to the name without its extension.
        class_name = file.rpartition(" ")[0] or os.path.splitext(file)[0]
        class_folder = os.path.join(output_folder, class_name)

        os.makedirs(class_folder, exist_ok=True)

        # Copy the file to the respective class folder
        copyfile(os.path.join(input_folder, file), os.path.join(class_folder, file))
26
+
27
if __name__ == "__main__":
    # Sort the flat 'spectrograms' folder into per-class folders under
    # 'organized_spectrograms'.
    organize_images_by_class(
        input_folder='spectrograms',
        output_folder='organized_spectrograms',
    )
contour.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c78563286a55ff397879c5d1cdc0b7119114462767c8311f8a687ee11a97014
3
+ size 158002621
finalle.png ADDED
inference.py CHANGED
@@ -1,21 +1,85 @@
 
 
 
 
 
 
1
 
2
- # Example usage
3
- if __name__ == "__main__":
4
- # Load model and label encoder
5
- def load_model_and_encoder(model_path, label_encoder_path):
6
- model = tf.keras.models.load_model(model_path)
7
- classes = np.load(label_encoder_path, allow_pickle=True)
8
- label_encoder = LabelEncoder()
9
- label_encoder.classes_ = classes
10
- return model, label_encoder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
- model_path = 'sound_classification_model.h5'
13
- label_encoder_path = 'label_encoder.npy'
14
- audio_path = 'Emmi Elliott - Face to Face.wav'
 
 
 
 
 
 
15
 
16
- model, label_encoder = load_model_and_encoder(model_path, label_encoder_path)
 
17
 
18
- sound_identifications = classify_audio(audio_path, model, label_encoder)
 
19
 
20
- for time, label in sound_identifications:
21
- print(f'[{time:.2f} seconds] Class: {label}')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+ from scipy.io import wavfile
5
+ from scipy import signal
6
+ from PIL import Image
7
 
8
def create_spectrogram(wav_file, output_folder):
    """Render *wav_file* as an axis-free spectrogram PNG in *output_folder*.

    The PNG takes the WAV file's base name. After saving, pure-white pixels
    in the image are turned black via ``convert_white_to_black``.

    Args:
        wav_file: Path of the WAV file to read.
        output_folder: Existing folder that receives the PNG.
    """
    # Read the wav file
    sample_rate, data = wavfile.read(wav_file)

    # Convert data to mono if it's stereo
    if data.ndim == 2:
        data = np.mean(data, axis=1)

    frequencies, times, spectrogram = signal.spectrogram(data, sample_rate)

    # Zero-power bins give log10(0) = -inf. The values are left unchanged so
    # the rendering stays the same; only the divide-by-zero warning is
    # silenced.
    with np.errstate(divide='ignore'):
        power_db = 10 * np.log10(spectrogram)

    filename = os.path.splitext(os.path.basename(wav_file))[0] + '.png'
    image_path = os.path.join(output_folder, filename)

    fig = plt.figure(figsize=(10, 4))
    try:
        plt.pcolormesh(times, frequencies, power_db, shading='gouraud', cmap='inferno')

        # Set the y-axis limit to the Nyquist frequency
        plt.ylim(0, sample_rate / 2)

        # Remove axes and labels
        plt.axis('off')

        ax = plt.gca()

        # Show exactly the span from 0 to the last time point
        ax.set_xlim(0, times[-1])

        # Fill the area to the right of the spectrogram with black
        ax.add_patch(plt.Rectangle((times[-1], 0), 10, sample_rate / 2, facecolor='black', edgecolor='none'))

        plt.savefig(image_path, bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if plotting or saving failed;
        # otherwise figures pile up when many files are converted.
        plt.close(fig)

    # Convert white pixels to black
    convert_white_to_black(image_path)
47
 
48
def convert_white_to_black(image_path):
    """Replace every pure-white pixel of the image at *image_path* with black.

    The image is converted to RGB, modified, and written back to the same
    path.

    Args:
        image_path: Path of the image to rewrite in place.
    """
    # The context manager closes the file handle before the same path is
    # written again below.
    with Image.open(image_path) as img:
        data = np.array(img.convert("RGB"))

    # A pixel is white when all three channels equal 255
    white_pixels = np.all(data == 255, axis=-1)
    data[white_pixels] = [0, 0, 0]

    Image.fromarray(data).save(image_path)
69
+
70
def convert_wav_to_spectrograms(input_folder, output_folder):
    """Convert every WAV file in *input_folder* to a spectrogram image.

    Args:
        input_folder: Folder scanned (non-recursively) for WAV files.
        output_folder: Folder that receives the images; created, including
            missing parents, if it does not exist.
    """
    os.makedirs(output_folder, exist_ok=True)

    for file in os.listdir(input_folder):
        # Match the extension case-insensitively so ".WAV" files are
        # converted too.
        if file.lower().endswith('.wav'):
            wav_file_path = os.path.join(input_folder, file)
            create_spectrogram(wav_file_path, output_folder)
            print(f"Converted {file} to spectrogram.")
81
+
82
if __name__ == "__main__":
    # Read WAV files from 'dataset' and write their spectrogram images to
    # 'spectrograms'.
    convert_wav_to_spectrograms(
        input_folder='dataset',
        output_folder='spectrograms',
    )
spectrograms.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b5205e9692cf341d2616c815d9c33e7e2bc23bccd42d5fad97bc74efd80104c
3
+ size 320872300