import cv2
import numpy as np
from PIL import Image
import mediapipe as mp
import time
"""
This code can not be run on HuggingFace's Spaces App due to constraints
brought by Gradio's limited input and output functionality
This features both more and less functions
- Same "pen-holding" gesture to write, let go of the pen to lift off the "paper"
- Open palm facing front gesture to save a copy of the paper to home directory
- Thumbs up gesture to clear the page
*** Install dependencies from requirements.txt
*** packages.txt is device dependent
"""
def find_hands(brain, img):
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV frames are BGR, but MediaPipe is trained on RGB
    results = brain.process(img_rgb)  # process() finds the hands and outputs a classification plus 21 landmarks per hand
    hands_landmarks = []  # holds one dictionary per detected hand
    h, w, _ = img.shape  # image height and width, used to scale the normalized landmarks
    if results.multi_hand_landmarks:
        for hand_type, hand_lms in zip(results.multi_handedness, results.multi_hand_landmarks):  # traverse handedness and landmarks together
            hand = {}  # dictionary for this hand
            lm_list = []  # pixel coordinates for all 21 points of the hand
            for lm in hand_lms.landmark:
                px, py, pz = int(lm.x * w), int(lm.y * h), int(lm.z * w)  # scale landmark points to frame coordinates
                lm_list.append([px, py, pz])
            hand["lm_list"] = lm_list  # "lm_list" key holds all landmark points of the hand
            hand["type"] = hand_type.classification[0].label  # handedness label ("Left" or "Right")
            hands_landmarks.append(hand)
    return hands_landmarks
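# Illustrative only (made-up numbers): with one right hand in frame,
# find_hands returns a structure shaped like
#   [{"lm_list": [[412, 530, -3], [455, 512, -9], ..., [390, 247, -41]],  # 21 [x, y, z] points in pixels
#     "type": "Right"}]
# lm_list follows MediaPipe's hand-landmark numbering, so lm_list[8] is the
# index fingertip and lm_list[4] is the thumb tip.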
def is_drawing(index, thumb):  # proximity check with an arbitrary 30-pixel threshold
    npindex = np.array((index[0], index[1]))
    npthumb = np.array((thumb[0], thumb[1]))
    return np.linalg.norm(npindex - npthumb) < 30
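# Illustrative only (made-up coordinates): is_drawing([640, 360, 0], [655, 372, 0])
# compares the index-tip/thumb-tip distance (about 19 px here) against the
# 30 px threshold, so this pinch would count as drawing.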
def save(landmarks):  # brute-force finger orientation check for an open palm
    # every fingertip must sit above (smaller y than) its PIP joint
    return (landmarks[8][1] < landmarks[6][1]
            and landmarks[12][1] < landmarks[10][1]
            and landmarks[16][1] < landmarks[14][1]
            and landmarks[20][1] < landmarks[18][1])
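# The comparisons in save() rely on MediaPipe's landmark numbering: 8/12/16/20
# are the index/middle/ring/pinky fingertips and 6/10/14/18 the corresponding
# PIP joints, so an open palm facing the camera has every fingertip above its
# middle joint.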
def clear(landmarks):  # brute-force finger orientation check for a thumbs up
    return landmarks[4][1] < landmarks[3][1] < landmarks[2][1] < landmarks[8][1]
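# Illustrative only (made-up y values): with thumb tip y=300 (landmark 4),
# thumb IP y=340 (3), thumb MCP y=380 (2), and index fingertip y=430 (8),
# the chain 300 < 340 < 380 < 430 holds, so the gesture reads as a thumbs up.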
DOMINANT_HAND = "Right"
width, height = 1280, 720
width_, height_ = 256, 144  # size of the camera preview embedded in the paper's corner
drawing_flag = False
sleepy_time = time.time()
if __name__ == '__main__':
    cam = cv2.VideoCapture(0)
    cam.set(cv2.CAP_PROP_FRAME_WIDTH, width)
    cam.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
    detector = mp.solutions.hands.Hands(min_detection_confidence=0.8)  # initialize the MediaPipe hands model
    # paper = np.zeros((width, height, 4), np.uint8)
    paper = np.zeros((height, width, 3), dtype=np.uint8)  # create a blank page
    paper.fill(255)
    past_holder = ()  # holds the previous fingertip coordinates
    palette = cv2.imread('palette.jpg')
    output_frames = []
    page_num = 0
    # runny = 1
    color = (0, 0, 0)
    while True:
        # runny -= 1
        success, rgb_image = cam.read()
        rgb_image_f = cv2.flip(np.asanyarray(rgb_image), 1)  # mirror the frame so drawing feels natural
        hands = find_hands(detector, rgb_image_f)
        try:
            if hands:
                hand1 = hands[0] if hands[0]["type"] == DOMINANT_HAND else hands[1]
                lm_list1 = hand1["lm_list"]  # list of 21 landmarks
                handedness = hand1["type"]
                if handedness == DOMINANT_HAND:
                    idx_coords = lm_list1[8][0], lm_list1[8][1]  # index fingertip; index 0 is the wider (width) axis
                    # print(idx_coords)
                    cv2.circle(rgb_image_f, idx_coords, 5, color, cv2.FILLED)
                    if idx_coords[1] < 72:  # fingertip is inside the palette strip; brute force, but marginally faster than a loop
                        if idx_coords[0] < 142:  # red
                            color = (0, 0, 255)
                        if 142 < idx_coords[0] < 285:  # orange
                            color = (0, 115, 255)
                        if 285 < idx_coords[0] < 426:  # yellow
                            color = (0, 229, 255)
                        if 426 < idx_coords[0] < 569:  # green
                            color = (0, 195, 88)
                        if 569 < idx_coords[0] < 711:  # blue
                            color = (195, 85, 0)
                        if 711 < idx_coords[0] < 853:  # indigo
                            color = (195, 0, 68)
                        if 853 < idx_coords[0] < 996:  # violet
                            color = (195, 0, 143)
                        if 996 < idx_coords[0] < 1137:  # black
                            color = (0, 0, 0)
                        if 1137 < idx_coords[0]:  # white / eraser
                            color = (255, 255, 255)
                    if len(past_holder) and drawing_flag:  # draw a segment from the previous fingertip position
                        cv2.line(paper, past_holder, idx_coords, color, 5)
                        cv2.line(rgb_image_f, past_holder, idx_coords, color, 5)
                        # paper[idx_coords[0]][idx_coords[1]][0] = 255
                        # paper[idx_coords[0]][idx_coords[1]][3] = 255
                        cv2.circle(rgb_image_f, idx_coords, 5, color, cv2.FILLED)
                    if save(lm_list1) and time.time() - sleepy_time > 3:  # save page; the 3-second cooldown keeps it from firing every frame
                        paper[0:height_, w - width_: w] = 255  # blank out the embedded camera preview before saving
                        paper = cv2.cvtColor(paper, cv2.COLOR_BGR2RGB)
                        im = Image.fromarray(paper)
                        im.save("paper%s.png" % page_num)
                        print("saved")
                        sleepy_time = time.time()
                        paper = cv2.cvtColor(paper, cv2.COLOR_RGB2BGR)
                        page_num += 1
                    if clear(lm_list1) and time.time() - sleepy_time > 3:  # clear page
                        paper = np.zeros((height, width, 3), dtype=np.uint8)
                        paper.fill(255)
                        print("page cleared")
                        sleepy_time = time.time()
                    past_holder = idx_coords
                    if is_drawing(idx_coords, lm_list1[4]):  # landmark 4 is the thumb tip; pinching it mimics holding a pen
                        drawing_flag = True
                    else:
                        drawing_flag = False
        except Exception:  # a missing second hand or a frame size not yet set is simply skipped for this frame
            pass
        finally:
            rgb_image_f[0:72, ] = palette  # overlay the color palette strip along the top of the frame
            presenter = cv2.resize(rgb_image_f, (width_, height_))
            h, w, _ = rgb_image_f.shape
            paper[0:height_, w - width_: w] = presenter  # embed a small camera preview in the paper's top-right corner
            cv2.imshow("Image", rgb_image_f)
            cv2.imshow("paper", paper)
            key = cv2.waitKey(1)
            if key & 0xFF == ord('q') or key == 27:  # press Esc or 'q' to close the image window
                break
    cam.release()
    cv2.destroyAllWindows()