diff --git a/config.yml b/config.yml index 6979d8e..91dd880 100644 --- a/config.yml +++ b/config.yml @@ -1,57 +1,15 @@ measurements: - - name: arm_length - landmarks: - - 11 - - 13 - - 15 - - - name: leg_length - landmarks: - - 23 - - 25 - - 27 - - name: shoulder_length - landmarks: - - 11 - - 12 - - - name: neck_to_hip_length - landmarks: - - 11 - - 23 - -#0 - nose -#1 - left eye (inner) -#2 - left eye -#3 - left eye (outer) -#4 - right eye (inner) -#5 - right eye -#6 - right eye (outer) -#7 - left ear -#8 - right ear -#9 - mouth (left) -#10 - mouth (right) -#11 - left shoulder -#12 - right shoulder -#13 - left elbow -#14 - right elbow -#15 - left wrist -#16 - right wrist -#17 - left pinky -#18 - right pinky -#19 - left index -#20 - right index -#21 - left thumb -#22 - right thumb -#23 - left hip -#24 - right hip -#25 - left knee -#26 - right knee -#27 - left ankle -#28 - right ankle -#29 - left heel -#30 - right heel -#31 - left foot index -#32 - right foot index - + landmarks: + - left_shoulder + - right_shoulder + - name: arm_length + landmarks: + - left_shoulder + - left_elbow + - left_wrist + - name: leg_length + landmarks: + - left_hip + - left_knee + - left_ankle diff --git a/landmarks.py b/landmarks.py index 40c3f3a..8f6280b 100644 --- a/landmarks.py +++ b/landmarks.py @@ -1,75 +1,126 @@ -import warnings +import logging import os +import warnings +import sys +os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" from tabulate import tabulate import math import argparse import cv2 -from mediapipe.python.solutions import pose -import logging - -warnings.filterwarnings("ignore", - category=UserWarning, - module="google.protobuf") - -os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" +from mediapipe.python.solutions import ( + pose, +) +import yaml +logging.basicConfig(level=logging.INFO) +warnings.filterwarnings( + "ignore", + category=UserWarning, + module="google.protobuf", +) +LANDMARK_NAME_TO_INDEX = { + "nose": 0, + "left_eye_inner": 1, + "left_eye": 2, + "left_eye_outer": 3, + "right_eye_inner": 4, + "right_eye": 5, + "right_eye_outer": 6, + "left_ear": 7, + "right_ear": 8, + "mouth_left": 9, + "mouth_right": 10, + "left_shoulder": 11, + "right_shoulder": 12, + "left_elbow": 13, + "right_elbow": 14, + "left_wrist": 15, + "right_wrist": 16, + "left_pinky": 17, + "right_pinky": 18, + "left_index": 19, + "right_index": 20, + "left_thumb": 21, + "right_thumb": 22, + "left_hip": 23, + "right_hip": 24, + "left_knee": 25, + "right_knee": 26, + "left_ankle": 27, + "right_ankle": 28, + "left_heel": 29, + "right_heel": 30, + "left_foot_index": 31, + "right_foot_index": 32, +} class Landmarker: resized_height = 256 - resized_width = 300 + resized_width = 256 def __init__(self) -> None: - args = self.parse_args() - if args.front_image == None: + self.args = self.parse_args() + self.measurements = self.load_landmarks() + if self.args.front_image is None: raise Exception("front image needs to be passed") - if args.side_image == None: + if self.args.side_image is None: raise Exception("side image needs to be passed") - self.front_image = cv2.imread(args.front_image) - self.side_image = cv2.imread(args.side_image) + self.front_image = cv2.imread(self.args.front_image) + self.side_image = cv2.imread(self.args.side_image) - self.front_image_resized = cv2.resize( - self.front_image, (self.resized_height, self.resized_width)) - self.side_image_resized = cv2.resize( - self.side_image, (self.resized_height, self.resized_width)) + self.front_image_resized = cv2.resize(self.front_image, (self.resized_height, self.resized_width)) + self.side_image_resized = cv2.resize(self.side_image, (self.resized_height, self.resized_width)) self.distances = {} - self.person_height = args.person_height - self.pixel_height = args.pixel_height + self.person_height = self.args.person_height + self.pixel_height = self.args.pixel_height self.pose = pose.Pose( static_image_mode=True, - min_detection_confidence=args.pose_detection_confidence, - min_tracking_confidence=args.pose_tracking_confidence, + min_detection_confidence=self.args.pose_detection_confidence, + min_tracking_confidence=self.args.pose_tracking_confidence, ) - self.landmarks_to_calculate = [] + self.landmarks_indices = [ - pose.PoseLandmark.LEFT_SHOULDER.value, - pose.PoseLandmark.RIGHT_SHOULDER.value, - pose.PoseLandmark.LEFT_ELBOW.value, - pose.PoseLandmark.RIGHT_ELBOW.value, - pose.PoseLandmark.LEFT_WRIST.value, - pose.PoseLandmark.RIGHT_WRIST.value, - pose.PoseLandmark.LEFT_HIP.value, - pose.PoseLandmark.RIGHT_HIP.value, - pose.PoseLandmark.LEFT_KNEE.value, - pose.PoseLandmark.RIGHT_KNEE.value, - pose.PoseLandmark.LEFT_ANKLE.value, - pose.PoseLandmark.RIGHT_ANKLE.value, + LANDMARK_NAME_TO_INDEX["left_shoulder"], + LANDMARK_NAME_TO_INDEX["right_shoulder"], + LANDMARK_NAME_TO_INDEX["left_elbow"], + LANDMARK_NAME_TO_INDEX["right_elbow"], + LANDMARK_NAME_TO_INDEX["left_wrist"], + LANDMARK_NAME_TO_INDEX["right_wrist"], + LANDMARK_NAME_TO_INDEX["left_hip"], + LANDMARK_NAME_TO_INDEX["right_hip"], + LANDMARK_NAME_TO_INDEX["left_knee"], + LANDMARK_NAME_TO_INDEX["right_knee"], + LANDMARK_NAME_TO_INDEX["left_ankle"], + LANDMARK_NAME_TO_INDEX["right_ankle"], ] + def load_landmarks(self): + with open(self.args.yaml_file, "r") as file: + landmarks_data = yaml.safe_load(file) + measurements = {} + for measurement in landmarks_data["measurements"]: + measurements[measurement["name"]] = [LANDMARK_NAME_TO_INDEX[l] for l in measurement["landmarks"]] + return measurements + def parse_args(self): parser = argparse.ArgumentParser() - parser.add_argument("--front", - dest="front_image", - type=str, - help="Front image") - parser.add_argument("--side", - dest="side_image", - type=str, - help="Side image") + parser.add_argument( + "--front", + dest="front_image", + type=str, + help="Front image", + ) + parser.add_argument( + "--side", + dest="side_image", + type=str, + help="Side image", + ) parser.add_argument( "--pose_detection_confidence", dest="pose_detection_confidence", @@ -86,43 +137,74 @@ class Landmarker: ) parser.add_argument( "--person_height", - # default=153, dest="person_height", type=int, help="person height of person", ) parser.add_argument( "--pixel_height", - # default=216, dest="pixel_height", type=int, help="pixel height of person", ) + parser.add_argument( + "--measurement", + dest="measurement", + nargs="+", + type=str, + help="Type of measurement", + ) + parser.add_argument( + "--yaml_file", + dest="yaml_file", + type=str, + help="Path to the YAML file containing landmarks", + ) return parser.parse_args() - + def run(self): - - logging.warning("person's height: %s", self.person_height) - - logging.warning("person's pixel height: %s", self.pixel_height) - - front_results, side_results = self.process_images() + front_results, _ = self.process_images() self.get_center_top_point(front_results) - self.calculate_distance_betn_landmarks(front_results) + table = [] + if self.args.measurement: + for m in self.args.measurement: + if m not in self.measurements: + raise Exception("Incorrect input (input not present in config.yml)") + else: + distance = self.calculate_distance_betn_landmarks(front_results, m) + table.append([m, distance]) + else: + for m in self.measurements: + distance = self.calculate_distance_betn_landmarks(front_results, m) + table.append([m, distance]) - self.output() - - self.display_images() + output = tabulate( + table, + headers=[ + "measurement", + "Distance (cm)", + ], + tablefmt="plain", + ) + print(output) self.pose.close() def process_images(self): front_results = self.pose.process( - cv2.cvtColor(self.front_image_resized, cv2.COLOR_BGR2RGB)) + cv2.cvtColor( + self.front_image_resized, + cv2.COLOR_BGR2RGB, + ) + ) side_results = self.pose.process( - cv2.cvtColor(self.side_image_resized, cv2.COLOR_BGR2RGB)) + cv2.cvtColor( + self.side_image_resized, + cv2.COLOR_BGR2RGB, + ) + ) self.side_image_keypoints = self.side_image_resized.copy() self.front_image_keypoints = self.front_image_resized.copy() @@ -139,12 +221,18 @@ class Landmarker: side_results.pose_landmarks, # type: ignore# type: ignore self.landmarks_indices, ) - return front_results, side_results + return ( + front_results, + side_results, + ) def pixel_to_metric_ratio(self): self.pixel_height = self.pixel_distance * 2 pixel_to_metric_ratio = self.person_height / self.pixel_height - logging.warning("pixel_to_metric_ratio %s", pixel_to_metric_ratio) + logging.debug( + "pixel_to_metric_ratio %s", + pixel_to_metric_ratio, + ) return pixel_to_metric_ratio def draw_landmarks(self, image, landmarks, indices): @@ -155,144 +243,90 @@ class Landmarker: self.circle(image, cx, cy) def circle(self, image, cx, cy): - return cv2.circle(image, (cx, cy), 2, (255, 0, 0), -1) + return cv2.circle( + image, + (cx, cy), + 2, + (255, 0, 0), + -1, + ) - def output(self): - table = [] - for landmark, distance in self.distances.items(): - table.append([landmark.replace("_", " "), distance]) - output = tabulate(table, - headers=["measurement", "value"], - tablefmt="grid") - print(output) - - def calculate_distance_betn_landmarks(self, front_results, landmarks=[]): + def calculate_distance_betn_landmarks( + self, + front_results, + measurement_name, + ): if not front_results.pose_landmarks: return landmarks = front_results.pose_landmarks.landmark - leg_landmarks = [ - pose.PoseLandmark.LEFT_HIP, - pose.PoseLandmark.LEFT_KNEE, - pose.PoseLandmark.LEFT_ANKLE, - ] - hand_landmarks = [ - pose.PoseLandmark.LEFT_SHOULDER, - pose.PoseLandmark.LEFT_ELBOW, - pose.PoseLandmark.LEFT_WRIST, - ] - self.landmarks_to_calculate = leg_landmarks + hand_landmarks - # self.landmarks_to_calculate = [ - # pose.PoseLandmark.LEFT_SHOULDER, - # pose.PoseLandmark.LEFT_ELBOW, - # pose.PoseLandmark.LEFT_WRIST, - # ] + landmark_names = self.measurements[measurement_name] - table = [] - for idx, l in enumerate(self.landmarks_to_calculate): - if idx < len(self.landmarks_to_calculate) - 1: - _current = landmarks[l.value] - _nextl = self.landmarks_to_calculate[idx + 1] - _next = landmarks[_nextl.value] - pixel_distance = self.euclidean_distance( - _current.x * self.resized_width, - _current.y * self.resized_height, - _next.x * self.resized_width, - _next.y * self.resized_height) - real_distance = pixel_distance * self.pixel_to_metric_ratio() - table.append([l.name, _nextl.name, real_distance]) - - output = tabulate( - table, - headers=["Landmark 1", "Landmark 2", "Distance (cm)"], - tablefmt="grid") - print(output) - - -# for l in self.landmarks_to_calculate: -# real_distance = 0 -# for idx, l in enumerate(self.landmarks_to_calculate): -# if idx < len(self.landmarks_to_calculate) - 1: -# _current = landmarks[l.value] -# _nextl = self.landmarks_to_calculate[idx + 1] -# _next = landmarks[_nextl.value] -# pixel_distance = self.euclidean_distance( -# _current.x * self.resized_width, -# _current.y * self.resized_height, -# _next.x * self.resized_width, -# _next.y * self.resized_height, -# ) -# real_distance += pixel_distance * self.pixel_to_metric_ratio( -# ) -# print(real_distance) -# self.distances[l.name] = real_distance -# + total_distance = 0 + for idx in range(len(landmark_names) - 1): + _current = landmarks[landmark_names[idx]] + _next = landmarks[landmark_names[idx + 1]] + pixel_distance = self.euclidean_distance( + _current.x * self.resized_width, + _current.y * self.resized_height, + _next.x * self.resized_width, + _next.y * self.resized_height, + ) + real_distance = pixel_distance * self.pixel_to_metric_ratio() + total_distance += real_distance + return total_distance def euclidean_distance(self, x1, y1, x2, y2): - distance = math.sqrt((x2 - x1)**2 + (y2 - y1)**2) + distance = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2) return distance - def destroy(self): - cv2.destroyAllWindows() - - def display_images(self): - cv2.imshow("front_image_keypoints", self.front_image_keypoints) - cv2.imshow("side_image_keypoints", self.side_image_keypoints) - cv2.imshow("edges", self.edges) - cv2.waitKey(0) - def get_center_top_point(self, side_results): - gray_image = cv2.cvtColor(self.side_image_keypoints, - cv2.COLOR_BGR2GRAY) + gray_image = cv2.cvtColor( + self.side_image_keypoints, + cv2.COLOR_BGR2GRAY, + ) blurred_image = cv2.GaussianBlur(gray_image, (5, 5), 0) - roi = blurred_image[0:int(self.side_image_resized.shape[0] / 2), :] + roi = blurred_image[ + 0 : int(self.side_image_resized.shape[0] / 2), + :, + ] self.edges = cv2.Canny(roi, 50, 150) - contours, _ = cv2.findContours(self.edges, cv2.RETR_EXTERNAL, - cv2.CHAIN_APPROX_SIMPLE) - xt, yt = None, None - self.topmost_point = None + contours, _ = cv2.findContours( + self.edges.copy(), + cv2.RETR_TREE, + cv2.CHAIN_APPROX_SIMPLE, + ) + max_contour = max(contours, key=cv2.contourArea) + rect = cv2.minAreaRect(max_contour) + box = cv2.boxPoints(rect) + box = sorted( + list(box), + key=lambda p: p[1], + ) + top_point = min( + box[0], + box[1], + key=lambda p: p[0], + ) - if contours: - largest_contour = max(contours, key=cv2.contourArea) - self.topmost_point = tuple( - largest_contour[largest_contour[:, :, 1].argmin()][0]) - xt, yt = self.topmost_point - self.circle(self.side_image_keypoints, xt, yt) + left_hip = side_results.pose_landmarks.landmark[LANDMARK_NAME_TO_INDEX["left_hip"]] + right_hip = side_results.pose_landmarks.landmark[LANDMARK_NAME_TO_INDEX["right_hip"]] + + center_x = (left_hip.x + right_hip.x) / 2 + center_y = (left_hip.y + right_hip.y) / 2 + + center_x, center_y = ( + int(center_x * self.resized_width), + int(center_y * self.resized_height), + ) + + self.pixel_distance = self.euclidean_distance( + top_point[0], + top_point[1], + center_x, + center_y, + ) - logging.warning("xt: %s", xt) - logging.warning("yt: %s", yt) - xc, yc = None, None - landmarks = side_results.pose_landmarks.landmark - - if side_results.pose_landmarks: - left_hip = landmarks[pose.PoseLandmark.LEFT_HIP.value] - right_hip = landmarks[pose.PoseLandmark.RIGHT_HIP.value] - center_point = ( - (left_hip.x + right_hip.x) / 2, - (left_hip.y + right_hip.y) / 2, - ) - center_point = ( - int(center_point[0] * self.side_image_resized.shape[1]), - int(center_point[1] * self.side_image_resized.shape[0]), - ) - xc, yc = center_point - logging.warning("xc: %s", xc) - logging.warning("yc: %s", yc) - self.circle(self.side_image_keypoints, xc, yc) - - self.pixel_distance = self.euclidean_distance(xc, yc, xt, yt) - logging.warning("top_center_pixel_distance: %s", - self.pixel_distance) - self.pixel_height = self.pixel_distance * 2 - logging.warning("pxl height: %s ", self.pixel_height) - self.distance = (self.euclidean_distance(xc, yc, xt, yt) * - self.pixel_to_metric_ratio()) - return self.distance - -l = Landmarker() -try: - l.run() -except: - print("error") -finally: - l.destroy() +if __name__ == "__main__": + landmarker = Landmarker() + landmarker.run()