import logging import os import warnings import sys os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3" from tabulate import tabulate import math import argparse import cv2 from mediapipe.python.solutions import ( pose, ) import yaml logging.basicConfig(level=logging.INFO) warnings.filterwarnings( "ignore", category=UserWarning, module="google.protobuf", ) LANDMARK_NAME_TO_INDEX = { "nose": 0, "left_eye_inner": 1, "left_eye": 2, "left_eye_outer": 3, "right_eye_inner": 4, "right_eye": 5, "right_eye_outer": 6, "left_ear": 7, "right_ear": 8, "mouth_left": 9, "mouth_right": 10, "left_shoulder": 11, "right_shoulder": 12, "left_elbow": 13, "right_elbow": 14, "left_wrist": 15, "right_wrist": 16, "left_pinky": 17, "right_pinky": 18, "left_index": 19, "right_index": 20, "left_thumb": 21, "right_thumb": 22, "left_hip": 23, "right_hip": 24, "left_knee": 25, "right_knee": 26, "left_ankle": 27, "right_ankle": 28, "left_heel": 29, "right_heel": 30, "left_foot_index": 31, "right_foot_index": 32, } class Landmarker: resized_height = 256 resized_width = 256 def __init__(self) -> None: self.args = self.parse_args() self.measurements = self.load_landmarks() if self.args.front_image is None: raise Exception("front image needs to be passed") if self.args.side_image is None: raise Exception("side image needs to be passed") self.front_image = cv2.imread(self.args.front_image) self.side_image = cv2.imread(self.args.side_image) self.front_image_resized = cv2.resize(self.front_image, (self.resized_height, self.resized_width)) self.side_image_resized = cv2.resize(self.side_image, (self.resized_height, self.resized_width)) self.distances = {} self.person_height = self.args.person_height self.pixel_height = self.args.pixel_height self.pose = pose.Pose( static_image_mode=True, min_detection_confidence=self.args.pose_detection_confidence, min_tracking_confidence=self.args.pose_tracking_confidence, ) self.landmarks_indices = [ LANDMARK_NAME_TO_INDEX["left_shoulder"], LANDMARK_NAME_TO_INDEX["right_shoulder"], LANDMARK_NAME_TO_INDEX["left_elbow"], LANDMARK_NAME_TO_INDEX["right_elbow"], LANDMARK_NAME_TO_INDEX["left_wrist"], LANDMARK_NAME_TO_INDEX["right_wrist"], LANDMARK_NAME_TO_INDEX["left_hip"], LANDMARK_NAME_TO_INDEX["right_hip"], LANDMARK_NAME_TO_INDEX["left_knee"], LANDMARK_NAME_TO_INDEX["right_knee"], LANDMARK_NAME_TO_INDEX["left_ankle"], LANDMARK_NAME_TO_INDEX["right_ankle"], ] def load_landmarks(self): with open(self.args.yaml_file, "r") as file: landmarks_data = yaml.safe_load(file) measurements = {} for measurement in landmarks_data["measurements"]: measurements[measurement["name"]] = [LANDMARK_NAME_TO_INDEX[l] for l in measurement["landmarks"]] return measurements def parse_args(self): parser = argparse.ArgumentParser() parser.add_argument( "--front", dest="front_image", type=str, help="Front image", ) parser.add_argument( "--side", dest="side_image", type=str, help="Side image", ) parser.add_argument( "--pose_detection_confidence", dest="pose_detection_confidence", default=0.5, type=float, help="Confidence score for pose detection", ) parser.add_argument( "--pose_tracking_confidence", dest="pose_tracking_confidence", default=0.5, type=float, help="Confidence score for pose tracking", ) parser.add_argument( "--person_height", dest="person_height", type=int, help="person height of person", ) parser.add_argument( "--pixel_height", dest="pixel_height", type=int, help="pixel height of person", ) parser.add_argument( "--measurement", dest="measurement", nargs="+", type=str, help="Type of measurement", ) parser.add_argument( "--yaml_file", dest="yaml_file", type=str, help="Path to the YAML file containing landmarks", ) return parser.parse_args() def run(self): front_results, _ = self.process_images() self.get_center_top_point(front_results) table = [] if self.args.measurement: for m in self.args.measurement: if m not in self.measurements: raise Exception("Incorrect input (input not present in config.yml)") else: distance = self.calculate_distance_betn_landmarks(front_results, m) table.append([m, distance]) else: for m in self.measurements: distance = self.calculate_distance_betn_landmarks(front_results, m) table.append([m, distance]) output = tabulate( table, headers=[ "measurement", "Distance (cm)", ], tablefmt="plain", ) print(output) self.pose.close() def process_images(self): front_results = self.pose.process( cv2.cvtColor( self.front_image_resized, cv2.COLOR_BGR2RGB, ) ) side_results = self.pose.process( cv2.cvtColor( self.side_image_resized, cv2.COLOR_BGR2RGB, ) ) self.side_image_keypoints = self.side_image_resized.copy() self.front_image_keypoints = self.front_image_resized.copy() if front_results.pose_landmarks: # type: ignore self.draw_landmarks( self.front_image_keypoints, front_results.pose_landmarks, # type: ignore self.landmarks_indices, ) if side_results.pose_landmarks: # type: ignore self.draw_landmarks( self.side_image_keypoints, side_results.pose_landmarks, # type: ignore# type: ignore self.landmarks_indices, ) return ( front_results, side_results, ) def pixel_to_metric_ratio(self): self.pixel_height = self.pixel_distance * 2 pixel_to_metric_ratio = self.person_height / self.pixel_height logging.debug( "pixel_to_metric_ratio %s", pixel_to_metric_ratio, ) return pixel_to_metric_ratio def draw_landmarks(self, image, landmarks, indices): for idx in indices: landmark = landmarks.landmark[idx] h, w, _ = image.shape cx, cy = int(landmark.x * w), int(landmark.y * h) self.circle(image, cx, cy) def circle(self, image, cx, cy): return cv2.circle( image, (cx, cy), 2, (255, 0, 0), -1, ) def calculate_distance_betn_landmarks( self, front_results, measurement_name, ): if not front_results.pose_landmarks: return landmarks = front_results.pose_landmarks.landmark landmark_names = self.measurements[measurement_name] total_distance = 0 for idx in range(len(landmark_names) - 1): _current = landmarks[landmark_names[idx]] _next = landmarks[landmark_names[idx + 1]] pixel_distance = self.euclidean_distance( _current.x * self.resized_width, _current.y * self.resized_height, _next.x * self.resized_width, _next.y * self.resized_height, ) real_distance = pixel_distance * self.pixel_to_metric_ratio() total_distance += real_distance return total_distance def euclidean_distance(self, x1, y1, x2, y2): distance = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2) return distance def get_center_top_point(self, side_results): gray_image = cv2.cvtColor( self.side_image_keypoints, cv2.COLOR_BGR2GRAY, ) blurred_image = cv2.GaussianBlur(gray_image, (5, 5), 0) roi = blurred_image[ 0 : int(self.side_image_resized.shape[0] / 2), :, ] self.edges = cv2.Canny(roi, 50, 150) contours, _ = cv2.findContours( self.edges.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE, ) max_contour = max(contours, key=cv2.contourArea) rect = cv2.minAreaRect(max_contour) box = cv2.boxPoints(rect) box = sorted( list(box), key=lambda p: p[1], ) top_point = min( box[0], box[1], key=lambda p: p[0], ) left_hip = side_results.pose_landmarks.landmark[LANDMARK_NAME_TO_INDEX["left_hip"]] right_hip = side_results.pose_landmarks.landmark[LANDMARK_NAME_TO_INDEX["right_hip"]] center_x = (left_hip.x + right_hip.x) / 2 center_y = (left_hip.y + right_hip.y) / 2 center_x, center_y = ( int(center_x * self.resized_width), int(center_y * self.resized_height), ) self.pixel_distance = self.euclidean_distance( top_point[0], top_point[1], center_x, center_y, ) if __name__ == "__main__": landmarker = Landmarker() landmarker.run()