aubome/landmarks.py

299 lines
11 KiB
Python

import warnings
import os
from tabulate import tabulate
import math
import argparse
import cv2
from mediapipe.python.solutions import pose
import logging
warnings.filterwarnings("ignore",
category=UserWarning,
module="google.protobuf")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
class Landmarker:
resized_height = 256
resized_width = 300
def __init__(self) -> None:
args = self.parse_args()
if args.front_image == None:
raise Exception("front image needs to be passed")
if args.side_image == None:
raise Exception("side image needs to be passed")
self.front_image = cv2.imread(args.front_image)
self.side_image = cv2.imread(args.side_image)
self.front_image_resized = cv2.resize(
self.front_image, (self.resized_height, self.resized_width))
self.side_image_resized = cv2.resize(
self.side_image, (self.resized_height, self.resized_width))
self.distances = {}
self.person_height = args.person_height
self.pixel_height = args.pixel_height
self.pose = pose.Pose(
static_image_mode=True,
min_detection_confidence=args.pose_detection_confidence,
min_tracking_confidence=args.pose_tracking_confidence,
)
self.landmarks_to_calculate = []
self.landmarks_indices = [
pose.PoseLandmark.LEFT_SHOULDER.value,
pose.PoseLandmark.RIGHT_SHOULDER.value,
pose.PoseLandmark.LEFT_ELBOW.value,
pose.PoseLandmark.RIGHT_ELBOW.value,
pose.PoseLandmark.LEFT_WRIST.value,
pose.PoseLandmark.RIGHT_WRIST.value,
pose.PoseLandmark.LEFT_HIP.value,
pose.PoseLandmark.RIGHT_HIP.value,
pose.PoseLandmark.LEFT_KNEE.value,
pose.PoseLandmark.RIGHT_KNEE.value,
pose.PoseLandmark.LEFT_ANKLE.value,
pose.PoseLandmark.RIGHT_ANKLE.value,
]
def parse_args(self):
parser = argparse.ArgumentParser()
parser.add_argument("--front",
dest="front_image",
type=str,
help="Front image")
parser.add_argument("--side",
dest="side_image",
type=str,
help="Side image")
parser.add_argument(
"--pose_detection_confidence",
dest="pose_detection_confidence",
default=0.5,
type=float,
help="Confidence score for pose detection",
)
parser.add_argument(
"--pose_tracking_confidence",
dest="pose_tracking_confidence",
default=0.5,
type=float,
help="Confidence score for pose tracking",
)
parser.add_argument(
"--person_height",
# default=153,
dest="person_height",
type=int,
help="person height of person",
)
parser.add_argument(
"--pixel_height",
# default=216,
dest="pixel_height",
type=int,
help="pixel height of person",
)
return parser.parse_args()
def run(self):
logging.warning("person's height: %s", self.person_height)
logging.warning("person's pixel height: %s", self.pixel_height)
front_results, side_results = self.process_images()
self.get_center_top_point(front_results)
self.calculate_distance_betn_landmarks(front_results)
self.output()
self.display_images()
self.pose.close()
def process_images(self):
front_results = self.pose.process(
cv2.cvtColor(self.front_image_resized, cv2.COLOR_BGR2RGB))
side_results = self.pose.process(
cv2.cvtColor(self.side_image_resized, cv2.COLOR_BGR2RGB))
self.side_image_keypoints = self.side_image_resized.copy()
self.front_image_keypoints = self.front_image_resized.copy()
if front_results.pose_landmarks: # type: ignore
self.draw_landmarks(
self.front_image_keypoints,
front_results.pose_landmarks, # type: ignore
self.landmarks_indices,
)
if side_results.pose_landmarks: # type: ignore
self.draw_landmarks(
self.side_image_keypoints,
side_results.pose_landmarks, # type: ignore# type: ignore
self.landmarks_indices,
)
return front_results, side_results
def pixel_to_metric_ratio(self):
self.pixel_height = self.pixel_distance * 2
pixel_to_metric_ratio = self.person_height / self.pixel_height
logging.warning("pixel_to_metric_ratio %s", pixel_to_metric_ratio)
return pixel_to_metric_ratio
def draw_landmarks(self, image, landmarks, indices):
for idx in indices:
landmark = landmarks.landmark[idx]
h, w, _ = image.shape
cx, cy = int(landmark.x * w), int(landmark.y * h)
self.circle(image, cx, cy)
def circle(self, image, cx, cy):
return cv2.circle(image, (cx, cy), 2, (255, 0, 0), -1)
def output(self):
table = []
for landmark, distance in self.distances.items():
table.append([landmark.replace("_", " "), distance])
output = tabulate(table,
headers=["measurement", "value"],
tablefmt="grid")
print(output)
def calculate_distance_betn_landmarks(self, front_results, landmarks=[]):
if not front_results.pose_landmarks:
return
landmarks = front_results.pose_landmarks.landmark
leg_landmarks = [
pose.PoseLandmark.LEFT_HIP,
pose.PoseLandmark.LEFT_KNEE,
pose.PoseLandmark.LEFT_ANKLE,
]
hand_landmarks = [
pose.PoseLandmark.LEFT_SHOULDER,
pose.PoseLandmark.LEFT_ELBOW,
pose.PoseLandmark.LEFT_WRIST,
]
self.landmarks_to_calculate = leg_landmarks + hand_landmarks
# self.landmarks_to_calculate = [
# pose.PoseLandmark.LEFT_SHOULDER,
# pose.PoseLandmark.LEFT_ELBOW,
# pose.PoseLandmark.LEFT_WRIST,
# ]
table = []
for idx, l in enumerate(self.landmarks_to_calculate):
if idx < len(self.landmarks_to_calculate) - 1:
_current = landmarks[l.value]
_nextl = self.landmarks_to_calculate[idx + 1]
_next = landmarks[_nextl.value]
pixel_distance = self.euclidean_distance(
_current.x * self.resized_width,
_current.y * self.resized_height,
_next.x * self.resized_width,
_next.y * self.resized_height)
real_distance = pixel_distance * self.pixel_to_metric_ratio()
table.append([l.name, _nextl.name, real_distance])
output = tabulate(
table,
headers=["Landmark 1", "Landmark 2", "Distance (cm)"],
tablefmt="grid")
print(output)
# for l in self.landmarks_to_calculate:
# real_distance = 0
# for idx, l in enumerate(self.landmarks_to_calculate):
# if idx < len(self.landmarks_to_calculate) - 1:
# _current = landmarks[l.value]
# _nextl = self.landmarks_to_calculate[idx + 1]
# _next = landmarks[_nextl.value]
# pixel_distance = self.euclidean_distance(
# _current.x * self.resized_width,
# _current.y * self.resized_height,
# _next.x * self.resized_width,
# _next.y * self.resized_height,
# )
# real_distance += pixel_distance * self.pixel_to_metric_ratio(
# )
# print(real_distance)
# self.distances[l.name] = real_distance
#
def euclidean_distance(self, x1, y1, x2, y2):
distance = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
return distance
def destroy(self):
cv2.destroyAllWindows()
def display_images(self):
cv2.imshow("front_image_keypoints", self.front_image_keypoints)
cv2.imshow("side_image_keypoints", self.side_image_keypoints)
cv2.imshow("edges", self.edges)
cv2.waitKey(0)
def get_center_top_point(self, side_results):
gray_image = cv2.cvtColor(self.side_image_keypoints,
cv2.COLOR_BGR2GRAY)
blurred_image = cv2.GaussianBlur(gray_image, (5, 5), 0)
roi = blurred_image[0:int(self.side_image_resized.shape[0] / 2), :]
self.edges = cv2.Canny(roi, 50, 150)
contours, _ = cv2.findContours(self.edges, cv2.RETR_EXTERNAL,
cv2.CHAIN_APPROX_SIMPLE)
xt, yt = None, None
self.topmost_point = None
if contours:
largest_contour = max(contours, key=cv2.contourArea)
self.topmost_point = tuple(
largest_contour[largest_contour[:, :, 1].argmin()][0])
xt, yt = self.topmost_point
self.circle(self.side_image_keypoints, xt, yt)
logging.warning("xt: %s", xt)
logging.warning("yt: %s", yt)
xc, yc = None, None
landmarks = side_results.pose_landmarks.landmark
if side_results.pose_landmarks:
left_hip = landmarks[pose.PoseLandmark.LEFT_HIP.value]
right_hip = landmarks[pose.PoseLandmark.RIGHT_HIP.value]
center_point = (
(left_hip.x + right_hip.x) / 2,
(left_hip.y + right_hip.y) / 2,
)
center_point = (
int(center_point[0] * self.side_image_resized.shape[1]),
int(center_point[1] * self.side_image_resized.shape[0]),
)
xc, yc = center_point
logging.warning("xc: %s", xc)
logging.warning("yc: %s", yc)
self.circle(self.side_image_keypoints, xc, yc)
self.pixel_distance = self.euclidean_distance(xc, yc, xt, yt)
logging.warning("top_center_pixel_distance: %s",
self.pixel_distance)
self.pixel_height = self.pixel_distance * 2
logging.warning("pxl height: %s ", self.pixel_height)
self.distance = (self.euclidean_distance(xc, yc, xt, yt) *
self.pixel_to_metric_ratio())
return self.distance
l = Landmarker()
try:
l.run()
except:
print("error")
finally:
l.destroy()