-
Notifications
You must be signed in to change notification settings - Fork 5.6k
Description
Hello everyone,
I'm having a little trouble when I try to perform pose estimation on a video (in this case, a royalty-free video of squats taken during a powerlifting competition on YouTube). As you can see, the landmarks are not aligned with the person doing the squats (even though the skeleton clearly shows that the pose estimation is being performed on that person). There is therefore a discrepancy when adding this overlay to the image.
What I am trying to do is the following:
- I load a video
- I do frame-by-frame estimation
- I write the analysed frames into a new video
Here is the Python code I wrote:
from __future__ import annotations
from typing import TYPE_CHECKING, Any
import cv2
def draw_landmarks_on_image(rgb_image, detection_result):
    """Return a copy of *rgb_image* with the detected pose landmarks drawn on it.

    Adapted from mediapipe's python example:
    https://colab.research.google.com/github/googlesamples/mediapipe/blob/main/examples/pose_landmarker/python/%5BMediaPipe_Python_Tasks%5D_Pose_Landmarker.ipynb?hl=fr#scrollTo=s3E6NFV-00Qt
    """
    from mediapipe import solutions
    from mediapipe.framework.formats import landmark_pb2
    import numpy as np

    annotated_image = np.copy(rgb_image)

    # Render every detected pose onto the copy, one skeleton at a time.
    for pose_landmarks in detection_result.pose_landmarks:
        # drawing_utils consumes a NormalizedLandmarkList proto, so repack
        # the task-API landmarks into that message first.
        landmark_list = landmark_pb2.NormalizedLandmarkList()
        landmark_list.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
            for lm in pose_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            landmark_list,
            solutions.pose.POSE_CONNECTIONS,
            solutions.drawing_styles.get_default_pose_landmarks_style(),
        )

    return annotated_image
def create_landmarker(*, model_path: Path, running_mode: Any, result_callback: Callable[[mp.tasks.vision.PoseLandmarkerResult, mp.Image, int], None] | None = None) -> Any:  # type: ignore
    """Build a mediapipe PoseLandmarker configured for *running_mode*.

    Args:
        model_path: Path to the ``.task`` pose landmarker model asset.
        running_mode: One of ``mp.tasks.vision.RunningMode`` (IMAGE/VIDEO/LIVE_STREAM).
        result_callback: Only used for LIVE_STREAM mode; invoked with each result.

    Returns:
        A ``PoseLandmarker`` instance (usable as a context manager).
    """
    import mediapipe as mp

    tasks = mp.tasks
    # Assemble the options in one expression and hand them to the factory.
    options = tasks.vision.PoseLandmarkerOptions(
        base_options=tasks.BaseOptions(model_asset_path=model_path),
        running_mode=running_mode,
        result_callback=result_callback,
    )
    return tasks.vision.PoseLandmarker.create_from_options(options)
def process_video(*, model_path: str, video_capture: cv2.VideoCapture, output_path: str, show: bool = False, fps: int = 0) -> None:
import mediapipe as mp
import cv2
with create_landmarker(model_path=model_path, running_mode=mp.tasks.vision.RunningMode.VIDEO) as landmarker:
default_fps = video_capture.get(cv2.CAP_PROP_FPS)
if fps < 1:
fps = int(default_fps)
if fps < 1:
raise ValueError("FPS cannot be less than 1")
ret, frame = video_capture.read()
if not ret:
raise ValueError("Unable to read first frame from video_capture")
height, width = frame.shape
out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
image_index = 0
while True:
if image_index != 0:
ret, frame = video_capture.read()
if not ret:
break
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
result = landmarker.detect_for_video(mp_image, image_index)
annotated = draw_landmarks_on_image(mp_image, result)
out.write(annotated)
if show:
cv2.imshow("Mediapipe output", annotated)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
image_index += 1
video_capture.release()
out.release()
if show:
cv2.destroyAllWindows()My guess is that there is an issue concerning the conversion of image before and after the pose estimation and / or the overlay.
BTW, I'm using a .task from an existing model shared on mediapipe website "pose_landmarker_lite.task".
If someone has an idea it would be great!
Thanks a lot.