-
Notifications
You must be signed in to change notification settings - Fork 5.6k
Description
Hello everyone,
I'm having a little trouble when I try to perform pose estimation on a video (in this case, a royalty-free video of squats taken during a powerlifting competition on YouTube). As you can see, the landmarks are not aligned with the person doing the squats (even though the skeleton clearly shows that the pose estimation is being performed on that person). There is therefore a discrepancy when adding this overlay to the image.
What I am trying to do is the following:
- I load a video
- I do frame-by-frame estimation
- I write the analysed frames into a new video
Here is the Python code I wrote:
from __future__ import annotations
from typing import TYPE_CHECKING, Any
import cv2
def draw_landmarks_on_image(rgb_image, detection_result):
    """Return a copy of *rgb_image* with the detected pose landmarks drawn on it.

    Adapted from mediapipe's python example:
    https://colab.research.google.com/github/googlesamples/mediapipe/blob/main/examples/pose_landmarker/python/%5BMediaPipe_Python_Tasks%5D_Pose_Landmarker.ipynb?hl=fr#scrollTo=s3E6NFV-00Qt
    """
    from mediapipe import solutions
    from mediapipe.framework.formats import landmark_pb2
    import numpy as np

    annotated_image = np.copy(rgb_image)

    # Render every detected pose onto the copy, one skeleton at a time.
    for pose_landmarks in detection_result.pose_landmarks:
        # drawing_utils consumes a NormalizedLandmarkList proto, so repack
        # the task-API landmarks into that message first.
        landmark_list = landmark_pb2.NormalizedLandmarkList()
        landmark_list.landmark.extend([
            landmark_pb2.NormalizedLandmark(x=lm.x, y=lm.y, z=lm.z)
            for lm in pose_landmarks
        ])
        solutions.drawing_utils.draw_landmarks(
            annotated_image,
            landmark_list,
            solutions.pose.POSE_CONNECTIONS,
            solutions.drawing_styles.get_default_pose_landmarks_style(),
        )

    return annotated_image
def create_landmarker(*, model_path: Path, running_mode: Any, result_callback: Callable[[mp.tasks.vision.PoseLandmarkerResult, mp.Image, int], None] | None = None) -> Any:  # type: ignore
    """Build a mediapipe PoseLandmarker configured for *running_mode*.

    Args:
        model_path: Path to the ``.task`` pose landmarker model asset.
        running_mode: One of ``mp.tasks.vision.RunningMode`` (IMAGE/VIDEO/LIVE_STREAM).
        result_callback: Only used for LIVE_STREAM mode; invoked with each result.

    Returns:
        A ``PoseLandmarker`` instance (usable as a context manager).
    """
    import mediapipe as mp

    tasks = mp.tasks
    # Assemble the options in one expression and hand them to the factory.
    options = tasks.vision.PoseLandmarkerOptions(
        base_options=tasks.BaseOptions(model_asset_path=model_path),
        running_mode=running_mode,
        result_callback=result_callback,
    )
    return tasks.vision.PoseLandmarker.create_from_options(options)
def process_video(*, model_path: str, video_capture: cv2.VideoCapture, output_path: str, show: bool = False, fps: int = 0) -> None:
import mediapipe as mp
import cv2
with create_landmarker(model_path=model_path, running_mode=mp.tasks.vision.RunningMode.VIDEO) as landmarker:
default_fps = video_capture.get(cv2.CAP_PROP_FPS)
if fps < 1:
fps = int(default_fps)
if fps < 1:
raise ValueError("FPS cannot be less than 1")
ret, frame = video_capture.read()
if not ret:
raise ValueError("Unable to read first frame from video_capture")
height, width = frame.shape
out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height))
image_index = 0
while True:
if image_index != 0:
ret, frame = video_capture.read()
if not ret:
break
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=frame)
result = landmarker.detect_for_video(mp_image, image_index)
annotated = draw_landmarks_on_image(mp_image, result)
out.write(annotated)
if show:
cv2.imshow("Mediapipe output", annotated)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
image_index += 1
video_capture.release()
out.release()
if show:
cv2.destroyAllWindows()My guess is that there is an issue concerning the conversion of image before and after the pose estimation and / or the overlay.
BTW, I'm using a .task from an existing model shared on mediapipe website "pose_landmarker_lite.task".
If someone has an idea it would be great!
Thanks a lot.