diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9caab61 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +**/__pycache__/ + +*.pyc \ No newline at end of file diff --git a/keypoint_demo.py b/keypoint_demo.py new file mode 100644 index 0000000..3a9ea2e --- /dev/null +++ b/keypoint_demo.py @@ -0,0 +1,119 @@ +# keypoint_demo.py +import cv2 +import numpy as np +from mmpose.apis import MMPoseInferencer + +def draw_keypoints(frame, keypoints, keypoint_scores, threshold=0.3): + """Draw COCO 17-keypoint skeleton on frame.""" + + # COCO 17 skeleton connections (joint pairs) + SKELETON = [ + (0, 1), (0, 2), # nose to eyes + (1, 3), (2, 4), # eyes to ears + (5, 6), # shoulders + (5, 7), (7, 9), # left arm + (6, 8), (8, 10), # right arm + (5, 11), (6, 12), # shoulders to hips + (11, 12), # hips + (11, 13), (13, 15), # left leg + (12, 14), (14, 16), # right leg + ] + + KEYPOINT_NAMES = [ + 'nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear', + 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', + 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', + 'left_knee', 'right_knee', 'left_ankle', 'right_ankle' + ] + + h, w = frame.shape[:2] + + for person_kps, person_scores in zip(keypoints, keypoint_scores): + # Draw skeleton lines + for (j1, j2) in SKELETON: + if person_scores[j1] > threshold and person_scores[j2] > threshold: + pt1 = (int(person_kps[j1][0]), int(person_kps[j1][1])) + pt2 = (int(person_kps[j2][0]), int(person_kps[j2][1])) + # Clamp to frame bounds + if (0 <= pt1[0] < w and 0 <= pt1[1] < h and + 0 <= pt2[0] < w and 0 <= pt2[1] < h): + cv2.line(frame, pt1, pt2, (0, 255, 0), 2) + + # Draw keypoints + for idx, (kp, score) in enumerate(zip(person_kps, person_scores)): + if score > threshold: + x, y = int(kp[0]), int(kp[1]) + if 0 <= x < w and 0 <= y < h: + # Color by confidence: green=high, yellow=medium + color = (0, 255, 0) if score > 0.7 else (0, 200, 200) + cv2.circle(frame, (x, y), 4, color, -1) + cv2.circle(frame, (x, y), 5, (255, 255, 255), 1) # white border + + return frame + + +def main(): + # Initialize inferencer with RTMPose + # det_model: person detector + # pose2d: pose estimator + inferencer = MMPoseInferencer( + pose2d='human', # uses the built-in alias — downloads everything automatically + device='cuda:0' + ) + + cap = cv2.VideoCapture(0) # 0 = default webcam; use 1, 2, etc. for others + cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280) + cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720) + + print("Press 'q' to quit, 's' to save a frame") + + while True: + ret, frame = cap.read() + if not ret: + break + + # Run inference + # scope='mmpose' is important to avoid namespace issues + result_generator = inferencer( + frame, + show=False, # we handle display ourselves + return_datasamples=False + ) + results = next(result_generator) + + # Extract keypoints from results + predictions = results.get('predictions', [[]])[0] + + all_keypoints = [] + all_scores = [] + + for pred in predictions: + kps = pred.get('keypoints', []) + scores = pred.get('keypoint_scores', []) + if len(kps) > 0: + all_keypoints.append(np.array(kps)) + all_scores.append(np.array(scores)) + + # Draw on frame + if all_keypoints: + frame = draw_keypoints(frame, all_keypoints, all_scores) + + # Show FPS + cv2.putText(frame, f'Subjects: {len(all_keypoints)}', + (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2) + + cv2.imshow('MMPose RTMPose Keypoints', frame) + + key = cv2.waitKey(1) & 0xFF + if key == ord('q'): + break + elif key == ord('s'): + cv2.imwrite('keypoint_capture.jpg', frame) + print("Frame saved.") + + cap.release() + cv2.destroyAllWindows() + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/keypoint_pose/keypoint_pose/__init__.py b/keypoint_pose/keypoint_pose/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/keypoint_pose/keypoint_pose/keypoint_node.py b/keypoint_pose/keypoint_pose/keypoint_node.py new file mode 100644 index 0000000..545d158 --- /dev/null +++ b/keypoint_pose/keypoint_pose/keypoint_node.py @@ -0,0 +1,213 @@ +""" +ROS 2 node that subscribes to rectified stereo images, runs MMPose +keypoint detection, and displays the results in OpenCV windows. +""" + +import cv2 +import numpy as np +import rclpy +from rclpy.node import Node +from sensor_msgs.msg import Image +from cv_bridge import CvBridge +from mmpose.apis import MMPoseInferencer + + +# ── COCO-17 drawing helpers ───────────────────────────────────────────── + +SKELETON = [ + (0, 1), (0, 2), # nose → eyes + (1, 3), (2, 4), # eyes → ears + (5, 6), # shoulders + (5, 7), (7, 9), # left arm + (6, 8), (8, 10), # right arm + (5, 11), (6, 12), # shoulders → hips + (11, 12), # hips + (11, 13), (13, 15), # left leg + (12, 14), (14, 16), # right leg +] + +KEYPOINT_NAMES = [ + 'nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear', + 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', + 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', + 'left_knee', 'right_knee', 'left_ankle', 'right_ankle', +] + + +def draw_keypoints(frame, keypoints, keypoint_scores, threshold=0.3): + """Draw COCO 17-keypoint skeleton on *frame* (in-place).""" + h, w = frame.shape[:2] + + for person_kps, person_scores in zip(keypoints, keypoint_scores): + # Skeleton lines + for j1, j2 in SKELETON: + if person_scores[j1] > threshold and person_scores[j2] > threshold: + pt1 = (int(person_kps[j1][0]), int(person_kps[j1][1])) + pt2 = (int(person_kps[j2][0]), int(person_kps[j2][1])) + if (0 <= pt1[0] < w and 0 <= pt1[1] < h and + 0 <= pt2[0] < w and 0 <= pt2[1] < h): + cv2.line(frame, pt1, pt2, (0, 255, 0), 2) + + # Joint circles + for idx, (kp, score) in enumerate(zip(person_kps, person_scores)): + if score > threshold: + x, y = int(kp[0]), int(kp[1]) + if 0 <= x < w and 0 <= y < h: + color = (0, 255, 0) if score > 0.7 else (0, 200, 200) + cv2.circle(frame, (x, y), 4, color, -1) + cv2.circle(frame, (x, y), 5, (255, 255, 255), 1) + + return frame + + +# ── ROS 2 Node ─────────────────────────────────────────────────────────── + +class KeypointNode(Node): + """Subscribe to stereo rectified images, run MMPose, and display.""" + + def __init__(self): + super().__init__('keypoint_node') + + # ── Parameters ────────────────────────────────────────────────── + self.declare_parameter('threshold', 0.3) + self.declare_parameter('device', 'cuda:0') + + threshold = self.get_parameter('threshold').value + device = self.get_parameter('device').value + self._threshold = threshold + + # ── MMPose (single shared instance) ───────────────────────────── + self.get_logger().info( + f'Loading MMPose model on {device} (this may take a moment)…' + ) + self._inferencer = MMPoseInferencer( + pose2d='human', + device=device, + ) + self.get_logger().info('MMPose model loaded.') + + # ── cv_bridge ─────────────────────────────────────────────────── + self._bridge = CvBridge() + + # Latest frames (written by subscribers, read by display timer) + self._left_frame: np.ndarray | None = None + self._right_frame: np.ndarray | None = None + + # ── Subscribers (independent — no time-sync needed) ───────────── + self.create_subscription( + Image, + '/stereo/left/image_rect', + self._left_cb, + 10, + ) + self.create_subscription( + Image, + '/stereo/right/image_rect', + self._right_cb, + 10, + ) + + # ── Timer for cv2.waitKey (keeps GUI responsive) ──────────────── + # ~30 Hz is plenty for display refresh + self.create_timer(1.0 / 30.0, self._display_timer_cb) + + self.get_logger().info( + 'Subscribed to /stereo/left/image_rect and ' + '/stereo/right/image_rect — waiting for images…' + ) + + # ── Subscriber callbacks ──────────────────────────────────────────── + + def _process_frame(self, msg: Image, label: str) -> np.ndarray: + """Convert ROS Image -> OpenCV, run MMPose, draw keypoints.""" + # Log image metadata once to help diagnose blank-frame issues + if not hasattr(self, f'_logged_{label}'): + self.get_logger().info( + f'[{label}] First frame: encoding={msg.encoding} ' + f'size={msg.width}x{msg.height} step={msg.step}' + ) + setattr(self, f'_logged_{label}', True) + + # Use passthrough first so we can inspect the raw data + frame = self._bridge.imgmsg_to_cv2(msg, desired_encoding='passthrough') + + if not hasattr(self, f'_logged_px_{label}'): + self.get_logger().info( + f'[{label}] Frame dtype={frame.dtype} shape={frame.shape} ' + f'min={frame.min()} max={frame.max()}' + ) + setattr(self, f'_logged_px_{label}', True) + + # Convert grayscale to BGR so MMPose and drawing work correctly + if len(frame.shape) == 2 or frame.shape[2] == 1: + frame = cv2.cvtColor(frame, cv2.COLOR_GRAY2BGR) + elif frame.shape[2] == 4: + frame = cv2.cvtColor(frame, cv2.COLOR_BGRA2BGR) + + result = next(self._inferencer( + frame, + show=False, + return_datasamples=False, + )) + + predictions = result.get('predictions', [[]])[0] + all_kps, all_scores = [], [] + for pred in predictions: + kps = pred.get('keypoints', []) + scores = pred.get('keypoint_scores', []) + if len(kps) > 0: + all_kps.append(np.array(kps)) + all_scores.append(np.array(scores)) + + if all_kps: + draw_keypoints(frame, all_kps, all_scores, self._threshold) + + cv2.putText( + frame, + f'Subjects: {len(all_kps)}', + (10, 30), + cv2.FONT_HERSHEY_SIMPLEX, + 0.8, + (255, 255, 255), + 2, + ) + return frame + + def _left_cb(self, msg: Image) -> None: + self._left_frame = self._process_frame(msg, 'left') + + def _right_cb(self, msg: Image) -> None: + self._right_frame = self._process_frame(msg, 'right') + + # ── Display timer ─────────────────────────────────────────────────── + + def _display_timer_cb(self) -> None: + if self._left_frame is not None: + cv2.imshow('Left - MMPose', self._left_frame) + if self._right_frame is not None: + cv2.imshow('Right - MMPose', self._right_frame) + + key = cv2.waitKey(1) & 0xFF + if key == ord('q'): + self.get_logger().info('Quit requested — shutting down.') + self.destroy_node() + rclpy.shutdown() + + +# ── Entry point ────────────────────────────────────────────────────────── + +def main(args=None): + rclpy.init(args=args) + node = KeypointNode() + try: + rclpy.spin(node) + except KeyboardInterrupt: + pass + finally: + cv2.destroyAllWindows() + node.destroy_node() + rclpy.try_shutdown() + + +if __name__ == '__main__': + main() diff --git a/keypoint_pose/package.xml b/keypoint_pose/package.xml new file mode 100644 index 0000000..deb7875 --- /dev/null +++ b/keypoint_pose/package.xml @@ -0,0 +1,22 @@ + + + + keypoint_pose + 0.0.0 + MMPose keypoint detection on stereo rectified images + sentry + TODO: License declaration + + rclpy + sensor_msgs + cv_bridge + + ament_copyright + ament_flake8 + ament_pep257 + python3-pytest + + + ament_python + + diff --git a/keypoint_pose/resource/keypoint_pose b/keypoint_pose/resource/keypoint_pose new file mode 100644 index 0000000..e69de29 diff --git a/keypoint_pose/setup.cfg b/keypoint_pose/setup.cfg new file mode 100644 index 0000000..9fe19aa --- /dev/null +++ b/keypoint_pose/setup.cfg @@ -0,0 +1,4 @@ +[develop] +script_dir=$base/lib/keypoint_pose +[install] +install_scripts=$base/lib/keypoint_pose diff --git a/keypoint_pose/setup.py b/keypoint_pose/setup.py new file mode 100644 index 0000000..4ef54e5 --- /dev/null +++ b/keypoint_pose/setup.py @@ -0,0 +1,30 @@ +from setuptools import find_packages, setup + +package_name = 'keypoint_pose' + +setup( + name=package_name, + version='0.0.0', + packages=find_packages(exclude=['test']), + data_files=[ + ('share/ament_index/resource_index/packages', + ['resource/' + package_name]), + ('share/' + package_name, ['package.xml']), + ], + install_requires=['setuptools'], + zip_safe=True, + maintainer='sentry', + maintainer_email='pulipakaa24@outlook.com', + description='MMPose keypoint detection on stereo rectified images', + license='TODO: License declaration', + extras_require={ + 'test': [ + 'pytest', + ], + }, + entry_points={ + 'console_scripts': [ + 'keypoint_node = keypoint_pose.keypoint_node:main', + ], + }, +) diff --git a/keypoint_pose/test/test_copyright.py b/keypoint_pose/test/test_copyright.py new file mode 100644 index 0000000..97a3919 --- /dev/null +++ b/keypoint_pose/test/test_copyright.py @@ -0,0 +1,25 @@ +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_copyright.main import main +import pytest + + +# Remove the `skip` decorator once the source file(s) have a copyright header +@pytest.mark.skip(reason='No copyright header has been placed in the generated source file.') +@pytest.mark.copyright +@pytest.mark.linter +def test_copyright(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found errors' diff --git a/keypoint_pose/test/test_flake8.py b/keypoint_pose/test/test_flake8.py new file mode 100644 index 0000000..27ee107 --- /dev/null +++ b/keypoint_pose/test/test_flake8.py @@ -0,0 +1,25 @@ +# Copyright 2017 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_flake8.main import main_with_errors +import pytest + + +@pytest.mark.flake8 +@pytest.mark.linter +def test_flake8(): + rc, errors = main_with_errors(argv=[]) + assert rc == 0, \ + 'Found %d code style errors / warnings:\n' % len(errors) + \ + '\n'.join(errors) diff --git a/keypoint_pose/test/test_pep257.py b/keypoint_pose/test/test_pep257.py new file mode 100644 index 0000000..b234a38 --- /dev/null +++ b/keypoint_pose/test/test_pep257.py @@ -0,0 +1,23 @@ +# Copyright 2015 Open Source Robotics Foundation, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from ament_pep257.main import main +import pytest + + +@pytest.mark.linter +@pytest.mark.pep257 +def test_pep257(): + rc = main(argv=['.', 'test']) + assert rc == 0, 'Found code style errors / warnings' diff --git a/start_camera.launch.py b/start_camera.launch.py new file mode 100644 index 0000000..51705d9 --- /dev/null +++ b/start_camera.launch.py @@ -0,0 +1,208 @@ +import os +from ament_index_python.packages import get_package_share_directory +from launch import LaunchDescription +from launch_ros.actions import ComposableNodeContainer, Node +from launch_ros.descriptions import ComposableNode + +# From perspective of cameras +LEFT_CAMERA_NAME = 'left' +RIGHT_CAMERA_NAME = 'right' + + +def generate_launch_description(): + config_path = os.path.join( + get_package_share_directory('spinnaker_camera_driver'), + 'config', + 'blackfly_s.yaml' + ) + + # ── Container 1: Camera Drivers (component_container_mt) ───────────── + # + # Both cameras share a single process because the FLIR Spinnaker SDK + # requires USB3 cameras to negotiate bandwidth within one process. + # Using _mt so both capture callbacks can fire on separate threads. + # + # Topics published (per camera): + # /stereo/{left,right}/image_raw (sensor_msgs/Image) + # /stereo/{left,right}/camera_info (sensor_msgs/CameraInfo) + camera_container = ComposableNodeContainer( + name='camera_container', + namespace='', + package='rclcpp_components', + executable='component_container_mt', + composable_node_descriptions=[ + # ── Left Blackfly S (serial 25282106) ── + # namespace='stereo', name='left' → node name creates sub-namespace + # publishes: /stereo/left/image_raw, /stereo/left/camera_info + ComposableNode( + package='spinnaker_camera_driver', + plugin='spinnaker_camera_driver::CameraDriver', + name=LEFT_CAMERA_NAME, + namespace='stereo', + parameters=[{ + 'parameter_file': config_path, + 'serial_number': '25282106', + 'camera_info_url': 'file:///home/sentry/camera_ws/stereoCal/left.yaml', + }], + extra_arguments=[{'use_intra_process_comms': True}], + ), + # ── Right Blackfly S (serial 25235293) ── + # namespace='stereo', name='right' → node name creates sub-namespace + # publishes: /stereo/right/image_raw, /stereo/right/camera_info + ComposableNode( + package='spinnaker_camera_driver', + plugin='spinnaker_camera_driver::CameraDriver', + name=RIGHT_CAMERA_NAME, + namespace='stereo', + parameters=[{ + 'parameter_file': config_path, + 'serial_number': '25235293', + 'camera_info_url': 'file:///home/sentry/camera_ws/stereoCal/right.yaml', + }], + extra_arguments=[{'use_intra_process_comms': True}], + ), + ], + output='screen', + ) + + # ── Container 2: Image Processing (component_container_mt) ─────────── + # + # Debayer + Rectify for both cameras. Grouped together so that + # debayer→rectify is zero-copy (intra-process) per side, while _mt + # lets left and right pipelines run concurrently on different threads. + # + # Per-side data flow: + # image_raw → DebayerNode → image_mono → RectifyNode → image_rect + # ↘ image_color (available for other consumers) + image_proc_container = ComposableNodeContainer( + name='image_proc_container', + namespace='', + package='rclcpp_components', + executable='component_container_mt', + composable_node_descriptions=[ + # ── Left Debayer ── + # Subscribes: /stereo/left/image_raw (default topic, matches camera) + # Publishes: /stereo/left/image_mono (grayscale debayered) + # /stereo/left/image_color (color debayered) + ComposableNode( + package='image_proc', + plugin='image_proc::DebayerNode', + name='debayer', + namespace='stereo/left', + extra_arguments=[{'use_intra_process_comms': True}], + ), + # ── Left Rectify ── + # Remap: 'image' → 'image_mono' so it subscribes to debayer output. + # image_transport auto-derives camera_info from the image topic's + # namespace: /stereo/left/image_mono → /stereo/left/camera_info ✓ + # Publishes: /stereo/left/image_rect + ComposableNode( + package='image_proc', + plugin='image_proc::RectifyNode', + name='rectify_mono', + namespace='stereo/left', + remappings=[('image', 'image_mono')], + extra_arguments=[{'use_intra_process_comms': True}], + ), + # ── Right Debayer ── + # Subscribes: /stereo/right/image_raw + # Publishes: /stereo/right/image_mono, /stereo/right/image_color + ComposableNode( + package='image_proc', + plugin='image_proc::DebayerNode', + name='debayer', + namespace='stereo/right', + extra_arguments=[{'use_intra_process_comms': True}], + ), + # ── Right Rectify ── + # Remap: 'image' → 'image_mono' + # Subscribes: /stereo/right/image_mono + /stereo/right/camera_info + # Publishes: /stereo/right/image_rect + ComposableNode( + package='image_proc', + plugin='image_proc::RectifyNode', + name='rectify_mono', + namespace='stereo/right', + remappings=[('image', 'image_mono')], + extra_arguments=[{'use_intra_process_comms': True}], + ), + ], + output='screen', + ) + + # ── Container 3: Stereo Processing (component_container_mt) ────────── + # + # DisparityNode + PointCloudNode isolated from image processing so the + # heavy stereo matching doesn't starve debayer/rectify callbacks. + # Zero-copy between disparity → point_cloud within this container. + # + # Data flow: + # left/image_rect + right/image_rect → DisparityNode → disparity + # disparity + left/image_rect → PointCloudNode → points2 + stereo_proc_container = ComposableNodeContainer( + name='stereo_proc_container', + namespace='', + package='rclcpp_components', + executable='component_container_mt', + composable_node_descriptions=[ + # ── Disparity ── + # Subscribes: /stereo/left/image_rect (from left RectifyNode) + # /stereo/left/camera_info (from left CameraDriver) + # /stereo/right/image_rect (from right RectifyNode) + # /stereo/right/camera_info (from right CameraDriver) + # Publishes: /stereo/disparity (stereo_msgs/DisparityImage) + ComposableNode( + package='stereo_image_proc', + plugin='stereo_image_proc::DisparityNode', + name='disparity_node', + namespace='stereo', + parameters=[{'approximate_sync': True}], + extra_arguments=[{'use_intra_process_comms': True}], + ), + # ── Point Cloud ── + # Remap: left/image_rect_color → left/image_rect because + # RectifyNode publishes 'image_rect', not 'image_rect_color'. + # Subscribes: /stereo/left/image_rect (remapped from left/image_rect_color) + # /stereo/left/camera_info + # /stereo/right/camera_info + # /stereo/disparity (from DisparityNode — zero-copy) + # Publishes: /stereo/points2 (sensor_msgs/PointCloud2) + ComposableNode( + package='stereo_image_proc', + plugin='stereo_image_proc::PointCloudNode', + name='point_cloud_node', + namespace='stereo', + parameters=[{'approximate_sync': True}], + remappings=[ + ('left/image_rect_color', 'left/image_rect'), + ('right/image_rect_color', 'right/image_rect'), + ], + extra_arguments=[{'use_intra_process_comms': True}], + ), + ], + output='screen', + ) + + # ── Static TF: world → left ────────────────────────────────────────── + # + # PointCloudNode publishes points2 with frame_id="left" (the left + # camera's node name). RViz needs a TF path from its fixed frame to + # "left" — this identity transform provides it. The left↔right + # extrinsics are encoded in the camera_info P matrices, not in TF. + tf_publisher = Node( + package='tf2_ros', + executable='static_transform_publisher', + arguments=[ + '--x', '0', '--y', '0', '--z', '0', + '--yaw', '0', '--pitch', '0', '--roll', '0', + '--frame-id', 'world', + '--child-frame-id', 'left', + ], + ) + + return LaunchDescription([ + camera_container, + image_proc_container, + stereo_proc_container, + tf_publisher, + ])