reid_stuff

This commit is contained in:
Aditya Pulipaka
2026-03-15 19:58:52 -05:00
parent 5c7b26c94a
commit 53999d6023
9 changed files with 884 additions and 0 deletions

View File

@@ -0,0 +1,51 @@
"""Reid pipeline in headless mode (no cv2 display windows).
To view output:
ros2 run rqt_image_view rqt_image_view → select /reid/annotated
rviz2 → add MarkerArray on /reid/track_markers and /keypoint_markers
"""
import os
from launch import LaunchDescription
from launch.actions import ExecuteProcess
def generate_launch_description():
    """Assemble the fully headless tracking + re-ID pipeline.

    Two processes are started under the mmpose conda interpreter:
      * single_person_loc_node — stereo keypoint triangulation
      * reid_node              — KeyRe-ID person re-identification
    Both run with headless:=true, so output is viewed through
    rqt_image_view / rviz2 rather than local cv2 windows.
    """
    interpreter = os.path.expanduser('~/miniconda3/envs/mmpose/bin/python3')
    keyreid_dir = os.path.expanduser('~/KeyRe-ID')

    def _process(module, params):
        # Build `python -m <module> --ros-args -p k:=v ...`, inheriting the
        # caller's environment (conda paths, CUDA vars, ROS setup).
        cmd = [interpreter, '-m', module, '--ros-args']
        for param in params:
            cmd += ['-p', param]
        return ExecuteProcess(cmd=cmd, output='screen', env=dict(os.environ))

    loc_params = [
        'threshold:=0.3',
        'device:=cuda:0',
        'max_residual:=0.10',
        'headless:=true',
    ]
    reid_params = [
        f'keyreID_path:={keyreid_dir}',
        'num_classes:=150',
        'camera_num:=2',
        'device:=cuda:0',
        'seq_len:=4',
        'kp_threshold:=0.3',
        'match_threshold:=0.65',
        'track_dist_px:=120.0',
        'track_timeout:=3.0',
        'headless:=true',
    ]
    return LaunchDescription([
        _process('tracking_re_id.single_person_loc_node', loc_params),
        _process('tracking_re_id.reid_node', reid_params),
    ])

View File

@@ -0,0 +1,70 @@
"""Launch the KeyRe-ID re-identification pipeline alongside the existing
stereo triangulation pipeline.
Nodes started
─────────────
1. single_person_loc_node (unchanged stereo 3-D triangulation; not started
by this launch file — run it separately if 3-D output is needed)
publishes: /keypoint_markers (MarkerArray)
/keypoints_3d (PointCloud2)
2. reid_node (self-contained left-camera MMPose + KeyRe-ID)
publishes: /reid/annotated (Image)
/reid/track_markers (MarkerArray)
The two nodes are independent: reid_node runs its own MMPose instance on
the left camera only and does not depend on single_person_loc_node output.
Run them together to get both 3-D triangulation and persistent person IDs,
or launch reid_node on its own if only re-identification is needed.
Viewing the output
──────────────────
ros2 run rqt_image_view rqt_image_view → /reid/annotated
rviz2 → add MarkerArray /reid/track_markers and /keypoint_markers
"""
import os
from launch import LaunchDescription
from launch.actions import ExecuteProcess
def generate_launch_description():
    """Launch the KeyRe-ID re-identification node with its display window.

    Only reid_node is started here (headless:=false opens a local cv2
    window). The stereo triangulator single_person_loc_node is fully
    independent and can be launched on its own or via the headless launch
    file; the dead, commented-out entry for it has been removed.
    """
    python_exe = os.path.expanduser('~/miniconda3/envs/mmpose/bin/python3')
    keyreID_path = os.path.expanduser('~/KeyRe-ID')
    return LaunchDescription([
        # KeyRe-ID re-identification (self-contained: runs its own MMPose on
        # the left camera only).
        ExecuteProcess(
            cmd=[
                python_exe, '-m', 'tracking_re_id.reid_node',
                '--ros-args',
                '-p', f'keyreID_path:={keyreID_path}',
                '-p', 'num_classes:=150',
                '-p', 'camera_num:=2',
                '-p', 'device:=cuda:0',
                '-p', 'seq_len:=4',
                '-p', 'kp_threshold:=0.3',
                '-p', 'match_threshold:=0.65',
                '-p', 'track_dist_px:=120.0',
                '-p', 'track_timeout:=3.0',
                '-p', 'headless:=false',
            ],
            output='screen',
            env={**os.environ},
        ),
    ])

View File

@@ -13,6 +13,7 @@ setup(
['resource/' + package_name]),
('share/' + package_name, ['package.xml']),
(os.path.join('share', package_name, 'launch'), glob('launch/*.py')),
(os.path.join('share', package_name, 'weights'), glob('weights/*.pth')),
],
install_requires=['setuptools'],
zip_safe=True,
@@ -30,6 +31,7 @@ setup(
'single_person_loc_node = tracking_re_id.single_person_loc_node:main',
'ground_plane_node = tracking_re_id.ground_plane_node:main',
'overlay_node = tracking_re_id.overlay_node:main',
'reid_node = tracking_re_id.reid_node:main',
],
},
)

View File

@@ -0,0 +1,431 @@
"""
reid_node.py
Self-contained ROS 2 node: MMPose 2-D pose estimation on the left stereo
camera + KeyRe-ID person re-identification.
Pipeline (per frame)
────────────────────
/stereo/left/image_raw
MMPoseInferencer (pose2d='human')
│ per-person keypoints (17, 2) + scores (17,)
bbox extraction → person crop (256×128)
keypoint → 6-channel body-part heatmap (matches training pipeline)
Hungarian matching → per-track Tracklet buffer (deque, maxlen=seq_len)
▼ (once buffer full and person_id is None)
KeyRe-ID inference → feature embedding
Gallery cosine match → assign / register persistent person_id
/reid/annotated (Image) left frame annotated with IDs
/reid/track_markers (MarkerArray) labelled text markers for RViz
Parameters
──────────
weights_path str path to iLIDSVIDbest_CMC.pth (required)
keyreID_path str path to KeyRe-ID source directory
num_classes int training split size (150 for iLIDS-VID split-0)
camera_num int cameras in training set (2 for iLIDS-VID)
device str 'cuda:0' or 'cpu'
seq_len int frames per tracklet clip (default 4)
kp_threshold float min keypoint confidence
match_threshold float cosine-similarity threshold for gallery match
track_dist_px float max centroid distance (px) to keep a track alive
track_timeout float seconds before an unseen track is dropped
headless bool suppress cv2 display window
"""
import os
import sys
import time
import colorsys
from ament_index_python.packages import get_package_share_directory
import cv2
import numpy as np
import rclpy
from rclpy.node import Node
from sensor_msgs.msg import Image
from visualization_msgs.msg import Marker, MarkerArray
from cv_bridge import CvBridge
import torch
from .reid_utils import (
keypoints_to_heatmap,
keypoints_to_bbox,
clamp_bbox,
transform_keypoints_to_crop,
preprocess_crop,
preprocess_heatmap,
Tracklet,
Gallery,
)
# ── Hungarian matching ────────────────────────────────────────────────────────
def _hungarian_match(tracks: dict, detections: list, max_dist: float):
"""
Associate existing tracks to new per-frame detections by 2-D centroid
distance using the Hungarian algorithm.
Returns:
matched : list of (track_id, det_index)
unmatched_tracks : track_ids with no detection this frame
unmatched_dets : detection indices with no existing track
"""
from scipy.optimize import linear_sum_assignment
track_ids = list(tracks.keys())
n_t, n_d = len(track_ids), len(detections)
if n_t == 0:
return [], [], list(range(n_d))
if n_d == 0:
return [], track_ids, []
cost = np.full((n_t, n_d), max_dist + 1.0)
for i, tid in enumerate(track_ids):
tc = tracks[tid].centroid
if tc is None:
continue
for j, det in enumerate(detections):
dc = det['centroid']
if dc is not None:
cost[i, j] = float(np.hypot(tc[0] - dc[0], tc[1] - dc[1]))
row_ind, col_ind = linear_sum_assignment(cost)
matched, used_t, used_d = [], set(), set()
for ri, ci in zip(row_ind, col_ind):
if cost[ri, ci] <= max_dist:
matched.append((track_ids[ri], ci))
used_t.add(track_ids[ri])
used_d.add(ci)
return (
matched,
[tid for tid in track_ids if tid not in used_t],
[j for j in range(n_d) if j not in used_d],
)
# ── Stable per-person colour ──────────────────────────────────────────────────
def _id_colour(person_id: int) -> tuple:
r, g, b = colorsys.hsv_to_rgb(((person_id * 0.37) % 1.0), 0.9, 1.0)
return (int(b * 255), int(g * 255), int(r * 255))
# ═══════════════════════════════════════════════════════════════════════════════
class ReIDNode(Node):
    """KeyRe-ID re-identification node with integrated MMPose 2-D detection."""

    def __init__(self):
        """Declare parameters, load MMPose + KeyRe-ID, and wire up ROS I/O."""
        super().__init__('reid_node')
        # ── Parameters ──────────────────────────────────────────────────────
        self.declare_parameter('weights_path',
            os.path.join(
                get_package_share_directory('tracking_re_id'),
                'weights', 'iLIDSVIDbest_CMC.pth'))
        self.declare_parameter('keyreID_path',
            os.path.expanduser('~/KeyRe-ID'))
        self.declare_parameter('num_classes', 150)
        self.declare_parameter('camera_num', 2)
        self.declare_parameter('device', 'cuda:0')
        self.declare_parameter('seq_len', 4)
        self.declare_parameter('kp_threshold', 0.3)
        self.declare_parameter('match_threshold', 0.65)
        self.declare_parameter('track_dist_px', 120.0)
        self.declare_parameter('track_timeout', 3.0)
        self.declare_parameter('headless', False)
        weights_path = self.get_parameter('weights_path').value
        keyreID_path = self.get_parameter('keyreID_path').value
        num_classes = self.get_parameter('num_classes').value
        camera_num = self.get_parameter('camera_num').value
        device_str = self.get_parameter('device').value
        self._seq_len = self.get_parameter('seq_len').value
        self._kp_thresh = self.get_parameter('kp_threshold').value
        self._match_thresh = self.get_parameter('match_threshold').value
        self._track_dist = self.get_parameter('track_dist_px').value
        self._track_timeout = self.get_parameter('track_timeout').value
        self._headless = self.get_parameter('headless').value
        # Fall back to CPU when CUDA is unavailable so the node still runs.
        self._device = torch.device(
            device_str if torch.cuda.is_available() else 'cpu')
        self.get_logger().info(f'Using device: {self._device}')
        # ── MMPose ───────────────────────────────────────────────────────────
        from mmpose.apis import MMPoseInferencer  # noqa: PLC0415
        self.get_logger().info(f'Loading MMPose on {device_str}')
        self._inferencer = MMPoseInferencer(pose2d='human', device=device_str)
        self.get_logger().info('MMPose loaded.')
        # ── KeyRe-ID ─────────────────────────────────────────────────────────
        # KeyRe-ID lives in a source checkout, not an installed package, so
        # its directory must be on sys.path before the import.
        if keyreID_path not in sys.path:
            sys.path.insert(0, keyreID_path)
        try:
            from KeyRe_ID_model import KeyRe_ID  # noqa: PLC0415
        except ImportError as exc:
            self.get_logger().fatal(
                f'Cannot import KeyRe_ID_model from {keyreID_path}: {exc}')
            raise
        self.get_logger().info(f'Loading KeyRe-ID weights from {weights_path}')
        self._model = KeyRe_ID(
            num_classes=num_classes,
            camera_num=camera_num,
            pretrainpath=None,
        )
        self._model.load_param(weights_path, load=False)
        self._model.to(self._device)
        self._model.eval()
        self.get_logger().info('KeyRe-ID model ready.')
        # ── ROS infrastructure ───────────────────────────────────────────────
        self._bridge = CvBridge()
        self.create_subscription(
            Image, '/stereo/left/image_raw', self._image_cb, 10)
        self._vis_pub = self.create_publisher(Image, '/reid/annotated', 10)
        self._marker_pub = self.create_publisher(MarkerArray, '/reid/track_markers', 10)
        # ── State ────────────────────────────────────────────────────────────
        self._tracks: dict[int, Tracklet] = {}  # track_id → live Tracklet
        self._next_track_id: int = 0
        self._gallery = Gallery(threshold=self._match_thresh)
        self._display_frame = None  # latest annotated frame for the cv2 window
        if not self._headless:
            # ~30 Hz refresh of the local display window.
            self.create_timer(1.0 / 30.0, self._display_timer_cb)
        self.get_logger().info(
            'reid_node ready. Waiting for /stereo/left/image_raw …')

    # ── MMPose helper ─────────────────────────────────────────────────────────
    def _run_mmpose(self, frame: np.ndarray) -> list:
        """Return list of dicts {keypoints: (17,2), scores: (17,)}."""
        result = next(self._inferencer(frame, show=False, return_datasamples=False))
        people = []
        # `predictions` holds one list per input image; only one image is fed.
        for pred in result.get('predictions', [[]])[0]:
            kps = pred.get('keypoints', [])
            scores = pred.get('keypoint_scores', [])
            if len(kps) > 0:
                people.append({
                    'keypoints': np.array(kps, dtype=np.float32),
                    'scores': np.array(scores, dtype=np.float32),
                })
        return people

    # ── Main image callback ───────────────────────────────────────────────────
    def _image_cb(self, img_msg: Image):
        """Per-frame pipeline: detect → associate → re-ID → prune → publish."""
        now = time.time()
        frame = self._bridge.imgmsg_to_cv2(img_msg, desired_encoding='bgr8')
        frame_h, frame_w = frame.shape[:2]
        # Detect people and compute centroids / bboxes
        raw_people = self._run_mmpose(frame)
        detections = []
        for person in raw_people:
            kps, scores = person['keypoints'], person['scores']
            bbox = keypoints_to_bbox(kps, scores, threshold=self._kp_thresh)
            if bbox is None:
                continue
            visible = scores > self._kp_thresh
            # Centroid of the visible keypoints only.
            centroid = (float(kps[visible, 0].mean()),
                        float(kps[visible, 1].mean())) if np.any(visible) else None
            if centroid is None:
                continue
            detections.append({
                'keypoints': kps,
                'scores': scores,
                'bbox': bbox,
                'centroid': centroid,
            })
        # Associate detections to existing tracks
        matched, unmatched_tracks, unmatched_dets = _hungarian_match(
            self._tracks, detections, self._track_dist)
        for tid, det_idx in matched:
            self._update_track(
                self._tracks[tid], detections[det_idx],
                frame, frame_w, frame_h, now)
        for det_idx in unmatched_dets:
            tid = self._next_track_id
            self._next_track_id += 1
            self._tracks[tid] = Tracklet(tid, seq_len=self._seq_len)
            self._update_track(
                self._tracks[tid], detections[det_idx],
                frame, frame_w, frame_h, now)
        # Re-ID every track whose clip buffer is full.
        # NOTE(review): this re-runs KeyRe-ID inference each frame once a
        # buffer fills; if a single assignment per track was intended, gate
        # on `track.person_id is None` as well — confirm desired behaviour.
        with torch.no_grad():
            for track in self._tracks.values():
                if track.is_ready():
                    self._run_reid(track)
        # Drop stale tracks (snapshot the ids first so the dict is not
        # mutated while iterating).
        for tid in [tid for tid, t in self._tracks.items()
                    if now - t.last_seen > self._track_timeout]:
            del self._tracks[tid]
        # Publish
        vis = self._build_visualisation(frame.copy())
        out = self._bridge.cv2_to_imgmsg(vis, encoding='bgr8')
        out.header = img_msg.header
        self._vis_pub.publish(out)
        self._publish_markers(img_msg.header.stamp)
        if not self._headless:
            self._display_frame = vis

    # ── Track update ──────────────────────────────────────────────────────────
    def _update_track(self, track: Tracklet, det: dict,
                      frame: np.ndarray, frame_w: int, frame_h: int,
                      timestamp: float):
        """Crop, heatmap and buffer one detection into *track*."""
        x1, y1, x2, y2 = clamp_bbox(*det['bbox'], frame_w, frame_h)
        # Reject degenerate crops that would break resizing / the model.
        if x2 - x1 < 10 or y2 - y1 < 10:
            return
        crop_bgr = frame[y1:y2, x1:x2]
        kp_xyc, crop_w, crop_h = transform_keypoints_to_crop(
            det['keypoints'], det['scores'], x1, y1, x2, y2)
        heatmap_np = keypoints_to_heatmap(
            kp_xyc, crop_w, crop_h, vis_thresh=self._kp_thresh)
        try:
            crop_t = preprocess_crop(crop_bgr)
            heatmap_t = preprocess_heatmap(heatmap_np)
        except Exception as exc:
            # Best-effort: a bad crop only skips this frame for the track.
            self.get_logger().warn(
                f'Preprocess failed for track {track.track_id}: {exc}')
            return
        track.add_frame(crop_t, heatmap_t,
                        centroid=det['centroid'],
                        bbox=(x1, y1, x2, y2),
                        timestamp=timestamp)

    # ── KeyRe-ID inference ────────────────────────────────────────────────────
    def _run_reid(self, track: Tracklet):
        """Embed the track's clip and assign / refresh its persistent id."""
        imgs, hmaps = track.get_model_inputs()
        imgs = imgs.to(self._device)
        hmaps = hmaps.to(self._device)
        # cam_label=0: left stereo camera → iLIDS-VID cam1 index
        feat = self._model(imgs, hmaps, None, cam_label=0)
        feature = feat[0].cpu()
        track.feature = feature
        track.person_id, track.match_sim = self._gallery.match_or_register(feature)

    # ── Visualisation ─────────────────────────────────────────────────────────
    def _build_visualisation(self, frame: np.ndarray) -> np.ndarray:
        """Draw per-track boxes/labels and a global status line onto *frame*."""
        for track in self._tracks.values():
            if track.bbox is None:
                continue
            x1, y1, x2, y2 = track.bbox
            if track.person_id is not None:
                colour = _id_colour(track.person_id)
                label = f'P{track.person_id} ({track.match_sim:.2f})'
            else:
                # Grey box while the clip buffer is still filling.
                colour = (160, 160, 160)
                label = f'T{track.track_id} ({len(track.crops)}/{self._seq_len})'
            cv2.rectangle(frame, (x1, y1), (x2, y2), colour, 2)
            (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.55, 1)
            cv2.rectangle(frame, (x1, y1 - th - 6), (x1 + tw + 4, y1), colour, -1)
            cv2.putText(frame, label, (x1 + 2, y1 - 4),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.55, (0, 0, 0), 1, cv2.LINE_AA)
        cv2.putText(frame,
                    f'Known: {len(self._gallery)} Tracks: {len(self._tracks)}',
                    (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                    (255, 255, 255), 2, cv2.LINE_AA)
        return frame

    # ── RViz markers ─────────────────────────────────────────────────────────
    def _publish_markers(self, stamp):
        """Publish one TEXT_VIEW_FACING label per identified track."""
        ma = MarkerArray()
        # Clear last frame's markers before adding the current set.
        delete = Marker()
        delete.action = Marker.DELETEALL
        delete.header.frame_id = 'left'
        delete.header.stamp = stamp
        ma.markers.append(delete)
        mid = 0
        for track in self._tracks.values():
            if track.centroid is None or track.person_id is None:
                continue
            colour = _id_colour(track.person_id)
            m = Marker()
            m.header.frame_id = 'left'
            m.header.stamp = stamp
            m.ns = 'reid_labels'
            m.id = mid; mid += 1
            m.type = Marker.TEXT_VIEW_FACING
            m.action = Marker.ADD
            # Pixel coordinates scaled by 1/100 — a convenience mapping into
            # RViz units, not a metric position (TODO confirm intended frame).
            m.pose.position.x = float(track.centroid[0]) / 100.0
            m.pose.position.y = float(track.centroid[1]) / 100.0
            m.pose.position.z = 2.0
            m.pose.orientation.w = 1.0
            m.scale.z = 0.15
            # Stored colour is BGR; markers want RGB floats in [0, 1].
            m.color.r = colour[2] / 255.0
            m.color.g = colour[1] / 255.0
            m.color.b = colour[0] / 255.0
            m.color.a = 1.0
            m.text = f'P{track.person_id}'
            m.lifetime.nanosec = 500_000_000  # auto-expire after 0.5 s
            ma.markers.append(m)
        self._marker_pub.publish(ma)

    # ── Display timer ─────────────────────────────────────────────────────────
    def _display_timer_cb(self):
        """Show the latest annotated frame; pressing 'q' shuts the node down."""
        if self._display_frame is not None:
            cv2.imshow('KeyRe-ID', self._display_frame)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                self.get_logger().info('Quit requested.')
                self.destroy_node()
                rclpy.shutdown()
# ── Entry point ───────────────────────────────────────────────────────────────
def main(args=None):
    """CLI entry point: spin the node until Ctrl-C, then clean up."""
    rclpy.init(args=args)
    node = ReIDNode()
    try:
        rclpy.spin(node)
    except KeyboardInterrupt:
        pass
    finally:
        # Close the cv2 window only when one could have been created.
        if not node._headless:
            cv2.destroyAllWindows()
        node.destroy_node()
        rclpy.try_shutdown()


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,326 @@
"""
reid_utils.py
Shared utilities for the KeyRe-ID ROS 2 node:
- keypoints_to_heatmap() -- matches the generate_heatmaps_ilids.py training pipeline
- preprocess_crop() -- matches val_transforms from heatmap_loader.py
- preprocess_heatmap() -- matches CustomHeatmapTransform from heatmap_loader.py
- keypoints_to_bbox() -- derive a bounding box from visible keypoints
- transform_keypoints_to_crop() -- remap full-frame kps to crop-relative coords
- Tracklet -- per-track frame buffer
- Gallery -- known-person feature store with cosine re-ID
"""
from collections import deque
import cv2
import numpy as np
import torch
import torch.nn.functional as F
import torchvision.transforms as T
from scipy.ndimage import gaussian_filter
from torchvision.transforms import InterpolationMode
# ── COCO keypoint indices ─────────────────────────────────────────────────────
NOSE, LEYE, REYE, LEAR, REAR = 0, 1, 2, 3, 4
LS, RS, LE, RE, LW, RW = 5, 6, 7, 8, 9, 10
LH, RH, LK, RK, LA, RA = 11, 12, 13, 14, 15, 16
# ── Heatmap generation (identical to generate_heatmaps_ilids.py) ──────────────
def keypoints_to_heatmap(kp_array, img_w, img_h, vis_thresh=0.1):
    """
    Convert 17 COCO keypoints to 6-channel body-part heatmaps.
    This exactly reproduces the pipeline used to generate the training data so
    the heatmap distribution seen at inference matches what the model trained on.
    Args:
        kp_array : np.ndarray (17, 3) [x, y, confidence]
        img_w : int width of the source image (crop)
        img_h : int height of the source image (crop)
        vis_thresh : float confidence threshold
    Returns:
        np.ndarray (6, img_h, img_w) channels: head, torso, l-arm, r-arm, l-leg, r-leg
    """
    heatmaps = np.zeros((6, img_h, img_w), dtype=np.float32)
    # Drawing scale is tied to crop width so parts look consistent across
    # differently sized crops.
    blur_sigma = max(1.0, img_w / 16.0)
    line_thickness = max(1, int(img_w / 8))

    def vis(i):
        # Keypoint i passed the confidence threshold.
        return kp_array[i, 2] > vis_thresh

    def pt(i):
        return (int(kp_array[i, 0]), int(kp_array[i, 1]))

    def in_bounds(i):
        x, y = int(kp_array[i, 0]), int(kp_array[i, 1])
        return 0 <= x < img_w and 0 <= y < img_h

    def usable(i):
        return vis(i) and in_bounds(i)

    # Channel 0: Head — convex hull of the facial points when ≥3 are usable,
    # otherwise filled discs at whatever points remain.
    head_pts = [pt(i) for i in [NOSE, LEYE, REYE, LEAR, REAR] if usable(i)]
    if len(head_pts) >= 3:
        hull = cv2.convexHull(np.array(head_pts, dtype=np.int32))
        cv2.fillConvexPoly(heatmaps[0], hull, 1.0)
    elif head_pts:
        for p in head_pts:
            cv2.circle(heatmaps[0], p, line_thickness, 1.0, -1)
    # Channel 1: Torso — quad / hull / line / disc, degrading gracefully with
    # the number of usable shoulder+hip corners.
    torso_order = [LS, RS, RH, LH]
    torso_usable = [i for i in torso_order if usable(i)]
    if len(torso_usable) == 4:
        pts = np.array([pt(i) for i in torso_order], dtype=np.int32)
        cv2.fillPoly(heatmaps[1], [pts], 1.0)
    elif len(torso_usable) >= 3:
        hull = cv2.convexHull(np.array([pt(i) for i in torso_usable], dtype=np.int32))
        cv2.fillConvexPoly(heatmaps[1], hull, 1.0)
    elif len(torso_usable) == 2:
        pts_list = [pt(i) for i in torso_usable]
        cv2.line(heatmaps[1], pts_list[0], pts_list[1], 1.0, line_thickness)
    elif len(torso_usable) == 1:
        cv2.circle(heatmaps[1], pt(torso_usable[0]), line_thickness, 1.0, -1)
    # Channels 2-5: Limbs — thick segment from the mid joint to the end
    # joint, falling back to a disc when only one of the two is usable.
    limbs = [
        (2, LE, LW),  # left arm
        (3, RE, RW),  # right arm
        (4, LK, LA),  # left leg
        (5, RK, RA),  # right leg
    ]
    for ch, j1, j2 in limbs:
        u1, u2 = usable(j1), usable(j2)
        if u1 and u2:
            cv2.line(heatmaps[ch], pt(j1), pt(j2), 1.0, line_thickness)
        elif u1:
            cv2.circle(heatmaps[ch], pt(j1), line_thickness, 1.0, -1)
        elif u2:
            cv2.circle(heatmaps[ch], pt(j2), line_thickness, 1.0, -1)
    # Gaussian blur for soft edges
    for i in range(6):
        if heatmaps[i].max() > 0:
            heatmaps[i] = gaussian_filter(heatmaps[i], sigma=blur_sigma)
    # L/R sanity check based on torso orientation: if the elbows (or knees)
    # disagree with the shoulders about which side is image-left, the
    # detector likely flipped the limb labels, so swap those channel pairs.
    # Only applied when the shoulders are horizontally separated enough for
    # the x-order to be meaningful (i.e. not a profile view).
    if usable(LS) and usable(RS):
        shoulder_gap = abs(kp_array[LS, 0] - kp_array[RS, 0])
        if shoulder_gap > img_w * 0.05:
            facing_camera = kp_array[LS, 0] > kp_array[RS, 0]
            if usable(LE) and usable(RE):
                if facing_camera != (kp_array[LE, 0] > kp_array[RE, 0]):
                    heatmaps[2], heatmaps[3] = heatmaps[3].copy(), heatmaps[2].copy()
            if usable(LK) and usable(RK):
                if facing_camera != (kp_array[LK, 0] > kp_array[RK, 0]):
                    heatmaps[4], heatmaps[5] = heatmaps[5].copy(), heatmaps[4].copy()
    # Normalize each channel to [0, 1]
    for i in range(6):
        mx = heatmaps[i].max()
        if mx > 0:
            heatmaps[i] /= mx
    return heatmaps
# ── Bounding box utilities ────────────────────────────────────────────────────
def keypoints_to_bbox(keypoints, scores, threshold=0.3, margin=0.15):
    """
    Bounding box (x1, y1, x2, y2) around the keypoints scoring above
    *threshold*, padded by *margin* — extra above the head, less below the
    feet. Returns None when no keypoint is visible.
    """
    mask = scores > threshold
    if not np.any(mask):
        return None
    xs = keypoints[mask, 0]
    ys = keypoints[mask, 1]
    left, top = float(xs.min()), float(ys.min())
    right, bottom = float(xs.max()), float(ys.max())
    width, height = right - left, bottom - top
    return (
        left - width * margin,
        top - height * margin * 1.5,    # extra headroom
        right + width * margin,
        bottom + height * margin * 0.5,
    )
def clamp_bbox(x1, y1, x2, y2, frame_w, frame_h):
    """Truncate the box coords to int and clip them to the image extent."""
    return (
        max(int(x1), 0),
        max(int(y1), 0),
        min(int(x2), frame_w),
        min(int(y2), frame_h),
    )
def transform_keypoints_to_crop(keypoints, scores, x1, y1, x2, y2):
    """
    Shift full-frame keypoint coordinates into the crop's local frame so the
    heatmap is generated in the same coordinate space as the training data
    (where MMPose ran on individual person crops, not the full frame).

    Returns:
        kp_xyc : np.ndarray (17, 3) crop-relative [x, y, score]
        crop_w : int
        crop_h : int
    """
    offset = np.array([x1, y1], dtype=np.float32)
    local = keypoints.astype(np.float32) - offset
    kp_xyc = np.concatenate(
        [local, scores.astype(np.float32)[:, None]], axis=1)
    return kp_xyc, int(x2 - x1), int(y2 - y1)
# ── Preprocessing (must match heatmap_loader.py transforms) ──────────────────
# Image pipeline identical to val_transforms in heatmap_loader.py:
# resize to 256×128, convert to tensor, normalize to roughly [-1, 1].
_IMG_TRANSFORM = T.Compose([
    T.Resize([256, 128], interpolation=InterpolationMode.BILINEAR),
    T.ToTensor(),
    T.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])
# Same 0.5/0.5 normalization scheme, applied to all 6 body-part channels.
_HM_NORMALIZE = T.Normalize(mean=[0.5] * 6, std=[0.5] * 6)
def preprocess_crop(crop_bgr):
    """
    Turn a BGR person crop (numpy HxWx3) into a model-ready tensor.
    Pipeline matches val_transforms in heatmap_loader.py:
        Resize(256, 128) → ToTensor → Normalize(0.5, 0.5)
    Returns:
        torch.Tensor (3, 256, 128)
    """
    from PIL import Image as PILImage
    as_rgb = cv2.cvtColor(crop_bgr, cv2.COLOR_BGR2RGB)
    return _IMG_TRANSFORM(PILImage.fromarray(as_rgb))
def preprocess_heatmap(heatmap_np):
    """
    Turn a raw (6, H, W) numpy heatmap into a model-ready tensor.
    Pipeline matches CustomHeatmapTransform([256, 128]) in heatmap_loader.py:
        bilinear resize → per-channel min-max scale → Normalize(0.5, 0.5)
    Returns:
        torch.Tensor (6, 256, 128)
    """
    hm = torch.tensor(heatmap_np, dtype=torch.float32)
    # Add/remove a batch axis around interpolate, which expects (N, C, H, W).
    hm = F.interpolate(hm[None], size=(256, 128), mode='bilinear',
                       align_corners=False)[0]
    # Per-channel min-max scaling to [0, 1] (epsilon guards flat channels).
    lo = hm.amin(dim=(1, 2), keepdim=True)
    hi = hm.amax(dim=(1, 2), keepdim=True)
    hm = (hm - lo) / (hi - lo + 1e-6)
    return _HM_NORMALIZE(hm)
# ── Tracklet ──────────────────────────────────────────────────────────────────
class Tracklet:
    """
    Rolling clip of preprocessed (crop, heatmap) tensors for a single tracked
    person, plus the latest geometry and the most recent re-ID assignment.
    """

    def __init__(self, track_id: int, seq_len: int = 4):
        self.track_id = track_id
        self.seq_len = seq_len
        # Rolling buffers — once full, each new frame evicts the oldest.
        self.crops: deque = deque(maxlen=seq_len)      # Tensor (3, 256, 128)
        self.heatmaps: deque = deque(maxlen=seq_len)   # Tensor (6, 256, 128)
        self.centroid = None   # (cx, cy) pixel coords in the latest frame
        self.bbox = None       # (x1, y1, x2, y2) in the latest frame
        self.last_seen: float = 0.0  # time.time() of the latest update
        # Re-ID assignment (filled in after gallery matching)
        self.person_id: int | None = None
        self.match_sim: float = 0.0
        # Latest computed feature embedding (feat_dim,)
        self.feature: torch.Tensor | None = None

    def add_frame(self, crop_t: torch.Tensor, heatmap_t: torch.Tensor,
                  centroid, bbox, timestamp: float):
        """Push one preprocessed frame and refresh the track geometry."""
        self.last_seen = timestamp
        self.centroid = centroid
        self.bbox = bbox
        self.crops.append(crop_t)
        self.heatmaps.append(heatmap_t)

    def is_ready(self) -> bool:
        """True once the rolling buffer has been filled at least once."""
        return self.seq_len <= len(self.crops)

    def get_model_inputs(self):
        """
        Returns (imgs, hmaps) tensors shaped for KeyRe_ID:
            imgs  : (1, seq_len, 3, 256, 128)
            hmaps : (1, seq_len, 6, 256, 128)
        """
        clip = torch.stack(tuple(self.crops))
        parts = torch.stack(tuple(self.heatmaps))
        return clip[None], parts[None]
# ── Gallery ───────────────────────────────────────────────────────────────────
class Gallery:
    """
    Known-person store: one feature embedding per person id, queried by
    cosine similarity.
    A query whose best similarity falls below `threshold` is registered as a
    brand-new person; a matched entry is refreshed with an exponential moving
    average of its embedding.
    """

    def __init__(self, threshold: float = 0.65, ema_alpha: float = 0.9):
        self.threshold = threshold
        self.ema_alpha = ema_alpha
        self._embeddings: dict[int, torch.Tensor] = {}  # person_id → (feat_dim,)
        self._next_id: int = 1

    def match_or_register(self, feature: torch.Tensor) -> tuple[int, float]:
        """
        Compare *feature* against every gallery entry.
        Returns:
            (person_id, cosine_similarity)
        A similarity below the threshold triggers registration of a new id.
        """
        query = F.normalize(feature.unsqueeze(0), dim=1)  # (1, D)
        best_pid = None
        best_sim = -1.0
        for pid, emb in self._embeddings.items():
            candidate = F.normalize(emb.unsqueeze(0), dim=1)
            sim = float((query @ candidate.T).item())
            if sim > best_sim:
                best_pid, best_sim = pid, sim
        if best_pid is not None and best_sim >= self.threshold:
            # Refresh the matched embedding with an EMA update.
            previous = self._embeddings[best_pid]
            self._embeddings[best_pid] = (
                self.ema_alpha * previous
                + (1.0 - self.ema_alpha) * feature.detach()
            )
            return best_pid, best_sim
        # No acceptable match → register a new person.
        new_pid = self._next_id
        self._next_id += 1
        self._embeddings[new_pid] = feature.detach().clone()
        return new_pid, 1.0 if best_pid is None else best_sim

    def __len__(self) -> int:
        return len(self._embeddings)

Binary file not shown.