1086 lines
45 KiB
Python
1086 lines
45 KiB
Python
# ByteTrack Integration Demo
|
||
# This script demonstrates how to use the ByteTrack implementation
|
||
# as a drop-in replacement for DeepSORT in your application
|
||
#
|
||
# ByteTrack is the preferred tracker with better performance and higher FPS
|
||
# This version demonstrates the improved tracking with real-time comparison
|
||
|
||
import sys
|
||
import os
|
||
import argparse
|
||
import cv2
|
||
import time
|
||
import numpy as np
|
||
from pathlib import Path
|
||
|
||
# Add the parent directory to path for imports
|
||
parent_dir = str(Path(__file__).resolve().parent.parent)
|
||
if parent_dir not in sys.path:
|
||
sys.path.append(parent_dir)
|
||
|
||
# Import both trackers for comparison
|
||
# from controllers.deepsort_tracker import DeepSortVehicleTracker # Deprecated
|
||
from controllers.bytetrack_tracker import ByteTrackVehicleTracker
|
||
|
||
def generate_mock_detections(num_objects=5, frame_shape=(1080, 1920, 3)):
    """Generate mock vehicle detections for testing.

    Args:
        num_objects: number of fake detections to produce.
        frame_shape: (height, width, channels) of the target frame.

    Returns:
        List of dicts with keys 'bbox' ([x1, y1, x2, y2] as floats,
        fully inside the frame), 'confidence' (float in [0.4, 0.95))
        and 'class_id' (int, 2 for car or 7 for truck).
    """
    frame_h, frame_w = frame_shape[:2]
    mock_dets = []

    for _ in range(num_objects):
        # Vehicles are typically wider than tall, so sample the width from a
        # proportionally larger range than the height.
        box_w = np.random.randint(frame_w // 10, frame_w // 4)
        box_h = np.random.randint(frame_h // 10, frame_h // 6)

        # Top-left corner chosen so the whole box stays inside the frame.
        left = np.random.randint(0, frame_w - box_w)
        top = np.random.randint(0, frame_h - box_h)

        score = np.random.uniform(0.4, 0.95)
        label = np.random.choice([2, 7])  # COCO-style ids: 2 = car, 7 = truck

        mock_dets.append({
            'bbox': [float(left), float(top), float(left + box_w), float(top + box_h)],
            'confidence': float(score),
            'class_id': int(label),
        })

    return mock_dets
|
||
|
||
def draw_tracks(frame, tracks, color=(0, 255, 0)):
    """Overlay tracking results on a frame.

    Args:
        frame: BGR image to draw on (modified in place and also returned).
        tracks: list of dicts with keys 'id', 'bbox' and optionally 'confidence'.
        color: BGR tuple used for both the rectangle and the label text.

    Returns:
        The same frame with one rectangle and an "ID:<id> <conf>" label per track.
    """
    for entry in tracks:
        left, top, right, bottom = (int(v) for v in entry['bbox'])
        score = entry.get('confidence', 0)

        # Bounding box outline.
        cv2.rectangle(frame, (left, top), (right, bottom), color, 2)

        # Label just above the box's top-left corner.
        label = f"ID:{entry['id']} {score:.2f}"
        cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return frame
|
||
|
||
def main():
    """Demo entry point.

    Parses CLI arguments, opens a video file or the default camera, feeds
    (mock) detections to the selected tracker(s), displays annotated frames
    until the stream ends or 'q' is pressed, then prints timing statistics.
    """
    parser = argparse.ArgumentParser(description="ByteTrack vs DeepSORT comparison demo")
    parser.add_argument("--video", type=str, default=None, help="Path to video file (default: camera)")
    parser.add_argument("--tracker", type=str, default="bytetrack",
                        choices=["bytetrack", "deepsort", "both"],
                        help="Tracker to use: bytetrack (recommended), deepsort (legacy), or both")
    parser.add_argument("--mock", action="store_true", help="Use mock detections instead of actual detector")
    args = parser.parse_args()

    # Initialize video source: file path if provided, otherwise device 0
    if args.video:
        cap = cv2.VideoCapture(args.video)
    else:
        cap = cv2.VideoCapture(0)  # Use default camera

    if not cap.isOpened():
        print(f"Error: Could not open video source.")
        return

    # Get video properties (fps may be 0 for some camera backends)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)

    print(f"Video source: {width}x{height} @ {fps}fps")

    # Initialize trackers based on choice
    if args.tracker == "bytetrack" or args.tracker == "both":
        bytetrack_tracker = ByteTrackVehicleTracker()

    if args.tracker == "deepsort" or args.tracker == "both":
        # NOTE(review): DeepSORT was removed; this path also instantiates
        # ByteTrack, so "both" compares two ByteTrack instances (and, since
        # ByteTrackVehicleTracker is a singleton, presumably the SAME
        # instance) rather than two different trackers — confirm intent.
        print("⚠️ DeepSORT tracker is deprecated, using ByteTrack as fallback")
        deepsort_tracker = ByteTrackVehicleTracker()

    # Main processing loop
    frame_count = 0
    processing_times = {'bytetrack': [], 'deepsort': []}  # per-frame seconds

    while True:
        ret, frame = cap.read()
        if not ret:
            break  # end of stream / camera error

        frame_count += 1
        print(f"\nProcessing frame {frame_count}")

        # Generate or get detections
        if args.mock:
            detections = generate_mock_detections(num_objects=10, frame_shape=frame.shape)
            print(f"Generated {len(detections)} mock detections")
        else:
            # In a real application, you would use your actual detector here.
            # This is just a placeholder for demo purposes (identical to the
            # --mock branch).
            detections = generate_mock_detections(num_objects=10, frame_shape=frame.shape)
            print(f"Generated {len(detections)} mock detections")

        # Process with ByteTrack
        if args.tracker == "bytetrack" or args.tracker == "both":
            start_time = time.time()
            bytetrack_results = bytetrack_tracker.update(detections, frame)
            bt_time = time.time() - start_time
            processing_times['bytetrack'].append(bt_time)
            print(f"ByteTrack processing time: {bt_time:.4f}s")

            if args.tracker == "bytetrack":
                # Single-tracker mode: draw directly onto a copy of the frame
                display_frame = draw_tracks(frame.copy(), bytetrack_results, color=(0, 255, 0))

        # Process with DeepSORT (actually ByteTrack — see note above)
        if args.tracker == "deepsort" or args.tracker == "both":
            start_time = time.time()
            try:
                print("ℹ️ Using ByteTrack (as DeepSORT replacement)")
                deepsort_results = deepsort_tracker.update(detections, frame)
                ds_time = time.time() - start_time
                processing_times['deepsort'].append(ds_time)
                print(f"DeepSORT processing time: {ds_time:.4f}s")
            except Exception as e:
                # Best-effort: keep the demo running with empty results
                print(f"DeepSORT error: {e}")
                deepsort_results = []
                ds_time = 0

            if args.tracker == "deepsort":
                display_frame = draw_tracks(frame.copy(), deepsort_results, color=(0, 0, 255))

        # If comparing both, create a side-by-side view
        if args.tracker == "both":
            # Draw tracks on separate frames (green = ByteTrack, red = "DeepSORT")
            bt_frame = draw_tracks(frame.copy(), bytetrack_results, color=(0, 255, 0))
            ds_frame = draw_tracks(frame.copy(), deepsort_results, color=(0, 0, 255))

            # Resize if needed and create side-by-side view
            h, w = frame.shape[:2]
            display_frame = np.zeros((h, w*2, 3), dtype=np.uint8)
            display_frame[:, :w] = bt_frame
            display_frame[:, w:] = ds_frame

            # Add labels: tracker name, track count and per-frame latency
            cv2.putText(display_frame, "ByteTrack", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(display_frame, f"{len(bytetrack_results)} tracks", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            cv2.putText(display_frame, f"{bt_time:.4f}s", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

            cv2.putText(display_frame, "DeepSORT", (w+10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.putText(display_frame, f"{len(deepsort_results)} tracks", (w+10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            cv2.putText(display_frame, f"{ds_time:.4f}s", (w+10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

        # Show the frame
        cv2.imshow("Tracking Demo", display_frame)

        # 'q' quits the demo
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release resources
    cap.release()
    cv2.destroyAllWindows()

    # Print performance statistics (averages over all processed frames)
    if len(processing_times['bytetrack']) > 0:
        bt_avg = sum(processing_times['bytetrack']) / len(processing_times['bytetrack'])
        print(f"ByteTrack average processing time: {bt_avg:.4f}s ({1/bt_avg:.2f} FPS)")

    if len(processing_times['deepsort']) > 0:
        ds_avg = sum(processing_times['deepsort']) / len(processing_times['deepsort'])
        print(f"DeepSORT average processing time: {ds_avg:.4f}s ({1/ds_avg:.2f} FPS)")


if __name__ == "__main__":
    main()
|
||
# ByteTrack implementation for vehicle tracking
|
||
# Efficient and robust multi-object tracking with improved association strategy
|
||
import numpy as np
|
||
import cv2
|
||
import time
|
||
from collections import defaultdict, deque
|
||
import torch
|
||
from typing import List, Dict, Any, Tuple, Optional
|
||
|
||
class BYTETracker:
|
||
"""
|
||
ByteTrack tracker implementation
|
||
Based on the paper: ByteTrack: Multi-Object Tracking by Associating Every Detection Box
|
||
"""
|
||
def __init__(
|
||
self,
|
||
track_thresh=0.5,
|
||
track_buffer=30,
|
||
match_thresh=0.8,
|
||
frame_rate=30,
|
||
track_high_thresh=0.6,
|
||
track_low_thresh=0.1,
|
||
camera_motion_compensation=False
|
||
):
|
||
self.tracked_tracks = [] # Active tracks being tracked
|
||
self.lost_tracks = [] # Lost tracks (temporarily out of view)
|
||
self.removed_tracks = [] # Removed tracks (permanently lost)
|
||
|
||
self.frame_id = 0
|
||
self.max_time_lost = int(frame_rate / 30.0 * track_buffer)
|
||
|
||
self.track_thresh = track_thresh # Threshold for high-confidence detections
|
||
self.track_high_thresh = track_high_thresh # Higher threshold for first association
|
||
self.track_low_thresh = track_low_thresh # Lower threshold for second association
|
||
self.match_thresh = match_thresh # IOU match threshold
|
||
|
||
self.track_id_count = 0
|
||
self.camera_motion_compensation = camera_motion_compensation
|
||
|
||
print(f"[BYTETRACK] Initialized with: high_thresh={track_high_thresh}, " +
|
||
f"low_thresh={track_low_thresh}, match_thresh={match_thresh}")
|
||
|
||
def update(self, detections, frame=None):
|
||
"""Update tracks with new detections
|
||
|
||
Args:
|
||
detections: list of dicts with keys ['bbox', 'confidence', 'class_id', ...]
|
||
frame: Optional BGR frame for debug visualization
|
||
|
||
Returns:
|
||
list of dicts with keys ['id', 'bbox', 'confidence', 'class_id', ...]
|
||
"""
|
||
self.frame_id += 1
|
||
|
||
# FIXED: Add more debug output
|
||
print(f"[BYTETRACK] Frame {self.frame_id}: Processing {len(detections)} detections")
|
||
print(f"[BYTETRACK] Current state: {len(self.tracked_tracks)} tracked, {len(self.lost_tracks)} lost")
|
||
|
||
# Convert detections to internal format
|
||
converted_detections = self._convert_detections(detections)
|
||
|
||
# Handle empty detections case
|
||
if len(converted_detections) == 0:
|
||
print(f"[BYTETRACK] No valid detections in frame {self.frame_id}")
|
||
# Update lost tracks and remove expired
|
||
new_tracked_tracks = []
|
||
new_lost_tracks = []
|
||
|
||
# All current tracks go to lost
|
||
for track in self.tracked_tracks:
|
||
track.is_lost = True
|
||
if self.frame_id - track.last_frame <= self.max_time_lost:
|
||
track.predict() # Predict new location
|
||
new_lost_tracks.append(track)
|
||
else:
|
||
self.removed_tracks.append(track)
|
||
|
||
# Update remaining lost tracks
|
||
for track in self.lost_tracks:
|
||
if self.frame_id - track.last_frame <= self.max_time_lost:
|
||
track.predict()
|
||
new_lost_tracks.append(track)
|
||
else:
|
||
self.removed_tracks.append(track)
|
||
|
||
self.tracked_tracks = new_tracked_tracks
|
||
self.lost_tracks = new_lost_tracks
|
||
print(f"[BYTETRACK] No detections: updated to {len(self.tracked_tracks)} tracked, {len(self.lost_tracks)} lost")
|
||
return []
|
||
|
||
# Split detections into high and low confidence - with safety checks
|
||
if len(converted_detections) > 0:
|
||
# FIXED: More robust confidence value handling
|
||
try:
|
||
# Make sure all values are numeric before comparison
|
||
confidence_values = converted_detections[:, 4].astype(float)
|
||
|
||
# Print the distribution of confidence values for debugging
|
||
if len(confidence_values) > 0:
|
||
print(f"[BYTETRACK] Confidence values: min={np.min(confidence_values):.2f}, " +
|
||
f"median={np.median(confidence_values):.2f}, max={np.max(confidence_values):.2f}")
|
||
|
||
high_dets = converted_detections[confidence_values >= self.track_high_thresh]
|
||
low_dets = converted_detections[(confidence_values >= self.track_low_thresh) &
|
||
(confidence_values < self.track_high_thresh)]
|
||
|
||
print(f"[BYTETRACK] Split into {len(high_dets)} high-conf and {len(low_dets)} low-conf detections")
|
||
except Exception as e:
|
||
print(f"[BYTETRACK] Error processing confidence values: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
# Fallback to empty arrays
|
||
high_dets = np.empty((0, 6))
|
||
low_dets = np.empty((0, 6))
|
||
else:
|
||
high_dets = np.empty((0, 6))
|
||
low_dets = np.empty((0, 6))
|
||
|
||
# Handle first frame special case
|
||
if self.frame_id == 1:
|
||
# Create new tracks for all high-confidence detections
|
||
for i in range(len(high_dets)):
|
||
det = high_dets[i]
|
||
new_track = Track(det, self.track_id_count)
|
||
new_track.last_frame = self.frame_id # CRITICAL: Set last_frame when creating track
|
||
self.track_id_count += 1
|
||
self.tracked_tracks.append(new_track)
|
||
|
||
# Also create tracks for lower confidence detections in first frame
|
||
# This helps with initial tracking when objects might not be clearly visible
|
||
for i in range(len(low_dets)):
|
||
det = low_dets[i]
|
||
new_track = Track(det, self.track_id_count)
|
||
new_track.last_frame = self.frame_id # CRITICAL: Set last_frame when creating track
|
||
self.track_id_count += 1
|
||
self.tracked_tracks.append(new_track)
|
||
|
||
print(f"[BYTETRACK] First frame: created {len(self.tracked_tracks)} new tracks")
|
||
return self._get_track_results()
|
||
|
||
# Get active and lost tracks
|
||
tracked_tlbrs = []
|
||
tracked_ids = []
|
||
|
||
for track in self.tracked_tracks:
|
||
tracked_tlbrs.append(track.tlbr)
|
||
tracked_ids.append(track.track_id)
|
||
|
||
tracked_tlbrs = np.array(tracked_tlbrs) if tracked_tlbrs else np.empty((0, 4))
|
||
tracked_ids = np.array(tracked_ids)
|
||
|
||
# First association: high confidence detections with tracked tracks
|
||
if len(tracked_tlbrs) > 0 and len(high_dets) > 0:
|
||
# Match active tracks to high confidence detections
|
||
matches, unmatched_tracks, unmatched_detections = self._match_tracks_to_detections(
|
||
tracked_tlbrs, high_dets[:, :4], self.match_thresh
|
||
)
|
||
|
||
print(f"[BYTETRACK MATCH] Found {len(matches)} matches between {len(tracked_tlbrs)} tracks and {len(high_dets)} detections")
|
||
|
||
# Update matched tracks with detections
|
||
for i_track, i_det in matches:
|
||
track_id = tracked_ids[i_track]
|
||
track = self._get_track_by_id(track_id, self.tracked_tracks)
|
||
if track:
|
||
track.update(high_dets[i_det])
|
||
track.last_frame = self.frame_id # FIXED: Update last_frame when track is matched
|
||
print(f"[BYTETRACK MATCH] Track ID={track_id} matched and updated")
|
||
|
||
# Move unmatched tracks to lost and rebuild tracked_tracks list
|
||
unmatched_track_ids = []
|
||
remaining_tracked_tracks = []
|
||
|
||
# Keep matched tracks in tracked_tracks
|
||
for i_track, _ in matches:
|
||
track_id = tracked_ids[i_track]
|
||
track = self._get_track_by_id(track_id, self.tracked_tracks)
|
||
if track:
|
||
remaining_tracked_tracks.append(track)
|
||
|
||
# Move unmatched tracks to lost
|
||
for i_track in unmatched_tracks:
|
||
track_id = tracked_ids[i_track]
|
||
track = self._get_track_by_id(track_id, self.tracked_tracks)
|
||
if track:
|
||
track.is_lost = True
|
||
track.last_frame = self.frame_id # FIXED: Update last_frame when track is lost
|
||
self.lost_tracks.append(track)
|
||
unmatched_track_ids.append(track_id)
|
||
|
||
# FIXED: Update tracked_tracks to only contain matched tracks
|
||
self.tracked_tracks = remaining_tracked_tracks
|
||
|
||
if unmatched_track_ids:
|
||
print(f"[BYTETRACK MATCH] Lost tracks: {unmatched_track_ids}")
|
||
|
||
# Create new tracks for unmatched high-confidence detections
|
||
new_track_ids = []
|
||
for i_det in unmatched_detections:
|
||
det = high_dets[i_det]
|
||
new_track = Track(det, self.track_id_count)
|
||
new_track.last_frame = self.frame_id # FIXED: Set last_frame when creating track
|
||
new_track_ids.append(self.track_id_count)
|
||
self.track_id_count += 1
|
||
self.tracked_tracks.append(new_track)
|
||
|
||
if new_track_ids:
|
||
print(f"[BYTETRACK MATCH] Created new tracks: {new_track_ids}")
|
||
|
||
print(f"[BYTETRACK] Matched {len(matches)} tracks, {len(unmatched_tracks)} unmatched tracks, " +
|
||
f"{len(unmatched_detections)} new tracks")
|
||
else:
|
||
# No tracked tracks or no high confidence detections
|
||
|
||
# Move all current tracks to lost
|
||
for track in self.tracked_tracks:
|
||
track.is_lost = True
|
||
track.last_frame = self.frame_id # FIXED: Update last_frame when track is lost
|
||
self.lost_tracks.append(track)
|
||
|
||
# Create new tracks for all high-confidence detections
|
||
for i in range(len(high_dets)):
|
||
det = high_dets[i]
|
||
new_track = Track(det, self.track_id_count)
|
||
new_track.last_frame = self.frame_id # FIXED: Set last_frame when creating track
|
||
self.track_id_count += 1
|
||
self.tracked_tracks.append(new_track)
|
||
|
||
print(f"[BYTETRACK] No active tracks or high-conf dets: {len(self.tracked_tracks)} new tracks, " +
|
||
f"{len(self.lost_tracks)} lost tracks")
|
||
|
||
# Remove lost tracks from tracked_tracks
|
||
self.tracked_tracks = [t for t in self.tracked_tracks if not t.is_lost]
|
||
|
||
# Second association: low confidence detections with lost tracks
|
||
lost_tlbrs = []
|
||
lost_ids = []
|
||
|
||
for track in self.lost_tracks:
|
||
lost_tlbrs.append(track.tlbr)
|
||
lost_ids.append(track.track_id)
|
||
|
||
lost_tlbrs = np.array(lost_tlbrs) if lost_tlbrs else np.empty((0, 4))
|
||
lost_ids = np.array(lost_ids)
|
||
|
||
if len(lost_tlbrs) > 0 and len(low_dets) > 0:
|
||
# Match lost tracks to low confidence detections
|
||
matches, _, _ = self._match_tracks_to_detections(
|
||
lost_tlbrs, low_dets[:, :4], self.match_thresh
|
||
)
|
||
|
||
# Recover matched lost tracks
|
||
recovered_tracks = []
|
||
for i_track, i_det in matches:
|
||
track_id = lost_ids[i_track]
|
||
track = self._get_track_by_id(track_id, self.lost_tracks)
|
||
if track:
|
||
track.is_lost = False
|
||
track.update(low_dets[i_det])
|
||
track.last_frame = self.frame_id # FIXED: Update last_frame on recovery
|
||
recovered_tracks.append(track)
|
||
|
||
# Add recovered tracks back to tracked_tracks
|
||
self.tracked_tracks.extend(recovered_tracks)
|
||
|
||
# Remove recovered tracks from lost_tracks
|
||
recovered_ids = [t.track_id for t in recovered_tracks]
|
||
self.lost_tracks = [t for t in self.lost_tracks if t.track_id not in recovered_ids]
|
||
|
||
print(f"[BYTETRACK] Recovered {len(recovered_tracks)} lost tracks with low-conf detections")
|
||
|
||
# Update remaining lost tracks
|
||
new_lost_tracks = []
|
||
expired_count = 0
|
||
|
||
# FIXED: Sort lost tracks by confidence score - keep higher quality tracks longer
|
||
# This prevents memory issues by limiting total number of lost tracks
|
||
sorted_lost_tracks = sorted(self.lost_tracks, key=lambda x: x.score, reverse=True)
|
||
|
||
# FIXED: Only keep top MAX_LOST_TRACKS lost tracks
|
||
MAX_LOST_TRACKS = 30 # Maximum number of lost tracks to keep
|
||
sorted_lost_tracks = sorted_lost_tracks[:MAX_LOST_TRACKS]
|
||
|
||
for track in sorted_lost_tracks:
|
||
track.predict() # Predict new location even when lost
|
||
|
||
# FIXED: Calculate elapsed frames since last detection
|
||
time_since_detection = self.frame_id - track.last_frame
|
||
|
||
# Keep track if within time buffer, otherwise remove
|
||
if time_since_detection <= self.max_time_lost:
|
||
new_lost_tracks.append(track)
|
||
else:
|
||
self.removed_tracks.append(track)
|
||
expired_count += 1
|
||
|
||
# Calculate how many tracks were removed due to confidence threshold
|
||
dropped_by_limit = len(self.lost_tracks) - len(sorted_lost_tracks)
|
||
|
||
self.lost_tracks = new_lost_tracks
|
||
|
||
print(f"[BYTETRACK] Final state: {len(self.tracked_tracks)} tracked, " +
|
||
f"{len(self.lost_tracks)} lost, {expired_count} expired, {dropped_by_limit} dropped by limit")
|
||
|
||
# Return final track results
|
||
return self._get_track_results()
|
||
|
||
def _get_track_by_id(self, track_id, track_list):
|
||
"""Helper to find a track by ID in a list"""
|
||
for track in track_list:
|
||
if track.track_id == track_id:
|
||
return track
|
||
return None
|
||
|
||
def _get_track_results(self):
|
||
"""Format track results as dicts for return value"""
|
||
results = []
|
||
for track in self.tracked_tracks:
|
||
if track.hits >= 1: # FIXED: Much more lenient confirmation threshold (was 3, then 2)
|
||
tlbr = track.tlbr
|
||
track_id = track.track_id
|
||
score = track.score
|
||
class_id = track.class_id
|
||
|
||
# FIXED: Better error checking for bbox values
|
||
try:
|
||
x1, y1, x2, y2 = map(float, tlbr)
|
||
|
||
# FIXED: Ensure values are valid
|
||
if not all(np.isfinite([x1, y1, x2, y2])):
|
||
print(f"[BYTETRACK WARNING] Track {track_id} has invalid bbox: {tlbr}")
|
||
continue
|
||
|
||
# FIXED: Make sure width and height are positive
|
||
if x2 <= x1 or y2 <= y1:
|
||
print(f"[BYTETRACK WARNING] Track {track_id} has invalid bbox dimensions: {tlbr}")
|
||
continue
|
||
|
||
results.append({
|
||
'id': track_id,
|
||
'bbox': [float(x1), float(y1), float(x2), float(y2)],
|
||
'confidence': float(score),
|
||
'class_id': int(class_id),
|
||
'state': 'tracked'
|
||
})
|
||
except Exception as e:
|
||
print(f"[BYTETRACK ERROR] Failed to process track {track_id}: {e}")
|
||
|
||
print(f"[BYTETRACK] Returning {len(results)} confirmed tracks")
|
||
return results
|
||
|
||
def _convert_detections(self, detections):
|
||
"""Convert detection dictionaries to numpy array format
|
||
Format: [x1, y1, x2, y2, score, class_id]
|
||
"""
|
||
if not detections:
|
||
return np.empty((0, 6))
|
||
|
||
result = []
|
||
for det in detections:
|
||
bbox = det.get('bbox')
|
||
conf = det.get('confidence', 0.0)
|
||
class_id = det.get('class_id', -1)
|
||
|
||
# Make sure we have numeric values
|
||
try:
|
||
if bbox is not None and len(bbox) == 4:
|
||
# FIXED: Explicitly convert to float32 for ByteTrack
|
||
x1, y1, x2, y2 = map(np.float32, bbox)
|
||
conf = np.float32(conf)
|
||
class_id = int(class_id) if isinstance(class_id, (int, float)) else -1
|
||
|
||
# Validate bbox dimensions
|
||
if x2 > x1 and y2 > y1 and conf > 0:
|
||
result.append([x1, y1, x2, y2, conf, class_id])
|
||
except (ValueError, TypeError) as e:
|
||
print(f"[BYTETRACK] Error converting detection: {e}")
|
||
|
||
# FIXED: Explicitly convert to float32 array
|
||
return np.array(result, dtype=np.float32) if result else np.empty((0, 6), dtype=np.float32)
|
||
|
||
def _match_tracks_to_detections(self, tracks_tlbr, dets_tlbr, threshold):
|
||
"""
|
||
Match tracks to detections using IoU
|
||
|
||
Args:
|
||
tracks_tlbr: Track boxes [x1, y1, x2, y2]
|
||
dets_tlbr: Detection boxes [x1, y1, x2, y2]
|
||
threshold: IoU threshold
|
||
|
||
Returns:
|
||
(matches, unmatched_tracks, unmatched_detections)
|
||
"""
|
||
if len(tracks_tlbr) == 0 or len(dets_tlbr) == 0:
|
||
return [], list(range(len(tracks_tlbr))), list(range(len(dets_tlbr)))
|
||
|
||
iou_matrix = self._iou_batch(tracks_tlbr, dets_tlbr)
|
||
|
||
# Use Hungarian algorithm for optimal assignment
|
||
matched_indices = self._linear_assignment(-iou_matrix, threshold)
|
||
|
||
unmatched_tracks = []
|
||
for i in range(len(tracks_tlbr)):
|
||
if i not in matched_indices[:, 0]:
|
||
unmatched_tracks.append(i)
|
||
|
||
unmatched_detections = []
|
||
for i in range(len(dets_tlbr)):
|
||
if i not in matched_indices[:, 1]:
|
||
unmatched_detections.append(i)
|
||
|
||
matches = []
|
||
for i, j in matched_indices:
|
||
if iou_matrix[i, j] < threshold:
|
||
unmatched_tracks.append(i)
|
||
unmatched_detections.append(j)
|
||
else:
|
||
matches.append((i, j))
|
||
|
||
return matches, unmatched_tracks, unmatched_detections
|
||
|
||
def _iou_batch(self, boxes1, boxes2):
|
||
"""
|
||
Calculate IoU between all pairs of boxes
|
||
|
||
Args:
|
||
boxes1: (N, 4) [x1, y1, x2, y2]
|
||
boxes2: (M, 4) [x1, y1, x2, y2]
|
||
|
||
Returns:
|
||
IoU matrix (N, M)
|
||
"""
|
||
area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
|
||
area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
|
||
|
||
lt = np.maximum(boxes1[:, None, :2], boxes2[:, :2]) # (N,M,2)
|
||
rb = np.minimum(boxes1[:, None, 2:], boxes2[:, 2:]) # (N,M,2)
|
||
|
||
wh = np.clip(rb - lt, 0, None) # (N,M,2)
|
||
inter = wh[:, :, 0] * wh[:, :, 1] # (N,M)
|
||
|
||
union = area1[:, None] + area2 - inter
|
||
|
||
iou = inter / (union + 1e-10)
|
||
return iou
|
||
|
||
def _linear_assignment(self, cost_matrix, threshold):
|
||
"""
|
||
Improved greedy assignment implementation
|
||
For each detection, find the track with highest IoU above threshold
|
||
"""
|
||
if cost_matrix.size == 0:
|
||
return np.empty((0, 2), dtype=int)
|
||
|
||
matches = []
|
||
# Sort costs in descending order
|
||
flat_indices = np.argsort(cost_matrix.flatten())[::-1]
|
||
cost_values = cost_matrix.flatten()[flat_indices]
|
||
|
||
# Get row and col indices
|
||
row_indices = flat_indices // cost_matrix.shape[1]
|
||
col_indices = flat_indices % cost_matrix.shape[1]
|
||
|
||
# Keep track of assigned rows and columns
|
||
assigned_rows = set()
|
||
assigned_cols = set()
|
||
|
||
# Iterate through sorted indices
|
||
for i in range(len(row_indices)):
|
||
row, col = row_indices[i], col_indices[i]
|
||
cost = cost_values[i]
|
||
|
||
# If cost is below threshold, continue checking but apply a decay
|
||
# This helps with low FPS scenarios where IoU might be lower
|
||
if cost < threshold:
|
||
# Calculate dynamic threshold based on position in list
|
||
position_ratio = 1.0 - (i / len(row_indices))
|
||
dynamic_threshold = threshold * 0.7 * position_ratio
|
||
|
||
if cost < dynamic_threshold:
|
||
continue
|
||
|
||
# If row or col already assigned, skip
|
||
if row in assigned_rows or col in assigned_cols:
|
||
continue
|
||
|
||
# Add match
|
||
matches.append((row, col))
|
||
assigned_rows.add(row)
|
||
assigned_cols.add(col)
|
||
|
||
return np.array(matches) if matches else np.empty((0, 2), dtype=int)
|
||
|
||
|
||
class Track:
    """Track class for ByteTracker.

    Holds one object's state: latest box (``tlbr``), EMA-smoothed confidence
    (``score``), class id, bookkeeping counters, and a simplified Kalman
    filter over the state [cx, cy, w, h, vx, vy, vw, vh].
    """

    def __init__(self, detection, track_id):
        """Initialize a track from a detection

        Args:
            detection: Detection array [x1, y1, x2, y2, score, class_id]
            track_id: Unique track ID
        """
        self.track_id = track_id
        self.tlbr = detection[:4]  # [x1, y1, x2, y2]
        self.score = detection[4]
        self.class_id = int(detection[5])

        self.time_since_update = 0  # frames since last matched detection
        self.hits = 1  # Number of times track was matched to a detection
        self.age = 1  # incremented on every predict() call
        self.last_frame = 0  # Will be set by the tracker during update
        self.is_lost = False  # Flag to indicate if track is lost

        # For Kalman filter (kf itself is unused; mean/covariance carry state)
        self.kf = None
        self.mean = None
        self.covariance = None

        # Keep track of last 30 positions for smoother trajectories
        self.history = []
        self._init_kalman_filter()

    def _init_kalman_filter(self):
        """Initialize simple Kalman filter for position and velocity prediction

        State: [x, y, w, h, vx, vy, vw, vh] — (x, y) is the box CENTER;
        velocities start at zero.
        """
        # Simplified KF implementation
        self.mean = np.zeros(8)
        x1, y1, x2, y2 = self.tlbr
        w, h = x2 - x1, y2 - y1
        cx, cy = x1 + w/2, y1 + h/2

        # Initialize state
        self.mean[:4] = [cx, cy, w, h]

        # Initialize covariance matrix (large initial uncertainty)
        self.covariance = np.eye(8) * 10

    def predict(self):
        """Predict next state using constant velocity model.

        Advances ``mean``/``covariance`` one time step, rewrites ``tlbr``
        from the predicted center/size, and bumps the age counters.
        """
        # Simple constant velocity prediction
        dt = 1.0

        # Transition matrix for constant velocity model:
        # position/size components advance by their velocity components
        F = np.eye(8)
        F[0, 4] = dt
        F[1, 5] = dt
        F[2, 6] = dt
        F[3, 7] = dt

        # Predict next state
        self.mean = F @ self.mean

        # Update covariance (simplified)
        Q = np.eye(8) * 0.01  # Process noise
        self.covariance = F @ self.covariance @ F.T + Q

        # Convert state back to bbox (center/size -> corners)
        cx, cy, w, h = self.mean[:4]
        self.tlbr = np.array([cx - w/2, cy - h/2, cx + w/2, cy + h/2])

        self.age += 1
        self.time_since_update += 1

    def update(self, detection):
        """Update track with new detection

        Args:
            detection: Detection array [x1, y1, x2, y2, score, class_id]

        Runs a simplified Kalman measurement update on the box center/size,
        smooths the confidence with an EMA, and resets the lost/staleness
        flags. Note the class_id is NOT updated from the new detection.
        """
        x1, y1, x2, y2 = detection[:4]
        self.tlbr = detection[:4]  # provisional; overwritten by the KF posterior below

        # Update score with EMA (alpha weights the OLD score, so the
        # confidence changes slowly)
        alpha = 0.9
        self.score = alpha * self.score + (1 - alpha) * detection[4]

        # Update state (simplified Kalman update)
        w, h = x2 - x1, y2 - y1
        cx, cy = x1 + w/2, y1 + h/2

        # Measurement
        z = np.array([cx, cy, w, h])

        # Measurement matrix: we observe the first 4 state components
        H = np.zeros((4, 8))
        H[:4, :4] = np.eye(4)

        # Measurement covariance (higher = less trust in measurement);
        # low-confidence detections are trusted less. NOTE(review): assumes
        # score > 0 — guaranteed by the tracker's detection validation.
        R = np.eye(4) * (1.0 / self.score)

        # Kalman update equations (simplified)
        y = z - H @ self.mean
        S = H @ self.covariance @ H.T + R
        K = self.covariance @ H.T @ np.linalg.inv(S)

        self.mean = self.mean + K @ y
        self.covariance = (np.eye(8) - K @ H) @ self.covariance

        # Convert back to bbox from the filtered center/size
        cx, cy, w, h = self.mean[:4]
        self.tlbr = np.array([cx - w/2, cy - h/2, cx + w/2, cy + h/2])

        # Update history (bounded to the last 30 positions)
        self.history.append(self.tlbr.copy())
        if len(self.history) > 30:
            self.history = self.history[-30:]

        # Reset time since update counter and increment hits
        self.time_since_update = 0
        self.hits += 1
        self.is_lost = False  # a matched track is by definition not lost
||
|
||
|
||
class ByteTrackVehicleTracker:
    """
    ByteTrack-based vehicle tracker with same API as DeepSortVehicleTracker
    for drop-in replacement with improved performance.

    Implemented as a process-wide singleton: every instantiation returns the
    same underlying instance so track IDs stay consistent across callers.
    """
    _instance = None  # shared singleton instance

    def __new__(cls, *args, **kwargs):
        # Singleton: construct the instance only once per process.
        if cls._instance is None:
            print("[BYTETRACK SINGLETON] Creating ByteTrackVehicleTracker instance")
            cls._instance = super(ByteTrackVehicleTracker, cls).__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        # Guard against re-initialization of the singleton.
        if getattr(self, '_initialized', False):
            return
        print("[BYTETRACK INIT] Initializing ByteTrack tracker (should only see this once)")

        # Parameters tuned for vehicle tracking in traffic scenes with low FPS.
        # Lenient thresholds keep IDs stable when detections are noisy/sparse.
        self.tracker = BYTETracker(
            track_thresh=0.2,        # low threshold for better tracking continuity
            track_buffer=60,         # keep tracks alive ~4-6 seconds at ~10 FPS
            match_thresh=0.4,        # lenient IoU threshold for matching
            track_high_thresh=0.25,  # high-confidence association threshold
            track_low_thresh=0.05,   # very low threshold for second-chance matching
            frame_rate=10            # match actual video FPS (~7-10)
        )

        self._initialized = True
        self.track_id_counter = {}  # track IDs seen so far (diagnostics only)
        self.debug = True           # enable verbose debug output

        # Aggregate statistics, used only for debug logging.
        self.track_counts = {
            'frames_processed': 0,
            'total_tracks_created': 0,
            'max_concurrent_tracks': 0,
            'current_active_tracks': 0,
            'current_lost_tracks': 0
        }

    @staticmethod
    def _coerce_class_id(class_id):
        """Best-effort conversion of a class id to int; -1 when not convertible.

        FIXED: the previous isinstance(class_id, (int, float)) check silently
        mapped NumPy integer scalars (e.g. np.int64 from a detector) to -1,
        because NumPy scalars are not instances of the builtin int/float.
        """
        try:
            return int(class_id)
        except (TypeError, ValueError):
            return -1

    def _validate_detections(self, detections):
        """Filter and normalize raw detection dicts into ByteTrack's format.

        Rejects malformed bboxes, degenerate boxes and near-zero confidences;
        converts coordinates/confidence to float32 for consistent arithmetic.
        """
        valid_dets = []
        for i, det in enumerate(detections):
            bbox = det.get('bbox')
            conf = det.get('confidence', 0.0)
            class_id = det.get('class_id', -1)

            if bbox is not None and len(bbox) == 4:
                try:
                    # float32 throughout for consistent tracking arithmetic
                    x1, y1, x2, y2 = map(np.float32, bbox)
                    conf = np.float32(conf)
                    class_id = self._coerce_class_id(class_id)

                    # Validate bbox dimensions (low threshold suits ByteTrack's
                    # second-chance matching of weak detections).
                    if x2 > x1 and y2 > y1 and conf > 0.05:
                        valid_dets.append({
                            'bbox': [x1, y1, x2, y2],
                            'confidence': conf,
                            'class_id': class_id
                        })
                        if self.debug and i % 5 == 0:  # throttle log spam
                            print(f"[BYTETRACK] Added detection {i}: bbox={[x1, y1, x2, y2]}, conf={conf:.2f}")
                    else:
                        if self.debug:
                            print(f"[BYTETRACK] Rejected detection {i}: invalid bbox dimensions or very low confidence")
                except Exception as e:
                    if self.debug:
                        print(f"[BYTETRACK] Error processing detection {i}: {e}")
            else:
                if self.debug:
                    print(f"[BYTETRACK] Rejected detection {i}: invalid bbox format")
        return valid_dets

    def _update_statistics(self):
        """Refresh per-frame statistics and periodically prune removed tracks."""
        self.track_counts['frames_processed'] += 1
        self.track_counts['current_active_tracks'] = len(self.tracker.tracked_tracks)
        self.track_counts['current_lost_tracks'] = len(self.tracker.lost_tracks)
        self.track_counts['max_concurrent_tracks'] = max(
            self.track_counts['max_concurrent_tracks'],
            len(self.tracker.tracked_tracks) + len(self.tracker.lost_tracks)
        )

        # Periodic memory cleanup: cap the removed-track backlog.
        if self.track_counts['frames_processed'] % 50 == 0:
            old_removed_count = len(self.tracker.removed_tracks)
            # FIXED: keep the last 30 removed tracks unconditionally; the old
            # code cleared ALL of them whenever there were 30 or fewer, which
            # contradicted its own "keep the last 30" intent.
            self.tracker.removed_tracks = self.tracker.removed_tracks[-30:]
            print(f"[BYTETRACK] Memory cleanup: removed {old_removed_count - len(self.tracker.removed_tracks)} old tracks")
            print(f"[BYTETRACK] Stats: Active={self.track_counts['current_active_tracks']}, " +
                  f"Lost={self.track_counts['current_lost_tracks']}, " +
                  f"Max concurrent={self.track_counts['max_concurrent_tracks']}")

    def _standardize_tracks(self, tracks):
        """Coerce tracker output (dicts or track objects) into plain dicts."""
        standardized_tracks = []
        for track in tracks:
            if isinstance(track, dict):
                # Track is already a dict, just ensure it has required fields.
                if 'id' not in track and 'track_id' in track:
                    track['id'] = track['track_id']
                standardized_tracks.append(track)
            else:
                # Convert object to dict, tolerating missing attributes.
                try:
                    standardized_tracks.append({
                        'id': track.track_id if hasattr(track, 'track_id') else -1,
                        'bbox': track.bbox if hasattr(track, 'bbox') else [0, 0, 0, 0],
                        'confidence': track.confidence if hasattr(track, 'confidence') else 0.0,
                        'class_id': track.class_id if hasattr(track, 'class_id') else -1
                    })
                except Exception as e:
                    print(f"[BYTETRACK ERROR] Error converting track to dict: {e}")
        return standardized_tracks

    def update(self, detections, frame=None):
        """
        Update tracker with new detections.

        Args:
            detections: list of dicts with keys ['bbox', 'confidence', 'class_id', ...]
                        where bbox is [x1, y1, x2, y2]
            frame: BGR image (optional; forwarded to the tracker, not required
                   by ByteTrack itself)

        Returns:
            list of dicts with keys ['id', 'bbox', 'confidence', 'class_id', ...];
            empty list if the underlying tracker update fails.
        """
        # Safety check: keep the internal ID counter bounded.
        if hasattr(self.tracker, 'track_id_count') and self.tracker.track_id_count > 10000:
            print(f"[BYTETRACK WARNING] Track ID counter extremely high ({self.tracker.track_id_count}). Resetting to 1.")
            # FIXED: reset to 1 (not 0) for consistency with reset(), which
            # documents that track IDs start from 1.
            self.tracker.track_id_count = 1

        # Convert detections to ByteTrack format with validation.
        valid_dets = self._validate_detections(detections)

        if self.debug:
            print(f"[BYTETRACK] Processing {len(valid_dets)} valid detections")

        try:
            # Use try/except to catch any errors in the tracker update.
            tracks = self.tracker.update(valid_dets, frame)
            self._update_statistics()
            return self._standardize_tracks(tracks)
        except Exception as e:
            print(f"[BYTETRACK ERROR] Error updating tracker: {e}")
            import traceback
            traceback.print_exc()
            # Return empty tracks list as fallback.
            return []

    def update_tracks(self, detections, frame=None):
        """
        Alias for the update method to maintain compatibility with DeepSORT interface.

        Args:
            detections: list of detection arrays in format [bbox_xywh, conf, class_id]
                        or dicts with a 'bbox' key
            frame: BGR image

        Returns:
            list of objects with DeepSORT-compatible interface including
            is_confirmed() method
        """
        # Convert from DeepSORT format to ByteTrack format.
        converted_dets = []

        for det in detections:
            try:
                # Handle different detection formats.
                if isinstance(det, (list, tuple, np.ndarray)) and len(det) >= 2:
                    # DeepSORT format: [bbox_xywh, conf, class_id]
                    bbox_xywh, conf = det[:2]
                    class_id = det[2] if len(det) > 2 else -1

                    # Convert [x, y, w, h] to [x1, y1, x2, y2] with type validation.
                    x, y, w, h = map(float, bbox_xywh)
                    conf = float(conf)
                    class_id = self._coerce_class_id(class_id)

                    converted_dets.append({
                        'bbox': [x, y, x + w, y + h],
                        'confidence': conf,
                        'class_id': class_id
                    })

                elif isinstance(det, dict):
                    # Newer format with bbox in dict.
                    if 'bbox' in det:
                        bbox = det['bbox']
                        if len(bbox) == 4:
                            # NOTE(review): heuristic — a [x, y, w, h] box with
                            # w > x and h > y is indistinguishable from
                            # [x1, y1, x2, y2] here; callers should prefer the
                            # corner format.
                            if bbox[2] > bbox[0] and bbox[3] > bbox[1]:
                                # Already in [x1, y1, x2, y2] format.
                                converted_dets.append(det.copy())
                            else:
                                # Assume it's [x, y, w, h] and convert.
                                x, y, w, h = bbox
                                converted_det = det.copy()
                                converted_det['bbox'] = [x, y, x + w, y + h]
                                converted_dets.append(converted_det)
            except Exception as e:
                print(f"[BYTETRACK] Error converting detection format: {e}")

        # Call the regular update method to get dictionary tracks.
        dict_tracks = self.update(converted_dets, frame)

        if self.debug:
            print(f"[BYTETRACK] Converting {len(dict_tracks)} dict tracks to DeepSORT-compatible objects")

        # Create DeepSORT compatible track objects from dictionaries.
        return [ByteTrackOutput(track_data) for track_data in dict_tracks]

    def reset(self):
        """
        Reset the tracker to clean state, resetting all IDs and clearing tracks.
        Call this when starting a new video or session.
        """
        print("[BYTETRACK] Resetting tracker state - IDs will start from 1")
        if hasattr(self, 'tracker') and self.tracker is not None:
            # Reset the internal BYTETracker.
            self.tracker.tracked_tracks = []
            self.tracker.lost_tracks = []
            self.tracker.removed_tracks = []
            self.tracker.frame_id = 0
            self.tracker.track_id_count = 1  # IDs start from 1

            print("[BYTETRACK] Reset complete - track ID counter reset to 1")
        else:
            print("[BYTETRACK] Warning: Tracker not initialized, nothing to reset")

        # Reset tracking statistics.
        self.track_counts = {
            'frames_processed': 0,
            'total_tracks_created': 0,
            'max_concurrent_tracks': 0,
            'current_active_tracks': 0,
            'current_lost_tracks': 0
        }
        self.track_id_counter = {}
|
||
|
||
# Adapter class to make ByteTrack output compatible with DeepSORT output
|
||
class ByteTrackOutput:
    """Adapter exposing a DeepSORT-style track interface over a ByteTrack dict."""

    def __init__(self, track_data):
        # Pull out the fields DeepSORT consumers expect as attributes.
        self.track_id = track_data['id']
        self.bbox = track_data['bbox']  # [x1, y1, x2, y2]
        self.confidence = track_data['confidence']
        self.class_id = track_data['class_id']
        self._ltrb = self.bbox  # already stored as left/top/right/bottom

    def to_ltrb(self):
        """Return bbox in [left, top, right, bottom] format"""
        return self._ltrb

    def to_tlbr(self):
        """Return bbox in [top, left, bottom, right] format"""
        # Identical to LTRB here because the box is kept as [x1, y1, x2, y2].
        return self._ltrb

    def to_xyah(self):
        """Return bbox in [center_x, center_y, aspect_ratio, height] format"""
        left, top, right, bottom = self._ltrb
        width = right - left
        height = bottom - top
        # Guard against zero-height boxes when computing the aspect ratio.
        ratio = width / height if height > 0 else 1.0
        return [left + width / 2, top + height / 2, ratio, height]

    def is_confirmed(self):
        """Return True if track is confirmed"""
        return True  # ByteTrack emits only confirmed tracks
|