Files
Traffic-Intersection-Monito…/qt_app_pyside1/controllers/bytetrack_demo.py

1086 lines
45 KiB
Python
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# ByteTrack Integration Demo
# This script demonstrates how to use the ByteTrack implementation
# as a drop-in replacement for DeepSORT in your application
#
# ByteTrack is the preferred tracker with better performance and higher FPS
# This version demonstrates the improved tracking with real-time comparison
import sys
import os
import argparse
import cv2
import time
import numpy as np
from pathlib import Path
# Make the package root importable when this file is executed as a script.
_pkg_root = Path(__file__).resolve().parent.parent
parent_dir = str(_pkg_root)
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
# Import both trackers for comparison
# from controllers.deepsort_tracker import DeepSortVehicleTracker # Deprecated
from controllers.bytetrack_tracker import ByteTrackVehicleTracker
def generate_mock_detections(num_objects=5, frame_shape=(1080, 1920, 3)):
    """Generate random vehicle-like detections for exercising the trackers.

    Args:
        num_objects: How many detection dicts to produce.
        frame_shape: (height, width, channels) of the target frame; boxes are
            kept fully inside these bounds.

    Returns:
        List of dicts with 'bbox' ([x1, y1, x2, y2] floats), 'confidence'
        (0.4-0.95) and 'class_id' (2 = car, 7 = truck, COCO ids).
    """
    frame_h, frame_w = frame_shape[:2]
    mock_dets = []
    for _ in range(num_objects):
        # Vehicles tend to be wider than tall, hence the asymmetric ranges.
        box_w = np.random.randint(frame_w // 10, frame_w // 4)
        box_h = np.random.randint(frame_h // 10, frame_h // 6)
        # Top-left corner chosen so the box never leaves the frame.
        left = np.random.randint(0, frame_w - box_w)
        top = np.random.randint(0, frame_h - box_h)
        score = np.random.uniform(0.4, 0.95)
        cls = np.random.choice([2, 7])
        mock_dets.append({
            'bbox': [float(left), float(top), float(left + box_w), float(top + box_h)],
            'confidence': float(score),
            'class_id': int(cls)
        })
    return mock_dets
def draw_tracks(frame, tracks, color=(0, 255, 0)):
    """Overlay tracked boxes and "ID:<id> <conf>" labels on a frame (in place).

    Args:
        frame: BGR image to draw on (modified and also returned).
        tracks: iterable of dicts with 'id', 'bbox' and optional 'confidence'.
        color: BGR color used for both the box and the label.

    Returns:
        The same frame object, annotated.
    """
    for trk in tracks:
        x1, y1, x2, y2 = (int(v) for v in trk['bbox'])
        label = f"ID:{trk['id']} {trk.get('confidence', 0):.2f}"
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, 2)
        # Label sits just above the top-left corner of the box.
        cv2.putText(frame, label, (x1, y1-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
    return frame
def main():
    """Run the tracking demo: open a video source, feed (mock) detections to
    the selected tracker(s), display annotated frames until 'q' is pressed,
    then print average per-frame processing times.
    """
    parser = argparse.ArgumentParser(description="ByteTrack vs DeepSORT comparison demo")
    parser.add_argument("--video", type=str, default=None, help="Path to video file (default: camera)")
    parser.add_argument("--tracker", type=str, default="bytetrack",
                        choices=["bytetrack", "deepsort", "both"],
                        help="Tracker to use: bytetrack (recommended), deepsort (legacy), or both")
    parser.add_argument("--mock", action="store_true", help="Use mock detections instead of actual detector")
    args = parser.parse_args()
    # Initialize video source
    if args.video:
        cap = cv2.VideoCapture(args.video)
    else:
        cap = cv2.VideoCapture(0)  # Use default camera
    if not cap.isOpened():
        print(f"Error: Could not open video source.")
        return
    # Get video properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    print(f"Video source: {width}x{height} @ {fps}fps")
    # Initialize trackers based on choice.
    # NOTE(review): ByteTrackVehicleTracker is a singleton, so with
    # --tracker both the two names refer to the same underlying instance.
    if args.tracker == "bytetrack" or args.tracker == "both":
        bytetrack_tracker = ByteTrackVehicleTracker()
    if args.tracker == "deepsort" or args.tracker == "both":
        print("⚠️ DeepSORT tracker is deprecated, using ByteTrack as fallback")
        deepsort_tracker = ByteTrackVehicleTracker()
    # Main processing loop
    frame_count = 0
    # Per-tracker wall-clock times, one entry per processed frame.
    processing_times = {'bytetrack': [], 'deepsort': []}
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        frame_count += 1
        print(f"\nProcessing frame {frame_count}")
        # Generate or get detections
        if args.mock:
            detections = generate_mock_detections(num_objects=10, frame_shape=frame.shape)
            print(f"Generated {len(detections)} mock detections")
        else:
            # In a real application, you would use your actual detector here
            # This is just a placeholder for demo purposes
            detections = generate_mock_detections(num_objects=10, frame_shape=frame.shape)
            print(f"Generated {len(detections)} mock detections")
        # Process with ByteTrack
        if args.tracker == "bytetrack" or args.tracker == "both":
            start_time = time.time()
            bytetrack_results = bytetrack_tracker.update(detections, frame)
            bt_time = time.time() - start_time
            processing_times['bytetrack'].append(bt_time)
            print(f"ByteTrack processing time: {bt_time:.4f}s")
            if args.tracker == "bytetrack":
                display_frame = draw_tracks(frame.copy(), bytetrack_results, color=(0, 255, 0))
        # Process with DeepSORT (actually ByteTrack acting as a drop-in replacement)
        if args.tracker == "deepsort" or args.tracker == "both":
            start_time = time.time()
            try:
                print(" Using ByteTrack (as DeepSORT replacement)")
                deepsort_results = deepsort_tracker.update(detections, frame)
                ds_time = time.time() - start_time
                processing_times['deepsort'].append(ds_time)
                print(f"DeepSORT processing time: {ds_time:.4f}s")
            except Exception as e:
                # Best-effort: keep the demo running with empty results on failure.
                print(f"DeepSORT error: {e}")
                deepsort_results = []
                ds_time = 0
            if args.tracker == "deepsort":
                display_frame = draw_tracks(frame.copy(), deepsort_results, color=(0, 0, 255))
        # If comparing both, create a side-by-side view
        if args.tracker == "both":
            # Draw tracks on separate frames
            bt_frame = draw_tracks(frame.copy(), bytetrack_results, color=(0, 255, 0))
            ds_frame = draw_tracks(frame.copy(), deepsort_results, color=(0, 0, 255))
            # Resize if needed and create side-by-side view
            h, w = frame.shape[:2]
            display_frame = np.zeros((h, w*2, 3), dtype=np.uint8)
            display_frame[:, :w] = bt_frame
            display_frame[:, w:] = ds_frame
            # Add labels (left pane: ByteTrack in green; right pane: DeepSORT in red)
            cv2.putText(display_frame, "ByteTrack", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.putText(display_frame, f"{len(bytetrack_results)} tracks", (10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            cv2.putText(display_frame, f"{bt_time:.4f}s", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
            cv2.putText(display_frame, "DeepSORT", (w+10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)
            cv2.putText(display_frame, f"{len(deepsort_results)} tracks", (w+10, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
            cv2.putText(display_frame, f"{ds_time:.4f}s", (w+10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)
        # Show the frame
        cv2.imshow("Tracking Demo", display_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    # Release resources
    cap.release()
    cv2.destroyAllWindows()
    # Print performance statistics
    if len(processing_times['bytetrack']) > 0:
        bt_avg = sum(processing_times['bytetrack']) / len(processing_times['bytetrack'])
        print(f"ByteTrack average processing time: {bt_avg:.4f}s ({1/bt_avg:.2f} FPS)")
    if len(processing_times['deepsort']) > 0:
        ds_avg = sum(processing_times['deepsort']) / len(processing_times['deepsort'])
        print(f"DeepSORT average processing time: {ds_avg:.4f}s ({1/ds_avg:.2f} FPS)")
if __name__ == "__main__":
    main()
# ByteTrack implementation for vehicle tracking
# Efficient and robust multi-object tracking with improved association strategy
import numpy as np
import cv2
import time
from collections import defaultdict, deque
import torch
from typing import List, Dict, Any, Tuple, Optional
class BYTETracker:
    """
    ByteTrack tracker implementation
    Based on the paper: ByteTrack: Multi-Object Tracking by Associating Every Detection Box

    Two-stage association: high-confidence detections are matched to active
    tracks first; low-confidence detections are then used to recover lost tracks.
    """
    def __init__(
        self,
        track_thresh=0.5,
        track_buffer=30,
        match_thresh=0.8,
        frame_rate=30,
        track_high_thresh=0.6,
        track_low_thresh=0.1,
        camera_motion_compensation=False
    ):
        """Configure thresholds and empty track pools.

        Args:
            track_thresh: Threshold for high-confidence detections.
            track_buffer: Frames a lost track is kept before removal (at 30 FPS).
            match_thresh: Minimum IoU for a track/detection match.
            frame_rate: Actual stream FPS; scales the lost-track buffer.
            track_high_thresh: Confidence cutoff for the first association stage.
            track_low_thresh: Confidence floor for the second association stage.
            camera_motion_compensation: Stored but not used in this implementation.
        """
        self.tracked_tracks = []  # Active tracks being tracked
        self.lost_tracks = []  # Lost tracks (temporarily out of view)
        self.removed_tracks = []  # Removed tracks (permanently lost)
        self.frame_id = 0
        # Buffer length scaled by actual FPS relative to the 30 FPS baseline.
        self.max_time_lost = int(frame_rate / 30.0 * track_buffer)
        self.track_thresh = track_thresh  # Threshold for high-confidence detections
        self.track_high_thresh = track_high_thresh  # Higher threshold for first association
        self.track_low_thresh = track_low_thresh  # Lower threshold for second association
        self.match_thresh = match_thresh  # IOU match threshold
        self.track_id_count = 0
        self.camera_motion_compensation = camera_motion_compensation
        print(f"[BYTETRACK] Initialized with: high_thresh={track_high_thresh}, " +
              f"low_thresh={track_low_thresh}, match_thresh={match_thresh}")

    def update(self, detections, frame=None):
        """Update tracks with new detections
        Args:
            detections: list of dicts with keys ['bbox', 'confidence', 'class_id', ...]
            frame: Optional BGR frame for debug visualization
        Returns:
            list of dicts with keys ['id', 'bbox', 'confidence', 'class_id', ...]
        """
        self.frame_id += 1
        print(f"[BYTETRACK] Frame {self.frame_id}: Processing {len(detections)} detections")
        print(f"[BYTETRACK] Current state: {len(self.tracked_tracks)} tracked, {len(self.lost_tracks)} lost")
        # Convert detections to internal format [x1, y1, x2, y2, score, class_id]
        converted_detections = self._convert_detections(detections)
        # Handle empty detections case: age every track and drop expired ones.
        if len(converted_detections) == 0:
            print(f"[BYTETRACK] No valid detections in frame {self.frame_id}")
            new_tracked_tracks = []
            new_lost_tracks = []
            # All current tracks go to lost
            for track in self.tracked_tracks:
                track.is_lost = True
                if self.frame_id - track.last_frame <= self.max_time_lost:
                    track.predict()  # Predict new location
                    new_lost_tracks.append(track)
                else:
                    self.removed_tracks.append(track)
            # Update remaining lost tracks
            for track in self.lost_tracks:
                if self.frame_id - track.last_frame <= self.max_time_lost:
                    track.predict()
                    new_lost_tracks.append(track)
                else:
                    self.removed_tracks.append(track)
            self.tracked_tracks = new_tracked_tracks
            self.lost_tracks = new_lost_tracks
            print(f"[BYTETRACK] No detections: updated to {len(self.tracked_tracks)} tracked, {len(self.lost_tracks)} lost")
            return []
        # Split detections into high and low confidence - with safety checks
        if len(converted_detections) > 0:
            try:
                # Make sure all values are numeric before comparison
                confidence_values = converted_detections[:, 4].astype(float)
                # Print the distribution of confidence values for debugging
                if len(confidence_values) > 0:
                    print(f"[BYTETRACK] Confidence values: min={np.min(confidence_values):.2f}, " +
                          f"median={np.median(confidence_values):.2f}, max={np.max(confidence_values):.2f}")
                high_dets = converted_detections[confidence_values >= self.track_high_thresh]
                low_dets = converted_detections[(confidence_values >= self.track_low_thresh) &
                                                (confidence_values < self.track_high_thresh)]
                print(f"[BYTETRACK] Split into {len(high_dets)} high-conf and {len(low_dets)} low-conf detections")
            except Exception as e:
                print(f"[BYTETRACK] Error processing confidence values: {e}")
                import traceback
                traceback.print_exc()
                # Fallback to empty arrays
                high_dets = np.empty((0, 6))
                low_dets = np.empty((0, 6))
        else:
            high_dets = np.empty((0, 6))
            low_dets = np.empty((0, 6))
        # Handle first frame special case: bootstrap tracks from every detection.
        if self.frame_id == 1:
            # Create new tracks for all high-confidence detections
            for i in range(len(high_dets)):
                det = high_dets[i]
                new_track = Track(det, self.track_id_count)
                new_track.last_frame = self.frame_id  # Set last_frame when creating track
                self.track_id_count += 1
                self.tracked_tracks.append(new_track)
            # Also create tracks for lower confidence detections in first frame
            # This helps with initial tracking when objects might not be clearly visible
            for i in range(len(low_dets)):
                det = low_dets[i]
                new_track = Track(det, self.track_id_count)
                new_track.last_frame = self.frame_id
                self.track_id_count += 1
                self.tracked_tracks.append(new_track)
            print(f"[BYTETRACK] First frame: created {len(self.tracked_tracks)} new tracks")
            return self._get_track_results()
        # Collect boxes/ids of currently-active tracks for the first association.
        tracked_tlbrs = []
        tracked_ids = []
        for track in self.tracked_tracks:
            tracked_tlbrs.append(track.tlbr)
            tracked_ids.append(track.track_id)
        tracked_tlbrs = np.array(tracked_tlbrs) if tracked_tlbrs else np.empty((0, 4))
        tracked_ids = np.array(tracked_ids)
        # First association: high confidence detections with tracked tracks
        if len(tracked_tlbrs) > 0 and len(high_dets) > 0:
            matches, unmatched_tracks, unmatched_detections = self._match_tracks_to_detections(
                tracked_tlbrs, high_dets[:, :4], self.match_thresh
            )
            print(f"[BYTETRACK MATCH] Found {len(matches)} matches between {len(tracked_tlbrs)} tracks and {len(high_dets)} detections")
            # Update matched tracks with detections
            for i_track, i_det in matches:
                track_id = tracked_ids[i_track]
                track = self._get_track_by_id(track_id, self.tracked_tracks)
                if track:
                    track.update(high_dets[i_det])
                    track.last_frame = self.frame_id  # Update last_frame when track is matched
                    print(f"[BYTETRACK MATCH] Track ID={track_id} matched and updated")
            # Move unmatched tracks to lost and rebuild tracked_tracks list
            unmatched_track_ids = []
            remaining_tracked_tracks = []
            # Keep matched tracks in tracked_tracks
            for i_track, _ in matches:
                track_id = tracked_ids[i_track]
                track = self._get_track_by_id(track_id, self.tracked_tracks)
                if track:
                    remaining_tracked_tracks.append(track)
            # Move unmatched tracks to lost
            for i_track in unmatched_tracks:
                track_id = tracked_ids[i_track]
                track = self._get_track_by_id(track_id, self.tracked_tracks)
                if track:
                    track.is_lost = True
                    track.last_frame = self.frame_id  # Update last_frame when track is lost
                    self.lost_tracks.append(track)
                    unmatched_track_ids.append(track_id)
            # tracked_tracks now only contains matched tracks
            self.tracked_tracks = remaining_tracked_tracks
            if unmatched_track_ids:
                print(f"[BYTETRACK MATCH] Lost tracks: {unmatched_track_ids}")
            # Create new tracks for unmatched high-confidence detections
            new_track_ids = []
            for i_det in unmatched_detections:
                det = high_dets[i_det]
                new_track = Track(det, self.track_id_count)
                new_track.last_frame = self.frame_id
                new_track_ids.append(self.track_id_count)
                self.track_id_count += 1
                self.tracked_tracks.append(new_track)
            if new_track_ids:
                print(f"[BYTETRACK MATCH] Created new tracks: {new_track_ids}")
            print(f"[BYTETRACK] Matched {len(matches)} tracks, {len(unmatched_tracks)} unmatched tracks, " +
                  f"{len(unmatched_detections)} new tracks")
        else:
            # No tracked tracks or no high confidence detections
            # Move all current tracks to lost
            for track in self.tracked_tracks:
                track.is_lost = True
                track.last_frame = self.frame_id
                self.lost_tracks.append(track)
            # Create new tracks for all high-confidence detections
            for i in range(len(high_dets)):
                det = high_dets[i]
                new_track = Track(det, self.track_id_count)
                new_track.last_frame = self.frame_id
                self.track_id_count += 1
                self.tracked_tracks.append(new_track)
            print(f"[BYTETRACK] No active tracks or high-conf dets: {len(self.tracked_tracks)} new tracks, " +
                  f"{len(self.lost_tracks)} lost tracks")
        # Remove lost tracks from tracked_tracks
        self.tracked_tracks = [t for t in self.tracked_tracks if not t.is_lost]
        # Second association: low confidence detections with lost tracks
        lost_tlbrs = []
        lost_ids = []
        for track in self.lost_tracks:
            lost_tlbrs.append(track.tlbr)
            lost_ids.append(track.track_id)
        lost_tlbrs = np.array(lost_tlbrs) if lost_tlbrs else np.empty((0, 4))
        lost_ids = np.array(lost_ids)
        if len(lost_tlbrs) > 0 and len(low_dets) > 0:
            # Match lost tracks to low confidence detections
            matches, _, _ = self._match_tracks_to_detections(
                lost_tlbrs, low_dets[:, :4], self.match_thresh
            )
            # Recover matched lost tracks
            recovered_tracks = []
            for i_track, i_det in matches:
                track_id = lost_ids[i_track]
                track = self._get_track_by_id(track_id, self.lost_tracks)
                if track:
                    track.is_lost = False
                    track.update(low_dets[i_det])
                    track.last_frame = self.frame_id  # Update last_frame on recovery
                    recovered_tracks.append(track)
            # Add recovered tracks back to tracked_tracks
            self.tracked_tracks.extend(recovered_tracks)
            # Remove recovered tracks from lost_tracks
            recovered_ids = [t.track_id for t in recovered_tracks]
            self.lost_tracks = [t for t in self.lost_tracks if t.track_id not in recovered_ids]
            print(f"[BYTETRACK] Recovered {len(recovered_tracks)} lost tracks with low-conf detections")
        # Update remaining lost tracks
        new_lost_tracks = []
        expired_count = 0
        # Sort lost tracks by confidence score - keep higher quality tracks longer
        # This prevents memory issues by limiting total number of lost tracks
        sorted_lost_tracks = sorted(self.lost_tracks, key=lambda x: x.score, reverse=True)
        # Only keep top MAX_LOST_TRACKS lost tracks
        MAX_LOST_TRACKS = 30  # Maximum number of lost tracks to keep
        sorted_lost_tracks = sorted_lost_tracks[:MAX_LOST_TRACKS]
        for track in sorted_lost_tracks:
            track.predict()  # Predict new location even when lost
            # Elapsed frames since last detection
            time_since_detection = self.frame_id - track.last_frame
            # Keep track if within time buffer, otherwise remove
            if time_since_detection <= self.max_time_lost:
                new_lost_tracks.append(track)
            else:
                self.removed_tracks.append(track)
                expired_count += 1
        # Calculate how many tracks were removed due to confidence threshold
        dropped_by_limit = len(self.lost_tracks) - len(sorted_lost_tracks)
        self.lost_tracks = new_lost_tracks
        print(f"[BYTETRACK] Final state: {len(self.tracked_tracks)} tracked, " +
              f"{len(self.lost_tracks)} lost, {expired_count} expired, {dropped_by_limit} dropped by limit")
        # Return final track results
        return self._get_track_results()

    def _get_track_by_id(self, track_id, track_list):
        """Helper to find a track by ID in a list; returns None if absent."""
        for track in track_list:
            if track.track_id == track_id:
                return track
        return None

    def _get_track_results(self):
        """Format confirmed tracked_tracks as result dicts, skipping invalid boxes."""
        results = []
        for track in self.tracked_tracks:
            if track.hits >= 1:  # Lenient confirmation threshold
                tlbr = track.tlbr
                track_id = track.track_id
                score = track.score
                class_id = track.class_id
                try:
                    x1, y1, x2, y2 = map(float, tlbr)
                    # Skip NaN/inf boxes produced by a diverged filter state
                    if not all(np.isfinite([x1, y1, x2, y2])):
                        print(f"[BYTETRACK WARNING] Track {track_id} has invalid bbox: {tlbr}")
                        continue
                    # Make sure width and height are positive
                    if x2 <= x1 or y2 <= y1:
                        print(f"[BYTETRACK WARNING] Track {track_id} has invalid bbox dimensions: {tlbr}")
                        continue
                    results.append({
                        'id': track_id,
                        'bbox': [float(x1), float(y1), float(x2), float(y2)],
                        'confidence': float(score),
                        'class_id': int(class_id),
                        'state': 'tracked'
                    })
                except Exception as e:
                    print(f"[BYTETRACK ERROR] Failed to process track {track_id}: {e}")
        print(f"[BYTETRACK] Returning {len(results)} confirmed tracks")
        return results

    def _convert_detections(self, detections):
        """Convert detection dictionaries to numpy array format
        Format: [x1, y1, x2, y2, score, class_id]
        Invalid entries (bad bbox, non-positive area, zero confidence) are dropped.
        """
        if not detections:
            return np.empty((0, 6))
        result = []
        for det in detections:
            bbox = det.get('bbox')
            conf = det.get('confidence', 0.0)
            class_id = det.get('class_id', -1)
            # Make sure we have numeric values
            try:
                if bbox is not None and len(bbox) == 4:
                    # Explicitly convert to float32 for ByteTrack
                    x1, y1, x2, y2 = map(np.float32, bbox)
                    conf = np.float32(conf)
                    class_id = int(class_id) if isinstance(class_id, (int, float)) else -1
                    # Validate bbox dimensions
                    if x2 > x1 and y2 > y1 and conf > 0:
                        result.append([x1, y1, x2, y2, conf, class_id])
            except (ValueError, TypeError) as e:
                print(f"[BYTETRACK] Error converting detection: {e}")
        return np.array(result, dtype=np.float32) if result else np.empty((0, 6), dtype=np.float32)

    def _match_tracks_to_detections(self, tracks_tlbr, dets_tlbr, threshold):
        """
        Match tracks to detections using IoU
        Args:
            tracks_tlbr: Track boxes [x1, y1, x2, y2]
            dets_tlbr: Detection boxes [x1, y1, x2, y2]
            threshold: IoU threshold
        Returns:
            (matches, unmatched_tracks, unmatched_detections)
        """
        if len(tracks_tlbr) == 0 or len(dets_tlbr) == 0:
            return [], list(range(len(tracks_tlbr))), list(range(len(dets_tlbr)))
        iou_matrix = self._iou_batch(tracks_tlbr, dets_tlbr)
        # BUGFIX: _linear_assignment is a greedy matcher that treats its input as a
        # SIMILARITY matrix (it picks the largest values first and compares them
        # against the positive `threshold`). The previous code passed `-iou_matrix`,
        # which made every entry <= 0 — no pair could ever clear the threshold, so
        # the tracker never matched anything and assigned new IDs every frame.
        # Pass the IoU matrix directly.
        matched_indices = self._linear_assignment(iou_matrix, threshold)
        unmatched_tracks = []
        for i in range(len(tracks_tlbr)):
            if i not in matched_indices[:, 0]:
                unmatched_tracks.append(i)
        unmatched_detections = []
        for i in range(len(dets_tlbr)):
            if i not in matched_indices[:, 1]:
                unmatched_detections.append(i)
        matches = []
        # Demote candidate pairs below the strict IoU threshold (the greedy matcher
        # may admit lower-IoU pairs via its dynamic decay; this filter is final).
        for i, j in matched_indices:
            if iou_matrix[i, j] < threshold:
                unmatched_tracks.append(i)
                unmatched_detections.append(j)
            else:
                matches.append((i, j))
        return matches, unmatched_tracks, unmatched_detections

    def _iou_batch(self, boxes1, boxes2):
        """
        Calculate IoU between all pairs of boxes
        Args:
            boxes1: (N, 4) [x1, y1, x2, y2]
            boxes2: (M, 4) [x1, y1, x2, y2]
        Returns:
            IoU matrix (N, M)
        """
        area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
        area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
        lt = np.maximum(boxes1[:, None, :2], boxes2[:, :2])  # (N,M,2)
        rb = np.minimum(boxes1[:, None, 2:], boxes2[:, 2:])  # (N,M,2)
        wh = np.clip(rb - lt, 0, None)  # (N,M,2)
        inter = wh[:, :, 0] * wh[:, :, 1]  # (N,M)
        union = area1[:, None] + area2 - inter
        # Epsilon guards against division by zero for degenerate boxes
        iou = inter / (union + 1e-10)
        return iou

    def _linear_assignment(self, cost_matrix, threshold):
        """
        Greedy assignment over a SIMILARITY matrix (higher = better match).
        For each candidate pair in descending similarity order, accept it if both
        row and column are still free. Pairs below `threshold` may still be
        accepted via a position-dependent decayed threshold, which helps in
        low-FPS scenarios where IoU between consecutive frames is lower.

        Returns an (K, 2) int array of (row, col) matches.
        """
        if cost_matrix.size == 0:
            return np.empty((0, 2), dtype=int)
        matches = []
        # Sort similarities in descending order
        flat_indices = np.argsort(cost_matrix.flatten())[::-1]
        cost_values = cost_matrix.flatten()[flat_indices]
        # Get row and col indices
        row_indices = flat_indices // cost_matrix.shape[1]
        col_indices = flat_indices % cost_matrix.shape[1]
        # Keep track of assigned rows and columns
        assigned_rows = set()
        assigned_cols = set()
        # Iterate through sorted indices
        for i in range(len(row_indices)):
            row, col = row_indices[i], col_indices[i]
            cost = cost_values[i]
            # If similarity is below threshold, continue checking but apply a decay
            if cost < threshold:
                # Calculate dynamic threshold based on position in list
                position_ratio = 1.0 - (i / len(row_indices))
                dynamic_threshold = threshold * 0.7 * position_ratio
                if cost < dynamic_threshold:
                    continue
            # If row or col already assigned, skip
            if row in assigned_rows or col in assigned_cols:
                continue
            # Add match
            matches.append((row, col))
            assigned_rows.add(row)
            assigned_cols.add(col)
        return np.array(matches) if matches else np.empty((0, 2), dtype=int)
class Track:
    """A single tracked object with a simplified constant-velocity Kalman filter.

    State vector layout: [cx, cy, w, h, vx, vy, vw, vh].
    """

    def __init__(self, detection, track_id):
        """Create a track from one detection.

        Args:
            detection: array-like [x1, y1, x2, y2, score, class_id]
            track_id: unique integer ID assigned by the tracker
        """
        self.track_id = track_id
        self.tlbr = detection[:4]           # current box as [x1, y1, x2, y2]
        self.score = detection[4]           # confidence, smoothed via EMA on update
        self.class_id = int(detection[5])
        self.time_since_update = 0          # frames since last matched detection
        self.hits = 1                       # number of matched detections so far
        self.age = 1                        # total frames this track has existed
        self.last_frame = 0                 # set by the tracker on create/update
        self.is_lost = False                # True while temporarily out of view
        # Kalman filter state (kf placeholder kept for interface compatibility)
        self.kf = None
        self.mean = None
        self.covariance = None
        # Rolling buffer of the last 30 boxes for smoother trajectories
        self.history = []
        self._init_kalman_filter()

    def _init_kalman_filter(self):
        """Seed the filter state from the current box; velocities start at zero."""
        left, top, right, bottom = self.tlbr
        box_w, box_h = right - left, bottom - top
        center_x, center_y = left + box_w / 2, top + box_h / 2
        state = np.zeros(8)
        state[:4] = [center_x, center_y, box_w, box_h]
        self.mean = state
        # Broad initial uncertainty on every state component
        self.covariance = np.eye(8) * 10

    def predict(self):
        """Advance the state one frame under a constant-velocity motion model."""
        dt = 1.0
        # Transition matrix: position components absorb their velocities
        transition = np.eye(8)
        transition[0, 4] = dt
        transition[1, 5] = dt
        transition[2, 6] = dt
        transition[3, 7] = dt
        self.mean = transition @ self.mean
        # Grow uncertainty by a small fixed process noise
        process_noise = np.eye(8) * 0.01
        self.covariance = transition @ self.covariance @ transition.T + process_noise
        # Rebuild the corner-format box from the predicted center/size
        cx, cy, w, h = self.mean[:4]
        self.tlbr = np.array([cx - w/2, cy - h/2, cx + w/2, cy + h/2])
        self.age += 1
        self.time_since_update += 1

    def update(self, detection):
        """Fuse a matched detection into the filter state.

        Args:
            detection: array-like [x1, y1, x2, y2, score, class_id]
        """
        x1, y1, x2, y2 = detection[:4]
        self.tlbr = detection[:4]
        # Exponential moving average keeps the score stable across noisy frames
        alpha = 0.9
        self.score = alpha * self.score + (1 - alpha) * detection[4]
        w, h = x2 - x1, y2 - y1
        cx, cy = x1 + w/2, y1 + h/2
        measurement = np.array([cx, cy, w, h])
        # Observation model: we measure position/size directly, not velocities
        obs_model = np.zeros((4, 8))
        obs_model[:4, :4] = np.eye(4)
        # Lower confidence => higher measurement noise => less trust
        meas_cov = np.eye(4) * (1.0 / self.score)
        # Standard (simplified) Kalman correction step
        innovation = measurement - obs_model @ self.mean
        innovation_cov = obs_model @ self.covariance @ obs_model.T + meas_cov
        gain = self.covariance @ obs_model.T @ np.linalg.inv(innovation_cov)
        self.mean = self.mean + gain @ innovation
        self.covariance = (np.eye(8) - gain @ obs_model) @ self.covariance
        # Rebuild corner-format box from the corrected state
        cx, cy, w, h = self.mean[:4]
        self.tlbr = np.array([cx - w/2, cy - h/2, cx + w/2, cy + h/2])
        # Record the position and cap the history length at 30 entries
        self.history.append(self.tlbr.copy())
        if len(self.history) > 30:
            self.history = self.history[-30:]
        # A fresh match resets staleness and confirms the track
        self.time_since_update = 0
        self.hits += 1
        self.is_lost = False
class ByteTrackVehicleTracker:
    """
    ByteTrack-based vehicle tracker with same API as DeepSortVehicleTracker
    for drop-in replacement with improved performance

    Implemented as a process-wide singleton: every construction returns the
    same instance, so tracker state is shared across callers.
    """
    _instance = None  # shared singleton instance

    def __new__(cls, *args, **kwargs):
        # Singleton: only the first construction allocates; later calls reuse it.
        if cls._instance is None:
            print("[BYTETRACK SINGLETON] Creating ByteTrackVehicleTracker instance")
            cls._instance = super(ByteTrackVehicleTracker, cls).__new__(cls)
            cls._instance._initialized = False
        return cls._instance

    def __init__(self):
        # Guard so repeated construction of the singleton does not reset state.
        if getattr(self, '_initialized', False):
            return
        print("[BYTETRACK INIT] Initializing ByteTrack tracker (should only see this once)")
        # Parameters tuned for vehicle tracking in traffic scenes with low FPS
        self.tracker = BYTETracker(
            track_thresh=0.2,        # Low threshold for better tracking continuity
            track_buffer=60,         # Keep tracks alive longer (60 frames = 4-6 seconds at 10 FPS)
            match_thresh=0.4,        # Lenient IoU threshold for matching
            track_high_thresh=0.25,  # Lower high confidence threshold
            track_low_thresh=0.05,   # Very low threshold for second-chance matching
            frame_rate=10            # Match actual video FPS (~7-10)
        )
        self._initialized = True
        self.track_id_counter = {}  # Track seen IDs
        self.debug = True  # Enable debug output
        # Track count tracking for debugging
        self.track_counts = {
            'frames_processed': 0,
            'total_tracks_created': 0,
            'max_concurrent_tracks': 0,
            'current_active_tracks': 0,
            'current_lost_tracks': 0
        }

    def update(self, detections, frame=None):
        """
        Update tracker with new detections
        Args:
            detections: list of dicts with keys ['bbox', 'confidence', 'class_id', ...]
            frame: BGR image (optional, used for visualization but not required for ByteTrack)
        Returns:
            list of dicts with keys ['id', 'bbox', 'confidence', 'class_id', ...]
        """
        # Safety check: cap the track ID counter to avoid unbounded growth.
        # NOTE(review): resetting to 0 could collide with IDs of still-active
        # tracks; acceptable here because it only triggers after 10k tracks.
        if hasattr(self.tracker, 'track_id_count') and self.tracker.track_id_count > 10000:
            print(f"[BYTETRACK WARNING] Track ID counter extremely high ({self.tracker.track_id_count}). Resetting to 0.")
            self.tracker.track_id_count = 0
        # Convert detections to ByteTrack format with validation
        valid_dets = []
        for i, det in enumerate(detections):
            bbox = det.get('bbox')
            conf = det.get('confidence', 0.0)
            class_id = det.get('class_id', -1)
            if bbox is not None and len(bbox) == 4:
                try:
                    # Ensure all values are explicitly converted to float32 for consistent tracking
                    x1, y1, x2, y2 = map(np.float32, bbox)
                    conf = np.float32(conf)
                    class_id = int(class_id) if isinstance(class_id, (int, float)) else -1
                    # Validate bbox dimensions and apply a low confidence floor
                    if x2 > x1 and y2 > y1 and conf > 0.05:
                        # Create a new det with verified types
                        valid_det = {
                            'bbox': [x1, y1, x2, y2],
                            'confidence': conf,
                            'class_id': class_id
                        }
                        valid_dets.append(valid_det)
                        if self.debug and i % 5 == 0:  # Only print every 5th detection to reduce log spam
                            print(f"[BYTETRACK] Added detection {i}: bbox={[x1, y1, x2, y2]}, conf={conf:.2f}")
                    else:
                        if self.debug:
                            print(f"[BYTETRACK] Rejected detection {i}: invalid bbox dimensions or very low confidence")
                except Exception as e:
                    if self.debug:
                        print(f"[BYTETRACK] Error processing detection {i}: {e}")
            else:
                if self.debug:
                    print(f"[BYTETRACK] Rejected detection {i}: invalid bbox format")
        if self.debug:
            print(f"[BYTETRACK] Processing {len(valid_dets)} valid detections")
        try:
            # Use try/except to catch any errors in the tracker update
            tracks = self.tracker.update(valid_dets, frame)
            # Update track statistics
            self.track_counts['frames_processed'] += 1
            self.track_counts['current_active_tracks'] = len(self.tracker.tracked_tracks)
            self.track_counts['current_lost_tracks'] = len(self.tracker.lost_tracks)
            # FIXED: keep the created-track stat in sync (it was never updated before)
            self.track_counts['total_tracks_created'] = self.tracker.track_id_count
            self.track_counts['max_concurrent_tracks'] = max(
                self.track_counts['max_concurrent_tracks'],
                len(self.tracker.tracked_tracks) + len(self.tracker.lost_tracks)
            )
            # Clean up old removed tracks periodically to prevent memory issues
            if self.track_counts['frames_processed'] % 50 == 0:
                old_removed_count = len(self.tracker.removed_tracks)
                # FIXED: always keep only the 30 most recent removed tracks. The old
                # code (`[-30:] if len(...) > 30 else []`) cleared the entire list
                # whenever it held 30 or fewer entries, contradicting the stated
                # "keep the last 30" intent.
                self.tracker.removed_tracks = self.tracker.removed_tracks[-30:]
                print(f"[BYTETRACK] Memory cleanup: removed {old_removed_count - len(self.tracker.removed_tracks)} old tracks")
                print(f"[BYTETRACK] Stats: Active={self.track_counts['current_active_tracks']}, " +
                      f"Lost={self.track_counts['current_lost_tracks']}, " +
                      f"Max concurrent={self.track_counts['max_concurrent_tracks']}")
            # Make sure tracks are in a consistent dictionary format
            standardized_tracks = []
            for track in tracks:
                if isinstance(track, dict):
                    # Track is already a dict, just ensure it has required fields
                    if 'id' not in track and 'track_id' in track:
                        track['id'] = track['track_id']
                    standardized_tracks.append(track)
                else:
                    # Convert object to dict
                    try:
                        track_dict = {
                            'id': track.track_id if hasattr(track, 'track_id') else -1,
                            'bbox': track.bbox if hasattr(track, 'bbox') else [0, 0, 0, 0],
                            'confidence': track.confidence if hasattr(track, 'confidence') else 0.0,
                            'class_id': track.class_id if hasattr(track, 'class_id') else -1
                        }
                        standardized_tracks.append(track_dict)
                    except Exception as e:
                        print(f"[BYTETRACK ERROR] Error converting track to dict: {e}")
            return standardized_tracks
        except Exception as e:
            print(f"[BYTETRACK ERROR] Error updating tracker: {e}")
            import traceback
            traceback.print_exc()
            # Return empty tracks list as fallback
            return []

    def update_tracks(self, detections, frame=None):
        """
        Alias for the update method to maintain compatibility with DeepSORT interface
        Args:
            detections: list of detection arrays in format [bbox_xywh, conf, class_id]
            frame: BGR image
        Returns:
            list of objects with DeepSORT-compatible interface including is_confirmed() method
        """
        # Convert from DeepSORT format to ByteTrack format
        converted_dets = []
        for det in detections:
            try:
                # Handle different detection formats
                if isinstance(det, (list, tuple, np.ndarray)) and len(det) >= 2:
                    # DeepSORT format: [bbox_xywh, conf, class_id]
                    bbox_xywh, conf = det[:2]
                    class_id = det[2] if len(det) > 2 else -1
                    # Convert [x, y, w, h] to [x1, y1, x2, y2] with type validation
                    x, y, w, h = map(float, bbox_xywh)
                    conf = float(conf)
                    class_id = int(class_id) if isinstance(class_id, (int, float)) else -1
                    converted_dets.append({
                        'bbox': [x, y, x + w, y + h],
                        'confidence': conf,
                        'class_id': class_id
                    })
                elif isinstance(det, dict):
                    # Newer format with bbox in dict
                    if 'bbox' in det:
                        bbox = det['bbox']
                        if len(bbox) == 4:
                            # Heuristic format check: corners imply x2>x1 and y2>y1.
                            # NOTE(review): a valid [x, y, w, h] box can also satisfy
                            # this, so ambiguous inputs are treated as corner format.
                            if bbox[2] > bbox[0] and bbox[3] > bbox[1]:
                                # Already in [x1, y1, x2, y2] format
                                converted_dets.append(det.copy())
                            else:
                                # Assume it's [x, y, w, h] and convert
                                x, y, w, h = bbox
                                converted_det = det.copy()
                                converted_det['bbox'] = [x, y, x + w, y + h]
                                converted_dets.append(converted_det)
            except Exception as e:
                print(f"[BYTETRACK] Error converting detection format: {e}")
        # Call the regular update method to get dictionary tracks
        dict_tracks = self.update(converted_dets, frame)
        if self.debug:
            print(f"[BYTETRACK] Converting {len(dict_tracks)} dict tracks to DeepSORT-compatible objects")
        # Create DeepSORT compatible track objects from dictionaries
        ds_tracks = []
        for track_data in dict_tracks:
            ds_track = ByteTrackOutput(track_data)
            ds_tracks.append(ds_track)
        return ds_tracks

    def reset(self):
        """
        Reset the tracker to clean state, resetting all IDs and clearing tracks.
        Call this when starting a new video or session.
        """
        print("[BYTETRACK] Resetting tracker state - IDs will start from 1")
        if hasattr(self, 'tracker') and self.tracker is not None:
            # Reset the internal BYTETracker
            self.tracker.tracked_tracks = []
            self.tracker.lost_tracks = []
            self.tracker.removed_tracks = []
            self.tracker.frame_id = 0
            self.tracker.track_id_count = 1  # Start from 1 instead of 0
            print("[BYTETRACK] Reset complete - track ID counter reset to 1")
        else:
            print("[BYTETRACK] Warning: Tracker not initialized, nothing to reset")
        # Reset tracking statistics
        self.track_counts = {
            'frames_processed': 0,
            'total_tracks_created': 0,
            'max_concurrent_tracks': 0,
            'current_active_tracks': 0,
            'current_lost_tracks': 0
        }
        self.track_id_counter = {}
class ByteTrackOutput:
    """Adapter exposing a ByteTrack result dict through the DeepSORT track API."""

    def __init__(self, track_data):
        self.track_id = track_data['id']
        self.bbox = track_data['bbox']  # [x1, y1, x2, y2]
        self.confidence = track_data['confidence']
        self.class_id = track_data['class_id']
        # Box is already corner-format, so it doubles as the LTRB representation.
        self._ltrb = self.bbox

    def to_ltrb(self):
        """Return bbox in [left, top, right, bottom] format."""
        return self._ltrb

    def to_tlbr(self):
        """Return bbox in [top, left, bottom, right] format.

        Identical to to_ltrb() here, since the box is stored as [x1, y1, x2, y2].
        """
        return self._ltrb

    def to_xyah(self):
        """Return bbox in [center_x, center_y, aspect_ratio, height] format."""
        left, top, right, bottom = self._ltrb
        w, h = right - left, bottom - top
        ratio = w / h if h > 0 else 1.0
        return [left + w / 2, top + h / 2, ratio, h]

    def is_confirmed(self):
        """Always True: ByteTrack only emits confirmed tracks."""
        return True