You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
484 lines
18 KiB
484 lines
18 KiB
#!/usr/bin/env python
|
|
|
|
# Copyright (c) 2019 Computer Vision Center (CVC) at the Universitat Autonoma de
|
|
# Barcelona (UAB).
|
|
#
|
|
# This work is licensed under the terms of the MIT license.
|
|
# For a copy, see <https://opensource.org/licenses/MIT>.
|
|
|
|
# Generates 2D and 3D bounding boxes for a simulation and can save them as JSON
|
|
# Instructions:
|
|
|
|
"""
|
|
Welcome to CARLA bounding boxes.
|
|
R : toggle recording images and bounding boxes
|
|
3 : visualize bounding boxes in 3D
|
|
2 : vizualize bounding boxes in 2D
|
|
ESC : quit
|
|
"""
|
|
|
|
import carla
|
|
import json
|
|
import random
|
|
import queue
|
|
import pygame
|
|
import argparse
|
|
import numpy as np
|
|
from math import radians
|
|
|
|
from pygame.locals import K_ESCAPE
|
|
from pygame.locals import K_2
|
|
from pygame.locals import K_3
|
|
from pygame.locals import K_r
|
|
|
|
# Bounding box edge topology order
|
|
EDGES = [[0,1], [1,3], [3,2], [2,0], [0,4], [4,5], [5,1], [5,7], [7,6], [6,4], [6,2], [7,3]]
|
|
|
|
# Map for CARLA semantic labels to class names and colors
|
|
SEMANTIC_MAP = {0: ('unlabelled', (0,0,0)), 1: ('road', (128,64,0)),2: ('sidewalk', (244,35,232)),
|
|
3: ('building', (70,70,70)), 4: ('wall', (102,102,156)), 5: ('fence', (190,153,153)),
|
|
6: ('pole', (153,153,153)), 7: ('traffic light', (250,170,30)),
|
|
8: ('traffic sign', (220,220,0)), 9: ('vegetation', (107,142,35)),
|
|
10: ('terrain', (152,251,152)), 11: ('sky', (70,130,180)),
|
|
12: ('pedestrian', (220,20,60)), 13: ('rider', (255,0,0)),
|
|
14: ('car', (0,0,142)), 15: ('truck', (0,0,70)), 16: ('bus', (0,60,100)),
|
|
17: ('train', (0,80,100)), 18: ('motorcycle', (0,0,230)),
|
|
19: ('bicycle', (119,11,32)), 20: ('static', (110,190,160)),
|
|
21: ('dynamic', (170,120,50)), 22: ('other', (55,90,80)),
|
|
23: ('water', (45,60,150)), 24: ('road line', (157,234,50)),
|
|
25: ('ground', (81,0,81)), 26: ('bridge', (150,100,100)),
|
|
27: ('rail track', (230,150,140)), 28: ('guard rail', (180,165,180))}
|
|
|
|
# Calculate the camera projection matrix
|
|
def build_projection_matrix(w, h, fov, is_behind_camera=False):
|
|
focal = w / (2.0 * np.tan(fov * np.pi / 360.0))
|
|
K = np.identity(3)
|
|
|
|
if is_behind_camera:
|
|
K[0, 0] = K[1, 1] = -focal
|
|
else:
|
|
K[0, 0] = K[1, 1] = focal
|
|
|
|
K[0, 2] = w / 2.0
|
|
K[1, 2] = h / 2.0
|
|
return K
|
|
|
|
# Calculate 2D projection of 3D coordinate
|
|
def get_image_point(loc, K, w2c):
|
|
|
|
# Format the input coordinate (loc is a carla.Position object)
|
|
point = np.array([loc.x, loc.y, loc.z, 1])
|
|
# transform to camera coordinates
|
|
point_camera = np.dot(w2c, point)
|
|
|
|
# New we must change from UE4's coordinate system to an "standard"
|
|
# (x, y ,z) -> (y, -z, x)
|
|
# and we remove the fourth componebonent also
|
|
point_camera = [point_camera[1], -point_camera[2], point_camera[0]]
|
|
|
|
# now project 3D->2D using the camera matrix
|
|
point_img = np.dot(K, point_camera)
|
|
# normalize
|
|
point_img[0] /= point_img[2]
|
|
point_img[1] /= point_img[2]
|
|
|
|
return point_img[0:2]
|
|
|
|
# Verify that the point is within the image plane
|
|
def point_in_canvas(pos, img_h, img_w):
|
|
"""Return true if point is in canvas"""
|
|
if (pos[0] >= 0) and (pos[0] < img_w) and (pos[1] >= 0) and (pos[1] < img_h):
|
|
return True
|
|
return False
|
|
|
|
# Decode the instance segmentation map into semantic labels and actor IDs
|
|
def decode_instance_segmentation(img_rgba: np.ndarray):
|
|
semantic_labels = img_rgba[..., 2] # R channel
|
|
actor_ids = img_rgba[..., 1].astype(np.uint16) + (img_rgba[..., 0].astype(np.uint16) << 8)
|
|
return semantic_labels, actor_ids
|
|
|
|
# Generate a 2D bounding box for an actor from the actor ID image
|
|
def bbox_2d_for_actor(actor, actor_ids: np.ndarray, semantic_labels: np.ndarray):
|
|
mask = (actor_ids == actor.id)
|
|
if not np.any(mask):
|
|
return None # actor not present
|
|
ys, xs = np.where(mask)
|
|
xmin, xmax = xs.min(), xs.max()
|
|
ymin, ymax = ys.min(), ys.max()
|
|
return {'actor_id': actor.id,
|
|
'semantic_label': actor.semantic_tags[0],
|
|
'bbox_2d': (xmin, ymin, xmax, ymax)}
|
|
|
|
# Generate a 3D bounding box for an actor from the simulation
|
|
def bbox_3d_for_actor(actor, ego, camera_bp, camera):
|
|
|
|
# Get the world to camera matrix
|
|
world_2_camera = np.array(camera.get_transform().get_inverse_matrix())
|
|
|
|
# Get the attributes from the camera
|
|
image_w = camera_bp.get_attribute("image_size_x").as_int()
|
|
image_h = camera_bp.get_attribute("image_size_y").as_int()
|
|
fov = camera_bp.get_attribute("fov").as_float()
|
|
|
|
# Calculate the camera projection matrix to project from 3D -> 2D
|
|
K = build_projection_matrix(image_w, image_h, fov)
|
|
K_b = build_projection_matrix(image_w, image_h, fov, is_behind_camera=True)
|
|
|
|
ego_bbox_loc = ego.get_transform().location + ego.bounding_box.location
|
|
ego_bbox_transform = carla.Transform(ego_bbox_loc, ego.get_transform().rotation)
|
|
|
|
npc_bbox_loc = actor.get_transform().location + actor.bounding_box.location
|
|
#npc_bbox_transform = carla.Transform(npc_bbox_loc, actor.get_transform().rotation)
|
|
|
|
npc_loc_ego_space = ego_bbox_transform.inverse_transform(npc_bbox_loc)
|
|
|
|
verts = [v for v in actor.bounding_box.get_world_vertices(actor.get_transform())]
|
|
|
|
projection = []
|
|
for edge in EDGES:
|
|
p1 = get_image_point(verts[edge[0]], K, world_2_camera)
|
|
p2 = get_image_point(verts[edge[1]], K, world_2_camera)
|
|
|
|
p1_in_canvas = point_in_canvas(p1, image_h, image_w)
|
|
p2_in_canvas = point_in_canvas(p2, image_h, image_w)
|
|
|
|
if not p1_in_canvas and not p2_in_canvas:
|
|
continue
|
|
|
|
ray0 = verts[edge[0]] - camera.get_transform().location
|
|
ray1 = verts[edge[1]] - camera.get_transform().location
|
|
cam_forward_vec = camera.get_transform().get_forward_vector()
|
|
|
|
# One of the vertexes is behind the camera
|
|
if not (cam_forward_vec.dot(ray0) > 0):
|
|
p1 = get_image_point(verts[edge[0]], K_b, world_2_camera)
|
|
if not (cam_forward_vec.dot(ray1) > 0):
|
|
p2 = get_image_point(verts[edge[1]], K_b, world_2_camera)
|
|
|
|
projection.append((int(p1[0]), int(p1[1]), int(p2[0]), int(p2[1])))
|
|
|
|
return {'actor_id': actor.id,
|
|
'semantic_label': actor.semantic_tags[0],
|
|
'bbox_3d': {
|
|
'center': {
|
|
'x': npc_loc_ego_space.x,
|
|
'y': npc_loc_ego_space.y,
|
|
'z': npc_loc_ego_space.z
|
|
},
|
|
'dimensions': {
|
|
'length': actor.bounding_box.extent.x*2,
|
|
'width': actor.bounding_box.extent.y*2,
|
|
'height': actor.bounding_box.extent.z*2,
|
|
},
|
|
'rotation_yaw': radians(actor.get_transform().rotation.yaw - ego.get_transform().rotation.yaw)
|
|
},
|
|
'projection': projection
|
|
}
|
|
|
|
# Visualize 2D bounding boxes in Pygame
|
|
def visualize_2d_bboxes(surface, img, bboxes):
|
|
|
|
rgb_img = img[:, :, :3][:, :, ::-1]
|
|
frame_surface = pygame.surfarray.make_surface(np.transpose(rgb_img[..., 0:3], (1,0,2)))
|
|
surface.blit(frame_surface, (0, 0))
|
|
|
|
font = pygame.font.SysFont("Arial", 18)
|
|
|
|
for item in bboxes:
|
|
bbox = item['2d']
|
|
if bbox is not None:
|
|
xmin, ymin, xmax, ymax = [int(v) for v in bbox['bbox_2d']]
|
|
label = SEMANTIC_MAP[bbox['semantic_label']][0]
|
|
color = SEMANTIC_MAP[bbox['semantic_label']][1]
|
|
pygame.draw.rect(surface, color, pygame.Rect(xmin, ymin, xmax-xmin, ymax-ymin), 2)
|
|
text_surface = font.render(label, True, (255,255,255), color)
|
|
text_rect = text_surface.get_rect(topleft=(xmin, ymin-20))
|
|
surface.blit(text_surface, text_rect)
|
|
|
|
return surface
|
|
|
|
# Visualize 3D bounding boxes in Pygame
|
|
def visualize_3d_bboxes(surface, img, bboxes):
|
|
|
|
rgb_img = img[:, :, :3][:, :, ::-1]
|
|
frame_surface = pygame.surfarray.make_surface(np.transpose(rgb_img[..., 0:3], (1,0,2)))
|
|
surface.blit(frame_surface, (0, 0))
|
|
|
|
for item in bboxes:
|
|
bbox = item['3d']
|
|
color = SEMANTIC_MAP[bbox['semantic_label']][1]
|
|
|
|
n = 0
|
|
mean_x = 0
|
|
mean_y = 0
|
|
for line in bbox['projection']:
|
|
mean_x += line[0]
|
|
mean_y += line[1]
|
|
n += 1
|
|
pygame.draw.line(surface, color, (line[0], line[1]), (line[2],line[3]), 2)
|
|
|
|
if n > 0:
|
|
mean_x /= n
|
|
mean_y /= n
|
|
|
|
# --- Render label ---
|
|
font = pygame.font.SysFont("Arial", 18)
|
|
text_surface = font.render(SEMANTIC_MAP[bbox['semantic_label']][0], True, (255,255,255), color) # black text, filled bg
|
|
text_rect = text_surface.get_rect(topleft=(mean_x, mean_y))
|
|
surface.blit(text_surface, text_rect)
|
|
|
|
def calculate_relative_velocity(actor, ego):
|
|
# Calculate the relative velocity in world frame
|
|
rel_vel = actor.get_velocity() - ego.get_velocity()
|
|
# Now convert to local frame of ego
|
|
vel_ego_frame = ego.get_transform().inverse_transform(rel_vel)
|
|
|
|
return {
|
|
'x': vel_ego_frame.x,
|
|
'y': vel_ego_frame.y,
|
|
'z': vel_ego_frame.z
|
|
}
|
|
|
|
def vehicle_light_state_to_dict(vehicle: carla.Vehicle):
|
|
state = vehicle.get_light_state()
|
|
return {
|
|
"position": bool(state & carla.VehicleLightState.Position),
|
|
"low_beam": bool(state & carla.VehicleLightState.LowBeam),
|
|
"high_beam": bool(state & carla.VehicleLightState.HighBeam),
|
|
"brake": bool(state & carla.VehicleLightState.Brake),
|
|
"reverse": bool(state & carla.VehicleLightState.Reverse),
|
|
"left_blinker": bool(state & carla.VehicleLightState.LeftBlinker),
|
|
"right_blinker":bool(state & carla.VehicleLightState.RightBlinker),
|
|
"fog": bool(state & carla.VehicleLightState.Fog),
|
|
"interior": bool(state & carla.VehicleLightState.Interior),
|
|
"special1": bool(state & carla.VehicleLightState.Special1),
|
|
"special2": bool(state & carla.VehicleLightState.Special2),
|
|
}
|
|
|
|
def main():
|
|
|
|
argparser = argparse.ArgumentParser(
|
|
description='CARLA bounding boxes')
|
|
argparser.add_argument(
|
|
'--host',
|
|
metavar='H',
|
|
default='127.0.0.1',
|
|
help='IP of the host server (default: 127.0.0.1)')
|
|
argparser.add_argument(
|
|
'-p', '--port',
|
|
metavar='P',
|
|
default=2000,
|
|
type=int,
|
|
help='TCP port to listen to (default: 2000)')
|
|
argparser.add_argument(
|
|
'-d', '--distance',
|
|
metavar='D',
|
|
default=50,
|
|
type=int,
|
|
help='Actor distance threshold')
|
|
argparser.add_argument(
|
|
'--res',
|
|
metavar='WIDTHxHEIGHT',
|
|
default='1280x720',
|
|
help='window resolution (default: 1280x720)')
|
|
args = argparser.parse_args()
|
|
|
|
args.width, args.height = [int(x) for x in args.res.split('x')]
|
|
|
|
pygame.init()
|
|
|
|
# State variables
|
|
record = False
|
|
display_3d = False
|
|
run_simulation = True
|
|
|
|
clock = pygame.time.Clock()
|
|
pygame.display.set_caption("Bounding Box Visualization")
|
|
display = pygame.display.set_mode(
|
|
(args.width, args.height),
|
|
pygame.HWSURFACE | pygame.DOUBLEBUF)
|
|
display.fill((0,0,0))
|
|
pygame.display.flip()
|
|
|
|
# Connect to the CARLA server and get the world object
|
|
client = carla.Client(args.host, args.port)
|
|
world = client.get_world()
|
|
|
|
# Set up the simulator in synchronous mode
|
|
settings = world.get_settings()
|
|
settings.synchronous_mode = True # Enables synchronous mode
|
|
settings.fixed_delta_seconds = 0.05
|
|
world.apply_settings(settings)
|
|
|
|
# Set the traffic manager to Synchronous mode
|
|
traffic_manager = client.get_trafficmanager()
|
|
traffic_manager.set_synchronous_mode(True)
|
|
|
|
bp_lib = world.get_blueprint_library()
|
|
|
|
# Get the map spawn points
|
|
spawn_points = world.get_map().get_spawn_points()
|
|
|
|
# spawn vehicle
|
|
vehicle_bp =bp_lib.find('vehicle.lincoln.mkz_2020')
|
|
ego_vehicle = world.try_spawn_actor(vehicle_bp, random.choice(spawn_points))
|
|
|
|
# spawn RGB camera
|
|
camera_bp = bp_lib.find('sensor.camera.rgb')
|
|
camera_bp.set_attribute('image_size_x', str(args.width))
|
|
camera_bp.set_attribute('image_size_y', str(args.height))
|
|
camera_init_trans = carla.Transform(carla.Location(z=2))
|
|
camera = world.spawn_actor(camera_bp, camera_init_trans, attach_to=ego_vehicle)
|
|
|
|
# spawn instance segmentation camera
|
|
inst_camera_bp = bp_lib.find('sensor.camera.instance_segmentation')
|
|
inst_camera_bp.set_attribute('image_size_x', str(args.width))
|
|
inst_camera_bp.set_attribute('image_size_y', str(args.height))
|
|
camera_init_trans = carla.Transform(carla.Location(z=2))
|
|
inst_camera = world.spawn_actor(inst_camera_bp, camera_init_trans, attach_to=ego_vehicle)
|
|
|
|
ego_vehicle.set_autopilot(True)
|
|
|
|
# Add some traffic
|
|
npcs = []
|
|
for i in range(100):
|
|
vehicle_bp = random.choice(bp_lib.filter('vehicle'))
|
|
npc = world.try_spawn_actor(vehicle_bp, random.choice(spawn_points))
|
|
if npc:
|
|
npc.set_autopilot(True)
|
|
npcs.append(npc)
|
|
|
|
# Create queues to store and retrieve the sensor data
|
|
image_queue = queue.Queue()
|
|
camera.listen(image_queue.put)
|
|
|
|
inst_queue = queue.Queue()
|
|
inst_camera.listen(inst_queue.put)
|
|
|
|
try:
|
|
while run_simulation:
|
|
for event in pygame.event.get():
|
|
if event.type == pygame.KEYUP:
|
|
if event.key == K_r:
|
|
record = True
|
|
if event.key == K_2:
|
|
display_3d = False
|
|
if event.key == K_3:
|
|
display_3d = True
|
|
if event.key == K_ESCAPE:
|
|
run_simulation = False
|
|
if event.type == pygame.QUIT:
|
|
run_simulation = False
|
|
|
|
world.tick()
|
|
snapshot = world.get_snapshot()
|
|
|
|
json_frame_data = {
|
|
'frame_id': snapshot.frame,
|
|
'timestamp': snapshot.timestamp.elapsed_seconds,
|
|
'objects': []
|
|
}
|
|
|
|
image = image_queue.get()
|
|
img = np.reshape(np.copy(image.raw_data), (image.height, image.width, 4))
|
|
|
|
if record:
|
|
image.save_to_disk('_out/%08d' % image.frame)
|
|
|
|
inst_seg_image = inst_queue.get()
|
|
inst_seg = np.reshape(np.copy(inst_seg_image.raw_data), (inst_seg_image.height, inst_seg_image.width, 4))
|
|
|
|
# Decode instance segmentation image
|
|
semantic_labels, actor_ids = decode_instance_segmentation(inst_seg)
|
|
|
|
# Empty list to collect bounding boxes for this frame
|
|
frame_bboxes = []
|
|
|
|
# Loop through the NPCs in the simulation
|
|
for npc in world.get_actors().filter('*vehicle*'):
|
|
|
|
# Filter out the ego vehicle
|
|
if npc.id !=ego_vehicle.id:
|
|
|
|
npc_bbox = npc.bounding_box
|
|
dist = npc.get_transform().location.distance(ego_vehicle.get_transform().location)
|
|
|
|
# Filter for the vehicles within 50m
|
|
if dist < args.distance:
|
|
|
|
# Limit to vehicles in front of the camera
|
|
forward_vec = camera.get_transform().get_forward_vector()
|
|
inter_vehicle_vec = npc.get_transform().location - camera.get_transform().location
|
|
|
|
if forward_vec.dot(inter_vehicle_vec) > 0:
|
|
|
|
# Generate 2D and 2D bounding boxes for each actor
|
|
npc_bbox_2d = bbox_2d_for_actor(npc, actor_ids, semantic_labels)
|
|
npc_bbox_3d = bbox_3d_for_actor(npc, ego_vehicle, camera_bp, camera)
|
|
|
|
frame_bboxes.append({'3d': npc_bbox_3d, '2d': npc_bbox_2d})
|
|
|
|
json_frame_data['objects'].append({
|
|
'id': npc.id,
|
|
'class': SEMANTIC_MAP[npc.semantic_tags[0]][0],
|
|
'blueprint_id': npc.type_id,
|
|
'velocity': calculate_relative_velocity(npc, ego_vehicle),
|
|
'bbox_3d': npc_bbox_3d['bbox_3d'],
|
|
'bbox_2d': {
|
|
'xmin': int(npc_bbox_2d['bbox_2d'][0]),
|
|
'ymin': int(npc_bbox_2d['bbox_2d'][1]),
|
|
'xmax': int(npc_bbox_2d['bbox_2d'][2]),
|
|
'ymax': int(npc_bbox_2d['bbox_2d'][3]),
|
|
} if npc_bbox_2d else None,
|
|
'light_state': vehicle_light_state_to_dict(npc)
|
|
|
|
})
|
|
|
|
# Draw the scene in Pygame
|
|
display.fill((0,0,0))
|
|
if display_3d:
|
|
visualize_3d_bboxes(display, img, frame_bboxes)
|
|
else:
|
|
visualize_2d_bboxes(display, img, frame_bboxes)
|
|
pygame.display.flip()
|
|
clock.tick(30) # 30 FPS
|
|
if record:
|
|
with open(f"_out/{snapshot.frame}.json", 'w') as f:
|
|
json.dump(json_frame_data, f)
|
|
|
|
except KeyboardInterrupt:
|
|
pass
|
|
finally:
|
|
|
|
ego_vehicle.destroy()
|
|
camera.stop()
|
|
camera.destroy()
|
|
inst_camera.stop()
|
|
inst_camera.destroy()
|
|
for npc in npcs:
|
|
npc.set_autopilot(False)
|
|
npc.destroy()
|
|
|
|
world.tick()
|
|
|
|
# Set up the simulator in synchronous mode
|
|
settings = world.get_settings()
|
|
settings.synchronous_mode = False # Disables synchronous mode
|
|
settings.fixed_delta_seconds = None
|
|
world.apply_settings(settings)
|
|
|
|
# Set the traffic manager to Synchronous mode
|
|
traffic_manager.set_synchronous_mode(False)
|
|
|
|
pygame.quit()
|
|
|
|
print('\ndone.')
|
|
|
|
|
|
if __name__ == '__main__':
|
|
print('Bounding boxes script instructions:')
|
|
print('R : toggle recording images as PNG and bounding boxes as JSON')
|
|
print('3 : view the bounding boxes in 3D')
|
|
print('2 : view the bounding boxes in 2D')
|
|
print('ESC : quit')
|
|
main()
|