#!/usr/bin/env python # Copyright (c) 2019 Computer Vision Center (CVC) at the Universitat Autonoma de # Barcelona (UAB). # # This work is licensed under the terms of the MIT license. # For a copy, see . # Generates 2D and 3D bounding boxes for a simulation and can save them as JSON # Instructions: """ Welcome to CARLA bounding boxes. R : toggle recording images and bounding boxes 3 : visualize bounding boxes in 3D 2 : vizualize bounding boxes in 2D ESC : quit """ import carla import json import random import queue import pygame import argparse import numpy as np from math import radians from pygame.locals import K_ESCAPE from pygame.locals import K_2 from pygame.locals import K_3 from pygame.locals import K_r # Bounding box edge topology order EDGES = [[0,1], [1,3], [3,2], [2,0], [0,4], [4,5], [5,1], [5,7], [7,6], [6,4], [6,2], [7,3]] # Map for CARLA semantic labels to class names and colors SEMANTIC_MAP = {0: ('unlabelled', (0,0,0)), 1: ('road', (128,64,0)),2: ('sidewalk', (244,35,232)), 3: ('building', (70,70,70)), 4: ('wall', (102,102,156)), 5: ('fence', (190,153,153)), 6: ('pole', (153,153,153)), 7: ('traffic light', (250,170,30)), 8: ('traffic sign', (220,220,0)), 9: ('vegetation', (107,142,35)), 10: ('terrain', (152,251,152)), 11: ('sky', (70,130,180)), 12: ('pedestrian', (220,20,60)), 13: ('rider', (255,0,0)), 14: ('car', (0,0,142)), 15: ('truck', (0,0,70)), 16: ('bus', (0,60,100)), 17: ('train', (0,80,100)), 18: ('motorcycle', (0,0,230)), 19: ('bicycle', (119,11,32)), 20: ('static', (110,190,160)), 21: ('dynamic', (170,120,50)), 22: ('other', (55,90,80)), 23: ('water', (45,60,150)), 24: ('road line', (157,234,50)), 25: ('ground', (81,0,81)), 26: ('bridge', (150,100,100)), 27: ('rail track', (230,150,140)), 28: ('guard rail', (180,165,180))} # Calculate the camera projection matrix def build_projection_matrix(w, h, fov, is_behind_camera=False): focal = w / (2.0 * np.tan(fov * np.pi / 360.0)) K = np.identity(3) if is_behind_camera: K[0, 0] = K[1, 1] = -focal else: K[0, 0] = K[1, 1] = focal K[0, 2] = w / 2.0 K[1, 2] = h / 2.0 return K # Calculate 2D projection of 3D coordinate def get_image_point(loc, K, w2c): # Format the input coordinate (loc is a carla.Position object) point = np.array([loc.x, loc.y, loc.z, 1]) # transform to camera coordinates point_camera = np.dot(w2c, point) # New we must change from UE4's coordinate system to an "standard" # (x, y ,z) -> (y, -z, x) # and we remove the fourth componebonent also point_camera = [point_camera[1], -point_camera[2], point_camera[0]] # now project 3D->2D using the camera matrix point_img = np.dot(K, point_camera) # normalize point_img[0] /= point_img[2] point_img[1] /= point_img[2] return point_img[0:2] # Verify that the point is within the image plane def point_in_canvas(pos, img_h, img_w): """Return true if point is in canvas""" if (pos[0] >= 0) and (pos[0] < img_w) and (pos[1] >= 0) and (pos[1] < img_h): return True return False # Decode the instance segmentation map into semantic labels and actor IDs def decode_instance_segmentation(img_rgba: np.ndarray): semantic_labels = img_rgba[..., 2] # R channel actor_ids = img_rgba[..., 1].astype(np.uint16) + (img_rgba[..., 0].astype(np.uint16) << 8) return semantic_labels, actor_ids # Generate a 2D bounding box for an actor from the actor ID image def bbox_2d_for_actor(actor, actor_ids: np.ndarray, semantic_labels: np.ndarray): mask = (actor_ids == actor.id) if not np.any(mask): return None # actor not present ys, xs = np.where(mask) xmin, xmax = xs.min(), xs.max() ymin, ymax = ys.min(), ys.max() return {'actor_id': actor.id, 'semantic_label': actor.semantic_tags[0], 'bbox_2d': (xmin, ymin, xmax, ymax)} # Generate a 3D bounding box for an actor from the simulation def bbox_3d_for_actor(actor, ego, camera_bp, camera): # Get the world to camera matrix world_2_camera = np.array(camera.get_transform().get_inverse_matrix()) # Get the attributes from the camera image_w = camera_bp.get_attribute("image_size_x").as_int() image_h = camera_bp.get_attribute("image_size_y").as_int() fov = camera_bp.get_attribute("fov").as_float() # Calculate the camera projection matrix to project from 3D -> 2D K = build_projection_matrix(image_w, image_h, fov) K_b = build_projection_matrix(image_w, image_h, fov, is_behind_camera=True) ego_bbox_loc = ego.get_transform().location + ego.bounding_box.location ego_bbox_transform = carla.Transform(ego_bbox_loc, ego.get_transform().rotation) npc_bbox_loc = actor.get_transform().location + actor.bounding_box.location #npc_bbox_transform = carla.Transform(npc_bbox_loc, actor.get_transform().rotation) npc_loc_ego_space = ego_bbox_transform.inverse_transform(npc_bbox_loc) verts = [v for v in actor.bounding_box.get_world_vertices(actor.get_transform())] projection = [] for edge in EDGES: p1 = get_image_point(verts[edge[0]], K, world_2_camera) p2 = get_image_point(verts[edge[1]], K, world_2_camera) p1_in_canvas = point_in_canvas(p1, image_h, image_w) p2_in_canvas = point_in_canvas(p2, image_h, image_w) if not p1_in_canvas and not p2_in_canvas: continue ray0 = verts[edge[0]] - camera.get_transform().location ray1 = verts[edge[1]] - camera.get_transform().location cam_forward_vec = camera.get_transform().get_forward_vector() # One of the vertexes is behind the camera if not (cam_forward_vec.dot(ray0) > 0): p1 = get_image_point(verts[edge[0]], K_b, world_2_camera) if not (cam_forward_vec.dot(ray1) > 0): p2 = get_image_point(verts[edge[1]], K_b, world_2_camera) projection.append((int(p1[0]), int(p1[1]), int(p2[0]), int(p2[1]))) return {'actor_id': actor.id, 'semantic_label': actor.semantic_tags[0], 'bbox_3d': { 'center': { 'x': npc_loc_ego_space.x, 'y': npc_loc_ego_space.y, 'z': npc_loc_ego_space.z }, 'dimensions': { 'length': actor.bounding_box.extent.x*2, 'width': actor.bounding_box.extent.y*2, 'height': actor.bounding_box.extent.z*2, }, 'rotation_yaw': radians(actor.get_transform().rotation.yaw - ego.get_transform().rotation.yaw) }, 'projection': projection } # Visualize 2D bounding boxes in Pygame def visualize_2d_bboxes(surface, img, bboxes): rgb_img = img[:, :, :3][:, :, ::-1] frame_surface = pygame.surfarray.make_surface(np.transpose(rgb_img[..., 0:3], (1,0,2))) surface.blit(frame_surface, (0, 0)) font = pygame.font.SysFont("Arial", 18) for item in bboxes: bbox = item['2d'] if bbox is not None: xmin, ymin, xmax, ymax = [int(v) for v in bbox['bbox_2d']] label = SEMANTIC_MAP[bbox['semantic_label']][0] color = SEMANTIC_MAP[bbox['semantic_label']][1] pygame.draw.rect(surface, color, pygame.Rect(xmin, ymin, xmax-xmin, ymax-ymin), 2) text_surface = font.render(label, True, (255,255,255), color) text_rect = text_surface.get_rect(topleft=(xmin, ymin-20)) surface.blit(text_surface, text_rect) return surface # Visualize 3D bounding boxes in Pygame def visualize_3d_bboxes(surface, img, bboxes): rgb_img = img[:, :, :3][:, :, ::-1] frame_surface = pygame.surfarray.make_surface(np.transpose(rgb_img[..., 0:3], (1,0,2))) surface.blit(frame_surface, (0, 0)) for item in bboxes: bbox = item['3d'] color = SEMANTIC_MAP[bbox['semantic_label']][1] n = 0 mean_x = 0 mean_y = 0 for line in bbox['projection']: mean_x += line[0] mean_y += line[1] n += 1 pygame.draw.line(surface, color, (line[0], line[1]), (line[2],line[3]), 2) if n > 0: mean_x /= n mean_y /= n # --- Render label --- font = pygame.font.SysFont("Arial", 18) text_surface = font.render(SEMANTIC_MAP[bbox['semantic_label']][0], True, (255,255,255), color) # black text, filled bg text_rect = text_surface.get_rect(topleft=(mean_x, mean_y)) surface.blit(text_surface, text_rect) def calculate_relative_velocity(actor, ego): # Calculate the relative velocity in world frame rel_vel = actor.get_velocity() - ego.get_velocity() # Now convert to local frame of ego vel_ego_frame = ego.get_transform().inverse_transform(rel_vel) return { 'x': vel_ego_frame.x, 'y': vel_ego_frame.y, 'z': vel_ego_frame.z } def vehicle_light_state_to_dict(vehicle: carla.Vehicle): state = vehicle.get_light_state() return { "position": bool(state & carla.VehicleLightState.Position), "low_beam": bool(state & carla.VehicleLightState.LowBeam), "high_beam": bool(state & carla.VehicleLightState.HighBeam), "brake": bool(state & carla.VehicleLightState.Brake), "reverse": bool(state & carla.VehicleLightState.Reverse), "left_blinker": bool(state & carla.VehicleLightState.LeftBlinker), "right_blinker":bool(state & carla.VehicleLightState.RightBlinker), "fog": bool(state & carla.VehicleLightState.Fog), "interior": bool(state & carla.VehicleLightState.Interior), "special1": bool(state & carla.VehicleLightState.Special1), "special2": bool(state & carla.VehicleLightState.Special2), } def main(): argparser = argparse.ArgumentParser( description='CARLA bounding boxes') argparser.add_argument( '--host', metavar='H', default='127.0.0.1', help='IP of the host server (default: 127.0.0.1)') argparser.add_argument( '-p', '--port', metavar='P', default=2000, type=int, help='TCP port to listen to (default: 2000)') argparser.add_argument( '-d', '--distance', metavar='D', default=50, type=int, help='Actor distance threshold') argparser.add_argument( '--res', metavar='WIDTHxHEIGHT', default='1280x720', help='window resolution (default: 1280x720)') args = argparser.parse_args() args.width, args.height = [int(x) for x in args.res.split('x')] pygame.init() # State variables record = False display_3d = False run_simulation = True clock = pygame.time.Clock() pygame.display.set_caption("Bounding Box Visualization") display = pygame.display.set_mode( (args.width, args.height), pygame.HWSURFACE | pygame.DOUBLEBUF) display.fill((0,0,0)) pygame.display.flip() # Connect to the CARLA server and get the world object client = carla.Client(args.host, args.port) world = client.get_world() # Set up the simulator in synchronous mode settings = world.get_settings() settings.synchronous_mode = True # Enables synchronous mode settings.fixed_delta_seconds = 0.05 world.apply_settings(settings) # Set the traffic manager to Synchronous mode traffic_manager = client.get_trafficmanager() traffic_manager.set_synchronous_mode(True) bp_lib = world.get_blueprint_library() # Get the map spawn points spawn_points = world.get_map().get_spawn_points() # spawn vehicle vehicle_bp =bp_lib.find('vehicle.lincoln.mkz_2020') ego_vehicle = world.try_spawn_actor(vehicle_bp, random.choice(spawn_points)) # spawn RGB camera camera_bp = bp_lib.find('sensor.camera.rgb') camera_bp.set_attribute('image_size_x', str(args.width)) camera_bp.set_attribute('image_size_y', str(args.height)) camera_init_trans = carla.Transform(carla.Location(z=2)) camera = world.spawn_actor(camera_bp, camera_init_trans, attach_to=ego_vehicle) # spawn instance segmentation camera inst_camera_bp = bp_lib.find('sensor.camera.instance_segmentation') inst_camera_bp.set_attribute('image_size_x', str(args.width)) inst_camera_bp.set_attribute('image_size_y', str(args.height)) camera_init_trans = carla.Transform(carla.Location(z=2)) inst_camera = world.spawn_actor(inst_camera_bp, camera_init_trans, attach_to=ego_vehicle) ego_vehicle.set_autopilot(True) # Add some traffic npcs = [] for i in range(100): vehicle_bp = random.choice(bp_lib.filter('vehicle')) npc = world.try_spawn_actor(vehicle_bp, random.choice(spawn_points)) if npc: npc.set_autopilot(True) npcs.append(npc) # Create queues to store and retrieve the sensor data image_queue = queue.Queue() camera.listen(image_queue.put) inst_queue = queue.Queue() inst_camera.listen(inst_queue.put) try: while run_simulation: for event in pygame.event.get(): if event.type == pygame.KEYUP: if event.key == K_r: record = True if event.key == K_2: display_3d = False if event.key == K_3: display_3d = True if event.key == K_ESCAPE: run_simulation = False if event.type == pygame.QUIT: run_simulation = False world.tick() snapshot = world.get_snapshot() json_frame_data = { 'frame_id': snapshot.frame, 'timestamp': snapshot.timestamp.elapsed_seconds, 'objects': [] } image = image_queue.get() img = np.reshape(np.copy(image.raw_data), (image.height, image.width, 4)) if record: image.save_to_disk('_out/%08d' % image.frame) inst_seg_image = inst_queue.get() inst_seg = np.reshape(np.copy(inst_seg_image.raw_data), (inst_seg_image.height, inst_seg_image.width, 4)) # Decode instance segmentation image semantic_labels, actor_ids = decode_instance_segmentation(inst_seg) # Empty list to collect bounding boxes for this frame frame_bboxes = [] # Loop through the NPCs in the simulation for npc in world.get_actors().filter('*vehicle*'): # Filter out the ego vehicle if npc.id !=ego_vehicle.id: npc_bbox = npc.bounding_box dist = npc.get_transform().location.distance(ego_vehicle.get_transform().location) # Filter for the vehicles within 50m if dist < args.distance: # Limit to vehicles in front of the camera forward_vec = camera.get_transform().get_forward_vector() inter_vehicle_vec = npc.get_transform().location - camera.get_transform().location if forward_vec.dot(inter_vehicle_vec) > 0: # Generate 2D and 2D bounding boxes for each actor npc_bbox_2d = bbox_2d_for_actor(npc, actor_ids, semantic_labels) npc_bbox_3d = bbox_3d_for_actor(npc, ego_vehicle, camera_bp, camera) frame_bboxes.append({'3d': npc_bbox_3d, '2d': npc_bbox_2d}) json_frame_data['objects'].append({ 'id': npc.id, 'class': SEMANTIC_MAP[npc.semantic_tags[0]][0], 'blueprint_id': npc.type_id, 'velocity': calculate_relative_velocity(npc, ego_vehicle), 'bbox_3d': npc_bbox_3d['bbox_3d'], 'bbox_2d': { 'xmin': int(npc_bbox_2d['bbox_2d'][0]), 'ymin': int(npc_bbox_2d['bbox_2d'][1]), 'xmax': int(npc_bbox_2d['bbox_2d'][2]), 'ymax': int(npc_bbox_2d['bbox_2d'][3]), } if npc_bbox_2d else None, 'light_state': vehicle_light_state_to_dict(npc) }) # Draw the scene in Pygame display.fill((0,0,0)) if display_3d: visualize_3d_bboxes(display, img, frame_bboxes) else: visualize_2d_bboxes(display, img, frame_bboxes) pygame.display.flip() clock.tick(30) # 30 FPS if record: with open(f"_out/{snapshot.frame}.json", 'w') as f: json.dump(json_frame_data, f) except KeyboardInterrupt: pass finally: ego_vehicle.destroy() camera.stop() camera.destroy() inst_camera.stop() inst_camera.destroy() for npc in npcs: npc.set_autopilot(False) npc.destroy() world.tick() # Set up the simulator in synchronous mode settings = world.get_settings() settings.synchronous_mode = False # Disables synchronous mode settings.fixed_delta_seconds = None world.apply_settings(settings) # Set the traffic manager to Synchronous mode traffic_manager.set_synchronous_mode(False) pygame.quit() print('\ndone.') if __name__ == '__main__': print('Bounding boxes script instructions:') print('R : toggle recording images as PNG and bounding boxes as JSON') print('3 : view the bounding boxes in 3D') print('2 : view the bounding boxes in 2D') print('ESC : quit') main()