Simulator/carla_examples/bounding_boxes.py


								#!/usr/bin/env python


								# Copyright (c) 2019 Computer Vision Center (CVC) at the Universitat Autonoma de

								# Barcelona (UAB).

								#

								# This work is licensed under the terms of the MIT license.

								# For a copy, see <https://opensource.org/licenses/MIT>.


								# Generates 2D and 3D bounding boxes for a simulation and can save them as JSON

								# Instructions:


								"""

								Welcome to CARLA bounding boxes.

								    R       : toggle recording images and bounding boxes

								    3       : visualize bounding boxes in 3D

								    2       : vizualize bounding boxes in 2D

								    ESC     : quit

								"""


								import carla

								import json

								import random

								import queue

								import pygame

								import argparse

								import numpy as np

								from math import radians


								from pygame.locals import K_ESCAPE

								from pygame.locals import K_2

								from pygame.locals import K_3

								from pygame.locals import K_r


								# Bounding box edge topology order

								EDGES = [[0,1], [1,3], [3,2], [2,0], [0,4], [4,5], [5,1], [5,7], [7,6], [6,4], [6,2], [7,3]]


								# Map for CARLA semantic labels to class names and colors

								SEMANTIC_MAP = {0: ('unlabelled', (0,0,0)), 1: ('road', (128,64,0)),2: ('sidewalk', (244,35,232)),

								                3: ('building', (70,70,70)), 4: ('wall', (102,102,156)), 5: ('fence', (190,153,153)),

								                6: ('pole', (153,153,153)), 7: ('traffic light', (250,170,30)),

								                8: ('traffic sign', (220,220,0)), 9: ('vegetation', (107,142,35)),

								                10: ('terrain', (152,251,152)), 11: ('sky', (70,130,180)),

								                12: ('pedestrian', (220,20,60)), 13: ('rider', (255,0,0)),

								                14: ('car', (0,0,142)), 15: ('truck', (0,0,70)), 16: ('bus', (0,60,100)),

								                17: ('train', (0,80,100)), 18: ('motorcycle', (0,0,230)),

								                19: ('bicycle', (119,11,32)), 20: ('static', (110,190,160)),

								                21: ('dynamic', (170,120,50)), 22: ('other', (55,90,80)),

								                23: ('water', (45,60,150)), 24: ('road line', (157,234,50)),

								                25: ('ground', (81,0,81)), 26: ('bridge', (150,100,100)),

								                27: ('rail track', (230,150,140)), 28: ('guard rail', (180,165,180))}


								# Calculate the camera projection matrix

								def build_projection_matrix(w, h, fov, is_behind_camera=False):

								    focal = w / (2.0 * np.tan(fov * np.pi / 360.0))

								    K = np.identity(3)


								    if is_behind_camera:

								        K[0, 0] = K[1, 1] = -focal

								    else:

								        K[0, 0] = K[1, 1] = focal


								    K[0, 2] = w / 2.0

								    K[1, 2] = h / 2.0

								    return K


								# Calculate 2D projection of 3D coordinate

								def get_image_point(loc, K, w2c):


								    # Format the input coordinate (loc is a carla.Position object)

								    point = np.array([loc.x, loc.y, loc.z, 1])

								    # transform to camera coordinates

								    point_camera = np.dot(w2c, point)


								    # New we must change from UE4's coordinate system to an "standard"

								    # (x, y ,z) -> (y, -z, x)

								    # and we remove the fourth componebonent also

								    point_camera = [point_camera[1], -point_camera[2], point_camera[0]]


								    # now project 3D->2D using the camera matrix

								    point_img = np.dot(K, point_camera)

								    # normalize

								    point_img[0] /= point_img[2]

								    point_img[1] /= point_img[2]


								    return point_img[0:2]


								# Verify that the point is within the image plane

								def point_in_canvas(pos, img_h, img_w):

								    """Return true if point is in canvas"""

								    if (pos[0] >= 0) and (pos[0] < img_w) and (pos[1] >= 0) and (pos[1] < img_h):

								        return True

								    return False


								# Decode the instance segmentation map into semantic labels and actor IDs

								def decode_instance_segmentation(img_rgba: np.ndarray):

								    semantic_labels = img_rgba[..., 2]  # R channel

								    actor_ids = img_rgba[..., 1].astype(np.uint16) + (img_rgba[..., 0].astype(np.uint16) << 8)

								    return semantic_labels, actor_ids


								# Generate a 2D bounding box for an actor from the actor ID image

								def bbox_2d_for_actor(actor, actor_ids: np.ndarray, semantic_labels: np.ndarray):

								    mask = (actor_ids == actor.id)

								    if not np.any(mask):

								        return None  # actor not present

								    ys, xs = np.where(mask)

								    xmin, xmax = xs.min(), xs.max()

								    ymin, ymax = ys.min(), ys.max()

								    return {'actor_id': actor.id,

								            'semantic_label': actor.semantic_tags[0],

								            'bbox_2d': (xmin, ymin, xmax, ymax)}


								# Generate a 3D bounding box for an actor from the simulation

								def bbox_3d_for_actor(actor, ego, camera_bp, camera):


								    # Get the world to camera matrix

								    world_2_camera = np.array(camera.get_transform().get_inverse_matrix())


								     # Get the attributes from the camera

								    image_w = camera_bp.get_attribute("image_size_x").as_int()

								    image_h = camera_bp.get_attribute("image_size_y").as_int()

								    fov = camera_bp.get_attribute("fov").as_float()


								    # Calculate the camera projection matrix to project from 3D -> 2D

								    K = build_projection_matrix(image_w, image_h, fov)

								    K_b = build_projection_matrix(image_w, image_h, fov, is_behind_camera=True)


								    ego_bbox_loc = ego.get_transform().location + ego.bounding_box.location

								    ego_bbox_transform = carla.Transform(ego_bbox_loc, ego.get_transform().rotation)


								    npc_bbox_loc = actor.get_transform().location + actor.bounding_box.location

								    #npc_bbox_transform = carla.Transform(npc_bbox_loc, actor.get_transform().rotation)


								    npc_loc_ego_space = ego_bbox_transform.inverse_transform(npc_bbox_loc)


								    verts = [v for v in actor.bounding_box.get_world_vertices(actor.get_transform())]


								    projection = []

								    for edge in EDGES:

								        p1 = get_image_point(verts[edge[0]], K, world_2_camera)

								        p2 = get_image_point(verts[edge[1]],  K, world_2_camera)


								        p1_in_canvas = point_in_canvas(p1, image_h, image_w)

								        p2_in_canvas = point_in_canvas(p2, image_h, image_w)


								        if not p1_in_canvas and not p2_in_canvas:

								            continue


								        ray0 = verts[edge[0]] - camera.get_transform().location

								        ray1 = verts[edge[1]] - camera.get_transform().location

								        cam_forward_vec = camera.get_transform().get_forward_vector()


								        # One of the vertexes is behind the camera

								        if not (cam_forward_vec.dot(ray0) > 0):

								            p1 = get_image_point(verts[edge[0]], K_b, world_2_camera)

								        if not (cam_forward_vec.dot(ray1) > 0):

								            p2 = get_image_point(verts[edge[1]], K_b, world_2_camera)


								        projection.append((int(p1[0]), int(p1[1]), int(p2[0]), int(p2[1])))


								    return {'actor_id': actor.id,

								            'semantic_label': actor.semantic_tags[0],

								            'bbox_3d': {

								                'center': {

								                    'x': npc_loc_ego_space.x,

								                    'y': npc_loc_ego_space.y,

								                    'z': npc_loc_ego_space.z

								                },

								                'dimensions': {

								                    'length': actor.bounding_box.extent.x*2,

								                    'width': actor.bounding_box.extent.y*2,

								                    'height': actor.bounding_box.extent.z*2,

								                },

								                'rotation_yaw': radians(actor.get_transform().rotation.yaw - ego.get_transform().rotation.yaw)

								            },

								            'projection': projection

								    }


								# Visualize 2D bounding boxes in Pygame

								def visualize_2d_bboxes(surface, img, bboxes):


								    rgb_img = img[:, :, :3][:, :, ::-1]

								    frame_surface = pygame.surfarray.make_surface(np.transpose(rgb_img[..., 0:3], (1,0,2)))

								    surface.blit(frame_surface, (0, 0))


								    font = pygame.font.SysFont("Arial", 18)


								    for item in bboxes:

								        bbox = item['2d']

								        if bbox is not None:

								            xmin, ymin, xmax, ymax = [int(v) for v in bbox['bbox_2d']]

								            label = SEMANTIC_MAP[bbox['semantic_label']][0]

								            color = SEMANTIC_MAP[bbox['semantic_label']][1]

								            pygame.draw.rect(surface, color, pygame.Rect(xmin, ymin, xmax-xmin, ymax-ymin), 2)

								            text_surface = font.render(label, True, (255,255,255), color)

								            text_rect = text_surface.get_rect(topleft=(xmin, ymin-20))

								            surface.blit(text_surface, text_rect)


								    return surface


								# Visualize 3D bounding boxes in Pygame

								def visualize_3d_bboxes(surface, img, bboxes):


								    rgb_img = img[:, :, :3][:, :, ::-1]

								    frame_surface = pygame.surfarray.make_surface(np.transpose(rgb_img[..., 0:3], (1,0,2)))

								    surface.blit(frame_surface, (0, 0))


								    for item in bboxes:

								        bbox = item['3d']

								        color = SEMANTIC_MAP[bbox['semantic_label']][1]


								        n = 0

								        mean_x = 0

								        mean_y = 0

								        for line in bbox['projection']:

								            mean_x += line[0]

								            mean_y += line[1]

								            n += 1

								            pygame.draw.line(surface, color, (line[0], line[1]), (line[2],line[3]), 2)


								        if n > 0:

								            mean_x /= n

								            mean_y /= n


								            # --- Render label ---

								            font = pygame.font.SysFont("Arial", 18)

								            text_surface = font.render(SEMANTIC_MAP[bbox['semantic_label']][0], True, (255,255,255), color)  # black text, filled bg

								            text_rect = text_surface.get_rect(topleft=(mean_x, mean_y))

								            surface.blit(text_surface, text_rect)


								def calculate_relative_velocity(actor, ego):

								    # Calculate the relative velocity in world frame

								    rel_vel = actor.get_velocity() - ego.get_velocity()

								    # Now convert to local frame of ego

								    vel_ego_frame = ego.get_transform().inverse_transform(rel_vel)


								    return {

								        'x': vel_ego_frame.x,

								        'y': vel_ego_frame.y,

								        'z': vel_ego_frame.z

								    }


								def vehicle_light_state_to_dict(vehicle: carla.Vehicle):

								    state = vehicle.get_light_state()

								    return {

								        "position":     bool(state & carla.VehicleLightState.Position),

								        "low_beam":     bool(state & carla.VehicleLightState.LowBeam),

								        "high_beam":    bool(state & carla.VehicleLightState.HighBeam),

								        "brake":        bool(state & carla.VehicleLightState.Brake),

								        "reverse":      bool(state & carla.VehicleLightState.Reverse),

								        "left_blinker": bool(state & carla.VehicleLightState.LeftBlinker),

								        "right_blinker":bool(state & carla.VehicleLightState.RightBlinker),

								        "fog":          bool(state & carla.VehicleLightState.Fog),

								        "interior":     bool(state & carla.VehicleLightState.Interior),

								        "special1":     bool(state & carla.VehicleLightState.Special1),

								        "special2":     bool(state & carla.VehicleLightState.Special2),

								    }


								def main():


								    argparser = argparse.ArgumentParser(

								        description='CARLA bounding boxes')

								    argparser.add_argument(

								        '--host',

								        metavar='H',

								        default='127.0.0.1',

								        help='IP of the host server (default: 127.0.0.1)')

								    argparser.add_argument(

								        '-p', '--port',

								        metavar='P',

								        default=2000,

								        type=int,

								        help='TCP port to listen to (default: 2000)')

								    argparser.add_argument(

								        '-d', '--distance',

								        metavar='D',

								        default=50,

								        type=int,

								        help='Actor distance threshold')

								    argparser.add_argument(

								        '--res',

								        metavar='WIDTHxHEIGHT',

								        default='1280x720',

								        help='window resolution (default: 1280x720)')

								    args = argparser.parse_args()


								    args.width, args.height = [int(x) for x in args.res.split('x')]


								    pygame.init()


								    # State variables

								    record = False

								    display_3d = False

								    run_simulation = True


								    clock = pygame.time.Clock()

								    pygame.display.set_caption("Bounding Box Visualization")

								    display = pygame.display.set_mode(

								            (args.width, args.height),

								            pygame.HWSURFACE | pygame.DOUBLEBUF)

								    display.fill((0,0,0))

								    pygame.display.flip()


								    # Connect to the CARLA server and get the world object

								    client = carla.Client(args.host, args.port)

								    world  = client.get_world()


								    # Set up the simulator in synchronous mode

								    settings = world.get_settings()

								    settings.synchronous_mode = True # Enables synchronous mode

								    settings.fixed_delta_seconds = 0.05

								    world.apply_settings(settings)


								    # Set the traffic manager to Synchronous mode

								    traffic_manager = client.get_trafficmanager()

								    traffic_manager.set_synchronous_mode(True)


								    bp_lib = world.get_blueprint_library()


								    # Get the map spawn points

								    spawn_points = world.get_map().get_spawn_points()


								    # spawn vehicle

								    vehicle_bp =bp_lib.find('vehicle.lincoln.mkz_2020')

								    ego_vehicle = world.try_spawn_actor(vehicle_bp, random.choice(spawn_points))


								    # spawn RGB camera

								    camera_bp = bp_lib.find('sensor.camera.rgb')

								    camera_bp.set_attribute('image_size_x', str(args.width))

								    camera_bp.set_attribute('image_size_y', str(args.height))

								    camera_init_trans = carla.Transform(carla.Location(z=2))

								    camera = world.spawn_actor(camera_bp, camera_init_trans, attach_to=ego_vehicle)


								    # spawn instance segmentation camera

								    inst_camera_bp = bp_lib.find('sensor.camera.instance_segmentation')

								    inst_camera_bp.set_attribute('image_size_x', str(args.width))

								    inst_camera_bp.set_attribute('image_size_y', str(args.height))

								    camera_init_trans = carla.Transform(carla.Location(z=2))

								    inst_camera = world.spawn_actor(inst_camera_bp, camera_init_trans, attach_to=ego_vehicle)


								    ego_vehicle.set_autopilot(True)


								    # Add some traffic

								    npcs = []

								    for i in range(100):

								        vehicle_bp = random.choice(bp_lib.filter('vehicle'))

								        npc = world.try_spawn_actor(vehicle_bp, random.choice(spawn_points))

								        if npc:

								            npc.set_autopilot(True)

								            npcs.append(npc)


								    # Create queues to store and retrieve the sensor data

								    image_queue = queue.Queue()

								    camera.listen(image_queue.put)


								    inst_queue = queue.Queue()

								    inst_camera.listen(inst_queue.put)


								    try:

								        while run_simulation:

								            for event in pygame.event.get():

								                if event.type == pygame.KEYUP:

								                    if event.key == K_r:

								                        record = True

								                    if event.key == K_2:

								                        display_3d = False

								                    if event.key == K_3:

								                        display_3d = True

								                    if event.key == K_ESCAPE:

								                        run_simulation = False

								                if event.type == pygame.QUIT:

								                    run_simulation = False


								            world.tick()

								            snapshot = world.get_snapshot()


								            json_frame_data = {

								                'frame_id': snapshot.frame,

								                'timestamp': snapshot.timestamp.elapsed_seconds,

								                'objects': []

								            }


								            image = image_queue.get()

								            img = np.reshape(np.copy(image.raw_data), (image.height, image.width, 4))


								            if record:

								                image.save_to_disk('_out/%08d' % image.frame)


								            inst_seg_image = inst_queue.get()

								            inst_seg = np.reshape(np.copy(inst_seg_image.raw_data), (inst_seg_image.height, inst_seg_image.width, 4))


								            # Decode instance segmentation image

								            semantic_labels, actor_ids = decode_instance_segmentation(inst_seg)


								            # Empty list to collect bounding boxes for this frame

								            frame_bboxes = []


								            # Loop through the NPCs in the simulation

								            for npc in world.get_actors().filter('*vehicle*'):


								                # Filter out the ego vehicle

								                if npc.id !=ego_vehicle.id:


								                    npc_bbox = npc.bounding_box

								                    dist = npc.get_transform().location.distance(ego_vehicle.get_transform().location)


								                    # Filter for the vehicles within 50m

								                    if dist < args.distance:


								                        # Limit to vehicles in front of the camera

								                        forward_vec = camera.get_transform().get_forward_vector()

								                        inter_vehicle_vec = npc.get_transform().location - camera.get_transform().location


								                        if forward_vec.dot(inter_vehicle_vec) > 0:


								                            # Generate 2D and 2D bounding boxes for each actor

								                            npc_bbox_2d = bbox_2d_for_actor(npc, actor_ids, semantic_labels)

								                            npc_bbox_3d = bbox_3d_for_actor(npc, ego_vehicle, camera_bp, camera)


								                            frame_bboxes.append({'3d': npc_bbox_3d, '2d': npc_bbox_2d})


								                            json_frame_data['objects'].append({

								                                'id': npc.id,

								                                'class': SEMANTIC_MAP[npc.semantic_tags[0]][0],

								                                'blueprint_id': npc.type_id,

								                                'velocity': calculate_relative_velocity(npc, ego_vehicle),

								                                'bbox_3d': npc_bbox_3d['bbox_3d'],

								                                'bbox_2d': {

								                                    'xmin': int(npc_bbox_2d['bbox_2d'][0]),

								                                    'ymin': int(npc_bbox_2d['bbox_2d'][1]),

								                                    'xmax': int(npc_bbox_2d['bbox_2d'][2]),

								                                    'ymax': int(npc_bbox_2d['bbox_2d'][3]),

								                                } if npc_bbox_2d else None,

								                                'light_state': vehicle_light_state_to_dict(npc)


								                            })


								            # Draw the scene in Pygame

								            display.fill((0,0,0))

								            if display_3d:

								                visualize_3d_bboxes(display, img, frame_bboxes)

								            else:

								                visualize_2d_bboxes(display, img, frame_bboxes)

								            pygame.display.flip()

								            clock.tick(30)  # 30 FPS

								            if record:

								                with open(f"_out/{snapshot.frame}.json", 'w') as f:

								                    json.dump(json_frame_data, f)


								    except KeyboardInterrupt:

								        pass

								    finally:


								        ego_vehicle.destroy()

								        camera.stop()

								        camera.destroy()

								        inst_camera.stop()

								        inst_camera.destroy()

								        for npc in npcs:

								            npc.set_autopilot(False)

								            npc.destroy()


								        world.tick()


								        # Set up the simulator in synchronous mode

								        settings = world.get_settings()

								        settings.synchronous_mode = False # Disables synchronous mode

								        settings.fixed_delta_seconds = None

								        world.apply_settings(settings)


								        # Set the traffic manager to Synchronous mode

								        traffic_manager.set_synchronous_mode(False)


								        pygame.quit()


								        print('\ndone.')


								if __name__ == '__main__':

								    print('Bounding boxes script instructions:')

								    print('R    : toggle recording images as PNG and bounding boxes as JSON')

								    print('3    : view the bounding boxes in 3D')

								    print('2    : view the bounding boxes in 2D')

								    print('ESC  : quit')

								    main()