#!/usr/bin/env python

# Copyright (c) 2019 Computer Vision Center (CVC) at the Universitat Autonoma de
# Barcelona (UAB).
#
# This work is licensed under the terms of the MIT license.
# For a copy, see <https://opensource.org/licenses/MIT>.

# Generates 2D and 3D bounding boxes for a simulation and can save them as JSON
# Instructions:

"""
Welcome to CARLA bounding boxes.
    R       : toggle recording images and bounding boxes
    3       : visualize bounding boxes in 3D
    2       : vizualize bounding boxes in 2D
    ESC     : quit
"""

import carla
import json
import random
import queue
import pygame
import argparse
import numpy as np
from math import radians

from pygame.locals import K_ESCAPE
from pygame.locals import K_2
from pygame.locals import K_3
from pygame.locals import K_r

# Bounding box edge topology order
EDGES = [[0,1], [1,3], [3,2], [2,0], [0,4], [4,5], [5,1], [5,7], [7,6], [6,4], [6,2], [7,3]]

# Map for CARLA semantic labels to class names and colors
SEMANTIC_MAP = {0: ('unlabelled', (0,0,0)), 1: ('road', (128,64,0)),2: ('sidewalk', (244,35,232)),
                3: ('building', (70,70,70)), 4: ('wall', (102,102,156)), 5: ('fence', (190,153,153)),
                6: ('pole', (153,153,153)), 7: ('traffic light', (250,170,30)), 
                8: ('traffic sign', (220,220,0)), 9: ('vegetation', (107,142,35)),
                10: ('terrain', (152,251,152)), 11: ('sky', (70,130,180)), 
                12: ('pedestrian', (220,20,60)), 13: ('rider', (255,0,0)), 
                14: ('car', (0,0,142)), 15: ('truck', (0,0,70)), 16: ('bus', (0,60,100)), 
                17: ('train', (0,80,100)), 18: ('motorcycle', (0,0,230)), 
                19: ('bicycle', (119,11,32)), 20: ('static', (110,190,160)), 
                21: ('dynamic', (170,120,50)), 22: ('other', (55,90,80)), 
                23: ('water', (45,60,150)), 24: ('road line', (157,234,50)), 
                25: ('ground', (81,0,81)), 26: ('bridge', (150,100,100)), 
                27: ('rail track', (230,150,140)), 28: ('guard rail', (180,165,180))}

# Calculate the camera projection matrix
def build_projection_matrix(w, h, fov, is_behind_camera=False):
    focal = w / (2.0 * np.tan(fov * np.pi / 360.0))
    K = np.identity(3)

    if is_behind_camera:
        K[0, 0] = K[1, 1] = -focal
    else:
        K[0, 0] = K[1, 1] = focal

    K[0, 2] = w / 2.0
    K[1, 2] = h / 2.0
    return K

# Calculate 2D projection of 3D coordinate
def get_image_point(loc, K, w2c):
    
    # Format the input coordinate (loc is a carla.Position object)
    point = np.array([loc.x, loc.y, loc.z, 1])
    # transform to camera coordinates
    point_camera = np.dot(w2c, point)

    # New we must change from UE4's coordinate system to an "standard"
    # (x, y ,z) -> (y, -z, x)
    # and we remove the fourth componebonent also
    point_camera = [point_camera[1], -point_camera[2], point_camera[0]]

    # now project 3D->2D using the camera matrix
    point_img = np.dot(K, point_camera)
    # normalize
    point_img[0] /= point_img[2]
    point_img[1] /= point_img[2]

    return point_img[0:2]

# Verify that the point is within the image plane
def point_in_canvas(pos, img_h, img_w):
    """Return true if point is in canvas"""
    if (pos[0] >= 0) and (pos[0] < img_w) and (pos[1] >= 0) and (pos[1] < img_h):
        return True
    return False

# Decode the instance segmentation map into semantic labels and actor IDs
def decode_instance_segmentation(img_rgba: np.ndarray):
    semantic_labels = img_rgba[..., 2]  # R channel
    actor_ids = img_rgba[..., 1].astype(np.uint16) + (img_rgba[..., 0].astype(np.uint16) << 8)
    return semantic_labels, actor_ids

# Generate a 2D bounding box for an actor from the actor ID image
def bbox_2d_for_actor(actor, actor_ids: np.ndarray, semantic_labels: np.ndarray):
    mask = (actor_ids == actor.id)
    if not np.any(mask):
        return None  # actor not present
    ys, xs = np.where(mask)
    xmin, xmax = xs.min(), xs.max()
    ymin, ymax = ys.min(), ys.max()
    return {'actor_id': actor.id,
            'semantic_label': actor.semantic_tags[0],
            'bbox_2d': (xmin, ymin, xmax, ymax)}

# Generate a 3D bounding box for an actor from the simulation
def bbox_3d_for_actor(actor, ego, camera_bp, camera):

    # Get the world to camera matrix
    world_2_camera = np.array(camera.get_transform().get_inverse_matrix())

     # Get the attributes from the camera
    image_w = camera_bp.get_attribute("image_size_x").as_int()
    image_h = camera_bp.get_attribute("image_size_y").as_int()
    fov = camera_bp.get_attribute("fov").as_float()

    # Calculate the camera projection matrix to project from 3D -> 2D
    K = build_projection_matrix(image_w, image_h, fov)
    K_b = build_projection_matrix(image_w, image_h, fov, is_behind_camera=True)

    ego_bbox_loc = ego.get_transform().location + ego.bounding_box.location
    ego_bbox_transform = carla.Transform(ego_bbox_loc, ego.get_transform().rotation)

    npc_bbox_loc = actor.get_transform().location + actor.bounding_box.location
    #npc_bbox_transform = carla.Transform(npc_bbox_loc, actor.get_transform().rotation)

    npc_loc_ego_space = ego_bbox_transform.inverse_transform(npc_bbox_loc)

    verts = [v for v in actor.bounding_box.get_world_vertices(actor.get_transform())]

    projection = []
    for edge in EDGES:
        p1 = get_image_point(verts[edge[0]], K, world_2_camera)
        p2 = get_image_point(verts[edge[1]],  K, world_2_camera)

        p1_in_canvas = point_in_canvas(p1, image_h, image_w)
        p2_in_canvas = point_in_canvas(p2, image_h, image_w)

        if not p1_in_canvas and not p2_in_canvas:
            continue

        ray0 = verts[edge[0]] - camera.get_transform().location
        ray1 = verts[edge[1]] - camera.get_transform().location
        cam_forward_vec = camera.get_transform().get_forward_vector()

        # One of the vertexes is behind the camera
        if not (cam_forward_vec.dot(ray0) > 0):
            p1 = get_image_point(verts[edge[0]], K_b, world_2_camera)
        if not (cam_forward_vec.dot(ray1) > 0):
            p2 = get_image_point(verts[edge[1]], K_b, world_2_camera)
        
        projection.append((int(p1[0]), int(p1[1]), int(p2[0]), int(p2[1])))

    return {'actor_id': actor.id,
            'semantic_label': actor.semantic_tags[0],
            'bbox_3d': {
                'center': {
                    'x': npc_loc_ego_space.x,
                    'y': npc_loc_ego_space.y,
                    'z': npc_loc_ego_space.z
                },
                'dimensions': {
                    'length': actor.bounding_box.extent.x*2,
                    'width': actor.bounding_box.extent.y*2,
                    'height': actor.bounding_box.extent.z*2,
                },
                'rotation_yaw': radians(actor.get_transform().rotation.yaw - ego.get_transform().rotation.yaw)
            },
            'projection': projection
    }

# Visualize 2D bounding boxes in Pygame
def visualize_2d_bboxes(surface, img, bboxes):

    rgb_img = img[:, :, :3][:, :, ::-1] 
    frame_surface = pygame.surfarray.make_surface(np.transpose(rgb_img[..., 0:3], (1,0,2)))
    surface.blit(frame_surface, (0, 0))

    font = pygame.font.SysFont("Arial", 18)

    for item in bboxes:
        bbox = item['2d']
        if bbox is not None:
            xmin, ymin, xmax, ymax = [int(v) for v in bbox['bbox_2d']]
            label = SEMANTIC_MAP[bbox['semantic_label']][0]
            color = SEMANTIC_MAP[bbox['semantic_label']][1]
            pygame.draw.rect(surface, color, pygame.Rect(xmin, ymin, xmax-xmin, ymax-ymin), 2)
            text_surface = font.render(label, True, (255,255,255), color) 
            text_rect = text_surface.get_rect(topleft=(xmin, ymin-20))
            surface.blit(text_surface, text_rect)

    return surface

# Visualize 3D bounding boxes in Pygame
def visualize_3d_bboxes(surface, img, bboxes):

    rgb_img = img[:, :, :3][:, :, ::-1] 
    frame_surface = pygame.surfarray.make_surface(np.transpose(rgb_img[..., 0:3], (1,0,2)))
    surface.blit(frame_surface, (0, 0))

    for item in bboxes:
        bbox = item['3d']
        color = SEMANTIC_MAP[bbox['semantic_label']][1]

        n = 0
        mean_x = 0
        mean_y = 0
        for line in bbox['projection']:
            mean_x += line[0]
            mean_y += line[1]
            n += 1
            pygame.draw.line(surface, color, (line[0], line[1]), (line[2],line[3]), 2)

        if n > 0:
            mean_x /= n
            mean_y /= n

            # --- Render label ---
            font = pygame.font.SysFont("Arial", 18)
            text_surface = font.render(SEMANTIC_MAP[bbox['semantic_label']][0], True, (255,255,255), color)  # black text, filled bg
            text_rect = text_surface.get_rect(topleft=(mean_x, mean_y))
            surface.blit(text_surface, text_rect)

def calculate_relative_velocity(actor, ego):
    # Calculate the relative velocity in world frame
    rel_vel = actor.get_velocity() - ego.get_velocity()
    # Now convert to local frame of ego
    vel_ego_frame = ego.get_transform().inverse_transform(rel_vel)

    return {
        'x': vel_ego_frame.x,
        'y': vel_ego_frame.y,
        'z': vel_ego_frame.z
    }

def vehicle_light_state_to_dict(vehicle: carla.Vehicle):
    state = vehicle.get_light_state()
    return {
        "position":     bool(state & carla.VehicleLightState.Position),
        "low_beam":     bool(state & carla.VehicleLightState.LowBeam),
        "high_beam":    bool(state & carla.VehicleLightState.HighBeam),
        "brake":        bool(state & carla.VehicleLightState.Brake),
        "reverse":      bool(state & carla.VehicleLightState.Reverse),
        "left_blinker": bool(state & carla.VehicleLightState.LeftBlinker),
        "right_blinker":bool(state & carla.VehicleLightState.RightBlinker),
        "fog":          bool(state & carla.VehicleLightState.Fog),
        "interior":     bool(state & carla.VehicleLightState.Interior),
        "special1":     bool(state & carla.VehicleLightState.Special1),
        "special2":     bool(state & carla.VehicleLightState.Special2),
    }

def main():

    argparser = argparse.ArgumentParser(
        description='CARLA bounding boxes')
    argparser.add_argument(
        '--host',
        metavar='H',
        default='127.0.0.1',
        help='IP of the host server (default: 127.0.0.1)')
    argparser.add_argument(
        '-p', '--port',
        metavar='P',
        default=2000,
        type=int,
        help='TCP port to listen to (default: 2000)')
    argparser.add_argument(
        '-d', '--distance',
        metavar='D',
        default=50,
        type=int,
        help='Actor distance threshold')
    argparser.add_argument(
        '--res',
        metavar='WIDTHxHEIGHT',
        default='1280x720',
        help='window resolution (default: 1280x720)')
    args = argparser.parse_args()

    args.width, args.height = [int(x) for x in args.res.split('x')]

    pygame.init()

    # State variables
    record = False
    display_3d = False
    run_simulation = True

    clock = pygame.time.Clock()
    pygame.display.set_caption("Bounding Box Visualization")
    display = pygame.display.set_mode(
            (args.width, args.height),
            pygame.HWSURFACE | pygame.DOUBLEBUF)
    display.fill((0,0,0))
    pygame.display.flip()

    # Connect to the CARLA server and get the world object
    client = carla.Client(args.host, args.port)
    world  = client.get_world()

    # Set up the simulator in synchronous mode
    settings = world.get_settings()
    settings.synchronous_mode = True # Enables synchronous mode
    settings.fixed_delta_seconds = 0.05
    world.apply_settings(settings)

    # Set the traffic manager to Synchronous mode
    traffic_manager = client.get_trafficmanager()
    traffic_manager.set_synchronous_mode(True)

    bp_lib = world.get_blueprint_library()

    # Get the map spawn points
    spawn_points = world.get_map().get_spawn_points()

    # spawn vehicle
    vehicle_bp =bp_lib.find('vehicle.lincoln.mkz_2020')
    ego_vehicle = world.try_spawn_actor(vehicle_bp, random.choice(spawn_points))

    # spawn RGB camera
    camera_bp = bp_lib.find('sensor.camera.rgb')
    camera_bp.set_attribute('image_size_x', str(args.width))
    camera_bp.set_attribute('image_size_y', str(args.height))
    camera_init_trans = carla.Transform(carla.Location(z=2))
    camera = world.spawn_actor(camera_bp, camera_init_trans, attach_to=ego_vehicle)

    # spawn instance segmentation camera
    inst_camera_bp = bp_lib.find('sensor.camera.instance_segmentation')
    inst_camera_bp.set_attribute('image_size_x', str(args.width))
    inst_camera_bp.set_attribute('image_size_y', str(args.height))
    camera_init_trans = carla.Transform(carla.Location(z=2))
    inst_camera = world.spawn_actor(inst_camera_bp, camera_init_trans, attach_to=ego_vehicle)

    ego_vehicle.set_autopilot(True)

    # Add some traffic
    npcs = []
    for i in range(100):
        vehicle_bp = random.choice(bp_lib.filter('vehicle'))
        npc = world.try_spawn_actor(vehicle_bp, random.choice(spawn_points))
        if npc:
            npc.set_autopilot(True)
            npcs.append(npc)

    # Create queues to store and retrieve the sensor data
    image_queue = queue.Queue()
    camera.listen(image_queue.put)

    inst_queue = queue.Queue()
    inst_camera.listen(inst_queue.put)

    try:
        while run_simulation:
            for event in pygame.event.get():
                if event.type == pygame.KEYUP:
                    if event.key == K_r:
                        record = True
                    if event.key == K_2:
                        display_3d = False
                    if event.key == K_3:
                        display_3d = True
                    if event.key == K_ESCAPE:
                        run_simulation = False
                if event.type == pygame.QUIT:
                    run_simulation = False

            world.tick()
            snapshot = world.get_snapshot()

            json_frame_data = {
                'frame_id': snapshot.frame,
                'timestamp': snapshot.timestamp.elapsed_seconds,
                'objects': [] 
            }

            image = image_queue.get()
            img = np.reshape(np.copy(image.raw_data), (image.height, image.width, 4))

            if record:
                image.save_to_disk('_out/%08d' % image.frame)

            inst_seg_image = inst_queue.get()
            inst_seg = np.reshape(np.copy(inst_seg_image.raw_data), (inst_seg_image.height, inst_seg_image.width, 4))

            # Decode instance segmentation image
            semantic_labels, actor_ids = decode_instance_segmentation(inst_seg)

            # Empty list to collect bounding boxes for this frame
            frame_bboxes = []

            # Loop through the NPCs in the simulation
            for npc in world.get_actors().filter('*vehicle*'):

                # Filter out the ego vehicle
                if npc.id !=ego_vehicle.id:

                    npc_bbox = npc.bounding_box
                    dist = npc.get_transform().location.distance(ego_vehicle.get_transform().location)

                    # Filter for the vehicles within 50m
                    if dist < args.distance:

                        # Limit to vehicles in front of the camera
                        forward_vec = camera.get_transform().get_forward_vector()
                        inter_vehicle_vec = npc.get_transform().location - camera.get_transform().location

                        if forward_vec.dot(inter_vehicle_vec) > 0:
                            
                            # Generate 2D and 2D bounding boxes for each actor
                            npc_bbox_2d = bbox_2d_for_actor(npc, actor_ids, semantic_labels)
                            npc_bbox_3d = bbox_3d_for_actor(npc, ego_vehicle, camera_bp, camera)

                            frame_bboxes.append({'3d': npc_bbox_3d, '2d': npc_bbox_2d})

                            json_frame_data['objects'].append({
                                'id': npc.id,
                                'class': SEMANTIC_MAP[npc.semantic_tags[0]][0],
                                'blueprint_id': npc.type_id,
                                'velocity': calculate_relative_velocity(npc, ego_vehicle),
                                'bbox_3d': npc_bbox_3d['bbox_3d'],
                                'bbox_2d': {
                                    'xmin': int(npc_bbox_2d['bbox_2d'][0]),
                                    'ymin': int(npc_bbox_2d['bbox_2d'][1]),
                                    'xmax': int(npc_bbox_2d['bbox_2d'][2]),
                                    'ymax': int(npc_bbox_2d['bbox_2d'][3]),
                                } if npc_bbox_2d else None,
                                'light_state': vehicle_light_state_to_dict(npc)

                            })

            # Draw the scene in Pygame
            display.fill((0,0,0))
            if display_3d:
                visualize_3d_bboxes(display, img, frame_bboxes)
            else:
                visualize_2d_bboxes(display, img, frame_bboxes)
            pygame.display.flip()
            clock.tick(30)  # 30 FPS              
            if record:
                with open(f"_out/{snapshot.frame}.json", 'w') as f:
                    json.dump(json_frame_data, f)

    except KeyboardInterrupt:
        pass
    finally:
        
        ego_vehicle.destroy()
        camera.stop()
        camera.destroy()
        inst_camera.stop()
        inst_camera.destroy()
        for npc in npcs:
            npc.set_autopilot(False)
            npc.destroy()

        world.tick()

        # Set up the simulator in synchronous mode
        settings = world.get_settings()
        settings.synchronous_mode = False # Disables synchronous mode
        settings.fixed_delta_seconds = None
        world.apply_settings(settings)

        # Set the traffic manager to Synchronous mode
        traffic_manager.set_synchronous_mode(False)

        pygame.quit()

        print('\ndone.')


if __name__ == '__main__':
    print('Bounding boxes script instructions:')
    print('R    : toggle recording images as PNG and bounding boxes as JSON')
    print('3    : view the bounding boxes in 3D')
    print('2    : view the bounding boxes in 2D')
    print('ESC  : quit')
    main()