robocar-training/src/tf_container/train.py

#!/usr/bin/env python3

import os

# import container_support as cs
import argparse
import json

import numpy as np
import re
import tensorflow as tf
import zipfile
# from tensorflow.keras import backend as K
from tensorflow.keras import callbacks
from tensorflow.keras.layers import Convolution2D
from tensorflow.keras.layers import Dropout, Flatten, Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.python.client import device_lib


def linear_bin(a: float, N: int = 15, offset: int = 1, R: float = 2.0):
    """
    One-hot encode a scalar into an array of N bins.

    The value is shifted by ``offset``, divided by the bin width
    ``R / (N - offset)`` and rounded to the nearest bin index. Indices that
    fall outside ``[0, N - 1]`` are clamped to the nearest end.

    :param a:       value to encode
    :param N:       number of bins, i.e. length of the returned array
    :param offset:  shift added to ``a`` before binning
    :param R:       width of the value range spread over the bins
    :return:        numpy array of N zeros with a single 1 at the chosen bin
    """
    shifted = a + offset
    bin_width = R / (N - offset)
    raw_index = round(shifted / bin_width)
    one_hot = np.zeros(N)
    # clamp() keeps out-of-range values in the first or last bin.
    one_hot[int(clamp(raw_index, 0, N - 1))] = 1
    return one_hot


def clamp(n, min, max):
    """
    Restrict ``n`` to the closed interval ``[min, max]``.

    The lower bound is checked first, so when ``n <= min`` the result is
    ``min`` regardless of ``max``.

    :param n:    value to restrict
    :param min:  lower bound, returned when ``n <= min``
    :param max:  upper bound, returned when ``n >= max``
    :return:     ``min``, ``max`` or ``n`` itself
    """
    if n <= min:
        return min
    return max if n >= max else n


def get_data(root_dir, filename):
    """
    Load one JSON record file and return the fields used for training.

    :param root_dir:  directory that contains the record file
    :param filename:  name of the JSON record file inside ``root_dir``
    :return:          list ``[angle, root_dir, image]`` where ``angle`` is the
                      record's ``'user/angle'`` entry and ``image`` its
                      ``'cam/image_array'`` entry
    :raises KeyError: if the record lacks one of the two entries
    """
    print('load data from file ' + filename)
    # Use a context manager so the file handle is closed right away; many
    # record files are read in a row and must not pile up open descriptors.
    with open(os.path.join(root_dir, filename)) as record_file:
        d = json.load(record_file)
    return [d['user/angle'], root_dir, d['cam/image_array']]


# Pre-compiled pattern that captures runs of one or more decimal digits.
# NOTE(review): not referenced anywhere in the visible part of this file.
numbers = re.compile(r'(\d+)')


def unzip_file(root, f):
    """
    Extract every member of the zip archive ``f`` into its own directory.

    :param root:  directory that contains the archive; also the extraction
                  target
    :param f:     file name of the zip archive inside ``root``
    """
    # The context manager closes the archive even when extraction fails.
    with zipfile.ZipFile(os.path.join(root, f), 'r') as zip_ref:
        zip_ref.extractall(root)


def train(batch_size: int, slide_size: int, img_height: int, img_width: int, img_depth: int, horizon: int, drop: float):
    """
    Train the categorical steering model and export it.

    The function unzips every archive found under ``/opt/ml/input/data/train``,
    loads all ``record*.json`` files together with the image each one names,
    fits ``default_model`` on (image, binned angle) pairs and writes the
    results under ``/opt/ml/model``: the best checkpoint (``model_cat``),
    TensorBoard logs, a SavedModel (``tfModel``) and a fully integer-quantized
    TFLite file.

    :param batch_size:  batch size passed to ``model.fit``
    :param slide_size:  when > 0, image ``i`` is paired with the angle of
                        record ``i + slide_size``
    :param img_height:  height of the source images in pixels
    :param img_width:   width of the source images in pixels
    :param img_depth:   number of image channels
    :param horizon:     when > 0, number of rows cropped from the top of
                        every image
    :param drop:        dropout rate passed to the model
    :raises ValueError: if no record file is found, or if ``slide_size`` is
                        not smaller than the number of records
    """
    # env = cs.TrainingEnvironment()

    print(device_lib.list_local_devices())
    # Best effort, like the former `mkdir -p` shell call, but without
    # spawning a shell: a failure here must not abort the training.
    try:
        os.makedirs('logs', exist_ok=True)
    except OSError as err:
        print('unable to create logs directory: ' + str(err))

    # ### Loading the files ###
    # Training data is read from /opt/ml/input/data/train; zip archives found
    # there are extracted in place before the records are collected.

    data = []

    for root, dirs, files in os.walk('/opt/ml/input/data/train'):
        for f in files:
            if f.endswith('.zip'):
                unzip_file(root, f)

    for root, dirs, files in os.walk('/opt/ml/input/data/train'):
        data.extend(
            [get_data(root, f) for f in sorted(files, key=str.lower) if f.startswith('record') and f.endswith('.json')])

    # Fail early with a clear message instead of an obscure shape error
    # raised later from inside model.fit.
    if not data:
        raise ValueError('no record*.json file found under /opt/ml/input/data/train')
    if slide_size > 0 and slide_size >= len(data):
        raise ValueError('slide_size (' + str(slide_size) + ') must be smaller than the number of records ('
                         + str(len(data)) + ')')

    # ### Loading throttle and angle ###

    angle = [d[0] for d in data]
    angle_array = np.array(angle)

    # ### Loading images ###
    def load_image(record):
        # record is [angle, directory, image file name] as built by get_data
        img = load_img(os.path.join(record[1], record[2]))
        if horizon > 0:
            # keep only the rows from `horizon` down to the bottom of the image
            img = img.crop((0, horizon, img_width, img_height))
        return img_to_array(img)

    images = np.array([load_image(d) for d in data], 'f')

    # slide images vs orders
    if slide_size > 0:
        images = images[:len(images) - slide_size]
        angle_array = angle_array[slide_size:]

    # ### Start training ###
    from datetime import datetime
    logdir = '/opt/ml/model/logs/' + datetime.now().strftime("%Y%m%d-%H%M%S")
    logs = callbacks.TensorBoard(log_dir=logdir, histogram_freq=0, write_graph=True, write_images=True)

    # Creates a file writer for the log directory.
    # file_writer = tf.summary.create_file_writer(logdir)

    # Using the file writer, log the reshaped image.
    # with file_writer.as_default():
    #    # Don't forget to reshape.
    #    imgs = np.reshape(images[0:25], (-1, img_height, img_width, img_depth))
    #    tf.summary.image("25 training data examples", imgs, max_outputs=25, step=0)

    # Keep only the checkpoint with the lowest validation loss.
    save_best = callbacks.ModelCheckpoint('/opt/ml/model/model_cat', monitor='val_loss', verbose=1,
                                          save_best_only=True, mode='min')
    # Stop once val_loss has not improved by at least 0.0005 for 5 epochs.
    early_stop = callbacks.EarlyStopping(monitor='val_loss',
                                         min_delta=.0005,
                                         patience=5,
                                         verbose=1,
                                         mode='auto')

    # categorical output of the angle
    callbacks_list = [save_best, early_stop, logs]

    angle_cat_array = np.array([linear_bin(float(a)) for a in angle_array])

    model = default_model(input_shape=(img_height - horizon, img_width, img_depth), drop=drop)
    #model = default_categorical(input_shape=(img_height - horizon, img_width, img_depth), drop=drop)

    model.compile(optimizer='adam',
                  loss={'angle_out': 'categorical_crossentropy', },
                  loss_weights={'angle_out': 0.9})
    model.fit({'img_in': images}, {'angle_out': angle_cat_array, }, batch_size=batch_size,
              epochs=100, verbose=1, validation_split=0.2, shuffle=True, callbacks=callbacks_list)

    # Save model for tensorflow using
    model.save("/opt/ml/model/tfModel", save_format="tf")

    def representative_dataset():
        # Feed up to 100 single-image batches to calibrate the quantization.
        for d in tf.data.Dataset.from_tensor_slices(images).batch(1).take(100):
            yield [tf.dtypes.cast(d, tf.float32)]

    converter = tf.lite.TFLiteConverter.from_keras_model(model)

    # full quantization for edgeTpu
    # https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_dataset
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.uint8  # or tf.int8
    converter.inference_output_type = tf.uint8  # or tf.int8

    tflite_model = converter.convert()

    # Save the model.
    with open('/opt/ml/model/model_' + str(img_width) + 'x' + str(img_height) + 'h' + str(horizon) + '.tflite',
              'wb') as f:
        f.write(tflite_model)


def conv2d(filters, kernel, strides, layer_num, activation='relu'):
    """
    Build a convolutional layer with a square kernel, square strides and a
    name following the ``conv2d_<layer_num>`` convention. No padding argument
    is passed, so the layer's default padding applies.

    :param filters:     channel dimension of the layer
    :param kernel:      side length of the (kernel, kernel) kernel matrix
    :param strides:     step of the (strides, strides) stride
    :param layer_num:   number appended to the layer name
    :param activation:  activation, defaults to relu
    :return:            tf.keras Convolution2D layer
    """
    square_kernel = (kernel, kernel)
    square_strides = (strides, strides)
    layer_name = 'conv2d_' + str(layer_num)
    return Convolution2D(filters=filters,
                         kernel_size=square_kernel,
                         strides=square_strides,
                         activation=activation,
                         name=layer_name)


def core_cnn_layers(img_in: Input, img_height: int, img_width: int, drop: float, l4_stride: int = 1):
    """
    Returns the core CNN layers that are shared among the different models,
    like linear, imu, behavioural

    Five convolutional layers, each followed by dropout, then a flatten layer.
    The filter count of the first two layers is derived from the image size.

    :param img_in:          input layer of network
    :param img_height:      image height; the first layer gets
                            ``img_height // 5`` filters
    :param img_width:       image width; the second layer gets
                            ``img_width // 5`` filters
    :param drop:            dropout rate
    :param l4_stride:       4-th layer stride, default 1
    :return:                stack of CNN layers
    """
    x = img_in
    # A filter count has to be an integer: true division would hand a float
    # (e.g. 24.0) to the layer, so use floor division instead.
    x = conv2d(int(img_height // 5), 5, 2, 1)(x)
    x = Dropout(drop)(x)
    x = conv2d(int(img_width // 5), 5, 2, 2)(x)
    x = Dropout(drop)(x)
    x = conv2d(64, 5, 2, 3)(x)
    x = Dropout(drop)(x)
    x = conv2d(64, 3, l4_stride, 4)(x)
    x = Dropout(drop)(x)
    x = conv2d(64, 3, 1, 5)(x)
    x = Dropout(drop)(x)
    x = Flatten(name='flattened')(x)
    return x


def default_model(input_shape, drop):
    """
    Build the categorical steering model: five convolutional layers and two
    dense layers, each followed by dropout, ending in a 15-way softmax.

    :param input_shape:  shape of one input image; ``train`` passes
                         (height, width, channels)
    :param drop:         dropout rate applied after every hidden layer
    :return:             uncompiled Keras model with input ``img_in`` and
                         output ``angle_out``
    """
    # First layer, input layer
    img_in = Input(shape=input_shape, name='img_in')
    kernel_size = 5

    # A filter count has to be an integer: true division would hand a float
    # (e.g. 32.0) to the layer, so use floor division instead.
    width_filters = int(input_shape[1] // kernel_size)
    height_filters = int(input_shape[0] // kernel_size)

    x = img_in
    # input_shape[1] // 5 features (32 for a 160 wide input), 5x5 kernel window, 2wx2h stride, relu activation
    x = Convolution2D(width_filters, (kernel_size, kernel_size), strides=(2, 2), activation='relu')(x)
    x = Dropout(drop)(x)
    # input_shape[0] // 5 features (24 for a 120 high input), 5x5 kernel window, 2wx2h stride, relu activation
    x = Convolution2D(height_filters, (kernel_size, kernel_size), strides=(2, 2), activation='relu')(x)
    x = Dropout(drop)(x)
    # 64 features, 5x5 kernel window, 2wx2h stride, relu
    x = Convolution2D(64, (kernel_size, kernel_size), strides=(2, 2), activation='relu')(x)
    x = Dropout(drop)(x)
    # 64 features, 3x3 kernel window, 2wx2h stride, relu
    x = Convolution2D(64, (3, 3), strides=(2, 2), activation='relu')(x)
    x = Dropout(drop)(x)
    # 64 features, 3x3 kernel window, 1wx1h stride, relu
    x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu')(x)
    x = Dropout(drop)(x)

    # Possibly add MaxPooling (will make it less sensitive to position in image).
    # Camera angle fixed, so it may not be needed

    x = Flatten(name='flattened')(x)  # Flatten to 1D (Fully connected)
    x = Dense(100, activation='relu')(x)  # 100 hidden units, relu makes all negatives 0
    x = Dropout(drop)(x)
    x = Dense(50, activation='relu')(x)
    x = Dropout(drop)(x)
    # Connect every input with every output and output 15 units. Softmax turns them into
    # one probability (0.0-1.0) per angle category.
    angle_out = Dense(15, activation='softmax', name='angle_out')(x)

    model = Model(inputs=[img_in], outputs=[angle_out])

    return model


def default_n_linear(num_outputs, input_shape=(120, 160, 3), drop=0.2):
    """
    Build a model with ``num_outputs`` independent linear outputs on top of
    the shared CNN stack and two dense layers.

    :param num_outputs:  number of single-unit linear output layers, named
                         ``n_outputs0``, ``n_outputs1``, ...
    :param input_shape:  shape of one input image; index 0 is passed on as
                         the image height and index 1 as the image width
    :param drop:         dropout rate
    :return:             uncompiled Keras model named ``linear``
    """
    image_input = Input(shape=input_shape, name='img_in')
    features = core_cnn_layers(image_input, img_width=input_shape[1], img_height=input_shape[0], drop=drop)

    # Two fully connected layers, each followed by dropout.
    for units, dense_name in ((100, 'dense_1'), (50, 'dense_2')):
        features = Dense(units, activation='relu', name=dense_name)(features)
        features = Dropout(drop)(features)

    heads = [
        Dense(1, activation='linear', name='n_outputs' + str(index))(features)
        for index in range(num_outputs)
    ]

    return Model(inputs=[image_input], outputs=heads, name='linear')


def default_categorical(input_shape=(120, 160, 3), drop=0.2):
    """
    Build the categorical model: the shared CNN stack (with a stride of 2 on
    its 4-th layer), two dense layers and a 15-way softmax output.

    :param input_shape:  shape of one input image; index 0 is passed on as
                         the image height and index 1 as the image width
    :param drop:         dropout rate
    :return:             uncompiled Keras model named ``categorical``
    """
    image_input = Input(shape=input_shape, name='img_in')
    hidden = core_cnn_layers(
        image_input,
        img_width=input_shape[1],
        img_height=input_shape[0],
        drop=drop,
        l4_stride=2,
    )

    # Two fully connected layers, each followed by dropout.
    for units, dense_name in ((100, "dense_1"), (50, "dense_2")):
        hidden = Dense(units, activation='relu', name=dense_name)(hidden)
        hidden = Dropout(drop)(hidden)

    # One softmax unit per angle bin (15 bins).
    angle_out = Dense(15, activation='softmax', name='angle_out')(hidden)

    return Model(inputs=[image_input], outputs=[angle_out], name='categorical')


if __name__ == "__main__":
    # Each command-line option maps one-to-one onto a keyword argument of
    # train(): (flag, value type, default).
    cli_options = (
        ("--slide_size", int, 0),
        ("--img_height", int, 120),
        ("--img_width", int, 160),
        ("--img_depth", int, 3),
        ("--horizon", int, 0),
        ("--batch_size", int, 32),
        ("--drop", float, 0.2),
    )

    parser = argparse.ArgumentParser()
    for flag, value_type, default_value in cli_options:
        parser.add_argument(flag, type=value_type, default=default_value)

    args = parser.parse_args()
    params = vars(args)
    # The parsed namespace holds exactly the seven keyword arguments of train().
    train(**params)