#!/usr/bin/env python3
import os
# import container_support as cs
import argparse
import json
import pathlib
import typing
import numpy as np
import re
import tensorflow as tf
import zipfile
import tensorflow.keras.losses
from tensorflow.keras import callbacks
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Dropout, Flatten, Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Layer
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from numpy import typing as npt

MODEL_CATEGORICAL = "categorical"
MODEL_LINEAR = "linear"


def linear_bin_speed_zone(a: int, N: int = 4) -> npt.NDArray[np.float64]:
    """
    Create a one-hot bin of length N for a speed-zone index.
    """
    arr = np.zeros(N)
    arr[a] = 1
    return arr
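
# Quick sanity check (hypothetical usage, not part of the training flow):
#   >>> linear_bin_speed_zone(2)
#   array([0., 0., 1., 0.])
# Zone index 2 of N=4 becomes a one-hot target for categorical_crossentropy.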

def linear_bin(a: float, N: int = 15, offset: int = 1, R: float = 2.0) -> npt.NDArray[np.float64]:
    """
    Create a one-hot bin of length N:
    map value a from range R to a bin index,
    shifting the one-hot bin by offset, commonly R/2.
    """
    a = a + offset
    b = round(a / (R / (N - offset)))
    arr = np.zeros(N)
    b = clamp(b, 0, N - 1)
    arr[int(b)] = 1
    return arr
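
# Worked example with the defaults (N=15, offset=1, R=2.0), mapping a steering
# angle in [-1, 1] onto 15 bins (hypothetical inputs):
#   >>> linear_bin(-1.0)  # a=0.0 -> b=round(0.0 / (2/14)) = 0  -> bin 0
#   >>> linear_bin(0.0)   # a=1.0 -> b=round(1.0 / (2/14)) = 7  -> centre bin
#   >>> linear_bin(1.0)   # a=2.0 -> b=round(2.0 / (2/14)) = 14 -> bin 14
# clamp() keeps out-of-range angles inside the valid [0, N-1] index range.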

def clamp(n: int, lo: int, hi: int) -> int:
    """Clamp n into the inclusive range [lo, hi]."""
    if n <= lo:
        return lo
    if n >= hi:
        return hi
    return n

def get_data(root_dir: pathlib.Path, filename: str) -> typing.List[typing.Any]:
    # print('load data from file ' + filename)
    with open(os.path.join(root_dir, filename)) as f:
        d = json.load(f)
    return [d['user/angle'], root_dir, d['cam/image_array']]


def get_data_speed_zone(root_dir: pathlib.Path, filename: str) -> typing.List[typing.Any]:
    print('load data from file ' + filename)
    with open(os.path.join(root_dir, filename)) as f:
        d = json.load(f)
    return [d['speed_zone'], root_dir, d['cam/image_array']]
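
# Expected record layout, inferred from the keys read above (the filename and
# values are illustrative only):
#   record_0001.json -> {"user/angle": 0.25, "speed_zone": 2, "cam/image_array": "0001_cam.jpg"}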

numbers = re.compile(r'(\d+)')


def unzip_file(root: pathlib.Path, f: str) -> None:
    # Use a context manager so the archive is closed even if extraction fails.
    with zipfile.ZipFile(os.path.join(root, f), 'r') as zip_ref:
        zip_ref.extractall(root)

def train(model_type: str, record_field: str, batch_size: int, slide_size: int, img_height: int, img_width: int,
          img_depth: int, horizon: int, drop: float) -> None:
    # env = cs.TrainingEnvironment()
    os.makedirs('logs', exist_ok=True)

    # ### Loading the files ###
    # ** You need to copy all your files into a folder named "data" **
    # ** in the directory where you are running this notebook. **
    data = []
    for root, dirs, files in os.walk('/opt/ml/input/data/train'):
        for f in files:
            if f.endswith('.zip'):
                unzip_file(pathlib.Path(root), f)

    if record_field == 'angle':
        output_name = 'angle_out'
        for root, dirs, files in os.walk('/opt/ml/input/data/train'):
            data.extend(
                [get_data(root, f) for f in sorted(files, key=str.lower)
                 if f.startswith('record') and f.endswith('.json')])
    elif record_field == 'speed_zone':
        output_name = 'speed_zone_output'
        for root, dirs, files in os.walk('/opt/ml/input/data/train'):
            data.extend(
                [get_data_speed_zone(root, f) for f in sorted(files, key=str.lower)
                 if f.startswith('record') and f.endswith('.json')])
    else:
        print(f"invalid record field: {record_field}")
        return

    # ### Loading values (angle or speed_zone) ###
    value = [d[0] for d in data]
    value_array = np.array(value)

    # ### Loading images ###
    if horizon > 0:
        # Crop `horizon` pixels off the top of each image to drop the sky.
        images = np.array(
            [img_to_array(load_img(os.path.join(d[1], d[2])).crop((0, horizon, img_width, img_height)))
             for d in data], 'f')
    else:
        images = np.array([img_to_array(load_img(os.path.join(d[1], d[2]))) for d in data], 'f')

    # Slide images vs orders: pair each image with the value recorded slide_size steps later.
    if slide_size > 0:
        images = images[:len(images) - slide_size]
        value_array = value_array[slide_size:]

    # ### Start training ###
    from datetime import datetime
    logdir = '/opt/ml/model/logs/' + datetime.now().strftime("%Y%m%d-%H%M%S")
    logs = callbacks.TensorBoard(log_dir=logdir, histogram_freq=0, write_graph=True, write_images=True)
    # Creates a file writer for the log directory.
    # file_writer = tf.summary.create_file_writer(logdir)
    # Using the file writer, log the reshaped image.
    # with file_writer.as_default():
    #     # Don't forget to reshape.
    #     imgs = np.reshape(images[0:25], (-1, img_height, img_width, img_depth))
    #     tf.summary.image("25 training data examples", imgs, max_outputs=25, step=0)

    model_filepath = '/opt/ml/model/model_other'
    if model_type == MODEL_CATEGORICAL:
        model_filepath = '/opt/ml/model/model_cat'
        if record_field == 'angle':
            input_value_array = np.array([linear_bin(float(a)) for a in value_array])
            output_bin = 15
        elif record_field == 'speed_zone':
            input_value_array = np.array([linear_bin_speed_zone(a) for a in value_array])
            output_bin = 4
        model = default_categorical(input_shape=(img_height - horizon, img_width, img_depth), drop=drop,
                                    output_name=output_name, output_bin=output_bin)
        loss = {output_name: 'categorical_crossentropy'}
        optimizer = 'adam'
    elif model_type == MODEL_LINEAR:
        model_filepath = '/opt/ml/model/model_lin'
        input_value_array = np.array(value_array)
        model = default_linear(input_shape=(img_height - horizon, img_width, img_depth), drop=drop,
                               output_name=output_name)
        loss = 'mse'
        optimizer = 'rmsprop'
    else:
        raise ValueError(f"invalid model type: {model_type}")

    # Display the model's architecture
    model.summary()

    save_best = callbacks.ModelCheckpoint(model_filepath, monitor='val_loss', verbose=1,
                                          save_best_only=True, mode='min')
    early_stop = callbacks.EarlyStopping(monitor='val_loss',
                                         min_delta=.0005,
                                         patience=5,
                                         verbose=1,
                                         mode='auto')
    callbacks_list = [save_best, early_stop, logs]

    model.compile(optimizer=optimizer, loss=loss)
    model.fit({'img_in': images}, {output_name: input_value_array}, batch_size=batch_size,
              epochs=100, verbose=1, validation_split=0.2, shuffle=True, callbacks=callbacks_list)

    # Save the model in TensorFlow SavedModel format.
    model.save(f'/opt/ml/model/model_{record_field.replace("_", "")}_{model_type}'
               f'_{img_width}x{img_height}h{horizon}')

    def representative_dataset() -> typing.Generator[typing.List[tf.Tensor], None, None]:
        # Feed ~100 real training images so the converter can calibrate quantization ranges.
        for d in tf.data.Dataset.from_tensor_slices(images).batch(1).take(100):
            yield [tf.dtypes.cast(d, tf.float32)]

    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    # Full integer quantization for the Edge TPU, see
    # https://www.tensorflow.org/lite/performance/post_training_quantization#full_integer_quantization
    converter.optimizations = [tf.lite.Optimize.DEFAULT]
    converter.representative_dataset = representative_dataset
    converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
    converter.inference_input_type = tf.uint8  # or tf.int8
    converter.inference_output_type = tf.uint8  # or tf.int8
    tflite_model = converter.convert()

    # Save the quantized model.
    with open(f'/opt/ml/model/model_{record_field.replace("_", "")}_{model_type}'
              f'_{img_width}x{img_height}h{horizon}.tflite', 'wb') as f:
        f.write(tflite_model)
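
# Minimal on-device inference sketch for the quantized model (assumptions: the
# .tflite file name matches the pattern written above, and `img` is an HxWxC
# uint8 numpy array already cropped to the training shape):
#   interpreter = tf.lite.Interpreter(model_path='model_angle_categorical_160x120h0.tflite')
#   interpreter.allocate_tensors()
#   input_index = interpreter.get_input_details()[0]['index']
#   interpreter.set_tensor(input_index, img[np.newaxis, ...])
#   interpreter.invoke()
#   probs = interpreter.get_tensor(interpreter.get_output_details()[0]['index'])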

def conv2d(filters: int, kernel: int, strides: int, layer_num: int,
           activation: str = 'relu') -> Conv2D:
    """
    Helper function to create a standard valid-padded convolutional layer
    with square kernel and strides and a unified naming convention.

    :param filters: channel dimension of the layer
    :param kernel: creates a (kernel, kernel) kernel matrix dimension
    :param strides: creates a (strides, strides) stride
    :param layer_num: used in labelling the layer
    :param activation: activation, defaults to relu
    :return: tf.keras Conv2D layer
    """
    return Conv2D(filters=filters,
                  kernel_size=(kernel, kernel),
                  strides=(strides, strides),
                  activation=activation,
                  name='conv2d_' + str(layer_num))
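
# With the Conv2D default 'valid' padding, each layer shrinks the feature map:
#   out = floor((in - kernel) / strides) + 1
# e.g. a 120x160 input through conv2d(24, 5, 2, 1) yields a 58x78 map.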

def core_cnn_layers(img_in: Input, img_height: int, img_width: int, drop: float, l4_stride: int = 1) -> Layer:
    """
    Returns the core CNN layers that are shared among the different models,
    like linear, imu, behavioural.

    :param img_in: input layer of network
    :param img_height: image height
    :param img_width: image width
    :param drop: dropout rate
    :param l4_stride: 4-th layer stride, default 1
    :return: stack of CNN layers
    """
    x = img_in
    # Integer division keeps `filters` an int, as Conv2D requires
    # (24 filters for a 120px-high image, 32 for a 160px-wide one).
    x = conv2d(img_height // 5, 5, 2, 1)(x)
    x = Dropout(drop)(x)
    x = conv2d(img_width // 5, 5, 2, 2)(x)
    x = Dropout(drop)(x)
    x = conv2d(64, 5, 2, 3)(x)
    x = Dropout(drop)(x)
    x = conv2d(64, 3, l4_stride, 4)(x)
    x = Dropout(drop)(x)
    x = conv2d(64, 3, 1, 5)(x)
    x = Dropout(drop)(x)
    x = Flatten(name='flattened')(x)
    return x
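
# Shape walkthrough (default 120x160x3 input, horizon=0, l4_stride=1):
#   conv2d_1 (5x5 /2): 58x78x24 -> conv2d_2 (5x5 /2): 27x37x32 -> conv2d_3 (5x5 /2): 12x17x64
#   -> conv2d_4 (3x3 /1): 10x15x64 -> conv2d_5 (3x3 /1): 8x13x64 -> flattened: 6656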

def default_linear(input_shape: typing.Tuple[int, int, int] = (120, 160, 3), drop: float = 0.2,
                   output_name: str = 'angle_out') -> Model:
    img_in = Input(shape=input_shape, name='img_in')
    x = core_cnn_layers(img_in, img_width=input_shape[1], img_height=input_shape[0], drop=drop)
    x = Dense(100, activation='relu', name='dense_1')(x)
    x = Dropout(drop)(x)
    x = Dense(50, activation='relu', name='dense_2')(x)
    x = Dropout(drop)(x)
    value_out = Dense(1, activation='linear', name=output_name)(x)
    model = Model(inputs=[img_in], outputs=[value_out], name='linear')
    return model

def default_categorical(input_shape: typing.Tuple[int, int, int] = (120, 160, 3), drop: float = 0.2,
                        output_name: str = 'angle_out', output_bin: int = 15) -> Model:
    img_in = Input(shape=input_shape, name='img_in')
    x = core_cnn_layers(img_in, img_width=input_shape[1], img_height=input_shape[0], drop=drop, l4_stride=2)
    x = Dense(100, activation='relu', name="dense_1")(x)
    x = Dropout(drop)(x)
    x = Dense(50, activation='relu', name="dense_2")(x)
    x = Dropout(drop)(x)
    # Categorical output: softmax over output_bin bins (15 for angle, 4 for speed zones)
    value_out = Dense(output_bin, activation='softmax', name=output_name)(x)
    model = Model(inputs=[img_in], outputs=[value_out], name='categorical')
    return model
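
# Decoding sketch (hypothetical helper, the inverse of linear_bin above): turn
# the categorical model's softmax output back into an angle in [-1, 1]:
#   def linear_unbin(arr: npt.NDArray[np.float64]) -> float:
#       b = float(np.argmax(arr))        # index of the most probable bin
#       return b * (2.0 / 14.0) - 1.0    # inverse of b = round((a + 1) / (2 / 14))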

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--slide_size", type=int, default=0)
    parser.add_argument("--img_height", type=int, default=120)
    parser.add_argument("--img_width", type=int, default=160)
    parser.add_argument("--img_depth", type=int, default=3)
    parser.add_argument("--horizon", type=int, default=0)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--drop", type=float, default=0.2)
    parser.add_argument("--model_type", type=str, choices=[MODEL_CATEGORICAL, MODEL_LINEAR],
                        default=MODEL_CATEGORICAL)
    parser.add_argument("--record_field", type=str, choices=['angle', 'speed_zone'], default='angle')
    args = parser.parse_args()
    params = vars(args)
    train(
        model_type=params["model_type"],
        record_field=params["record_field"],
        batch_size=params["batch_size"],
        slide_size=params["slide_size"],
        img_height=params["img_height"],
        img_width=params["img_width"],
        img_depth=params["img_depth"],
        horizon=params["horizon"],
        drop=params["drop"],
    )


if __name__ == "__main__":
    main()
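
# Example invocation (the /opt/ml input and model paths above follow the
# SageMaker container convention, so this assumes it runs inside such a container):
#   python train.py --model_type categorical --record_field angle \
#       --img_height 120 --img_width 160 --horizon 20 --batch_size 32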