7 Commits

SHA1         Message                                Date
b8e011e7cd   refactor: remove pipenv                2020-03-02 19:21:21 +01:00
3a376dd5a3   Fix refactor from aws execution logs   2020-03-02 19:20:42 +01:00
2076b4491a   refactor: compute only angle value     2020-02-17 19:31:06 +01:00
37bb0fff2d   Update docker tag                      2020-02-17 19:11:48 +01:00
a5354e5653   Reformat code                          2020-02-17 19:11:29 +01:00
84a8b11942   Export tf model                        2019-11-05 19:57:54 +01:00
9ec80414c9   First impl for satanas car             2019-11-05 19:45:46 +01:00
7 changed files with 122 additions and 90 deletions

.dockerignore (new file)

@@ -0,0 +1,2 @@
+venv

.gitignore (new file)

@@ -0,0 +1,2 @@
+venv
+/src/robocars_sagemaker_container.egg-info/

Dockerfile

@@ -6,10 +6,12 @@ WORKDIR /usr/src
 RUN python3 setup.py sdist
-FROM tensorflow/tensorflow:1.8.0-gpu-py3
+#FROM tensorflow/tensorflow:1.8.0-py3
+FROM tensorflow/tensorflow:1.15.0-gpu-py3
 #tensorflow-serving-api-python3==1.7.0
-RUN pip3 list && pip3 install numpy boto3 six awscli flask==0.11 Jinja2==2.9 gevent gunicorn keras==2.1.3 pillow h5py \
+COPY requirements.txt .
+RUN pip3 install --upgrade pip==20.0.2 && pip3 list && pip3 install -r requirements.txt \
     && pip3 list
 WORKDIR /root

@@ -32,5 +34,5 @@ RUN pip3 install robocars_sagemaker_container-1.0.0.tar.gz
 RUN rm robocars_sagemaker_container-1.0.0.tar.gz
-ENTRYPOINT ["entry.py"]
+ENTRYPOINT ["train"]

@@ -1,22 +1,24 @@
 #!/bin/bash
 job_name=$1
-if [ -z $job_name ]
+if [[ -z ${job_name} ]]
 then
     echo 'Provide model name'
     exit 0
 fi
 echo 'Creating training job '$1
-training_image="<replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.8.0-gpu-py3"
-iam_role_arn="arn:aws:iam::<replace_me>:role/service-role/<replace_me>"
+training_image="117617958416.dkr.ecr.eu-west-1.amazonaws.com/robocars:tensorflow"
+iam_role_arn="arn:aws:iam::117617958416:role/robocar-training"
+DATA_BUCKET="s3://robocars-cyrilix-learning/input"
+DATA_OUTPUT="s3://robocars-cyrilix-learning/output"
 aws sagemaker create-training-job \
-    --training-job-name $job_name \
+    --training-job-name ${job_name} \
     --hyper-parameters '{ "sagemaker_region": "\"eu-west-1\"", "with_slide": "true" }' \
-    --algorithm-specification TrainingImage=$training_image,TrainingInputMode=File \
-    --role-arn $iam_role_arn \
-    --input-data-config '[{ "ChannelName": "train", "DataSource": { "S3DataSource": { "S3DataType": "S3Prefix", "S3Uri": "s3://<replace_me>", "S3DataDistributionType": "FullyReplicated" }} }]' \
-    --output-data-config S3OutputPath=s3://<replace_me> \
+    --algorithm-specification TrainingImage="${training_image}",TrainingInputMode=File \
+    --role-arn ${iam_role_arn} \
+    --input-data-config "[{ \"ChannelName\": \"train\", \"DataSource\": { \"S3DataSource\": { \"S3DataType\": \"S3Prefix\", \"S3Uri\": \"${DATA_BUCKET}\", \"S3DataDistributionType\": \"FullyReplicated\" }} }]" \
+    --output-data-config S3OutputPath=${DATA_OUTPUT} \
     --resource-config InstanceType=ml.p2.xlarge,InstanceCount=1,VolumeSizeInGB=1 \
     --stopping-condition MaxRuntimeInSeconds=1800
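
Note: the same training job can be created from Python with boto3 instead of the aws CLI. A minimal sketch, reusing the image, role, buckets and limits from the script above (the job name is hypothetical):

import boto3

sagemaker = boto3.client('sagemaker', region_name='eu-west-1')

sagemaker.create_training_job(
    TrainingJobName='my-robocar-job',  # hypothetical name; the script takes it as $1
    HyperParameters={'sagemaker_region': '"eu-west-1"', 'with_slide': 'true'},
    AlgorithmSpecification={
        'TrainingImage': '117617958416.dkr.ecr.eu-west-1.amazonaws.com/robocars:tensorflow',
        'TrainingInputMode': 'File',
    },
    RoleArn='arn:aws:iam::117617958416:role/robocar-training',
    InputDataConfig=[{
        'ChannelName': 'train',
        'DataSource': {'S3DataSource': {
            'S3DataType': 'S3Prefix',
            'S3Uri': 's3://robocars-cyrilix-learning/input',
            'S3DataDistributionType': 'FullyReplicated',
        }},
    }],
    OutputDataConfig={'S3OutputPath': 's3://robocars-cyrilix-learning/output'},
    ResourceConfig={'InstanceType': 'ml.p2.xlarge', 'InstanceCount': 1, 'VolumeSizeInGB': 1},
    StoppingCondition={'MaxRuntimeInSeconds': 1800},
)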

requirements.txt (new file)

@@ -0,0 +1,12 @@
+sagemaker-container-support==1.1.3
+numpy==1.18.1
+boto3==1.12.11
+six==1.14.0
+awscli==1.18.11
+flask==0.12.5
+Jinja2==2.11.1
+gevent==1.4.0
+gunicorn==19.10.0
+keras==2.1.3
+pillow==7.0.0
+h5py==2.10.0

setup.py

@@ -1,8 +1,8 @@
 import os
+from glob import glob
 from os.path import basename
 from os.path import splitext
-from glob import glob
 from setuptools import setup, find_packages

@@ -19,9 +19,13 @@ setup(
     py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')],
     classifiers=[
-        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.7',
     ],
+    entry_points={
+        'console_scripts': [
+            'train=tf_container.train_entry_point:train',
+        ]
+    },
     install_requires=['sagemaker-container-support'],
     extras_require={},
 )
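
The new console_scripts entry is what makes the Dockerfile's ENTRYPOINT ["train"] work: on pip install, a `train` executable is generated that calls tf_container.train_entry_point:train. A simplified sketch of that generated wrapper (the exact boilerplate varies by pip version):

#!/usr/bin/env python3
# Simplified sketch of the 'train' wrapper that pip generates on install.
import sys

from tf_container.train_entry_point import train

if __name__ == '__main__':
    sys.exit(train())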

tf_container/train_entry_point.py

@@ -1,21 +1,38 @@
 #!/usr/bin/env python3
-import container_support as cs
 import os
-import json
-import re
-import zipfile
-from keras.preprocessing.image import load_img, img_to_array
-import numpy as np
-from keras.layers import Input, Dense, merge
-from keras.models import Model
-from keras.layers import Convolution2D, MaxPooling2D, Reshape, BatchNormalization
-from keras.layers import Activation, Dropout, Flatten, Dense
+
+import container_support as cs
+import json
+import numpy as np
+import re
+import tensorflow as tf
+import zipfile
+from keras import backend as K
 from keras import callbacks
+from keras.layers import Convolution2D
+from keras.layers import Dropout, Flatten, Dense
+from keras.layers import Input
+from keras.models import Model
+from keras.preprocessing.image import load_img, img_to_array
 from tensorflow.python.client import device_lib
+
+
+def get_data(root_dir, filename):
+    print('load data from file ' + filename)
+    d = json.load(open(os.path.join(root_dir, filename)))
+    return [d['user/angle'], root_dir, d['cam/image_array']]
+
+
+numbers = re.compile(r'(\d+)')
+
+
+def unzip_file(root, f):
+    zip_ref = zipfile.ZipFile(os.path.join(root, f), 'r')
+    zip_ref.extractall(root)
+    zip_ref.close()

 def train():
     env = cs.TrainingEnvironment()
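
The new top-level get_data() reads Donkey Car-style record files. A usage sketch (path and values are illustrative, not taken from the repository):

# Hypothetical record file /opt/ml/input/data/train/tub1/record_123.json:
#   {"user/angle": 0.25, "cam/image_array": "123_cam-image_array_.jpg"}
sample = get_data('/opt/ml/input/data/train/tub1', 'record_123.json')
# sample == [0.25, '/opt/ml/input/data/train/tub1', '123_cam-image_array_.jpg']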
@@ -23,24 +40,10 @@ def train():
     os.system('mkdir -p logs')
     # ### Loading the files ###
-    # ** You need to copy all your files to the directory where you are runing this notebook into a folder named "data" **
+    # ** You need to copy all your files to the directory where you are running this notebook **
+    # ** into a folder named "data" **
-    numbers = re.compile(r'(\d+)')
     data = []
-    def get_data(root,f):
-        d = json.load(open(os.path.join(root,f)))
-        if ('pilot/throttle' in d):
-            return [d['user/mode'],d['user/throttle'],d['user/angle'],root,d['cam/image_array'],d['pilot/throttle'],d['pilot/angle']]
-        else:
-            return [d['user/mode'],d['user/throttle'],d['user/angle'],root,d['cam/image_array']]
-    def numericalSort(value):
-        parts = numbers.split(value)
-        parts[1::2] = map(int, parts[1::2])
-        return parts
-    def unzip_file(root,f):
-        zip_ref = zipfile.ZipFile(os.path.join(root,f), 'r')
-        zip_ref.extractall(root)
-        zip_ref.close()
     for root, dirs, files in os.walk('/opt/ml/input/data/train'):
         for f in files:
@@ -48,38 +51,22 @@ def train():
             unzip_file(root, f)
     for root, dirs, files in os.walk('/opt/ml/input/data/train'):
-        data.extend([get_data(root,f) for f in sorted(files, key=numericalSort) if f.startswith('record') and f.endswith('.json')])
+        data.extend(
+            [get_data(root, f) for f in sorted(files, key=str.lower) if f.startswith('record') and f.endswith('.json')])
-    # Normalize / correct data
-    data = [d for d in data if d[1] > 0.1]
-    for d in data:
-        if d[1] < 0.2:
-            d[1] = 0.2
     # ### Loading throttle and angle ###
-    angle = [d[2] for d in data]
-    throttle = [d[1] for d in data]
+    angle = [d[0] for d in data]
     angle_array = np.array(angle)
-    throttle_array = np.array(throttle)
-    if (len(data[0]) > 5):
-        pilot_angle = [d[6] for d in data]
-        pilot_throttle = [d[5] for d in data]
-        pilot_angle_array = np.array(pilot_angle)
-        pilot_throttle_array = np.array(pilot_throttle)
-    else:
-        pilot_angle = []
-        pilot_throttle = []
     # ### Loading images ###
-    images = np.array([img_to_array(load_img(os.path.join(d[3],d[4]))) for d in data],'f')
+    images = np.array([img_to_array(load_img(os.path.join(d[1], d[2]))) for d in data], 'f')
     # slide images vs orders
     if env.hyperparameters.get('with_slide', False):
         images = images[:len(images) - 2]
         angle_array = angle_array[2:]
-        throttle_array = throttle_array[2:]
     # ### Start training ###
     def linear_bin(a):
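
The body of linear_bin() is elided from this diff. In Donkey Car-derived code it typically one-hot encodes a steering angle in [-1, 1] into 15 bins; a sketch under that assumption (the actual elided body may differ):

import numpy as np

def linear_bin(a):
    # Shift the angle from [-1, 1] to [0, 2], then quantize into bins 0..14.
    a = a + 1
    b = round(a / (2 / 14))
    arr = np.zeros(15)
    arr[int(b)] = 1
    return arr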
@@ -90,19 +77,31 @@ def train():
         return arr
     logs = callbacks.TensorBoard(log_dir='logs', histogram_freq=0, write_graph=True, write_images=True)
-    save_best = callbacks.ModelCheckpoint('/opt/ml/model/model_cat', monitor='angle_out_loss', verbose=1, save_best_only=True, mode='min')
-    early_stop = callbacks.EarlyStopping(monitor='angle_out_loss',
+    save_best = callbacks.ModelCheckpoint('/opt/ml/model/model_cat', monitor='val_loss', verbose=1,
+                                          save_best_only=True, mode='min')
+    early_stop = callbacks.EarlyStopping(monitor='val_loss',
                                          min_delta=.0005,
                                          patience=10,
                                          verbose=1,
                                          mode='auto')
-    img_in = Input(shape=(120, 160, 3), name='img_in')  # First layer, input layer, Shape comes from camera.py resolution, RGB
+    # Only needed for exporting the model to TensorFlow format
+    sess = tf.Session()
+    K.set_session(sess)
+    # First layer, input layer, Shape comes from camera.py resolution, RGB
+    img_in = Input(shape=(128, 160, 3),
+                   name='img_in')
     x = img_in
-    x = Convolution2D(24, (5,5), strides=(2,2), activation='relu')(x)  # 24 features, 5 pixel x 5 pixel kernel (convolution, feauture) window, 2wx2h stride, relu activation
-    x = Convolution2D(32, (5,5), strides=(2,2), activation='relu')(x)  # 32 features, 5px5p kernel window, 2wx2h stride, relu activatiion
-    x = Convolution2D(64, (5,5), strides=(2,2), activation='relu')(x)  # 64 features, 5px5p kernal window, 2wx2h stride, relu
-    x = Convolution2D(64, (3,3), strides=(2,2), activation='relu')(x)  # 64 features, 3px3p kernal window, 2wx2h stride, relu
-    x = Convolution2D(64, (3,3), strides=(1,1), activation='relu')(x)  # 64 features, 3px3p kernal window, 1wx1h stride, relu
+    # 24 features, 5 pixel x 5 pixel kernel (convolution, feature) window, 2wx2h stride, relu activation
+    x = Convolution2D(24, (5, 5), strides=(2, 2), activation='relu')(x)
+    # 32 features, 5px5p kernel window, 2wx2h stride, relu activation
+    x = Convolution2D(32, (5, 5), strides=(2, 2), activation='relu')(x)
+    # 64 features, 5px5p kernel window, 2wx2h stride, relu
+    x = Convolution2D(64, (5, 5), strides=(2, 2), activation='relu')(x)
+    # 64 features, 3px3p kernel window, 2wx2h stride, relu
+    x = Convolution2D(64, (3, 3), strides=(2, 2), activation='relu')(x)
+    # 64 features, 3px3p kernel window, 1wx1h stride, relu
+    x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu')(x)
     # Possibly add MaxPooling (will make it less sensitive to position in image). Camera angle is fixed, so it may not be needed
@@ -110,17 +109,26 @@ def train():
     x = Dense(100, activation='relu')(x)  # Classify the data into 100 features, make all negatives 0
     x = Dropout(.1)(x)
     x = Dense(50, activation='relu')(x)
-    x = Dropout(.1)(x)  # Randomly drop out 10% of the neurons (Prevent overfitting)
+    # Randomly drop out 10% of the neurons (prevents overfitting)
+    x = Dropout(.1)(x)
     # categorical output of the angle
     callbacks_list = [save_best, early_stop, logs]
-    angle_out = Dense(15, activation='softmax', name='angle_out')(x)  # Connect every input with every output and output 15 hidden units. Use Softmax to give percentage. 15 categories and find best one based off percentage 0.0-1.0
-    # continous output of throttle
-    throttle_out = Dense(1, activation='relu', name='throttle_out')(x)  # Reduce to 1 number, Positive number only
+    # Connect every input with every output and output 15 hidden units. Use Softmax to give percentage.
+    # 15 categories; find the best one based on percentage 0.0-1.0
+    angle_out = Dense(15, activation='softmax', name='angle_out')(x)
     angle_cat_array = np.array([linear_bin(a) for a in angle_array])
-    model = Model(inputs=[img_in], outputs=[angle_out, throttle_out])
+    model = Model(inputs=[img_in], outputs=[angle_out])
     model.compile(optimizer='adam',
-                  loss={'angle_out': 'categorical_crossentropy',
-                        'throttle_out': 'mean_absolute_error'},
-                  loss_weights={'angle_out': 0.9, 'throttle_out': .001})
-    model.fit({'img_in':images},{'angle_out': angle_cat_array, 'throttle_out': throttle_array}, batch_size=32, epochs=100, verbose=1, validation_split=0.2, shuffle=True, callbacks=callbacks_list)
+                  loss={'angle_out': 'categorical_crossentropy'},
+                  loss_weights={'angle_out': 0.9})
+    model.fit({'img_in': images}, {'angle_out': angle_cat_array}, batch_size=32,
+              epochs=100, verbose=1, validation_split=0.2, shuffle=True, callbacks=callbacks_list)
+
+    # Save the model in TensorFlow SavedModel format
+    builder = tf.saved_model.builder.SavedModelBuilder("/opt/ml/model/tfModel")
+    # Tag the model, required for loading it from Go
+    builder.add_meta_graph_and_variables(sess, ["myTag"])
+    builder.save()
+    sess.close()