8 Commits

SHA1 Message Date
b8e011e7cd refactor: remove pipenv 2020-03-02 19:21:21 +01:00
3a376dd5a3 Fix refactor from aws execution logs 2020-03-02 19:20:42 +01:00
2076b4491a refactor: compute only angle value 2020-02-17 19:31:06 +01:00
37bb0fff2d Update docker tag 2020-02-17 19:11:48 +01:00
a5354e5653 Reformat code 2020-02-17 19:11:29 +01:00
84a8b11942 Export tf model 2019-11-05 19:57:54 +01:00
9ec80414c9 First impl for satanas car 2019-11-05 19:45:46 +01:00
b81cb57230 1.8.0 image 2018-08-03 12:30:42 +02:00
11 changed files with 129 additions and 129 deletions

.dockerignore (new file)

@@ -0,0 +1,2 @@
venv

.gitignore (new file, vendored)

@@ -0,0 +1,2 @@
venv
/src/robocars_sagemaker_container.egg-info/

Dockerfile.gpu

@@ -6,7 +6,13 @@ WORKDIR /usr/src
RUN python3 setup.py sdist
FROM tensorflow-base:1.4.1-gpu-py3
#FROM tensorflow/tensorflow:1.8.0-py3
FROM tensorflow/tensorflow:1.15.0-gpu-py3
#tensorflow-serving-api-python3==1.7.0
COPY requirements.txt .
RUN pip3 install --upgrade pip==20.0.2 && pip3 list && pip3 install -r requirements.txt \
&& pip3 list
WORKDIR /root
@@ -28,5 +34,5 @@ RUN pip3 install robocars_sagemaker_container-1.0.0.tar.gz
RUN rm robocars_sagemaker_container-1.0.0.tar.gz
ENTRYPOINT ["entry.py"]
ENTRYPOINT ["train"]

Dockerfile_base_pip.gpu (deleted)

@@ -1,29 +0,0 @@
FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
RUN apt-get update && apt-get install -y --no-install-recommends \
python3-pip python3-dev python3-setuptools \
&& \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
&& pip3 install tensorflow-gpu==1.4.1
RUN pip3 list && pip3 install numpy boto3 six awscli flask==0.11 Jinja2==2.9 gevent gunicorn keras==2.1.3 pillow h5py \
&& pip3 list
# Configure the build for our CUDA configuration.
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH
ENV CI_BUILD_PYTHON=python \
LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH \
TF_NEED_CUDA=1 \
TF_CUDA_VERSION=8.0 \
TF_CUDNN_VERSION=6 \
TF_CUDA_COMPUTE_CAPABILITIES=3.7,6.1
# Fix paths so that CUDNN can be found
# See https://github.com/tensorflow/tensorflow/issues/8264
RUN ls -lah /usr/local/cuda/lib64/*
RUN mkdir /usr/lib/x86_64-linux-gnu/include/ && \
ln -s /usr/lib/x86_64-linux-gnu/include/cudnn.h /usr/lib/x86_64-linux-gnu/include/cudnn.h && \
ln -s /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h && \
ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/local/cuda/lib64/libcudnn.so && \
ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.6 /usr/local/cuda/lib64/libcudnn.so.6

README.md

@@ -4,16 +4,10 @@ Run DIY Robocars model training on SageMaker (https://aws.amazon.com/fr/sagemake
# Build images
- Build base image:
```
docker build -t robocars-base:1.4.1-gpu-py3 -f Dockerfile_base.gpu .
```
- Build model image:
```
docker build -t robocars:1.4.1-gpu-py3 -f Dockerfile.gpu .
docker build -t robocars:1.8.0-gpu-py3 -f Dockerfile.gpu .
```
# Prepare training (once)
@@ -22,9 +16,9 @@ docker build -t robocars:1.4.1-gpu-py3 -f Dockerfile.gpu .
- Create an AWS Docker registry (ECR) and push your model image to it; the Docker Hub registry is not supported
```
docker tag robocars:1.4.1-gpu-py3 <replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.4.1-gpu-py3
docker tag robocars:1.8.0-gpu-py3 <replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.8.0-gpu-py3
# you need the AWS CLI installed and must be logged in to Docker
docker push <replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.4.1-gpu-py3
docker push <replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.8.0-gpu-py3
```
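For example, to authenticate Docker against ECR before pushing (a sketch assuming AWS CLI v1, matching the awscli pin in requirements.txt; region and registry are placeholders):
```
# CLI v1: get-login prints a 'docker login' command; $(...) runs it
$(aws ecr get-login --no-include-email --region eu-west-1)
# CLI v2 equivalent:
# aws ecr get-login-password --region eu-west-1 | docker login --username AWS --password-stdin <replace_me>.dkr.ecr.eu-west-1.amazonaws.com
```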
# Run training
@@ -47,7 +41,7 @@ echo 'Creating training job '$1
aws sagemaker create-training-job \
--training-job-name $job_name \
--hyper-parameters '{ "sagemaker_region": "\"eu-west-1\"", "with_slide": "true" }' \
--algorithm-specification TrainingImage="<replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.4.1-gpu-py3",TrainingInputMode=File \
--algorithm-specification TrainingImage="<replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.8.0-gpu-py3",TrainingInputMode=File \
--role-arn "<your_iam_sagemaker_role>" \
--input-data-config '[{ "ChannelName": "train", "DataSource": { "S3DataSource": { "S3DataType": "S3Prefix", "S3Uri": "s3://<your_input_bucket>", "S3DataDistributionType": "FullyReplicated" }} }]' \
--output-data-config S3OutputPath=s3://<your_output_bucket> \

base-image build script (deleted)

@@ -1 +0,0 @@
docker build -t tensorflow-base:1.4.1-gpu-py3 -f Dockerfile_base_pip.gpu .

build script

@@ -1 +1 @@
docker build -t tensorflow:1.4.1-gpu-py3 -f Dockerfile.gpu .
docker build -t tensorflow:1.8.0-gpu-py3 -f Dockerfile.gpu .

training-job launch script

@@ -1,22 +1,24 @@
#!/bin/bash
job_name=$1
if [ -z $job_name ]
if [[ -z ${job_name} ]]
then
echo 'Provide model name'
exit 0
fi
fi
echo 'Creating training job '$1
training_image="<replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.4.1-gpu-py3"
iam_role_arn="arn:aws:iam::<replace_me>:role/service-role/<replace_me>"
training_image="117617958416.dkr.ecr.eu-west-1.amazonaws.com/robocars:tensorflow"
iam_role_arn="arn:aws:iam::117617958416:role/robocar-training"
DATA_BUCKET="s3://robocars-cyrilix-learning/input"
DATA_OUTPUT="s3://robocars-cyrilix-learning/output"
aws sagemaker create-training-job \
--training-job-name $job_name \
--training-job-name ${job_name} \
--hyper-parameters '{ "sagemaker_region": "\"eu-west-1\"", "with_slide": "true" }' \
--algorithm-specification TrainingImage=$training_image,TrainingInputMode=File \
--role-arn $iam_role_arn \
--input-data-config '[{ "ChannelName": "train", "DataSource": { "S3DataSource": { "S3DataType": "S3Prefix", "S3Uri": "s3://<replace_me>", "S3DataDistributionType": "FullyReplicated" }} }]' \
--output-data-config S3OutputPath=s3://<replace_me> \
--algorithm-specification TrainingImage="${training_image}",TrainingInputMode=File \
--role-arn ${iam_role_arn} \
--input-data-config "[{ \"ChannelName\": \"train\", \"DataSource\": { \"S3DataSource\": { \"S3DataType\": \"S3Prefix\", \"S3Uri\": \"${DATA_BUCKET}\", \"S3DataDistributionType\": \"FullyReplicated\" }} }]" \
--output-data-config S3OutputPath=${DATA_OUTPUT} \
--resource-config InstanceType=ml.p2.xlarge,InstanceCount=1,VolumeSizeInGB=1 \
--stopping-condition MaxRuntimeInSeconds=1800
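A hypothetical invocation (the script's file name is not shown in this diff; `run_training.sh` is a placeholder):
```
./run_training.sh my-robocar-job    # creates SageMaker training job 'my-robocar-job'
```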

requirements.txt (new file)

@@ -0,0 +1,12 @@
sagemaker-container-support==1.1.3
numpy==1.18.1
boto3==1.12.11
six==1.14.0
awscli==1.18.11
flask==0.12.5
Jinja2==2.11.1
gevent==1.4.0
gunicorn==19.10.0
keras==2.1.3
pillow==7.0.0
h5py==2.10.0

setup.py

@@ -1,8 +1,8 @@
import os
from glob import glob
from os.path import basename
from os.path import splitext
from glob import glob
from setuptools import setup, find_packages
@@ -19,9 +19,13 @@ setup(
py_modules=[splitext(basename(path))[0] for path in glob('src/*.py')],
classifiers=[
'Programming Language :: Python :: 3.5',
'Programming Language :: Python :: 3.7',
],
entry_points={
'console_scripts': [
'train=tf_container.train_entry_point:train',
]
},
install_requires=['sagemaker-container-support'],
extras_require={},
)
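# Note: the 'train' console script declared above is what the updated
# Dockerfile invokes via ENTRYPOINT ["train"].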

src/tf_container/train_entry_point.py

@@ -1,21 +1,38 @@
#!/usr/bin/env python3
import container_support as cs
import os
import json
import re
import zipfile
from keras.preprocessing.image import load_img, img_to_array
import numpy as np
from keras.layers import Input, Dense, merge
from keras.models import Model
from keras.layers import Convolution2D, MaxPooling2D, Reshape, BatchNormalization
from keras.layers import Activation, Dropout, Flatten, Dense
import container_support as cs
import json
import numpy as np
import re
import tensorflow as tf
import zipfile
from keras import backend as K
from keras import callbacks
from keras.layers import Convolution2D
from keras.layers import Dropout, Flatten, Dense
from keras.layers import Input
from keras.models import Model
from keras.preprocessing.image import load_img, img_to_array
from tensorflow.python.client import device_lib
def get_data(root_dir, filename):
print('load data from file ' + filename)
d = json.load(open(os.path.join(root_dir, filename)))
return [d['user/angle'], root_dir, d['cam/image_array']]
numbers = re.compile(r'(\d+)')
def unzip_file(root, f):
zip_ref = zipfile.ZipFile(os.path.join(root, f), 'r')
zip_ref.extractall(root)
zip_ref.close()
def train():
env = cs.TrainingEnvironment()
@@ -23,104 +40,95 @@ def train():
os.system('mkdir -p logs')
# ### Loading the files ###
# ** You need to copy all your files to the directory where you are runing this notebook into a folder named "data" **
# ** You need to copy all your files to the directory where you are running this notebook **
# ** into a folder named "data" **
numbers = re.compile(r'(\d+)')
data = []
def get_data(root,f):
d = json.load(open(os.path.join(root,f)))
if ('pilot/throttle' in d):
return [d['user/mode'],d['user/throttle'],d['user/angle'],root,d['cam/image_array'],d['pilot/throttle'],d['pilot/angle']]
else:
return [d['user/mode'],d['user/throttle'],d['user/angle'],root,d['cam/image_array']]
def numericalSort(value):
parts = numbers.split(value)
parts[1::2] = map(int, parts[1::2])
return parts
def unzip_file(root,f):
zip_ref = zipfile.ZipFile(os.path.join(root,f), 'r')
zip_ref.extractall(root)
zip_ref.close()
for root, dirs, files in os.walk('/opt/ml/input/data/train'):
for f in files:
for f in files:
if f.endswith('.zip'):
unzip_file(root, f)
for root, dirs, files in os.walk('/opt/ml/input/data/train'):
data.extend([get_data(root,f) for f in sorted(files, key=numericalSort) if f.startswith('record') and f.endswith('.json')])
data.extend(
[get_data(root, f) for f in sorted(files, key=str.lower) if f.startswith('record') and f.endswith('.json')])
# Normalize / correct data
data = [d for d in data if d[1] > 0.1]
for d in data:
if d[1] < 0.2:
d[1] = 0.2
# ### Loading throttle and angle ###
angle = [d[2] for d in data]
throttle = [d[1] for d in data]
angle = [d[0] for d in data]
angle_array = np.array(angle)
throttle_array = np.array(throttle)
if (len(data[0]) > 5):
pilot_angle = [d[6] for d in data]
pilot_throttle = [d[5] for d in data]
pilot_angle_array = np.array(pilot_angle)
pilot_throttle_array = np.array(pilot_throttle)
else:
pilot_angle = []
pilot_throttle = []
# ### Loading images ###
images = np.array([img_to_array(load_img(os.path.join(d[3],d[4]))) for d in data],'f')
images = np.array([img_to_array(load_img(os.path.join(d[1], d[2]))) for d in data], 'f')
# slide images vs orders
if env.hyperparameters.get('with_slide', False):
images = images[:len(images)-2]
images = images[:len(images) - 2]
angle_array = angle_array[2:]
throttle_array = throttle_array[2:]
# ### Start training ###
def linear_bin(a):
a = a + 1
b = round(a / (2/14))
b = round(a / (2 / 14))
arr = np.zeros(15)
arr[int(b)] = 1
return arr
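# e.g. linear_bin(-1.0) -> one-hot at index 0, linear_bin(0.0) -> index 7,
# linear_bin(1.0) -> index 14: a steering angle in [-1, 1] is binned into 15 classes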
logs = callbacks.TensorBoard(log_dir='logs', histogram_freq=0, write_graph=True, write_images=True)
save_best = callbacks.ModelCheckpoint('/opt/ml/model/model_cat', monitor='angle_out_loss', verbose=1, save_best_only=True, mode='min')
early_stop = callbacks.EarlyStopping(monitor='angle_out_loss',
min_delta=.0005,
patience=10,
verbose=1,
mode='auto')
img_in = Input(shape=(120, 160, 3), name='img_in') # First layer, input layer, Shape comes from camera.py resolution, RGB
save_best = callbacks.ModelCheckpoint('/opt/ml/model/model_cat', monitor='val_loss', verbose=1,
save_best_only=True, mode='min')
early_stop = callbacks.EarlyStopping(monitor='val_loss',
min_delta=.0005,
patience=10,
verbose=1,
mode='auto')
# Only for export model to tensorflow
sess = tf.Session()
K.set_session(sess)
# First layer, input layer, Shape comes from camera.py resolution, RGB
img_in = Input(shape=(128, 160, 3),
name='img_in')
x = img_in
x = Convolution2D(24, (5,5), strides=(2,2), activation='relu')(x) # 24 features, 5 pixel x 5 pixel kernel (convolution, feauture) window, 2wx2h stride, relu activation
x = Convolution2D(32, (5,5), strides=(2,2), activation='relu')(x) # 32 features, 5px5p kernel window, 2wx2h stride, relu activatiion
x = Convolution2D(64, (5,5), strides=(2,2), activation='relu')(x) # 64 features, 5px5p kernal window, 2wx2h stride, relu
x = Convolution2D(64, (3,3), strides=(2,2), activation='relu')(x) # 64 features, 3px3p kernal window, 2wx2h stride, relu
x = Convolution2D(64, (3,3), strides=(1,1), activation='relu')(x) # 64 features, 3px3p kernal window, 1wx1h stride, relu
# 24 features, 5 pixel x 5 pixel kernel (convolution, feature) window, 2wx2h stride, relu activation
x = Convolution2D(24, (5, 5), strides=(2, 2), activation='relu')(x)
# 32 features, 5px5p kernel window, 2wx2h stride, relu activation
x = Convolution2D(32, (5, 5), strides=(2, 2), activation='relu')(x)
# 64 features, 5px5p kernel window, 2wx2h stride, relu
x = Convolution2D(64, (5, 5), strides=(2, 2), activation='relu')(x)
# 64 features, 3px3p kernel window, 2wx2h stride, relu
x = Convolution2D(64, (3, 3), strides=(2, 2), activation='relu')(x)
# 64 features, 3px3p kernel window, 1wx1h stride, relu
x = Convolution2D(64, (3, 3), strides=(1, 1), activation='relu')(x)
# Possibly add MaxPooling (will make it less sensitive to position in image). Camera angle is fixed, so it may not be needed
x = Flatten(name='flattened')(x) # Flatten to 1D (Fully connected)
x = Dense(100, activation='relu')(x) # Classify the data into 100 features, make all negatives 0
x = Flatten(name='flattened')(x) # Flatten to 1D (Fully connected)
x = Dense(100, activation='relu')(x) # Classify the data into 100 features, make all negatives 0
x = Dropout(.1)(x)
x = Dense(50, activation='relu')(x)
x = Dropout(.1)(x) # Randomly drop out 10% of the neurons (Prevent overfitting)
#categorical output of the angle
# Randomly drop out 10% of the neurons (Prevent overfitting)
x = Dropout(.1)(x)
# categorical output of the angle
callbacks_list = [save_best, early_stop, logs]
angle_out = Dense(15, activation='softmax', name='angle_out')(x) # Connect every input with every output and output 15 hidden units. Use Softmax to give percentage. 15 categories and find best one based off percentage 0.0-1.0
# Connect every input with every output and output 15 hidden units. Use Softmax to give percentage.
# 15 categories and find best one based off percentage 0.0-1.0
angle_out = Dense(15, activation='softmax', name='angle_out')(x)
# continuous output of throttle
throttle_out = Dense(1, activation='relu', name='throttle_out')(x) # Reduce to 1 number, Positive number only
angle_cat_array = np.array([linear_bin(a) for a in angle_array])
model = Model(inputs=[img_in], outputs=[angle_out, throttle_out])
model = Model(inputs=[img_in], outputs=[angle_out])
model.compile(optimizer='adam',
loss={'angle_out': 'categorical_crossentropy',
'throttle_out': 'mean_absolute_error'},
loss_weights={'angle_out': 0.9, 'throttle_out': .001})
model.fit({'img_in':images},{'angle_out': angle_cat_array, 'throttle_out': throttle_array}, batch_size=32, epochs=100, verbose=1, validation_split=0.2, shuffle=True, callbacks=callbacks_list)
loss={'angle_out': 'categorical_crossentropy', },
loss_weights={'angle_out': 0.9 })
model.fit({'img_in': images}, {'angle_out': angle_cat_array, }, batch_size=32,
epochs=100, verbose=1, validation_split=0.2, shuffle=True, callbacks=callbacks_list)
# Save the model in TensorFlow SavedModel format
builder = tf.saved_model.builder.SavedModelBuilder("/opt/ml/model/tfModel")
# Tag the model, required for Go
builder.add_meta_graph_and_variables(sess, ["myTag"])
builder.save()
sess.close()
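# The exported SavedModel can be reloaded in TF 1.x with
#   tf.saved_model.loader.load(sess, ["myTag"], "/opt/ml/model/tfModel")
# (the tags must match those passed to add_meta_graph_and_variables above)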