1.8.0 image
This commit is contained in:
parent
eedbee5253
commit
b81cb57230
@ -6,7 +6,11 @@ WORKDIR /usr/src
|
||||
|
||||
RUN python3 setup.py sdist
|
||||
|
||||
FROM tensorflow-base:1.4.1-gpu-py3
|
||||
FROM tensorflow/tensorflow:1.8.0-gpu-py3
|
||||
|
||||
#tensorflow-serving-api-python3==1.7.0
|
||||
RUN pip3 list && pip3 install numpy boto3 six awscli flask==0.11 Jinja2==2.9 gevent gunicorn keras==2.1.3 pillow h5py \
|
||||
&& pip3 list
|
||||
|
||||
WORKDIR /root
|
||||
|
||||
|
@ -1,29 +0,0 @@
|
||||
FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
|
||||
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
python3-pip python3-dev python3-setuptools \
|
||||
&& \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/* \
|
||||
&& pip3 install tensorflow-gpu==1.4.1
|
||||
|
||||
RUN pip3 list && pip3 install numpy boto3 six awscli flask==0.11 Jinja2==2.9 gevent gunicorn keras==2.1.3 pillow h5py \
|
||||
&& pip3 list
|
||||
|
||||
# Configure the build for our CUDA configuration.
|
||||
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH
|
||||
ENV CI_BUILD_PYTHON=python \
|
||||
LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH \
|
||||
TF_NEED_CUDA=1 \
|
||||
TF_CUDA_VERSION=8.0 \
|
||||
TF_CUDNN_VERSION=6 \
|
||||
TF_CUDA_COMPUTE_CAPABILITIES=3.7,6.1
|
||||
|
||||
# Fix paths so that CUDNN can be found
|
||||
# See https://github.com/tensorflow/tensorflow/issues/8264
|
||||
RUN ls -lah /usr/local/cuda/lib64/*
|
||||
RUN mkdir /usr/lib/x86_64-linux-gnu/include/ && \
|
||||
ln -s /usr/lib/x86_64-linux-gnu/include/cudnn.h /usr/lib/x86_64-linux-gnu/include/cudnn.h && \
|
||||
ln -s /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h && \
|
||||
ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/local/cuda/lib64/libcudnn.so && \
|
||||
ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.6 /usr/local/cuda/lib64/libcudnn.so.6
|
14
Readme.md
14
Readme.md
@ -4,16 +4,10 @@ Run DIY Robocars model training as Sagemaker (https://aws.amazon.com/fr/sagemake
|
||||
|
||||
# Build images
|
||||
|
||||
- Build base image:
|
||||
|
||||
```
|
||||
docker build -t robocars-base:1.4.1-gpu-py3 -f Dockerfile_base.gpu .
|
||||
```
|
||||
|
||||
- Build model image:
|
||||
|
||||
```
|
||||
docker build -t robocars:1.4.1-gpu-py3 -f Dockerfile.gpu .
|
||||
docker build -t robocars:1.8.0-gpu-py3 -f Dockerfile.gpu .
|
||||
```
|
||||
|
||||
# Prepare training (once)
|
||||
@ -22,9 +16,9 @@ docker build -t robocars:1.4.1-gpu-py3 -f Dockerfile.gpu .
|
||||
- Create an AWS Docker registry and push your model image to it. The Docker Hub registry is not supported.
|
||||
|
||||
```
|
||||
docker tag robocars:1.4.1-gpu-py <replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.4.1-gpu-py3
|
||||
docker tag robocars:1.8.0-gpu-py3 <replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.8.0-gpu-py3
|
||||
# you should have the AWS SDK installed and be logged in to docker
|
||||
docker push <replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.4.1-gpu-py3
|
||||
docker push <replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.8.0-gpu-py3
|
||||
```
|
||||
|
||||
# Run training
|
||||
@ -47,7 +41,7 @@ echo 'Creating training job '$1
|
||||
aws sagemaker create-training-job \
|
||||
--training-job-name $job_name \
|
||||
--hyper-parameters '{ "sagemaker_region": "\"eu-west-1\"", "with_slide": "true" }' \
|
||||
--algorithm-specification TrainingImage="<replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.4.1-gpu-py3",TrainingInputMode=File \
|
||||
--algorithm-specification TrainingImage="<replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.8.0-gpu-py3",TrainingInputMode=File \
|
||||
--role-arn "<your_iam_sagemaker_role>" \
|
||||
--input-data-config '[{ "ChannelName": "train", "DataSource": { "S3DataSource": { "S3DataType": "S3Prefix", "S3Uri": "s3://<your_input_bucket>", "S3DataDistributionType": "FullyReplicated" }} }]' \
|
||||
--output-data-config S3OutputPath=s3://<your_output_bucket> \
|
||||
|
@ -1 +0,0 @@
|
||||
docker build -t tensorflow-base:1.4.1-gpu-py3 -f Dockerfile_base_pip.gpu .
|
@ -1 +1 @@
|
||||
docker build -t tensorflow:1.4.1-gpu-py3 -f Dockerfile.gpu .
|
||||
docker build -t tensorflow:1.8.0-gpu-py3 -f Dockerfile.gpu .
|
@ -8,7 +8,7 @@ then
|
||||
fi
|
||||
echo 'Creating training job '$1
|
||||
|
||||
training_image="<replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.4.1-gpu-py3"
|
||||
training_image="<replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.8.0-gpu-py3"
|
||||
iam_role_arn="arn:aws:iam::<replace_me>:role/service-role/<replace_me>"
|
||||
|
||||
aws sagemaker create-training-job \
|
||||
|
Loading…
Reference in New Issue
Block a user