1.8.0 image
This commit is contained in:
parent
eedbee5253
commit
b81cb57230
@ -6,7 +6,11 @@ WORKDIR /usr/src
|
|||||||
|
|
||||||
RUN python3 setup.py sdist
|
RUN python3 setup.py sdist
|
||||||
|
|
||||||
FROM tensorflow-base:1.4.1-gpu-py3
|
FROM tensorflow/tensorflow:1.8.0-gpu-py3
|
||||||
|
|
||||||
|
#tensorflow-serving-api-python3==1.7.0
|
||||||
|
RUN pip3 list && pip3 install numpy boto3 six awscli flask==0.11 Jinja2==2.9 gevent gunicorn keras==2.1.3 pillow h5py \
|
||||||
|
&& pip3 list
|
||||||
|
|
||||||
WORKDIR /root
|
WORKDIR /root
|
||||||
|
|
||||||
|
@ -1,29 +0,0 @@
|
|||||||
FROM nvidia/cuda:8.0-cudnn6-devel-ubuntu16.04
|
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
|
||||||
python3-pip python3-dev python3-setuptools \
|
|
||||||
&& \
|
|
||||||
apt-get clean && \
|
|
||||||
rm -rf /var/lib/apt/lists/* \
|
|
||||||
&& pip3 install tensorflow-gpu==1.4.1
|
|
||||||
|
|
||||||
RUN pip3 list && pip3 install numpy boto3 six awscli flask==0.11 Jinja2==2.9 gevent gunicorn keras==2.1.3 pillow h5py \
|
|
||||||
&& pip3 list
|
|
||||||
|
|
||||||
# Configure the build for our CUDA configuration.
|
|
||||||
ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64/stubs:$LD_LIBRARY_PATH
|
|
||||||
ENV CI_BUILD_PYTHON=python \
|
|
||||||
LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH \
|
|
||||||
TF_NEED_CUDA=1 \
|
|
||||||
TF_CUDA_VERSION=8.0 \
|
|
||||||
TF_CUDNN_VERSION=6 \
|
|
||||||
TF_CUDA_COMPUTE_CAPABILITIES=3.7,6.1
|
|
||||||
|
|
||||||
# Fix paths so that CUDNN can be found
|
|
||||||
# See https://github.com/tensorflow/tensorflow/issues/8264
|
|
||||||
RUN ls -lah /usr/local/cuda/lib64/*
|
|
||||||
RUN mkdir /usr/lib/x86_64-linux-gnu/include/ && \
|
|
||||||
ln -s /usr/lib/x86_64-linux-gnu/include/cudnn.h /usr/lib/x86_64-linux-gnu/include/cudnn.h && \
|
|
||||||
ln -s /usr/include/cudnn.h /usr/local/cuda/include/cudnn.h && \
|
|
||||||
ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/local/cuda/lib64/libcudnn.so && \
|
|
||||||
ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.6 /usr/local/cuda/lib64/libcudnn.so.6
|
|
14
Readme.md
14
Readme.md
@ -4,16 +4,10 @@ Run DIY Robocars model training as Sagemaker (https://aws.amazon.com/fr/sagemake
|
|||||||
|
|
||||||
# Build images
|
# Build images
|
||||||
|
|
||||||
- Build base image:
|
|
||||||
|
|
||||||
```
|
|
||||||
docker build -t robocars-base:1.4.1-gpu-py3 -f Dockerfile_base.gpu .
|
|
||||||
```
|
|
||||||
|
|
||||||
- Build model image:
|
- Build model image:
|
||||||
|
|
||||||
```
|
```
|
||||||
docker build -t robocars:1.4.1-gpu-py3 -f Dockerfile.gpu .
|
docker build -t robocars:1.8.0-gpu-py3 -f Dockerfile.gpu .
|
||||||
```
|
```
|
||||||
|
|
||||||
# Prepare training (once)
|
# Prepare training (once)
|
||||||
@ -22,9 +16,9 @@ docker build -t robocars:1.4.1-gpu-py3 -f Dockerfile.gpu .
|
|||||||
- Create an AWS docker registry and push your model image to it. Docker hub registry is not supported
|
- Create an AWS docker registry and push your model image to it. Docker hub registry is not supported
|
||||||
|
|
||||||
```
|
```
|
||||||
docker tag robocars:1.4.1-gpu-py <replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.4.1-gpu-py3
|
docker tag robocars:1.8.0-gpu-py3 <replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.8.0-gpu-py3
|
||||||
# you should have AWS SDK installed and login to docker
|
# you should have AWS SDK installed and login to docker
|
||||||
docker push <replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.4.1-gpu-py3
|
docker push <replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.8.0-gpu-py3
|
||||||
```
|
```
|
||||||
|
|
||||||
# Run training
|
# Run training
|
||||||
@ -47,7 +41,7 @@ echo 'Creating training job '$1
|
|||||||
aws sagemaker create-training-job \
|
aws sagemaker create-training-job \
|
||||||
--training-job-name $job_name \
|
--training-job-name $job_name \
|
||||||
--hyper-parameters '{ "sagemaker_region": "\"eu-west-1\"", "with_slide": "true" }' \
|
--hyper-parameters '{ "sagemaker_region": "\"eu-west-1\"", "with_slide": "true" }' \
|
||||||
--algorithm-specification TrainingImage="<replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.4.1-gpu-py3",TrainingInputMode=File \
|
--algorithm-specification TrainingImage="<replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.8.0-gpu-py3",TrainingInputMode=File \
|
||||||
--role-arn "<your_iam_sagemaker_role>" \
|
--role-arn "<your_iam_sagemaker_role>" \
|
||||||
--input-data-config '[{ "ChannelName": "train", "DataSource": { "S3DataSource": { "S3DataType": "S3Prefix", "S3Uri": "s3://<your_input_bucket>", "S3DataDistributionType": "FullyReplicated" }} }]' \
|
--input-data-config '[{ "ChannelName": "train", "DataSource": { "S3DataSource": { "S3DataType": "S3Prefix", "S3Uri": "s3://<your_input_bucket>", "S3DataDistributionType": "FullyReplicated" }} }]' \
|
||||||
--output-data-config S3OutputPath=s3://<your_output_bucket> \
|
--output-data-config S3OutputPath=s3://<your_output_bucket> \
|
||||||
|
@ -1 +0,0 @@
|
|||||||
docker build -t tensorflow-base:1.4.1-gpu-py3 -f Dockerfile_base_pip.gpu .
|
|
@ -1 +1 @@
|
|||||||
docker build -t tensorflow:1.4.1-gpu-py3 -f Dockerfile.gpu .
|
docker build -t tensorflow:1.8.0-gpu-py3 -f Dockerfile.gpu .
|
@ -8,7 +8,7 @@ then
|
|||||||
fi
|
fi
|
||||||
echo 'Creating training job '$1
|
echo 'Creating training job '$1
|
||||||
|
|
||||||
training_image="<replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.4.1-gpu-py3"
|
training_image="<replace_me>.dkr.ecr.eu-west-1.amazonaws.com/robocars:1.8.0-gpu-py3"
|
||||||
iam_role_arn="arn:aws:iam::<replace_me>:role/service-role/<replace_me>"
|
iam_role_arn="arn:aws:iam::<replace_me>:role/service-role/<replace_me>"
|
||||||
|
|
||||||
aws sagemaker create-training-job \
|
aws sagemaker create-training-job \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user