Running TensorFlow with Docker on GCP

Provision Virtual Machine

# Authenticate the local gcloud CLI and select the project to bill/operate on
$ gcloud auth login

$ gcloud config set project machine-learning-000000 # Your project id

# Reserve a static external IP named "mlvm" (premium network tier, us-east1),
# so the VM keeps the same address across restarts
$ gcloud beta compute \
addresses create mlvm \
--region=us-east1 \
--network-tier=PREMIUM

# Capture the reserved IP. Use gcloud's --format projection instead of
# scraping the human-readable output with head/awk — that layout is not a
# stable interface and can change between gcloud releases.
$ MLVM_IP="$(gcloud beta compute \
addresses describe mlvm \
--region=us-east1 \
--format='get(address)')"

# Create the VM: 2 vCPUs (n1-standard-2), one Tesla P100 GPU, CentOS 7,
# 40 GB standard persistent disk, attached to the reserved address above.
# --maintenance-policy=TERMINATE: GPU instances cannot live-migrate.
# --no-service-account/--no-scopes: VM gets no GCP API credentials.
$ gcloud beta compute \
instances create mlvm \
--zone=us-east1-b \
--machine-type=n1-standard-2 \
--subnet=default \
--network-tier=PREMIUM \
--address="$MLVM_IP" \
--maintenance-policy=TERMINATE \
--no-service-account \
--no-scopes \
--accelerator=type=nvidia-tesla-p100,count=1 \
--image=centos-7-v20181011 \
--image-project=centos-cloud \
--boot-disk-size=40GB \
--boot-disk-type=pd-standard \
--boot-disk-device-name=mlvm

Configure Virtual Machine

# SSH into the VM. Pass the zone explicitly (the instance was created in
# us-east1-b) so gcloud does not have to prompt or rely on a default zone.
$ gcloud beta compute ssh user@mlvm --zone=us-east1-b

# Become root for the package/driver installation that follows
$ sudo su

$ cd ~/

# Install the Docker CE and nvidia-docker yum repo definitions.
# -f: fail on HTTP errors instead of saving an error page as a .repo file
# -sS: hide the progress meter but still print errors; -L: follow redirects
$ curl -fsSL https://download.docker.com/linux/centos/docker-ce.repo \
> /etc/yum.repos.d/docker-ce.repo

$ curl -fsSL https://nvidia.github.io/nvidia-docker/centos7/nvidia-docker.repo \
> /etc/yum.repos.d/nvidia-docker.repo

# Install build tools plus kernel devel/headers matching the *running*
# kernel (required to compile the NVIDIA kernel module below), and pinned
# versions of Docker CE and nvidia-docker2 for reproducibility.
$ yum install --assumeyes \
"@Development Tools" \
"kernel-devel-$(uname -r)" \
"kernel-headers-$(uname -r)" \
"docker-ce-18.06.1" \
"nvidia-docker2-2.0.3"

# Download the NVIDIA Tesla driver (396.44).
# -f: fail on HTTP errors so a broken download is never executed below
# -sS: hide the progress meter but still print errors; -L: follow redirects
$ curl -fsSL https://us.download.nvidia.com/tesla/396.44/NVIDIA-Linux-x86_64-396.44.run \
> NVIDIA-Linux-x86_64-396.44.run

# Install the driver non-interactively; builds the kernel module using the
# kernel-devel/headers packages installed above
$ sh NVIDIA-Linux-x86_64-396.44.run --silent

# Start Docker now and on every boot. The nvidia-docker2 package installed
# above configures the "nvidia" runtime that the test run below uses.
$ systemctl enable docker

$ systemctl start docker

# Smoke test: run the TensorFlow GPU image under the NVIDIA runtime and
# confirm TensorFlow can see the GPU (expected output: 1).
# --rm removes the container when the command exits.
$ docker run \
--runtime=nvidia \
-it \
--rm \
tensorflow/tensorflow:1.11.0-devel-gpu \
python -c "import tensorflow as tf; print(tf.contrib.eager.num_gpus())"

😄🙌🎉…🔥💰

Destroy Virtual Machine

$ exit # Leave the root shell started by 'sudo su'

$ exit # Close the SSH session to the VM

# Delete the instance FIRST: GCP refuses to release a static address while
# it is still attached to a running instance, so the original order
# (address before instance) would fail.
$ gcloud beta compute \
instances delete mlvm \
--zone=us-east1-b

# Now release the reserved static IP (it bills while unattached)
$ gcloud beta compute \
addresses delete mlvm \
--region=us-east1