Running TensorFlow with Docker on GCP
Provision Virtual Machine:
$ gcloud auth login
$ gcloud config set project machine-learning-000000 # Your project id
$ gcloud beta compute \
addresses create mlvm \
--region=us-east1 \
--network-tier=PREMIUM
$ MLVM_IP="$(gcloud beta compute \
addresses describe mlvm \
--region=us-east1 \
--format='get(address)')"
$ gcloud beta compute \
instances create mlvm \
--zone=us-east1-b \
--machine-type=n1-standard-2 \
--subnet=default \
--network-tier=PREMIUM \
--address="$MLVM_IP" \
--maintenance-policy=TERMINATE \
--no-service-account \
--no-scopes \
--accelerator=type=nvidia-tesla-p100,count=1 \
--image=centos-7-v20181011 \
--image-project=centos-cloud \
--boot-disk-size=40GB \
--boot-disk-type=pd-standard \
--boot-disk-device-name=mlvm
Configure Virtual Machine:
$ gcloud beta compute ssh user@mlvm
$ sudo su
$ cd ~/
$ curl https://download.docker.com/linux/centos/docker-ce.repo \
> /etc/yum.repos.d/docker-ce.repo
$ curl https://nvidia.github.io/nvidia-docker/centos7/nvidia-docker.repo \
> /etc/yum.repos.d/nvidia-docker.repo
$ yum install --assumeyes \
"@Development Tools" \
"kernel-devel-$(uname -r)" \
"kernel-headers-$(uname -r)" \
"docker-ce-18.06.1" \
"nvidia-docker2-2.0.3"
$ curl https://us.download.nvidia.com/tesla/396.44/NVIDIA-Linux-x86_64-396.44.run \
> NVIDIA-Linux-x86_64-396.44.run
$ sh NVIDIA-Linux-x86_64-396.44.run --silent
$ systemctl enable docker
$ systemctl start docker
$ docker run \
--runtime=nvidia \
-it \
--rm \
tensorflow/tensorflow:1.11.0-devel-gpu \
python -c "import tensorflow as tf; print(tf.contrib.eager.num_gpus())"
😄🙌🎉 … 🔥💰
Destroy Virtual Machine:
$ exit # Exit from 'sudo su'
$ exit # Exit from 'gcloud beta compute ssh user@mlvm'
$ gcloud beta compute \
instances delete mlvm \
--zone=us-east1-b
$ gcloud beta compute \
addresses delete mlvm \
--region=us-east1