diff --git a/example/integrations/tensorflow/Dockerfile b/example/integrations/tensorflow/Dockerfile index 5821747c08..5d0d12ed91 100644 --- a/example/integrations/tensorflow/Dockerfile +++ b/example/integrations/tensorflow/Dockerfile @@ -1,6 +1,10 @@ -#NOTE: the build process would change during developing. +# NOTE: the build process may change during development. +# Commit ID when the image was first created: 62c833f806db621943a6cf8195657b9d0fa67d93 (master). +# The original image is gcr.io/kubeflow/tf-benchmarks-cpu:v20171202-bdab599-dirty-284af3; +# the image needs an update to use the latest tf-benchmark logic, +# ref => https://github.com/tensorflow/benchmarks/tree/master/scripts/tf_cnn_benchmarks. FROM python:2.7 -MAINTAINER volcano +MAINTAINER volcano RUN apt-get update --fix-missing \ && apt-get install -y git \ && apt-get clean \ diff --git a/example/integrations/tensorflow/tf-example.yaml b/example/integrations/tensorflow/tf-example.yaml index 05bc67d781..92d004e45b 100644 --- a/example/integrations/tensorflow/tf-example.yaml +++ b/example/integrations/tensorflow/tf-example.yaml @@ -27,14 +27,15 @@ # 2019-04-23 11:10:25.552861: I tensorflow/core/distributed_runtime/rpc/grpc_channel.cc:215] # Initialize GrpcChannelCache for job worker -> {0 -> tensorflow-benchmark-worker-0.tensorflow-benchmark:2222} # -# **NOTES**: This example may take about an hour to finish. +# **NOTES**: This example may take about an hour to finish. When running multiple jobs, please ensure enough resources +# are guaranteed for each of the worker pods. apiVersion: batch.volcano.sh/v1alpha1 kind: Job metadata: name: tensorflow-benchmark spec: - minAvailable: 2 + minAvailable: 3 schedulerName: kube-batch plugins: env: [] @@ -65,7 +66,7 @@ spec: resources: {} workingDir: /opt/tf-benchmarks/scripts/tf_cnn_benchmarks restartPolicy: OnFailure - - replicas: 1 + - replicas: 2 name: worker policies: - event: TaskCompleted