Skip to content

Commit

Permalink
Use a community hosted image in MXJob E2E (#1928)
Browse files (browse the repository at this point in the history)
Signed-off-by: Yuki Iwai <yuki.iwai.tz@gmail.com>
  • Loading branch information
tenzen-y authored Oct 5, 2023
1 parent 7183081 commit 95f2553
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 9 deletions.
4 changes: 2 additions & 2 deletions examples/mxnet/mxjob_dist_v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ spec:
containers:
- name: mxnet
image: kubeflow/mxnet-gpu:latest
- command: ["python"]
- args: ["/incubator-mxnet/example/image-classification/train_mnist.py","--num-epochs","10","--num-layers","2","--kv-store","dist_device_sync","--gpus","0"]
+ command: ["python3"]
+ args: ["/mxnet/mxnet/example/image-classification/train_mnist.py","--num-epochs","10","--num-layers","2","--kv-store","dist_device_sync","--gpus","0"]
resources:
limits:
nvidia.com/gpu: 1
Expand Down
11 changes: 4 additions & 7 deletions sdk/python/test/e2e/test_e2e_mxjob.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,11 +221,10 @@ def generate_mxjob(
def generate_containers() -> Tuple[V1Container, V1Container, V1Container]:
worker_container = V1Container(
name=CONTAINER_NAME,
- # TODO (tenzen-y): Replace the below image with the kubeflow hosted image
- image="docker.io/johnugeorge/mxnet:1.9.1_cpu_py3",
+ image="docker.io/kubeflow/mxnet-gpu:latest",
command=["/usr/local/bin/python3"],
args=[
- "incubator-mxnet/example/image-classification/train_mnist.py",
+ "/mxnet/mxnet/example/image-classification/train_mnist.py",
"--num-epochs",
"1",
"--num-examples",
Expand All @@ -239,16 +238,14 @@ def generate_containers() -> Tuple[V1Container, V1Container, V1Container]:

server_container = V1Container(
name=CONTAINER_NAME,
- # TODO (tenzen-y): Replace the below image with the kubeflow hosted image
- image="docker.io/johnugeorge/mxnet:1.9.1_cpu_py3",
+ image="docker.io/kubeflow/mxnet-gpu:latest",
ports=[V1ContainerPort(container_port=9991, name="mxjob-port")],
resources=V1ResourceRequirements(limits={"memory": "1Gi", "cpu": "0.25"}),
)

scheduler_container = V1Container(
name=CONTAINER_NAME,
- # TODO (tenzen-y): Replace the below image with the kubeflow hosted image
- image="docker.io/johnugeorge/mxnet:1.9.1_cpu_py3",
+ image="docker.io/kubeflow/mxnet-gpu:latest",
ports=[V1ContainerPort(container_port=9991, name="mxjob-port")],
resources=V1ResourceRequirements(limits={"memory": "1Gi", "cpu": "0.25"}),
)
Expand Down

0 comments on commit 95f2553

Please sign in to comment.