# ChainerJob "chainer-job" (namespace "default") using the MPI backend.
# One master pod runs mpiexec; one worker set ("ws") with a single replica
# idles until it is terminated.
#
# NOTE(review): the operator log captured alongside this manifest shows a
# nil-pointer panic in mpi.newConfigMap; presumably it was triggered while
# reconciling this job -- confirm which spec field the operator expects.
apiVersion: kubeflow.org/v1alpha1
kind: ChainerJob
metadata:
  name: chainer-job
  namespace: default
spec:
  backend: mpi
  master:
    template:
      spec:
        containers:
          # Effective command line:
          #   mpiexec -n 2 -N -1 --allow-run-as-root \
          #     python3 /train_mnist.py -e 2 -b 1000 -u 100
          # Numeric arguments are quoted so they stay strings.
          # NOTE(review): "-N -1" is kept exactly as submitted; verify that
          # a negative value is intended for this mpiexec option.
          - args:
              - -n
              - "2"
              - -N
              - "-1"
              - --allow-run-as-root
              - python3
              - /train_mnist.py
              - -e
              - "2"
              - -b
              - "1000"
              - -u
              - "100"
            command:
              - mpiexec
            image: everpeace/chainermn:latest
            name: chainer-job
  workerSets:
    ws:
      replicas: 1
      template:
        spec:
          containers:
            # Runs `sh -c <script>`: the script loops on `sleep 1 & wait`
            # and exits when the shell receives TERM.
            - args:
                - -c
                - "trap exit TERM; while true; do sleep 1 & wait; done"
              command:
                - sh
              image: everpeace/chainermn:latest
              name: chainer-job
E0628 12:31:07.098157 1 runtime.go:66] Observed a panic: "invalid memory address or nil pointer dereference" (runtime error: invalid memory address or nil pointer dereference)
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:72
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:65
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:51
chainer-operator-c9cb5f946-cfxpc chainer-operator /usr/local/go/src/runtime/asm_amd64.s:573
chainer-operator-c9cb5f946-cfxpc chainer-operator /usr/local/go/src/runtime/panic.go:502
chainer-operator-c9cb5f946-cfxpc chainer-operator /usr/local/go/src/runtime/panic.go:63
chainer-operator-c9cb5f946-cfxpc chainer-operator /usr/local/go/src/runtime/signal_unix.go:388
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/pkg/controllers/backends/mpi/mpi_backend.go:282
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/pkg/controllers/backends/util.go:199
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/pkg/controllers/backends/mpi/mpi_backend.go:234
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/pkg/controllers/backends/mpi/mpi_backend.go:136
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/pkg/controllers/chainer_controller.go:448
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/pkg/controllers/chainer_controller.go:340
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/pkg/controllers/chainer_controller.go:348
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/pkg/controllers/chainer_controller.go:301
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/pkg/controllers/chainer_controller.go:287
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:133
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:134
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/vendor/k8s.io/apimachinery/pkg/util/wait/wait.go:88
chainer-operator-c9cb5f946-cfxpc chainer-operator /usr/local/go/src/runtime/asm_amd64.s:2361
chainer-operator-c9cb5f946-cfxpc chainer-operator panic: runtime error: invalid memory address or nil pointer dereference [recovered]
chainer-operator-c9cb5f946-cfxpc chainer-operator panic: runtime error: invalid memory address or nil pointer dereference
chainer-operator-c9cb5f946-cfxpc chainer-operator [signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0xe9e1c0]
chainer-operator-c9cb5f946-cfxpc chainer-operator
chainer-operator-c9cb5f946-cfxpc chainer-operator goroutine 127 [running]:
chainer-operator-c9cb5f946-cfxpc chainer-operator github.com/kubeflow/chainer-operator/vendor/k8s.io/apimachinery/pkg/util/runtime.HandleCrash(0x0, 0x0, 0x0)
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:58 +0x107
chainer-operator-c9cb5f946-cfxpc chainer-operator panic(0xfaee60, 0x18c3d70)
chainer-operator-c9cb5f946-cfxpc chainer-operator /usr/local/go/src/runtime/panic.go:502 +0x229
chainer-operator-c9cb5f946-cfxpc chainer-operator github.com/kubeflow/chainer-operator/pkg/controllers/backends/mpi.newConfigMap(0xc420288000, 0xc4202b6dc0)
chainer-operator-c9cb5f946-cfxpc chainer-operator /go/src/github.com/kubeflow/chainer-operator/pkg/controllers/backends/mpi/mpi_backend.go:282 +0x180