Skip to content

Commit

Permalink
Merge branch 'main' of github.com:DevinWain/sedna into main
Browse files Browse the repository at this point in the history
Signed-off-by: DevinWain <devinwain@foxmail.com>
  • Loading branch information
DevinWain committed Sep 2, 2022
2 parents 622b532 + 94458a3 commit 5032cbf
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 8 deletions.
16 changes: 16 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,22 @@ lint:
hack/make-rules/lint.sh
endif

# pylint target: with HELP=y it prints PYLINT_HELP_INFO; otherwise it runs
# hack/make-rules/pylint.sh.
define PYLINT_HELP_INFO
# run python lint check.
#
# Example:
# make pylint
# make pylint HELP=y
endef
.PHONY: pylint
ifeq ($(HELP),y)
pylint:
@echo "$$PYLINT_HELP_INFO"
else
pylint:
hack/make-rules/pylint.sh
endif

define CLEAN_HELP_INFO
# Clean up the output of make.
#
Expand Down
5 changes: 3 additions & 2 deletions hack/lib/buildx.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ sedna::buildx::prepare_env() {
docker buildx create --use --name $builder_instance --driver docker-container
fi
docker buildx use $builder_instance

# go speed tag with CGO_ENABLED=1 and alpine image
_speed_buildx_for_cgo_alpine_
}

_speed_buildx_for_go_() {
Expand Down Expand Up @@ -101,8 +104,6 @@ sedna::buildx:generate-speed-dockerfile() {
# go speed tag
_speed_buildx_for_go_

# go speed tag with CGO_ENABLED=1 and alpine image
_speed_buildx_for_cgo_alpine_
)

local base_cmds='
Expand Down
40 changes: 40 additions & 0 deletions hack/make-rules/pylint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env bash

# Copyright 2020 The KubeEdge Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Strict mode: exit on a failing command, treat unset variables as errors,
# and make a pipeline fail if any command in it fails.
set -o errexit
set -o nounset
set -o pipefail

# Repository root: the physical path two directories above this script.
SEDNA_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd -P)"

# Load the shared helper definitions from hack/lib/init.sh.
source "${SEDNA_ROOT}/hack/lib/init.sh"

# install_pylint installs (or upgrades) pylint for python3 through pip.
# On failure it prints an error to stderr and exits the script with status 1.
install_pylint() {
  echo "installing pylint"
  # Test the command directly: this script runs under `set -o errexit`, so a
  # separate `$?` check after a failing pip would never be reached.
  if ! python3 -m pip install -U pylint; then
    echo "failed to install pylint, exiting." >&2
    exit 1
  fi
}

# check_pylint installs pylint via install_pylint and then runs it over
# ${SEDNA_ROOT}/lib. The exit status of pylint is the function's status.
check_pylint() {
  echo "checking pylint"
  install_pylint
  # Quote the path so a checkout directory containing spaces or glob
  # characters is still passed to pylint as a single argument.
  pylint "${SEDNA_ROOT}/lib"
}

# Script entry point: install pylint and run it over ${SEDNA_ROOT}/lib.
check_pylint
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"encoding/json"
"fmt"
"strconv"
"strings"
"time"

Expand Down Expand Up @@ -542,6 +543,37 @@ func IsJobFinished(j *sednav1.LifelongLearningJob) bool {
return false
}

// hasCompletedInitialTraining reports whether jobConditions contains at least
// one condition whose stage is LLJobTrain and whose type is
// LLJobStageCondCompleted. It returns false for a nil or empty slice.
func (c *Controller) hasCompletedInitialTraining(jobConditions []sednav1.LLJobCondition) bool {
	// Index into the slice instead of copying each condition struct.
	for i := range jobConditions {
		if jobConditions[i].Stage == sednav1.LLJobTrain && jobConditions[i].Type == sednav1.LLJobStageCondCompleted {
			return true
		}
	}
	return false
}

// getCloudKBIndex scans jobConditions from the last element to the first and
// returns the URL of the first output model of the first completed train
// condition whose data can be unmarshalled and lists at least one model.
// It returns "" when no such condition exists.
func (c *Controller) getCloudKBIndex(jobConditions []sednav1.LLJobCondition) string {
	for idx := len(jobConditions) - 1; idx >= 0; idx-- {
		current := jobConditions[idx]

		// Only completed train-stage conditions are candidates.
		if current.Stage != sednav1.LLJobTrain || current.Type != sednav1.LLJobStageCondCompleted {
			continue
		}

		// Conditions with unparsable data are skipped so that an earlier
		// usable condition can still be found.
		var data ConditionData
		if err := (&data).Unmarshal([]byte(current.Data)); err != nil {
			continue
		}

		if data.Output == nil || len(data.Output.Models) == 0 {
			continue
		}

		first := data.Output.Models[0]
		return first.GetURL()
	}
	return ""
}

func (c *Controller) createPod(job *sednav1.LifelongLearningJob, podtype sednav1.LLJobStage) (err error) {
ctx := context.Background()
var podTemplate *v1.PodTemplateSpec
Expand Down Expand Up @@ -571,8 +603,10 @@ func (c *Controller) createPod(job *sednav1.LifelongLearningJob, podtype sednav1
return err
}

jobConditions := job.Status.Conditions

// get all url for train and eval from data in condition
condDataStr := job.Status.Conditions[len(job.Status.Conditions)-1].Data
condDataStr := jobConditions[len(job.Status.Conditions)-1].Data
klog.V(2).Infof("lifelonglearning job %v/%v data condition:%s", job.Namespace, job.Name, condDataStr)
var cond ConditionData
(&cond).Unmarshal([]byte(condDataStr))
Expand All @@ -598,13 +632,19 @@ func (c *Controller) createPod(job *sednav1.LifelongLearningJob, podtype sednav1
podTemplate = &job.Spec.TrainSpec.Template
// Env parameters for train

hasCompletedInitialTraining := c.hasCompletedInitialTraining(jobConditions)

workerParam.Env = map[string]string{
"NAMESPACE": job.Namespace,
"JOB_NAME": job.Name,
"WORKER_NAME": "train-worker-" + utilrand.String(5),
"NAMESPACE": job.Namespace,
"JOB_NAME": job.Name,
"WORKER_NAME": "train-worker-" + utilrand.String(5),
"HAS_COMPLETED_INITIAL_TRAINING": strconv.FormatBool(hasCompletedInitialTraining),
"LC_SERVER": c.cfg.LC.Server,
"KB_SERVER": c.cfg.KB.Server,
}

"LC_SERVER": c.cfg.LC.Server,
"KB_SERVER": c.cfg.KB.Server,
if hasCompletedInitialTraining {
workerParam.Env["CLOUD_KB_INDEX"] = c.getCloudKBIndex(jobConditions)
}

workerParam.Mounts = append(workerParam.Mounts,
Expand Down Expand Up @@ -701,6 +741,7 @@ func (c *Controller) createPod(job *sednav1.LifelongLearningJob, podtype sednav1
// set the default policy instead of Always policy
workerParam.RestartPolicy = v1.RestartPolicyOnFailure
workerParam.HostNetwork = true
workerParam.DNSPolicy = v1.DNSClusterFirstWithHostNet

// create pod based on podtype
_, err = runtime.CreatePodWithTemplate(c.kubeClient, job, podTemplate, workerParam)
Expand Down Expand Up @@ -745,6 +786,7 @@ func (c *Controller) createInferPod(job *sednav1.LifelongLearningJob) error {

workerParam.WorkerType = runtime.InferencePodType
workerParam.HostNetwork = true
workerParam.DNSPolicy = v1.DNSClusterFirstWithHostNet

// create edge pod
_, err = runtime.CreatePodWithTemplate(c.kubeClient, job, &job.Spec.DeploySpec.Template, workerParam)
Expand Down
6 changes: 6 additions & 0 deletions pkg/globalmanager/runtime/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ type WorkerParam struct {
ModelHotUpdate ModelHotUpdate

RestartPolicy v1.RestartPolicy

DNSPolicy v1.DNSPolicy
}

type ModelHotUpdate struct {
Expand Down Expand Up @@ -163,6 +165,10 @@ func injectWorkerParam(pod *v1.Pod, workerParam *WorkerParam, object CommonInter
if pod.Spec.RestartPolicy == "" {
pod.Spec.RestartPolicy = workerParam.RestartPolicy
}

if workerParam.DNSPolicy != "" {
pod.Spec.DNSPolicy = workerParam.DNSPolicy
}
}

// CreatePodWithTemplate creates and returns a pod object given a crd object, pod template, and workerParam
Expand Down

0 comments on commit 5032cbf

Please sign in to comment.