Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update status of workers in GetWorkers #127

Merged
merged 3 commits into from
Jun 30, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion cmd/manager/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,13 @@ func (s *server) StopWorkers(ctx context.Context, in *pb.StopWorkersRequest) (*p
func (s *server) GetWorkers(ctx context.Context, in *pb.GetWorkersRequest) (*pb.GetWorkersReply, error) {
var ws []*pb.Worker
var err error
if in.StudyId == "" {
return &pb.GetWorkersReply{Workers: ws}, errors.New("StudyId should be set")
}
err = s.wIF.UpdateWorkerStatus(in.StudyId)
if err != nil {
return &pb.GetWorkersReply{Workers: ws}, err
}
if in.WorkerId == "" {
ws, err = dbIf.GetWorkerList(in.StudyId, in.TrialId)
} else {
Expand All @@ -152,6 +159,9 @@ func (s *server) GetShouldStopWorkers(ctx context.Context, in *pb.GetShouldStopW

func (s *server) GetMetrics(ctx context.Context, in *pb.GetMetricsRequest) (*pb.GetMetricsReply, error) {
var mNames []string
if in.StudyId == "" {
return &pb.GetMetricsReply{}, errors.New("StudyId should be set")
}
if len(in.MetricsNames) > 0 {
mNames = in.MetricsNames
} else {
Expand All @@ -167,7 +177,10 @@ func (s *server) GetMetrics(ctx context.Context, in *pb.GetMetricsRequest) (*pb.
}
mls := make([]*pb.MetricsLogSet, len(in.WorkerIds))
for i, w := range in.WorkerIds {
wr, err := s.GetWorkers(ctx, &pb.GetWorkersRequest{WorkerId: w})
wr, err := s.GetWorkers(ctx, &pb.GetWorkersRequest{
StudyId: in.StudyId,
WorkerId: w,
})
if err != nil {
return &pb.GetMetricsReply{}, err
}
Expand Down
16 changes: 10 additions & 6 deletions pkg/manager/worker/kubernetes/kubernetes.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,16 @@ func (d *KubernetesWorkerInterface) UpdateWorkerStatus(studyId string) error {
return err
}
for _, w := range ws {
if w.Status == api.State_RUNNING {
if w.Status == api.State_PENDING {
err = d.StoreWorkerLog(w.WorkerId)
if err == nil {
err = d.db.UpdateWorker(w.WorkerId, api.State_RUNNING)
if err != nil {
log.Printf("Error updating status for %s: %v", w.WorkerId, err)
return err
}
}
} else if w.Status == api.State_RUNNING {
c, err := d.IsWorkerComplete(w.WorkerId)
if err != nil {
return err
Expand Down Expand Up @@ -205,11 +214,6 @@ func (d *KubernetesWorkerInterface) SpawnWorker(wid string, workerConf *api.Work
if err != nil {
return err
}
err = d.db.UpdateWorker(wid, api.State_RUNNING)
if err != nil {
log.Printf("Error updating status for %s: %v", job.ObjectMeta.Name, err)
return err
}
log.Printf("Created Job %q.", result.GetObjectMeta().GetName())
return nil
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/suggestion/hyperband_service.go
Original file line number Diff line number Diff line change
Expand Up @@ -337,10 +337,12 @@ func (h *HyperBandSuggestService) evalWorkers(ctx context.Context, c api.Manager
bracket := Bracket{}
for _, tid := range hbparam.evaluatingTrials {
gwreq := &api.GetWorkersRequest{
StudyId: studyId,
TrialId: tid,
}
gwrep, err := c.GetWorkers(ctx, gwreq)
if err != nil {
log.Printf("GetWorkers error %v", err)
return err, nil
}
wl := make([]string, len(gwrep.Workers))
Expand Down
51 changes: 48 additions & 3 deletions test/e2e/test-client.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import (
)

var managerAddr = flag.String("s", "127.0.0.1:6789", "Endpoint of manager default 127.0.0.1:6789")
var suggestArgo = flag.String("a", "random", "Suggestion Algorithm (random, grid)")
var suggestArgo = flag.String("a", "random", "Suggestion Algorithm (random, grid, hyperband)")
var requestnum = flag.Int("r", 2, "Request number for random Suggestions (default: 2)")
var suggestionConfFile = flag.String("c", "", "File path to suggestion config.")

Expand Down Expand Up @@ -53,6 +53,10 @@ func main() {
//RunTrials
workerIds := runTrials(c, studyId, getSuggestReply)

//GetWorkersCheck
getWorker(c, studyId, getSuggestReply, workerIds)
log.Println("GetWorkers checked!")

iter := 0

for !isCompletedAllWorker(c, studyId) {
Expand Down Expand Up @@ -83,8 +87,11 @@ func main() {
//RunTrials
workerIds := runTrials(c, studyId, getSuggestReply)

iter := 0
//GetWorkersCheck
getWorker(c, studyId, getSuggestReply, workerIds)
log.Println("GetWorkers checked!")

iter := 0
for !isCompletedAllWorker(c, studyId) {
if iter > TimeOut {
log.Fatal("GetMetrics Timeout.")
Expand Down Expand Up @@ -220,7 +227,7 @@ func getSuggestion(c api.ManagerClient, studyId string, paramId string) *api.Get

getSuggestReply, err := c.GetSuggestions(ctx, getSuggestRequest)
if err != nil {
log.Fatalf("GetSuggestion Error %v", err)
log.Fatalf("GetSuggestion Error %v \nRequest %v", err, getSuggestRequest)
}
log.Println("Get " + *suggestArgo + " Suggestions:")
for _, t := range getSuggestReply.Trials {
Expand Down Expand Up @@ -288,6 +295,44 @@ func runTrials(c api.ManagerClient, studyId string, getSuggestReply *api.GetSugg
return workerIds
}

func getWorker(c api.ManagerClient, studyId string, getSuggestReply *api.GetSuggestionsReply, workerIds []string) {
ctx := context.Background()
getByTID := []*api.Worker{}
getByWID := []*api.Worker{}
gwr := &api.GetWorkersRequest{
StudyId: studyId,
}
_, err := c.GetWorkers(ctx, gwr)
if err != nil {
log.Fatalf("GwtWorker by Study ID Error %v", err)
}
for _, t := range getSuggestReply.Trials {
gwr := &api.GetWorkersRequest{
StudyId: studyId,
TrialId: t.TrialId,
}
gwrep, err := c.GetWorkers(ctx, gwr)
if err != nil {
log.Fatalf("GwtWorker by Trial ID Error %v", err)
}
getByTID = append(getByTID, gwrep.Workers...)
}
for _, w := range workerIds {
gwr = &api.GetWorkersRequest{
StudyId: studyId,
WorkerId: w,
}
gwrep, err := c.GetWorkers(ctx, gwr)
if err != nil {
log.Fatalf("GwtWorker by Worker ID Error %v", err)
}
getByWID = append(getByWID, gwrep.Workers...)
}
if len(getByTID) != len(getByWID) {
log.Fatalf("GwtWorker by Worker ID is mismatch with GwtWorker by Trial ID \n by Trial ID %v \n by Worker ID %v", getByTID, getByWID)
}
}

func SaveOrUpdateModel(c api.ManagerClient, getMetricsReply *api.GetMetricsReply) {
ctx := context.Background()
for _, mls := range getMetricsReply.MetricsLogSets {
Expand Down