Skip to content

Commit

Permalink
fix: no sagemaker autostop or EC2 stop lag (awslabs#703)
Browse files Browse the repository at this point in the history
  • Loading branch information
maghirardelli authored Sep 10, 2021
1 parent 5160b61 commit 9074ccf
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1422,4 +1422,111 @@ describe('EnvironmentSCService', () => {
expect(currentIngressRules).toMatchObject(expectedOutcome);
expect(securityGroupId).toBeUndefined();
});

describe('pollAndSyncSageMakerStatus function', () => {
const roleArn = 'roleArn';
const externalId = 'externalId';
const requestContext = {};

it('should finish updating before returning', async () => {
// BUILD
const sagemakerInstances = { 'notebook-instance-name': {} };
service.pollSageMakerRealtimeStatus = jest.fn(() => {
return { 'notebook-instance-name': 'InService' };
});
service.updateStatus = jest.fn(async () => {
// sleep
await new Promise(r => setTimeout(r, 1000));
// return some non falsey value
return 'Updated';
});

// OPERATE
const sagemakerUpdated = await service.pollAndSyncSageMakerStatus(
roleArn,
externalId,
sagemakerInstances,
requestContext,
);

// CHECK
await expect(sagemakerUpdated).toEqual({ 'notebook-instance-name': 'Updated' });
});

it('should finish update all records that need it before returning', async () => {
// BUILD
const sagemakerInstances = { 'notebook-instance-name': {}, 'notebook-instance-name-1': {} };
service.pollSageMakerRealtimeStatus = jest.fn(() => {
return { 'notebook-instance-name': 'InService', 'notebook-instance-name-1': 'InService' };
});
service.updateStatus = jest.fn(async () => {
// sleep
await new Promise(r => setTimeout(r, 1000));
// return some non falsey value
return 'Updated';
});

// OPERATE
const sagemakerUpdated = await service.pollAndSyncSageMakerStatus(
roleArn,
externalId,
sagemakerInstances,
requestContext,
);

// CHECK
await expect(sagemakerUpdated).toEqual({
'notebook-instance-name': 'Updated',
'notebook-instance-name-1': 'Updated',
});
await expect(Object.keys(sagemakerUpdated).length).toEqual(Object.keys(sagemakerInstances).length);
});
});

describe('pollAndSyncEC2Status function', () => {
const roleArn = 'roleArn';
const externalId = 'externalId';
const requestContext = {};

it('should finish updating before returning', async () => {
// BUILD
const ec2Instances = { 'instance-name': {} };
service.pollEc2RealtimeStatus = jest.fn(() => {
return { 'instance-name': 'running' };
});
service.updateStatus = jest.fn(async () => {
// sleep
await new Promise(r => setTimeout(r, 1000));
// return some non falsey value
return 'Updated';
});

// OPERATE
const ec2Updated = await service.pollAndSyncEc2Status(roleArn, externalId, ec2Instances, requestContext);

// CHECK
await expect(ec2Updated).toEqual({ 'instance-name': 'Updated' });
});

it('should finish update all records that need it before returning', async () => {
// BUILD
const ec2Instances = { 'instance-name': {}, 'instance-name-1': {} };
service.pollEc2RealtimeStatus = jest.fn(() => {
return { 'instance-name': 'running', 'instance-name-1': 'running' };
});
service.updateStatus = jest.fn(async () => {
// sleep
await new Promise(r => setTimeout(r, 1000));
// return some non falsey value
return 'Updated';
});

// OPERATE
const ec2Updated = await service.pollAndSyncEc2Status(roleArn, externalId, ec2Instances, requestContext);

// CHECK
await expect(ec2Updated).toEqual({ 'instance-name': 'Updated', 'instance-name-1': 'Updated' });
await expect(Object.keys(ec2Updated).length).toEqual(Object.keys(ec2Instances).length);
});
});
});
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ class EnvironmentScService extends Service {
let envs = await this._scanner({ fields: ['id', 'indexId', 'status', 'outputs'] })
// Verified with EC2 support team that EC2 describe instances API can take 10K instanceIds without issue
.limit(10000)
.strong()
.scan();
envs = _.filter(
envs,
Expand Down Expand Up @@ -183,14 +184,9 @@ class EnvironmentScService extends Service {
'terminated': 'TERMINATED',
};
const ec2RealtimeStatus = await this.pollEc2RealtimeStatus(roleArn, externalId, ec2Instances);
const ec2Updated = {};
_.forEach(ec2Instances, async (existingEnvRecord, ec2InstanceId) => {
const expectedDDBStatus = EC2StatusMap[ec2RealtimeStatus[ec2InstanceId]];
const updateStatusResult = await this.updateStatus(requestContext, existingEnvRecord, expectedDDBStatus);
if (updateStatusResult) {
ec2Updated[ec2InstanceId] = updateStatusResult;
}
});

const ec2Updated = await this.updateAllStatuses(ec2Instances, EC2StatusMap, ec2RealtimeStatus, requestContext);

return ec2Updated;
}

Expand Down Expand Up @@ -225,17 +221,34 @@ class EnvironmentScService extends Service {
Failed: 'FAILED',
};
const sagemakerRealtimeStatus = await this.pollSageMakerRealtimeStatus(roleArn, externalId);
const sagemakerUpdated = {};
_.forEach(sagemakerInstances, async (existingEnvRecord, key) => {
const expectedDDBStatus = SageMakerStatusMap[sagemakerRealtimeStatus[key]];
const updateStatusResult = await this.updateStatus(requestContext, existingEnvRecord, expectedDDBStatus);
if (updateStatusResult) {
sagemakerUpdated[key] = updateStatusResult;
}
});

const sagemakerUpdated = await this.updateAllStatuses(
sagemakerInstances,
SageMakerStatusMap,
sagemakerRealtimeStatus,
requestContext,
);

return sagemakerUpdated;
}

async updateAllStatuses(instancesList, statusMap, realtimeStatus, requestContext) {
const updated = {};
const keys = Object.keys(instancesList);
await Promise.all(
keys.map(async key => {
const existingEnvRecord = instancesList[key];
const expectedDDBStatus = statusMap[realtimeStatus[key]];
const updateStatusResult = await this.updateStatus(requestContext, existingEnvRecord, expectedDDBStatus);
if (updateStatusResult) {
updated[key] = updateStatusResult;
}
}),
);

return updated;
}

async pollSageMakerRealtimeStatus(roleArn, externalId) {
const aws = await this.service('aws');
const sagemakerClient = await aws.getClientSdkForRole({ roleArn, externalId, clientName: 'SageMaker' });
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class DbService extends Service {
add: jest.fn().mockReturnThis(),
values: jest.fn().mockReturnThis(),
return: jest.fn().mockReturnThis(),
strong: jest.fn().mockReturnThis(),

// Following functions are actual calls to dynamo
scan: jest.fn(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ const handler = async () => {
const environmentScService = await container.find('environmentScService');
const userContext = getSystemRequestContext();
const updatedEnvData = await environmentScService.pollAndSyncWsStatus(userContext);
return { statusCode: 200, body: updatedEnvData };
return { statusCode: 200, body: JSON.stringify(updatedEnvData) };
};

// eslint-disable-next-line import/prefer-default-export
Expand Down

0 comments on commit 9074ccf

Please sign in to comment.