Skip to content
This repository has been archived by the owner on Sep 18, 2024. It is now read-only.

Change default CPU scheduler from random to round-robin #1702

Merged
merged 13 commits into from
Nov 8, 2019
23 changes: 22 additions & 1 deletion src/nni_manager/training_service/remote_machine/gpuScheduler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,20 +28,27 @@ import {
parseGpuIndices, RemoteMachineMeta, RemoteMachineScheduleResult, RemoteMachineTrialJobDetail, ScheduleResultType, SSHClientManager
} from './remoteMachineData';

/**
 * Names of the machine-selection policies that GPUScheduler dispatches on
 * when choosing a remote machine for a trial.
 */
type SCHEDULE_POLICY_NAME = 'random' | 'round-robin';
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can users change schedule policy?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not for now; there is currently no need to switch it from round-robin to random.


/**
* A simple GPU scheduler implementation
*/
export class GPUScheduler {

// Map from each remote machine to its SSH client manager; supplied by the constructor.
private readonly machineSSHClientMap : Map<RemoteMachineMeta, SSHClientManager>;
// Logger instance obtained from getLogger().
private readonly log: Logger = getLogger();
// Active machine-selection policy; readonly, so it stays at the 'round-robin' default.
private readonly policyName: SCHEDULE_POLICY_NAME = 'round-robin';
// Position in configuredRMs where the next round-robin search starts (used modulo the array length).
private roundRobinIndex: number = 0;
// Snapshot of the keys of machineSSHClientMap taken at construction time; the rotation order for round-robin.
private configuredRMs: RemoteMachineMeta[] = [];

/**
 * Build a scheduler over the given set of remote machines.
 * Asserts that at least one machine is present, then records both the map
 * itself and a list of its keys (the machines) for later selection.
 * @param machineSSHClientMap map from remote machine to sshClient
 */
constructor(machineSSHClientMap : Map<RemoteMachineMeta, SSHClientManager>) {
    const machineCount = machineSSHClientMap.size;
    assert(machineCount > 0);

    // Capture the machines in the map's iteration order.
    this.configuredRMs = Array.from(machineSSHClientMap.keys());
    this.machineSSHClientMap = machineSSHClientMap;
}

/**
Expand Down Expand Up @@ -189,7 +196,21 @@ export class GPUScheduler {
/**
 * Choose one remote machine from the candidates according to the
 * scheduler's policy.
 * @param rmMetas candidate machines; asserted to be a non-empty array
 * @returns the machine picked by the active policy
 * @throws Error when policyName is not one of the supported policies
 */
private selectMachine(rmMetas: RemoteMachineMeta[]): RemoteMachineMeta {
    assert(rmMetas !== undefined && rmMetas.length > 0);

    // Dispatch on the policy. There must be no unconditional return before
    // this chain, otherwise the round-robin branch is never reached.
    if (this.policyName === 'random') {
        return randomSelect(rmMetas);
    } else if (this.policyName === 'round-robin') {
        return this.roundRobinSelect(rmMetas);
    } else {
        throw new Error(`Unsupported schedule policy: ${this.policyName}`);
    }
}

/**
 * Pick the next candidate machine in round-robin order.
 * Starting at the saved cursor, walks configuredRMs (wrapping around) and
 * returns the first machine that is also present in rmMetas, then moves the
 * cursor to the slot after it.
 * @param rmMetas candidate machines for this scheduling decision
 * @returns the first configured machine, from the cursor onward, that is a candidate
 * @throws Error when no entry of rmMetas is one of the configured machines
 */
private roundRobinSelect(rmMetas: RemoteMachineMeta[]): RemoteMachineMeta {
    const total = this.configuredRMs.length;

    // Probe every configured machine at most once so the search always
    // terminates, even when the candidates share nothing with configuredRMs
    // (membership is checked by reference via Array.prototype.includes).
    for (let offset = 0; offset < total; offset++) {
        const slot = (this.roundRobinIndex + offset) % total;
        const candidate = this.configuredRMs[slot];
        if (rmMetas.includes(candidate)) {
            // Keep the cursor within [0, total) instead of growing forever.
            this.roundRobinIndex = (slot + 1) % total;

            return candidate;
        }
    }

    throw new Error('Round-robin selection failed: none of the candidate machines is a configured remote machine');
}

private selectGPUsForTrial(gpuInfos: GPUInfo[], requiredGPUNum: number): GPUInfo[] {
Expand Down