Skip to content
This repository has been archived by the owner on Nov 16, 2023. It is now read-only.

Commit

Permalink
[VS Code] Fix job v2 submission bugs. (#3151)
Browse files Browse the repository at this point in the history
* Add protocol version to cluster config

* auto check protocol version when empty

* merge create job config

* update create config v2 from script

* fix simulate job running from cluster explorer

* add warning when simulate yaml job config file

* add job name suffix for submit job v2

* update
  • Loading branch information
yiyione authored Jul 12, 2019
1 parent 2055ed8 commit 9a0f9e2
Show file tree
Hide file tree
Showing 10 changed files with 275 additions and 150 deletions.
6 changes: 4 additions & 2 deletions i18n/common.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
"cluster.add.host.empty": "Host is empty...",
"cluster.add.host.invalidchar": "Host should not contain '/'...",
"cluster.add.checkstatus": "Checking status of OpenPAI cluster",
"cluster.add.checkprotocolversion": "Checking the protocol version of OpenPAI cluster",
"treeview.node.edit": "Edit Configuration...",
"treeview.node.openhdfs": "Open HDFS...",
"treeview.node.openPortal": "Open Web Portal...",
"treeview.node.listjob": "List Jobs Externally...",
"treeview.node.create-config": "Create Job Config...",
"treeview.node.create-yaml-config": "Create Job v2 Config...",
"treeview.node.submitjob": "Submit Job...",
"treeview.node.simulate": "Simulate Job Running...",
"treeview.hdfs.select-cluster.label": "Double click to connect to a PAI cluster's HDFS...",
Expand Down Expand Up @@ -57,6 +57,7 @@
"job.prepare.cluster.cancelled": "No cluster selected, job submission cancelled.",
"job.prepare.config.prompt": "Please select a PAI job config json file",
"job.prepare.config.invalid": "Invalid job config json file, job submission cancelled.",
"job.prepare.config.yaml-not-support": "The current operation doesn't support YAML job config files, job submission cancelled.",
"job.prepare.config.cancelled": "No job config selected, job submission cancelled.",
"job.prepare.upload.prompt": "Enable auto uploading of code?",
"job.prepare.upload.yes.detail": "The extension will upload your project files to PAI job config's code dir automatically.",
Expand Down Expand Up @@ -101,12 +102,12 @@
"cluster.add.host.empty": "集群地址为空……",
"cluster.add.host.invalidchar": "集群地址不应该包含 '/'……",
"cluster.add.checkstatus": "正在检查 OpenPAI 集群状态",
"cluster.add.checkprotocolversion": "正在检查 OpenPAI protocol 版本",
"treeview.node.edit": "编辑配置...",
"treeview.node.openhdfs": "打开 HDFS...",
"treeview.node.openPortal": "打开 OpenPAI 门户...",
"treeview.node.listjob": "在浏览器里打开任务列表...",
"treeview.node.create-config": "创建任务配置文件...",
"treeview.node.create-yaml-config": "创建任务V2配置文件...",
"treeview.node.submitjob": "提交任务...",
"treeview.node.simulate": "模拟任务执行...",
"treeview.hdfs.select-cluster.label": "双击以连接到 PAI 集群的 HDFS...",
Expand Down Expand Up @@ -139,6 +140,7 @@
"job.prepare.cluster.cancelled": "未选择集群,任务提交已被取消。",
"job.prepare.config.prompt": "请选择一个 PAI 任务配置 JSON",
"job.prepare.config.invalid": "任务配置文件不合法,任务提交已被取消。",
"job.prepare.config.yaml-not-support": "当前操作不支持YAML任务配置文件,任务提交已被取消。",
"job.prepare.config.cancelled": "未选择任务配置文件,任务提交已被取消。",
"job.prepare.upload.prompt": "是否启用代码自动上传功能?",
"job.prepare.upload.yes.detail": "插件将会自动上传你的项目文件至 PAI 任务配置中的 code dir",
Expand Down
17 changes: 11 additions & 6 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,13 @@
"category": "PAI"
},
{
"command": "paiext.cluster.job.create-yaml-config",
"title": "%paiext.cluster.job.create-yaml-config%",
"command": "paiext.cluster.job.create-config-v1",
"title": "%paiext.cluster.job.create-config-v1%",
"category": "PAI"
},
{
"command": "paiext.cluster.job.create-config-v2",
"title": "%paiext.cluster.job.create-config-v2%",
"category": "PAI"
},
{
Expand Down Expand Up @@ -259,11 +264,11 @@
],
"explorer/context": [
{
"command": "paiext.cluster.job.create-config",
"command": "paiext.cluster.job.create-config-v1",
"when": "resourceLangId =~ /(python)|(cntk)/ && resourceScheme == file"
},
{
"command": "paiext.cluster.job.create-yaml-config",
"command": "paiext.cluster.job.create-config-v2",
"when": "resourceLangId =~ /(python)|(cntk)/ && resourceScheme == file"
},
{
Expand All @@ -289,11 +294,11 @@
],
"editor/context": [
{
"command": "paiext.cluster.job.create-config",
"command": "paiext.cluster.job.create-config-v1",
"when": "resourceLangId =~ /(python)|(cntk)/ && resourceScheme == file"
},
{
"command": "paiext.cluster.job.create-yaml-config",
"command": "paiext.cluster.job.create-config-v2",
"when": "resourceLangId =~ /(python)|(cntk)/ && resourceScheme == file"
},
{
Expand Down
5 changes: 3 additions & 2 deletions package.nls.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
"paiext.cluster.dashboard.open": "Open Dashboard",
"paiext.cluster.job.list": "Open Job List Externally",
"paiext.cluster.job.submit": "Submit Job to PAI Cluster",
"paiext.cluster.job.create-config": "Create PAI Job Config JSON",
"paiext.cluster.job.create-yaml-config": "Create PAI Job Config YAML",
"paiext.cluster.job.create-config": "Create PAI Job Config",
"paiext.cluster.job.create-config-v1": "Create PAI Job Config V1",
"paiext.cluster.job.create-config-v2": "Create PAI Job Config V2",
"paiext.cluster.job.simulate": "Simulate PAI Job Running",
"paiext.cluster.job.view": "View Job Detail",
"paiext.cluster.job.more": "View More...",
Expand Down
5 changes: 3 additions & 2 deletions package.nls.zh-cn.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
"paiext.cluster.dashboard.open": "打开仪表板",
"paiext.cluster.job.list": "在浏览器里打开任务列表",
"paiext.cluster.job.submit": "在 PAI 集群上提交任务",
"paiext.cluster.job.create-config": "创建 PAI 任务配置 JSON 文件",
"paiext.cluster.job.create-yaml-config": "创建 PAI 任务配置 YAML 文件",
"paiext.cluster.job.create-config": "创建 PAI 任务配置文件",
"paiext.cluster.job.create-config-v1": "创建 PAI 任务配置文件 V1",
"paiext.cluster.job.create-config-v2": "创建 PAI 任务配置文件 V2",
"paiext.cluster.job.simulate": "模拟 PAI 任务执行",
"paiext.cluster.job.view": "查看任务详情",
"paiext.cluster.job.more": "显示更多...",
Expand Down
4 changes: 4 additions & 0 deletions schemas/pai_cluster.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@
"type": "string",
"description": "PAI web portal endpoint",
"pattern": "^(?!https?:\/\/).*"
},
"protocol_version": {
"type": "string",
"description": "PAI protocol version"
}
},
"required": [
Expand Down
3 changes: 2 additions & 1 deletion src/common/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ export const COMMAND_TREEVIEW_DOUBLECLICK = 'paiext.treeview.doubleclick';
export const COMMAND_SUBMIT_JOB = 'paiext.cluster.job.submit';
export const COMMAND_SIMULATE_JOB = 'paiext.cluster.job.simulate';
export const COMMAND_CREATE_JOB_CONFIG = 'paiext.cluster.job.create-config';
export const COMMAND_CREATE_YAML_JOB_CONFIG = 'paiext.cluster.job.create-yaml-config';
export const COMMAND_CREATE_JOB_CONFIG_V1 = 'paiext.cluster.job.create-config-v1';
export const COMMAND_CREATE_JOB_CONFIG_V2 = 'paiext.cluster.job.create-config-v2';
export const COMMAND_CONTAINER_HDFS_BACK = 'paiext.container.hdfs.back';
export const COMMAND_CONTAINER_HDFS_REFRESH = 'paiext.container.hdfs.refresh';
export const COMMAND_CONTAINER_HDFS_DELETE = 'paiext.container.hdfs.delete';
Expand Down
59 changes: 58 additions & 1 deletion src/pai/clusterManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ export class ClusterManager extends Singleton {
hdfs_uri: 'hdfs://127.0.0.1:9000',
webhdfs_uri: '127.0.0.1:50070',
grafana_uri: '127.0.0.1:3000',
k8s_dashboard_uri: '127.0.0.1:9090'
k8s_dashboard_uri: '127.0.0.1:9090',
protocol_version: '2'
};

private onDidChangeEmitter: vscode.EventEmitter<IClusterModification> = new vscode.EventEmitter<IClusterModification>();
Expand Down Expand Up @@ -90,6 +91,7 @@ export class ClusterManager extends Singleton {
this.configuration = this.context.globalState.get<IConfiguration>(ClusterManager.CONF_KEY) || ClusterManager.default;
try {
await this.validateConfiguration();
await this.ensureProtocolVersion();
} catch (ex) {
await this.askConfigurationFix(__('cluster.activate.error', [ex]));
}
Expand All @@ -105,6 +107,33 @@ export class ClusterManager extends Singleton {
}
}

/**
 * Fill in `protocol_version` for every configured cluster that lacks one.
 *
 * Each such cluster is probed with a GET to
 * `/api/v2/jobs/protocolversion/config` on its REST server (5 second timeout):
 *  - the request succeeds                      -> protocol version '2'
 *  - it fails with error code 'NoApiError'     -> protocol version '1'
 *  - it fails for any other reason             -> protocol version '2'
 *
 * The configuration is saved only when at least one cluster was probed, so
 * an already complete configuration is left untouched.
 */
public async ensureProtocolVersion(): Promise<void> {
    const probes: Promise<void>[] = this.configuration!.pais
        .filter((config: IPAICluster) => !config.protocol_version)
        .map(async (config: IPAICluster): Promise<void> => {
            try {
                await request.get(
                    `http://${config.rest_server_uri}/api/v2/jobs/protocolversion/config`,
                    { timeout: 5 * 1000 }
                );
                config.protocol_version = '2';
            } catch (err) {
                // The failure payload is not always a JSON string: on timeouts or
                // connection errors it can be missing or a non-JSON value, so parse
                // defensively instead of letting JSON.parse reject the whole batch.
                let code: string | undefined;
                try {
                    const failure: any = err;
                    const raw: any = failure ? failure.error : undefined;
                    const parsed: any = typeof raw === 'string' ? JSON.parse(raw) : raw;
                    code = parsed ? parsed.code : undefined;
                } catch (parseError) {
                    code = undefined;
                }
                config.protocol_version = code === 'NoApiError' ? '1' : '2';
            }
        });

    // Nothing was missing: skip the redundant save.
    if (probes.length === 0) {
        return;
    }

    await Promise.all(probes);
    await this.save();
}

public get allConfigurations(): IPAICluster[] {
return this.configuration!.pais;
}
Expand Down Expand Up @@ -157,6 +186,34 @@ export class ClusterManager extends Singleton {
cluster.hdfs_uri = `hdfs://${host}:9000`;
cluster.k8s_dashboard_uri = `${host}:9090`;
}

// Config the protocol version.
try {
await vscode.window.withProgress(
{
location: vscode.ProgressLocation.Notification,
title: __('cluster.add.checkprotocolversion'),
cancellable: true
},
(_progress, cancellationToken) => new Promise((resolve, reject) => {
const req: request.RequestPromise = request
.get(`http://${cluster.rest_server_uri}/api/v2/jobs/protocolversion/config`, { timeout: 5 * 1000 });
cancellationToken.onCancellationRequested(() => {
req.abort();
reject();
});
req.then(resolve).catch(reject);
}));
cluster.protocol_version = '2';
} catch (exception) {
const error: any = JSON.parse(exception.error);
if (error.code === 'NoApiError') {
cluster.protocol_version = '1';
} else {
cluster.protocol_version = '2';
}
}

return this.edit(this.allConfigurations.length, cluster);
}

Expand Down
7 changes: 1 addition & 6 deletions src/pai/configurationTreeDataProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import {
} from 'vscode';

import {
COMMAND_CREATE_JOB_CONFIG, COMMAND_CREATE_YAML_JOB_CONFIG, COMMAND_EDIT_CLUSTER, COMMAND_LIST_JOB, COMMAND_OPEN_HDFS,
COMMAND_CREATE_JOB_CONFIG, COMMAND_EDIT_CLUSTER, COMMAND_LIST_JOB, COMMAND_OPEN_HDFS,
COMMAND_REFRESH_CLUSTER, COMMAND_SIMULATE_JOB, COMMAND_SUBMIT_JOB,
COMMAND_TREEVIEW_DOUBLECLICK, COMMAND_TREEVIEW_OPEN_PORTAL,
CONTEXT_CONFIGURATION_ITEM,
Expand Down Expand Up @@ -48,11 +48,6 @@ const childNodeDefinitions: IChildNodeDefinition[] = [
command: COMMAND_CREATE_JOB_CONFIG,
icon: ICON_CREATE_CONFIG
},
{
title: 'treeview.node.create-yaml-config',
command: COMMAND_CREATE_YAML_JOB_CONFIG,
icon: ICON_CREATE_CONFIG
},
{
title: 'treeview.node.submitjob',
command: COMMAND_SUBMIT_JOB,
Expand Down
7 changes: 4 additions & 3 deletions src/pai/paiInterface.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export interface IPAICluster {
hdfs_uri?: string;
k8s_dashboard_uri?: string;
web_portal_uri?: string;
protocol_version?: string;
}

export interface IPAITaskRole {
Expand All @@ -25,7 +26,7 @@ export interface IPAITaskRole {
command: string;
}

export interface IPAIJobConfig {
export interface IPAIJobConfigV1 {
jobName: string;
image: string;
dataDir?: string;
Expand All @@ -52,7 +53,7 @@ export interface IPAIJobInfo {
/**
* OpenPAI Job Protocol.
*/
export interface IPAIYamlJobConfig {
export interface IPAIJobConfigV2 {
/** Protocol version, current version is 2. */
protocolVersion: string | number;
name: string;
Expand All @@ -66,7 +67,7 @@ export interface IPAIYamlJobConfig {
/** Each item is the protocol for data, script, dockerimage, or output type. */
prerequisites?: {
/** If omitted, follow the protocolVersion in root. */
protocolVersion?: string | number;
protocolVersion?: string;
name: string;
/** Component type. Must be one of the following: data, script, dockerimage, or output. Prerequisites.type cannot be "job". */
type: string;
Expand Down
Loading

0 comments on commit 9a0f9e2

Please sign in to comment.