Skip to content

Commit

Permalink
feat(mis): 检查默认作业计费项是否完整 (#456)
Browse files Browse the repository at this point in the history
当系统遇到一个数据库中不存在计费项的作业时,系统获取作业将会报错。为了避免这个情况发生,这个PR加入了以下检查机制:

- mis-server启动时将会自动检查当前的默认计费项是否完整覆盖了集群中所有分区(及其QOS)所需的计费项。如果不完整,将会输出日志提示
- mis完成初始化时,系统将会检查默认计费项是否完整覆盖了集群中所有分区(及其QOS)所需的计费项。如果没有,系统将不允许完成初始化

![image](https://user-images.githubusercontent.com/8363856/218453053-87228681-af5c-43c7-a0fa-865bf0638bd4.png)

这个PR同时整理了一下计算作业费用相关的代码。
  • Loading branch information
ddadaal authored Feb 13, 2023
1 parent ae95ff2 commit 4ecca3d
Show file tree
Hide file tree
Showing 14 changed files with 454 additions and 255 deletions.
6 changes: 6 additions & 0 deletions .changeset/perfect-dragons-serve.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"@scow/mis-server": minor
"@scow/mis-web": minor
---

检查默认计费项是否完备
24 changes: 24 additions & 0 deletions apps/mis-server/config/priceItems.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,30 @@
"amount": "gpu"
}
}
},
"hpc02": {
"compute": {
"id": "HPC16",
"price": "5.00",
"amount": "gpu"
},
"gpu": {
"low": {
"id": "HPC17",
"price": "4.00",
"amount": "gpu"
},
"normal": {
"id": "HPC18",
"price": "5.00",
"amount": "gpu"
},
"high": {
"id": "HPC19",
"price": "6.00",
"amount": "gpu"
}
}
}
},
"another": {
Expand Down
145 changes: 145 additions & 0 deletions apps/mis-server/src/bl/PriceMap.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
/**
* Copyright (c) 2022 Peking University and Peking University Institute for Computing and Digital Economy
* SCOW is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/

import { Logger } from "@ddadaal/tsgrpc-server";
import { MySqlDriver, SqlEntityManager } from "@mikro-orm/mysql";
import { calculateJobPrice } from "src/bl/jobPrice";
import { clusters } from "src/config/clusters";
import { JobPriceInfo } from "src/entities/JobInfo";
import { JobPriceItem } from "src/entities/JobPriceItem";

/**
 * The attributes of a finished job that the pricing code reads.
 *
 * `cluster`, `partition` and `qos` together form the lookup path of a price item;
 * the remaining numeric fields feed the amount strategies in `src/bl/jobPrice`.
 */
export interface JobInfo {
// Only used by calculateJobPrice to identify the job in its trace log.
biJobIndex: number;
// scow cluster id; also the key used to look the cluster up in the `clusters` config
cluster: string;
// Partition name; key into the cluster's `slurm.partitions` config.
partition: string;
// QOS of the job; third (optional) segment of the price item path.
qos: string;
// Run time in seconds (calculateJobPrice divides it by 3600 to get hours).
timeUsed: number;
// Number of allocated CPU cores.
cpusAlloc: number;
// GPU count of the job.
gpu: number;
// Requested memory; compared against the partition's `mem` by the MAX_CPUSALLOC_MEM strategy.
// NOTE(review): presumably in the same unit as the partition's `mem` config value — confirm.
memReq: number;
// Not read by the pricing code visible in this file.
memAlloc: number;
// Not read by the pricing code visible in this file.
account: string;
// Tenant name; passed to getPriceItem to select tenant-specific price items.
tenant: string;
}

/**
 * In-memory view of the active billing items, as built by `createPriceMap`.
 */
export interface PriceMap {
// path: [cluster, partition, qos]
// Looks up the item for `cluster.partition.qos`, falling back to `cluster.partition`.
// When tenantName is given and that tenant has a matching item, the tenant's item wins
// over the default one. The createPriceMap implementation throws an Error if no item matches.
getPriceItem(path: [string, string, string], tenantName?: string): JobPriceItem;
// Returns the default items keyed by dotted path, overlaid with the tenant's own items
// when tenantName is given.
getPriceMap(tenantName?: string): Record<string, JobPriceItem>;

// Computes the tenant and account price of a job (delegates to calculateJobPrice).
calculatePrice(info: JobInfo): JobPriceInfo;

// Lists the dotted paths (cluster.partition or cluster.partition.qos) of configured
// partitions that are not covered by any default price item.
getMissingDefaultPriceItems(): string[];
}


/**
 * Loads all billing items from the database and builds a {@link PriceMap} over them.
 *
 * Items are keyed by their dotted path (`cluster.partition` or `cluster.partition.qos`).
 * A lookup prefers the qos-level key over the partition-level key, and a tenant's own
 * item over the default one.
 *
 * @param em entity manager used to query the JobPriceItem table
 * @param logger logger that receives the loaded price maps and pricing warnings
 * @returns the price map built from the items present at call time
 */
export async function createPriceMap(em: SqlEntityManager<MySqlDriver>, logger: Logger): Promise<PriceMap> {
  // Oldest first: when several items share a path, the one created last overrides the others.
  const allItems = await em.find(JobPriceItem, {}, {
    populate: ["tenant"],
    orderBy: { createTime: "ASC" },
  });

  const { defaultPrices, tenantSpecificPrices } = getActiveBillingItems(allItems);

  logger.info("Default Price Map: %o", defaultPrices);
  logger.info("Tenant specific prices %o", tenantSpecificPrices);

  const getPriceItem = (path: [string, string, string], tenantName?: string) => {
    const [cluster, partition, qos] = path;

    const qosKey = [cluster, partition, qos].join(".");
    const partitionKey = [cluster, partition].join(".");

    // A tenant's own item takes precedence over the default item.
    if (tenantName && tenantName in tenantSpecificPrices) {
      const tenantPrices = tenantSpecificPrices[tenantName];
      const tenantItem = tenantPrices[qosKey] || tenantPrices[partitionKey];

      if (tenantItem) { return tenantItem; }
    }

    const defaultItem = defaultPrices[qosKey] || defaultPrices[partitionKey];

    if (defaultItem) { return defaultItem; }

    throw new Error(`Unknown cluster ${cluster} partition ${partition} qos ${qos}`);
  };

  const getMissingDefaultPriceItems = () => {
    const missing: string[] = [];

    for (const cluster in clusters) {
      for (const partition in clusters[cluster].slurm.partitions) {
        const partitionKey = [cluster, partition].join(".");

        const { qos } = clusters[cluster].slurm.partitions[partition];

        // A partition-level item covers every qos of that partition.
        if (partitionKey in defaultPrices) {
          continue;
        }

        if (!Array.isArray(qos)) {
          // No qos list configured: only the partition-level item could match.
          missing.push(partitionKey);
          continue;
        }

        // Otherwise every configured qos needs its own item.
        const uncovered = qos
          .map((q) => [cluster, partition, q].join("."))
          .filter((qosKey) => !(qosKey in defaultPrices));

        missing.push(...uncovered);
      }
    }

    return missing;
  };

  return {
    getPriceItem,

    getMissingDefaultPriceItems,

    calculatePrice: (info) => calculateJobPrice(info, getPriceItem, logger),

    getPriceMap: (tenantName) => {
      const overrides = tenantName ? tenantSpecificPrices[tenantName] : undefined;
      return { ...defaultPrices, ...overrides };
    },
  };
}

/**
 * Groups billing items into the default price map and the per-tenant price maps.
 *
 * Items are keyed by their dotted path (`cluster.partition[.qos]`). When several items
 * share a key, the one appearing later in `items` overrides the earlier ones, so callers
 * should pass the items ordered from oldest to newest.
 *
 * The returned dictionaries have a null prototype. Tenant names come from the database,
 * so a tenant called `__proto__` or `constructor` would otherwise be treated as an
 * already-existing key and its items would be written onto `Object.prototype` or the
 * global `Object` function.
 *
 * @param items billing items; an item without a tenant is a default item
 * @returns `defaultPrices` as `{ [path]: item }` and `tenantSpecificPrices` as
 *          `{ [tenantName]: { [path]: item } }`
 */
export function getActiveBillingItems(items: JobPriceItem[]) {
  // { [cluster.partition[.qos]]: price }
  const defaultPrices: Record<string, JobPriceItem> = Object.create(null);
  // { tenantName: { [cluster.partition[.qos] ]: price }}
  const tenantSpecificPrices: Record<string, Record<string, JobPriceItem>> = Object.create(null);

  for (const item of items) {
    const key = item.path.join(".");

    if (!item.tenant) {
      defaultPrices[key] = item;
      continue;
    }

    const tenantName = item.tenant.getProperty("name");

    // Safe with `in`: the dictionary has no prototype, so only real tenants match.
    if (!(tenantName in tenantSpecificPrices)) {
      tenantSpecificPrices[tenantName] = Object.create(null);
    }

    tenantSpecificPrices[tenantName][key] = item;
  }

  return { defaultPrices, tenantSpecificPrices };
}

102 changes: 102 additions & 0 deletions apps/mis-server/src/bl/jobPrice.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
/**
* Copyright (c) 2022 Peking University and Peking University Institute for Computing and Digital Economy
* SCOW is licensed under Mulan PSL v2.
* You can use this software according to the terms and conditions of the Mulan PSL v2.
* You may obtain a copy of Mulan PSL v2 at:
* http://license.coscl.org.cn/MulanPSL2
* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
* EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT,
* MERCHANTABILITY OR FIT FOR A PARTICULAR PURPOSE.
* See the Mulan PSL v2 for more details.
*/

import { Logger } from "@ddadaal/tsgrpc-server";
import { Decimal } from "@scow/lib-decimal";
import { JobInfo, PriceMap } from "src/bl/PriceMap";
import { clusters } from "src/config/clusters";
import { JobPriceInfo } from "src/entities/JobInfo";
import { AmountStrategy, JobPriceItem } from "src/entities/JobPriceItem";

/** Computes a job's billable amount; calculateJobPrice later multiplies it by the run time in hours. */
type AmountStrategyFunc = (info: JobInfo) => Decimal;

/**
 * The amount function for each {@link AmountStrategy}.
 *
 * The two MAX_* strategies read the job's partition from the `clusters` config and
 * therefore expect `info.cluster` and `info.partition` to exist there.
 */
const amountStrategyFuncs: Record<AmountStrategy, AmountStrategyFunc> = {
  // Bill by the number of GPUs.
  [AmountStrategy.GPU]: (info) => {
    return new Decimal(info.gpu);
  },

  // Bill by the number of allocated cores.
  [AmountStrategy.CPUS_ALLOC]: (info) => {
    return new Decimal(info.cpusAlloc);
  },

  // Bill by the larger of: GPUs used, or allocated cores expressed in "GPU shares".
  [AmountStrategy.MAX_GPU_CPUSALLOC]: (info) => {
    const { gpu, cpusAlloc } = info;
    const { cores, gpus } = clusters[info.cluster].slurm.partitions[info.partition];

    const allocatedCores = new Decimal(cpusAlloc);
    // cores of the partition per GPU of the partition
    const coresPerGpu = new Decimal(cores).div(gpus);
    const gpusByCores = allocatedCores.div(coresPerGpu).integerValue(Decimal.ROUND_CEIL);

    return Decimal.max(gpu, gpusByCores);
  },

  // Bill by the larger of: allocated cores, or requested memory expressed in "core shares".
  [AmountStrategy.MAX_CPUSALLOC_MEM]: (info) => {
    const { mem, cores } = clusters[info.cluster].slurm.partitions[info.partition];

    const requestedMem = new Decimal(info.memReq);
    // partition memory / partition cores
    const memPerCore = new Decimal(mem).div(cores);
    // requested memory / (partition memory / partition cores), rounded up
    const coresByMem = requestedMem.div(memPerCore).integerValue(Decimal.ROUND_CEIL);

    return Decimal.max(info.cpusAlloc, coresByMem);
  },
};


/**
 * Calculates the tenant price and the account price of a job.
 *
 * The account price is computed from the item found for the job's tenant (a tenant-specific
 * item if there is one), the tenant price from the default item for the same path.
 * Each price is `item.price * round_down(amount * hours, 3)`, rounded to 3 decimal places.
 *
 * @param info the job to price
 * @param getPriceItem lookup used to resolve the price items; its errors are not caught here
 * @param logger receives a trace line per job and warnings for unknown cluster,
 *               partition or amount strategy
 * @returns the prices with the ids of the items used, or an empty price info when the
 *          job's cluster or partition is not in the `clusters` config
 */
export function calculateJobPrice(
  info: JobInfo, getPriceItem: PriceMap["getPriceItem"],
  logger: Logger): JobPriceInfo {

  logger.trace(`Calculating price for job ${info.biJobIndex}`);

  const clusterConfig = clusters[info.cluster];
  if (!clusterConfig) {
    logger.warn(`Unknown cluster ${info.cluster}`);
    return emptyJobPriceInfo();
  }

  if (!clusterConfig.slurm.partitions[info.partition]) {
    logger.warn(`Unknown partition ${info.partition} of cluster ${info.cluster}`);
    return emptyJobPriceInfo();
  }

  const path: [string, string, string] = [info.cluster, info.partition, info.qos];

  const priceOf = (item: JobPriceItem) => {
    // seconds to hours
    const hours = new Decimal(info.timeUsed).div(3600);

    const strategy = amountStrategyFuncs[item.amount];

    let amount: Decimal;
    if (strategy) {
      amount = strategy(info);
    } else {
      // An unrecognized strategy is billed as zero rather than failing the job.
      amount = new Decimal(0);
      logger.warn("Unknown AmountStrategy %s. Count as 0.", item.amount);
    }

    const billedAmount = amount.multipliedBy(hours).decimalPlaces(3, Decimal.ROUND_DOWN);

    return item.price.multipliedBy(billedAmount).decimalPlaces(3, Decimal.ROUND_HALF_CEIL);
  };

  // Resolve both items before pricing either of them.
  const accountBase = getPriceItem(path, info.tenant);
  const tenantBase = getPriceItem(path);

  const accountPrice = priceOf(accountBase);
  const tenantPrice = priceOf(tenantBase);

  return {
    tenant: { billingItemId: tenantBase.itemId, price: tenantPrice },
    account: { billingItemId: accountBase.itemId, price: accountPrice },
  };
}

/** Creates a fresh price info with neither a tenant nor an account price set. */
export const emptyJobPriceInfo = (): JobPriceInfo => {
  return {
    tenant: undefined,
    account: undefined,
  };
};
Loading

0 comments on commit 4ecca3d

Please sign in to comment.