From cb2ce82649686df43bec774eadfb8f6ca3243008 Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Tue, 16 Apr 2024 11:05:45 +0200 Subject: [PATCH 01/17] Benchmark file --- docs/examples/benchmark_gpt.ts | 65 ++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 docs/examples/benchmark_gpt.ts diff --git a/docs/examples/benchmark_gpt.ts b/docs/examples/benchmark_gpt.ts new file mode 100644 index 000000000..bfa74fa18 --- /dev/null +++ b/docs/examples/benchmark_gpt.ts @@ -0,0 +1,65 @@ +import type { Task } from '@epfml/discojs-core' +import { + Disco, fetchTasks, data, client as clients, + aggregator as aggregators, models +} from '@epfml/discojs-core' +import { NodeTextLoader, saveModelToDisk, loadModelFromDisk } from '@epfml/discojs-node' + + +async function main(): Promise { + // Launch a server instance + const url = new URL('http://localhost:8080') + + // Fetch the wikitext task from the server + const tasks = await fetchTasks(url) + const task = tasks.get('wikitext-103') + if (task === undefined) { throw new Error('task not found') } + + let model; + const modelFolder = './models' + const modelFileName = 'model_random.json' + + // Toggle TRAIN_MODEL to either train and save a new model from scratch or load an existing model + const TRAIN_MODEL = true + if (TRAIN_MODEL) { + // Load the wikitext dataset from the `datasets` folder + const dataset = await loadWikitextData(task) + + // Initialize a Disco instance and start training a language model + const aggregator = new aggregators.MeanAggregator() + const client = new clients.federated.FederatedClient(url, task, aggregator) + const disco = new Disco(task, { scheme: 'federated', client, aggregator }) + for await (const _ of disco.fit(dataset)); + + // Get the model and complete the prompt + if (aggregator.model === undefined) { + throw new Error('model was not set') + } + // Save the trained model + model = aggregator.model as models.GPT + await saveModelToDisk(model, modelFolder, modelFileName) + await disco.close() + } else { + // Load the trained model + model = await loadModelFromDisk(`${modelFolder}/${modelFileName}`) as models.GPT + } + + // Retrieve the tokenizer used during training + const tokenizer = await models.getTaskTokenizer(task) + + const prompt = 'The game began development in 2010 , carrying over a large portion' + const generations = await model.generate(prompt, tokenizer) + console.log(generations) +} + +async function loadWikitextData (task: Task): Promise { + const loader = new NodeTextLoader(task) + const dataSplit: data.DataSplit = { + train: await data.TextData.init(await loader.load('../../datasets/wikitext/wiki.train.tokens', {shuffle: true}), task), + validation: await data.TextData.init(await loader.load('../../datasets/wikitext/wiki.valid.tokens', {shuffle: true}), task) + } + return dataSplit +} + +// You can run this example with "npm start" from this folder +main().catch(console.error) \ No newline at end of file From f0d7e7bd595b3cd89d49d75738a0216c531dae88 Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Tue, 16 Apr 2024 13:29:02 +0200 Subject: [PATCH 02/17] Benchmarking commit --- discojs/discojs-core/src/models/gpt/config.ts | 8 +-- discojs/discojs-core/src/models/gpt/index.ts | 18 ++++- discojs/discojs-core/src/models/gpt/layers.ts | 2 +- discojs/discojs-core/src/models/gpt/model.ts | 20 +++++- discojs/discojs-core/src/models/index.ts | 2 +- discojs/discojs-core/src/models/model.ts | 5 ++ discojs/discojs-core/src/models/tfjs.ts | 4 ++ docs/examples/benchmark_gpt.ts 
| 68 +++++++++---------- 8 files changed, 78 insertions(+), 49 deletions(-) diff --git a/discojs/discojs-core/src/models/gpt/config.ts b/discojs/discojs-core/src/models/gpt/config.ts index 727412563..a8368af64 100644 --- a/discojs/discojs-core/src/models/gpt/config.ts +++ b/discojs/discojs-core/src/models/gpt/config.ts @@ -1,4 +1,4 @@ -type ModelType = +export type GPTModelType = | 'gpt2' | 'gpt2-medium' | 'gpt2-large' @@ -11,7 +11,7 @@ export interface GPTConfig { lr: number blockSize: number vocabSize: number - modelType: ModelType + modelType: GPTModelType name?: string, evaluate?: boolean maxEvalBatches?: number @@ -61,7 +61,7 @@ export type ModelSize = { nEmbd: number } -export function getModelSizes (modelType: ModelType): Required { +export function getModelSizes (modelType: GPTModelType): Required { switch (modelType) { case 'gpt2': return { nLayer: 12, nHead: 12, nEmbd: 768 } @@ -79,7 +79,7 @@ export function getModelSizes (modelType: ModelType): Required { return { nLayer: 3, nHead: 3, nEmbd: 48 } default: { const _: never = modelType - throw new Error("should never happen") + throw new Error(`GPT model type specified doesn't exist: ${modelType}`) } } } diff --git a/discojs/discojs-core/src/models/gpt/index.ts b/discojs/discojs-core/src/models/gpt/index.ts index f4a47a3c4..e09b069fe 100644 --- a/discojs/discojs-core/src/models/gpt/index.ts +++ b/discojs/discojs-core/src/models/gpt/index.ts @@ -45,13 +45,14 @@ export class GPT extends Model { }; for (let epoch = 0; epoch < epochs; epoch++) { await this.model.fitDataset(trainingData, trainingArgs); - if (logs === undefined) { throw new Error("epoch didn't gave any logs"); } - const { loss, val_acc, val_loss } = logs; + const { loss, val_acc, val_loss, weightUpdateTime, memory } = logs; if (loss === undefined || isNaN(loss)) { - throw new Error("Invalid training logs"); + console.log(loss) + logs.loss = -1 + // throw new Error("Invalid training logs"); } const structuredLogs: EpochLogs = { epoch, @@ -67,9 +68,16 @@ export class GPT extends Model { } structuredLogs.validation = { accuracy: logs.val_acc, loss: logs.val_loss} } + if (weightUpdateTime !== undefined && !isNaN(weightUpdateTime)) { + structuredLogs['weightUpdateTime'] = weightUpdateTime + } + if (memory !== undefined && !isNaN(memory)) { + structuredLogs['memory'] = memory + } yield structuredLogs } + this.model.optimizer.dispose() } override predict (input: Sample): Promise { @@ -118,6 +126,10 @@ export class GPT extends Model { config: this.config } } + + dispose(): void { + this.model.dispose() + } } export type GPTSerialization = { diff --git a/discojs/discojs-core/src/models/gpt/layers.ts b/discojs/discojs-core/src/models/gpt/layers.ts index 243de000a..c7c4df2dd 100644 --- a/discojs/discojs-core/src/models/gpt/layers.ts +++ b/discojs/discojs-core/src/models/gpt/layers.ts @@ -188,7 +188,7 @@ class CausalSelfAttention extends tf.layers.Layer { y = tf.reshape(y, [B, T, C]) y = dense(y, this.cProjKernel, this.cProjBias) y = kwargs.training === true ? 
tf.dropout(y, this.dropout) : y - + console.log("Attention memory", (tf.memory().numBytes / 1024 / 1024).toFixed(0)) return y }) } diff --git a/discojs/discojs-core/src/models/gpt/model.ts b/discojs/discojs-core/src/models/gpt/model.ts index b4a88e7eb..aa1184564 100644 --- a/discojs/discojs-core/src/models/gpt/model.ts +++ b/discojs/discojs-core/src/models/gpt/model.ts @@ -55,6 +55,8 @@ class GPTModel extends tf.LayersModel { await callbacks.onTrainBegin?.() for (let epoch = 1; epoch <= trainingArgs.epochs; epoch++) { let averageLoss = 0 + let averageWeightUpdateTime = 0 + let averageMemory = 0 let iteration = 1 const iterator = await dataset.iterator() @@ -77,23 +79,28 @@ class GPTModel extends tf.LayersModel { } return tf.losses.softmaxCrossEntropy(ys, logits) } - + let peakMemory const lossTensor = tf.tidy(() => { const { grads, value: lossTensor } = this.optimizer.computeGradients(lossFn) const gradsClipped = clipByGlobalNormObj(grads, 1) this.optimizer.applyGradients(gradsClipped) + peakMemory = tf.memory().numBytes / 1024 / 1024 return lossTensor }) const loss = await lossTensor.array() averageLoss += loss + weightUpdateTime = performance.now() - weightUpdateTime + averageWeightUpdateTime += weightUpdateTime + peakMemory = peakMemory ?? 0 + averageMemory += peakMemory tf.dispose([xs, ys, lossTensor, next.value]) - weightUpdateTime = performance.now() - weightUpdateTime console.log( `Epoch: ${epoch}`, `\tStep: ${iteration} / ${this.config.maxIter}`, `\tLoss: ${loss.toFixed(3)}`, + `\tPeak memory: ${peakMemory.toFixed(2)} MB`, `\tMemory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `\tNumber of tensors allocated: ${tf.memory().numTensors}`, `\tPreprocessing time: ${preprocessingTime.toFixed(0)} ms`, @@ -107,9 +114,16 @@ class GPTModel extends tf.LayersModel { } iteration++ continueTraining = next.done !== true && iteration <= this.config.maxIter + // If we reached the last iteration, cleanup the tensors + if (next.done != true && iteration > this.config.maxIter) { + const { xs, ys } = next.value as { xs: tf.Tensor2D, ys: tf.Tensor3D } + tf.dispose([xs, ys]) + } } let logs: tf.Logs = { - 'loss': averageLoss / iteration + 'loss': averageLoss / iteration, + 'weightUpdateTime': averageWeightUpdateTime / iteration, + 'memory': averageMemory / iteration } if (evalDataset !== undefined) { logs = { ...logs, ...await evaluate(this, evalDataset, this.config.maxEvalBatches) } diff --git a/discojs/discojs-core/src/models/index.ts b/discojs/discojs-core/src/models/index.ts index aefb9b5fa..f6c1de092 100644 --- a/discojs/discojs-core/src/models/index.ts +++ b/discojs/discojs-core/src/models/index.ts @@ -1,6 +1,6 @@ export { EpochLogs, Model } from './model.js' export { GPT } from './gpt/index.js' -export { GPTConfig } from './gpt/config.js' +export { GPTConfig, GPTModelType } from './gpt/config.js' export { TFJS } from './tfjs.js' export { getTaskTokenizer } from './tokenizer.js' diff --git a/discojs/discojs-core/src/models/model.ts b/discojs/discojs-core/src/models/model.ts index b4696445a..7237dd777 100644 --- a/discojs/discojs-core/src/models/model.ts +++ b/discojs/discojs-core/src/models/model.ts @@ -13,6 +13,8 @@ export interface EpochLogs { loss: number, accuracy: number }; + weightUpdateTime?: number; + memory?: number; } // TODO still bound to tfjs @@ -50,4 +52,7 @@ export abstract class Model { /** Predict likely values */ // TODO extract in separated TrainedModel? 
abstract predict(input: Sample): Promise; + + // Cleanup the memory occupied by the model + abstract dispose(): void; } diff --git a/discojs/discojs-core/src/models/tfjs.ts b/discojs/discojs-core/src/models/tfjs.ts index 1b3ffc46d..191bf7aef 100644 --- a/discojs/discojs-core/src/models/tfjs.ts +++ b/discojs/discojs-core/src/models/tfjs.ts @@ -103,6 +103,10 @@ export class TFJS extends Model { return await ret } + dispose() { + this.model.dispose() + } + /** * extract wrapped model * diff --git a/docs/examples/benchmark_gpt.ts b/docs/examples/benchmark_gpt.ts index bfa74fa18..8f6f00e0d 100644 --- a/docs/examples/benchmark_gpt.ts +++ b/docs/examples/benchmark_gpt.ts @@ -1,10 +1,7 @@ import type { Task } from '@epfml/discojs-core' -import { - Disco, fetchTasks, data, client as clients, - aggregator as aggregators, models -} from '@epfml/discojs-core' -import { NodeTextLoader, saveModelToDisk, loadModelFromDisk } from '@epfml/discojs-node' - +import { fetchTasks, data, models } from '@epfml/discojs-core' +import { NodeTextLoader } from '@epfml/discojs-node' +import * as tf from '@tensorflow/tfjs' async function main(): Promise { // Launch a server instance @@ -14,42 +11,39 @@ async function main(): Promise { const tasks = await fetchTasks(url) const task = tasks.get('wikitext-103') if (task === undefined) { throw new Error('task not found') } + // Load the wikitext dataset from the `datasets` folder - let model; - const modelFolder = './models' - const modelFileName = 'model_random.json' - // Toggle TRAIN_MODEL to either train and save a new model from scratch or load an existing model - const TRAIN_MODEL = true - if (TRAIN_MODEL) { - // Load the wikitext dataset from the `datasets` folder - const dataset = await loadWikitextData(task) - - // Initialize a Disco instance and start training a language model - const aggregator = new aggregators.MeanAggregator() - const client = new clients.federated.FederatedClient(url, task, aggregator) - const disco = new Disco(task, { scheme: 'federated', client, aggregator }) - for await (const _ of disco.fit(dataset)); - // Get the model and complete the prompt - if (aggregator.model === undefined) { - throw new Error('model was not set') - } - // Save the trained model - model = aggregator.model as models.GPT - await saveModelToDisk(model, modelFolder, modelFileName) - await disco.close() - } else { - // Load the trained model - model = await loadModelFromDisk(`${modelFolder}/${modelFileName}`) as models.GPT + const config: models.GPTConfig = { + modelType: 'gpt-nano', + lr: 0.0001, + maxIter: 5, + evaluateEvery:10000, + maxEvalBatches: 10, + blockSize: 8, + vocabSize: 50258 } - - // Retrieve the tokenizer used during training - const tokenizer = await models.getTaskTokenizer(task) + const modelType = 'gpt2'//['gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2'] + const contextLength = 2048 // [128, 256, 512, 1024, 2048] + const batchSize = 32 //[8, 16, 32, 64] - const prompt = 'The game began development in 2010 , carrying over a large portion' - const generations = await model.generate(prompt, tokenizer) - console.log(generations) + console.log(`Begin loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) + task.trainingInformation.batchSize = batchSize + config.modelType = modelType as models.GPTModelType + config.blockSize = contextLength + console.log(`\tmodel type ${modelType} \n\tbatch size ${batchSize} \n\tcontext length ${contextLength}`) + // Reload the dataset to batch it with the right batch size 
+ const dataset = await loadWikitextData(task)
+ const preprocessedDataset = dataset.train.preprocess().batch().dataset
+ const model = new models.GPT(config)
+ const logGenerator = model.train(preprocessedDataset, undefined, 1) // 1 epoch
+ for await (const logs of logGenerator) {
+ const updateTime = logs.weightUpdateTime ?? 0
+ const msPerToken = updateTime / batchSize / contextLength
+ console.log(`\t\t\t${msPerToken.toFixed(2)} ms/token
${logs.memory?.toFixed(0)} MB`) + } + model.dispose() } async function loadWikitextData (task: Task): Promise { From 982686218e4efee5e5e2bc35cf5faff20aab5623 Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Thu, 18 Apr 2024 11:24:23 +0200 Subject: [PATCH 03/17] New benchmark commit --- discojs/discojs-core/src/models/gpt/index.ts | 1 + discojs/discojs-core/src/models/gpt/layers.ts | 17 ++++++---- discojs/discojs-core/src/models/gpt/model.ts | 33 ++++++++++++------- docs/examples/benchmark_gpt.ts | 19 ++++++----- 4 files changed, 42 insertions(+), 28 deletions(-) diff --git a/discojs/discojs-core/src/models/gpt/index.ts b/discojs/discojs-core/src/models/gpt/index.ts index e09b069fe..fc707ad10 100644 --- a/discojs/discojs-core/src/models/gpt/index.ts +++ b/discojs/discojs-core/src/models/gpt/index.ts @@ -128,6 +128,7 @@ export class GPT extends Model { } dispose(): void { + this.model.disposeRefs() this.model.dispose() } } diff --git a/discojs/discojs-core/src/models/gpt/layers.ts b/discojs/discojs-core/src/models/gpt/layers.ts index c7c4df2dd..7479db39f 100644 --- a/discojs/discojs-core/src/models/gpt/layers.ts +++ b/discojs/discojs-core/src/models/gpt/layers.ts @@ -65,7 +65,7 @@ class CausalSelfAttention extends tf.layers.Layer { cProjKernel?: tf.LayerVariable cProjBias?: tf.LayerVariable - constructor (private readonly config: CausalSelfAttentionConfig) { + constructor (private readonly config: CausalSelfAttentionConfig, disposalRefs: Array<() => void>) { super(config) this.nEmbd = config.nEmbd @@ -77,6 +77,7 @@ class CausalSelfAttention extends tf.layers.Layer { // calling bandPart zero out the upper triangular part of the all-ones matrix // from the doc: tf.linalg.band_part(input, -1, 0) ==> Lower triangular part this.mask = tf.linalg.bandPart(tf.ones([config.blockSize, config.blockSize]), -1, 0) + disposalRefs.push(() => tf.dispose(this.mask)) // Push a callback to dispose this matrix later } build (): void { @@ -188,7 +189,7 @@ class CausalSelfAttention extends tf.layers.Layer { y = tf.reshape(y, [B, T, C]) y = dense(y, this.cProjKernel, this.cProjBias) y = kwargs.training === true ? 
tf.dropout(y, this.dropout) : y - console.log("Attention memory", (tf.memory().numBytes / 1024 / 1024).toFixed(0)) + console.log("Attention memory", (tf.memory().numBytes / 1024 / 1024 / 1024).toFixed(2)) return y }) } @@ -257,7 +258,7 @@ function MLP (config: MLPConfig): tf.LayersModel { type BlockConfig = CausalSelfAttentionConfig & MLPConfig & { debug: boolean } -function TransformerBlock (conf: BlockConfig): tf.LayersModel { +function TransformerBlock (conf: BlockConfig, disposalRefs: Array<() => void>): tf.LayersModel { const config = Object.assign({ name: 'h' }, conf) const inputs = tf.input({ shape: [config.blockSize, config.nEmbd] }) let x1, x2 @@ -269,7 +270,8 @@ function TransformerBlock (conf: BlockConfig): tf.LayersModel { } // self attention layer x1 = new CausalSelfAttention( - Object.assign({}, config, { name: config.name + '/attn' }) + Object.assign({}, config, { name: config.name + '/attn' }), + disposalRefs ).apply(x1) // Residual connection x1 = tf.layers.add().apply([inputs, x1 as tf.SymbolicTensor]) @@ -295,7 +297,7 @@ function TransformerBlock (conf: BlockConfig): tf.LayersModel { * @param conf GPTConfig * @returns model, tf.LayersModel, which supports model(inputs), model.predict and model.apply */ -export function GPTArchitecture (config: Required): tf.LayersModel { +export function GPTArchitecture(config: Required, disposalRefs: Array<() => void>): tf.LayersModel { const inputs = tf.input({ shape: [null] }) //Token embedding @@ -325,7 +327,7 @@ export function GPTArchitecture (config: Required): tf.LayersModel { // token and positional embeddings are added together let x = tf.layers.add().apply([tokEmb, posEmb]) - //dropout + // dropout x = tf.layers.dropout({name: 'drop', rate: config.embdDrop}).apply(x) if (config.debug) { x = new LogLayer({ name: 'dropadd' }).apply(x) @@ -334,7 +336,8 @@ export function GPTArchitecture (config: Required): tf.LayersModel { //Apply successively transformer blocks, attention and dense layers for (let i = 0; i < config.nLayer; i++) { x = TransformerBlock( - Object.assign({}, config, { name: config.name + '/h/' + i }) + Object.assign({}, config, { name: config.name + '/h/' + i }), + disposalRefs ).apply(x) } // Normalization diff --git a/discojs/discojs-core/src/models/gpt/model.ts b/discojs/discojs-core/src/models/gpt/model.ts index aa1184564..0e2c8a698 100644 --- a/discojs/discojs-core/src/models/gpt/model.ts +++ b/discojs/discojs-core/src/models/gpt/model.ts @@ -24,18 +24,27 @@ export declare abstract class Dataset { */ class GPTModel extends tf.LayersModel { protected readonly config: Required + private readonly disposalRefs: Array<() => void> constructor(partialConfig?: GPTConfig) { - // Complete missing config parameters with default values + // Fill missing config parameters with default values let completeConfig: Required = { ...DEFAULT_CONFIG, ...partialConfig } // Add layer sizes depending on which model has been specified completeConfig = { ...completeConfig, ...getModelSizes(completeConfig.modelType) } // Init the tf.LayersModel and assign it to this - const gpt = GPTArchitecture(completeConfig) + const disposalRefs:Array<() => void> = [] + const gpt = GPTArchitecture(completeConfig, disposalRefs) const { inputs, outputs, name } = gpt super({ inputs, outputs, name }) this.config = completeConfig + this.disposalRefs = disposalRefs + } + + disposeRefs() { + for (let disposeFn of this.disposalRefs) { + disposeFn() + } } get getGPTConfig() { @@ -96,16 +105,16 @@ class GPTModel extends tf.LayersModel { averageMemory += 
peakMemory tf.dispose([xs, ys, lossTensor, next.value]) - console.log( - `Epoch: ${epoch}`, - `\tStep: ${iteration} / ${this.config.maxIter}`, - `\tLoss: ${loss.toFixed(3)}`, - `\tPeak memory: ${peakMemory.toFixed(2)} MB`, - `\tMemory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, - `\tNumber of tensors allocated: ${tf.memory().numTensors}`, - `\tPreprocessing time: ${preprocessingTime.toFixed(0)} ms`, - `\tWeight update time: ${weightUpdateTime.toFixed(0)} ms` - ) + // console.log( + // `Epoch: ${epoch}`, + // `\tStep: ${iteration} / ${this.config.maxIter}`, + // `\tLoss: ${loss.toFixed(3)}`, + // `\tPeak memory: ${peakMemory.toFixed(2)} MB`, + // `\tMemory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, + // `\tNumber of tensors allocated: ${tf.memory().numTensors}`, + // `\tPreprocessing time: ${preprocessingTime.toFixed(0)} ms`, + // `\tWeight update time: ${weightUpdateTime.toFixed(0)} ms` + // ) if (evalDataset !== undefined && this.config.evaluateEvery !== undefined && iteration % this.config.evaluateEvery == 0) { diff --git a/docs/examples/benchmark_gpt.ts b/docs/examples/benchmark_gpt.ts index 8f6f00e0d..f03b5e880 100644 --- a/docs/examples/benchmark_gpt.ts +++ b/docs/examples/benchmark_gpt.ts @@ -10,40 +10,41 @@ async function main(): Promise { // Fetch the wikitext task from the server const tasks = await fetchTasks(url) const task = tasks.get('wikitext-103') - if (task === undefined) { throw new Error('task not found') } - // Load the wikitext dataset from the `datasets` folder - - // Toggle TRAIN_MODEL to either train and save a new model from scratch or load an existing model + if (task === undefined) { throw new Error('task not found') } const config: models.GPTConfig = { modelType: 'gpt-nano', lr: 0.0001, - maxIter: 5, + maxIter: 10, evaluateEvery:10000, maxEvalBatches: 10, blockSize: 8, vocabSize: 50258 } const modelType = 'gpt2'//['gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2'] - const contextLength = 2048 // [128, 256, 512, 1024, 2048] - const batchSize = 32 //[8, 16, 32, 64] - + const contextLength = 128 // [128, 256, 512, 1024, 2048] + const batchSize = 16 //[8, 16, 32, 64] + + // TODO make sure the loss is not NAN anymore when training + console.log(`Begin loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) task.trainingInformation.batchSize = batchSize config.modelType = modelType as models.GPTModelType + task.trainingInformation.maxSequenceLength = contextLength config.blockSize = contextLength console.log(`\tmodel type ${modelType} \n\tbatch size ${batchSize} \n\tcontext length ${contextLength}`) // Reload the dataset to batch it with the right batch size const dataset = await loadWikitextData(task) const preprocessedDataset = dataset.train.preprocess().batch().dataset const model = new models.GPT(config) - const logGenerator = model.train(preprocessedDataset, undefined, 1) // 5 epochs + const logGenerator = model.train(preprocessedDataset, undefined, 1) for await (const logs of logGenerator) { const updateTime = logs.weightUpdateTime ?? 0 const msPerToken = updateTime / batchSize / contextLength console.log(`\t\t\t${msPerToken.toFixed(2)} ms/token
${logs.memory?.toFixed(0)} MB`) } model.dispose() + } async function loadWikitextData (task: Task): Promise { From f635d2bf5208e9ba37d5eb7827093b70ea74418b Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Thu, 18 Apr 2024 13:53:44 +0200 Subject: [PATCH 04/17] Cleanup benchmark and add peak memory tracking --- discojs/discojs-core/src/models/gpt/index.ts | 32 ++++--- discojs/discojs-core/src/models/gpt/layers.ts | 23 +++-- discojs/discojs-core/src/models/gpt/model.ts | 65 ++++++++------ discojs/discojs-core/src/models/model.ts | 2 +- docs/examples/benchmark_gpt.ts | 89 ++++++++++++------- docs/examples/wikitext.ts | 4 +- 6 files changed, 131 insertions(+), 84 deletions(-) diff --git a/discojs/discojs-core/src/models/gpt/index.ts b/discojs/discojs-core/src/models/gpt/index.ts index fc707ad10..61e947f5c 100644 --- a/discojs/discojs-core/src/models/gpt/index.ts +++ b/discojs/discojs-core/src/models/gpt/index.ts @@ -48,11 +48,9 @@ export class GPT extends Model { if (logs === undefined) { throw new Error("epoch didn't gave any logs"); } - const { loss, val_acc, val_loss, weightUpdateTime, memory } = logs; + const { loss, val_acc, val_loss, weightUpdateTime, peakMemory } = logs; if (loss === undefined || isNaN(loss)) { - console.log(loss) - logs.loss = -1 - // throw new Error("Invalid training logs"); + throw new Error("Training loss is undefined or nan"); } const structuredLogs: EpochLogs = { epoch, @@ -71,13 +69,12 @@ export class GPT extends Model { if (weightUpdateTime !== undefined && !isNaN(weightUpdateTime)) { structuredLogs['weightUpdateTime'] = weightUpdateTime } - if (memory !== undefined && !isNaN(memory)) { - structuredLogs['memory'] = memory + if (peakMemory !== undefined && !isNaN(peakMemory)) { + structuredLogs['peakMemory'] = peakMemory } yield structuredLogs } - this.model.optimizer.dispose() } override predict (input: Sample): Promise { @@ -89,18 +86,26 @@ export class GPT extends Model { return Promise.resolve(ret) } - async generate (input: string, tokenizer: PreTrainedTokenizer, newTokens: number = 10): Promise { + async generate(input: string, tokenizer: PreTrainedTokenizer, newTokens: number = 10): + Promise<{ generation: string, avgTokenTime: number }> { const { input_ids: tokens } = await tokenizer(input, { return_tensor: false}) as { input_ids: number[] } const generationConfig = { maxNewTokens: newTokens, temperature: 1.0, - doSample: false, - topK: null + doSample: false } - const predictedTokens = await this.model.generate(tokens, generationConfig) + let avgTimePerToken = 0 + let tokenCount = 0 + const predictedTokens = await this.model.generate(tokens, generationConfig, (res) => { + avgTimePerToken += res.timePerToken + tokenCount += 1 + }) const generatedWords = tokenizer.decode(predictedTokens[0]) - return generatedWords + return { + generation: generatedWords, + avgTokenTime: avgTimePerToken / tokenCount + } } get config (): Required { @@ -128,6 +133,9 @@ export class GPT extends Model { } dispose(): void { + this.model.optimizer.dispose() + // Some tensors are not cleaned up when model.dispose is called + // So we dispose them manually this.model.disposeRefs() this.model.dispose() } diff --git a/discojs/discojs-core/src/models/gpt/layers.ts b/discojs/discojs-core/src/models/gpt/layers.ts index 7479db39f..05b720eb4 100644 --- a/discojs/discojs-core/src/models/gpt/layers.ts +++ b/discojs/discojs-core/src/models/gpt/layers.ts @@ -59,13 +59,12 @@ class CausalSelfAttention extends tf.layers.Layer { private readonly dropout: number private readonly bias: boolean 
private readonly mask: tf.Tensor2D - cAttnKernel?: tf.LayerVariable cAttnBias?: tf.LayerVariable cProjKernel?: tf.LayerVariable cProjBias?: tf.LayerVariable - constructor (private readonly config: CausalSelfAttentionConfig, disposalRefs: Array<() => void>) { + constructor (private readonly config: CausalSelfAttentionConfig, disposalRefs: tf.TensorContainer[], private peakMemory: {value: number}) { super(config) this.nEmbd = config.nEmbd @@ -77,7 +76,7 @@ class CausalSelfAttention extends tf.layers.Layer { // calling bandPart zero out the upper triangular part of the all-ones matrix // from the doc: tf.linalg.band_part(input, -1, 0) ==> Lower triangular part this.mask = tf.linalg.bandPart(tf.ones([config.blockSize, config.blockSize]), -1, 0) - disposalRefs.push(() => tf.dispose(this.mask)) // Push a callback to dispose this matrix later + disposalRefs.push(this.mask) // Push a reference to dispose this matrix later } build (): void { @@ -189,7 +188,10 @@ class CausalSelfAttention extends tf.layers.Layer { y = tf.reshape(y, [B, T, C]) y = dense(y, this.cProjKernel, this.cProjBias) y = kwargs.training === true ? tf.dropout(y, this.dropout) : y - console.log("Attention memory", (tf.memory().numBytes / 1024 / 1024 / 1024).toFixed(2)) + const memoryAllocated = tf.memory().numBytes / 1024 / 1024 / 1024 // GB + if (memoryAllocated > this.peakMemory.value) { + this.peakMemory.value = memoryAllocated + } return y }) } @@ -258,7 +260,7 @@ function MLP (config: MLPConfig): tf.LayersModel { type BlockConfig = CausalSelfAttentionConfig & MLPConfig & { debug: boolean } -function TransformerBlock (conf: BlockConfig, disposalRefs: Array<() => void>): tf.LayersModel { +function TransformerBlock (conf: BlockConfig, disposalRefs: tf.TensorContainer[], peakMemory: {value: number}): tf.LayersModel { const config = Object.assign({ name: 'h' }, conf) const inputs = tf.input({ shape: [config.blockSize, config.nEmbd] }) let x1, x2 @@ -271,7 +273,8 @@ function TransformerBlock (conf: BlockConfig, disposalRefs: Array<() => void>): // self attention layer x1 = new CausalSelfAttention( Object.assign({}, config, { name: config.name + '/attn' }), - disposalRefs + disposalRefs, + peakMemory ).apply(x1) // Residual connection x1 = tf.layers.add().apply([inputs, x1 as tf.SymbolicTensor]) @@ -297,7 +300,10 @@ function TransformerBlock (conf: BlockConfig, disposalRefs: Array<() => void>): * @param conf GPTConfig * @returns model, tf.LayersModel, which supports model(inputs), model.predict and model.apply */ -export function GPTArchitecture(config: Required, disposalRefs: Array<() => void>): tf.LayersModel { +export function GPTArchitecture( + config: Required, + disposalRefs: tf.TensorContainer[], + peakMemory: {value: number }): tf.LayersModel { const inputs = tf.input({ shape: [null] }) //Token embedding @@ -337,7 +343,8 @@ export function GPTArchitecture(config: Required, disposalRefs: Array for (let i = 0; i < config.nLayer; i++) { x = TransformerBlock( Object.assign({}, config, { name: config.name + '/h/' + i }), - disposalRefs + disposalRefs, + peakMemory ).apply(x) } // Normalization diff --git a/discojs/discojs-core/src/models/gpt/model.ts b/discojs/discojs-core/src/models/gpt/model.ts index 0e2c8a698..67b4b3893 100644 --- a/discojs/discojs-core/src/models/gpt/model.ts +++ b/discojs/discojs-core/src/models/gpt/model.ts @@ -24,7 +24,10 @@ export declare abstract class Dataset { */ class GPTModel extends tf.LayersModel { protected readonly config: Required - private readonly disposalRefs: Array<() => void> + private 
readonly disposalRefs: tf.TensorContainer[] // Array to store tensor to dispose manually + // Object to pass down to layers to store max memory allocated + // This is an object rather than a primitive to pass the reference + protected peakMemory: { value: number } constructor(partialConfig?: GPTConfig) { // Fill missing config parameters with default values @@ -33,17 +36,21 @@ class GPTModel extends tf.LayersModel { completeConfig = { ...completeConfig, ...getModelSizes(completeConfig.modelType) } // Init the tf.LayersModel and assign it to this - const disposalRefs:Array<() => void> = [] - const gpt = GPTArchitecture(completeConfig, disposalRefs) + const disposalRefs: tf.TensorContainer[] = [] + const peakMemory: { value: number } = {value: 0} + const gpt = GPTArchitecture(completeConfig, disposalRefs, peakMemory) const { inputs, outputs, name } = gpt super({ inputs, outputs, name }) this.config = completeConfig this.disposalRefs = disposalRefs + this.peakMemory = peakMemory } + // Some tensors are not cleaned up when model.dispose is called + // So we dispose them manually disposeRefs() { - for (let disposeFn of this.disposalRefs) { - disposeFn() + for (let tensorContainer of this.disposalRefs) { + tf.dispose([tensorContainer]) } } @@ -55,6 +62,7 @@ class GPTModel extends tf.LayersModel { this.optimizer = this.config.weightDecay !== 0 ? getCustomAdam(this, this.config.lr, this.config.weightDecay) : tf.train.adam(this.config.lr) + this.peakMemory.value = 0 } async fitDataset(dataset: Dataset, trainingArgs: tf.ModelFitDatasetArgs): Promise { @@ -65,7 +73,6 @@ class GPTModel extends tf.LayersModel { for (let epoch = 1; epoch <= trainingArgs.epochs; epoch++) { let averageLoss = 0 let averageWeightUpdateTime = 0 - let averageMemory = 0 let iteration = 1 const iterator = await dataset.iterator() @@ -88,12 +95,12 @@ class GPTModel extends tf.LayersModel { } return tf.losses.softmaxCrossEntropy(ys, logits) } - let peakMemory + let currentMemory = 0 const lossTensor = tf.tidy(() => { const { grads, value: lossTensor } = this.optimizer.computeGradients(lossFn) const gradsClipped = clipByGlobalNormObj(grads, 1) this.optimizer.applyGradients(gradsClipped) - peakMemory = tf.memory().numBytes / 1024 / 1024 + currentMemory = tf.memory().numBytes / 1024 / 1024 / 1024 return lossTensor }) @@ -101,20 +108,21 @@ class GPTModel extends tf.LayersModel { averageLoss += loss weightUpdateTime = performance.now() - weightUpdateTime averageWeightUpdateTime += weightUpdateTime - peakMemory = peakMemory ?? 
0 - averageMemory += peakMemory + if (currentMemory > this.peakMemory.value) { + console.log("Max memory", currentMemory) + this.peakMemory.value = currentMemory + } tf.dispose([xs, ys, lossTensor, next.value]) - // console.log( - // `Epoch: ${epoch}`, - // `\tStep: ${iteration} / ${this.config.maxIter}`, - // `\tLoss: ${loss.toFixed(3)}`, - // `\tPeak memory: ${peakMemory.toFixed(2)} MB`, - // `\tMemory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, - // `\tNumber of tensors allocated: ${tf.memory().numTensors}`, - // `\tPreprocessing time: ${preprocessingTime.toFixed(0)} ms`, - // `\tWeight update time: ${weightUpdateTime.toFixed(0)} ms` - // ) + console.log( + `Epoch: ${epoch}`, + `\tStep: ${iteration} / ${this.config.maxIter}`, + `\tLoss: ${loss.toFixed(3)}`, + `\tPeak memory: ${this.peakMemory.value.toFixed(2)} GB`, + `\tNumber of tensors allocated: ${tf.memory().numTensors}`, + `\tPreprocessing time: ${preprocessingTime.toFixed(0)} ms`, + `\tWeight update time: ${weightUpdateTime.toFixed(0)} ms` + ) if (evalDataset !== undefined && this.config.evaluateEvery !== undefined && iteration % this.config.evaluateEvery == 0) { @@ -132,7 +140,7 @@ class GPTModel extends tf.LayersModel { let logs: tf.Logs = { 'loss': averageLoss / iteration, 'weightUpdateTime': averageWeightUpdateTime / iteration, - 'memory': averageMemory / iteration + 'peakMemory': this.peakMemory.value } if (evalDataset !== undefined) { logs = { ...logs, ...await evaluate(this, evalDataset, this.config.maxEvalBatches) } @@ -186,7 +194,7 @@ function prepareIdx (idx: tf.TensorLike): tf.Tensor2D { * */ export class GPTForCausalLM extends GPTModel { - async generate (idxRaw: tf.TensorLike, conf: GenerateConfig, act?: (_: { idxNext: number[][], timePerToken: number }) => Promise): Promise { + async generate (idxRaw: tf.TensorLike, conf: GenerateConfig, act?: (_: { idxNext: number[][], timePerToken: number }) => void): Promise { const config = Object.assign({}, defaultGenerateConfig, conf) let idx = prepareIdx(idxRaw) for (let step = 0; step < config.maxNewTokens; step++) { @@ -207,23 +215,23 @@ export class GPTForCausalLM extends GPTModel { private generateOnce (model: tf.LayersModel, idx: tf.Tensor2D, config: GenerateConfig): { idxNext: tf.Tensor2D, timePerToken: number } { let timePerToken = performance.now() - const idxNext = tf.tidy(() => { + // slice input tokens if longer than context length const blockSize = this.config.blockSize - const idxCond = idx.shape[1] <= blockSize - ? idx : idx.slice([0, -blockSize], [-1, -1]) + idx = idx.shape[1] <= blockSize + ? 
idx : idx.slice([0, idx.shape[1] - blockSize]) - const output = model.predict(idxCond) + const output = model.predict(idx) if (Array.isArray(output)) throw new Error('The model outputs too multiple values') if (output.shape.length !== 3) throw new Error('The model outputs wrong shape') const logits = output as tf.Tensor3D - + timePerToken = performance.now() - timePerToken const logitsScaled = logits .slice([0, idx.shape[1] - 1, 0]) .reshape([logits.shape[0], logits.shape[2]]) .div(tf.scalar(config.temperature)) - const probs = logitsScaled.softmax(-1) + const probs = logitsScaled.softmax(-1) if (config.doSample) { return tf.multinomial(probs, 1) as tf.Tensor2D } else { @@ -236,4 +244,5 @@ export class GPTForCausalLM extends GPTModel { timePerToken } } + } diff --git a/discojs/discojs-core/src/models/model.ts b/discojs/discojs-core/src/models/model.ts index 7237dd777..2137cdf6d 100644 --- a/discojs/discojs-core/src/models/model.ts +++ b/discojs/discojs-core/src/models/model.ts @@ -14,7 +14,7 @@ export interface EpochLogs { accuracy: number }; weightUpdateTime?: number; - memory?: number; + peakMemory?: number; } // TODO still bound to tfjs diff --git a/docs/examples/benchmark_gpt.ts b/docs/examples/benchmark_gpt.ts index f03b5e880..d2a20862f 100644 --- a/docs/examples/benchmark_gpt.ts +++ b/docs/examples/benchmark_gpt.ts @@ -1,50 +1,73 @@ import type { Task } from '@epfml/discojs-core' import { fetchTasks, data, models } from '@epfml/discojs-core' -import { NodeTextLoader } from '@epfml/discojs-node' +import { NodeTextLoader, loadModelFromDisk } from '@epfml/discojs-node' import * as tf from '@tensorflow/tfjs' async function main(): Promise { // Launch a server instance const url = new URL('http://localhost:8080') - // Fetch the wikitext task from the server const tasks = await fetchTasks(url) const task = tasks.get('wikitext-103') if (task === undefined) { throw new Error('task not found') } - const config: models.GPTConfig = { - modelType: 'gpt-nano', - lr: 0.0001, - maxIter: 10, - evaluateEvery:10000, - maxEvalBatches: 10, - blockSize: 8, - vocabSize: 50258 - } - const modelType = 'gpt2'//['gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2'] - const contextLength = 128 // [128, 256, 512, 1024, 2048] - const batchSize = 16 //[8, 16, 32, 64] + const BENCHMARK_TRAIN = true // if false benchmark inference + if (BENCHMARK_TRAIN) { + const config: models.GPTConfig = { + modelType: 'gpt-nano', + lr: 0.0001, + maxIter: 10, + evaluateEvery:10000, + maxEvalBatches: 10, + blockSize: 8, + vocabSize: 50258 + } + + const modelType = 'gpt-nano'//['gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2'] + const contextLength = 256 // [128, 256, 512, 1024, 2048] + const batchSize = 8 //[8, 16, 32, 64] + + console.log(`Begin loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) + task.trainingInformation.batchSize = batchSize + config.modelType = modelType as models.GPTModelType + task.trainingInformation.maxSequenceLength = contextLength + config.blockSize = contextLength + console.log(`\tmodel type ${modelType} \n\tbatch size ${batchSize} \n\tcontext length ${contextLength}`) + // Reload the dataset to batch it with the right batch size + const dataset = await loadWikitextData(task) + const preprocessedDataset = dataset.train.preprocess().batch().dataset + const model = new models.GPT(config) + const logGenerator = model.train(preprocessedDataset, undefined, 1) + for await (const logs of logGenerator) { + const updateTime = logs.weightUpdateTime ?? 
0
+ const msPerToken = updateTime / batchSize / contextLength
+ console.log(`\t\t\t${msPerToken.toFixed(2)} ms/token
${logs.peakMemory?.toFixed(2)} GB`) + } + model.dispose() + } else { + + const model = await loadModelFromDisk(`models/model_random.json`) as models.GPT + // Retrieve the tokenizer used during training + const tokenizer = await models.getTaskTokenizer(task) + const prompt = 'The game began development in 2010 , carrying over a large portion, The game began development in 2010 , carrying over a large portion, The game began development in 2010 , carrying over a large portion,' + const nbNewTokens = 200 + const iterations = 10 + console.log("Prompt token size", tokenizer(prompt)['input_ids']['size']) + console.log("Number new tokens", nbNewTokens) - // TODO make sure the loss is not NAN anymore when training - - console.log(`Begin loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) - task.trainingInformation.batchSize = batchSize - config.modelType = modelType as models.GPTModelType - task.trainingInformation.maxSequenceLength = contextLength - config.blockSize = contextLength - console.log(`\tmodel type ${modelType} \n\tbatch size ${batchSize} \n\tcontext length ${contextLength}`) - // Reload the dataset to batch it with the right batch size - const dataset = await loadWikitextData(task) - const preprocessedDataset = dataset.train.preprocess().batch().dataset - const model = new models.GPT(config) - const logGenerator = model.train(preprocessedDataset, undefined, 1) - for await (const logs of logGenerator) { - const updateTime = logs.weightUpdateTime ?? 0 - const msPerToken = updateTime / batchSize / contextLength - console.log(`\t\t\t${msPerToken.toFixed(2)} ms/token
${logs.memory?.toFixed(0)} MB`) + let inferenceTime = 0 + let iterationAvgTokenTime = 0 + for (let i = 0; i < iterations; i++) { + const timeStart = performance.now() + const { generation, avgTokenTime } = await model.generate(prompt, tokenizer, nbNewTokens) + inferenceTime += performance.now() - timeStart + iterationAvgTokenTime += avgTokenTime + } + // Overall average includes tokenization, token sampling and de-tokenization + console.log(`Overall average: ${(inferenceTime/ nbNewTokens / iterations).toFixed(2)} ms/token`) + console.log(`token inference only: ${(iterationAvgTokenTime / iterations).toFixed(2)} ms/token`) } - model.dispose() - + } async function loadWikitextData (task: Task): Promise { diff --git a/docs/examples/wikitext.ts b/docs/examples/wikitext.ts index 7b46831fb..95d188108 100644 --- a/docs/examples/wikitext.ts +++ b/docs/examples/wikitext.ts @@ -47,8 +47,8 @@ async function main(): Promise { // Retrieve the tokenizer used during training const tokenizer = await models.getTaskTokenizer(task) const prompt = 'The game began development in 2010 , carrying over a large portion' - const generations = await model.generate(prompt, tokenizer) - console.log(generations) + const { generation, avgTokenTime } = await model.generate(prompt, tokenizer) + console.log(generation) } async function loadWikitextData (task: Task): Promise { From fd8d35208fd45c4377a7665d4d7c0c0403feed24 Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Thu, 18 Apr 2024 14:08:37 +0200 Subject: [PATCH 05/17] Fix training loop end condition mistake --- discojs/discojs-core/src/models/gpt/index.ts | 4 +- discojs/discojs-core/src/models/gpt/model.ts | 48 ++++++++++---------- docs/examples/benchmark_gpt.ts | 5 +- 3 files changed, 30 insertions(+), 27 deletions(-) diff --git a/discojs/discojs-core/src/models/gpt/index.ts b/discojs/discojs-core/src/models/gpt/index.ts index 61e947f5c..411740464 100644 --- a/discojs/discojs-core/src/models/gpt/index.ts +++ b/discojs/discojs-core/src/models/gpt/index.ts @@ -96,15 +96,13 @@ export class GPT extends Model { doSample: false } let avgTimePerToken = 0 - let tokenCount = 0 const predictedTokens = await this.model.generate(tokens, generationConfig, (res) => { avgTimePerToken += res.timePerToken - tokenCount += 1 }) const generatedWords = tokenizer.decode(predictedTokens[0]) return { generation: generatedWords, - avgTokenTime: avgTimePerToken / tokenCount + avgTokenTime: avgTimePerToken / generationConfig.maxNewTokens } } diff --git a/discojs/discojs-core/src/models/gpt/model.ts b/discojs/discojs-core/src/models/gpt/model.ts index 67b4b3893..359f5559b 100644 --- a/discojs/discojs-core/src/models/gpt/model.ts +++ b/discojs/discojs-core/src/models/gpt/model.ts @@ -68,20 +68,18 @@ class GPTModel extends tf.LayersModel { async fitDataset(dataset: Dataset, trainingArgs: tf.ModelFitDatasetArgs): Promise { const callbacks = trainingArgs.callbacks as tf.CustomCallbackArgs const evalDataset = trainingArgs.validationData as tf.data.Dataset<{ xs: tf.Tensor2D, ys: tf.Tensor3D }> - + console.log(`Begin train - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) await callbacks.onTrainBegin?.() for (let epoch = 1; epoch <= trainingArgs.epochs; epoch++) { let averageLoss = 0 let averageWeightUpdateTime = 0 let iteration = 1 const iterator = await dataset.iterator() + let preprocessingTime = performance.now() + let next = await iterator.next() + preprocessingTime = performance.now() - preprocessingTime - let continueTraining = true - 
while (continueTraining) { - let preprocessingTime = performance.now() - const next = await iterator.next() - preprocessingTime = performance.now() - preprocessingTime - + while (next.done !== true && iteration <= this.config.maxIter) { let weightUpdateTime = performance.now() await callbacks.onEpochBegin?.(epoch) const { xs, ys } = next.value as { xs: tf.Tensor2D, ys: tf.Tensor3D } @@ -112,8 +110,17 @@ class GPTModel extends tf.LayersModel { console.log("Max memory", currentMemory) this.peakMemory.value = currentMemory } - tf.dispose([xs, ys, lossTensor, next.value]) + tf.dispose([xs, ys, lossTensor]) + + if ( + evalDataset !== undefined && + this.config.evaluateEvery !== undefined && + iteration % this.config.evaluateEvery == 0 + ){ + const iterationLogs = await evaluate(this, evalDataset, this.config.maxEvalBatches) + console.log(iterationLogs) + } console.log( `Epoch: ${epoch}`, `\tStep: ${iteration} / ${this.config.maxIter}`, @@ -123,19 +130,13 @@ class GPTModel extends tf.LayersModel { `\tPreprocessing time: ${preprocessingTime.toFixed(0)} ms`, `\tWeight update time: ${weightUpdateTime.toFixed(0)} ms` ) - - if (evalDataset !== undefined && this.config.evaluateEvery !== undefined - && iteration % this.config.evaluateEvery == 0) { - const logs = await evaluate(this, evalDataset, this.config.maxEvalBatches) - console.log(logs) - } iteration++ - continueTraining = next.done !== true && iteration <= this.config.maxIter - // If we reached the last iteration, cleanup the tensors - if (next.done != true && iteration > this.config.maxIter) { - const { xs, ys } = next.value as { xs: tf.Tensor2D, ys: tf.Tensor3D } - tf.dispose([xs, ys]) - } + next = await iterator.next() + } + // Memory leak: If we reached the last iteration rather than the end of the dataset, cleanup the tensors + if (next.done != true && iteration > this.config.maxIter) { + const { xs, ys } = next.value as { xs: tf.Tensor2D, ys: tf.Tensor3D } + tf.dispose([xs, ys]) } let logs: tf.Logs = { 'loss': averageLoss / iteration, @@ -150,6 +151,8 @@ class GPTModel extends tf.LayersModel { } await callbacks.onTrainEnd?.() + console.log(`End train - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) + return new tf.History() } } @@ -220,7 +223,7 @@ export class GPTForCausalLM extends GPTModel { const blockSize = this.config.blockSize idx = idx.shape[1] <= blockSize ? idx : idx.slice([0, idx.shape[1] - blockSize]) - + const output = model.predict(idx) if (Array.isArray(output)) throw new Error('The model outputs too multiple values') if (output.shape.length !== 3) throw new Error('The model outputs wrong shape') @@ -231,7 +234,7 @@ export class GPTForCausalLM extends GPTModel { .slice([0, idx.shape[1] - 1, 0]) .reshape([logits.shape[0], logits.shape[2]]) .div(tf.scalar(config.temperature)) - const probs = logitsScaled.softmax(-1) + const probs = logitsScaled.softmax(-1) if (config.doSample) { return tf.multinomial(probs, 1) as tf.Tensor2D } else { @@ -244,5 +247,4 @@ export class GPTForCausalLM extends GPTModel { timePerToken } } - } diff --git a/docs/examples/benchmark_gpt.ts b/docs/examples/benchmark_gpt.ts index d2a20862f..80b05a761 100644 --- a/docs/examples/benchmark_gpt.ts +++ b/docs/examples/benchmark_gpt.ts @@ -44,6 +44,9 @@ async function main(): Promise { console.log(`\t\t\t${msPerToken.toFixed(2)} ms/token
${logs.peakMemory?.toFixed(2)} GB`) } model.dispose() + + // Check for memory leak. Currently, there are a few tensors that are still not disposed + console.log(`End loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) } else { const model = await loadModelFromDisk(`models/model_random.json`) as models.GPT @@ -80,4 +83,4 @@ async function loadWikitextData (task: Task): Promise { } // You can run this example with "npm start" from this folder -main().catch(console.error) \ No newline at end of file +main().catch(console.error) From be413960fb4526bba5bb2637546170d28a1c5694 Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Thu, 18 Apr 2024 14:15:24 +0200 Subject: [PATCH 06/17] Fix linting errors --- discojs/discojs-core/src/models/gpt/config.ts | 18 +++++++----------- discojs/discojs-core/src/models/gpt/model.ts | 4 ++-- docs/examples/benchmark_gpt.ts | 6 +++--- docs/examples/package.json | 1 + docs/examples/wikitext.ts | 2 +- 5 files changed, 14 insertions(+), 17 deletions(-) diff --git a/discojs/discojs-core/src/models/gpt/config.ts b/discojs/discojs-core/src/models/gpt/config.ts index a8368af64..f9609e4b3 100644 --- a/discojs/discojs-core/src/models/gpt/config.ts +++ b/discojs/discojs-core/src/models/gpt/config.ts @@ -1,11 +1,11 @@ export type GPTModelType = - | 'gpt2' - | 'gpt2-medium' - | 'gpt2-large' - | 'gpt2-xl' - | 'gpt-mini' - | 'gpt-micro' - | 'gpt-nano' + | 'gpt2' + | 'gpt2-medium' + | 'gpt2-large' + | 'gpt2-xl' + | 'gpt-mini' + | 'gpt-micro' + | 'gpt-nano' export interface GPTConfig { lr: number @@ -77,9 +77,5 @@ export function getModelSizes (modelType: GPTModelType): Required { return { nLayer: 4, nHead: 4, nEmbd: 128 } case 'gpt-nano': return { nLayer: 3, nHead: 3, nEmbd: 48 } - default: { - const _: never = modelType - throw new Error(`GPT model type specified doesn't exist: ${modelType}`) - } } } diff --git a/discojs/discojs-core/src/models/gpt/model.ts b/discojs/discojs-core/src/models/gpt/model.ts index 359f5559b..ec437e428 100644 --- a/discojs/discojs-core/src/models/gpt/model.ts +++ b/discojs/discojs-core/src/models/gpt/model.ts @@ -49,7 +49,7 @@ class GPTModel extends tf.LayersModel { // Some tensors are not cleaned up when model.dispose is called // So we dispose them manually disposeRefs() { - for (let tensorContainer of this.disposalRefs) { + for (const tensorContainer of this.disposalRefs) { tf.dispose([tensorContainer]) } } @@ -208,7 +208,7 @@ export class GPTForCausalLM extends GPTModel { const idxNextArr = await idxNext.array() tf.dispose(idxNext) if (act !== undefined) { - await act({ idxNext: idxNextArr, timePerToken }) + act({ idxNext: idxNextArr, timePerToken }) } } const idxArr = await idx.array() diff --git a/docs/examples/benchmark_gpt.ts b/docs/examples/benchmark_gpt.ts index 80b05a761..258dbacd0 100644 --- a/docs/examples/benchmark_gpt.ts +++ b/docs/examples/benchmark_gpt.ts @@ -45,7 +45,7 @@ async function main(): Promise { } model.dispose() - // Check for memory leak. Currently, there are a few tensors that are still not disposed + // Check for memory leak. 
Currently, there are a few tensors that are still not disposed (one per attention layer in the model) console.log(`End loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) } else { @@ -55,14 +55,14 @@ async function main(): Promise { const prompt = 'The game began development in 2010 , carrying over a large portion, The game began development in 2010 , carrying over a large portion, The game began development in 2010 , carrying over a large portion,' const nbNewTokens = 200 const iterations = 10 - console.log("Prompt token size", tokenizer(prompt)['input_ids']['size']) + console.log("Prompt token size", (tokenizer(prompt) as {input_ids: number[]}).input_ids.length) console.log("Number new tokens", nbNewTokens) let inferenceTime = 0 let iterationAvgTokenTime = 0 for (let i = 0; i < iterations; i++) { const timeStart = performance.now() - const { generation, avgTokenTime } = await model.generate(prompt, tokenizer, nbNewTokens) + const { generation: _, avgTokenTime } = await model.generate(prompt, tokenizer, nbNewTokens) inferenceTime += performance.now() - timeStart iterationAvgTokenTime += avgTokenTime } diff --git a/docs/examples/package.json b/docs/examples/package.json index d5b05106b..6c75c7c16 100644 --- a/docs/examples/package.json +++ b/docs/examples/package.json @@ -7,6 +7,7 @@ "train": "npm run build && node dist/training.js", "custom_task": "npm run build && node dist/custom_task.js", "language_model": "npm run build && node dist/wikitext.js", + "benchmark_gpt": "npm run build && node dist/benchmark_gpt.js", "build": "tsc", "lint": "npx eslint .", "test": "npm run train" diff --git a/docs/examples/wikitext.ts b/docs/examples/wikitext.ts index 95d188108..61b83ffb7 100644 --- a/docs/examples/wikitext.ts +++ b/docs/examples/wikitext.ts @@ -47,7 +47,7 @@ async function main(): Promise { // Retrieve the tokenizer used during training const tokenizer = await models.getTaskTokenizer(task) const prompt = 'The game began development in 2010 , carrying over a large portion' - const { generation, avgTokenTime } = await model.generate(prompt, tokenizer) + const { generation, avgTokenTime: _ } = await model.generate(prompt, tokenizer) console.log(generation) } From 3dd8d8d404acd6d97d233216c1cadc0e586bb467 Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Thu, 18 Apr 2024 14:18:38 +0200 Subject: [PATCH 07/17] Cleanup console.logs --- discojs/discojs-core/src/models/gpt/model.ts | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/discojs/discojs-core/src/models/gpt/model.ts b/discojs/discojs-core/src/models/gpt/model.ts index ec437e428..bad51532f 100644 --- a/discojs/discojs-core/src/models/gpt/model.ts +++ b/discojs/discojs-core/src/models/gpt/model.ts @@ -68,8 +68,8 @@ class GPTModel extends tf.LayersModel { async fitDataset(dataset: Dataset, trainingArgs: tf.ModelFitDatasetArgs): Promise { const callbacks = trainingArgs.callbacks as tf.CustomCallbackArgs const evalDataset = trainingArgs.validationData as tf.data.Dataset<{ xs: tf.Tensor2D, ys: tf.Tensor3D }> - console.log(`Begin train - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) await callbacks.onTrainBegin?.() + for (let epoch = 1; epoch <= trainingArgs.epochs; epoch++) { let averageLoss = 0 let averageWeightUpdateTime = 0 @@ -93,12 +93,12 @@ class GPTModel extends tf.LayersModel { } return tf.losses.softmaxCrossEntropy(ys, logits) } - let currentMemory = 0 + let backwardPassMemory = 0 
const lossTensor = tf.tidy(() => { const { grads, value: lossTensor } = this.optimizer.computeGradients(lossFn) const gradsClipped = clipByGlobalNormObj(grads, 1) this.optimizer.applyGradients(gradsClipped) - currentMemory = tf.memory().numBytes / 1024 / 1024 / 1024 + backwardPassMemory = tf.memory().numBytes / 1024 / 1024 / 1024 return lossTensor }) @@ -106,9 +106,10 @@ class GPTModel extends tf.LayersModel { averageLoss += loss weightUpdateTime = performance.now() - weightUpdateTime averageWeightUpdateTime += weightUpdateTime - if (currentMemory > this.peakMemory.value) { - console.log("Max memory", currentMemory) - this.peakMemory.value = currentMemory + // Probably never the case. Empirically the attention mechanism always allocates + // more memory than the backward pass + if (backwardPassMemory > this.peakMemory.value) { + this.peakMemory.value = backwardPassMemory } tf.dispose([xs, ys, lossTensor]) @@ -151,8 +152,6 @@ class GPTModel extends tf.LayersModel { } await callbacks.onTrainEnd?.() - console.log(`End train - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) - return new tf.History() } } From c5a667458114e03d29adbcab1aaf45e3086f72ff Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Thu, 18 Apr 2024 14:19:50 +0200 Subject: [PATCH 08/17] Fix gpt test --- discojs/discojs-core/src/models/gpt/gpt.spec.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/discojs/discojs-core/src/models/gpt/gpt.spec.ts b/discojs/discojs-core/src/models/gpt/gpt.spec.ts index 7f8c090cb..b86ccd7a6 100644 --- a/discojs/discojs-core/src/models/gpt/gpt.spec.ts +++ b/discojs/discojs-core/src/models/gpt/gpt.spec.ts @@ -37,7 +37,7 @@ describe('gpt-tfjs', function() { const model = new GPT(config) const logGenerator = model.train(tokenDataset, undefined, 5) // 5 epochs for await (const _ of logGenerator); // Await the end of training - const generation = await model.generate("Lorem ipsum dolor", tokenizer, 1) + const { generation, avgTokenTime: _ } = await model.generate("Lorem ipsum dolor", tokenizer, 1) console.log(generation) expect(generation).equal(data) // Assert that the model completes 'Lorem ipsum dolor' with 'sit' }) From cc83ad3d9679f2b7d3efebbe17c3bccd4bdf5ad8 Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Thu, 18 Apr 2024 14:32:07 +0200 Subject: [PATCH 09/17] Add benchmark PR link --- discojs/discojs-core/src/models/gpt/config.ts | 2 +- docs/examples/benchmark_gpt.ts | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/discojs/discojs-core/src/models/gpt/config.ts b/discojs/discojs-core/src/models/gpt/config.ts index f9609e4b3..a2f508936 100644 --- a/discojs/discojs-core/src/models/gpt/config.ts +++ b/discojs/discojs-core/src/models/gpt/config.ts @@ -30,7 +30,7 @@ export interface GPTConfig { nHead?: number nEmbd?: number } - +// for a benchmark of performance, see https://github.com/epfml/disco/pull/659 export const DEFAULT_CONFIG: Required = { name: 'transformer', lr: 0.001, diff --git a/docs/examples/benchmark_gpt.ts b/docs/examples/benchmark_gpt.ts index 258dbacd0..14ec959d5 100644 --- a/docs/examples/benchmark_gpt.ts +++ b/docs/examples/benchmark_gpt.ts @@ -3,6 +3,10 @@ import { fetchTasks, data, models } from '@epfml/discojs-core' import { NodeTextLoader, loadModelFromDisk } from '@epfml/discojs-node' import * as tf from '@tensorflow/tfjs' +/** + * Benchmark results are reported in https://github.com/epfml/disco/pull/659 + */ + async function main(): Promise { // Launch a server instance const 
url = new URL('http://localhost:8080') From f4784fceb9e8834e06ec8761d43f1b9180c37d3b Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Sat, 20 Apr 2024 16:53:31 +0200 Subject: [PATCH 10/17] Clean benchmarking code --- discojs/discojs-core/src/models/gpt/config.ts | 2 +- .../discojs-core/src/models/gpt/gpt.spec.ts | 2 +- discojs/discojs-core/src/models/gpt/index.ts | 25 ++++---------- discojs/discojs-core/src/models/gpt/model.ts | 18 +++------- discojs/discojs-core/src/models/index.ts | 2 +- discojs/discojs-core/src/models/model.ts | 3 +- discojs/discojs-core/src/models/tfjs.ts | 23 +++++++++---- docs/examples/benchmark_gpt.ts | 33 ++++++++++--------- docs/examples/wikitext.ts | 2 +- 9 files changed, 49 insertions(+), 61 deletions(-) diff --git a/discojs/discojs-core/src/models/gpt/config.ts b/discojs/discojs-core/src/models/gpt/config.ts index a2f508936..343f2949e 100644 --- a/discojs/discojs-core/src/models/gpt/config.ts +++ b/discojs/discojs-core/src/models/gpt/config.ts @@ -1,4 +1,4 @@ -export type GPTModelType = +type GPTModelType = | 'gpt2' | 'gpt2-medium' | 'gpt2-large' diff --git a/discojs/discojs-core/src/models/gpt/gpt.spec.ts b/discojs/discojs-core/src/models/gpt/gpt.spec.ts index b86ccd7a6..7f8c090cb 100644 --- a/discojs/discojs-core/src/models/gpt/gpt.spec.ts +++ b/discojs/discojs-core/src/models/gpt/gpt.spec.ts @@ -37,7 +37,7 @@ describe('gpt-tfjs', function() { const model = new GPT(config) const logGenerator = model.train(tokenDataset, undefined, 5) // 5 epochs for await (const _ of logGenerator); // Await the end of training - const { generation, avgTokenTime: _ } = await model.generate("Lorem ipsum dolor", tokenizer, 1) + const generation = await model.generate("Lorem ipsum dolor", tokenizer, 1) console.log(generation) expect(generation).equal(data) // Assert that the model completes 'Lorem ipsum dolor' with 'sit' }) diff --git a/discojs/discojs-core/src/models/gpt/index.ts b/discojs/discojs-core/src/models/gpt/index.ts index 411740464..337e4733f 100644 --- a/discojs/discojs-core/src/models/gpt/index.ts +++ b/discojs/discojs-core/src/models/gpt/index.ts @@ -46,14 +46,15 @@ export class GPT extends Model { for (let epoch = 0; epoch < epochs; epoch++) { await this.model.fitDataset(trainingData, trainingArgs); if (logs === undefined) { - throw new Error("epoch didn't gave any logs"); + throw new Error("Epoch didn't gave any logs"); } - const { loss, val_acc, val_loss, weightUpdateTime, peakMemory } = logs; + const { loss, val_acc, val_loss, peakMemory } = logs; if (loss === undefined || isNaN(loss)) { throw new Error("Training loss is undefined or nan"); } const structuredLogs: EpochLogs = { epoch, + peakMemory, training: { loss: logs.loss } @@ -66,13 +67,6 @@ export class GPT extends Model { } structuredLogs.validation = { accuracy: logs.val_acc, loss: logs.val_loss} } - if (weightUpdateTime !== undefined && !isNaN(weightUpdateTime)) { - structuredLogs['weightUpdateTime'] = weightUpdateTime - } - if (peakMemory !== undefined && !isNaN(peakMemory)) { - structuredLogs['peakMemory'] = peakMemory - } - yield structuredLogs } } @@ -86,8 +80,7 @@ export class GPT extends Model { return Promise.resolve(ret) } - async generate(input: string, tokenizer: PreTrainedTokenizer, newTokens: number = 10): - Promise<{ generation: string, avgTokenTime: number }> { + async generate(input: string, tokenizer: PreTrainedTokenizer, newTokens: number = 10): Promise { const { input_ids: tokens } = await tokenizer(input, { return_tensor: false}) as { input_ids: number[] } const generationConfig = { 
@@ -95,15 +88,9 @@ export class GPT extends Model { temperature: 1.0, doSample: false } - let avgTimePerToken = 0 - const predictedTokens = await this.model.generate(tokens, generationConfig, (res) => { - avgTimePerToken += res.timePerToken - }) + const predictedTokens = await this.model.generate(tokens, generationConfig) const generatedWords = tokenizer.decode(predictedTokens[0]) - return { - generation: generatedWords, - avgTokenTime: avgTimePerToken / generationConfig.maxNewTokens - } + return generatedWords } get config (): Required { diff --git a/discojs/discojs-core/src/models/gpt/model.ts b/discojs/discojs-core/src/models/gpt/model.ts index bad51532f..87285d97e 100644 --- a/discojs/discojs-core/src/models/gpt/model.ts +++ b/discojs/discojs-core/src/models/gpt/model.ts @@ -141,7 +141,6 @@ class GPTModel extends tf.LayersModel { } let logs: tf.Logs = { 'loss': averageLoss / iteration, - 'weightUpdateTime': averageWeightUpdateTime / iteration, 'peakMemory': this.peakMemory.value } if (evalDataset !== undefined) { @@ -196,27 +195,23 @@ function prepareIdx (idx: tf.TensorLike): tf.Tensor2D { * */ export class GPTForCausalLM extends GPTModel { - async generate (idxRaw: tf.TensorLike, conf: GenerateConfig, act?: (_: { idxNext: number[][], timePerToken: number }) => void): Promise { + async generate (idxRaw: tf.TensorLike, conf: GenerateConfig): Promise { const config = Object.assign({}, defaultGenerateConfig, conf) let idx = prepareIdx(idxRaw) for (let step = 0; step < config.maxNewTokens; step++) { - const { idxNext, timePerToken } = this.generateOnce(this, idx, config) + const idxNext = this.generateOnce(this, idx, config) const idxNew = idx.concat(idxNext, 1) tf.dispose(idx) idx = idxNew const idxNextArr = await idxNext.array() tf.dispose(idxNext) - if (act !== undefined) { - act({ idxNext: idxNextArr, timePerToken }) - } } const idxArr = await idx.array() tf.dispose(idx) return idxArr } - private generateOnce (model: tf.LayersModel, idx: tf.Tensor2D, config: GenerateConfig): { idxNext: tf.Tensor2D, timePerToken: number } { - let timePerToken = performance.now() + private generateOnce (model: tf.LayersModel, idx: tf.Tensor2D, config: GenerateConfig): tf.Tensor2D { const idxNext = tf.tidy(() => { // slice input tokens if longer than context length const blockSize = this.config.blockSize @@ -228,7 +223,6 @@ export class GPTForCausalLM extends GPTModel { if (output.shape.length !== 3) throw new Error('The model outputs wrong shape') const logits = output as tf.Tensor3D - timePerToken = performance.now() - timePerToken const logitsScaled = logits .slice([0, idx.shape[1] - 1, 0]) .reshape([logits.shape[0], logits.shape[2]]) @@ -240,10 +234,6 @@ export class GPTForCausalLM extends GPTModel { return probs.argMax(-1).expandDims(1) } }) - - return { - idxNext, - timePerToken - } + return idxNext } } diff --git a/discojs/discojs-core/src/models/index.ts b/discojs/discojs-core/src/models/index.ts index f6c1de092..aefb9b5fa 100644 --- a/discojs/discojs-core/src/models/index.ts +++ b/discojs/discojs-core/src/models/index.ts @@ -1,6 +1,6 @@ export { EpochLogs, Model } from './model.js' export { GPT } from './gpt/index.js' -export { GPTConfig, GPTModelType } from './gpt/config.js' +export { GPTConfig } from './gpt/config.js' export { TFJS } from './tfjs.js' export { getTaskTokenizer } from './tokenizer.js' diff --git a/discojs/discojs-core/src/models/model.ts b/discojs/discojs-core/src/models/model.ts index 2137cdf6d..b087a2121 100644 --- a/discojs/discojs-core/src/models/model.ts +++ 
b/discojs/discojs-core/src/models/model.ts @@ -13,8 +13,7 @@ export interface EpochLogs { loss: number, accuracy: number }; - weightUpdateTime?: number; - peakMemory?: number; + peakMemory: number; } // TODO still bound to tfjs diff --git a/discojs/discojs-core/src/models/tfjs.ts b/discojs/discojs-core/src/models/tfjs.ts index 191bf7aef..de5e219b9 100644 --- a/discojs/discojs-core/src/models/tfjs.ts +++ b/discojs/discojs-core/src/models/tfjs.ts @@ -34,23 +34,31 @@ export class TFJS extends Model { ): AsyncGenerator { for (let epoch = 0; epoch < epochs; epoch++) { let logs: tf.Logs | undefined; - + let peakMemory = 0 await this.model.fitDataset(trainingData, { epochs: 1, validationData, - callbacks: { onEpochEnd: (_, cur) => { logs = cur } }, + callbacks: { + onBatchEnd: (_) => { + let currentMemory = tf.memory().numBytes + if (currentMemory > peakMemory) { + peakMemory = currentMemory + } + }, + onEpochEnd: (_, cur) => { logs = cur } + }, }); if (logs === undefined) { - throw new Error("epoch didn't gave any logs"); + throw new Error("Epoch didn't gave any logs"); } const { loss, acc, val_acc, val_loss } = logs; - console.log(logs) if (loss === undefined || isNaN(loss) || acc === undefined || isNaN(acc)) { - throw new Error("Invalid training logs"); + throw new Error("Training loss is undefined or nan"); } const structuredLogs: EpochLogs = { epoch, + peakMemory, training: { loss: logs.loss, accuracy: logs.acc, @@ -61,7 +69,10 @@ export class TFJS extends Model { val_acc === undefined || isNaN(val_acc)) { throw new Error("Invalid validation logs"); } - structuredLogs.validation = { accuracy: logs.val_acc, loss: logs.val_loss} + structuredLogs.validation = { + accuracy: logs.val_acc, + loss: logs.val_loss + } } yield structuredLogs } diff --git a/docs/examples/benchmark_gpt.ts b/docs/examples/benchmark_gpt.ts index 14ec959d5..d22921fb5 100644 --- a/docs/examples/benchmark_gpt.ts +++ b/docs/examples/benchmark_gpt.ts @@ -21,19 +21,20 @@ async function main(): Promise { modelType: 'gpt-nano', lr: 0.0001, maxIter: 10, - evaluateEvery:10000, - maxEvalBatches: 10, blockSize: 8, vocabSize: 50258 } - const modelType = 'gpt-nano'//['gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2'] - const contextLength = 256 // [128, 256, 512, 1024, 2048] - const batchSize = 8 //[8, 16, 32, 64] + if (config.maxIter === undefined) { + throw new Error("The maximum number of iterations per epoch should be specified in the GPTConfig") + } + const modelType = 'gpt-nano' as const //['gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2'] + const contextLength = 512 // [128, 256, 512, 1024, 2048] + const batchSize = 16 //[8, 16, 32, 64] console.log(`Begin loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) task.trainingInformation.batchSize = batchSize - config.modelType = modelType as models.GPTModelType + config.modelType = modelType task.trainingInformation.maxSequenceLength = contextLength config.blockSize = contextLength console.log(`\tmodel type ${modelType} \n\tbatch size ${batchSize} \n\tcontext length ${contextLength}`) @@ -41,19 +42,22 @@ async function main(): Promise { const dataset = await loadWikitextData(task) const preprocessedDataset = dataset.train.preprocess().batch().dataset const model = new models.GPT(config) - const logGenerator = model.train(preprocessedDataset, undefined, 1) + let epochTime = performance.now() + const logGenerator = model.train(preprocessedDataset, undefined, 1) // Only one epoch for await (const logs of logGenerator) { - const updateTime 
= logs.weightUpdateTime ?? 0 - const msPerToken = updateTime / batchSize / contextLength - console.log(`\t\t\t${msPerToken.toFixed(2)} ms/token
${logs.peakMemory?.toFixed(2)} GB`) + epochTime = (performance.now() - epochTime) + const msPerToken = epochTime / (batchSize * contextLength * config.maxIter) + console.log(`\t\t\t${msPerToken.toFixed(2)} ms/token
${logs.peakMemory.toFixed(2)} GB`) } model.dispose() - // Check for memory leak. Currently, there are a few tensors that are still not disposed (one per attention layer in the model) console.log(`End loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) } else { - const model = await loadModelFromDisk(`models/model_random.json`) as models.GPT + const model = await loadModelFromDisk(`models/model_random.json`) + if (!(model instanceof models.GPT)){ + throw new Error("Loaded model isn't a GPT model") + } // Retrieve the tokenizer used during training const tokenizer = await models.getTaskTokenizer(task) const prompt = 'The game began development in 2010 , carrying over a large portion, The game began development in 2010 , carrying over a large portion, The game began development in 2010 , carrying over a large portion,' @@ -63,16 +67,13 @@ async function main(): Promise { console.log("Number new tokens", nbNewTokens) let inferenceTime = 0 - let iterationAvgTokenTime = 0 for (let i = 0; i < iterations; i++) { const timeStart = performance.now() - const { generation: _, avgTokenTime } = await model.generate(prompt, tokenizer, nbNewTokens) + const _ = await model.generate(prompt, tokenizer, nbNewTokens) inferenceTime += performance.now() - timeStart - iterationAvgTokenTime += avgTokenTime } // Overall average includes tokenization, token sampling and de-tokenization console.log(`Overall average: ${(inferenceTime/ nbNewTokens / iterations).toFixed(2)} ms/token`) - console.log(`token inference only: ${(iterationAvgTokenTime / iterations).toFixed(2)} ms/token`) } } diff --git a/docs/examples/wikitext.ts b/docs/examples/wikitext.ts index 61b83ffb7..e76edc65f 100644 --- a/docs/examples/wikitext.ts +++ b/docs/examples/wikitext.ts @@ -47,7 +47,7 @@ async function main(): Promise { // Retrieve the tokenizer used during training const tokenizer = await models.getTaskTokenizer(task) const prompt = 'The game began development in 2010 , carrying over a large portion' - const { generation, avgTokenTime: _ } = await model.generate(prompt, tokenizer) + const generation = await model.generate(prompt, tokenizer) console.log(generation) } From d0f13229ae179ea5b6282b146688ec1c6fa323bb Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Sat, 20 Apr 2024 17:25:25 +0200 Subject: [PATCH 11/17] Fix linting errors --- discojs/discojs-core/src/models/gpt/model.ts | 3 -- discojs/discojs-core/src/models/tfjs.ts | 2 +- .../src/training/trainer/trainer.ts | 1 + docs/examples/benchmark_gpt.ts | 39 +++++++++++-------- 4 files changed, 24 insertions(+), 21 deletions(-) diff --git a/discojs/discojs-core/src/models/gpt/model.ts b/discojs/discojs-core/src/models/gpt/model.ts index 87285d97e..13e725749 100644 --- a/discojs/discojs-core/src/models/gpt/model.ts +++ b/discojs/discojs-core/src/models/gpt/model.ts @@ -72,7 +72,6 @@ class GPTModel extends tf.LayersModel { for (let epoch = 1; epoch <= trainingArgs.epochs; epoch++) { let averageLoss = 0 - let averageWeightUpdateTime = 0 let iteration = 1 const iterator = await dataset.iterator() let preprocessingTime = performance.now() @@ -105,7 +104,6 @@ class GPTModel extends tf.LayersModel { const loss = await lossTensor.array() averageLoss += loss weightUpdateTime = performance.now() - weightUpdateTime - averageWeightUpdateTime += weightUpdateTime // Probably never the case. 
Empirically the attention mechanism always allocates // more memory than the backward pass if (backwardPassMemory > this.peakMemory.value) { @@ -203,7 +201,6 @@ export class GPTForCausalLM extends GPTModel { const idxNew = idx.concat(idxNext, 1) tf.dispose(idx) idx = idxNew - const idxNextArr = await idxNext.array() tf.dispose(idxNext) } const idxArr = await idx.array() diff --git a/discojs/discojs-core/src/models/tfjs.ts b/discojs/discojs-core/src/models/tfjs.ts index de5e219b9..cfd54a0c2 100644 --- a/discojs/discojs-core/src/models/tfjs.ts +++ b/discojs/discojs-core/src/models/tfjs.ts @@ -40,7 +40,7 @@ export class TFJS extends Model { validationData, callbacks: { onBatchEnd: (_) => { - let currentMemory = tf.memory().numBytes + const currentMemory = tf.memory().numBytes / 1024 / 1024 / 1024 // GB if (currentMemory > peakMemory) { peakMemory = currentMemory } diff --git a/discojs/discojs-core/src/training/trainer/trainer.ts b/discojs/discojs-core/src/training/trainer/trainer.ts index ed1f5bd5b..f4c74d3fa 100644 --- a/discojs/discojs-core/src/training/trainer/trainer.ts +++ b/discojs/discojs-core/src/training/trainer/trainer.ts @@ -66,6 +66,7 @@ export abstract class Trainer { round: logs.epoch, epochs: List.of(logs), }; + console.log(logs.peakMemory) if (logs.epoch % this.#roundDuration === 0) { const round = Math.trunc(logs.epoch / this.#roundDuration); diff --git a/docs/examples/benchmark_gpt.ts b/docs/examples/benchmark_gpt.ts index d22921fb5..fdb4be67b 100644 --- a/docs/examples/benchmark_gpt.ts +++ b/docs/examples/benchmark_gpt.ts @@ -17,43 +17,48 @@ async function main(): Promise { const BENCHMARK_TRAIN = true // if false benchmark inference if (BENCHMARK_TRAIN) { + + // Benchmark parameters + const epoch = 1 + const iterationsPerEpoch = 10 + + // Model parameters to benchmark + const modelType = 'gpt-nano' //['gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2'] + const contextLength = 512 // [128, 256, 512, 1024, 2048] + const batchSize = 16 //[8, 16, 32, 64] + const config: models.GPTConfig = { - modelType: 'gpt-nano', + modelType: modelType, lr: 0.0001, - maxIter: 10, - blockSize: 8, + maxIter: iterationsPerEpoch, + blockSize: contextLength, vocabSize: 50258 } + console.log(`Begin loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) - if (config.maxIter === undefined) { - throw new Error("The maximum number of iterations per epoch should be specified in the GPTConfig") - } - const modelType = 'gpt-nano' as const //['gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2'] - const contextLength = 512 // [128, 256, 512, 1024, 2048] - const batchSize = 16 //[8, 16, 32, 64] - console.log(`Begin loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) + // Load the dataset after choosing the batch size and max sequence length + // to make sure the dataset is batched and tokenized correctly task.trainingInformation.batchSize = batchSize - config.modelType = modelType task.trainingInformation.maxSequenceLength = contextLength - config.blockSize = contextLength - console.log(`\tmodel type ${modelType} \n\tbatch size ${batchSize} \n\tcontext length ${contextLength}`) - // Reload the dataset to batch it with the right batch size const dataset = await loadWikitextData(task) const preprocessedDataset = dataset.train.preprocess().batch().dataset + + // Init and train the model const model = new models.GPT(config) + console.log(`\tmodel type ${modelType} \n\tbatch size ${batchSize} \n\tcontext 
length ${contextLength}`) + let epochTime = performance.now() - const logGenerator = model.train(preprocessedDataset, undefined, 1) // Only one epoch + const logGenerator = model.train(preprocessedDataset, undefined, epoch) for await (const logs of logGenerator) { epochTime = (performance.now() - epochTime) - const msPerToken = epochTime / (batchSize * contextLength * config.maxIter) + const msPerToken = epochTime / (batchSize * contextLength * iterationsPerEpoch * epoch) console.log(`\t\t\t${msPerToken.toFixed(2)} ms/token
${logs.peakMemory.toFixed(2)} GB`) } model.dispose() // Check for memory leak. Currently, there are a few tensors that are still not disposed (one per attention layer in the model) console.log(`End loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) } else { - const model = await loadModelFromDisk(`models/model_random.json`) if (!(model instanceof models.GPT)){ throw new Error("Loaded model isn't a GPT model") From a02e65164be1a4d4b0b5b8cb75c28edf7fe2d12c Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Sat, 20 Apr 2024 18:02:37 +0200 Subject: [PATCH 12/17] Move benchmark to cli --- cli/package.json | 1 + {docs/examples => cli/src}/benchmark_gpt.ts | 93 ++++++++++++++------- 2 files changed, 64 insertions(+), 30 deletions(-) rename {docs/examples => cli/src}/benchmark_gpt.ts (51%) diff --git a/cli/package.json b/cli/package.json index 09052f1ab..51d9c6d04 100644 --- a/cli/package.json +++ b/cli/package.json @@ -6,6 +6,7 @@ "scripts": { "watch": "nodemon --ext ts --ignore dist --watch ../discojs/discojs-node/dist --watch ../server/dist --watch . --exec npm run", "start": "npm run build && node dist/cli.js", + "benchmark_gpt": "npm run build && node dist/benchmark_gpt.js", "build": "tsc", "lint": "npx eslint .", "test": ": nothing" diff --git a/docs/examples/benchmark_gpt.ts b/cli/src/benchmark_gpt.ts similarity index 51% rename from docs/examples/benchmark_gpt.ts rename to cli/src/benchmark_gpt.ts index fdb4be67b..c978d666a 100644 --- a/docs/examples/benchmark_gpt.ts +++ b/cli/src/benchmark_gpt.ts @@ -1,43 +1,71 @@ +import { parse } from 'ts-command-line-args'; import type { Task } from '@epfml/discojs-core' import { fetchTasks, data, models } from '@epfml/discojs-core' import { NodeTextLoader, loadModelFromDisk } from '@epfml/discojs-node' -import * as tf from '@tensorflow/tfjs' +import { startServer } from '@epfml/disco-server' + +interface CLIArguments{ + modelType?: string; // 'gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2' + contextLength?: number; // 128, 256, 512, 1024, 2048 + batchSize?: number; // 8, 16, 32, 64 + inference?: boolean; // benchmark inference if true, training otherwise + modelPath?: string; +} + +const parsedArgs = parse({ + modelType: { type: String, optional: true, description: "A GPT architecture: 'gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2'" }, + contextLength: { type: Number, optional: true, description: "The maximum input sequence length to train the model on" }, + batchSize: { type: Number, optional: true, description: "The model training bat size" }, + inference: { type: Boolean, optional: true, description: "Whether to benchmark the model inference or training" }, + modelPath: { type: String, optional: true, description: "If benchmarking inference, the path to the trained model" }, +}); + +const defaultArgs: Required = { + modelType: 'gpt-nano', + contextLength: 128, + batchSize: 8, + inference: false, + modelPath: 'models/model.json', +} + +// Fill parsed args with default args +const args = { ...defaultArgs, ...parsedArgs } /** * Benchmark results are reported in https://github.com/epfml/disco/pull/659 */ -async function main(): Promise { +async function main(args: Required): Promise { + const { inference: benchmarkInference, modelType, + contextLength, batchSize, modelPath } = args + // Launch a server instance - const url = new URL('http://localhost:8080') + const [server, url] = await startServer() + + // const url = new URL('http://localhost:8080') + // Fetch the wikitext task from the server 
const tasks = await fetchTasks(url) const task = tasks.get('wikitext-103') if (task === undefined) { throw new Error('task not found') } - - const BENCHMARK_TRAIN = true // if false benchmark inference - if (BENCHMARK_TRAIN) { - + + /** + * Training benchmark + */ + if (!benchmarkInference) { // Benchmark parameters const epoch = 1 const iterationsPerEpoch = 10 - // Model parameters to benchmark - const modelType = 'gpt-nano' //['gpt-nano', 'gpt-micro', 'gpt-mini', 'gpt2'] - const contextLength = 512 // [128, 256, 512, 1024, 2048] - const batchSize = 16 //[8, 16, 32, 64] - const config: models.GPTConfig = { - modelType: modelType, - lr: 0.0001, + modelType: modelType as models.GPTConfig['modelType'], maxIter: iterationsPerEpoch, blockSize: contextLength, - vocabSize: 50258 + lr: 0.0001, + vocabSize: 50258 // default wikitext task uses the gpt2 tokenizer with vocabSize 50258 } - console.log(`Begin loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) - - - // Load the dataset after choosing the batch size and max sequence length + + // Load the dataset after setting the Task batch size and max sequence length // to make sure the dataset is batched and tokenized correctly task.trainingInformation.batchSize = batchSize task.trainingInformation.maxSequenceLength = contextLength @@ -53,23 +81,26 @@ async function main(): Promise { for await (const logs of logGenerator) { epochTime = (performance.now() - epochTime) const msPerToken = epochTime / (batchSize * contextLength * iterationsPerEpoch * epoch) - console.log(`\t\t\t${msPerToken.toFixed(2)} ms/token
${logs.peakMemory.toFixed(2)} GB`) + console.log(`\t\tTraining time: ${msPerToken.toFixed(2)} ms/token
${logs.peakMemory.toFixed(2)} GB`) } model.dispose() - // Check for memory leak. Currently, there are a few tensors that are still not disposed (one per attention layer in the model) - console.log(`End loop - Memory: ${(tf.memory().numBytes / 1024 / 1024).toFixed(2)} MB`, `Num tensors: ${tf.memory().numTensors}`) + + /** + * Inference benchmark + */ } else { - const model = await loadModelFromDisk(`models/model_random.json`) + const model = await loadModelFromDisk(modelPath) if (!(model instanceof models.GPT)){ throw new Error("Loaded model isn't a GPT model") } // Retrieve the tokenizer used during training const tokenizer = await models.getTaskTokenizer(task) + + // Benchmark parameters const prompt = 'The game began development in 2010 , carrying over a large portion, The game began development in 2010 , carrying over a large portion, The game began development in 2010 , carrying over a large portion,' const nbNewTokens = 200 const iterations = 10 - console.log("Prompt token size", (tokenizer(prompt) as {input_ids: number[]}).input_ids.length) - console.log("Number new tokens", nbNewTokens) + console.log("Generating", nbNewTokens, "new tokens") let inferenceTime = 0 for (let i = 0; i < iterations; i++) { @@ -78,19 +109,21 @@ async function main(): Promise { inferenceTime += performance.now() - timeStart } // Overall average includes tokenization, token sampling and de-tokenization - console.log(`Overall average: ${(inferenceTime/ nbNewTokens / iterations).toFixed(2)} ms/token`) + console.log(`Inference time: ${(inferenceTime/ nbNewTokens / iterations).toFixed(2)} ms/token`) } - + await new Promise((resolve, reject) => { + server.once('close', resolve) + server.close(reject) + }) } async function loadWikitextData (task: Task): Promise { const loader = new NodeTextLoader(task) const dataSplit: data.DataSplit = { - train: await data.TextData.init(await loader.load('../../datasets/wikitext/wiki.train.tokens', {shuffle: true}), task), - validation: await data.TextData.init(await loader.load('../../datasets/wikitext/wiki.valid.tokens', {shuffle: true}), task) + train: await data.TextData.init(await loader.load('../datasets/wikitext/wiki.train.tokens', {shuffle: true}), task) } return dataSplit } // You can run this example with "npm start" from this folder -main().catch(console.error) +main(args).catch(console.error) From 58f018f07fb0c61a28fd9bd9a7ff403e06fbfd3a Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Sat, 20 Apr 2024 18:14:56 +0200 Subject: [PATCH 13/17] Update vue-tsc --- package-lock.json | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/package-lock.json b/package-lock.json index e944137c1..dfea7d7e0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -54,7 +54,7 @@ "ws": "8" }, "devDependencies": { - "@tensorflow/tfjs-node": "^4.17.0", + "@tensorflow/tfjs-node": "4", "@types/chai": "4", "@types/mocha": "10", "@types/simple-peer": "9", @@ -2655,30 +2655,30 @@ } }, "node_modules/@volar/language-core": { - "version": "2.2.0-alpha.7", - "resolved": "https://registry.npmjs.org/@volar/language-core/-/language-core-2.2.0-alpha.7.tgz", - "integrity": "sha512-igpp+nTkyl8faVzRJMpSCeA4XlBJ5UVSyc/WGyksmUmP10YbfufbcQCFlxEXv2uMBV+a3L4JVCj+Vju+08FOSA==", + "version": "2.2.0-alpha.8", + "resolved": "https://registry.npmjs.org/@volar/language-core/-/language-core-2.2.0-alpha.8.tgz", + "integrity": "sha512-Ew1Iw7/RIRNuDLn60fWJdOLApAlfTVPxbPiSLzc434PReC9kleYtaa//Wo2WlN1oiRqneW0pWQQV0CwYqaimLQ==", "dev": true, "dependencies": { - 
"@volar/source-map": "2.2.0-alpha.7" + "@volar/source-map": "2.2.0-alpha.8" } }, "node_modules/@volar/source-map": { - "version": "2.2.0-alpha.7", - "resolved": "https://registry.npmjs.org/@volar/source-map/-/source-map-2.2.0-alpha.7.tgz", - "integrity": "sha512-iIZM2EovdEnr6mMwlsnt4ciix4xz7HSGHyUSviRaY5cii5PMXGHeUU9UDeb+xzLCx8kdk3L5J4z+ts50AhkYcg==", + "version": "2.2.0-alpha.8", + "resolved": "https://registry.npmjs.org/@volar/source-map/-/source-map-2.2.0-alpha.8.tgz", + "integrity": "sha512-E1ZVmXFJ5DU4fWDcWHzi8OLqqReqIDwhXvIMhVdk6+VipfMVv4SkryXu7/rs4GA/GsebcRyJdaSkKBB3OAkIcA==", "dev": true, "dependencies": { "muggle-string": "^0.4.0" } }, "node_modules/@volar/typescript": { - "version": "2.2.0-alpha.7", - "resolved": "https://registry.npmjs.org/@volar/typescript/-/typescript-2.2.0-alpha.7.tgz", - "integrity": "sha512-qy04/hx4UbW1BdPlzaxlH60D4plubcyqdbYM6Y5vZiascZxFowtd6vE39Td9FYzDxwcKgzb/Crvf/ABhdHnuBA==", + "version": "2.2.0-alpha.8", + "resolved": "https://registry.npmjs.org/@volar/typescript/-/typescript-2.2.0-alpha.8.tgz", + "integrity": "sha512-RLbRDI+17CiayHZs9HhSzlH0FhLl/+XK6o2qoiw2o2GGKcyD1aDoY6AcMd44acYncTOrqoTNoY6LuCiRyiJiGg==", "dev": true, "dependencies": { - "@volar/language-core": "2.2.0-alpha.7", + "@volar/language-core": "2.2.0-alpha.8", "path-browserify": "^1.0.1" } }, @@ -2782,12 +2782,12 @@ } }, "node_modules/@vue/language-core": { - "version": "2.0.12", - "resolved": "https://registry.npmjs.org/@vue/language-core/-/language-core-2.0.12.tgz", - "integrity": "sha512-aIStDPt69SHOpiIckGTIIjEz/sXc6ZfCMS5uWYL1AcbcRMhzFCLZscGAVte1+ad+RRFepSpKBjGttyPcgKJ7ww==", + "version": "2.0.13", + "resolved": "https://registry.npmjs.org/@vue/language-core/-/language-core-2.0.13.tgz", + "integrity": "sha512-oQgM+BM66SU5GKtUMLQSQN0bxHFkFpLSSAiY87wVziPaiNQZuKVDt/3yA7GB9PiQw0y/bTNL0bOc0jM/siYjKg==", "dev": true, "dependencies": { - "@volar/language-core": "2.2.0-alpha.7", + "@volar/language-core": "2.2.0-alpha.8", "@vue/compiler-dom": "^3.4.0", "@vue/shared": "^3.4.0", "computeds": "^0.0.1", @@ -11119,13 +11119,13 @@ } }, "node_modules/vue-tsc": { - "version": "2.0.12", - "resolved": "https://registry.npmjs.org/vue-tsc/-/vue-tsc-2.0.12.tgz", - "integrity": "sha512-thlBBWlPYrNdba535oDdxz7PRUufZgRZRVP5Aql5wBVpGSWSeqou4EzFXeKVoZr59lp9hJROubDVzlhACmcEhg==", + "version": "2.0.13", + "resolved": "https://registry.npmjs.org/vue-tsc/-/vue-tsc-2.0.13.tgz", + "integrity": "sha512-a3nL3FvguCWVJUQW/jFrUxdeUtiEkbZoQjidqvMeBK//tuE2w6NWQAbdrEpY2+6nSa4kZoKZp8TZUMtHpjt4mQ==", "dev": true, "dependencies": { - "@volar/typescript": "2.2.0-alpha.7", - "@vue/language-core": "2.0.12", + "@volar/typescript": "2.2.0-alpha.8", + "@vue/language-core": "2.0.13", "semver": "^7.5.4" }, "bin": { From bcbd822755caf6afcab4fe4ae217dbb903b0e4b3 Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Sat, 20 Apr 2024 18:42:09 +0200 Subject: [PATCH 14/17] Make Model implement the Disposable interface --- cli/src/benchmark_gpt.ts | 2 +- discojs/discojs-core/src/models/gpt/index.ts | 7 +++++-- discojs/discojs-core/src/models/model.ts | 15 ++++++++++++--- discojs/discojs-core/src/models/tfjs.ts | 2 +- 4 files changed, 19 insertions(+), 7 deletions(-) diff --git a/cli/src/benchmark_gpt.ts b/cli/src/benchmark_gpt.ts index c978d666a..ac01183c5 100644 --- a/cli/src/benchmark_gpt.ts +++ b/cli/src/benchmark_gpt.ts @@ -83,7 +83,7 @@ async function main(args: Required): Promise { const msPerToken = epochTime / (batchSize * contextLength * iterationsPerEpoch * epoch) console.log(`\t\tTraining time: ${msPerToken.toFixed(2)} ms/token
${logs.peakMemory.toFixed(2)} GB`) } - model.dispose() + // model.dispose() /** * Inference benchmark diff --git a/discojs/discojs-core/src/models/gpt/index.ts b/discojs/discojs-core/src/models/gpt/index.ts index 337e4733f..73ca76cb6 100644 --- a/discojs/discojs-core/src/models/gpt/index.ts +++ b/discojs/discojs-core/src/models/gpt/index.ts @@ -117,8 +117,11 @@ export class GPT extends Model { } } - dispose(): void { - this.model.optimizer.dispose() + [Symbol.dispose](): void{ + console.log("Disposing model") + if (this.model.optimizer !== undefined) { + this.model.optimizer.dispose() + } // Some tensors are not cleaned up when model.dispose is called // So we dispose them manually this.model.disposeRefs() diff --git a/discojs/discojs-core/src/models/model.ts b/discojs/discojs-core/src/models/model.ts index b087a2121..60b61764c 100644 --- a/discojs/discojs-core/src/models/model.ts +++ b/discojs/discojs-core/src/models/model.ts @@ -26,7 +26,7 @@ export type Sample = tf.Tensor; * Allow for various implementation of models (various train function, tensor-library, ...) **/ // TODO make it typesafe: same shape of data/input/weights -export abstract class Model { +export abstract class Model implements Disposable{ // TODO don't allow external access but upgrade train to return weights on every epoch /** Return training state */ abstract get weights(): WeightsContainer; @@ -52,6 +52,15 @@ export abstract class Model { // TODO extract in separated TrainedModel? abstract predict(input: Sample): Promise; - // Cleanup the memory occupied by the model - abstract dispose(): void; + + /** + * This method is automatically called to cleanup the memory occupied by the model + * when leaving the definition scope if the instance has been defined with the `using` keyword. + * For example: + * function f() { + * using model = new Model(); + * } + * Calling f() will call the model's dispose method when exiting the function. + */ + abstract [Symbol.dispose](): void; } diff --git a/discojs/discojs-core/src/models/tfjs.ts b/discojs/discojs-core/src/models/tfjs.ts index cfd54a0c2..8646c34cd 100644 --- a/discojs/discojs-core/src/models/tfjs.ts +++ b/discojs/discojs-core/src/models/tfjs.ts @@ -114,7 +114,7 @@ export class TFJS extends Model { return await ret } - dispose() { + [Symbol.dispose](): void{ this.model.dispose() } From d99d6ada29849a45bfb71d5dfc3b20fb69fa2b20 Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Sat, 20 Apr 2024 18:47:31 +0200 Subject: [PATCH 15/17] Remove commented code --- cli/src/benchmark_gpt.ts | 1 - discojs/discojs-core/src/models/gpt/config.ts | 6 +++--- discojs/discojs-core/src/training/trainer/trainer.ts | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cli/src/benchmark_gpt.ts b/cli/src/benchmark_gpt.ts index ac01183c5..d8fe08ae4 100644 --- a/cli/src/benchmark_gpt.ts +++ b/cli/src/benchmark_gpt.ts @@ -83,7 +83,6 @@ async function main(args: Required): Promise { const msPerToken = epochTime / (batchSize * contextLength * iterationsPerEpoch * epoch) console.log(`\t\tTraining time: ${msPerToken.toFixed(2)} ms/token
${logs.peakMemory.toFixed(2)} GB`) } - // model.dispose() /** * Inference benchmark diff --git a/discojs/discojs-core/src/models/gpt/config.ts b/discojs/discojs-core/src/models/gpt/config.ts index 343f2949e..17515ec9f 100644 --- a/discojs/discojs-core/src/models/gpt/config.ts +++ b/discojs/discojs-core/src/models/gpt/config.ts @@ -1,4 +1,4 @@ -type GPTModelType = +type ModelType = | 'gpt2' | 'gpt2-medium' | 'gpt2-large' @@ -11,7 +11,7 @@ export interface GPTConfig { lr: number blockSize: number vocabSize: number - modelType: GPTModelType + modelType: ModelType name?: string, evaluate?: boolean maxEvalBatches?: number @@ -61,7 +61,7 @@ export type ModelSize = { nEmbd: number } -export function getModelSizes (modelType: GPTModelType): Required { +export function getModelSizes (modelType: ModelType): Required { switch (modelType) { case 'gpt2': return { nLayer: 12, nHead: 12, nEmbd: 768 } diff --git a/discojs/discojs-core/src/training/trainer/trainer.ts b/discojs/discojs-core/src/training/trainer/trainer.ts index f4c74d3fa..ed1f5bd5b 100644 --- a/discojs/discojs-core/src/training/trainer/trainer.ts +++ b/discojs/discojs-core/src/training/trainer/trainer.ts @@ -66,7 +66,6 @@ export abstract class Trainer { round: logs.epoch, epochs: List.of(logs), }; - console.log(logs.peakMemory) if (logs.epoch % this.#roundDuration === 0) { const round = Math.trunc(logs.epoch / this.#roundDuration); From d2a27f5cb8228dc56241086664e4895595d8a094 Mon Sep 17 00:00:00 2001 From: Julien Vignoud Date: Sun, 21 Apr 2024 15:30:05 +0200 Subject: [PATCH 16/17] Add help option to benchmark cli --- cli/src/benchmark_gpt.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cli/src/benchmark_gpt.ts b/cli/src/benchmark_gpt.ts index d8fe08ae4..462b94f6a 100644 --- a/cli/src/benchmark_gpt.ts +++ b/cli/src/benchmark_gpt.ts @@ -10,6 +10,7 @@ interface CLIArguments{ batchSize?: number; // 8, 16, 32, 64 inference?: boolean; // benchmark inference if true, training otherwise modelPath?: string; + help?: boolean // print help } const parsedArgs = parse({ @@ -18,7 +19,8 @@ const parsedArgs = parse({ batchSize: { type: Number, optional: true, description: "The model training bat size" }, inference: { type: Boolean, optional: true, description: "Whether to benchmark the model inference or training" }, modelPath: { type: String, optional: true, description: "If benchmarking inference, the path to the trained model" }, -}); + help: { type: Boolean, optional: true, alias: 'h', description: 'Prints this usage guide' }, +}, {helpArg: 'help'}); const defaultArgs: Required = { modelType: 'gpt-nano', @@ -26,6 +28,7 @@ const defaultArgs: Required = { batchSize: 8, inference: false, modelPath: 'models/model.json', + help: false } // Fill parsed args with default args From 8c977111df491d1bd363a06107a54b9db90f2765 Mon Sep 17 00:00:00 2001 From: Julien Vignoud <33122365+JulienVig@users.noreply.github.com> Date: Sun, 21 Apr 2024 15:34:14 +0200 Subject: [PATCH 17/17] Update CLI README.md with benchmark information --- cli/README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cli/README.md b/cli/README.md index 5fdd9a759..b2f2bc903 100644 --- a/cli/README.md +++ b/cli/README.md @@ -5,7 +5,7 @@ The CLI lets one use DISCO in standalone manner (i.e. 
without running a server o For example, the following command trains a model on CIFAR10, using 4 federated clients for 15 epochs with a round duration of 5 epochs (see [DISCOJS.md](../docs/DISCOJS.md#rounds) for more information on rounds) > [!NOTE] -> Make sure you first ran `./get_training_data.sh` (in the root folder) to download training data. +> Make sure you first ran `./datasets/populate` (from the root folder) to download training data. ``` # From the root folder @@ -35,3 +35,12 @@ You should now be able to run your task as follows: ``` npm -w cli start -- --task your_task --numberOfUsers 4 --epochs 15 --roundDuration 5 ``` + +## Benchmarking GPT-TF.js + +The CLI also allows benchmarking the time and memory requirements of the gpt-tfjs implementation in DISCO. The latest benchmark results are reported in [this PR](https://github.com/epfml/disco/pull/659). +CLI options can be listed with `npm -w cli run benchmark_gpt -- -h`. + +To benchmark model training, you can run `npm -w cli run benchmark_gpt -- --modelType gpt-nano --contextLength 128 --batchSize 8`. + +For inference, run `npm -w cli run benchmark_gpt -- --inference --modelPath <path to model>`. You can use the `docs/examples/wikitext.ts` example script to train a model. The model needs to be trained on the default wikitext task to ensure that model parameters such as vocab size, tokenizer, and max sequence length are the same between training and inference.