src: operator Upsample (#207)

* op: implement operator Upsample (cpu/webgl)
* revert debugging code
* lint
* update operator list
* resolve comments
microsoft · Aug 18, 2020 · fcc8d77 · fcc8d77
1 parent fac72e9
commit fcc8d77
Show file tree

Hide file tree

Showing 10 changed files with 936 additions and 4 deletions.
diff --git a/docs/operators.md b/docs/operators.md
@@ -142,6 +142,6 @@ _This file is automatically generated from the def files via [this script](/tool
 |                      [TopK](https://github.com/onnx/onnx/blob/master/docs/Operators.md#TopK)                      |               |               |               |
 |                 [Transpose](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Transpose)                 |      1+       |               |      1+       |
 |                 [Unsqueeze](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Unsqueeze)                 |      1+       |               |      1+       |
-|                  [Upsample](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Upsample)                  |               |               |               |
+|                  [Upsample](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Upsample)                  |      7-8      |               |      7-8      |
 |                     [Where](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Where)                     |               |               |               |
 |                       [Xor](https://github.com/onnx/onnx/blob/master/docs/Operators.md#Xor)                       |      7+       |      7+       |      7+       |
diff --git a/lib/backends/cpu/op-resolve-rules.ts b/lib/backends/cpu/op-resolve-rules.ts
@@ -30,6 +30,7 @@ import {CpuTranspose} from './ops/transpose';
 import * as unaryOps from './ops/unary-op';
 import {CpuUnaryOp} from './ops/unary-op';
 import {CpuUnsqueeze} from './ops/unsqueeze';
+import {CpuUpsample} from './ops/upsample';
 
 export const CPU_OP_RESOLVE_RULES: ReadonlyArray<OpSet.ResolveRule> = [
   ['Abs', '', '6+', () => new CpuUnaryOp(NUMBER_TYPES, unaryOps.abs)],
@@ -100,5 +101,6 @@ export const CPU_OP_RESOLVE_RULES: ReadonlyArray<OpSet.ResolveRule> = [
   ['Tile', '', '6+', () => new CpuTile()],
   ['Transpose', '', '1+', () => new CpuTranspose()],
   ['Unsqueeze', '', '1+', () => new CpuUnsqueeze()],
+  ['Upsample', '', '7-8', () => new CpuUpsample()],
   ['Xor', '', '7+', () => new CpuBinaryOp(['bool'], (e1, e2) => (e1 ^ e2))],
 ];
diff --git a/lib/backends/cpu/ops/upsample.ts b/lib/backends/cpu/ops/upsample.ts
@@ -0,0 +1,157 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT license.
+
+import {Upsample} from '../../../ops/upsample';
+import {Tensor} from '../../../tensor';
+import {CpuInferenceHandler} from '../inference-handler';
+
+/**
+ * CPU implementation of the Upsample operator.
+ */
+export class CpuUpsample extends Upsample {
+  /**
+   * Allocates an output tensor whose every dimension is floor(inputDim * scale) and fills it with the resampled
+   * contents of the first input.
+   *
+   * @param inferenceHandler CPU inference handler; not used by this operator
+   * @param inputs operator inputs; only inputs[0] (the tensor to upsample) is read
+   * @returns a single-element array holding the upsampled tensor, created with the input's element type
+   */
+  run(inferenceHandler: CpuInferenceHandler, inputs: Tensor[]): Tensor[] {
+    const source = inputs[0];
+    const sourceDims = source.dims;
+    const targetDims = sourceDims.map((extent, axis) => Math.floor(extent * this.scales[axis]));
+    const target = new Tensor(targetDims, source.type);
+
+    // 'nearest' selects nearest-neighbor sampling; every other mode value is handled by the linear kernel.
+    const resample = this.mode === 'nearest' ? upsampleNearest : upsampleLinear;
+    resample(source.data, target.data, sourceDims, targetDims, this.scales);
+    return [target];
+  }
+}
+
+/**
+ * Nearest-neighbor upsampling for tensors of arbitrary rank.
+ *
+ * Output elements are visited in flat order while a per-axis output coordinate is advanced like an odometer
+ * (innermost axis first, carrying into outer axes on overflow). The flat input offset is adjusted incrementally
+ * whenever the mapped input coordinate of an axis changes, so no full index computation is done per element.
+ *
+ * @param xData flat input buffer
+ * @param yData flat output buffer, written in place; its length determines how many elements are produced
+ * @param xDims input shape
+ * @param yDims output shape
+ * @param scales per-axis scale factors
+ */
+function upsampleNearest(
+    xData: Tensor.DataTypeMap[Tensor.DataType], yData: Tensor.DataTypeMap[Tensor.DataType],
+    xDims: ReadonlyArray<number>, yDims: ReadonlyArray<number>, scales: number[]) {
+  const rank = xDims.length;
+
+  // Strides of the input tensor: the innermost axis has stride 1, each outer axis multiplies by the next extent.
+  const inputStrides = new Array<number>(rank);
+  inputStrides[rank - 1] = 1;
+  for (let axis = rank - 2; axis >= 0; axis--) {
+    inputStrides[axis] = inputStrides[axis + 1] * xDims[axis + 1];
+  }
+
+  // Current coordinates in input and output space. The innermost output coordinate starts at -1 so that the
+  // first increment below lands on coordinate 0.
+  const inputCoords = new Array<number>(rank).fill(0);
+  const outputCoords = new Array<number>(rank).fill(0);
+  outputCoords[rank - 1] = -1;
+
+  let inputOffset = 0;
+  for (let outputOffset = 0; outputOffset < yData.length; outputOffset++) {
+    for (let axis = rank - 1; axis >= 0; axis--) {
+      outputCoords[axis]++;
+      if (outputCoords[axis] < yDims[axis]) {
+        // Still inside this axis: map the output coordinate back to the input and clamp it to a valid index.
+        const mapped = Math.floor(getOriginalCoordinate(outputCoords[axis], scales[axis]));
+        const clamped = Math.max(0, Math.min(mapped, xDims[axis] - 1));
+        if (clamped !== inputCoords[axis]) {
+          inputOffset += (clamped - inputCoords[axis]) * inputStrides[axis];
+          inputCoords[axis] = clamped;
+        }
+        break;
+      }
+
+      // This axis overflowed: rewind it (and its input coordinate) to 0, then carry into the next outer axis.
+      outputCoords[axis] = 0;
+      inputOffset -= inputCoords[axis] * inputStrides[axis];
+      inputCoords[axis] = 0;
+    }
+    yData[outputOffset] = xData[inputOffset];
+  }
+}
+
+/**
+ * Linear-mode front end: derives the plane layout from the shapes and forwards to upsampleBilinear.
+ *
+ * A rank-2 input is treated as a single (height, width) plane. Any other rank is read as
+ * (batch, channels, height, width): axes 0 and 1 give the number of planes, axes 2 and 3 are interpolated, and the
+ * scales of axes 0 and 1 are not consulted.
+ * NOTE(review): ranks other than 2 and 4 are not rejected here - presumably validated before this call; confirm.
+ *
+ * @param xData flat input buffer
+ * @param yData flat output buffer, written in place
+ * @param xDims input shape
+ * @param yDims output shape
+ * @param scales per-axis scale factors
+ */
+function upsampleLinear(
+    xData: Tensor.DataTypeMap[Tensor.DataType], yData: Tensor.DataTypeMap[Tensor.DataType],
+    xDims: ReadonlyArray<number>, yDims: ReadonlyArray<number>, scales: number[]) {
+  const isSinglePlane = xDims.length === 2;
+  const heightAxis = isSinglePlane ? 0 : 2;
+  const widthAxis = heightAxis + 1;
+  const batchSize = isSinglePlane ? 1 : xDims[0];
+  const numChannels = isSinglePlane ? 1 : xDims[1];
+
+  upsampleBilinear(
+      xData as Tensor.NumberType, yData as Tensor.NumberType, batchSize, numChannels, xDims[heightAxis],
+      xDims[widthAxis], yDims[heightAxis], yDims[widthAxis], scales[heightAxis], scales[widthAxis]);
+}
+
+/**
+ * Bilinear interpolation over the height/width axes of batchSize * numChannels contiguous planes.
+ *
+ * Source indices and interpolation weights depend only on the output row or column, so they are tabulated once
+ * and then reused for every plane.
+ *
+ * @param xData flat input buffer; consecutive planes are inputHeight * inputWidth elements apart
+ * @param yData flat output buffer, written in place; consecutive planes are outputHeight * outputWidth apart
+ * @param batchSize number of batches
+ * @param numChannels number of channels per batch
+ * @param inputHeight rows per input plane
+ * @param inputWidth columns per input plane
+ * @param outputHeight rows per output plane
+ * @param outputWidth columns per output plane
+ * @param heightScale scale factor along the height axis
+ * @param widthScale scale factor along the width axis
+ */
+function upsampleBilinear(
+    xData: Tensor.NumberType, yData: Tensor.NumberType, batchSize: number, numChannels: number, inputHeight: number,
+    inputWidth: number, outputHeight: number, outputWidth: number, heightScale: number, widthScale: number) {
+  // Per output row: in-plane offsets (row index * inputWidth) of the two neighboring input rows, and the
+  // distances from the sample point to the first (dy1) and second (dy2) of them.
+  const inputWidthMulY1 = new Array<number>(outputHeight);
+  const inputWidthMulY2 = new Array<number>(outputHeight);
+  const dy1 = new Array<number>(outputHeight);
+  const dy2 = new Array<number>(outputHeight);
+
+  // Per output column: indices of the two neighboring input columns, and the distances to them.
+  const inX1 = new Array<number>(outputWidth);
+  const inX2 = new Array<number>(outputWidth);
+  const dx1 = new Array<number>(outputWidth);
+  const dx2 = new Array<number>(outputWidth);
+
+  for (let y = 0; y < outputHeight; ++y) {
+    // Source row coordinate, clamped into [0, inputHeight - 1].
+    const inY = Math.max(0, Math.min(getOriginalCoordinate(y, heightScale), inputHeight - 1));
+
+    const inY1 = Math.min(Math.floor(inY), inputHeight - 1);
+    const inY2 = Math.min(inY1 + 1, inputHeight - 1);
+
+    if (inY1 === inY2) {
+      // Both neighbors are the last input row; equal weights reproduce that row's value.
+      dy1[y] = 0.5;
+      dy2[y] = 0.5;
+    } else {
+      dy1[y] = Math.abs(inY - inY1);
+      dy2[y] = Math.abs(inY - inY2);
+    }
+
+    inputWidthMulY1[y] = inputWidth * inY1;
+    inputWidthMulY2[y] = inputWidth * inY2;
+  }
+
+  for (let x = 0; x < outputWidth; ++x) {
+    // Source column coordinate, clamped into [0, inputWidth - 1].
+    const inX = Math.max(0, Math.min(getOriginalCoordinate(x, widthScale), inputWidth - 1));
+
+    inX1[x] = Math.min(Math.floor(inX), inputWidth - 1);
+    inX2[x] = Math.min(inX1[x] + 1, inputWidth - 1);
+
+    if (inX1[x] === inX2[x]) {
+      // Both neighbors are the last input column; equal weights reproduce that column's value.
+      dx1[x] = 0.5;
+      dx2[x] = 0.5;
+    } else {
+      dx1[x] = Math.abs(inX - inX1[x]);
+      dx2[x] = Math.abs(inX - inX2[x]);
+    }
+  }
+
+  const inputPlaneSize = inputHeight * inputWidth;
+  const outputPlaneSize = outputWidth * outputHeight;
+
+  let xOffset = 0;
+  let yOffset = 0;
+  for (let n = 0; n < batchSize; ++n) {
+    for (let c = 0; c < numChannels; ++c) {
+      for (let y = 0; y < outputHeight; ++y) {
+        const firstRow = xOffset + inputWidthMulY1[y];
+        const secondRow = xOffset + inputWidthMulY2[y];
+        const outputRow = yOffset + outputWidth * y;
+
+        for (let x = 0; x < outputWidth; ++x) {
+          const x11 = xData[firstRow + inX1[x]];
+          const x21 = xData[firstRow + inX2[x]];
+          const x12 = xData[secondRow + inX1[x]];
+          const x22 = xData[secondRow + inX2[x]];
+
+          // Each neighbor is weighted by the distance to the opposite neighbor along both axes.
+          yData[outputRow + x] =
+              (dx2[x] * dy2[y] * x11 + dx1[x] * dy2[y] * x21 + dx2[x] * dy1[y] * x12 + dx1[x] * dy1[y] * x22);
+        }
+      }
+      xOffset += inputPlaneSize;
+      yOffset += outputPlaneSize;
+    }
+  }
+}
+
+/**
+ * Maps a coordinate along one axis of the resized (output) tensor back to the corresponding, possibly fractional,
+ * coordinate in the original (input) tensor.
+ *
+ * The coordinate transformation mode attribute was only introduced in version 11; before that, asymmetric mode
+ * (plain division by the scale) was the only available transformation, so that is what is applied here.
+ * The alternative `((xResized + 0.5) / xScale) - 0.5` is intentionally not used.
+ *
+ * @param xResized coordinate in the resized tensor
+ * @param xScale scale factor of the axis
+ * @returns the coordinate in the original tensor
+ */
+function getOriginalCoordinate(xResized: number, xScale: number): number {
+  const xOriginal = xResized / xScale;
+  return xOriginal;
+}
diff --git a/lib/backends/webgl/op-resolve-rules.ts b/lib/backends/webgl/op-resolve-rules.ts
@@ -31,6 +31,7 @@ import {WebGLTile} from './ops/tile';
 import {WebGLTranspose} from './ops/transpose';
 import * as unaryOps from './ops/unary-op';
 import {WebGLUnsqueeze} from './ops/unsqueeze';
+import {WebGLUpsample} from './ops/upsample';
 
 export const WEBGL_OP_RESOLVE_RULES: ReadonlyArray<OpSet.ResolveRule> = [
   ['Abs', '', '6+', () => new unaryOps.WebGLUnaryOp(NUMBER_TYPES, unaryOps.glslAbs())],
@@ -100,6 +101,7 @@ export const WEBGL_OP_RESOLVE_RULES: ReadonlyArray<OpSet.ResolveRule> = [
   ['Tanh', '', '6+', () => new unaryOps.WebGLUnaryOp(FLOAT_TYPES, unaryOps.glslTanh())],
   ['Tile', '', '6+', () => new WebGLTile()],
   ['Transpose', '', '1+', () => new WebGLTranspose()],
+  ['Upsample', '', '7-8', () => new WebGLUpsample()],
   ['Unsqueeze', '', '1+', () => new WebGLUnsqueeze()],
   ['Xor', '', '7+', () => new binaryOps.WebGLBinaryOp(['bool'], binaryOps.glslXor())],
 ];
diff --git a/lib/backends/webgl/ops/upsample.ts b/lib/backends/webgl/ops/upsample.ts
@@ -0,0 +1,193 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT license.
+
+import {Upsample} from '../../../ops/upsample';
+import {Tensor} from '../../../tensor';
+import {getGlsl} from '../glsl-source';
+import {WebGLInferenceHandler} from '../inference-handler';
+import {ProgramInfo, RunData, WebGLOperator} from '../types';
+
+/**
+ * WebGL implementation of the Upsample operator. The resampling is performed by a generated GLSL shader that
+ * maps every output element back to element(s) of the input texture.
+ */
+export class WebGLUpsample extends Upsample implements WebGLOperator {
+  /** Delegates execution to the inference handler's run path for this operator. */
+  run(inferenceHandler: WebGLInferenceHandler, inputs: Tensor[]): Tensor[] {
+    return inferenceHandler.run(this, inputs);
+  }
+  /**
+   * Builds the shader source and the texture layouts for the given input.
+   *
+   * Three shader variants exist: a rank-generic nearest-neighbor shader (mode === 'nearest'), a bilinear shader
+   * for rank-4 inputs that interpolates over axes 2 and 3, and a bilinear shader that interpolates over axes 0
+   * and 1, which is selected for every other rank.
+   * NOTE(review): the last variant is written for rank-2 inputs; presumably other ranks are rejected for
+   * non-nearest modes before this point - confirm against the base class.
+   *
+   * @param handler inference handler used to obtain texture layouts and the GLSL dialect
+   * @param inputs operator inputs; only inputs[0] is used
+   * @returns program info with one sampler ('X') and one int-array uniform ('scales')
+   */
+  createProgramInfo(handler: WebGLInferenceHandler, inputs: Tensor[]): ProgramInfo {
+    const inputLayout = handler.getOrCreateTextureLayout(inputs[0]);
+    // Output extent per axis is floor(inputDim * scale), using the unrounded scale.
+    const outputShape = inputs[0].dims.map((dim, i) => Math.floor(dim * this.scales[i]));
+    const outputLayout = handler.createTextureLayoutFromShape(outputShape);
+    const dim = outputShape.length;
+
+    const glsl = getGlsl(handler.session.backend.glContext.version);
+
+    // Pitches (strides) of the output and input shapes: the innermost axis has pitch 1 and each outer axis
+    // multiplies by the extent of the next inner axis. They are also emitted as GLSL statements that fill two
+    // int arrays (output_pitches / input_pitches) inside the shader.
+    const outputPitches = new Array<number>(dim);
+    const inputPitches = new Array<number>(dim);
+    let precalculatedPitches = `
+      int output_pitches[${dim}];
+      int input_pitches[${dim}];
+      `;
+    for (let d = dim - 1; d >= 0; d--) {
+      outputPitches[d] = (d === dim - 1) ? 1 : outputPitches[d + 1] * outputShape[d + 1];
+      inputPitches[d] = (d === dim - 1) ? 1 : inputPitches[d + 1] * inputs[0].dims[d + 1];
+
+      precalculatedPitches += `
+      output_pitches[${d}] = ${outputPitches[d]};
+      input_pitches[${d}] = ${inputPitches[d]};
+      `;
+    }
+    // GLSL helper that fetches the input element at a flat index from sampler X. offsetToCoords and
+    // getColorAsFloat are not defined in this file - presumably supplied by the surrounding shader framework.
+    const getInputFloatFunction = `
+    float getInputFloat(int index) {
+      vec2 coords = offsetToCoords(index, ${inputLayout.width}, ${inputLayout.height});
+      float value = getColorAsFloat(${glsl.texture2D}(X, coords));
+      return value;
+    }
+    `;
+
+    const shaderSource = this.mode === 'nearest' ?
+        // nearest: rank-generic shader; each axis index is integer-divided by that axis' scale
+        `
+      ${getInputFloatFunction}
+      float process(int indices[${dim}]) {
+        int input_index = 0;
+        int output_index = coordsToOffset(TexCoords, ${outputLayout.width}, ${outputLayout.height});
+
+        ${precalculatedPitches}
+
+        int d, m;
+        for (int dim = 0; dim < ${dim}; ++dim) {
+          d = output_index / output_pitches[dim];
+          m = output_index - d * output_pitches[dim];
+          output_index = m;
+
+          if (scales[dim] != 1 && d > 0) {
+            int d2 = d / scales[dim];
+            m = d - d2 * scales[dim];
+            d = d2;
+          }
+          input_index += input_pitches[dim] * d;
+        }
+
+        return getInputFloat(input_index);
+      }` :
+        dim === 4 ?
+        // bilinear 4D: interpolates over axes 2 and 3; the indices of axes 0 and 1 are carried over unchanged
+            `
+      ${getInputFloatFunction}
+      float process(int indices[4]) {
+        int input_index = 0;
+        int output_index = coordsToOffset(TexCoords, ${outputLayout.width}, ${outputLayout.height});
+
+        ${precalculatedPitches}
+
+        int m;
+        int index_of_dim0, index_of_dim1, index_of_dim2, index_of_dim3;
+        index_of_dim0 = output_index / output_pitches[0];
+        m = output_index - index_of_dim0 * output_pitches[0];
+        index_of_dim1 = m / output_pitches[1];
+        m = m - index_of_dim1 * output_pitches[1];
+        index_of_dim2 = m / output_pitches[2];
+        m = m - index_of_dim2 * output_pitches[2];
+        index_of_dim3 = m;
+
+        int index_of_input_dim2, index_of_input_dim3, x_offset, y_offset;
+        index_of_input_dim2 = index_of_dim2 / scales[2];
+        y_offset = index_of_dim2 - index_of_input_dim2 * scales[2];
+        index_of_input_dim3 = index_of_dim3 / scales[3];
+        x_offset = index_of_dim3 - index_of_input_dim3 * scales[3];
+
+        input_index = index_of_dim0 * input_pitches[0] +
+                      index_of_dim1 * input_pitches[1] +
+                      index_of_input_dim2 * input_pitches[2] +
+                      index_of_input_dim3;
+
+        float x00 = getInputFloat(input_index);
+        float x10, x01, x11;
+
+        bool end_of_dim2 = false;
+        if (index_of_input_dim2 == (${inputs[0].dims[2]} - 1)) {
+          // It's the end in dimension 2
+          x01 = x00;
+          end_of_dim2 = true;
+        } else {
+          x01 = getInputFloat(input_index + input_pitches[2]);
+        }
+
+        if (index_of_input_dim3 == (input_pitches[2] - 1)) {
+          // It's the end in dimension 3
+          x10 = x00;
+          x11 = x01;
+        }
+        else {
+          x10 = getInputFloat(input_index + 1);
+          x11 = end_of_dim2 ? x10 : getInputFloat(input_index + input_pitches[2] + 1);
+        }
+
+        float y0 = x00 + float(y_offset) * (x01 - x00) / float(scales[2]);
+        float y1 = x10 + float(y_offset) * (x11 - x10) / float(scales[2]);
+        return y0 + float(x_offset) * (y1 - y0) / float(scales[3]);
+      }` :
+            // bilinear 2D: interpolates over axes 0 and 1
+            `
+      ${getInputFloatFunction}
+      float process(int indices[2]) {
+        int input_index = 0;
+        int output_index = coordsToOffset(TexCoords, ${outputLayout.width}, ${outputLayout.height});
+
+        ${precalculatedPitches}
+
+        int m;
+        int index_of_dim0, index_of_dim1;
+        index_of_dim0 = output_index / output_pitches[0];
+        m = output_index - index_of_dim0 * output_pitches[0];
+        index_of_dim1 = m;
+
+        int index_of_input_dim0, index_of_input_dim1, x_offset, y_offset;
+        index_of_input_dim0 = index_of_dim0 / scales[0];
+        y_offset = index_of_dim0 - index_of_input_dim0 * scales[0];
+        index_of_input_dim1 = index_of_dim1 / scales[1];
+        x_offset = index_of_dim1 - index_of_input_dim1 * scales[1];
+
+        input_index = index_of_input_dim0 * input_pitches[0] + index_of_input_dim1;
+
+        float x00 = getInputFloat(input_index);
+        float x10, x01, x11;
+
+        bool end_of_dim0 = false;
+        if (index_of_input_dim0 == (${inputs[0].dims[0]} - 1)) {
+          // It's the end in dimension 0
+          x01 = x00;
+          end_of_dim0 = true;
+        } else {
+          x01 = getInputFloat(input_index + input_pitches[0]);
+        }
+
+        if (index_of_input_dim1 == (input_pitches[0] - 1)) {
+          // It's the end in dimension 1
+          x10 = x00;
+          x11 = x01;
+        }
+        else {
+          x10 = getInputFloat(input_index + 1);
+          x11 = end_of_dim0 ? x10 : getInputFloat(input_index + input_pitches[0] + 1);
+        }
+
+        float y0 = x00 + float(y_offset) * (x01 - x00) / float(scales[0]);
+        float y1 = x10 + float(y_offset) * (x11 - x10) / float(scales[0]);
+        return y0 + float(x_offset) * (y1 - y0) / float(scales[1]);
+      }`;
+    // 'scales' is declared as an int array uniform with one entry per scale factor.
+    return {
+      inputLayouts: [inputLayout],
+      outputLayout,
+      samplers: ['X'],
+      shaderSource,
+      variables: [{name: 'scales', type: 'int', arrayLength: this.scales.length}]
+    };
+  }
+  /**
+   * Binds the input texture(s), allocates the output texture with the first input's element type, and supplies
+   * the 'scales' uniform.
+   *
+   * NOTE(review): the uniform receives Math.ceil(scale) for every axis, whereas the output shape in
+   * createProgramInfo uses floor(dim * scale) with the unrounded scale. For non-integer scales the two disagree -
+   * confirm whether fractional scales are meant to be supported on this backend.
+   *
+   * @param handler inference handler used to obtain texture data
+   * @param programInfo program info whose layouts are used for the input and output textures
+   * @param inputs operator inputs
+   * @returns run data holding input textures, the output texture and uniform values
+   */
+  createRunData(handler: WebGLInferenceHandler, programInfo: ProgramInfo, inputs: Tensor[]): RunData {
+    const inputTDs = inputs.map((t, i) => handler.getOrCreateTextureData(t, programInfo.inputLayouts[i]));
+    return {
+      inputTextureDatas: inputTDs,
+      outputTextureData: handler.createTextureDataFromLayout(programInfo.outputLayout, inputTDs[0].tensor.type),
+      uniformData: {scales: this.scales.map(x => Math.ceil(x))}
+    };
+  }
+}