From 33692ab3ed262e1b685af4df131f91c24ca68032 Mon Sep 17 00:00:00 2001 From: Peter Eastman Date: Wed, 9 Dec 2020 13:59:28 -0800 Subject: [PATCH] Added option to use tanh activation --- schnet/CFConv.h | 28 ++++++++++++++++++++++--- schnet/CpuCFConv.cpp | 22 ++++++++++++++------ schnet/CpuCFConv.h | 5 +++-- schnet/CudaCFConv.cu | 44 +++++++++++++++++++++++++--------------- schnet/CudaCFConv.h | 5 +++-- schnet/TestCFConv.h | 36 +++++++++++++++++++++++++++----- schnet/TestCpuCFConv.cpp | 4 ++-- schnet/TestCudaCFConv.cu | 4 ++-- 8 files changed, 110 insertions(+), 38 deletions(-) diff --git a/schnet/CFConv.h b/schnet/CFConv.h index 1ecb955..2b601a5 100644 --- a/schnet/CFConv.h +++ b/schnet/CFConv.h @@ -93,7 +93,7 @@ class CFConvNeighbors { * * 1. Compute a set of Gaussian basis functions describing the distance between them. * 2. Pass them through a dense layer. - * 3. Apply a shifted softplus activation function. + * 3. Apply an activation function. * 4. Pass the result through a second dense layer. * 5. Apply a cosine cutoff function to make interactions smoothly go to zero at the cutoff. * @@ -108,6 +108,19 @@ class CFConvNeighbors { */ class CFConv { public: + /** + * This is an enumeration of supported activation functions. + */ + enum ActivationFunction { + /** + * y(x) = log(0.5*exp(x) + 0.5) + */ + ShiftedSoftplus = 0, + /** + * y(x) = tanh(x) + */ + Tanh = 1 + }; /** * Construct on object for computing continuous filter convolution (cfconv) functions. * @@ -117,9 +130,11 @@ class CFConv { * @param cutoff the cutoff distance * @param periodic whether to apply periodic boundary conditions * @param gaussianWidth the width of the Gaussian basis functions + * @param activation the activation function to use between the two dense layers */ - CFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth) : - numAtoms(numAtoms), width(width), numGaussians(numGaussians), cutoff(cutoff), periodic(periodic), gaussianWidth(gaussianWidth) { + CFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth, + ActivationFunction activation) : numAtoms(numAtoms), width(width), numGaussians(numGaussians), + cutoff(cutoff), periodic(periodic), gaussianWidth(gaussianWidth), activation(activation) { } virtual ~CFConv() { } @@ -188,10 +203,17 @@ class CFConv { float getGaussianWidth() const { return gaussianWidth; } + /** + * Get the activation function used between the two dense layers. + */ + ActivationFunction getActivation() const { + return activation; + } protected: const int numAtoms, width, numGaussians; const float cutoff, gaussianWidth; const bool periodic; + const ActivationFunction activation; }; #endif diff --git a/schnet/CpuCFConv.cpp b/schnet/CpuCFConv.cpp index a26f1b2..6a3fd47 100644 --- a/schnet/CpuCFConv.cpp +++ b/schnet/CpuCFConv.cpp @@ -116,8 +116,8 @@ void CpuCFConvNeighbors::findNeighbors(const float* positions, const float* peri } CpuCFConv::CpuCFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth, - const float* w1, const float* b1, const float* w2, const float* b2) : - CFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth) { + ActivationFunction activation, const float* w1, const float* b1, const float* w2, const float* b2) : + CFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth, activation) { for (int i = 0; i < numGaussians; i++) gaussianPos.push_back(i*cutoff/(numGaussians-1)); for (int i = 0; i < numGaussians*width; i++) @@ -161,7 +161,10 @@ void CpuCFConv::compute(const CFConvNeighbors& neighbors, const float* positions float sum = b1[i]; for (int j = 0; j < getNumGaussians(); j++) sum += gaussian[j]*w1[i*getNumGaussians()+j]; - y1[i] = logf(0.5f*expf(sum) + 0.5f); + if (getActivation() == ShiftedSoftplus) + y1[i] = logf(0.5f*expf(sum) + 0.5f); + else + y1[i] = tanhf(sum); } // Apply the second dense layer. @@ -251,9 +254,16 @@ void CpuCFConv::backpropImpl(const CpuCFConvNeighbors& neighbors, const float* p sum += gaussian[j]*w1[i*getNumGaussians()+j]; dSumdR += dGaussdR[j]*w1[i*getNumGaussians()+j]; } - float expSum = expf(sum); - y1[i] = logf(0.5f*expSum + 0.5f); - dY1dR[i] = dSumdR*expSum/(expSum + 1); + if (getActivation() == ShiftedSoftplus) { + float expSum = expf(sum); + y1[i] = logf(0.5f*expSum + 0.5f); + dY1dR[i] = dSumdR*expSum/(expSum + 1); + } + else { + float th = tanhf(sum); + y1[i] = th; + dY1dR[i] = dSumdR*(1-th*th); + } } // Apply the second dense layer. diff --git a/schnet/CpuCFConv.h b/schnet/CpuCFConv.h index b225555..50a1cb7 100644 --- a/schnet/CpuCFConv.h +++ b/schnet/CpuCFConv.h @@ -90,7 +90,7 @@ class CpuCFConvNeighbors : public CFConvNeighbors { * * 1. Compute a set of Gaussian basis functions describing the distance between them. * 2. Pass them through a dense layer. - * 3. Apply a shifted softplus activation function. + * 3. Apply an activation function. * 4. Pass the result through a second dense layer. * 5. Apply a cosine cutoff function to make interactions smoothly go to zero at the cutoff. * @@ -111,13 +111,14 @@ class CpuCFConv : public CFConv { * @param cutoff the cutoff distance * @param periodic whether to apply periodic boundary conditions * @param gaussianWidth the width of the Gaussian basis functions + * @param activation the activation function to use between the two dense layers * @param w1 an array of shape [numGaussians][width] containing the weights of the first dense layer * @param b1 an array of length [width] containing the biases of the first dense layer * @param w2 an array of shape [width][width] containing the weights of the second dense layer * @param b2 an array of length [width] containing the biases of the second dense layer */ CpuCFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth, - const float* w1, const float* b1, const float* w2, const float* b2); + ActivationFunction activation, const float* w1, const float* b1, const float* w2, const float* b2); /** * Compute the output of the layer. * diff --git a/schnet/CudaCFConv.cu b/schnet/CudaCFConv.cu index a6efa91..da1a08e 100644 --- a/schnet/CudaCFConv.cu +++ b/schnet/CudaCFConv.cu @@ -187,8 +187,9 @@ void CudaCFConvNeighbors::build(const float* positions, const float* periodicBox buildNeighborList<<>>(getNumAtoms(), getCutoff(), neighbors, neighborCount, neighborDistances, neighborDeltas, (float3*) devicePositions, deviceBoxVectors); } -CudaCFConv::CudaCFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth, const float* w1, - const float* b1, const float* w2, const float* b2) : CFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth), +CudaCFConv::CudaCFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth, + ActivationFunction activation, const float* w1, const float* b1, const float* w2, const float* b2) : + CFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth, activation), input(0), output(0), inputDeriv(0), positionDeriv(0), w1(0), b1(0), w2(0), b2(0) { // Allocate memory on the device for the layer parameters. @@ -267,7 +268,7 @@ __device__ float cutoffDeriv(float r, float rc) { return -(0.5f*Pi/rc) * sinf(Pi*r/rc); } -__global__ void computeCFConv(int numAtoms, int numGaussians, int width, float cutoff, float gaussianWidth, +__global__ void computeCFConv(int numAtoms, int numGaussians, int width, float cutoff, float gaussianWidth, int activation, const int2* neighbors, const int* neighborCount, const float* neighborDistance, const float* input, float* output, const float* w1, const float* b1, const float* w2, const float* b2) { const int warp = (blockIdx.x*blockDim.x+threadIdx.x)/32; @@ -302,7 +303,10 @@ __global__ void computeCFConv(int numAtoms, int numGaussians, int width, float c float sum = b1[i]; for (int j = 0; j < numGaussians; j++) sum += temp1[j]*w1[i+j*width]; - temp2[i] = logf(0.5f*expf(sum) + 0.5f); + if (activation == 0) + temp2[i] = logf(0.5f*expf(sum) + 0.5f); + else + temp2[i] = tanhf(sum); } __syncwarp(); @@ -344,8 +348,9 @@ void CudaCFConv::compute(const CFConvNeighbors& neighbors, const float* position const int numBlocks = numMultiprocessors*2; const int tempSize = max(getNumGaussians(), getWidth())*(blockSize/32); computeCFConv<<>>(getNumAtoms(), getNumGaussians(), - getWidth(), getCutoff(), getGaussianWidth(), cudaNeighbors.getNeighbors(), cudaNeighbors.getNeighborCount(), - cudaNeighbors.getNeighborDistances(), deviceInput, deviceOutput, w1, b1, w2, b2); + getWidth(), getCutoff(), getGaussianWidth(), getActivation(), cudaNeighbors.getNeighbors(), + cudaNeighbors.getNeighborCount(), cudaNeighbors.getNeighborDistances(), deviceInput, + deviceOutput, w1, b1, w2, b2); // If necessary, copy the output. @@ -355,7 +360,7 @@ void CudaCFConv::compute(const CFConvNeighbors& neighbors, const float* position template __global__ void backpropCFConv(int numAtoms, int numGaussians, int width, float cutoff, float gaussianWidth, - const int2* neighbors, const int* neighborCount, const float3* neighborDeltas, + int activation, const int2* neighbors, const int* neighborCount, const float3* neighborDeltas, const float* input, const float* outputDeriv, float* inputDeriv, float* positionDeriv, const float* w1, const float* b1, const float* w2, const float* b2) { const int warp = (blockIdx.x*blockDim.x+threadIdx.x)/32; @@ -399,9 +404,16 @@ __global__ void backpropCFConv(int numAtoms, int numGaussians, int width, float sum += temp1[j]*w; dSumdR += dtemp1[j]*w; } - float expSum = expf(sum); - temp2[i] = logf(0.5f*expSum + 0.5f); - dtemp2[i] = dSumdR*expSum/(expSum + 1); + if (activation == 0) { + float expSum = expf(sum); + temp2[i] = logf(0.5f*expSum + 0.5f); + dtemp2[i] = dSumdR*expSum/(expSum + 1); + } + else { + float th = tanhf(sum); + temp2[i] = th; + dtemp2[i] = dSumdR*(1-th*th); + } } __syncwarp(); @@ -466,19 +478,19 @@ void CudaCFConv::backprop(const CFConvNeighbors& neighbors, const float* positio if (getPeriodic()) { if (neighbors.getTriclinic()) backpropCFConv<<>>(getNumAtoms(), getNumGaussians(), - getWidth(), getCutoff(), getGaussianWidth(), cudaNeighbors.getNeighbors(), cudaNeighbors.getNeighborCount(), - cudaNeighbors.getNeighborDeltas(), deviceInput, deviceOutputDeriv, + getWidth(), getCutoff(), getGaussianWidth(), getActivation(), cudaNeighbors.getNeighbors(), + cudaNeighbors.getNeighborCount(), cudaNeighbors.getNeighborDeltas(), deviceInput, deviceOutputDeriv, deviceInputDeriv, devicePositionDeriv, w1, b1, w2, b2); else backpropCFConv<<>>(getNumAtoms(), getNumGaussians(), - getWidth(), getCutoff(), getGaussianWidth(), cudaNeighbors.getNeighbors(), cudaNeighbors.getNeighborCount(), - cudaNeighbors.getNeighborDeltas(), deviceInput, deviceOutputDeriv, + getWidth(), getCutoff(), getGaussianWidth(), getActivation(), cudaNeighbors.getNeighbors(), + cudaNeighbors.getNeighborCount(), cudaNeighbors.getNeighborDeltas(), deviceInput, deviceOutputDeriv, deviceInputDeriv, devicePositionDeriv, w1, b1, w2, b2); } else backpropCFConv<<>>(getNumAtoms(), getNumGaussians(), - getWidth(), getCutoff(), getGaussianWidth(), cudaNeighbors.getNeighbors(), cudaNeighbors.getNeighborCount(), - cudaNeighbors.getNeighborDeltas(), deviceInput, deviceOutputDeriv, + getWidth(), getCutoff(), getGaussianWidth(), getActivation(), cudaNeighbors.getNeighbors(), + cudaNeighbors.getNeighborCount(), cudaNeighbors.getNeighborDeltas(), deviceInput, deviceOutputDeriv, deviceInputDeriv, devicePositionDeriv, w1, b1, w2, b2); // If necessary, copy the output. diff --git a/schnet/CudaCFConv.h b/schnet/CudaCFConv.h index 5670fe5..ccd9493 100644 --- a/schnet/CudaCFConv.h +++ b/schnet/CudaCFConv.h @@ -123,7 +123,7 @@ class CudaCFConvNeighbors : public CFConvNeighbors { * * 1. Compute a set of Gaussian basis functions describing the distance between them. * 2. Pass them through a dense layer. - * 3. Apply a shifted softplus activation function. + * 3. Apply an activation function. * 4. Pass the result through a second dense layer. * 5. Apply a cosine cutoff function to make interactions smoothly go to zero at the cutoff. * @@ -144,13 +144,14 @@ class CudaCFConv : public CFConv { * @param cutoff the cutoff distance * @param periodic whether to apply periodic boundary conditions * @param gaussianWidth the width of the Gaussian basis functions + * @param activation the activation function to use between the two dense layers * @param w1 an array of shape [numGaussians][width] containing the weights of the first dense layer * @param b1 an array of length [width] containing the biases of the first dense layer * @param w2 an array of shape [width][width] containing the weights of the second dense layer * @param b2 an array of length [width] containing the biases of the second dense layer */ CudaCFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth, - const float* w1, const float* b1, const float* w2, const float* b2); + ActivationFunction activation, const float* w1, const float* b1, const float* w2, const float* b2); ~CudaCFConv(); /** * Compute the output of the layer. All of the pointers passed to this method may refer to either host or device memory. diff --git a/schnet/TestCFConv.h b/schnet/TestCFConv.h index ed6bc63..ccc5a27 100644 --- a/schnet/TestCFConv.h +++ b/schnet/TestCFConv.h @@ -78,7 +78,7 @@ void validateDerivatives(CFConvNeighbors& neighbors, CFConv& conv, float* positi } } -void testWater(float* periodicVectors, float* expectedOutput) { +void testWater(float* periodicVectors, float* expectedOutput, CFConv::ActivationFunction activation) { int numAtoms = 18; float positions[18][3] = { { 0.726, -1.384, -0.376}, @@ -128,7 +128,7 @@ void testWater(float* periodicVectors, float* expectedOutput) { vector y(8*18); CFConvNeighbors* neighbors = createNeighbors(numAtoms, 2.0, (periodicVectors != NULL)); neighbors->build((float*) positions, periodicVectors); - CFConv* conv = createConv(numAtoms, 8, 5, 2.0, (periodicVectors != NULL), 0.5, (float*) w1, b1.data(), (float*) w2, b2.data()); + CFConv* conv = createConv(numAtoms, 8, 5, 2.0, (periodicVectors != NULL), 0.5, activation, (float*) w1, b1.data(), (float*) w2, b2.data()); conv->compute(*neighbors, (float*) positions, periodicVectors, x.data(), y.data()); for (int i = 0; i < y.size(); i++) assertEqual(expectedOutput[i], y[i], 1e-4, 1e-3); @@ -159,7 +159,7 @@ void testWaterNonperiodic() { 35.483078, 25.192368, 20.326399, -11.645652, -17.048292, -6.7168756, 7.7418537, 38.036129, 35.362617, 25.096624, 20.284662, -11.627875, -16.997498, -6.7423472, 7.7480173, 38.007557 }; - testWater(NULL, expectedOutput); + testWater(NULL, expectedOutput, CFConv::ShiftedSoftplus); } void testWaterPeriodic() { @@ -189,7 +189,7 @@ void testWaterPeriodic() { 0.0, 5.0, 0.0, 0.0, 0.0, 5.0 }; - testWater(periodicVectors, expectedOutput); + testWater(periodicVectors, expectedOutput, CFConv::ShiftedSoftplus); } void testWaterTriclinic() { @@ -219,12 +219,38 @@ void testWaterTriclinic() { 1.5, 5.0, 0.0, -0.5, -1.0, 5.0 }; - testWater(periodicVectors, expectedOutput); + testWater(periodicVectors, expectedOutput, CFConv::ShiftedSoftplus); +} + +void testWaterTanh() { + // Values computed with SchNetPack. + float expectedOutput[] = { + 1.3675559, 0.5935415, 1.9673429, -0.95395255, 0.50335306, -0.48667794, 1.7807188, 3.3819561, + 0.29005343, 0.14386685, 0.53776228, -0.28281462, 0.16077384, -0.16586897, 0.63400537, 1.2554247, + 0.11627886, 0.07301569, 0.3117035, -0.17863229, 0.10733558, -0.11581345, 0.45773405, 0.93143916, + 4.4241352, 1.8217319, 5.763402, -2.6756771, 1.3593771, -1.2676939, 4.4866109, 8.2608976, + 1.8093463, 0.74896717, 2.4062924, -1.1258253, 0.5758335, -0.5449248, 1.9352937, 3.5917006, + 1.8147539, 0.75354069, 2.418226, -1.1328967, 0.58087122, -0.54870433, 1.9535246, 3.6271381, + 6.5715466, 2.6803012, 8.4133148, -3.8772564, 1.9475672, -1.8058398, 6.336915, 11.591103, + 3.5397758, 1.4405037, 4.5571709, -2.1003108, 1.0585054, -0.98888546, 3.4656358, 6.3538094, + 3.6488724, 1.4866341, 4.7028041, -2.1701686, 1.0932748, -1.0214566, 3.5815094, 6.5694709, + 9.3324986, 3.7891164, 11.839811, -5.4330664, 2.7172399, -2.5089715, 8.7689342, 15.976856, + 5.2280664, 2.117528, 6.6500511, -3.0500164, 1.5282308, -1.4179878, 4.9473982, 9.0240326, + 4.9489441, 2.0055315, 6.2975488, -2.8885479, 1.4482468, -1.3434849, 4.6904936, 8.5567827, + 12.145325, 4.9184694, 15.3298, -7.017447, 3.5011201, -3.2249959, 11.244879, 20.440998, + 6.9010692, 2.7870331, 8.7232628, -3.9895077, 1.9928961, -1.8433089, 6.4140768, 11.666815, + 6.5481153, 2.6455522, 8.2783947, -3.7856936, 1.8922514, -1.7496059, 6.0915923, 11.080776, + 13.773149, 5.5679493, 17.322308, -7.9124432, 3.9436364, -3.6259429, 12.627276, 22.916275, + 7.3554273, 2.9703379, 9.2593012, -4.2263622, 2.1111732, -1.9438766, 6.7665024, 12.282778, + 7.3363366, 2.9642651, 9.2432461, -4.2205625, 2.1098421, -1.9429932, 6.7673745, 12.288699 + }; + testWater(NULL, expectedOutput, CFConv::Tanh); } int main() { testWaterNonperiodic(); testWaterPeriodic(); testWaterTriclinic(); + testWaterTanh(); return 0; } diff --git a/schnet/TestCpuCFConv.cpp b/schnet/TestCpuCFConv.cpp index 0bacb6f..6d963ea 100644 --- a/schnet/TestCpuCFConv.cpp +++ b/schnet/TestCpuCFConv.cpp @@ -5,8 +5,8 @@ CFConvNeighbors* createNeighbors(int numAtoms, float cutoff, bool periodic) { } CFConv* createConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth, - float* w1, float* b1, float* w2, float* b2) { - return new CpuCFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth, w1, b1, w2, b2); + CFConv::ActivationFunction activation, float* w1, float* b1, float* w2, float* b2) { + return new CpuCFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth, activation, w1, b1, w2, b2); } #include "TestCFConv.h" diff --git a/schnet/TestCudaCFConv.cu b/schnet/TestCudaCFConv.cu index dc8b0f9..fe73b5c 100644 --- a/schnet/TestCudaCFConv.cu +++ b/schnet/TestCudaCFConv.cu @@ -5,8 +5,8 @@ CFConvNeighbors* createNeighbors(int numAtoms, float cutoff, bool periodic) { } CFConv* createConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth, - float* w1, float* b1, float* w2, float* b2) { - return new CudaCFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth, w1, b1, w2, b2); + CFConv::ActivationFunction activation, float* w1, float* b1, float* w2, float* b2) { + return new CudaCFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth, activation, w1, b1, w2, b2); } #include "TestCFConv.h"