Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added option to use tanh activation #23

Merged
merged 1 commit into from
Dec 16, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 25 additions & 3 deletions schnet/CFConv.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ class CFConvNeighbors {
*
* 1. Compute a set of Gaussian basis functions describing the distance between them.
* 2. Pass them through a dense layer.
* 3. Apply a shifted softplus activation function.
* 3. Apply an activation function.
* 4. Pass the result through a second dense layer.
* 5. Apply a cosine cutoff function to make interactions smoothly go to zero at the cutoff.
*
Expand All @@ -108,6 +108,19 @@ class CFConvNeighbors {
*/
class CFConv {
public:
/**
* This is an enumeration of supported activation functions.
*/
enum ActivationFunction {
/**
* y(x) = log(0.5*exp(x) + 0.5)
*/
ShiftedSoftplus = 0,
/**
* y(x) = tanh(x)
*/
Tanh = 1
};
/**
* Construct on object for computing continuous filter convolution (cfconv) functions.
*
Expand All @@ -117,9 +130,11 @@ class CFConv {
* @param cutoff the cutoff distance
* @param periodic whether to apply periodic boundary conditions
* @param gaussianWidth the width of the Gaussian basis functions
* @param activation the activation function to use between the two dense layers
*/
CFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth) :
numAtoms(numAtoms), width(width), numGaussians(numGaussians), cutoff(cutoff), periodic(periodic), gaussianWidth(gaussianWidth) {
CFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth,
ActivationFunction activation) : numAtoms(numAtoms), width(width), numGaussians(numGaussians),
cutoff(cutoff), periodic(periodic), gaussianWidth(gaussianWidth), activation(activation) {
}
virtual ~CFConv() {
}
Expand Down Expand Up @@ -188,10 +203,17 @@ class CFConv {
float getGaussianWidth() const {
return gaussianWidth;
}
/**
* Get the activation function used between the two dense layers.
*/
ActivationFunction getActivation() const {
return activation;
}
protected:
const int numAtoms, width, numGaussians;
const float cutoff, gaussianWidth;
const bool periodic;
const ActivationFunction activation;
};

#endif
22 changes: 16 additions & 6 deletions schnet/CpuCFConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ void CpuCFConvNeighbors::findNeighbors(const float* positions, const float* peri
}

CpuCFConv::CpuCFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth,
const float* w1, const float* b1, const float* w2, const float* b2) :
CFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth) {
ActivationFunction activation, const float* w1, const float* b1, const float* w2, const float* b2) :
CFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth, activation) {
for (int i = 0; i < numGaussians; i++)
gaussianPos.push_back(i*cutoff/(numGaussians-1));
for (int i = 0; i < numGaussians*width; i++)
Expand Down Expand Up @@ -161,7 +161,10 @@ void CpuCFConv::compute(const CFConvNeighbors& neighbors, const float* positions
float sum = b1[i];
for (int j = 0; j < getNumGaussians(); j++)
sum += gaussian[j]*w1[i*getNumGaussians()+j];
y1[i] = logf(0.5f*expf(sum) + 0.5f);
if (getActivation() == ShiftedSoftplus)
y1[i] = logf(0.5f*expf(sum) + 0.5f);
else
y1[i] = tanhf(sum);
}

// Apply the second dense layer.
Expand Down Expand Up @@ -251,9 +254,16 @@ void CpuCFConv::backpropImpl(const CpuCFConvNeighbors& neighbors, const float* p
sum += gaussian[j]*w1[i*getNumGaussians()+j];
dSumdR += dGaussdR[j]*w1[i*getNumGaussians()+j];
}
float expSum = expf(sum);
y1[i] = logf(0.5f*expSum + 0.5f);
dY1dR[i] = dSumdR*expSum/(expSum + 1);
if (getActivation() == ShiftedSoftplus) {
float expSum = expf(sum);
y1[i] = logf(0.5f*expSum + 0.5f);
dY1dR[i] = dSumdR*expSum/(expSum + 1);
}
else {
float th = tanhf(sum);
y1[i] = th;
dY1dR[i] = dSumdR*(1-th*th);
}
}

// Apply the second dense layer.
Expand Down
5 changes: 3 additions & 2 deletions schnet/CpuCFConv.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class CpuCFConvNeighbors : public CFConvNeighbors {
*
* 1. Compute a set of Gaussian basis functions describing the distance between them.
* 2. Pass them through a dense layer.
* 3. Apply a shifted softplus activation function.
* 3. Apply an activation function.
* 4. Pass the result through a second dense layer.
* 5. Apply a cosine cutoff function to make interactions smoothly go to zero at the cutoff.
*
Expand All @@ -111,13 +111,14 @@ class CpuCFConv : public CFConv {
* @param cutoff the cutoff distance
* @param periodic whether to apply periodic boundary conditions
* @param gaussianWidth the width of the Gaussian basis functions
* @param activation the activation function to use between the two dense layers
* @param w1 an array of shape [numGaussians][width] containing the weights of the first dense layer
* @param b1 an array of length [width] containing the biases of the first dense layer
* @param w2 an array of shape [width][width] containing the weights of the second dense layer
* @param b2 an array of length [width] containing the biases of the second dense layer
*/
CpuCFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth,
const float* w1, const float* b1, const float* w2, const float* b2);
ActivationFunction activation, const float* w1, const float* b1, const float* w2, const float* b2);
/**
* Compute the output of the layer.
*
Expand Down
44 changes: 28 additions & 16 deletions schnet/CudaCFConv.cu
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,9 @@ void CudaCFConvNeighbors::build(const float* positions, const float* periodicBox
buildNeighborList<false, false><<<numBlocks, blockSize>>>(getNumAtoms(), getCutoff(), neighbors, neighborCount, neighborDistances, neighborDeltas, (float3*) devicePositions, deviceBoxVectors);
}

CudaCFConv::CudaCFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth, const float* w1,
const float* b1, const float* w2, const float* b2) : CFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth),
CudaCFConv::CudaCFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth,
ActivationFunction activation, const float* w1, const float* b1, const float* w2, const float* b2) :
CFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth, activation),
input(0), output(0), inputDeriv(0), positionDeriv(0), w1(0), b1(0), w2(0), b2(0) {
// Allocate memory on the device for the layer parameters.

Expand Down Expand Up @@ -267,7 +268,7 @@ __device__ float cutoffDeriv(float r, float rc) {
return -(0.5f*Pi/rc) * sinf(Pi*r/rc);
}

__global__ void computeCFConv(int numAtoms, int numGaussians, int width, float cutoff, float gaussianWidth,
__global__ void computeCFConv(int numAtoms, int numGaussians, int width, float cutoff, float gaussianWidth, int activation,
const int2* neighbors, const int* neighborCount, const float* neighborDistance, const float* input,
float* output, const float* w1, const float* b1, const float* w2, const float* b2) {
const int warp = (blockIdx.x*blockDim.x+threadIdx.x)/32;
Expand Down Expand Up @@ -302,7 +303,10 @@ __global__ void computeCFConv(int numAtoms, int numGaussians, int width, float c
float sum = b1[i];
for (int j = 0; j < numGaussians; j++)
sum += temp1[j]*w1[i+j*width];
temp2[i] = logf(0.5f*expf(sum) + 0.5f);
if (activation == 0)
temp2[i] = logf(0.5f*expf(sum) + 0.5f);
else
temp2[i] = tanhf(sum);
}
__syncwarp();

Expand Down Expand Up @@ -344,8 +348,9 @@ void CudaCFConv::compute(const CFConvNeighbors& neighbors, const float* position
const int numBlocks = numMultiprocessors*2;
const int tempSize = max(getNumGaussians(), getWidth())*(blockSize/32);
computeCFConv<<<numBlocks, blockSize, 2*tempSize*sizeof(float)>>>(getNumAtoms(), getNumGaussians(),
getWidth(), getCutoff(), getGaussianWidth(), cudaNeighbors.getNeighbors(), cudaNeighbors.getNeighborCount(),
cudaNeighbors.getNeighborDistances(), deviceInput, deviceOutput, w1, b1, w2, b2);
getWidth(), getCutoff(), getGaussianWidth(), getActivation(), cudaNeighbors.getNeighbors(),
cudaNeighbors.getNeighborCount(), cudaNeighbors.getNeighborDistances(), deviceInput,
deviceOutput, w1, b1, w2, b2);

// If necessary, copy the output.

Expand All @@ -355,7 +360,7 @@ void CudaCFConv::compute(const CFConvNeighbors& neighbors, const float* position

template <bool PERIODIC, bool TRICLINIC>
__global__ void backpropCFConv(int numAtoms, int numGaussians, int width, float cutoff, float gaussianWidth,
const int2* neighbors, const int* neighborCount, const float3* neighborDeltas,
int activation, const int2* neighbors, const int* neighborCount, const float3* neighborDeltas,
const float* input, const float* outputDeriv, float* inputDeriv, float* positionDeriv, const float* w1,
const float* b1, const float* w2, const float* b2) {
const int warp = (blockIdx.x*blockDim.x+threadIdx.x)/32;
Expand Down Expand Up @@ -399,9 +404,16 @@ __global__ void backpropCFConv(int numAtoms, int numGaussians, int width, float
sum += temp1[j]*w;
dSumdR += dtemp1[j]*w;
}
float expSum = expf(sum);
temp2[i] = logf(0.5f*expSum + 0.5f);
dtemp2[i] = dSumdR*expSum/(expSum + 1);
if (activation == 0) {
float expSum = expf(sum);
temp2[i] = logf(0.5f*expSum + 0.5f);
dtemp2[i] = dSumdR*expSum/(expSum + 1);
}
else {
float th = tanhf(sum);
temp2[i] = th;
dtemp2[i] = dSumdR*(1-th*th);
}
}
__syncwarp();

Expand Down Expand Up @@ -466,19 +478,19 @@ void CudaCFConv::backprop(const CFConvNeighbors& neighbors, const float* positio
if (getPeriodic()) {
if (neighbors.getTriclinic())
backpropCFConv<true, true><<<numBlocks, blockSize, 4*tempSize*sizeof(float)>>>(getNumAtoms(), getNumGaussians(),
getWidth(), getCutoff(), getGaussianWidth(), cudaNeighbors.getNeighbors(), cudaNeighbors.getNeighborCount(),
cudaNeighbors.getNeighborDeltas(), deviceInput, deviceOutputDeriv,
getWidth(), getCutoff(), getGaussianWidth(), getActivation(), cudaNeighbors.getNeighbors(),
cudaNeighbors.getNeighborCount(), cudaNeighbors.getNeighborDeltas(), deviceInput, deviceOutputDeriv,
deviceInputDeriv, devicePositionDeriv, w1, b1, w2, b2);
else
backpropCFConv<true, false><<<numBlocks, blockSize, 4*tempSize*sizeof(float)>>>(getNumAtoms(), getNumGaussians(),
getWidth(), getCutoff(), getGaussianWidth(), cudaNeighbors.getNeighbors(), cudaNeighbors.getNeighborCount(),
cudaNeighbors.getNeighborDeltas(), deviceInput, deviceOutputDeriv,
getWidth(), getCutoff(), getGaussianWidth(), getActivation(), cudaNeighbors.getNeighbors(),
cudaNeighbors.getNeighborCount(), cudaNeighbors.getNeighborDeltas(), deviceInput, deviceOutputDeriv,
deviceInputDeriv, devicePositionDeriv, w1, b1, w2, b2);
}
else
backpropCFConv<false, false><<<numBlocks, blockSize, 4*tempSize*sizeof(float)>>>(getNumAtoms(), getNumGaussians(),
getWidth(), getCutoff(), getGaussianWidth(), cudaNeighbors.getNeighbors(), cudaNeighbors.getNeighborCount(),
cudaNeighbors.getNeighborDeltas(), deviceInput, deviceOutputDeriv,
getWidth(), getCutoff(), getGaussianWidth(), getActivation(), cudaNeighbors.getNeighbors(),
cudaNeighbors.getNeighborCount(), cudaNeighbors.getNeighborDeltas(), deviceInput, deviceOutputDeriv,
deviceInputDeriv, devicePositionDeriv, w1, b1, w2, b2);

// If necessary, copy the output.
Expand Down
5 changes: 3 additions & 2 deletions schnet/CudaCFConv.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class CudaCFConvNeighbors : public CFConvNeighbors {
*
* 1. Compute a set of Gaussian basis functions describing the distance between them.
* 2. Pass them through a dense layer.
* 3. Apply a shifted softplus activation function.
* 3. Apply an activation function.
* 4. Pass the result through a second dense layer.
* 5. Apply a cosine cutoff function to make interactions smoothly go to zero at the cutoff.
*
Expand All @@ -144,13 +144,14 @@ class CudaCFConv : public CFConv {
* @param cutoff the cutoff distance
* @param periodic whether to apply periodic boundary conditions
* @param gaussianWidth the width of the Gaussian basis functions
* @param activation the activation function to use between the two dense layers
* @param w1 an array of shape [numGaussians][width] containing the weights of the first dense layer
* @param b1 an array of length [width] containing the biases of the first dense layer
* @param w2 an array of shape [width][width] containing the weights of the second dense layer
* @param b2 an array of length [width] containing the biases of the second dense layer
*/
CudaCFConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth,
const float* w1, const float* b1, const float* w2, const float* b2);
ActivationFunction activation, const float* w1, const float* b1, const float* w2, const float* b2);
~CudaCFConv();
/**
* Compute the output of the layer. All of the pointers passed to this method may refer to either host or device memory.
Expand Down
36 changes: 31 additions & 5 deletions schnet/TestCFConv.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ void validateDerivatives(CFConvNeighbors& neighbors, CFConv& conv, float* positi
}
}

void testWater(float* periodicVectors, float* expectedOutput) {
void testWater(float* periodicVectors, float* expectedOutput, CFConv::ActivationFunction activation) {
int numAtoms = 18;
float positions[18][3] = {
{ 0.726, -1.384, -0.376},
Expand Down Expand Up @@ -128,7 +128,7 @@ void testWater(float* periodicVectors, float* expectedOutput) {
vector<float> y(8*18);
CFConvNeighbors* neighbors = createNeighbors(numAtoms, 2.0, (periodicVectors != NULL));
neighbors->build((float*) positions, periodicVectors);
CFConv* conv = createConv(numAtoms, 8, 5, 2.0, (periodicVectors != NULL), 0.5, (float*) w1, b1.data(), (float*) w2, b2.data());
CFConv* conv = createConv(numAtoms, 8, 5, 2.0, (periodicVectors != NULL), 0.5, activation, (float*) w1, b1.data(), (float*) w2, b2.data());
conv->compute(*neighbors, (float*) positions, periodicVectors, x.data(), y.data());
for (int i = 0; i < y.size(); i++)
assertEqual(expectedOutput[i], y[i], 1e-4, 1e-3);
Expand Down Expand Up @@ -159,7 +159,7 @@ void testWaterNonperiodic() {
35.483078, 25.192368, 20.326399, -11.645652, -17.048292, -6.7168756, 7.7418537, 38.036129,
35.362617, 25.096624, 20.284662, -11.627875, -16.997498, -6.7423472, 7.7480173, 38.007557
};
testWater(NULL, expectedOutput);
testWater(NULL, expectedOutput, CFConv::ShiftedSoftplus);
}

void testWaterPeriodic() {
Expand Down Expand Up @@ -189,7 +189,7 @@ void testWaterPeriodic() {
0.0, 5.0, 0.0,
0.0, 0.0, 5.0
};
testWater(periodicVectors, expectedOutput);
testWater(periodicVectors, expectedOutput, CFConv::ShiftedSoftplus);
}

void testWaterTriclinic() {
Expand Down Expand Up @@ -219,12 +219,38 @@ void testWaterTriclinic() {
1.5, 5.0, 0.0,
-0.5, -1.0, 5.0
};
testWater(periodicVectors, expectedOutput);
testWater(periodicVectors, expectedOutput, CFConv::ShiftedSoftplus);
}

void testWaterTanh() {
// Values computed with SchNetPack.
float expectedOutput[] = {
1.3675559, 0.5935415, 1.9673429, -0.95395255, 0.50335306, -0.48667794, 1.7807188, 3.3819561,
0.29005343, 0.14386685, 0.53776228, -0.28281462, 0.16077384, -0.16586897, 0.63400537, 1.2554247,
0.11627886, 0.07301569, 0.3117035, -0.17863229, 0.10733558, -0.11581345, 0.45773405, 0.93143916,
4.4241352, 1.8217319, 5.763402, -2.6756771, 1.3593771, -1.2676939, 4.4866109, 8.2608976,
1.8093463, 0.74896717, 2.4062924, -1.1258253, 0.5758335, -0.5449248, 1.9352937, 3.5917006,
1.8147539, 0.75354069, 2.418226, -1.1328967, 0.58087122, -0.54870433, 1.9535246, 3.6271381,
6.5715466, 2.6803012, 8.4133148, -3.8772564, 1.9475672, -1.8058398, 6.336915, 11.591103,
3.5397758, 1.4405037, 4.5571709, -2.1003108, 1.0585054, -0.98888546, 3.4656358, 6.3538094,
3.6488724, 1.4866341, 4.7028041, -2.1701686, 1.0932748, -1.0214566, 3.5815094, 6.5694709,
9.3324986, 3.7891164, 11.839811, -5.4330664, 2.7172399, -2.5089715, 8.7689342, 15.976856,
5.2280664, 2.117528, 6.6500511, -3.0500164, 1.5282308, -1.4179878, 4.9473982, 9.0240326,
4.9489441, 2.0055315, 6.2975488, -2.8885479, 1.4482468, -1.3434849, 4.6904936, 8.5567827,
12.145325, 4.9184694, 15.3298, -7.017447, 3.5011201, -3.2249959, 11.244879, 20.440998,
6.9010692, 2.7870331, 8.7232628, -3.9895077, 1.9928961, -1.8433089, 6.4140768, 11.666815,
6.5481153, 2.6455522, 8.2783947, -3.7856936, 1.8922514, -1.7496059, 6.0915923, 11.080776,
13.773149, 5.5679493, 17.322308, -7.9124432, 3.9436364, -3.6259429, 12.627276, 22.916275,
7.3554273, 2.9703379, 9.2593012, -4.2263622, 2.1111732, -1.9438766, 6.7665024, 12.282778,
7.3363366, 2.9642651, 9.2432461, -4.2205625, 2.1098421, -1.9429932, 6.7673745, 12.288699
};
testWater(NULL, expectedOutput, CFConv::Tanh);
}

int main() {
testWaterNonperiodic();
testWaterPeriodic();
testWaterTriclinic();
testWaterTanh();
return 0;
}
4 changes: 2 additions & 2 deletions schnet/TestCpuCFConv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ CFConvNeighbors* createNeighbors(int numAtoms, float cutoff, bool periodic) {
}

CFConv* createConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth,
float* w1, float* b1, float* w2, float* b2) {
return new CpuCFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth, w1, b1, w2, b2);
CFConv::ActivationFunction activation, float* w1, float* b1, float* w2, float* b2) {
return new CpuCFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth, activation, w1, b1, w2, b2);
}

#include "TestCFConv.h"
4 changes: 2 additions & 2 deletions schnet/TestCudaCFConv.cu
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ CFConvNeighbors* createNeighbors(int numAtoms, float cutoff, bool periodic) {
}

CFConv* createConv(int numAtoms, int width, int numGaussians, float cutoff, bool periodic, float gaussianWidth,
float* w1, float* b1, float* w2, float* b2) {
return new CudaCFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth, w1, b1, w2, b2);
CFConv::ActivationFunction activation, float* w1, float* b1, float* w2, float* b2) {
return new CudaCFConv(numAtoms, width, numGaussians, cutoff, periodic, gaussianWidth, activation, w1, b1, w2, b2);
}

#include "TestCFConv.h"