Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
Revert PR 17767 for fixing GPU memory usage regression (#18283)
Browse files Browse the repository at this point in the history
* Revert "Fix and optimize handling of vectorized memory accesses (#17767)"

This reverts commit 5542d03.

* add license to reverted file
  • Loading branch information
rondogency authored May 13, 2020
1 parent 51844b2 commit 47a38d1
Show file tree
Hide file tree
Showing 19 changed files with 463 additions and 1,344 deletions.
48 changes: 48 additions & 0 deletions 3rdparty/mshadow/mshadow/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@ extern "C" {
}

#include "./half.h"
#include "./half2.h"
#include "./bfloat.h"
#define MSHADOW_HALF_BF_OPERATOR(RTYPE, OP) \
MSHADOW_XINLINE RTYPE operator OP(mshadow::half::half_t a, mshadow::bfloat::bf16_t b) { \
Expand Down Expand Up @@ -386,6 +387,11 @@ struct DataType<half::half_t> {
#endif
};
/*!
 * \brief DataType traits for the packed half2_t type.
 *  It reports the kFloat16 type flag, with kLanes set to 2 instead of 1.
 */
template<>
struct DataType<half::half2_t> {
/*! \brief runtime type flag associated with half2_t */
static const int kFlag = kFloat16;
/*! \brief number of lanes carried by one half2_t */
static const int kLanes = 2;
};
template<>
struct DataType<bfloat::bf16_t> {
static const int kFlag = kBfloat16;
static const int kLanes = 1;
Expand Down Expand Up @@ -1138,6 +1144,48 @@ struct minimum {
}
#endif

/*!
 * \brief Dispatch on a runtime type flag, binding DType to the matching C++ type.
 *
 * For each handled flag the macro opens a scope, typedefs \p DType and expands
 * the trailing statements (__VA_ARGS__) inside that scope.
 * Mapping: kFloat32 -> float, kFloat64 -> double,
 * kFloat16 -> mshadow::half::half2_t (the packed two-lane type, not half_t),
 * kUint8 -> uint8_t, kInt32 -> int32_t, kInt64 -> int64_t.
 * Any other flag reaches the default branch, which reports it via LOG(FATAL).
 *
 * NOTE(review): \p type is expanded twice (in `switch` and in the log message)
 * and is not parenthesized in the log expression, so pass a simple,
 * side-effect-free expression.
 *
 * \param type runtime type flag to switch on
 * \param DType name of the typedef made visible to the statements
 * \param ... statements to instantiate for the selected type
 */
#define MSHADOW_TYPE_SWITCH_WITH_HALF2(type, DType, ...) \
switch (type) { \
case mshadow::kFloat32: \
{ \
typedef float DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kFloat64: \
{ \
typedef double DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kFloat16: \
{ \
typedef mshadow::half::half2_t DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kUint8: \
{ \
typedef uint8_t DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kInt32: \
{ \
typedef int32_t DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kInt64: \
{ \
typedef int64_t DType; \
{__VA_ARGS__} \
} \
break; \
default: \
LOG(FATAL) << "Unknown type enum " << type; \
}

#define MSHADOW_SGL_DBL_TYPE_SWITCH(type, DType, ...) \
switch (type) { \
case mshadow::kFloat32: \
Expand Down
162 changes: 162 additions & 0 deletions 3rdparty/mshadow/mshadow/half2.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

/*!
* Copyright (c) 2017 by Contributors
* \file half2.h
* \brief definition of vector float16, half2 type.
*
* \author Antti-Pekka Hynninen
*/
#ifndef MSHADOW_HALF2_H_
#define MSHADOW_HALF2_H_

/*
 * MSHADOW_CUDA_HALF2 selects the storage and arithmetic path used by half2_t.
 * It is 1 (and <cuda_fp16.h> is included) only when all of these hold:
 * __CUDACC__ is defined, __CUDA_ARCH__ >= 530, MSHADOW_USE_CUDA is non-zero
 * and CUDA_VERSION >= 7050. In every other case it is 0.
 */
#if (defined(__CUDACC__) && __CUDA_ARCH__ >= 530 && MSHADOW_USE_CUDA && CUDA_VERSION >= 7050)
#define MSHADOW_CUDA_HALF2 1
#include <cuda_fp16.h>
#else
#define MSHADOW_CUDA_HALF2 0
#endif

#include<math.h>

/*! \brief namespace for mshadow */
namespace mshadow {
/*! \brief namespace for host/device portable half-precision floats */
namespace half {

/*!
 * \brief Generate a compound-assignment member operator for half2_t.
 *
 * The generated operator computes `*this OP a`, stores the result in *this
 * and returns a reference to *this, so chained uses such as `(x += y) += z`
 * act on the object itself rather than on a temporary copy.
 * The assignment and the return are separate statements so the macro does not
 * depend on the return type of half2_t::operator=.
 *
 * \param AOP compound-assignment operator token to define (e.g. +=)
 * \param OP binary operator token used to compute the new value (e.g. +)
 */
#define MSHADOW_HALF2_ASSIGNOP(AOP, OP) \
template<typename T> \
MSHADOW_XINLINE half2_t& operator AOP (const T& a) { \
*this = half2_t(*this OP a); /* NOLINT(*)*/ \
return *this; \
} \

/*!
 * \brief Pair of half-precision values handled as one object.
 *
 * The class is declared with MSHADOW_ALIGNED(4). Its storage depends on
 * MSHADOW_CUDA_HALF2: a CUDA `half2` (member `half2_`) when it is 1,
 * otherwise two `half_t` lanes (member `half_t2`).
 */
class MSHADOW_ALIGNED(4) half2_t {
 public:
#if MSHADOW_CUDA_HALF2
  half2 half2_;
#else
  half_t half_t2[2];
#endif

  /*! \brief default constructor; does not assign a value to either lane */
  MSHADOW_XINLINE half2_t() {}

#if MSHADOW_CUDA_HALF2
  /*! \brief wrap an existing CUDA half2 value */
  MSHADOW_XINLINE explicit half2_t(half2 a) : half2_(a) {}
#else
  /*! \brief build from two lanes: a goes to lane 0, b to lane 1 */
  MSHADOW_XINLINE explicit half2_t(half_t a, half_t b) {
    half_t2[0] = a;
    half_t2[1] = b;
  }
#endif

  /*! \brief broadcast a single integer value into both lanes */
  MSHADOW_XINLINE explicit half2_t(int a) {
#if MSHADOW_CUDA_HALF2
    half2_ = __half2half2(__int2half_rz(a));
#else
    half_t2[0] = (half_t)a;
    half_t2[1] = (half_t)a;
#endif
  }

  /*! \brief unary plus; returns a copy of this value */
  MSHADOW_XINLINE half2_t operator+() {
    return *this;
  }

  /*! \brief unary minus; negates both lanes */
  MSHADOW_XINLINE half2_t operator-() {
#if MSHADOW_CUDA_HALF2
    return half2_t(__hneg2(half2_));
#else
    return half2_t(-half_t2[0], -half_t2[1]);
#endif
  }

  /*!
   * \brief copy assignment
   * \param a value to copy from
   * \return reference to *this (the conventional form), so the result of an
   *  assignment expression refers to the assigned object and not to a copy
   *  of the right-hand side
   */
  MSHADOW_XINLINE half2_t& operator=(const half2_t& a) {
#if MSHADOW_CUDA_HALF2
    half2_ = a.half2_;
#else
    half_t2[0] = a.half_t2[0];
    half_t2[1] = a.half_t2[1];
#endif
    return *this;
  }

  MSHADOW_HALF2_ASSIGNOP(+=, +)
  MSHADOW_HALF2_ASSIGNOP(-=, -)
  MSHADOW_HALF2_ASSIGNOP(*=, *)
  MSHADOW_HALF2_ASSIGNOP(/=, /)
};

/*! \brief lane-wise addition of two half2_t values */
MSHADOW_XINLINE half2_t operator+(half2_t a, half2_t b) {
#if MSHADOW_CUDA_HALF2
  // Each lane is converted to float, added, and the two sums are packed back.
  const float low_sum = __low2float(a.half2_) + __low2float(b.half2_);
  const float high_sum = __high2float(a.half2_) + __high2float(b.half2_);
  return half2_t(__floats2half2_rn(low_sum, high_sum));
#else
  const half_t first = a.half_t2[0] + b.half_t2[0];
  const half_t second = a.half_t2[1] + b.half_t2[1];
  return half2_t(first, second);
#endif
}
/*! \brief lane-wise subtraction (a - b) of two half2_t values */
MSHADOW_XINLINE half2_t operator-(half2_t a, half2_t b) {
#if MSHADOW_CUDA_HALF2
  // Each lane is converted to float, subtracted, and the results are packed back.
  const float low_diff = __low2float(a.half2_) - __low2float(b.half2_);
  const float high_diff = __high2float(a.half2_) - __high2float(b.half2_);
  return half2_t(__floats2half2_rn(low_diff, high_diff));
#else
  const half_t first = a.half_t2[0] - b.half_t2[0];
  const half_t second = a.half_t2[1] - b.half_t2[1];
  return half2_t(first, second);
#endif
}
/*! \brief lane-wise multiplication of two half2_t values */
MSHADOW_XINLINE half2_t operator*(half2_t a, half2_t b) {
#if MSHADOW_CUDA_HALF2
  // Each lane is converted to float, multiplied, and the products are packed back.
  const float low_prod = __low2float(a.half2_) * __low2float(b.half2_);
  const float high_prod = __high2float(a.half2_) * __high2float(b.half2_);
  return half2_t(__floats2half2_rn(low_prod, high_prod));
#else
  const half_t first = a.half_t2[0] * b.half_t2[0];
  const half_t second = a.half_t2[1] * b.half_t2[1];
  return half2_t(first, second);
#endif
}
/*! \brief lane-wise division (a / b) of two half2_t values */
MSHADOW_XINLINE half2_t operator/(half2_t a, half2_t b) {
#if MSHADOW_CUDA_HALF2
  // Each lane is converted to float, divided, and the quotients are packed back.
  const float low_quot = __low2float(a.half2_) / __low2float(b.half2_);
  const float high_quot = __high2float(a.half2_) / __high2float(b.half2_);
  return half2_t(__floats2half2_rn(low_quot, high_quot));
#else
  const half_t first = a.half_t2[0] / b.half_t2[0];
  const half_t second = a.half_t2[1] / b.half_t2[1];
  return half2_t(first, second);
#endif
}
/*! \brief lane-wise remainder of a / b, computed with ::fmod on each lane */
MSHADOW_XINLINE half2_t operator%(half2_t a, half2_t b) {
#if MSHADOW_CUDA_HALF2
  // Each lane is converted to float, passed to ::fmod, and the results are packed back.
  const float low_rem = ::fmod(__low2float(a.half2_), __low2float(b.half2_));
  const float high_rem = ::fmod(__high2float(a.half2_), __high2float(b.half2_));
  return half2_t(__floats2half2_rn(low_rem, high_rem));
#else
  const half_t first = ::fmod(a.half_t2[0], b.half_t2[0]);
  const half_t second = ::fmod(a.half_t2[1], b.half_t2[1]);
  return half2_t(first, second);
#endif
}
/*! \brief equality of two half2_t values; true only when both lanes compare equal */
MSHADOW_XINLINE bool operator==(half2_t a, half2_t b) {
#if MSHADOW_CUDA_HALF2
  const bool both_lanes_equal = __hbeq2(a.half2_, b.half2_);
  return both_lanes_equal;
#else
  // Lane 1 is only compared when lane 0 already matches.
  if (!(a.half_t2[0] == b.half_t2[0])) {
    return false;
  }
  return a.half_t2[1] == b.half_t2[1];
#endif
}

} // namespace half
} // namespace mshadow
#endif // MSHADOW_HALF2_H_
Loading

0 comments on commit 47a38d1

Please sign in to comment.