Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Fix and optimize handling of vectorized memory accesses #17767

Merged
merged 41 commits into from
Apr 17, 2020
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
2fe0eaf
Vectorized loads for binary elemwise kernel
ptrendx Feb 14, 2020
6b89506
More generalization
ptrendx Feb 14, 2020
37d81c8
Add backwardusenone
ptrendx Feb 19, 2020
f86da86
Remove the unused _backward_add op
ptrendx Feb 21, 2020
ea56552
Add vectorized backwardusein
ptrendx Feb 22, 2020
ec08749
Extending vectorization to more binary ops, binary ops with scalar and
ptrendx Feb 25, 2020
28e5877
Handling ElementwiseSum
ptrendx Mar 4, 2020
541aebb
Get rid of half2 in mshadow
ptrendx Mar 4, 2020
7729114
Remove backward_elemwiseaddex
ptrendx Mar 5, 2020
8455c0d
Revert "Remove the unused _backward_add op"
ptrendx Mar 5, 2020
402bb59
Revert "Remove backward_elemwiseaddex"
ptrendx Mar 5, 2020
716aa1a
Add back the backward_add since C++ test relies on it
ptrendx Mar 5, 2020
948cea1
Test bcast implementations
ptrendx Mar 10, 2020
f326f7e
First version of vectorized bcast
ptrendx Mar 11, 2020
85f6070
Adding single side vectorized bcast kernel
ptrendx Mar 16, 2020
ed8d745
Removing debug prints
ptrendx Mar 16, 2020
3d84675
Actually run the single side kernel
ptrendx Mar 16, 2020
3227476
Move the default implementation of bcast to the vectorized one
ptrendx Mar 17, 2020
2017f75
Limit the new implementation to GPU only
ptrendx Mar 17, 2020
320e91a
Enabling vectorization when broadcast does not actually do broadcast
ptrendx Mar 17, 2020
4decacd
Cleaning
ptrendx Mar 17, 2020
a16cec0
Cleaning part 2
ptrendx Mar 18, 2020
ff2243d
Fix for numpy ops using stuff from broadcast
ptrendx Mar 18, 2020
ecbdc6d
Fix
ptrendx Mar 18, 2020
2592e53
Fix lint
ptrendx Mar 18, 2020
0136fdd
Merge branch 'upstream' into pr_vectorized_loads
ptrendx Apr 3, 2020
51ed0bb
Merge branch 'upstream' into pr_vectorized_loads
ptrendx Apr 10, 2020
78d25a9
Try to debug pinv numpy test
ptrendx Apr 10, 2020
29d8cb2
Merge branch 'upstream' into pr_vectorized_loads
ptrendx Apr 13, 2020
a801f8b
Fix
ptrendx Apr 14, 2020
810f8c8
Fix the vectorized broadcast implementation for misaligned input
ptrendx Apr 16, 2020
f5f5d3e
Added tests
ptrendx Apr 17, 2020
4120fe8
Added docs to cuda_vectorization.cuh
ptrendx Apr 17, 2020
0e38e81
Merge branch 'upstream' into pr_vectorized_loads
ptrendx Apr 17, 2020
c1a734a
Another fix for broadcast and fix INT64 compilation
ptrendx Apr 17, 2020
de6125e
Optimize for aligned=true
ptrendx Apr 17, 2020
9129ba2
1 more addition to test
ptrendx Apr 17, 2020
43f4b4e
Reverting the change to Numpy op test
ptrendx Apr 17, 2020
1af684c
Trying mcmodel=medium to fix the failure in CMake static build
ptrendx Apr 17, 2020
31c84b1
Revert "Trying mcmodel=medium to fix the failure in CMake static build"
ptrendx Apr 17, 2020
b9d1760
Limiting the PR to just elementwise ops
ptrendx Apr 17, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 0 additions & 48 deletions 3rdparty/mshadow/mshadow/base.h
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,6 @@ extern "C" {
}

#include "./half.h"
#include "./half2.h"
#include "./bfloat.h"
#define MSHADOW_HALF_BF_OPERATOR(RTYPE, OP) \
MSHADOW_XINLINE RTYPE operator OP(mshadow::half::half_t a, mshadow::bfloat::bf16_t b) { \
Expand Down Expand Up @@ -392,11 +391,6 @@ struct DataType<half::half_t> {
#endif
};
template<>
struct DataType<half::half2_t> {
static const int kFlag = kFloat16;
static const int kLanes = 2;
};
template<>
struct DataType<bfloat::bf16_t> {
static const int kFlag = kBfloat16;
static const int kLanes = 1;
Expand Down Expand Up @@ -1149,48 +1143,6 @@ struct minimum {
}
#endif

#define MSHADOW_TYPE_SWITCH_WITH_HALF2(type, DType, ...) \
switch (type) { \
case mshadow::kFloat32: \
{ \
typedef float DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kFloat64: \
{ \
typedef double DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kFloat16: \
{ \
typedef mshadow::half::half2_t DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kUint8: \
{ \
typedef uint8_t DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kInt32: \
{ \
typedef int32_t DType; \
{__VA_ARGS__} \
} \
break; \
case mshadow::kInt64: \
{ \
typedef int64_t DType; \
{__VA_ARGS__} \
} \
break; \
default: \
LOG(FATAL) << "Unknown type enum " << type; \
}

#define MSHADOW_SGL_DBL_TYPE_SWITCH(type, DType, ...) \
switch (type) { \
case mshadow::kFloat32: \
Expand Down
143 changes: 0 additions & 143 deletions 3rdparty/mshadow/mshadow/half2.h

This file was deleted.

Loading