This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Clear CUDA global error state after CUDA API calls #1020

Merged (2 commits) on Oct 15, 2019
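A minimal sketch (not part of this change set) of the failure mode the PR addresses: a failing CUDA runtime call such as an over-sized `cudaMalloc` records its error code in the global error state, and a later `cudaGetLastError()` check after an unrelated kernel launch then misattributes that stale error to the launch. The fix applied throughout CUB and Thrust below is to call `cudaGetLastError()` right after the failing call so the state is consumed where the error actually occurred.

```cuda
// Standalone illustration (assumed, not taken from this PR) of a "leaked"
// CUDA error being misattributed to an unrelated kernel launch.
#include <cstddef>
#include <cstdio>

#include <cuda_runtime.h>

__global__ void unrelated_kernel() {}

int main()
{
  void *p = 0;

  // Deliberately request more memory than any device has; the call fails and
  // records its error code in the CUDA runtime's error state.
  cudaError_t status = cudaMalloc(&p, static_cast<std::size_t>(-1));

  if (status != cudaSuccess)
  {
    // The pattern this PR applies after failing CUDA API calls: consume the
    // stale error so it cannot surface later. Remove this line and the
    // launch below appears to have failed.
    cudaGetLastError();
  }

  unrelated_kernel<<<1, 1>>>();
  std::printf("launch status: %s\n", cudaGetErrorString(cudaGetLastError()));
  return 0;
}
```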
1 change: 1 addition & 0 deletions doc/branching.md
@@ -13,6 +13,7 @@ versions don't directly map to any CUDA Toolkit version.

| Thrust version | CUDA version  |
| -------------- | ------------- |
| 1.9.6          | 10.1 Update 2 |
| 1.9.5          | 10.1 Update 1 |
| 1.9.4          | 10.1          |
| 1.9.3          | 10.0          |
18 changes: 18 additions & 0 deletions doc/changelog.md
@@ -1,3 +1,21 @@
# Thrust v1.9.6 (CUDA 10.1 Update 2) #

## Summary

Thrust v1.9.6 is a minor release accompanying the CUDA 10.1 Update 2 release.

## Bug Fixes

- NVBug 2509847 Inconsistent alignment of `thrust::complex`
- NVBug 2586774 Compilation failure with Clang + older libstdc++ that doesn't
have `std::is_trivially_copyable`
- NVBug 200488234 CUDA header files contain Unicode characters, which leads to
  compilation errors on Windows
- #949, #973, NVBug 2422333, NVBug 2522259, NVBug 2528822 `thrust::detail::aligned_reinterpret_cast`
  must be annotated with `__host__ __device__`
- NVBug 2599629 Missing include in the OpenMP sort implementation
- NVBug 200513211 Truncation warning in test code under VC142

# Thrust v1.9.5 (CUDA 10.1 Update 1)

## Summary
24 changes: 24 additions & 0 deletions testing/out_of_memory_recovery.cu
@@ -0,0 +1,24 @@
// Regression test for NVBug 2720132.

#include <unittest/unittest.h>
#include <thrust/device_vector.h>
#include <thrust/detail/cstdint.h>

struct non_trivial
{
__host__ __device__ non_trivial() {}
__host__ __device__ ~non_trivial() {}
};

void test_out_of_memory_recovery()
{
  try
  {
    // Keep one vector of non-trivially-destructible elements alive so that
    // unwinding after the allocation failure below must launch a kernel to
    // run destructors; a leaked CUDA error state would surface there.
    thrust::device_vector<non_trivial> x(1);

    // Double the request until an allocation fails and throws.
    for (thrust::detail::uint64_t n = 1 ;; n <<= 1)
      thrust::device_vector<thrust::detail::uint32_t> y(n);
  }
  catch (...) { }
}
DECLARE_UNITTEST(test_out_of_memory_recovery);
thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh
@@ -1,7 +1,7 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
@@ -12,7 +12,7 @@
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -177,13 +177,13 @@ public:
res_desc.res.linear.desc = channel_desc;
res_desc.res.linear.sizeInBytes = bytes;
tex_desc.readMode = cudaReadModeElementType;
return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL);
return CubDebug(cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL));
}

/// Unbind this iterator from its texture reference
cudaError_t UnbindTexture()
{
return cudaDestroyTextureObject(tex_obj);
return CubDebug(cudaDestroyTextureObject(tex_obj));
}

/// Postfix increment
2 changes: 1 addition & 1 deletion thrust/system/cuda/detail/cub/util_allocator.cuh
@@ -406,7 +406,7 @@ struct CachingDeviceAllocator
// in use by the device, only consider cached blocks that are
// either (from the active stream) or (from an idle stream)
if ((active_stream == block_itr->associated_stream) ||
(cudaEventQuery(block_itr->ready_event) != cudaErrorNotReady))
(CubDebug(cudaEventQuery(block_itr->ready_event)) != cudaErrorNotReady))
{
// Reuse existing cache block. Insert into live blocks.
found = true;
11 changes: 9 additions & 2 deletions thrust/system/cuda/detail/cub/util_debug.cuh
@@ -1,7 +1,7 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
@@ -12,7 +12,7 @@
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -72,6 +72,13 @@ __host__ __device__ __forceinline__ cudaError_t Debug(
{
(void)filename;
(void)line;

#ifdef CUB_RUNTIME_ENABLED
// Clear the global CUDA error state which may have been set by the last
// call. Otherwise, errors may "leak" to unrelated kernel launches.
cudaGetLastError();
#endif

#ifdef CUB_STDERR
if (error)
{
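With the `Debug` change above, every `CubDebug(...)`-wrapped runtime call now clears the global error state as a side effect of being checked. A self-contained analogue of that wrapper pattern, using hypothetical names (`debug_checked` and `DEBUG_CHECKED` are illustrations, not CUB's API):

```cuda
// Hypothetical analogue of CUB's Debug()/CubDebug(): report a failing runtime
// call, then consume the error state so later checks start from a clean slate.
#include <cstddef>
#include <cstdio>

#include <cuda_runtime.h>

static cudaError_t debug_checked(cudaError_t error, const char *file, int line)
{
  // Mirrors the cudaGetLastError() added to Debug() above: drop whatever the
  // call that produced `error` left in the global error state.
  cudaGetLastError();

  if (error != cudaSuccess)
    std::fprintf(stderr, "CUDA error %d [%s:%d]: %s\n",
                 static_cast<int>(error), file, line, cudaGetErrorString(error));
  return error;
}

#define DEBUG_CHECKED(e) debug_checked((e), __FILE__, __LINE__)

int main()
{
  void *p = 0;

  // The failure is reported and cleared in one place...
  DEBUG_CHECKED(cudaMalloc(&p, static_cast<std::size_t>(-1)));

  // ...so an unrelated follow-up check is not polluted by it.
  std::printf("state afterwards: %s\n", cudaGetErrorString(cudaGetLastError()));
  return 0;
}
```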
15 changes: 6 additions & 9 deletions thrust/system/cuda/detail/cub/util_device.cuh
@@ -1,7 +1,7 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
@@ -12,7 +12,7 @@
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -199,11 +199,11 @@ CUB_RUNTIME_FUNCTION __forceinline__
static cudaError_t SyncStream(cudaStream_t stream)
{
#if (CUB_PTX_ARCH == 0)
return cudaStreamSynchronize(stream);
return CubDebug(cudaStreamSynchronize(stream));
#else
(void)stream;
// Device can't yet sync on a specific stream
return cudaDeviceSynchronize();
return CubDebug(cudaDeviceSynchronize());
#endif
}

@@ -255,15 +255,12 @@ cudaError_t MaxSmOccupancy(

// CUDA API calls not supported from this device
return CubDebug(cudaErrorInvalidConfiguration);

#else

return cudaOccupancyMaxActiveBlocksPerMultiprocessor (
return CubDebug(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
&max_sm_occupancy,
kernel_ptr,
block_threads,
dynamic_smem_bytes);

dynamic_smem_bytes));
#endif // CUB_RUNTIME_ENABLED
}

4 changes: 2 additions & 2 deletions thrust/system/cuda/detail/malloc_and_free.h
@@ -62,9 +62,9 @@ void *malloc(execution_policy<DerivedPolicy> &, std::size_t n)

if(status != cudaSuccess)
{
// cuda_cub::throw_on_error(status, "device malloc failed");
cudaGetLastError(); // Clear global CUDA error state.
thrust::system::detail::bad_alloc(thrust::cuda_category().message(status).c_str());
}
}
#else
result = thrust::raw_pointer_cast(thrust::malloc(thrust::seq, n));
#endif
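With `malloc` now clearing the error state before signalling failure, a caller can treat device out-of-memory as a recoverable condition. A hedged user-level sketch, assuming the failed allocation surfaces as a catchable exception (the same assumption the new `out_of_memory_recovery` test relies on):

```cuda
// User-level sketch (not part of the PR): back off after a failed device
// allocation and keep using Thrust, relying on the error state being clean.
#include <cstddef>
#include <cstdio>

#include <thrust/device_vector.h>

int main()
{
  std::size_t n = std::size_t(1) << 40;  // deliberately too large for most GPUs

  while (n > 0)
  {
    try
    {
      thrust::device_vector<int> v(n);
      std::printf("allocated %zu elements\n", n);
      break;
    }
    catch (...)  // the same catch-all the new regression test uses
    {
      n /= 2;    // shrink the request and retry
    }
  }

  return 0;
}
```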
31 changes: 1 addition & 30 deletions thrust/system/cuda/detail/par.h
@@ -29,6 +29,7 @@
#include <thrust/detail/config.h>
#include <thrust/system/cuda/detail/guarded_cuda_runtime_api.h>
#include <thrust/system/cuda/detail/execution_policy.h>
#include <thrust/system/cuda/detail/util.h>

#include <thrust/detail/allocator_aware_execution_policy.h>

@@ -40,36 +41,6 @@
THRUST_BEGIN_NS
namespace cuda_cub {

inline __host__ __device__
cudaStream_t
default_stream()
{
return cudaStreamLegacy;
}

template <class Derived>
__host__ __device__
cudaStream_t
get_stream(execution_policy<Derived> &)
{
return default_stream();
}

__thrust_exec_check_disable__
template <class Derived>
__host__ __device__
cudaError_t
synchronize_stream(execution_policy<Derived> &)
{
#if __THRUST_HAS_CUDART__
cudaDeviceSynchronize();
return cudaGetLastError();
#else
return cudaSuccess;
#endif
}


template <class Derived>
struct execute_on_stream_base : execution_policy<Derived>
{