From 240dd579d2fc7a89a21ac648af413377f8e4e59e Mon Sep 17 00:00:00 2001 From: David Bucciarelli Date: Sun, 14 Jun 2020 12:09:43 +0200 Subject: [PATCH] The use of more than 64 logical processors on Windows is now extended to all pre-processing tasks like caches, etc. --- include/luxrays/utils/fileext.h | 2 +- include/luxrays/utils/thread.h | 49 +---------- release-notes.txt | 1 + src/luxcore/luxcoreimpl.cpp | 2 +- src/luxrays/CMakeLists.txt | 1 + src/luxrays/utils/safesave.cpp | 1 - src/luxrays/utils/thread.cpp | 83 +++++++++++++++++++ .../engines/caches/photongi/photongicache.cpp | 6 +- .../caches/photongi/tracephotonsthread.cpp | 5 ++ src/slg/engines/cpurenderengine.cpp | 14 +--- src/slg/engines/oclrenderengine.cpp | 12 +-- src/slg/film/filmparse.cpp | 4 +- src/slg/utils/film2sceneradius.cpp | 7 +- src/slg/utils/scenevisibility.cpp | 7 +- 14 files changed, 119 insertions(+), 75 deletions(-) create mode 100644 src/luxrays/utils/thread.cpp diff --git a/include/luxrays/utils/fileext.h b/include/luxrays/utils/fileext.h index a9d8db09a..1a04af37c 100644 --- a/include/luxrays/utils/fileext.h +++ b/include/luxrays/utils/fileext.h @@ -23,7 +23,7 @@ #include #include -namespace slg { +namespace luxrays { inline std::string GetFileNameExt(const std::string &fileName) { return boost::algorithm::to_lower_copy(boost::filesystem::path(fileName).extension().string()); diff --git a/include/luxrays/utils/thread.h b/include/luxrays/utils/thread.h index 16ebb6fff..fe6f678c7 100644 --- a/include/luxrays/utils/thread.h +++ b/include/luxrays/utils/thread.h @@ -25,52 +25,9 @@ namespace luxrays { -inline void SetThreadGroupAffinity(const u_int threadIndex) { - // Set thread affinity the modern way.May not work for Windows version prior to Windows7 -#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined (WIN64) - auto totalProcessors = 0U; - int processorIndex = threadIndex % GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); - - // Determine which processor group to bind the 
thread to. - for (auto i = 0U; i < GetActiveProcessorGroupCount(); ++i) { - totalProcessors += GetActiveProcessorCount(i); - if (totalProcessors >= processorIndex) { - auto mask = (1ULL << GetActiveProcessorCount(i)) - 1; - GROUP_AFFINITY groupAffinity = { mask, static_cast(i), { 0, 0, 0 } }; - SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, nullptr); - break; - } - } -#endif -} - -inline bool SetThreadRRPriority(boost::thread *thread, int pri = 0) { -#if defined (__linux__) || defined (__APPLE__) || defined(__CYGWIN__) || defined(__OpenBSD__) || defined(__FreeBSD__) - { - const pthread_t tid = (pthread_t)thread->native_handle(); - - int policy = SCHED_FIFO; - int sysMinPriority = sched_get_priority_min(policy); - struct sched_param param; - param.sched_priority = sysMinPriority + pri; - - return pthread_setschedparam(tid, policy, &param); - } -#elif defined (WIN32) - { - const HANDLE tid = (HANDLE)thread->native_handle(); - if (!SetPriorityClass(tid, HIGH_PRIORITY_CLASS)) - return false; - else - return true; - - /*if (!SetThreadPriority(tid, THREAD_PRIORITY_HIGHEST)) - return false; - else - return true;*/ - } -#endif -} +extern size_t GetHardwareThreadCount(); +extern void SetThreadGroupAffinity(const size_t threadIndex); +extern bool SetThreadRRPriority(boost::thread *thread, int pri = 0); } diff --git a/release-notes.txt b/release-notes.txt index 5b98a4ede..bc19895ed 100644 --- a/release-notes.txt +++ b/release-notes.txt @@ -38,6 +38,7 @@ * RTPathOCLRenderEngine::WaitNewFrame() takes is not more synchronous with rendering refresh * The use of more than 64 logical processors on Windows is now extended to all render engines * It is now possible to render scenes without light sources (!) +* The use of more than 64 logical processors on Windows is now extended to all pre-processing tasks like caches, etc. 
### Fixed Bugs diff --git a/src/luxcore/luxcoreimpl.cpp b/src/luxcore/luxcoreimpl.cpp index 38db23052..1d71de0a9 100644 --- a/src/luxcore/luxcoreimpl.cpp +++ b/src/luxcore/luxcoreimpl.cpp @@ -432,7 +432,7 @@ SceneImpl::SceneImpl(const luxrays::Properties &props, const float imageScale) { SceneImpl::SceneImpl(const string &fileName, const float imageScale) { camera = new CameraImpl(*this); - const string ext = slg::GetFileNameExt(fileName); + const string ext = luxrays::GetFileNameExt(fileName); if (ext == ".bsc") { // The file is in a binary format scene = slg::Scene::LoadSerialized(fileName); diff --git a/src/luxrays/CMakeLists.txt b/src/luxrays/CMakeLists.txt index 3b77c0b0e..4693ddb7f 100644 --- a/src/luxrays/CMakeLists.txt +++ b/src/luxrays/CMakeLists.txt @@ -199,6 +199,7 @@ set(LUXRAYS_SRCS ${LuxRays_SOURCE_DIR}/src/luxrays/utils/ocl.cpp ${LuxRays_SOURCE_DIR}/src/luxrays/utils/safesave.cpp ${LuxRays_SOURCE_DIR}/src/luxrays/utils/serializationutils.cpp + ${LuxRays_SOURCE_DIR}/src/luxrays/utils/thread.cpp ${LuxRays_SOURCE_DIR}/src/luxrays/utils/ply/rply.cpp ${LuxRays_SOURCE_DIR}/src/luxrays/utils/properties.cpp ${LuxRays_SOURCE_DIR}/deps/cuew/src/cuew.cpp diff --git a/src/luxrays/utils/safesave.cpp b/src/luxrays/utils/safesave.cpp index d40a96f95..31640f1b2 100644 --- a/src/luxrays/utils/safesave.cpp +++ b/src/luxrays/utils/safesave.cpp @@ -26,7 +26,6 @@ using namespace std; using namespace luxrays; -using namespace slg; //------------------------------------------------------------------------------ // SafeSave diff --git a/src/luxrays/utils/thread.cpp b/src/luxrays/utils/thread.cpp new file mode 100644 index 000000000..d8a923ae8 --- /dev/null +++ b/src/luxrays/utils/thread.cpp @@ -0,0 +1,83 @@ +/*************************************************************************** + * Copyright 1998-2020 by authors (see AUTHORS.txt) * + * * + * This file is part of LuxCoreRender. 
* + * * + * Licensed under the Apache License, Version 2.0 (the "License"); * + * you may not use this file except in compliance with the License. * + * You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, software * + * distributed under the License is distributed on an "AS IS" BASIS, * + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.* + * See the License for the specific language governing permissions and * + * limitations under the License. * + ***************************************************************************/ + +#include "luxrays/utils/thread.h" + +using namespace std; +using namespace luxrays; + +//------------------------------------------------------------------------------ +// Thread related, utility functions +//------------------------------------------------------------------------------ + +size_t luxrays::GetHardwareThreadCount() { + // For Windows version greater than Windows 7,modern way of calculating processor count is used + // May not work with Windows version prior to Windows 7 +#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) + return (size_t)GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); +#else + return (size_t)boost::thread::hardware_concurrency(); +#endif +} + +void luxrays::SetThreadGroupAffinity(const size_t threadIndex) { + // Set thread affinity the modern way.May not work for Windows version prior to Windows7 +#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined (WIN64) + auto totalProcessors = 0U; + size_t processorIndex = threadIndex % GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); + + // Determine which processor group to bind the thread to. 
+ for (auto i = 0U; i < GetActiveProcessorGroupCount(); ++i) { + totalProcessors += GetActiveProcessorCount(i); + if (totalProcessors >= processorIndex) { + auto mask = (1ULL << GetActiveProcessorCount(i)) - 1; + GROUP_AFFINITY groupAffinity = { mask, static_cast(i), { 0, 0, 0 } }; + SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, nullptr); + break; + } + } +#endif +} + +bool luxrays::SetThreadRRPriority(boost::thread *thread, int pri) { +#if defined (__linux__) || defined (__APPLE__) || defined(__CYGWIN__) || defined(__OpenBSD__) || defined(__FreeBSD__) + { + const pthread_t tid = (pthread_t)thread->native_handle(); + + int policy = SCHED_FIFO; + int sysMinPriority = sched_get_priority_min(policy); + struct sched_param param; + param.sched_priority = sysMinPriority + pri; + + return pthread_setschedparam(tid, policy, &param); + } +#elif defined (WIN32) + { + const HANDLE tid = (HANDLE)thread->native_handle(); + if (!SetPriorityClass(tid, HIGH_PRIORITY_CLASS)) + return false; + else + return true; + + /*if (!SetThreadPriority(tid, THREAD_PRIORITY_HIGHEST)) + return false; + else + return true;*/ + } +#endif +} diff --git a/src/slg/engines/caches/photongi/photongicache.cpp b/src/slg/engines/caches/photongi/photongicache.cpp index d4cac4932..3e77ec13f 100644 --- a/src/slg/engines/caches/photongi/photongicache.cpp +++ b/src/slg/engines/caches/photongi/photongicache.cpp @@ -21,6 +21,8 @@ #include #include +#include "luxrays/utils/thread.h" + #include "slg/samplers/sobol.h" #include "slg/utils/pathdepthinfo.h" #include "slg/engines/caches/photongi/photongicache.h" @@ -104,7 +106,7 @@ void PhotonGICache::TracePhotons(const u_int seedBase, const u_int photonTracedC const bool indirectCacheDone, const bool causticCacheDone, boost::atomic &globalIndirectPhotonsTraced, boost::atomic &globalCausticPhotonsTraced, boost::atomic &globalIndirectSize, boost::atomic &globalCausticSize) { - const size_t renderThreadCount = boost::thread::hardware_concurrency(); + const size_t 
renderThreadCount = GetHardwareThreadCount(); vector renderThreads(renderThreadCount, nullptr); boost::atomic globalPhotonsCounter(0); @@ -158,7 +160,7 @@ void PhotonGICache::TracePhotons(const u_int seedBase, const u_int photonTracedC } void PhotonGICache::TracePhotons(const bool indirectEnabled, const bool causticEnabled) { - const size_t renderThreadCount = boost::thread::hardware_concurrency(); + const size_t renderThreadCount = GetHardwareThreadCount(); boost::atomic globalIndirectPhotonsTraced(0); boost::atomic globalCausticPhotonsTraced(0); diff --git a/src/slg/engines/caches/photongi/tracephotonsthread.cpp b/src/slg/engines/caches/photongi/tracephotonsthread.cpp index ae84862b2..baffcebab 100644 --- a/src/slg/engines/caches/photongi/tracephotonsthread.cpp +++ b/src/slg/engines/caches/photongi/tracephotonsthread.cpp @@ -18,6 +18,8 @@ #include +#include "luxrays/utils/thread.h" + #include "slg/scene/scene.h" #include "slg/engines/renderengine.h" #include "slg/engines/caches/photongi/photongicache.h" @@ -285,6 +287,9 @@ void TracePhotonsThread::RenderFunc() { // Initialization //-------------------------------------------------------------------------- + // This is really used only by Windows for 64+ threads support + SetThreadGroupAffinity(threadIndex); + RandomGenerator rndGen(seedBase + threadIndex); sampleBootSize = 7; diff --git a/src/slg/engines/cpurenderengine.cpp b/src/slg/engines/cpurenderengine.cpp index c64f33c79..52590b5db 100644 --- a/src/slg/engines/cpurenderengine.cpp +++ b/src/slg/engines/cpurenderengine.cpp @@ -18,11 +18,9 @@ #include -#include "slg/engines/cpurenderengine.h" +#include "luxrays/utils/thread.h" -#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) -#include -#endif +#include "slg/engines/cpurenderengine.h" using namespace std; using namespace luxrays; @@ -190,13 +188,7 @@ Properties CPURenderEngine::ToProperties(const Properties &cfg) { const Properties &CPURenderEngine::GetDefaultProps() { static 
Properties props = Properties() << RenderEngine::GetDefaultProps() << -//For Windows version greater than Windows 7,modern way of calculating processor count is used -//May not work with Windows version prior to Windows 7 -#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) - Property("native.threads.count")((int)GetActiveProcessorCount(ALL_PROCESSOR_GROUPS)); -#else - Property("native.threads.count")(boost::thread::hardware_concurrency()); -#endif + Property("native.threads.count")((u_int)GetHardwareThreadCount()); return props; } diff --git a/src/slg/engines/oclrenderengine.cpp b/src/slg/engines/oclrenderengine.cpp index 6a16f2b54..b1eb98b4b 100644 --- a/src/slg/engines/oclrenderengine.cpp +++ b/src/slg/engines/oclrenderengine.cpp @@ -19,13 +19,11 @@ #include "slg/engines/oclrenderengine.h" #include "luxrays/core/intersectiondevice.h" +#include "luxrays/utils/thread.h" #if !defined(LUXRAYS_DISABLE_OPENCL) #include "luxrays/devices/ocldevice.h" #endif -#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) -#include -#endif using namespace std; using namespace luxrays; @@ -162,13 +160,7 @@ const Properties &OCLRenderEngine::GetDefaultProps() { #endif Property("opencl.gpu.workgroup.size")(32) << Property("opencl.devices.select")("") << -//For Windows version greater than Windows 7,modern way of calculating processor count is used -//May not work with Windows version prior to Windows 7 -#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64) - Property("opencl.native.threads.count")((int)GetActiveProcessorCount(ALL_PROCESSOR_GROUPS)) << -#else - Property("opencl.native.threads.count")(boost::thread::hardware_concurrency()) << -#endif + Property("opencl.native.threads.count")((u_int)GetHardwareThreadCount()) << Property("opencl.outofcore.enable")(false); return props; diff --git a/src/slg/film/filmparse.cpp b/src/slg/film/filmparse.cpp index 4f6e6ccbb..069321941 100644 --- a/src/slg/film/filmparse.cpp +++ 
b/src/slg/film/filmparse.cpp @@ -21,6 +21,8 @@ #include #include "luxrays/utils/fileext.h" +#include "luxrays/utils/thread.h" + #include "slg/core/sdl.h" #include "slg/film/film.h" #include "slg/film/filters/filter.h" @@ -592,7 +594,7 @@ ImagePipeline *Film::CreateImagePipeline(const Properties &props, const string & const bool applyDenoise = props.Get(Property(prefix + ".applydenoise")(true)).Get(); const float prefilterThresholdStDevFactor = props.Get(Property(prefix + ".spikestddev")(2.f)).Get(); - const int threadCount = (userThreadCount > 0) ? userThreadCount : boost::thread::hardware_concurrency(); + const int threadCount = (userThreadCount > 0) ? userThreadCount : GetHardwareThreadCount(); imagePipeline->AddPlugin(new BCDDenoiserPlugin( warmUpSamplesPerPixel, diff --git a/src/slg/utils/film2sceneradius.cpp b/src/slg/utils/film2sceneradius.cpp index 140406ec9..1f34c9f51 100644 --- a/src/slg/utils/film2sceneradius.cpp +++ b/src/slg/utils/film2sceneradius.cpp @@ -20,6 +20,8 @@ #include +#include "luxrays/utils/thread.h" + #include "slg/cameras/camera.h" #include "slg/samplers/sobol.h" #include "slg/scene/scene.h" @@ -114,6 +116,9 @@ typedef struct Film2SceneRadiusThreadParams { } Film2SceneRadiusThreadParams; static void Film2SceneRadiusThread(Film2SceneRadiusThreadParams &params) { + // This is really used only by Windows for 64+ threads support + SetThreadGroupAffinity(params.threadIndex); + // Hard coded RR parameters const u_int rrDepth = 3; const float rrImportanceCap = .5f; @@ -261,7 +266,7 @@ float Film2SceneRadius(const Scene *scene, const float imagePlaneRadius, const float defaultRadius, const u_int maxPathDepth, const float timeStart, const float timeEnd, const Film2SceneRadiusValidator *validator) { - const size_t renderThreadCount = 1;//boost::thread::hardware_concurrency(); + const size_t renderThreadCount = GetHardwareThreadCount(); // Render 16 passes at 256 * 256 resolution const u_int workSize = 16 * 256 * 256 / renderThreadCount; diff --git 
a/src/slg/utils/scenevisibility.cpp b/src/slg/utils/scenevisibility.cpp index 40b3d3fd8..29ed6545d 100644 --- a/src/slg/utils/scenevisibility.cpp +++ b/src/slg/utils/scenevisibility.cpp @@ -21,6 +21,8 @@ #include #include +#include "luxrays/utils/thread.h" + #include "slg/core/indexoctree.h" #include "slg/scene/scene.h" #include "slg/engines/renderengine.h" @@ -106,6 +108,9 @@ void SceneVisibility::TraceVisibilityThread::RenderFunc() { // Initialization //-------------------------------------------------------------------------- + // This is really used only by Windows for 64+ threads support + SetThreadGroupAffinity(threadIndex); + const Scene *scene = sv.scene; const Camera *camera = scene->camera; @@ -338,7 +343,7 @@ SceneVisibility::~SceneVisibility() { template void SceneVisibility::Build() { - const size_t renderThreadCount = boost::thread::hardware_concurrency(); + const size_t renderThreadCount = GetHardwareThreadCount(); vector renderThreads(renderThreadCount, nullptr); SLG_LOG("SceneVisibility trace thread count: " << renderThreadCount);