Skip to content

Commit

Permalink
The use of more than 64 logical processors on Windows is now extended…
Browse files Browse the repository at this point in the history
… to all pre-processing tasks like caches, etc.
  • Loading branch information
Dade916 committed Jun 14, 2020
1 parent 4cb96ab commit 240dd57
Show file tree
Hide file tree
Showing 14 changed files with 119 additions and 75 deletions.
2 changes: 1 addition & 1 deletion include/luxrays/utils/fileext.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include <boost/filesystem.hpp>
#include <boost/algorithm/string/case_conv.hpp>

namespace slg {
namespace luxrays {

inline std::string GetFileNameExt(const std::string &fileName) {
return boost::algorithm::to_lower_copy(boost::filesystem::path(fileName).extension().string());
Expand Down
49 changes: 3 additions & 46 deletions include/luxrays/utils/thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,52 +25,9 @@

namespace luxrays {

// Binds the calling thread to the Windows processor group that contains the
// logical processor selected by threadIndex (wrapped around the total number
// of active logical processors). This is what allows using more than 64
// logical processors on Windows. It is a no-op on every other platform.
//
// The processor group API may not work on Windows versions prior to Windows 7.
inline void SetThreadGroupAffinity(const u_int threadIndex) {
#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined (WIN64)
	const DWORD allProcessorCount = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
	if (allProcessorCount == 0) {
		// The query failed: keep the default affinity instead of computing
		// a modulo by zero
		return;
	}

	// Zero-based index of the logical processor, counted across all groups
	const DWORD processorIndex = static_cast<DWORD>(threadIndex % allProcessorCount);

	// Determine which processor group to bind the thread to
	DWORD processorsSoFar = 0;
	const WORD groupCount = GetActiveProcessorGroupCount();
	for (WORD group = 0; group < groupCount; ++group) {
		const DWORD groupProcessorCount = GetActiveProcessorCount(group);
		processorsSoFar += groupProcessorCount;

		// processorIndex is zero-based, so it belongs to the first group whose
		// cumulative processor count is strictly greater than the index
		if (processorIndex < processorsSoFar) {
			// A full group has 64 processors and shifting a 64bit value by 64
			// bits is undefined: build the "all processors" mask explicitly
			const unsigned long long allGroupProcessors = (groupProcessorCount >= 64) ?
				~0ULL : ((1ULL << groupProcessorCount) - 1ULL);

			GROUP_AFFINITY groupAffinity = { static_cast<KAFFINITY>(allGroupProcessors), group, { 0, 0, 0 } };
			// Explicitly call the Win32 API and not this function
			::SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, nullptr);
			break;
		}
	}
#else
	(void)threadIndex;
#endif
}

// Asks the operating system to raise the scheduling priority of the given
// thread.
//
// thread: the thread to update; a null pointer is rejected.
// pri: on POSIX platforms, the offset added to the minimum priority of the
//      SCHED_FIFO policy. It is not used on Windows.
//
// Returns true if the request has been accepted by the operating system and
// false otherwise (including on platforms without an implementation).
inline bool SetThreadRRPriority(boost::thread *thread, int pri = 0) {
	if (!thread)
		return false;

#if defined (__linux__) || defined (__APPLE__) || defined(__CYGWIN__) || defined(__OpenBSD__) || defined(__FreeBSD__)
	const pthread_t tid = (pthread_t)thread->native_handle();

	// NOTE(review): the function name suggests round-robin scheduling but the
	// policy requested here is SCHED_FIFO - confirm which one is intended
	const int policy = SCHED_FIFO;
	struct sched_param param;
	param.sched_priority = sched_get_priority_min(policy) + pri;

	// pthread_setschedparam() returns 0 on success and an error number on
	// failure: it must be compared with 0 to obtain a success flag
	return pthread_setschedparam(tid, policy, &param) == 0;
#elif defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64)
	(void)pri;

	const HANDLE tid = (HANDLE)thread->native_handle();

	// NOTE(review): SetPriorityClass() is documented to work on a process
	// handle while a thread handle is passed here; SetThreadPriority(tid,
	// THREAD_PRIORITY_HIGHEST) is presumably what was intended - confirm
	// before changing the call
	return SetPriorityClass(tid, HIGH_PRIORITY_CLASS) != 0;
#else
	// No implementation for this platform: report the failure instead of
	// flowing off the end of a function returning a value
	(void)pri;

	return false;
#endif
}
// Returns the number of logical processors: on Windows all the processor
// groups are taken into account, on the other platforms the value comes from
// boost::thread::hardware_concurrency()
extern size_t GetHardwareThreadCount();
// Binds the calling thread to a Windows processor group selected according to
// threadIndex (64+ threads support). It does nothing on the other platforms
extern void SetThreadGroupAffinity(const size_t threadIndex);
// Requests a higher scheduling priority for the given thread. On POSIX
// platforms, pri is added to the minimum priority of the SCHED_FIFO policy
extern bool SetThreadRRPriority(boost::thread *thread, int pri = 0);

}

Expand Down
1 change: 1 addition & 0 deletions release-notes.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
* RTPathOCLRenderEngine::WaitNewFrame() is no longer synchronous with rendering refresh
* The use of more than 64 logical processors on Windows is now extended to all render engines
* It is now possible to render scenes without light sources (!)
* The use of more than 64 logical processors on Windows is now extended to all pre-processing tasks like caches, etc.

### Fixed Bugs

Expand Down
2 changes: 1 addition & 1 deletion src/luxcore/luxcoreimpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ SceneImpl::SceneImpl(const luxrays::Properties &props, const float imageScale) {
SceneImpl::SceneImpl(const string &fileName, const float imageScale) {
camera = new CameraImpl(*this);

const string ext = slg::GetFileNameExt(fileName);
const string ext = luxrays::GetFileNameExt(fileName);
if (ext == ".bsc") {
// The file is in a binary format
scene = slg::Scene::LoadSerialized(fileName);
Expand Down
1 change: 1 addition & 0 deletions src/luxrays/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,7 @@ set(LUXRAYS_SRCS
${LuxRays_SOURCE_DIR}/src/luxrays/utils/ocl.cpp
${LuxRays_SOURCE_DIR}/src/luxrays/utils/safesave.cpp
${LuxRays_SOURCE_DIR}/src/luxrays/utils/serializationutils.cpp
${LuxRays_SOURCE_DIR}/src/luxrays/utils/thread.cpp
${LuxRays_SOURCE_DIR}/src/luxrays/utils/ply/rply.cpp
${LuxRays_SOURCE_DIR}/src/luxrays/utils/properties.cpp
${LuxRays_SOURCE_DIR}/deps/cuew/src/cuew.cpp
Expand Down
1 change: 0 additions & 1 deletion src/luxrays/utils/safesave.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@

using namespace std;
using namespace luxrays;
using namespace slg;

//------------------------------------------------------------------------------
// SafeSave
Expand Down
83 changes: 83 additions & 0 deletions src/luxrays/utils/thread.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/***************************************************************************
* Copyright 1998-2020 by authors (see AUTHORS.txt) *
* *
* This file is part of LuxCoreRender. *
* *
* Licensed under the Apache License, Version 2.0 (the "License"); *
* you may not use this file except in compliance with the License. *
* You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an "AS IS" BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.*
* See the License for the specific language governing permissions and *
* limitations under the License. *
***************************************************************************/

#include "luxrays/utils/thread.h"

using namespace std;
using namespace luxrays;

//------------------------------------------------------------------------------
// Thread related, utility functions
//------------------------------------------------------------------------------

// Returns the number of hardware threads (logical processors) that can be
// used to size a pool of worker threads. The result is always at least 1.
//
// On Windows, the processor group API is used so that more than 64 logical
// processors are reported; it may not work on Windows versions prior to
// Windows 7. On the other platforms, the value comes from Boost.
size_t luxrays::GetHardwareThreadCount() {
#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64)
	const size_t count = (size_t)GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
#else
	const size_t count = (size_t)boost::thread::hardware_concurrency();
#endif

	// Both queries report 0 when the information is not available. The result
	// is used to size thread pools and as a divisor when splitting the work,
	// so fall back to a single thread in that case
	return (count > 0) ? count : 1;
}

// Binds the calling thread to the Windows processor group that contains the
// logical processor selected by threadIndex (wrapped around the total number
// of active logical processors). This is what allows using more than 64
// logical processors on Windows. It is a no-op on every other platform.
//
// The processor group API may not work on Windows versions prior to Windows 7.
void luxrays::SetThreadGroupAffinity(const size_t threadIndex) {
#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined (WIN64)
	const DWORD allProcessorCount = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS);
	if (allProcessorCount == 0) {
		// The query failed: keep the default affinity instead of computing
		// a modulo by zero
		return;
	}

	// Zero-based index of the logical processor, counted across all groups
	const DWORD processorIndex = static_cast<DWORD>(threadIndex % allProcessorCount);

	// Determine which processor group to bind the thread to
	DWORD processorsSoFar = 0;
	const WORD groupCount = GetActiveProcessorGroupCount();
	for (WORD group = 0; group < groupCount; ++group) {
		const DWORD groupProcessorCount = GetActiveProcessorCount(group);
		processorsSoFar += groupProcessorCount;

		// processorIndex is zero-based, so it belongs to the first group whose
		// cumulative processor count is strictly greater than the index
		if (processorIndex < processorsSoFar) {
			// A full group has 64 processors and shifting a 64bit value by 64
			// bits is undefined: build the "all processors" mask explicitly
			const unsigned long long allGroupProcessors = (groupProcessorCount >= 64) ?
				~0ULL : ((1ULL << groupProcessorCount) - 1ULL);

			GROUP_AFFINITY groupAffinity = { static_cast<KAFFINITY>(allGroupProcessors), group, { 0, 0, 0 } };
			// Explicitly call the Win32 API and not this function
			::SetThreadGroupAffinity(GetCurrentThread(), &groupAffinity, nullptr);
			break;
		}
	}
#else
	(void)threadIndex;
#endif
}

// Asks the operating system to raise the scheduling priority of the given
// thread.
//
// thread: the thread to update; a null pointer is rejected.
// pri: on POSIX platforms, the offset added to the minimum priority of the
//      SCHED_FIFO policy. It is not used on Windows.
//
// Returns true if the request has been accepted by the operating system and
// false otherwise (including on platforms without an implementation).
bool luxrays::SetThreadRRPriority(boost::thread *thread, int pri) {
	if (!thread)
		return false;

#if defined (__linux__) || defined (__APPLE__) || defined(__CYGWIN__) || defined(__OpenBSD__) || defined(__FreeBSD__)
	const pthread_t tid = (pthread_t)thread->native_handle();

	// NOTE(review): the function name suggests round-robin scheduling but the
	// policy requested here is SCHED_FIFO - confirm which one is intended
	const int policy = SCHED_FIFO;
	struct sched_param param;
	param.sched_priority = sched_get_priority_min(policy) + pri;

	// pthread_setschedparam() returns 0 on success and an error number on
	// failure: it must be compared with 0 to obtain a success flag
	return pthread_setschedparam(tid, policy, &param) == 0;
#elif defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64)
	(void)pri;

	const HANDLE tid = (HANDLE)thread->native_handle();

	// NOTE(review): SetPriorityClass() is documented to work on a process
	// handle while a thread handle is passed here; SetThreadPriority(tid,
	// THREAD_PRIORITY_HIGHEST) is presumably what was intended - confirm
	// before changing the call
	return SetPriorityClass(tid, HIGH_PRIORITY_CLASS) != 0;
#else
	// No implementation for this platform: report the failure instead of
	// flowing off the end of a function returning a value
	(void)pri;

	return false;
#endif
}
6 changes: 4 additions & 2 deletions src/slg/engines/caches/photongi/photongicache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <boost/format.hpp>
#include <boost/filesystem.hpp>

#include "luxrays/utils/thread.h"

#include "slg/samplers/sobol.h"
#include "slg/utils/pathdepthinfo.h"
#include "slg/engines/caches/photongi/photongicache.h"
Expand Down Expand Up @@ -104,7 +106,7 @@ void PhotonGICache::TracePhotons(const u_int seedBase, const u_int photonTracedC
const bool indirectCacheDone, const bool causticCacheDone,
boost::atomic<u_int> &globalIndirectPhotonsTraced, boost::atomic<u_int> &globalCausticPhotonsTraced,
boost::atomic<u_int> &globalIndirectSize, boost::atomic<u_int> &globalCausticSize) {
const size_t renderThreadCount = boost::thread::hardware_concurrency();
const size_t renderThreadCount = GetHardwareThreadCount();
vector<TracePhotonsThread *> renderThreads(renderThreadCount, nullptr);

boost::atomic<u_int> globalPhotonsCounter(0);
Expand Down Expand Up @@ -158,7 +160,7 @@ void PhotonGICache::TracePhotons(const u_int seedBase, const u_int photonTracedC
}

void PhotonGICache::TracePhotons(const bool indirectEnabled, const bool causticEnabled) {
const size_t renderThreadCount = boost::thread::hardware_concurrency();
const size_t renderThreadCount = GetHardwareThreadCount();

boost::atomic<u_int> globalIndirectPhotonsTraced(0);
boost::atomic<u_int> globalCausticPhotonsTraced(0);
Expand Down
5 changes: 5 additions & 0 deletions src/slg/engines/caches/photongi/tracephotonsthread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

#include <boost/format.hpp>

#include "luxrays/utils/thread.h"

#include "slg/scene/scene.h"
#include "slg/engines/renderengine.h"
#include "slg/engines/caches/photongi/photongicache.h"
Expand Down Expand Up @@ -285,6 +287,9 @@ void TracePhotonsThread::RenderFunc() {
// Initialization
//--------------------------------------------------------------------------

// This is really used only by Windows for 64+ threads support
SetThreadGroupAffinity(threadIndex);

RandomGenerator rndGen(seedBase + threadIndex);

sampleBootSize = 7;
Expand Down
14 changes: 3 additions & 11 deletions src/slg/engines/cpurenderengine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,9 @@

#include <boost/format.hpp>

#include "slg/engines/cpurenderengine.h"
#include "luxrays/utils/thread.h"

#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64)
#include <Windows.h>
#endif
#include "slg/engines/cpurenderengine.h"

using namespace std;
using namespace luxrays;
Expand Down Expand Up @@ -190,13 +188,7 @@ Properties CPURenderEngine::ToProperties(const Properties &cfg) {
const Properties &CPURenderEngine::GetDefaultProps() {
static Properties props = Properties() <<
RenderEngine::GetDefaultProps() <<
//For Windows version greater than Windows 7,modern way of calculating processor count is used
//May not work with Windows version prior to Windows 7
#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64)
Property("native.threads.count")((int)GetActiveProcessorCount(ALL_PROCESSOR_GROUPS));
#else
Property("native.threads.count")(boost::thread::hardware_concurrency());
#endif
Property("native.threads.count")((u_int)GetHardwareThreadCount());

return props;
}
Expand Down
12 changes: 2 additions & 10 deletions src/slg/engines/oclrenderengine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@
#include "slg/engines/oclrenderengine.h"

#include "luxrays/core/intersectiondevice.h"
#include "luxrays/utils/thread.h"
#if !defined(LUXRAYS_DISABLE_OPENCL)
#include "luxrays/devices/ocldevice.h"
#endif

#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64)
#include <Windows.h>
#endif

using namespace std;
using namespace luxrays;
Expand Down Expand Up @@ -162,13 +160,7 @@ const Properties &OCLRenderEngine::GetDefaultProps() {
#endif
Property("opencl.gpu.workgroup.size")(32) <<
Property("opencl.devices.select")("") <<
//For Windows version greater than Windows 7,modern way of calculating processor count is used
//May not work with Windows version prior to Windows 7
#if defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(WIN64)
Property("opencl.native.threads.count")((int)GetActiveProcessorCount(ALL_PROCESSOR_GROUPS)) <<
#else
Property("opencl.native.threads.count")(boost::thread::hardware_concurrency()) <<
#endif
Property("opencl.native.threads.count")((u_int)GetHardwareThreadCount()) <<
Property("opencl.outofcore.enable")(false);

return props;
Expand Down
4 changes: 3 additions & 1 deletion src/slg/film/filmparse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <boost/lexical_cast.hpp>

#include "luxrays/utils/fileext.h"
#include "luxrays/utils/thread.h"

#include "slg/core/sdl.h"
#include "slg/film/film.h"
#include "slg/film/filters/filter.h"
Expand Down Expand Up @@ -592,7 +594,7 @@ ImagePipeline *Film::CreateImagePipeline(const Properties &props, const string &
const bool applyDenoise = props.Get(Property(prefix + ".applydenoise")(true)).Get<bool>();
const float prefilterThresholdStDevFactor = props.Get(Property(prefix + ".spikestddev")(2.f)).Get<float>();

const int threadCount = (userThreadCount > 0) ? userThreadCount : boost::thread::hardware_concurrency();
const int threadCount = (userThreadCount > 0) ? userThreadCount : GetHardwareThreadCount();

imagePipeline->AddPlugin(new BCDDenoiserPlugin(
warmUpSamplesPerPixel,
Expand Down
7 changes: 6 additions & 1 deletion src/slg/utils/film2sceneradius.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

#include <boost/thread.hpp>

#include "luxrays/utils/thread.h"

#include "slg/cameras/camera.h"
#include "slg/samplers/sobol.h"
#include "slg/scene/scene.h"
Expand Down Expand Up @@ -114,6 +116,9 @@ typedef struct Film2SceneRadiusThreadParams {
} Film2SceneRadiusThreadParams;

static void Film2SceneRadiusThread(Film2SceneRadiusThreadParams &params) {
// This is really used only by Windows for 64+ threads support
SetThreadGroupAffinity(params.threadIndex);

// Hard coded RR parameters
const u_int rrDepth = 3;
const float rrImportanceCap = .5f;
Expand Down Expand Up @@ -261,7 +266,7 @@ float Film2SceneRadius(const Scene *scene,
const float imagePlaneRadius, const float defaultRadius,
const u_int maxPathDepth, const float timeStart, const float timeEnd,
const Film2SceneRadiusValidator *validator) {
const size_t renderThreadCount = 1;//boost::thread::hardware_concurrency();
const size_t renderThreadCount = GetHardwareThreadCount();

// Render 16 passes at 256 * 256 resolution
const u_int workSize = 16 * 256 * 256 / renderThreadCount;
Expand Down
7 changes: 6 additions & 1 deletion src/slg/utils/scenevisibility.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <boost/thread.hpp>
#include <boost/thread/mutex.hpp>

#include "luxrays/utils/thread.h"

#include "slg/core/indexoctree.h"
#include "slg/scene/scene.h"
#include "slg/engines/renderengine.h"
Expand Down Expand Up @@ -106,6 +108,9 @@ void SceneVisibility<T>::TraceVisibilityThread::RenderFunc() {
// Initialization
//--------------------------------------------------------------------------

// This is really used only by Windows for 64+ threads support
SetThreadGroupAffinity(threadIndex);

const Scene *scene = sv.scene;
const Camera *camera = scene->camera;

Expand Down Expand Up @@ -338,7 +343,7 @@ SceneVisibility<T>::~SceneVisibility() {

template <class T>
void SceneVisibility<T>::Build() {
const size_t renderThreadCount = boost::thread::hardware_concurrency();
const size_t renderThreadCount = GetHardwareThreadCount();
vector<TraceVisibilityThread *> renderThreads(renderThreadCount, nullptr);
SLG_LOG("SceneVisibility trace thread count: " << renderThreadCount);

Expand Down

0 comments on commit 240dd57

Please sign in to comment.