Skip to content

Commit

Permalink
Recent kernels have more jitter in nanosleep(). Account for that.
Browse files Browse the repository at this point in the history
o Use a larger allowance for jitter, with the final microseconds to be
  manually inched towards with busy wait. Only do this on Pi2 or 3 where
  we have a core to burn.
o Always disable realtime throttling for Pi2 and 3 now (used to be an
  optional parameter), because now we are more likely to hit 100% core
  utilization.

Should fix #483
  • Loading branch information
hzeller committed Jan 22, 2018
1 parent 9e54dc9 commit 181e3bf
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 16 deletions.
4 changes: 0 additions & 4 deletions lib/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -117,10 +117,6 @@ HARDWARE_DESC?=regular
# some oddball old (typically one-colored) display, such as Hub12.
#DEFINES+=-DONLY_SINGLE_SUB_PANEL

# Experimental: Disable realtime throttling of the Linux kernel. Uncomment
# if you see periodic flickering in particular with longer display chains.
#DEFINES+=-DDISABLE_RT_THROTTLE

# If someone gives additional values on the make commandline e.g.
# make USER_DEFINES="-DSHOW_REFRESH_RATE"
DEFINES+=$(USER_DEFINES)
Expand Down
47 changes: 35 additions & 12 deletions lib/gpio.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,25 @@
*
* Note: A higher value here will result in more CPU use because of more busy
* waiting inching towards the real value (for all the cases that nanosleep()
* actually was better than this overhead) so you might consider defining
* DISABLE_RT_THROTTLE as well (see lib/Makefile)
* actually was better than this overhead).
*
* This might be interesting to tweak in particular if you have a realtime
* kernel with different characteristics.
*/
#define EMPIRICAL_NANOSLEEP_OVERHEAD_US 25

/*
* In a few cases on a standard kernel, we see that the overhead is actually
* even longer; these additional 35usec cover up for the 99.999%-ile.
* So ideally, we would always use this additional time and also busy-wait it,
* right ?
* However, that would take away a lot of CPU on older, one-core Raspberry Pis
* or Pi Zeros. They rely on us sleeping whenever possible so they can do work.
* So we only enable it if we have a newer Pi where we burn away one core
* anyway (and are isolated there with isolcpus=3).
*/
#define EMPIRICAL_NANOSLEEP_EXTRA_OVERHEAD_US 35

/* In order to determine useful values for above, set this to 1 and use the
* hardware pin-pulser.
* It will output a histogram atexit() of how much how often we were over
Expand Down Expand Up @@ -152,7 +163,7 @@ uint32_t GPIO::InitOutputs(uint32_t outputs) {
return output_bits_;
}

static bool IsRaspberryPi2() {
static bool DetermineIsRaspberryPi2() {
// TODO: there must be a better, more robust way. Can we ask the processor ?
char buffer[2048];
const int fd = open("/proc/cmdline", O_RDONLY);
Expand All @@ -169,6 +180,19 @@ static bool IsRaspberryPi2() {
return false;
}

// Cached front-end for DetermineIsRaspberryPi2(): the detection runs once, on
// the first call, and every later call returns the remembered answer.
static bool IsRaspberryPi2() {
  static const bool cached_is_pi2 = DetermineIsRaspberryPi2();
  return cached_is_pi2;
}

// Returns how many microseconds of nanosleep() jitter we budget for, i.e. the
// part of a sleep that is left over to be covered by busy waiting.
static uint32_t JitterAllowanceMicroseconds() {
  // On a Raspberry Pi2 or 3 there is CPU to spare, so we can afford a larger
  // busy-wait margin for accurate timing; elsewhere we add nothing on top of
  // the base overhead. The decision is made once, on first call.
  static const int extra_us
    = IsRaspberryPi2() ? EMPIRICAL_NANOSLEEP_EXTRA_OVERHEAD_US : 0;
  return EMPIRICAL_NANOSLEEP_OVERHEAD_US + extra_us;
}

static uint32_t *mmap_bcm_register(bool isRPi2, off_t register_offset) {
const off_t base = (isRPi2 ? BCM2709_PERI_BASE : BCM2708_PERI_BASE);

Expand Down Expand Up @@ -271,14 +295,12 @@ static void (*busy_sleep_impl)(long) = sleep_nanos_rpi_1;
// really want all we can get iff the machine has more cores and
// our RT-thread is locked onto one of these.
// So let's tell it not to do that.
// Only call if there is more than one core available.
static void DisableRealtimeThrottling() {
#ifdef DISABLE_RT_THROTTLE
if (!IsRaspberryPi2()) return; // Not safe if we don't have > 1 core.
// Best effort: if the sysctl file cannot be opened for writing, give up
// silently and leave the kernel setting as it is.
const int out = open("/proc/sys/kernel/sched_rt_runtime_us", O_WRONLY);
if (out < 0) return;
// Per the kernel's RT scheduling documentation, -1 in sched_rt_runtime_us
// means "no limit". The result of write() is not checked.
write(out, "-1", 2);
close(out);
#endif
}

bool Timers::Init() {
Expand Down Expand Up @@ -308,10 +330,11 @@ void Timers::sleep_nanos(long nanos) {
// We use the global 1Mhz hardware timer to measure the actual time period
// that has passed, and then inch forward for the remaining time with
// busy wait.
if (nanos > EMPIRICAL_NANOSLEEP_OVERHEAD_US * 1000 + 5000) {
static long kJitterAllowanceNanos = JitterAllowanceMicroseconds() * 1000;
if (nanos > kJitterAllowanceNanos + 5000) {
const uint32_t before = *timer1Mhz;
struct timespec sleep_time
= { 0, nanos - EMPIRICAL_NANOSLEEP_OVERHEAD_US * 1000 };
= { 0, nanos - kJitterAllowanceNanos };
nanosleep(&sleep_time, NULL);
const uint32_t after = *timer1Mhz;
const long nanoseconds_passed = 1000 * (uint32_t)(after - before);
Expand Down Expand Up @@ -344,9 +367,9 @@ static void sleep_nanos_rpi_2(long nanos) {
#if DEBUG_SLEEP_JITTER
static int overshoot_histogram_us[256] = {0};
static void print_overshoot_histogram() {
fprintf(stderr, "Overshoot histogram >= EMPIRICAL_NANOSLEEP_OVERHEAD_US=%d\n"
fprintf(stderr, "Overshoot histogram >= empirical overhead of %dus\n"
"%6s | %7s | %7s\n",
EMPIRICAL_NANOSLEEP_OVERHEAD_US, "usec", "count", "accum");
JitterAllowanceMicroseconds(), "usec", "count", "accum");
int total_count = 0;
for (int i = 0; i < 256; ++i) total_count += overshoot_histogram_us[i];
int running_count = 0;
Expand Down Expand Up @@ -395,7 +418,7 @@ class HardwarePinPulser : public PinPulser {

for (size_t i = 0; i < specs.size(); ++i) {
// Hints how long to nanosleep, already corrected for system overhead.
sleep_hints_.push_back(specs[i] / 1000 - EMPIRICAL_NANOSLEEP_OVERHEAD_US);
sleep_hints_.push_back(specs[i] / 1000 - JitterAllowanceMicroseconds());
}

const int base = specs[0];
Expand Down Expand Up @@ -480,7 +503,7 @@ class HardwarePinPulser : public PinPulser {
// took.
const int total_us = *timer1Mhz - start_time_;
const int nanoslept = total_us - already_elapsed_usec;
int overshoot = nanoslept - (to_sleep + EMPIRICAL_NANOSLEEP_OVERHEAD_US);
int overshoot = nanoslept - (to_sleep + JitterAllowanceMicroseconds());
if (overshoot < 0) overshoot = 0;
if (overshoot > 255) overshoot = 255;
overshoot_histogram_us[overshoot]++;
Expand Down

0 comments on commit 181e3bf

Please sign in to comment.