Skip to content

WIP Memory Fences

M Hightower edited this page Jul 24, 2019 · 12 revisions

Memory Fences - What are they?

The terms memory fence, memory barrier, membar, and compiler barrier are often used to refer to the same thing. So far I have mostly found the topic defined in abstract terms with weak examples. It gets even more complicated when reading a fully detailed description that also covers prefetching, out-of-order execution, caching, and the rest of the multiprocessor realm. The writeup LINUX KERNEL MEMORY BARRIERS appears to be a comprehensive discussion of all that, but it is a very long read (TL;DR). My area of interest here, however, is the single-threaded, single-core scenario with possible interrupts. I think the subtopic of compiler barriers describes exactly what I am looking for.

...blah...blah...blah...

If you read nothing else, this one is very much worth the time! Memory Ordering at Compile Time.

Another important topic to understand well: Implications of pure and constant functions

So far it appears that a memory fence stops the compiler from reusing registers that hold memory values loaded before the fence; after the fence, these values will be reloaded from memory.

Compiler Memory Barriers:

  • function calls are natural barriers provided they are not: inline, pure or const
  • asm volatile("" ::: "memory")

Initial references: https://people.cs.pitt.edu/~xianeizhang/notes/cpp11_mem.html and https://github.com/esp8266/Arduino/issues/615 (the latter has more to do with IRQ enable/disable)

Simple test

// Test subject for the compiler-barrier experiment: *a is read once before the
// interrupt-masked region and once inside it, so the generated code shows
// whether the second read is a fresh load or a reused register value.
// Returns the value of *a that was read before interrupts were masked.
int just4Fun(int *a, int *b) {
  uint32_t old_ps;           // PS value handed back by xt_rsil(), written back at the end
  int result = *a;           // first read of *a, before the masked region
  old_ps = xt_rsil(15);      // NOTE(review): whether this also acts as a compiler memory barrier is what the test probes
  *b = *a;                   // second read of *a, inside the masked region
  xt_wsr_ps(old_ps);         // write the saved PS back
  return result;
}

Compiled to Xtensa assembly with no optimization, -O0

// just4Fun(int *a, int *b) as compiled at -O0.
// On entry a2 = "a" and a3 = "b"; the return value is left in a2.
// Stack frame (48 bytes, addressed through a15):
//   [0]  result      [4]  value returned by rsil      [8]  old_ps
//   [16] a           [20] b                           [44] saved a15
// Every C-level access goes through the stack, so *a is loaded from memory
// twice: once for "result" and once more for "*b = *a".
_Z8just4FunPiS_:
        addi    sp, sp, -48      // sp = sp - 48;           // Reserve space on stack for variables
        s32i.n  a15, sp, 44      // *((int*)&sp[44]) = a15; // Backup reg a15 to stack
        mov.n   a15, sp          // a15 = sp;               // a15 will be used as a base pointer
        s32i.n  a2, a15, 16      // *((int*)&a15[16]) = a2; // save "a" (arg1) on the stack
        s32i.n  a3, a15, 20      // *((int*)&a15[20]) = a3; // save "b" (arg2) on the stack
        l32i.n  a2, a15, 16      // a2 = *((int*)&a15[16]); // Load pointer "a" from stack into register a2
        l32i.n  a2, a2, 0        // a2 = *(int*)a2;         // effectively a2 = *a; (first read of *a)
        s32i.n  a2, a15, 0       // *(int*)a15 = a2;        // Save a2 (result) to stack

#   old_ps = xt_rsil(15);
        rsil    a2, 15           // a2 = ps;
                                 // ps.intlevel = 15;       // Disable interrupts
        s32i.n  a2, a15, 4       // *((int*)&a15[4]) = a2;  // Save previous PS (returned by rsil) to stack
        l32i.n  a2, a15, 4       // a2 = *((int*)&a15[4]);  // Load previous ps into a2
        s32i.n  a2, a15, 8       // *((int*)&a15[8]) = a2;  // Store it again as old_ps (frame offset 8)

#   *b = *a;
        l32i.n  a2, a15, 16      // a2 = *((int*)&a15[16]);  // get pointer "a" (arg1) from stack
        l32i.n  a3, a2, 0        // a3 = *(int*)a2;          // load value pointed to by "a" (second read of *a)
        l32i.n  a2, a15, 20      // a2 = *((int*)&a15[20]);  // get pointer "b" (arg2) from stack
        s32i.n  a3, a2, 0        // *(int*)a2 = a3;          // save value to memory pointed to by a2 (*b) +0

#   xt_wsr_ps(old_ps);
        l32i.n  a2, a15, 8       // a2 = *((int*)&a15[8]);   // Get back PS to restore INTLEVEL
        wsr     a2, ps           // ps = a2;                 // Restore interrupts
        isync                    // NOTE(review): presumably makes the PS write take effect before continuing - confirm in the Xtensa ISA reference

#   return result;
        l32i.n  a2, a15, 0       // a2 = *(int*)a15;         // get return value from stack
        mov.n   sp, a15          // sp = a15;                // restore sp
        l32i.n  a15, sp, 44      // a15 = *((int*)&sp[44]);  // restore a15
        addi    sp, sp, 48       // sp = sp + 48;            // restore sp for return
        ret.n