From 2c11e65d7fb7fa26d2a08ad04ba9e2b389f8f33e Mon Sep 17 00:00:00 2001
From: Purdea Andrei <andrei@purdea.ro>
Date: Sat, 11 Apr 2020 03:14:28 +0300
Subject: [PATCH] Various fixes to how timer differences are calculated (#8585)

* tmk_core/common: Fixing TIMER_DIFF macro to calculate difference correctly after the timer wraps.

Let's go through an example, using the following macro:

If the first timer read is 0xe4 and the second one is 0x32, the timer wrapped.
If the timer would have had more bits, it's new value would have been 0x132,
and the correct difference in time is 0x132 - 0xe4 = 0x4e

old code TIMER_DIFF_8(0x32, 0xe4) = 0xff - 0xe4 + 0x32 = 0x4d, which is wrong.
new code TIMER_DIFF_8(0x32, 0xe4) = 0xff + 1 - 0xe4 + 0x32 = 0x4e, which is correct.

This also gives a chance for a smart compiler to optimize the code using normal
integer overflow.

For example on AVR, the following C code:
uint8_t __attribute__ ((noinline)) test(uint8_t current_timer, uint8_t start_timer)
{
    return TIMER_DIFF_8(current_timer, start_timer);
}
With the original code, it gets translated to the following list of instructions:
00004c6e <test>:
    4c6e:       98 2f           mov     r25, r24
    4c70:       86 1b           sub     r24, r22
    4c72:       96 17           cp      r25, r22
    4c74:       08 f4           brcc    .+2             ; 0x4c78 <test+0xa>
    4c76:       81 50           subi    r24, 0x01       ; 1
    4c78:       08 95           ret
But with this commit, it gets translated to a single instruction:
00004c40 <test>:
    4c40:       86 1b           sub     r24, r22
    4c42:       08 95           ret

This unfortunately doesn't always work so nicely, for example the following C code:
int __attribute__ ((noinline)) test(uint8_t current_timer, uint8_t start_timer)
{
    return TIMER_DIFF_8(current_timer, start_timer);
}
(Note: return type changed to int)
With the original code it gets translated to:
00004c6e <test>:
    4c6e:       28 2f           mov     r18, r24
    4c70:       30 e0           ldi     r19, 0x00       ; 0
    4c72:       46 2f           mov     r20, r22
    4c74:       50 e0           ldi     r21, 0x00       ; 0
    4c76:       86 17           cp      r24, r22
    4c78:       20 f0           brcs    .+8             ; 0x4c82 <test+0x14>
    4c7a:       c9 01           movw    r24, r18
    4c7c:       84 1b           sub     r24, r20
    4c7e:       95 0b           sbc     r25, r21
    4c80:       08 95           ret
    4c82:       c9 01           movw    r24, r18
    4c84:       84 1b           sub     r24, r20
    4c86:       95 0b           sbc     r25, r21
    4c88:       81 50           subi    r24, 0x01       ; 1
    4c8a:       9f 4f           sbci    r25, 0xFF       ; 255
    4c8c:       08 95           ret
Wth this commit it gets translated to:
00004c40 <test>:
    4c40:       28 2f           mov     r18, r24
    4c42:       30 e0           ldi     r19, 0x00       ; 0
    4c44:       46 2f           mov     r20, r22
    4c46:       50 e0           ldi     r21, 0x00       ; 0
    4c48:       86 17           cp      r24, r22
    4c4a:       20 f0           brcs    .+8             ; 0x4c54 <test+0x14>
    4c4c:       c9 01           movw    r24, r18
    4c4e:       84 1b           sub     r24, r20
    4c50:       95 0b           sbc     r25, r21
    4c52:       08 95           ret
    4c54:       c9 01           movw    r24, r18
    4c56:       84 1b           sub     r24, r20
    4c58:       95 0b           sbc     r25, r21
    4c5a:       93 95           inc     r25
    4c5c:       08 95           ret
There is not much performance improvement in this case, however at least with this
commit it functions correctly.

Note: The following commit will improve compiler output for the latter example.

* tmk_core/common: Improve code generation for TIMER_DIFF* macros

Because of integer promotion the compiler is having a hard time generating
efficient code to calculate TIMER_DIFF* macros in some situations.
In the below example, the return value is "int", and this is causing the
trouble.

Example C code:

int __attribute__ ((noinline)) test(uint8_t current_timer, uint8_t start_timer)
{
    return TIMER_DIFF_8(current_timer, start_timer);
}

BEFORE: (with -Os)

00004c40 <test>:
    4c40:       28 2f           mov     r18, r24
    4c42:       30 e0           ldi     r19, 0x00       ; 0
    4c44:       46 2f           mov     r20, r22
    4c46:       50 e0           ldi     r21, 0x00       ; 0
    4c48:       86 17           cp      r24, r22
    4c4a:       20 f0           brcs    .+8             ; 0x4c54 <test+0x14>
    4c4c:       c9 01           movw    r24, r18
    4c4e:       84 1b           sub     r24, r20
    4c50:       95 0b           sbc     r25, r21
    4c52:       08 95           ret
    4c54:       c9 01           movw    r24, r18
    4c56:       84 1b           sub     r24, r20
    4c58:       95 0b           sbc     r25, r21
    4c5a:       93 95           inc     r25
    4c5c:       08 95           ret

AFTER: (with -Os)

00004c40 <test>:
    4c40:       86 1b           sub     r24, r22
    4c42:       90 e0           ldi     r25, 0x00       ; 0
    4c44:       08 95           ret

Note: the example is showing -Os but improvements can be seen at all optimization levels,
including -O0. We never use -O0, but I tested it to make sure that no extra code is
generated in that case.OA

* quantum/debounce: Fix custom wrapping timers in eager_pr and eager_pk debounce algorithms

Please see the below simulated sequence of events:
Column A is the 16-bit value returned by read_timer();
Column B is the value returned by custom_wrap_timer_read();
Column C is the original code: (timer_read() % MAX_DEBOUNCE)

    A,     B,     C
65530,    19,    30
65531,    20,    31
65532,    21,    32
65533,    22,    33
65534,    23,    34
65535,    24,    35
    0     25,     0
    1,    26,     1
    2,    27,     2
    3,    28,     3
    4,    29,     4
    5,    30,     5

read_timer() wraps about every 1.09 seconds, and so debouncing might
fail at these times without this commit.

* quantum/debounce/eager_pr and eager_pk: modifications for code readability according to code review.

* quantum/debounce/eager_pr and eager_pk: modifications for code readability according to code review. (2)
---
 quantum/debounce/eager_pk.c | 12 +++++++++++-
 quantum/debounce/eager_pr.c | 12 +++++++++++-
 tmk_core/common/timer.h     |  5 ++++-
 3 files changed, 26 insertions(+), 3 deletions(-)
diff --git a/quantum/debounce/eager_pk.c b/quantum/debounce/eager_pk.c
index 76b978d059e8..46768922432d 100644
--- a/quantum/debounce/eager_pk.c
+++ b/quantum/debounce/eager_pk.c
@@ -44,6 +44,16 @@ static bool                matrix_need_update;
 #define DEBOUNCE_ELAPSED 251
 #define MAX_DEBOUNCE (DEBOUNCE_ELAPSED - 1)
 
+static uint8_t wrapping_timer_read(void) {
+    static uint16_t time = 0;
+    static uint8_t  last_result = 0;
+    uint16_t new_time = timer_read();
+    uint16_t diff = new_time - time;
+    time = new_time;
+    last_result = (last_result + diff) % (MAX_DEBOUNCE + 1);
+    return last_result;
+}
+
 void update_debounce_counters(uint8_t num_rows, uint8_t current_time);
 void transfer_matrix_values(matrix_row_t raw[], matrix_row_t cooked[], uint8_t num_rows, uint8_t current_time);
 
@@ -59,7 +69,7 @@ void debounce_init(uint8_t num_rows) {
 }
 
 void debounce(matrix_row_t raw[], matrix_row_t cooked[], uint8_t num_rows, bool changed) {
-    uint8_t current_time = timer_read() % MAX_DEBOUNCE;
+    uint8_t current_time = wrapping_timer_read();
     if (counters_need_update) {
         update_debounce_counters(num_rows, current_time);
     }
diff --git a/quantum/debounce/eager_pr.c b/quantum/debounce/eager_pr.c
index 173ad15ee92b..41843aedbc6c 100644
--- a/quantum/debounce/eager_pr.c
+++ b/quantum/debounce/eager_pr.c
@@ -36,6 +36,16 @@ static bool                counters_need_update;
 #define DEBOUNCE_ELAPSED 251
 #define MAX_DEBOUNCE (DEBOUNCE_ELAPSED - 1)
 
+static uint8_t wrapping_timer_read(void) {
+    static uint16_t time = 0;
+    static uint8_t  last_result = 0;
+    uint16_t new_time = timer_read();
+    uint16_t diff = new_time - time;
+    time = new_time;
+    last_result = (last_result + diff) % (MAX_DEBOUNCE + 1);
+    return last_result;
+}
+
 void update_debounce_counters(uint8_t num_rows, uint8_t current_time);
 void transfer_matrix_values(matrix_row_t raw[], matrix_row_t cooked[], uint8_t num_rows, uint8_t current_time);
 
@@ -48,7 +58,7 @@ void debounce_init(uint8_t num_rows) {
 }
 
 void debounce(matrix_row_t raw[], matrix_row_t cooked[], uint8_t num_rows, bool changed) {
-    uint8_t current_time  = timer_read() % MAX_DEBOUNCE;
+    uint8_t current_time  = wrapping_timer_read();
     bool    needed_update = counters_need_update;
     if (counters_need_update) {
         update_debounce_counters(num_rows, current_time);
diff --git a/tmk_core/common/timer.h b/tmk_core/common/timer.h
index bbaae109d0f3..117b41d1be07 100644
--- a/tmk_core/common/timer.h
+++ b/tmk_core/common/timer.h
@@ -25,7 +25,10 @@ along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #    include "avr/timer_avr.h"
 #endif
 
-#define TIMER_DIFF(a, b, max) ((a) >= (b) ? (a) - (b) : (max) - (b) + (a))
+#define TIMER_DIFF(a, b, max) ((max == UINT8_MAX) ? ((uint8_t)((a)-(b))) : ( \
+                               (max == UINT16_MAX) ? ((uint16_t)((a)-(b))) : ( \
+                               (max == UINT32_MAX) ? ((uint32_t)((a)-(b))) : ( \
+                               (a) >= (b) ? (a) - (b) : (max) + 1 - (b) + (a) ))))
 #define TIMER_DIFF_8(a, b) TIMER_DIFF(a, b, UINT8_MAX)
 #define TIMER_DIFF_16(a, b) TIMER_DIFF(a, b, UINT16_MAX)
 #define TIMER_DIFF_32(a, b) TIMER_DIFF(a, b, UINT32_MAX)