JIT fails to merge trivial blocks after jump threading #85547

jakobbotsch · 2023-04-28T22:45:21Z

[MethodImpl(MethodImplOptions.NoInlining)]
private static int Foo(bool cond)
{
    int x;
    if (cond)
    {
        x = 2;
    }
    else
    {
        x = 23;
    }

    int y;
    if (cond)
    {
        y = 2;
    }
    else
    {
        y = 23;
    }

    return x + y;
}

Output:

G_M7860_IG02:  ;; offset=0000H
       test     cl, cl
       je       SHORT G_M7860_IG04
                                                ;; size=4 bbWeight=1 PerfScore 1.25
G_M7860_IG03:  ;; offset=0004H
       mov      eax, 2
       jmp      SHORT G_M7860_IG05
                                                ;; size=7 bbWeight=0.50 PerfScore 1.12
G_M7860_IG04:  ;; offset=000BH
       mov      eax, 23
       jmp      SHORT G_M7860_IG06
                                                ;; size=7 bbWeight=1 PerfScore 2.25
G_M7860_IG05:  ;; offset=0012H
       mov      edx, 2
       jmp      SHORT G_M7860_IG07
                                                ;; size=7 bbWeight=0.50 PerfScore 1.12
G_M7860_IG06:  ;; offset=0019H
       mov      edx, 23
                                                ;; size=5 bbWeight=0.50 PerfScore 0.12
G_M7860_IG07:  ;; offset=001EH
       add      eax, edx
                                                ;; size=2 bbWeight=1 PerfScore 0.25
G_M7860_IG08:  ;; offset=0020H
       ret

The blocks resulting in IG03/IG05 and IG04/IG06 should have been merged at some point to avoid those unnecessary unconditional jumps.

ghost · 2023-04-28T22:45:27Z

Tagging subscribers to this area: @JulieLeeMSFT, @jakobbotsch
See info in area-owners.md if you want to be subscribed.

Issue Details

[MethodImpl(MethodImplOptions.NoInlining)]
private static int Foo(bool cond)
{
    int x;
    if (cond)
    {
        x = 2;
    }
    else
    {
        x = 23;
    }

    int y;
    if (cond)
    {
        y = 2;
    }
    else
    {
        y = 23;
    }

    return x + y;
}

Output:

G_M7860_IG02:  ;; offset=0000H
       test     cl, cl
       je       SHORT G_M7860_IG04
                                                ;; size=4 bbWeight=1 PerfScore 1.25
G_M7860_IG03:  ;; offset=0004H
       mov      eax, 2
       jmp      SHORT G_M7860_IG05
                                                ;; size=7 bbWeight=0.50 PerfScore 1.12
G_M7860_IG04:  ;; offset=000BH
       mov      eax, 23
       jmp      SHORT G_M7860_IG06
                                                ;; size=7 bbWeight=1 PerfScore 2.25
G_M7860_IG05:  ;; offset=0012H
       mov      edx, 2
       jmp      SHORT G_M7860_IG07
                                                ;; size=7 bbWeight=0.50 PerfScore 1.12
G_M7860_IG06:  ;; offset=0019H
       mov      edx, 23
                                                ;; size=5 bbWeight=0.50 PerfScore 0.12
G_M7860_IG07:  ;; offset=001EH
       add      eax, edx
                                                ;; size=2 bbWeight=1 PerfScore 0.25
G_M7860_IG08:  ;; offset=0020H
       ret

IG03/IG05 and IG04/IG06 should have been merged at some point to avoid those unnecessary unconditional jumps.

Author:	jakobbotsch
Assignees:	-
Labels:	`area-CodeGen-coreclr`
Milestone:	-

jl0pd · 2023-07-18T05:27:16Z

Not sure if this is same issue, but Jit fails to optimize jump in following case:

.method static int32 IndexOf(int32[], int32)
    {
        .locals init (int32)
        
    Condition:
        ldloc.0
        ldarg.0
        ldlen        
        conv.i4
        bge NotFound
        
    Body:
        ldarg.0
        ldloc.0
        ldelem.i4
        ldarg.1
        beq Found
        
    Increment:
        ldloc.0
        ldc.i4.1
        add
        stloc.0
        br Condition

    Found:
        ldloc.0
        br Ret
        
    NotFound:
        ldc.i4.m1

    Ret:
        ret
    }

Produces

Program.IndexOf(Int32[], Int32)
    L0000: push ebp
    L0001: mov ebp, esp
    L0003: push esi
    L0004: xor eax, eax
    L0006: mov esi, [ecx+4]
    L0009: cmp esi, eax
    L000b: jle short L0018
    L000d: cmp [ecx+eax*4+8], edx
    L0011: je short L0016
    L0013: inc eax
    L0014: jmp short L0006
    L0016: jmp short L001d
    L0018: mov eax, 0xffffffff
    L001d: pop esi
    L001e: pop ebp
    L001f: ret

Where L0011: je short L0016 can be simplified to L0011: je short L001d, which will remove L0016 and save 1 unconditional jump

AndyAyersMS · 2024-12-18T17:18:42Z

@jakobbotsch 's example is now a bit cleaner. One might consider pushing the add up (eg if we're at a join and all phi values for some local operations are known in each pred (but different) consider tail duplicating the operations.

; Method Program:<<Main>$>g__Foo|0_0(ubyte):int (FullOpts)
G_M49102_IG01:  ;; offset=0x0000
						;; size=0 bbWeight=1 PerfScore 0.00

G_M49102_IG02:  ;; offset=0x0000
       movzx    rax, cl
       test     eax, eax
       jne      SHORT G_M49102_IG04
						;; size=7 bbWeight=1 PerfScore 1.50

G_M49102_IG03:  ;; offset=0x0007
       mov      eax, 23
       mov      ecx, 23
       jmp      SHORT G_M49102_IG05
						;; size=12 bbWeight=0.50 PerfScore 1.25

G_M49102_IG04:  ;; offset=0x0013
       mov      eax, 2
       mov      ecx, 2
						;; size=10 bbWeight=0.50 PerfScore 0.25

G_M49102_IG05:  ;; offset=0x001D
       add      eax, ecx
						;; size=2 bbWeight=1 PerfScore 0.25

G_M49102_IG06:  ;; offset=0x001F
       ret      
						;; size=1 bbWeight=1 PerfScore 1.00
; Total bytes of code: 32

@jl0pd 's example is now

G_M11661_IG01:  ;; offset=0x0000
       push     ebp
       mov      ebp, esp
       push     esi
       xor      eax, eax
                                                ;; size=6 bbWeight=1 PerfScore 2.50
G_M11661_IG02:  ;; offset=0x0006
       mov      esi, dword ptr [ecx+0x04]
       jmp      SHORT G_M11661_IG04
       align    [0 bytes for IG03]
                                                ;; size=5 bbWeight=1 PerfScore 4.00
G_M11661_IG03:  ;; offset=0x000B
       inc      eax
                                                ;; size=1 bbWeight=4 PerfScore 1.00
G_M11661_IG04:  ;; offset=0x000C
       cmp      esi, eax
       jle      SHORT G_M11661_IG07
                                                ;; size=4 bbWeight=8 PerfScore 10.00
G_M11661_IG05:  ;; offset=0x0010
       cmp      dword ptr [ecx+4*eax+0x08], edx
       jne      SHORT G_M11661_IG03
                                                ;; size=6 bbWeight=4 PerfScore 16.00
G_M11661_IG06:  ;; offset=0x0016
       pop      esi
       pop      ebp
       ret
                                                ;; size=3 bbWeight=1 PerfScore 2.00
G_M11661_IG07:  ;; offset=0x0019
       mov      eax, -1
       jmp      SHORT G_M11661_IG06
                                                ;; size=7 bbWeight=0.50 PerfScore 1.12

; Total bytes of code 32, prolog size 6, PerfScore 36.62, instruction count 17, allocated bytes for code 32 (MethodHash=3333d272) for method X:IndexOf(int[],int):int (FullOpts)

where there are no in-loop unconditional branches. Here one might like to see the loop be rotated and perhaps strength reduce the array access / down-count?

I think we can close this one.

dotnet-issue-labeler bot added the area-CodeGen-coreclr CLR JIT compiler in src/coreclr/src/jit and related components such as SuperPMI label Apr 28, 2023

ghost added the untriaged New issue has not been triaged by the area owner label Apr 28, 2023

MichalPetryka mentioned this issue Apr 29, 2023

Set lvSingleDef for non TYP_REF locals #85398

Merged

JulieLeeMSFT removed the untriaged New issue has not been triaged by the area owner label May 1, 2023

JulieLeeMSFT added this to the Future milestone May 1, 2023

AndyAyersMS closed this as completed Dec 18, 2024

github-actions bot locked and limited conversation to collaborators Jan 18, 2025

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

JIT fails to merge trivial blocks after jump threading #85547

JIT fails to merge trivial blocks after jump threading #85547

jakobbotsch commented Apr 28, 2023 •

edited

Loading

ghost commented Apr 28, 2023

jl0pd commented Jul 18, 2023

AndyAyersMS commented Dec 18, 2024

JIT fails to merge trivial blocks after jump threading #85547

JIT fails to merge trivial blocks after jump threading #85547

Comments

jakobbotsch commented Apr 28, 2023 • edited Loading

ghost commented Apr 28, 2023

jl0pd commented Jul 18, 2023

AndyAyersMS commented Dec 18, 2024

jakobbotsch commented Apr 28, 2023 •

edited

Loading