From 5d501096ca1fae74f910411cfeb0491d94c635b7 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 9 Oct 2020 10:57:37 -0700 Subject: [PATCH 01/17] [lldb] Update docs with new buildbot URLs Buildbot got upgraded and now the (LLDB) builders have different URLs. --- lldb/docs/resources/bots.rst | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lldb/docs/resources/bots.rst b/lldb/docs/resources/bots.rst index d9ddcde41abcc2..926259bd92beef 100644 --- a/lldb/docs/resources/bots.rst +++ b/lldb/docs/resources/bots.rst @@ -7,11 +7,15 @@ Buildbot LLVM Buildbot is the place where volunteers provide build machines. Everyone can `add a buildbot for LLDB `_. -* `lldb-x64-windows-ninja `_ -* `lldb-x86_64-debian `_ -* `lldb-aarch64-ubuntu `_ -* `lldb-arm-ubuntu `_ -* `lldb-x86_64-fedora `_ +* `lldb-x64-windows-ninja `_ +* `lldb-x86_64-debian `_ +* `lldb-aarch64-ubuntu `_ +* `lldb-arm-ubuntu `_ +* `lldb-x86_64-fedora `_ + +An overview of all LLDB builders can be found here: + +`http://lab.llvm.org:8011/#/builders?tags=lldb `_ GreenDragon ----------- From 191fbda5d2a5ceb4b5af894d987a69537b8431b4 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 9 Oct 2020 19:19:35 +0100 Subject: [PATCH 02/17] [ARM][MIPS] Add funnel shift test coverage Based on offline discussions regarding D89139 and D88783 - we want to make sure targets aren't doing anything particularly dumb Tests copied from aarch64 which has a mixture of general, legalization and special case tests --- llvm/test/CodeGen/ARM/funnel-shift-rot.ll | 367 +++++++++++++ llvm/test/CodeGen/ARM/funnel-shift.ll | 398 ++++++++++++++ llvm/test/CodeGen/Mips/funnel-shift-rot.ll | 415 ++++++++++++++ llvm/test/CodeGen/Mips/funnel-shift.ll | 601 +++++++++++++++++++++ 4 files changed, 1781 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/funnel-shift-rot.ll create mode 100644 llvm/test/CodeGen/ARM/funnel-shift.ll create mode 100644 llvm/test/CodeGen/Mips/funnel-shift-rot.ll create mode 100644 
llvm/test/CodeGen/Mips/funnel-shift.ll diff --git a/llvm/test/CodeGen/ARM/funnel-shift-rot.ll b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll new file mode 100644 index 00000000000000..55157875d355f9 --- /dev/null +++ b/llvm/test/CodeGen/ARM/funnel-shift-rot.ll @@ -0,0 +1,367 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 | FileCheck %s --check-prefixes=CHECK,SCALAR +; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON + +declare i8 @llvm.fshl.i8(i8, i8, i8) +declare i16 @llvm.fshl.i16(i16, i16, i16) +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +declare i8 @llvm.fshr.i8(i8, i8, i8) +declare i16 @llvm.fshr.i16(i16, i16, i16) +declare i32 @llvm.fshr.i32(i32, i32, i32) +declare i64 @llvm.fshr.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +; When first 2 operands match, it's a rotate. + +define i8 @rotl_i8_const_shift(i8 %x) { +; CHECK-LABEL: rotl_i8_const_shift: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r1, r0 +; CHECK-NEXT: lsl r0, r0, #3 +; CHECK-NEXT: orr r0, r0, r1, lsr #5 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3) + ret i8 %f +} + +define i64 @rotl_i64_const_shift(i64 %x) { +; CHECK-LABEL: rotl_i64_const_shift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r2, r0, #3 +; CHECK-NEXT: orr r2, r2, r1, lsr #29 +; CHECK-NEXT: lsl r1, r1, #3 +; CHECK-NEXT: orr r1, r1, r0, lsr #29 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: bx lr + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3) + ret i64 %f +} + +; When first 2 operands match, it's a rotate (by variable amount). 
+ +define i16 @rotl_i16(i16 %x, i16 %z) { +; CHECK-LABEL: rotl_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: and r2, r1, #15 +; CHECK-NEXT: rsb r1, r1, #0 +; CHECK-NEXT: and r1, r1, #15 +; CHECK-NEXT: lsl r2, r0, r2 +; CHECK-NEXT: uxth r0, r0 +; CHECK-NEXT: orr r0, r2, r0, lsr r1 +; CHECK-NEXT: bx lr + %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z) + ret i16 %f +} + +define i32 @rotl_i32(i32 %x, i32 %z) { +; CHECK-LABEL: rotl_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: rsb r1, r1, #0 +; CHECK-NEXT: ror r0, r0, r1 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z) + ret i32 %f +} + +define i64 @rotl_i64(i64 %x, i64 %z) { +; SCALAR-LABEL: rotl_i64: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: .save {r4, r5, r11, lr} +; SCALAR-NEXT: push {r4, r5, r11, lr} +; SCALAR-NEXT: rsb r3, r2, #0 +; SCALAR-NEXT: and r4, r2, #63 +; SCALAR-NEXT: and lr, r3, #63 +; SCALAR-NEXT: rsb r3, lr, #32 +; SCALAR-NEXT: lsl r2, r0, r4 +; SCALAR-NEXT: lsr r12, r0, lr +; SCALAR-NEXT: orr r3, r12, r1, lsl r3 +; SCALAR-NEXT: subs r12, lr, #32 +; SCALAR-NEXT: lsrpl r3, r1, r12 +; SCALAR-NEXT: subs r5, r4, #32 +; SCALAR-NEXT: movwpl r2, #0 +; SCALAR-NEXT: cmp r5, #0 +; SCALAR-NEXT: orr r2, r2, r3 +; SCALAR-NEXT: rsb r3, r4, #32 +; SCALAR-NEXT: lsr r3, r0, r3 +; SCALAR-NEXT: orr r3, r3, r1, lsl r4 +; SCALAR-NEXT: lslpl r3, r0, r5 +; SCALAR-NEXT: lsr r0, r1, lr +; SCALAR-NEXT: cmp r12, #0 +; SCALAR-NEXT: movwpl r0, #0 +; SCALAR-NEXT: orr r1, r3, r0 +; SCALAR-NEXT: mov r0, r2 +; SCALAR-NEXT: pop {r4, r5, r11, pc} +; +; NEON-LABEL: rotl_i64: +; NEON: @ %bb.0: +; NEON-NEXT: .save {r4, r5, r11, lr} +; NEON-NEXT: push {r4, r5, r11, lr} +; NEON-NEXT: and r12, r2, #63 +; NEON-NEXT: rsb r2, r2, #0 +; NEON-NEXT: rsb r3, r12, #32 +; NEON-NEXT: and r4, r2, #63 +; NEON-NEXT: subs lr, r12, #32 +; NEON-NEXT: lsr r3, r0, r3 +; NEON-NEXT: lsr r2, r1, r4 +; NEON-NEXT: orr r3, r3, r1, lsl r12 +; NEON-NEXT: lslpl r3, r0, lr +; NEON-NEXT: subs r5, r4, #32 +; NEON-NEXT: movwpl r2, #0 +; NEON-NEXT: cmp r5, #0 +; 
NEON-NEXT: orr r2, r3, r2 +; NEON-NEXT: lsr r3, r0, r4 +; NEON-NEXT: rsb r4, r4, #32 +; NEON-NEXT: lsl r0, r0, r12 +; NEON-NEXT: orr r3, r3, r1, lsl r4 +; NEON-NEXT: lsrpl r3, r1, r5 +; NEON-NEXT: cmp lr, #0 +; NEON-NEXT: movwpl r0, #0 +; NEON-NEXT: mov r1, r2 +; NEON-NEXT: orr r0, r0, r3 +; NEON-NEXT: pop {r4, r5, r11, pc} + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z) + ret i64 %f +} + +; Vector rotate. + +define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) { +; SCALAR-LABEL: rotl_v4i32: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: ldr r12, [sp] +; SCALAR-NEXT: rsb r12, r12, #0 +; SCALAR-NEXT: ror r0, r0, r12 +; SCALAR-NEXT: ldr r12, [sp, #4] +; SCALAR-NEXT: rsb r12, r12, #0 +; SCALAR-NEXT: ror r1, r1, r12 +; SCALAR-NEXT: ldr r12, [sp, #8] +; SCALAR-NEXT: rsb r12, r12, #0 +; SCALAR-NEXT: ror r2, r2, r12 +; SCALAR-NEXT: ldr r12, [sp, #12] +; SCALAR-NEXT: rsb r12, r12, #0 +; SCALAR-NEXT: ror r3, r3, r12 +; SCALAR-NEXT: bx lr +; +; NEON-LABEL: rotl_v4i32: +; NEON: @ %bb.0: +; NEON-NEXT: mov r12, sp +; NEON-NEXT: vld1.64 {d16, d17}, [r12] +; NEON-NEXT: vmov.i32 q10, #0x1f +; NEON-NEXT: vneg.s32 q9, q8 +; NEON-NEXT: vmov d23, r2, r3 +; NEON-NEXT: vand q9, q9, q10 +; NEON-NEXT: vand q8, q8, q10 +; NEON-NEXT: vmov d22, r0, r1 +; NEON-NEXT: vneg.s32 q9, q9 +; NEON-NEXT: vshl.u32 q8, q11, q8 +; NEON-NEXT: vshl.u32 q9, q11, q9 +; NEON-NEXT: vorr q8, q8, q9 +; NEON-NEXT: vmov r0, r1, d16 +; NEON-NEXT: vmov r2, r3, d17 +; NEON-NEXT: bx lr + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) + ret <4 x i32> %f +} + +; Vector rotate by constant splat amount. 
+ +define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) { +; SCALAR-LABEL: rotl_v4i32_rotl_const_shift: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: ror r0, r0, #29 +; SCALAR-NEXT: ror r1, r1, #29 +; SCALAR-NEXT: ror r2, r2, #29 +; SCALAR-NEXT: ror r3, r3, #29 +; SCALAR-NEXT: bx lr +; +; NEON-LABEL: rotl_v4i32_rotl_const_shift: +; NEON: @ %bb.0: +; NEON-NEXT: vmov d17, r2, r3 +; NEON-NEXT: vmov d16, r0, r1 +; NEON-NEXT: vshr.u32 q9, q8, #29 +; NEON-NEXT: vshl.i32 q8, q8, #3 +; NEON-NEXT: vorr q8, q8, q9 +; NEON-NEXT: vmov r0, r1, d16 +; NEON-NEXT: vmov r2, r3, d17 +; NEON-NEXT: bx lr + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) + ret <4 x i32> %f +} + +; Repeat everything for funnel shift right. + +; When first 2 operands match, it's a rotate. + +define i8 @rotr_i8_const_shift(i8 %x) { +; CHECK-LABEL: rotr_i8_const_shift: +; CHECK: @ %bb.0: +; CHECK-NEXT: uxtb r1, r0 +; CHECK-NEXT: lsr r1, r1, #3 +; CHECK-NEXT: orr r0, r1, r0, lsl #5 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3) + ret i8 %f +} + +define i32 @rotr_i32_const_shift(i32 %x) { +; CHECK-LABEL: rotr_i32_const_shift: +; CHECK: @ %bb.0: +; CHECK-NEXT: ror r0, r0, #3 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3) + ret i32 %f +} + +; When first 2 operands match, it's a rotate (by variable amount). 
+ +define i16 @rotr_i16(i16 %x, i16 %z) { +; CHECK-LABEL: rotr_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: and r2, r1, #15 +; CHECK-NEXT: rsb r1, r1, #0 +; CHECK-NEXT: and r1, r1, #15 +; CHECK-NEXT: uxth r3, r0 +; CHECK-NEXT: lsr r2, r3, r2 +; CHECK-NEXT: orr r0, r2, r0, lsl r1 +; CHECK-NEXT: bx lr + %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z) + ret i16 %f +} + +define i32 @rotr_i32(i32 %x, i32 %z) { +; CHECK-LABEL: rotr_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: ror r0, r0, r1 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z) + ret i32 %f +} + +define i64 @rotr_i64(i64 %x, i64 %z) { +; CHECK-LABEL: rotr_i64: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r11, lr} +; CHECK-NEXT: push {r4, r5, r11, lr} +; CHECK-NEXT: and lr, r2, #63 +; CHECK-NEXT: rsb r2, r2, #0 +; CHECK-NEXT: rsb r3, lr, #32 +; CHECK-NEXT: and r4, r2, #63 +; CHECK-NEXT: lsr r12, r0, lr +; CHECK-NEXT: orr r3, r12, r1, lsl r3 +; CHECK-NEXT: subs r12, lr, #32 +; CHECK-NEXT: lsl r2, r0, r4 +; CHECK-NEXT: lsrpl r3, r1, r12 +; CHECK-NEXT: subs r5, r4, #32 +; CHECK-NEXT: movwpl r2, #0 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: orr r2, r3, r2 +; CHECK-NEXT: rsb r3, r4, #32 +; CHECK-NEXT: lsr r3, r0, r3 +; CHECK-NEXT: orr r3, r3, r1, lsl r4 +; CHECK-NEXT: lslpl r3, r0, r5 +; CHECK-NEXT: lsr r0, r1, lr +; CHECK-NEXT: cmp r12, #0 +; CHECK-NEXT: movwpl r0, #0 +; CHECK-NEXT: orr r1, r0, r3 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: pop {r4, r5, r11, pc} + %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z) + ret i64 %f +} + +; Vector rotate. 
+ +define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) { +; SCALAR-LABEL: rotr_v4i32: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: ldr r12, [sp] +; SCALAR-NEXT: ror r0, r0, r12 +; SCALAR-NEXT: ldr r12, [sp, #4] +; SCALAR-NEXT: ror r1, r1, r12 +; SCALAR-NEXT: ldr r12, [sp, #8] +; SCALAR-NEXT: ror r2, r2, r12 +; SCALAR-NEXT: ldr r12, [sp, #12] +; SCALAR-NEXT: ror r3, r3, r12 +; SCALAR-NEXT: bx lr +; +; NEON-LABEL: rotr_v4i32: +; NEON: @ %bb.0: +; NEON-NEXT: mov r12, sp +; NEON-NEXT: vld1.64 {d16, d17}, [r12] +; NEON-NEXT: vmov.i32 q9, #0x1f +; NEON-NEXT: vneg.s32 q10, q8 +; NEON-NEXT: vand q8, q8, q9 +; NEON-NEXT: vmov d23, r2, r3 +; NEON-NEXT: vand q9, q10, q9 +; NEON-NEXT: vneg.s32 q8, q8 +; NEON-NEXT: vmov d22, r0, r1 +; NEON-NEXT: vshl.u32 q9, q11, q9 +; NEON-NEXT: vshl.u32 q8, q11, q8 +; NEON-NEXT: vorr q8, q8, q9 +; NEON-NEXT: vmov r0, r1, d16 +; NEON-NEXT: vmov r2, r3, d17 +; NEON-NEXT: bx lr + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) + ret <4 x i32> %f +} + +; Vector rotate by constant splat amount. 
+ +define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) { +; SCALAR-LABEL: rotr_v4i32_const_shift: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: ror r0, r0, #3 +; SCALAR-NEXT: ror r1, r1, #3 +; SCALAR-NEXT: ror r2, r2, #3 +; SCALAR-NEXT: ror r3, r3, #3 +; SCALAR-NEXT: bx lr +; +; NEON-LABEL: rotr_v4i32_const_shift: +; NEON: @ %bb.0: +; NEON-NEXT: vmov d17, r2, r3 +; NEON-NEXT: vmov d16, r0, r1 +; NEON-NEXT: vshl.i32 q9, q8, #29 +; NEON-NEXT: vshr.u32 q8, q8, #3 +; NEON-NEXT: vorr q8, q8, q9 +; NEON-NEXT: vmov r0, r1, d16 +; NEON-NEXT: vmov r2, r3, d17 +; NEON-NEXT: bx lr + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) + ret <4 x i32> %f +} + +define i32 @rotl_i32_shift_by_bitwidth(i32 %x) { +; CHECK-LABEL: rotl_i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32) + ret i32 %f +} + +define i32 @rotr_i32_shift_by_bitwidth(i32 %x) { +; CHECK-LABEL: rotr_i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32) + ret i32 %f +} + +define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) { +; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) + ret <4 x i32> %f +} + +define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) { +; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) + ret <4 x i32> %f +} + diff --git a/llvm/test/CodeGen/ARM/funnel-shift.ll b/llvm/test/CodeGen/ARM/funnel-shift.ll new file mode 100644 index 00000000000000..a8b6aff767a748 --- /dev/null +++ b/llvm/test/CodeGen/ARM/funnel-shift.ll @@ -0,0 +1,398 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-eabi -mattr=+v6t2 | FileCheck %s --check-prefixes=CHECK,SCALAR +; RUN: 
llc < %s -mtriple=arm-eabi -mattr=+v6t2 -mattr=+neon | FileCheck %s --check-prefixes=CHECK,NEON + +declare i8 @llvm.fshl.i8(i8, i8, i8) +declare i16 @llvm.fshl.i16(i16, i16, i16) +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +declare i8 @llvm.fshr.i8(i8, i8, i8) +declare i16 @llvm.fshr.i16(i16, i16, i16) +declare i32 @llvm.fshr.i32(i32, i32, i32) +declare i64 @llvm.fshr.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +; General case - all operands can be variables. + +define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) { +; CHECK-LABEL: fshl_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: and r2, r2, #15 +; CHECK-NEXT: mov r3, #31 +; CHECK-NEXT: lsl r1, r1, #16 +; CHECK-NEXT: bic r3, r3, r2 +; CHECK-NEXT: lsl r0, r0, r2 +; CHECK-NEXT: lsr r1, r1, #1 +; CHECK-NEXT: orr r0, r0, r1, lsr r3 +; CHECK-NEXT: bx lr + %f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z) + ret i16 %f +} + +define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: fshl_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r3, #31 +; CHECK-NEXT: lsr r1, r1, #1 +; CHECK-NEXT: bic r3, r3, r2 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: lsl r0, r0, r2 +; CHECK-NEXT: orr r0, r0, r1, lsr r3 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z) + ret i32 %f +} + +; Verify that weird types are minimally supported. 
+declare i37 @llvm.fshl.i37(i37, i37, i37) +define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { +; CHECK-LABEL: fshl_i37: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: ldr r0, [sp, #24] +; CHECK-NEXT: mov r6, r3 +; CHECK-NEXT: ldr r1, [sp, #28] +; CHECK-NEXT: mov r7, r2 +; CHECK-NEXT: mov r2, #37 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: bl __aeabi_uldivmod +; CHECK-NEXT: mov r0, #63 +; CHECK-NEXT: bic r1, r0, r2 +; CHECK-NEXT: lsl r0, r6, #27 +; CHECK-NEXT: lsl r3, r7, #27 +; CHECK-NEXT: orr r0, r0, r7, lsr #5 +; CHECK-NEXT: and r2, r2, #63 +; CHECK-NEXT: lsrs r7, r0, #1 +; CHECK-NEXT: rrx r0, r3 +; CHECK-NEXT: rsb r3, r1, #32 +; CHECK-NEXT: lsr r0, r0, r1 +; CHECK-NEXT: lsl r6, r4, r2 +; CHECK-NEXT: orr r0, r0, r7, lsl r3 +; CHECK-NEXT: subs r3, r1, #32 +; CHECK-NEXT: lsr r1, r7, r1 +; CHECK-NEXT: lsrpl r0, r7, r3 +; CHECK-NEXT: subs r5, r2, #32 +; CHECK-NEXT: movwpl r6, #0 +; CHECK-NEXT: orr r0, r6, r0 +; CHECK-NEXT: rsb r6, r2, #32 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: lsr r6, r4, r6 +; CHECK-NEXT: orr r2, r6, r8, lsl r2 +; CHECK-NEXT: lslpl r2, r4, r5 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: movwpl r1, #0 +; CHECK-NEXT: orr r1, r2, r1 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, pc} + %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z) + ret i37 %f +} + +; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011 + +declare i7 @llvm.fshl.i7(i7, i7, i7) +define i7 @fshl_i7_const_fold() { +; CHECK-LABEL: fshl_i7_const_fold: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #67 +; CHECK-NEXT: bx lr + %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2) + ret i7 %f +} + +define i8 @fshl_i8_const_fold_overshift_1() { +; CHECK-LABEL: fshl_i8_const_fold_overshift_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #128 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15) + ret i8 %f +} + +define i8 @fshl_i8_const_fold_overshift_2() { +; CHECK-LABEL: 
fshl_i8_const_fold_overshift_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #120 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11) + ret i8 %f +} + +define i8 @fshl_i8_const_fold_overshift_3() { +; CHECK-LABEL: fshl_i8_const_fold_overshift_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #0 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8) + ret i8 %f +} + +; With constant shift amount, this is 'extr'. + +define i32 @fshl_i32_const_shift(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_const_shift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r0, r0, #9 +; CHECK-NEXT: orr r0, r0, r1, lsr #23 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9) + ret i32 %f +} + +; Check modulo math on shift amount. + +define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_const_overshift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r0, r0, #9 +; CHECK-NEXT: orr r0, r0, r1, lsr #23 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41) + ret i32 %f +} + +; 64-bit should also work. + +define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) { +; CHECK-LABEL: fshl_i64_const_overshift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsr r1, r2, #23 +; CHECK-NEXT: orr r2, r1, r3, lsl #9 +; CHECK-NEXT: lsl r0, r0, #9 +; CHECK-NEXT: orr r1, r0, r3, lsr #23 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: bx lr + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105) + ret i64 %f +} + +; This should work without any node-specific logic. + +define i8 @fshl_i8_const_fold() { +; CHECK-LABEL: fshl_i8_const_fold: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #128 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7) + ret i8 %f +} + +; Repeat everything for funnel shift right. + +; General case - all operands can be variables. 
+ +define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) { +; CHECK-LABEL: fshr_i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r3, #1 +; CHECK-NEXT: lsl r0, r0, #1 +; CHECK-NEXT: bfi r2, r3, #4, #28 +; CHECK-NEXT: mov r3, #31 +; CHECK-NEXT: bic r3, r3, r2 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: lsl r1, r1, #16 +; CHECK-NEXT: lsl r0, r0, r3 +; CHECK-NEXT: orr r0, r0, r1, lsr r2 +; CHECK-NEXT: bx lr + %f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z) + ret i16 %f +} + +define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: fshr_i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r3, #31 +; CHECK-NEXT: lsl r0, r0, #1 +; CHECK-NEXT: bic r3, r3, r2 +; CHECK-NEXT: and r2, r2, #31 +; CHECK-NEXT: lsl r0, r0, r3 +; CHECK-NEXT: orr r0, r0, r1, lsr r2 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) + ret i32 %f +} + +; Verify that weird types are minimally supported. +declare i37 @llvm.fshr.i37(i37, i37, i37) +define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { +; CHECK-LABEL: fshr_i37: +; CHECK: @ %bb.0: +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} +; CHECK-NEXT: mov r8, r1 +; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: ldr r0, [sp, #32] +; CHECK-NEXT: mov r6, r3 +; CHECK-NEXT: ldr r1, [sp, #36] +; CHECK-NEXT: mov r7, r2 +; CHECK-NEXT: mov r2, #37 +; CHECK-NEXT: mov r3, #0 +; CHECK-NEXT: bl __aeabi_uldivmod +; CHECK-NEXT: add r0, r2, #27 +; CHECK-NEXT: lsl r6, r6, #27 +; CHECK-NEXT: and r1, r0, #63 +; CHECK-NEXT: lsl r2, r7, #27 +; CHECK-NEXT: orr r7, r6, r7, lsr #5 +; CHECK-NEXT: mov r6, #63 +; CHECK-NEXT: rsb r3, r1, #32 +; CHECK-NEXT: lsr r2, r2, r1 +; CHECK-NEXT: subs r12, r1, #32 +; CHECK-NEXT: bic r6, r6, r0 +; CHECK-NEXT: orr r2, r2, r7, lsl r3 +; CHECK-NEXT: lsl r5, r9, #1 +; CHECK-NEXT: lsrpl r2, r7, r12 +; CHECK-NEXT: lsl r0, r5, r6 +; CHECK-NEXT: subs r4, r6, #32 +; CHECK-NEXT: lsl r3, r8, #1 +; CHECK-NEXT: movwpl r0, #0 +; CHECK-NEXT: orr r3, r3, r9, lsr #31 +; CHECK-NEXT: orr r0, r0, r2 
+; CHECK-NEXT: rsb r2, r6, #32 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: lsr r1, r7, r1 +; CHECK-NEXT: lsr r2, r5, r2 +; CHECK-NEXT: orr r2, r2, r3, lsl r6 +; CHECK-NEXT: lslpl r2, r5, r4 +; CHECK-NEXT: cmp r12, #0 +; CHECK-NEXT: movwpl r1, #0 +; CHECK-NEXT: orr r1, r2, r1 +; CHECK-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc} + %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) + ret i37 %f +} + +; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111 + +declare i7 @llvm.fshr.i7(i7, i7, i7) +define i7 @fshr_i7_const_fold() { +; CHECK-LABEL: fshr_i7_const_fold: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #31 +; CHECK-NEXT: bx lr + %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2) + ret i7 %f +} + +define i8 @fshr_i8_const_fold_overshift_1() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #254 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15) + ret i8 %f +} + +define i8 @fshr_i8_const_fold_overshift_2() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #225 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11) + ret i8 %f +} + +define i8 @fshr_i8_const_fold_overshift_3() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #255 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) + ret i8 %f +} + +; With constant shift amount, this is 'extr'. + +define i32 @fshr_i32_const_shift(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_const_shift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r0, r0, #23 +; CHECK-NEXT: orr r0, r0, r1, lsr #9 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9) + ret i32 %f +} + +; Check modulo math on shift amount. 41-32=9. 
+ +define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_const_overshift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r0, r0, #23 +; CHECK-NEXT: orr r0, r0, r1, lsr #9 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41) + ret i32 %f +} + +; 64-bit should also work. 105-64 = 41. + +define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) { +; CHECK-LABEL: fshr_i64_const_overshift: +; CHECK: @ %bb.0: +; CHECK-NEXT: lsl r2, r0, #23 +; CHECK-NEXT: lsl r1, r1, #23 +; CHECK-NEXT: orr r2, r2, r3, lsr #9 +; CHECK-NEXT: orr r1, r1, r0, lsr #9 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: bx lr + %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105) + ret i64 %f +} + +; This should work without any node-specific logic. + +define i8 @fshr_i8_const_fold() { +; CHECK-LABEL: fshr_i8_const_fold: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, #254 +; CHECK-NEXT: bx lr + %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7) + ret i8 %f +} + +define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32) + ret i32 %f +} + +define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov r0, r1 +; CHECK-NEXT: bx lr + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32) + ret i32 %f +} + +define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth: +; CHECK: @ %bb.0: +; CHECK-NEXT: bx lr + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> ) + ret <4 x i32> %f +} + +define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) { +; SCALAR-LABEL: fshr_v4i32_shift_by_bitwidth: +; SCALAR: @ %bb.0: +; SCALAR-NEXT: ldm sp, {r0, r1, r2, r3} +; SCALAR-NEXT: bx lr +; +; NEON-LABEL: fshr_v4i32_shift_by_bitwidth: +; NEON: @ %bb.0: +; NEON-NEXT: mov r0, sp +; NEON-NEXT: 
vld1.64 {d16, d17}, [r0] +; NEON-NEXT: vmov r0, r1, d16 +; NEON-NEXT: vmov r2, r3, d17 +; NEON-NEXT: bx lr + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> ) + ret <4 x i32> %f +} + diff --git a/llvm/test/CodeGen/Mips/funnel-shift-rot.ll b/llvm/test/CodeGen/Mips/funnel-shift-rot.ll new file mode 100644 index 00000000000000..49532f246838ac --- /dev/null +++ b/llvm/test/CodeGen/Mips/funnel-shift-rot.ll @@ -0,0 +1,415 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=mips-linux-gnu -march=mips -mcpu=mips32 | FileCheck %s --check-prefixes=CHECK,CHECK-BE +; RUN: llc < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 | FileCheck %s --check-prefixes=CHECK,CHECK-LE + +declare i8 @llvm.fshl.i8(i8, i8, i8) +declare i16 @llvm.fshl.i16(i16, i16, i16) +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +declare i8 @llvm.fshr.i8(i8, i8, i8) +declare i16 @llvm.fshr.i16(i16, i16, i16) +declare i32 @llvm.fshr.i32(i32, i32, i32) +declare i64 @llvm.fshr.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +; When first 2 operands match, it's a rotate. 
+ +define i8 @rotl_i8_const_shift(i8 %x) { +; CHECK-LABEL: rotl_i8_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: sll $1, $4, 3 +; CHECK-NEXT: andi $2, $4, 224 +; CHECK-NEXT: srl $2, $2, 5 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 + %f = call i8 @llvm.fshl.i8(i8 %x, i8 %x, i8 3) + ret i8 %f +} + +define i64 @rotl_i64_const_shift(i64 %x) { +; CHECK-LABEL: rotl_i64_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: srl $1, $5, 29 +; CHECK-NEXT: sll $2, $4, 3 +; CHECK-NEXT: or $2, $2, $1 +; CHECK-NEXT: srl $1, $4, 29 +; CHECK-NEXT: sll $3, $5, 3 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $3, $3, $1 + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 3) + ret i64 %f +} + +; When first 2 operands match, it's a rotate (by variable amount). + +define i16 @rotl_i16(i16 %x, i16 %z) { +; CHECK-LABEL: rotl_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $5, 15 +; CHECK-NEXT: sllv $1, $4, $1 +; CHECK-NEXT: negu $2, $5 +; CHECK-NEXT: andi $2, $2, 15 +; CHECK-NEXT: andi $3, $4, 65535 +; CHECK-NEXT: srlv $2, $3, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 + %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z) + ret i16 %f +} + +define i32 @rotl_i32(i32 %x, i32 %z) { +; CHECK-LABEL: rotl_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $5, 31 +; CHECK-NEXT: sllv $1, $4, $1 +; CHECK-NEXT: negu $2, $5 +; CHECK-NEXT: andi $2, $2, 31 +; CHECK-NEXT: srlv $2, $4, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z) + ret i32 %f +} + +define i64 @rotl_i64(i64 %x, i64 %z) { +; CHECK-BE-LABEL: rotl_i64: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: negu $1, $7 +; CHECK-BE-NEXT: andi $3, $1, 63 +; CHECK-BE-NEXT: srlv $6, $4, $3 +; CHECK-BE-NEXT: andi $1, $1, 32 +; CHECK-BE-NEXT: andi $2, $7, 63 +; CHECK-BE-NEXT: move $8, $6 +; CHECK-BE-NEXT: movn $8, $zero, $1 +; CHECK-BE-NEXT: sllv $9, $4, $2 +; CHECK-BE-NEXT: srl $10, $5, 1 +; CHECK-BE-NEXT: not $11, $2 +; CHECK-BE-NEXT: srlv $10, $10, $11 +; CHECK-BE-NEXT: or $9, $9, $10 +; 
CHECK-BE-NEXT: sllv $10, $5, $2 +; CHECK-BE-NEXT: andi $7, $7, 32 +; CHECK-BE-NEXT: movn $9, $10, $7 +; CHECK-BE-NEXT: or $2, $9, $8 +; CHECK-BE-NEXT: srlv $5, $5, $3 +; CHECK-BE-NEXT: not $3, $3 +; CHECK-BE-NEXT: sll $4, $4, 1 +; CHECK-BE-NEXT: sllv $3, $4, $3 +; CHECK-BE-NEXT: or $3, $3, $5 +; CHECK-BE-NEXT: movn $3, $6, $1 +; CHECK-BE-NEXT: movn $10, $zero, $7 +; CHECK-BE-NEXT: jr $ra +; CHECK-BE-NEXT: or $3, $10, $3 +; +; CHECK-LE-LABEL: rotl_i64: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: negu $1, $6 +; CHECK-LE-NEXT: andi $2, $1, 63 +; CHECK-LE-NEXT: srlv $7, $5, $2 +; CHECK-LE-NEXT: andi $1, $1, 32 +; CHECK-LE-NEXT: andi $3, $6, 63 +; CHECK-LE-NEXT: move $8, $7 +; CHECK-LE-NEXT: movn $8, $zero, $1 +; CHECK-LE-NEXT: sllv $9, $5, $3 +; CHECK-LE-NEXT: srl $10, $4, 1 +; CHECK-LE-NEXT: not $11, $3 +; CHECK-LE-NEXT: srlv $10, $10, $11 +; CHECK-LE-NEXT: or $9, $9, $10 +; CHECK-LE-NEXT: sllv $10, $4, $3 +; CHECK-LE-NEXT: andi $6, $6, 32 +; CHECK-LE-NEXT: movn $9, $10, $6 +; CHECK-LE-NEXT: or $3, $9, $8 +; CHECK-LE-NEXT: srlv $4, $4, $2 +; CHECK-LE-NEXT: not $2, $2 +; CHECK-LE-NEXT: sll $5, $5, 1 +; CHECK-LE-NEXT: sllv $2, $5, $2 +; CHECK-LE-NEXT: or $2, $2, $4 +; CHECK-LE-NEXT: movn $2, $7, $1 +; CHECK-LE-NEXT: movn $10, $zero, $6 +; CHECK-LE-NEXT: jr $ra +; CHECK-LE-NEXT: or $2, $10, $2 + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z) + ret i64 %f +} + +; Vector rotate. 
+ +define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) { +; CHECK-LABEL: rotl_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lw $1, 24($sp) +; CHECK-NEXT: negu $2, $1 +; CHECK-NEXT: lw $3, 20($sp) +; CHECK-NEXT: negu $8, $3 +; CHECK-NEXT: andi $8, $8, 31 +; CHECK-NEXT: andi $2, $2, 31 +; CHECK-NEXT: andi $3, $3, 31 +; CHECK-NEXT: andi $1, $1, 31 +; CHECK-NEXT: lw $9, 16($sp) +; CHECK-NEXT: sllv $1, $6, $1 +; CHECK-NEXT: srlv $6, $6, $2 +; CHECK-NEXT: sllv $3, $5, $3 +; CHECK-NEXT: srlv $5, $5, $8 +; CHECK-NEXT: andi $2, $9, 31 +; CHECK-NEXT: sllv $2, $4, $2 +; CHECK-NEXT: negu $8, $9 +; CHECK-NEXT: andi $8, $8, 31 +; CHECK-NEXT: srlv $4, $4, $8 +; CHECK-NEXT: lw $8, 28($sp) +; CHECK-NEXT: or $2, $2, $4 +; CHECK-NEXT: or $3, $3, $5 +; CHECK-NEXT: or $4, $1, $6 +; CHECK-NEXT: andi $1, $8, 31 +; CHECK-NEXT: sllv $1, $7, $1 +; CHECK-NEXT: negu $5, $8 +; CHECK-NEXT: andi $5, $5, 31 +; CHECK-NEXT: srlv $5, $7, $5 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $5, $1, $5 + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) + ret <4 x i32> %f +} + +; Vector rotate by constant splat amount. + +define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) { +; CHECK-LABEL: rotl_v4i32_rotl_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: srl $1, $5, 29 +; CHECK-NEXT: sll $3, $5, 3 +; CHECK-NEXT: srl $2, $4, 29 +; CHECK-NEXT: sll $4, $4, 3 +; CHECK-NEXT: or $2, $4, $2 +; CHECK-NEXT: or $3, $3, $1 +; CHECK-NEXT: srl $1, $6, 29 +; CHECK-NEXT: sll $4, $6, 3 +; CHECK-NEXT: or $4, $4, $1 +; CHECK-NEXT: srl $1, $7, 29 +; CHECK-NEXT: sll $5, $7, 3 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $5, $5, $1 + %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> ) + ret <4 x i32> %f +} + +; Repeat everything for funnel shift right. + +; When first 2 operands match, it's a rotate. 
+ +define i8 @rotr_i8_const_shift(i8 %x) { +; CHECK-LABEL: rotr_i8_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: sll $1, $4, 5 +; CHECK-NEXT: andi $2, $4, 248 +; CHECK-NEXT: srl $2, $2, 3 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i8 @llvm.fshr.i8(i8 %x, i8 %x, i8 3) + ret i8 %f +} + +define i32 @rotr_i32_const_shift(i32 %x) { +; CHECK-LABEL: rotr_i32_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: sll $1, $4, 29 +; CHECK-NEXT: srl $2, $4, 3 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3) + ret i32 %f +} + +; When first 2 operands match, it's a rotate (by variable amount). + +define i16 @rotr_i16(i16 %x, i16 %z) { +; CHECK-LABEL: rotr_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $5, 15 +; CHECK-NEXT: andi $2, $4, 65535 +; CHECK-NEXT: srlv $1, $2, $1 +; CHECK-NEXT: negu $2, $5 +; CHECK-NEXT: andi $2, $2, 15 +; CHECK-NEXT: sllv $2, $4, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 + %f = call i16 @llvm.fshr.i16(i16 %x, i16 %x, i16 %z) + ret i16 %f +} + +define i32 @rotr_i32(i32 %x, i32 %z) { +; CHECK-LABEL: rotr_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $5, 31 +; CHECK-NEXT: srlv $1, $4, $1 +; CHECK-NEXT: negu $2, $5 +; CHECK-NEXT: andi $2, $2, 31 +; CHECK-NEXT: sllv $2, $4, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z) + ret i32 %f +} + +define i64 @rotr_i64(i64 %x, i64 %z) { +; CHECK-BE-LABEL: rotr_i64: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: negu $1, $7 +; CHECK-BE-NEXT: andi $2, $1, 63 +; CHECK-BE-NEXT: sllv $6, $5, $2 +; CHECK-BE-NEXT: andi $1, $1, 32 +; CHECK-BE-NEXT: andi $3, $7, 63 +; CHECK-BE-NEXT: move $8, $6 +; CHECK-BE-NEXT: movn $8, $zero, $1 +; CHECK-BE-NEXT: srlv $9, $5, $3 +; CHECK-BE-NEXT: sll $10, $4, 1 +; CHECK-BE-NEXT: not $11, $3 +; CHECK-BE-NEXT: sllv $10, $10, $11 +; CHECK-BE-NEXT: or $9, $10, $9 +; CHECK-BE-NEXT: srlv $10, $4, $3 +; CHECK-BE-NEXT: andi $7, $7, 32 +; CHECK-BE-NEXT: movn $9, 
$10, $7 +; CHECK-BE-NEXT: or $3, $9, $8 +; CHECK-BE-NEXT: sllv $4, $4, $2 +; CHECK-BE-NEXT: not $2, $2 +; CHECK-BE-NEXT: srl $5, $5, 1 +; CHECK-BE-NEXT: srlv $2, $5, $2 +; CHECK-BE-NEXT: or $2, $4, $2 +; CHECK-BE-NEXT: movn $2, $6, $1 +; CHECK-BE-NEXT: movn $10, $zero, $7 +; CHECK-BE-NEXT: jr $ra +; CHECK-BE-NEXT: or $2, $10, $2 +; +; CHECK-LE-LABEL: rotr_i64: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: negu $1, $6 +; CHECK-LE-NEXT: andi $3, $1, 63 +; CHECK-LE-NEXT: sllv $7, $4, $3 +; CHECK-LE-NEXT: andi $1, $1, 32 +; CHECK-LE-NEXT: andi $2, $6, 63 +; CHECK-LE-NEXT: move $8, $7 +; CHECK-LE-NEXT: movn $8, $zero, $1 +; CHECK-LE-NEXT: srlv $9, $4, $2 +; CHECK-LE-NEXT: sll $10, $5, 1 +; CHECK-LE-NEXT: not $11, $2 +; CHECK-LE-NEXT: sllv $10, $10, $11 +; CHECK-LE-NEXT: or $9, $10, $9 +; CHECK-LE-NEXT: srlv $10, $5, $2 +; CHECK-LE-NEXT: andi $6, $6, 32 +; CHECK-LE-NEXT: movn $9, $10, $6 +; CHECK-LE-NEXT: or $2, $9, $8 +; CHECK-LE-NEXT: sllv $5, $5, $3 +; CHECK-LE-NEXT: not $3, $3 +; CHECK-LE-NEXT: srl $4, $4, 1 +; CHECK-LE-NEXT: srlv $3, $4, $3 +; CHECK-LE-NEXT: or $3, $5, $3 +; CHECK-LE-NEXT: movn $3, $7, $1 +; CHECK-LE-NEXT: movn $10, $zero, $6 +; CHECK-LE-NEXT: jr $ra +; CHECK-LE-NEXT: or $3, $10, $3 + %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z) + ret i64 %f +} + +; Vector rotate. 
+ +define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) { +; CHECK-LABEL: rotr_v4i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lw $1, 24($sp) +; CHECK-NEXT: negu $2, $1 +; CHECK-NEXT: lw $3, 20($sp) +; CHECK-NEXT: negu $8, $3 +; CHECK-NEXT: andi $8, $8, 31 +; CHECK-NEXT: andi $2, $2, 31 +; CHECK-NEXT: andi $3, $3, 31 +; CHECK-NEXT: andi $1, $1, 31 +; CHECK-NEXT: lw $9, 16($sp) +; CHECK-NEXT: srlv $1, $6, $1 +; CHECK-NEXT: sllv $6, $6, $2 +; CHECK-NEXT: srlv $3, $5, $3 +; CHECK-NEXT: sllv $5, $5, $8 +; CHECK-NEXT: andi $2, $9, 31 +; CHECK-NEXT: srlv $2, $4, $2 +; CHECK-NEXT: negu $8, $9 +; CHECK-NEXT: andi $8, $8, 31 +; CHECK-NEXT: sllv $4, $4, $8 +; CHECK-NEXT: lw $8, 28($sp) +; CHECK-NEXT: or $2, $2, $4 +; CHECK-NEXT: or $3, $3, $5 +; CHECK-NEXT: or $4, $1, $6 +; CHECK-NEXT: andi $1, $8, 31 +; CHECK-NEXT: srlv $1, $7, $1 +; CHECK-NEXT: negu $5, $8 +; CHECK-NEXT: andi $5, $5, 31 +; CHECK-NEXT: sllv $5, $7, $5 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $5, $1, $5 + %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) + ret <4 x i32> %f +} + +; Vector rotate by constant splat amount. 
 +
+define <4 x i32> @rotr_v4i32_const_shift(<4 x i32> %x) {
+; CHECK-LABEL: rotr_v4i32_const_shift:
+; CHECK: # %bb.0:
+; CHECK-NEXT: sll $1, $5, 29
+; CHECK-NEXT: srl $3, $5, 3
+; CHECK-NEXT: sll $2, $4, 29
+; CHECK-NEXT: srl $4, $4, 3
+; CHECK-NEXT: or $2, $4, $2
+; CHECK-NEXT: or $3, $3, $1
+; CHECK-NEXT: sll $1, $6, 29
+; CHECK-NEXT: srl $4, $6, 3
+; CHECK-NEXT: or $4, $4, $1
+; CHECK-NEXT: sll $1, $7, 29
+; CHECK-NEXT: srl $5, $7, 3
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: or $5, $5, $1
+ %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
+ ret <4 x i32> %f
+}
+
+define i32 @rotl_i32_shift_by_bitwidth(i32 %x) {
+; CHECK-LABEL: rotl_i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $2, $4
+ %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 32)
+ ret i32 %f
+}
+
+define i32 @rotr_i32_shift_by_bitwidth(i32 %x) {
+; CHECK-LABEL: rotr_i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $2, $4
+ %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 32)
+ ret i32 %f
+}
+
+define <4 x i32> @rotl_v4i32_shift_by_bitwidth(<4 x i32> %x) {
+; CHECK-LABEL: rotl_v4i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: move $2, $4
+; CHECK-NEXT: move $3, $5
+; CHECK-NEXT: move $4, $6
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $5, $7
+ %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
+ ret <4 x i32> %f
+}
+
+define <4 x i32> @rotr_v4i32_shift_by_bitwidth(<4 x i32> %x) {
+; CHECK-LABEL: rotr_v4i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: move $2, $4
+; CHECK-NEXT: move $3, $5
+; CHECK-NEXT: move $4, $6
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $5, $7
+ %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
+ ret <4 x i32> %f
+}
+
diff --git a/llvm/test/CodeGen/Mips/funnel-shift.ll b/llvm/test/CodeGen/Mips/funnel-shift.ll
new file mode 100644
index 00000000000000..47d3db18c00370
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/funnel-shift.ll
@@ -0,0 +1,601 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=mips-linux-gnu -march=mips -mcpu=mips32 | FileCheck %s --check-prefixes=CHECK,CHECK-BE +; RUN: llc < %s -mtriple=mipsel-linux-gnu -march=mipsel -mcpu=mips32 | FileCheck %s --check-prefixes=CHECK,CHECK-LE + +declare i8 @llvm.fshl.i8(i8, i8, i8) +declare i16 @llvm.fshl.i16(i16, i16, i16) +declare i32 @llvm.fshl.i32(i32, i32, i32) +declare i64 @llvm.fshl.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +declare i8 @llvm.fshr.i8(i8, i8, i8) +declare i16 @llvm.fshr.i16(i16, i16, i16) +declare i32 @llvm.fshr.i32(i32, i32, i32) +declare i64 @llvm.fshr.i64(i64, i64, i64) +declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) + +; General case - all operands can be variables. + +define i16 @fshl_i16(i16 %x, i16 %y, i16 %z) { +; CHECK-LABEL: fshl_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $6, 15 +; CHECK-NEXT: sllv $2, $4, $1 +; CHECK-NEXT: sll $3, $5, 16 +; CHECK-NEXT: srl $3, $3, 1 +; CHECK-NEXT: not $1, $1 +; CHECK-NEXT: andi $1, $1, 31 +; CHECK-NEXT: srlv $1, $3, $1 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z) + ret i16 %f +} + +define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: fshl_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $6, 31 +; CHECK-NEXT: sllv $1, $4, $1 +; CHECK-NEXT: srl $2, $5, 1 +; CHECK-NEXT: not $3, $6 +; CHECK-NEXT: andi $3, $3, 31 +; CHECK-NEXT: srlv $2, $2, $3 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $1, $2 + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z) + ret i32 %f +} + +; Verify that weird types are minimally supported. 
+declare i37 @llvm.fshl.i37(i37, i37, i37) +define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { +; CHECK-BE-LABEL: fshl_i37: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: addiu $sp, $sp, -40 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 40 +; CHECK-BE-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: .cfi_offset 31, -4 +; CHECK-BE-NEXT: .cfi_offset 19, -8 +; CHECK-BE-NEXT: .cfi_offset 18, -12 +; CHECK-BE-NEXT: .cfi_offset 17, -16 +; CHECK-BE-NEXT: .cfi_offset 16, -20 +; CHECK-BE-NEXT: move $16, $7 +; CHECK-BE-NEXT: move $17, $6 +; CHECK-BE-NEXT: move $18, $5 +; CHECK-BE-NEXT: move $19, $4 +; CHECK-BE-NEXT: lw $4, 56($sp) +; CHECK-BE-NEXT: lw $5, 60($sp) +; CHECK-BE-NEXT: addiu $6, $zero, 0 +; CHECK-BE-NEXT: jal __umoddi3 +; CHECK-BE-NEXT: addiu $7, $zero, 37 +; CHECK-BE-NEXT: not $1, $3 +; CHECK-BE-NEXT: andi $2, $3, 63 +; CHECK-BE-NEXT: not $4, $2 +; CHECK-BE-NEXT: srl $5, $18, 1 +; CHECK-BE-NEXT: sllv $6, $19, $2 +; CHECK-BE-NEXT: srlv $4, $5, $4 +; CHECK-BE-NEXT: andi $5, $1, 63 +; CHECK-BE-NEXT: srl $7, $16, 5 +; CHECK-BE-NEXT: sll $8, $17, 27 +; CHECK-BE-NEXT: or $7, $8, $7 +; CHECK-BE-NEXT: srl $8, $7, 1 +; CHECK-BE-NEXT: srlv $9, $8, $5 +; CHECK-BE-NEXT: andi $1, $1, 32 +; CHECK-BE-NEXT: move $10, $9 +; CHECK-BE-NEXT: movn $10, $zero, $1 +; CHECK-BE-NEXT: or $4, $6, $4 +; CHECK-BE-NEXT: sllv $6, $18, $2 +; CHECK-BE-NEXT: andi $3, $3, 32 +; CHECK-BE-NEXT: movn $4, $6, $3 +; CHECK-BE-NEXT: sll $7, $7, 31 +; CHECK-BE-NEXT: sll $2, $16, 27 +; CHECK-BE-NEXT: srl $11, $2, 1 +; CHECK-BE-NEXT: or $2, $4, $10 +; CHECK-BE-NEXT: movn $6, $zero, $3 +; CHECK-BE-NEXT: or $3, $11, $7 +; CHECK-BE-NEXT: srlv $3, $3, $5 +; CHECK-BE-NEXT: not $4, $5 +; CHECK-BE-NEXT: sll $5, $8, 1 +; CHECK-BE-NEXT: sllv $4, $5, $4 +; CHECK-BE-NEXT: or $3, $4, $3 +; CHECK-BE-NEXT: 
movn $3, $9, $1 +; CHECK-BE-NEXT: or $3, $6, $3 +; CHECK-BE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: jr $ra +; CHECK-BE-NEXT: addiu $sp, $sp, 40 +; +; CHECK-LE-LABEL: fshl_i37: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: addiu $sp, $sp, -40 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 40 +; CHECK-LE-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: .cfi_offset 31, -4 +; CHECK-LE-NEXT: .cfi_offset 19, -8 +; CHECK-LE-NEXT: .cfi_offset 18, -12 +; CHECK-LE-NEXT: .cfi_offset 17, -16 +; CHECK-LE-NEXT: .cfi_offset 16, -20 +; CHECK-LE-NEXT: move $16, $7 +; CHECK-LE-NEXT: move $17, $6 +; CHECK-LE-NEXT: move $18, $5 +; CHECK-LE-NEXT: move $19, $4 +; CHECK-LE-NEXT: lw $4, 56($sp) +; CHECK-LE-NEXT: lw $5, 60($sp) +; CHECK-LE-NEXT: addiu $6, $zero, 37 +; CHECK-LE-NEXT: jal __umoddi3 +; CHECK-LE-NEXT: addiu $7, $zero, 0 +; CHECK-LE-NEXT: not $1, $2 +; CHECK-LE-NEXT: andi $3, $2, 63 +; CHECK-LE-NEXT: not $4, $3 +; CHECK-LE-NEXT: srl $5, $19, 1 +; CHECK-LE-NEXT: sllv $6, $18, $3 +; CHECK-LE-NEXT: srlv $4, $5, $4 +; CHECK-LE-NEXT: andi $5, $1, 63 +; CHECK-LE-NEXT: srl $7, $17, 5 +; CHECK-LE-NEXT: sll $8, $16, 27 +; CHECK-LE-NEXT: or $7, $8, $7 +; CHECK-LE-NEXT: srl $8, $7, 1 +; CHECK-LE-NEXT: srlv $9, $8, $5 +; CHECK-LE-NEXT: andi $1, $1, 32 +; CHECK-LE-NEXT: move $10, $9 +; CHECK-LE-NEXT: movn $10, $zero, $1 +; CHECK-LE-NEXT: or $4, $6, $4 +; CHECK-LE-NEXT: sllv $6, $19, $3 +; CHECK-LE-NEXT: andi $2, $2, 32 +; CHECK-LE-NEXT: movn $4, $6, $2 +; CHECK-LE-NEXT: sll $7, $7, 31 +; CHECK-LE-NEXT: sll $3, $17, 27 +; 
CHECK-LE-NEXT: srl $11, $3, 1 +; CHECK-LE-NEXT: or $3, $4, $10 +; CHECK-LE-NEXT: movn $6, $zero, $2 +; CHECK-LE-NEXT: or $2, $11, $7 +; CHECK-LE-NEXT: srlv $2, $2, $5 +; CHECK-LE-NEXT: not $4, $5 +; CHECK-LE-NEXT: sll $5, $8, 1 +; CHECK-LE-NEXT: sllv $4, $5, $4 +; CHECK-LE-NEXT: or $2, $4, $2 +; CHECK-LE-NEXT: movn $2, $9, $1 +; CHECK-LE-NEXT: or $2, $6, $2 +; CHECK-LE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: jr $ra +; CHECK-LE-NEXT: addiu $sp, $sp, 40 + %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z) + ret i37 %f +} + +; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011 + +declare i7 @llvm.fshl.i7(i7, i7, i7) +define i7 @fshl_i7_const_fold() { +; CHECK-LABEL: fshl_i7_const_fold: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 67 + %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2) + ret i7 %f +} + +define i8 @fshl_i8_const_fold_overshift_1() { +; CHECK-LABEL: fshl_i8_const_fold_overshift_1: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 128 + %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15) + ret i8 %f +} + +define i8 @fshl_i8_const_fold_overshift_2() { +; CHECK-LABEL: fshl_i8_const_fold_overshift_2: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 120 + %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11) + ret i8 %f +} + +define i8 @fshl_i8_const_fold_overshift_3() { +; CHECK-LABEL: fshl_i8_const_fold_overshift_3: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 0 + %f = call i8 @llvm.fshl.i8(i8 0, i8 225, i8 8) + ret i8 %f +} + +; With constant shift amount, this is 'extr'. 
+ +define i32 @fshl_i32_const_shift(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: srl $1, $5, 23 +; CHECK-NEXT: sll $2, $4, 9 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9) + ret i32 %f +} + +; Check modulo math on shift amount. + +define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) { +; CHECK-LABEL: fshl_i32_const_overshift: +; CHECK: # %bb.0: +; CHECK-NEXT: srl $1, $5, 23 +; CHECK-NEXT: sll $2, $4, 9 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41) + ret i32 %f +} + +; 64-bit should also work. + +define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) { +; CHECK-BE-LABEL: fshl_i64_const_overshift: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: srl $1, $6, 23 +; CHECK-BE-NEXT: sll $2, $5, 9 +; CHECK-BE-NEXT: or $2, $2, $1 +; CHECK-BE-NEXT: sll $1, $6, 9 +; CHECK-BE-NEXT: srl $3, $7, 23 +; CHECK-BE-NEXT: jr $ra +; CHECK-BE-NEXT: or $3, $3, $1 +; +; CHECK-LE-LABEL: fshl_i64_const_overshift: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: sll $1, $7, 9 +; CHECK-LE-NEXT: srl $2, $6, 23 +; CHECK-LE-NEXT: or $2, $2, $1 +; CHECK-LE-NEXT: srl $1, $7, 23 +; CHECK-LE-NEXT: sll $3, $4, 9 +; CHECK-LE-NEXT: jr $ra +; CHECK-LE-NEXT: or $3, $3, $1 + %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105) + ret i64 %f +} + +; This should work without any node-specific logic. + +define i8 @fshl_i8_const_fold() { +; CHECK-LABEL: fshl_i8_const_fold: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 128 + %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7) + ret i8 %f +} + +; Repeat everything for funnel shift right. + +; General case - all operands can be variables. 
+ +define i16 @fshr_i16(i16 %x, i16 %y, i16 %z) { +; CHECK-LABEL: fshr_i16: +; CHECK: # %bb.0: +; CHECK-NEXT: sll $1, $5, 16 +; CHECK-NEXT: andi $2, $6, 15 +; CHECK-NEXT: ori $3, $2, 16 +; CHECK-NEXT: srlv $1, $1, $3 +; CHECK-NEXT: sll $3, $4, 1 +; CHECK-NEXT: xori $2, $2, 15 +; CHECK-NEXT: sllv $2, $3, $2 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z) + ret i16 %f +} + +define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) { +; CHECK-LABEL: fshr_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: andi $1, $6, 31 +; CHECK-NEXT: srlv $1, $5, $1 +; CHECK-NEXT: sll $2, $4, 1 +; CHECK-NEXT: not $3, $6 +; CHECK-NEXT: andi $3, $3, 31 +; CHECK-NEXT: sllv $2, $2, $3 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z) + ret i32 %f +} + +; Verify that weird types are minimally supported. +declare i37 @llvm.fshr.i37(i37, i37, i37) +define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { +; CHECK-BE-LABEL: fshr_i37: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: addiu $sp, $sp, -40 +; CHECK-BE-NEXT: .cfi_def_cfa_offset 40 +; CHECK-BE-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; CHECK-BE-NEXT: .cfi_offset 31, -4 +; CHECK-BE-NEXT: .cfi_offset 19, -8 +; CHECK-BE-NEXT: .cfi_offset 18, -12 +; CHECK-BE-NEXT: .cfi_offset 17, -16 +; CHECK-BE-NEXT: .cfi_offset 16, -20 +; CHECK-BE-NEXT: move $16, $7 +; CHECK-BE-NEXT: move $17, $6 +; CHECK-BE-NEXT: move $18, $5 +; CHECK-BE-NEXT: move $19, $4 +; CHECK-BE-NEXT: lw $4, 56($sp) +; CHECK-BE-NEXT: lw $5, 60($sp) +; CHECK-BE-NEXT: addiu $6, $zero, 0 +; CHECK-BE-NEXT: jal __umoddi3 +; CHECK-BE-NEXT: addiu $7, $zero, 37 +; CHECK-BE-NEXT: addiu $1, $3, 27 +; CHECK-BE-NEXT: andi $2, $1, 63 +; CHECK-BE-NEXT: not $3, $2 +; CHECK-BE-NEXT: srl $4, $16, 5 +; 
CHECK-BE-NEXT: sll $5, $17, 27 +; CHECK-BE-NEXT: or $4, $5, $4 +; CHECK-BE-NEXT: sll $5, $4, 1 +; CHECK-BE-NEXT: sll $6, $16, 27 +; CHECK-BE-NEXT: srlv $6, $6, $2 +; CHECK-BE-NEXT: sllv $3, $5, $3 +; CHECK-BE-NEXT: not $5, $1 +; CHECK-BE-NEXT: andi $7, $5, 63 +; CHECK-BE-NEXT: sll $8, $18, 1 +; CHECK-BE-NEXT: sllv $8, $8, $7 +; CHECK-BE-NEXT: andi $5, $5, 32 +; CHECK-BE-NEXT: move $9, $8 +; CHECK-BE-NEXT: movn $9, $zero, $5 +; CHECK-BE-NEXT: or $3, $3, $6 +; CHECK-BE-NEXT: srlv $2, $4, $2 +; CHECK-BE-NEXT: andi $1, $1, 32 +; CHECK-BE-NEXT: movn $3, $2, $1 +; CHECK-BE-NEXT: srl $4, $18, 31 +; CHECK-BE-NEXT: sll $6, $19, 1 +; CHECK-BE-NEXT: or $4, $6, $4 +; CHECK-BE-NEXT: or $3, $9, $3 +; CHECK-BE-NEXT: movn $2, $zero, $1 +; CHECK-BE-NEXT: sllv $1, $4, $7 +; CHECK-BE-NEXT: not $4, $7 +; CHECK-BE-NEXT: lui $6, 32767 +; CHECK-BE-NEXT: ori $6, $6, 65535 +; CHECK-BE-NEXT: and $6, $18, $6 +; CHECK-BE-NEXT: srlv $4, $6, $4 +; CHECK-BE-NEXT: or $1, $1, $4 +; CHECK-BE-NEXT: movn $1, $8, $5 +; CHECK-BE-NEXT: or $2, $1, $2 +; CHECK-BE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; CHECK-BE-NEXT: jr $ra +; CHECK-BE-NEXT: addiu $sp, $sp, 40 +; +; CHECK-LE-LABEL: fshr_i37: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: addiu $sp, $sp, -40 +; CHECK-LE-NEXT: .cfi_def_cfa_offset 40 +; CHECK-LE-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $19, 32($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $18, 28($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $17, 24($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: sw $16, 20($sp) # 4-byte Folded Spill +; CHECK-LE-NEXT: .cfi_offset 31, -4 +; CHECK-LE-NEXT: .cfi_offset 19, -8 +; CHECK-LE-NEXT: .cfi_offset 18, -12 +; CHECK-LE-NEXT: .cfi_offset 17, -16 +; CHECK-LE-NEXT: .cfi_offset 16, -20 +; CHECK-LE-NEXT: move 
$16, $7 +; CHECK-LE-NEXT: move $17, $6 +; CHECK-LE-NEXT: move $18, $5 +; CHECK-LE-NEXT: move $19, $4 +; CHECK-LE-NEXT: lw $4, 56($sp) +; CHECK-LE-NEXT: lw $5, 60($sp) +; CHECK-LE-NEXT: addiu $6, $zero, 37 +; CHECK-LE-NEXT: jal __umoddi3 +; CHECK-LE-NEXT: addiu $7, $zero, 0 +; CHECK-LE-NEXT: addiu $1, $2, 27 +; CHECK-LE-NEXT: andi $2, $1, 63 +; CHECK-LE-NEXT: not $3, $2 +; CHECK-LE-NEXT: srl $4, $17, 5 +; CHECK-LE-NEXT: sll $5, $16, 27 +; CHECK-LE-NEXT: or $4, $5, $4 +; CHECK-LE-NEXT: sll $5, $4, 1 +; CHECK-LE-NEXT: sll $6, $17, 27 +; CHECK-LE-NEXT: srlv $6, $6, $2 +; CHECK-LE-NEXT: sllv $3, $5, $3 +; CHECK-LE-NEXT: not $5, $1 +; CHECK-LE-NEXT: andi $7, $5, 63 +; CHECK-LE-NEXT: sll $8, $19, 1 +; CHECK-LE-NEXT: sllv $8, $8, $7 +; CHECK-LE-NEXT: andi $5, $5, 32 +; CHECK-LE-NEXT: move $9, $8 +; CHECK-LE-NEXT: movn $9, $zero, $5 +; CHECK-LE-NEXT: or $3, $3, $6 +; CHECK-LE-NEXT: srlv $4, $4, $2 +; CHECK-LE-NEXT: andi $1, $1, 32 +; CHECK-LE-NEXT: movn $3, $4, $1 +; CHECK-LE-NEXT: srl $2, $19, 31 +; CHECK-LE-NEXT: sll $6, $18, 1 +; CHECK-LE-NEXT: or $6, $6, $2 +; CHECK-LE-NEXT: or $2, $9, $3 +; CHECK-LE-NEXT: movn $4, $zero, $1 +; CHECK-LE-NEXT: sllv $1, $6, $7 +; CHECK-LE-NEXT: not $3, $7 +; CHECK-LE-NEXT: lui $6, 32767 +; CHECK-LE-NEXT: ori $6, $6, 65535 +; CHECK-LE-NEXT: and $6, $19, $6 +; CHECK-LE-NEXT: srlv $3, $6, $3 +; CHECK-LE-NEXT: or $1, $1, $3 +; CHECK-LE-NEXT: movn $1, $8, $5 +; CHECK-LE-NEXT: or $3, $1, $4 +; CHECK-LE-NEXT: lw $16, 20($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $18, 28($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $19, 32($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: lw $ra, 36($sp) # 4-byte Folded Reload +; CHECK-LE-NEXT: jr $ra +; CHECK-LE-NEXT: addiu $sp, $sp, 40 + %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z) + ret i37 %f +} + +; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111 + +declare i7 @llvm.fshr.i7(i7, i7, i7) +define i7 @fshr_i7_const_fold() { +; 
CHECK-LABEL: fshr_i7_const_fold: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 31 + %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2) + ret i7 %f +} + +define i8 @fshr_i8_const_fold_overshift_1() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_1: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 254 + %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15) + ret i8 %f +} + +define i8 @fshr_i8_const_fold_overshift_2() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_2: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 225 + %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11) + ret i8 %f +} + +define i8 @fshr_i8_const_fold_overshift_3() { +; CHECK-LABEL: fshr_i8_const_fold_overshift_3: +; CHECK: # %bb.0: +; CHECK-NEXT: jr $ra +; CHECK-NEXT: addiu $2, $zero, 255 + %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) + ret i8 %f +} + +; With constant shift amount, this is 'extr'. + +define i32 @fshr_i32_const_shift(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_const_shift: +; CHECK: # %bb.0: +; CHECK-NEXT: srl $1, $5, 9 +; CHECK-NEXT: sll $2, $4, 23 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9) + ret i32 %f +} + +; Check modulo math on shift amount. 41-32=9. + +define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) { +; CHECK-LABEL: fshr_i32_const_overshift: +; CHECK: # %bb.0: +; CHECK-NEXT: srl $1, $5, 9 +; CHECK-NEXT: sll $2, $4, 23 +; CHECK-NEXT: jr $ra +; CHECK-NEXT: or $2, $2, $1 + %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41) + ret i32 %f +} + +; 64-bit should also work. 105-64 = 41. 
 +
+define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) {
+; CHECK-BE-LABEL: fshr_i64_const_overshift:
+; CHECK-BE: # %bb.0:
+; CHECK-BE-NEXT: srl $1, $5, 9
+; CHECK-BE-NEXT: sll $2, $4, 23
+; CHECK-BE-NEXT: or $2, $2, $1
+; CHECK-BE-NEXT: srl $1, $6, 9
+; CHECK-BE-NEXT: sll $3, $5, 23
+; CHECK-BE-NEXT: jr $ra
+; CHECK-BE-NEXT: or $3, $3, $1
+;
+; CHECK-LE-LABEL: fshr_i64_const_overshift:
+; CHECK-LE: # %bb.0:
+; CHECK-LE-NEXT: srl $1, $7, 9
+; CHECK-LE-NEXT: sll $2, $4, 23
+; CHECK-LE-NEXT: or $2, $2, $1
+; CHECK-LE-NEXT: srl $1, $4, 9
+; CHECK-LE-NEXT: sll $3, $5, 23
+; CHECK-LE-NEXT: jr $ra
+; CHECK-LE-NEXT: or $3, $3, $1
+ %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
+ ret i64 %f
+}
+
+; This should work without any node-specific logic.
+
+define i8 @fshr_i8_const_fold() {
+; CHECK-LABEL: fshr_i8_const_fold:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: addiu $2, $zero, 254
+ %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
+ ret i8 %f
+}
+
+define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) {
+; CHECK-LABEL: fshl_i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $2, $4
+ %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
+ ret i32 %f
+}
+
+define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) {
+; CHECK-LABEL: fshr_i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $2, $5
+ %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
+ ret i32 %f
+}
+
+define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: fshl_v4i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: move $2, $4
+; CHECK-NEXT: move $3, $5
+; CHECK-NEXT: move $4, $6
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: move $5, $7
+ %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
+ ret <4 x i32> %f
+}
+
+define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: fshr_v4i32_shift_by_bitwidth:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw
$5, 28($sp)
+; CHECK-NEXT: lw $4, 24($sp)
+; CHECK-NEXT: lw $3, 20($sp)
+; CHECK-NEXT: lw $2, 16($sp)
+; CHECK-NEXT: jr $ra
+; CHECK-NEXT: nop
+ %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
+ ret <4 x i32> %f
+}
+

From 2218e6d0a873f6bad4c4cdd5bccbdc0ae6f4c760 Mon Sep 17 00:00:00 2001
From: Arthur Eubanks <aeubanks@google.com>
Date: Tue, 6 Oct 2020 20:28:43 -0700
Subject: [PATCH 03/17] [BPF] Make BPFAbstractMemberAccessPass required

Or else on optnone functions we get the following during instruction
selection:
fatal error: error in backend: Cannot select: intrinsic
%llvm.preserve.struct.access.index

Currently the -O0 pipeline doesn't properly run passes registered via
TargetMachine::registerPassBuilderCallbacks(), so don't add that RUN
line yet. That will be fixed after this.

Reviewed By: yonghong-song

Differential Revision: https://reviews.llvm.org/D89083
---
 llvm/lib/Target/BPF/BPF.h | 4 +++
 llvm/test/CodeGen/BPF/optnone-2.ll | 52 ++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)
 create mode 100644 llvm/test/CodeGen/BPF/optnone-2.ll

diff --git a/llvm/lib/Target/BPF/BPF.h b/llvm/lib/Target/BPF/BPF.h
index 82ac091fa7fa36..8629c1503b4e90 100644
--- a/llvm/lib/Target/BPF/BPF.h
+++ b/llvm/lib/Target/BPF/BPF.h
@@ -46,11 +46,15 @@ class BPFAbstractMemberAccessPass
 public:
   BPFAbstractMemberAccessPass(BPFTargetMachine *TM) : TM(TM) {}
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  static bool isRequired() { return true; }
 };
 
 class BPFPreserveDITypePass : public PassInfoMixin<BPFPreserveDITypePass> {
 public:
   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  static bool isRequired() { return true; }
 };
 
 } // namespace llvm
diff --git a/llvm/test/CodeGen/BPF/optnone-2.ll b/llvm/test/CodeGen/BPF/optnone-2.ll
new file mode 100644
index 00000000000000..82014bdaf2cc9f
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/optnone-2.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -passes='default<O2>' | llc -march=bpfel -filetype=asm -o /dev/null -
+; TODO: add -O0
once that's supported + +; IR generated by +; $ cat /tmp/a.c +; struct ss { int a; }; +; int foo() { return __builtin_btf_type_id(0, 0) + __builtin_preserve_type_info(*(struct ss *)0, 0); } +; $ clang -target bpf -g -S -emit-llvm t.c -Xclang -disable-llvm-passes /tmp/a.c + +target triple = "bpf" + +; Function Attrs: noinline nounwind optnone +define dso_local i32 @foo() #0 !dbg !9 { +entry: + %0 = call i32 @llvm.bpf.btf.type.id(i32 0, i64 0), !dbg !12, !llvm.preserve.access.index !4 + %1 = call i32 @llvm.bpf.preserve.type.info(i32 1, i64 0), !dbg !13, !llvm.preserve.access.index !14 + %add = add i32 %0, %1, !dbg !17 + ret i32 %add, !dbg !18 +} + +; Function Attrs: nounwind readnone +declare i32 @llvm.bpf.btf.type.id(i32, i64) #1 + +; Function Attrs: nounwind readnone +declare i32 @llvm.bpf.preserve.type.info(i32, i64) #1 + +attributes #0 = { noinline nounwind optnone } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!5, !6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "C:/src/tmp\\a.c", directory: "C:\\src\\llvm-project") +!2 = !{} +!3 = !{!4} +!4 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!5 = !{i32 7, !"Dwarf Version", i32 4} +!6 = !{i32 2, !"Debug Info Version", i32 3} +!7 = !{i32 1, !"wchar_size", i32 4} +!8 = !{!"clang version 12.0.0"} +!9 = distinct !DISubprogram(name: "foo", scope: !10, file: !10, line: 2, type: !11, scopeLine: 2, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!10 = !DIFile(filename: "C:/src/tmp/a.c", directory: "") +!11 = !DISubroutineType(types: !3) +!12 = !DILocation(line: 2, column: 21, scope: !9) +!13 = !DILocation(line: 2, column: 51, scope: !9) +!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ss", file: 
!10, line: 1, size: 32, elements: !15) +!15 = !{!16} +!16 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !14, file: !10, line: 1, baseType: !4, size: 32) +!17 = !DILocation(line: 2, column: 49, scope: !9) +!18 = !DILocation(line: 2, column: 14, scope: !9) From 4abb519619694de12e401de5454a6eed5c1384ea Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 9 Oct 2020 14:21:23 -0400 Subject: [PATCH 04/17] [libc++] NFCI: Define small methods of basic_stringstream inline It greatly increases readability because defining the methods out-of-line involves a ton of boilerplate template declarations. --- libcxx/include/sstream | 396 +++++++++++++++-------------------------- 1 file changed, 139 insertions(+), 257 deletions(-) diff --git a/libcxx/include/sstream b/libcxx/include/sstream index 4b1d17cfde3327..042766ca22c91e 100644 --- a/libcxx/include/sstream +++ b/libcxx/include/sstream @@ -208,11 +208,19 @@ private: public: // 27.8.1.1 Constructors: - inline _LIBCPP_INLINE_VISIBILITY - explicit basic_stringbuf(ios_base::openmode __wch = ios_base::in | ios_base::out); - inline _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY + explicit basic_stringbuf(ios_base::openmode __wch = ios_base::in | ios_base::out) + : __hm_(0), __mode_(__wch) + { } + + _LIBCPP_INLINE_VISIBILITY explicit basic_stringbuf(const string_type& __s, - ios_base::openmode __wch = ios_base::in | ios_base::out); + ios_base::openmode __wch = ios_base::in | ios_base::out) + : __str_(__s.get_allocator()), __hm_(0), __mode_(__wch) + { + str(__s); + } + basic_stringbuf(basic_stringbuf&& __rhs); // 27.8.1.2 Assign and swap: @@ -230,28 +238,13 @@ protected: virtual int_type overflow (int_type __c = traits_type::eof()); virtual pos_type seekoff(off_type __off, ios_base::seekdir __way, ios_base::openmode __wch = ios_base::in | ios_base::out); - inline _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY virtual pos_type seekpos(pos_type __sp, - ios_base::openmode __wch = ios_base::in | ios_base::out); + 
ios_base::openmode __wch = ios_base::in | ios_base::out) { + return seekoff(__sp, ios_base::beg, __wch); + } }; -template -basic_stringbuf<_CharT, _Traits, _Allocator>::basic_stringbuf(ios_base::openmode __wch) - : __hm_(0), - __mode_(__wch) -{ -} - -template -basic_stringbuf<_CharT, _Traits, _Allocator>::basic_stringbuf(const string_type& __s, - ios_base::openmode __wch) - : __str_(__s.get_allocator()), - __hm_(0), - __mode_(__wch) -{ - str(__s); -} - template basic_stringbuf<_CharT, _Traits, _Allocator>::basic_stringbuf(basic_stringbuf&& __rhs) : __mode_(__rhs.__mode_) @@ -609,14 +602,6 @@ basic_stringbuf<_CharT, _Traits, _Allocator>::seekoff(off_type __off, return pos_type(__noff); } -template -typename basic_stringbuf<_CharT, _Traits, _Allocator>::pos_type -basic_stringbuf<_CharT, _Traits, _Allocator>::seekpos(pos_type __sp, - ios_base::openmode __wch) -{ - return seekoff(__sp, ios_base::beg, __wch); -} - // basic_istringstream template @@ -638,67 +623,53 @@ private: public: // 27.8.2.1 Constructors: - inline _LIBCPP_INLINE_VISIBILITY - explicit basic_istringstream(ios_base::openmode __wch = ios_base::in); - inline _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY + explicit basic_istringstream(ios_base::openmode __wch = ios_base::in) + : basic_istream<_CharT, _Traits>(&__sb_) + , __sb_(__wch | ios_base::in) + { } + _LIBCPP_INLINE_VISIBILITY explicit basic_istringstream(const string_type& __s, - ios_base::openmode __wch = ios_base::in); - inline _LIBCPP_INLINE_VISIBILITY - basic_istringstream(basic_istringstream&& __rhs); + ios_base::openmode __wch = ios_base::in) + : basic_istream<_CharT, _Traits>(&__sb_) + , __sb_(__s, __wch | ios_base::in) + { } + + _LIBCPP_INLINE_VISIBILITY + basic_istringstream(basic_istringstream&& __rhs) + : basic_istream<_CharT, _Traits>(_VSTD::move(__rhs)) + , __sb_(_VSTD::move(__rhs.__sb_)) + { + basic_istream<_CharT, _Traits>::set_rdbuf(&__sb_); + } // 27.8.2.2 Assign and swap: - basic_istringstream& 
operator=(basic_istringstream&& __rhs); - inline _LIBCPP_INLINE_VISIBILITY - void swap(basic_istringstream& __rhs); + basic_istringstream& operator=(basic_istringstream&& __rhs) { + basic_istream::operator=(_VSTD::move(__rhs)); + __sb_ = _VSTD::move(__rhs.__sb_); + return *this; + } + _LIBCPP_INLINE_VISIBILITY + void swap(basic_istringstream& __rhs) { + basic_istream::swap(__rhs); + __sb_.swap(__rhs.__sb_); + } // 27.8.2.3 Members: - inline _LIBCPP_INLINE_VISIBILITY - basic_stringbuf* rdbuf() const; - inline _LIBCPP_INLINE_VISIBILITY - string_type str() const; - inline _LIBCPP_INLINE_VISIBILITY - void str(const string_type& __s); + _LIBCPP_INLINE_VISIBILITY + basic_stringbuf* rdbuf() const { + return const_cast*>(&__sb_); + } + _LIBCPP_INLINE_VISIBILITY + string_type str() const { + return __sb_.str(); + } + _LIBCPP_INLINE_VISIBILITY + void str(const string_type& __s) { + __sb_.str(__s); + } }; -template -basic_istringstream<_CharT, _Traits, _Allocator>::basic_istringstream(ios_base::openmode __wch) - : basic_istream<_CharT, _Traits>(&__sb_), - __sb_(__wch | ios_base::in) -{ -} - -template -basic_istringstream<_CharT, _Traits, _Allocator>::basic_istringstream(const string_type& __s, - ios_base::openmode __wch) - : basic_istream<_CharT, _Traits>(&__sb_), - __sb_(__s, __wch | ios_base::in) -{ -} - -template -basic_istringstream<_CharT, _Traits, _Allocator>::basic_istringstream(basic_istringstream&& __rhs) - : basic_istream<_CharT, _Traits>(_VSTD::move(__rhs)), - __sb_(_VSTD::move(__rhs.__sb_)) -{ - basic_istream<_CharT, _Traits>::set_rdbuf(&__sb_); -} - -template -basic_istringstream<_CharT, _Traits, _Allocator>& -basic_istringstream<_CharT, _Traits, _Allocator>::operator=(basic_istringstream&& __rhs) -{ - basic_istream::operator=(_VSTD::move(__rhs)); - __sb_ = _VSTD::move(__rhs.__sb_); - return *this; -} - -template -void basic_istringstream<_CharT, _Traits, _Allocator>::swap(basic_istringstream& __rhs) -{ - basic_istream::swap(__rhs); - __sb_.swap(__rhs.__sb_); -} 
- template inline _LIBCPP_INLINE_VISIBILITY void @@ -708,26 +679,6 @@ swap(basic_istringstream<_CharT, _Traits, _Allocator>& __x, __x.swap(__y); } -template -basic_stringbuf<_CharT, _Traits, _Allocator>* -basic_istringstream<_CharT, _Traits, _Allocator>::rdbuf() const -{ - return const_cast*>(&__sb_); -} - -template -basic_string<_CharT, _Traits, _Allocator> -basic_istringstream<_CharT, _Traits, _Allocator>::str() const -{ - return __sb_.str(); -} - -template -void basic_istringstream<_CharT, _Traits, _Allocator>::str(const string_type& __s) -{ - __sb_.str(__s); -} - // basic_ostringstream template @@ -749,68 +700,55 @@ private: public: // 27.8.2.1 Constructors: - inline _LIBCPP_INLINE_VISIBILITY - explicit basic_ostringstream(ios_base::openmode __wch = ios_base::out); - inline _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY + explicit basic_ostringstream(ios_base::openmode __wch = ios_base::out) + : basic_ostream<_CharT, _Traits>(&__sb_) + , __sb_(__wch | ios_base::out) + { } + + _LIBCPP_INLINE_VISIBILITY explicit basic_ostringstream(const string_type& __s, - ios_base::openmode __wch = ios_base::out); - inline _LIBCPP_INLINE_VISIBILITY - basic_ostringstream(basic_ostringstream&& __rhs); + ios_base::openmode __wch = ios_base::out) + : basic_ostream<_CharT, _Traits>(&__sb_) + , __sb_(__s, __wch | ios_base::out) + { } + + _LIBCPP_INLINE_VISIBILITY + basic_ostringstream(basic_ostringstream&& __rhs) + : basic_ostream<_CharT, _Traits>(_VSTD::move(__rhs)) + , __sb_(_VSTD::move(__rhs.__sb_)) + { + basic_ostream<_CharT, _Traits>::set_rdbuf(&__sb_); + } // 27.8.2.2 Assign and swap: - basic_ostringstream& operator=(basic_ostringstream&& __rhs); - inline _LIBCPP_INLINE_VISIBILITY - void swap(basic_ostringstream& __rhs); + basic_ostringstream& operator=(basic_ostringstream&& __rhs) { + basic_ostream::operator=(_VSTD::move(__rhs)); + __sb_ = _VSTD::move(__rhs.__sb_); + return *this; + } + + _LIBCPP_INLINE_VISIBILITY + void swap(basic_ostringstream& __rhs) { + 
basic_ostream::swap(__rhs); + __sb_.swap(__rhs.__sb_); + } // 27.8.2.3 Members: - inline _LIBCPP_INLINE_VISIBILITY - basic_stringbuf* rdbuf() const; - inline _LIBCPP_INLINE_VISIBILITY - string_type str() const; - inline _LIBCPP_INLINE_VISIBILITY - void str(const string_type& __s); + _LIBCPP_INLINE_VISIBILITY + basic_stringbuf* rdbuf() const { + return const_cast*>(&__sb_); + } + _LIBCPP_INLINE_VISIBILITY + string_type str() const { + return __sb_.str(); + } + _LIBCPP_INLINE_VISIBILITY + void str(const string_type& __s) { + __sb_.str(__s); + } }; -template -basic_ostringstream<_CharT, _Traits, _Allocator>::basic_ostringstream(ios_base::openmode __wch) - : basic_ostream<_CharT, _Traits>(&__sb_), - __sb_(__wch | ios_base::out) -{ -} - -template -basic_ostringstream<_CharT, _Traits, _Allocator>::basic_ostringstream(const string_type& __s, - ios_base::openmode __wch) - : basic_ostream<_CharT, _Traits>(&__sb_), - __sb_(__s, __wch | ios_base::out) -{ -} - -template -basic_ostringstream<_CharT, _Traits, _Allocator>::basic_ostringstream(basic_ostringstream&& __rhs) - : basic_ostream<_CharT, _Traits>(_VSTD::move(__rhs)), - __sb_(_VSTD::move(__rhs.__sb_)) -{ - basic_ostream<_CharT, _Traits>::set_rdbuf(&__sb_); -} - -template -basic_ostringstream<_CharT, _Traits, _Allocator>& -basic_ostringstream<_CharT, _Traits, _Allocator>::operator=(basic_ostringstream&& __rhs) -{ - basic_ostream::operator=(_VSTD::move(__rhs)); - __sb_ = _VSTD::move(__rhs.__sb_); - return *this; -} - -template -void -basic_ostringstream<_CharT, _Traits, _Allocator>::swap(basic_ostringstream& __rhs) -{ - basic_ostream::swap(__rhs); - __sb_.swap(__rhs.__sb_); -} - template inline _LIBCPP_INLINE_VISIBILITY void @@ -820,27 +758,6 @@ swap(basic_ostringstream<_CharT, _Traits, _Allocator>& __x, __x.swap(__y); } -template -basic_stringbuf<_CharT, _Traits, _Allocator>* -basic_ostringstream<_CharT, _Traits, _Allocator>::rdbuf() const -{ - return const_cast*>(&__sb_); -} - -template -basic_string<_CharT, _Traits, 
_Allocator> -basic_ostringstream<_CharT, _Traits, _Allocator>::str() const -{ - return __sb_.str(); -} - -template -void -basic_ostringstream<_CharT, _Traits, _Allocator>::str(const string_type& __s) -{ - __sb_.str(__s); -} - // basic_stringstream template @@ -862,68 +779,54 @@ private: public: // 27.8.2.1 Constructors: - inline _LIBCPP_INLINE_VISIBILITY - explicit basic_stringstream(ios_base::openmode __wch = ios_base::in | ios_base::out); - inline _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY + explicit basic_stringstream(ios_base::openmode __wch = ios_base::in | ios_base::out) + : basic_iostream<_CharT, _Traits>(&__sb_) + , __sb_(__wch) + { } + + _LIBCPP_INLINE_VISIBILITY explicit basic_stringstream(const string_type& __s, - ios_base::openmode __wch = ios_base::in | ios_base::out); - inline _LIBCPP_INLINE_VISIBILITY - basic_stringstream(basic_stringstream&& __rhs); + ios_base::openmode __wch = ios_base::in | ios_base::out) + : basic_iostream<_CharT, _Traits>(&__sb_) + , __sb_(__s, __wch) + { } + + _LIBCPP_INLINE_VISIBILITY + basic_stringstream(basic_stringstream&& __rhs) + : basic_iostream<_CharT, _Traits>(_VSTD::move(__rhs)) + , __sb_(_VSTD::move(__rhs.__sb_)) + { + basic_istream<_CharT, _Traits>::set_rdbuf(&__sb_); + } // 27.8.2.2 Assign and swap: - basic_stringstream& operator=(basic_stringstream&& __rhs); - inline _LIBCPP_INLINE_VISIBILITY - void swap(basic_stringstream& __rhs); + basic_stringstream& operator=(basic_stringstream&& __rhs) { + basic_iostream::operator=(_VSTD::move(__rhs)); + __sb_ = _VSTD::move(__rhs.__sb_); + return *this; + } + _LIBCPP_INLINE_VISIBILITY + void swap(basic_stringstream& __rhs) { + basic_iostream::swap(__rhs); + __sb_.swap(__rhs.__sb_); + } // 27.8.2.3 Members: - inline _LIBCPP_INLINE_VISIBILITY - basic_stringbuf* rdbuf() const; - inline _LIBCPP_INLINE_VISIBILITY - string_type str() const; - inline _LIBCPP_INLINE_VISIBILITY - void str(const string_type& __s); + _LIBCPP_INLINE_VISIBILITY + basic_stringbuf* rdbuf() const 
{ + return const_cast*>(&__sb_); + } + _LIBCPP_INLINE_VISIBILITY + string_type str() const { + return __sb_.str(); + } + _LIBCPP_INLINE_VISIBILITY + void str(const string_type& __s) { + __sb_.str(__s); + } }; -template -basic_stringstream<_CharT, _Traits, _Allocator>::basic_stringstream(ios_base::openmode __wch) - : basic_iostream<_CharT, _Traits>(&__sb_), - __sb_(__wch) -{ -} - -template -basic_stringstream<_CharT, _Traits, _Allocator>::basic_stringstream(const string_type& __s, - ios_base::openmode __wch) - : basic_iostream<_CharT, _Traits>(&__sb_), - __sb_(__s, __wch) -{ -} - -template -basic_stringstream<_CharT, _Traits, _Allocator>::basic_stringstream(basic_stringstream&& __rhs) - : basic_iostream<_CharT, _Traits>(_VSTD::move(__rhs)), - __sb_(_VSTD::move(__rhs.__sb_)) -{ - basic_istream<_CharT, _Traits>::set_rdbuf(&__sb_); -} - -template -basic_stringstream<_CharT, _Traits, _Allocator>& -basic_stringstream<_CharT, _Traits, _Allocator>::operator=(basic_stringstream&& __rhs) -{ - basic_iostream::operator=(_VSTD::move(__rhs)); - __sb_ = _VSTD::move(__rhs.__sb_); - return *this; -} - -template -void -basic_stringstream<_CharT, _Traits, _Allocator>::swap(basic_stringstream& __rhs) -{ - basic_iostream::swap(__rhs); - __sb_.swap(__rhs.__sb_); -} - template inline _LIBCPP_INLINE_VISIBILITY void @@ -933,27 +836,6 @@ swap(basic_stringstream<_CharT, _Traits, _Allocator>& __x, __x.swap(__y); } -template -basic_stringbuf<_CharT, _Traits, _Allocator>* -basic_stringstream<_CharT, _Traits, _Allocator>::rdbuf() const -{ - return const_cast*>(&__sb_); -} - -template -basic_string<_CharT, _Traits, _Allocator> -basic_stringstream<_CharT, _Traits, _Allocator>::str() const -{ - return __sb_.str(); -} - -template -void -basic_stringstream<_CharT, _Traits, _Allocator>::str(const string_type& __s) -{ - __sb_.str(__s); -} - _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS From e0d66ccf068752b7d194bb231993f171ba23d830 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 9 Oct 2020 
14:39:20 -0400 Subject: [PATCH 05/17] [libc++] Rename LIBCXX_ENABLE_DEBUG_MODE to LIBCXX_ENABLE_DEBUG_MODE_SUPPORT To make it clearer this is about whether the library supports the debug mode at all, not whether the debug mode is enabled. Per comment by Nico Weber on IRC. --- libcxx/cmake/caches/Apple.cmake | 2 +- libcxx/cmake/caches/Generic-nodebug.cmake | 2 +- libcxx/src/CMakeLists.txt | 2 +- libcxx/test/CMakeLists.txt | 2 +- libcxx/test/configs/legacy.cfg.in | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/libcxx/cmake/caches/Apple.cmake b/libcxx/cmake/caches/Apple.cmake index cab7c1407d63e8..38f2c4c016b093 100644 --- a/libcxx/cmake/caches/Apple.cmake +++ b/libcxx/cmake/caches/Apple.cmake @@ -11,7 +11,7 @@ set(LIBCXX_TYPEINFO_COMPARISON_IMPLEMENTATION "1" CACHE STRING "") set(LIBCXX_CXX_ABI libcxxabi CACHE STRING "") set(LIBCXX_ENABLE_NEW_DELETE_DEFINITIONS OFF CACHE BOOL "") set(LIBCXX_HIDE_FROM_ABI_PER_TU_BY_DEFAULT ON CACHE BOOL "") -set(LIBCXX_ENABLE_DEBUG_MODE OFF CACHE BOOL "") +set(LIBCXX_ENABLE_DEBUG_MODE_SUPPORT OFF CACHE BOOL "") set(LIBCXXABI_ENABLE_NEW_DELETE_DEFINITIONS ON CACHE BOOL "") set(LIBCXXABI_ENABLE_PIC OFF CACHE BOOL "") diff --git a/libcxx/cmake/caches/Generic-nodebug.cmake b/libcxx/cmake/caches/Generic-nodebug.cmake index b301b2ef1c7d38..a62760fa78fd6f 100644 --- a/libcxx/cmake/caches/Generic-nodebug.cmake +++ b/libcxx/cmake/caches/Generic-nodebug.cmake @@ -1 +1 @@ -set(LIBCXX_ENABLE_DEBUG_MODE OFF CACHE BOOL "") +set(LIBCXX_ENABLE_DEBUG_MODE_SUPPORT OFF CACHE BOOL "") diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 5de4a513ac1707..7f77a784577640 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -55,7 +55,7 @@ set(LIBCXX_SOURCES vector.cpp ) -if (LIBCXX_ENABLE_DEBUG_MODE) +if (LIBCXX_ENABLE_DEBUG_MODE_SUPPORT) list(APPEND LIBCXX_SOURCES debug.cpp ) diff --git a/libcxx/test/CMakeLists.txt b/libcxx/test/CMakeLists.txt index e2e3382b779a4f..4d9f2e5f0017ce 100644 --- 
a/libcxx/test/CMakeLists.txt +++ b/libcxx/test/CMakeLists.txt @@ -70,7 +70,7 @@ pythonize_bool(LIBCXX_HAS_ATOMIC_LIB) pythonize_bool(LIBCXX_HAVE_CXX_ATOMICS_WITH_LIB) pythonize_bool(LIBCXX_BUILD_EXTERNAL_THREAD_LIBRARY) pythonize_bool(LIBCXX_DEBUG_BUILD) -pythonize_bool(LIBCXX_ENABLE_DEBUG_MODE) +pythonize_bool(LIBCXX_ENABLE_DEBUG_MODE_SUPPORT) pythonize_bool(LIBCXX_ENABLE_PARALLEL_ALGORITHMS) # By default, for non-standalone builds, libcxx and libcxxabi share a library diff --git a/libcxx/test/configs/legacy.cfg.in b/libcxx/test/configs/legacy.cfg.in index 4bfc9bc69a7d93..f0a4e8a73e09a5 100644 --- a/libcxx/test/configs/legacy.cfg.in +++ b/libcxx/test/configs/legacy.cfg.in @@ -9,7 +9,7 @@ config.libcxx_src_root = "@LIBCXX_SOURCE_DIR@" config.libcxx_obj_root = "@LIBCXX_BINARY_DIR@" config.cxx_library_root = "@LIBCXX_LIBRARY_DIR@" config.enable_exceptions = @LIBCXX_ENABLE_EXCEPTIONS@ -config.enable_debug_tests = @LIBCXX_ENABLE_DEBUG_MODE@ +config.enable_debug_tests = @LIBCXX_ENABLE_DEBUG_MODE_SUPPORT@ config.enable_experimental = @LIBCXX_ENABLE_EXPERIMENTAL_LIBRARY@ config.enable_filesystem = @LIBCXX_ENABLE_FILESYSTEM@ config.enable_rtti = @LIBCXX_ENABLE_RTTI@ From 877667287fa5515d525edfee169b18207b342cd5 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 9 Oct 2020 14:40:47 -0400 Subject: [PATCH 06/17] [libc++] Fixup a missing occurrence of LIBCXX_ENABLE_DEBUG_MODE --- libcxx/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/CMakeLists.txt b/libcxx/CMakeLists.txt index 8599e5d2cc997a..ecc8924f2cf942 100644 --- a/libcxx/CMakeLists.txt +++ b/libcxx/CMakeLists.txt @@ -94,7 +94,7 @@ option(LIBCXX_ENABLE_FILESYSTEM "Build filesystem as part of the main libc++ lib ${ENABLE_FILESYSTEM_DEFAULT}) option(LIBCXX_INCLUDE_TESTS "Build the libc++ tests." ${LLVM_INCLUDE_TESTS}) option(LIBCXX_ENABLE_PARALLEL_ALGORITHMS "Enable the parallel algorithms library. This requires the PSTL to be available." 
OFF) -option(LIBCXX_ENABLE_DEBUG_MODE +option(LIBCXX_ENABLE_DEBUG_MODE_SUPPORT "Whether to include support for libc++'s debugging mode in the library. By default, this is turned on. If you turn it off and try to enable the debug mode when compiling a program against libc++, it will fail to link From 466c8296f20f5940fc282b228e28408b7c4d7d9b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 9 Oct 2020 20:52:08 +0200 Subject: [PATCH 07/17] [MemCpyOpt] Add test for incorrectly hoisted store (NFC) --- llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll index 777ba51f38271a..6cffce50eb803a 100644 --- a/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll +++ b/llvm/test/Transforms/MemCpyOpt/fca2memcpy.ll @@ -144,3 +144,22 @@ define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstid store %S %1, %S* %dst2 ret void } + +define void @throwing_call(%S* noalias %src, %S* %dst) { +; CHECK-LABEL: @throwing_call( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast %S* [[SRC:%.*]] to i8* +; CHECK-NEXT: [[TMP2:%.*]] = bitcast %S* [[DST:%.*]] to i8* +; CHECK-NEXT: [[TMP3:%.*]] = bitcast %S* [[SRC]] to i8* +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP2]], i8* align 8 [[TMP3]], i64 16, i1 false) +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* align 8 [[TMP1]], i8 0, i64 16, i1 false) +; CHECK-NEXT: call void @call() [[ATTR2:#.*]] +; CHECK-NEXT: ret void +; + %1 = load %S, %S* %src + store %S zeroinitializer, %S* %src + call void @call() readnone + store %S %1, %S* %dst + ret void +} + +declare void @call() From 662024df331bd1f1a206678435e51232683e3cf6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 9 Oct 2020 10:26:50 -0700 Subject: [PATCH 08/17] [X86] Don't copy kill flag when expanding LCMPXCHG16B_SAVE_RBX The expansion code creates a copy to RBX before the real LCMPXCHG16B. 
It's possible this copy uses a register that is also used by the real LCMPXCHG16B. If we set the kill flag on the use in the copy, then we'll fail the machine verifier on the use on the LCMPXCHG16B. Differential Revision: https://reviews.llvm.org/D89151 --- llvm/lib/Target/X86/X86ExpandPseudo.cpp | 4 +++- llvm/test/CodeGen/X86/pr42064.ll | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp index d9c0964e9ed834..b1d15225eaaf1f 100644 --- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp +++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp @@ -346,7 +346,9 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB, // Copy the input argument of the pseudo into the argument of the // actual instruction. - TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), InArg.isKill()); + // NOTE: We don't copy the kill flag since the input might be the same reg + // as one of the other operands of LCMPXCHG16B. + TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false); // Create the actual instruction. MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B)); // Copy the operands related to the address. 
diff --git a/llvm/test/CodeGen/X86/pr42064.ll b/llvm/test/CodeGen/X86/pr42064.ll index 6269a59ff055e6..089895da18a1df 100644 --- a/llvm/test/CodeGen/X86/pr42064.ll +++ b/llvm/test/CodeGen/X86/pr42064.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-pc-windows-msvc19.11.0 -mattr=+avx,+cx16 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-pc-windows-msvc19.11.0 -mattr=+avx,+cx16 | FileCheck %s %struct.TestStruct = type { %union.Int128 } %union.Int128 = type { i128 } From f34bb06935aa3bab353d70d515b767fdd2f5625c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 9 Oct 2020 11:48:10 -0700 Subject: [PATCH 09/17] [X86] When expanding LCMPXCHG16B_NO_RBX in EmitInstrWithCustomInserter, directly copy address operands instead of going through X86AddressMode. I suspect getAddressFromInstr and addFullAddress are not handling all addresses cases properly based on a report from MaskRay. So just copy the operands directly. This should be more efficient anyway. 
--- llvm/lib/Target/X86/X86ISelLowering.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 9e17cab106436f..5d4dfaab45033d 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33765,7 +33765,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, case X86::LCMPXCHG16B_NO_RBX: { const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); Register BasePtr = TRI->getBaseRegister(); - X86AddressMode AM = getAddressFromInstr(&MI, 0); if (TRI->hasBasePointer(*MF) && (BasePtr == X86::RBX || BasePtr == X86::EBX)) { if (!BB->isLiveIn(BasePtr)) @@ -33776,15 +33775,20 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX) .addReg(X86::RBX); Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); - addFullAddress( - BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst), AM) - .add(MI.getOperand(X86::AddrNumOperands)) - .addReg(SaveRBX); + MachineInstrBuilder MIB = + BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst); + for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx) + MIB.add(MI.getOperand(Idx)); + MIB.add(MI.getOperand(X86::AddrNumOperands)); + MIB.addReg(SaveRBX); } else { // Simple case, just copy the virtual register to RBX. 
BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::RBX) .add(MI.getOperand(X86::AddrNumOperands)); - addFullAddress(BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B)), AM); + MachineInstrBuilder MIB = + BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B)); + for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx) + MIB.add(MI.getOperand(Idx)); } MI.eraseFromParent(); return BB; From 40cef5a00eb83f44e946912b74be83e8dc02effb Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Fri, 9 Oct 2020 19:02:53 +0000 Subject: [PATCH 10/17] [clang] Add a test for CGDebugInfo treatment of blocks There doesn't seem to be a direct test of this, and I'm planning to make future changes which will affect it. I'm not particularly familiar with the blocks extension, so suggestions for better tests are welcome. Differential Revision: https://reviews.llvm.org/D88754 --- clang/test/CodeGen/debug-info-block-expr.c | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 clang/test/CodeGen/debug-info-block-expr.c diff --git a/clang/test/CodeGen/debug-info-block-expr.c b/clang/test/CodeGen/debug-info-block-expr.c new file mode 100644 index 00000000000000..009e7800b6ee95 --- /dev/null +++ b/clang/test/CodeGen/debug-info-block-expr.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -fblocks -debug-info-kind=limited -emit-llvm -o - %s | FileCheck %s +// Verify that the desired DIExpression are generated for blocks. 
+ +void test() { +// CHECK: call void @llvm.dbg.declare({{.*}}!DIExpression(DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}){{.*}}) + __block int i; +// CHECK: call void @llvm.dbg.declare({{.*}}!DIExpression(DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}, DW_OP_deref, DW_OP_plus_uconst, {{[0-9]+}}){{.*}}) + ^ { i = 1; }(); +} From 0e9b572949ce00e5ca01bf7555abdda12052a213 Mon Sep 17 00:00:00 2001 From: Stella Laurenzo Date: Fri, 9 Oct 2020 11:49:38 -0700 Subject: [PATCH 11/17] [mlir] Fix TypeID for shared libraries built with -fvisibility=hidden. * Isolates the visibility controlled parts of its implementation to a detail namespace. * Applies a struct level visibility attribute which applies to the static local within the get() functions. * The prior version was not emitting a symbol for the static local "instance" fields when the user TU was compiled with -fvisibility=hidden. Differential Revision: https://reviews.llvm.org/D89153 --- mlir/include/mlir/Support/TypeID.h | 56 ++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 11 deletions(-) diff --git a/mlir/include/mlir/Support/TypeID.h b/mlir/include/mlir/Support/TypeID.h index 518ff39e866943..ef19182882c637 100644 --- a/mlir/include/mlir/Support/TypeID.h +++ b/mlir/include/mlir/Support/TypeID.h @@ -20,6 +20,10 @@ namespace mlir { +namespace detail { +struct TypeIDExported; +} // namespace detail + /// This class provides an efficient unique identifier for a specific C++ type. /// This allows for a C++ type to be compared, hashed, and stored in an opaque /// context. This class is similar in some ways to std::type_index, but can be @@ -62,19 +66,10 @@ class TypeID { bool operator!=(const TypeID &other) const { return !(*this == other); } /// Construct a type info object for the given type T. - /// TODO: This currently won't work when using DLLs as it requires properly - /// attaching dllimport and dllexport. 
Fix this when that information is - /// available within LLVM. template - LLVM_EXTERNAL_VISIBILITY static TypeID get() { - static Storage instance; - return TypeID(&instance); - } + static TypeID get(); template