From 2cb004b4a1e0970de9c1f92401ddd4baef45cd4f Mon Sep 17 00:00:00 2001 From: Michael Bedy Date: Tue, 4 Jun 2024 22:34:18 -0400 Subject: [PATCH 1/5] Introduce a pseudo mnemonic for S_DELAY_ALU in MIR. --- llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp | 154 ++++++++++++++ llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h | 23 ++ .../AMDGPU/insert-delay-alu-literal.mir | 175 ++++++++++++++++ .../CodeGen/AMDGPU/insert-delay-alu-parse.mir | 198 ++++++++++++++++++ .../CodeGen/AMDGPU/vopd-src2acc-delay.mir | 4 +- 5 files changed, 552 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir create mode 100644 llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp index 6ec4178053b207..7bca5b3bb9ef66 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp @@ -17,6 +17,160 @@ using namespace llvm; +void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI, + std::optional OpIdx, int64_t Imm) const { + + switch(MI.getOpcode()) { + case AMDGPU::S_DELAY_ALU: + assert(OpIdx == 0); + printSDelayAluImm(Imm, OS); + break; + default: + MIRFormatter::printImm(OS, MI, OpIdx, Imm); + break; + } +} + +/// Implement target specific parsing of immediate mnemonics. The mnemonic is +/// dot seperated strings. +bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode, + const unsigned OpIdx, + StringRef Src, int64_t &Imm, + ErrorCallbackType ErrorCallback) const +{ + + switch(OpCode) { + case AMDGPU::S_DELAY_ALU: + return parseSDelayAluImmMnemonic(OpIdx, Imm, Src, ErrorCallback); + default: + break; + } + return true; // Don't know what this is +} + +void AMDGPUMIRFormatter::printSDelayAluImm(int64_t Imm, + llvm::raw_ostream &OS) const { + // Construct an immediate string to represent the information encoded in the + // s_delay_alu immediate. 
+ // .id0_[_skip__id1] + constexpr int64_t None = 0; + constexpr int64_t Same = 0; + + uint64_t Id0 = (Imm & 0xF); + uint64_t Skip = ((Imm >> 4) & 0x7); + uint64_t Id1 = ((Imm >> 7) & 0xF); + auto outdep = [&](uint64_t Id) { + if (Id == None) { + OS << "NONE"; + } else if (Id < 5) { + OS << "VALU_DEP_" << Id; + } else if (Id < 8) { + OS << "TRANS32_DEP_" << Id - 4; + } else { + OS << "SALU_CYCLE_" << Id - 8; + } + }; + + OS << ".id0_"; + outdep(Id0); + + // If the second inst is "same" and "none", no need to print the rest of the + // string. + if (Skip == Same && Id1 == None) + return; + + // Encode the second delay specification. + OS << "_skip_"; + if (Skip == 0) { + OS << "SAME"; + } else if (Skip == 1) { + OS << "NEXT"; + } else { + OS << "SKIP_" << Skip - 1; + } + OS << "_id1_"; + outdep(Id1); +} + +bool AMDGPUMIRFormatter::parseSDelayAluImmMnemonic( + const unsigned int OpIdx, int64_t &Imm, llvm::StringRef &Src, + llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) const +{ + assert(OpIdx == 0); + + Imm = 0; + bool expected = Src.consume_front(".id0_"); + if (!expected) { + return ErrorCallback(Src.begin(), "Expected .id0_"); + } + + auto expect_int = [&](StringRef &Src, int64_t Offset) -> int64_t { + int64_t Dep; + if (!Src.consumeInteger(10, Dep)) { + return Dep + Offset; + } else { + return -1; + } + }; + + auto decode_delay = [&](StringRef &Src) -> int64_t { + if (Src.consume_front("NONE")) { + return 0; + } else if (Src.consume_front("VALU_DEP_")) { + return expect_int(Src, 0); + } else if (Src.consume_front("TRANS32_DEP_")) { + return expect_int(Src, 4); + } else if (Src.consume_front("SALU_CYCLE_")) { + return expect_int(Src, 8); + } + return -1; + }; + + int64_t Delay0 = decode_delay(Src); + int64_t Skip = 0; + int64_t Delay1 = 0; + if (Delay0 == -1) { + return ErrorCallback(Src.begin(), "Could not decode delay0"); + } + + // Set the Imm so far, to that early return has the correct value. 
+ Imm = Delay0; + + // If that was the end of the string, the second instruction is "same" and + // "none" + if (Src.begin() == Src.end()) + return false; + + expected = Src.consume_front("_skip_"); + if (!expected) { + return ErrorCallback(Src.begin(), "Expected _skip_"); + } + + if (Src.consume_front("SAME")) { + Skip = 0; + } else if (Src.consume_front("NEXT")) { + Skip = 1; + } else if (Src.consume_front("SKIP_")) { + if (Src.consumeInteger(10, Skip)) { + return ErrorCallback(Src.begin(), "Expected integer Skip value"); + } + } else { + ErrorCallback(Src.begin(), "Unexpected Skip Value"); + } + + expected = Src.consume_front("_id1_"); + if (!expected) { + return ErrorCallback(Src.begin(), "Expected _id1_"); + } + + Delay1 = decode_delay(Src); + if (Delay1 == -1) { + return ErrorCallback(Src.begin(), "Could not decode delay1"); + } + Imm = Imm | (Skip << 4) | (Delay1 << 7); + return false; +} + bool AMDGPUMIRFormatter::parseCustomPseudoSourceValue( StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS, const PseudoSourceValue *&PSV, ErrorCallbackType ErrorCallback) const { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h index 98b5031071cf47..80bb3dfe7a364e 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h @@ -28,12 +28,35 @@ class AMDGPUMIRFormatter final : public MIRFormatter { AMDGPUMIRFormatter() = default; virtual ~AMDGPUMIRFormatter() = default; + /// Implement target specific printing for machine operand immediate value, so + /// that we can have more meaningful mnemonic than a 64-bit integer. Passing + /// None to OpIdx means the index is unknown. + virtual void printImm(raw_ostream &OS, const MachineInstr &MI, + std::optional OpIdx, + int64_t Imm) const override; + + /// Implement target specific parsing of immediate mnemonics. The mnemonic is + /// dot seperated strings. 
+ virtual bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx, + StringRef Src, int64_t &Imm, + ErrorCallbackType ErrorCallback) const override; + /// Implement target specific parsing of target custom pseudo source value. bool parseCustomPseudoSourceValue(StringRef Src, MachineFunction &MF, PerFunctionMIParsingState &PFS, const PseudoSourceValue *&PSV, ErrorCallbackType ErrorCallback) const override; + +private: + /// Print the string to represent s_delay_alu immediate value + void printSDelayAluImm(int64_t Imm, llvm::raw_ostream &OS) const; + + /// Parse the immediate pseudo literal for s_delay_alu + bool parseSDelayAluImmMnemonic( + const unsigned int OpIdx, int64_t &Imm, llvm::StringRef &Src, + llvm::MIRFormatter::ErrorCallbackType &ErrorCallback) const; + }; } // end namespace llvm diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir new file mode 100644 index 00000000000000..7788e50ed4d24a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir @@ -0,0 +1,175 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -run-pass=amdgpu-insert-delay-alu %s -o - | FileCheck %s + +--- +name: valu_dep_1 +body: | + bb.0: + ; CHECK-LABEL: name: valu_dep_1 + ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1 + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... 
+ +--- +name: valu_dep_2 +body: | + bb.0: + ; CHECK-LABEL: name: valu_dep_2 + ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_2 + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... + +--- +name: valu_dep_3 +body: | + bb.0: + ; CHECK-LABEL: name: valu_dep_3 + ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec + ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_3 + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... + +--- +name: valu_dep_4 +body: | + bb.0: + ; CHECK-LABEL: name: valu_dep_4 + ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec + ; CHECK-NEXT: $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec + ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_4 + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec + $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... 
+ +--- +name: trans32_dep_1 +body: | + bb.0: + ; CHECK-LABEL: name: trans32_dep_1 + ; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode + ; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_1 + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... + +--- +name: trans32_dep_2 +body: | + bb.0: + ; CHECK-LABEL: name: trans32_dep_2 + ; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode + ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode + ; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_2 + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode + $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... + +--- +name: trans32_dep_3 +body: | + bb.0: + ; CHECK-LABEL: name: trans32_dep_3 + ; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode + ; CHECK-NEXT: $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode + ; CHECK-NEXT: $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode + ; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_3 + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode + $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode + $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... + +--- +name: salu_cycle_1 +body: | + bb.0: + ; CHECK-LABEL: name: salu_cycle_1 + ; CHECK: $sgpr0 = S_MOV_B32 0 + ; CHECK-NEXT: S_DELAY_ALU .id0_SALU_CYCLE_1 + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec + $sgpr0 = S_MOV_B32 0 + $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec +... 
+ +--- +name: valu_dep_1_same_trans32_dep_1 +body: | + bb.0: + ; CHECK-LABEL: name: valu_dep_1_same_trans32_dep_1 + ; CHECK: $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode + ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + ; CHECK-NEXT: S_DELAY_ALU .id0_TRANS32_DEP_1_skip_SAME_id1_VALU_DEP_1 + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec +... + +--- +name: valu_dep_1_same_salu_cycle_1 +body: | + bb.0: + ; CHECK-LABEL: name: valu_dep_1_same_salu_cycle_1 + ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + ; CHECK-NEXT: $sgpr0 = S_MOV_B32 0 + ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1_skip_SAME_id1_SALU_CYCLE_1 + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $sgpr0 = S_MOV_B32 0 + $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec +... + +--- +name: valu_dep_1_next_valu_dep_1 +body: | + bb.0: + ; CHECK-LABEL: name: valu_dep_1_next_valu_dep_1 + ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1_skip_NEXT_id1_VALU_DEP_1 + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... 
+ +--- +name: valu_dep_2_next_valu_dep_2 +body: | + bb.0: + ; CHECK-LABEL: name: valu_dep_2_next_valu_dep_2 + ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_2_skip_NEXT_id1_VALU_DEP_2 + ; CHECK-NEXT: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec +... + diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir new file mode 100644 index 00000000000000..0d264629d4b72c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir @@ -0,0 +1,198 @@ +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -start-after=amdgpu-insert-delay-alu %s -o - | FileCheck %s + +--- +name: valu_dep_1 +body: | + bb.0: + ; CHECK-LABEL: {{^}}valu_dep_1: + ; CHECK: %bb.0: + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + S_DELAY_ALU .id0_VALU_DEP_1 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... + +--- +name: valu_dep_2 +body: | + bb.0: + ; CHECK-LABEL: {{^}}valu_dep_2: + ; CHECK: %bb.0: + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1 + ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + S_DELAY_ALU .id0_VALU_DEP_2 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... 
+ +--- +name: valu_dep_3 +body: | + bb.0: + ; CHECK-LABEL: {{^}}valu_dep_3: + ; CHECK: %bb.0: + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1 + ; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v2 + ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_3) + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec + S_DELAY_ALU .id0_VALU_DEP_3 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... + +--- +name: valu_dep_4 +body: | + bb.0: + ; CHECK-LABEL: {{^}}valu_dep_4: + ; CHECK: %bb.0: + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1 + ; CHECK-NEXT: v_add_nc_u32_e32 v2, v2, v2 + ; CHECK-NEXT: v_add_nc_u32_e32 v3, v3, v3 + ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_4) + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + $vgpr2 = V_ADD_U32_e32 $vgpr2, $vgpr2, implicit $exec + $vgpr3 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec + S_DELAY_ALU .id0_VALU_DEP_4 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... + +--- +name: trans32_dep_1 +body: | + bb.0: + ; CHECK-LABEL: {{^}}trans32_dep_1: + ; CHECK: %bb.0: + ; CHECK-NEXT: v_exp_f32_e32 v0, v0 + ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_1) + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode + S_DELAY_ALU .id0_TRANS32_DEP_1 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... 
+ +--- +name: trans32_dep_2 +body: | + bb.0: + ; CHECK-LABEL: {{^}}trans32_dep_2: + ; CHECK: %bb.0: + ; CHECK-NEXT: v_exp_f32_e32 v0, v0 + ; CHECK-NEXT: v_exp_f32_e32 v1, v1 + ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_2) + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode + $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode + S_DELAY_ALU .id0_TRANS32_DEP_2 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... + +--- +name: trans32_dep_3 +body: | + bb.0: + ; CHECK-LABEL: {{^}}trans32_dep_3: + ; CHECK: %bb.0: + ; CHECK-NEXT: v_exp_f32_e32 v0, v0 + ; CHECK-NEXT: v_exp_f32_e32 v1, v1 + ; CHECK-NEXT: v_exp_f32_e32 v2, v2 + ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_3) + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode + $vgpr1 = V_EXP_F32_e32 $vgpr1, implicit $exec, implicit $mode + $vgpr2 = V_EXP_F32_e32 $vgpr2, implicit $exec, implicit $mode + S_DELAY_ALU .id0_TRANS32_DEP_3 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... + +--- +name: salu_cycle_1 +body: | + bb.0: + ; CHECK-LABEL: {{^}}salu_cycle_1: + ; CHECK: %bb.0: + ; CHECK-NEXT: s_mov_b32 s0, 0 + ; CHECK-NEXT: s_delay_alu instid0(SALU_CYCLE_1) + ; CHECK-NEXT: v_add_nc_u32_e32 v0, s0, v0 + $sgpr0 = S_MOV_B32 0 + S_DELAY_ALU .id0_SALU_CYCLE_1 + $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec +... + +--- +name: valu_dep_1_same_trans32_dep_1 +body: | + bb.0: + ; CHECK-LABEL: {{^}}valu_dep_1_same_trans32_dep_1: + ; CHECK: %bb.0: + ; CHECK-NEXT: v_exp_f32_e32 v0, v0 + ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1 + ; CHECK-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instid1(VALU_DEP_1) + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v1 + $vgpr0 = V_EXP_F32_e32 $vgpr0, implicit $exec, implicit $mode + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + S_DELAY_ALU .id0_TRANS32_DEP_1_skip_SAME_id1_VALU_DEP_1 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec +... 
+ +--- +name: valu_dep_1_same_salu_cycle_1 +body: | + bb.0: + ; CHECK-LABEL: {{^}}valu_dep_1_same_salu_cycle_1: + ; CHECK: %bb.0: + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + ; CHECK-NEXT: s_mov_b32 s0, 0 + ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) + ; CHECK-NEXT: v_add_nc_u32_e32 v0, s0, v0 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $sgpr0 = S_MOV_B32 0 + S_DELAY_ALU .id0_VALU_DEP_1_skip_SAME_id1_SALU_CYCLE_1 + $vgpr0 = V_ADD_U32_e32 $sgpr0, $vgpr0, implicit $exec +... + +--- +name: valu_dep_1_next_valu_dep_1 +body: | + bb.0: + ; CHECK-LABEL: {{^}}valu_dep_1_next_valu_dep_1: + ; CHECK: %bb.0: + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + S_DELAY_ALU .id0_VALU_DEP_1_skip_NEXT_id1_VALU_DEP_1 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec +... + +--- +name: valu_dep_2_next_valu_dep_2 +body: | + bb.0: + ; CHECK-LABEL: {{^}}valu_dep_2_next_valu_dep_2: + ; CHECK: %bb.0: + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1 + ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + S_DELAY_ALU .id0_VALU_DEP_2_skip_NEXT_id1_VALU_DEP_2 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec +... 
+ diff --git a/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir b/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir index 70b95b06e402c1..b3bf9081b0d46c 100644 --- a/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir +++ b/llvm/test/CodeGen/AMDGPU/vopd-src2acc-delay.mir @@ -13,7 +13,7 @@ body: | ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32_gfx11 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec - ; CHECK-NEXT: S_DELAY_ALU 1 + ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1 ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_FMAC_F32_e32_X_FMAC_F32_e32_gfx11 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF @@ -37,7 +37,7 @@ body: | ; CHECK-NEXT: $vgpr3 = IMPLICIT_DEF ; CHECK-NEXT: $vgpr4 = IMPLICIT_DEF ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32_gfx11 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec - ; CHECK-NEXT: S_DELAY_ALU 1 + ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_1 ; CHECK-NEXT: $vgpr0, $vgpr1 = V_DUAL_DOT2C_F32_F16_e32_X_DOT2C_F32_F16_e32_gfx11 $vgpr2, $vgpr3, $vgpr0, $vgpr3, $vgpr4, $vgpr1, implicit $mode, implicit $exec, implicit $mode, implicit $exec, implicit $mode, implicit $exec $vgpr0 = IMPLICIT_DEF $vgpr1 = IMPLICIT_DEF From f6dc318e24fe91fb5d083233e50c666e81aaa1bc Mon Sep 17 00:00:00 2001 From: Michael Bedy Date: Wed, 19 Jun 2024 13:16:38 -0400 Subject: [PATCH 2/5] Address code formatting review comments. 
--- llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp | 76 +++++++++---------- 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp index 7bca5b3bb9ef66..71dbfbe0f9eaff 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp @@ -59,20 +59,19 @@ void AMDGPUMIRFormatter::printSDelayAluImm(int64_t Imm, uint64_t Id0 = (Imm & 0xF); uint64_t Skip = ((Imm >> 4) & 0x7); uint64_t Id1 = ((Imm >> 7) & 0xF); - auto outdep = [&](uint64_t Id) { - if (Id == None) { + auto Outdep = [&](uint64_t Id) { + if (Id == None) OS << "NONE"; - } else if (Id < 5) { + else if (Id < 5) OS << "VALU_DEP_" << Id; - } else if (Id < 8) { + else if (Id < 8) OS << "TRANS32_DEP_" << Id - 4; - } else { + else OS << "SALU_CYCLE_" << Id - 8; - } }; OS << ".id0_"; - outdep(Id0); + Outdep(Id0); // If the second inst is "same" and "none", no need to print the rest of the // string. @@ -81,15 +80,15 @@ void AMDGPUMIRFormatter::printSDelayAluImm(int64_t Imm, // Encode the second delay specification. 
OS << "_skip_"; - if (Skip == 0) { + if (Skip == 0) OS << "SAME"; - } else if (Skip == 1) { + else if (Skip == 1) OS << "NEXT"; - } else { + else OS << "SKIP_" << Skip - 1; - } + OS << "_id1_"; - outdep(Id1); + Outdep(Id1); } bool AMDGPUMIRFormatter::parseSDelayAluImmMnemonic( @@ -99,39 +98,37 @@ bool AMDGPUMIRFormatter::parseSDelayAluImmMnemonic( assert(OpIdx == 0); Imm = 0; - bool expected = Src.consume_front(".id0_"); - if (!expected) { + bool Expected = Src.consume_front(".id0_"); + if (!Expected) return ErrorCallback(Src.begin(), "Expected .id0_"); - } - auto expect_int = [&](StringRef &Src, int64_t Offset) -> int64_t { + auto ExpectInt = [&](StringRef &Src, int64_t Offset) -> int64_t { int64_t Dep; - if (!Src.consumeInteger(10, Dep)) { + if (!Src.consumeInteger(10, Dep)) return Dep + Offset; - } else { + else return -1; - } }; - auto decode_delay = [&](StringRef &Src) -> int64_t { - if (Src.consume_front("NONE")) { + auto DecodeDelay = [&](StringRef &Src) -> int64_t { + if (Src.consume_front("NONE")) return 0; - } else if (Src.consume_front("VALU_DEP_")) { - return expect_int(Src, 0); - } else if (Src.consume_front("TRANS32_DEP_")) { - return expect_int(Src, 4); - } else if (Src.consume_front("SALU_CYCLE_")) { - return expect_int(Src, 8); - } + else if (Src.consume_front("VALU_DEP_")) + return ExpectInt(Src, 0); + else if (Src.consume_front("TRANS32_DEP_")) + return ExpectInt(Src, 4); + else if (Src.consume_front("SALU_CYCLE_")) + return ExpectInt(Src, 8); + return -1; }; - int64_t Delay0 = decode_delay(Src); + int64_t Delay0 = DecodeDelay(Src); int64_t Skip = 0; int64_t Delay1 = 0; - if (Delay0 == -1) { + if (Delay0 == -1) return ErrorCallback(Src.begin(), "Could not decode delay0"); - } + // Set the Imm so far, to that early return has the correct value. 
Imm = Delay0; @@ -141,10 +138,10 @@ bool AMDGPUMIRFormatter::parseSDelayAluImmMnemonic( if (Src.begin() == Src.end()) return false; - expected = Src.consume_front("_skip_"); - if (!expected) { + Expected = Src.consume_front("_skip_"); + if (!Expected) return ErrorCallback(Src.begin(), "Expected _skip_"); - } + if (Src.consume_front("SAME")) { Skip = 0; @@ -158,15 +155,14 @@ bool AMDGPUMIRFormatter::parseSDelayAluImmMnemonic( ErrorCallback(Src.begin(), "Unexpected Skip Value"); } - expected = Src.consume_front("_id1_"); - if (!expected) { + Expected = Src.consume_front("_id1_"); + if (!Expected) return ErrorCallback(Src.begin(), "Expected _id1_"); - } - Delay1 = decode_delay(Src); - if (Delay1 == -1) { + Delay1 = DecodeDelay(Src); + if (Delay1 == -1) return ErrorCallback(Src.begin(), "Could not decode delay1"); - } + Imm = Imm | (Skip << 4) | (Delay1 << 7); return false; } From 42db86c1fd3a9bc456b664bc678a4c1b9c6658dc Mon Sep 17 00:00:00 2001 From: Michael Bedy Date: Wed, 19 Jun 2024 16:03:55 -0400 Subject: [PATCH 3/5] Fix issue with skip > 1, add tests. 
--- llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp | 1 + .../AMDGPU/insert-delay-alu-literal.mir | 17 +++++++++++++++++ .../CodeGen/AMDGPU/insert-delay-alu-parse.mir | 19 +++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp index 71dbfbe0f9eaff..680dddc89d224b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp @@ -151,6 +151,7 @@ bool AMDGPUMIRFormatter::parseSDelayAluImmMnemonic( if (Src.consumeInteger(10, Skip)) { return ErrorCallback(Src.begin(), "Expected integer Skip value"); } + Skip += 1; } else { ErrorCallback(Src.begin(), "Unexpected Skip Value"); } diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir index 7788e50ed4d24a..18bc442ae98e04 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-literal.mir @@ -173,3 +173,20 @@ body: | $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec ... +--- +name: valu_dep_2_skip_valu_dep_2 +body: | + bb.0: + ; CHECK-LABEL: name: valu_dep_2_skip_valu_dep_2 + ; CHECK: $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + ; CHECK-NEXT: S_DELAY_ALU .id0_VALU_DEP_2_skip_SKIP_1_id1_VALU_DEP_2 + ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec + ; CHECK-NEXT: $vgpr4 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec + ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr4 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec +... 
diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir index 0d264629d4b72c..af2e6001502f67 100644 --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-parse.mir @@ -196,3 +196,22 @@ body: | $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec ... +--- +name: valu_dep_2_skip_valu_dep_2 +body: | + bb.0: + ; CHECK-LABEL: {{^}}valu_dep_2_skip_valu_dep_2 + ; CHECK: %bb.0: + ; CHECK-NEXT: v_add_nc_u32_e32 v0, v0, v0 + ; CHECK-NEXT: v_add_nc_u32_e32 v2, v1, v1 + ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) + ; CHECK-NEXT: v_add_nc_u32_e32 v1, v0, v1 + ; CHECK-NEXT: v_add_nc_u32_e32 v4, v3, v3 + ; CHECK-NEXT: v_add_nc_u32_e32 v1, v1, v1 + $vgpr0 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec + $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec + S_DELAY_ALU .id0_VALU_DEP_2_skip_SKIP_1_id1_VALU_DEP_2 + $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec + $vgpr4 = V_ADD_U32_e32 $vgpr3, $vgpr3, implicit $exec + $vgpr1 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec +... From 866475496bb3391d314a9f2ca499ed1b86aa104a Mon Sep 17 00:00:00 2001 From: Michael Bedy Date: Thu, 20 Jun 2024 09:26:52 -0400 Subject: [PATCH 4/5] Fix formatting and code comments. 
--- llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp | 14 +++++++------- llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp index 680dddc89d224b..9a93a2573c8c64 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp @@ -20,7 +20,7 @@ using namespace llvm; void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI, std::optional OpIdx, int64_t Imm) const { - switch(MI.getOpcode()) { + switch (MI.getOpcode()) { case AMDGPU::S_DELAY_ALU: assert(OpIdx == 0); printSDelayAluImm(Imm, OS); @@ -39,7 +39,7 @@ bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode, ErrorCallbackType ErrorCallback) const { - switch(OpCode) { + switch (OpCode) { case AMDGPU::S_DELAY_ALU: return parseSDelayAluImmMnemonic(OpIdx, Imm, Src, ErrorCallback); default: @@ -106,18 +106,18 @@ bool AMDGPUMIRFormatter::parseSDelayAluImmMnemonic( int64_t Dep; if (!Src.consumeInteger(10, Dep)) return Dep + Offset; - else - return -1; + + return -1; }; auto DecodeDelay = [&](StringRef &Src) -> int64_t { if (Src.consume_front("NONE")) return 0; - else if (Src.consume_front("VALU_DEP_")) + if (Src.consume_front("VALU_DEP_")) return ExpectInt(Src, 0); - else if (Src.consume_front("TRANS32_DEP_")) + if (Src.consume_front("TRANS32_DEP_")) return ExpectInt(Src, 4); - else if (Src.consume_front("SALU_CYCLE_")) + if (Src.consume_front("SALU_CYCLE_")) return ExpectInt(Src, 8); return -1; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h index 80bb3dfe7a364e..c5c94737525296 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h @@ -36,7 +36,7 @@ class AMDGPUMIRFormatter final : public MIRFormatter { int64_t Imm) const override; /// Implement target specific parsing of immediate 
mnemonics. The mnemonic is - /// dot seperated strings. + /// a string with a leading dot. virtual bool parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx, StringRef Src, int64_t &Imm, ErrorCallbackType ErrorCallback) const override; From 835b2e4cbc2934ee45bc5a72aa814b952fa3f04f Mon Sep 17 00:00:00 2001 From: Michael Bedy Date: Thu, 20 Jun 2024 09:33:01 -0400 Subject: [PATCH 5/5] Fix code comment. --- llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp index 9a93a2573c8c64..11f0cba47afde2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp @@ -32,7 +32,7 @@ void AMDGPUMIRFormatter::printImm(raw_ostream &OS, const MachineInstr &MI, } /// Implement target specific parsing of immediate mnemonics. The mnemonic is -/// dot seperated strings. +/// a string with a leading dot. bool AMDGPUMIRFormatter::parseImmMnemonic(const unsigned OpCode, const unsigned OpIdx, StringRef Src, int64_t &Imm,