From a3a8172a226ab15ad00d20d602aab6945206e54d Mon Sep 17 00:00:00 2001
From: Jack Gallagher
Date: Mon, 17 Apr 2023 13:03:46 +0100
Subject: [PATCH] i#3044 AArch64 SVE codec: Add LD1RQ*

This patch adds the appropriate macros, tests and codec entries to encode the
following variants:

LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
LD1RQB { <Zt>.B }, <Pg>/Z, [<Xn|SP>, <Xm>]
LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #3]
LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1]
LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>}]
LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2]

Issue: #3044
---
 core/ir/aarch64/codec.c            | 141 ++++++++++++++++++-------
 core/ir/aarch64/codec_sve.txt      |  11 +-
 core/ir/aarch64/instr_create_api.h |  79 ++++++++++++++-
 core/ir/aarch64/opnd_defs.txt      |   3 +
 suite/tests/api/dis-a64-sve.txt    | 158 ++++++++++++++++++++++++++---
 suite/tests/api/ir_aarch64_sve.c   | 145 ++++++++++++++++++++++++--
 6 files changed, 468 insertions(+), 69 deletions(-)

diff --git a/core/ir/aarch64/codec.c b/core/ir/aarch64/codec.c
index b1a845c60fb..4216e798ee0 100644
--- a/core/ir/aarch64/codec.c
+++ b/core/ir/aarch64/codec.c
@@ -4467,16 +4467,19 @@ encode_opnd_s16(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
     return encode_opnd_vector_reg(16, 2, opnd, enc_out);
 }
 
-static inline bool
-svemem_gprs_per_element_decode(uint bytes_per_element, aarch64_reg_offset element_size,
-                               uint shift_amount, uint enc, int opcode, byte *pc,
-                               OUT opnd_t *opnd)
+static inline opnd_size_t
+calculate_mem_transfer(uint bytes_per_element, aarch64_reg_offset element_size)
 {
     ASSERT(element_size >= BYTE_REG && element_size <= DOUBLE_REG);
 
     const uint elements = get_elements_in_sve_vector(element_size);
-    const opnd_size_t mem_transfer = opnd_size_from_bytes(bytes_per_element * elements);
+    return opnd_size_from_bytes(bytes_per_element * elements);
+}
 
+static inline bool
+svemem_gprs_per_element_decode(opnd_size_t mem_transfer, uint shift_amount, uint enc,
+                               int opcode, byte *pc, OUT opnd_t *opnd)
+{
     *opnd = opnd_create_base_disp_shift_aarch64(
         decode_reg(extract_uint(enc, 5, 5), true, true),
         decode_reg(extract_uint(enc, 16, 5), true, false), DR_EXTEND_UXTX,
@@ -4485,15 +4488,9 @@ svemem_gprs_per_element_decode(uint bytes_per_element, aarch64_reg_offset elemen
 }
 
 static inline bool
-svemem_gprs_per_element_encode(uint bytes_per_element, aarch64_reg_offset element_size,
-                               uint shift_amount, uint enc, int opcode, byte *pc,
-                               opnd_t opnd, OUT uint *enc_out)
+svemem_gprs_per_element_encode(opnd_size_t mem_transfer, uint shift_amount, uint enc,
+                               int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 {
-    ASSERT(element_size >= BYTE_REG && element_size <= DOUBLE_REG);
-
-    const uint elements = get_elements_in_sve_vector(element_size);
-    const opnd_size_t mem_transfer = opnd_size_from_bytes(bytes_per_element * elements);
-
     if (!opnd_is_base_disp(opnd) || opnd_get_size(opnd) != mem_transfer ||
         opnd_get_disp(opnd) != 0)
         return false;
@@ -4518,13 +4515,15 @@ svemem_gprs_per_element_encode(uint bytes_per_element, aarch64_reg_offset elemen
 static inline bool
 decode_opnd_svemem_gprs_b1(uint enc, int opcode, byte *pc, OUT opnd_t *opnd)
 {
-    return svemem_gprs_per_element_decode(1, BYTE_REG, 0, enc, opcode, pc, opnd);
+    return svemem_gprs_per_element_decode(calculate_mem_transfer(1, BYTE_REG), 0, enc,
+                                          opcode, pc, opnd);
 }
 
 static inline bool
 encode_opnd_svemem_gprs_b1(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out)
 {
-    return svemem_gprs_per_element_encode(1, BYTE_REG, 0, enc, opcode, pc, opnd, enc_out);
+    return svemem_gprs_per_element_encode(calculate_mem_transfer(1, BYTE_REG), 0,
enc, + opcode, pc, opnd, enc_out); } /* imm8_10: 8 bit imm at pos 10, split across 20:16 and 12:10. */ @@ -5190,8 +5189,7 @@ encode_opnd_prf12(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out } static inline bool -decode_svemem_gpr_simm4_vl(uint enc, opnd_size_t transfer_size, int scale, - OUT opnd_t *opnd) +decode_svemem_gpr_simm4(uint enc, opnd_size_t transfer_size, int scale, OUT opnd_t *opnd) { const int offset = extract_int(enc, 16, 4) * scale; const reg_id_t rn = decode_reg(extract_uint(enc, 5, 5), true, true); @@ -5202,8 +5200,8 @@ decode_svemem_gpr_simm4_vl(uint enc, opnd_size_t transfer_size, int scale, } static inline bool -encode_svemem_gpr_simm4_vl(uint enc, opnd_size_t transfer_size, int scale, opnd_t opnd, - OUT uint *enc_out) +encode_svemem_gpr_simm4(uint enc, opnd_size_t transfer_size, int scale, opnd_t opnd, + OUT uint *enc_out) { if (!opnd_is_base_disp(opnd) || opnd_get_size(opnd) != transfer_size || opnd_get_index(opnd) != DR_REG_NULL) @@ -5223,6 +5221,36 @@ encode_svemem_gpr_simm4_vl(uint enc, opnd_size_t transfer_size, int scale, opnd_ return true; } +static inline bool +decode_ssz(uint enc, OUT opnd_size_t *transfer_size) +{ + switch (BITS(enc, 22, 21)) { + case 0b00: *transfer_size = OPSZ_16; return true; + case 0b01: *transfer_size = OPSZ_32; return true; + default: break; + } + return false; +} + +/* svemem_gpr_simm4: SVE memory operand [{, #}] */ + +static inline bool +decode_opnd_svemem_ssz_gpr_simm4(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +{ + opnd_size_t transfer_size; + return decode_ssz(enc, &transfer_size) && + decode_svemem_gpr_simm4(enc, transfer_size, 16, opnd); +} + +static inline bool +encode_opnd_svemem_ssz_gpr_simm4(uint enc, int opcode, byte *pc, opnd_t opnd, + OUT uint *enc_out) +{ + opnd_size_t transfer_size; + return decode_ssz(enc, &transfer_size) && + encode_svemem_gpr_simm4(enc, OPSZ_16, 16, opnd, enc_out); +} + /* SVE memory operand [{, #, MUL VL}] multiple dest registers or nt */ static inline bool @@ -5232,7 +5260,7 @@ decode_opnd_svemem_gpr_simm4_vl_xreg(uint enc, int opcode, byte *pc, OUT opnd_t const opnd_size_t transfer_size = opnd_size_from_bytes((register_count * dr_get_sve_vl()) / 8); - return decode_svemem_gpr_simm4_vl(enc, transfer_size, register_count, opnd); + return decode_svemem_gpr_simm4(enc, transfer_size, register_count, opnd); } static inline bool @@ -5243,7 +5271,7 @@ encode_opnd_svemem_gpr_simm4_vl_xreg(uint enc, int opcode, byte *pc, opnd_t opnd const opnd_size_t transfer_size = opnd_size_from_bytes((register_count * dr_get_sve_vl()) / 8); - return encode_svemem_gpr_simm4_vl(enc, transfer_size, register_count, opnd, enc_out); + return encode_svemem_gpr_simm4(enc, transfer_size, register_count, opnd, enc_out); } /* hsd_immh_sz: The element size of a vector mediated by immh with possible values h, s @@ -7028,15 +7056,44 @@ memory_transfer_size_from_dtype(uint enc) static inline bool decode_opnd_svemem_gpr_simm4_vl_1reg(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) { - return decode_svemem_gpr_simm4_vl(enc, memory_transfer_size_from_dtype(enc), 1, opnd); + return decode_svemem_gpr_simm4(enc, memory_transfer_size_from_dtype(enc), 1, opnd); } static inline bool encode_opnd_svemem_gpr_simm4_vl_1reg(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out) { - return encode_svemem_gpr_simm4_vl(enc, memory_transfer_size_from_dtype(enc), 1, opnd, - enc_out); + return encode_svemem_gpr_simm4(enc, memory_transfer_size_from_dtype(enc), 1, opnd, + enc_out); +} + +/* SVE memory operand [, LSL #x], mem 
transfer size based on ssz */ + +static inline bool +decode_opnd_svemem_ssz_gpr_shf(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) +{ + opnd_size_t mem_transfer; + if (!decode_ssz(enc, &mem_transfer)) + return false; + + const uint shift_amount = BITS(enc, 24, 23); + + return svemem_gprs_per_element_decode(mem_transfer, shift_amount, enc, opcode, pc, + opnd); +} + +static inline bool +encode_opnd_svemem_ssz_gpr_shf(uint enc, int opcode, byte *pc, opnd_t opnd, + OUT uint *enc_out) +{ + opnd_size_t mem_transfer; + if (!decode_ssz(enc, &mem_transfer)) + return false; + + const uint shift_amount = BITS(enc, 24, 23); + + return svemem_gprs_per_element_encode(mem_transfer, shift_amount, enc, opcode, pc, + opnd, enc_out); } static inline bool @@ -7047,8 +7104,9 @@ decode_opnd_svemem_msz_gpr_shf(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) const uint shift_amount = elsz; - return svemem_gprs_per_element_decode((1 << elsz) * (dests + 1), elsz, shift_amount, - enc, opcode, pc, opnd); + return svemem_gprs_per_element_decode( + calculate_mem_transfer((1 << elsz) * (dests + 1), elsz), shift_amount, enc, + opcode, pc, opnd); } static inline bool @@ -7060,8 +7118,9 @@ encode_opnd_svemem_msz_gpr_shf(uint enc, int opcode, byte *pc, opnd_t opnd, const uint shift_amount = elsz; - return svemem_gprs_per_element_encode((1 << elsz) * (dests + 1), elsz, shift_amount, - enc, opcode, pc, opnd, enc_out); + return svemem_gprs_per_element_encode( + calculate_mem_transfer((1 << elsz) * (dests + 1), elsz), shift_amount, enc, + opcode, pc, opnd, enc_out); } static inline bool @@ -7074,8 +7133,9 @@ decode_opnd_svemem_msz_stgpr_shf(uint enc, int opcode, byte *pc, OUT opnd_t *opn const uint shift_amount = elsz; - return svemem_gprs_per_element_decode((1 << elsz) * (dests + 1), elsz, shift_amount, - enc, opcode, pc, opnd); + return svemem_gprs_per_element_decode( + calculate_mem_transfer((1 << elsz) * (dests + 1), elsz), shift_amount, enc, + opcode, pc, opnd); } static inline bool @@ -7088,7 +7148,8 @@ encode_opnd_svemem_msz_stgpr_shf(uint enc, int opcode, byte *pc, opnd_t opnd, const uint shift_amount = elsz; bool success = svemem_gprs_per_element_encode( - (1 << elsz) * (dests + 1), elsz, shift_amount, enc, opcode, pc, opnd, enc_out); + calculate_mem_transfer((1 << elsz) * (dests + 1), elsz), shift_amount, enc, + opcode, pc, opnd, enc_out); if (BITS(enc, 20, 16) == 0b11111) return false; @@ -7102,8 +7163,8 @@ decode_opnd_svemem_gpr_shf(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) const uint shift_amount = opnd_size_to_shift_amount(get_opnd_size_from_offset(insz)); - return svemem_gprs_per_element_decode(1 << insz, elsz, shift_amount, enc, opcode, pc, - opnd); + return svemem_gprs_per_element_decode(calculate_mem_transfer(1 << insz, elsz), + shift_amount, enc, opcode, pc, opnd); } static inline bool @@ -7114,8 +7175,8 @@ encode_opnd_svemem_gpr_shf(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint const uint shift_amount = opnd_size_to_shift_amount(get_opnd_size_from_offset(insz)); - return svemem_gprs_per_element_encode(1 << insz, elsz, shift_amount, enc, opcode, pc, - opnd, enc_out); + return svemem_gprs_per_element_encode(calculate_mem_transfer(1 << insz, elsz), + shift_amount, enc, opcode, pc, opnd, enc_out); } static inline bool @@ -7124,7 +7185,8 @@ decode_opnd_svemem_gprs_bhsdx(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) aarch64_reg_offset insz, elsz; sizes_from_dtype(enc, &elsz, &insz, true); - return svemem_gprs_per_element_decode(insz + 1, elsz, 0, enc, opcode, pc, opnd); + return 
svemem_gprs_per_element_decode(calculate_mem_transfer(insz + 1, elsz), 0, enc, + opcode, pc, opnd); } static inline bool @@ -7134,8 +7196,8 @@ encode_opnd_svemem_gprs_bhsdx(uint enc, int opcode, byte *pc, opnd_t opnd, aarch64_reg_offset insz, elsz; sizes_from_dtype(enc, &elsz, &insz, true); - return svemem_gprs_per_element_encode(insz + 1, elsz, 0, enc, opcode, pc, opnd, - enc_out); + return svemem_gprs_per_element_encode(calculate_mem_transfer(insz + 1, elsz), 0, enc, + opcode, pc, opnd, enc_out); } static inline bool @@ -7750,7 +7812,8 @@ encode_opnd_mem12(uint enc, int opcode, byte *pc, opnd_t opnd, OUT uint *enc_out return encode_opnd_mem12_scale(extract_uint(enc, 30, 2), false, opnd, enc_out); } -/* SVE prefetch memory address (32-bit offset) [, ., { }] */ +/* SVE prefetch memory address (32-bit offset) [, ., { }] + */ static inline bool decode_opnd_sveprf_gpr_vec32(uint enc, int opcode, byte *pc, OUT opnd_t *opnd) { diff --git a/core/ir/aarch64/codec_sve.txt b/core/ir/aarch64/codec_sve.txt index 08d237088cb..03fc7790927 100644 --- a/core/ir/aarch64/codec_sve.txt +++ b/core/ir/aarch64/codec_sve.txt @@ -339,8 +339,15 @@ 1000010011xxxxxx101xxxxxxxxxxxxx n 910 SVE ld1rh z_h_0 : svememx6_h_5 p10_zer_lo 1000010011xxxxxx110xxxxxxxxxxxxx n 910 SVE ld1rh z_s_0 : svememx6_h_5 p10_zer_lo 1000010011xxxxxx111xxxxxxxxxxxxx n 910 SVE ld1rh z_d_0 : svememx6_h_5 p10_zer_lo -10100100001xxxxx000xxxxxxxxxxxxx n 947 SVE ld1rob z_b_0 : svemem_gprs_b1 p10_zer_lo -10100100000xxxxx000xxxxxxxxxxxxx n 948 SVE ld1rqb z_b_0 : svemem_gprs_b1 p10_zer_lo +10100100001xxxxx000xxxxxxxxxxxxx n 947 SVE ld1rob z_b_0 : svemem_ssz_gpr_shf p10_zer_lo +101001000000xxxx001xxxxxxxxxxxxx n 948 SVE ld1rqb z_b_0 : svemem_ssz_gpr_simm4 p10_zer_lo +10100100000xxxxx000xxxxxxxxxxxxx n 948 SVE ld1rqb z_b_0 : svemem_ssz_gpr_shf p10_zer_lo +101001011000xxxx001xxxxxxxxxxxxx n 1060 SVE ld1rqd z_d_0 : svemem_ssz_gpr_simm4 p10_zer_lo +10100101100xxxxx000xxxxxxxxxxxxx n 1060 SVE ld1rqd z_d_0 : svemem_ssz_gpr_shf p10_zer_lo +101001001000xxxx001xxxxxxxxxxxxx n 1061 SVE ld1rqh z_h_0 : svemem_ssz_gpr_simm4 p10_zer_lo +10100100100xxxxx000xxxxxxxxxxxxx n 1061 SVE ld1rqh z_h_0 : svemem_ssz_gpr_shf p10_zer_lo +101001010000xxxx001xxxxxxxxxxxxx n 1062 SVE ld1rqw z_s_0 : svemem_ssz_gpr_simm4 p10_zer_lo +10100101000xxxxx000xxxxxxxxxxxxx n 1062 SVE ld1rqw z_s_0 : svemem_ssz_gpr_shf p10_zer_lo 1000010111xxxxxx110xxxxxxxxxxxxx n 911 SVE ld1rsb z_h_0 : svememx6_b_5 p10_zer_lo 1000010111xxxxxx101xxxxxxxxxxxxx n 911 SVE ld1rsb z_s_0 : svememx6_b_5 p10_zer_lo 1000010111xxxxxx100xxxxxxxxxxxxx n 911 SVE ld1rsb z_d_0 : svememx6_b_5 p10_zer_lo diff --git a/core/ir/aarch64/instr_create_api.h b/core/ir/aarch64/instr_create_api.h index d77a1f6a312..32807776672 100644 --- a/core/ir/aarch64/instr_create_api.h +++ b/core/ir/aarch64/instr_create_api.h @@ -11368,6 +11368,7 @@ * * This macro is used to encode the forms: * \verbatim + * LD1RQB { .B }, /Z, [{, #}] * LD1RQB { .B }, /Z, [, ] * \endverbatim * \param dc The void * dcontext used to allocate memory for the #instr_t. @@ -11375,12 +11376,85 @@ * \param Pg The governing predicate register, P (Predicate). 
* \param Rn The first source base register with a register offset, * constructed with the function: - * opnd_create_base_disp_aarch64(Rn, Rm, - * DR_EXTEND_UXTX, 0, 0, 0, OPSZ_1) + * For the [\{, #\}] variant: + * opnd_create_base_disp_aarch64( + * Xn, DR_REG_NULL, DR_EXTEND_UXTX, false, 0, 0, OPSZ_16) + * For the [\, \] variant: + * opnd_create_base_disp_shift_aarch64( + * Xn, Xm, DR_EXTEND_UXTX, false, 0, 0, OPSZ_16, 0) */ #define INSTR_CREATE_ld1rqb_sve_pred(dc, Zt, Pg, Rn) \ instr_create_1dst_2src(dc, OP_ld1rqb, Zt, Rn, Pg) +/** + * Creates a LD1RQH instruction. + * + * This macro is used to encode the forms: + * \verbatim + * LD1RQH { .H }, /Z, [{, #}] + * LD1RQH { .H }, /Z, [, , LSL #1] + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zt The destination vector register, Z (Scalable). + * \param Pg The governing predicate register, P (Predicate). + * \param Rn The first source base register with a register offset, + * constructed with the function: + * For the [\{, #\}] variant: + * opnd_create_base_disp_aarch64( + * Xn, DR_REG_NULL, DR_EXTEND_UXTX, false, 0, 0, OPSZ_16) + * For the [\, \, LSL #1] variant: + * opnd_create_base_disp_shift_aarch64( + * Xn, Xm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_16, 1) + */ +#define INSTR_CREATE_ld1rqh_sve_pred(dc, Zt, Pg, Rn) \ + instr_create_1dst_2src(dc, OP_ld1rqh, Zt, Rn, Pg) + +/** + * Creates a LD1RQW instruction. + * + * This macro is used to encode the forms: + * \verbatim + * LD1RQW { .S }, /Z, [{, #}] + * LD1RQW { .S }, /Z, [, , LSL #2] + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zt The destination vector register, Z (Scalable). + * \param Pg The governing predicate register, P (Predicate). + * \param Rn The first source base register with a register offset, + * constructed with the function: + * For the [\{, #\}] variant: + * opnd_create_base_disp_aarch64( + * Xn, DR_REG_NULL, DR_EXTEND_UXTX, false, 0, 0, OPSZ_16) + * For the [\, \, LSL #2] variant: + * opnd_create_base_disp_shift_aarch64( + * Xn, Xm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_16, 2) + */ +#define INSTR_CREATE_ld1rqw_sve_pred(dc, Zt, Pg, Rn) \ + instr_create_1dst_2src(dc, OP_ld1rqw, Zt, Rn, Pg) + +/** + * Creates a LD1RQD instruction. + * + * This macro is used to encode the forms: + * \verbatim + * LD1RQD { .D }, /Z, [{, #}] + * LD1RQD { .D }, /Z, [, , LSL #3] + * \endverbatim + * \param dc The void * dcontext used to allocate memory for the #instr_t. + * \param Zt The destination vector register, Z (Scalable). + * \param Pg The governing predicate register, P (Predicate). + * \param Rn The first source base register with a register offset, + * constructed with the function: + * For the [\{, #\}] variant: + * opnd_create_base_disp_aarch64( + * Xn, DR_REG_NULL, DR_EXTEND_UXTX, false, 0, 0, OPSZ_16) + * For the [\, \, LSL #3] variant: + * opnd_create_base_disp_shift_aarch64( + * Xn, Xm, DR_EXTEND_UXTX, true, 0, 0, OPSZ_16, 3) + */ +#define INSTR_CREATE_ld1rqd_sve_pred(dc, Zt, Pg, Rn) \ + instr_create_1dst_2src(dc, OP_ld1rqd, Zt, Rn, Pg) + /** * Creates a LD1SB instruction. 
* @@ -14098,4 +14172,5 @@ */ #define INSTR_CREATE_trn2_sve(dc, Zd, Zn, Zm) \ instr_create_1dst_2src(dc, OP_trn2, Zd, Zn, Zm) + #endif /* DR_IR_MACROS_AARCH64_H */ diff --git a/core/ir/aarch64/opnd_defs.txt b/core/ir/aarch64/opnd_defs.txt index 576685c5f17..f04906312c7 100644 --- a/core/ir/aarch64/opnd_defs.txt +++ b/core/ir/aarch64/opnd_defs.txt @@ -241,6 +241,8 @@ ----------xxxxxxxxxxxx---------- imm12 # immediate for ADD/SUB ----------xxxxxxxxxxxxxxxxx----- mem12q # size is 16 bytes ----------xxxxxxxxxxxxxxxxx----- prf12 # size is 0 bytes (prefetch variant of mem12) +---------??-xxxx------xxxxx----- svemem_ssz_gpr_simm4 # SVE memory operand [{, #}], + # gets memory size from bits 22:21 ---------??-xxxx------xxxxx----- svemem_gpr_simm4_vl_xreg # SVE memory operand [{, #, MUL VL}] # multiple src/dest registers or single non-temporals ---------????------------------- hsd_immh_sz # encoding of vector element size in immh field @@ -320,6 +322,7 @@ -------??-?xxxxx------xxxxx----- svemem_gpr_vec64 # SVE memory address (64-bit offset) [, .D{, }] -------????-xxxx------xxxxx----- svemem_gpr_simm4_vl_1reg # SVE memory operand [{, #, MUL VL}] # 1 src/dest register +-------????xxxxx------xxxxx----- svemem_ssz_gpr_shf # SVE memory operand [, , LSL #x] -------????xxxxx------xxxxx----- svemem_msz_gpr_shf # SVE memory address [, , LSL #x] -------????xxxxx------xxxxx----- svemem_msz_stgpr_shf # SVE memory address [, , LSL #x] -------????xxxxx------xxxxx----- svemem_gpr_shf # GPR offset and base reg for SVE ld/st, with optional shift diff --git a/suite/tests/api/dis-a64-sve.txt b/suite/tests/api/dis-a64-sve.txt index 66cd5c1716b..2423f7571fd 100644 --- a/suite/tests/api/dis-a64-sve.txt +++ b/suite/tests/api/dis-a64-sve.txt @@ -11453,22 +11453,148 @@ a43d1f9b : ld1rob z27.b, p7/Z, [x28, x29] : ld1rob (%x28,%x29)[32byte a43e1fff : ld1rob z31.b, p7/Z, [sp, x30] : ld1rob (%sp,%x30)[32byte] %p7/z -> %z31.b # LD1RQB { .B }, /Z, [, ] (LD1RQB-Z.P.BR-Contiguous) -a4000000 : ld1rqb z0.b, p0/Z, [x0, x0] : ld1rqb (%x0,%x0)[32byte] %p0/z -> %z0.b -a4050482 : ld1rqb z2.b, p1/Z, [x4, x5] : ld1rqb (%x4,%x5)[32byte] %p1/z -> %z2.b -a40708c4 : ld1rqb z4.b, p2/Z, [x6, x7] : ld1rqb (%x6,%x7)[32byte] %p2/z -> %z4.b -a4090906 : ld1rqb z6.b, p2/Z, [x8, x9] : ld1rqb (%x8,%x9)[32byte] %p2/z -> %z6.b -a40b0d48 : ld1rqb z8.b, p3/Z, [x10, x11] : ld1rqb (%x10,%x11)[32byte] %p3/z -> %z8.b -a40c0d6a : ld1rqb z10.b, p3/Z, [x11, x12] : ld1rqb (%x11,%x12)[32byte] %p3/z -> %z10.b -a40e11ac : ld1rqb z12.b, p4/Z, [x13, x14] : ld1rqb (%x13,%x14)[32byte] %p4/z -> %z12.b -a41011ee : ld1rqb z14.b, p4/Z, [x15, x16] : ld1rqb (%x15,%x16)[32byte] %p4/z -> %z14.b -a4121630 : ld1rqb z16.b, p5/Z, [x17, x18] : ld1rqb (%x17,%x18)[32byte] %p5/z -> %z16.b -a4141671 : ld1rqb z17.b, p5/Z, [x19, x20] : ld1rqb (%x19,%x20)[32byte] %p5/z -> %z17.b -a41616b3 : ld1rqb z19.b, p5/Z, [x21, x22] : ld1rqb (%x21,%x22)[32byte] %p5/z -> %z19.b -a4181af5 : ld1rqb z21.b, p6/Z, [x23, x24] : ld1rqb (%x23,%x24)[32byte] %p6/z -> %z21.b -a4191b17 : ld1rqb z23.b, p6/Z, [x24, x25] : ld1rqb (%x24,%x25)[32byte] %p6/z -> %z23.b -a41b1f59 : ld1rqb z25.b, p7/Z, [x26, x27] : ld1rqb (%x26,%x27)[32byte] %p7/z -> %z25.b -a41d1f9b : ld1rqb z27.b, p7/Z, [x28, x29] : ld1rqb (%x28,%x29)[32byte] %p7/z -> %z27.b -a41e1fff : ld1rqb z31.b, p7/Z, [sp, x30] : ld1rqb (%sp,%x30)[32byte] %p7/z -> %z31.b +a4000000 : ld1rqb z0.b, p0/Z, [x0, x0] : ld1rqb (%x0,%x0)[16byte] %p0/z -> %z0.b +a4050482 : ld1rqb z2.b, p1/Z, [x4, x5] : ld1rqb (%x4,%x5)[16byte] %p1/z -> %z2.b +a40708c4 : ld1rqb z4.b, p2/Z, [x6, 
x7] : ld1rqb (%x6,%x7)[16byte] %p2/z -> %z4.b +a4090906 : ld1rqb z6.b, p2/Z, [x8, x9] : ld1rqb (%x8,%x9)[16byte] %p2/z -> %z6.b +a40b0d48 : ld1rqb z8.b, p3/Z, [x10, x11] : ld1rqb (%x10,%x11)[16byte] %p3/z -> %z8.b +a40c0d6a : ld1rqb z10.b, p3/Z, [x11, x12] : ld1rqb (%x11,%x12)[16byte] %p3/z -> %z10.b +a40e11ac : ld1rqb z12.b, p4/Z, [x13, x14] : ld1rqb (%x13,%x14)[16byte] %p4/z -> %z12.b +a41011ee : ld1rqb z14.b, p4/Z, [x15, x16] : ld1rqb (%x15,%x16)[16byte] %p4/z -> %z14.b +a4121630 : ld1rqb z16.b, p5/Z, [x17, x18] : ld1rqb (%x17,%x18)[16byte] %p5/z -> %z16.b +a4141671 : ld1rqb z17.b, p5/Z, [x19, x20] : ld1rqb (%x19,%x20)[16byte] %p5/z -> %z17.b +a41616b3 : ld1rqb z19.b, p5/Z, [x21, x22] : ld1rqb (%x21,%x22)[16byte] %p5/z -> %z19.b +a4181af5 : ld1rqb z21.b, p6/Z, [x23, x24] : ld1rqb (%x23,%x24)[16byte] %p6/z -> %z21.b +a4191b17 : ld1rqb z23.b, p6/Z, [x24, x25] : ld1rqb (%x24,%x25)[16byte] %p6/z -> %z23.b +a41b1f59 : ld1rqb z25.b, p7/Z, [x26, x27] : ld1rqb (%x26,%x27)[16byte] %p7/z -> %z25.b +a41d1f9b : ld1rqb z27.b, p7/Z, [x28, x29] : ld1rqb (%x28,%x29)[16byte] %p7/z -> %z27.b +a41e1fff : ld1rqb z31.b, p7/Z, [sp, x30] : ld1rqb (%sp,%x30)[16byte] %p7/z -> %z31.b + +# LD1RQB { .B }, /Z, [{, #}] (LD1RQB-Z.P.BI-U8) +a4082000 : ld1rqb z0.b, p0/Z, [x0, #-128] : ld1rqb -0x80(%x0)[16byte] %p0/z -> %z0.b +a4092482 : ld1rqb z2.b, p1/Z, [x4, #-112] : ld1rqb -0x70(%x4)[16byte] %p1/z -> %z2.b +a40a28c4 : ld1rqb z4.b, p2/Z, [x6, #-96] : ld1rqb -0x60(%x6)[16byte] %p2/z -> %z4.b +a40b2906 : ld1rqb z6.b, p2/Z, [x8, #-80] : ld1rqb -0x50(%x8)[16byte] %p2/z -> %z6.b +a40c2d48 : ld1rqb z8.b, p3/Z, [x10, #-64] : ld1rqb -0x40(%x10)[16byte] %p3/z -> %z8.b +a40d2d6a : ld1rqb z10.b, p3/Z, [x11, #-48] : ld1rqb -0x30(%x11)[16byte] %p3/z -> %z10.b +a40e31ac : ld1rqb z12.b, p4/Z, [x13, #-32] : ld1rqb -0x20(%x13)[16byte] %p4/z -> %z12.b +a40f31ee : ld1rqb z14.b, p4/Z, [x15, #-16] : ld1rqb -0x10(%x15)[16byte] %p4/z -> %z14.b +a4003630 : ld1rqb z16.b, p5/Z, [x17, #0] : ld1rqb (%x17)[16byte] %p5/z -> %z16.b +a4003671 : ld1rqb z17.b, p5/Z, [x19, #0] : ld1rqb (%x19)[16byte] %p5/z -> %z17.b +a40136b3 : ld1rqb z19.b, p5/Z, [x21, #16] : ld1rqb +0x10(%x21)[16byte] %p5/z -> %z19.b +a4023af5 : ld1rqb z21.b, p6/Z, [x23, #32] : ld1rqb +0x20(%x23)[16byte] %p6/z -> %z21.b +a4033b17 : ld1rqb z23.b, p6/Z, [x24, #48] : ld1rqb +0x30(%x24)[16byte] %p6/z -> %z23.b +a4043f59 : ld1rqb z25.b, p7/Z, [x26, #64] : ld1rqb +0x40(%x26)[16byte] %p7/z -> %z25.b +a4053f9b : ld1rqb z27.b, p7/Z, [x28, #80] : ld1rqb +0x50(%x28)[16byte] %p7/z -> %z27.b +a4073fff : ld1rqb z31.b, p7/Z, [sp, #112] : ld1rqb +0x70(%sp)[16byte] %p7/z -> %z31.b + +# LD1RQD { .D }, /Z, [, , LSL #3] (LD1RQD-Z.P.BR-Contiguous) +a5800000 : ld1rqd z0.d, p0/Z, [x0, x0, lsl #3] : ld1rqd (%x0,%x0,lsl #3)[16byte] %p0/z -> %z0.d +a5850482 : ld1rqd z2.d, p1/Z, [x4, x5, lsl #3] : ld1rqd (%x4,%x5,lsl #3)[16byte] %p1/z -> %z2.d +a58708c4 : ld1rqd z4.d, p2/Z, [x6, x7, lsl #3] : ld1rqd (%x6,%x7,lsl #3)[16byte] %p2/z -> %z4.d +a5890906 : ld1rqd z6.d, p2/Z, [x8, x9, lsl #3] : ld1rqd (%x8,%x9,lsl #3)[16byte] %p2/z -> %z6.d +a58b0d48 : ld1rqd z8.d, p3/Z, [x10, x11, lsl #3] : ld1rqd (%x10,%x11,lsl #3)[16byte] %p3/z -> %z8.d +a58c0d6a : ld1rqd z10.d, p3/Z, [x11, x12, lsl #3] : ld1rqd (%x11,%x12,lsl #3)[16byte] %p3/z -> %z10.d +a58e11ac : ld1rqd z12.d, p4/Z, [x13, x14, lsl #3] : ld1rqd (%x13,%x14,lsl #3)[16byte] %p4/z -> %z12.d +a59011ee : ld1rqd z14.d, p4/Z, [x15, x16, lsl #3] : ld1rqd (%x15,%x16,lsl #3)[16byte] %p4/z -> %z14.d +a5921630 : ld1rqd z16.d, p5/Z, [x17, x18, lsl #3] : ld1rqd (%x17,%x18,lsl 
#3)[16byte] %p5/z -> %z16.d
+a5941671 : ld1rqd z17.d, p5/Z, [x19, x20, lsl #3] : ld1rqd (%x19,%x20,lsl #3)[16byte] %p5/z -> %z17.d
+a59616b3 : ld1rqd z19.d, p5/Z, [x21, x22, lsl #3] : ld1rqd (%x21,%x22,lsl #3)[16byte] %p5/z -> %z19.d
+a5981af5 : ld1rqd z21.d, p6/Z, [x23, x24, lsl #3] : ld1rqd (%x23,%x24,lsl #3)[16byte] %p6/z -> %z21.d
+a5991b17 : ld1rqd z23.d, p6/Z, [x24, x25, lsl #3] : ld1rqd (%x24,%x25,lsl #3)[16byte] %p6/z -> %z23.d
+a59b1f59 : ld1rqd z25.d, p7/Z, [x26, x27, lsl #3] : ld1rqd (%x26,%x27,lsl #3)[16byte] %p7/z -> %z25.d
+a59d1f9b : ld1rqd z27.d, p7/Z, [x28, x29, lsl #3] : ld1rqd (%x28,%x29,lsl #3)[16byte] %p7/z -> %z27.d
+a59e1fff : ld1rqd z31.d, p7/Z, [sp, x30, lsl #3] : ld1rqd (%sp,%x30,lsl #3)[16byte] %p7/z -> %z31.d
+
+# LD1RQD { <Zt>.D }, <Pg>/Z, [<Xn|SP>{, #<imm>}] (LD1RQD-Z.P.BI-U64)
+a5882000 : ld1rqd z0.d, p0/Z, [x0, #-128] : ld1rqd -0x80(%x0)[16byte] %p0/z -> %z0.d
+a5892482 : ld1rqd z2.d, p1/Z, [x4, #-112] : ld1rqd -0x70(%x4)[16byte] %p1/z -> %z2.d
+a58a28c4 : ld1rqd z4.d, p2/Z, [x6, #-96] : ld1rqd -0x60(%x6)[16byte] %p2/z -> %z4.d
+a58b2906 : ld1rqd z6.d, p2/Z, [x8, #-80] : ld1rqd -0x50(%x8)[16byte] %p2/z -> %z6.d
+a58c2d48 : ld1rqd z8.d, p3/Z, [x10, #-64] : ld1rqd -0x40(%x10)[16byte] %p3/z -> %z8.d
+a58d2d6a : ld1rqd z10.d, p3/Z, [x11, #-48] : ld1rqd -0x30(%x11)[16byte] %p3/z -> %z10.d
+a58e31ac : ld1rqd z12.d, p4/Z, [x13, #-32] : ld1rqd -0x20(%x13)[16byte] %p4/z -> %z12.d
+a58f31ee : ld1rqd z14.d, p4/Z, [x15, #-16] : ld1rqd -0x10(%x15)[16byte] %p4/z -> %z14.d
+a5803630 : ld1rqd z16.d, p5/Z, [x17, #0] : ld1rqd (%x17)[16byte] %p5/z -> %z16.d
+a5803671 : ld1rqd z17.d, p5/Z, [x19, #0] : ld1rqd (%x19)[16byte] %p5/z -> %z17.d
+a58136b3 : ld1rqd z19.d, p5/Z, [x21, #16] : ld1rqd +0x10(%x21)[16byte] %p5/z -> %z19.d
+a5823af5 : ld1rqd z21.d, p6/Z, [x23, #32] : ld1rqd +0x20(%x23)[16byte] %p6/z -> %z21.d
+a5833b17 : ld1rqd z23.d, p6/Z, [x24, #48] : ld1rqd +0x30(%x24)[16byte] %p6/z -> %z23.d
+a5843f59 : ld1rqd z25.d, p7/Z, [x26, #64] : ld1rqd +0x40(%x26)[16byte] %p7/z -> %z25.d
+a5853f9b : ld1rqd z27.d, p7/Z, [x28, #80] : ld1rqd +0x50(%x28)[16byte] %p7/z -> %z27.d
+a5873fff : ld1rqd z31.d, p7/Z, [sp, #112] : ld1rqd +0x70(%sp)[16byte] %p7/z -> %z31.d
+
+# LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #1] (LD1RQH-Z.P.BR-Contiguous)
+a4800000 : ld1rqh z0.h, p0/Z, [x0, x0, LSL #1] : ld1rqh (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.h
+a4850482 : ld1rqh z2.h, p1/Z, [x4, x5, LSL #1] : ld1rqh (%x4,%x5,lsl #1)[16byte] %p1/z -> %z2.h
+a48708c4 : ld1rqh z4.h, p2/Z, [x6, x7, LSL #1] : ld1rqh (%x6,%x7,lsl #1)[16byte] %p2/z -> %z4.h
+a4890906 : ld1rqh z6.h, p2/Z, [x8, x9, LSL #1] : ld1rqh (%x8,%x9,lsl #1)[16byte] %p2/z -> %z6.h
+a48b0d48 : ld1rqh z8.h, p3/Z, [x10, x11, LSL #1] : ld1rqh (%x10,%x11,lsl #1)[16byte] %p3/z -> %z8.h
+a48c0d6a : ld1rqh z10.h, p3/Z, [x11, x12, LSL #1] : ld1rqh (%x11,%x12,lsl #1)[16byte] %p3/z -> %z10.h
+a48e11ac : ld1rqh z12.h, p4/Z, [x13, x14, LSL #1] : ld1rqh (%x13,%x14,lsl #1)[16byte] %p4/z -> %z12.h
+a49011ee : ld1rqh z14.h, p4/Z, [x15, x16, LSL #1] : ld1rqh (%x15,%x16,lsl #1)[16byte] %p4/z -> %z14.h
+a4921630 : ld1rqh z16.h, p5/Z, [x17, x18, LSL #1] : ld1rqh (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.h
+a4941671 : ld1rqh z17.h, p5/Z, [x19, x20, LSL #1] : ld1rqh (%x19,%x20,lsl #1)[16byte] %p5/z -> %z17.h
+a49616b3 : ld1rqh z19.h, p5/Z, [x21, x22, LSL #1] : ld1rqh (%x21,%x22,lsl #1)[16byte] %p5/z -> %z19.h
+a4981af5 : ld1rqh z21.h, p6/Z, [x23, x24, LSL #1] : ld1rqh (%x23,%x24,lsl #1)[16byte] %p6/z -> %z21.h
+a4991b17 : ld1rqh z23.h, p6/Z, [x24, x25, LSL #1] : ld1rqh (%x24,%x25,lsl #1)[16byte] %p6/z -> %z23.h
+a49b1f59 : ld1rqh z25.h, p7/Z, [x26, x27, LSL #1] : ld1rqh (%x26,%x27,lsl #1)[16byte] %p7/z -> %z25.h
+a49d1f9b : ld1rqh z27.h, p7/Z, [x28, x29, LSL #1] : ld1rqh (%x28,%x29,lsl #1)[16byte] %p7/z -> %z27.h
+a49e1fff : ld1rqh z31.h, p7/Z, [sp, x30, LSL #1] : ld1rqh (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.h
+
+# LD1RQH { <Zt>.H }, <Pg>/Z, [<Xn|SP>{, #<imm>}] (LD1RQH-Z.P.BI-U16)
+a4882000 : ld1rqh z0.h, p0/Z, [x0, #-128] : ld1rqh -0x80(%x0)[16byte] %p0/z -> %z0.h
+a4892482 : ld1rqh z2.h, p1/Z, [x4, #-112] : ld1rqh -0x70(%x4)[16byte] %p1/z -> %z2.h
+a48a28c4 : ld1rqh z4.h, p2/Z, [x6, #-96] : ld1rqh -0x60(%x6)[16byte] %p2/z -> %z4.h
+a48b2906 : ld1rqh z6.h, p2/Z, [x8, #-80] : ld1rqh -0x50(%x8)[16byte] %p2/z -> %z6.h
+a48c2d48 : ld1rqh z8.h, p3/Z, [x10, #-64] : ld1rqh -0x40(%x10)[16byte] %p3/z -> %z8.h
+a48d2d6a : ld1rqh z10.h, p3/Z, [x11, #-48] : ld1rqh -0x30(%x11)[16byte] %p3/z -> %z10.h
+a48e31ac : ld1rqh z12.h, p4/Z, [x13, #-32] : ld1rqh -0x20(%x13)[16byte] %p4/z -> %z12.h
+a48f31ee : ld1rqh z14.h, p4/Z, [x15, #-16] : ld1rqh -0x10(%x15)[16byte] %p4/z -> %z14.h
+a4803630 : ld1rqh z16.h, p5/Z, [x17, #0] : ld1rqh (%x17)[16byte] %p5/z -> %z16.h
+a4803671 : ld1rqh z17.h, p5/Z, [x19, #0] : ld1rqh (%x19)[16byte] %p5/z -> %z17.h
+a48136b3 : ld1rqh z19.h, p5/Z, [x21, #16] : ld1rqh +0x10(%x21)[16byte] %p5/z -> %z19.h
+a4823af5 : ld1rqh z21.h, p6/Z, [x23, #32] : ld1rqh +0x20(%x23)[16byte] %p6/z -> %z21.h
+a4833b17 : ld1rqh z23.h, p6/Z, [x24, #48] : ld1rqh +0x30(%x24)[16byte] %p6/z -> %z23.h
+a4843f59 : ld1rqh z25.h, p7/Z, [x26, #64] : ld1rqh +0x40(%x26)[16byte] %p7/z -> %z25.h
+a4853f9b : ld1rqh z27.h, p7/Z, [x28, #80] : ld1rqh +0x50(%x28)[16byte] %p7/z -> %z27.h
+a4873fff : ld1rqh z31.h, p7/Z, [sp, #112] : ld1rqh +0x70(%sp)[16byte] %p7/z -> %z31.h
+
+# LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>, <Xm>, LSL #2] (LD1RQW-Z.P.BR-Contiguous)
+a5000000 : ld1rqw z0.s, p0/Z, [x0, x0, LSL #2] : ld1rqw (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.s
+a5050482 : ld1rqw z2.s, p1/Z, [x4, x5, LSL #2] : ld1rqw (%x4,%x5,lsl #2)[16byte] %p1/z -> %z2.s
+a50708c4 : ld1rqw z4.s, p2/Z, [x6, x7, LSL #2] : ld1rqw (%x6,%x7,lsl #2)[16byte] %p2/z -> %z4.s
+a5090906 : ld1rqw z6.s, p2/Z, [x8, x9, LSL #2] : ld1rqw (%x8,%x9,lsl #2)[16byte] %p2/z -> %z6.s
+a50b0d48 : ld1rqw z8.s, p3/Z, [x10, x11, LSL #2] : ld1rqw (%x10,%x11,lsl #2)[16byte] %p3/z -> %z8.s
+a50c0d6a : ld1rqw z10.s, p3/Z, [x11, x12, LSL #2] : ld1rqw (%x11,%x12,lsl #2)[16byte] %p3/z -> %z10.s
+a50e11ac : ld1rqw z12.s, p4/Z, [x13, x14, LSL #2] : ld1rqw (%x13,%x14,lsl #2)[16byte] %p4/z -> %z12.s
+a51011ee : ld1rqw z14.s, p4/Z, [x15, x16, LSL #2] : ld1rqw (%x15,%x16,lsl #2)[16byte] %p4/z -> %z14.s
+a5121630 : ld1rqw z16.s, p5/Z, [x17, x18, LSL #2] : ld1rqw (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.s
+a5141671 : ld1rqw z17.s, p5/Z, [x19, x20, LSL #2] : ld1rqw (%x19,%x20,lsl #2)[16byte] %p5/z -> %z17.s
+a51616b3 : ld1rqw z19.s, p5/Z, [x21, x22, LSL #2] : ld1rqw (%x21,%x22,lsl #2)[16byte] %p5/z -> %z19.s
+a5181af5 : ld1rqw z21.s, p6/Z, [x23, x24, LSL #2] : ld1rqw (%x23,%x24,lsl #2)[16byte] %p6/z -> %z21.s
+a5191b17 : ld1rqw z23.s, p6/Z, [x24, x25, LSL #2] : ld1rqw (%x24,%x25,lsl #2)[16byte] %p6/z -> %z23.s
+a51b1f59 : ld1rqw z25.s, p7/Z, [x26, x27, LSL #2] : ld1rqw (%x26,%x27,lsl #2)[16byte] %p7/z -> %z25.s
+a51d1f9b : ld1rqw z27.s, p7/Z, [x28, x29, LSL #2] : ld1rqw (%x28,%x29,lsl #2)[16byte] %p7/z -> %z27.s
+a51e1fff : ld1rqw z31.s, p7/Z, [sp, x30, LSL #2] : ld1rqw (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.s
+
+# LD1RQW { <Zt>.S }, <Pg>/Z, [<Xn|SP>{, #<imm>}] (LD1RQW-Z.P.BI-U32)
+a5082000 : ld1rqw z0.s, p0/Z, [x0,
#-128] : ld1rqw -0x80(%x0)[16byte] %p0/z -> %z0.s +a5092482 : ld1rqw z2.s, p1/Z, [x4, #-112] : ld1rqw -0x70(%x4)[16byte] %p1/z -> %z2.s +a50a28c4 : ld1rqw z4.s, p2/Z, [x6, #-96] : ld1rqw -0x60(%x6)[16byte] %p2/z -> %z4.s +a50b2906 : ld1rqw z6.s, p2/Z, [x8, #-80] : ld1rqw -0x50(%x8)[16byte] %p2/z -> %z6.s +a50c2d48 : ld1rqw z8.s, p3/Z, [x10, #-64] : ld1rqw -0x40(%x10)[16byte] %p3/z -> %z8.s +a50d2d6a : ld1rqw z10.s, p3/Z, [x11, #-48] : ld1rqw -0x30(%x11)[16byte] %p3/z -> %z10.s +a50e31ac : ld1rqw z12.s, p4/Z, [x13, #-32] : ld1rqw -0x20(%x13)[16byte] %p4/z -> %z12.s +a50f31ee : ld1rqw z14.s, p4/Z, [x15, #-16] : ld1rqw -0x10(%x15)[16byte] %p4/z -> %z14.s +a5003630 : ld1rqw z16.s, p5/Z, [x17, #0] : ld1rqw (%x17)[16byte] %p5/z -> %z16.s +a5003671 : ld1rqw z17.s, p5/Z, [x19, #0] : ld1rqw (%x19)[16byte] %p5/z -> %z17.s +a50136b3 : ld1rqw z19.s, p5/Z, [x21, #16] : ld1rqw +0x10(%x21)[16byte] %p5/z -> %z19.s +a5023af5 : ld1rqw z21.s, p6/Z, [x23, #32] : ld1rqw +0x20(%x23)[16byte] %p6/z -> %z21.s +a5033b17 : ld1rqw z23.s, p6/Z, [x24, #48] : ld1rqw +0x30(%x24)[16byte] %p6/z -> %z23.s +a5043f59 : ld1rqw z25.s, p7/Z, [x26, #64] : ld1rqw +0x40(%x26)[16byte] %p7/z -> %z25.s +a5053f9b : ld1rqw z27.s, p7/Z, [x28, #80] : ld1rqw +0x50(%x28)[16byte] %p7/z -> %z27.s +a5073fff : ld1rqw z31.s, p7/Z, [sp, #112] : ld1rqw +0x70(%sp)[16byte] %p7/z -> %z31.s # LD1RSB { .D }, /Z, [{, #}] (LD1RSB-Z.P.BI-S64) 85c08000 : ld1rsb z0.d, p0/Z, [x0, #0] : ld1rsb (%x0)[1byte] %p0/z -> %z0.d diff --git a/suite/tests/api/ir_aarch64_sve.c b/suite/tests/api/ir_aarch64_sve.c index 1e861f486f1..94c097b329a 100644 --- a/suite/tests/api/ir_aarch64_sve.c +++ b/suite/tests/api/ir_aarch64_sve.c @@ -15368,21 +15368,141 @@ TEST_INSTR(ld1rob_sve_pred) TEST_INSTR(ld1rqb_sve_pred) { - /* Testing LD1RQB { .B }, /Z, [, ] */ const char *const expected_0_0[6] = { - "ld1rqb (%x0,%x0)[32byte] %p0/z -> %z0.b", - "ld1rqb (%x7,%x8)[32byte] %p2/z -> %z5.b", - "ld1rqb (%x12,%x13)[32byte] %p3/z -> %z10.b", - "ld1rqb (%x17,%x18)[32byte] %p5/z -> %z16.b", - "ld1rqb (%x22,%x23)[32byte] %p6/z -> %z21.b", - "ld1rqb (%sp,%x30)[32byte] %p7/z -> %z31.b", + "ld1rqb (%x0,%x0)[16byte] %p0/z -> %z0.b", + "ld1rqb (%x7,%x8)[16byte] %p2/z -> %z5.b", + "ld1rqb (%x12,%x13)[16byte] %p3/z -> %z10.b", + "ld1rqb (%x17,%x18)[16byte] %p5/z -> %z16.b", + "ld1rqb (%x22,%x23)[16byte] %p6/z -> %z21.b", + "ld1rqb (%sp,%x30)[16byte] %p7/z -> %z31.b", }; TEST_LOOP(ld1rqb, ld1rqb_sve_pred, 6, expected_0_0[i], opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), opnd_create_base_disp_aarch64(Xn_six_offset_2_sp[i], Xn_six_offset_3[i], - DR_EXTEND_UXTX, 0, 0, 0, OPSZ_32)); + DR_EXTEND_UXTX, 0, 0, 0, OPSZ_16)); + + /* Testing LD1RQB { .B }, /Z, [{, #}] */ + static const int imm4_1_0[6] = { -128, -48, 0, 48, 80, 112 }; + const char *const expected_1_0[6] = { + "ld1rqb -0x80(%x0)[16byte] %p0/z -> %z0.b", + "ld1rqb -0x30(%x7)[16byte] %p2/z -> %z5.b", + "ld1rqb (%x12)[16byte] %p3/z -> %z10.b", + "ld1rqb +0x30(%x17)[16byte] %p5/z -> %z16.b", + "ld1rqb +0x50(%x22)[16byte] %p6/z -> %z21.b", + "ld1rqb +0x70(%sp)[16byte] %p7/z -> %z31.b", + }; + TEST_LOOP(ld1rqb, ld1rqb_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_1), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], + OPSZ_16)); +} + +TEST_INSTR(ld1rqd_sve_pred) +{ + /* Testing LD1RQD { .D }, /Z, [, , LSL #3] */ + const char *const expected_0_0[6] = { + 
"ld1rqd (%x0,%x0,lsl #3)[16byte] %p0/z -> %z0.d", + "ld1rqd (%x7,%x8,lsl #3)[16byte] %p2/z -> %z5.d", + "ld1rqd (%x12,%x13,lsl #3)[16byte] %p3/z -> %z10.d", + "ld1rqd (%x17,%x18,lsl #3)[16byte] %p5/z -> %z16.d", + "ld1rqd (%x22,%x23,lsl #3)[16byte] %p6/z -> %z21.d", + "ld1rqd (%sp,%x30,lsl #3)[16byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1rqd, ld1rqd_sve_pred, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], + Xn_six_offset_3[i], DR_EXTEND_UXTX, + true, 0, 0, OPSZ_16, 3)); + + /* Testing LD1RQD { .D }, /Z, [{, #}] */ + static const int imm4_1_0[6] = { -128, -48, 0, 48, 80, 112 }; + const char *const expected_1_0[6] = { + "ld1rqd -0x80(%x0)[16byte] %p0/z -> %z0.d", + "ld1rqd -0x30(%x7)[16byte] %p2/z -> %z5.d", + "ld1rqd (%x12)[16byte] %p3/z -> %z10.d", + "ld1rqd +0x30(%x17)[16byte] %p5/z -> %z16.d", + "ld1rqd +0x50(%x22)[16byte] %p6/z -> %z21.d", + "ld1rqd +0x70(%sp)[16byte] %p7/z -> %z31.d", + }; + TEST_LOOP(ld1rqd, ld1rqd_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_8), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], + OPSZ_16)); +} + +TEST_INSTR(ld1rqh_sve_pred) +{ + /* Testing LD1RQH { .H }, /Z, [, ] */ + const char *const expected_0_0[6] = { + "ld1rqh (%x0,%x0,lsl #1)[16byte] %p0/z -> %z0.h", + "ld1rqh (%x7,%x8,lsl #1)[16byte] %p2/z -> %z5.h", + "ld1rqh (%x12,%x13,lsl #1)[16byte] %p3/z -> %z10.h", + "ld1rqh (%x17,%x18,lsl #1)[16byte] %p5/z -> %z16.h", + "ld1rqh (%x22,%x23,lsl #1)[16byte] %p6/z -> %z21.h", + "ld1rqh (%sp,%x30,lsl #1)[16byte] %p7/z -> %z31.h", + }; + TEST_LOOP(ld1rqh, ld1rqh_sve_pred, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], + Xn_six_offset_3[i], DR_EXTEND_UXTX, + true, 0, 0, OPSZ_16, 1)); + + /* Testing LD1RQH { .H }, /Z, [{, #}] */ + static const int imm4_1_0[6] = { -128, -48, 0, 48, 80, 112 }; + const char *const expected_1_0[6] = { + "ld1rqh -0x80(%x0)[16byte] %p0/z -> %z0.h", + "ld1rqh -0x30(%x7)[16byte] %p2/z -> %z5.h", + "ld1rqh (%x12)[16byte] %p3/z -> %z10.h", + "ld1rqh +0x30(%x17)[16byte] %p5/z -> %z16.h", + "ld1rqh +0x50(%x22)[16byte] %p6/z -> %z21.h", + "ld1rqh +0x70(%sp)[16byte] %p7/z -> %z31.h", + }; + TEST_LOOP(ld1rqh, ld1rqh_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_2), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], + OPSZ_16)); +} + +TEST_INSTR(ld1rqw_sve_pred) +{ + /* Testing LD1RQW { .S }, /Z, [, , LSL #2] */ + const char *const expected_0_0[6] = { + "ld1rqw (%x0,%x0,lsl #2)[16byte] %p0/z -> %z0.s", + "ld1rqw (%x7,%x8,lsl #2)[16byte] %p2/z -> %z5.s", + "ld1rqw (%x12,%x13,lsl #2)[16byte] %p3/z -> %z10.s", + "ld1rqw (%x17,%x18,lsl #2)[16byte] %p5/z -> %z16.s", + "ld1rqw (%x22,%x23,lsl #2)[16byte] %p6/z -> %z21.s", + "ld1rqw (%sp,%x30,lsl #2)[16byte] %p7/z -> %z31.s", + }; + TEST_LOOP(ld1rqw, ld1rqw_sve_pred, 6, expected_0_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp_shift_aarch64(Xn_six_offset_2_sp[i], + Xn_six_offset_3[i], DR_EXTEND_UXTX, + true, 0, 0, OPSZ_16, 2)); + + /* 
Testing LD1RQW { .S }, /Z, [{, #}] */ + static const int imm4_1_0[6] = { -128, -48, 0, 48, 80, 112 }; + const char *const expected_1_0[6] = { + "ld1rqw -0x80(%x0)[16byte] %p0/z -> %z0.s", + "ld1rqw -0x30(%x7)[16byte] %p2/z -> %z5.s", + "ld1rqw (%x12)[16byte] %p3/z -> %z10.s", + "ld1rqw +0x30(%x17)[16byte] %p5/z -> %z16.s", + "ld1rqw +0x50(%x22)[16byte] %p6/z -> %z21.s", + "ld1rqw +0x70(%sp)[16byte] %p7/z -> %z31.s", + }; + TEST_LOOP(ld1rqw, ld1rqw_sve_pred, 6, expected_1_0[i], + opnd_create_reg_element_vector(Zn_six_offset_0[i], OPSZ_4), + opnd_create_predicate_reg(Pn_half_six_offset_0[i], false), + opnd_create_base_disp(Xn_six_offset_2_sp[i], DR_REG_NULL, 0, imm4_1_0[i], + OPSZ_16)); } TEST_INSTR(ld1sb_sve_pred) @@ -19954,6 +20074,7 @@ TEST_INSTR(trn2_sve) opnd_create_reg_element_vector(Zn_six_offset_1[i], OPSZ_16), opnd_create_reg_element_vector(Zn_six_offset_2[i], OPSZ_16)); } + int main(int argc, char *argv[]) { @@ -20376,8 +20497,6 @@ main(int argc, char *argv[]) RUN_INSTR_TEST(fcmla_sve_idx); RUN_INSTR_TEST(ld1b_sve_pred); - RUN_INSTR_TEST(ld1rob_sve_pred); - RUN_INSTR_TEST(ld1rqb_sve_pred); RUN_INSTR_TEST(ld1sb_sve_pred); RUN_INSTR_TEST(ldnt1b_sve_pred); RUN_INSTR_TEST(ld1h_sve_pred); @@ -20452,6 +20571,12 @@ main(int argc, char *argv[]) RUN_INSTR_TEST(ldnf1sw_sve_pred); RUN_INSTR_TEST(ldnf1w_sve_pred); + RUN_INSTR_TEST(ld1rob_sve_pred); + RUN_INSTR_TEST(ld1rqb_sve_pred); + RUN_INSTR_TEST(ld1rqd_sve_pred); + RUN_INSTR_TEST(ld1rqh_sve_pred); + RUN_INSTR_TEST(ld1rqw_sve_pred); + RUN_INSTR_TEST(trn1_sve); RUN_INSTR_TEST(trn2_sve);
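
For reference, below is a minimal usage sketch (not part of the patch) showing how a client could drive one of the new macros; it simply mirrors the operand constructors documented for the LD1RQ* macros in instr_create_api.h above. The dcontext dc, the instruction list ilist, and the register and immediate choices are illustrative placeholders.

#include "dr_api.h"

/* Append both addressing forms of LD1RQW to an instruction list. */
static void
append_ld1rqw_examples(void *dc, instrlist_t *ilist)
{
    /* LD1RQW { Z0.S }, P1/Z, [X0, X1, LSL #2]: register-offset form. */
    instr_t *reg_form = INSTR_CREATE_ld1rqw_sve_pred(
        dc, opnd_create_reg_element_vector(DR_REG_Z0, OPSZ_4),
        opnd_create_predicate_reg(DR_REG_P1, false),
        opnd_create_base_disp_shift_aarch64(DR_REG_X0, DR_REG_X1, DR_EXTEND_UXTX,
                                            true, 0, 0, OPSZ_16, 2));
    /* LD1RQW { Z0.S }, P1/Z, [X0, #32]: immediate-offset form; per the tests
     * above the displacement is a multiple of 16 in the range -128..112. */
    instr_t *imm_form = INSTR_CREATE_ld1rqw_sve_pred(
        dc, opnd_create_reg_element_vector(DR_REG_Z0, OPSZ_4),
        opnd_create_predicate_reg(DR_REG_P1, false),
        opnd_create_base_disp(DR_REG_X0, DR_REG_NULL, 0, 32, OPSZ_16));
    instrlist_meta_append(ilist, reg_form);
    instrlist_meta_append(ilist, imm_form);
}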