[Hexagon] Adding vector multiplies. Cleaning up tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227609 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Colin LeMahieu 2015-01-30 20:56:54 +00:00
parent 9dbaae6fae
commit a6c6e1ec6c
5 changed files with 404 additions and 20 deletions

View File

@ -2506,6 +2506,50 @@ def M2_vcmpy_s1_sat_r: T_M2_vmpy <"vcmpyr", 0b101, 0b110, 1, 0, 1>;
def M2_vcmpy_s0_sat_r: T_M2_vmpy <"vcmpyr", 0b001, 0b110, 0, 0, 1>;
}
// Vector dual multiply: Rdd=vdmpy(Rss,Rtt)[:<<1]:sat
let isCodeGenOnly = 0 in {
def M2_vdmpys_s1: T_M2_vmpy <"vdmpy", 0b100, 0b100, 1, 0, 1>;
def M2_vdmpys_s0: T_M2_vmpy <"vdmpy", 0b000, 0b100, 0, 0, 1>;
}
// Vector multiply even halfwords: Rdd=vmpyeh(Rss,Rtt)[:<<1]:sat
let isCodeGenOnly = 0 in {
def M2_vmpy2es_s1: T_M2_vmpy <"vmpyeh", 0b100, 0b110, 1, 0, 1>;
def M2_vmpy2es_s0: T_M2_vmpy <"vmpyeh", 0b000, 0b110, 0, 0, 1>;
}
//Rdd=vmpywoh(Rss,Rtt)[:<<1][:rnd]:sat
let isCodeGenOnly = 0 in {
def M2_mmpyh_s0: T_M2_vmpy <"vmpywoh", 0b000, 0b111, 0, 0, 1>;
def M2_mmpyh_s1: T_M2_vmpy <"vmpywoh", 0b100, 0b111, 1, 0, 1>;
def M2_mmpyh_rs0: T_M2_vmpy <"vmpywoh", 0b001, 0b111, 0, 1, 1>;
def M2_mmpyh_rs1: T_M2_vmpy <"vmpywoh", 0b101, 0b111, 1, 1, 1>;
}
//Rdd=vmpyweh(Rss,Rtt)[:<<1][:rnd]:sat
let isCodeGenOnly = 0 in {
def M2_mmpyl_s0: T_M2_vmpy <"vmpyweh", 0b000, 0b101, 0, 0, 1>;
def M2_mmpyl_s1: T_M2_vmpy <"vmpyweh", 0b100, 0b101, 1, 0, 1>;
def M2_mmpyl_rs0: T_M2_vmpy <"vmpyweh", 0b001, 0b101, 0, 1, 1>;
def M2_mmpyl_rs1: T_M2_vmpy <"vmpyweh", 0b101, 0b101, 1, 1, 1>;
}
//Rdd=vmpywouh(Rss,Rtt)[:<<1][:rnd]:sat
let isCodeGenOnly = 0 in {
def M2_mmpyuh_s0: T_M2_vmpy <"vmpywouh", 0b010, 0b111, 0, 0, 1>;
def M2_mmpyuh_s1: T_M2_vmpy <"vmpywouh", 0b110, 0b111, 1, 0, 1>;
def M2_mmpyuh_rs0: T_M2_vmpy <"vmpywouh", 0b011, 0b111, 0, 1, 1>;
def M2_mmpyuh_rs1: T_M2_vmpy <"vmpywouh", 0b111, 0b111, 1, 1, 1>;
}
//Rdd=vmpyweuh(Rss,Rtt)[:<<1][:rnd]:sat
let isCodeGenOnly = 0 in {
def M2_mmpyul_s0: T_M2_vmpy <"vmpyweuh", 0b010, 0b101, 0, 0, 1>;
def M2_mmpyul_s1: T_M2_vmpy <"vmpyweuh", 0b110, 0b101, 1, 0, 1>;
def M2_mmpyul_rs0: T_M2_vmpy <"vmpyweuh", 0b011, 0b101, 0, 1, 1>;
def M2_mmpyul_rs1: T_M2_vmpy <"vmpyweuh", 0b111, 0b101, 1, 1, 1>;
}
let hasNewValue = 1, opNewValue = 0 in
class T_MType_mpy <string mnemonic, bits<4> RegTyBits, RegisterClass RC,
bits<3> MajOp, bits<3> MinOp, bit isSat = 0, bit isRnd = 0,
@ -2547,8 +2591,11 @@ class T_MType_rr2 <string mnemonic, bits<3> MajOp, bits<3> MinOp,
bit isSat = 0, bit isRnd = 0, string op2str = "" >
: T_MType_mpy<mnemonic, 0b1101, IntRegs, MajOp, MinOp, isSat, isRnd, op2str>;
let isCodeGenOnly = 0 in
let isCodeGenOnly = 0 in {
def M2_vradduh : T_MType_dd <"vradduh", 0b000, 0b001, 0, 0>;
def M2_vdmpyrs_s0 : T_MType_dd <"vdmpy", 0b000, 0b000, 1, 1>;
def M2_vdmpyrs_s1 : T_MType_dd <"vdmpy", 0b100, 0b000, 1, 1>;
}
let CextOpcode = "mpyi", InputType = "reg", isCodeGenOnly = 0 in
def M2_mpyi : T_MType_rr1 <"mpyi", 0b000, 0b000>, ImmRegRel;
@ -2561,6 +2608,11 @@ def M2_mpyu_up : T_MType_rr1 <"mpyu", 0b010, 0b001>;
let isCodeGenOnly = 0 in
def M2_dpmpyss_rnd_s0 : T_MType_rr1 <"mpy", 0b001, 0b001, 0, 1>;
let isCodeGenOnly = 0 in {
def M2_vmpy2s_s0pack : T_MType_rr1 <"vmpyh", 0b001, 0b111, 1, 1>;
def M2_vmpy2s_s1pack : T_MType_rr1 <"vmpyh", 0b101, 0b111, 1, 1>;
}
let isCodeGenOnly = 0 in {
def M2_hmmpyh_rs1 : T_MType_rr2 <"mpy", 0b101, 0b100, 1, 1, ".h">;
def M2_hmmpyl_rs1 : T_MType_rr2 <"mpy", 0b111, 0b100, 1, 1, ".l">;
@ -2823,6 +2875,12 @@ def M2_vrcmpyr_s0c: T_XTYPE_Vect <"vrcmpyr", 0b011, 0b001, 1>;
def M2_vrcmacr_s0: T_XTYPE_Vect_acc <"vrcmpyr", 0b000, 0b001, 0>;
def M2_vrcmacr_s0c: T_XTYPE_Vect_acc <"vrcmpyr", 0b011, 0b001, 1>;
}
// Vector reduce halfwords:
// Rdd[+]=vrmpyh(Rss,Rtt)
let isCodeGenOnly = 0 in {
def M2_vrmpy_s0: T_XTYPE_Vect <"vrmpyh", 0b000, 0b010, 0>;
def M2_vrmac_s0: T_XTYPE_Vect_acc <"vrmpyh", 0b000, 0b010, 0>;
}
//===----------------------------------------------------------------------===//
// Template Class -- Vector Multiply with accumulation.
@ -2850,6 +2908,58 @@ class T_M2_vmpy_acc_sat < string opc, bits<3> MajOp, bits<3> MinOp,
let Inst{12-8} = Rtt;
}
// Template class for 64-bit vector multiply-accumulate instructions whose
// assembly syntax carries no ":sat" suffix:
//   Rxx += opc(Rss, Rtt)[:<<1][:rnd]
//
// Template parameters:
//   opc      - mnemonic spliced into the assembly string.
//   MajOp    - value placed in Inst{23-21}.
//   MinOp    - value placed in Inst{7-5}.
//   hasShift - when set, ":<<1" is appended to the assembly string.
//   isRnd    - when set, ":rnd" is appended to the assembly string.
//
// The accumulator appears both as the output $Rxx and as the input $dst2;
// the "$dst2 = $Rxx" string passed to MInst relates the two.
// NOTE(review): presumably this is the operand-tying constraint argument of
// MInst -- confirm against the MInst definition.
class T_M2_vmpy_acc < string opc, bits<3> MajOp, bits<3> MinOp,
bit hasShift, bit isRnd >
: MInst <(outs DoubleRegs:$Rxx),
(ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
"$Rxx += "#opc#"($Rss, $Rtt)"#!if(hasShift,":<<1","")
#!if(isRnd,":rnd",""),
[], "$dst2 = $Rxx",M_tc_3x_SLOT23 > {
// 5-bit encoding fields for the three double-register operands.
bits<5> Rxx;
bits<5> Rss;
bits<5> Rtt;
// Fixed opcode bits shared by every instruction built from this class.
let IClass = 0b1110;
let Inst{27-24} = 0b1010;
// Per-instruction opcode selectors supplied by the template arguments.
let Inst{23-21} = MajOp;
let Inst{7-5} = MinOp;
// Operand placement: accumulator in the low bits, then the two sources.
let Inst{4-0} = Rxx;
let Inst{20-16} = Rss;
let Inst{12-8} = Rtt;
}
// Vector multiply word by signed half with accumulation
// Rxx+=vmpyw[eo]h(Rss,Rtt)[:<<1][:rnd]:sat
let isCodeGenOnly = 0 in {
def M2_mmacls_s1: T_M2_vmpy_acc_sat <"vmpyweh", 0b100, 0b101, 1, 0>;
def M2_mmacls_s0: T_M2_vmpy_acc_sat <"vmpyweh", 0b000, 0b101, 0, 0>;
def M2_mmacls_rs1: T_M2_vmpy_acc_sat <"vmpyweh", 0b101, 0b101, 1, 1>;
def M2_mmacls_rs0: T_M2_vmpy_acc_sat <"vmpyweh", 0b001, 0b101, 0, 1>;
}
let isCodeGenOnly = 0 in {
def M2_mmachs_s1: T_M2_vmpy_acc_sat <"vmpywoh", 0b100, 0b111, 1, 0>;
def M2_mmachs_s0: T_M2_vmpy_acc_sat <"vmpywoh", 0b000, 0b111, 0, 0>;
def M2_mmachs_rs1: T_M2_vmpy_acc_sat <"vmpywoh", 0b101, 0b111, 1, 1>;
def M2_mmachs_rs0: T_M2_vmpy_acc_sat <"vmpywoh", 0b001, 0b111, 0, 1>;
}
// Vector multiply even halfwords with accumulation
// Rxx+=vmpyeh(Rss,Rtt)[:<<1][:sat]
let isCodeGenOnly = 0 in {
def M2_vmac2es: T_M2_vmpy_acc <"vmpyeh", 0b001, 0b010, 0, 0>;
def M2_vmac2es_s1: T_M2_vmpy_acc_sat <"vmpyeh", 0b100, 0b110, 1, 0>;
def M2_vmac2es_s0: T_M2_vmpy_acc_sat <"vmpyeh", 0b000, 0b110, 0, 0>;
}
// Vector dual multiply with accumulation
// Rxx+=vdmpy(Rss,Rtt)[:<<1]:sat
let isCodeGenOnly = 0 in {
def M2_vdmacs_s1: T_M2_vmpy_acc_sat <"vdmpy", 0b100, 0b100, 1, 0>;
def M2_vdmacs_s0: T_M2_vmpy_acc_sat <"vdmpy", 0b000, 0b100, 0, 0>;
}
// Vector complex multiply real or imaginary with accumulation
// Rxx+=vcmpy[ir](Rss,Rtt):sat
let isCodeGenOnly = 0 in {
@ -3027,6 +3137,21 @@ def M2_cnacsc_s0 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b010, 0b111, 1, 0, 1>;
def M2_cmacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "+", 0b110, 0b110, 1, 1, 1>;
def M2_cnacsc_s1 : T_XTYPE_mpy64_acc < "cmpy", "-", 0b110, 0b111, 1, 1, 1>;
}
// Vector multiply halfwords
// Rdd=vmpyh(Rs,Rt)[:<<1]:sat
//let Defs = [USR_OVF] in {
let isCodeGenOnly = 0 in {
def M2_vmpy2s_s1 : T_XTYPE_mpy64 < "vmpyh", 0b100, 0b101, 1, 1, 0>;
def M2_vmpy2s_s0 : T_XTYPE_mpy64 < "vmpyh", 0b000, 0b101, 1, 0, 0>;
}
//}
// Rxx+=vmpyh(Rs,Rt)[:<<1][:sat]
let isCodeGenOnly = 0 in {
def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>;
def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>;
def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>;
}
def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))),
(i64 (anyext (i32 IntRegs:$src2))))),

View File

@ -2238,29 +2238,50 @@ def : Pat <(add (mul (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
IntRegs:$src3))>;
// Vector reduce multiply word by signed half (32x16)
// Rdd=vrmpyweh(Rss,Rtt)[:<<1]
// Rdd=vrmpywoh(Rss,Rtt)[:<<1]
// Rxx+=vrmpyweh(Rss,Rtt)[:<<1]
// Rxx+=vrmpywoh(Rss,Rtt)[:<<1]
//Rdd=vrmpyweh(Rss,Rtt)[:<<1]
let isCodeGenOnly = 0 in {
def M4_vrmpyeh_s0 : T_M2_vmpy<"vrmpyweh", 0b010, 0b100, 0, 0, 0>;
def M4_vrmpyeh_s1 : T_M2_vmpy<"vrmpyweh", 0b110, 0b100, 1, 0, 0>;
}
// Multiply and use upper result
// Rd=mpy(Rs,Rt.H):<<1:sat
// Rd=mpy(Rs,Rt.L):<<1:sat
// Rd=mpy(Rs,Rt):<<1
// Rd=mpy(Rs,Rt):<<1:sat
// Rd=mpysu(Rs,Rt)
// Rx+=mpy(Rs,Rt):<<1:sat
// Rx-=mpy(Rs,Rt):<<1:sat
//Rdd=vrmpywoh(Rss,Rtt)[:<<1]
let isCodeGenOnly = 0 in {
def M4_vrmpyoh_s0 : T_M2_vmpy<"vrmpywoh", 0b001, 0b010, 0, 0, 0>;
def M4_vrmpyoh_s1 : T_M2_vmpy<"vrmpywoh", 0b101, 0b010, 1, 0, 0>;
}
//Rxx+=vrmpyweh(Rss,Rtt)[:<<1]
let isCodeGenOnly = 0 in {
def M4_vrmpyeh_acc_s0: T_M2_vmpy_acc<"vrmpyweh", 0b001, 0b110, 0, 0>;
def M4_vrmpyeh_acc_s1: T_M2_vmpy_acc<"vrmpyweh", 0b101, 0b110, 1, 0>;
}
// Vector multiply bytes
// Rdd=vmpybsu(Rs,Rt)
// Rdd=vmpybu(Rs,Rt)
// Rxx+=vmpybsu(Rs,Rt)
// Rxx+=vmpybu(Rs,Rt)
//Rxx+=vrmpywoh(Rss,Rtt)[:<<1]
let isCodeGenOnly = 0 in {
def M4_vrmpyoh_acc_s0: T_M2_vmpy_acc<"vrmpywoh", 0b011, 0b110, 0, 0>;
def M4_vrmpyoh_acc_s1: T_M2_vmpy_acc<"vrmpywoh", 0b111, 0b110, 1, 0>;
}
// Vector multiply halfwords, signed by unsigned
// Rdd=vmpyhsu(Rs,Rt)[:<<1]:sat
let isCodeGenOnly = 0 in {
def M2_vmpy2su_s0 : T_XTYPE_mpy64 < "vmpyhsu", 0b000, 0b111, 1, 0, 0>;
def M2_vmpy2su_s1 : T_XTYPE_mpy64 < "vmpyhsu", 0b100, 0b111, 1, 1, 0>;
}
// Rxx+=vmpyhsu(Rs,Rt)[:<<1]:sat
let isCodeGenOnly = 0 in {
def M2_vmac2su_s0 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b011, 0b101, 1, 0, 0>;
def M2_vmac2su_s1 : T_XTYPE_mpy64_acc < "vmpyhsu", "+", 0b111, 0b101, 1, 1, 0>;
}
// Vector polynomial multiply halfwords
// Rdd=vpmpyh(Rs,Rt)
let isCodeGenOnly = 0 in
def M4_vpmpyh : T_XTYPE_mpy64 < "vpmpyh", 0b110, 0b111, 0, 0, 0>;
// Rxx^=vpmpyh(Rs,Rt)
let isCodeGenOnly = 0 in
def M4_vpmpyh_acc : T_XTYPE_mpy64_acc < "vpmpyh", "^", 0b101, 0b111, 0, 0, 0>;
// Polynomial multiply words
// Rdd=pmpyw(Rs,Rt)

View File

@ -15,9 +15,29 @@
// XTYPE/MPY
//===----------------------------------------------------------------------===//
// Byte-vector multiplies that take two 64-bit register pairs. Every def in
// this group is gated on the HasV5T predicate and has isCodeGenOnly cleared.
// Each mnemonic is defined twice: once through a plain class and once through
// the corresponding accumulating (_acc) class.
//Rdd[+]=vrmpybsu(Rss,Rtt)
let Predicates = [HasV5T], isCodeGenOnly = 0 in {
def M5_vrmpybsu: T_XTYPE_Vect<"vrmpybsu", 0b110, 0b001, 0>;
def M5_vrmacbsu: T_XTYPE_Vect_acc<"vrmpybsu", 0b110, 0b001, 0>;
//Rdd[+]=vrmpybu(Rss,Rtt)
def M5_vrmpybuu: T_XTYPE_Vect<"vrmpybu", 0b100, 0b001, 0>;
def M5_vrmacbuu: T_XTYPE_Vect_acc<"vrmpybu", 0b100, 0b001, 0>;
//Rdd[+]=vdmpybsu(Rss,Rtt):sat
// The accumulating form below is built from the saturating accumulate class.
def M5_vdmpybsu: T_M2_vmpy<"vdmpybsu", 0b101, 0b001, 0, 0, 1>;
def M5_vdmacbsu: T_M2_vmpy_acc_sat <"vdmpybsu", 0b001, 0b001, 0, 0>;
}
// Vector multiply bytes
// Rdd=vmpyb[s]u(Rs,Rt)
let Predicates = [HasV5T], isCodeGenOnly = 0 in {
def M5_vmpybsu: T_XTYPE_mpy64 <"vmpybsu", 0b010, 0b001, 0, 0, 0>;
def M5_vmpybuu: T_XTYPE_mpy64 <"vmpybu", 0b100, 0b001, 0, 0, 0>;
// Rxx+=vmpyb[s]u(Rs,Rt)
def M5_vmacbsu: T_XTYPE_mpy64_acc <"vmpybsu", "+", 0b110, 0b001, 0, 0, 0>;
def M5_vmacbuu: T_XTYPE_mpy64_acc <"vmpybu", "+", 0b100, 0b001, 0, 0, 0>;
// Rd=vaddhub(Rss,Rtt):sat
let hasNewValue = 1, opNewValue = 0 in
def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>;
@ -765,6 +785,28 @@ def S5_asrhub_rnd_sat_goodsyntax
: SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4Imm:$u4),
"$Rd = vasrhub($Rss, #$u4):rnd:sat">, Requires<[HasV5T]>;
// Floating point reciprocal square root approximation
// Syntax: Rd, Pe = sfinvsqrta(Rs)
// Produces two results: an integer register $Rd and a predicate register $Pe.
// Declared as using USR and available only when HasV5T holds.
let Uses = [USR], isPredicateLate = 1, isFP = 1,
hasSideEffects = 0, hasNewValue = 1, opNewValue = 0,
validSubTargets = HasV5SubT, isCodeGenOnly = 0 in
def F2_sfinvsqrta: SInst <
(outs IntRegs:$Rd, PredRegs:$Pe),
(ins IntRegs:$Rs),
"$Rd, $Pe = sfinvsqrta($Rs)" > ,
Requires<[HasV5T]> {
// Encoding fields: 5 bits per integer register, 2 bits for the predicate.
bits<5> Rd;
bits<2> Pe;
bits<5> Rs;
// Fixed opcode bits.
let IClass = 0b1000;
let Inst{27-21} = 0b1011111;
// Source register.
let Inst{20-16} = Rs;
// Bit 7 is a fixed zero; the predicate result sits directly below it.
let Inst{7} = 0b0;
let Inst{6-5} = Pe;
// Destination register.
let Inst{4-0} = Rd;
}
// Complex multiply 32x16
let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23, isCodeGenOnly = 0 in {
def M4_cmpyi_whc : T_S3op_8<"cmpyiwh", 0b101, 1, 1, 1, 1>;

View File

@ -1,11 +1,17 @@
# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
# RUN: llvm-mc -triple=hexagon -disassemble < %s | FileCheck %s
# Hexagon Programmer's Reference Manual 11.10.4 XTYPE/FP
# Floating point addition
0x11 0xdf 0x15 0xeb
# CHECK: r17 = sfadd(r21, r31)
# Classify floating-point value
0x03 0xd5 0xf1 0x85
# CHECK: p3 = sfclass(r17, #21)
0xb3 0xc2 0x90 0xdc
# CHECK: p3 = dfclass(r17:16, #21)
# Compare floating-point value
0x03 0xd5 0xf1 0xc7
# CHECK: p3 = sfcmp.ge(r17, r21)
0x23 0xd5 0xf1 0xc7
@ -22,10 +28,14 @@
# CHECK: p3 = dfcmp.ge(r17:16, r21:20)
0x63 0xd4 0xf0 0xd2
# CHECK: p3 = dfcmp.uo(r17:16, r21:20)
# Convert floating-point value to other format
0x10 0xc0 0x95 0x84
# CHECK: r17:16 = convert_sf2df(r21)
0x31 0xc0 0x14 0x88
# CHECK: r17 = convert_df2sf(r21:20)
# Convert integer to floating-point value
0x50 0xc0 0xf4 0x80
# CHECK: r17:16 = convert_ud2df(r21:20)
0x70 0xc0 0xf4 0x80
@ -42,6 +52,8 @@
# CHECK: r17 = convert_uw2sf(r21)
0x11 0xc0 0x55 0x8b
# CHECK: r17 = convert_w2sf(r21)
# Convert floating-point value to integer
0x10 0xc0 0xf4 0x80
# CHECK: r17:16 = convert_df2d(r21:20)
0x30 0xc0 0xf4 0x80
@ -74,22 +86,36 @@
# CHECK: r17 = convert_sf2w(r21)
0x31 0xc0 0x95 0x8b
# CHECK: r17 = convert_sf2w(r21):chop
# Floating point extreme value assistance
0x11 0xc0 0xb5 0x8b
# CHECK: r17 = sffixupr(r21)
0x11 0xdf 0xd5 0xeb
# CHECK: r17 = sffixupn(r21, r31)
0x31 0xdf 0xd5 0xeb
# CHECK: r17 = sffixupd(r21, r31)
# Floating point fused multiply-add
0x91 0xdf 0x15 0xef
# CHECK: r17 += sfmpy(r21, r31)
0xb1 0xdf 0x15 0xef
# CHECK: r17 -= sfmpy(r21, r31)
# Floating point fused multiply-add with scaling
0xf1 0xdf 0x75 0xef
# CHECK: r17 += sfmpy(r21, r31, p3):scale
# Floating point reciprocal square root approximation
0x71 0xc0 0xf5 0x8b
# CHECK: r17, p3 = sfinvsqrta(r21)
# Floating point fused multiply-add for library routines
0xd1 0xdf 0x15 0xef
# CHECK: r17 += sfmpy(r21, r31):lib
0xf1 0xdf 0x15 0xef
# CHECK: r17 -= sfmpy(r21, r31):lib
# Create floating-point constant
0xb1 0xc2 0x00 0xd6
# CHECK: r17 = sfmake(#21):pos
0xb1 0xc2 0x40 0xd6
@ -98,13 +124,23 @@
# CHECK: r17:16 = dfmake(#21):pos
0xb0 0xc2 0x40 0xd9
# CHECK: r17:16 = dfmake(#21):neg
# Floating point maximum
0x11 0xdf 0x95 0xeb
# CHECK: r17 = sfmax(r21, r31)
# Floating point minimum
0x31 0xdf 0x95 0xeb
# CHECK: r17 = sfmin(r21, r31)
# Floating point multiply
0x11 0xdf 0x55 0xeb
# CHECK: r17 = sfmpy(r21, r31)
# Floating point reciprocal approximation
0xf1 0xdf 0xf5 0xeb
# CHECK: r17, p3 = sfrecipa(r21, r31)
# Floating point subtraction
0x31 0xdf 0x15 0xeb
# CHECK: r17 = sfsub(r21, r31)

View File

@ -1,5 +1,7 @@
# RUN: llvm-mc --triple hexagon -disassemble < %s | FileCheck %s
# RUN: llvm-mc -triple=hexagon -disassemble < %s | FileCheck %s
# Hexagon Programmer's Reference Manual 11.10.5 XTYPE/MPY
# Multiply and use lower result
0xb1 0xdf 0x35 0xd7
# CHECK: r17 = add(#21, mpyi(r21, r31))
0xbf 0xd1 0x35 0xd8
@ -22,6 +24,44 @@
# CHECK: r17 = mpyi(r21, r31)
0x11 0xdf 0x15 0xef
# CHECK: r17 += mpyi(r21, r31)
# Vector multiply word by signed half (32x16)
0xb0 0xde 0x14 0xe8
# CHECK: r17:16 = vmpyweh(r21:20, r31:30):sat
0xb0 0xde 0x94 0xe8
# CHECK: r17:16 = vmpyweh(r21:20, r31:30):<<1:sat
0xf0 0xde 0x14 0xe8
# CHECK: r17:16 = vmpywoh(r21:20, r31:30):sat
0xf0 0xde 0x94 0xe8
# CHECK: r17:16 = vmpywoh(r21:20, r31:30):<<1:sat
0xb0 0xde 0x34 0xe8
# CHECK: r17:16 = vmpyweh(r21:20, r31:30):rnd:sat
0xb0 0xde 0xb4 0xe8
# CHECK: r17:16 = vmpyweh(r21:20, r31:30):<<1:rnd:sat
0xf0 0xde 0x34 0xe8
# CHECK: r17:16 = vmpywoh(r21:20, r31:30):rnd:sat
0xf0 0xde 0xb4 0xe8
# CHECK: r17:16 = vmpywoh(r21:20, r31:30):<<1:rnd:sat
# Vector multiply word by unsigned half (32x16)
0xb0 0xde 0x54 0xe8
# CHECK: r17:16 = vmpyweuh(r21:20, r31:30):sat
0xb0 0xde 0xd4 0xe8
# CHECK: r17:16 = vmpyweuh(r21:20, r31:30):<<1:sat
0xf0 0xde 0x54 0xe8
# CHECK: r17:16 = vmpywouh(r21:20, r31:30):sat
0xf0 0xde 0xd4 0xe8
# CHECK: r17:16 = vmpywouh(r21:20, r31:30):<<1:sat
0xb0 0xde 0x74 0xe8
# CHECK: r17:16 = vmpyweuh(r21:20, r31:30):rnd:sat
0xb0 0xde 0xf4 0xe8
# CHECK: r17:16 = vmpyweuh(r21:20, r31:30):<<1:rnd:sat
0xf0 0xde 0x74 0xe8
# CHECK: r17:16 = vmpywouh(r21:20, r31:30):rnd:sat
0xf0 0xde 0xf4 0xe8
# CHECK: r17:16 = vmpywouh(r21:20, r31:30):<<1:rnd:sat
# Multiply signed halfwords
0x10 0xdf 0x95 0xe4
# CHECK: r17:16 = mpy(r21.l, r31.l):<<1
0x30 0xdf 0x95 0xe4
@ -118,6 +158,8 @@
# CHECK: r17 -= mpy(r21.h, r31.l):<<1:sat
0xf1 0xdf 0xb5 0xee
# CHECK: r17 -= mpy(r21.h, r31.h):<<1:sat
# Multiply unsigned halfwords
0x10 0xdf 0xd5 0xe4
# CHECK: r17:16 = mpyu(r21.l, r31.l):<<1
0x30 0xdf 0xd5 0xe4
@ -166,10 +208,32 @@
# CHECK: r17 -= mpyu(r21.h, r31.l):<<1
0x71 0xdf 0xf5 0xee
# CHECK: r17 -= mpyu(r21.h, r31.h):<<1
# Polynomial multiply words
0xf0 0xdf 0x55 0xe5
# CHECK: r17:16 = pmpyw(r21, r31)
0xf0 0xdf 0x35 0xe7
# CHECK: r17:16 ^= pmpyw(r21, r31)
# Vector reduce multiply word by signed half (32x16)
0x50 0xde 0x34 0xe8
# CHECK: r17:16 = vrmpywoh(r21:20, r31:30)
0x50 0xde 0xb4 0xe8
# CHECK: r17:16 = vrmpywoh(r21:20, r31:30):<<1
0x90 0xde 0x54 0xe8
# CHECK: r17:16 = vrmpyweh(r21:20, r31:30)
0x90 0xde 0xd4 0xe8
# CHECK: r17:16 = vrmpyweh(r21:20, r31:30):<<1
0xd0 0xde 0x74 0xea
# CHECK: r17:16 += vrmpywoh(r21:20, r31:30)
0xd0 0xde 0xf4 0xea
# CHECK: r17:16 += vrmpywoh(r21:20, r31:30):<<1
0xd0 0xde 0x34 0xea
# CHECK: r17:16 += vrmpyweh(r21:20, r31:30)
0xd0 0xde 0xb4 0xea
# CHECK: r17:16 += vrmpyweh(r21:20, r31:30):<<1
# Multiply and use upper result
0x31 0xdf 0x15 0xed
# CHECK: r17 = mpy(r21, r31)
0x31 0xdf 0x35 0xed
@ -194,6 +258,8 @@
# CHECK: r17 += mpy(r21, r31):<<1:sat
0x31 0xdf 0x75 0xef
# CHECK: r17 -= mpy(r21, r31):<<1:sat
# Multiply and use full result
0x10 0xdf 0x15 0xe5
# CHECK: r17:16 = mpy(r21, r31)
0x10 0xdf 0x55 0xe5
@ -206,3 +272,97 @@
# CHECK: r17:16 += mpyu(r21, r31)
0x10 0xdf 0x75 0xe7
# CHECK: r17:16 -= mpyu(r21, r31)
# Vector dual multiply
0x90 0xde 0x14 0xe8
# CHECK: r17:16 = vdmpy(r21:20, r31:30):sat
0x90 0xde 0x94 0xe8
# CHECK: r17:16 = vdmpy(r21:20, r31:30):<<1:sat
0x90 0xde 0x14 0xea
# CHECK: r17:16 += vdmpy(r21:20, r31:30):sat
0x90 0xde 0x94 0xea
# CHECK: r17:16 += vdmpy(r21:20, r31:30):<<1:sat
# Vector dual multiply with round and pack
0x11 0xde 0x14 0xe9
# CHECK: r17 = vdmpy(r21:20, r31:30):rnd:sat
0x11 0xde 0x94 0xe9
# CHECK: r17 = vdmpy(r21:20, r31:30):<<1:rnd:sat
# Vector reduce multiply bytes
0x30 0xde 0x94 0xe8
# CHECK: r17:16 = vrmpybu(r21:20, r31:30)
0x30 0xde 0xd4 0xe8
# CHECK: r17:16 = vrmpybsu(r21:20, r31:30)
0x30 0xde 0x94 0xea
# CHECK: r17:16 += vrmpybu(r21:20, r31:30)
0x30 0xde 0xd4 0xea
# CHECK: r17:16 += vrmpybsu(r21:20, r31:30)
# Vector dual multiply signed by unsigned bytes
0x30 0xde 0xb4 0xe8
# CHECK: r17:16 = vdmpybsu(r21:20, r31:30):sat
0x30 0xde 0x34 0xea
# CHECK: r17:16 += vdmpybsu(r21:20, r31:30):sat
# Vector multiply even halfwords
0xd0 0xde 0x14 0xe8
# CHECK: r17:16 = vmpyeh(r21:20, r31:30):sat
0xd0 0xde 0x94 0xe8
# CHECK: r17:16 = vmpyeh(r21:20, r31:30):<<1:sat
0x50 0xde 0x34 0xea
# CHECK: r17:16 += vmpyeh(r21:20, r31:30)
0xd0 0xde 0x14 0xea
# CHECK: r17:16 += vmpyeh(r21:20, r31:30):sat
0xd0 0xde 0x94 0xea
# CHECK: r17:16 += vmpyeh(r21:20, r31:30):<<1:sat
# Vector multiply halfwords
0xb0 0xdf 0x15 0xe5
# CHECK: r17:16 = vmpyh(r21, r31):sat
0xb0 0xdf 0x95 0xe5
# CHECK: r17:16 = vmpyh(r21, r31):<<1:sat
0x30 0xdf 0x35 0xe7
# CHECK: r17:16 += vmpyh(r21, r31)
0xb0 0xdf 0x15 0xe7
# CHECK: r17:16 += vmpyh(r21, r31):sat
0xb0 0xdf 0x95 0xe7
# CHECK: r17:16 += vmpyh(r21, r31):<<1:sat
# Vector multiply halfwords with round and pack
0xf1 0xdf 0x35 0xed
# CHECK: r17 = vmpyh(r21, r31):rnd:sat
0xf1 0xdf 0xb5 0xed
# CHECK: r17 = vmpyh(r21, r31):<<1:rnd:sat
# Vector multiply halfwords signed by unsigned
0xf0 0xdf 0x15 0xe5
# CHECK: r17:16 = vmpyhsu(r21, r31):sat
0xf0 0xdf 0x95 0xe5
# CHECK: r17:16 = vmpyhsu(r21, r31):<<1:sat
0xb0 0xdf 0x75 0xe7
# CHECK: r17:16 += vmpyhsu(r21, r31):sat
0xb0 0xdf 0xf5 0xe7
# CHECK: r17:16 += vmpyhsu(r21, r31):<<1:sat
# Vector reduce multiply halfwords
0x50 0xde 0x14 0xe8
# CHECK: r17:16 = vrmpyh(r21:20, r31:30)
0x50 0xde 0x14 0xea
# CHECK: r17:16 += vrmpyh(r21:20, r31:30)
# Vector multiply bytes
0x30 0xdf 0x55 0xe5
# CHECK: r17:16 = vmpybsu(r21, r31)
0x30 0xdf 0x95 0xe5
# CHECK: r17:16 = vmpybu(r21, r31)
0x30 0xdf 0x95 0xe7
# CHECK: r17:16 += vmpybu(r21, r31)
0x30 0xdf 0xd5 0xe7
# CHECK: r17:16 += vmpybsu(r21, r31)
# Vector polynomial multiply halfwords
0xf0 0xdf 0xd5 0xe5
# CHECK: r17:16 = vpmpyh(r21, r31)
0xf0 0xdf 0xb5 0xe7
# CHECK: r17:16 ^= vpmpyh(r21, r31)