From 42ba77db537274c797d74ddfa80902e58901529a Mon Sep 17 00:00:00 2001 From: Jyotsna Verma Date: Tue, 23 Apr 2013 21:17:40 +0000 Subject: [PATCH] Hexagon: Use multiclass for combine and STri[bhwd]_shl_V4 instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180145 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Hexagon/HexagonInstrInfo.td | 161 +++++++++++----------- lib/Target/Hexagon/HexagonInstrInfoV4.td | 164 +++++++++-------------- test/CodeGen/Hexagon/always-ext.ll | 45 +++++++ 3 files changed, 194 insertions(+), 176 deletions(-) create mode 100644 test/CodeGen/Hexagon/always-ext.ll diff --git a/lib/Target/Hexagon/HexagonInstrInfo.td b/lib/Target/Hexagon/HexagonInstrInfo.td index 6e2637b508b..053f8c465c3 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.td +++ b/lib/Target/Hexagon/HexagonInstrInfo.td @@ -83,21 +83,30 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 8 in //===----------------------------------------------------------------------===// // ALU32/ALU (Instructions with register-register form) //===----------------------------------------------------------------------===// -multiclass ALU32_Pbase, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; + +def HexagonWrapperCombineII : + SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>; + +def HexagonWrapperCombineRR : + SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>; + +multiclass ALU32_Pbase { let isPredicatedNew = isPredNew in - def NAME : ALU32_rr<(outs IntRegs:$dst), + def NAME : ALU32_rr<(outs RC:$dst), (ins PredRegs:$src1, IntRegs:$src2, IntRegs: $src3), !if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew,".new) $dst = ", ") $dst = ")#mnemonic#"($src2, $src3)", []>; } -multiclass ALU32_Pred { +multiclass ALU32_Pred { let isPredicatedFalse = PredNot in { - defm _c#NAME : ALU32_Pbase; + defm _c#NAME : ALU32_Pbase; // Predicate new - defm _cdn#NAME : ALU32_Pbase; + defm _cdn#NAME : ALU32_Pbase; } } @@ -112,8 +121,8 @@ multiclass ALU32_base 
{ (i32 IntRegs:$src2)))]>; let neverHasSideEffects = 1, isPredicated = 1 in { - defm Pt : ALU32_Pred; - defm NotPt : ALU32_Pred; + defm Pt : ALU32_Pred; + defm NotPt : ALU32_Pred; } } } @@ -127,6 +136,37 @@ let isCommutable = 1 in { defm SUB_rr : ALU32_base<"sub", "SUB", sub>, ImmRegRel, PredNewRel; +// Combines the two integer registers SRC1 and SRC2 into a double register. +let isPredicable = 1 in +class T_Combine : ALU32_rr<(outs DoubleRegs:$dst), + (ins IntRegs:$src1, IntRegs:$src2), + "$dst = combine($src1, $src2)", + [(set (i64 DoubleRegs:$dst), + (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1), + (i32 IntRegs:$src2))))]>; + +multiclass Combine_base { + let BaseOpcode = "combine" in { + def NAME : T_Combine; + let neverHasSideEffects = 1, isPredicated = 1 in { + defm Pt : ALU32_Pred<"combine", DoubleRegs, 0>; + defm NotPt : ALU32_Pred<"combine", DoubleRegs, 1>; + } + } +} + +defm COMBINE_rr : Combine_base, PredNewRel; + +// Combines the two immediates SRC1 and SRC2 into a double register. +class COMBINE_imm : + ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2), + "$dst = combine(#$src1, #$src2)", + [(set (i64 DoubleRegs:$dst), + (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>; + +let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in +def COMBINE_Ii : COMBINE_imm; + //===----------------------------------------------------------------------===// // ALU32/ALU (ADD with register-immediate form) //===----------------------------------------------------------------------===// @@ -344,52 +384,6 @@ def TFCR : CRInst<(outs CRRegs:$dst), (ins IntRegs:$src1), // ALU32/PERM + //===----------------------------------------------------------------------===// -// Combine. 
- -def SDTHexagonI64I32I32 : SDTypeProfile<1, 2, - [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; - -def HexagonWrapperCombineII : - SDNode<"HexagonISD::WrapperCombineII", SDTHexagonI64I32I32>; -def HexagonWrapperCombineRR : - SDNode<"HexagonISD::WrapperCombineRR", SDTHexagonI64I32I32>; - -// Combines the two integer registers SRC1 and SRC2 into a double register. -let isPredicable = 1 in -def COMBINE_rr : ALU32_rr<(outs DoubleRegs:$dst), (ins IntRegs:$src1, - IntRegs:$src2), - "$dst = combine($src1, $src2)", - [(set (i64 DoubleRegs:$dst), - (i64 (HexagonWrapperCombineRR (i32 IntRegs:$src1), - (i32 IntRegs:$src2))))]>; - -// Rd=combine(Rt.[HL], Rs.[HL]) -class COMBINE_halves: ALU32_rr<(outs IntRegs:$dst), - (ins IntRegs:$src1, - IntRegs:$src2), - "$dst = combine($src1."# A #", $src2."# B #")", []>; - -let isPredicable = 1 in { - def COMBINE_hh : COMBINE_halves<"H", "H">; - def COMBINE_hl : COMBINE_halves<"H", "L">; - def COMBINE_lh : COMBINE_halves<"L", "H">; - def COMBINE_ll : COMBINE_halves<"L", "L">; -} - -def : Pat<(i32 (trunc (i64 (srl (i64 DoubleRegs:$a), (i32 16))))), - (COMBINE_lh (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_hireg), - (EXTRACT_SUBREG (i64 DoubleRegs:$a), subreg_loreg))>; - -// Combines the two immediates SRC1 and SRC2 into a double register. -class COMBINE_imm : - ALU32_ii<(outs DoubleRegs:$dst), (ins imm1:$src1, imm2:$src2), - "$dst = combine(#$src1, #$src2)", - [(set (i64 DoubleRegs:$dst), - (i64 (HexagonWrapperCombineII (i32 pat1:$src1), (i32 pat2:$src2))))]>; - -let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 8 in -def COMBINE_Ii : COMBINE_imm; - // Mux. def VMUX_prr64 : ALU64_rr<(outs DoubleRegs:$dst), (ins PredRegs:$src1, DoubleRegs:$src2, @@ -492,32 +486,6 @@ def : Pat <(sext_inreg (i32 IntRegs:$src1), i16), // ALU32/PRED + //===----------------------------------------------------------------------===// -// Conditional combine. 
-let neverHasSideEffects = 1, isPredicated = 1 in { -def COMBINE_rr_cPt : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1) $dst = combine($src2, $src3)", - []>; - -let isPredicatedFalse = 1 in -def COMBINE_rr_cNotPt : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1) $dst = combine($src2, $src3)", - []>; - -let isPredicatedNew = 1 in -def COMBINE_rr_cdnPt : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if ($src1.new) $dst = combine($src2, $src3)", - []>; - -let isPredicatedNew = 1, isPredicatedFalse = 1 in -def COMBINE_rr_cdnNotPt : ALU32_rr<(outs DoubleRegs:$dst), - (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3), - "if (!$src1.new) $dst = combine($src2, $src3)", - []>; -} - // Compare. defm CMPGTU : CMP32_rr_ri_u9<"cmp.gtu", "CMPGTU", setugt>, ImmRegRel; defm CMPGT : CMP32_rr_ri_s10<"cmp.gt", "CMPGT", setgt>, ImmRegRel; @@ -2162,6 +2130,13 @@ def : Pat <(brcond (not (i1 PredRegs:$src1)), bb:$offset), def : Pat <(and (i1 PredRegs:$src1), (not (i1 PredRegs:$src2))), (i1 (AND_pnotp (i1 PredRegs:$src1), (i1 PredRegs:$src2)))>; + +let AddedComplexity = 100 in +def : Pat <(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$global))), + (i64 (COMBINE_rr (TFRI 0), + (LDriub_indexed (CONST32_set tglobaladdr:$global), 0)))>, + Requires<[NoV4T]>; + // Map from i1 loads to 32 bits. This assumes that the i1* is byte aligned. 
let AddedComplexity = 10 in def : Pat <(i32 (zextloadi1 ADDRriS11_0:$addr)), @@ -2492,6 +2467,13 @@ def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>, Requires<[NoV4T]>; +let AddedComplexity = 100 in +def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))), + (i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1, + s11_2ExtPred:$offset)))>, + Requires<[NoV4T]>; + +let AddedComplexity = 10 in def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)), (i32 (LDriw ADDRriS11_0:$src1))>; @@ -2508,6 +2490,27 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))), (i64 (SXTW (i32 (MUX_ii (i1 PredRegs:$src1), 1, 0))))>; +let AddedComplexity = 100 in +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zextloadi32 (i32 (add IntRegs:$src2, + s11_2ExtPred:$offset2)))))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + (LDriw_indexed IntRegs:$src2, + s11_2ExtPred:$offset2)))>; + +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zextloadi32 ADDRriS11_2:$srcLow)))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + (LDriw ADDRriS11_2:$srcLow)))>; + +def: Pat<(i64 (or (i64 (shl (i64 DoubleRegs:$srcHigh), + (i32 32))), + (i64 (zext (i32 IntRegs:$srcLow))))), + (i64 (COMBINE_rr (EXTRACT_SUBREG (i64 DoubleRegs:$srcHigh), subreg_loreg), + IntRegs:$srcLow))>; + // Any extended 64-bit load. 
// anyext i32 -> i64 def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 6c3dfd90521..a77606be750 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -588,17 +588,59 @@ def : Pat<(store (i64 DoubleRegs:$src4), u2ImmPred:$src3, DoubleRegs:$src4)>; } -// memd(Ru<<#u2+#U6)=Rtt -let isExtended = 1, opExtendable = 2, AddedComplexity = 10, -validSubTargets = HasV4SubT in -def STrid_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, DoubleRegs:$src4), - "memd($src1<<#$src2+#$src3) = $src4", - [(store (i64 DoubleRegs:$src4), +let isExtended = 1, opExtendable = 2 in +class T_ST_LongOff : + STInst<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, RC:$src4), + mnemonic#"($src1<<#$src2+##$src3) = $src4", + [(stOp (VT RC:$src4), (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), u0AlwaysExtPred:$src3))]>, Requires<[HasV4T]>; +let isExtended = 1, opExtendable = 2, mayStore = 1, isNVStore = 1 in +class T_ST_LongOff_nv : + NVInst_V4<(outs), + (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), + mnemonic#"($src1<<#$src2+##$src3) = $src4.new", + []>, + Requires<[HasV4T]>; + +multiclass ST_LongOff { + let BaseOpcode = BaseOp#"_shl" in { + let isNVStorable = 1 in + def NAME#_V4 : T_ST_LongOff; + + def NAME#_nv_V4 : T_ST_LongOff_nv; + } +} + +let AddedComplexity = 10, validSubTargets = HasV4SubT in { + def STrid_shl_V4 : T_ST_LongOff<"memd", store, DoubleRegs, i64>; + defm STrib_shl : ST_LongOff <"memb", "STrib", truncstorei8>, NewValueRel; + defm STrih_shl : ST_LongOff <"memh", "Strih", truncstorei16>, NewValueRel; + defm STriw_shl : ST_LongOff <"memw", "STriw", store>, NewValueRel; +} + +let AddedComplexity = 40 in +multiclass T_ST_LOff_Pats { + def : Pat<(stOp (VT RC:$src4), + (add (shl IntRegs:$src1, u2ImmPred:$src2), + (NumUsesBelowThresCONST32 tglobaladdr:$src3))), + (I 
IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>; + + def : Pat<(stOp (VT RC:$src4), + (add IntRegs:$src1, + (NumUsesBelowThresCONST32 tglobaladdr:$src3))), + (I IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>; +} + +defm : T_ST_LOff_Pats; +defm : T_ST_LOff_Pats; +defm : T_ST_LOff_Pats; +defm : T_ST_LOff_Pats; + // memd(Rx++#s4:3)=Rtt // memd(Rx++#s4:3:circ(Mu))=Rtt // memd(Rx++I:circ(Mu))=Rtt @@ -676,17 +718,6 @@ def : Pat <(truncstorei8 s8ExtPred:$src2, (i32 IntRegs:$src1)), (STrib_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>, Requires<[HasV4T]>; -// memb(Ru<<#u2+#U6)=Rt -let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STrib_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memb($src1<<#$src2+#$src3) = $src4", - [(truncstorei8 (i32 IntRegs:$src4), - (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3))]>, - Requires<[HasV4T]>; - // memb(Rx++#s4:0:circ(Mu))=Rt // memb(Rx++I:circ(Mu))=Rt // memb(Rx++Mu)=Rt @@ -707,17 +738,6 @@ def : Pat <(truncstorei16 s8ExtPred:$src2, (i32 IntRegs:$src1)), // TODO: needs to be implemented. 
// memh(Ru<<#u2+#U6)=Rt.H -// memh(Ru<<#u2+#U6)=Rt -let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STrih_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memh($src1<<#$src2+#$src3) = $src4", - [(truncstorei16 (i32 IntRegs:$src4), - (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3))]>, - Requires<[HasV4T]>; - // memh(Rx++#s4:1:circ(Mu))=Rt.H // memh(Rx++#s4:1:circ(Mu))=Rt // memh(Rx++I:circ(Mu))=Rt.H @@ -754,17 +774,6 @@ def : Pat <(store s8ExtPred:$src2, (i32 IntRegs:$src1)), (STriw_imm_V4 IntRegs:$src1, 0, s8ExtPred:$src2)>, Requires<[HasV4T]>; -// memw(Ru<<#u2+#U6)=Rt -let isExtended = 1, opExtendable = 2, AddedComplexity = 10, isNVStorable = 1, -validSubTargets = HasV4SubT in -def STriw_shl_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memw($src1<<#$src2+#$src3) = $src4", - [(store (i32 IntRegs:$src4), - (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2), - u0AlwaysExtPred:$src3))]>, - Requires<[HasV4T]>; - // memw(Rx++#s4:2)=Rt // memw(Rx++#s4:2:circ(Mu))=Rt // memw(Rx++I:circ(Mu))=Rt @@ -883,15 +892,6 @@ mayStore = 1 in { defm STriw: ST_MEMri_nv<"memw", "STriw", IntRegs, 13, 8>, AddrModeRel; } -// memb(Ru<<#u2+#U6)=Nt.new -let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, -isNVStore = 1, validSubTargets = HasV4SubT in -def STrib_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memb($src1<<#$src2+#$src3) = $src4.new", - []>, - Requires<[HasV4T]>; - //===----------------------------------------------------------------------===// // Post increment store // mem[bhwd](Rx++#s4:[0123])=Nt.new @@ -949,29 +949,11 @@ defm POST_STwri: ST_PostInc_nv <"memw", "STriw", IntRegs, s4_2Imm>, AddrModeRel; // memb(Rx++I:circ(Mu))=Nt.new // memb(Rx++Mu)=Nt.new // memb(Rx++Mu:brev)=Nt.new -// memh(Ru<<#u2+#U6)=Nt.new -let 
isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, -isNVStore = 1, validSubTargets = HasV4SubT in -def STrih_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memh($src1<<#$src2+#$src3) = $src4.new", - []>, - Requires<[HasV4T]>; - // memh(Rx++#s4:1:circ(Mu))=Nt.new // memh(Rx++I:circ(Mu))=Nt.new // memh(Rx++Mu)=Nt.new // memh(Rx++Mu:brev)=Nt.new -// memw(Ru<<#u2+#U6)=Nt.new -let isExtended = 1, opExtendable = 2, mayStore = 1, AddedComplexity = 10, -isNVStore = 1, validSubTargets = HasV4SubT in -def STriw_shl_nv_V4 : NVInst_V4<(outs), - (ins IntRegs:$src1, u2Imm:$src2, u0AlwaysExt:$src3, IntRegs:$src4), - "memw($src1<<#$src2+#$src3) = $src4.new", - []>, - Requires<[HasV4T]>; - // memw(Rx++#s4:2:circ(Mu))=Nt.new // memw(Rx++I:circ(Mu))=Nt.new // memw(Rx++Mu)=Nt.new @@ -3071,19 +3053,21 @@ def : Pat<(HexagonCONST32_GP tglobaladdr:$src1), // Load - Indirect with long offset: These instructions take global address // as an operand -let AddedComplexity = 10 in +let isExtended = 1, opExtendable = 3, AddedComplexity = 40, +validSubTargets = HasV4SubT in def LDrid_ind_lo_V4 : LDInst<(outs DoubleRegs:$dst), - (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset), + (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset), "$dst=memd($src1<<#$src2+##$offset)", [(set (i64 DoubleRegs:$dst), (load (add (shl IntRegs:$src1, u2ImmPred:$src2), (HexagonCONST32 tglobaladdr:$offset))))]>, Requires<[HasV4T]>; -let AddedComplexity = 10 in +let AddedComplexity = 40 in multiclass LD_indirect_lo { +let isExtended = 1, opExtendable = 3, validSubTargets = HasV4SubT in def _lo_V4 : LDInst<(outs IntRegs:$dst), - (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$offset), + (ins IntRegs:$src1, u2Imm:$src2, globaladdressExt:$offset), !strconcat("$dst = ", !strconcat(OpcStr, "($src1<<#$src2+##$offset)")), [(set IntRegs:$dst, @@ -3094,37 +3078,23 @@ multiclass LD_indirect_lo { defm LDrib_ind : LD_indirect_lo<"memb", 
sextloadi8>; defm LDriub_ind : LD_indirect_lo<"memub", zextloadi8>; +defm LDriub_ind_anyext : LD_indirect_lo<"memub", extloadi8>; defm LDrih_ind : LD_indirect_lo<"memh", sextloadi16>; defm LDriuh_ind : LD_indirect_lo<"memuh", zextloadi16>; +defm LDriuh_ind_anyext : LD_indirect_lo<"memuh", extloadi16>; defm LDriw_ind : LD_indirect_lo<"memw", load>; -// Store - Indirect with long offset: These instructions take global address -// as an operand -let AddedComplexity = 10 in -def STrid_ind_lo_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3, - DoubleRegs:$src4), - "memd($src1<<#$src2+#$src3) = $src4", - [(store (i64 DoubleRegs:$src4), - (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3)))]>, - Requires<[HasV4T]>; +let AddedComplexity = 40 in +def : Pat <(i32 (sextloadi8 (add IntRegs:$src1, + (NumUsesBelowThresCONST32 tglobaladdr:$offset)))), + (i32 (LDrib_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>, + Requires<[HasV4T]>; -let AddedComplexity = 10 in -multiclass ST_indirect_lo { - def _lo_V4 : STInst<(outs), - (ins IntRegs:$src1, u2Imm:$src2, globaladdress:$src3, - IntRegs:$src4), - !strconcat(OpcStr, "($src1<<#$src2+##$src3) = $src4"), - [(OpNode (i32 IntRegs:$src4), - (add (shl IntRegs:$src1, u2ImmPred:$src2), - (HexagonCONST32 tglobaladdr:$src3)))]>, - Requires<[HasV4T]>; -} - -defm STrib_ind : ST_indirect_lo<"memb", truncstorei8>; -defm STrih_ind : ST_indirect_lo<"memh", truncstorei16>; -defm STriw_ind : ST_indirect_lo<"memw", store>; +let AddedComplexity = 40 in +def : Pat <(i32 (zextloadi8 (add IntRegs:$src1, + (NumUsesBelowThresCONST32 tglobaladdr:$offset)))), + (i32 (LDriub_ind_lo_V4 IntRegs:$src1, 0, tglobaladdr:$offset))>, + Requires<[HasV4T]>; let Predicates = [HasV4T], AddedComplexity = 30 in { def : Pat<(truncstorei8 (i32 IntRegs:$src1), u0AlwaysExtPred:$src2), diff --git a/test/CodeGen/Hexagon/always-ext.ll b/test/CodeGen/Hexagon/always-ext.ll new file mode 100644 index 00000000000..9c8d708ba87 
--- /dev/null
+++ b/test/CodeGen/Hexagon/always-ext.ll
@@ -0,0 +1,45 @@
+; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
+
+; Check that we don't generate an invalid packet with too many instructions
+; due to a store that has a must-extend operand.
+
+; CHECK: CuSuiteAdd.exit.us
+; CHECK: {
+; CHECK-NOT: call abort
+; CHECK: memw(##0)
+; CHECK: memw(r{{[0-9]+}}<<#2+##4)
+; CHECK: }
+
+%struct.CuTest.1.28.31.37.40.43.52.55.67.85.111 = type { i8*, void (%struct.CuTest.1.28.31.37.40.43.52.55.67.85.111*)*, i32, i32, i8*, [23 x i32]* }
+%struct.CuSuite.2.29.32.38.41.44.53.56.68.86.112 = type { i32, [1024 x %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111*], i32 }
+
+@__func__.CuSuiteAdd = external unnamed_addr constant [11 x i8], align 8
+@.str24 = external unnamed_addr constant [140 x i8], align 8
+
+declare void @_Assert()
+
+define void @CuSuiteAddSuite() nounwind {
+entry:
+  br i1 undef, label %for.body.us, label %for.end
+
+for.body.us:                                      ; preds = %entry
+  %0 = load %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111** null, align 4
+  %1 = load i32* undef, align 4
+  %cmp.i.us = icmp slt i32 %1, 1024
+  br i1 %cmp.i.us, label %CuSuiteAdd.exit.us, label %cond.false6.i.us
+
+cond.false6.i.us:                                 ; preds = %for.body.us
+  tail call void @_Assert() nounwind
+  unreachable
+
+CuSuiteAdd.exit.us:                               ; preds = %for.body.us
+  %arrayidx.i.us = getelementptr inbounds %struct.CuSuite.2.29.32.38.41.44.53.56.68.86.112* null, i32 0, i32 1, i32 %1
+  store %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111* %0, %struct.CuTest.1.28.31.37.40.43.52.55.67.85.111** %arrayidx.i.us, align 4
+  call void @llvm.trap()
+  unreachable
+
+for.end:                                          ; preds = %entry
+  ret void
+}
+
+declare void @llvm.trap() noreturn nounwind