Hexagon: Add V4 combine instructions and some more Def Pats for V2.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174331 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jyotsna Verma 2013-02-04 15:52:56 +00:00
parent 0d3731478e
commit 3e1635d08c
5 changed files with 191 additions and 9 deletions

View File

@ -52,6 +52,8 @@ namespace llvm {
WrapperCP, WrapperCP,
WrapperCombineII, WrapperCombineII,
WrapperCombineRR, WrapperCombineRR,
WrapperCombineRI_V4,
WrapperCombineIR_V4,
WrapperPackhl, WrapperPackhl,
WrapperSplatB, WrapperSplatB,
WrapperSplatH, WrapperSplatH,

View File

@ -2825,23 +2825,42 @@ def : Pat <(i32 (zext (i1 PredRegs:$src1))),
// i1 -> i64 // i1 -> i64
def : Pat <(i64 (zext (i1 PredRegs:$src1))), def : Pat <(i64 (zext (i1 PredRegs:$src1))),
(i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>; (i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
Requires<[NoV4T]>;
// i32 -> i64 // i32 -> i64
def : Pat <(i64 (zext (i32 IntRegs:$src1))), def : Pat <(i64 (zext (i32 IntRegs:$src1))),
(i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>; (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
Requires<[NoV4T]>;
// i8 -> i64 // i8 -> i64
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)), def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>; (i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
Requires<[NoV4T]>;
// zextloadi8 -> i64 where the address is (add base-register, s11_0ExtPred
// offset): select LDriub_indexed on the base and offset, and form the i64
// result with COMBINE_rr whose first operand is (TFRI 0).
// Only enabled under the NoV4T predicate.
// AddedComplexity = 20 raises this pattern's matching priority.
// NOTE(review): presumably so the base+offset form is preferred over the
// plain ADDRriS11_0 pattern -- confirm.
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
s11_0ExtPred:$offset))),
(i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
s11_0ExtPred:$offset)))>,
Requires<[NoV4T]>;
// i16 -> i64 // i16 -> i64
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)), def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>; (i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
Requires<[NoV4T]>;
// zextloadi16 -> i64 where the address is (add base-register, s11_1ExtPred
// offset): select LDriuh_indexed on the base and offset, and form the i64
// result with COMBINE_rr whose first operand is (TFRI 0).
// Only enabled under the NoV4T predicate.
// AddedComplexity = 20 raises this pattern's matching priority.
// NOTE(review): presumably so the base+offset form is preferred over the
// plain ADDRriS11_1 pattern -- confirm.
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
s11_1ExtPred:$offset))),
(i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1,
s11_1ExtPred:$offset)))>,
Requires<[NoV4T]>;
// i32 -> i64 // i32 -> i64
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)), def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>; (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
Requires<[NoV4T]>;
def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)), def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
(i32 (LDriw ADDRriS11_0:$src1))>; (i32 (LDriw ADDRriS11_0:$src1))>;
@ -2862,15 +2881,41 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
// Any extended 64-bit load. // Any extended 64-bit load.
// anyext i32 -> i64 // anyext i32 -> i64
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)), def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>; (i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
Requires<[NoV4T]>;
// anyext i32 -> i64 load from (base register + s11_2ExtPred offset).
// When there is an offset we should prefer the pattern below over the
// pattern above. The complexity of the pattern above is 13 (gleaned from
// HexagonGenDAGISel.inc), so the complexity given below is comfortably
// higher and the pattern below is the one that gets chosen.
// If this is not done then we generate addresses such as
// ********************************************
// r1 = add (r0, #4)
// r1 = memw(r1 + #0)
// instead of
// r1 = memw(r0 + #4)
// ********************************************
// Only enabled under the NoV4T predicate.
let AddedComplexity = 100 in
def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
(i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
s11_2ExtPred:$offset)))>,
Requires<[NoV4T]>;
// anyext i16 -> i64. // anyext i16 -> i64.
def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)), def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
(i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>; (i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>,
Requires<[NoV4T]>;
// anyext i16 -> i64 where the address is (add base-register, s11_1ExtPred
// offset): select LDrih_indexed on the base and offset, and form the i64
// result with COMBINE_rr whose first operand is (TFRI 0).
// Only enabled under the NoV4T predicate.
// AddedComplexity = 20 raises this pattern's matching priority.
// NOTE(review): presumably so the base+offset form is preferred over the
// non-indexed extloadi16 pattern -- confirm.
let AddedComplexity = 20 in
def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
s11_1ExtPred:$offset))),
(i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1,
s11_1ExtPred:$offset)))>,
Requires<[NoV4T]>;
// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs). // Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
def : Pat<(i64 (zext (i32 IntRegs:$src1))), def : Pat<(i64 (zext (i32 IntRegs:$src1))),
(i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>; (i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
Requires<[NoV4T]>;
// Multiply 64-bit unsigned and use upper result. // Multiply 64-bit unsigned and use upper result.
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)), def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),

View File

@ -280,6 +280,19 @@ def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
[]>, []>,
Requires<[HasV4T]>; Requires<[HasV4T]>;
// Hexagon-specific SelectionDAG nodes for combining one register operand
// with one immediate operand. Both nodes use the SDTHexagonI64I32I32 type
// profile and are bound to the HexagonISD::WrapperCombineRI_V4 /
// HexagonISD::WrapperCombineIR_V4 opcodes.
//   RI: operands are (register, immediate).
//   IR: operands are (immediate, register).
def HexagonWrapperCombineRI_V4 :
SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>;
def HexagonWrapperCombineIR_V4 :
SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>;
// Select the (register, s8ExtPred immediate) wrapper node to COMBINE_rI_V4.
// Only enabled under the HasV4T predicate.
def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i),
(COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>,
Requires<[HasV4T]>;
// Select the (s8ExtPred immediate, register) wrapper node to COMBINE_Ir_V4.
// Only enabled under the HasV4T predicate.
def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r),
(COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>,
Requires<[HasV4T]>;
let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6, let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6,
neverHasSideEffects = 1, validSubTargets = HasV4SubT in neverHasSideEffects = 1, validSubTargets = HasV4SubT in
def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst), def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
@ -1143,6 +1156,73 @@ def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
u16ImmPred:$offset))), u16ImmPred:$offset))),
(i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>, (i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
Requires<[HasV4T]>; Requires<[HasV4T]>;
// zext i1->i64
// Under HasV4T the i64 result is formed by COMBINE_Ir_V4 with the literal 0
// as its immediate operand; the register operand is MUX_ii applied to the
// predicate with the immediates 1 and 0.
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
(i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
Requires<[HasV4T]>;
// zext i32->i64
// Under HasV4T: COMBINE_Ir_V4 with immediate 0 and the i32 source register.
def : Pat <(i64 (zext (i32 IntRegs:$src1))),
(i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>,
Requires<[HasV4T]>;
// zext i8->i64
// Zero-extending byte load to i64 under HasV4T: LDriub performs the load and
// COMBINE_Ir_V4 pairs the loaded value with the immediate 0.
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
(i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
Requires<[HasV4T]>;
// Same load when the address is (add base-register, s11_0ExtPred offset):
// use LDriub_indexed on the base and offset. AddedComplexity = 20 raises
// this pattern's matching priority.
// NOTE(review): presumably so it is preferred over the ADDRriS11_0 pattern
// above -- confirm.
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
s11_0ExtPred:$offset))),
(i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
s11_0ExtPred:$offset)))>,
Requires<[HasV4T]>;
// zext i16->i64
// Zero-extending halfword load to i64 under HasV4T: LDriuh performs the load
// and COMBINE_Ir_V4 pairs the loaded value with the immediate 0.
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
(i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>,
Requires<[HasV4T]>;
// Same load when the address is (add base-register, s11_1ExtPred offset):
// use LDriuh_indexed on the base and offset. AddedComplexity = 20 raises
// this pattern's matching priority.
// NOTE(review): presumably so it is preferred over the ADDRriS11_1 pattern
// above -- confirm.
let AddedComplexity = 20 in
def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
s11_1ExtPred:$offset))),
(i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1,
s11_1ExtPred:$offset)))>,
Requires<[HasV4T]>;
// anyext i16->i64
// Any-extending halfword load to i64 under HasV4T: LDrih performs the load
// and COMBINE_Ir_V4 pairs the loaded value with the immediate 0.
// The address operand uses the halfword pattern ADDRriS11_1, consistent with
// the zextloadi16 pattern and with the s11_1ExtPred offset of the indexed
// form below (the word-granular ADDRriS11_2 was used here previously).
def: Pat <(i64 (extloadi16 ADDRriS11_1:$src1)),
(i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_1:$src1)))>,
Requires<[HasV4T]>;
// Same load when the address is (add base-register, s11_1ExtPred offset):
// use LDrih_indexed on the base and offset. AddedComplexity = 20 raises
// this pattern's matching priority.
// NOTE(review): presumably so it is preferred over the non-indexed pattern
// above -- confirm.
let AddedComplexity = 20 in
def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
s11_1ExtPred:$offset))),
(i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1,
s11_1ExtPred:$offset)))>,
Requires<[HasV4T]>;
// zextload i32->i64
// Zero-extending word load to i64 under HasV4T: LDriw performs the load and
// COMBINE_Ir_V4 pairs the loaded value with the immediate 0.
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
(i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
Requires<[HasV4T]>;
// Same load when the address is (add base-register, s11_2ExtPred offset):
// use LDriw_indexed on the base and offset. AddedComplexity = 100 raises
// this pattern's matching priority.
// NOTE(review): presumably so the offset is folded into the load rather
// than computed by a separate add -- confirm.
let AddedComplexity = 100 in
def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
(i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
s11_2ExtPred:$offset)))>,
Requires<[HasV4T]>;
// anyext i32->i64
// Any-extending word load to i64 under HasV4T: LDriw performs the load and
// COMBINE_Ir_V4 pairs the loaded value with the immediate 0.
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
(i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
Requires<[HasV4T]>;
// Same load when the address is (add base-register, s11_2ExtPred offset):
// use LDriw_indexed on the base and offset. AddedComplexity = 100 raises
// this pattern's matching priority.
// NOTE(review): presumably so the offset is folded into the load rather
// than computed by a separate add -- confirm.
let AddedComplexity = 100 in
def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
(i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
s11_2ExtPred:$offset)))>,
Requires<[HasV4T]>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//

View File

@ -0,0 +1,55 @@
; Compiles for Hexagon V4 and verifies that the generated code for each
; function below contains a combine whose first operand is the immediate #0.
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: word
; CHECK: combine(#0
; @word: loads two adjacent i32 values, zero-extends each to i64, shifts the
; second left by 32, ORs the two together and passes the result to @bar.
define void @word(i32* nocapture %a) nounwind {
entry:
%0 = load i32* %a, align 4, !tbaa !0
%1 = zext i32 %0 to i64
%add.ptr = getelementptr inbounds i32* %a, i32 1
%2 = load i32* %add.ptr, align 4, !tbaa !0
%3 = zext i32 %2 to i64
%4 = shl nuw i64 %3, 32
%ins = or i64 %4, %1
tail call void @bar(i64 %ins) nounwind
ret void
}
; External consumer of the i64 value built by each test function.
declare void @bar(i64)
; @halfword: loads two adjacent i16 values, zero-extends each to i64, shifts
; the second left by 16, ORs the two together and passes the result to @bar.
; CHECK: halfword
; CHECK: combine(#0
define void @halfword(i16* nocapture %a) nounwind {
entry:
%0 = load i16* %a, align 2, !tbaa !3
%1 = zext i16 %0 to i64
%add.ptr = getelementptr inbounds i16* %a, i32 1
%2 = load i16* %add.ptr, align 2, !tbaa !3
%3 = zext i16 %2 to i64
%4 = shl nuw nsw i64 %3, 16
%ins = or i64 %4, %1
tail call void @bar(i64 %ins) nounwind
ret void
}
; @byte: loads two adjacent i8 values, zero-extends each to i64, shifts the
; second left by 8, ORs the two together and passes the result to @bar.
; CHECK: byte
; CHECK: combine(#0
define void @byte(i8* nocapture %a) nounwind {
entry:
%0 = load i8* %a, align 1, !tbaa !1
%1 = zext i8 %0 to i64
%add.ptr = getelementptr inbounds i8* %a, i32 1
%2 = load i8* %add.ptr, align 1, !tbaa !1
%3 = zext i8 %2 to i64
%4 = shl nuw nsw i64 %3, 8
%ins = or i64 %4, %1
tail call void @bar(i64 %ins) nounwind
ret void
}
; TBAA metadata nodes referenced by the !tbaa attachments on the loads above:
; !0 ("int") and !3 ("short") both name !1 ("omnipotent char") as their
; second operand, and !1 in turn names !2 ("Simple C/C++ TBAA").
!0 = metadata !{metadata !"int", metadata !1}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA"}
!3 = metadata !{metadata !"short", metadata !1}

View File

@ -1,5 +1,5 @@
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s ; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
; CHECK: r{{[0-9]}}:{{[0-9]}} = combine(r{{[0-9]}}, r{{[0-9]}}) ; CHECK: r{{[0-9]}}:{{[0-9]}} = combine({{r[0-9]|#0}}, r{{[0-9]}})
; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32) ; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32)
%struct.small = type { i32, i32 } %struct.small = type { i32, i32 }