mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-12 17:32:19 +00:00
Hexagon: Add V4 combine instructions and some more Def Pats for V2.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174331 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0d3731478e
commit
3e1635d08c
@ -52,6 +52,8 @@ namespace llvm {
|
||||
WrapperCP,
|
||||
WrapperCombineII,
|
||||
WrapperCombineRR,
|
||||
WrapperCombineRI_V4,
|
||||
WrapperCombineIR_V4,
|
||||
WrapperPackhl,
|
||||
WrapperSplatB,
|
||||
WrapperSplatH,
|
||||
|
@ -2825,23 +2825,42 @@ def : Pat <(i32 (zext (i1 PredRegs:$src1))),
|
||||
|
||||
// i1 -> i64
|
||||
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
|
||||
(i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>;
|
||||
(i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
|
||||
Requires<[NoV4T]>;
|
||||
|
||||
// i32 -> i64
|
||||
def : Pat <(i64 (zext (i32 IntRegs:$src1))),
|
||||
(i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
|
||||
(i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
|
||||
Requires<[NoV4T]>;
|
||||
|
||||
// i8 -> i64
|
||||
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>;
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
|
||||
Requires<[NoV4T]>;
|
||||
|
||||
let AddedComplexity = 20 in
|
||||
def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
|
||||
s11_0ExtPred:$offset))),
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
|
||||
s11_0ExtPred:$offset)))>,
|
||||
Requires<[NoV4T]>;
|
||||
|
||||
// i16 -> i64
|
||||
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>;
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
|
||||
Requires<[NoV4T]>;
|
||||
|
||||
let AddedComplexity = 20 in
|
||||
def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
|
||||
s11_1ExtPred:$offset))),
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1,
|
||||
s11_1ExtPred:$offset)))>,
|
||||
Requires<[NoV4T]>;
|
||||
|
||||
// i32 -> i64
|
||||
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
|
||||
Requires<[NoV4T]>;
|
||||
|
||||
def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
|
||||
(i32 (LDriw ADDRriS11_0:$src1))>;
|
||||
@ -2862,15 +2881,41 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
|
||||
// Any extended 64-bit load.
|
||||
// anyext i32 -> i64
|
||||
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
|
||||
Requires<[NoV4T]>;
|
||||
|
||||
// When there is an offset we should prefer the pattern below over the pattern above.
|
||||
// The complexity of the above is 13 (gleaned from HexagonGenDAGISel.inc).
|
||||
// So the complexity below is set comfortably higher so that the pattern below is chosen.
|
||||
// If this is not done then we generate addresses such as
|
||||
// ********************************************
|
||||
// r1 = add (r0, #4)
|
||||
// r1 = memw(r1 + #0)
|
||||
// instead of
|
||||
// r1 = memw(r0 + #4)
|
||||
// ********************************************
|
||||
let AddedComplexity = 100 in
|
||||
def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
|
||||
s11_2ExtPred:$offset)))>,
|
||||
Requires<[NoV4T]>;
|
||||
|
||||
// anyext i16 -> i64.
|
||||
def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>;
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>,
|
||||
Requires<[NoV4T]>;
|
||||
|
||||
let AddedComplexity = 20 in
|
||||
def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
|
||||
s11_1ExtPred:$offset))),
|
||||
(i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1,
|
||||
s11_1ExtPred:$offset)))>,
|
||||
Requires<[NoV4T]>;
|
||||
|
||||
// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
|
||||
def : Pat<(i64 (zext (i32 IntRegs:$src1))),
|
||||
(i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
|
||||
(i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
|
||||
Requires<[NoV4T]>;
|
||||
|
||||
// Multiply 64-bit unsigned and use upper result.
|
||||
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
|
||||
|
@ -280,6 +280,19 @@ def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
|
||||
[]>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
def HexagonWrapperCombineRI_V4 :
|
||||
SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>;
|
||||
def HexagonWrapperCombineIR_V4 :
|
||||
SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>;
|
||||
|
||||
def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i),
|
||||
(COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r),
|
||||
(COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6,
|
||||
neverHasSideEffects = 1, validSubTargets = HasV4SubT in
|
||||
def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
|
||||
@ -1143,6 +1156,73 @@ def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
|
||||
u16ImmPred:$offset))),
|
||||
(i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
|
||||
Requires<[HasV4T]>;
|
||||
// zext i1->i64
|
||||
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
|
||||
(i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
// zext i32->i64
|
||||
def : Pat <(i64 (zext (i32 IntRegs:$src1))),
|
||||
(i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>,
|
||||
Requires<[HasV4T]>;
|
||||
// zext i8->i64
|
||||
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
|
||||
(i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
let AddedComplexity = 20 in
|
||||
def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
|
||||
s11_0ExtPred:$offset))),
|
||||
(i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
|
||||
s11_0ExtPred:$offset)))>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
// zext i16->i64
|
||||
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
|
||||
(i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
let AddedComplexity = 20 in
|
||||
def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
|
||||
s11_1ExtPred:$offset))),
|
||||
(i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1,
|
||||
s11_1ExtPred:$offset)))>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
// anyext i16->i64
// Selects a halfword load (LDrih) into the low word and combines it with an
// immediate 0 in the high word. The address is matched with ADDRriS11_1, the
// same address pattern the zextloadi16 pattern in this file uses for its
// halfword load; the indexed extloadi16 pattern likewise uses s11_1ExtPred.
def: Pat <(i64 (extloadi16 ADDRriS11_1:$src1)),
      (i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_1:$src1)))>,
      Requires<[HasV4T]>;
|
||||
|
||||
let AddedComplexity = 20 in
|
||||
def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
|
||||
s11_1ExtPred:$offset))),
|
||||
(i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1,
|
||||
s11_1ExtPred:$offset)))>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
// zext i32->i64
|
||||
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
|
||||
(i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
let AddedComplexity = 100 in
|
||||
def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
|
||||
(i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
|
||||
s11_2ExtPred:$offset)))>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
// anyext i32->i64
|
||||
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
|
||||
(i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
let AddedComplexity = 100 in
|
||||
def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
|
||||
(i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
|
||||
s11_2ExtPred:$offset)))>,
|
||||
Requires<[HasV4T]>;
|
||||
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
55
test/CodeGen/Hexagon/combine_ir.ll
Normal file
55
test/CodeGen/Hexagon/combine_ir.ll
Normal file
@ -0,0 +1,55 @@
|
||||
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
|
||||
; CHECK: word
|
||||
; CHECK: combine(#0
|
||||
|
||||
define void @word(i32* nocapture %a) nounwind {
|
||||
entry:
|
||||
%0 = load i32* %a, align 4, !tbaa !0
|
||||
%1 = zext i32 %0 to i64
|
||||
%add.ptr = getelementptr inbounds i32* %a, i32 1
|
||||
%2 = load i32* %add.ptr, align 4, !tbaa !0
|
||||
%3 = zext i32 %2 to i64
|
||||
%4 = shl nuw i64 %3, 32
|
||||
%ins = or i64 %4, %1
|
||||
tail call void @bar(i64 %ins) nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @bar(i64)
|
||||
|
||||
; CHECK: halfword
|
||||
; CHECK: combine(#0
|
||||
|
||||
define void @halfword(i16* nocapture %a) nounwind {
|
||||
entry:
|
||||
%0 = load i16* %a, align 2, !tbaa !3
|
||||
%1 = zext i16 %0 to i64
|
||||
%add.ptr = getelementptr inbounds i16* %a, i32 1
|
||||
%2 = load i16* %add.ptr, align 2, !tbaa !3
|
||||
%3 = zext i16 %2 to i64
|
||||
%4 = shl nuw nsw i64 %3, 16
|
||||
%ins = or i64 %4, %1
|
||||
tail call void @bar(i64 %ins) nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: byte
|
||||
; CHECK: combine(#0
|
||||
|
||||
define void @byte(i8* nocapture %a) nounwind {
|
||||
entry:
|
||||
%0 = load i8* %a, align 1, !tbaa !1
|
||||
%1 = zext i8 %0 to i64
|
||||
%add.ptr = getelementptr inbounds i8* %a, i32 1
|
||||
%2 = load i8* %add.ptr, align 1, !tbaa !1
|
||||
%3 = zext i8 %2 to i64
|
||||
%4 = shl nuw nsw i64 %3, 8
|
||||
%ins = or i64 %4, %1
|
||||
tail call void @bar(i64 %ins) nounwind
|
||||
ret void
|
||||
}
|
||||
|
||||
!0 = metadata !{metadata !"int", metadata !1}
|
||||
!1 = metadata !{metadata !"omnipotent char", metadata !2}
|
||||
!2 = metadata !{metadata !"Simple C/C++ TBAA"}
|
||||
!3 = metadata !{metadata !"short", metadata !1}
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
|
||||
; CHECK: r{{[0-9]}}:{{[0-9]}} = combine(r{{[0-9]}}, r{{[0-9]}})
|
||||
; CHECK: r{{[0-9]}}:{{[0-9]}} = combine({{r[0-9]|#0}}, r{{[0-9]}})
|
||||
; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32)
|
||||
|
||||
%struct.small = type { i32, i32 }
|
||||
|
Loading…
x
Reference in New Issue
Block a user