mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-27 14:34:58 +00:00
Hexagon: Add V4 combine instructions and some more Def Pats for V2.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@174331 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
0d3731478e
commit
3e1635d08c
@ -52,6 +52,8 @@ namespace llvm {
|
|||||||
WrapperCP,
|
WrapperCP,
|
||||||
WrapperCombineII,
|
WrapperCombineII,
|
||||||
WrapperCombineRR,
|
WrapperCombineRR,
|
||||||
|
WrapperCombineRI_V4,
|
||||||
|
WrapperCombineIR_V4,
|
||||||
WrapperPackhl,
|
WrapperPackhl,
|
||||||
WrapperSplatB,
|
WrapperSplatB,
|
||||||
WrapperSplatH,
|
WrapperSplatH,
|
||||||
|
@ -2825,23 +2825,42 @@ def : Pat <(i32 (zext (i1 PredRegs:$src1))),
|
|||||||
|
|
||||||
// i1 -> i64
|
// i1 -> i64
|
||||||
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
|
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
|
||||||
(i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>;
|
(i64 (COMBINE_rr (TFRI 0), (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
|
||||||
|
Requires<[NoV4T]>;
|
||||||
|
|
||||||
// i32 -> i64
|
// i32 -> i64
|
||||||
def : Pat <(i64 (zext (i32 IntRegs:$src1))),
|
def : Pat <(i64 (zext (i32 IntRegs:$src1))),
|
||||||
(i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
|
(i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
|
||||||
|
Requires<[NoV4T]>;
|
||||||
|
|
||||||
// i8 -> i64
|
// i8 -> i64
|
||||||
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
|
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
|
||||||
(i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>;
|
(i64 (COMBINE_rr (TFRI 0), (LDriub ADDRriS11_0:$src1)))>,
|
||||||
|
Requires<[NoV4T]>;
|
||||||
|
|
||||||
|
let AddedComplexity = 20 in
|
||||||
|
def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
|
||||||
|
s11_0ExtPred:$offset))),
|
||||||
|
(i64 (COMBINE_rr (TFRI 0), (LDriub_indexed IntRegs:$src1,
|
||||||
|
s11_0ExtPred:$offset)))>,
|
||||||
|
Requires<[NoV4T]>;
|
||||||
|
|
||||||
// i16 -> i64
|
// i16 -> i64
|
||||||
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
|
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
|
||||||
(i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>;
|
(i64 (COMBINE_rr (TFRI 0), (LDriuh ADDRriS11_1:$src1)))>,
|
||||||
|
Requires<[NoV4T]>;
|
||||||
|
|
||||||
|
let AddedComplexity = 20 in
|
||||||
|
def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
|
||||||
|
s11_1ExtPred:$offset))),
|
||||||
|
(i64 (COMBINE_rr (TFRI 0), (LDriuh_indexed IntRegs:$src1,
|
||||||
|
s11_1ExtPred:$offset)))>,
|
||||||
|
Requires<[NoV4T]>;
|
||||||
|
|
||||||
// i32 -> i64
|
// i32 -> i64
|
||||||
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
|
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
|
||||||
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
|
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
|
||||||
|
Requires<[NoV4T]>;
|
||||||
|
|
||||||
def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
|
def: Pat <(i32 (zextloadi1 ADDRriS11_0:$src1)),
|
||||||
(i32 (LDriw ADDRriS11_0:$src1))>;
|
(i32 (LDriw ADDRriS11_0:$src1))>;
|
||||||
@ -2862,15 +2881,41 @@ def : Pat <(i64 (anyext (i1 PredRegs:$src1))),
|
|||||||
// Any extended 64-bit load.
|
// Any extended 64-bit load.
|
||||||
// anyext i32 -> i64
|
// anyext i32 -> i64
|
||||||
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
|
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
|
||||||
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>;
|
(i64 (COMBINE_rr (TFRI 0), (LDriw ADDRriS11_2:$src1)))>,
|
||||||
|
Requires<[NoV4T]>;
|
||||||
|
|
||||||
|
// When there is an offset we should prefer the pattern below over the pattern above.
|
||||||
|
// The complexity of the above is 13 (gleaned from HexagonGenDAGISel.inc).
|
||||||
|
// So the complexity below is set comfortably higher, so that the pattern below is chosen.
|
||||||
|
// If this is not done, then we generate address computations such as
|
||||||
|
// ********************************************
|
||||||
|
// r1 = add (r0, #4)
|
||||||
|
// r1 = memw(r1 + #0)
|
||||||
|
// instead of
|
||||||
|
// r1 = memw(r0 + #4)
|
||||||
|
// ********************************************
|
||||||
|
let AddedComplexity = 100 in
|
||||||
|
def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
|
||||||
|
(i64 (COMBINE_rr (TFRI 0), (LDriw_indexed IntRegs:$src1,
|
||||||
|
s11_2ExtPred:$offset)))>,
|
||||||
|
Requires<[NoV4T]>;
|
||||||
|
|
||||||
// anyext i16 -> i64.
|
// anyext i16 -> i64.
|
||||||
def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
|
def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
|
||||||
(i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>;
|
(i64 (COMBINE_rr (TFRI 0), (LDrih ADDRriS11_2:$src1)))>,
|
||||||
|
Requires<[NoV4T]>;
|
||||||
|
|
||||||
|
let AddedComplexity = 20 in
|
||||||
|
def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
|
||||||
|
s11_1ExtPred:$offset))),
|
||||||
|
(i64 (COMBINE_rr (TFRI 0), (LDrih_indexed IntRegs:$src1,
|
||||||
|
s11_1ExtPred:$offset)))>,
|
||||||
|
Requires<[NoV4T]>;
|
||||||
|
|
||||||
// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
|
// Map from Rdd = zxtw(Rs) -> Rdd = combine(0, Rs).
|
||||||
def : Pat<(i64 (zext (i32 IntRegs:$src1))),
|
def : Pat<(i64 (zext (i32 IntRegs:$src1))),
|
||||||
(i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>;
|
(i64 (COMBINE_rr (TFRI 0), (i32 IntRegs:$src1)))>,
|
||||||
|
Requires<[NoV4T]>;
|
||||||
|
|
||||||
// Multiply 64-bit unsigned and use upper result.
|
// Multiply 64-bit unsigned and use upper result.
|
||||||
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
|
def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
|
||||||
|
@ -280,6 +280,19 @@ def COMBINE_Ir_V4 : ALU32_ir<(outs DoubleRegs:$dst),
|
|||||||
[]>,
|
[]>,
|
||||||
Requires<[HasV4T]>;
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
def HexagonWrapperCombineRI_V4 :
|
||||||
|
SDNode<"HexagonISD::WrapperCombineRI_V4", SDTHexagonI64I32I32>;
|
||||||
|
def HexagonWrapperCombineIR_V4 :
|
||||||
|
SDNode<"HexagonISD::WrapperCombineIR_V4", SDTHexagonI64I32I32>;
|
||||||
|
|
||||||
|
def : Pat <(HexagonWrapperCombineRI_V4 IntRegs:$r, s8ExtPred:$i),
|
||||||
|
(COMBINE_rI_V4 IntRegs:$r, s8ExtPred:$i)>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
def : Pat <(HexagonWrapperCombineIR_V4 s8ExtPred:$i, IntRegs:$r),
|
||||||
|
(COMBINE_Ir_V4 s8ExtPred:$i, IntRegs:$r)>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6,
|
let isExtendable = 1, opExtendable = 2, isExtentSigned = 0, opExtentBits = 6,
|
||||||
neverHasSideEffects = 1, validSubTargets = HasV4SubT in
|
neverHasSideEffects = 1, validSubTargets = HasV4SubT in
|
||||||
def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
|
def COMBINE_iI_V4 : ALU32_ii<(outs DoubleRegs:$dst),
|
||||||
@ -1143,6 +1156,73 @@ def : Pat <(i32 (load (add (HexagonCONST32_GP tglobaladdr:$global),
|
|||||||
u16ImmPred:$offset))),
|
u16ImmPred:$offset))),
|
||||||
(i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
|
(i32 (LDriw_GP_V4 tglobaladdr:$global, u16ImmPred:$offset))>,
|
||||||
Requires<[HasV4T]>;
|
Requires<[HasV4T]>;
|
||||||
|
// zext i1->i64
|
||||||
|
def : Pat <(i64 (zext (i1 PredRegs:$src1))),
|
||||||
|
(i64 (COMBINE_Ir_V4 0, (MUX_ii (i1 PredRegs:$src1), 1, 0)))>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
// zext i32->i64
|
||||||
|
def : Pat <(i64 (zext (i32 IntRegs:$src1))),
|
||||||
|
(i64 (COMBINE_Ir_V4 0, (i32 IntRegs:$src1)))>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
// zext i8->i64
|
||||||
|
def: Pat <(i64 (zextloadi8 ADDRriS11_0:$src1)),
|
||||||
|
(i64 (COMBINE_Ir_V4 0, (LDriub ADDRriS11_0:$src1)))>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
let AddedComplexity = 20 in
|
||||||
|
def: Pat <(i64 (zextloadi8 (add (i32 IntRegs:$src1),
|
||||||
|
s11_0ExtPred:$offset))),
|
||||||
|
(i64 (COMBINE_Ir_V4 0, (LDriub_indexed IntRegs:$src1,
|
||||||
|
s11_0ExtPred:$offset)))>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
// zext i16->i64
|
||||||
|
def: Pat <(i64 (zextloadi16 ADDRriS11_1:$src1)),
|
||||||
|
(i64 (COMBINE_Ir_V4 0, (LDriuh ADDRriS11_1:$src1)))>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
let AddedComplexity = 20 in
|
||||||
|
def: Pat <(i64 (zextloadi16 (add (i32 IntRegs:$src1),
|
||||||
|
s11_1ExtPred:$offset))),
|
||||||
|
(i64 (COMBINE_Ir_V4 0, (LDriuh_indexed IntRegs:$src1,
|
||||||
|
s11_1ExtPred:$offset)))>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
// anyext i16->i64
|
||||||
|
def: Pat <(i64 (extloadi16 ADDRriS11_2:$src1)),
|
||||||
|
(i64 (COMBINE_Ir_V4 0, (LDrih ADDRriS11_2:$src1)))>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
let AddedComplexity = 20 in
|
||||||
|
def: Pat <(i64 (extloadi16 (add (i32 IntRegs:$src1),
|
||||||
|
s11_1ExtPred:$offset))),
|
||||||
|
(i64 (COMBINE_Ir_V4 0, (LDrih_indexed IntRegs:$src1,
|
||||||
|
s11_1ExtPred:$offset)))>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
// zext i32->i64
|
||||||
|
def: Pat <(i64 (zextloadi32 ADDRriS11_2:$src1)),
|
||||||
|
(i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
let AddedComplexity = 100 in
|
||||||
|
def: Pat <(i64 (zextloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
|
||||||
|
(i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
|
||||||
|
s11_2ExtPred:$offset)))>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
// anyext i32->i64
|
||||||
|
def: Pat <(i64 (extloadi32 ADDRriS11_2:$src1)),
|
||||||
|
(i64 (COMBINE_Ir_V4 0, (LDriw ADDRriS11_2:$src1)))>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
let AddedComplexity = 100 in
|
||||||
|
def: Pat <(i64 (extloadi32 (i32 (add IntRegs:$src1, s11_2ExtPred:$offset)))),
|
||||||
|
(i64 (COMBINE_Ir_V4 0, (LDriw_indexed IntRegs:$src1,
|
||||||
|
s11_2ExtPred:$offset)))>,
|
||||||
|
Requires<[HasV4T]>;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
55
test/CodeGen/Hexagon/combine_ir.ll
Normal file
55
test/CodeGen/Hexagon/combine_ir.ll
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
|
||||||
|
; CHECK: word
|
||||||
|
; CHECK: combine(#0
|
||||||
|
|
||||||
|
define void @word(i32* nocapture %a) nounwind {
|
||||||
|
entry:
|
||||||
|
%0 = load i32* %a, align 4, !tbaa !0
|
||||||
|
%1 = zext i32 %0 to i64
|
||||||
|
%add.ptr = getelementptr inbounds i32* %a, i32 1
|
||||||
|
%2 = load i32* %add.ptr, align 4, !tbaa !0
|
||||||
|
%3 = zext i32 %2 to i64
|
||||||
|
%4 = shl nuw i64 %3, 32
|
||||||
|
%ins = or i64 %4, %1
|
||||||
|
tail call void @bar(i64 %ins) nounwind
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @bar(i64)
|
||||||
|
|
||||||
|
; CHECK: halfword
|
||||||
|
; CHECK: combine(#0
|
||||||
|
|
||||||
|
define void @halfword(i16* nocapture %a) nounwind {
|
||||||
|
entry:
|
||||||
|
%0 = load i16* %a, align 2, !tbaa !3
|
||||||
|
%1 = zext i16 %0 to i64
|
||||||
|
%add.ptr = getelementptr inbounds i16* %a, i32 1
|
||||||
|
%2 = load i16* %add.ptr, align 2, !tbaa !3
|
||||||
|
%3 = zext i16 %2 to i64
|
||||||
|
%4 = shl nuw nsw i64 %3, 16
|
||||||
|
%ins = or i64 %4, %1
|
||||||
|
tail call void @bar(i64 %ins) nounwind
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; CHECK: byte
|
||||||
|
; CHECK: combine(#0
|
||||||
|
|
||||||
|
define void @byte(i8* nocapture %a) nounwind {
|
||||||
|
entry:
|
||||||
|
%0 = load i8* %a, align 1, !tbaa !1
|
||||||
|
%1 = zext i8 %0 to i64
|
||||||
|
%add.ptr = getelementptr inbounds i8* %a, i32 1
|
||||||
|
%2 = load i8* %add.ptr, align 1, !tbaa !1
|
||||||
|
%3 = zext i8 %2 to i64
|
||||||
|
%4 = shl nuw nsw i64 %3, 8
|
||||||
|
%ins = or i64 %4, %1
|
||||||
|
tail call void @bar(i64 %ins) nounwind
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
!0 = metadata !{metadata !"int", metadata !1}
|
||||||
|
!1 = metadata !{metadata !"omnipotent char", metadata !2}
|
||||||
|
!2 = metadata !{metadata !"Simple C/C++ TBAA"}
|
||||||
|
!3 = metadata !{metadata !"short", metadata !1}
|
@ -1,5 +1,5 @@
|
|||||||
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
|
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
|
||||||
; CHECK: r{{[0-9]}}:{{[0-9]}} = combine(r{{[0-9]}}, r{{[0-9]}})
|
; CHECK: r{{[0-9]}}:{{[0-9]}} = combine({{r[0-9]|#0}}, r{{[0-9]}})
|
||||||
; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32)
|
; CHECK: r{{[0-9]}}:{{[0-9]}} |= asl(r{{[0-9]}}:{{[0-9]}}, #32)
|
||||||
|
|
||||||
%struct.small = type { i32, i32 }
|
%struct.small = type { i32, i32 }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user