diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td index 022a7f61365..fee83fb8110 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV4.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td @@ -3188,6 +3188,93 @@ def STriw_offset_ext_V4 : STInst<(outs), (add IntRegs:$src1, u6_2ImmPred:$src2))]>, Requires<[HasV4T]>; +def : Pat<(i64 (ctlz (i64 DoubleRegs:$src1))), + (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTLZ64_rr DoubleRegs:$src1))))>, + Requires<[HasV4T]>; + +def : Pat<(i64 (cttz (i64 DoubleRegs:$src1))), + (i64 (COMBINE_Ir_V4 (i32 0), (i32 (CTTZ64_rr DoubleRegs:$src1))))>, + Requires<[HasV4T]>; + + +// i8 -> i64 loads +// We need a complexity of 120 here to override the preceding handling of +// zextloadi8. +let Predicates = [HasV4T], AddedComplexity = 120 in { +def: Pat <(i64 (extloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 tglobaladdr:$addr)))>; + +def: Pat <(i64 (zextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 tglobaladdr:$addr)))>; + +def: Pat <(i64 (sextloadi8 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (SXTW (LDrib_abs_V4 tglobaladdr:$addr)))>; + +def: Pat <(i64 (extloadi8 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDrib_abs_V4 FoldGlobalAddr:$addr)))>; + +def: Pat <(i64 (zextloadi8 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriub_abs_V4 FoldGlobalAddr:$addr)))>; + +def: Pat <(i64 (sextloadi8 FoldGlobalAddr:$addr)), + (i64 (SXTW (LDrib_abs_V4 FoldGlobalAddr:$addr)))>; +} +// i16 -> i64 loads +// We need a complexity of 120 here to override the preceding handling of +// zextloadi16. 
+let AddedComplexity = 120 in { +def: Pat <(i64 (extloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi16 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (SXTW (LDrih_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (extloadi16 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDrih_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi16 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriuh_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi16 FoldGlobalAddr:$addr)), + (i64 (SXTW (LDrih_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; +} +// i32 -> i64 loads +// We need a complexity of 120 here to override the preceding handling of +// zextloadi32. +let AddedComplexity = 120 in { +def: Pat <(i64 (extloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi32 (NumUsesBelowThresCONST32 tglobaladdr:$addr))), + (i64 (SXTW (LDriw_abs_V4 tglobaladdr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (extloadi32 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (zextloadi32 FoldGlobalAddr:$addr)), + (i64 (COMBINE_Ir_V4 0, (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; + +def: Pat <(i64 (sextloadi32 FoldGlobalAddr:$addr)), + (i64 (SXTW (LDriw_abs_V4 FoldGlobalAddr:$addr)))>, + Requires<[HasV4T]>; +} // Indexed store double word - global address. 
// memw(Rs+#u6:2)=#S8 diff --git a/test/CodeGen/Hexagon/extload-combine.ll b/test/CodeGen/Hexagon/extload-combine.ll new file mode 100644 index 00000000000..b3b8bf07032 --- /dev/null +++ b/test/CodeGen/Hexagon/extload-combine.ll @@ -0,0 +1,80 @@ +; RUN: llc -march=hexagon -mcpu=hexagonv4 -O3 < %s | FileCheck %s +; Check that the combine/sxtw instructions are being generated. +; In the combine case, one of the operands should be 0 and the other should be +; the output of an absolute-addressing load instruction. + +@a = external global i16 +@b = external global i16 +@c = external global i16 +@char_a = external global i8 +@char_b = external global i8 +@char_c = external global i8 +@int_a = external global i32 +@int_b = external global i32 +@int_c = external global i32 + +; Function Attrs: nounwind +define i64 @short_test1() #0 { +; CHECK: [[VAR:r[0-9]+]]{{ *}}={{ *}}memuh(## +; CHECK: combine(#0, [[VAR]]) +entry: + store i16 0, i16* @a, align 2 + %0 = load i16* @b, align 2 + %conv2 = zext i16 %0 to i64 + ret i64 %conv2 +} + +; Function Attrs: nounwind +define i64 @short_test2() #0 { +; CHECK: [[VAR1:r[0-9]+]]{{ *}}={{ *}}memh(## +; CHECK: sxtw([[VAR1]]) +entry: + store i16 0, i16* @a, align 2 + %0 = load i16* @c, align 2 + %conv2 = sext i16 %0 to i64 + ret i64 %conv2 +} + +; Function Attrs: nounwind +define i64 @char_test1() #0 { +; CHECK: [[VAR2:r[0-9]+]]{{ *}}={{ *}}memub(## +; CHECK: combine(#0, [[VAR2]]) +entry: + store i8 0, i8* @char_a, align 1 + %0 = load i8* @char_b, align 1 + %conv2 = zext i8 %0 to i64 + ret i64 %conv2 +} + +; Function Attrs: nounwind +define i64 @char_test2() #0 { +; CHECK: [[VAR3:r[0-9]+]]{{ *}}={{ *}}memb(## +; CHECK: sxtw([[VAR3]]) +entry: + store i8 0, i8* @char_a, align 1 + %0 = load i8* @char_c, align 1 + %conv2 = sext i8 %0 to i64 + ret i64 %conv2 +} + +; Function Attrs: nounwind +define i64 @int_test1() #0 { +; CHECK: [[VAR4:r[0-9]+]]{{ *}}={{ *}}memw(## +; CHECK: combine(#0, [[VAR4]]) +entry: + store i32 0, i32* @int_a, align 4 + %0 = load 
i32* @int_b, align 4 + %conv = zext i32 %0 to i64 + ret i64 %conv +} + +; Function Attrs: nounwind +define i64 @int_test2() #0 { +; CHECK: [[VAR5:r[0-9]+]]{{ *}}={{ *}}memw(## +; CHECK: sxtw([[VAR5]]) +entry: + store i32 0, i32* @int_a, align 4 + %0 = load i32* @int_c, align 4 + %conv = sext i32 %0 to i64 + ret i64 %conv +}