Add HasAVX1Only predicate and use it for patterns that have an AVX1 instruction and an AVX2 instruction rather than relying on AddedComplexity.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162654 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Craig Topper 2012-08-27 06:08:57 +00:00
parent e10fa862f8
commit 3a1683f88f
2 changed files with 62 additions and 48 deletions

View File

@ -560,6 +560,7 @@ def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
def HasAVX : Predicate<"Subtarget->hasAVX()">; def HasAVX : Predicate<"Subtarget->hasAVX()">;
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
def HasAES : Predicate<"Subtarget->hasAES()">; def HasAES : Predicate<"Subtarget->hasAES()">;

View File

@ -414,15 +414,16 @@ def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "",
[]>, VEX_4V; []>, VEX_4V;
} }
let Predicates = [HasAVX2], AddedComplexity = 5 in { let Predicates = [HasAVX2] in {
def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>; def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>;
def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>; def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>;
def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>; def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>;
def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>; def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>;
} }
// AVX has no support for 256-bit integer instructions, but since the 128-bit // AVX1 has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe build zeros. // VPXOR instruction writes zero to its upper part, it's safe build zeros.
let Predicates = [HasAVX1Only] in {
def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>; def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v32i8 (v8f32 immAllZerosV)), def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
(SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>; (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
@ -438,6 +439,7 @@ def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>; def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v4i64 (v8f32 immAllZerosV)), def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
(SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>; (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
}
// We set canFoldAsLoad because this can be converted to a constant-pool // We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial. // load of an all-ones value if folding it would be beneficial.
@ -2500,6 +2502,26 @@ let Constraints = "$src1 = $dst" in {
SSEPackedDouble>, TB, OpSize; SSEPackedDouble>, TB, OpSize;
} // Constraints = "$src1 = $dst" } // Constraints = "$src1 = $dst"
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}
let Predicates = [HasAVX], AddedComplexity = 1 in { let Predicates = [HasAVX], AddedComplexity = 1 in {
// FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the // FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
// problem is during lowering, where it's not possible to recognize the load // problem is during lowering, where it's not possible to recognize the load
@ -4320,28 +4342,6 @@ let Constraints = "$src1 = $dst" in {
} }
} // ExeDomain = SSEPackedInt } // ExeDomain = SSEPackedInt
// Patterns for using AVX1 instructions with integer vectors
// Here to give AVX2 priority
let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Extract and Insert // SSE2 - Packed Integer Extract and Insert
//===---------------------------------------------------------------------===// //===---------------------------------------------------------------------===//
@ -7239,6 +7239,18 @@ def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
(i32 imm)), (i32 imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2, (VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>; (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
(i32 imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
(i32 imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
}
let Predicates = [HasAVX1Only] in {
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
(i32 imm)), (i32 imm)),
(VINSERTF128rr VR256:$src1, VR128:$src2, (VINSERTF128rr VR256:$src1, VR128:$src2,
@ -7256,14 +7268,6 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(VINSERTF128rr VR256:$src1, VR128:$src2, (VINSERTF128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>; (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
(i32 imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
(i32 imm)),
(VINSERTF128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
(i32 imm)), (i32 imm)),
(VINSERTF128rm VR256:$src1, addr:$src2, (VINSERTF128rm VR256:$src1, addr:$src2,
@ -7319,6 +7323,9 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(v2f64 (VEXTRACTF128rr (v2f64 (VEXTRACTF128rr
(v4f64 VR256:$src1), (v4f64 VR256:$src1),
(EXTRACT_get_vextractf128_imm VR128:$ext)))>; (EXTRACT_get_vextractf128_imm VR128:$ext)))>;
}
let Predicates = [HasAVX1Only] in {
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(v2i64 (VEXTRACTF128rr (v2i64 (VEXTRACTF128rr
(v4i64 VR256:$src1), (v4i64 VR256:$src1),
@ -7451,29 +7458,29 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
} }
let Predicates = [HasAVX] in { let Predicates = [HasAVX] in {
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
(memopv4f64 addr:$src2), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
}
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1,
(memopv8f32 addr:$src2), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
(bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
(memopv4i64 addr:$src2), (i8 imm:$imm))), (memopv4i64 addr:$src2), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
(memopv4f64 addr:$src2), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
(bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))), (bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
@ -7660,19 +7667,22 @@ let Predicates = [HasAVX2] in {
} }
// AVX1 broadcast patterns // AVX1 broadcast patterns
let Predicates = [HasAVX] in { let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
(VBROADCASTSSYrm addr:$src)>; (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
(VBROADCASTSDYrm addr:$src)>; (VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
(VBROADCASTSSrm addr:$src)>;
}
let Predicates = [HasAVX] in {
def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSYrm addr:$src)>; (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
(VBROADCASTSDYrm addr:$src)>; (VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
(VBROADCASTSSrm addr:$src)>; (VBROADCASTSSrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
(VBROADCASTSSrm addr:$src)>;
// Provide fallback in case the load node that is used in the patterns above // Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection. // is used by additional users, which prevents the pattern selection.
@ -7752,7 +7762,6 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Floating-Point Values in 128-bit chunks // VPERM2I128 - Permute Floating-Point Values in 128-bit chunks
// //
let AddedComplexity = 1 in {
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i8imm:$src3), (ins VR256:$src1, VR256:$src2, i8imm:$src3),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@ -7763,9 +7772,8 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2), [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
(i8 imm:$src3)))]>, VEX_4V; (i8 imm:$src3)))]>, VEX_4V;
}
let Predicates = [HasAVX2], AddedComplexity = 1 in { let Predicates = [HasAVX2] in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
(VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>; (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
@ -7800,7 +7808,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
[]>, VEX_4V; []>, VEX_4V;
} }
let Predicates = [HasAVX2], AddedComplexity = 1 in { let Predicates = [HasAVX2] in {
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
(i32 imm)), (i32 imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2, (VINSERTI128rr VR256:$src1, VR128:$src2,
@ -7817,6 +7825,11 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
(i32 imm)), (i32 imm)),
(VINSERTI128rr VR256:$src1, VR128:$src2, (VINSERTI128rr VR256:$src1, VR128:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>; (INSERT_get_vinsertf128_imm VR256:$ins))>;
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
(i32 imm)),
(VINSERTI128rm VR256:$src1, addr:$src2,
(INSERT_get_vinsertf128_imm VR256:$ins))>;
} }
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@ -7833,7 +7846,7 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
(ins i128mem:$dst, VR256:$src1, i8imm:$src2), (ins i128mem:$dst, VR256:$src1, i8imm:$src2),
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
let Predicates = [HasAVX2], AddedComplexity = 1 in { let Predicates = [HasAVX2] in {
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)), def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
(v2i64 (VEXTRACTI128rr (v2i64 (VEXTRACTI128rr
(v4i64 VR256:$src1), (v4i64 VR256:$src1),