mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-26 23:32:58 +00:00
Add HasAVX1Only predicate and use it for patterns that have an AVX1 instruction and an AVX2 instruction rather than relying on AddedComplexity.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@162654 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e10fa862f8
commit
3a1683f88f
@ -560,6 +560,7 @@ def HasSSE42 : Predicate<"Subtarget->hasSSE42()">;
|
||||
def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">;
|
||||
def HasAVX : Predicate<"Subtarget->hasAVX()">;
|
||||
def HasAVX2 : Predicate<"Subtarget->hasAVX2()">;
|
||||
def HasAVX1Only : Predicate<"Subtarget->hasAVX() && !Subtarget->hasAVX2()">;
|
||||
|
||||
def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
|
||||
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||
|
@ -414,15 +414,16 @@ def AVX2_SET0 : PDI<0xef, MRMInitReg, (outs VR256:$dst), (ins), "",
|
||||
[]>, VEX_4V;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2], AddedComplexity = 5 in {
|
||||
let Predicates = [HasAVX2] in {
|
||||
def : Pat<(v4i64 immAllZerosV), (AVX2_SET0)>;
|
||||
def : Pat<(v8i32 immAllZerosV), (AVX2_SET0)>;
|
||||
def : Pat<(v16i16 immAllZerosV), (AVX2_SET0)>;
|
||||
def : Pat<(v32i8 immAllZerosV), (AVX2_SET0)>;
|
||||
}
|
||||
|
||||
// AVX has no support for 256-bit integer instructions, but since the 128-bit
|
||||
// AVX1 has no support for 256-bit integer instructions, but since the 128-bit
|
||||
// VPXOR instruction writes zero to its upper part, it's safe to build zeros.
|
||||
let Predicates = [HasAVX1Only] in {
|
||||
def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
|
||||
def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
|
||||
(SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
|
||||
@ -438,6 +439,7 @@ def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
|
||||
def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
|
||||
def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
|
||||
(SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
|
||||
}
|
||||
|
||||
// We set canFoldAsLoad because this can be converted to a constant-pool
|
||||
// load of an all-ones value if folding it would be beneficial.
|
||||
@ -2500,6 +2502,26 @@ let Constraints = "$src1 = $dst" in {
|
||||
SSEPackedDouble>, TB, OpSize;
|
||||
} // Constraints = "$src1 = $dst"
|
||||
|
||||
let Predicates = [HasAVX1Only] in {
|
||||
def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
||||
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
||||
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
|
||||
|
||||
def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX], AddedComplexity = 1 in {
|
||||
// FIXME: Instead of X86Movddup, there should be a X86Unpckl here, the
|
||||
// problem is during lowering, where it's not possible to recognize the load
|
||||
@ -4320,28 +4342,6 @@ let Constraints = "$src1 = $dst" in {
|
||||
}
|
||||
} // ExeDomain = SSEPackedInt
|
||||
|
||||
// Patterns for using AVX1 instructions with integer vectors
|
||||
// Here to give AVX2 priority
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
||||
(VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (memopv4i64 addr:$src2)))),
|
||||
(VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;
|
||||
|
||||
def : Pat<(v4i64 (X86Unpckl VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
(VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpckh VR256:$src1, (memopv4i64 addr:$src2))),
|
||||
(VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
|
||||
def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
|
||||
(VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
// SSE2 - Packed Integer Extract and Insert
|
||||
//===---------------------------------------------------------------------===//
|
||||
@ -7239,6 +7239,18 @@ def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
|
||||
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX1Only] in {
|
||||
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
@ -7256,14 +7268,6 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
|
||||
(VINSERTF128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
|
||||
def : Pat<(vinsertf128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTF128rm VR256:$src1, addr:$src2,
|
||||
@ -7319,6 +7323,9 @@ def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v2f64 (VEXTRACTF128rr
|
||||
(v4f64 VR256:$src1),
|
||||
(EXTRACT_get_vextractf128_imm VR128:$ext)))>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX1Only] in {
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v2i64 (VEXTRACTF128rr
|
||||
(v4i64 VR256:$src1),
|
||||
@ -7451,29 +7458,29 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
|
||||
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
|
||||
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
|
||||
(memopv4f64 addr:$src2), (i8 imm:$imm))),
|
||||
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX1Only] in {
|
||||
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
|
||||
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
|
||||
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
|
||||
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
|
||||
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
|
||||
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
|
||||
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
|
||||
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
|
||||
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
|
||||
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
|
||||
|
||||
def : Pat<(v8f32 (X86VPerm2x128 VR256:$src1,
|
||||
(memopv8f32 addr:$src2), (i8 imm:$imm))),
|
||||
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
|
||||
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1,
|
||||
(bc_v8i32 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
|
||||
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
|
||||
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
|
||||
(memopv4i64 addr:$src2), (i8 imm:$imm))),
|
||||
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
|
||||
def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1,
|
||||
(memopv4f64 addr:$src2), (i8 imm:$imm))),
|
||||
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
|
||||
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1,
|
||||
(bc_v32i8 (memopv4i64 addr:$src2)), (i8 imm:$imm))),
|
||||
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
|
||||
@ -7660,19 +7667,22 @@ let Predicates = [HasAVX2] in {
|
||||
}
|
||||
|
||||
// AVX1 broadcast patterns
|
||||
let Predicates = [HasAVX] in {
|
||||
let Predicates = [HasAVX1Only] in {
|
||||
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
|
||||
(VBROADCASTSSYrm addr:$src)>;
|
||||
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
|
||||
(VBROADCASTSDYrm addr:$src)>;
|
||||
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
|
||||
(VBROADCASTSSrm addr:$src)>;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX] in {
|
||||
def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))),
|
||||
(VBROADCASTSSYrm addr:$src)>;
|
||||
def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))),
|
||||
(VBROADCASTSDYrm addr:$src)>;
|
||||
def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))),
|
||||
(VBROADCASTSSrm addr:$src)>;
|
||||
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
|
||||
(VBROADCASTSSrm addr:$src)>;
|
||||
|
||||
// Provide fallback in case the load node that is used in the patterns above
|
||||
// is used by additional users, which prevents the pattern selection.
|
||||
@ -7752,7 +7762,6 @@ defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, v4f64>, VEX_W;
|
||||
//===----------------------------------------------------------------------===//
|
||||
// VPERM2I128 - Permute Integer Values in 128-bit chunks
|
||||
//
|
||||
let AddedComplexity = 1 in {
|
||||
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
|
||||
(ins VR256:$src1, VR256:$src2, i8imm:$src3),
|
||||
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
@ -7763,9 +7772,8 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
|
||||
"vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
|
||||
[(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (memopv4i64 addr:$src2),
|
||||
(i8 imm:$src3)))]>, VEX_4V;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2], AddedComplexity = 1 in {
|
||||
let Predicates = [HasAVX2] in {
|
||||
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
|
||||
(VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
|
||||
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
|
||||
@ -7800,7 +7808,7 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
|
||||
[]>, VEX_4V;
|
||||
}
|
||||
|
||||
let Predicates = [HasAVX2], AddedComplexity = 1 in {
|
||||
let Predicates = [HasAVX2] in {
|
||||
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
||||
@ -7817,6 +7825,11 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTI128rr VR256:$src1, VR128:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
|
||||
def : Pat<(vinsertf128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
|
||||
(i32 imm)),
|
||||
(VINSERTI128rm VR256:$src1, addr:$src2,
|
||||
(INSERT_get_vinsertf128_imm VR256:$ins))>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -7833,7 +7846,7 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
|
||||
(ins i128mem:$dst, VR256:$src1, i8imm:$src2),
|
||||
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX;
|
||||
|
||||
let Predicates = [HasAVX2], AddedComplexity = 1 in {
|
||||
let Predicates = [HasAVX2] in {
|
||||
def : Pat<(vextractf128_extract:$ext VR256:$src1, (i32 imm)),
|
||||
(v2i64 (VEXTRACTI128rr
|
||||
(v4i64 VR256:$src1),
|
||||
|
Loading…
x
Reference in New Issue
Block a user