mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-10 02:36:06 +00:00
Separate out the AES-NI instructions from the SSE4.2 instructions. Add
a new subtarget option for AES and check for the support.
Add "westmere" line of processors and add AES-NI support to the core i7.
Add a couple of TODOs for information I couldn't verify.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100231 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7249ef0455
commit
6d1cd1cd04
@ -74,6 +74,8 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
|
|||||||
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
|
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
|
||||||
"HasVectorUAMem", "true",
|
"HasVectorUAMem", "true",
|
||||||
"Allow unaligned memory operands on vector/SIMD instructions">;
|
"Allow unaligned memory operands on vector/SIMD instructions">;
|
||||||
|
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
|
||||||
|
"Enable AES instructions">;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// X86 processors supported.
|
// X86 processors supported.
|
||||||
@ -101,11 +103,17 @@ def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
|
|||||||
def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
|
def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
|
||||||
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
|
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
|
||||||
def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
|
def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
|
||||||
|
// "Arrandale" along with corei3 and corei5
|
||||||
def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
||||||
FeatureFastUAMem]>;
|
FeatureFastUAMem, FeatureAES]>;
|
||||||
def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
||||||
FeatureFastUAMem]>;
|
FeatureFastUAMem]>;
|
||||||
|
// Westmere is a similar machine to nehalem with some additional features.
|
||||||
|
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
|
||||||
|
def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
||||||
|
FeatureFastUAMem, FeatureAES]>;
|
||||||
// Sandy Bridge does not have FMA
|
// Sandy Bridge does not have FMA
|
||||||
|
// FIXME: Wikipedia says it does have FMA... Sandy Bridge should have AES as well.
|
||||||
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
|
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
|
||||||
|
|
||||||
def : Proc<"k6", [FeatureMMX]>;
|
def : Proc<"k6", [FeatureMMX]>;
|
||||||
|
@ -311,6 +311,21 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
|||||||
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
|
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
|
||||||
Requires<[HasSSE42]>;
|
Requires<[HasSSE42]>;
|
||||||
|
|
||||||
|
// AES Instruction Templates:
|
||||||
|
//
|
||||||
|
// AES8I
|
||||||
|
// FIXME: Verify these; they appear to use the same encoding as the SSE4.2 T8
|
||||||
|
// and TA encodings.
|
||||||
|
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||||
|
list<dag>pattern>
|
||||||
|
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
|
||||||
|
Requires<[HasAES]>;
|
||||||
|
|
||||||
|
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||||
|
list<dag> pattern>
|
||||||
|
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
|
||||||
|
Requires<[HasAES]>;
|
||||||
|
|
||||||
// X86-64 Instruction templates...
|
// X86-64 Instruction templates...
|
||||||
//
|
//
|
||||||
|
|
||||||
|
@ -330,6 +330,7 @@ def OptForSize : Predicate<"OptForSize">;
|
|||||||
def OptForSpeed : Predicate<"!OptForSize">;
|
def OptForSpeed : Predicate<"!OptForSize">;
|
||||||
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
|
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
|
||||||
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
|
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
|
||||||
|
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// X86 Instruction Format Definitions.
|
// X86 Instruction Format Definitions.
|
||||||
|
@ -3848,53 +3848,6 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
|
|||||||
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
|
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
|
||||||
(PCMPGTQrm VR128:$src1, addr:$src2)>;
|
(PCMPGTQrm VR128:$src1, addr:$src2)>;
|
||||||
|
|
||||||
// TODO: These should be AES as a feature set.
|
|
||||||
defm AESIMC : SS42I_binop_rm_int<0xDB, "aesimc",
|
|
||||||
int_x86_aesni_aesimc>;
|
|
||||||
defm AESENC : SS42I_binop_rm_int<0xDC, "aesenc",
|
|
||||||
int_x86_aesni_aesenc>;
|
|
||||||
defm AESENCLAST : SS42I_binop_rm_int<0xDD, "aesenclast",
|
|
||||||
int_x86_aesni_aesenclast>;
|
|
||||||
defm AESDEC : SS42I_binop_rm_int<0xDE, "aesdec",
|
|
||||||
int_x86_aesni_aesdec>;
|
|
||||||
defm AESDECLAST : SS42I_binop_rm_int<0xDF, "aesdeclast",
|
|
||||||
int_x86_aesni_aesdeclast>;
|
|
||||||
|
|
||||||
def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)),
|
|
||||||
(AESIMCrr VR128:$src1, VR128:$src2)>;
|
|
||||||
def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))),
|
|
||||||
(AESIMCrm VR128:$src1, addr:$src2)>;
|
|
||||||
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
|
|
||||||
(AESENCrr VR128:$src1, VR128:$src2)>;
|
|
||||||
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
|
|
||||||
(AESENCrm VR128:$src1, addr:$src2)>;
|
|
||||||
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
|
|
||||||
(AESENCLASTrr VR128:$src1, VR128:$src2)>;
|
|
||||||
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
|
|
||||||
(AESENCLASTrm VR128:$src1, addr:$src2)>;
|
|
||||||
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
|
|
||||||
(AESDECrr VR128:$src1, VR128:$src2)>;
|
|
||||||
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
|
|
||||||
(AESDECrm VR128:$src1, addr:$src2)>;
|
|
||||||
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
|
|
||||||
(AESDECLASTrr VR128:$src1, VR128:$src2)>;
|
|
||||||
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
|
|
||||||
(AESDECLASTrm VR128:$src1, addr:$src2)>;
|
|
||||||
|
|
||||||
def AESKEYGENASSIST128rr : SS42AI<0xDF, MRMSrcReg, (outs VR128:$dst),
|
|
||||||
(ins VR128:$src1, i32i8imm:$src2),
|
|
||||||
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
|
||||||
[(set VR128:$dst,
|
|
||||||
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
|
|
||||||
OpSize;
|
|
||||||
def AESKEYGENASSIST128rm : SS42AI<0xDF, MRMSrcMem, (outs VR128:$dst),
|
|
||||||
(ins i128mem:$src1, i32i8imm:$src2),
|
|
||||||
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
|
||||||
[(set VR128:$dst,
|
|
||||||
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
|
|
||||||
imm:$src2))]>,
|
|
||||||
OpSize;
|
|
||||||
|
|
||||||
// crc intrinsic instruction
|
// crc intrinsic instruction
|
||||||
// This set of instructions are only rm, the only difference is the size
|
// This set of instructions are only rm, the only difference is the size
|
||||||
// of r and m.
|
// of r and m.
|
||||||
@ -4056,3 +4009,73 @@ defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
|
|||||||
defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
|
defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
|
||||||
defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
|
defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
|
||||||
defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
|
defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
|
||||||
|
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
// AES-NI Instructions
|
||||||
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
let Constraints = "$src1 = $dst" in {
|
||||||
|
multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
|
||||||
|
Intrinsic IntId128, bit Commutable = 0> {
|
||||||
|
def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, VR128:$src2),
|
||||||
|
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||||
|
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
|
||||||
|
OpSize {
|
||||||
|
let isCommutable = Commutable;
|
||||||
|
}
|
||||||
|
def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, i128mem:$src2),
|
||||||
|
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||||
|
[(set VR128:$dst,
|
||||||
|
(IntId128 VR128:$src1,
|
||||||
|
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
defm AESIMC : AESI_binop_rm_int<0xDB, "aesimc",
|
||||||
|
int_x86_aesni_aesimc>;
|
||||||
|
defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
|
||||||
|
int_x86_aesni_aesenc>;
|
||||||
|
defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
|
||||||
|
int_x86_aesni_aesenclast>;
|
||||||
|
defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
|
||||||
|
int_x86_aesni_aesdec>;
|
||||||
|
defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
|
||||||
|
int_x86_aesni_aesdeclast>;
|
||||||
|
|
||||||
|
def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)),
|
||||||
|
(AESIMCrr VR128:$src1, VR128:$src2)>;
|
||||||
|
def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))),
|
||||||
|
(AESIMCrm VR128:$src1, addr:$src2)>;
|
||||||
|
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
|
||||||
|
(AESENCrr VR128:$src1, VR128:$src2)>;
|
||||||
|
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
|
||||||
|
(AESENCrm VR128:$src1, addr:$src2)>;
|
||||||
|
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
|
||||||
|
(AESENCLASTrr VR128:$src1, VR128:$src2)>;
|
||||||
|
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
|
||||||
|
(AESENCLASTrm VR128:$src1, addr:$src2)>;
|
||||||
|
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
|
||||||
|
(AESDECrr VR128:$src1, VR128:$src2)>;
|
||||||
|
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
|
||||||
|
(AESDECrm VR128:$src1, addr:$src2)>;
|
||||||
|
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
|
||||||
|
(AESDECLASTrr VR128:$src1, VR128:$src2)>;
|
||||||
|
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
|
||||||
|
(AESDECLASTrm VR128:$src1, addr:$src2)>;
|
||||||
|
|
||||||
|
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
|
||||||
|
(ins VR128:$src1, i32i8imm:$src2),
|
||||||
|
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[(set VR128:$dst,
|
||||||
|
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
|
||||||
|
OpSize;
|
||||||
|
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
|
||||||
|
(ins i128mem:$src1, i32i8imm:$src2),
|
||||||
|
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||||
|
[(set VR128:$dst,
|
||||||
|
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
|
||||||
|
imm:$src2))]>,
|
||||||
|
OpSize;
|
||||||
|
|
||||||
|
@ -259,6 +259,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
|
|||||||
|
|
||||||
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
|
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
|
||||||
HasAVX = ((ECX >> 28) & 0x1);
|
HasAVX = ((ECX >> 28) & 0x1);
|
||||||
|
HasAES = IsIntel && ((ECX >> 25) & 0x1);
|
||||||
|
|
||||||
if (IsIntel || IsAMD) {
|
if (IsIntel || IsAMD) {
|
||||||
// Determine if bit test memory instructions are slow.
|
// Determine if bit test memory instructions are slow.
|
||||||
@ -286,6 +287,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
|
|||||||
, HasX86_64(false)
|
, HasX86_64(false)
|
||||||
, HasSSE4A(false)
|
, HasSSE4A(false)
|
||||||
, HasAVX(false)
|
, HasAVX(false)
|
||||||
|
, HasAES(false)
|
||||||
, HasFMA3(false)
|
, HasFMA3(false)
|
||||||
, HasFMA4(false)
|
, HasFMA4(false)
|
||||||
, IsBTMemSlow(false)
|
, IsBTMemSlow(false)
|
||||||
|
@ -69,6 +69,9 @@ protected:
|
|||||||
/// HasAVX - Target has AVX instructions
|
/// HasAVX - Target has AVX instructions
|
||||||
bool HasAVX;
|
bool HasAVX;
|
||||||
|
|
||||||
|
/// HasAES - Target has AES instructions
|
||||||
|
bool HasAES;
|
||||||
|
|
||||||
/// HasFMA3 - Target has 3-operand fused multiply-add
|
/// HasFMA3 - Target has 3-operand fused multiply-add
|
||||||
bool HasFMA3;
|
bool HasFMA3;
|
||||||
|
|
||||||
@ -148,6 +151,7 @@ public:
|
|||||||
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
|
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
|
||||||
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
||||||
bool hasAVX() const { return HasAVX; }
|
bool hasAVX() const { return HasAVX; }
|
||||||
|
bool hasAES() const { return HasAES; }
|
||||||
bool hasFMA3() const { return HasFMA3; }
|
bool hasFMA3() const { return HasFMA3; }
|
||||||
bool hasFMA4() const { return HasFMA4; }
|
bool hasFMA4() const { return HasFMA4; }
|
||||||
bool isBTMemSlow() const { return IsBTMemSlow; }
|
bool isBTMemSlow() const { return IsBTMemSlow; }
|
||||||
|
Loading…
x
Reference in New Issue
Block a user