Separate out the AES-NI instructions from the SSE4.2 instructions. Add

a new subtarget option for AES and check for the support.  Add "westmere"
line of processors and add AES-NI support to the core i7.

Add a couple of TODOs for information I couldn't verify.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100231 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eric Christopher 2010-04-02 21:54:27 +00:00
parent 7249ef0455
commit 6d1cd1cd04
6 changed files with 101 additions and 48 deletions

View File

@ -74,6 +74,8 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
"HasVectorUAMem", "true", "HasVectorUAMem", "true",
"Allow unaligned memory operands on vector/SIMD instructions">; "Allow unaligned memory operands on vector/SIMD instructions">;
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
"Enable AES instructions">;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// X86 processors supported. // X86 processors supported.
@ -101,11 +103,17 @@ def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
// "Arrandale" along with corei3 and corei5
def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
FeatureFastUAMem]>; FeatureFastUAMem, FeatureAES]>;
def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
FeatureFastUAMem]>; FeatureFastUAMem]>;
// Westmere is a similar machine to nehalem with some additional features.
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
FeatureFastUAMem, FeatureAES]>;
// Sandy Bridge does not have FMA // Sandy Bridge does not have FMA
// FIXME: Wikipedia says it does... it should have AES as well.
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>; def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6", [FeatureMMX]>;

View File

@ -311,6 +311,21 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA, : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
Requires<[HasSSE42]>; Requires<[HasSSE42]>;
// AES Instruction Templates:
//
// AES8I
// FIXME: Verify these, they appear to use the same encoding as the SSE4.2 T8
// and TA encodings.
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
Requires<[HasAES]>;
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
Requires<[HasAES]>;
// X86-64 Instruction templates... // X86-64 Instruction templates...
// //

View File

@ -330,6 +330,7 @@ def OptForSize : Predicate<"OptForSize">;
def OptForSpeed : Predicate<"!OptForSize">; def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
def HasAES : Predicate<"Subtarget->hasAES()">;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions. // X86 Instruction Format Definitions.

View File

@ -3848,53 +3848,6 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
(PCMPGTQrm VR128:$src1, addr:$src2)>; (PCMPGTQrm VR128:$src1, addr:$src2)>;
// TODO: These should be AES as a feature set.
defm AESIMC : SS42I_binop_rm_int<0xDB, "aesimc",
int_x86_aesni_aesimc>;
defm AESENC : SS42I_binop_rm_int<0xDC, "aesenc",
int_x86_aesni_aesenc>;
defm AESENCLAST : SS42I_binop_rm_int<0xDD, "aesenclast",
int_x86_aesni_aesenclast>;
defm AESDEC : SS42I_binop_rm_int<0xDE, "aesdec",
int_x86_aesni_aesdec>;
defm AESDECLAST : SS42I_binop_rm_int<0xDF, "aesdeclast",
int_x86_aesni_aesdeclast>;
def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)),
(AESIMCrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))),
(AESIMCrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
(AESENCrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
(AESENCrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
(AESENCLASTrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
(AESENCLASTrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
(AESDECrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
(AESDECrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
(AESDECLASTrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
(AESDECLASTrm VR128:$src1, addr:$src2)>;
def AESKEYGENASSIST128rr : SS42AI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
OpSize;
def AESKEYGENASSIST128rm : SS42AI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i32i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
imm:$src2))]>,
OpSize;
// crc intrinsic instruction // crc intrinsic instruction
// This set of instructions are only rm, the only difference is the size // This set of instructions are only rm, the only difference is the size
// of r and m. // of r and m.
@ -4056,3 +4009,73 @@ defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>; defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>; defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>; defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
//===----------------------------------------------------------------------===//
// AES-NI Instructions
//===----------------------------------------------------------------------===//
let Constraints = "$src1 = $dst" in {
multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
Intrinsic IntId128, bit Commutable = 0> {
def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
OpSize {
let isCommutable = Commutable;
}
def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
(IntId128 VR128:$src1,
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
}
}
defm AESIMC : AESI_binop_rm_int<0xDB, "aesimc",
int_x86_aesni_aesimc>;
defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
int_x86_aesni_aesenc>;
defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
int_x86_aesni_aesenclast>;
defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
int_x86_aesni_aesdec>;
defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
int_x86_aesni_aesdeclast>;
def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)),
(AESIMCrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))),
(AESIMCrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
(AESENCrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
(AESENCrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
(AESENCLASTrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
(AESENCLASTrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
(AESDECrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
(AESDECrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
(AESDECLASTrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
(AESDECLASTrm VR128:$src1, addr:$src2)>;
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, i32i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
OpSize;
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
(ins i128mem:$src1, i32i8imm:$src2),
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst,
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
imm:$src2))]>,
OpSize;

View File

@ -259,6 +259,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
HasAVX = ((ECX >> 28) & 0x1); HasAVX = ((ECX >> 28) & 0x1);
HasAES = IsIntel && ((ECX >> 25) & 0x1);
if (IsIntel || IsAMD) { if (IsIntel || IsAMD) {
// Determine if bit test memory instructions are slow. // Determine if bit test memory instructions are slow.
@ -286,6 +287,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
, HasX86_64(false) , HasX86_64(false)
, HasSSE4A(false) , HasSSE4A(false)
, HasAVX(false) , HasAVX(false)
, HasAES(false)
, HasFMA3(false) , HasFMA3(false)
, HasFMA4(false) , HasFMA4(false)
, IsBTMemSlow(false) , IsBTMemSlow(false)

View File

@ -69,6 +69,9 @@ protected:
/// HasAVX - Target has AVX instructions /// HasAVX - Target has AVX instructions
bool HasAVX; bool HasAVX;
/// HasAES - Target has AES instructions
bool HasAES;
/// HasFMA3 - Target has 3-operand fused multiply-add /// HasFMA3 - Target has 3-operand fused multiply-add
bool HasFMA3; bool HasFMA3;
@ -148,6 +151,7 @@ public:
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
bool hasAVX() const { return HasAVX; } bool hasAVX() const { return HasAVX; }
bool hasAES() const { return HasAES; }
bool hasFMA3() const { return HasFMA3; } bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; } bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; } bool isBTMemSlow() const { return IsBTMemSlow; }