From 6d1cd1cd04abb1828a14980e15252985c8c728b2 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 2 Apr 2010 21:54:27 +0000 Subject: [PATCH] Separate out the AES-NI instructions from the SSE4.2 instructions. Add a new subtarget option for AES and check for the support. Add "westmere" line of processors and add AES-NI support to the core i7. Add a couple of TODOs for information I couldn't verify. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100231 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 10 ++- lib/Target/X86/X86InstrFormats.td | 15 ++++ lib/Target/X86/X86InstrInfo.td | 1 + lib/Target/X86/X86InstrSSE.td | 117 ++++++++++++++++++------------ lib/Target/X86/X86Subtarget.cpp | 2 + lib/Target/X86/X86Subtarget.h | 4 + 6 files changed, 101 insertions(+), 48 deletions(-) diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 6b62795a6e1..89cc84ffb3c 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -74,6 +74,8 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true", def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem", "HasVectorUAMem", "true", "Allow unaligned memory operands on vector/SIMD instructions">; +def FeatureAES : SubtargetFeature<"aes", "HasAES", "true", + "Enable AES instructions">; //===----------------------------------------------------------------------===// // X86 processors supported. @@ -101,11 +103,17 @@ def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>; def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>; +// "Arrandale" along with corei3 and corei5 def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, - FeatureFastUAMem]>; + FeatureFastUAMem, FeatureAES]>; def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, FeatureFastUAMem]>; +// Westmere is a similar machine to nehalem with some additional features. +// Westmere is the corei3/i5/i7 path from nehalem to sandybridge +def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem, + FeatureFastUAMem, FeatureAES]>; // Sandy Bridge does not have FMA +// FIXME: Wikipedia says it does... it should have AES as well. def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>; def : Proc<"k6", [FeatureMMX]>; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index d25ec260491..cbe4c825629 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -311,6 +311,21 @@ class SS42AI o, Format F, dag outs, dag ins, string asm, : Ii8, TA, Requires<[HasSSE42]>; +// AES Instruction Templates: +// +// AES8I +// FIXME: Verify these, they appear to use the same encoding as the SSE4.2 T8 +// and TA encodings. +class AES8I o, Format F, dag outs, dag ins, string asm, + listpattern> + : I, T8, + Requires<[HasAES]>; + +class AESAI o, Format F, dag outs, dag ins, string asm, + list pattern> + : Ii8, TA, + Requires<[HasAES]>; + // X86-64 Instruction templates... // diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 8fccc8a37ac..65b7ec023d8 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -330,6 +330,7 @@ def OptForSize : Predicate<"OptForSize">; def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; +def HasAES : Predicate<"Subtarget->hasAES()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index dadc2a663b5..08105c5588f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3848,53 +3848,6 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)), def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))), (PCMPGTQrm VR128:$src1, addr:$src2)>; -// TODO: These should be AES as a feature set. -defm AESIMC : SS42I_binop_rm_int<0xDB, "aesimc", - int_x86_aesni_aesimc>; -defm AESENC : SS42I_binop_rm_int<0xDC, "aesenc", - int_x86_aesni_aesenc>; -defm AESENCLAST : SS42I_binop_rm_int<0xDD, "aesenclast", - int_x86_aesni_aesenclast>; -defm AESDEC : SS42I_binop_rm_int<0xDE, "aesdec", - int_x86_aesni_aesdec>; -defm AESDECLAST : SS42I_binop_rm_int<0xDF, "aesdeclast", - int_x86_aesni_aesdeclast>; - -def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)), - (AESIMCrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))), - (AESIMCrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), - (AESENCrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), - (AESENCrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), - (AESENCLASTrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), - (AESENCLASTrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), - (AESDECrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), - (AESDECrm VR128:$src1, addr:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), - (AESDECLASTrr VR128:$src1, VR128:$src2)>; -def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), - (AESDECLASTrm VR128:$src1, addr:$src2)>; - -def AESKEYGENASSIST128rr : SS42AI<0xDF, MRMSrcReg, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, - OpSize; -def AESKEYGENASSIST128rm : SS42AI<0xDF, MRMSrcMem, (outs VR128:$dst), - (ins i128mem:$src1, i32i8imm:$src2), - "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", - [(set VR128:$dst, - (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), - imm:$src2))]>, - OpSize; - // crc intrinsic instruction // This set of instructions are only rm, the only difference is the size // of r and m. @@ -4056,3 +4009,73 @@ defm PCMPESTRIC : SS42AI_pcmpestri; defm PCMPESTRIO : SS42AI_pcmpestri; defm PCMPESTRIS : SS42AI_pcmpestri; defm PCMPESTRIZ : SS42AI_pcmpestri; + +//===----------------------------------------------------------------------===// +// AES-NI Instructions +//===----------------------------------------------------------------------===// + +let Constraints = "$src1 = $dst" in { + multiclass AESI_binop_rm_int opc, string OpcodeStr, + Intrinsic IntId128, bit Commutable = 0> { + def rr : AES8I, + OpSize { + let isCommutable = Commutable; + } + def rm : AES8I, OpSize; + } +} + +defm AESIMC : AESI_binop_rm_int<0xDB, "aesimc", + int_x86_aesni_aesimc>; +defm AESENC : AESI_binop_rm_int<0xDC, "aesenc", + int_x86_aesni_aesenc>; +defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast", + int_x86_aesni_aesenclast>; +defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec", + int_x86_aesni_aesdec>; +defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast", + int_x86_aesni_aesdeclast>; + +def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)), + (AESIMCrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))), + (AESIMCrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)), + (AESENCrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))), + (AESENCrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)), + (AESENCLASTrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))), + (AESENCLASTrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)), + (AESDECrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))), + (AESDECrm VR128:$src1, addr:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)), + (AESDECLASTrr VR128:$src1, VR128:$src2)>; +def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))), + (AESDECLASTrm VR128:$src1, addr:$src2)>; + +def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), + (ins VR128:$src1, i32i8imm:$src2), + "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, + OpSize; +def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), + (ins i128mem:$src1, i32i8imm:$src2), + "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)), + imm:$src2))]>, + OpSize; + diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 8a0cde49aea..09a26858eb7 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -259,6 +259,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { HasFMA3 = IsIntel && ((ECX >> 12) & 0x1); HasAVX = ((ECX >> 28) & 0x1); + HasAES = IsIntel && ((ECX >> 25) & 0x1); if (IsIntel || IsAMD) { // Determine if bit test memory instructions are slow. @@ -286,6 +287,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS, , HasX86_64(false) , HasSSE4A(false) , HasAVX(false) + , HasAES(false) , HasFMA3(false) , HasFMA4(false) , IsBTMemSlow(false) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index bf30154625b..8a873f04df4 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -69,6 +69,9 @@ protected: /// HasAVX - Target has AVX instructions bool HasAVX; + /// HasAES - Target has AES instructions + bool HasAES; + /// HasFMA3 - Target has 3-operand fused multiply-add bool HasFMA3; @@ -148,6 +151,7 @@ public: bool has3DNow() const { return X863DNowLevel >= ThreeDNow; } bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasAVX() const { return HasAVX; } + bool hasAES() const { return HasAES; } bool hasFMA3() const { return HasFMA3; } bool hasFMA4() const { return HasFMA4; } bool isBTMemSlow() const { return IsBTMemSlow; }