mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
Separate out the AES-NI instructions from the SSE4.2 instructions. Add a new subtarget option for AES and check for the support. Add the "westmere" line of processors and add AES-NI support to the Core i7. Add a couple of TODOs for information I couldn't verify.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@100231 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7249ef0455
commit
6d1cd1cd04
@ -74,6 +74,8 @@ def FeatureFMA4 : SubtargetFeature<"fma4", "HasFMA4", "true",
|
||||
def FeatureVectorUAMem : SubtargetFeature<"vector-unaligned-mem",
|
||||
"HasVectorUAMem", "true",
|
||||
"Allow unaligned memory operands on vector/SIMD instructions">;
|
||||
def FeatureAES : SubtargetFeature<"aes", "HasAES", "true",
|
||||
"Enable AES instructions">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 processors supported.
|
||||
@ -101,11 +103,17 @@ def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
|
||||
def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
|
||||
def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
|
||||
def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
|
||||
// "Arrandale" along with corei3 and corei5
|
||||
def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
||||
FeatureFastUAMem]>;
|
||||
FeatureFastUAMem, FeatureAES]>;
|
||||
def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
||||
FeatureFastUAMem]>;
|
||||
// Westmere is a similar machine to nehalem with some additional features.
|
||||
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
|
||||
def : Proc<"westmere", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
|
||||
FeatureFastUAMem, FeatureAES]>;
|
||||
// Sandy Bridge does not have FMA
|
||||
// FIXME: Wikipedia says Sandy Bridge does have FMA; it should have AES as well.
|
||||
def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
|
||||
|
||||
def : Proc<"k6", [FeatureMMX]>;
|
||||
|
@ -311,6 +311,21 @@ class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
|
||||
Requires<[HasSSE42]>;
|
||||
|
||||
// AES Instruction Templates:
|
||||
//
|
||||
// AES8I
|
||||
// FIXME: Verify these, they appear to use the same encoding as the SSE4.2 T8
|
||||
// and TA encodings.
|
||||
class AES8I<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag>pattern>
|
||||
: I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
|
||||
Requires<[HasAES]>;
|
||||
|
||||
class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
|
||||
list<dag> pattern>
|
||||
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
|
||||
Requires<[HasAES]>;
|
||||
|
||||
// X86-64 Instruction templates...
|
||||
//
|
||||
|
||||
|
@ -330,6 +330,7 @@ def OptForSize : Predicate<"OptForSize">;
|
||||
def OptForSpeed : Predicate<"!OptForSize">;
|
||||
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
|
||||
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
|
||||
def HasAES : Predicate<"Subtarget->hasAES()">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// X86 Instruction Format Definitions.
|
||||
|
@ -3848,53 +3848,6 @@ def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, VR128:$src2)),
|
||||
def : Pat<(v2i64 (X86pcmpgtq VR128:$src1, (memop addr:$src2))),
|
||||
(PCMPGTQrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
// TODO: These should be AES as a feature set.
|
||||
defm AESIMC : SS42I_binop_rm_int<0xDB, "aesimc",
|
||||
int_x86_aesni_aesimc>;
|
||||
defm AESENC : SS42I_binop_rm_int<0xDC, "aesenc",
|
||||
int_x86_aesni_aesenc>;
|
||||
defm AESENCLAST : SS42I_binop_rm_int<0xDD, "aesenclast",
|
||||
int_x86_aesni_aesenclast>;
|
||||
defm AESDEC : SS42I_binop_rm_int<0xDE, "aesdec",
|
||||
int_x86_aesni_aesdec>;
|
||||
defm AESDECLAST : SS42I_binop_rm_int<0xDF, "aesdeclast",
|
||||
int_x86_aesni_aesdeclast>;
|
||||
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)),
|
||||
(AESIMCrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))),
|
||||
(AESIMCrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
|
||||
(AESENCrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
|
||||
(AESENCrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
|
||||
(AESENCLASTrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
|
||||
(AESENCLASTrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
|
||||
(AESDECrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
|
||||
(AESDECrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
|
||||
(AESDECLASTrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
|
||||
(AESDECLASTrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
def AESKEYGENASSIST128rr : SS42AI<0xDF, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i32i8imm:$src2),
|
||||
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
|
||||
OpSize;
|
||||
def AESKEYGENASSIST128rm : SS42AI<0xDF, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins i128mem:$src1, i32i8imm:$src2),
|
||||
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
|
||||
imm:$src2))]>,
|
||||
OpSize;
|
||||
|
||||
// crc intrinsic instruction
|
||||
// This set of instructions is only rm; the only difference is the size
|
||||
// of r and m.
|
||||
@ -4056,3 +4009,73 @@ defm PCMPESTRIC : SS42AI_pcmpestri<int_x86_sse42_pcmpestric128>;
|
||||
defm PCMPESTRIO : SS42AI_pcmpestri<int_x86_sse42_pcmpestrio128>;
|
||||
defm PCMPESTRIS : SS42AI_pcmpestri<int_x86_sse42_pcmpestris128>;
|
||||
defm PCMPESTRIZ : SS42AI_pcmpestri<int_x86_sse42_pcmpestriz128>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// AES-NI Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let Constraints = "$src1 = $dst" in {
|
||||
multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr,
|
||||
Intrinsic IntId128, bit Commutable = 0> {
|
||||
def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>,
|
||||
OpSize {
|
||||
let isCommutable = Commutable;
|
||||
}
|
||||
def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2),
|
||||
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
|
||||
[(set VR128:$dst,
|
||||
(IntId128 VR128:$src1,
|
||||
(bitconvert (memopv16i8 addr:$src2))))]>, OpSize;
|
||||
}
|
||||
}
|
||||
|
||||
defm AESIMC : AESI_binop_rm_int<0xDB, "aesimc",
|
||||
int_x86_aesni_aesimc>;
|
||||
defm AESENC : AESI_binop_rm_int<0xDC, "aesenc",
|
||||
int_x86_aesni_aesenc>;
|
||||
defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast",
|
||||
int_x86_aesni_aesenclast>;
|
||||
defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec",
|
||||
int_x86_aesni_aesdec>;
|
||||
defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast",
|
||||
int_x86_aesni_aesdeclast>;
|
||||
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, VR128:$src2)),
|
||||
(AESIMCrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesimc VR128:$src1, (memop addr:$src2))),
|
||||
(AESIMCrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, VR128:$src2)),
|
||||
(AESENCrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesenc VR128:$src1, (memop addr:$src2))),
|
||||
(AESENCrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, VR128:$src2)),
|
||||
(AESENCLASTrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesenclast VR128:$src1, (memop addr:$src2))),
|
||||
(AESENCLASTrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, VR128:$src2)),
|
||||
(AESDECrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesdec VR128:$src1, (memop addr:$src2))),
|
||||
(AESDECrm VR128:$src1, addr:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, VR128:$src2)),
|
||||
(AESDECLASTrr VR128:$src1, VR128:$src2)>;
|
||||
def : Pat<(v2i64 (int_x86_aesni_aesdeclast VR128:$src1, (memop addr:$src2))),
|
||||
(AESDECLASTrm VR128:$src1, addr:$src2)>;
|
||||
|
||||
def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i32i8imm:$src2),
|
||||
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>,
|
||||
OpSize;
|
||||
def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins i128mem:$src1, i32i8imm:$src2),
|
||||
"aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}",
|
||||
[(set VR128:$dst,
|
||||
(int_x86_aesni_aeskeygenassist (bitconvert (memopv2i64 addr:$src1)),
|
||||
imm:$src2))]>,
|
||||
OpSize;
|
||||
|
||||
|
@ -259,6 +259,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
|
||||
|
||||
HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
|
||||
HasAVX = ((ECX >> 28) & 0x1);
|
||||
HasAES = IsIntel && ((ECX >> 25) & 0x1);
|
||||
|
||||
if (IsIntel || IsAMD) {
|
||||
// Determine if bit test memory instructions are slow.
|
||||
@ -286,6 +287,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
|
||||
, HasX86_64(false)
|
||||
, HasSSE4A(false)
|
||||
, HasAVX(false)
|
||||
, HasAES(false)
|
||||
, HasFMA3(false)
|
||||
, HasFMA4(false)
|
||||
, IsBTMemSlow(false)
|
||||
|
@ -69,6 +69,9 @@ protected:
|
||||
/// HasAVX - Target has AVX instructions
|
||||
bool HasAVX;
|
||||
|
||||
/// HasAES - Target has AES instructions
|
||||
bool HasAES;
|
||||
|
||||
/// HasFMA3 - Target has 3-operand fused multiply-add
|
||||
bool HasFMA3;
|
||||
|
||||
@ -148,6 +151,7 @@ public:
|
||||
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
|
||||
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
|
||||
bool hasAVX() const { return HasAVX; }
|
||||
bool hasAES() const { return HasAES; }
|
||||
bool hasFMA3() const { return HasFMA3; }
|
||||
bool hasFMA4() const { return HasFMA4; }
|
||||
bool isBTMemSlow() const { return IsBTMemSlow; }
|
||||
|
Loading…
Reference in New Issue
Block a user