Implement XMM subregs.

Extracting the low element of a vector is now done with EXTRACT_SUBREG,
the zero-extension performed by the load form of movss is now modeled with
SUBREG_TO_REG, and so on.

Register-to-register movss and movsd are no longer considered copies;
they are two-address instructions which insert a scalar into a vector.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@97354 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Dan Gohman
Date:   2010-02-28 00:17:42 +00:00
parent a363a9b71a
commit 874cadaf21
5 changed files with 168 additions and 163 deletions
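
For orientation, the key new TableGen patterns are quoted below from the SSE1 portion of the instruction-definition changes that follow (the SSE2 movsd forms are analogous):

    // Extracting the low f32 lane is now a plain subregister extract.
    def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
              (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;

    // A scalar load whose result feeds a zero-extended vector use becomes a
    // MOVSSrm wrapped in SUBREG_TO_REG, modeling the implicit zero-extension.
    def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
              (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;

    // Register-to-register movss is now a two-address insert of a scalar into
    // the low element of a vector rather than a copy.
    def MOVSSrr : SSI<0x10, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
                      "movss\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                         (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>;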


@@ -276,11 +276,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
{ X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
{ X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
{ X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 },
{ X86::MOVSDrr, X86::MOVSDmr, 0, 0 },
{ X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
{ X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
{ X86::MOVSSrr, X86::MOVSSmr, 0, 0 },
{ X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
{ X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
{ X86::MUL16r, X86::MUL16m, 1, 0 },
@@ -389,12 +386,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
{ X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
{ X86::MOVDQArr, X86::MOVDQArm, 16 },
{ X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 },
{ X86::MOVSDrr, X86::MOVSDrm, 0 },
{ X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
{ X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
{ X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 },
{ X86::MOVSSrr, X86::MOVSSrm, 0 },
{ X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
{ X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
{ X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
@@ -682,23 +675,20 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
case X86::MOV16rr:
case X86::MOV32rr:
case X86::MOV64rr:
case X86::MOVSSrr:
case X86::MOVSDrr:
// FP Stack register class copies
case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
case X86::MOV_Fp3264: case X86::MOV_Fp3280:
case X86::MOV_Fp6432: case X86::MOV_Fp8032:
// Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64
// copies are done with FsMOVAPSrr and FsMOVAPDrr.
case X86::FsMOVAPSrr:
case X86::FsMOVAPDrr:
case X86::MOVAPSrr:
case X86::MOVAPDrr:
case X86::MOVDQArr:
case X86::MOVSS2PSrr:
case X86::MOVSD2PDrr:
case X86::MOVPS2SSrr:
case X86::MOVPD2SDrr:
case X86::MMX_MOVQ64rr:
assert(MI.getNumOperands() >= 2 &&
MI.getOperand(0).isReg() &&


@@ -370,18 +370,56 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
// SSE1 Instructions
//===----------------------------------------------------------------------===//
// Move Instructions
let neverHasSideEffects = 1 in
def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
"movss\t{$src, $dst|$dst, $src}", []>;
// Move Instructions. Register-to-register movss is not used for FR32
// register copies because it's a partial register update; FsMOVAPSrr is
// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG
// because INSERT_SUBREG requires that the insert be implementable in terms of
// a copy, and, as just mentioned, we don't use movss for copies.
let Constraints = "$src1 = $dst" in
def MOVSSrr : SSI<0x10, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
"movss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>;
// Extract the low 32-bit value from one vector and insert it into another.
let AddedComplexity = 15 in
def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
(MOVSSrr VR128:$src1,
(EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>;
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
(INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>;
// Loading from memory automatically zeroes the upper bits.
let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
"movss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (loadf32 addr:$src))]>;
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG.
let AddedComplexity = 20 in {
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
(SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
}
// Store scalar value to memory.
def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
[(store FR32:$src, addr:$dst)]>;
// Extract and store.
def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSSmr addr:$dst,
(EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
// Conversion instructions
def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
"cvttss2si\t{$src, $dst|$dst, $src}",
@@ -1090,76 +1128,67 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
// FR32 to 128-bit vector conversion.
let isAsCheapAsAMove = 1 in
def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4f32 (scalar_to_vector FR32:$src)))]>;
def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
"movss\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
// FIXME: may not be able to eliminate this movss with coalescing the src and
// dest register classes are different. We really want to write this pattern
// like this:
// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
// (f32 FR32:$src)>;
let isAsCheapAsAMove = 1 in
def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src),
"movss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
(iPTR 0)))]>;
def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
"movss\t{$src, $dst|$dst, $src}",
[(store (f32 (vector_extract (v4f32 VR128:$src),
(iPTR 0))), addr:$dst)]>;
// Move to lower bits of a VR128, leaving upper bits alone.
// Three operand (but two address) aliases.
let Constraints = "$src1 = $dst" in {
let neverHasSideEffects = 1 in
def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
"movss\t{$src2, $dst|$dst, $src2}", []>;
let AddedComplexity = 15 in
def MOVLPSrr : SSI<0x10, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movss\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v4f32 (movl VR128:$src1, VR128:$src2)))]>;
}
// Move to lower bits of a VR128 and zeroing upper bits.
// Loading from memory automatically zeroing upper bits.
let AddedComplexity = 20 in
def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
"movss\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
(loadf32 addr:$src))))))]>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(MOVZSS2PSrm addr:$src)>;
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
//===---------------------------------------------------------------------===//
// SSE2 Instructions
//===---------------------------------------------------------------------===//
// Move Instructions
let neverHasSideEffects = 1 in
def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
"movsd\t{$src, $dst|$dst, $src}", []>;
let canFoldAsLoad = 1, isReMaterializable = 1 in
// Move Instructions. Register-to-register movsd is not used for FR64
// register copies because it's a partial register update; FsMOVAPDrr is
// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG
// because INSERT_SUBREG requires that the insert be implementable in terms of
// a copy, and, as just mentioned, we don't use movsd for copies.
let Constraints = "$src1 = $dst" in
def MOVSDrr : SDI<0x10, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
"movsd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
// Extract the low 64-bit value from one vector and insert it into another.
let AddedComplexity = 15 in
def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
(MOVSDrr VR128:$src1,
(EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
(INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>;
// Loading from memory automatically zeroes the upper bits.
let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (loadf64 addr:$src))]>;
// MOVSDrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG.
let AddedComplexity = 20 in {
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
(SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
(SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
(SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
}
// Store scalar value to memory.
def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>;
// Extract and store.
def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
addr:$dst),
(MOVSDmr addr:$dst,
(EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
// Conversion instructions
def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
"cvttsd2si\t{$src, $dst|$dst, $src}",
@@ -1213,7 +1242,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
Requires<[HasSSE2, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
(CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
(CVTSS2SDrr (MOVSSrm addr:$src))>,
Requires<[HasSSE2, OptForSpeed]>;
// Match intrinsics which expect XMM operand(s).
def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
@@ -2397,17 +2427,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
// FR64 to 128-bit vector conversion.
let isAsCheapAsAMove = 1 in
def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (scalar_to_vector FR64:$src)))]>;
def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2436,20 +2455,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)]>;
// FIXME: may not be able to eliminate this movss with coalescing the src and
// dest register classes are different. We really want to write this pattern
// like this:
// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
// (f32 FR32:$src)>;
let isAsCheapAsAMove = 1 in
def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
(iPTR 0)))]>;
def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(store (f64 (vector_extract (v2f64 VR128:$src),
(iPTR 0))), addr:$dst)]>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
(f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2466,44 +2474,11 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
// Move to lower bits of a VR128, leaving upper bits alone.
// Three operand (but two address) aliases.
let Constraints = "$src1 = $dst" in {
let neverHasSideEffects = 1 in
def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
"movsd\t{$src2, $dst|$dst, $src2}", []>;
let AddedComplexity = 15 in
def MOVLPDrr : SDI<0x10, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
"movsd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
(v2f64 (movl VR128:$src1, VR128:$src2)))]>;
}
// Store / copy lower 64-bits of a XMM register.
def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
// Move to lower bits of a VR128 and zeroing upper bits.
// Loading from memory automatically zeroing upper bits.
let AddedComplexity = 20 in {
def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
(v2f64 (X86vzmovl (v2f64 (scalar_to_vector
(loadf64 addr:$src))))))]>;
def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
(MOVZSD2PDrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
(MOVZSD2PDrm addr:$src)>;
def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>;
}
// movd / movq to XMM register zero-extends
let AddedComplexity = 15 in {
def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
@@ -3049,13 +3024,15 @@ let Predicates = [HasSSE2] in {
let AddedComplexity = 15 in {
// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
(MOVLSD2PDrr (V_SET0), FR64:$src)>;
(MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
(MOVLSS2PSrr (V_SET0), FR32:$src)>;
(MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
(MOVLPSrr (V_SET0), VR128:$src)>;
(MOVSSrr (v4f32 (V_SET0)),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
(MOVLPSrr (V_SET0), VR128:$src)>;
(MOVSSrr (v4i32 (V_SET0)),
(EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
}
// Splat v2f64 / v2i64
@@ -3190,15 +3167,19 @@ def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
(MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
(MOVSSrr (v4i32 VR128:$src1),
(EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>;
def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
(MOVSDrr (v2i64 VR128:$src1),
(EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>;
// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
(MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
Requires<[HasSSE2]>;
def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
(MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
(MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
Requires<[HasSSE2]>;
}
// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but


@@ -191,6 +191,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR16_NOREXRegClass;
else if (A == &X86::GR16_ABCDRegClass)
return &X86::GR16_ABCDRegClass;
} else if (B == &X86::FR32RegClass) {
return A;
}
break;
case 2:
@@ -207,6 +209,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
A == &X86::GR16_NOREXRegClass)
return &X86::GR16_ABCDRegClass;
} else if (B == &X86::FR64RegClass) {
return A;
}
break;
case 3:
@@ -234,6 +238,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR32_NOREXRegClass;
else if (A == &X86::GR32_ABCDRegClass)
return &X86::GR64_ABCDRegClass;
} else if (B == &X86::VR128RegClass) {
return A;
}
break;
case 4:


@@ -35,7 +35,8 @@ namespace X86 {
/// these indices must be kept in sync with the class indices in the
/// X86RegisterInfo.td file.
enum SubregIndex {
SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4
SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4,
SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3
};
}
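
For cross-reference, the new index values above correspond to the PatLeaf definitions added to X86RegisterInfo.td later in this commit (quoted from that hunk):

    def x86_subreg_ss : PatLeaf<(i32 1)>;
    def x86_subreg_sd : PatLeaf<(i32 2)>;
    def x86_subreg_xmm : PatLeaf<(i32 3)>;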


@@ -158,22 +158,22 @@ let Namespace = "X86" in {
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
// YMM Registers, used by AVX instructions
def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>;
def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>;
def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>;
def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>;
def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>;
def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>;
def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>;
def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>;
def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>;
def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>;
def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>;
def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>;
def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>;
def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>;
def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>;
def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>;
def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>;
def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>;
def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>;
def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegNum<[20, 24, 24]>;
def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegNum<[21, 25, 25]>;
def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegNum<[22, 26, 26]>;
def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegNum<[23, 27, 27]>;
def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegNum<[24, 28, 28]>;
def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegNum<[25, -2, -2]>;
def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegNum<[26, -2, -2]>;
def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegNum<[27, -2, -2]>;
def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegNum<[28, -2, -2]>;
def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegNum<[29, -2, -2]>;
def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>;
def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>;
def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>;
// Floating point stack registers
def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
@@ -238,6 +238,10 @@ def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
def x86_subreg_16bit : PatLeaf<(i32 3)>;
def x86_subreg_32bit : PatLeaf<(i32 4)>;
def x86_subreg_ss : PatLeaf<(i32 1)>;
def x86_subreg_sd : PatLeaf<(i32 2)>;
def x86_subreg_xmm : PatLeaf<(i32 3)>;
def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI,
R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
[AL, CL, DL, BL, SPL, BPL, SIL, DIL,
@@ -277,11 +281,31 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
[EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
// top-level register classes. The order specified in the register list is
@@ -793,6 +817,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
let SubRegClassList = [FR32, FR64];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -811,7 +836,9 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
YMM12, YMM13, YMM14, YMM15]>;
YMM12, YMM13, YMM14, YMM15]> {
let SubRegClassList = [FR32, FR64, VR128];
}
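
As an editorial summary (not part of the patch), the SubRegClassList entries and PatLeaf values above combine to give the following subregister index assignments for the vector classes:

    // x86_subreg_ss  (index 1) -> FR32,  the low f32 lane of an XMM register
    // x86_subreg_sd  (index 2) -> FR64,  the low f64 lane of an XMM register
    // x86_subreg_xmm (index 3) -> VR128, the low XMM half of a YMM register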
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {