AArch64: use RegisterOperand for NEON registers.

Previously we modelled VPR128 and VPR64 as essentially identical
register-classes containing V0-V31 (which had Q0-Q31 as "sub_alias"
sub-registers). This model is starting to cause significant problems
for code generation, particularly writing EXTRACT/INSERT_SUBREG
patterns for converting between the two.

The change here switches to classifying VPR64 & VPR128 as
RegisterOperands, which are essentially aliases for RegisterClasses
with different parsing and printing behaviour. This fits almost
exactly with their real status (VPR128 == FPR128 printed strangely,
VPR64 == FPR64 printed strangely).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190665 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Tim Northover 2013-09-13 07:26:52 +00:00
parent dc6fc4fa1f
commit 630c5e06d6
11 changed files with 238 additions and 274 deletions

View File

@@ -32,17 +32,18 @@ using namespace llvm;
/// argument to be printed as "bN". /// argument to be printed as "bN".
static bool printModifiedFPRAsmOperand(const MachineOperand &MO, static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
const TargetRegisterInfo *TRI, const TargetRegisterInfo *TRI,
const TargetRegisterClass &RegClass, char RegType, raw_ostream &O) {
raw_ostream &O) {
if (!MO.isReg()) if (!MO.isReg())
return true; return true;
for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
if (RegClass.contains(*AR)) { if (AArch64::FPR8RegClass.contains(*AR)) {
O << AArch64InstPrinter::getRegisterName(*AR); O << RegType << TRI->getEncodingValue(MO.getReg());
return false; return false;
} }
} }
// The register doesn't correspond to anything floating-point like.
return true; return true;
} }
@@ -157,7 +158,7 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// register. Technically, we could allocate the argument as a VPR128, but // register. Technically, we could allocate the argument as a VPR128, but
// that leads to extremely dodgy copies being generated to get the data // that leads to extremely dodgy copies being generated to get the data
// there. // there.
if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O)) if (printModifiedFPRAsmOperand(MO, TRI, 'v', O))
O << AArch64InstPrinter::getRegisterName(MO.getReg()); O << AArch64InstPrinter::getRegisterName(MO.getReg());
break; break;
case MachineOperand::MO_Immediate: case MachineOperand::MO_Immediate:
@@ -211,25 +212,12 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
// copies ...). // copies ...).
llvm_unreachable("FIXME: Unimplemented register pairs"); llvm_unreachable("FIXME: Unimplemented register pairs");
case 'b': case 'b':
// Output 8-bit FP/SIMD scalar register operand, prefixed with b.
return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
AArch64::FPR8RegClass, O);
case 'h': case 'h':
// Output 16-bit FP/SIMD scalar register operand, prefixed with h.
return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
AArch64::FPR16RegClass, O);
case 's': case 's':
// Output 32-bit FP/SIMD scalar register operand, prefixed with s.
return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
AArch64::FPR32RegClass, O);
case 'd': case 'd':
// Output 64-bit FP/SIMD scalar register operand, prefixed with d.
return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
AArch64::FPR64RegClass, O);
case 'q': case 'q':
// Output 128-bit FP/SIMD scalar register operand, prefixed with q.
return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
AArch64::FPR128RegClass, O); ExtraCode[0], O);
case 'A': case 'A':
// Output symbolic address with appropriate relocation modifier (also // Output symbolic address with appropriate relocation modifier (also
// suitable for ADRP). // suitable for ADRP).

View File

@@ -57,17 +57,17 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
if (Subtarget->hasNEON()) { if (Subtarget->hasNEON()) {
// And the vectors // And the vectors
addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass); addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass); addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass); addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass); addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass); addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass); addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass); addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass); addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass); addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass); addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass); addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
} }
computeRegisterProperties(); computeRegisterProperties();
@@ -3610,14 +3610,10 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &AArch64::FPR16RegClass); return std::make_pair(0U, &AArch64::FPR16RegClass);
else if (VT == MVT::f32) else if (VT == MVT::f32)
return std::make_pair(0U, &AArch64::FPR32RegClass); return std::make_pair(0U, &AArch64::FPR32RegClass);
else if (VT == MVT::f64)
return std::make_pair(0U, &AArch64::FPR64RegClass);
else if (VT.getSizeInBits() == 64) else if (VT.getSizeInBits() == 64)
return std::make_pair(0U, &AArch64::VPR64RegClass); return std::make_pair(0U, &AArch64::FPR64RegClass);
else if (VT == MVT::f128)
return std::make_pair(0U, &AArch64::FPR128RegClass);
else if (VT.getSizeInBits() == 128) else if (VT.getSizeInBits() == 128)
return std::make_pair(0U, &AArch64::VPR128RegClass); return std::make_pair(0U, &AArch64::FPR128RegClass);
break; break;
} }
} }

View File

@@ -125,6 +125,8 @@ def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>; def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Call sequence pseudo-instructions // Call sequence pseudo-instructions
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
@@ -2196,13 +2198,13 @@ def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra),
def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)), def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)),
(FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
def : Pat<(fadd FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)), def : Pat<(fadd FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)),
(FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Pat<(fsub FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)), def : Pat<(fsub FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)),
(FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Pat<(fsub (fmul FPR64:$Rn, FPR64:$Rm), FPR64:$Ra), def : Pat<(fsub (fmul (f64 FPR64:$Rn), FPR64:$Rm), FPR64:$Ra),
(FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
def : Pat<(fsub (fneg FPR64:$Ra), (fmul FPR64:$Rn, FPR64:$Rm)), def : Pat<(fsub (fneg (f64 FPR64:$Ra)), (fmul FPR64:$Rn, FPR64:$Rm)),
(FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
} }
@@ -5162,4 +5164,4 @@ defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
// Advanced SIMD (NEON) Support // Advanced SIMD (NEON) Support
// //
include "AArch64InstrNEON.td" include "AArch64InstrNEON.td"

View File

@@ -215,8 +215,8 @@ defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and // class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
// two operands constraints. // two operands constraints.
class NeonI_3VSame_Constraint_impl<string asmop, string asmlane, class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
RegisterClass VPRC, ValueType OpTy, bit q, bit u, bits<2> size, bits<5> opcode, RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
SDPatternOperator opnode> bits<5> opcode, SDPatternOperator opnode>
: NeonI_3VSame<q, u, size, opcode, : NeonI_3VSame<q, u, size, opcode,
(outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm), (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane, asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
@@ -321,11 +321,13 @@ defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
// ORR disassembled as MOV if Vn==Vm // ORR disassembled as MOV if Vn==Vm
// Vector Move - register // Vector Move - register
// Alias for ORR if Vn=Vm and it is the preferred syntax // Alias for ORR if Vn=Vm.
// FIXME: This is actually the preferred syntax but TableGen can't deal with
// custom printing of aliases.
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b", def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
(ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn)>; (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b", def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
(ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn)>; (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{ def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0)); ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
@@ -571,7 +573,7 @@ def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
// NeonI_compare_aliases class: swaps register operands to implement // NeonI_compare_aliases class: swaps register operands to implement
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed. // comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
class NeonI_compare_aliases<string asmop, string asmlane, class NeonI_compare_aliases<string asmop, string asmlane,
Instruction inst, RegisterClass VPRC> Instruction inst, RegisterOperand VPRC>
: NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane # : NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
", $Rm" # asmlane, ", $Rm" # asmlane,
(inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>; (inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
@@ -1324,7 +1326,7 @@ defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
} }
class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane, class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
Instruction inst, RegisterClass VPRC> Instruction inst, RegisterOperand VPRC>
: NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"), : NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
(inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>; (inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
@@ -1401,7 +1403,7 @@ def MOVIdi : NeonI_1VModImm<0b0, 0b1,
// Vector Floating Point Move Immediate // Vector Floating Point Move Immediate
class NeonI_FMOV_impl<string asmlane, RegisterClass VPRC, ValueType OpTy, class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
Operand immOpType, bit q, bit op> Operand immOpType, bit q, bit op>
: NeonI_1VModImm<q, op, : NeonI_1VModImm<q, op,
(outs VPRC:$Rd), (ins immOpType:$Imm), (outs VPRC:$Rd), (ins immOpType:$Imm),
@@ -1456,7 +1458,7 @@ def shr_imm32 : shr_imm<"32">;
def shr_imm64 : shr_imm<"64">; def shr_imm64 : shr_imm<"64">;
class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T, class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
RegisterClass VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode> RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
: NeonI_2VShiftImm<q, u, opcode, : NeonI_2VShiftImm<q, u, opcode,
(outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm), (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm", asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
@@ -1634,7 +1636,7 @@ defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
// Rounding/Saturating shift // Rounding/Saturating shift
class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T, class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
RegisterClass VPRC, ValueType Ty, Operand ImmTy, RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
SDPatternOperator OpNode> SDPatternOperator OpNode>
: NeonI_2VShiftImm<q, u, opcode, : NeonI_2VShiftImm<q, u, opcode,
(outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm), (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
@@ -1736,7 +1738,7 @@ defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>; defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T, class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
RegisterClass VPRC, ValueType Ty, Operand ImmTy, RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
SDNode OpNode> SDNode OpNode>
: NeonI_2VShiftImm<q, u, opcode, : NeonI_2VShiftImm<q, u, opcode,
(outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm), (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
@@ -1792,7 +1794,7 @@ defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
// Rounding shift accumulate // Rounding shift accumulate
class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T, class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
RegisterClass VPRC, ValueType Ty, Operand ImmTy, RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
SDPatternOperator OpNode> SDPatternOperator OpNode>
: NeonI_2VShiftImm<q, u, opcode, : NeonI_2VShiftImm<q, u, opcode,
(outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm), (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
@@ -1847,7 +1849,7 @@ defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
// Shift insert by immediate // Shift insert by immediate
class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T, class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
RegisterClass VPRC, ValueType Ty, Operand ImmTy, RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
SDPatternOperator OpNode> SDPatternOperator OpNode>
: NeonI_2VShiftImm<q, u, opcode, : NeonI_2VShiftImm<q, u, opcode,
(outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm), (outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
@@ -1953,7 +1955,7 @@ class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT, class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
string SrcT, Operand ImmTy> string SrcT, Operand ImmTy>
: NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd), : NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
(ins VPR64:$src, VPR128:$Rn, ImmTy:$Imm), (ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm", asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
[], NoItinerary> { [], NoItinerary> {
let Constraints = "$src = $Rd"; let Constraints = "$src = $Rd";
@@ -2040,15 +2042,18 @@ multiclass Neon_shiftNarrow_patterns<string shr> {
def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H") (v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
VPR128:$Rn, imm:$Imm)))))), VPR128:$Rn, imm:$Imm)))))),
(SHRNvvi_16B VPR64:$src, VPR128:$Rn, imm:$Imm)>; (SHRNvvi_16B (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
VPR128:$Rn, imm:$Imm)>;
def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S") (v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
VPR128:$Rn, imm:$Imm)))))), VPR128:$Rn, imm:$Imm)))))),
(SHRNvvi_8H VPR64:$src, VPR128:$Rn, imm:$Imm)>; (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
VPR128:$Rn, imm:$Imm)>;
def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D") (v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
VPR128:$Rn, imm:$Imm)))))), VPR128:$Rn, imm:$Imm)))))),
(SHRNvvi_4S VPR64:$src, VPR128:$Rn, imm:$Imm)>; (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
VPR128:$Rn, imm:$Imm)>;
} }
multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> { multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
@@ -2060,17 +2065,20 @@ multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
(!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>; (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
def : Pat<(Neon_combine (v1i64 VPR64:$src), def : Pat<(Neon_combine (v1i64 VPR64:$src),
(v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))), (v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
(!cast<Instruction>(prefix # "_16B") (!cast<Instruction>(prefix # "_16B")
VPR64:$src, VPR128:$Rn, imm:$Imm)>; (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
VPR128:$Rn, imm:$Imm)>;
def : Pat<(Neon_combine (v1i64 VPR64:$src), def : Pat<(Neon_combine (v1i64 VPR64:$src),
(v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))), (v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
(!cast<Instruction>(prefix # "_8H") (!cast<Instruction>(prefix # "_8H")
VPR64:$src, VPR128:$Rn, imm:$Imm)>; (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
VPR128:$Rn, imm:$Imm)>;
def : Pat<(Neon_combine (v1i64 VPR64:$src), def : Pat<(Neon_combine (v1i64 VPR64:$src),
(v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))), (v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
(!cast<Instruction>(prefix # "_4S") (!cast<Instruction>(prefix # "_4S")
VPR64:$src, VPR128:$Rn, imm:$Imm)>; (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
VPR128:$Rn, imm:$Imm)>;
} }
defm : Neon_shiftNarrow_patterns<"lshr">; defm : Neon_shiftNarrow_patterns<"lshr">;
@@ -2086,7 +2094,7 @@ defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
// Convert fix-point and float-pointing // Convert fix-point and float-pointing
class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T, class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
RegisterClass VPRC, ValueType DestTy, ValueType SrcTy, RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
Operand ImmTy, SDPatternOperator IntOp> Operand ImmTy, SDPatternOperator IntOp>
: NeonI_2VShiftImm<q, u, opcode, : NeonI_2VShiftImm<q, u, opcode,
(outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm), (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
@@ -2162,7 +2170,7 @@ defm NI_zext_high : Neon_sshll2_0<zext>;
class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode, class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
string asmop, string ResS, string OpS, string asmop, string ResS, string OpS,
SDPatternOperator opnode, SDPatternOperator ext, SDPatternOperator opnode, SDPatternOperator ext,
RegisterClass OpVPR, RegisterOperand OpVPR,
ValueType ResTy, ValueType OpTy> ValueType ResTy, ValueType OpTy>
: NeonI_3VDiff<q, u, size, opcode, : NeonI_3VDiff<q, u, size, opcode,
(outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm), (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
@@ -2244,7 +2252,7 @@ defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode, class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
string asmop, string ResS, string OpS, string asmop, string ResS, string OpS,
SDPatternOperator opnode, SDPatternOperator ext, SDPatternOperator opnode, SDPatternOperator ext,
RegisterClass OpVPR, RegisterOperand OpVPR,
ValueType ResTy, ValueType OpTy> ValueType ResTy, ValueType OpTy>
: NeonI_3VDiff<q, u, size, opcode, : NeonI_3VDiff<q, u, size, opcode,
(outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm), (outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
@@ -2325,7 +2333,7 @@ multiclass NeonI_get_high
} }
defm NI_get_hi : NeonI_get_high; defm NI_get_hi : NeonI_get_high;
// pattern for addhn/subhn with 2 operands // pattern for addhn/subhn with 2 operands
class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode, class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
string asmop, string ResS, string OpS, string asmop, string ResS, string OpS,
@@ -2361,7 +2369,7 @@ defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode, class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
string asmop, string ResS, string OpS, string asmop, string ResS, string OpS,
SDPatternOperator opnode, SDPatternOperator opnode,
RegisterClass ResVPR, RegisterClass OpVPR, RegisterOperand ResVPR, RegisterOperand OpVPR,
ValueType ResTy, ValueType OpTy> ValueType ResTy, ValueType OpTy>
: NeonI_3VDiff<q, u, size, opcode, : NeonI_3VDiff<q, u, size, opcode,
(outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm), (outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
@@ -2388,79 +2396,71 @@ multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode,
defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>; defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>; defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
// pattern for acle intrinsic with 3 operands
class NeonI_3VDN_addhn2_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
string asmop, string ResS, string OpS,
SDPatternOperator opnode, SDPatternOperator get_hi,
ValueType OpTy, ValueType OpSTy>
: NeonI_3VDiff<q, u, size, opcode,
(outs VPR128:$Rd), (ins VPR64:$src, VPR128:$Rn, VPR128:$Rm),
asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
[(set (v2i64 VPR128:$Rd),
(Neon_combine
(v1i64 VPR64:$src),
(v1i64 (bitconvert
(OpSTy (get_hi
(OpTy (opnode (OpTy VPR128:$Rn),
(OpTy VPR128:$Rm)))))))))],
NoItinerary> {
let Constraints = "$src = $Rd";
}
multiclass NeonI_3VDN_addhn2_3Op_v1<bit u, bits<4> opcode,
string asmop,
SDPatternOperator opnode>
{
def _16b8h : NeonI_3VDN_addhn2_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h",
opnode, NI_get_hi_8h, v8i16, v8i8>;
def _8h4s : NeonI_3VDN_addhn2_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s",
opnode, NI_get_hi_4s, v4i32, v4i16>;
def _4s2d : NeonI_3VDN_addhn2_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d",
opnode, NI_get_hi_2d, v2i64, v2i32>;
}
defm ADDHN2vvv : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0100, "addhn2", add>;
defm SUBHN2vvv : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0110, "subhn2", sub>;
// pattern for acle intrinsic with 3 operands // pattern for acle intrinsic with 3 operands
class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode, class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
string asmop, string ResS, string OpS, string asmop, string ResS, string OpS>
SDPatternOperator opnode,
ValueType OpTy, ValueType OpSTy>
: NeonI_3VDiff<q, u, size, opcode, : NeonI_3VDiff<q, u, size, opcode,
(outs VPR128:$Rd), (ins VPR64:$src, VPR128:$Rn, VPR128:$Rm), (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS, asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
[(set (v2i64 VPR128:$Rd), [], NoItinerary> {
(Neon_combine (v1i64 VPR64:$src),
(v1i64 (bitconvert
(OpSTy (opnode (OpTy VPR128:$Rn),
(OpTy VPR128:$Rm)))))))],
NoItinerary> {
let Constraints = "$src = $Rd"; let Constraints = "$src = $Rd";
let neverHasSideEffects = 1;
} }
multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode, multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode,
string asmop, string asmop> {
SDPatternOperator opnode> def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
{ def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h", def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
opnode, v8i16, v8i8>;
def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s",
opnode, v4i32, v4i16>;
def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d",
opnode, v2i64, v2i32>;
} }
defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2", defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
int_arm_neon_vraddhn>; defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2",
int_arm_neon_vrsubhn>; defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
// Patterns have to be separate because there's a SUBREG_TO_REG in the output
// part.
class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
SDPatternOperator coreop>
: Pat<(Neon_combine (v1i64 VPR64:$src),
(v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
(SrcTy VPR128:$Rm)))))),
(INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
VPR128:$Rn, VPR128:$Rm)>;
// addhn2 patterns
def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
// subhn2 patterns
def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
// raddhn2 patterns
def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
// rsubhn2 patterns
def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
// pattern that need to extend result // pattern that need to extend result
class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode, class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
string asmop, string ResS, string OpS, string asmop, string ResS, string OpS,
SDPatternOperator opnode, SDPatternOperator opnode,
RegisterClass OpVPR, RegisterOperand OpVPR,
ValueType ResTy, ValueType OpTy, ValueType OpSTy> ValueType ResTy, ValueType OpTy, ValueType OpSTy>
: NeonI_3VDiff<q, u, size, opcode, : NeonI_3VDiff<q, u, size, opcode,
(outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm), (outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
@@ -2528,7 +2528,7 @@ defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode, class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
string asmop, string ResS, string OpS, string asmop, string ResS, string OpS,
SDPatternOperator opnode, SDPatternOperator subop, SDPatternOperator opnode, SDPatternOperator subop,
RegisterClass OpVPR, RegisterOperand OpVPR,
ValueType ResTy, ValueType OpTy, ValueType OpSTy> ValueType ResTy, ValueType OpTy, ValueType OpSTy>
: NeonI_3VDiff<q, u, size, opcode, : NeonI_3VDiff<q, u, size, opcode,
(outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm), (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
@@ -2684,7 +2684,7 @@ defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode, class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
string asmop, string ResS, string OpS, string asmop, string ResS, string OpS,
SDPatternOperator subop, SDPatternOperator opnode, SDPatternOperator subop, SDPatternOperator opnode,
RegisterClass OpVPR, RegisterOperand OpVPR,
ValueType ResTy, ValueType OpTy> ValueType ResTy, ValueType OpTy>
: NeonI_3VDiff<q, u, size, opcode, : NeonI_3VDiff<q, u, size, opcode,
(outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm), (outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
@@ -2856,11 +2856,7 @@ multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD> class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD>
: Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))), : Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
(SUBREG_TO_REG (i64 0), (INSTD VPR64:$Rn, VPR64:$Rm)>;
(INSTD (EXTRACT_SUBREG VPR64:$Rn, sub_64),
(EXTRACT_SUBREG VPR64:$Rm, sub_64)),
sub_64)>;
// Scalar Integer Add // Scalar Integer Add
let isCommutable = 1 in { let isCommutable = 1 in {
@@ -2994,54 +2990,28 @@ def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>; def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
(f64 (EXTRACT_SUBREG (v8i8 VPR64:$src), sub_64))>; def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
(f64 (EXTRACT_SUBREG (v4i16 VPR64:$src), sub_64))>; def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
(f64 (EXTRACT_SUBREG (v2i32 VPR64:$src), sub_64))>;
def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))),
(f64 (EXTRACT_SUBREG (v2f32 VPR64:$src), sub_64))>;
def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))),
(f64 (EXTRACT_SUBREG (v1i64 VPR64:$src), sub_64))>;
def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))),
(f128 (EXTRACT_SUBREG (v16i8 VPR128:$src), sub_alias))>;
def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))),
(f128 (EXTRACT_SUBREG (v8i16 VPR128:$src), sub_alias))>;
def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))),
(f128 (EXTRACT_SUBREG (v4i32 VPR128:$src), sub_alias))>;
def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))),
(f128 (EXTRACT_SUBREG (v2i64 VPR128:$src), sub_alias))>;
def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))),
(f128 (EXTRACT_SUBREG (v4f32 VPR128:$src), sub_alias))>;
def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))),
(f128 (EXTRACT_SUBREG (v2f64 VPR128:$src), sub_alias))>;
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
(v8i8 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
(v4i16 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
(v2i32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
(v2f32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
(v1i64 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>; def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
(v16i8 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
sub_alias))>;
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
(v8i16 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
sub_alias))>; def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
(v4i32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src), def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
sub_alias))>; def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
(v2i64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
sub_alias))>;
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
(v4f32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
sub_alias))>;
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
(v2f64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
sub_alias))>;

View File

@ -17,10 +17,6 @@ def sub_64 : SubRegIndex<64>;
def sub_32 : SubRegIndex<32>; def sub_32 : SubRegIndex<32>;
def sub_16 : SubRegIndex<16>; def sub_16 : SubRegIndex<16>;
def sub_8 : SubRegIndex<8>; def sub_8 : SubRegIndex<8>;
// The VPR registers are handled as sub-registers of FPR equivalents, but
// they're really the same thing. We give this concept a special index.
def sub_alias : SubRegIndex<128>;
} }
// Registers are identified with 5-bit ID numbers. // Registers are identified with 5-bit ID numbers.
@ -149,48 +145,28 @@ def FPR32 : RegisterClass<"AArch64", [f32], 32,
(sequence "S%u", 0, 31)> { (sequence "S%u", 0, 31)> {
} }
def FPR64 : RegisterClass<"AArch64", [f64], 64, def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64],
(sequence "D%u", 0, 31)> { 64, (sequence "D%u", 0, 31)>;
}
def FPR128 : RegisterClass<"AArch64",
[f128,v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
(sequence "Q%u", 0, 31)>;
def FPR128 : RegisterClass<"AArch64", [f128], 128,
(sequence "Q%u", 0, 31)> {
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// Vector registers: // Vector registers:
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// NEON registers simply specify the overall vector, and it's expected that def VPR64AsmOperand : AsmOperandClass {
// Instructions will individually specify the acceptable data layout. In let Name = "VPR";
// principle this leaves two approaches open: let PredicateMethod = "isReg";
// + An operand, giving a single ADDvvv instruction (for example). This turns let RenderMethod = "addRegOperands";
// out to be unworkable in the assembly parser (without every Instruction
// having a "cvt" function, at least) because the constraints can't be
// properly enforced. It also complicates specifying patterns since each
// instruction will accept many types.
// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific
// details about NEON registers, but simplifies most other details.
//
// The second approach was taken.
foreach Index = 0-31 in {
def V # Index : AArch64RegWithSubs<Index, "v" # Index,
[!cast<Register>("Q" # Index)],
[sub_alias]>,
DwarfRegNum<[!add(Index, 64)]>;
} }
// These two classes contain the same registers, which should be reasonably def VPR64 : RegisterOperand<FPR64, "printVPRRegister">;
// sensible for MC and allocation purposes, but allows them to be treated
// separately for things like stack spilling.
def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8, v1i64], 64,
(sequence "V%u", 0, 31)>;
def VPR128 : RegisterClass<"AArch64", def VPR128 : RegisterOperand<FPR128, "printVPRRegister">;
[v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
(sequence "V%u", 0, 31)>;
// Flags register // Flags register
def NZCV : Register<"nzcv"> { def NZCV : Register<"nzcv"> {

View File

@ -1556,22 +1556,11 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
std::string LowerReg = Tok.getString().lower(); std::string LowerReg = Tok.getString().lower();
size_t DotPos = LowerReg.find('.'); size_t DotPos = LowerReg.find('.');
RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); bool IsVec128 = false;
if (RegNum == AArch64::NoRegister) {
RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
.Case("ip0", AArch64::X16)
.Case("ip1", AArch64::X17)
.Case("fp", AArch64::X29)
.Case("lr", AArch64::X30)
.Default(AArch64::NoRegister);
}
if (RegNum == AArch64::NoRegister)
return false;
SMLoc S = Tok.getLoc(); SMLoc S = Tok.getLoc();
RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos); RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos);
if (DotPos == StringRef::npos) { if (DotPos == std::string::npos) {
Layout = StringRef(); Layout = StringRef();
} else { } else {
// Everything afterwards needs to be a literal token, expected to be // Everything afterwards needs to be a literal token, expected to be
@ -1582,19 +1571,76 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
// would go out of scope when we return). // would go out of scope when we return).
LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1); LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1);
std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos); std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos);
// See if it's a 128-bit layout first.
Layout = StringSwitch<const char *>(LayoutText) Layout = StringSwitch<const char *>(LayoutText)
.Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d") .Case(".d", ".d").Case(".2d", ".2d")
.Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s") .Case(".s", ".s").Case(".4s", ".4s")
.Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h") .Case(".h", ".h").Case(".8h", ".8h")
.Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b") .Case(".b", ".b").Case(".16b", ".16b")
.Default(""); .Default("");
if (Layout.size() != 0)
IsVec128 = true;
else {
Layout = StringSwitch<const char *>(LayoutText)
.Case(".1d", ".1d")
.Case(".2s", ".2s")
.Case(".4h", ".4h")
.Case(".8b", ".8b")
.Default("");
}
if (Layout.size() == 0) { if (Layout.size() == 0) {
// Malformed register // If we've still not pinned it down the register is malformed.
return false; return false;
} }
} }
RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
if (RegNum == AArch64::NoRegister) {
RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
.Case("ip0", AArch64::X16)
.Case("ip1", AArch64::X17)
.Case("fp", AArch64::X29)
.Case("lr", AArch64::X30)
.Case("v0", IsVec128 ? AArch64::Q0 : AArch64::D0)
.Case("v1", IsVec128 ? AArch64::Q1 : AArch64::D1)
.Case("v2", IsVec128 ? AArch64::Q2 : AArch64::D2)
.Case("v3", IsVec128 ? AArch64::Q3 : AArch64::D3)
.Case("v4", IsVec128 ? AArch64::Q4 : AArch64::D4)
.Case("v5", IsVec128 ? AArch64::Q5 : AArch64::D5)
.Case("v6", IsVec128 ? AArch64::Q6 : AArch64::D6)
.Case("v7", IsVec128 ? AArch64::Q7 : AArch64::D7)
.Case("v8", IsVec128 ? AArch64::Q8 : AArch64::D8)
.Case("v9", IsVec128 ? AArch64::Q9 : AArch64::D9)
.Case("v10", IsVec128 ? AArch64::Q10 : AArch64::D10)
.Case("v11", IsVec128 ? AArch64::Q11 : AArch64::D11)
.Case("v12", IsVec128 ? AArch64::Q12 : AArch64::D12)
.Case("v13", IsVec128 ? AArch64::Q13 : AArch64::D13)
.Case("v14", IsVec128 ? AArch64::Q14 : AArch64::D14)
.Case("v15", IsVec128 ? AArch64::Q15 : AArch64::D15)
.Case("v16", IsVec128 ? AArch64::Q16 : AArch64::D16)
.Case("v17", IsVec128 ? AArch64::Q17 : AArch64::D17)
.Case("v18", IsVec128 ? AArch64::Q18 : AArch64::D18)
.Case("v19", IsVec128 ? AArch64::Q19 : AArch64::D19)
.Case("v20", IsVec128 ? AArch64::Q20 : AArch64::D20)
.Case("v21", IsVec128 ? AArch64::Q21 : AArch64::D21)
.Case("v22", IsVec128 ? AArch64::Q22 : AArch64::D22)
.Case("v23", IsVec128 ? AArch64::Q23 : AArch64::D23)
.Case("v24", IsVec128 ? AArch64::Q24 : AArch64::D24)
.Case("v25", IsVec128 ? AArch64::Q25 : AArch64::D25)
.Case("v26", IsVec128 ? AArch64::Q26 : AArch64::D26)
.Case("v27", IsVec128 ? AArch64::Q27 : AArch64::D27)
.Case("v28", IsVec128 ? AArch64::Q28 : AArch64::D28)
.Case("v29", IsVec128 ? AArch64::Q29 : AArch64::D29)
.Case("v30", IsVec128 ? AArch64::Q30 : AArch64::D30)
.Case("v31", IsVec128 ? AArch64::Q31 : AArch64::D31)
.Default(AArch64::NoRegister);
}
if (RegNum == AArch64::NoRegister)
return false;
return true; return true;
} }

View File

@ -85,12 +85,6 @@ static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
unsigned RegNo, uint64_t Address, unsigned RegNo, uint64_t Address,
const void *Decoder); const void *Decoder);
static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst,
unsigned RegNo, uint64_t Address,
const void *Decoder);
static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
unsigned OptionHiS, unsigned OptionHiS,
@ -355,28 +349,6 @@ DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success; return MCDisassembler::Success;
} }
static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address,
const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Register));
return MCDisassembler::Success;
}
static DecodeStatus
DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
uint64_t Address, const void *Decoder) {
if (RegNo > 31)
return MCDisassembler::Fail;
uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo);
Inst.addOperand(MCOperand::CreateReg(Register));
return MCDisassembler::Success;
}
static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
unsigned OptionHiS, unsigned OptionHiS,
uint64_t Address, uint64_t Address,
@ -608,11 +580,11 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
unsigned IsToVec = fieldFromInstruction(Insn, 16, 1); unsigned IsToVec = fieldFromInstruction(Insn, 16, 1);
if (IsToVec) { if (IsToVec) {
DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder); DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder);
DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
} else { } else {
DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder); DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder);
} }
// Add the lane // Add the lane

View File

@ -368,6 +368,14 @@ AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
O << "#" << (Imm * MemScale); O << "#" << (Imm * MemScale);
} }
void AArch64InstPrinter::printVPRRegister(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned Reg = MI->getOperand(OpNo).getReg();
std::string Name = getRegisterName(Reg);
Name[0] = 'v';
O << Name;
}
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo, void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) { raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo); const MCOperand &Op = MI->getOperand(OpNo);

View File

@ -157,6 +157,7 @@ public:
void printRegExtendOperand(const MCInst *MI, unsigned OpNum, void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
raw_ostream &O, A64SE::ShiftExtSpecifiers Ext); raw_ostream &O, A64SE::ShiftExtSpecifiers Ext);
void printVPRRegister(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);

View File

@ -195,13 +195,15 @@
//---------------------------------------------------------------------- //----------------------------------------------------------------------
// Vector Move - register // Vector Move - register
//---------------------------------------------------------------------- //----------------------------------------------------------------------
// FIXME: these should all print with the "mov" syntax.
mov v0.8b, v31.8b mov v0.8b, v31.8b
mov v15.16b, v16.16b mov v15.16b, v16.16b
orr v0.8b, v31.8b, v31.8b orr v0.8b, v31.8b, v31.8b
orr v15.16b, v16.16b, v16.16b orr v15.16b, v16.16b, v16.16b
// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e] // CHECK: orr v0.8b, v31.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e] // CHECK: orr v15.16b, v16.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]
// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e] // CHECK: orr v0.8b, v31.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e] // CHECK: orr v15.16b, v16.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]

View File

@ -131,8 +131,11 @@
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
# Vector Move - register # Vector Move - register
#------------------------------------------------------------------------------ #------------------------------------------------------------------------------
# CHECK: mov v1.16b, v15.16b
# CHECK: mov v25.8b, v4.8b # FIXME: these should print as "mov", but TableGen can't handle it.
# CHECK: orr v1.16b, v15.16b, v15.16b
# CHECK: orr v25.8b, v4.8b, v4.8b
0xe1 0x1d 0xaf 0x4e 0xe1 0x1d 0xaf 0x4e
0x99 0x1c 0xa4 0x0e 0x99 0x1c 0xa4 0x0e