mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-21 21:29:41 +00:00
AArch64: use RegisterOperand for NEON registers.
Previously we modelled VPR128 and VPR64 as essentially identical register-classes containing V0-V31 (which had Q0-Q31 as "sub_alias" sub-registers). This model is starting to cause significant problems for code generation, particularly writing EXTRACT/INSERT_SUBREG patterns for converting between the two.

The change here switches to classifying VPR64 & VPR128 as RegisterOperands, which are essentially aliases for RegisterClasses with different parsing and printing behaviour. This fits almost exactly with their real status (VPR128 == FPR128 printed strangely, VPR64 == FPR64 printed strangely).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@190665 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
dc6fc4fa1f
commit
630c5e06d6
@ -32,17 +32,18 @@ using namespace llvm;
|
||||
/// argument to be printed as "bN".
|
||||
static bool printModifiedFPRAsmOperand(const MachineOperand &MO,
|
||||
const TargetRegisterInfo *TRI,
|
||||
const TargetRegisterClass &RegClass,
|
||||
raw_ostream &O) {
|
||||
char RegType, raw_ostream &O) {
|
||||
if (!MO.isReg())
|
||||
return true;
|
||||
|
||||
for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) {
|
||||
if (RegClass.contains(*AR)) {
|
||||
O << AArch64InstPrinter::getRegisterName(*AR);
|
||||
if (AArch64::FPR8RegClass.contains(*AR)) {
|
||||
O << RegType << TRI->getEncodingValue(MO.getReg());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// The register doesn't correspond to anything floating-point like.
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -157,7 +158,7 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
|
||||
// register. Technically, we could allocate the argument as a VPR128, but
|
||||
// that leads to extremely dodgy copies being generated to get the data
|
||||
// there.
|
||||
if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O))
|
||||
if (printModifiedFPRAsmOperand(MO, TRI, 'v', O))
|
||||
O << AArch64InstPrinter::getRegisterName(MO.getReg());
|
||||
break;
|
||||
case MachineOperand::MO_Immediate:
|
||||
@ -211,25 +212,12 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
|
||||
// copies ...).
|
||||
llvm_unreachable("FIXME: Unimplemented register pairs");
|
||||
case 'b':
|
||||
// Output 8-bit FP/SIMD scalar register operand, prefixed with b.
|
||||
return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
|
||||
AArch64::FPR8RegClass, O);
|
||||
case 'h':
|
||||
// Output 16-bit FP/SIMD scalar register operand, prefixed with h.
|
||||
return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
|
||||
AArch64::FPR16RegClass, O);
|
||||
case 's':
|
||||
// Output 32-bit FP/SIMD scalar register operand, prefixed with s.
|
||||
return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
|
||||
AArch64::FPR32RegClass, O);
|
||||
case 'd':
|
||||
// Output 64-bit FP/SIMD scalar register operand, prefixed with d.
|
||||
return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
|
||||
AArch64::FPR64RegClass, O);
|
||||
case 'q':
|
||||
// Output 128-bit FP/SIMD scalar register operand, prefixed with q.
|
||||
return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI,
|
||||
AArch64::FPR128RegClass, O);
|
||||
ExtraCode[0], O);
|
||||
case 'A':
|
||||
// Output symbolic address with appropriate relocation modifier (also
|
||||
// suitable for ADRP).
|
||||
|
@ -57,17 +57,17 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
|
||||
|
||||
if (Subtarget->hasNEON()) {
|
||||
// And the vectors
|
||||
addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass);
|
||||
addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass);
|
||||
addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass);
|
||||
addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass);
|
||||
addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass);
|
||||
addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass);
|
||||
addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass);
|
||||
addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass);
|
||||
addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass);
|
||||
addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass);
|
||||
addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass);
|
||||
addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass);
|
||||
addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass);
|
||||
addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass);
|
||||
addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass);
|
||||
addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass);
|
||||
addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass);
|
||||
addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass);
|
||||
addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass);
|
||||
addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass);
|
||||
addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass);
|
||||
addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass);
|
||||
}
|
||||
|
||||
computeRegisterProperties();
|
||||
@ -3610,14 +3610,10 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
|
||||
return std::make_pair(0U, &AArch64::FPR16RegClass);
|
||||
else if (VT == MVT::f32)
|
||||
return std::make_pair(0U, &AArch64::FPR32RegClass);
|
||||
else if (VT == MVT::f64)
|
||||
return std::make_pair(0U, &AArch64::FPR64RegClass);
|
||||
else if (VT.getSizeInBits() == 64)
|
||||
return std::make_pair(0U, &AArch64::VPR64RegClass);
|
||||
else if (VT == MVT::f128)
|
||||
return std::make_pair(0U, &AArch64::FPR128RegClass);
|
||||
return std::make_pair(0U, &AArch64::FPR64RegClass);
|
||||
else if (VT.getSizeInBits() == 128)
|
||||
return std::make_pair(0U, &AArch64::VPR128RegClass);
|
||||
return std::make_pair(0U, &AArch64::FPR128RegClass);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -125,6 +125,8 @@ def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>;
|
||||
|
||||
def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>;
|
||||
|
||||
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Call sequence pseudo-instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -2196,13 +2198,13 @@ def : Pat<(fsub (fmul FPR32:$Rn, FPR32:$Rm), FPR32:$Ra),
|
||||
def : Pat<(fsub (fneg FPR32:$Ra), (fmul FPR32:$Rn, FPR32:$Rm)),
|
||||
(FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
|
||||
|
||||
def : Pat<(fadd FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
|
||||
def : Pat<(fadd FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)),
|
||||
(FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
|
||||
def : Pat<(fsub FPR64:$Ra, (fmul FPR64:$Rn, FPR64:$Rm)),
|
||||
def : Pat<(fsub FPR64:$Ra, (fmul (f64 FPR64:$Rn), FPR64:$Rm)),
|
||||
(FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
|
||||
def : Pat<(fsub (fmul FPR64:$Rn, FPR64:$Rm), FPR64:$Ra),
|
||||
def : Pat<(fsub (fmul (f64 FPR64:$Rn), FPR64:$Rm), FPR64:$Ra),
|
||||
(FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
|
||||
def : Pat<(fsub (fneg FPR64:$Ra), (fmul FPR64:$Rn, FPR64:$Rm)),
|
||||
def : Pat<(fsub (fneg (f64 FPR64:$Ra)), (fmul FPR64:$Rn, FPR64:$Rm)),
|
||||
(FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
|
||||
}
|
||||
|
||||
@ -5162,4 +5164,4 @@ defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)),
|
||||
// Advanced SIMD (NEON) Support
|
||||
//
|
||||
|
||||
include "AArch64InstrNEON.td"
|
||||
include "AArch64InstrNEON.td"
|
||||
|
@ -215,8 +215,8 @@ defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul",
|
||||
// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and
|
||||
// two operands constraints.
|
||||
class NeonI_3VSame_Constraint_impl<string asmop, string asmlane,
|
||||
RegisterClass VPRC, ValueType OpTy, bit q, bit u, bits<2> size, bits<5> opcode,
|
||||
SDPatternOperator opnode>
|
||||
RegisterOperand VPRC, ValueType OpTy, bit q, bit u, bits<2> size,
|
||||
bits<5> opcode, SDPatternOperator opnode>
|
||||
: NeonI_3VSame<q, u, size, opcode,
|
||||
(outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, VPRC:$Rm),
|
||||
asmop # "\t$Rd" # asmlane # ", $Rn" # asmlane # ", $Rm" # asmlane,
|
||||
@ -321,11 +321,13 @@ defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>;
|
||||
// ORR disassembled as MOV if Vn==Vm
|
||||
|
||||
// Vector Move - register
|
||||
// Alias for ORR if Vn=Vm and it is the preferred syntax
|
||||
// Alias for ORR if Vn=Vm.
|
||||
// FIXME: This is actually the preferred syntax but TableGen can't deal with
|
||||
// custom printing of aliases.
|
||||
def : NeonInstAlias<"mov $Rd.8b, $Rn.8b",
|
||||
(ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn)>;
|
||||
(ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>;
|
||||
def : NeonInstAlias<"mov $Rd.16b, $Rn.16b",
|
||||
(ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn)>;
|
||||
(ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>;
|
||||
|
||||
def Neon_immAllOnes: PatLeaf<(Neon_movi (i32 timm), (i32 imm)), [{
|
||||
ConstantSDNode *ImmConstVal = cast<ConstantSDNode>(N->getOperand(0));
|
||||
@ -571,7 +573,7 @@ def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs),
|
||||
// NeonI_compare_aliases class: swaps register operands to implement
|
||||
// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed.
|
||||
class NeonI_compare_aliases<string asmop, string asmlane,
|
||||
Instruction inst, RegisterClass VPRC>
|
||||
Instruction inst, RegisterOperand VPRC>
|
||||
: NeonInstAlias<asmop # "\t$Rd" # asmlane #", $Rn" # asmlane #
|
||||
", $Rm" # asmlane,
|
||||
(inst VPRC:$Rd, VPRC:$Rm, VPRC:$Rn), 0b0>;
|
||||
@ -1324,7 +1326,7 @@ defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>;
|
||||
}
|
||||
|
||||
class NeonI_mov_imm_lsl_aliases<string asmop, string asmlane,
|
||||
Instruction inst, RegisterClass VPRC>
|
||||
Instruction inst, RegisterOperand VPRC>
|
||||
: NeonInstAlias<!strconcat(asmop, " $Rd," # asmlane # ", $Imm"),
|
||||
(inst VPRC:$Rd, neon_uimm8:$Imm, 0), 0b0>;
|
||||
|
||||
@ -1401,7 +1403,7 @@ def MOVIdi : NeonI_1VModImm<0b0, 0b1,
|
||||
|
||||
// Vector Floating Point Move Immediate
|
||||
|
||||
class NeonI_FMOV_impl<string asmlane, RegisterClass VPRC, ValueType OpTy,
|
||||
class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
|
||||
Operand immOpType, bit q, bit op>
|
||||
: NeonI_1VModImm<q, op,
|
||||
(outs VPRC:$Rd), (ins immOpType:$Imm),
|
||||
@ -1456,7 +1458,7 @@ def shr_imm32 : shr_imm<"32">;
|
||||
def shr_imm64 : shr_imm<"64">;
|
||||
|
||||
class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
|
||||
RegisterClass VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
|
||||
RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
|
||||
: NeonI_2VShiftImm<q, u, opcode,
|
||||
(outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
|
||||
asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
|
||||
@ -1634,7 +1636,7 @@ defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>;
|
||||
|
||||
// Rounding/Saturating shift
|
||||
class N2VShift_RQ<bit q, bit u, bits<5> opcode, string asmop, string T,
|
||||
RegisterClass VPRC, ValueType Ty, Operand ImmTy,
|
||||
RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
|
||||
SDPatternOperator OpNode>
|
||||
: NeonI_2VShiftImm<q, u, opcode,
|
||||
(outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
|
||||
@ -1736,7 +1738,7 @@ defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>;
|
||||
defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>;
|
||||
|
||||
class N2VShiftAdd<bit q, bit u, bits<5> opcode, string asmop, string T,
|
||||
RegisterClass VPRC, ValueType Ty, Operand ImmTy,
|
||||
RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
|
||||
SDNode OpNode>
|
||||
: NeonI_2VShiftImm<q, u, opcode,
|
||||
(outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
|
||||
@ -1792,7 +1794,7 @@ defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>;
|
||||
|
||||
// Rounding shift accumulate
|
||||
class N2VShiftAdd_R<bit q, bit u, bits<5> opcode, string asmop, string T,
|
||||
RegisterClass VPRC, ValueType Ty, Operand ImmTy,
|
||||
RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
|
||||
SDPatternOperator OpNode>
|
||||
: NeonI_2VShiftImm<q, u, opcode,
|
||||
(outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
|
||||
@ -1847,7 +1849,7 @@ defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>;
|
||||
|
||||
// Shift insert by immediate
|
||||
class N2VShiftIns<bit q, bit u, bits<5> opcode, string asmop, string T,
|
||||
RegisterClass VPRC, ValueType Ty, Operand ImmTy,
|
||||
RegisterOperand VPRC, ValueType Ty, Operand ImmTy,
|
||||
SDPatternOperator OpNode>
|
||||
: NeonI_2VShiftImm<q, u, opcode,
|
||||
(outs VPRC:$Rd), (ins VPRC:$src, VPRC:$Rn, ImmTy:$Imm),
|
||||
@ -1953,7 +1955,7 @@ class N2VShR_Narrow<bit q, bit u, bits<5> opcode, string asmop, string DestT,
|
||||
class N2VShR_Narrow_Hi<bit q, bit u, bits<5> opcode, string asmop, string DestT,
|
||||
string SrcT, Operand ImmTy>
|
||||
: NeonI_2VShiftImm<q, u, opcode, (outs VPR128:$Rd),
|
||||
(ins VPR64:$src, VPR128:$Rn, ImmTy:$Imm),
|
||||
(ins VPR128:$src, VPR128:$Rn, ImmTy:$Imm),
|
||||
asmop # "\t$Rd." # DestT # ", $Rn." # SrcT # ", $Imm",
|
||||
[], NoItinerary> {
|
||||
let Constraints = "$src = $Rd";
|
||||
@ -2040,15 +2042,18 @@ multiclass Neon_shiftNarrow_patterns<string shr> {
|
||||
def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
|
||||
(v8i8 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm8H")
|
||||
VPR128:$Rn, imm:$Imm)))))),
|
||||
(SHRNvvi_16B VPR64:$src, VPR128:$Rn, imm:$Imm)>;
|
||||
(SHRNvvi_16B (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
|
||||
VPR128:$Rn, imm:$Imm)>;
|
||||
def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
|
||||
(v4i16 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm4S")
|
||||
VPR128:$Rn, imm:$Imm)))))),
|
||||
(SHRNvvi_8H VPR64:$src, VPR128:$Rn, imm:$Imm)>;
|
||||
(SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
|
||||
VPR128:$Rn, imm:$Imm)>;
|
||||
def : Pat<(Neon_combine (v1i64 VPR64:$src), (v1i64 (bitconvert
|
||||
(v2i32 (trunc (!cast<PatFrag>("Neon_" # shr # "Imm2D")
|
||||
VPR128:$Rn, imm:$Imm)))))),
|
||||
(SHRNvvi_4S VPR64:$src, VPR128:$Rn, imm:$Imm)>;
|
||||
(SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
|
||||
VPR128:$Rn, imm:$Imm)>;
|
||||
}
|
||||
|
||||
multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
|
||||
@ -2060,17 +2065,20 @@ multiclass Neon_shiftNarrow_QR_patterns<SDPatternOperator op, string prefix> {
|
||||
(!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
|
||||
|
||||
def : Pat<(Neon_combine (v1i64 VPR64:$src),
|
||||
(v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
|
||||
(v1i64 (bitconvert (v8i8 (op (v8i16 VPR128:$Rn), imm:$Imm))))),
|
||||
(!cast<Instruction>(prefix # "_16B")
|
||||
VPR64:$src, VPR128:$Rn, imm:$Imm)>;
|
||||
(SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
|
||||
VPR128:$Rn, imm:$Imm)>;
|
||||
def : Pat<(Neon_combine (v1i64 VPR64:$src),
|
||||
(v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
|
||||
(v1i64 (bitconvert (v4i16 (op (v4i32 VPR128:$Rn), imm:$Imm))))),
|
||||
(!cast<Instruction>(prefix # "_8H")
|
||||
VPR64:$src, VPR128:$Rn, imm:$Imm)>;
|
||||
(SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
|
||||
VPR128:$Rn, imm:$Imm)>;
|
||||
def : Pat<(Neon_combine (v1i64 VPR64:$src),
|
||||
(v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
|
||||
(v1i64 (bitconvert (v2i32 (op (v2i64 VPR128:$Rn), imm:$Imm))))),
|
||||
(!cast<Instruction>(prefix # "_4S")
|
||||
VPR64:$src, VPR128:$Rn, imm:$Imm)>;
|
||||
(SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
|
||||
VPR128:$Rn, imm:$Imm)>;
|
||||
}
|
||||
|
||||
defm : Neon_shiftNarrow_patterns<"lshr">;
|
||||
@ -2086,7 +2094,7 @@ defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
|
||||
|
||||
// Convert between fixed-point and floating-point
|
||||
class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
|
||||
RegisterClass VPRC, ValueType DestTy, ValueType SrcTy,
|
||||
RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
|
||||
Operand ImmTy, SDPatternOperator IntOp>
|
||||
: NeonI_2VShiftImm<q, u, opcode,
|
||||
(outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
|
||||
@ -2162,7 +2170,7 @@ defm NI_zext_high : Neon_sshll2_0<zext>;
|
||||
class NeonI_3VDL<bit q, bit u, bits<2> size, bits<4> opcode,
|
||||
string asmop, string ResS, string OpS,
|
||||
SDPatternOperator opnode, SDPatternOperator ext,
|
||||
RegisterClass OpVPR,
|
||||
RegisterOperand OpVPR,
|
||||
ValueType ResTy, ValueType OpTy>
|
||||
: NeonI_3VDiff<q, u, size, opcode,
|
||||
(outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
|
||||
@ -2244,7 +2252,7 @@ defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>;
|
||||
class NeonI_3VDW<bit q, bit u, bits<2> size, bits<4> opcode,
|
||||
string asmop, string ResS, string OpS,
|
||||
SDPatternOperator opnode, SDPatternOperator ext,
|
||||
RegisterClass OpVPR,
|
||||
RegisterOperand OpVPR,
|
||||
ValueType ResTy, ValueType OpTy>
|
||||
: NeonI_3VDiff<q, u, size, opcode,
|
||||
(outs VPR128:$Rd), (ins VPR128:$Rn, OpVPR:$Rm),
|
||||
@ -2325,7 +2333,7 @@ multiclass NeonI_get_high
|
||||
}
|
||||
|
||||
defm NI_get_hi : NeonI_get_high;
|
||||
|
||||
|
||||
// pattern for addhn/subhn with 2 operands
|
||||
class NeonI_3VDN_addhn_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
|
||||
string asmop, string ResS, string OpS,
|
||||
@ -2361,7 +2369,7 @@ defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>;
|
||||
class NeonI_3VD_2Op<bit q, bit u, bits<2> size, bits<4> opcode,
|
||||
string asmop, string ResS, string OpS,
|
||||
SDPatternOperator opnode,
|
||||
RegisterClass ResVPR, RegisterClass OpVPR,
|
||||
RegisterOperand ResVPR, RegisterOperand OpVPR,
|
||||
ValueType ResTy, ValueType OpTy>
|
||||
: NeonI_3VDiff<q, u, size, opcode,
|
||||
(outs ResVPR:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
|
||||
@ -2388,79 +2396,71 @@ multiclass NeonI_3VDN_2Op<bit u, bits<4> opcode,
|
||||
defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>;
|
||||
defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>;
|
||||
|
||||
// pattern for acle intrinsic with 3 operands
|
||||
class NeonI_3VDN_addhn2_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
|
||||
string asmop, string ResS, string OpS,
|
||||
SDPatternOperator opnode, SDPatternOperator get_hi,
|
||||
ValueType OpTy, ValueType OpSTy>
|
||||
: NeonI_3VDiff<q, u, size, opcode,
|
||||
(outs VPR128:$Rd), (ins VPR64:$src, VPR128:$Rn, VPR128:$Rm),
|
||||
asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
|
||||
[(set (v2i64 VPR128:$Rd),
|
||||
(Neon_combine
|
||||
(v1i64 VPR64:$src),
|
||||
(v1i64 (bitconvert
|
||||
(OpSTy (get_hi
|
||||
(OpTy (opnode (OpTy VPR128:$Rn),
|
||||
(OpTy VPR128:$Rm)))))))))],
|
||||
NoItinerary> {
|
||||
let Constraints = "$src = $Rd";
|
||||
}
|
||||
|
||||
multiclass NeonI_3VDN_addhn2_3Op_v1<bit u, bits<4> opcode,
|
||||
string asmop,
|
||||
SDPatternOperator opnode>
|
||||
{
|
||||
def _16b8h : NeonI_3VDN_addhn2_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h",
|
||||
opnode, NI_get_hi_8h, v8i16, v8i8>;
|
||||
def _8h4s : NeonI_3VDN_addhn2_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s",
|
||||
opnode, NI_get_hi_4s, v4i32, v4i16>;
|
||||
def _4s2d : NeonI_3VDN_addhn2_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d",
|
||||
opnode, NI_get_hi_2d, v2i64, v2i32>;
|
||||
}
|
||||
|
||||
defm ADDHN2vvv : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0100, "addhn2", add>;
|
||||
defm SUBHN2vvv : NeonI_3VDN_addhn2_3Op_v1<0b0, 0b0110, "subhn2", sub>;
|
||||
|
||||
// pattern for acle intrinsic with 3 operands
|
||||
class NeonI_3VDN_3Op<bit q, bit u, bits<2> size, bits<4> opcode,
|
||||
string asmop, string ResS, string OpS,
|
||||
SDPatternOperator opnode,
|
||||
ValueType OpTy, ValueType OpSTy>
|
||||
string asmop, string ResS, string OpS>
|
||||
: NeonI_3VDiff<q, u, size, opcode,
|
||||
(outs VPR128:$Rd), (ins VPR64:$src, VPR128:$Rn, VPR128:$Rm),
|
||||
(outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, VPR128:$Rm),
|
||||
asmop # "\t$Rd." # ResS # ", $Rn." # OpS # ", $Rm." # OpS,
|
||||
[(set (v2i64 VPR128:$Rd),
|
||||
(Neon_combine (v1i64 VPR64:$src),
|
||||
(v1i64 (bitconvert
|
||||
(OpSTy (opnode (OpTy VPR128:$Rn),
|
||||
(OpTy VPR128:$Rm)))))))],
|
||||
NoItinerary> {
|
||||
[], NoItinerary> {
|
||||
let Constraints = "$src = $Rd";
|
||||
let neverHasSideEffects = 1;
|
||||
}
|
||||
|
||||
multiclass NeonI_3VDN_3Op_v1<bit u, bits<4> opcode,
|
||||
string asmop,
|
||||
SDPatternOperator opnode>
|
||||
{
|
||||
def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h",
|
||||
opnode, v8i16, v8i8>;
|
||||
def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s",
|
||||
opnode, v4i32, v4i16>;
|
||||
def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d",
|
||||
opnode, v2i64, v2i32>;
|
||||
string asmop> {
|
||||
def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">;
|
||||
def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">;
|
||||
def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">;
|
||||
}
|
||||
|
||||
defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2",
|
||||
int_arm_neon_vraddhn>;
|
||||
defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2",
|
||||
int_arm_neon_vrsubhn>;
|
||||
defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">;
|
||||
defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">;
|
||||
|
||||
defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">;
|
||||
defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">;
|
||||
|
||||
// Patterns have to be separate because there's a SUBREG_TO_REG in the output
|
||||
// part.
|
||||
class NarrowHighHalfPat<Instruction INST, ValueType DstTy, ValueType SrcTy,
|
||||
SDPatternOperator coreop>
|
||||
: Pat<(Neon_combine (v1i64 VPR64:$src),
|
||||
(v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn),
|
||||
(SrcTy VPR128:$Rm)))))),
|
||||
(INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
|
||||
VPR128:$Rn, VPR128:$Rm)>;
|
||||
|
||||
// addhn2 patterns
|
||||
def : NarrowHighHalfPat<ADDHN2vvv_16b8h, v8i8, v8i16,
|
||||
BinOpFrag<(NI_get_hi_8h (add node:$LHS, node:$RHS))>>;
|
||||
def : NarrowHighHalfPat<ADDHN2vvv_8h4s, v4i16, v4i32,
|
||||
BinOpFrag<(NI_get_hi_4s (add node:$LHS, node:$RHS))>>;
|
||||
def : NarrowHighHalfPat<ADDHN2vvv_4s2d, v2i32, v2i64,
|
||||
BinOpFrag<(NI_get_hi_2d (add node:$LHS, node:$RHS))>>;
|
||||
|
||||
// subhn2 patterns
|
||||
def : NarrowHighHalfPat<SUBHN2vvv_16b8h, v8i8, v8i16,
|
||||
BinOpFrag<(NI_get_hi_8h (sub node:$LHS, node:$RHS))>>;
|
||||
def : NarrowHighHalfPat<SUBHN2vvv_8h4s, v4i16, v4i32,
|
||||
BinOpFrag<(NI_get_hi_4s (sub node:$LHS, node:$RHS))>>;
|
||||
def : NarrowHighHalfPat<SUBHN2vvv_4s2d, v2i32, v2i64,
|
||||
BinOpFrag<(NI_get_hi_2d (sub node:$LHS, node:$RHS))>>;
|
||||
|
||||
// raddhn2 patterns
|
||||
def : NarrowHighHalfPat<RADDHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vraddhn>;
|
||||
def : NarrowHighHalfPat<RADDHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vraddhn>;
|
||||
def : NarrowHighHalfPat<RADDHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vraddhn>;
|
||||
|
||||
// rsubhn2 patterns
|
||||
def : NarrowHighHalfPat<RSUBHN2vvv_16b8h, v8i8, v8i16, int_arm_neon_vrsubhn>;
|
||||
def : NarrowHighHalfPat<RSUBHN2vvv_8h4s, v4i16, v4i32, int_arm_neon_vrsubhn>;
|
||||
def : NarrowHighHalfPat<RSUBHN2vvv_4s2d, v2i32, v2i64, int_arm_neon_vrsubhn>;
|
||||
|
||||
// Pattern that needs to extend the result
|
||||
class NeonI_3VDL_Ext<bit q, bit u, bits<2> size, bits<4> opcode,
|
||||
string asmop, string ResS, string OpS,
|
||||
SDPatternOperator opnode,
|
||||
RegisterClass OpVPR,
|
||||
RegisterOperand OpVPR,
|
||||
ValueType ResTy, ValueType OpTy, ValueType OpSTy>
|
||||
: NeonI_3VDiff<q, u, size, opcode,
|
||||
(outs VPR128:$Rd), (ins OpVPR:$Rn, OpVPR:$Rm),
|
||||
@ -2528,7 +2528,7 @@ defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 0b0111, "uabdl2", "NI_uabdl_hi", 1>;
|
||||
class NeonI_3VDL_Aba<bit q, bit u, bits<2> size, bits<4> opcode,
|
||||
string asmop, string ResS, string OpS,
|
||||
SDPatternOperator opnode, SDPatternOperator subop,
|
||||
RegisterClass OpVPR,
|
||||
RegisterOperand OpVPR,
|
||||
ValueType ResTy, ValueType OpTy, ValueType OpSTy>
|
||||
: NeonI_3VDiff<q, u, size, opcode,
|
||||
(outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
|
||||
@ -2684,7 +2684,7 @@ defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>;
|
||||
class NeonI_3VDL2_3Op_mlas<bit q, bit u, bits<2> size, bits<4> opcode,
|
||||
string asmop, string ResS, string OpS,
|
||||
SDPatternOperator subop, SDPatternOperator opnode,
|
||||
RegisterClass OpVPR,
|
||||
RegisterOperand OpVPR,
|
||||
ValueType ResTy, ValueType OpTy>
|
||||
: NeonI_3VDiff<q, u, size, opcode,
|
||||
(outs VPR128:$Rd), (ins VPR128:$src, OpVPR:$Rn, OpVPR:$Rm),
|
||||
@ -2856,11 +2856,7 @@ multiclass NeonI_Scalar3Same_BHSD_sizes<bit u, bits<5> opcode,
|
||||
|
||||
class Neon_Scalar_D_size_patterns<SDPatternOperator opnode, Instruction INSTD>
|
||||
: Pat<(v1i64 (opnode (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))),
|
||||
(SUBREG_TO_REG (i64 0),
|
||||
(INSTD (EXTRACT_SUBREG VPR64:$Rn, sub_64),
|
||||
(EXTRACT_SUBREG VPR64:$Rm, sub_64)),
|
||||
sub_64)>;
|
||||
|
||||
(INSTD VPR64:$Rn, VPR64:$Rm)>;
|
||||
|
||||
// Scalar Integer Add
|
||||
let isCommutable = 1 in {
|
||||
@ -2994,54 +2990,28 @@ def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>;
|
||||
|
||||
def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))),
|
||||
(f64 (EXTRACT_SUBREG (v8i8 VPR64:$src), sub_64))>;
|
||||
def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))),
|
||||
(f64 (EXTRACT_SUBREG (v4i16 VPR64:$src), sub_64))>;
|
||||
def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))),
|
||||
(f64 (EXTRACT_SUBREG (v2i32 VPR64:$src), sub_64))>;
|
||||
def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))),
|
||||
(f64 (EXTRACT_SUBREG (v2f32 VPR64:$src), sub_64))>;
|
||||
def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))),
|
||||
(f64 (EXTRACT_SUBREG (v1i64 VPR64:$src), sub_64))>;
|
||||
def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))),
|
||||
(f128 (EXTRACT_SUBREG (v16i8 VPR128:$src), sub_alias))>;
|
||||
def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))),
|
||||
(f128 (EXTRACT_SUBREG (v8i16 VPR128:$src), sub_alias))>;
|
||||
def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))),
|
||||
(f128 (EXTRACT_SUBREG (v4i32 VPR128:$src), sub_alias))>;
|
||||
def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))),
|
||||
(f128 (EXTRACT_SUBREG (v2i64 VPR128:$src), sub_alias))>;
|
||||
def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))),
|
||||
(f128 (EXTRACT_SUBREG (v4f32 VPR128:$src), sub_alias))>;
|
||||
def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))),
|
||||
(f128 (EXTRACT_SUBREG (v2f64 VPR128:$src), sub_alias))>;
|
||||
def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>;
|
||||
def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>;
|
||||
|
||||
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))),
|
||||
(v8i8 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
|
||||
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))),
|
||||
(v4i16 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
|
||||
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))),
|
||||
(v2i32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
|
||||
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))),
|
||||
(v2f32 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
|
||||
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))),
|
||||
(v1i64 (SUBREG_TO_REG (i64 0), (f64 FPR64:$src), sub_64))>;
|
||||
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
|
||||
(v16i8 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
|
||||
sub_alias))>;
|
||||
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
|
||||
(v8i16 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
|
||||
sub_alias))>;
|
||||
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
|
||||
(v4i32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
|
||||
sub_alias))>;
|
||||
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
|
||||
(v2i64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
|
||||
sub_alias))>;
|
||||
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
|
||||
(v4f32 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
|
||||
sub_alias))>;
|
||||
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
|
||||
(v2f64 (SUBREG_TO_REG (i128 0), (f128 FPR128:$src),
|
||||
sub_alias))>;
|
||||
def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>;
|
||||
def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>;
|
||||
def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>;
|
||||
def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>;
|
||||
def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>;
|
||||
def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>;
|
||||
|
||||
def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>;
|
||||
def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>;
|
||||
def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>;
|
||||
def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>;
|
||||
def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>;
|
||||
|
||||
def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>;
|
||||
def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>;
|
||||
def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>;
|
||||
def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>;
|
||||
def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
|
||||
def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
|
||||
|
@ -17,10 +17,6 @@ def sub_64 : SubRegIndex<64>;
|
||||
def sub_32 : SubRegIndex<32>;
|
||||
def sub_16 : SubRegIndex<16>;
|
||||
def sub_8 : SubRegIndex<8>;
|
||||
|
||||
// The VPR registers are handled as sub-registers of FPR equivalents, but
|
||||
// they're really the same thing. We give this concept a special index.
|
||||
def sub_alias : SubRegIndex<128>;
|
||||
}
|
||||
|
||||
// Registers are identified with 5-bit ID numbers.
|
||||
@ -149,48 +145,28 @@ def FPR32 : RegisterClass<"AArch64", [f32], 32,
|
||||
(sequence "S%u", 0, 31)> {
|
||||
}
|
||||
|
||||
def FPR64 : RegisterClass<"AArch64", [f64], 64,
|
||||
(sequence "D%u", 0, 31)> {
|
||||
}
|
||||
def FPR64 : RegisterClass<"AArch64", [f64, v2f32, v2i32, v4i16, v8i8, v1i64],
|
||||
64, (sequence "D%u", 0, 31)>;
|
||||
|
||||
def FPR128 : RegisterClass<"AArch64",
|
||||
[f128,v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
|
||||
(sequence "Q%u", 0, 31)>;
|
||||
|
||||
def FPR128 : RegisterClass<"AArch64", [f128], 128,
|
||||
(sequence "Q%u", 0, 31)> {
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Vector registers:
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// NEON registers simply specify the overall vector, and it's expected that
|
||||
// Instructions will individually specify the acceptable data layout. In
|
||||
// principle this leaves two approaches open:
|
||||
// + An operand, giving a single ADDvvv instruction (for example). This turns
|
||||
// out to be unworkable in the assembly parser (without every Instruction
|
||||
// having a "cvt" function, at least) because the constraints can't be
|
||||
// properly enforced. It also complicates specifying patterns since each
|
||||
// instruction will accept many types.
|
||||
// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific
|
||||
// details about NEON registers, but simplifies most other details.
|
||||
//
|
||||
// The second approach was taken.
|
||||
|
||||
foreach Index = 0-31 in {
|
||||
def V # Index : AArch64RegWithSubs<Index, "v" # Index,
|
||||
[!cast<Register>("Q" # Index)],
|
||||
[sub_alias]>,
|
||||
DwarfRegNum<[!add(Index, 64)]>;
|
||||
def VPR64AsmOperand : AsmOperandClass {
|
||||
let Name = "VPR";
|
||||
let PredicateMethod = "isReg";
|
||||
let RenderMethod = "addRegOperands";
|
||||
}
|
||||
|
||||
// These two classes contain the same registers, which should be reasonably
|
||||
// sensible for MC and allocation purposes, but allows them to be treated
|
||||
// separately for things like stack spilling.
|
||||
def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8, v1i64], 64,
|
||||
(sequence "V%u", 0, 31)>;
|
||||
def VPR64 : RegisterOperand<FPR64, "printVPRRegister">;
|
||||
|
||||
def VPR128 : RegisterClass<"AArch64",
|
||||
[v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128,
|
||||
(sequence "V%u", 0, 31)>;
|
||||
def VPR128 : RegisterOperand<FPR128, "printVPRRegister">;
|
||||
|
||||
// Flags register
|
||||
def NZCV : Register<"nzcv"> {
|
||||
|
@ -1556,22 +1556,11 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
|
||||
std::string LowerReg = Tok.getString().lower();
|
||||
size_t DotPos = LowerReg.find('.');
|
||||
|
||||
RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
|
||||
if (RegNum == AArch64::NoRegister) {
|
||||
RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
|
||||
.Case("ip0", AArch64::X16)
|
||||
.Case("ip1", AArch64::X17)
|
||||
.Case("fp", AArch64::X29)
|
||||
.Case("lr", AArch64::X30)
|
||||
.Default(AArch64::NoRegister);
|
||||
}
|
||||
if (RegNum == AArch64::NoRegister)
|
||||
return false;
|
||||
|
||||
bool IsVec128 = false;
|
||||
SMLoc S = Tok.getLoc();
|
||||
RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos);
|
||||
|
||||
if (DotPos == StringRef::npos) {
|
||||
if (DotPos == std::string::npos) {
|
||||
Layout = StringRef();
|
||||
} else {
|
||||
// Everything afterwards needs to be a literal token, expected to be
|
||||
@ -1582,19 +1571,76 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc,
|
||||
// would go out of scope when we return).
|
||||
LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1);
|
||||
std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos);
|
||||
|
||||
// See if it's a 128-bit layout first.
|
||||
Layout = StringSwitch<const char *>(LayoutText)
|
||||
.Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d")
|
||||
.Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s")
|
||||
.Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h")
|
||||
.Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b")
|
||||
.Case(".d", ".d").Case(".2d", ".2d")
|
||||
.Case(".s", ".s").Case(".4s", ".4s")
|
||||
.Case(".h", ".h").Case(".8h", ".8h")
|
||||
.Case(".b", ".b").Case(".16b", ".16b")
|
||||
.Default("");
|
||||
|
||||
if (Layout.size() != 0)
|
||||
IsVec128 = true;
|
||||
else {
|
||||
Layout = StringSwitch<const char *>(LayoutText)
|
||||
.Case(".1d", ".1d")
|
||||
.Case(".2s", ".2s")
|
||||
.Case(".4h", ".4h")
|
||||
.Case(".8b", ".8b")
|
||||
.Default("");
|
||||
}
|
||||
|
||||
if (Layout.size() == 0) {
|
||||
// Malformed register
|
||||
// If we've still not pinned it down the register is malformed.
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
RegNum = MatchRegisterName(LowerReg.substr(0, DotPos));
|
||||
if (RegNum == AArch64::NoRegister) {
|
||||
RegNum = StringSwitch<unsigned>(LowerReg.substr(0, DotPos))
|
||||
.Case("ip0", AArch64::X16)
|
||||
.Case("ip1", AArch64::X17)
|
||||
.Case("fp", AArch64::X29)
|
||||
.Case("lr", AArch64::X30)
|
||||
.Case("v0", IsVec128 ? AArch64::Q0 : AArch64::D0)
|
||||
.Case("v1", IsVec128 ? AArch64::Q1 : AArch64::D1)
|
||||
.Case("v2", IsVec128 ? AArch64::Q2 : AArch64::D2)
|
||||
.Case("v3", IsVec128 ? AArch64::Q3 : AArch64::D3)
|
||||
.Case("v4", IsVec128 ? AArch64::Q4 : AArch64::D4)
|
||||
.Case("v5", IsVec128 ? AArch64::Q5 : AArch64::D5)
|
||||
.Case("v6", IsVec128 ? AArch64::Q6 : AArch64::D6)
|
||||
.Case("v7", IsVec128 ? AArch64::Q7 : AArch64::D7)
|
||||
.Case("v8", IsVec128 ? AArch64::Q8 : AArch64::D8)
|
||||
.Case("v9", IsVec128 ? AArch64::Q9 : AArch64::D9)
|
||||
.Case("v10", IsVec128 ? AArch64::Q10 : AArch64::D10)
|
||||
.Case("v11", IsVec128 ? AArch64::Q11 : AArch64::D11)
|
||||
.Case("v12", IsVec128 ? AArch64::Q12 : AArch64::D12)
|
||||
.Case("v13", IsVec128 ? AArch64::Q13 : AArch64::D13)
|
||||
.Case("v14", IsVec128 ? AArch64::Q14 : AArch64::D14)
|
||||
.Case("v15", IsVec128 ? AArch64::Q15 : AArch64::D15)
|
||||
.Case("v16", IsVec128 ? AArch64::Q16 : AArch64::D16)
|
||||
.Case("v17", IsVec128 ? AArch64::Q17 : AArch64::D17)
|
||||
.Case("v18", IsVec128 ? AArch64::Q18 : AArch64::D18)
|
||||
.Case("v19", IsVec128 ? AArch64::Q19 : AArch64::D19)
|
||||
.Case("v20", IsVec128 ? AArch64::Q20 : AArch64::D20)
|
||||
.Case("v21", IsVec128 ? AArch64::Q21 : AArch64::D21)
|
||||
.Case("v22", IsVec128 ? AArch64::Q22 : AArch64::D22)
|
||||
.Case("v23", IsVec128 ? AArch64::Q23 : AArch64::D23)
|
||||
.Case("v24", IsVec128 ? AArch64::Q24 : AArch64::D24)
|
||||
.Case("v25", IsVec128 ? AArch64::Q25 : AArch64::D25)
|
||||
.Case("v26", IsVec128 ? AArch64::Q26 : AArch64::D26)
|
||||
.Case("v27", IsVec128 ? AArch64::Q27 : AArch64::D27)
|
||||
.Case("v28", IsVec128 ? AArch64::Q28 : AArch64::D28)
|
||||
.Case("v29", IsVec128 ? AArch64::Q29 : AArch64::D29)
|
||||
.Case("v30", IsVec128 ? AArch64::Q30 : AArch64::D30)
|
||||
.Case("v31", IsVec128 ? AArch64::Q31 : AArch64::D31)
|
||||
.Default(AArch64::NoRegister);
|
||||
}
|
||||
if (RegNum == AArch64::NoRegister)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -85,12 +85,6 @@ static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
|
||||
static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst,
|
||||
unsigned RegNo, uint64_t Address,
|
||||
const void *Decoder);
|
||||
static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
|
||||
uint64_t Address,
|
||||
const void *Decoder);
|
||||
static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst,
|
||||
unsigned RegNo, uint64_t Address,
|
||||
const void *Decoder);
|
||||
|
||||
static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
|
||||
unsigned OptionHiS,
|
||||
@ -355,28 +349,6 @@ DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
|
||||
return MCDisassembler::Success;
|
||||
}
|
||||
|
||||
static DecodeStatus DecodeVPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
|
||||
uint64_t Address,
|
||||
const void *Decoder) {
|
||||
if (RegNo > 31)
|
||||
return MCDisassembler::Fail;
|
||||
|
||||
uint16_t Register = getReg(Decoder, AArch64::VPR64RegClassID, RegNo);
|
||||
Inst.addOperand(MCOperand::CreateReg(Register));
|
||||
return MCDisassembler::Success;
|
||||
}
|
||||
|
||||
static DecodeStatus
|
||||
DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo,
|
||||
uint64_t Address, const void *Decoder) {
|
||||
if (RegNo > 31)
|
||||
return MCDisassembler::Fail;
|
||||
|
||||
uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo);
|
||||
Inst.addOperand(MCOperand::CreateReg(Register));
|
||||
return MCDisassembler::Success;
|
||||
}
|
||||
|
||||
static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst,
|
||||
unsigned OptionHiS,
|
||||
uint64_t Address,
|
||||
@ -608,11 +580,11 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn,
|
||||
unsigned IsToVec = fieldFromInstruction(Insn, 16, 1);
|
||||
|
||||
if (IsToVec) {
|
||||
DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder);
|
||||
DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder);
|
||||
DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder);
|
||||
} else {
|
||||
DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder);
|
||||
DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder);
|
||||
DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder);
|
||||
}
|
||||
|
||||
// Add the lane
|
||||
|
@ -368,6 +368,14 @@ AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
|
||||
O << "#" << (Imm * MemScale);
|
||||
}
|
||||
|
||||
void AArch64InstPrinter::printVPRRegister(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
unsigned Reg = MI->getOperand(OpNo).getReg();
|
||||
std::string Name = getRegisterName(Reg);
|
||||
Name[0] = 'v';
|
||||
O << Name;
|
||||
}
|
||||
|
||||
void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
const MCOperand &Op = MI->getOperand(OpNo);
|
||||
|
@ -157,6 +157,7 @@ public:
|
||||
void printRegExtendOperand(const MCInst *MI, unsigned OpNum,
|
||||
raw_ostream &O, A64SE::ShiftExtSpecifiers Ext);
|
||||
|
||||
void printVPRRegister(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
|
||||
|
||||
|
@ -195,13 +195,15 @@
|
||||
//----------------------------------------------------------------------
|
||||
// Vector Move - register
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
// FIXME: these should all print with the "mov" syntax.
|
||||
mov v0.8b, v31.8b
|
||||
mov v15.16b, v16.16b
|
||||
orr v0.8b, v31.8b, v31.8b
|
||||
orr v15.16b, v16.16b, v16.16b
|
||||
|
||||
// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
|
||||
// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]
|
||||
// CHECK: mov v0.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
|
||||
// CHECK: mov v15.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]
|
||||
// CHECK: orr v0.8b, v31.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
|
||||
// CHECK: orr v15.16b, v16.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]
|
||||
// CHECK: orr v0.8b, v31.8b, v31.8b // encoding: [0xe0,0x1f,0xbf,0x0e]
|
||||
// CHECK: orr v15.16b, v16.16b, v16.16b // encoding: [0x0f,0x1e,0xb0,0x4e]
|
||||
|
||||
|
@ -131,8 +131,11 @@
|
||||
#------------------------------------------------------------------------------
|
||||
# Vector Move - register
|
||||
#------------------------------------------------------------------------------
|
||||
# CHECK: mov v1.16b, v15.16b
|
||||
# CHECK: mov v25.8b, v4.8b
|
||||
|
||||
# FIXME: these should print as "mov", but TableGen can't handle it.
|
||||
|
||||
# CHECK: orr v1.16b, v15.16b, v15.16b
|
||||
# CHECK: orr v25.8b, v4.8b, v4.8b
|
||||
0xe1 0x1d 0xaf 0x4e
|
||||
0x99 0x1c 0xa4 0x0e
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user