fix PR11334
- FP_EXTEND only supports extending from vectors with matching element counts.
  As a result, extending v2f32 to v2f64 is scalarized, because v2f32 is
  legalized to v4f32, which no longer matches v2f64.
- Add an X86-specific VFPEXT node supporting the v4f32 -> v2f64 extend.
- Add a BUILD_VECTOR lowering helper to recover the original v4f32 -> v2f64
  extend from the scalarized form.
- Enhance the test case to cover different vector widths.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161894 91177308-0d34-0410-b5e6-96231b3b80d8
parent fc1a161d76
commit 7091b2451d
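For context, the failing shape from PR11334, as a minimal LLVM IR sketch (the function name @f is illustrative; the new test at the end of this commit exercises the same pattern across several widths):

; Extending v2f32 to v2f64 on x86-64.
define <2 x double> @f(<2 x float> %v) nounwind {
entry:
  ; Type legalization widens <2 x float> to <4 x float>; since the generic
  ; ISD::FP_EXTEND requires matching source/result element counts, v4f32
  ; cannot feed v2f64 directly, and the extend is scalarized instead.
  %e = fpext <2 x float> %v to <2 x double>
  ret <2 x double> %e
}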
lib/Target/X86/X86ISelLowering.cpp

@@ -5114,6 +5114,82 @@ X86TargetLowering::LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const {
   return SDValue();
 }
 
+// LowerVectorFpExtend - Recognize the scalarized FP_EXTEND from v2f32 to v2f64
+// and convert it into X86ISD::VFPEXT due to the current ISD::FP_EXTEND has the
+// constraint of matching input/output vector elements.
+SDValue
+X86TargetLowering::LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const {
+  DebugLoc DL = Op.getDebugLoc();
+  SDNode *N = Op.getNode();
+  EVT VT = Op.getValueType();
+  unsigned NumElts = Op.getNumOperands();
+
+  // Check supported types and sub-targets.
+  //
+  // Only v2f32 -> v2f64 needs special handling.
+  if (VT != MVT::v2f64 || !Subtarget->hasSSE2())
+    return SDValue();
+
+  SDValue VecIn;
+  EVT VecInVT;
+  SmallVector<int, 8> Mask;
+  EVT SrcVT = MVT::Other;
+
+  // Check the patterns could be translated into X86vfpext.
+  for (unsigned i = 0; i < NumElts; ++i) {
+    SDValue In = N->getOperand(i);
+    unsigned Opcode = In.getOpcode();
+
+    // Skip if the element is undefined.
+    if (Opcode == ISD::UNDEF) {
+      Mask.push_back(-1);
+      continue;
+    }
+
+    // Quit if one of the elements is not defined from 'fpext'.
+    if (Opcode != ISD::FP_EXTEND)
+      return SDValue();
+
+    // Check how the source of 'fpext' is defined.
+    SDValue L2In = In.getOperand(0);
+    EVT L2InVT = L2In.getValueType();
+
+    // Check the original type
+    if (SrcVT == MVT::Other)
+      SrcVT = L2InVT;
+    else if (SrcVT != L2InVT) // Quit if non-homogenous typed.
+      return SDValue();
+
+    // Check whether the value being 'fpext'ed is extracted from the same
+    // source.
+    Opcode = L2In.getOpcode();
+
+    // Quit if it's not extracted with a constant index.
+    if (Opcode != ISD::EXTRACT_VECTOR_ELT ||
+        !isa<ConstantSDNode>(L2In.getOperand(1)))
+      return SDValue();
+
+    SDValue ExtractedFromVec = L2In.getOperand(0);
+
+    if (VecIn.getNode() == 0) {
+      VecIn = ExtractedFromVec;
+      VecInVT = ExtractedFromVec.getValueType();
+    } else if (VecIn != ExtractedFromVec) // Quit if built from more than 1 vec.
+      return SDValue();
+
+    Mask.push_back(cast<ConstantSDNode>(L2In.getOperand(1))->getZExtValue());
+  }
+
+  // Fill the remaining mask as undef.
+  for (unsigned i = NumElts; i < VecInVT.getVectorNumElements(); ++i)
+    Mask.push_back(-1);
+
+  return DAG.getNode(X86ISD::VFPEXT, DL, VT,
+                     DAG.getVectorShuffle(VecInVT, DL,
+                                          VecIn, DAG.getUNDEF(VecInVT),
+                                          &Mask[0]));
+}
+
 SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
@@ -5146,6 +5222,10 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   if (Broadcast.getNode())
     return Broadcast;
 
+  SDValue FpExt = LowerVectorFpExtend(Op, DAG);
+  if (FpExt.getNode())
+    return FpExt;
+
   unsigned EVTBits = ExtVT.getSizeInBits();
 
   unsigned NumZero  = 0;
@@ -11343,6 +11423,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::ATOMNAND64_DAG:     return "X86ISD::ATOMNAND64_DAG";
   case X86ISD::VZEXT_MOVL:         return "X86ISD::VZEXT_MOVL";
   case X86ISD::VZEXT_LOAD:         return "X86ISD::VZEXT_LOAD";
+  case X86ISD::VFPEXT:             return "X86ISD::VFPEXT";
   case X86ISD::VSHLDQ:             return "X86ISD::VSHLDQ";
   case X86ISD::VSRLDQ:             return "X86ISD::VSRLDQ";
   case X86ISD::VSHL:               return "X86ISD::VSHL";
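For intuition, the DAG rewrite performed by LowerVectorFpExtend above can be sketched as follows (schematic node notation in IR-style comments, not verbatim -view-dag output; V stands for the common v4f32 source vector):

; Scalarized form produced by type legalization:
;   v2f64 = build_vector (fp_extend (extract_vector_elt V, 0)),
;                        (fp_extend (extract_vector_elt V, 1))
; Recovered form:
;   v2f64 = X86ISD::VFPEXT (vector_shuffle<0,1,u,u> V, undef)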
lib/Target/X86/X86ISelLowering.h

@@ -227,6 +227,9 @@ namespace llvm {
       // VSEXT_MOVL - Vector move low and sign extend.
      VSEXT_MOVL,
 
+      // VFPEXT - Vector FP extend.
+      VFPEXT,
+
       // VSHL, VSRL - 128-bit vector logical left / right shift
       VSHLDQ, VSRLDQ,
 
@@ -828,6 +831,8 @@ namespace llvm {
     SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
     SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;
 
+    SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
+
     virtual SDValue
     LowerFormalArguments(SDValue Chain,
                          CallingConv::ID CallConv, bool isVarArg,
lib/Target/X86/X86InstrFragmentsSIMD.td

@@ -81,6 +81,11 @@ def X86vsmovl  : SDNode<"X86ISD::VSEXT_MOVL",
 
 def X86vzload  : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad,
                         [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+
+def X86vfpext  : SDNode<"X86ISD::VFPEXT",
+                        SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+                                             SDTCisFP<0>, SDTCisFP<1>]>>;
+
 def X86vshldq  : SDNode<"X86ISD::VSHLDQ",    SDTIntShiftOp>;
 def X86vshrdq  : SDNode<"X86ISD::VSRLDQ",    SDTIntShiftOp>;
 def X86cmpp    : SDNode<"X86ISD::CMPP",      SDTX86VFCMP>;
lib/Target/X86/X86InstrSSE.td

@@ -2101,12 +2101,20 @@ let Predicates = [HasAVX] in {
   def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
             (VCVTPD2PSYrm addr:$src)>;
 
+  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+            (VCVTPS2PDrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
             (VCVTPS2PDYrr VR128:$src)>;
   def : Pat<(v4f64 (fextend (loadv4f32 addr:$src))),
             (VCVTPS2PDYrm addr:$src)>;
 }
 
+let Predicates = [HasSSE2] in {
+  // Match fextend for 128 conversions
+  def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
+            (CVTPS2PDrr VR128:$src)>;
+}
+
 //===----------------------------------------------------------------------===//
 // SSE 1 & 2 - Compare Instructions
 //===----------------------------------------------------------------------===//
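Combined with the patterns above, the v2f32 case from the new test should now select to a single packed conversion rather than a pair of scalar ones (a hedged sketch; exact register assignment depends on the surrounding code):

; %f1 = fpext <2 x float> %v1 to <2 x double>
;   SSE2 (HasSSE2 pattern): cvtps2pd  %xmm0, %xmm0
;   AVX  (HasAVX pattern):  vcvtps2pd %xmm0, %xmm0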
test/CodeGen/X86/pr11334.ll (new file, 56 lines)

@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=corei7 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-pc-linux -mcpu=core-avx-i | FileCheck %s --check-prefix=AVX
+
+define <2 x double> @v2f2d_ext_vec(<2 x float> %v1) nounwind {
+entry:
+; CHECK: v2f2d_ext_vec
+; CHECK: cvtps2pd
+; AVX: v2f2d_ext_vec
+; AVX: vcvtps2pd
+  %f1 = fpext <2 x float> %v1 to <2 x double>
+  ret <2 x double> %f1
+}
+
+define <3 x double> @v3f2d_ext_vec(<3 x float> %v1) nounwind {
+entry:
+; CHECK: v3f2d_ext_vec
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; AVX: v3f2d_ext_vec
+; AVX: vcvtps2pd
+; AVX: ret
+  %f1 = fpext <3 x float> %v1 to <3 x double>
+  ret <3 x double> %f1
+}
+
+define <4 x double> @v4f2d_ext_vec(<4 x float> %v1) nounwind {
+entry:
+; CHECK: v4f2d_ext_vec
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; AVX: v4f2d_ext_vec
+; AVX: vcvtps2pd
+; AVX: ret
+  %f1 = fpext <4 x float> %v1 to <4 x double>
+  ret <4 x double> %f1
+}
+
+define <8 x double> @v8f2d_ext_vec(<8 x float> %v1) nounwind {
+entry:
+; CHECK: v8f2d_ext_vec
+; CHECK: cvtps2pd
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; CHECK: movhlps
+; CHECK: cvtps2pd
+; AVX: v8f2d_ext_vec
+; AVX: vcvtps2pd
+; AVX: vextractf128
+; AVX: vcvtps2pd
+; AVX: ret
+  %f1 = fpext <8 x float> %v1 to <8 x double>
+  ret <8 x double> %f1
+}