A bit more memset / memcpy optimization.

Turns them into calls to memset / memcpy when both of the following hold:
1) the buffer(s) are not DWORD aligned, and 2) the size is not known to be
greater than or equal to some minimum value (currently 128).


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26224 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2006-02-16 00:21:07 +00:00
parent a48654ef23
commit 18a8452f3d
3 changed files with 53 additions and 7 deletions

View File

@ -1772,6 +1772,25 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
if (Align == 0) Align = 1;
ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
// If not DWORD aligned, call memset unless the size is a constant that is at
// least the threshold. memset knows how to align to the right boundary first.
if ((Align & 3) != 0 &&
!(I && I->getValue() >= Subtarget->getMinRepStrSizeThreshold())) {
MVT::ValueType IntPtr = getPointerTy();
const Type *IntPtrTy = getTargetData().getIntPtrType();
std::vector<std::pair<SDOperand, const Type*> > Args;
Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
// Extend the ubyte argument to be an int value for the call.
SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
Args.push_back(std::make_pair(Val, IntPtrTy));
Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
std::pair<SDOperand,SDOperand> CallResult =
LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
return CallResult.second;
}
MVT::ValueType AVT;
SDOperand Count;
if (ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
@ -1782,7 +1801,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
switch (Align & 3) {
case 2: // WORD aligned
AVT = MVT::i16;
if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
if (I)
Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
else
Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
@ -1792,7 +1811,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
break;
case 0: // DWORD aligned
AVT = MVT::i32;
if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
if (I)
Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
else
Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
@ -1812,7 +1831,7 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
InFlag);
InFlag = Chain.getValue(1);
} else {
AVT = MVT::i8;
AVT = MVT::i8;
Count = Op.getOperand(3);
Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
InFlag = Chain.getValue(1);
@ -1832,20 +1851,36 @@ SDOperand X86TargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
if (Align == 0) Align = 1;
ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
// If not DWORD aligned, call memcpy unless the size is a constant that is at
// least the threshold. memcpy knows how to align to the right boundary first.
if ((Align & 3) != 0 &&
!(I && I->getValue() >= Subtarget->getMinRepStrSizeThreshold())) {
MVT::ValueType IntPtr = getPointerTy();
const Type *IntPtrTy = getTargetData().getIntPtrType();
std::vector<std::pair<SDOperand, const Type*> > Args;
Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
std::pair<SDOperand,SDOperand> CallResult =
LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
return CallResult.second;
}
MVT::ValueType AVT;
SDOperand Count;
switch (Align & 3) {
case 2: // WORD aligned
AVT = MVT::i16;
if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
if (I)
Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
else
Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
DAG.getConstant(1, MVT::i8));
Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
break;
case 0: // DWORD aligned
AVT = MVT::i32;
if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
if (I)
Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
else
Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),

View File

@ -146,6 +146,8 @@ static const char *GetCurrentX86CPU() {
X86Subtarget::X86Subtarget(const Module &M, const std::string &FS) {
stackAlignment = 8;
// FIXME: this is a known good value for Yonah. Not sure about others.
MinRepStrSizeThreshold = 128;
indirectExternAndWeakGlobals = false;
X86SSELevel = NoMMXSSE;
X863DNowLevel = NoThreeDNow;

View File

@ -44,6 +44,9 @@ protected:
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
/// Min. memset / memcpy size that is turned into rep/movs, rep/stos ops.
unsigned MinRepStrSizeThreshold;
/// Used by instruction selector
bool indirectExternAndWeakGlobals;
@ -62,6 +65,12 @@ public:
/// function for this subtarget.
unsigned getStackAlignment() const { return stackAlignment; }
/// getMinRepStrSizeThreshold - Returns the minimum memset / memcpy size
/// required to turn the operation into an X86 rep/movs or rep/stos
/// instruction. This threshold is only consulted when the src / dst is not
/// DWORD aligned.
unsigned getMinRepStrSizeThreshold() const { return MinRepStrSizeThreshold; }
/// Returns true if the instruction selector should treat global values
/// referencing external or weak symbols as indirect rather than direct
/// references.