Lower the i8 extension in memset to a multiply instead of a potentially long series of shifts and ors.

We could implement a DAGCombine to turn x * 0x0101 back into logic operations
on targets that don't support the multiply, or where it is slow (e.g. P4), if
someone cares enough.

Example code:
  void test(char *s, int a) {
      __builtin_memset(s, a, 8);
  }
before:
  _test:                                  ## @test
    movzbl  8(%esp), %eax
    movl  %eax, %ecx
    shll  $8, %ecx
    orl %eax, %ecx
    movl  %ecx, %eax
    shll  $16, %eax
    orl %ecx, %eax
    movl  4(%esp), %ecx
    movl  %eax, 4(%ecx)
    movl  %eax, (%ecx)
    ret
after:
  _test:                                  ## @test
    movzbl  8(%esp), %eax
    imull $16843009, %eax, %eax   ## imm = 0x1010101
    movl  4(%esp), %ecx
    movl  %eax, 4(%ecx)
    movl  %eax, (%ecx)
    ret


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122707 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Benjamin Kramer 2011-01-02 19:44:58 +00:00
parent 3458534f11
commit 8c06aa1c59
2 changed files with 28 additions and 15 deletions

View File

@ -3132,6 +3132,17 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
&ArgChains[0], ArgChains.size());
}
/// SplatByte - Build a NumBits-wide APInt whose bits are filled with
/// repeated copies of ByteVal.
///
/// The value starts with ByteVal in the low 8 bits. Every pass ORs the value
/// with a copy of itself shifted left by the width populated so far, so the
/// populated width doubles each time. When NumBits is 8 or less the loop body
/// never runs and the result is just APInt(NumBits, ByteVal).
static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
  APInt Splat(NumBits, ByteVal);

  unsigned Filled = 8;          // Width already holding copies of the byte.
  unsigned Remaining = NumBits; // Halved every pass; controls the pass count.
  while (Remaining > 8) {
    Splat = Splat | (Splat << Filled);
    Filled *= 2;
    Remaining /= 2;
  }

  return Splat;
}
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@ -3140,27 +3151,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
APInt Val = APInt(NumBits, C->getZExtValue() & 255);
unsigned Shift = 8;
for (unsigned i = NumBits; i > 8; i >>= 1) {
Val = (Val << Shift) | Val;
Shift <<= 1;
}
APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
if (VT.isInteger())
return DAG.getConstant(Val, VT);
return DAG.getConstantFP(APFloat(Val), VT);
}
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
unsigned Shift = 8;
for (unsigned i = NumBits; i > 8; i >>= 1) {
Value = DAG.getNode(ISD::OR, dl, VT,
DAG.getNode(ISD::SHL, dl, VT, Value,
DAG.getConstant(Shift,
TLI.getShiftAmountTy())),
Value);
Shift <<= 1;
if (NumBits > 8) {
// Use a multiplication with 0x010101... to extend the input to the
// required length.
APInt Magic = SplatByte(NumBits, 0x01);
Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
}
return Value;

View File

@ -17,3 +17,14 @@ entry:
call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
unreachable
}
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
; t3 - memset of 8 bytes whose fill value %a is a non-constant i8. The CHECK
; lines below expect the generated code to contain an imull by 16843009
; (0x01010101), i.e. the byte is widened with a multiply.
define void @t3(i8* nocapture %s, i8 %a) nounwind {
entry:
tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 8, i32 1, i1 false)
ret void
; CHECK: t3:
; CHECK: imull $16843009
}