1
0
mirror of https://github.com/c64scene-ar/llvm-6502.git synced 2025-03-31 11:32:42 +00:00

Try to reuse the value when lowering memset.

This allows us to compile:
  void test(char *s, int a) {
    __builtin_memset(s, a, 15);
  }
into 1 mul + 3 stores instead of 3 muls + 3 stores.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122710 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Benjamin Kramer 2011-01-02 19:57:05 +00:00
parent 8c06aa1c59
commit 80220369b0
3 changed files with 30 additions and 47 deletions
lib
CodeGen/SelectionDAG
Target/X86
test/CodeGen/X86

@ -3527,16 +3527,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SmallVector<SDValue, 8> OutChains;
uint64_t DstOff = 0;
unsigned NumMemOps = MemOps.size();
// Find the largest store and generate the bit pattern for it.
EVT LargestVT = MemOps[0];
for (unsigned i = 1; i < NumMemOps; i++)
if (MemOps[i].bitsGT(LargestVT))
LargestVT = MemOps[i];
SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
unsigned VTSize = VT.getSizeInBits() / 8;
SDValue Value = getMemsetValue(Src, VT, DAG, dl);
// If this store is smaller than the largest store see whether we can get
// the smaller value for free with a truncate.
SDValue Value = MemSetValue;
if (VT.bitsLT(LargestVT)) {
if (!LargestVT.isVector() && !VT.isVector() &&
TLI.isTruncateFree(LargestVT, VT))
Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
else
Value = getMemsetValue(Src, VT, DAG, dl);
}
assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
DstPtrInfo.getWithOffset(DstOff),
isVol, false, Align);
OutChains.push_back(Store);
DstOff += VTSize;
DstOff += VT.getSizeInBits() / 8;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,

@ -41,50 +41,6 @@ saved a few instructions.
//===---------------------------------------------------------------------===//
Poor codegen:
int X[2];
int b;
void test(void) {
memset(X, b, 2*sizeof(X[0]));
}
llc:
movq _b@GOTPCREL(%rip), %rax
movzbq (%rax), %rax
movq %rax, %rcx
shlq $8, %rcx
orq %rax, %rcx
movq %rcx, %rax
shlq $16, %rax
orq %rcx, %rax
movq %rax, %rcx
shlq $32, %rcx
movq _X@GOTPCREL(%rip), %rdx
orq %rax, %rcx
movq %rcx, (%rdx)
ret
gcc:
movq _b@GOTPCREL(%rip), %rax
movabsq $72340172838076673, %rdx
movzbq (%rax), %rax
imulq %rdx, %rax
movq _X@GOTPCREL(%rip), %rdx
movq %rax, (%rdx)
ret
And the codegen is even worse for the following
(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
void fill1(char *s, int a)
{
__builtin_memset(s, a, 15);
}
For this version, we duplicate the computation of the constant to store.
//===---------------------------------------------------------------------===//
It's not possible to reference the AH, BH, CH, and DH registers in an instruction
requiring a REX prefix. However, divb and mulb both produce results in AH. If isel
emits a CopyFromReg which gets turned into a movb and that can be allocated a

@ -28,3 +28,12 @@ entry:
; CHECK: imull $16843009
}
; Regression test: memset of 15 bytes with a non-constant i8 fill value.
; The FileCheck directives in the body expect one imull by 16843009
; (0x01010101, which replicates the byte across a 32-bit word) and no further
; imul between it and the ret, i.e. the replicated value is computed once and
; reused rather than recomputed for each store.
define void @t4(i8* nocapture %s, i8 %a) nounwind {
entry:
tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false)
ret void
; CHECK: t4:
; CHECK: imull $16843009
; CHECK-NOT: imul
; CHECK: ret
}