mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-03-31 11:32:42 +00:00
Try to reuse the value when lowering memset.
This allows us to compile:
  void test(char *s, int a) { __builtin_memset(s, a, 15); }
into 1 mul + 3 stores instead of 3 muls + 3 stores.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122710 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8c06aa1c59
commit
80220369b0
@ -3527,16 +3527,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
|
||||
SmallVector<SDValue, 8> OutChains;
|
||||
uint64_t DstOff = 0;
|
||||
unsigned NumMemOps = MemOps.size();
|
||||
|
||||
// Find the largest store and generate the bit pattern for it.
|
||||
EVT LargestVT = MemOps[0];
|
||||
for (unsigned i = 1; i < NumMemOps; i++)
|
||||
if (MemOps[i].bitsGT(LargestVT))
|
||||
LargestVT = MemOps[i];
|
||||
SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
|
||||
|
||||
for (unsigned i = 0; i < NumMemOps; i++) {
|
||||
EVT VT = MemOps[i];
|
||||
unsigned VTSize = VT.getSizeInBits() / 8;
|
||||
SDValue Value = getMemsetValue(Src, VT, DAG, dl);
|
||||
|
||||
// If this store is smaller than the largest store see whether we can get
|
||||
// the smaller value for free with a truncate.
|
||||
SDValue Value = MemSetValue;
|
||||
if (VT.bitsLT(LargestVT)) {
|
||||
if (!LargestVT.isVector() && !VT.isVector() &&
|
||||
TLI.isTruncateFree(LargestVT, VT))
|
||||
Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
|
||||
else
|
||||
Value = getMemsetValue(Src, VT, DAG, dl);
|
||||
}
|
||||
assert(Value.getValueType() == VT && "Value with wrong type.");
|
||||
SDValue Store = DAG.getStore(Chain, dl, Value,
|
||||
getMemBasePlusOffset(Dst, DstOff, DAG),
|
||||
DstPtrInfo.getWithOffset(DstOff),
|
||||
isVol, false, Align);
|
||||
OutChains.push_back(Store);
|
||||
DstOff += VTSize;
|
||||
DstOff += VT.getSizeInBits() / 8;
|
||||
}
|
||||
|
||||
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
|
||||
|
@ -41,50 +41,6 @@ saved a few instructions.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Poor codegen:
|
||||
|
||||
int X[2];
|
||||
int b;
|
||||
void test(void) {
|
||||
memset(X, b, 2*sizeof(X[0]));
|
||||
}
|
||||
|
||||
llc:
|
||||
movq _b@GOTPCREL(%rip), %rax
|
||||
movzbq (%rax), %rax
|
||||
movq %rax, %rcx
|
||||
shlq $8, %rcx
|
||||
orq %rax, %rcx
|
||||
movq %rcx, %rax
|
||||
shlq $16, %rax
|
||||
orq %rcx, %rax
|
||||
movq %rax, %rcx
|
||||
shlq $32, %rcx
|
||||
movq _X@GOTPCREL(%rip), %rdx
|
||||
orq %rax, %rcx
|
||||
movq %rcx, (%rdx)
|
||||
ret
|
||||
|
||||
gcc:
|
||||
movq _b@GOTPCREL(%rip), %rax
|
||||
movabsq $72340172838076673, %rdx
|
||||
movzbq (%rax), %rax
|
||||
imulq %rdx, %rax
|
||||
movq _X@GOTPCREL(%rip), %rdx
|
||||
movq %rax, (%rdx)
|
||||
ret
|
||||
|
||||
And the codegen is even worse for the following
|
||||
(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
|
||||
void fill1(char *s, int a)
|
||||
{
|
||||
__builtin_memset(s, a, 15);
|
||||
}
|
||||
|
||||
For this version, we duplicate the computation of the constant to store.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
It's not possible to reference AH, BH, CH, and DH registers in an instruction
|
||||
requiring a REX prefix. However, divb and mulb both produce results in AH. If isel
|
||||
emits a CopyFromReg which gets turned into a movb and that can be allocated a
|
||||
|
@ -28,3 +28,12 @@ entry:
|
||||
; CHECK: imull $16843009
|
||||
}
|
||||
|
||||
define void @t4(i8* nocapture %s, i8 %a) nounwind {
|
||||
entry:
|
||||
tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false)
|
||||
ret void
|
||||
; CHECK: t4:
|
||||
; CHECK: imull $16843009
|
||||
; CHECK-NOT: imul
|
||||
; CHECK: ret
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user