diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b639b7c9cf7..110812c4371 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3527,16 +3527,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, SmallVector OutChains; uint64_t DstOff = 0; unsigned NumMemOps = MemOps.size(); + + // Find the largest store and generate the bit pattern for it. + EVT LargestVT = MemOps[0]; + for (unsigned i = 1; i < NumMemOps; i++) + if (MemOps[i].bitsGT(LargestVT)) + LargestVT = MemOps[i]; + SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl); + for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; - unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value = getMemsetValue(Src, VT, DAG, dl); + + // If this store is smaller than the largest store see whether we can get + // the smaller value for free with a truncate. + SDValue Value = MemSetValue; + if (VT.bitsLT(LargestVT)) { + if (!LargestVT.isVector() && !VT.isVector() && + TLI.isTruncateFree(LargestVT, VT)) + Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue); + else + Value = getMemsetValue(Src, VT, DAG, dl); + } + assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore(Chain, dl, Value, getMemBasePlusOffset(Dst, DstOff, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); OutChains.push_back(Store); - DstOff += VTSize; + DstOff += VT.getSizeInBits() / 8; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, diff --git a/lib/Target/X86/README-X86-64.txt b/lib/Target/X86/README-X86-64.txt index 78c4dc00ee7..e21d69a7bcb 100644 --- a/lib/Target/X86/README-X86-64.txt +++ b/lib/Target/X86/README-X86-64.txt @@ -41,50 +41,6 @@ saved a few instructions. //===---------------------------------------------------------------------===// -Poor codegen: - -int X[2]; -int b; -void test(void) { - memset(X, b, 2*sizeof(X[0])); -} - -llc: - movq _b@GOTPCREL(%rip), %rax - movzbq (%rax), %rax - movq %rax, %rcx - shlq $8, %rcx - orq %rax, %rcx - movq %rcx, %rax - shlq $16, %rax - orq %rcx, %rax - movq %rax, %rcx - shlq $32, %rcx - movq _X@GOTPCREL(%rip), %rdx - orq %rax, %rcx - movq %rcx, (%rdx) - ret - -gcc: - movq _b@GOTPCREL(%rip), %rax - movabsq $72340172838076673, %rdx - movzbq (%rax), %rax - imulq %rdx, %rax - movq _X@GOTPCREL(%rip), %rdx - movq %rax, (%rdx) - ret - -And the codegen is even worse for the following -(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103): - void fill1(char *s, int a) - { - __builtin_memset(s, a, 15); - } - -For this version, we duplicate the computation of the constant to store. - -//===---------------------------------------------------------------------===// - It's not possible to reference AH, BH, CH, and DH registers in an instruction requiring REX prefix. However, divb and mulb both produce results in AH. If isel emits a CopyFromReg which gets turned into a movb and that can be allocated a diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll index 11f619cd6da..ae6b6e9772b 100644 --- a/test/CodeGen/X86/memset-2.ll +++ b/test/CodeGen/X86/memset-2.ll @@ -28,3 +28,12 @@ entry: ; CHECK: imull $16843009 } +define void @t4(i8* nocapture %s, i8 %a) nounwind { +entry: + tail call void @llvm.memset.p0i8.i32(i8* %s, i8 %a, i32 15, i32 1, i1 false) + ret void +; CHECK: t4: +; CHECK: imull $16843009 +; CHECK-NOT: imul +; CHECK: ret +}