Implement rdar://7860110 (also in target/readme.txt), narrowing
a load/or/and/store sequence into a narrower store when it is safe.
Daniel tells me that clang will start producing this sort of thing
with bitfields, and this does trigger a few dozen times on 176.gcc
produced by llvm-gcc even now.

This compiles code like CodeGen/X86/2009-05-28-DAGCombineCrash.ll
into:

  movl %eax, 36(%rdi)

instead of:

  movl $4294967295, %eax   ## imm = 0xFFFFFFFF
  andq 32(%rdi), %rax
  shlq $32, %rcx
  addq %rax, %rcx
  movq %rcx, 32(%rdi)

and each of the testcases into a single store. Each of them used to
compile into craziness like this:

_test4:
  movl $65535, %eax        ## imm = 0xFFFF
  andl (%rdi), %eax
  shll $16, %esi
  addl %eax, %esi
  movl %esi, (%rdi)
  ret

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@101343 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ae541aad5c
commit
2392ae7d73
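For context, here is a minimal sketch (not part of the commit; the struct and function names are illustrative) of the kind of bitfield update clang lowers to a load/and/shl/or/store sequence, which this combine can now narrow to a single small store:

// Updating 'hi' forces clang to load the whole i32, mask out bytes 2-3
// with 0x0000FFFF, shift the new value into place, 'or' it in, and store
// the i32 back.  With this combine the whole sequence becomes one 2-byte
// store, e.g. "movw %si, 2(%rdi)" on x86-64 (compare test4 below).
struct Flags {
  unsigned lo : 16;  // bytes 0-1, left untouched by setHi
  unsigned hi : 16;  // bytes 2-3, fully overwritten
};

void setHi(Flags *f, unsigned short v) {
  f->hi = v;
}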
lib/CodeGen/SelectionDAG/DAGCombiner.cpp

@@ -254,24 +254,28 @@ namespace {
     /// looking for a better chain (aliasing node.)
     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 
-    /// getShiftAmountTy - Returns a type large enough to hold any valid
-    /// shift amount - before type legalization these can be huge.
-    EVT getShiftAmountTy() {
-      return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
-    }
-
-public:
+  public:
     DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
-      : DAG(D),
-        TLI(D.getTargetLoweringInfo()),
-        Level(Unrestricted),
-        OptLevel(OL),
-        LegalOperations(false),
-        LegalTypes(false),
-        AA(A) {}
+      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted),
+        OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
 
     /// Run - runs the dag combiner on all nodes in the work list
     void Run(CombineLevel AtLevel);
+
+    SelectionDAG &getDAG() const { return DAG; }
+
+    /// getShiftAmountTy - Returns a type large enough to hold any valid
+    /// shift amount - before type legalization these can be huge.
+    EVT getShiftAmountTy() {
+      return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
+    }
+
+    /// isTypeLegal - This method returns true if we are running before type
+    /// legalization or if the specified VT is legal.
+    bool isTypeLegal(const EVT &VT) {
+      if (!LegalTypes) return true;
+      return TLI.isTypeLegal(VT);
+    }
   };
 }
@@ -3950,7 +3954,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
                         VT.isInteger() && !VT.isVector()) {
     unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
-    if (TLI.isTypeLegal(IntXVT) || !LegalTypes) {
+    if (isTypeLegal(IntXVT)) {
       SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
                               IntXVT, N0.getOperand(1));
       AddToWorkList(X.getNode());
@@ -4465,7 +4469,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
 
   // fold (fp_round_inreg c1fp) -> c1fp
-  if (N0CFP && (TLI.isTypeLegal(EVT) || !LegalTypes)) {
+  if (N0CFP && isTypeLegal(EVT)) {
     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
     return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
   }
@@ -5146,6 +5150,123 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
   return SDValue();
 }
 
+/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
+/// load is having specific bytes cleared out.  If so, return the byte size
+/// being masked out and the shift amount.
+static std::pair<unsigned, unsigned>
+CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
+  std::pair<unsigned, unsigned> Result(0, 0);
+
+  // Check for the structure we're looking for.
+  if (V->getOpcode() != ISD::AND ||
+      !isa<ConstantSDNode>(V->getOperand(1)) ||
+      !ISD::isNormalLoad(V->getOperand(0).getNode()))
+    return Result;
+
+  // Check the chain and pointer.  The store should be chained directly to the
+  // load (TODO: Or through a TF node!) since it's to the same address.
+  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
+  if (LD->getBasePtr() != Ptr ||
+      V->getOperand(0).getNode() != Chain.getNode())
+    return Result;
+
+  // This only handles simple types.
+  if (V.getValueType() != MVT::i16 &&
+      V.getValueType() != MVT::i32 &&
+      V.getValueType() != MVT::i64)
+    return Result;
+
+  // Check the constant mask.  Invert it so that the bits being masked out are
+  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
+  // follow the sign bit for uniformity.
+  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
+  unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
+  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
+  unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
+  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
+  if (NotMaskLZ == 64) return Result;  // All zero mask.
+
+  // See if we have a continuous run of bits.  If so, we have 0*1+0*
+  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
+    return Result;
+
+  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
+  if (V.getValueType() != MVT::i64 && NotMaskLZ)
+    NotMaskLZ -= 64-V.getValueSizeInBits();
+
+  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
+  switch (MaskedBytes) {
+  case 1:
+  case 2:
+  case 4: break;
+  default: return Result;  // All one mask, or 5-byte mask.
+  }
+
+  // Verify that the first bit starts at a multiple of mask so that the access
+  // is aligned the same as the access width.
+  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
+
+  Result.first = MaskedBytes;
+  Result.second = NotMaskTZ/8;
+  return Result;
+}
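To make the mask arithmetic above concrete, here is a small standalone C++ sketch (illustrative only, not LLVM code; GCC builtins stand in for CountLeadingZeros_64 / CountTrailingZeros_64 / CountTrailingOnes_64), run on the i32 mask 0xFFFF00FF from test2 in the new test file:

#include <cstdint>
#include <cstdio>
#include <utility>

// Standalone rendition of the CheckForMaskedLoad mask math, for
// illustration only.  Returns {MaskedBytes, ByteShift-in-bytes},
// or {0, 0} if the mask is not a narrowable byte run.
static std::pair<unsigned, unsigned> analyzeMask(int64_t SExtMask,
                                                 unsigned VTBits) {
  if (SExtMask == 0) return {0, 0};          // degenerate: mask clears all
  uint64_t NotMask = ~(uint64_t)SExtMask;    // bits being cleared become 1
  if (NotMask == 0) return {0, 0};           // all-ones mask clears nothing
  unsigned LZ = __builtin_clzll(NotMask);
  unsigned TZ = __builtin_ctzll(NotMask);
  if ((LZ & 7) || (TZ & 7)) return {0, 0};   // runs must be whole bytes
  // Require one contiguous run of set bits: 0*1+0*.
  if (__builtin_ctzll(~(NotMask >> TZ)) + TZ + LZ != 64) return {0, 0};
  if (LZ) LZ -= 64 - VTBits;                 // rescale from i64 to VT width
  unsigned MaskedBytes = (VTBits - LZ - TZ) / 8;
  if (MaskedBytes != 1 && MaskedBytes != 2 && MaskedBytes != 4)
    return {0, 0};                           // all-one or 5-byte mask, etc.
  if (TZ && (TZ / 8) % MaskedBytes) return {0, 0};  // keep natural alignment
  return {MaskedBytes, TZ / 8};
}

int main() {
  // 0xFFFF00FF on i32 clears exactly byte 1, so expect {1, 1}:
  // a 1-byte store at byte offset 1.
  std::pair<unsigned, unsigned> R = analyzeMask((int32_t)0xFFFF00FF, 32);
  std::printf("MaskedBytes=%u ByteShift=%u\n", R.first, R.second);
}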
+
+
+/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
+/// provides a value as specified by MaskInfo.  If so, replace the specified
+/// store with a narrower store of truncated IVal.
+static SDNode *
+ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
+                                SDValue IVal, StoreSDNode *St,
+                                DAGCombiner *DC) {
+  unsigned NumBytes = MaskInfo.first;
+  unsigned ByteShift = MaskInfo.second;
+  SelectionDAG &DAG = DC->getDAG();
+
+  // Check to see if IVal is all zeros in the part being masked in by the 'or'
+  // that uses this.  If not, this is not a replacement.
+  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
+                                  ByteShift*8, (ByteShift+NumBytes)*8);
+  if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
+
+  // Check that it is legal on the target to do this.  It is legal if the new
+  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
+  // legalization.
+  MVT VT = MVT::getIntegerVT(NumBytes*8);
+  if (!DC->isTypeLegal(VT))
+    return 0;
+
+  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
+  // shifted by ByteShift and truncated down to NumBytes.
+  if (ByteShift)
+    IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
+                       DAG.getConstant(ByteShift*8, DC->getShiftAmountTy()));
+
+  // Figure out the offset for the store and the alignment of the access.
+  unsigned StOffset;
+  unsigned NewAlign = St->getAlignment();
+
+  if (DAG.getTargetLoweringInfo().isLittleEndian())
+    StOffset = ByteShift;
+  else
+    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
+
+  SDValue Ptr = St->getBasePtr();
+  if (StOffset) {
+    Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
+                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
+    NewAlign = MinAlign(NewAlign, StOffset);
+  }
+
+  // Truncate down to the new size.
+  IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
+
+  ++OpsNarrowed;
+  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+                      St->getSrcValue(), St->getSrcValueOffset()+StOffset,
+                      false, false, NewAlign).getNode();
+}
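The store-offset computation above can be sanity-checked in isolation. A hedged sketch (the helper name is invented for illustration; only the two formulas come from the code above):

#include <cstdio>

// Byte offset of the narrow store relative to the original store address,
// for a StoreSize-byte value whose NumBytes bytes at bit position
// ByteShift*8 are being replaced.
unsigned storeOffset(bool LittleEndian, unsigned StoreSize,
                     unsigned ByteShift, unsigned NumBytes) {
  // Little-endian: byte i of the integer lives at base+i, so the inserted
  // bytes land at base+ByteShift.  Big-endian: byte 0 is the most
  // significant, so count back from the other end of the value.
  return LittleEndian ? ByteShift
                      : StoreSize - ByteShift - NumBytes;
}

int main() {
  // test6 below: one byte inserted into an i64 at bit 40 (ByteShift = 5).
  std::printf("little-endian: %u\n", storeOffset(true, 8, 5, 1));   // 5
  std::printf("big-endian:    %u\n", storeOffset(false, 8, 5, 1));  // 2
}

On little-endian x86-64 this is exactly the 5(%rdi) offset checked by test6.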
 /// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
 /// one of 'or', 'xor', and 'and' of immediates.  If 'op' is only touching some
@@ -5165,6 +5286,28 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
     return SDValue();
 
   unsigned Opc = Value.getOpcode();
+
+  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
+  // is a byte mask indicating a consecutive number of bytes, check to see if
+  // Y is known to provide just those bytes.  If so, we try to replace the
+  // load + replace + store sequence with a single (narrower) store, which makes
+  // the load dead.
+  if (Opc == ISD::OR) {
+    std::pair<unsigned, unsigned> MaskedLoad;
+    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
+    if (MaskedLoad.first)
+      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+                                                  Value.getOperand(1), ST,this))
+        return SDValue(NewST, 0);
+
+    // Or is commutative, so try swapping X and Y.
+    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
+    if (MaskedLoad.first)
+      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+                                                  Value.getOperand(0), ST,this))
+        return SDValue(NewST, 0);
+  }
+
   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
       Value.getOperand(1).getOpcode() != ISD::Constant)
     return SDValue();
@@ -5212,8 +5355,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
     PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
 
   unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
-  if (NewAlign <
-      TLI.getTargetData()->getABITypeAlignment(NewVT.getTypeForEVT(*DAG.getContext())))
+  const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
+  if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
     return SDValue();
 
   SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
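For reference, MinAlign used above returns the largest power of two that divides both of its arguments; a one-line equivalent (a sketch of the behavior, not the LLVM header itself):

// Largest power of two dividing both A and B: the lowest set bit of A|B.
// E.g. minAlign(4, 6) == 2, so an aligned-4 base plus offset 6 is aligned-2.
inline unsigned minAlign(unsigned A, unsigned B) {
  return (A | B) & (~(A | B) + 1);
}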
@@ -5283,8 +5426,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     case MVT::ppcf128:
       break;
     case MVT::f32:
-      if (((TLI.isTypeLegal(MVT::i32) || !LegalTypes) && !LegalOperations &&
-           !ST->isVolatile()) ||
+      if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
           TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
         Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                               bitcastToAPInt().getZExtValue(), MVT::i32);
@@ -5295,7 +5437,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       }
       break;
     case MVT::f64:
-      if (((TLI.isTypeLegal(MVT::i64) || !LegalTypes) && !LegalOperations &&
+      if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
           !ST->isVolatile()) ||
           TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
         Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
@@ -5660,7 +5802,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   }
 
   // Add count and size info.
-  if (!TLI.isTypeLegal(VT) && LegalTypes)
+  if (!isTypeLegal(VT))
     return SDValue();
 
   // Return the new VECTOR_SHUFFLE node.
lib/Target/README.txt

@@ -263,19 +263,6 @@ if anyone cared enough about sincos.
 
 //===---------------------------------------------------------------------===//
 
-Turn this into a single byte store with no load (the other 3 bytes are
-unmodified):
-
-define void @test(i32* %P) {
-  %tmp = load i32* %P
-  %tmp14 = or i32 %tmp, 3305111552
-  %tmp15 = and i32 %tmp14, 3321888767
-  store i32 %tmp15, i32* %P
-  ret void
-}
-
-//===---------------------------------------------------------------------===//
-
 quantum_sigma_x in 462.libquantum contains the following loop:
 
   for(i=0; i<reg->size; i++)
test/CodeGen/X86/store-narrow.ll (new file, 81 lines)

@@ -0,0 +1,81 @@
+; rdar://7860110
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.2"
+
+define void @test1(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
+entry:
+  %A = load i32* %a0, align 4
+  %B = and i32 %A, -256     ; 0xFFFFFF00
+  %C = zext i8 %a1 to i32
+  %D = or i32 %C, %B
+  store i32 %D, i32* %a0, align 4
+  ret void
+
+; CHECK: test1:
+; CHECK: movb %sil, (%rdi)
+}
+
+define void @test2(i32* nocapture %a0, i8 zeroext %a1) nounwind ssp {
+entry:
+  %A = load i32* %a0, align 4
+  %B = and i32 %A, -65281   ; 0xFFFF00FF
+  %C = zext i8 %a1 to i32
+  %CS = shl i32 %C, 8
+  %D = or i32 %B, %CS
+  store i32 %D, i32* %a0, align 4
+  ret void
+; CHECK: test2:
+; CHECK: movb %sil, 1(%rdi)
+}
+
+define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
+entry:
+  %A = load i32* %a0, align 4
+  %B = and i32 %A, -65536   ; 0xFFFF0000
+  %C = zext i16 %a1 to i32
+  %D = or i32 %B, %C
+  store i32 %D, i32* %a0, align 4
+  ret void
+; CHECK: test3:
+; CHECK: movw %si, (%rdi)
+}
+
+define void @test4(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
+entry:
+  %A = load i32* %a0, align 4
+  %B = and i32 %A, 65535    ; 0x0000FFFF
+  %C = zext i16 %a1 to i32
+  %CS = shl i32 %C, 16
+  %D = or i32 %B, %CS
+  store i32 %D, i32* %a0, align 4
+  ret void
+; CHECK: test4:
+; CHECK: movw %si, 2(%rdi)
+}
+
+define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
+entry:
+  %A = load i64* %a0, align 4
+  %B = and i64 %A, -4294901761    ; 0xFFFFFFFF0000FFFF
+  %C = zext i16 %a1 to i64
+  %CS = shl i64 %C, 16
+  %D = or i64 %B, %CS
+  store i64 %D, i64* %a0, align 4
+  ret void
+; CHECK: test5:
+; CHECK: movw %si, 2(%rdi)
+}
+
+define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
+entry:
+  %A = load i64* %a0, align 4
+  %B = and i64 %A, -280375465082881    ; 0xFFFF00FFFFFFFFFF
+  %C = zext i8 %a1 to i64
+  %CS = shl i64 %C, 40
+  %D = or i64 %B, %CS
+  store i64 %D, i64* %a0, align 4
+  ret void
+; CHECK: test6:
+; CHECK: movb %sil, 5(%rdi)
+}