diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 88502549a59..4e66738f680 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -325,6 +325,44 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG)
   return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI);
 }
 
+static bool
+IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase,
+                                    int64_t &Offset)
+{
+  if (Addr.getOpcode() != ISD::ADD) {
+    return false;
+  }
+  ConstantSDNode *CN = 0;
+  if (!(CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
+    return false;
+  }
+  int64_t off = CN->getSExtValue();
+  const SDValue &Base = Addr.getOperand(0);
+  const SDValue *Root = &Base;
+  if (Base.getOpcode() == ISD::ADD &&
+      Base.getOperand(1).getOpcode() == ISD::SHL) {
+    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Base.getOperand(1)
+                                                      .getOperand(1));
+    if (CN && (CN->getSExtValue() >= 2)) {
+      Root = &Base.getOperand(0);
+    }
+  }
+  if (isa<FrameIndexSDNode>(*Root)) {
+    // All frame indices are word aligned
+    AlignedBase = Base;
+    Offset = off;
+    return true;
+  }
+  if (Root->getOpcode() == XCoreISD::DPRelativeWrapper ||
+      Root->getOpcode() == XCoreISD::CPRelativeWrapper) {
+    // All dp / cp relative addresses are word aligned
+    AlignedBase = Base;
+    Offset = off;
+    return true;
+  }
+  return false;
+}
+
 SDValue XCoreTargetLowering::
 LowerLOAD(SDValue Op, SelectionDAG &DAG)
 {
@@ -344,6 +382,61 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG)
   SDValue BasePtr = LD->getBasePtr();
   DebugLoc dl = Op.getDebugLoc();
 
+  SDValue Base;
+  int64_t Offset;
+  if (!LD->isVolatile() &&
+      IsWordAlignedBasePlusConstantOffset(BasePtr, Base, Offset)) {
+    if (Offset % 4 == 0) {
+      // We've managed to infer better alignment information than the load
+      // already has. Use an aligned load.
+      return DAG.getLoad(getPointerTy(), dl, Chain, BasePtr, NULL, 4);
+    }
+    // Lower to
+    // ldw low, base[offset >> 2]
+    // ldw high, base[(offset >> 2) + 1]
+    // shr low_shifted, low, (offset & 0x3) * 8
+    // shl high_shifted, high, 32 - (offset & 0x3) * 8
+    // or result, low_shifted, high_shifted
+    SDValue LowOffset = DAG.getConstant(Offset & ~0x3, MVT::i32);
+    SDValue HighOffset = DAG.getConstant((Offset & ~0x3) + 4, MVT::i32);
+    SDValue LowShift = DAG.getConstant((Offset & 0x3) * 8, MVT::i32);
+    SDValue HighShift = DAG.getConstant(32 - (Offset & 0x3) * 8, MVT::i32);
+
+    SDValue LowAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, LowOffset);
+    SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, Base, HighOffset);
+
+    SDValue Low = DAG.getLoad(getPointerTy(), dl, Chain,
+                              LowAddr, NULL, 4);
+    SDValue High = DAG.getLoad(getPointerTy(), dl, Chain,
+                               HighAddr, NULL, 4);
+    SDValue LowShifted = DAG.getNode(ISD::SRL, dl, MVT::i32, Low, LowShift);
+    SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High, HighShift);
+    SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, LowShifted, HighShifted);
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
+                        High.getValue(1));
+    SDValue Ops[] = { Result, Chain };
+    return DAG.getMergeValues(Ops, 2, dl);
+  }
+
+  if (LD->getAlignment() == 2) {
+    int SVOffset = LD->getSrcValueOffset();
+    SDValue Low = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, Chain,
+                                 BasePtr, LD->getSrcValue(), SVOffset, MVT::i16,
+                                 LD->isVolatile(), 2);
+    SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
+                                   DAG.getConstant(2, MVT::i32));
+    SDValue High = DAG.getExtLoad(ISD::EXTLOAD, dl, MVT::i32, Chain,
+                                  HighAddr, LD->getSrcValue(), SVOffset + 2,
+                                  MVT::i16, LD->isVolatile(), 2);
+    SDValue HighShifted = DAG.getNode(ISD::SHL, dl, MVT::i32, High,
+                                      DAG.getConstant(16, MVT::i32));
+    SDValue Result = DAG.getNode(ISD::OR, dl, MVT::i32, Low, HighShifted);
+    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Low.getValue(1),
+                        High.getValue(1));
+    SDValue Ops[] = { Result, Chain };
+    return DAG.getMergeValues(Ops, 2, dl);
+  }
+
   // Lower to a call to __misaligned_load(BasePtr).
   const Type *IntPtrTy = getTargetData()->getIntPtrType();
   TargetLowering::ArgListTy Args;
@@ -385,6 +478,22 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG)
   SDValue Value = ST->getValue();
   DebugLoc dl = Op.getDebugLoc();
 
+  if (ST->getAlignment() == 2) {
+    int SVOffset = ST->getSrcValueOffset();
+    SDValue Low = Value;
+    SDValue High = DAG.getNode(ISD::SRL, dl, MVT::i32, Value,
+                               DAG.getConstant(16, MVT::i32));
+    SDValue StoreLow = DAG.getTruncStore(Chain, dl, Low, BasePtr,
+                                         ST->getSrcValue(), SVOffset, MVT::i16,
+                                         ST->isVolatile(), 2);
+    SDValue HighAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr,
+                                   DAG.getConstant(2, MVT::i32));
+    SDValue StoreHigh = DAG.getTruncStore(Chain, dl, High, HighAddr,
+                                          ST->getSrcValue(), SVOffset + 2,
+                                          MVT::i16, ST->isVolatile(), 2);
+    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, StoreLow, StoreHigh);
+  }
+
   // Lower to a call to __misaligned_store(BasePtr, Value).
   const Type *IntPtrTy = getTargetData()->getIntPtrType();
   TargetLowering::ArgListTy Args;
diff --git a/test/CodeGen/XCore/unaligned_load.ll b/test/CodeGen/XCore/unaligned_load.ll
index a6a50893b91..c1372ed42f9 100644
--- a/test/CodeGen/XCore/unaligned_load.ll
+++ b/test/CodeGen/XCore/unaligned_load.ll
@@ -1,5 +1,11 @@
 ; RUN: llvm-as < %s | llc -march=xcore > %t1.s
 ; RUN: grep "bl __misaligned_load" %t1.s | count 1
+; RUN: grep ld16s %t1.s | count 2
+; RUN: grep ldw %t1.s | count 2
+; RUN: grep shl %t1.s | count 2
+; RUN: grep shr %t1.s | count 1
+; RUN: grep zext %t1.s | count 1
+; RUN: grep "or " %t1.s | count 2
 
 ; Byte aligned load. Expands to call to __misaligned_load.
 define i32 @align1(i32* %p) nounwind {
@@ -7,3 +13,19 @@ entry:
   %0 = load i32* %p, align 1              ; <i32> [#uses=1]
   ret i32 %0
 }
+
+; Half word aligned load. Expands to two 16-bit loads.
+define i32 @align2(i32* %p) nounwind {
+entry:
+  %0 = load i32* %p, align 2              ; <i32> [#uses=1]
+  ret i32 %0
+}
+
+@a = global [5 x i8] zeroinitializer, align 4
+
+; Constant offset from word aligned base. Expands to two 32-bit loads.
+define i32 @align3() nounwind {
+entry:
+  %0 = load i32* bitcast (i8* getelementptr ([5 x i8]* @a, i32 0, i32 1) to i32*), align 1
+  ret i32 %0
+}
diff --git a/test/CodeGen/XCore/unaligned_store.ll b/test/CodeGen/XCore/unaligned_store.ll
index b7a519299fd..120d6529ece 100644
--- a/test/CodeGen/XCore/unaligned_store.ll
+++ b/test/CodeGen/XCore/unaligned_store.ll
@@ -1,5 +1,7 @@
 ; RUN: llvm-as < %s | llc -march=xcore > %t1.s
 ; RUN: grep "bl __misaligned_store" %t1.s | count 1
+; RUN: grep st16 %t1.s | count 2
+; RUN: grep shr %t1.s | count 1
 
 ; Byte aligned store. Expands to call to __misaligned_store.
 define void @align1(i32* %p, i32 %val) nounwind {
@@ -7,3 +9,10 @@ entry:
   store i32 %val, i32* %p, align 1
   ret void
 }
+
+; Half word aligned store. Expands to two 16-bit stores.
+define void @align2(i32* %p, i32 %val) nounwind {
+entry:
+  store i32 %val, i32* %p, align 2
+  ret void
+}
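
For reference, here is a stand-alone C++ sketch, not part of the patch, of the arithmetic behind the ldw/shr/shl/or sequence that LowerLOAD now emits for a word-aligned base plus a non-word-aligned constant offset. It assumes a little-endian target, as the XCore is; the helper names load_word and load_base_plus_offset are hypothetical:

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Stand-in for a word-aligned ldw; p must point at a 4-byte-aligned word.
static uint32_t load_word(const uint8_t *p) {
  uint32_t w;
  std::memcpy(&w, p, 4);
  return w;
}

// Mirrors the expansion: round the offset down to a word boundary, load the
// two words straddling the target, and recombine them with shifts and an or.
static uint32_t load_base_plus_offset(const uint8_t *base, int64_t offset) {
  assert(offset % 4 != 0 && "the Offset % 4 == 0 case uses a single ldw");
  uint32_t low = load_word(base + (offset & ~0x3));          // ldw low
  uint32_t high = load_word(base + (offset & ~0x3) + 4);     // ldw high
  uint32_t low_shifted = low >> ((offset & 0x3) * 8);        // shr
  uint32_t high_shifted = high << (32 - (offset & 0x3) * 8); // shl
  return low_shifted | high_shifted;                         // or
}

int main() {
  alignas(4) uint8_t buf[8] = {0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88};
  uint32_t expected;
  std::memcpy(&expected, buf + 1, 4); // byte-wise unaligned read: 0x55443322
  std::printf("%#x == %#x\n", expected, load_base_plus_offset(buf, 1));
  return 0;
}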
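
Likewise, a sketch (again not part of the patch) of what the getAlignment() == 2 paths compute: the load ors a zero-extended low halfword with the high halfword shifted left by 16, and the store splits the value with a 16-bit right shift. The load16/store16 helpers are hypothetical stand-ins for the halfword-aligned ld16s/st16 accesses:

#include <cstdint>
#include <cstring>

static uint32_t load16(const uint8_t *p) {
  uint16_t h;
  std::memcpy(&h, p, 2);
  return h; // zero-extend to 32 bits
}

static void store16(uint8_t *p, uint32_t v) {
  uint16_t h = (uint16_t)v; // truncating store, as with getTruncStore
  std::memcpy(p, &h, 2);
}

static uint32_t load_align2(const uint8_t *p) {
  return load16(p) | (load16(p + 2) << 16); // or result, low, high << 16
}

static void store_align2(uint8_t *p, uint32_t v) {
  store16(p, v);           // low halfword at offset 0
  store16(p + 2, v >> 16); // shr high, value, 16; stored at offset 2
}

int main() {
  alignas(2) uint8_t buf[4];
  store_align2(buf, 0xCAFEBABE);
  return load_align2(buf) == 0xCAFEBABE ? 0 : 1; // round-trips
}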