diff --git a/lib/Target/SystemZ/README.txt b/lib/Target/SystemZ/README.txt index 4107685e307..d5361fb658c 100644 --- a/lib/Target/SystemZ/README.txt +++ b/lib/Target/SystemZ/README.txt @@ -35,10 +35,6 @@ performance measurements. -- -We don't support prefetching yet. - --- - There is no scheduling support. -- diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h index bb6ceac83b6..ea6c7d17fa5 100644 --- a/lib/Target/SystemZ/SystemZ.h +++ b/lib/Target/SystemZ/SystemZ.h @@ -57,6 +57,10 @@ namespace llvm { const unsigned CCMASK_SRST_NOTFOUND = CCMASK_2; const unsigned CCMASK_SRST = CCMASK_1 | CCMASK_2; + // Mask assignments for PFD. + const unsigned PFD_READ = 1; + const unsigned PFD_WRITE = 2; + // Return true if Val fits an LLILL operand. static inline bool isImmLL(uint64_t Val) { return (Val & ~0x000000000000ffffULL) == 0; diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 65b2ad2164f..20afab782a9 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -194,6 +194,9 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); + // Handle prefetches with PFD or PFDRL. + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + // Handle floating-point types. for (unsigned I = MVT::FIRST_FP_VALUETYPE; I <= MVT::LAST_FP_VALUETYPE; @@ -1806,6 +1809,26 @@ SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, SystemZ::R15D, Op.getOperand(1)); } +SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, + SelectionDAG &DAG) const { + bool IsData = cast(Op.getOperand(4))->getZExtValue(); + if (!IsData) + // Just preserve the chain. + return Op.getOperand(0); + + bool IsWrite = cast(Op.getOperand(2))->getZExtValue(); + unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; + MemIntrinsicSDNode *Node = cast(Op.getNode()); + SDValue Ops[] = { + Op.getOperand(0), + DAG.getConstant(Code, MVT::i32), + Op.getOperand(1) + }; + return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, SDLoc(Op), + Node->getVTList(), Ops, array_lengthof(Ops), + Node->getMemoryVT(), Node->getMemOperand()); +} + SDValue SystemZTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -1869,6 +1892,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op, return lowerSTACKSAVE(Op, DAG); case ISD::STACKRESTORE: return lowerSTACKRESTORE(Op, DAG); + case ISD::PREFETCH: + return lowerPREFETCH(Op, DAG); default: llvm_unreachable("Unexpected node to lower"); } @@ -1909,6 +1934,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { OPCODE(ATOMIC_LOADW_UMIN); OPCODE(ATOMIC_LOADW_UMAX); OPCODE(ATOMIC_CMP_SWAPW); + OPCODE(PREFETCH); } return NULL; #undef OPCODE diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index 604453d2dde..f6a2ce041ec 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -132,7 +132,12 @@ namespace SystemZISD { // operand into the high bits // Operand 4: the negative of operand 2, for rotating the other way // Operand 5: the width of the field in bits (8 or 16) - ATOMIC_CMP_SWAPW + ATOMIC_CMP_SWAPW, + + // Prefetch from the second operand using the 4-bit control code in + // the first operand. The code is 1 for a load prefetch and 2 for + // a store prefetch. + PREFETCH }; } @@ -225,6 +230,7 @@ private: SDValue lowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; // If the last instruction before MBBI in MBB was some form of COMPARE, // try to replace it with a COMPARE AND BRANCH just before MBBI. diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index f53833210ec..a7e18ec6812 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -540,6 +540,10 @@ class InstSS op, dag outs, dag ins, string asmstr, list pattern> // One output operand and five input operands. The first two operands // are registers and the other three are immediates. // +// Prefetch: +// One 4-bit immediate operand and one address operand. The immediate +// operand is 1 for a load prefetch and 2 for a store prefetch. +// // The format determines which input operands are tied to output operands, // and also determines the shape of any address operand. // @@ -1304,6 +1308,22 @@ class RotateSelectRIEf opcode, RegisterOperand cls1, let DisableEncoding = "$R1src"; } +class PrefetchRXY opcode, SDPatternOperator operator> + : InstRXY; + +class PrefetchRILPC opcode, + SDPatternOperator operator> + : InstRIL { + // We want PC-relative addresses to be tried ahead of BD and BDX addresses. + // However, BDXs have two extra operands and are therefore 6 units more + // complex. + let AddedComplexity = 7; +} + // A floating-point load-and test operation. Create both a normal unary // operation and one that acts as a comparison against zero. multiclass LoadAndTestRRE opcode, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index d857a572fb9..8e1f5ac3c97 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -1034,6 +1034,13 @@ let mayLoad = 1, Defs = [CC] in let mayLoad = 1, Defs = [CC], Uses = [R0W] in defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>; +//===----------------------------------------------------------------------===// +// Prefetch +//===----------------------------------------------------------------------===// + +def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>; +def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>; + //===----------------------------------------------------------------------===// // Atomic operations //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index 5745e29cd8f..e2c43d6e582 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -64,6 +64,9 @@ def SDT_ZString : SDTypeProfile<1, 3, SDTCisPtrTy<2>, SDTCisVT<3, i32>]>; def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; +def SDT_ZPrefetch : SDTypeProfile<0, 2, + [SDTCisVT<0, i8>, + SDTCisPtrTy<1>]>; //===----------------------------------------------------------------------===// // Node definitions @@ -130,6 +133,9 @@ def z_search_string : SDNode<"SystemZISD::SEARCH_STRING", SDT_ZString, [SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>; def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic, [SDNPInGlue]>; +def z_prefetch : SDNode<"SystemZISD::PREFETCH", SDT_ZPrefetch, + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, + SDNPMemOperand]>; //===----------------------------------------------------------------------===// // Pattern fragments diff --git a/test/CodeGen/SystemZ/prefetch-01.ll b/test/CodeGen/SystemZ/prefetch-01.ll new file mode 100644 index 00000000000..bb7fea99ca7 --- /dev/null +++ b/test/CodeGen/SystemZ/prefetch-01.ll @@ -0,0 +1,87 @@ +; Test data prefetching. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare void @llvm.prefetch(i8*, i32, i32, i32) + +@g = global [4096 x i8] zeroinitializer + +; Check that instruction read prefetches are ignored. +define void @f1(i8 *%ptr) { +; CHECK-LABEL: f1: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 0) + ret void +} + +; Check that instruction write prefetches are ignored. +define void @f2(i8 *%ptr) { +; CHECK-LABEL: f2: +; CHECK-NOT: %r2 +; CHECK: br %r14 + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 0) + ret void +} + +; Check data read prefetches. +define void @f3(i8 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: pfd 1, 0(%r2) +; CHECK: br %r14 + call void @llvm.prefetch(i8 *%ptr, i32 0, i32 0, i32 1) + ret void +} + +; Check data write prefetches. +define void @f4(i8 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: pfd 2, 0(%r2) +; CHECK: br %r14 + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} + +; Check an address at the negative end of the range. +define void @f5(i8 *%base, i64 %index) { +; CHECK-LABEL: f5: +; CHECK: pfd 2, -524288({{%r2,%r3|%r3,%r2}}) +; CHECK: br %r14 + %add = add i64 %index, -524288 + %ptr = getelementptr i8 *%base, i64 %add + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} + +; Check an address at the positive end of the range. +define void @f6(i8 *%base, i64 %index) { +; CHECK-LABEL: f6: +; CHECK: pfd 2, 524287({{%r2,%r3|%r3,%r2}}) +; CHECK: br %r14 + %add = add i64 %index, 524287 + %ptr = getelementptr i8 *%base, i64 %add + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} + +; Check that the next address up still compiles. +define void @f7(i8 *%base, i64 %index) { +; CHECK-LABEL: f7: +; CHECK: 524288 +; CHECK: pfd 2, +; CHECK: br %r14 + %add = add i64 %index, 524288 + %ptr = getelementptr i8 *%base, i64 %add + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} + +; Check pc-relative prefetches. +define void @f8() { +; CHECK-LABEL: f8: +; CHECK: pfdrl 2, g +; CHECK: br %r14 + %ptr = getelementptr [4096 x i8] *@g, i64 0, i64 0 + call void @llvm.prefetch(i8 *%ptr, i32 1, i32 0, i32 1) + ret void +} diff --git a/test/MC/Disassembler/SystemZ/insns-pcrel.txt b/test/MC/Disassembler/SystemZ/insns-pcrel.txt index c565b6e47b1..c250f199792 100644 --- a/test/MC/Disassembler/SystemZ/insns-pcrel.txt +++ b/test/MC/Disassembler/SystemZ/insns-pcrel.txt @@ -1330,3 +1330,35 @@ # 0x0000077c: # CHECK: brctg %r15, 0x1077a 0xa7 0xf7 0x7f 0xff + +# 0x00000780: +# CHECK: pfdrl 0, 0x780 +0xc6 0x02 0x00 0x00 0x00 0x00 + +# 0x00000786: +# CHECK: pfdrl 15, 0x786 +0xc6 0xf2 0x00 0x00 0x00 0x00 + +# 0x0000078c: +# CHECK: pfdrl 0, 0x78a +0xc6 0x02 0xff 0xff 0xff 0xff + +# 0x00000792: +# CHECK: pfdrl 15, 0x790 +0xc6 0xf2 0xff 0xff 0xff 0xff + +# 0x00000798: +# CHECK: pfdrl 0, 0xffffffff00000798 +0xc6 0x02 0x80 0x00 0x00 0x00 + +# 0x0000079e: +# CHECK: pfdrl 15, 0xffffffff0000079e +0xc6 0xf2 0x80 0x00 0x00 0x00 + +# 0x000007a4: +# CHECK: pfdrl 0, 0x1000007a2 +0xc6 0x02 0x7f 0xff 0xff 0xff + +# 0x000007aa: +# CHECK: pfdrl 15, 0x1000007a8 +0xc6 0xf2 0x7f 0xff 0xff 0xff diff --git a/test/MC/Disassembler/SystemZ/insns.txt b/test/MC/Disassembler/SystemZ/insns.txt index 3f4f6c3dd17..360785e94ba 100644 --- a/test/MC/Disassembler/SystemZ/insns.txt +++ b/test/MC/Disassembler/SystemZ/insns.txt @@ -5329,6 +5329,36 @@ # CHECK: oy %r15, 0 0xe3 0xf0 0x00 0x00 0x00 0x56 +# CHECK: pfd 0, -524288 +0xe3 0x00 0x00 0x00 0x80 0x36 + +# CHECK: pfd 0, -1 +0xe3 0x00 0x0f 0xff 0xff 0x36 + +# CHECK: pfd 0, 0 +0xe3 0x00 0x00 0x00 0x00 0x36 + +# CHECK: pfd 0, 1 +0xe3 0x00 0x00 0x01 0x00 0x36 + +# CHECK: pfd 0, 524287 +0xe3 0x00 0x0f 0xff 0x7f 0x36 + +# CHECK: pfd 0, 0(%r1) +0xe3 0x00 0x10 0x00 0x00 0x36 + +# CHECK: pfd 0, 0(%r15) +0xe3 0x00 0xf0 0x00 0x00 0x36 + +# CHECK: pfd 0, 524287(%r1,%r15) +0xe3 0x01 0xff 0xff 0x7f 0x36 + +# CHECK: pfd 0, 524287(%r15,%r1) +0xe3 0x0f 0x1f 0xff 0x7f 0x36 + +# CHECK: pfd 15, 0 +0xe3 0xf0 0x00 0x00 0x00 0x36 + # CHECK: risbg %r0, %r0, 0, 0, 0 0xec 0x00 0x00 0x00 0x00 0x55 diff --git a/test/MC/SystemZ/insn-bad.s b/test/MC/SystemZ/insn-bad.s index aa3f4c9d83b..1c478caa037 100644 --- a/test/MC/SystemZ/insn-bad.s +++ b/test/MC/SystemZ/insn-bad.s @@ -2275,6 +2275,40 @@ oy %r0, -524289 oy %r0, 524288 +#CHECK: error: invalid operand +#CHECK: pfd -1, 0 +#CHECK: error: invalid operand +#CHECK: pfd 16, 0 +#CHECK: error: invalid operand +#CHECK: pfd 1, -524289 +#CHECK: error: invalid operand +#CHECK: pfd 1, 524288 + + pfd -1, 0 + pfd 16, 0 + pfd 1, -524289 + pfd 1, 524288 + +#CHECK: error: invalid operand +#CHECK: pfdrl -1, 0 +#CHECK: error: invalid operand +#CHECK: pfdrl 16, 0 +#CHECK: error: offset out of range +#CHECK: pfdrl 1, -0x1000000002 +#CHECK: error: offset out of range +#CHECK: pfdrl 1, -1 +#CHECK: error: offset out of range +#CHECK: pfdrl 1, 1 +#CHECK: error: offset out of range +#CHECK: pfdrl 1, 0x100000000 + + pfdrl -1, 0 + pfdrl 16, 0 + pfdrl 1, -0x1000000002 + pfdrl 1, -1 + pfdrl 1, 1 + pfdrl 1, 0x100000000 + #CHECK: error: invalid operand #CHECK: risbg %r0,%r0,0,0,-1 #CHECK: error: invalid operand diff --git a/test/MC/SystemZ/insn-good.s b/test/MC/SystemZ/insn-good.s index 2c52d3ae608..9930d8ce6d1 100644 --- a/test/MC/SystemZ/insn-good.s +++ b/test/MC/SystemZ/insn-good.s @@ -6106,6 +6106,65 @@ oy %r0, 524287(%r15,%r1) oy %r15, 0 +#CHECK: pfd 0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x36] +#CHECK: pfd 0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x36] +#CHECK: pfd 0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x36] +#CHECK: pfd 0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x36] +#CHECK: pfd 0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x36] +#CHECK: pfd 0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x36] +#CHECK: pfd 0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x36] +#CHECK: pfd 0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x36] +#CHECK: pfd 0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x36] +#CHECK: pfd 15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x36] + + pfd 0, -524288 + pfd 0, -1 + pfd 0, 0 + pfd 0, 1 + pfd 0, 524287 + pfd 0, 0(%r1) + pfd 0, 0(%r15) + pfd 0, 524287(%r1,%r15) + pfd 0, 524287(%r15,%r1) + pfd 15, 0 + +#CHECK: pfdrl 0, .[[LAB:L.*]]-4294967296 # encoding: [0xc6,0x02,A,A,A,A] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-4294967296)+2, kind: FK_390_PC32DBL + pfdrl 0, -0x100000000 +#CHECK: pfdrl 0, .[[LAB:L.*]]-2 # encoding: [0xc6,0x02,A,A,A,A] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]-2)+2, kind: FK_390_PC32DBL + pfdrl 0, -2 +#CHECK: pfdrl 0, .[[LAB:L.*]] # encoding: [0xc6,0x02,A,A,A,A] +#CHECK: fixup A - offset: 2, value: .[[LAB]]+2, kind: FK_390_PC32DBL + pfdrl 0, 0 +#CHECK: pfdrl 0, .[[LAB:L.*]]+4294967294 # encoding: [0xc6,0x02,A,A,A,A] +#CHECK: fixup A - offset: 2, value: (.[[LAB]]+4294967294)+2, kind: FK_390_PC32DBL + pfdrl 0, 0xfffffffe + +#CHECK: pfdrl 0, foo # encoding: [0xc6,0x02,A,A,A,A] +# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL +#CHECK: pfdrl 15, foo # encoding: [0xc6,0xf2,A,A,A,A] +# fixup A - offset: 2, value: foo+2, kind: FK_390_PC32DBL + + pfdrl 0, foo + pfdrl 15, foo + +#CHECK: pfdrl 3, bar+100 # encoding: [0xc6,0x32,A,A,A,A] +# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL +#CHECK: pfdrl 4, bar+100 # encoding: [0xc6,0x42,A,A,A,A] +# fixup A - offset: 2, value: (bar+100)+2, kind: FK_390_PC32DBL + + pfdrl 3, bar+100 + pfdrl 4, bar+100 + +#CHECK: pfdrl 7, frob@PLT # encoding: [0xc6,0x72,A,A,A,A] +# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL +#CHECK: pfdrl 8, frob@PLT # encoding: [0xc6,0x82,A,A,A,A] +# fixup A - offset: 2, value: frob@PLT+2, kind: FK_390_PC32DBL + + pfdrl 7, frob@PLT + pfdrl 8, frob@PLT + #CHECK: risbg %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x55] #CHECK: risbg %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x55] #CHECK: risbg %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x55]