[SystemZ] Use CLST to implement strcmp

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188544 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Richard Sandiford 2013-08-16 11:21:54 +00:00
parent 6a079fef4f
commit e1b2af731e
15 changed files with 347 additions and 11 deletions

View File

@ -699,7 +699,7 @@ public:
case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl:
case LibFunc::log2: case LibFunc::log2f: case LibFunc::log2l:
case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l:
case LibFunc::memcmp:
case LibFunc::memcmp: case LibFunc::strcmp:
return true;
}
return false;

View File

@ -108,6 +108,20 @@ public:
MachinePointerInfo Op2PtrInfo) const {
return std::make_pair(SDValue(), SDValue());
}
/// EmitTargetCodeForStrcmp - Hook that lets a target expand a strcmp call
/// inline in cases where that is faster than a libcall.  On success the
/// returned pair holds the strcmp result first and the output chain second.
/// Returning two null SDValues, as this default implementation does, asks
/// for a normal libcall to be used instead.
virtual std::pair<SDValue, SDValue>
EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc dl,
                        SDValue Chain,
                        SDValue Op1, SDValue Op2,
                        MachinePointerInfo Op1PtrInfo,
                        MachinePointerInfo Op2PtrInfo) const {
  // No target-specific expansion by default: both members are null.
  return std::pair<SDValue, SDValue>();
}
};
} // end llvm namespace

View File

@ -5558,6 +5558,35 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) {
return false;
}
/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form.
/// If so, return true and lower it, otherwise return false and it will be
/// lowered like a normal call.
bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) {
// Verify that the prototype makes sense. int strcmp(void*,void*)
if (I.getNumArgOperands() != 2)
return false;
const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1);
if (!Arg0->getType()->isPointerTy() ||
!Arg1->getType()->isPointerTy() ||
!I.getType()->isIntegerTy())
return false;
const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo();
std::pair<SDValue, SDValue> Res =
TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(),
getValue(Arg0), getValue(Arg1),
MachinePointerInfo(Arg0),
MachinePointerInfo(Arg1));
if (Res.first.getNode()) {
processIntegerCallValue(I, Res.first, true);
PendingLoads.push_back(Res.second);
return true;
}
return false;
}
/// visitUnaryFloatCall - If a call instruction is a unary floating-point
/// operation (as expected), translate it to an SDNode with the specified opcode
/// and return true.
@ -5704,6 +5733,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitMemCmpCall(I))
return;
break;
case LibFunc::strcmp:
if (visitStrCmpCall(I))
return;
break;
}
}
}

View File

@ -523,6 +523,7 @@ private:
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpCall(const CallInst &I);
bool visitStrCmpCall(const CallInst &I);
bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode);
void visitAtomicLoad(const LoadInst &I);
void visitAtomicStore(const StoreInst &I);

View File

@ -1702,6 +1702,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(UDIVREM64);
OPCODE(MVC);
OPCODE(CLC);
OPCODE(STRCMP);
OPCODE(IPM);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
@ -2261,6 +2262,66 @@ SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI,
return MBB;
}
// Decompose string pseudo-instruction MI into a loop that continually performs
// Opcode until CC != 3.
//
// MI's operands are read as: operand 0 = register defined with the end
// address for the first string, operands 1 and 2 = registers holding the
// start addresses of the two strings, operand 3 = register holding the
// terminating character.  MBB is the block containing MI and Opcode is the
// real instruction to emit inside the loop.
//
// MI is erased, and the block returned is DoneMBB, the block produced by
// splitting MBB after MI.
MachineBasicBlock *
SystemZTargetLowering::emitStringWrapper(MachineInstr *MI,
MachineBasicBlock *MBB,
unsigned Opcode) const {
const SystemZInstrInfo *TII = TM.getInstrInfo();
MachineFunction &MF = *MBB->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
DebugLoc DL = MI->getDebugLoc();
uint64_t End1Reg = MI->getOperand(0).getReg();
uint64_t Start1Reg = MI->getOperand(1).getReg();
uint64_t Start2Reg = MI->getOperand(2).getReg();
uint64_t CharReg = MI->getOperand(3).getReg();
// Fresh 64-bit virtual registers for the loop-carried addresses (the PHI
// results) and for the second address defined by Opcode, which the pseudo
// itself does not expose.
const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
uint64_t This1Reg = MRI.createVirtualRegister(RC);
uint64_t This2Reg = MRI.createVirtualRegister(RC);
uint64_t End2Reg = MRI.createVirtualRegister(RC);
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *DoneMBB = splitBlockAfter(MI, MBB);
MachineBasicBlock *LoopMBB = emitBlockAfter(StartMBB);
// StartMBB:
// R0W = %CharReg
// # fall through to LoopMBB
BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0W).addReg(CharReg);
MBB->addSuccessor(LoopMBB);
// LoopMBB:
// %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
// %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
// %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0W
// JO LoopMBB
// # fall through to DoneMBB
MBB = LoopMBB;
// R0W is set in StartMBB and read by Opcode in the loop, so it is live
// into LoopMBB.
MBB->addLiveIn(SystemZ::R0W);
BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
.addReg(Start1Reg).addMBB(StartMBB)
.addReg(End1Reg).addMBB(LoopMBB);
BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
.addReg(Start2Reg).addMBB(StartMBB)
.addReg(End2Reg).addMBB(LoopMBB);
BuildMI(MBB, DL, TII->get(Opcode))
.addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
.addReg(This1Reg).addReg(This2Reg);
// Branch back to the top of the loop while CC == 3.
BuildMI(MBB, DL, TII->get(SystemZ::BRC))
.addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
MBB->addSuccessor(LoopMBB);
MBB->addSuccessor(DoneMBB);
// The CC value set by the final iteration is live into DoneMBB.
DoneMBB->addLiveIn(SystemZ::CC);
MI->eraseFromParent();
return DoneMBB;
}
MachineBasicBlock *SystemZTargetLowering::
EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
switch (MI->getOpcode()) {
@ -2488,6 +2549,8 @@ EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
case SystemZ::CLCWrapper:
return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
case SystemZ::CLSTLoop:
return emitStringWrapper(MI, MBB, SystemZ::CLST);
default:
llvm_unreachable("Unexpected instr type to insert");
}

View File

@ -84,6 +84,10 @@ namespace SystemZISD {
// as for MVC.
CLC,
// Use a CLST-based sequence to implement strcmp(). The two input operands
// are the addresses of the strings to compare.
STRCMP,
// Store the CC value in bits 29 and 28 of an integer.
IPM,
@ -240,6 +244,9 @@ private:
MachineBasicBlock *emitMemMemWrapper(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Opcode) const;
MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
MachineBasicBlock *BB,
unsigned Opcode) const;
};
} // end namespace llvm

View File

@ -1405,3 +1405,23 @@ multiclass MemorySS<string mnemonic, bits<8> opcode,
[(operator bdaddr12only:$dest, bdaddr12only:$src,
imm32len8:$length)]>;
}
// Define an instruction that operates on two strings, both terminated
// by the character in R0.  The instruction processes a CPU-determined
// number of bytes at a time and sets CC to 3 if the instruction needs
// to be repeated.  Also define a pseudo instruction that represents
// the full loop (the main instruction plus the branch on CC==3).
//
// The real instruction ties each output register to the corresponding
// input register, so both address operands are updated in place.  The
// "Loop" pseudo takes the two start addresses and the terminating
// character, produces a single GR64 result, and is expanded by a custom
// inserter.
multiclass StringRRE<string mnemonic, bits<16> opcode,
SDPatternOperator operator> {
def "" : InstRRE<opcode, (outs GR64:$R1, GR64:$R2),
(ins GR64:$R1src, GR64:$R2src),
mnemonic#"\t$R1, $R2", []> {
let Constraints = "$R1 = $R1src, $R2 = $R2src";
let DisableEncoding = "$R1src, $R2src";
}
let usesCustomInserter = 1 in
def Loop : Pseudo<(outs GR64:$end),
(ins GR64:$start1, GR64:$start2, GR32:$char),
[(set GR64:$end, (operator GR64:$start1, GR64:$start2,
GR32:$char))]>;
}

View File

@ -996,6 +996,10 @@ defm : ZXB<z_ucmp, GR64, CLGFR>;
let mayLoad = 1, Defs = [CC] in
defm CLC : MemorySS<"clc", 0xD5, z_clc>;
// String comparison.  CLST sets CC and reads the string terminator from
// R0W, hence the explicit Defs and Uses.  The StringRRE multiclass defines
// both the real CLST instruction and the CLSTLoop pseudo matched from
// z_strcmp.
let mayLoad = 1, Defs = [CC], Uses = [R0W] in
defm CLST : StringRRE<"clst", 0xB25D, z_strcmp>;
//===----------------------------------------------------------------------===//
// Atomic operations
//===----------------------------------------------------------------------===//

View File

@ -58,6 +58,11 @@ def SDT_ZMemMemLength : SDTypeProfile<0, 3,
[SDTCisPtrTy<0>,
SDTCisPtrTy<1>,
SDTCisVT<2, i32>]>;
// Type profile for string operations: one pointer result and three
// operands, namely two pointers (the string addresses) followed by an
// i32 (the terminating character).
def SDT_ZString : SDTypeProfile<1, 3,
[SDTCisPtrTy<0>,
SDTCisPtrTy<1>,
SDTCisPtrTy<2>,
SDTCisVT<3, i32>]>;
def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
//===----------------------------------------------------------------------===//
@ -114,6 +119,8 @@ def z_mvc : SDNode<"SystemZISD::MVC", SDT_ZMemMemLength,
[SDNPHasChain, SDNPMayStore, SDNPMayLoad]>;
def z_clc : SDNode<"SystemZISD::CLC", SDT_ZMemMemLength,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
// strcmp node: chained, may load, and produces glue as an output (in
// addition to the results described by SDT_ZString).
def z_strcmp : SDNode<"SystemZISD::STRCMP", SDT_ZString,
[SDNPHasChain, SDNPOutGlue, SDNPMayLoad]>;
def z_ipm : SDNode<"SystemZISD::IPM", SDT_ZI32Intrinsic,
[SDNPInGlue]>;

View File

@ -126,6 +126,19 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
return SDValue();
}
// Turn the CC value carried by Glue into an i32 whose sign reflects the
// comparison: 0 for CC == 0, negative for CC == 1 and positive for CC == 2.
//
// IPM places CC in bits 29 and 28 of an integer with bits 30 and 31 clear.
// Shifting right by 28 leaves just the CC value (0..3), and rotating left
// by 31 (i.e. right by 1) moves its low bit into the sign bit.
//
// NOTE(review): by the same arithmetic CC == 3 would come out negative
// (0x80000001), not positive; this helper appears to rely on callers only
// reaching it with CC in 0..2 -- confirm.
static SDValue addIPMSequence(SDLoc DL, SDValue Glue, SelectionDAG &DAG) {
  SDValue CCBits = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
  SDValue ShiftAmount = DAG.getConstant(28, MVT::i32);
  SDValue CCValue = DAG.getNode(ISD::SRL, DL, MVT::i32, CCBits, ShiftAmount);
  SDValue RotateAmount = DAG.getConstant(31, MVT::i32);
  return DAG.getNode(ISD::ROTL, DL, MVT::i32, CCValue, RotateAmount);
}
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Src1, SDValue Src2, SDValue Size,
@ -139,17 +152,21 @@ EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
Chain = DAG.getNode(SystemZISD::CLC, DL, VTs, Chain,
Src1, Src2, Size);
SDValue Glue = Chain.getValue(1);
// IPM inserts the CC value into bits 29 and 28, with 0 meaning "equal",
// 1 meaning "less" and 2 meaning "greater". Bits 30 and 31 are zero.
// Convert this into an integer that is respectively equal, less
// or greater than 0.
SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, Glue);
SDValue SRL = DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
DAG.getConstant(28, MVT::i32));
SDValue ROTL = DAG.getNode(ISD::ROTL, DL, MVT::i32, SRL,
DAG.getConstant(31, MVT::i32));
return std::make_pair(ROTL, Chain);
return std::make_pair(addIPMSequence(DL, Glue, DAG), Chain);
}
}
return std::make_pair(SDValue(), SDValue());
}
// Expand strcmp(Src1, Src2) as a SystemZISD::STRCMP node with a terminating
// character of 0, followed by the IPM sequence that converts the resulting
// CC value into the integer result.  Returns that integer together with the
// node's output chain.  The pointer-info arguments are not used.
std::pair<SDValue, SDValue> SystemZSelectionDAGInfo::
EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
                        SDValue Src1, SDValue Src2,
                        MachinePointerInfo Op1PtrInfo,
                        MachinePointerInfo Op2PtrInfo) const {
  // Results of the STRCMP node: 0 = address (ignored here), 1 = chain,
  // 2 = glue carrying CC.
  SDVTList VTs = DAG.getVTList(Src1.getValueType(), MVT::Other, MVT::Glue);
  SDValue Terminator = DAG.getConstant(0, MVT::i32);
  SDValue StrCmp = DAG.getNode(SystemZISD::STRCMP, DL, VTs, Chain, Src1, Src2,
                               Terminator);
  SDValue OutChain = StrCmp.getValue(1);
  SDValue OutGlue = StrCmp.getValue(2);
  return std::make_pair(addIPMSequence(DL, OutGlue, DAG), OutChain);
}

View File

@ -45,6 +45,12 @@ public:
SDValue Src1, SDValue Src2, SDValue Size,
MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE;
virtual std::pair<SDValue, SDValue>
EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc DL, SDValue Chain,
SDValue Src1, SDValue Src2,
MachinePointerInfo Op1PtrInfo,
MachinePointerInfo Op2PtrInfo) const LLVM_OVERRIDE;
};
}

View File

@ -0,0 +1,70 @@
; Test strcmp using CLST, i32 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare signext i32 @strcmp(i8 *%src1, i8 *%src2)
; Check a case where the result is used as an integer.
; The expected code loads the terminator (0) into %r0, repeats clst while
; the "jo" branch is taken, and then converts the final CC into the i32
; result with the ipm/srl/rll sequence.
define i32 @f1(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f1:
; CHECK: lhi %r0, 0
; CHECK: [[LABEL:\.[^:]*]]:
; CHECK: clst %r2, %r3
; CHECK-NEXT: jo [[LABEL]]
; CHECK-NEXT: BB#{{[0-9]+}}
; CHECK-NEXT: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: rll %r2, [[REG]], 31
; CHECK: br %r14
%res = call i32 @strcmp(i8 *%src1, i8 *%src2)
ret i32 %res
}
; Check a case where the result is tested for equality.
; Here the branch is expected to follow the clst loop directly and test
; the CC it left behind, with no ipm sequence in between.
define void @f2(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f2:
; CHECK: lhi %r0, 0
; CHECK: [[LABEL:\.[^:]*]]:
; CHECK: clst %r2, %r3
; CHECK-NEXT: jo [[LABEL]]
; CHECK-NEXT: BB#{{[0-9]+}}
; CHECK-NEXT: je {{\.L.*}}
; CHECK: br %r14
%res = call i32 @strcmp(i8 *%src1, i8 *%src2)
%cmp = icmp eq i32 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i32 0, i32 *%dest
br label %exit
exit:
ret void
}
; Test a case where the result is used both as an integer and for
; branching.  The integer result still needs the ipm/srl/rll sequence,
; and the "less than" branch must target a local label.
define i32 @f3(i8 *%src1, i8 *%src2, i32 *%dest) {
; CHECK-LABEL: f3:
; CHECK: lhi %r0, 0
; CHECK: [[LABEL:\.[^:]*]]:
; CHECK: clst %r2, %r3
; CHECK-NEXT: jo [[LABEL]]
; CHECK-NEXT: BB#{{[0-9]+}}
; CHECK-NEXT: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: rll %r2, [[REG]], 31
; CHECK: jl {{\.L.*}}
; CHECK: br %r14
entry:
  %res = call i32 @strcmp(i8 *%src1, i8 *%src2)
  %cmp = icmp slt i32 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i32 0, i32 *%dest
  br label %exit

exit:
  ret i32 %res
}

View File

@ -0,0 +1,72 @@
; Test strcmp using CLST, i64 version.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
declare i64 @strcmp(i8 *%src1, i8 *%src2)
; Check a case where the result is used as an integer.
; As in the i32 version, the final CC is converted with ipm/srl/rll; the
; 32-bit value is then expected to be widened into %r2 with lgfr because
; strcmp is declared here as returning i64.
define i64 @f1(i8 *%src1, i8 *%src2) {
; CHECK-LABEL: f1:
; CHECK: lhi %r0, 0
; CHECK: [[LABEL:\.[^:]*]]:
; CHECK: clst %r2, %r3
; CHECK-NEXT: jo [[LABEL]]
; CHECK-NEXT: BB#{{[0-9]+}}
; CHECK-NEXT: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: rll [[REG]], [[REG]], 31
; CHECK: lgfr %r2, [[REG]]
; CHECK: br %r14
%res = call i64 @strcmp(i8 *%src1, i8 *%src2)
ret i64 %res
}
; Check a case where the result is tested for equality.
; Here the branch is expected to follow the clst loop directly and test
; the CC it left behind, with no ipm sequence in between.
define void @f2(i8 *%src1, i8 *%src2, i64 *%dest) {
; CHECK-LABEL: f2:
; CHECK: lhi %r0, 0
; CHECK: [[LABEL:\.[^:]*]]:
; CHECK: clst %r2, %r3
; CHECK-NEXT: jo [[LABEL]]
; CHECK-NEXT: BB#{{[0-9]+}}
; CHECK-NEXT: je {{\.L.*}}
; CHECK: br %r14
%res = call i64 @strcmp(i8 *%src1, i8 *%src2)
%cmp = icmp eq i64 %res, 0
br i1 %cmp, label %exit, label %store
store:
store i64 0, i64 *%dest
br label %exit
exit:
ret void
}
; Test a case where the result is used both as an integer and for
; branching.  The integer result still needs the ipm/srl/rll sequence
; (plus lgfr for the i64 return), and the "less than" branch must target
; a local label.
define i64 @f3(i8 *%src1, i8 *%src2, i64 *%dest) {
; CHECK-LABEL: f3:
; CHECK: lhi %r0, 0
; CHECK: [[LABEL:\.[^:]*]]:
; CHECK: clst %r2, %r3
; CHECK-NEXT: jo [[LABEL]]
; CHECK-NEXT: BB#{{[0-9]+}}
; CHECK-NEXT: ipm [[REG:%r[0-5]]]
; CHECK: srl [[REG]], 28
; CHECK: rll [[REG]], [[REG]], 31
; CHECK: lgfr %r2, [[REG]]
; CHECK: jl {{\.L.*}}
; CHECK: br %r14
entry:
  %res = call i64 @strcmp(i8 *%src1, i8 *%src2)
  %cmp = icmp slt i64 %res, 0
  br i1 %cmp, label %exit, label %store

store:
  store i64 0, i64 *%dest
  br label %exit

exit:
  ret i64 %res
}

View File

@ -1597,6 +1597,18 @@
# CHECK: clr %r7, %r8
0x15 0x78
# CHECK: clst %r0, %r0
0xb2 0x5d 0x00 0x00
# CHECK: clst %r0, %r15
0xb2 0x5d 0x00 0x0f
# CHECK: clst %r15, %r0
0xb2 0x5d 0x00 0xf0
# CHECK: clst %r7, %r8
0xb2 0x5d 0x00 0x78
# CHECK: cl %r0, 0
0x55 0x00 0x00 0x00

View File

@ -2841,6 +2841,16 @@
clrl %r7,frob@PLT
clrl %r8,frob@PLT
#CHECK: clst %r0, %r0 # encoding: [0xb2,0x5d,0x00,0x00]
#CHECK: clst %r0, %r15 # encoding: [0xb2,0x5d,0x00,0x0f]
#CHECK: clst %r15, %r0 # encoding: [0xb2,0x5d,0x00,0xf0]
#CHECK: clst %r7, %r8 # encoding: [0xb2,0x5d,0x00,0x78]
clst %r0,%r0
clst %r0,%r15
clst %r15,%r0
clst %r7,%r8
#CHECK: cly %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x55]
#CHECK: cly %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x55]
#CHECK: cly %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x55]