[mips][FastISel] Implement bswap.

Summary: Implement the bswap intrinsic for MIPS FastISel. The lowering is very different for mips32 r1 and r2.

Based on a patch by Reed Kotler.

Test Plan:
bswap1.ll
test-suite

Reviewers: dsanders, rkotler

Subscribers: llvm-commits, rfuhler

Differential Revision: http://reviews.llvm.org/D7219

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238760 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Vasileios Kalintiris 2015-06-01 16:40:45 +00:00
parent 0cc6b87583
commit a509ef9a17
2 changed files with 122 additions and 0 deletions

View File

@ -1313,6 +1313,70 @@ bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
switch (II->getIntrinsicID()) {
default:
return false;
// Lower llvm.bswap for i16 and i32 results. Returns true once the result
// register has been recorded via updateValueMap(); returns false (nothing
// selected here) when the return type is not supported, a register cannot be
// obtained, or the type is neither i16 nor i32.
case Intrinsic::bswap: {
Type *RetTy = II->getCalledFunction()->getReturnType();
MVT VT;
if (!isTypeSupported(RetTy, VT))
return false;
unsigned SrcReg = getRegForValue(II->getOperand(0));
if (SrcReg == 0)
return false;
// The result always lives in a 32-bit GPR, for both i16 and i32.
unsigned DestReg = createResultReg(&Mips::GPR32RegClass);
if (DestReg == 0)
return false;
if (VT == MVT::i16) {
if (Subtarget->hasMips32r2()) {
// MIPS32r2: a single WSBH produces the result.
emitInst(Mips::WSBH, DestReg).addReg(SrcReg);
updateValueMap(II, DestReg);
return true;
} else {
// Pre-R2 fallback: Dest = ((Src << 8) | (Src >> 8)) & 0xFFFF.
// NOTE(review): SRL shifts the whole 32-bit register, so bits 16-23 of
// SrcReg end up in bits 8-15 of the result unless they are zero -- confirm
// that the i16 source is guaranteed to be zero-extended here.
unsigned TempReg[3];
for (int i = 0; i < 3; i++) {
TempReg[i] = createResultReg(&Mips::GPR32RegClass);
if (TempReg[i] == 0)
return false;
}
emitInst(Mips::SLL, TempReg[0]).addReg(SrcReg).addImm(8);
emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(8);
emitInst(Mips::OR, TempReg[2]).addReg(TempReg[0]).addReg(TempReg[1]);
emitInst(Mips::ANDi, DestReg).addReg(TempReg[2]).addImm(0xFFFF);
updateValueMap(II, DestReg);
return true;
}
} else if (VT == MVT::i32) {
if (Subtarget->hasMips32r2()) {
// MIPS32r2: WSBH followed by a rotate right by 16.
// NOTE(review): unlike the other paths, TempReg is not checked against 0.
unsigned TempReg = createResultReg(&Mips::GPR32RegClass);
emitInst(Mips::WSBH, TempReg).addReg(SrcReg);
emitInst(Mips::ROTR, DestReg).addReg(TempReg).addImm(16);
updateValueMap(II, DestReg);
return true;
} else {
// Pre-R2 fallback, built from shifts, masks and ORs:
//   Dest = (Src << 24) | ((Src & 0xFF00) << 8)
//        | ((Src >> 8) & 0xFF00) | (Src >> 24)
unsigned TempReg[8];
for (int i = 0; i < 8; i++) {
TempReg[i] = createResultReg(&Mips::GPR32RegClass);
if (TempReg[i] == 0)
return false;
}
// TempReg[3] = ((Src >> 8) & 0xFF00) | (Src >> 24)
emitInst(Mips::SRL, TempReg[0]).addReg(SrcReg).addImm(8);
emitInst(Mips::SRL, TempReg[1]).addReg(SrcReg).addImm(24);
emitInst(Mips::ANDi, TempReg[2]).addReg(TempReg[0]).addImm(0xFF00);
emitInst(Mips::OR, TempReg[3]).addReg(TempReg[1]).addReg(TempReg[2]);
// TempReg[5] = (Src & 0xFF00) << 8, TempReg[6] = Src << 24
emitInst(Mips::ANDi, TempReg[4]).addReg(SrcReg).addImm(0xFF00);
emitInst(Mips::SLL, TempReg[5]).addReg(TempReg[4]).addImm(8);
emitInst(Mips::SLL, TempReg[6]).addReg(SrcReg).addImm(24);
// Combine the partial results into DestReg.
emitInst(Mips::OR, TempReg[7]).addReg(TempReg[3]).addReg(TempReg[5]);
emitInst(Mips::OR, DestReg).addReg(TempReg[6]).addReg(TempReg[7]);
updateValueMap(II, DestReg);
return true;
}
}
// Any other supported type is not handled by this case.
return false;
}
case Intrinsic::memcpy:
case Intrinsic::memmove: {
const auto *MTI = cast<MemTransferInst>(II);

View File

@ -0,0 +1,58 @@
; RUN: llc < %s -march=mipsel -mcpu=mips32 -O0 -relocation-model=pic \
; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=32R1
; RUN: llc < %s -march=mipsel -mcpu=mips32r2 -O0 -relocation-model=pic \
; RUN: -fast-isel=true -mips-fast-isel -fast-isel-abort=1 | FileCheck %s \
; RUN: -check-prefix=ALL -check-prefix=32R2
@a = global i16 -21829, align 2
@b = global i32 -1430532899, align 4
@a1 = common global i16 0, align 2
@b1 = common global i32 0, align 4
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
; Loads the i16 global @a, byte-swaps it with llvm.bswap.i16 and stores the
; result to @a1. The checks below expect an sll/srl/or/andi sequence for the
; mips32 run and a single wsbh for the mips32r2 run.
define void @b16() {
; ALL-LABEL: b16:
; ALL: lw $[[A_ADDR:[0-9]+]], %got(a)($[[GOT_ADDR:[0-9]+]])
; ALL: lhu $[[A_VAL:[0-9]+]], 0($[[A_ADDR]])
; 32R1: sll $[[TMP1:[0-9]+]], $[[A_VAL]], 8
; 32R1: srl $[[TMP2:[0-9]+]], $[[A_VAL]], 8
; 32R1: or $[[TMP3:[0-9]+]], $[[TMP1]], $[[TMP2]]
; 32R1: andi $[[TMP4:[0-9]+]], $[[TMP3]], 65535
; 32R2: wsbh $[[RESULT:[0-9]+]], $[[A_VAL]]
%1 = load i16, i16* @a, align 2
%2 = call i16 @llvm.bswap.i16(i16 %1)
store i16 %2, i16* @a1, align 2
ret void
}
; Loads the i32 global @b, byte-swaps it with llvm.bswap.i32 and stores the
; result to @b1. The checks below expect a nine-instruction shift/mask/or
; sequence for the mips32 run and wsbh followed by rotr 16 for the mips32r2 run.
define void @b32() {
; ALL-LABEL: b32:
; ALL: lw $[[B_ADDR:[0-9]+]], %got(b)($[[GOT_ADDR:[0-9]+]])
; ALL: lw $[[B_VAL:[0-9]+]], 0($[[B_ADDR]])
; 32R1: srl $[[TMP1:[0-9]+]], $[[B_VAL]], 8
; 32R1: srl $[[TMP2:[0-9]+]], $[[B_VAL]], 24
; 32R1: andi $[[TMP3:[0-9]+]], $[[TMP1]], 65280
; 32R1: or $[[TMP4:[0-9]+]], $[[TMP2]], $[[TMP3]]
; 32R1: andi $[[TMP5:[0-9]+]], $[[B_VAL]], 65280
; 32R1: sll $[[TMP6:[0-9]+]], $[[TMP5]], 8
; 32R1: sll $[[TMP7:[0-9]+]], $[[B_VAL]], 24
; 32R1: or $[[TMP8:[0-9]+]], $[[TMP4]], $[[TMP6]]
; 32R1: or $[[RESULT:[0-9]+]], $[[TMP7]], $[[TMP8]]
; 32R2: wsbh $[[TMP:[0-9]+]], $[[B_VAL]]
; 32R2: rotr $[[RESULT:[0-9]+]], $[[TMP]], 16
%1 = load i32, i32* @b, align 4
%2 = call i32 @llvm.bswap.i32(i32 %1)
store i32 %2, i32* @b1, align 4
ret void
}