X86: Try to use a smaller encoding by transforming (X << C1) & C2 into (X & (C2 >> C1)) & C1. (Part of PR5039)

This tends to happen a lot with bitfield code generated by clang. A simple example for x86_64 is
uint64_t foo(uint64_t x) { return (x&1) << 42; }
which used to compile into bloated code:
	shlq	$42, %rdi               ## encoding: [0x48,0xc1,0xe7,0x2a]
	movabsq	$4398046511104, %rax    ## encoding: [0x48,0xb8,0x00,0x00,0x00,0x00,0x00,0x04,0x00,0x00]
	andq	%rdi, %rax              ## encoding: [0x48,0x21,0xf8]
	ret                             ## encoding: [0xc3]

with this patch we can fold the immediate into the and:
	andq	$1, %rdi                ## encoding: [0x48,0x83,0xe7,0x01]
	movq	%rdi, %rax              ## encoding: [0x48,0x89,0xf8]
	shlq	$42, %rax               ## encoding: [0x48,0xc1,0xe0,0x2a]
	ret                             ## encoding: [0xc3]

It's possible to save another byte by using 'andl' instead of 'andq' but I currently see no way of doing
that without making this code even more complicated. See the TODOs in the code.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129990 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Benjamin Kramer 2011-04-22 15:30:40 +00:00
parent eab631362d
commit b20a8fc8a6
2 changed files with 176 additions and 0 deletions

View File

@ -1580,6 +1580,81 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
return RetVal; return RetVal;
break; break;
} }
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
// For operations of the form (x << C1) op C2, check if we can use a smaller
// encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1);
if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
break;
// i8 is unshrinkable, i16 should be promoted to i32.
if (NVT != MVT::i32 && NVT != MVT::i64)
break;
ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (!Cst || !ShlCst)
break;
int64_t Val = Cst->getSExtValue();
uint64_t ShlVal = ShlCst->getZExtValue();
// Make sure that we don't change the operation by removing bits.
// This only matters for OR and XOR, AND is unaffected.
if (Opcode != ISD::AND && ((Val >> ShlVal) << ShlVal) != Val)
break;
unsigned ShlOp, Op;
EVT CstVT = NVT;
// Check the minimum bitwidth for the new constant.
// TODO: AND32ri is the same as AND64ri32 with zext imm.
// TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
// TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
CstVT = MVT::i8;
else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
CstVT = MVT::i32;
// Bail if there is no smaller encoding.
if (NVT == CstVT)
break;
switch (NVT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unsupported VT!");
case MVT::i32:
assert(CstVT == MVT::i8);
ShlOp = X86::SHL32ri;
switch (Opcode) {
case ISD::AND: Op = X86::AND32ri8; break;
case ISD::OR: Op = X86::OR32ri8; break;
case ISD::XOR: Op = X86::XOR32ri8; break;
}
break;
case MVT::i64:
assert(CstVT == MVT::i8 || CstVT == MVT::i32);
ShlOp = X86::SHL64ri;
switch (Opcode) {
case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
case ISD::OR: Op = CstVT==MVT::i8? X86::OR64ri8 : X86::OR64ri32; break;
case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
}
break;
}
// Emit the smaller op and the shift.
SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, CstVT);
SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
getI8Imm(ShlVal));
break;
}
case X86ISD::UMUL: { case X86ISD::UMUL: {
SDValue N0 = Node->getOperand(0); SDValue N0 = Node->getOperand(0);
SDValue N1 = Node->getOperand(1); SDValue N1 = Node->getOperand(1);

View File

@ -0,0 +1,101 @@
; RUN: llc < %s -march=x86-64 | FileCheck %s
; PR5039
define i32 @test1(i32 %x) nounwind {
%and = shl i32 %x, 10
%shl = and i32 %and, 31744
ret i32 %shl
; CHECK: test1:
; CHECK: andl $31
; CHECK: shll $10
}
define i32 @test2(i32 %x) nounwind {
%or = shl i32 %x, 10
%shl = or i32 %or, 31744
ret i32 %shl
; CHECK: test2:
; CHECK: orl $31
; CHECK: shll $10
}
define i32 @test3(i32 %x) nounwind {
%xor = shl i32 %x, 10
%shl = xor i32 %xor, 31744
ret i32 %shl
; CHECK: test3:
; CHECK: xorl $31
; CHECK: shll $10
}
define i64 @test4(i64 %x) nounwind {
%and = shl i64 %x, 40
%shl = and i64 %and, 264982302294016
ret i64 %shl
; CHECK: test4:
; CHECK: andq $241
; CHECK: shlq $40
}
define i64 @test5(i64 %x) nounwind {
%and = shl i64 %x, 40
%shl = and i64 %and, 34084860461056
ret i64 %shl
; CHECK: test5:
; CHECK: andq $31
; CHECK: shlq $40
}
define i64 @test6(i64 %x) nounwind {
%and = shl i64 %x, 32
%shl = and i64 %and, -281474976710656
ret i64 %shl
; CHECK: test6:
; CHECK: andq $-65536
; CHECK: shlq $32
}
define i64 @test7(i64 %x) nounwind {
%or = shl i64 %x, 40
%shl = or i64 %or, 264982302294016
ret i64 %shl
; CHECK: test7:
; CHECK: orq $241
; CHECK: shlq $40
}
define i64 @test8(i64 %x) nounwind {
%or = shl i64 %x, 40
%shl = or i64 %or, 34084860461056
ret i64 %shl
; CHECK: test8:
; CHECK: orq $31
; CHECK: shlq $40
}
define i64 @test9(i64 %x) nounwind {
%xor = shl i64 %x, 40
%shl = xor i64 %xor, 264982302294016
ret i64 %shl
; CHECK: test9:
; CHECK: orq $241
; CHECK: shlq $40
}
define i64 @test10(i64 %x) nounwind {
%xor = shl i64 %x, 40
%shl = xor i64 %xor, 34084860461056
ret i64 %shl
; CHECK: test10:
; CHECK: xorq $31
; CHECK: shlq $40
}
define i64 @test11(i64 %x) nounwind {
%xor = shl i64 %x, 33
%shl = xor i64 %xor, -562949953421312
ret i64 %shl
; CHECK: test11:
; CHECK: xorq $-65536
; CHECK: shlq $33
}