Add SARX/SHRX/SHLX code generation support

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164675 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael Liao 2012-09-26 08:26:25 +00:00
parent 6bcdb5b903
commit 4fa2ddbb94
5 changed files with 241 additions and 2 deletions

View File

@ -565,6 +565,12 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
// BMI/BMI2 foldable instructions
{ X86::RORX32ri, X86::RORX32mi, 0 },
{ X86::RORX64ri, X86::RORX64mi, 0 },
{ X86::SARX32rr, X86::SARX32rm, 0 },
{ X86::SARX64rr, X86::SARX64rm, 0 },
{ X86::SHRX32rr, X86::SHRX32rm, 0 },
{ X86::SHRX64rr, X86::SHRX64rm, 0 },
{ X86::SHLX32rr, X86::SHLX32rm, 0 },
{ X86::SHLX64rr, X86::SHLX64rm, 0 },
};
for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {

View File

@ -896,4 +896,59 @@ let Predicates = [HasBMI2] in {
(RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>;
def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)),
(RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>;
// Prefer SARX/SHRX/SHLX over SAR/SHR/SHL with variable shift BUT not
// immediate shift, i.e. the following code is considered better
//
// mov %edi, %esi
// shl $imm, %esi
// ... %edi, ...
//
// than
//
// movb $imm, %sil
// shlx %sil, %edi, %esi
// ... %edi, ...
//
// Select the BMI2 three-operand shifts (SARX/SHRX/SHLX, register-register
// forms) for sra/srl/shl whose shift count is held in an 8-bit register.
// AddedComplexity = 1 raises the priority of these patterns during
// instruction selection.
// NOTE(review): the competing SAR/SHR/SHL patterns are defined outside this
// hunk -- confirm they use the default complexity so these win.
//
// The count arrives as a GR8 value, but each pattern passes an i32/i64 value
// as the instruction's count operand, so the 8-bit count is inserted into the
// low byte (sub_8bit) of an otherwise undefined (IMPLICIT_DEF) register.
// NOTE(review): leaving the upper bits undefined presumes the instructions
// ignore them -- confirm against the ISA manual.
let AddedComplexity = 1 in {
// Arithmetic right shift, 32-bit.
def : Pat<(sra GR32:$src1, GR8:$src2),
(SARX32rr GR32:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
// Arithmetic right shift, 64-bit.
def : Pat<(sra GR64:$src1, GR8:$src2),
(SARX64rr GR64:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
// Logical right shift, 32-bit.
def : Pat<(srl GR32:$src1, GR8:$src2),
(SHRX32rr GR32:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
// Logical right shift, 64-bit.
def : Pat<(srl GR64:$src1, GR8:$src2),
(SHRX64rr GR64:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
// Left shift, 32-bit.
def : Pat<(shl GR32:$src1, GR8:$src2),
(SHLX32rr GR32:$src1,
(INSERT_SUBREG
(i32 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
// Left shift, 64-bit.
def : Pat<(shl GR64:$src1, GR8:$src2),
(SHLX64rr GR64:$src1,
(INSERT_SUBREG
(i64 (IMPLICIT_DEF)), GR8:$src2, sub_8bit))>;
}
// Patterns on SARXrm/SHRXrm/SHLXrm are explicitly omitted to favor
//
// mov (%ecx), %esi
// shl $imm, %esi
//
// over
//
// movb $imm, %al
// shlx %al, (%ecx), %esi
//
// As SARXrr/SHRXrr/SHLXrr is favored on variable shift, the peephole
// optimization will fold them into SARXrm/SHRXrm/SHLXrm if possible.
}

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=corei7 | FileCheck %s
; rdar://5571034
; This requires physreg joining, %vreg13 is live everywhere:

View File

@ -0,0 +1,178 @@
; RUN: llc -mtriple=i386-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI2 %s
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=core-avx2 < %s | FileCheck --check-prefix=BMI264 %s
; 32-bit left shift by a variable count: both RUN configurations are expected
; to emit shlxl.
define i32 @shl32(i32 %x, i32 %shamt) nounwind uwtable readnone {
entry:
; BMI2: shl32
; BMI2: shlxl
; BMI2: ret
; BMI264: shl32
; BMI264: shlxl
; BMI264: ret
  %res = shl i32 %x, %shamt
  ret i32 %res
}
; 32-bit left shift by the constant 5: shlxl must not be emitted in either
; RUN configuration.
define i32 @shl32i(i32 %x) nounwind uwtable readnone {
entry:
; BMI2: shl32i
; BMI2-NOT: shlxl
; BMI2: ret
; BMI264: shl32i
; BMI264-NOT: shlxl
; BMI264: ret
  %res = shl i32 %x, 5
  ret i32 %res
}
; 32-bit left shift of a loaded value by a variable count: the checks require
; shlxl with a memory source operand (the load folded into the shift).
; The function is marked readonly rather than readnone because it loads
; through %p.
define i32 @shl32p(i32* %p, i32 %shamt) nounwind uwtable readonly {
entry:
  %x = load i32* %p
  %shl = shl i32 %x, %shamt
; BMI2: shl32p
; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI2: ret
; BMI264: shl32p
; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
  ret i32 %shl
}
; 32-bit left shift of a loaded value by the constant 5: shlxl must not be
; emitted in either RUN configuration.
; The function is marked readonly rather than readnone because it loads
; through %p.
define i32 @shl32pi(i32* %p) nounwind uwtable readonly {
entry:
  %x = load i32* %p
  %shl = shl i32 %x, 5
; BMI2: shl32pi
; BMI2-NOT: shlxl
; BMI2: ret
; BMI264: shl32pi
; BMI264-NOT: shlxl
; BMI264: ret
  ret i32 %shl
}
; 64-bit left shift by a variable count: only checked on the x86-64 RUN line,
; which is expected to emit shlxq.
define i64 @shl64(i64 %x, i64 %shamt) nounwind uwtable readnone {
entry:
; BMI264: shl64
; BMI264: shlxq
; BMI264: ret
  %res = shl i64 %x, %shamt
  ret i64 %res
}
; 64-bit left shift by the constant 7: shlxq must not be emitted on the
; x86-64 RUN line.
define i64 @shl64i(i64 %x) nounwind uwtable readnone {
entry:
; BMI264: shl64i
; BMI264-NOT: shlxq
; BMI264: ret
  %res = shl i64 %x, 7
  ret i64 %res
}
; 64-bit left shift of a loaded value by a variable count: the check requires
; shlxq with a memory source operand (the load folded into the shift).
; The function is marked readonly rather than readnone because it loads
; through %p.
define i64 @shl64p(i64* %p, i64 %shamt) nounwind uwtable readonly {
entry:
  %x = load i64* %p
  %shl = shl i64 %x, %shamt
; BMI264: shl64p
; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
  ret i64 %shl
}
; 64-bit left shift of a loaded value by the constant 7: shlxq must not be
; emitted on the x86-64 RUN line.
; The name check now matches this function (shl64pi); the previous pattern was
; the name of the preceding function and could match text left over from it.
; The function is marked readonly rather than readnone because it loads
; through %p.
define i64 @shl64pi(i64* %p) nounwind uwtable readonly {
entry:
  %x = load i64* %p
  %shl = shl i64 %x, 7
; BMI264: shl64pi
; BMI264-NOT: shlxq
; BMI264: ret
  ret i64 %shl
}
; 32-bit logical right shift by a variable count: both RUN configurations are
; expected to emit shrxl.
define i32 @lshr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
entry:
; BMI2: lshr32
; BMI2: shrxl
; BMI2: ret
; BMI264: lshr32
; BMI264: shrxl
; BMI264: ret
  %res = lshr i32 %x, %shamt
  ret i32 %res
}
; 32-bit logical right shift of a loaded value by a variable count: the checks
; require shrxl with a memory source operand (the load folded into the shift).
; The x86-64 name check now matches this function (lshr32p) instead of the
; preceding function's name.
; The function is marked readonly rather than readnone because it loads
; through %p.
define i32 @lshr32p(i32* %p, i32 %shamt) nounwind uwtable readonly {
entry:
  %x = load i32* %p
  %res = lshr i32 %x, %shamt
; BMI2: lshr32p
; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI2: ret
; BMI264: lshr32p
; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
  ret i32 %res
}
; 64-bit logical right shift by a variable count: only checked on the x86-64
; RUN line, which is expected to emit shrxq.
define i64 @lshr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
entry:
; BMI264: lshr64
; BMI264: shrxq
; BMI264: ret
  %res = lshr i64 %x, %shamt
  ret i64 %res
}
; 64-bit logical right shift of a loaded value by a variable count: the check
; requires shrxq with a memory source operand (the load folded into the shift).
; The function is marked readonly rather than readnone because it loads
; through %p.
define i64 @lshr64p(i64* %p, i64 %shamt) nounwind uwtable readonly {
entry:
  %x = load i64* %p
  %res = lshr i64 %x, %shamt
; BMI264: lshr64p
; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
  ret i64 %res
}
; 32-bit arithmetic right shift by a variable count: both RUN configurations
; are expected to emit sarxl.
define i32 @ashr32(i32 %x, i32 %shamt) nounwind uwtable readnone {
entry:
; BMI2: ashr32
; BMI2: sarxl
; BMI2: ret
; BMI264: ashr32
; BMI264: sarxl
; BMI264: ret
  %res = ashr i32 %x, %shamt
  ret i32 %res
}
; 32-bit arithmetic right shift of a loaded value by a variable count: the
; checks require sarxl with a memory source operand (the load folded into the
; shift).
; The x86-64 name check now matches this function (ashr32p) instead of the
; preceding function's name.
; The function is marked readonly rather than readnone because it loads
; through %p.
define i32 @ashr32p(i32* %p, i32 %shamt) nounwind uwtable readonly {
entry:
  %x = load i32* %p
  %res = ashr i32 %x, %shamt
; BMI2: ashr32p
; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI2: ret
; BMI264: ashr32p
; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
  ret i32 %res
}
; 64-bit arithmetic right shift by a variable count: only checked on the
; x86-64 RUN line, which is expected to emit sarxq.
define i64 @ashr64(i64 %x, i64 %shamt) nounwind uwtable readnone {
entry:
; BMI264: ashr64
; BMI264: sarxq
; BMI264: ret
  %res = ashr i64 %x, %shamt
  ret i64 %res
}
; 64-bit arithmetic right shift of a loaded value by a variable count: the
; check requires sarxq with a memory source operand (the load folded into the
; shift).
; The function is marked readonly rather than readnone because it loads
; through %p.
define i64 @ashr64p(i64* %p, i64 %shamt) nounwind uwtable readonly {
entry:
  %x = load i64* %p
  %res = ashr i64 %x, %shamt
; BMI264: ashr64p
; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: ret
  ret i64 %res
}

View File

@ -1,4 +1,4 @@
; RUN: llc -mtriple=i386-apple-darwin9 -fast-isel=false -O0 < %s | FileCheck %s
; RUN: llc -mtriple=i386-apple-darwin9 -mcpu=corei7 -fast-isel=false -O0 < %s | FileCheck %s
; Gather non-machine specific tests for the transformations in
; CodeGen/SelectionDAG/TargetLowering. Currently, these