From 6bcdb5b9034a02901175e94e89b4815f28f2f141 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Wed, 26 Sep 2012 08:24:51 +0000 Subject: [PATCH] Add RORX code generation support git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164674 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 4 ++++ lib/Target/X86/X86InstrShiftRotate.td | 23 ++++++++++++++++++++ test/CodeGen/X86/ptr-rotate.ll | 2 +- test/CodeGen/X86/rot32.ll | 29 ++++++++++++++++++++++++- test/CodeGen/X86/rot64.ll | 31 +++++++++++++++++++++++++-- test/CodeGen/X86/rotate2.ll | 2 +- 6 files changed, 86 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index f575e801844..2fb2ed2104c 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -561,6 +561,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VSQRTPSYr_Int, X86::VSQRTPSYm_Int, TB_ALIGN_32 }, { X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE }, { X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE }, + + // BMI/BMI2 foldable instructions + { X86::RORX32ri, X86::RORX32mi, 0 }, + { X86::RORX64ri, X86::RORX64mi, 0 }, }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index bdeb63ffbd6..fe7d0ecf896 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -839,6 +839,16 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem, } // Defs = [EFLAGS] +def ROT32L2R_imm8 : SDNodeXFormgetZExtValue()); +}]>; + +def ROT64L2R_imm8 : SDNodeXFormgetZExtValue()); +}]>; + multiclass bmi_rotate { let neverHasSideEffects = 1 in { def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2), @@ -873,4 +883,17 @@ let Predicates = [HasBMI2] in { defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W; defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize; defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W; + + // Prefer RORX which is non-destructive and doesn't update EFLAGS. + let AddedComplexity = 10 in { + def : Pat<(rotl GR32:$src, (i8 imm:$shamt)), + (RORX32ri GR32:$src, (ROT32L2R_imm8 imm:$shamt))>; + def : Pat<(rotl GR64:$src, (i8 imm:$shamt)), + (RORX64ri GR64:$src, (ROT64L2R_imm8 imm:$shamt))>; + } + + def : Pat<(rotl (loadi32 addr:$src), (i8 imm:$shamt)), + (RORX32mi addr:$src, (ROT32L2R_imm8 imm:$shamt))>; + def : Pat<(rotl (loadi64 addr:$src), (i8 imm:$shamt)), + (RORX64mi addr:$src, (ROT64L2R_imm8 imm:$shamt))>; } diff --git a/test/CodeGen/X86/ptr-rotate.ll b/test/CodeGen/X86/ptr-rotate.ll index 6debd16ba5d..fbd13b50364 100644 --- a/test/CodeGen/X86/ptr-rotate.ll +++ b/test/CodeGen/X86/ptr-rotate.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=i386-apple-darwin -o - < %s | FileCheck %s +; RUN: llc -mtriple=i386-apple-darwin -mcpu=corei7 -o - < %s | FileCheck %s define i32 @func(i8* %A) nounwind readnone { entry: diff --git a/test/CodeGen/X86/rot32.ll b/test/CodeGen/X86/rot32.ll index 99602fd64ff..e95a734e048 100644 --- a/test/CodeGen/X86/rot32.ll +++ b/test/CodeGen/X86/rot32.ll @@ -1,4 +1,5 @@ -; RUN: llc < %s -march=x86 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=corei7 | FileCheck %s +; RUN: llc < %s -march=x86 -mcpu=core-avx2 | FileCheck %s --check-prefix=BMI2 define i32 @foo(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: @@ -48,12 +49,25 @@ define i32 @xfoo(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: ; CHECK: xfoo: ; CHECK: roll $7 +; BMI2: xfoo: +; BMI2: rorxl $25 %0 = lshr i32 %x, 25 %1 = shl i32 %x, 7 %2 = or i32 %0, %1 ret i32 %2 } +define i32 @xfoop(i32* %p) nounwind readnone { +entry: +; BMI2: xfoop: +; BMI2: rorxl $25, ({{.+}}), %{{.+}} + %x = load i32* %p + %a = lshr i32 %x, 25 + %b = shl i32 %x, 7 + %c = or i32 %a, %b + ret i32 %c +} + define i32 @xbar(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: ; CHECK: xbar: @@ -68,12 +82,25 @@ define i32 @xun(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: ; CHECK: xun: ; CHECK: roll $25 +; BMI2: xun: +; BMI2: rorxl $7 %0 = lshr i32 %x, 7 %1 = shl i32 %x, 25 %2 = or i32 %0, %1 ret i32 %2 } +define i32 @xunp(i32* %p) nounwind readnone { +entry: +; BMI2: xunp: +; BMI2: rorxl $7, ({{.+}}), %{{.+}} + %x = load i32* %p + %a = lshr i32 %x, 7 + %b = shl i32 %x, 25 + %c = or i32 %a, %b + ret i32 %c +} + define i32 @xbu(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: ; CHECK: xbu: diff --git a/test/CodeGen/X86/rot64.ll b/test/CodeGen/X86/rot64.ll index 4e082bb860b..7fa982d83b6 100644 --- a/test/CodeGen/X86/rot64.ll +++ b/test/CodeGen/X86/rot64.ll @@ -1,8 +1,9 @@ -; RUN: llc < %s -march=x86-64 > %t -; RUN: grep rol %t | count 3 +; RUN: llc < %s -march=x86-64 -mcpu=corei7 > %t +; RUN: grep rol %t | count 5 ; RUN: grep ror %t | count 1 ; RUN: grep shld %t | count 2 ; RUN: grep shrd %t | count 2 +; RUN: llc < %s -march=x86-64 -mcpu=core-avx2 | FileCheck %s --check-prefix=BMI2 define i64 @foo(i64 %x, i64 %y, i64 %z) nounwind readnone { entry: @@ -42,12 +43,25 @@ entry: define i64 @xfoo(i64 %x, i64 %y, i64 %z) nounwind readnone { entry: +; BMI2: xfoo: +; BMI2: rorxq $57 %0 = lshr i64 %x, 57 %1 = shl i64 %x, 7 %2 = or i64 %0, %1 ret i64 %2 } +define i64 @xfoop(i64* %p) nounwind readnone { +entry: +; BMI2: xfoop: +; BMI2: rorxq $57, ({{.+}}), %{{.+}} + %x = load i64* %p + %a = lshr i64 %x, 57 + %b = shl i64 %x, 7 + %c = or i64 %a, %b + ret i64 %c +} + define i64 @xbar(i64 %x, i64 %y, i64 %z) nounwind readnone { entry: %0 = shl i64 %y, 7 @@ -58,12 +72,25 @@ entry: define i64 @xun(i64 %x, i64 %y, i64 %z) nounwind readnone { entry: +; BMI2: xun: +; BMI2: rorxq $7 %0 = lshr i64 %x, 7 %1 = shl i64 %x, 57 %2 = or i64 %0, %1 ret i64 %2 } +define i64 @xunp(i64* %p) nounwind readnone { +entry: +; BMI2: xunp: +; BMI2: rorxq $7, ({{.+}}), %{{.+}} + %x = load i64* %p + %a = lshr i64 %x, 7 + %b = shl i64 %x, 57 + %c = or i64 %a, %b + ret i64 %c +} + define i64 @xbu(i64 %x, i64 %y, i64 %z) nounwind readnone { entry: %0 = lshr i64 %y, 7 diff --git a/test/CodeGen/X86/rotate2.ll b/test/CodeGen/X86/rotate2.ll index 2eea3999e7b..2316c708507 100644 --- a/test/CodeGen/X86/rotate2.ll +++ b/test/CodeGen/X86/rotate2.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep rol | count 2 +; RUN: llc < %s -march=x86-64 -mcpu=corei7 | grep rol | count 2 define i64 @test1(i64 %x) nounwind { entry: