mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 16:33:28 +00:00
Implement "punpckldq %xmm0, %xmm0" as "pshufd $0x50, %xmm0, %xmm0" unless optimizing for code size.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@56711 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
19c874638d
commit
b7a75a5a54
@ -32,6 +32,7 @@
|
|||||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||||
#include "llvm/CodeGen/SelectionDAGISel.h"
|
#include "llvm/CodeGen/SelectionDAGISel.h"
|
||||||
#include "llvm/Target/TargetMachine.h"
|
#include "llvm/Target/TargetMachine.h"
|
||||||
|
#include "llvm/Target/TargetOptions.h"
|
||||||
#include "llvm/Support/Compiler.h"
|
#include "llvm/Support/Compiler.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
#include "llvm/Support/MathExtras.h"
|
#include "llvm/Support/MathExtras.h"
|
||||||
@ -130,12 +131,17 @@ namespace {
|
|||||||
///
|
///
|
||||||
MachineBasicBlock *CurBB;
|
MachineBasicBlock *CurBB;
|
||||||
|
|
||||||
|
/// OptForSize - If true, selector should try to optimize for code size
|
||||||
|
/// instead of performance.
|
||||||
|
bool OptForSize;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
|
X86DAGToDAGISel(X86TargetMachine &tm, bool fast)
|
||||||
: SelectionDAGISel(X86Lowering, fast),
|
: SelectionDAGISel(X86Lowering, fast),
|
||||||
ContainsFPCode(false), TM(tm),
|
ContainsFPCode(false), TM(tm),
|
||||||
X86Lowering(*TM.getTargetLowering()),
|
X86Lowering(*TM.getTargetLowering()),
|
||||||
Subtarget(&TM.getSubtarget<X86Subtarget>()) {}
|
Subtarget(&TM.getSubtarget<X86Subtarget>()),
|
||||||
|
OptForSize(OptimizeForSize) {}
|
||||||
|
|
||||||
virtual bool runOnFunction(Function &Fn) {
|
virtual bool runOnFunction(Function &Fn) {
|
||||||
// Make sure we re-emit a set of the global base reg if necessary
|
// Make sure we re-emit a set of the global base reg if necessary
|
||||||
@ -650,6 +656,10 @@ void X86DAGToDAGISel::PreprocessForFPConvert() {
|
|||||||
/// when it has created a SelectionDAG for us to codegen.
|
/// when it has created a SelectionDAG for us to codegen.
|
||||||
void X86DAGToDAGISel::InstructionSelect() {
|
void X86DAGToDAGISel::InstructionSelect() {
|
||||||
CurBB = BB; // BB can change as result of isel.
|
CurBB = BB; // BB can change as result of isel.
|
||||||
|
if (!OptForSize) {
|
||||||
|
const Function *F = CurDAG->getMachineFunction().getFunction();
|
||||||
|
OptForSize = !F->isDeclaration() && F->hasNote(Attribute::OptimizeForSize);
|
||||||
|
}
|
||||||
|
|
||||||
DEBUG(BB->dump());
|
DEBUG(BB->dump());
|
||||||
if (!Fast)
|
if (!Fast)
|
||||||
|
@ -186,6 +186,7 @@ def In64BitMode : Predicate<"Subtarget->is64Bit()">;
|
|||||||
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
|
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
|
||||||
def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
|
def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
|
||||||
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
|
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
|
||||||
|
def OptForSpeed : Predicate<"!OptForSize">;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// X86 Instruction Format Definitions.
|
// X86 Instruction Format Definitions.
|
||||||
|
@ -744,7 +744,7 @@ def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
|
|||||||
addr:$dst)]>;
|
addr:$dst)]>;
|
||||||
|
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
let AddedComplexity = 15 in {
|
let AddedComplexity = 20 in {
|
||||||
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
|
||||||
"movlhps\t{$src2, $dst|$dst, $src2}",
|
"movlhps\t{$src2, $dst|$dst, $src2}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
@ -759,7 +759,7 @@ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:
|
|||||||
} // AddedComplexity
|
} // AddedComplexity
|
||||||
} // Constraints = "$src1 = $dst"
|
} // Constraints = "$src1 = $dst"
|
||||||
|
|
||||||
let AddedComplexity = 15 in
|
let AddedComplexity = 20 in
|
||||||
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)),
|
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef), MOVDDUP_shuffle_mask)),
|
||||||
(MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
|
(MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
|
||||||
|
|
||||||
@ -2921,6 +2921,7 @@ def : Pat<(vector_shuffle (bc_v4i32 (memopv4f32 addr:$src1)), (undef),
|
|||||||
SHUFP_unary_shuffle_mask:$sm),
|
SHUFP_unary_shuffle_mask:$sm),
|
||||||
(PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>,
|
(PSHUFDmi addr:$src1, SHUFP_unary_shuffle_mask:$sm)>,
|
||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
|
|
||||||
// Special binary v4i32 shuffle cases with SHUFPS.
|
// Special binary v4i32 shuffle cases with SHUFPS.
|
||||||
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2),
|
def : Pat<(v4i32 (vector_shuffle VR128:$src1, (v4i32 VR128:$src2),
|
||||||
PSHUFD_binary_shuffle_mask:$sm)),
|
PSHUFD_binary_shuffle_mask:$sm)),
|
||||||
@ -2937,11 +2938,21 @@ def : Pat<(v2i64 (vector_shuffle VR128:$src1, VR128:$src2,
|
|||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
// Special unary SHUFPDrri case.
|
// Special unary SHUFPDrri case.
|
||||||
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef),
|
def : Pat<(v2i64 (vector_shuffle VR128:$src1, (undef),
|
||||||
SHUFP_unary_shuffle_mask:$sm)),
|
SHUFP_unary_shuffle_mask:$sm)),
|
||||||
(SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
|
(SHUFPDrri VR128:$src1, VR128:$src1, SHUFP_unary_shuffle_mask:$sm)>,
|
||||||
Requires<[HasSSE2]>;
|
Requires<[HasSSE2]>;
|
||||||
|
|
||||||
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
|
// vector_shuffle v1, <undef>, <0, 0, 1, 1, ...>
|
||||||
|
let AddedComplexity = 15 in {
|
||||||
|
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
|
||||||
|
UNPCKL_v_undef_shuffle_mask:$sm)),
|
||||||
|
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
|
||||||
|
Requires<[OptForSpeed, HasSSE2]>;
|
||||||
|
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
|
||||||
|
UNPCKL_v_undef_shuffle_mask:$sm)),
|
||||||
|
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
|
||||||
|
Requires<[OptForSpeed, HasSSE2]>;
|
||||||
|
}
|
||||||
let AddedComplexity = 10 in {
|
let AddedComplexity = 10 in {
|
||||||
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
|
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
|
||||||
UNPCKL_v_undef_shuffle_mask)),
|
UNPCKL_v_undef_shuffle_mask)),
|
||||||
@ -2958,6 +2969,16 @@ def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
|
|||||||
}
|
}
|
||||||
|
|
||||||
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
|
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
|
||||||
|
let AddedComplexity = 15 in {
|
||||||
|
def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
|
||||||
|
UNPCKH_v_undef_shuffle_mask:$sm)),
|
||||||
|
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
|
||||||
|
Requires<[OptForSpeed, HasSSE2]>;
|
||||||
|
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
|
||||||
|
UNPCKH_v_undef_shuffle_mask:$sm)),
|
||||||
|
(PSHUFDri VR128:$src, PSHUFD_shuffle_mask:$sm)>,
|
||||||
|
Requires<[OptForSpeed, HasSSE2]>;
|
||||||
|
}
|
||||||
let AddedComplexity = 10 in {
|
let AddedComplexity = 10 in {
|
||||||
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
|
def : Pat<(v4f32 (vector_shuffle VR128:$src, (undef),
|
||||||
UNPCKH_v_undef_shuffle_mask)),
|
UNPCKH_v_undef_shuffle_mask)),
|
||||||
@ -2973,7 +2994,7 @@ def : Pat<(v4i32 (vector_shuffle VR128:$src, (undef),
|
|||||||
(PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
(PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
let AddedComplexity = 15 in {
|
let AddedComplexity = 20 in {
|
||||||
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
|
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
|
||||||
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
|
def : Pat<(v4i32 (vector_shuffle VR128:$src1, VR128:$src2,
|
||||||
MOVHP_shuffle_mask)),
|
MOVHP_shuffle_mask)),
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
; RUN: grep pshufd %t | count 1
|
; RUN: grep pshufd %t | count 1
|
||||||
; RUN: grep unpckhpd %t | count 1
|
; RUN: grep unpckhpd %t | count 1
|
||||||
|
|
||||||
define void @test1(<4 x float>* %F, float* %f) {
|
define void @test1(<4 x float>* %F, float* %f) nounwind {
|
||||||
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
|
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
|
||||||
%tmp7 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
|
%tmp7 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
|
||||||
%tmp2 = extractelement <4 x float> %tmp7, i32 0 ; <float> [#uses=1]
|
%tmp2 = extractelement <4 x float> %tmp7, i32 0 ; <float> [#uses=1]
|
||||||
@ -12,21 +12,21 @@ define void @test1(<4 x float>* %F, float* %f) {
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define float @test2(<4 x float>* %F, float* %f) {
|
define float @test2(<4 x float>* %F, float* %f) nounwind {
|
||||||
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
|
%tmp = load <4 x float>* %F ; <<4 x float>> [#uses=2]
|
||||||
%tmp7 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
|
%tmp7 = add <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
|
||||||
%tmp2 = extractelement <4 x float> %tmp7, i32 2 ; <float> [#uses=1]
|
%tmp2 = extractelement <4 x float> %tmp7, i32 2 ; <float> [#uses=1]
|
||||||
ret float %tmp2
|
ret float %tmp2
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @test3(float* %R, <4 x float>* %P1) {
|
define void @test3(float* %R, <4 x float>* %P1) nounwind {
|
||||||
%X = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
|
%X = load <4 x float>* %P1 ; <<4 x float>> [#uses=1]
|
||||||
%tmp = extractelement <4 x float> %X, i32 3 ; <float> [#uses=1]
|
%tmp = extractelement <4 x float> %X, i32 3 ; <float> [#uses=1]
|
||||||
store float %tmp, float* %R
|
store float %tmp, float* %R
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define double @test4(double %A) {
|
define double @test4(double %A) nounwind {
|
||||||
%tmp1 = call <2 x double> @foo( ) ; <<2 x double>> [#uses=1]
|
%tmp1 = call <2 x double> @foo( ) ; <<2 x double>> [#uses=1]
|
||||||
%tmp2 = extractelement <2 x double> %tmp1, i32 1 ; <double> [#uses=1]
|
%tmp2 = extractelement <2 x double> %tmp1, i32 1 ; <double> [#uses=1]
|
||||||
%tmp3 = add double %tmp2, %A ; <double> [#uses=1]
|
%tmp3 = add double %tmp2, %A ; <double> [#uses=1]
|
||||||
|
19
test/CodeGen/X86/vec_shuffle-23.ll
Normal file
19
test/CodeGen/X86/vec_shuffle-23.ll
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | not grep punpck
|
||||||
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 | grep pshufd
|
||||||
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -optimize-size | grep punpck
|
||||||
|
|
||||||
|
define i32 @t() nounwind {
|
||||||
|
entry:
|
||||||
|
%a = alloca <4 x i32> ; <<4 x i32>*> [#uses=2]
|
||||||
|
%b = alloca <4 x i32> ; <<4 x i32>*> [#uses=5]
|
||||||
|
volatile store <4 x i32> < i32 0, i32 1, i32 2, i32 3 >, <4 x i32>* %a
|
||||||
|
%tmp = load <4 x i32>* %a ; <<4 x i32>> [#uses=1]
|
||||||
|
store <4 x i32> %tmp, <4 x i32>* %b
|
||||||
|
%tmp1 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1]
|
||||||
|
%tmp2 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1]
|
||||||
|
%punpckldq = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x i32>> [#uses=1]
|
||||||
|
store <4 x i32> %punpckldq, <4 x i32>* %b
|
||||||
|
%tmp3 = load <4 x i32>* %b ; <<4 x i32>> [#uses=1]
|
||||||
|
%result = extractelement <4 x i32> %tmp3, i32 0 ; <i32> [#uses=1]
|
||||||
|
ret i32 %result
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user