mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-27 02:31:09 +00:00
Fix a number of byval / memcpy / memset related codegen issues.
1. x86-64 byval alignment should be the max of 8 and the alignment of the type. Previously the code did not do what the commit message said.
2. Do not use byte-repeat move and store operations; these are slow.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55139 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
66b17ba0d2
commit
1887c1c2f9
@ -796,8 +796,14 @@ static void getMaxByValAlign(const Type *Ty, unsigned &MaxAlign) {
|
|||||||
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
|
/// that contain SSE vectors are placed at 16-byte boundaries while the rest
|
||||||
/// are at 4-byte boundaries.
|
/// are at 4-byte boundaries.
|
||||||
unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
|
unsigned X86TargetLowering::getByValTypeAlignment(const Type *Ty) const {
|
||||||
if (Subtarget->is64Bit())
|
if (Subtarget->is64Bit()) {
|
||||||
return getTargetData()->getABITypeAlignment(Ty);
|
// Max of 8 and alignment of type.
|
||||||
|
unsigned TyAlign = getTargetData()->getABITypeAlignment(Ty);
|
||||||
|
if (TyAlign > 8)
|
||||||
|
return TyAlign;
|
||||||
|
return 8;
|
||||||
|
}
|
||||||
|
|
||||||
unsigned Align = 4;
|
unsigned Align = 4;
|
||||||
if (Subtarget->hasSSE1())
|
if (Subtarget->hasSSE1())
|
||||||
getMaxByValAlign(Ty, Align);
|
getMaxByValAlign(Ty, Align);
|
||||||
@ -5014,16 +5020,16 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
|
|||||||
|
|
||||||
SDValue
|
SDValue
|
||||||
X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
|
X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
|
||||||
SDValue Chain,
|
SDValue Chain,
|
||||||
SDValue Dst, SDValue Src,
|
SDValue Dst, SDValue Src,
|
||||||
SDValue Size, unsigned Align,
|
SDValue Size, unsigned Align,
|
||||||
const Value *DstSV, uint64_t DstSVOff) {
|
const Value *DstSV, uint64_t DstSVOff) {
|
||||||
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
|
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
|
||||||
|
|
||||||
/// If not DWORD aligned or size is more than the threshold, call the library.
|
/// If not DWORD aligned or size is more than the threshold, call the library.
|
||||||
/// The libc version is likely to be faster for these cases. It can use the
|
/// The libc version is likely to be faster for these cases. It can use the
|
||||||
/// address value and run time information about the CPU.
|
/// address value and run time information about the CPU.
|
||||||
if ((Align & 3) == 0 ||
|
if ((Align & 3) != 0 ||
|
||||||
!ConstantSize ||
|
!ConstantSize ||
|
||||||
ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) {
|
ConstantSize->getValue() > getSubtarget()->getMaxInlineSizeThreshold()) {
|
||||||
SDValue InFlag(0, 0);
|
SDValue InFlag(0, 0);
|
||||||
@ -5065,27 +5071,27 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
|
|||||||
|
|
||||||
// If the value is a constant, then we can potentially use larger sets.
|
// If the value is a constant, then we can potentially use larger sets.
|
||||||
switch (Align & 3) {
|
switch (Align & 3) {
|
||||||
case 2: // WORD aligned
|
case 2: // WORD aligned
|
||||||
AVT = MVT::i16;
|
AVT = MVT::i16;
|
||||||
ValReg = X86::AX;
|
ValReg = X86::AX;
|
||||||
Val = (Val << 8) | Val;
|
Val = (Val << 8) | Val;
|
||||||
break;
|
break;
|
||||||
case 0: // DWORD aligned
|
case 0: // DWORD aligned
|
||||||
AVT = MVT::i32;
|
AVT = MVT::i32;
|
||||||
ValReg = X86::EAX;
|
ValReg = X86::EAX;
|
||||||
Val = (Val << 8) | Val;
|
Val = (Val << 8) | Val;
|
||||||
Val = (Val << 16) | Val;
|
Val = (Val << 16) | Val;
|
||||||
if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
|
if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
|
||||||
AVT = MVT::i64;
|
AVT = MVT::i64;
|
||||||
ValReg = X86::RAX;
|
ValReg = X86::RAX;
|
||||||
Val = (Val << 32) | Val;
|
Val = (Val << 32) | Val;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default: // Byte aligned
|
default: // Byte aligned
|
||||||
AVT = MVT::i8;
|
AVT = MVT::i8;
|
||||||
ValReg = X86::AL;
|
ValReg = X86::AL;
|
||||||
Count = DAG.getIntPtrConstant(SizeVal);
|
Count = DAG.getIntPtrConstant(SizeVal);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (AVT.bitsGT(MVT::i8)) {
|
if (AVT.bitsGT(MVT::i8)) {
|
||||||
@ -5153,13 +5159,11 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG,
|
|||||||
|
|
||||||
SDValue
|
SDValue
|
||||||
X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
|
X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
|
||||||
SDValue Chain,
|
SDValue Chain, SDValue Dst, SDValue Src,
|
||||||
SDValue Dst, SDValue Src,
|
SDValue Size, unsigned Align,
|
||||||
SDValue Size, unsigned Align,
|
bool AlwaysInline,
|
||||||
bool AlwaysInline,
|
const Value *DstSV, uint64_t DstSVOff,
|
||||||
const Value *DstSV, uint64_t DstSVOff,
|
const Value *SrcSV, uint64_t SrcSVOff) {
|
||||||
const Value *SrcSV, uint64_t SrcSVOff){
|
|
||||||
|
|
||||||
// This requires the copy size to be a constant, preferably
|
// This requires the copy size to be a constant, preferably
|
||||||
// within a subtarget-specific limit.
|
// within a subtarget-specific limit.
|
||||||
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
|
ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
|
||||||
@ -5169,21 +5173,19 @@ X86TargetLowering::EmitTargetCodeForMemcpy(SelectionDAG &DAG,
|
|||||||
if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
|
if (!AlwaysInline && SizeVal > getSubtarget()->getMaxInlineSizeThreshold())
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
MVT AVT;
|
/// If not DWORD aligned, call the library.
|
||||||
unsigned BytesLeft = 0;
|
if ((Align & 3) != 0)
|
||||||
if (Align >= 8 && Subtarget->is64Bit())
|
return SDValue();
|
||||||
|
|
||||||
|
// DWORD aligned
|
||||||
|
MVT AVT = MVT::i32;
|
||||||
|
if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) // QWORD aligned
|
||||||
AVT = MVT::i64;
|
AVT = MVT::i64;
|
||||||
else if (Align >= 4)
|
|
||||||
AVT = MVT::i32;
|
|
||||||
else if (Align >= 2)
|
|
||||||
AVT = MVT::i16;
|
|
||||||
else
|
|
||||||
AVT = MVT::i8;
|
|
||||||
|
|
||||||
unsigned UBytes = AVT.getSizeInBits() / 8;
|
unsigned UBytes = AVT.getSizeInBits() / 8;
|
||||||
unsigned CountVal = SizeVal / UBytes;
|
unsigned CountVal = SizeVal / UBytes;
|
||||||
SDValue Count = DAG.getIntPtrConstant(CountVal);
|
SDValue Count = DAG.getIntPtrConstant(CountVal);
|
||||||
BytesLeft = SizeVal % UBytes;
|
unsigned BytesLeft = SizeVal % UBytes;
|
||||||
|
|
||||||
SDValue InFlag(0, 0);
|
SDValue InFlag(0, 0);
|
||||||
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
|
Chain = DAG.getCopyToReg(Chain, Subtarget->is64Bit() ? X86::RCX : X86::ECX,
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 3
|
; RUN: llvm-as < %s | llc -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
|
||||||
|
|
||||||
@A = global [32 x i32] zeroinitializer
|
@A = global [32 x i32] zeroinitializer
|
||||||
@B = global [32 x i32] zeroinitializer
|
@B = global [32 x i32] zeroinitializer
|
||||||
|
|
||||||
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
|
declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
|
||||||
|
|
||||||
define void @main() {
|
define void @main() nounwind {
|
||||||
; dword copy
|
; dword copy
|
||||||
call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*),
|
call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*),
|
||||||
i8* bitcast ([32 x i32]* @B to i8*),
|
i8* bitcast ([32 x i32]* @B to i8*),
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsl | count 2
|
; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
|
||||||
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
|
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
|
||||||
|
|
||||||
%struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
|
%struct.s = type { i32, i32, i32, i32, i32, i32, i32, i32,
|
||||||
@ -7,7 +7,7 @@
|
|||||||
i32, i32, i32, i32, i32, i32, i32, i32,
|
i32, i32, i32, i32, i32, i32, i32, i32,
|
||||||
i32 }
|
i32 }
|
||||||
|
|
||||||
define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) {
|
define void @g(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6) nounwind {
|
||||||
entry:
|
entry:
|
||||||
%d = alloca %struct.s, align 16
|
%d = alloca %struct.s, align 16
|
||||||
%tmp = getelementptr %struct.s* %d, i32 0, i32 0
|
%tmp = getelementptr %struct.s* %d, i32 0, i32 0
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsw | count 2
|
; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
|
||||||
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
|
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
|
||||||
|
|
||||||
%struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
|
%struct.s = type { i16, i16, i16, i16, i16, i16, i16, i16,
|
||||||
@ -13,7 +13,7 @@
|
|||||||
|
|
||||||
|
|
||||||
define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3,
|
define void @g(i16 signext %a1, i16 signext %a2, i16 signext %a3,
|
||||||
i16 signext %a4, i16 signext %a5, i16 signext %a6) {
|
i16 signext %a4, i16 signext %a5, i16 signext %a6) nounwind {
|
||||||
entry:
|
entry:
|
||||||
%a = alloca %struct.s, align 16
|
%a = alloca %struct.s, align 16
|
||||||
%tmp = getelementptr %struct.s* %a, i32 0, i32 0
|
%tmp = getelementptr %struct.s* %a, i32 0, i32 0
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsb | count 2
|
; RUN: llvm-as < %s | llc -march=x86-64 | grep rep.movsq | count 2
|
||||||
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
|
; RUN: llvm-as < %s | llc -march=x86 | grep rep.movsl | count 2
|
||||||
|
|
||||||
%struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
|
%struct.s = type { i8, i8, i8, i8, i8, i8, i8, i8,
|
||||||
|
45
test/CodeGen/X86/memset-2.ll
Normal file
45
test/CodeGen/X86/memset-2.ll
Normal file
@ -0,0 +1,45 @@
|
|||||||
|
; RUN: llvm-as < %s | llc -march=x86 | not grep rep
|
||||||
|
; RUN: llvm-as < %s | llc -march=x86 | grep memset
|
||||||
|
|
||||||
|
declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
|
||||||
|
|
||||||
|
define fastcc i32 @cli_scanzip(i32 %desc) nounwind {
|
||||||
|
entry:
|
||||||
|
br label %bb8.i.i.i.i
|
||||||
|
|
||||||
|
bb8.i.i.i.i: ; preds = %bb8.i.i.i.i, %entry
|
||||||
|
icmp eq i32 0, 0 ; <i1>:0 [#uses=1]
|
||||||
|
br i1 %0, label %bb61.i.i.i, label %bb8.i.i.i.i
|
||||||
|
|
||||||
|
bb32.i.i.i: ; preds = %bb61.i.i.i
|
||||||
|
ptrtoint i8* %tail.0.i.i.i to i32 ; <i32>:1 [#uses=1]
|
||||||
|
sub i32 0, %1 ; <i32>:2 [#uses=1]
|
||||||
|
icmp sgt i32 %2, 19 ; <i1>:3 [#uses=1]
|
||||||
|
br i1 %3, label %bb34.i.i.i, label %bb61.i.i.i
|
||||||
|
|
||||||
|
bb34.i.i.i: ; preds = %bb32.i.i.i
|
||||||
|
load i32* null, align 4 ; <i32>:4 [#uses=1]
|
||||||
|
icmp eq i32 %4, 101010256 ; <i1>:5 [#uses=1]
|
||||||
|
br i1 %5, label %bb8.i11.i.i.i, label %bb61.i.i.i
|
||||||
|
|
||||||
|
bb8.i11.i.i.i: ; preds = %bb8.i11.i.i.i, %bb34.i.i.i
|
||||||
|
icmp eq i32 0, 0 ; <i1>:6 [#uses=1]
|
||||||
|
br i1 %6, label %cli_dbgmsg.exit49.i, label %bb8.i11.i.i.i
|
||||||
|
|
||||||
|
cli_dbgmsg.exit49.i: ; preds = %bb8.i11.i.i.i
|
||||||
|
icmp eq [32768 x i8]* null, null ; <i1>:7 [#uses=1]
|
||||||
|
br i1 %7, label %bb1.i28.i, label %bb8.i.i
|
||||||
|
|
||||||
|
bb61.i.i.i: ; preds = %bb61.i.i.i, %bb34.i.i.i, %bb32.i.i.i, %bb8.i.i.i.i
|
||||||
|
%tail.0.i.i.i = getelementptr [1024 x i8]* null, i32 0, i32 0 ; <i8*> [#uses=2]
|
||||||
|
load i8* %tail.0.i.i.i, align 1 ; <i8>:8 [#uses=1]
|
||||||
|
icmp eq i8 %8, 80 ; <i1>:9 [#uses=1]
|
||||||
|
br i1 %9, label %bb32.i.i.i, label %bb61.i.i.i
|
||||||
|
|
||||||
|
bb1.i28.i: ; preds = %cli_dbgmsg.exit49.i
|
||||||
|
call void @llvm.memset.i32( i8* null, i8 0, i32 88, i32 1 ) nounwind
|
||||||
|
unreachable
|
||||||
|
|
||||||
|
bb8.i.i: ; preds = %bb8.i.i, %cli_dbgmsg.exit49.i
|
||||||
|
br label %bb8.i.i
|
||||||
|
}
|
@ -1,12 +1,10 @@
|
|||||||
; RUN: llvm-as < %s | llc -march=x86 | grep stosb
|
; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep stosl
|
||||||
|
; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin | grep movq | count 10
|
||||||
|
|
||||||
target triple = "i386-apple-darwin9"
|
define void @bork() nounwind {
|
||||||
%struct.S = type { [80 x i8] }
|
|
||||||
|
|
||||||
define %struct.S* @bork() {
|
|
||||||
entry:
|
entry:
|
||||||
call void @llvm.memset.i64( i8* null, i8 0, i64 80, i32 1 )
|
call void @llvm.memset.i64( i8* null, i8 0, i64 80, i32 4 )
|
||||||
ret %struct.S* null
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
|
declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
|
||||||
|
Loading…
x
Reference in New Issue
Block a user