From 2a0f224ce9cac7baf3bced94ce304277c5ec74da Mon Sep 17 00:00:00 2001
From: Chris Lattner
Date: Sat, 14 Feb 2004 04:46:05 +0000
Subject: [PATCH] Codegen llvm.memset into rep stos[bwd]. Simplify code for
 llvm.memcpy

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@11442 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/InstSelectSimple.cpp | 80 ++++++++++++++++++++++++-----
 lib/Target/X86/X86ISelSimple.cpp    | 80 ++++++++++++++++++++++++-----
 2 files changed, 134 insertions(+), 26 deletions(-)

diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp
index 54aa1ad0d23..d4a5ac2d860 100644
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -1158,6 +1158,7 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
         case Intrinsic::va_copy:
         case Intrinsic::va_end:
         case Intrinsic::memcpy:
+        case Intrinsic::memset:
           // We directly implement these intrinsics
           break;
         default:
@@ -1200,7 +1201,7 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
     // Turn the byte code into # iterations
     unsigned ByteReg;
     unsigned CountReg;
-
+    unsigned Opcode;
     switch (Align & 3) {
     case 2:   // WORD aligned
       if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
@@ -1209,6 +1210,7 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
         CountReg = makeAnotherReg(Type::IntTy);
         BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(1);
       }
+      Opcode = X86::REP_MOVSW;
       break;
     case 0:   // DWORD aligned
       if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
@@ -1217,10 +1219,12 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
         CountReg = makeAnotherReg(Type::IntTy);
         BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(2);
       }
+      Opcode = X86::REP_MOVSD;
       break;
     case 1:   // BYTE aligned
     case 3:   // BYTE aligned
       CountReg = getReg(CI.getOperand(3));
+      Opcode = X86::REP_MOVSB;
       break;
     }
 
@@ -1231,20 +1235,70 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
     BuildMI(BB, X86::MOVrr32, 1, X86::ECX).addReg(CountReg);
     BuildMI(BB, X86::MOVrr32, 1, X86::EDI).addReg(TmpReg1);
     BuildMI(BB, X86::MOVrr32, 1, X86::ESI).addReg(TmpReg2);
-
-    switch (Align & 3) {
-    case 1:   // BYTE aligned
-    case 3:   // BYTE aligned
-      BuildMI(BB, X86::REP_MOVSB, 0);
-      break;
-    case 2:   // WORD aligned
-      BuildMI(BB, X86::REP_MOVSW, 0);
-      break;
-    case 0:   // DWORD aligned
-      BuildMI(BB, X86::REP_MOVSD, 0);
-      break;
+    BuildMI(BB, Opcode, 0);
+    return;
+  }
+  case Intrinsic::memset: {
+    assert(CI.getNumOperands() == 5 && "Illegal llvm.memset call!");
+    unsigned Align = 1;
+    if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
+      Align = AlignC->getRawValue();
+      if (Align == 0) Align = 1;
     }
+
+    // Turn the byte code into # iterations
+    unsigned ByteReg;
+    unsigned CountReg;
+    unsigned Opcode;
+    if (ConstantInt *ValC = dyn_cast<ConstantInt>(CI.getOperand(2))) {
+      unsigned Val = ValC->getRawValue() & 255;
+
+      // If the value is a constant, then we can potentially use larger copies.
+      switch (Align & 3) {
+      case 2:   // WORD aligned
+        if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
+          CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/2));
+        } else {
+          CountReg = makeAnotherReg(Type::IntTy);
+          BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(1);
+        }
+        BuildMI(BB, X86::MOVir16, 1, X86::AX).addZImm((Val << 8) | Val);
+        Opcode = X86::REP_STOSW;
+        break;
+      case 0:   // DWORD aligned
+        if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
+          CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/4));
+        } else {
+          CountReg = makeAnotherReg(Type::IntTy);
+          BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(2);
+        }
+        Val = (Val << 8) | Val;
+        BuildMI(BB, X86::MOVir32, 1, X86::EAX).addZImm((Val << 16) | Val);
+        Opcode = X86::REP_STOSD;
+        break;
+      case 1:   // BYTE aligned
+      case 3:   // BYTE aligned
+        CountReg = getReg(CI.getOperand(3));
+        BuildMI(BB, X86::MOVir8, 1, X86::AL).addZImm(Val);
+        Opcode = X86::REP_STOSB;
+        break;
+      }
+    } else {
+      // If it's not a constant value we are storing, just fall back. We could
+      // try to be clever to form 16 bit and 32 bit values, but we don't yet.
+      unsigned ValReg = getReg(CI.getOperand(2));
+      BuildMI(BB, X86::MOVrr8, 1, X86::AL).addReg(ValReg);
+      CountReg = getReg(CI.getOperand(3));
+      Opcode = X86::REP_STOSB;
+    }
+
+    // No matter what the alignment is, we put the source in ESI, the
+    // destination in EDI, and the count in ECX.
+    TmpReg1 = getReg(CI.getOperand(1));
+    //TmpReg2 = getReg(CI.getOperand(2));
+    BuildMI(BB, X86::MOVrr32, 1, X86::ECX).addReg(CountReg);
+    BuildMI(BB, X86::MOVrr32, 1, X86::EDI).addReg(TmpReg1);
+    BuildMI(BB, Opcode, 0);
     return;
   }
diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp
index 54aa1ad0d23..d4a5ac2d860 100644
--- a/lib/Target/X86/X86ISelSimple.cpp
+++ b/lib/Target/X86/X86ISelSimple.cpp
@@ -1158,6 +1158,7 @@ void ISel::LowerUnknownIntrinsicFunctionCalls(Function &F) {
         case Intrinsic::va_copy:
         case Intrinsic::va_end:
         case Intrinsic::memcpy:
+        case Intrinsic::memset:
           // We directly implement these intrinsics
           break;
         default:
@@ -1200,7 +1201,7 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
     // Turn the byte code into # iterations
     unsigned ByteReg;
     unsigned CountReg;
-
+    unsigned Opcode;
    switch (Align & 3) {
     case 2:   // WORD aligned
       if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
@@ -1209,6 +1210,7 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
         CountReg = makeAnotherReg(Type::IntTy);
         BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(1);
       }
+      Opcode = X86::REP_MOVSW;
       break;
     case 0:   // DWORD aligned
       if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
@@ -1217,10 +1219,12 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
         CountReg = makeAnotherReg(Type::IntTy);
         BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(2);
       }
+      Opcode = X86::REP_MOVSD;
       break;
     case 1:   // BYTE aligned
     case 3:   // BYTE aligned
       CountReg = getReg(CI.getOperand(3));
+      Opcode = X86::REP_MOVSB;
       break;
     }
 
@@ -1231,20 +1235,70 @@ void ISel::visitIntrinsicCall(Intrinsic::ID ID, CallInst &CI) {
     BuildMI(BB, X86::MOVrr32, 1, X86::ECX).addReg(CountReg);
     BuildMI(BB, X86::MOVrr32, 1, X86::EDI).addReg(TmpReg1);
     BuildMI(BB, X86::MOVrr32, 1, X86::ESI).addReg(TmpReg2);
-
-    switch (Align & 3) {
-    case 1:   // BYTE aligned
-    case 3:   // BYTE aligned
-      BuildMI(BB, X86::REP_MOVSB, 0);
-      break;
-    case 2:   // WORD aligned
-      BuildMI(BB, X86::REP_MOVSW, 0);
-      break;
-    case 0:   // DWORD aligned
-      BuildMI(BB, X86::REP_MOVSD, 0);
-      break;
+    BuildMI(BB, Opcode, 0);
+    return;
+  }
+  case Intrinsic::memset: {
+    assert(CI.getNumOperands() == 5 && "Illegal llvm.memset call!");
+    unsigned Align = 1;
+    if (ConstantInt *AlignC = dyn_cast<ConstantInt>(CI.getOperand(4))) {
+      Align = AlignC->getRawValue();
+      if (Align == 0) Align = 1;
     }
+
+    // Turn the byte code into # iterations
+    unsigned ByteReg;
+    unsigned CountReg;
+    unsigned Opcode;
+    if (ConstantInt *ValC = dyn_cast<ConstantInt>(CI.getOperand(2))) {
+      unsigned Val = ValC->getRawValue() & 255;
+
+      // If the value is a constant, then we can potentially use larger copies.
+      switch (Align & 3) {
+      case 2:   // WORD aligned
+        if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
+          CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/2));
+        } else {
+          CountReg = makeAnotherReg(Type::IntTy);
+          BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(1);
+        }
+        BuildMI(BB, X86::MOVir16, 1, X86::AX).addZImm((Val << 8) | Val);
+        Opcode = X86::REP_STOSW;
+        break;
+      case 0:   // DWORD aligned
+        if (ConstantInt *I = dyn_cast<ConstantInt>(CI.getOperand(3))) {
+          CountReg = getReg(ConstantUInt::get(Type::UIntTy, I->getRawValue()/4));
+        } else {
+          CountReg = makeAnotherReg(Type::IntTy);
+          BuildMI(BB, X86::SHRir32, 2, CountReg).addReg(ByteReg).addZImm(2);
+        }
+        Val = (Val << 8) | Val;
+        BuildMI(BB, X86::MOVir32, 1, X86::EAX).addZImm((Val << 16) | Val);
+        Opcode = X86::REP_STOSD;
+        break;
+      case 1:   // BYTE aligned
+      case 3:   // BYTE aligned
+        CountReg = getReg(CI.getOperand(3));
+        BuildMI(BB, X86::MOVir8, 1, X86::AL).addZImm(Val);
+        Opcode = X86::REP_STOSB;
+        break;
+      }
+    } else {
+      // If it's not a constant value we are storing, just fall back. We could
+      // try to be clever to form 16 bit and 32 bit values, but we don't yet.
+      unsigned ValReg = getReg(CI.getOperand(2));
+      BuildMI(BB, X86::MOVrr8, 1, X86::AL).addReg(ValReg);
+      CountReg = getReg(CI.getOperand(3));
+      Opcode = X86::REP_STOSB;
+    }
+
+    // No matter what the alignment is, we put the source in ESI, the
+    // destination in EDI, and the count in ECX.
+    TmpReg1 = getReg(CI.getOperand(1));
+    //TmpReg2 = getReg(CI.getOperand(2));
+    BuildMI(BB, X86::MOVrr32, 1, X86::ECX).addReg(CountReg);
+    BuildMI(BB, X86::MOVrr32, 1, X86::EDI).addReg(TmpReg1);
+    BuildMI(BB, Opcode, 0);
     return;
   }
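
A note on the constant-value path above: REP_STOSW and REP_STOSD store AX and
EAX respectively, so a single fill byte has to be replicated across the wider
register, and the byte length has to become an element count (a compile-time
divide for constant lengths, a SHR by 1 or 2 at runtime otherwise). The
following standalone sketch, illustrative only and not LLVM code, checks that
arithmetic:

    #include <cassert>

    int main() {
      unsigned Val = 0xAB;               // the fill byte, already masked to 255

      // WORD path: (Val << 8) | Val duplicates the byte into AX.
      unsigned Pat16 = (Val << 8) | Val;
      assert(Pat16 == 0xABAB);

      // DWORD path: duplicate twice, matching
      //   Val = (Val << 8) | Val;  ...addZImm((Val << 16) | Val);
      Val = (Val << 8) | Val;
      unsigned Pat32 = (Val << 16) | Val;
      assert(Pat32 == 0xABABABABu);

      // Count conversion: bytes -> elements, as getRawValue()/2 and /4 do for
      // constant lengths, or SHRir32 by 1 and 2 does for runtime lengths.
      // Note the truncation: a tail that does not fill a full element is
      // simply dropped by this arithmetic.
      unsigned Len = 26;
      assert((Len >> 1) == 13 && (Len >> 2) == 6);
      return 0;
    }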
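For readers who do not have the x86 string instructions memorized: with the
direction flag clear, rep stosb/stosw/stosd store AL/AX/EAX to [EDI], advance
EDI by the element size, and repeat while decrementing ECX, which is exactly
why the lowering moves the count into ECX and the destination into EDI before
emitting the single REP_STOS opcode. A rough C++ model of the dword form
(function name hypothetical):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Models "rep stosd" with the register assignments the patch sets up:
    // ECX = element count, EDI = destination, EAX = fill pattern.
    static void rep_stosd(uint32_t *edi, uint32_t eax, uint32_t ecx) {
      while (ecx--)     // REP prefix: repeat ECX times, decrementing ECX
        *edi++ = eax;   // STOSD: store EAX to [EDI], advance EDI by 4
    }

    int main() {
      uint32_t buf[8];
      rep_stosd(buf, 0xABABABABu, 8);   // equivalent to memset(buf, 0xAB, 32)
      unsigned char expect[32];
      std::memset(expect, 0xAB, sizeof expect);
      assert(std::memcmp(buf, expect, sizeof buf) == 0);
      return 0;
    }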
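One caveat worth flagging in the memset case as committed: ByteReg is declared
but never assigned, yet the variable-length WORD- and DWORD-aligned paths feed
it to SHRir32, whereas the analogous memcpy code loads it from the length
operand before shifting. If that reading of the diff is right, a minimal
correction, hypothetical and not part of this commit, would be to initialize
it from operand 3:

    // Hypothetical correction, mirroring the memcpy path: give the SHR a
    // defined input by loading the byte length first.
    unsigned ByteReg = getReg(CI.getOperand(3));

The trailing "we put the source in ESI" comment is likewise carried over from
memcpy; memset has no source pointer, which is why the TmpReg2 line is
commented out.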