mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-02 07:32:52 +00:00
Revision r128665 added an optimization to make use of NEON multiplier
accumulator forwarding. Specifically (from the SVN log entry):

    Distribute (A + B) * C to (A * C) + (B * C) to make use of NEON
    multiplier accumulator forwarding:

        vadd d3, d0, d1
        vmul d3, d3, d2
      =>
        vmul d3, d0, d2
        vmla d3, d1, d2

Make sure it catches cases where operand 1 is add/fadd/sub/fsub, which was
intended in the original revision.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@133127 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c06b5bf340
commit
689edc8b28
@ -5687,7 +5687,7 @@ static SDValue PerformVMULCombine(SDNode *N,
|
|||||||
unsigned Opcode = N0.getOpcode();
|
unsigned Opcode = N0.getOpcode();
|
||||||
if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
|
if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
|
||||||
Opcode != ISD::FADD && Opcode != ISD::FSUB) {
|
Opcode != ISD::FADD && Opcode != ISD::FSUB) {
|
||||||
Opcode = N0.getOpcode();
|
Opcode = N1.getOpcode();
|
||||||
if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
|
if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
|
||||||
Opcode != ISD::FADD && Opcode != ISD::FSUB)
|
Opcode != ISD::FADD && Opcode != ISD::FSUB)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
@ -492,3 +492,25 @@ entry:
|
|||||||
store <8 x i8> %10, <8 x i8>* %11, align 8
|
store <8 x i8> %10, <8 x i8>* %11, align 8
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define void @distribute2_commutative(%struct.uint8x8_t* nocapture %dst, i8* %src, i32 %mul) nounwind {
|
||||||
|
entry:
|
||||||
|
; CHECK: distribute2_commutative
|
||||||
|
; CHECK-NOT: vadd.i8
|
||||||
|
; CHECK: vmul.i8
|
||||||
|
; CHECK: vmla.i8
|
||||||
|
%0 = trunc i32 %mul to i8
|
||||||
|
%1 = insertelement <8 x i8> undef, i8 %0, i32 0
|
||||||
|
%2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
|
||||||
|
%3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1)
|
||||||
|
%4 = bitcast <16 x i8> %3 to <2 x double>
|
||||||
|
%5 = extractelement <2 x double> %4, i32 1
|
||||||
|
%6 = bitcast double %5 to <8 x i8>
|
||||||
|
%7 = extractelement <2 x double> %4, i32 0
|
||||||
|
%8 = bitcast double %7 to <8 x i8>
|
||||||
|
%9 = add <8 x i8> %6, %8
|
||||||
|
%10 = mul <8 x i8> %2, %9
|
||||||
|
%11 = getelementptr inbounds %struct.uint8x8_t* %dst, i32 0, i32 0
|
||||||
|
store <8 x i8> %10, <8 x i8>* %11, align 8
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user