mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-12 17:32:19 +00:00
7023b85187
We should be talking about the number of source elements, not the number of destination elements, given we know at this point that the source and dest element numbers are not the same. While we're at it, avoid writing to std::vector::end()... Bug found with random testing and a lot of coffee. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220051 91177308-0d34-0410-b5e6-96231b3b80d8
23 lines
927 B
LLVM
23 lines
927 B
LLVM
; RUN: llc < %s | FileCheck %s
; Codegen regression test: the module is compiled with llc and the emitted
; assembly is verified against the FileCheck directives embedded in this file.

; Data layout string: little-endian ("e"), ELF name mangling ("m:e"), i64
; aligned to 64 bits, i128 aligned to 128 bits, native integer widths of 32
; and 64 bits, and a 128-bit natural stack alignment.
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; The llc command line above passes no -mtriple, so the target is taken from
; this triple.
target triple = "aarch64-unknown-linux-gnu"

; @f builds a <4 x i16> result out of two wider inputs:
;   lanes 0, 2 and 3 <- lane 2 of %vqdmlal_v3.i, truncated from i32 to i16
;   lane 1           <- lane 0 of %x5
; Because lane 1 comes from a different source than the other three lanes, the
; result is not a plain splat of one element.
define <4 x i16> @f(<4 x i32> %vqdmlal_v3.i, <8 x i16> %x5) {
entry:
; Anchor the checks below to the body emitted for @f so that the short
; substring patterns cannot match unrelated parts of the output.
; CHECK-LABEL: f:
; Check that we don't just dup the input vector. The code emitted is
; ext, dup, ext, ext, but we only match the last three instructions because the
; first two could be combined into a dup2 at some stage.
; CHECK: dup
; CHECK: ext
; CHECK: ext
  ; Take lane 2 of the i32 vector and narrow it to i16.
  %x4 = extractelement <4 x i32> %vqdmlal_v3.i, i32 2
  %vgetq_lane = trunc i32 %x4 to i16
  ; Place the narrowed value in lanes 0, 2 and 3; lane 1 is still undef here.
  %vecinit.i = insertelement <4 x i16> undef, i16 %vgetq_lane, i32 0
  %vecinit2.i = insertelement <4 x i16> %vecinit.i, i16 %vgetq_lane, i32 2
  %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vgetq_lane, i32 3
  ; Fill the remaining lane 1 from lane 0 of the second argument.
  %vgetq_lane261 = extractelement <8 x i16> %x5, i32 0
  %vset_lane267 = insertelement <4 x i16> %vecinit3.i, i16 %vgetq_lane261, i32 1
  ret <4 x i16> %vset_lane267
}