mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-14 11:32:34 +00:00
1b1fbccf49
vector operation legalization with support for custom target lowering and fallback to expand when it fails, and use this to implement sext and anyext load lowering for x86 in a more principled way. Previously, the x86 backend relied on a target DAG combine to "combine away" sextload and extload nodes prior to legalization, or would expand them during legalization with terrible code. This is particularly problematic because the DAG combine relies on running over non-canonical DAG nodes at just the right time to match several common and important patterns. It used a combine rather than lowering because we didn't have good lowering support, and to expose some tricks being employed to more combine phases. With this change it becomes a proper lowering operation, the backend marks that it can lower these nodes, and I've added support for handling the canonical forms that don't have direct legal representations such as sextload of a v4i8 -> v4i64 on AVX1. With this change, our test cases for this behavior continue to pass even after the DAG combiner begins running more systematically over every node. There is some noise caused by this in the test suite where we actually use vector extends instead of subregister extraction. This doesn't really seem like the right thing to do, but is unlikely to be a critical regression. We do regress in one case where by lowering to the target-specific patterns early we were able to combine away extraneous legal math nodes. However, this regression is completely addressed by switching to a widening based legalization which is what I'm working toward anyways, so I've just switched the test to that mode. Differential Revision: http://reviews.llvm.org/D4654 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213897 91177308-0d34-0410-b5e6-96231b3b80d8
69 lines
2.0 KiB
LLVM
69 lines
2.0 KiB
LLVM
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx -x86-experimental-vector-widening-legalization | FileCheck %s
|
|
|
|
; Check that we perform a scalar XOR on i32.
|
|
|
|
; CHECK: pull_bitcast
|
|
; CHECK: xorl
|
|
; CHECK: ret
|
|
; Loads one <4 x i8> vector through each pointer argument, XORs the two
; vectors lane by lane, and writes the result back through %pA.
; Each operand is 4 x 8 = 32 bits wide in total.
define void @pull_bitcast (<4 x i8>* %pA, <4 x i8>* %pB) {
  %lhs = load <4 x i8>* %pA
  %rhs = load <4 x i8>* %pB
  %mix = xor <4 x i8> %lhs, %rhs
  store <4 x i8> %mix, <4 x i8>* %pA
  ret void
}
|
|
|
|
; CHECK: multi_use_swizzle
|
|
; CHECK: mov
|
|
; CHECK-NEXT: shuf
|
|
; CHECK-NEXT: shuf
|
|
; CHECK-NEXT: shuf
|
|
; CHECK-NEXT: xor
|
|
; CHECK-NEXT: ret
|
|
; Builds one two-input shuffle of the vectors loaded through %pA and %pB and
; then uses that single result twice: it feeds two different single-input
; shuffles whose outputs are XORed together and returned.
define <4 x i32> @multi_use_swizzle (<4 x i32>* %pA, <4 x i32>* %pB) {
  %va = load <4 x i32>* %pA
  %vb = load <4 x i32>* %pB
  ; Mask indices 0-3 select from %va and 4-7 from %vb: { va[1], va[1], vb[1], vb[2] }.
  %blend = shufflevector <4 x i32> %va, <4 x i32> %vb, <4 x i32> <i32 1, i32 1, i32 5, i32 6>
  ; Two distinct permutations of the same %blend value (second operand is undef).
  %perm_a = shufflevector <4 x i32> %blend, <4 x i32> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 2>
  %perm_b = shufflevector <4 x i32> %blend, <4 x i32> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 2>
  %res = xor <4 x i32> %perm_a, %perm_b
  ret <4 x i32> %res
}
|
|
|
|
; CHECK: pull_bitcast2
|
|
; CHECK: xorl
|
|
; CHECK: ret
|
|
; Variant of the <4 x i8> XOR test in which the first loaded vector has an
; extra use: it is stored through %pC before the second operand is loaded.
; The XOR result is both stored through %pA and returned.
define <4 x i8> @pull_bitcast2 (<4 x i8>* %pA, <4 x i8>* %pB, <4 x i8>* %pC) {
  %lhs = load <4 x i8>* %pA
  ; Copy of the first operand written out before %pB is read.
  store <4 x i8> %lhs, <4 x i8>* %pC
  %rhs = load <4 x i8>* %pB
  %mix = xor <4 x i8> %lhs, %rhs
  store <4 x i8> %mix, <4 x i8>* %pA
  ret <4 x i8> %mix
}
|
|
|
|
|
|
|
|
; CHECK: reverse_1
|
|
; CHECK-NOT: shuf
|
|
; CHECK: ret
|
|
; Applies the pair-swapping mask <1, 0, 3, 2> twice in a row. Both masks only
; use indices 0-3, so every selected lane comes from the vector loaded
; through %pA, and the two swaps compose to the original lane order:
;   first  = { a[1], a[0], a[3], a[2] }
;   second = { first[1], first[0], first[3], first[2] } = { a[0], a[1], a[2], a[3] }
define <4 x i32> @reverse_1 (<4 x i32>* %pA, <4 x i32>* %pB) {
  %va = load <4 x i32>* %pA
  ; Loaded but never selected by the mask below (all indices are < 4).
  %vb = load <4 x i32>* %pB
  %swapped = shufflevector <4 x i32> %va, <4 x i32> %vb, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %restored = shufflevector <4 x i32> %swapped, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x i32> %restored
}
|
|
|
|
|
|
; CHECK: no_reverse_shuff
|
|
; CHECK: shuf
|
|
; CHECK: ret
|
|
; Applies the pair-swapping mask <1, 0, 3, 2> and then the mask <3, 2, 3, 2>.
; Unlike a repeated pair swap, these two masks do not compose to the original
; lane order:
;   first  = { a[1], a[0], a[3], a[2] }
;   second = { first[3], first[2], first[3], first[2] } = { a[2], a[3], a[2], a[3] }
; NOTE(review): this function's own name contains the text "shuf", so a
; FileCheck substring pattern of "shuf" could be satisfied by the symbol name
; in the assembly output rather than by a shuffle instruction - confirm
; against the actual llc output.
define <4 x i32> @no_reverse_shuff (<4 x i32>* %pA, <4 x i32>* %pB) {
  %va = load <4 x i32>* %pA
  ; Loaded but never selected by the mask below (all indices are < 4).
  %vb = load <4 x i32>* %pB
  %swapped = shufflevector <4 x i32> %va, <4 x i32> %vb, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  %upper = shufflevector <4 x i32> %swapped, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
  ret <4 x i32> %upper
}
|