From e9bbacd0a8da448bb117c1868df0e64dcb0b1385 Mon Sep 17 00:00:00 2001
From: Asiri Rathnayake
Date: Wed, 1 Oct 2014 09:59:45 +0000
Subject: [PATCH] Add missing natural vector cast.

Summary: The natural vector cast node (similar to bitcast)
AArch64ISD::NVCAST was introduced in r217159 and r217138. This patch
adds a missing cast from v2f32 to v1i64 which was causing some
compilation failures. Also added test cases to cover various modimm
types and BUILD_VECTORs with i64 elements.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218751 91177308-0d34-0410-b5e6-96231b3b80d8
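A minimal reproducer sketch (assumed; the function name, triple, and exact
constant are illustrative, not taken from the commit): a <1 x i64> constant
whose repeated 32-bit pattern encodes the float 0.390625 (0x3EC80000) is
materialized as an FMOV of a v2f32 modified immediate, after which the
selector must re-interpret the v2f32 result as v1i64 through
AArch64ISD::NVCAST. Without the pattern added below, selecting that cast
fails.

; Hypothetical test input, e.g. llc -mtriple aarch64_be-none-linux-gnu
; 0x3EC800003EC80000 is float 0.390625 splatted across both 32-bit lanes,
; so the constant selects to "fmov v0.2s, #0.39062500" (a v2f32 value)
; which must then be natural-vector-cast to v1i64.
define <1 x i64> @nvcast_v2f32_v1i64() {
  ret <1 x i64> <i64 4523865826746957824>
}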
---
 lib/Target/AArch64/AArch64ISelLowering.cpp |  1 +
 lib/Target/AArch64/AArch64InstrInfo.td     |  1 +
 test/CodeGen/AArch64/aarch64-be-bv.ll      | 65 ++++++++++++++++++++++
 3 files changed, 67 insertions(+)

diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 16fd45aa960..41eb9fd0b2d 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -823,6 +823,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case AArch64ISD::TC_RETURN:         return "AArch64ISD::TC_RETURN";
   case AArch64ISD::SITOF:             return "AArch64ISD::SITOF";
   case AArch64ISD::UITOF:             return "AArch64ISD::UITOF";
+  case AArch64ISD::NVCAST:            return "AArch64ISD::NVCAST";
   case AArch64ISD::SQSHL_I:           return "AArch64ISD::SQSHL_I";
   case AArch64ISD::UQSHL_I:           return "AArch64ISD::UQSHL_I";
   case AArch64ISD::SRSHR_I:           return "AArch64ISD::SRSHR_I";
diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 31839c78290..c2e2e8b22de 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -4986,6 +4986,7 @@ def : Pat<(v8i8 (AArch64NvCast (v2f32 FPR64:$src))), (v8i8 FPR64:$src)>;
 def : Pat<(v4i16 (AArch64NvCast (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
 def : Pat<(v2i32 (AArch64NvCast (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
 def : Pat<(v2f32 (AArch64NvCast (v2f32 FPR64:$src))), (v2f32 FPR64:$src)>;
+def : Pat<(v1i64 (AArch64NvCast (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
 
 // Natural vector casts (128 bit)
 def : Pat<(v16i8 (AArch64NvCast (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
diff --git a/test/CodeGen/AArch64/aarch64-be-bv.ll b/test/CodeGen/AArch64/aarch64-be-bv.ll
index 45eca5916d6..01642a4f3bf 100644
--- a/test/CodeGen/AArch64/aarch64-be-bv.ll
+++ b/test/CodeGen/AArch64/aarch64-be-bv.ll
@@ -377,9 +377,11 @@ define i16 @orr_modimm_t6() nounwind {
 declare i8 @f_v8i8(<8 x i8> %arg)
 declare i16 @f_v4i16(<4 x i16> %arg)
 declare i32 @f_v2i32(<2 x i32> %arg)
+declare i64 @f_v1i64(<1 x i64> %arg)
 declare i8 @f_v16i8(<16 x i8> %arg)
 declare i16 @f_v8i16(<8 x i16> %arg)
 declare i32 @f_v4i32(<4 x i32> %arg)
+declare i64 @f_v2i64(<2 x i64> %arg)
 
 ; CHECK-LABEL: modimm_t1_call:
 define void @modimm_t1_call() {
@@ -395,6 +397,9 @@ define void @modimm_t1_call() {
   ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT:    bl      f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK:         movi    v{{[0-9]+}}.2s, #0x5
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x5
   ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -410,6 +415,10 @@ define void @modimm_t1_call() {
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT:    bl      f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK:         movi    v[[REG:[0-9]+]].4s, #0x2
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -428,6 +437,9 @@ define void @modimm_t2_call() {
   ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT:    bl      f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK:         movi    v{{[0-9]+}}.2s, #0x5, lsl #8
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x5, lsl #8
   ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -443,6 +455,10 @@ define void @modimm_t2_call() {
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT:    bl      f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK:         movi    v[[REG:[0-9]+]].4s, #0x2, lsl #8
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -461,6 +477,9 @@ define void @modimm_t3_call() {
   ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT:    bl      f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK:         movi    v{{[0-9]+}}.2s, #0x5, lsl #16
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x5, lsl #16
   ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -476,6 +495,10 @@ define void @modimm_t3_call() {
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT:    bl      f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK:         movi    v[[REG:[0-9]+]].4s, #0x2, lsl #16
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -494,6 +517,9 @@ define void @modimm_t4_call() {
   ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT:    bl      f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK:         movi    v{{[0-9]+}}.2s, #0x5, lsl #24
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x5, lsl #24
   ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -509,6 +535,10 @@ define void @modimm_t4_call() {
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT:    bl      f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK:         movi    v[[REG:[0-9]+]].4s, #0x2, lsl #24
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -527,6 +557,9 @@ define void @modimm_t5_call() {
   ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT:    bl      f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK:         movi    v{{[0-9]+}}.4h, #0x5
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK:         movi    v[[REG1:[0-9]+]].8h, #0x5
   ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -542,6 +575,10 @@ define void @modimm_t5_call() {
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT:    bl      f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK:         movi    v[[REG:[0-9]+]].8h, #0x2
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -560,6 +597,9 @@ define void @modimm_t6_call() {
   ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT:    bl      f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK:         movi    v{{[0-9]+}}.4h, #0x5, lsl #8
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK:         movi    v[[REG1:[0-9]+]].8h, #0x5, lsl #8
   ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -575,6 +615,10 @@ define void @modimm_t6_call() {
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT:    bl      f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK:         movi    v[[REG:[0-9]+]].8h, #0x2, lsl #8
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -593,6 +637,9 @@ define void @modimm_t7_call() {
   ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT:    bl      f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK:         movi    v{{[0-9]+}}.2s, #0x5, msl #8
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x5, msl #8
   ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -608,6 +655,10 @@ define void @modimm_t7_call() {
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT:    bl      f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK:         movi    v[[REG:[0-9]+]].4s, #0x2, msl #8
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -626,6 +677,9 @@ define void @modimm_t8_call() {
   ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT:    bl      f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK:         movi    v{{[0-9]+}}.2s, #0x5, msl #16
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK:         movi    v[[REG1:[0-9]+]].4s, #0x5, msl #16
   ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -641,6 +695,10 @@ define void @modimm_t8_call() {
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT:    bl      f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK:         movi    v[[REG:[0-9]+]].4s, #0x2, msl #16
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
 
@@ -725,6 +783,9 @@ define void @modimm_t11_call() {
   ; CHECK-NEXT:    rev64   v{{[0-9]+}}.2s, v[[REG1]].2s
   ; CHECK-NEXT:    bl      f_v2i32
   call i32 @f_v2i32(<2 x i32> )
+  ; CHECK:         fmov    v{{[0-9]+}}.2s, #0.39062500
+  ; CHECK-NEXT:    bl      f_v1i64
+  call i64 @f_v1i64(<1 x i64> )
   ; CHECK:         fmov    v[[REG1:[0-9]+]].4s, #3.25000000
   ; CHECK-NEXT:    rev64   v[[REG2:[0-9]+]].16b, v[[REG1]].16b
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
@@ -740,6 +801,10 @@ define void @modimm_t11_call() {
   ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
   ; CHECK-NEXT:    bl      f_v4i32
   call i32 @f_v4i32(<4 x i32> )
+  ; CHECK:         fmov    v[[REG:[0-9]+]].4s, #2.50000000
+  ; CHECK-NEXT:    ext     v[[REG2]].16b, v[[REG2]].16b, v[[REG2]].16b, #8
+  ; CHECK-NEXT:    bl      f_v2i64
+  call i64 @f_v2i64(<2 x i64> )
   ret void
 }
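A note on the design choice: AArch64ISD::NVCAST re-interprets a value that
already lives in the correct register class, so each Pat above selects to no
instruction at all, simply rebinding the source FPR64/FPR128 register at the
new type; the added v1i64 entry completes the existing 64-bit block. Below is
a sketch of the BUILD_VECTOR-with-i64-elements case the new tests cover (the
function name and constant are illustrative assumptions, not from the test):

; Each i64 lane below is the 32-bit splat 0x0000000200000002, so the
; vector can be built as a v4i32 "movi v0.4s, #0x2" and then
; natural-vector-cast to v2i64; on big-endian an ext fixes the lane
; order, as the new CHECK lines in the test expect.
define <2 x i64> @nvcast_v4i32_v2i64() {
  ret <2 x i64> <i64 8589934594, i64 8589934594>
}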