diff --git a/test/CodeGen/R600/indirect-private-64.ll b/test/CodeGen/R600/indirect-private-64.ll
new file mode 100644
index 00000000000..a6f1e932dbd
--- /dev/null
+++ b/test/CodeGen/R600/indirect-private-64.ll
@@ -0,0 +1,35 @@
+; REQUIRES: asserts
+; XFAIL: *
+; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
+
+; Each function stores a value with 64-bit elements into a dynamically indexed
+; slot of a 16-entry alloca'd array, executes a local barrier, then reloads it.
+; The file is marked XFAIL, i.e. it is currently expected to fail.
+
+declare void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+
+; SI-LABEL: @f64_alloca:
+; SI: BUFFER_STORE_DWORD
+define void @f64_alloca(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in, i32 %b) nounwind {
+  %val = load double addrspace(1)* %in, align 8
+  %array = alloca double, i32 16, align 8
+  %ptr = getelementptr double* %array, i32 %b
+  store double %val, double* %ptr, align 8
+  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+  %result = load double* %ptr, align 8
+  store double %result, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: @v2f64_alloca:
+; SI: BUFFER_STORE_DWORDX4
+define void @v2f64_alloca(<2 x double> addrspace(1)* noalias %out, <2 x double> addrspace(1)* noalias %in, i32 %b) nounwind {
+  %val = load <2 x double> addrspace(1)* %in, align 16
+  %array = alloca <2 x double>, i32 16, align 16
+  %ptr = getelementptr <2 x double>* %array, i32 %b
+  store <2 x double> %val, <2 x double>* %ptr, align 16
+  call void @llvm.AMDGPU.barrier.local() noduplicate nounwind
+  %result = load <2 x double>* %ptr, align 16
+  store <2 x double> %result, <2 x double> addrspace(1)* %out, align 16
+  ret void
+}
diff --git a/test/CodeGen/R600/insert_vector_elt_f64.ll b/test/CodeGen/R600/insert_vector_elt_f64.ll
new file mode 100644
index 00000000000..e334be17491
--- /dev/null
+++ b/test/CodeGen/R600/insert_vector_elt_f64.ll
@@ -0,0 +1,39 @@
+; REQUIRES: asserts
+; XFAIL: *
+; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
+
+; Each function inserts a constant into a vector of 64-bit elements at a
+; dynamic (non-constant) index and stores the result to address space 1.
+; The file is marked XFAIL, i.e. it is currently expected to fail.
+
+; SI-LABEL: @dynamic_insertelement_v2f64:
+; SI: BUFFER_STORE_DWORDX4
+define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
+  %vecins = insertelement <2 x double> %a, double 8.0, i32 %b
+  store <2 x double> %vecins, <2 x double> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: @dynamic_insertelement_v2i64:
+; SI: BUFFER_STORE_DWORDX4
+define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
+  %vecins = insertelement <2 x i64> %a, i64 5, i32 %b
+  store <2 x i64> %vecins, <2 x i64> addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: @dynamic_insertelement_v4f64:
+; SI: BUFFER_STORE_DWORDX4
+define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
+  %vecins = insertelement <4 x double> %a, double 8.0, i32 %b
+  store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16
+  ret void
+}
+
+; SI-LABEL: @dynamic_insertelement_v8f64:
+; SI: BUFFER_STORE_DWORDX4
+define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
+  %vecins = insertelement <8 x double> %a, double 8.0, i32 %b
+  store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16
+  ret void
+}