diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 3a4723ebb84..1b6c35d6951 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -1017,12 +1017,16 @@ defm V_RSQ_LEGACY_F32 : VOP1_32 < 0x0000002d, "V_RSQ_LEGACY_F32", [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] >; -defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; +defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", + [(set f32:$dst, (fdiv FP_ONE, (fsqrt f32:$src0)))] +>; defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", [(set f64:$dst, (fdiv FP_ONE, f64:$src0))] >; defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; -defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>; +defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", + [(set f64:$dst, (fdiv FP_ONE, (fsqrt f64:$src0)))] +>; defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>; defm V_SQRT_F32 : VOP1_32 <0x00000033, "V_SQRT_F32", [(set f32:$dst, (fsqrt f32:$src0))] diff --git a/test/CodeGen/R600/rsq.ll b/test/CodeGen/R600/rsq.ll new file mode 100644 index 00000000000..87c05701104 --- /dev/null +++ b/test/CodeGen/R600/rsq.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s + +declare float @llvm.sqrt.f32(float) nounwind readnone +declare double @llvm.sqrt.f64(double) nounwind readnone + +; SI-LABEL: @rsq_f32 +; SI: V_RSQ_F32_e32 +; SI: S_ENDPGM +define void @rsq_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) nounwind { + %val = load float addrspace(1)* %in, align 4 + %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone + %div = fdiv float 1.0, %sqrt + store float %div, float addrspace(1)* %out, align 4 + ret void +} + +; SI-LABEL: @rsq_f64 +; SI: V_RSQ_F64_e32 +; SI: S_ENDPGM +define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) nounwind { + %val = load double addrspace(1)* %in, align 4 + %sqrt = call double @llvm.sqrt.f64(double %val) nounwind readnone + %div = fdiv double 1.0, %sqrt + store double %div, double addrspace(1)* %out, align 4 + ret void +}