From e5dacc55ad0122eb4b8913955cf6339fa3ff2d6e Mon Sep 17 00:00:00 2001
From: Dan Gohman
Date: Mon, 11 Jan 2010 17:58:34 +0000
Subject: [PATCH] Use a 32-bit and with implicit zero-extension instead of a
 64-bit and if it has an immediate with at least 32 bits of leading zeros, to
 avoid needing to materialize that immediate in a register first.

FileCheckize, tidy, and extend a testcase to cover this case.

This fixes rdar://7527390.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@93160 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86Instr64bit.td     | 11 ++++++++
 test/CodeGen/X86/x86-64-and-mask.ll | 43 +++++++++++++++++++++++++++--
 2 files changed, 51 insertions(+), 3 deletions(-)

diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 1835c8feb62..bc36be21955 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1966,6 +1966,17 @@ def : Pat<(add GR64:$src1, 0x0000000080000000),
 def : Pat<(store (add (loadi64 addr:$dst), 0x00000000800000000), addr:$dst),
           (SUB64mi32 addr:$dst, 0xffffffff80000000)>;
 
+// Use a 32-bit and with implicit zero-extension instead of a 64-bit and if it
+// has an immediate with at least 32 bits of leading zeros, to avoid needing to
+// materialize that immediate in a register first.
+def : Pat<(and GR64:$src, i64immZExt32:$imm),
+          (SUBREG_TO_REG
+            (i64 0),
+            (AND32ri
+              (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit),
+              imm:$imm),
+            x86_subreg_32bit)>;
+
 // r & (2^32-1) ==> movz
 def : Pat<(and GR64:$src, 0x00000000FFFFFFFF),
           (MOVZX64rr32 (EXTRACT_SUBREG GR64:$src, x86_subreg_32bit))>;
diff --git a/test/CodeGen/X86/x86-64-and-mask.ll b/test/CodeGen/X86/x86-64-and-mask.ll
index 3c738911126..2465f23a768 100644
--- a/test/CodeGen/X86/x86-64-and-mask.ll
+++ b/test/CodeGen/X86/x86-64-and-mask.ll
@@ -1,12 +1,49 @@
-; RUN: llc < %s | grep {movl.*%edi, %eax}
-; This should be a single mov, not a load of immediate + andq.
+; RUN: llc < %s | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
 target triple = "x86_64-apple-darwin8"
 
-define i64 @test(i64 %x) nounwind {
+; This should be a single mov, not a load of immediate + andq.
+; CHECK: test:
+; CHECK: movl %edi, %eax
+
+define i64 @test(i64 %x) nounwind {
 entry:
 	%tmp123 = and i64 %x, 4294967295 ; [#uses=1]
 	ret i64 %tmp123
 }
 
+; This copy can't be coalesced away because it needs the implicit zero-extend.
+; CHECK: bbb:
+; CHECK: movl %edi, %edi
+
+define void @bbb(i64 %x) nounwind {
+  %t = and i64 %x, 4294967295
+  call void @foo(i64 %t)
+  ret void
+}
+
+; This should use a 32-bit and with implicit zero-extension, not a 64-bit and
+; with a separate mov to materialize the mask.
+; rdar://7527390
+; CHECK: ccc:
+; CHECK: andl $-1048593, %edi
+
+declare void @foo(i64 %x) nounwind
+
+define void @ccc(i64 %x) nounwind {
+  %t = and i64 %x, 4293918703
+  call void @foo(i64 %t)
+  ret void
+}
+
+; This requires a mov and a 64-bit and.
+; CHECK: ddd:
+; CHECK: movabsq $4294967296, %rax
+; CHECK: andq %rax, %rdi
+
+define void @ddd(i64 %x) nounwind {
+  %t = and i64 %x, 4294967296
+  call void @foo(i64 %t)
+  ret void
+}