llvm-6502/test/CodeGen/X86/musttail-fastcall.ll

; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512

; While we don't support varargs with fastcall, we do support forwarding.

@asdf = internal constant [4 x i8] c"asdf"

declare void @puts(i8*)

; Driver for the fastcall case: calls the variadic thunk @fast_thunk using the
; x86_fastcallcc convention with two `inreg` i32 arguments (1 and 2) plus one
; ordinary i32 argument (3), and returns whatever the thunk returns.
define i32 @call_fast_thunk() {
  %r = call x86_fastcallcc i32 (...)* @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3)
  ret i32 %r
}

; Variadic fastcall thunk under test. It first makes an ordinary call to @puts
; and then forwards its entire (unnamed) argument list to @fast_target through
; a `musttail` call. Because the @puts call sits between function entry and the
; tail call, the incoming argument registers have to be saved before it and
; reloaded after it; the FileCheck directives that follow this function look
; for those moves of %ecx and %edx.
; Note: @asdf is a 4-byte array with no trailing NUL. The test only runs llc and
; inspects the generated assembly, so the call is never executed.
define x86_fastcallcc i32 @fast_thunk(...) {
  ; Ordinary (non-tail) call placed ahead of the forwarding call.
  call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))
  ; @fast_target is bitcast to a variadic function type so that the musttail
  ; call can pass the thunk's `...` straight through.
  %r = musttail call x86_fastcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @fast_target to i32 (...)*) (...)
  ret i32 %r
}

; Check that we spill and fill around the call to puts.

; CHECK-LABEL: @fast_thunk@0:
; CHECK-DAG: movl %ecx, {{.*}}
; CHECK-DAG: movl %edx, {{.*}}
; CHECK: calll _puts
; CHECK-DAG: movl {{.*}}, %ecx
; CHECK-DAG: movl {{.*}}, %edx
; CHECK: jmp @fast_target@12

; Forwarding destination for @fast_thunk. Takes two `inreg` i32 parameters and
; one ordinary i32 parameter under x86_fastcallcc and returns %a + %b + %c.
define x86_fastcallcc i32 @fast_target(i32 inreg %a, i32 inreg %b, i32 %c) {
  %a0 = add i32 %a, %b
  %a1 = add i32 %a0, %c
  ret i32 %a1
}

; Repeat the test for vectorcall, which has XMM registers.

; Driver for the vectorcall case: calls the variadic thunk @vector_thunk using
; the x86_vectorcallcc convention with two `inreg` i32 arguments (1 and 2) plus
; one ordinary i32 argument (3), and returns whatever the thunk returns.
define i32 @call_vector_thunk() {
  %r = call x86_vectorcallcc i32 (...)* @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3)
  ret i32 %r
}

; Variadic vectorcall thunk under test; same shape as @fast_thunk but using
; x86_vectorcallcc. It calls @puts and then forwards its whole argument list to
; @vector_target with a `musttail` call. The FileCheck directives after this
; function look for saves before, and reloads after, the @puts call of %ecx and
; %edx and of vector registers 0 through 5 (xmm, ymm or zmm, depending on which
; feature set the llc invocation enables).
define x86_vectorcallcc i32 @vector_thunk(...) {
  ; Ordinary (non-tail) call placed ahead of the forwarding call.
  call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))
  ; @vector_target is bitcast to a variadic function type so that the musttail
  ; call can pass the thunk's `...` straight through.
  %r = musttail call x86_vectorcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @vector_target to i32 (...)*) (...)
  ret i32 %r
}

; Check that we spill and fill SSE registers around the call to puts.

; CHECK-LABEL: vector_thunk@@0:
; CHECK-DAG: movl %ecx, {{.*}}
; CHECK-DAG: movl %edx, {{.*}}

; SSE2-DAG: movups %xmm0, {{.*}}
; SSE2-DAG: movups %xmm1, {{.*}}
; SSE2-DAG: movups %xmm2, {{.*}}
; SSE2-DAG: movups %xmm3, {{.*}}
; SSE2-DAG: movups %xmm4, {{.*}}
; SSE2-DAG: movups %xmm5, {{.*}}

; AVX-DAG: vmovups %ymm0, {{.*}}
; AVX-DAG: vmovups %ymm1, {{.*}}
; AVX-DAG: vmovups %ymm2, {{.*}}
; AVX-DAG: vmovups %ymm3, {{.*}}
; AVX-DAG: vmovups %ymm4, {{.*}}
; AVX-DAG: vmovups %ymm5, {{.*}}

; AVX512-DAG: vmovups %zmm0, {{.*}}
; AVX512-DAG: vmovups %zmm1, {{.*}}
; AVX512-DAG: vmovups %zmm2, {{.*}}
; AVX512-DAG: vmovups %zmm3, {{.*}}
; AVX512-DAG: vmovups %zmm4, {{.*}}
; AVX512-DAG: vmovups %zmm5, {{.*}}

; CHECK: calll _puts

; SSE2-DAG: movups {{.*}}, %xmm0
; SSE2-DAG: movups {{.*}}, %xmm1
; SSE2-DAG: movups {{.*}}, %xmm2
; SSE2-DAG: movups {{.*}}, %xmm3
; SSE2-DAG: movups {{.*}}, %xmm4
; SSE2-DAG: movups {{.*}}, %xmm5

; AVX-DAG: vmovups {{.*}}, %ymm0
; AVX-DAG: vmovups {{.*}}, %ymm1
; AVX-DAG: vmovups {{.*}}, %ymm2
; AVX-DAG: vmovups {{.*}}, %ymm3
; AVX-DAG: vmovups {{.*}}, %ymm4
; AVX-DAG: vmovups {{.*}}, %ymm5

; AVX512-DAG: vmovups {{.*}}, %zmm0
; AVX512-DAG: vmovups {{.*}}, %zmm1
; AVX512-DAG: vmovups {{.*}}, %zmm2
; AVX512-DAG: vmovups {{.*}}, %zmm3
; AVX512-DAG: vmovups {{.*}}, %zmm4
; AVX512-DAG: vmovups {{.*}}, %zmm5

; CHECK-DAG: movl {{.*}}, %ecx
; CHECK-DAG: movl {{.*}}, %edx
; CHECK: jmp vector_target@@12

; Forwarding destination for @vector_thunk. Takes two `inreg` i32 parameters and
; one ordinary i32 parameter under x86_vectorcallcc and returns %a + %b + %c.
define x86_vectorcallcc i32 @vector_target(i32 inreg %a, i32 inreg %b, i32 %c) {
  %a0 = add i32 %a, %b
  %a1 = add i32 %a0, %c
  ret i32 %a1
}
Make musttail more robust for vector types on x86 Previously I tried to plug musttail into the existing vararg lowering code. That turned out to be a mistake, because non-vararg calls use significantly different register lowering, even on x86. For example, AVX vectors are usually passed in registers to normal functions and memory to vararg functions. Now musttail uses a completely separate lowering. Hopefully this can be used as the basis for non-x86 perfect forwarding. Reviewers: majnemer Differential Revision: http://reviews.llvm.org/D6156 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224745 91177308-0d34-0410-b5e6-96231b3b80d8 2014-12-22 23:58:37 +00:00			`; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2 \| FileCheck %s --check-prefix=CHECK --check-prefix=SSE2`
			`; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx \| FileCheck %s --check-prefix=CHECK --check-prefix=AVX`
			`; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f \| FileCheck %s --check-prefix=CHECK --check-prefix=AVX512`

			`; While we don't support varargs with fastcall, we do support forwarding.`

			`@asdf = internal constant [4 x i8] c"asdf"`

			`declare void @puts(i8*)`

			`define i32 @call_fast_thunk() {`
			`%r = call x86_fastcallcc i32 (...)* @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3)`
			`ret i32 %r`
			`}`

			`define x86_fastcallcc i32 @fast_thunk(...) {`
[opaque pointer type] Add textual IR support for explicit type parameter to gep operator Similar to gep (r230786) and load (r230794) changes. Similar migration script can be used to update test cases, which successfully migrated all of LLVM and Polly, but about 4 test cases needed manually changes in Clang. (this script will read the contents of stdin and massage it into stdout - wrap it in the 'apply.sh' script shown in previous commits + xargs to apply it over a large set of test cases) import fileinput import sys import re rep = re.compile(r"(getelementptr(?:\s+inbounds)?\s\()((<\d\s+x\s+)?([^@]?)(\|\saddrspace\(\d+\))\s\(?(3)>)\s*)(?=$\|%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|zeroinitializer\|<\|\[\[[a-zA-Z]\|\{\{)", re.MULTILINE \| re.DOTALL) def conv(match): line = match.group(1) line += match.group(4) line += ", " line += match.group(2) return line line = sys.stdin.read() off = 0 for match in re.finditer(rep, line): sys.stdout.write(line[off:match.start()]) sys.stdout.write(conv(match)) off = match.end() sys.stdout.write(line[off:]) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232184 91177308-0d34-0410-b5e6-96231b3b80d8 2015-03-13 18:20:45 +00:00			`call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))`
Make musttail more robust for vector types on x86 Previously I tried to plug musttail into the existing vararg lowering code. That turned out to be a mistake, because non-vararg calls use significantly different register lowering, even on x86. For example, AVX vectors are usually passed in registers to normal functions and memory to vararg functions. Now musttail uses a completely separate lowering. Hopefully this can be used as the basis for non-x86 perfect forwarding. Reviewers: majnemer Differential Revision: http://reviews.llvm.org/D6156 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224745 91177308-0d34-0410-b5e6-96231b3b80d8 2014-12-22 23:58:37 +00:00			`%r = musttail call x86_fastcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @fast_target to i32 (...)*) (...)`
			`ret i32 %r`
			`}`

			`; Check that we spill and fill around the call to puts.`

			`; CHECK-LABEL: @fast_thunk@0:`
			`; CHECK-DAG: movl %ecx, {{.*}}`
			`; CHECK-DAG: movl %edx, {{.*}}`
			`; CHECK: calll _puts`
			`; CHECK-DAG: movl {{.*}}, %ecx`
			`; CHECK-DAG: movl {{.*}}, %edx`
			`; CHECK: jmp @fast_target@12`

			`define x86_fastcallcc i32 @fast_target(i32 inreg %a, i32 inreg %b, i32 %c) {`
			`%a0 = add i32 %a, %b`
			`%a1 = add i32 %a0, %c`
			`ret i32 %a1`
			`}`

			`; Repeat the test for vectorcall, which has XMM registers.`

			`define i32 @call_vector_thunk() {`
			`%r = call x86_vectorcallcc i32 (...)* @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3)`
			`ret i32 %r`
			`}`

			`define x86_vectorcallcc i32 @vector_thunk(...) {`
[opaque pointer type] Add textual IR support for explicit type parameter to gep operator Similar to gep (r230786) and load (r230794) changes. Similar migration script can be used to update test cases, which successfully migrated all of LLVM and Polly, but about 4 test cases needed manually changes in Clang. (this script will read the contents of stdin and massage it into stdout - wrap it in the 'apply.sh' script shown in previous commits + xargs to apply it over a large set of test cases) import fileinput import sys import re rep = re.compile(r"(getelementptr(?:\s+inbounds)?\s\()((<\d\s+x\s+)?([^@]?)(\|\saddrspace\(\d+\))\s\(?(3)>)\s*)(?=$\|%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|zeroinitializer\|<\|\[\[[a-zA-Z]\|\{\{)", re.MULTILINE \| re.DOTALL) def conv(match): line = match.group(1) line += match.group(4) line += ", " line += match.group(2) return line line = sys.stdin.read() off = 0 for match in re.finditer(rep, line): sys.stdout.write(line[off:match.start()]) sys.stdout.write(conv(match)) off = match.end() sys.stdout.write(line[off:]) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232184 91177308-0d34-0410-b5e6-96231b3b80d8 2015-03-13 18:20:45 +00:00			`call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))`
Make musttail more robust for vector types on x86 Previously I tried to plug musttail into the existing vararg lowering code. That turned out to be a mistake, because non-vararg calls use significantly different register lowering, even on x86. For example, AVX vectors are usually passed in registers to normal functions and memory to vararg functions. Now musttail uses a completely separate lowering. Hopefully this can be used as the basis for non-x86 perfect forwarding. Reviewers: majnemer Differential Revision: http://reviews.llvm.org/D6156 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224745 91177308-0d34-0410-b5e6-96231b3b80d8 2014-12-22 23:58:37 +00:00			`%r = musttail call x86_vectorcallcc i32 (...)* bitcast (i32 (i32, i32, i32)* @vector_target to i32 (...)*) (...)`
			`ret i32 %r`
			`}`

			`; Check that we spill and fill SSE registers around the call to puts.`

			`; CHECK-LABEL: vector_thunk@@0:`
			`; CHECK-DAG: movl %ecx, {{.*}}`
			`; CHECK-DAG: movl %edx, {{.*}}`

			`; SSE2-DAG: movups %xmm0, {{.*}}`
			`; SSE2-DAG: movups %xmm1, {{.*}}`
			`; SSE2-DAG: movups %xmm2, {{.*}}`
			`; SSE2-DAG: movups %xmm3, {{.*}}`
			`; SSE2-DAG: movups %xmm4, {{.*}}`
			`; SSE2-DAG: movups %xmm5, {{.*}}`

			`; AVX-DAG: vmovups %ymm0, {{.*}}`
			`; AVX-DAG: vmovups %ymm1, {{.*}}`
			`; AVX-DAG: vmovups %ymm2, {{.*}}`
			`; AVX-DAG: vmovups %ymm3, {{.*}}`
			`; AVX-DAG: vmovups %ymm4, {{.*}}`
			`; AVX-DAG: vmovups %ymm5, {{.*}}`

			`; AVX512-DAG: vmovups %zmm0, {{.*}}`
			`; AVX512-DAG: vmovups %zmm1, {{.*}}`
			`; AVX512-DAG: vmovups %zmm2, {{.*}}`
			`; AVX512-DAG: vmovups %zmm3, {{.*}}`
			`; AVX512-DAG: vmovups %zmm4, {{.*}}`
			`; AVX512-DAG: vmovups %zmm5, {{.*}}`

			`; CHECK: calll _puts`

			`; SSE2-DAG: movups {{.*}}, %xmm0`
			`; SSE2-DAG: movups {{.*}}, %xmm1`
			`; SSE2-DAG: movups {{.*}}, %xmm2`
			`; SSE2-DAG: movups {{.*}}, %xmm3`
			`; SSE2-DAG: movups {{.*}}, %xmm4`
			`; SSE2-DAG: movups {{.*}}, %xmm5`

			`; AVX-DAG: vmovups {{.*}}, %ymm0`
			`; AVX-DAG: vmovups {{.*}}, %ymm1`
			`; AVX-DAG: vmovups {{.*}}, %ymm2`
			`; AVX-DAG: vmovups {{.*}}, %ymm3`
			`; AVX-DAG: vmovups {{.*}}, %ymm4`
			`; AVX-DAG: vmovups {{.*}}, %ymm5`

			`; AVX512-DAG: vmovups {{.*}}, %zmm0`
			`; AVX512-DAG: vmovups {{.*}}, %zmm1`
			`; AVX512-DAG: vmovups {{.*}}, %zmm2`
			`; AVX512-DAG: vmovups {{.*}}, %zmm3`
			`; AVX512-DAG: vmovups {{.*}}, %zmm4`
			`; AVX512-DAG: vmovups {{.*}}, %zmm5`

			`; CHECK-DAG: movl {{.*}}, %ecx`
			`; CHECK-DAG: movl {{.*}}, %edx`
			`; CHECK: jmp vector_target@@12`

			`define x86_vectorcallcc i32 @vector_target(i32 inreg %a, i32 inreg %b, i32 %c) {`
			`%a0 = add i32 %a, %b`
			`%a1 = add i32 %a0, %c`
			`ret i32 %a1`
			`}`