/* cilk_fake.h -*-C++-*- * ************************************************************************* * * Copyright (C) 2011-2016, Intel Corporation * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * Neither the name of Intel Corporation nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * ********************************************************************* * * PLEASE NOTE: This file is a downstream copy of a file maintained in * a repository at cilkplus.org. 
Changes made to this file that are not * submitted through the contribution process detailed at * http://www.cilkplus.org/submit-cilk-contribution will be lost the next * time that a new version is released. Changes only submitted to the * GNU compiler collection or posted to the git repository at * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are * not tracked. * * We welcome your contributions to this open source project. Thank you * for your assistance in helping us improve Cilk Plus. **************************************************************************/ /** * @file cilk_fake.h * * @brief Macros to simulate a compiled Cilk program. * * Used carefully, these macros can be used to create a Cilk program with a * non-Cilk compiler by manually inserting the code necessary for interacting * with the Cilk runtime library. They are not intended to be pretty (you * wouldn't want to write a whole program using these macros), but they are * useful for experiments. They also work well as an illustration of what the * compiler generates. 
* * Details of the mechanisms used in these macros are described in * design-notes/CilkPlusABI.docx * * Example 1: fib in C++ * --------------------- * * #include * * int fib(int n) * { * CILK_FAKE_PROLOG(); * * if (n < 2) * return n; * * int a, b; * CILK_FAKE_SPAWN_R(a, fib(n - 1)); * b = fib(n - 2); * CILK_FAKE_SYNC(); * * return a + b; * } * * * Example 2: fib in C * ------------------- * * #include * * int fib(int n); * * void fib_spawn_helper(__cilkrts_stack_frame* parent_sf, int* a, int n) * { * CILK_FAKE_SPAWN_HELPER_PROLOG(*parent_sf); * *a = fib(n - 1); * CILK_FAKE_SPAWN_HELPER_EPILOG(); * } * * int fib(int n) * { * CILK_FAKE_PROLOG(); * * if (n < 2) * return n; * * int a, b; * CILK_FAKE_CALL_SPAWN_HELPER(fib_spawn_helper(&__cilk_sf, &a, n)); * b = fib(n - 2); * CILK_FAKE_SYNC(); * * CILK_FAKE_EPILOG(); * return a + b; * } */ #ifndef INCLUDED_CILK_FAKE_DOT_H #define INCLUDED_CILK_FAKE_DOT_H // This header implements ABI version 1. If __CILKRTS_ABI_VERSION is already // defined but is less than 1, then the data structures in // will not match the expectations of facilities in this header. Therefore, // for successful compilation, __CILKRTS_ABI_VERSION must either be not // defined, or defined to be 1 or greater. #ifndef __CILKRTS_ABI_VERSION // ABI version was not specified. Set it to 1. # define __CILKRTS_ABI_VERSION 1 #elif __CILKRTS_ABI_VERSION < 1 // ABI version was specified but was too old. Fail compilation. # error cilk_fake.h requirs an ABI version of 1 or greater #endif #include // alloca is defined in malloc.h on Windows, alloca.h on Linux #ifndef _MSC_VER #include #else #include // Define offsetof #include #endif // Allows use of a different version that the one defined in abi.h #define CILK_FAKE_VERSION_FLAG (__CILKRTS_ABI_VERSION << 24) /* Initialize frame. 
To be called when worker is known */ __CILKRTS_INLINE void __cilk_fake_enter_frame_fast(__cilkrts_stack_frame *sf, __cilkrts_worker *w) { sf->call_parent = w->current_stack_frame; sf->worker = w; sf->flags = CILK_FAKE_VERSION_FLAG; w->current_stack_frame = sf; } /* Initialize frame. To be called when worker is not known */ __CILKRTS_INLINE void __cilk_fake_enter_frame(__cilkrts_stack_frame *sf) { __cilkrts_worker* w = __cilkrts_get_tls_worker(); uint32_t last_flag = 0; if (! w) { w = __cilkrts_bind_thread_1(); last_flag = CILK_FRAME_LAST; } __cilk_fake_enter_frame_fast(sf, w); sf->flags |= last_flag; } /* Initialize frame. To be called within the spawn helper */ __CILKRTS_INLINE void __cilk_fake_helper_enter_frame( __cilkrts_stack_frame *sf, __cilkrts_stack_frame *parent_sf) { sf->worker = 0; sf->call_parent = parent_sf; } /* Called from the spawn helper to push the parent continuation on the task * deque so that it can be stolen. */ __CILKRTS_INLINE void __cilk_fake_detach(__cilkrts_stack_frame *sf) { /* Initialize spawn helper frame. * call_parent was saved in __cilk_fake_helper_enter_frame */ __cilkrts_stack_frame *parent = sf->call_parent; __cilkrts_worker *w = parent->worker; __cilk_fake_enter_frame_fast(sf, w); /* Append a node to the pedigree */ sf->spawn_helper_pedigree = w->pedigree; parent->parent_pedigree = w->pedigree; w->pedigree.rank = 0; w->pedigree.parent = &sf->spawn_helper_pedigree; /* Push parent onto the task deque */ __cilkrts_stack_frame *volatile *tail = w->tail; *tail++ = sf->call_parent; /* The stores must be separated by a store fence (noop on x86) * or the second store is a release (st8.rel on Itanium) */ w->tail = tail; sf->flags |= CILK_FRAME_DETACHED; } /* This variable is used in CILK_FAKE_FORCE_FRAME_PTR(), below */ static int __cilk_fake_dummy = 8; /* The following macro is used to force the compiler into generating a frame * pointer. 
We never change the value of __cilk_fake_dummy, so the alloca() * is never called, but we need the 'if' statement and the __cilk_fake_dummy * variable so that the compiler does not attempt to optimize it away. */ #define CILK_FAKE_FORCE_FRAME_PTR(sf) do { \ if (__builtin_expect(1 & __cilk_fake_dummy, 0)) \ (sf).worker = (__cilkrts_worker*) alloca(__cilk_fake_dummy); \ } while (0) #ifndef CILK_FAKE_NO_SHRINKWRAP /* "shrink-wrap" optimization enabled. Do not initialize frame on entry, * except to clear worker pointer. Instead, defer initialization until * the first spawn. */ # define CILK_FAKE_INITIAL_ENTER_FRAME(sf) ((void) ((sf).worker = 0)) # define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) do { \ if (! (sf).worker) __cilk_fake_enter_frame(&(sf)); \ } while (0) #else /* "shrink-wrap" optimization disabled. Initialize frame immediately on * entry. Do not initialize frame on spawn. */ # define CILK_FAKE_INITIAL_ENTER_FRAME(sf) \ __cilk_fake_enter_frame(&(sf)) # define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) ((void) &(sf)) #endif /* Prologue of a spawning function. Declares and initializes the stack * frame. */ #define CILK_FAKE_PROLOG() \ __cilk_fake_stack_frame __cilk_sf; \ CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf); \ CILK_FAKE_INITIAL_ENTER_FRAME(__cilk_sf) /* Prologue of a spawning function where the current worker is already known. * Declares and initializes the stack frame without looking up the worker from * TLS. */ #define CILK_FAKE_PROLOG_FAST(w) \ __cilk_fake_stack_frame __cilk_sf; \ CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf); \ __cilk_fake_enter_frame_fast(&__cilk_sf, (w)) /* Simulate a cilk_sync */ #define CILK_FAKE_SYNC() CILK_FAKE_SYNC_IMP(__cilk_sf) /* Epilog at the end of a spawning function. Does a sync and calls the * runtime for leaving the frame. */ #ifdef __cplusplus // Epilogue is run automatically by __cilk_fake_stack_frame destructor. 
# define CILK_FAKE_EPILOG() ((void) __cilk_sf) #else # define CILK_FAKE_EPILOG() CILK_FAKE_CLEANUP_FRAME(__cilk_sf) #endif // C /* Implementation of spawning function epilog. See CILK_FAKE_EPILOG macro and * __cilk_fake_stack_frame destructor body. */ #define CILK_FAKE_CLEANUP_FRAME(sf) do { \ if (! (sf).worker) break; \ CILK_FAKE_SYNC_IMP(sf); \ CILK_FAKE_POP_FRAME(sf); \ if ((sf).flags != CILK_FAKE_VERSION_FLAG) \ __cilkrts_leave_frame(&(sf)); \ } while (0) /* Implementation of CILK_FAKE_SYNC with sf argument */ #define CILK_FAKE_SYNC_IMP(sf) do { \ if (__builtin_expect((sf).flags & CILK_FRAME_UNSYNCHED, 0)) { \ (sf).parent_pedigree = (sf).worker->pedigree; \ CILK_FAKE_SAVE_FP(sf); \ if (! CILK_SETJMP((sf).ctx)) \ __cilkrts_sync(&(sf)); \ } \ ++(sf).worker->pedigree.rank; \ } while (0) /* Save the floating-point control registers. * The definition of CILK_FAKE_SAVE_FP is compiler specific (and * architecture specific on Windows) */ #ifdef _MSC_VER # define MXCSR_OFFSET offsetof(struct __cilkrts_stack_frame, mxcsr) # define FPCSR_OFFSET offsetof(struct __cilkrts_stack_frame, fpcsr) # if defined(_M_IX86) /* Windows x86 */ # define CILK_FAKE_SAVE_FP(sf) do { \ __asm \ { \ mov eax, sf \ stmxcsr [eax+MXCSR_OFFSET] \ fnstcw [eax+FPCSR_OFFSET] \ } \ } while (0) # elif defined(_M_X64) /* Windows Intel64 - Not needed - saved by setjmp call */ # define CILK_FAKE_SAVE_FP(sf) ((void) sf) # else # error "Unknown architecture" # endif /* Microsoft architecture specifics */ #else /* Non-Windows */ # define CILK_FAKE_SAVE_FP(sf) do { \ __asm__ ( "stmxcsr %0\n\t" \ "fnstcw %1" : : "m" ((sf).mxcsr), "m" ((sf).fpcsr)); \ } while (0) #endif /* Call the spawn helper as part of a fake spawn */ #define CILK_FAKE_CALL_SPAWN_HELPER(helper) do { \ CILK_FAKE_DEFERRED_ENTER_FRAME(__cilk_sf); \ CILK_FAKE_SAVE_FP(__cilk_sf); \ if (__builtin_expect(! CILK_SETJMP(__cilk_sf.ctx), 1)) { \ helper; \ } \ } while (0) /* Body of a spawn helper function. 
In addition to the worker and the * expression to spawn, pass it any number of statements to be executed before * detaching. */ #define CILK_FAKE_SPAWN_HELPER_BODY(parent_sf, expr, ...) \ CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf); \ __VA_ARGS__; \ __cilk_fake_detach(&__cilk_sf); \ expr; \ CILK_FAKE_SPAWN_HELPER_EPILOG() /* Prolog for a spawn helper function */ #define CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf) \ __cilk_fake_spawn_helper_stack_frame __cilk_sf; \ __cilk_fake_helper_enter_frame(&__cilk_sf, &(parent_sf)) /* Implementation of spawn helper epilog. See CILK_FAKE_SPAWN_HELPER_EPILOG * and the __cilk_fake_spawn_helper_frame destructor. */ #define CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(sf) do { \ if (! (sf).worker) break; \ CILK_FAKE_POP_FRAME(sf); \ __cilkrts_leave_frame(&(sf)); \ } while (0) /* Epilog to execute at the end of a spawn helper */ #ifdef __cplusplus // Epilog handled by __cilk_fake_spawn_helper_stack_frame destructor # define CILK_FAKE_SPAWN_HELPER_EPILOG() ((void) __cilk_sf) #else # define CILK_FAKE_SPAWN_HELPER_EPILOG() \ CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(__cilk_sf) #endif /* Pop the current frame off of the call chain */ #define CILK_FAKE_POP_FRAME(sf) do { \ (sf).worker->current_stack_frame = (sf).call_parent; \ (sf).call_parent = 0; \ } while (0) #ifdef _WIN32 /* define macros for synching functions before allowing them to propagate. */ # define CILK_FAKE_EXCEPT_BEGIN \ if (0 == CILK_SETJMP(__cilk_sf.except_ctx)) { # define CILK_FAKE_EXCEPT_END \ } else { \ assert((__cilk_sf.flags & (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING))\ == CILK_FRAME_EXCEPTING); \ __cilkrts_rethrow(&__cilk_sf); \ exit(0); \ } #else # define CILK_EXCEPT_BEGIN { # define CILK_EXCEPT_END } #endif #ifdef __cplusplus // The following definitions depend on C++ features. // Wrap a functor (probably a lambda), so that a call to it cannot be // inlined. 
template class __cilk_fake_noinline_wrapper { F&& m_fn; public: __cilk_fake_noinline_wrapper(F&& fn) : m_fn(static_cast(fn)) { } #ifdef _WIN32 __declspec(noinline) void operator()(__cilkrts_stack_frame *sf); #else void operator()(__cilkrts_stack_frame *sf) __attribute__((noinline)); #endif }; template void __cilk_fake_noinline_wrapper::operator()(__cilkrts_stack_frame *sf) { m_fn(sf); } template inline __cilk_fake_noinline_wrapper __cilk_fake_make_noinline_wrapper(F&& fn) { return __cilk_fake_noinline_wrapper(static_cast(fn)); } // Simulate "_Cilk_spawn expr", where expr must be a function call. // // Note: this macro does not correctly construct function arguments. // According to the ABI specification, function arguments should be evaluated // before the detach and destroyed after the detach. This macro both // evaluates and destroys them after the detach. This means that if any part // of the function argument expression depends on a value that is modified in // the continuation of the spawn, race will occur between the continuation and // the argument evaluation. // // To work around this problem, this macro accepts an arbitrary list of // declarations and statements (separated by semicolons) that are evaluated // before the detach. Thus, to simulate: // // _Cilk_spawn f(expr); // // one would write: // // CILK_FAKE_SPAWN(f(arg), auto arg = expr); // // Despite appearing in the reverse order, the 'arg' variable is created and // initialized before the detach and the call to f(arg) occurs after the // detach. #define CILK_FAKE_SPAWN(expr, ...) \ CILK_FAKE_CALL_SPAWN_HELPER( \ CILK_FAKE_SPAWN_HELPER(expr, __VA_ARGS__)(&__cilk_sf)) // Simulate "ret = cilk_spawn expr". See CILK_FAKE_SPAWN for constraints. #define CILK_FAKE_SPAWN_R(ret, expr, ...) \ CILK_FAKE_SPAWN(((ret) = (expr)), __VA_ARGS__) // Create a spawn helper as a C++11 lambda function. 
In addition to the // expression to spawn, this macro takes a any number of statements to be // executed before detaching. #define CILK_FAKE_SPAWN_HELPER(expr, ...) \ __cilk_fake_make_noinline_wrapper([&](__cilkrts_stack_frame *parent_sf) { \ CILK_FAKE_SPAWN_HELPER_BODY(*parent_sf, expr, __VA_ARGS__); \ }) // C++ version of a __cilkrts_stack_frame for a spawning function. // This struct is identical to __cilkrts_stack_frame except that the // destructor automatically does frame cleanup. struct __cilk_fake_stack_frame : __cilkrts_stack_frame { // Extension of __cilkrts_stack_frame with constructor and destructor __cilk_fake_stack_frame() { } __forceinline ~__cilk_fake_stack_frame() { CILK_FAKE_CLEANUP_FRAME(*this); } }; // C++ version of a __cilkrts_stack_frame for a spawn helper. // This struct is identical to __cilkrts_stack_frame except that the // destructor automatically does frame cleanup. struct __cilk_fake_spawn_helper_stack_frame : __cilkrts_stack_frame { // Extension of __cilkrts_stack_frame with constructor and destructor __cilk_fake_spawn_helper_stack_frame() { worker = 0; } __forceinline ~__cilk_fake_spawn_helper_stack_frame() { CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(*this); } }; #else // For C, __cilk_fake_stack_frame and __cilk_fake_spawn_helper_stack_frame are // identical to __cilkrts_stack_frame. Frame cleanup must be performed // excplicitly (in CILK_FAKE_EPILOG and CILK_FAKE_SPAWN_HELPER_EPILOG) typedef __cilkrts_stack_frame __cilk_fake_stack_frame; typedef __cilkrts_stack_frame __cilk_fake_spawn_helper_stack_frame; #endif #endif // ! defined(INCLUDED_CILK_FAKE_DOT_H)