// Mirror of https://github.com/autc04/Retro68.git
// (synced 2025-01-02 17:31:35 +00:00) -- 879 lines, 30 KiB, C++
/*
|
|
Copyright (c) 2014-2016 Intel Corporation. All Rights Reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions
|
|
are met:
|
|
|
|
* Redistributions of source code must retain the above copyright
|
|
notice, this list of conditions and the following disclaimer.
|
|
* Redistributions in binary form must reproduce the above copyright
|
|
notice, this list of conditions and the following disclaimer in the
|
|
documentation and/or other materials provided with the distribution.
|
|
* Neither the name of Intel Corporation nor the names of its
|
|
contributors may be used to endorse or promote products derived
|
|
from this software without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
|
|
#include "offload_target.h"
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#ifdef SEP_SUPPORT
|
|
#include <fcntl.h>
|
|
#include <sys/ioctl.h>
|
|
#endif // SEP_SUPPORT
|
|
#include <omp.h>
|
|
#include <map>
|
|
|
|
// typedef offload_func_with_parms.
// Pointer to function that represents an offloaded entry point.
// The parameters are a temporary fix for parameters on the stack.
// OffloadDescriptor::offload() invokes the entry with the address of the
// OffloadDescriptor it has just set up as the single void* argument.
typedef void (*offload_func_with_parms)(void *);
|
|
|
|
// Target console and file logging

// Message prefix; assigned in __offload_target_init() from
// report_get_message_str(c_report_mic).
const char *prefix;
// Overwritten at the start of every OffloadDescriptor::offload() call with
// the value carried by the incoming FunctionDescriptor.
int console_enabled = 0;
// Overwritten at the start of every OffloadDescriptor::offload() call with
// the value carried by the incoming FunctionDescriptor.
int offload_report_level = 0;
|
|
|
|
// Trace information

// Printable names for a variable's copy direction. merge_var_descs() indexes
// this table with m_vars[i].direction.bits when it traces a descriptor.
static const char* vardesc_direction_as_string[] = {
    "NOCOPY",
    "IN",
    "OUT",
    "INOUT"
};
|
|
// Printable names for variable types. merge_var_descs() indexes this table
// with m_vars_extra[i].type_src / type_dst when it traces a descriptor.
// NOTE(review): presumably the entries follow the order of the variable type
// enumeration declared elsewhere -- verify whenever a type is added.
static const char* vardesc_type_as_string[] = {
    "unknown",
    "data",
    "data_ptr",
    "func_ptr",
    "void_ptr",
    "string_ptr",
    "dv",
    "dv_data",
    "dv_data_slice",
    "dv_ptr",
    "dv_ptr_data",
    "dv_ptr_data_slice",
    "cean_var",
    "cean_var_ptr",
    "c_data_ptr_array",
    "c_extended_type",
    "c_func_ptr_array",
    "c_void_ptr_array",
    "c_string_ptr_array",
    "c_data_ptr_ptr",
    "c_func_ptr_ptr",
    "c_void_ptr_ptr",
    "c_string_ptr_ptr",
    "c_cean_var_ptr_ptr",
};
|
|
|
|
// Value returned by _Offload_get_device_number(); starts at -1 and is not
// assigned anywhere in the visible part of this file.
int mic_index = -1;
// Value returned by _Offload_number_of_devices(); starts at -1 and is not
// assigned anywhere in the visible part of this file.
int mic_engines_total = -1;
// Set in __offload_target_init() from COIPerfGetCycleFrequency().
uint64_t mic_frequency = 0;
// Number of the offload currently being processed; refreshed from the
// FunctionDescriptor at the start of OffloadDescriptor::offload().
int offload_number = 0;
// Per-buffer reference bookkeeping, keyed by buffer address.
static std::map<void*, RefInfo*> ref_data;
// Taken by add_ref_count() and BufReleaseRef() while they touch ref_data.
static mutex_t add_ref_lock;
|
|
|
|
#ifdef SEP_SUPPORT
|
|
// Name of the environment variable that switches SEP monitoring on; parsed
// with atoi() in __offload_target_init().
static const char* sep_monitor_env = "SEP_MONITOR";
// When true, OffloadDescriptor::offload() resumes/pauses sampling around the
// offloaded function.
static bool sep_monitor = false;
// Name of the environment variable that overrides the SEP device path.
static const char* sep_device_env = "SEP_DEVICE";
// Device node opened by VTPauseSampling() / VTResumeSampling().
static const char* sep_device = "/dev/sep3.8/c";
// Number of offloads currently in flight: sampling is resumed when it goes
// from 0 to 1 and paused when it drops back to 0 (see offload()).
static int sep_counter = 0;

// ioctl request codes sent to the SEP device.
#define SEP_API_IOC_MAGIC 99
#define SEP_IOCTL_PAUSE _IO (SEP_API_IOC_MAGIC, 31)
#define SEP_IOCTL_RESUME _IO (SEP_API_IOC_MAGIC, 32)
|
|
|
|
static void add_ref_count(void * buf, bool created)
|
|
{
|
|
mutex_locker_t locker(add_ref_lock);
|
|
RefInfo * info = ref_data[buf];
|
|
|
|
if (info) {
|
|
info->count++;
|
|
}
|
|
else {
|
|
info = new RefInfo((int)created,(long)1);
|
|
}
|
|
info->is_added |= created;
|
|
ref_data[buf] = info;
|
|
}
|
|
|
|
static void BufReleaseRef(void * buf)
|
|
{
|
|
mutex_locker_t locker(add_ref_lock);
|
|
RefInfo * info = ref_data[buf];
|
|
|
|
if (info) {
|
|
--info->count;
|
|
if (info->count == 0 && info->is_added) {
|
|
OFFLOAD_TRACE(1, "Calling COIBufferReleaseRef AddRef count = %d\n",
|
|
((RefInfo *) ref_data[buf])->count);
|
|
BufferReleaseRef(buf);
|
|
info->is_added = 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
static int VTPauseSampling(void)
|
|
{
|
|
int ret = -1;
|
|
int handle = open(sep_device, O_RDWR);
|
|
if (handle > 0) {
|
|
ret = ioctl(handle, SEP_IOCTL_PAUSE);
|
|
close(handle);
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
static int VTResumeSampling(void)
|
|
{
|
|
int ret = -1;
|
|
int handle = open(sep_device, O_RDWR);
|
|
if (handle > 0) {
|
|
ret = ioctl(handle, SEP_IOCTL_RESUME);
|
|
close(handle);
|
|
}
|
|
return ret;
|
|
}
|
|
#endif // SEP_SUPPORT
|
|
|
|
void OffloadDescriptor::offload(
|
|
uint32_t buffer_count,
|
|
void** buffers,
|
|
void* misc_data,
|
|
uint16_t misc_data_len,
|
|
void* return_data,
|
|
uint16_t return_data_len
|
|
)
|
|
{
|
|
FunctionDescriptor *func = (FunctionDescriptor*) misc_data;
|
|
const char *name = func->data;
|
|
OffloadDescriptor ofld;
|
|
char *in_data = 0;
|
|
char *out_data = 0;
|
|
char *timer_data = 0;
|
|
|
|
console_enabled = func->console_enabled;
|
|
timer_enabled = func->timer_enabled;
|
|
offload_report_level = func->offload_report_level;
|
|
offload_number = func->offload_number;
|
|
ofld.set_offload_number(func->offload_number);
|
|
|
|
#ifdef SEP_SUPPORT
|
|
if (sep_monitor) {
|
|
if (__sync_fetch_and_add(&sep_counter, 1) == 0) {
|
|
OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
|
|
VTResumeSampling();
|
|
}
|
|
}
|
|
#endif // SEP_SUPPORT
|
|
|
|
OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(),
|
|
c_offload_start_target_func,
|
|
"Offload \"%s\" started\n", name);
|
|
|
|
// initialize timer data
|
|
OFFLOAD_TIMER_INIT();
|
|
|
|
OFFLOAD_TIMER_START(c_offload_target_total_time);
|
|
|
|
OFFLOAD_TIMER_START(c_offload_target_descriptor_setup);
|
|
|
|
// get input/output buffer addresses
|
|
if (func->in_datalen > 0 || func->out_datalen > 0) {
|
|
if (func->data_offset != 0) {
|
|
in_data = (char*) misc_data + func->data_offset;
|
|
out_data = (char*) return_data;
|
|
}
|
|
else {
|
|
char *inout_buf = (char*) buffers[--buffer_count];
|
|
in_data = inout_buf;
|
|
out_data = inout_buf;
|
|
}
|
|
}
|
|
|
|
// assign variable descriptors
|
|
ofld.m_vars_total = func->vars_num;
|
|
if (ofld.m_vars_total > 0) {
|
|
uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc);
|
|
|
|
ofld.m_vars = (VarDesc*) malloc(var_data_len);
|
|
if (ofld.m_vars == NULL)
|
|
LIBOFFLOAD_ERROR(c_malloc);
|
|
memcpy(ofld.m_vars, in_data, var_data_len);
|
|
|
|
ofld.m_vars_extra =
|
|
(VarExtra*) malloc(ofld.m_vars_total * sizeof(VarExtra));
|
|
if (ofld.m_vars == NULL)
|
|
LIBOFFLOAD_ERROR(c_malloc);
|
|
|
|
in_data += var_data_len;
|
|
func->in_datalen -= var_data_len;
|
|
}
|
|
|
|
// timer data
|
|
if (func->timer_enabled) {
|
|
uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN();
|
|
|
|
timer_data = out_data;
|
|
out_data += timer_data_len;
|
|
func->out_datalen -= timer_data_len;
|
|
}
|
|
|
|
// init Marshallers
|
|
ofld.m_in.init_buffer(in_data, func->in_datalen);
|
|
ofld.m_out.init_buffer(out_data, func->out_datalen);
|
|
|
|
// copy buffers to offload descriptor
|
|
std::copy(buffers, buffers + buffer_count,
|
|
std::back_inserter(ofld.m_buffers));
|
|
|
|
OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup);
|
|
|
|
// find offload entry address
|
|
OFFLOAD_TIMER_START(c_offload_target_func_lookup);
|
|
|
|
offload_func_with_parms entry = (offload_func_with_parms)
|
|
__offload_entries.find_addr(name);
|
|
|
|
if (entry == NULL) {
|
|
#if OFFLOAD_DEBUG > 0
|
|
if (console_enabled > 2) {
|
|
__offload_entries.dump();
|
|
}
|
|
#endif
|
|
LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name);
|
|
exit(1);
|
|
}
|
|
|
|
OFFLOAD_TIMER_STOP(c_offload_target_func_lookup);
|
|
|
|
OFFLOAD_TIMER_START(c_offload_target_func_time);
|
|
|
|
// execute offload entry
|
|
entry(&ofld);
|
|
|
|
OFFLOAD_TIMER_STOP(c_offload_target_func_time);
|
|
|
|
OFFLOAD_TIMER_STOP(c_offload_target_total_time);
|
|
|
|
// copy timer data to the buffer
|
|
OFFLOAD_TIMER_TARGET_DATA(timer_data);
|
|
|
|
OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name);
|
|
|
|
#ifdef SEP_SUPPORT
|
|
if (sep_monitor) {
|
|
if (__sync_sub_and_fetch(&sep_counter, 1) == 0) {
|
|
OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
|
|
VTPauseSampling();
|
|
}
|
|
}
|
|
#endif // SEP_SUPPORT
|
|
}
|
|
|
|
// Combines the variable descriptors already stored in this object (m_vars)
// with the descriptors passed in by the caller.
//
//   vars        caller-supplied descriptors; supply ptr/into addresses
//   vars2       optional (may be NULL) secondary descriptors; only their
//               sname/dname strings are read, for tracing
//   vars_total  number of entries in vars; must not exceed m_vars_total,
//               otherwise an error is reported and the process exits
//
// For every descriptor the effective source/destination types are written to
// m_vars_extra[i].type_src / type_dst, unwrapping c_extended_type entries.
// Descriptors beyond vars_total only get their types copied over.
void OffloadDescriptor::merge_var_descs(
    VarDesc *vars,
    VarDesc2 *vars2,
    int vars_total
)
{
    // number of variable descriptors received from host and generated
    // locally should match
    if (m_vars_total < vars_total) {
        LIBOFFLOAD_ERROR(c_merge_var_descs1);
        exit(1);
    }

    for (int i = 0; i < m_vars_total; i++) {
        VarDesc  &stored = m_vars[i];
        VarExtra &extra = m_vars_extra[i];

        if (i >= vars_total) {
            // No caller-side counterpart: keep the stored types as they are.
            extra.type_src = stored.type.src;
            extra.type_dst = stored.type.dst;
        }
        else {
            VarDesc &given = vars[i];

            // variable type must match
            if (stored.type.bits != given.type.bits) {
                OFFLOAD_TRACE(2,
                    "m_vars[%d].type.bits=%08x, vars[%d].type.bits=%08x\n",
                    i, stored.type.bits, i, given.type.bits);
                LIBOFFLOAD_ERROR(c_merge_var_descs2);
                exit(1);
            }

            // instead of m_vars[i].type.src we will use
            // m_vars_extra[i].type_src
            if (stored.type.src == c_extended_type) {
                VarDescExtendedType *src_ext =
                    reinterpret_cast<VarDescExtendedType*>(given.ptr);
                extra.type_src = src_ext->extended_type;
                stored.ptr = src_ext->ptr;
            }
            else {
                extra.type_src = stored.type.src;
                // The stored ptr is kept only for a c_dv source that has
                // use_device_ptr set; everything else takes the caller's.
                if (!stored.flags.use_device_ptr ||
                    stored.type.src != c_dv) {
                    stored.ptr = given.ptr;
                }
            }

            // instead of m_vars[i].type.dst we will use
            // m_vars_extra[i].type_dst
            if (stored.type.dst == c_extended_type) {
                VarDescExtendedType *dst_ext =
                    reinterpret_cast<VarDescExtendedType*>(given.into);
                extra.type_dst = dst_ext->extended_type;
                stored.into = dst_ext->ptr;
            }
            else {
                extra.type_dst = stored.type.dst;
                stored.into = given.into;
            }

            const char *var_sname =
                (vars2 != NULL && vars2[i].sname != NULL) ?
                    vars2[i].sname : "";

            OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
                " VarDesc %d, var=%s, %s, %s\n",
                i, var_sname,
                vardesc_direction_as_string[stored.direction.bits],
                vardesc_type_as_string[extra.type_src]);
            if (vars2 != NULL && vars2[i].dname != NULL) {
                OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname,
                    vardesc_type_as_string[extra.type_dst]);
            }
        }

        OFFLOAD_TRACE(2,
            " type_src=%d, type_dstn=%d, direction=%d, "
            "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
            "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
            extra.type_src,
            extra.type_dst,
            stored.direction.bits,
            stored.alloc_if,
            stored.free_if,
            stored.align,
            stored.mic_offset,
            stored.flags.bits,
            stored.offset,
            stored.size,
            stored.count,
            stored.ptr,
            stored.into);
    }
}
|
|
|
|
void OffloadDescriptor::scatter_copyin_data()
|
|
{
|
|
OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);
|
|
|
|
OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n",
|
|
m_in.get_buffer_start(),
|
|
m_in.get_buffer_size());
|
|
OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
|
|
m_in.get_buffer_size());
|
|
|
|
// receive data
|
|
for (int i = 0; i < m_vars_total; i++) {
|
|
bool src_is_for_mic = (m_vars[i].direction.out ||
|
|
m_vars[i].into == NULL);
|
|
void** ptr_addr = src_is_for_mic ?
|
|
static_cast<void**>(m_vars[i].ptr) :
|
|
static_cast<void**>(m_vars[i].into);
|
|
int type = src_is_for_mic ? m_vars_extra[i].type_src :
|
|
m_vars_extra[i].type_dst;
|
|
bool is_static = src_is_for_mic ?
|
|
m_vars[i].flags.is_static :
|
|
m_vars[i].flags.is_static_dstn;
|
|
void *ptr = NULL;
|
|
|
|
if (m_vars[i].flags.alloc_disp) {
|
|
int64_t offset = 0;
|
|
m_in.receive_data(&offset, sizeof(offset));
|
|
}
|
|
if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
|
|
VAR_TYPE_IS_DV_DATA(type)) {
|
|
ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
|
|
reinterpret_cast<ArrDesc*>(ptr_addr) :
|
|
*reinterpret_cast<ArrDesc**>(ptr_addr);
|
|
ptr_addr = reinterpret_cast<void**>(&dvp->Base);
|
|
}
|
|
// Set pointer values
|
|
switch (type) {
|
|
case c_data_ptr_array:
|
|
{
|
|
int j = m_vars[i].ptr_arr_offset;
|
|
int max_el = j + m_vars[i].count;
|
|
char *dst_arr_ptr = (src_is_for_mic)?
|
|
*(reinterpret_cast<char**>(m_vars[i].ptr)) :
|
|
reinterpret_cast<char*>(m_vars[i].into);
|
|
|
|
// if is_pointer is 1 it means that pointer array itself
|
|
// is defined either via pointer or as class member.
|
|
// i.e. arr_ptr[0:5] or this->ARR[0:5]
|
|
if (m_vars[i].flags.is_pointer) {
|
|
int64_t offset = 0;
|
|
m_in.receive_data(&offset, sizeof(offset));
|
|
dst_arr_ptr = *((char**)dst_arr_ptr) + offset;
|
|
}
|
|
for (; j < max_el; j++) {
|
|
if (src_is_for_mic) {
|
|
m_vars[j].ptr =
|
|
dst_arr_ptr + m_vars[j].ptr_arr_offset;
|
|
}
|
|
else {
|
|
m_vars[j].into =
|
|
dst_arr_ptr + m_vars[j].ptr_arr_offset;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case c_data:
|
|
case c_void_ptr:
|
|
case c_void_ptr_ptr:
|
|
case c_cean_var:
|
|
case c_dv:
|
|
break;
|
|
|
|
case c_string_ptr:
|
|
case c_data_ptr:
|
|
case c_string_ptr_ptr:
|
|
case c_data_ptr_ptr:
|
|
case c_cean_var_ptr:
|
|
case c_cean_var_ptr_ptr:
|
|
case c_dv_ptr:
|
|
// Don't need ptr_addr value for variables from stack buffer.
|
|
// Stack buffer address is set at var_desc with #0.
|
|
if (i != 0 && m_vars[i].flags.is_stack_buf) {
|
|
break;
|
|
}
|
|
if (TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_src) ||
|
|
TYPE_IS_PTR_TO_PTR(m_vars_extra[i].type_dst)) {
|
|
int64_t offset;
|
|
|
|
m_in.receive_data(&offset, sizeof(offset));
|
|
ptr_addr = reinterpret_cast<void**>(
|
|
reinterpret_cast<char*>(*ptr_addr) + offset);
|
|
|
|
}
|
|
|
|
if (m_vars[i].alloc_if && !m_vars[i].flags.preallocated) {
|
|
void *buf = NULL;
|
|
if (m_vars[i].flags.sink_addr) {
|
|
m_in.receive_data(&buf, sizeof(buf));
|
|
}
|
|
else {
|
|
buf = m_buffers.front();
|
|
m_buffers.pop_front();
|
|
}
|
|
if (buf) {
|
|
if (!is_static) {
|
|
if (!m_vars[i].flags.sink_addr) {
|
|
// increment buffer reference
|
|
OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
|
|
BufferAddRef(buf);
|
|
OFFLOAD_TRACE(1, "Calling COIBufferAddRef %p\n", buf);
|
|
OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
|
|
}
|
|
add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
|
|
OFFLOAD_TRACE(1, " AddRef count = %d\n",
|
|
((RefInfo *) ref_data[buf])->count);
|
|
}
|
|
ptr = static_cast<char*>(buf) +
|
|
m_vars[i].mic_offset +
|
|
(m_vars[i].flags.is_stack_buf ?
|
|
0 : m_vars[i].offset);
|
|
|
|
}
|
|
*ptr_addr = ptr;
|
|
}
|
|
else if (m_vars[i].flags.sink_addr) {
|
|
void *buf;
|
|
m_in.receive_data(&buf, sizeof(buf));
|
|
void *ptr = static_cast<char*>(buf) +
|
|
m_vars[i].mic_offset +
|
|
(m_vars[i].flags.is_stack_buf ?
|
|
0 : m_vars[i].offset);
|
|
*ptr_addr = ptr;
|
|
}
|
|
break;
|
|
|
|
case c_func_ptr:
|
|
case c_func_ptr_ptr:
|
|
break;
|
|
|
|
case c_dv_data:
|
|
case c_dv_ptr_data:
|
|
case c_dv_data_slice:
|
|
case c_dv_ptr_data_slice:
|
|
if (m_vars[i].alloc_if) {
|
|
void *buf;
|
|
if (m_vars[i].flags.sink_addr) {
|
|
m_in.receive_data(&buf, sizeof(buf));
|
|
}
|
|
else {
|
|
buf = m_buffers.front();
|
|
m_buffers.pop_front();
|
|
}
|
|
if (buf) {
|
|
if (!is_static) {
|
|
if (!m_vars[i].flags.sink_addr) {
|
|
// increment buffer reference
|
|
OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
|
|
BufferAddRef(buf);
|
|
OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
|
|
}
|
|
add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
|
|
}
|
|
ptr = static_cast<char*>(buf) +
|
|
m_vars[i].mic_offset + m_vars[i].offset;
|
|
}
|
|
*ptr_addr = ptr;
|
|
}
|
|
else if (m_vars[i].flags.sink_addr) {
|
|
void *buf;
|
|
m_in.receive_data(&buf, sizeof(buf));
|
|
ptr = static_cast<char*>(buf) +
|
|
m_vars[i].mic_offset + m_vars[i].offset;
|
|
*ptr_addr = ptr;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
LIBOFFLOAD_ERROR(c_unknown_var_type, type);
|
|
abort();
|
|
}
|
|
// Release obsolete buffers for stack of persistent objects.
|
|
// The vardesc with i==0 and flags.is_stack_buf==TRUE is always for
|
|
// stack buffer pointer.
|
|
if (i == 0 &&
|
|
m_vars[i].flags.is_stack_buf &&
|
|
!m_vars[i].direction.bits &&
|
|
m_vars[i].alloc_if &&
|
|
m_vars[i].size != 0) {
|
|
for (int j=0; j < m_vars[i].size; j++) {
|
|
void *buf;
|
|
m_in.receive_data(&buf, sizeof(buf));
|
|
OFFLOAD_TRACE(4, "Releasing stack buffer %p\n", buf);
|
|
BufferReleaseRef(buf);
|
|
ref_data.erase(buf);
|
|
}
|
|
}
|
|
// Do copyin
|
|
switch (m_vars_extra[i].type_dst) {
|
|
case c_data_ptr_array:
|
|
break;
|
|
case c_data:
|
|
case c_void_ptr:
|
|
case c_void_ptr_ptr:
|
|
case c_cean_var:
|
|
if (m_vars[i].direction.in &&
|
|
!m_vars[i].flags.is_static_dstn) {
|
|
int64_t size;
|
|
int64_t disp;
|
|
char* ptr = m_vars[i].into ?
|
|
static_cast<char*>(m_vars[i].into) :
|
|
static_cast<char*>(m_vars[i].ptr);
|
|
if (m_vars_extra[i].type_dst == c_cean_var) {
|
|
m_in.receive_data((&size), sizeof(int64_t));
|
|
m_in.receive_data((&disp), sizeof(int64_t));
|
|
}
|
|
else {
|
|
size = m_vars[i].size;
|
|
disp = 0;
|
|
}
|
|
m_in.receive_data(ptr + disp, size);
|
|
}
|
|
break;
|
|
|
|
case c_dv:
|
|
if (m_vars[i].direction.bits ||
|
|
m_vars[i].alloc_if ||
|
|
m_vars[i].free_if) {
|
|
char* ptr = m_vars[i].into ?
|
|
static_cast<char*>(m_vars[i].into) :
|
|
static_cast<char*>(m_vars[i].ptr);
|
|
m_in.receive_data(ptr + sizeof(uint64_t),
|
|
m_vars[i].size - sizeof(uint64_t));
|
|
}
|
|
break;
|
|
|
|
case c_string_ptr:
|
|
case c_data_ptr:
|
|
case c_string_ptr_ptr:
|
|
case c_data_ptr_ptr:
|
|
case c_cean_var_ptr:
|
|
case c_cean_var_ptr_ptr:
|
|
case c_dv_ptr:
|
|
case c_dv_data:
|
|
case c_dv_ptr_data:
|
|
case c_dv_data_slice:
|
|
case c_dv_ptr_data_slice:
|
|
break;
|
|
|
|
case c_func_ptr:
|
|
case c_func_ptr_ptr:
|
|
if (m_vars[i].direction.in) {
|
|
m_in.receive_func_ptr((const void**) m_vars[i].ptr);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
|
|
abort();
|
|
}
|
|
}
|
|
|
|
OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
|
|
m_in.get_tfr_size());
|
|
|
|
OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);
|
|
|
|
OFFLOAD_TIMER_START(c_offload_target_compute);
|
|
}
|
|
|
|
void OffloadDescriptor::gather_copyout_data()
|
|
{
|
|
OFFLOAD_TIMER_STOP(c_offload_target_compute);
|
|
|
|
OFFLOAD_TIMER_START(c_offload_target_gather_outputs);
|
|
|
|
for (int i = 0; i < m_vars_total; i++) {
|
|
bool src_is_for_mic = (m_vars[i].direction.out ||
|
|
m_vars[i].into == NULL);
|
|
if (m_vars[i].flags.is_stack_buf) {
|
|
continue;
|
|
}
|
|
switch (m_vars_extra[i].type_src) {
|
|
case c_data_ptr_array:
|
|
break;
|
|
case c_data:
|
|
case c_void_ptr:
|
|
case c_void_ptr_ptr:
|
|
case c_cean_var:
|
|
if (m_vars[i].direction.out &&
|
|
!m_vars[i].flags.is_static) {
|
|
m_out.send_data(
|
|
static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp,
|
|
m_vars[i].size);
|
|
}
|
|
break;
|
|
|
|
case c_dv:
|
|
break;
|
|
|
|
case c_string_ptr:
|
|
case c_data_ptr:
|
|
case c_string_ptr_ptr:
|
|
case c_data_ptr_ptr:
|
|
case c_cean_var_ptr:
|
|
case c_cean_var_ptr_ptr:
|
|
case c_dv_ptr:
|
|
if (m_vars[i].free_if &&
|
|
src_is_for_mic &&
|
|
!m_vars[i].flags.preallocated &&
|
|
!m_vars[i].flags.is_static) {
|
|
void *buf = *static_cast<char**>(m_vars[i].ptr) -
|
|
m_vars[i].mic_offset -
|
|
(m_vars[i].flags.is_stack_buf?
|
|
0 : m_vars[i].offset);
|
|
if (buf == NULL) {
|
|
break;
|
|
}
|
|
// decrement buffer reference count
|
|
OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
|
|
BufReleaseRef(buf);
|
|
OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
|
|
}
|
|
if (m_vars[i].flags.preallocated && m_vars[i].alloc_if) {
|
|
m_out.send_data((void*) m_vars[i].ptr, sizeof(void*));
|
|
}
|
|
break;
|
|
|
|
case c_func_ptr:
|
|
case c_func_ptr_ptr:
|
|
if (m_vars[i].direction.out) {
|
|
m_out.send_func_ptr(*((void**) m_vars[i].ptr));
|
|
}
|
|
break;
|
|
|
|
case c_dv_data:
|
|
case c_dv_ptr_data:
|
|
case c_dv_data_slice:
|
|
case c_dv_ptr_data_slice:
|
|
if (src_is_for_mic &&
|
|
m_vars[i].free_if &&
|
|
!m_vars[i].flags.is_static) {
|
|
ArrDesc *dvp = (m_vars_extra[i].type_src == c_dv_data ||
|
|
m_vars_extra[i].type_src == c_dv_data_slice) ?
|
|
static_cast<ArrDesc*>(m_vars[i].ptr) :
|
|
*static_cast<ArrDesc**>(m_vars[i].ptr);
|
|
|
|
void *buf = reinterpret_cast<char*>(dvp->Base) -
|
|
m_vars[i].mic_offset -
|
|
m_vars[i].offset;
|
|
|
|
if (buf == NULL) {
|
|
break;
|
|
}
|
|
|
|
// decrement buffer reference count
|
|
OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
|
|
BufReleaseRef(buf);
|
|
OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
|
|
abort();
|
|
}
|
|
|
|
if (m_vars[i].into) {
|
|
switch (m_vars_extra[i].type_dst) {
|
|
case c_data_ptr_array:
|
|
break;
|
|
case c_data:
|
|
case c_void_ptr:
|
|
case c_void_ptr_ptr:
|
|
case c_cean_var:
|
|
case c_dv:
|
|
break;
|
|
|
|
case c_string_ptr:
|
|
case c_data_ptr:
|
|
case c_string_ptr_ptr:
|
|
case c_data_ptr_ptr:
|
|
case c_cean_var_ptr:
|
|
case c_cean_var_ptr_ptr:
|
|
case c_dv_ptr:
|
|
if (m_vars[i].direction.in &&
|
|
m_vars[i].free_if &&
|
|
!m_vars[i].flags.is_static_dstn) {
|
|
void *buf = *static_cast<char**>(m_vars[i].into) -
|
|
m_vars[i].mic_offset -
|
|
(m_vars[i].flags.is_stack_buf?
|
|
0 : m_vars[i].offset);
|
|
|
|
if (buf == NULL) {
|
|
break;
|
|
}
|
|
// decrement buffer reference count
|
|
OFFLOAD_TIMER_START(
|
|
c_offload_target_release_buffer_refs);
|
|
BufReleaseRef(buf);
|
|
OFFLOAD_TIMER_STOP(
|
|
c_offload_target_release_buffer_refs);
|
|
}
|
|
break;
|
|
|
|
case c_func_ptr:
|
|
case c_func_ptr_ptr:
|
|
break;
|
|
|
|
case c_dv_data:
|
|
case c_dv_ptr_data:
|
|
case c_dv_data_slice:
|
|
case c_dv_ptr_data_slice:
|
|
if (m_vars[i].free_if &&
|
|
m_vars[i].direction.in &&
|
|
!m_vars[i].flags.is_static_dstn) {
|
|
ArrDesc *dvp =
|
|
(m_vars_extra[i].type_dst == c_dv_data_slice ||
|
|
m_vars_extra[i].type_dst == c_dv_data) ?
|
|
static_cast<ArrDesc*>(m_vars[i].into) :
|
|
*static_cast<ArrDesc**>(m_vars[i].into);
|
|
void *buf = reinterpret_cast<char*>(dvp->Base) -
|
|
m_vars[i].mic_offset -
|
|
m_vars[i].offset;
|
|
|
|
if (buf == NULL) {
|
|
break;
|
|
}
|
|
// decrement buffer reference count
|
|
OFFLOAD_TIMER_START(
|
|
c_offload_target_release_buffer_refs);
|
|
BufReleaseRef(buf);
|
|
OFFLOAD_TIMER_STOP(
|
|
c_offload_target_release_buffer_refs);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars_extra[i].type_dst);
|
|
abort();
|
|
}
|
|
}
|
|
}
|
|
|
|
OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
|
|
m_out.get_buffer_start(),
|
|
m_out.get_buffer_size());
|
|
|
|
OFFLOAD_DEBUG_DUMP_BYTES(2,
|
|
m_out.get_buffer_start(),
|
|
m_out.get_buffer_size());
|
|
|
|
OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data,
|
|
"Total copyout data sent to host: [%lld] bytes\n",
|
|
m_out.get_tfr_size());
|
|
|
|
OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs);
|
|
}
|
|
|
|
void __offload_target_init(void)
|
|
{
|
|
#ifdef SEP_SUPPORT
|
|
const char* env_var = getenv(sep_monitor_env);
|
|
if (env_var != 0 && *env_var != '\0') {
|
|
sep_monitor = atoi(env_var);
|
|
}
|
|
env_var = getenv(sep_device_env);
|
|
if (env_var != 0 && *env_var != '\0') {
|
|
sep_device = env_var;
|
|
}
|
|
#endif // SEP_SUPPORT
|
|
|
|
prefix = report_get_message_str(c_report_mic);
|
|
|
|
// init frequency
|
|
mic_frequency = COIPerfGetCycleFrequency();
|
|
}
|
|
|
|
// User-visible offload API
|
|
|
|
int _Offload_number_of_devices(void)
|
|
{
|
|
return mic_engines_total;
|
|
}
|
|
|
|
int _Offload_get_device_number(void)
|
|
{
|
|
return mic_index;
|
|
}
|
|
|
|
int _Offload_get_physical_device_number(void)
|
|
{
|
|
uint32_t index;
|
|
EngineGetIndex(&index);
|
|
return index;
|
|
}
|