offload_target.cpp 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777
  1. /*
  2. Copyright (c) 2014 Intel Corporation. All Rights Reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions
  5. are met:
  6. * Redistributions of source code must retain the above copyright
  7. notice, this list of conditions and the following disclaimer.
  8. * Redistributions in binary form must reproduce the above copyright
  9. notice, this list of conditions and the following disclaimer in the
  10. documentation and/or other materials provided with the distribution.
  11. * Neither the name of Intel Corporation nor the names of its
  12. contributors may be used to endorse or promote products derived
  13. from this software without specific prior written permission.
  14. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  15. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  16. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  17. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  18. HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  19. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  20. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include "offload_target.h"
  27. #include <stdlib.h>
  28. #include <unistd.h>
  29. #ifdef SEP_SUPPORT
  30. #include <fcntl.h>
  31. #include <sys/ioctl.h>
  32. #endif // SEP_SUPPORT
  33. #include <omp.h>
  34. #include <map>
  // Signature of an offloaded entry point as invoked by
  // OffloadDescriptor::offload(): it receives a single opaque pointer
  // (the offload descriptor). Passing the descriptor this way is a
  // temporary fix for parameters on the stack.
  typedef void (*offload_func_with_parms)(void *);

  // Target-side console and file logging state.
  // 'prefix' is assigned in __offload_target_init(); the two levels are
  // refreshed from the function descriptor at the start of every offload.
  const char *prefix               = 0;
  int         console_enabled      = 0;
  int         offload_report_level = 0;
  // Trace information.
  //
  // Printable names used by the tracing code in merge_var_descs().

  // Indexed by VarDesc::direction.bits.
  static const char* vardesc_direction_as_string[] = {
      "NOCOPY",   // 0
      "IN",       // 1
      "OUT",      // 2
      "INOUT"     // 3
  };

  // Indexed by VarDesc::type.src / VarDesc::type.dst.
  static const char* vardesc_type_as_string[] = {
      "unknown",              //  0
      "data",                 //  1
      "data_ptr",             //  2
      "func_ptr",             //  3
      "void_ptr",             //  4
      "string_ptr",           //  5
      "dv",                   //  6
      "dv_data",              //  7
      "dv_data_slice",        //  8
      "dv_ptr",               //  9
      "dv_ptr_data",          // 10
      "dv_ptr_data_slice",    // 11
      "cean_var",             // 12
      "cean_var_ptr",         // 13
      "c_data_ptr_array"      // 14
  };
  67. int mic_index = -1;
  68. int mic_engines_total = -1;
  69. uint64_t mic_frequency = 0;
  70. int offload_number = 0;
  71. static std::map<void*, RefInfo*> ref_data;
  72. static mutex_t add_ref_lock;
  73. #ifdef SEP_SUPPORT
  // Environment variable names consulted by __offload_target_init().
  static const char* sep_monitor_env = "SEP_MONITOR";
  static const char* sep_device_env  = "SEP_DEVICE";

  // Whether sampling is paused/resumed around offloads (from SEP_MONITOR).
  static bool        sep_monitor = false;
  // Device node opened to issue the pause/resume ioctls; may be overridden
  // through SEP_DEVICE.
  static const char* sep_device  = "/dev/sep3.8/c";
  // Number of offloads currently in flight; sampling is resumed on the
  // 0 -> 1 transition and paused again on the 1 -> 0 transition.
  static int         sep_counter = 0;

  // ioctl request codes understood by the SEP device.
  #define SEP_API_IOC_MAGIC 99
  #define SEP_IOCTL_PAUSE   _IO (SEP_API_IOC_MAGIC, 31)
  #define SEP_IOCTL_RESUME  _IO (SEP_API_IOC_MAGIC, 32)
  82. static void add_ref_count(void * buf, bool created)
  83. {
  84. mutex_locker_t locker(add_ref_lock);
  85. RefInfo * info = ref_data[buf];
  86. if (info) {
  87. info->count++;
  88. }
  89. else {
  90. info = new RefInfo((int)created,(long)1);
  91. }
  92. info->is_added |= created;
  93. ref_data[buf] = info;
  94. }
  95. static void BufReleaseRef(void * buf)
  96. {
  97. mutex_locker_t locker(add_ref_lock);
  98. RefInfo * info = ref_data[buf];
  99. if (info) {
  100. --info->count;
  101. if (info->count == 0 && info->is_added) {
  102. BufferReleaseRef(buf);
  103. info->is_added = 0;
  104. }
  105. }
  106. }
  107. static int VTPauseSampling(void)
  108. {
  109. int ret = -1;
  110. int handle = open(sep_device, O_RDWR);
  111. if (handle > 0) {
  112. ret = ioctl(handle, SEP_IOCTL_PAUSE);
  113. close(handle);
  114. }
  115. return ret;
  116. }
  117. static int VTResumeSampling(void)
  118. {
  119. int ret = -1;
  120. int handle = open(sep_device, O_RDWR);
  121. if (handle > 0) {
  122. ret = ioctl(handle, SEP_IOCTL_RESUME);
  123. close(handle);
  124. }
  125. return ret;
  126. }
  127. #endif // SEP_SUPPORT
  128. void OffloadDescriptor::offload(
  129. uint32_t buffer_count,
  130. void** buffers,
  131. void* misc_data,
  132. uint16_t misc_data_len,
  133. void* return_data,
  134. uint16_t return_data_len
  135. )
  136. {
  137. FunctionDescriptor *func = (FunctionDescriptor*) misc_data;
  138. const char *name = func->data;
  139. OffloadDescriptor ofld;
  140. char *in_data = 0;
  141. char *out_data = 0;
  142. char *timer_data = 0;
  143. console_enabled = func->console_enabled;
  144. timer_enabled = func->timer_enabled;
  145. offload_report_level = func->offload_report_level;
  146. offload_number = func->offload_number;
  147. ofld.set_offload_number(func->offload_number);
  148. #ifdef SEP_SUPPORT
  149. if (sep_monitor) {
  150. if (__sync_fetch_and_add(&sep_counter, 1) == 0) {
  151. OFFLOAD_DEBUG_TRACE(2, "VTResumeSampling\n");
  152. VTResumeSampling();
  153. }
  154. }
  155. #endif // SEP_SUPPORT
  156. OFFLOAD_DEBUG_TRACE_1(2, ofld.get_offload_number(),
  157. c_offload_start_target_func,
  158. "Offload \"%s\" started\n", name);
  159. // initialize timer data
  160. OFFLOAD_TIMER_INIT();
  161. OFFLOAD_TIMER_START(c_offload_target_total_time);
  162. OFFLOAD_TIMER_START(c_offload_target_descriptor_setup);
  163. // get input/output buffer addresses
  164. if (func->in_datalen > 0 || func->out_datalen > 0) {
  165. if (func->data_offset != 0) {
  166. in_data = (char*) misc_data + func->data_offset;
  167. out_data = (char*) return_data;
  168. }
  169. else {
  170. char *inout_buf = (char*) buffers[--buffer_count];
  171. in_data = inout_buf;
  172. out_data = inout_buf;
  173. }
  174. }
  175. // assign variable descriptors
  176. ofld.m_vars_total = func->vars_num;
  177. if (ofld.m_vars_total > 0) {
  178. uint64_t var_data_len = ofld.m_vars_total * sizeof(VarDesc);
  179. ofld.m_vars = (VarDesc*) malloc(var_data_len);
  180. if (ofld.m_vars == NULL)
  181. LIBOFFLOAD_ERROR(c_malloc);
  182. memcpy(ofld.m_vars, in_data, var_data_len);
  183. in_data += var_data_len;
  184. func->in_datalen -= var_data_len;
  185. }
  186. // timer data
  187. if (func->timer_enabled) {
  188. uint64_t timer_data_len = OFFLOAD_TIMER_DATALEN();
  189. timer_data = out_data;
  190. out_data += timer_data_len;
  191. func->out_datalen -= timer_data_len;
  192. }
  193. // init Marshallers
  194. ofld.m_in.init_buffer(in_data, func->in_datalen);
  195. ofld.m_out.init_buffer(out_data, func->out_datalen);
  196. // copy buffers to offload descriptor
  197. std::copy(buffers, buffers + buffer_count,
  198. std::back_inserter(ofld.m_buffers));
  199. OFFLOAD_TIMER_STOP(c_offload_target_descriptor_setup);
  200. // find offload entry address
  201. OFFLOAD_TIMER_START(c_offload_target_func_lookup);
  202. offload_func_with_parms entry = (offload_func_with_parms)
  203. __offload_entries.find_addr(name);
  204. if (entry == NULL) {
  205. #if OFFLOAD_DEBUG > 0
  206. if (console_enabled > 2) {
  207. __offload_entries.dump();
  208. }
  209. #endif
  210. LIBOFFLOAD_ERROR(c_offload_descriptor_offload, name);
  211. exit(1);
  212. }
  213. OFFLOAD_TIMER_STOP(c_offload_target_func_lookup);
  214. OFFLOAD_TIMER_START(c_offload_target_func_time);
  215. // execute offload entry
  216. entry(&ofld);
  217. OFFLOAD_TIMER_STOP(c_offload_target_func_time);
  218. OFFLOAD_TIMER_STOP(c_offload_target_total_time);
  219. // copy timer data to the buffer
  220. OFFLOAD_TIMER_TARGET_DATA(timer_data);
  221. OFFLOAD_DEBUG_TRACE(2, "Offload \"%s\" finished\n", name);
  222. #ifdef SEP_SUPPORT
  223. if (sep_monitor) {
  224. if (__sync_sub_and_fetch(&sep_counter, 1) == 0) {
  225. OFFLOAD_DEBUG_TRACE(2, "VTPauseSampling\n");
  226. VTPauseSampling();
  227. }
  228. }
  229. #endif // SEP_SUPPORT
  230. }
  231. void OffloadDescriptor::merge_var_descs(
  232. VarDesc *vars,
  233. VarDesc2 *vars2,
  234. int vars_total
  235. )
  236. {
  237. // number of variable descriptors received from host and generated
  238. // locally should match
  239. if (m_vars_total < vars_total) {
  240. LIBOFFLOAD_ERROR(c_merge_var_descs1);
  241. exit(1);
  242. }
  243. for (int i = 0; i < m_vars_total; i++) {
  244. if (i < vars_total) {
  245. // variable type must match
  246. if (m_vars[i].type.bits != vars[i].type.bits) {
  247. LIBOFFLOAD_ERROR(c_merge_var_descs2);
  248. exit(1);
  249. }
  250. m_vars[i].ptr = vars[i].ptr;
  251. m_vars[i].into = vars[i].into;
  252. const char *var_sname = "";
  253. if (vars2 != NULL) {
  254. if (vars2[i].sname != NULL) {
  255. var_sname = vars2[i].sname;
  256. }
  257. }
  258. OFFLOAD_DEBUG_TRACE_1(2, get_offload_number(), c_offload_var,
  259. " VarDesc %d, var=%s, %s, %s\n",
  260. i, var_sname,
  261. vardesc_direction_as_string[m_vars[i].direction.bits],
  262. vardesc_type_as_string[m_vars[i].type.src]);
  263. if (vars2 != NULL && vars2[i].dname != NULL) {
  264. OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname,
  265. vardesc_type_as_string[m_vars[i].type.dst]);
  266. }
  267. }
  268. OFFLOAD_TRACE(2,
  269. " type_src=%d, type_dstn=%d, direction=%d, "
  270. "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
  271. "offset=%lld, size=%lld, count/disp=%lld, ptr=%p into=%p\n",
  272. m_vars[i].type.src,
  273. m_vars[i].type.dst,
  274. m_vars[i].direction.bits,
  275. m_vars[i].alloc_if,
  276. m_vars[i].free_if,
  277. m_vars[i].align,
  278. m_vars[i].mic_offset,
  279. m_vars[i].flags.bits,
  280. m_vars[i].offset,
  281. m_vars[i].size,
  282. m_vars[i].count,
  283. m_vars[i].ptr,
  284. m_vars[i].into);
  285. }
  286. }
  287. void OffloadDescriptor::scatter_copyin_data()
  288. {
  289. OFFLOAD_TIMER_START(c_offload_target_scatter_inputs);
  290. OFFLOAD_DEBUG_TRACE(2, "IN buffer @ %p size %lld\n",
  291. m_in.get_buffer_start(),
  292. m_in.get_buffer_size());
  293. OFFLOAD_DEBUG_DUMP_BYTES(2, m_in.get_buffer_start(),
  294. m_in.get_buffer_size());
  295. // receive data
  296. for (int i = 0; i < m_vars_total; i++) {
  297. bool src_is_for_mic = (m_vars[i].direction.out ||
  298. m_vars[i].into == NULL);
  299. void** ptr_addr = src_is_for_mic ?
  300. static_cast<void**>(m_vars[i].ptr) :
  301. static_cast<void**>(m_vars[i].into);
  302. int type = src_is_for_mic ? m_vars[i].type.src :
  303. m_vars[i].type.dst;
  304. bool is_static = src_is_for_mic ?
  305. m_vars[i].flags.is_static :
  306. m_vars[i].flags.is_static_dstn;
  307. void *ptr = NULL;
  308. if (m_vars[i].flags.alloc_disp) {
  309. int64_t offset = 0;
  310. m_in.receive_data(&offset, sizeof(offset));
  311. m_vars[i].offset = -offset;
  312. }
  313. if (VAR_TYPE_IS_DV_DATA_SLICE(type) ||
  314. VAR_TYPE_IS_DV_DATA(type)) {
  315. ArrDesc *dvp = (type == c_dv_data_slice || type == c_dv_data)?
  316. reinterpret_cast<ArrDesc*>(ptr_addr) :
  317. *reinterpret_cast<ArrDesc**>(ptr_addr);
  318. ptr_addr = reinterpret_cast<void**>(&dvp->Base);
  319. }
  320. // Set pointer values
  321. switch (type) {
  322. case c_data_ptr_array:
  323. {
  324. int j = m_vars[i].ptr_arr_offset;
  325. int max_el = j + m_vars[i].count;
  326. char *dst_arr_ptr = (src_is_for_mic)?
  327. *(reinterpret_cast<char**>(m_vars[i].ptr)) :
  328. reinterpret_cast<char*>(m_vars[i].into);
  329. for (; j < max_el; j++) {
  330. if (src_is_for_mic) {
  331. m_vars[j].ptr =
  332. dst_arr_ptr + m_vars[j].ptr_arr_offset;
  333. }
  334. else {
  335. m_vars[j].into =
  336. dst_arr_ptr + m_vars[j].ptr_arr_offset;
  337. }
  338. }
  339. }
  340. break;
  341. case c_data:
  342. case c_void_ptr:
  343. case c_cean_var:
  344. case c_dv:
  345. break;
  346. case c_string_ptr:
  347. case c_data_ptr:
  348. case c_cean_var_ptr:
  349. case c_dv_ptr:
  350. if (m_vars[i].alloc_if) {
  351. void *buf;
  352. if (m_vars[i].flags.sink_addr) {
  353. m_in.receive_data(&buf, sizeof(buf));
  354. }
  355. else {
  356. buf = m_buffers.front();
  357. m_buffers.pop_front();
  358. }
  359. if (buf) {
  360. if (!is_static) {
  361. if (!m_vars[i].flags.sink_addr) {
  362. // increment buffer reference
  363. OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
  364. BufferAddRef(buf);
  365. OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
  366. }
  367. add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
  368. }
  369. ptr = static_cast<char*>(buf) +
  370. m_vars[i].mic_offset +
  371. (m_vars[i].flags.is_stack_buf ?
  372. 0 : m_vars[i].offset);
  373. }
  374. *ptr_addr = ptr;
  375. }
  376. else if (m_vars[i].flags.sink_addr) {
  377. void *buf;
  378. m_in.receive_data(&buf, sizeof(buf));
  379. void *ptr = static_cast<char*>(buf) +
  380. m_vars[i].mic_offset +
  381. (m_vars[i].flags.is_stack_buf ?
  382. 0 : m_vars[i].offset);
  383. *ptr_addr = ptr;
  384. }
  385. break;
  386. case c_func_ptr:
  387. break;
  388. case c_dv_data:
  389. case c_dv_ptr_data:
  390. case c_dv_data_slice:
  391. case c_dv_ptr_data_slice:
  392. if (m_vars[i].alloc_if) {
  393. void *buf;
  394. if (m_vars[i].flags.sink_addr) {
  395. m_in.receive_data(&buf, sizeof(buf));
  396. }
  397. else {
  398. buf = m_buffers.front();
  399. m_buffers.pop_front();
  400. }
  401. if (buf) {
  402. if (!is_static) {
  403. if (!m_vars[i].flags.sink_addr) {
  404. // increment buffer reference
  405. OFFLOAD_TIMER_START(c_offload_target_add_buffer_refs);
  406. BufferAddRef(buf);
  407. OFFLOAD_TIMER_STOP(c_offload_target_add_buffer_refs);
  408. }
  409. add_ref_count(buf, 0 == m_vars[i].flags.sink_addr);
  410. }
  411. ptr = static_cast<char*>(buf) +
  412. m_vars[i].mic_offset + m_vars[i].offset;
  413. }
  414. *ptr_addr = ptr;
  415. }
  416. else if (m_vars[i].flags.sink_addr) {
  417. void *buf;
  418. m_in.receive_data(&buf, sizeof(buf));
  419. ptr = static_cast<char*>(buf) +
  420. m_vars[i].mic_offset + m_vars[i].offset;
  421. *ptr_addr = ptr;
  422. }
  423. break;
  424. default:
  425. LIBOFFLOAD_ERROR(c_unknown_var_type, type);
  426. abort();
  427. }
  428. // Release obsolete buffers for stack of persistent objects
  429. if (type = c_data_ptr &&
  430. m_vars[i].flags.is_stack_buf &&
  431. !m_vars[i].direction.bits &&
  432. m_vars[i].alloc_if &&
  433. m_vars[i].size != 0) {
  434. for (int j=0; j < m_vars[i].size; j++) {
  435. void *buf;
  436. m_in.receive_data(&buf, sizeof(buf));
  437. BufferReleaseRef(buf);
  438. ref_data.erase(buf);
  439. }
  440. }
  441. // Do copyin
  442. switch (m_vars[i].type.dst) {
  443. case c_data_ptr_array:
  444. break;
  445. case c_data:
  446. case c_void_ptr:
  447. case c_cean_var:
  448. if (m_vars[i].direction.in &&
  449. !m_vars[i].flags.is_static_dstn) {
  450. int64_t size;
  451. int64_t disp;
  452. char* ptr = m_vars[i].into ?
  453. static_cast<char*>(m_vars[i].into) :
  454. static_cast<char*>(m_vars[i].ptr);
  455. if (m_vars[i].type.dst == c_cean_var) {
  456. m_in.receive_data((&size), sizeof(int64_t));
  457. m_in.receive_data((&disp), sizeof(int64_t));
  458. }
  459. else {
  460. size = m_vars[i].size;
  461. disp = 0;
  462. }
  463. m_in.receive_data(ptr + disp, size);
  464. }
  465. break;
  466. case c_dv:
  467. if (m_vars[i].direction.bits ||
  468. m_vars[i].alloc_if ||
  469. m_vars[i].free_if) {
  470. char* ptr = m_vars[i].into ?
  471. static_cast<char*>(m_vars[i].into) :
  472. static_cast<char*>(m_vars[i].ptr);
  473. m_in.receive_data(ptr + sizeof(uint64_t),
  474. m_vars[i].size - sizeof(uint64_t));
  475. }
  476. break;
  477. case c_string_ptr:
  478. case c_data_ptr:
  479. case c_cean_var_ptr:
  480. case c_dv_ptr:
  481. case c_dv_data:
  482. case c_dv_ptr_data:
  483. case c_dv_data_slice:
  484. case c_dv_ptr_data_slice:
  485. break;
  486. case c_func_ptr:
  487. if (m_vars[i].direction.in) {
  488. m_in.receive_func_ptr((const void**) m_vars[i].ptr);
  489. }
  490. break;
  491. default:
  492. LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
  493. abort();
  494. }
  495. }
  496. OFFLOAD_TRACE(1, "Total copyin data received from host: [%lld] bytes\n",
  497. m_in.get_tfr_size());
  498. OFFLOAD_TIMER_STOP(c_offload_target_scatter_inputs);
  499. OFFLOAD_TIMER_START(c_offload_target_compute);
  500. }
  501. void OffloadDescriptor::gather_copyout_data()
  502. {
  503. OFFLOAD_TIMER_STOP(c_offload_target_compute);
  504. OFFLOAD_TIMER_START(c_offload_target_gather_outputs);
  505. for (int i = 0; i < m_vars_total; i++) {
  506. bool src_is_for_mic = (m_vars[i].direction.out ||
  507. m_vars[i].into == NULL);
  508. switch (m_vars[i].type.src) {
  509. case c_data_ptr_array:
  510. break;
  511. case c_data:
  512. case c_void_ptr:
  513. case c_cean_var:
  514. if (m_vars[i].direction.out &&
  515. !m_vars[i].flags.is_static) {
  516. m_out.send_data(
  517. static_cast<char*>(m_vars[i].ptr) + m_vars[i].disp,
  518. m_vars[i].size);
  519. }
  520. break;
  521. case c_dv:
  522. break;
  523. case c_string_ptr:
  524. case c_data_ptr:
  525. case c_cean_var_ptr:
  526. case c_dv_ptr:
  527. if (m_vars[i].free_if &&
  528. src_is_for_mic &&
  529. !m_vars[i].flags.is_static) {
  530. void *buf = *static_cast<char**>(m_vars[i].ptr) -
  531. m_vars[i].mic_offset -
  532. (m_vars[i].flags.is_stack_buf?
  533. 0 : m_vars[i].offset);
  534. if (buf == NULL) {
  535. break;
  536. }
  537. // decrement buffer reference count
  538. OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
  539. BufReleaseRef(buf);
  540. OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
  541. }
  542. break;
  543. case c_func_ptr:
  544. if (m_vars[i].direction.out) {
  545. m_out.send_func_ptr(*((void**) m_vars[i].ptr));
  546. }
  547. break;
  548. case c_dv_data:
  549. case c_dv_ptr_data:
  550. case c_dv_data_slice:
  551. case c_dv_ptr_data_slice:
  552. if (src_is_for_mic &&
  553. m_vars[i].free_if &&
  554. !m_vars[i].flags.is_static) {
  555. ArrDesc *dvp = (m_vars[i].type.src == c_dv_data ||
  556. m_vars[i].type.src == c_dv_data_slice) ?
  557. static_cast<ArrDesc*>(m_vars[i].ptr) :
  558. *static_cast<ArrDesc**>(m_vars[i].ptr);
  559. void *buf = reinterpret_cast<char*>(dvp->Base) -
  560. m_vars[i].mic_offset -
  561. m_vars[i].offset;
  562. if (buf == NULL) {
  563. break;
  564. }
  565. // decrement buffer reference count
  566. OFFLOAD_TIMER_START(c_offload_target_release_buffer_refs);
  567. BufReleaseRef(buf);
  568. OFFLOAD_TIMER_STOP(c_offload_target_release_buffer_refs);
  569. }
  570. break;
  571. default:
  572. LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
  573. abort();
  574. }
  575. if (m_vars[i].into) {
  576. switch (m_vars[i].type.dst) {
  577. case c_data_ptr_array:
  578. break;
  579. case c_data:
  580. case c_void_ptr:
  581. case c_cean_var:
  582. case c_dv:
  583. break;
  584. case c_string_ptr:
  585. case c_data_ptr:
  586. case c_cean_var_ptr:
  587. case c_dv_ptr:
  588. if (m_vars[i].direction.in &&
  589. m_vars[i].free_if &&
  590. !m_vars[i].flags.is_static_dstn) {
  591. void *buf = *static_cast<char**>(m_vars[i].into) -
  592. m_vars[i].mic_offset -
  593. (m_vars[i].flags.is_stack_buf?
  594. 0 : m_vars[i].offset);
  595. if (buf == NULL) {
  596. break;
  597. }
  598. // decrement buffer reference count
  599. OFFLOAD_TIMER_START(
  600. c_offload_target_release_buffer_refs);
  601. BufReleaseRef(buf);
  602. OFFLOAD_TIMER_STOP(
  603. c_offload_target_release_buffer_refs);
  604. }
  605. break;
  606. case c_func_ptr:
  607. break;
  608. case c_dv_data:
  609. case c_dv_ptr_data:
  610. case c_dv_data_slice:
  611. case c_dv_ptr_data_slice:
  612. if (m_vars[i].free_if &&
  613. m_vars[i].direction.in &&
  614. !m_vars[i].flags.is_static_dstn) {
  615. ArrDesc *dvp =
  616. (m_vars[i].type.dst == c_dv_data_slice ||
  617. m_vars[i].type.dst == c_dv_data) ?
  618. static_cast<ArrDesc*>(m_vars[i].into) :
  619. *static_cast<ArrDesc**>(m_vars[i].into);
  620. void *buf = reinterpret_cast<char*>(dvp->Base) -
  621. m_vars[i].mic_offset -
  622. m_vars[i].offset;
  623. if (buf == NULL) {
  624. break;
  625. }
  626. // decrement buffer reference count
  627. OFFLOAD_TIMER_START(
  628. c_offload_target_release_buffer_refs);
  629. BufReleaseRef(buf);
  630. OFFLOAD_TIMER_STOP(
  631. c_offload_target_release_buffer_refs);
  632. }
  633. break;
  634. default:
  635. LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.dst);
  636. abort();
  637. }
  638. }
  639. }
  640. OFFLOAD_DEBUG_TRACE(2, "OUT buffer @ p %p size %lld\n",
  641. m_out.get_buffer_start(),
  642. m_out.get_buffer_size());
  643. OFFLOAD_DEBUG_DUMP_BYTES(2,
  644. m_out.get_buffer_start(),
  645. m_out.get_buffer_size());
  646. OFFLOAD_DEBUG_TRACE_1(1, get_offload_number(), c_offload_copyout_data,
  647. "Total copyout data sent to host: [%lld] bytes\n",
  648. m_out.get_tfr_size());
  649. OFFLOAD_TIMER_STOP(c_offload_target_gather_outputs);
  650. }
  651. void __offload_target_init(void)
  652. {
  653. #ifdef SEP_SUPPORT
  654. const char* env_var = getenv(sep_monitor_env);
  655. if (env_var != 0 && *env_var != '\0') {
  656. sep_monitor = atoi(env_var);
  657. }
  658. env_var = getenv(sep_device_env);
  659. if (env_var != 0 && *env_var != '\0') {
  660. sep_device = env_var;
  661. }
  662. #endif // SEP_SUPPORT
  663. prefix = report_get_message_str(c_report_mic);
  664. // init frequency
  665. mic_frequency = COIPerfGetCycleFrequency();
  666. }
  667. // User-visible offload API
  668. int _Offload_number_of_devices(void)
  669. {
  670. return mic_engines_total;
  671. }
  672. int _Offload_get_device_number(void)
  673. {
  674. return mic_index;
  675. }
  676. int _Offload_get_physical_device_number(void)
  677. {
  678. uint32_t index;
  679. EngineGetIndex(&index);
  680. return index;
  681. }