offload_host.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. /*
  2. Copyright (c) 2014 Intel Corporation. All Rights Reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions
  5. are met:
  6. * Redistributions of source code must retain the above copyright
  7. notice, this list of conditions and the following disclaimer.
  8. * Redistributions in binary form must reproduce the above copyright
  9. notice, this list of conditions and the following disclaimer in the
  10. documentation and/or other materials provided with the distribution.
  11. * Neither the name of Intel Corporation nor the names of its
  12. contributors may be used to endorse or promote products derived
  13. from this software without specific prior written permission.
  14. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  15. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  16. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  17. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  18. HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  19. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  20. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. /*! \file
  27. \brief The parts of the runtime library used only on the host
  28. */
  29. #ifndef OFFLOAD_HOST_H_INCLUDED
  30. #define OFFLOAD_HOST_H_INCLUDED
  31. #ifndef TARGET_WINNT
  32. #include <unistd.h>
  33. #endif // TARGET_WINNT
  34. #include "offload_common.h"
  35. #include "offload_util.h"
  36. #include "offload_engine.h"
  37. #include "offload_env.h"
  38. #include "offload_orsl.h"
  39. #include "coi/coi_client.h"
  40. // MIC engines.
  41. extern Engine* mic_engines;
  42. extern uint32_t mic_engines_total;
  43. //! The target image is packed as follows.
  44. /*! 1. 8 bytes containing the size of the target binary */
  45. /*! 2. a null-terminated string which is the binary name */
  46. /*! 3. <size> number of bytes that are the contents of the image */
  47. /*! The address of symbol __offload_target_image
  48. is the address of this structure. */
  49. struct Image {
  50. int64_t size; //!< Size in bytes of the target binary name and contents
  51. char data[]; //!< The name and contents of the target image
  52. };
  53. // The offload descriptor.
  54. class OffloadDescriptor
  55. {
  56. public:
  57. OffloadDescriptor(
  58. int index,
  59. _Offload_status *status,
  60. bool is_mandatory,
  61. bool is_openmp,
  62. OffloadHostTimerData * timer_data
  63. ) :
  64. m_device(mic_engines[index % mic_engines_total]),
  65. m_is_mandatory(is_mandatory),
  66. m_is_openmp(is_openmp),
  67. m_inout_buf(0),
  68. m_func_desc(0),
  69. m_func_desc_size(0),
  70. m_in_deps(0),
  71. m_in_deps_total(0),
  72. m_out_deps(0),
  73. m_out_deps_total(0),
  74. m_vars(0),
  75. m_vars_extra(0),
  76. m_status(status),
  77. m_timer_data(timer_data)
  78. {}
  79. ~OffloadDescriptor()
  80. {
  81. if (m_in_deps != 0) {
  82. free(m_in_deps);
  83. }
  84. if (m_out_deps != 0) {
  85. free(m_out_deps);
  86. }
  87. if (m_func_desc != 0) {
  88. free(m_func_desc);
  89. }
  90. if (m_vars != 0) {
  91. free(m_vars);
  92. free(m_vars_extra);
  93. }
  94. }
  95. bool offload(const char *name, bool is_empty,
  96. VarDesc *vars, VarDesc2 *vars2, int vars_total,
  97. const void **waits, int num_waits, const void **signal,
  98. int entry_id, const void *stack_addr);
  99. bool offload_finish();
  100. bool is_signaled();
  101. OffloadHostTimerData* get_timer_data() const {
  102. return m_timer_data;
  103. }
  104. private:
  105. bool wait_dependencies(const void **waits, int num_waits);
  106. bool setup_descriptors(VarDesc *vars, VarDesc2 *vars2, int vars_total,
  107. int entry_id, const void *stack_addr);
  108. bool setup_misc_data(const char *name);
  109. bool send_pointer_data(bool is_async);
  110. bool send_noncontiguous_pointer_data(
  111. int i,
  112. PtrData* src_buf,
  113. PtrData* dst_buf,
  114. COIEVENT *event);
  115. bool recieve_noncontiguous_pointer_data(
  116. int i,
  117. char* src_data,
  118. COIBUFFER dst_buf,
  119. COIEVENT *event);
  120. bool gather_copyin_data();
  121. bool compute();
  122. bool receive_pointer_data(bool is_async);
  123. bool scatter_copyout_data();
  124. void cleanup();
  125. bool find_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
  126. int64_t length, bool error_does_not_exist = true);
  127. bool alloc_ptr_data(PtrData* &ptr_data, void *base, int64_t disp,
  128. int64_t length, int64_t alloc_disp, int align);
  129. bool init_static_ptr_data(PtrData *ptr_data);
  130. bool init_mic_address(PtrData *ptr_data);
  131. bool offload_stack_memory_manager(const void * stack_begin, int routine_id,
  132. int buf_size, int align, bool *is_new);
  133. bool nullify_target_stack(COIBUFFER targ_buf, uint64_t size);
  134. bool gen_var_descs_for_pointer_array(int i);
  135. void report_coi_error(error_types msg, COIRESULT res);
  136. _Offload_result translate_coi_error(COIRESULT res) const;
  137. private:
  138. typedef std::list<COIBUFFER> BufferList;
  139. // extra data associated with each variable descriptor
  140. struct VarExtra {
  141. PtrData* src_data;
  142. PtrData* dst_data;
  143. AutoData* auto_data;
  144. int64_t cpu_disp;
  145. int64_t cpu_offset;
  146. CeanReadRanges *read_rng_src;
  147. CeanReadRanges *read_rng_dst;
  148. int64_t ptr_arr_offset;
  149. bool is_arr_ptr_el;
  150. };
  151. template<typename T> class ReadArrElements {
  152. public:
  153. ReadArrElements():
  154. ranges(NULL),
  155. el_size(sizeof(T)),
  156. offset(0),
  157. count(0),
  158. is_empty(true),
  159. base(NULL)
  160. {}
  161. bool read_next(bool flag)
  162. {
  163. if (flag != 0) {
  164. if (is_empty) {
  165. if (ranges) {
  166. if (!get_next_range(ranges, &offset)) {
  167. // ranges are over
  168. return false;
  169. }
  170. }
  171. // all contiguous elements are over
  172. else if (count != 0) {
  173. return false;
  174. }
  175. length_cur = size;
  176. }
  177. else {
  178. offset += el_size;
  179. }
  180. val = (T)get_el_value(base, offset, el_size);
  181. length_cur -= el_size;
  182. count++;
  183. is_empty = length_cur == 0;
  184. }
  185. return true;
  186. }
  187. public:
  188. CeanReadRanges * ranges;
  189. T val;
  190. int el_size;
  191. int64_t size,
  192. offset,
  193. length_cur;
  194. bool is_empty;
  195. int count;
  196. char *base;
  197. };
  198. // ptr_data for persistent auto objects
  199. PtrData* m_stack_ptr_data;
  200. PtrDataList m_destroy_stack;
  201. // Engine
  202. Engine& m_device;
  203. // if true offload is mandatory
  204. bool m_is_mandatory;
  205. // if true offload has openmp origin
  206. const bool m_is_openmp;
  207. // The Marshaller for the inputs of the offloaded region.
  208. Marshaller m_in;
  209. // The Marshaller for the outputs of the offloaded region.
  210. Marshaller m_out;
  211. // List of buffers that are passed to dispatch call
  212. BufferList m_compute_buffers;
  213. // List of buffers that need to be destroyed at the end of offload
  214. BufferList m_destroy_buffers;
  215. // Variable descriptors
  216. VarDesc* m_vars;
  217. VarExtra* m_vars_extra;
  218. int m_vars_total;
  219. // Pointer to a user-specified status variable
  220. _Offload_status *m_status;
  221. // Function descriptor
  222. FunctionDescriptor* m_func_desc;
  223. uint32_t m_func_desc_size;
  224. // Buffer for transferring copyin/copyout data
  225. COIBUFFER m_inout_buf;
  226. // Dependencies
  227. COIEVENT *m_in_deps;
  228. uint32_t m_in_deps_total;
  229. COIEVENT *m_out_deps;
  230. uint32_t m_out_deps_total;
  231. // Timer data
  232. OffloadHostTimerData *m_timer_data;
  233. // copyin/copyout data length
  234. uint64_t m_in_datalen;
  235. uint64_t m_out_datalen;
  236. // a boolean value calculated in setup_descriptors. If true we need to do
  237. // a run function on the target. Otherwise it may be optimized away.
  238. bool m_need_runfunction;
  239. };
  240. // Initialization types for MIC
  241. enum OffloadInitType {
  242. c_init_on_start, // all devices before entering main
  243. c_init_on_offload, // single device before starting the first offload
  244. c_init_on_offload_all // all devices before starting the first offload
  245. };
  246. // Initializes library and registers specified offload image.
  247. extern "C" void __offload_register_image(const void* image);
  248. extern "C" void __offload_unregister_image(const void* image);
  249. // Initializes offload runtime library.
  250. extern int __offload_init_library(void);
  251. // thread data for associating pipelines with threads
  252. extern pthread_key_t mic_thread_key;
  253. // Environment variables for devices
  254. extern MicEnvVar mic_env_vars;
  255. // CPU frequency
  256. extern uint64_t cpu_frequency;
  257. // LD_LIBRARY_PATH for MIC libraries
  258. extern char* mic_library_path;
  259. // stack size for target
  260. extern uint32_t mic_stack_size;
  261. // Preallocated memory size for buffers on MIC
  262. extern uint64_t mic_buffer_size;
  263. // Setting controlling inout proxy
  264. extern bool mic_proxy_io;
  265. extern char* mic_proxy_fs_root;
  266. // Threshold for creating buffers with large pages
  267. extern uint64_t __offload_use_2mb_buffers;
  268. // offload initialization type
  269. extern OffloadInitType __offload_init_type;
  270. // Device number to offload to when device is not explicitly specified.
  271. extern int __omp_device_num;
  272. // target executable
  273. extern TargetImage* __target_exe;
  274. // IDB support
  275. // Called by the offload runtime after initialization of offload infrastructure
  276. // has been completed.
  277. extern "C" void __dbg_target_so_loaded();
  278. // Called by the offload runtime when the offload infrastructure is about to be
  279. // shut down, currently at application exit.
  280. extern "C" void __dbg_target_so_unloaded();
  281. // Null-terminated string containing path to the process image of the hosting
  282. // application (offload_main)
  283. #define MAX_TARGET_NAME 512
  284. extern "C" char __dbg_target_exe_name[MAX_TARGET_NAME];
  285. // Integer specifying the process id
  286. extern "C" pid_t __dbg_target_so_pid;
  287. // Integer specifying the 0-based device number
  288. extern "C" int __dbg_target_id;
  289. // Set to non-zero by the host-side debugger to enable offload debugging
  290. // support
  291. extern "C" int __dbg_is_attached;
  292. // Major version of the debugger support API
  293. extern "C" const int __dbg_api_major_version;
  294. // Minor version of the debugger support API
  295. extern "C" const int __dbg_api_minor_version;
  296. #endif // OFFLOAD_HOST_H_INCLUDED