offload_host.cpp 162 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
7427842794280428142824283428442854286428742884289429042914292429342944295429642974298429943004301430243034304430543064307430843094310431143124313431443154316431743184319432043214322432343244325432643274328432943304331433243334334433543364337433843394340434143424343434443454346434743484349435043514352435343544355435643574358435943604361436243634364436543664367436843694370437143724373437443754376437743784379438043814382438343844385438643874388438943904391439243934394439543964397439843994400440144024403
  1. /*
  2. Copyright (c) 2014 Intel Corporation. All Rights Reserved.
  3. Redistribution and use in source and binary forms, with or without
  4. modification, are permitted provided that the following conditions
  5. are met:
  6. * Redistributions of source code must retain the above copyright
  7. notice, this list of conditions and the following disclaimer.
  8. * Redistributions in binary form must reproduce the above copyright
  9. notice, this list of conditions and the following disclaimer in the
  10. documentation and/or other materials provided with the distribution.
  11. * Neither the name of Intel Corporation nor the names of its
  12. contributors may be used to endorse or promote products derived
  13. from this software without specific prior written permission.
  14. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  15. "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  16. LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  17. A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  18. HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  19. SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  20. LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. // Forward declaration as the following 2 functions are declared as friend in offload_engine.h
  27. // CLANG does not like static to been after friend declaration.
  28. static void __offload_init_library_once(void);
  29. static void __offload_fini_library(void);
  30. #include "offload_host.h"
  31. #ifdef MYO_SUPPORT
  32. #include "offload_myo_host.h"
  33. #endif
  34. #include <malloc.h>
  35. #ifndef TARGET_WINNT
  36. #include <alloca.h>
  37. #include <elf.h>
  38. #endif // TARGET_WINNT
  39. #include <errno.h>
  40. #include <fcntl.h>
  41. #include <stdlib.h>
  42. #include <string.h>
  43. #include <sys/stat.h>
  44. #include <sys/types.h>
  45. #include <sys/stat.h>
  46. #include <algorithm>
  47. #include <bitset>
  48. #if defined(HOST_WINNT)
  49. #define PATH_SEPARATOR ";"
  50. #else
  51. #define PATH_SEPARATOR ":"
  52. #endif
  53. #define GET_OFFLOAD_NUMBER(timer_data) \
  54. timer_data? timer_data->offload_number : 0
  55. #ifdef TARGET_WINNT
  56. // Small subset of ELF declarations for Windows which is needed to compile
  57. // this file. ELF header is used to understand what binary type is contained
  58. // in the target image - shared library or executable.
  59. typedef uint16_t Elf64_Half;
  60. typedef uint32_t Elf64_Word;
  61. typedef uint64_t Elf64_Addr;
  62. typedef uint64_t Elf64_Off;
  63. #define EI_NIDENT 16
  64. #define ET_EXEC 2
  65. #define ET_DYN 3
// Mirror of the ELF-64 file header layout (field-for-field equivalent to
// Elf64_Ehdr from <elf.h>), provided here because Windows has no <elf.h>.
// Only e_type is examined by this file's logic (ET_EXEC vs. ET_DYN), but
// the full layout is required so the header can be read from a target image.
typedef struct
{
    unsigned char e_ident[EI_NIDENT]; // magic number and class/encoding bytes
    Elf64_Half    e_type;             // object file type: ET_EXEC or ET_DYN
    Elf64_Half    e_machine;          // target architecture
    Elf64_Word    e_version;          // object file version
    Elf64_Addr    e_entry;            // entry point virtual address
    Elf64_Off     e_phoff;            // program header table file offset
    Elf64_Off     e_shoff;            // section header table file offset
    Elf64_Word    e_flags;            // processor-specific flags
    Elf64_Half    e_ehsize;           // ELF header size in bytes
    Elf64_Half    e_phentsize;        // program header table entry size
    Elf64_Half    e_phnum;            // program header table entry count
    Elf64_Half    e_shentsize;        // section header table entry size
    Elf64_Half    e_shnum;            // section header table entry count
    Elf64_Half    e_shstrndx;         // section name string table index
} Elf64_Ehdr;
  83. #endif // TARGET_WINNT
// Host console and file logging
const char *prefix;              // prefix prepended to trace/log output lines
int console_enabled = 0;         // non-zero enables console tracing
int offload_number = 0;          // running counter of offload constructs

// Names of environment variables controlling tracing and reporting.
static const char *htrace_envname = "H_TRACE";
static const char *offload_report_envname = "OFFLOAD_REPORT";
static char *timer_envname = "H_TIME";

// Trace information: string forms used when dumping variable descriptors.
// Order must match the corresponding direction enum.
static const char* vardesc_direction_as_string[] = {
    "NOCOPY",
    "IN",
    "OUT",
    "INOUT"
};
// Order must match the vardesc type enum (c_data, c_data_ptr, ...).
static const char* vardesc_type_as_string[] = {
    "unknown",
    "data",
    "data_ptr",
    "func_ptr",
    "void_ptr",
    "string_ptr",
    "dv",
    "dv_data",
    "dv_data_slice",
    "dv_ptr",
    "dv_ptr_data",
    "dv_ptr_data_slice",
    "cean_var",
    "cean_var_ptr",
    "c_data_ptr_array",
    "c_func_ptr_array",
    "c_void_ptr_array",
    "c_string_ptr_array"
};

Engine* mic_engines = 0;         // array of available MIC devices
uint32_t mic_engines_total = 0;  // number of entries in mic_engines
pthread_key_t mic_thread_key;    // per-thread offload state key
MicEnvVar mic_env_vars;          // environment variables forwarded to devices
uint64_t cpu_frequency = 0;      // host CPU frequency, used by timers

// MIC_STACKSIZE: default stack size for offload threads on the device
uint32_t mic_stack_size = 12 * 1024 * 1024;

// MIC_BUFFERSIZE: device buffer size override (0 means default)
uint64_t mic_buffer_size = 0;

// MIC_LD_LIBRARY_PATH: dynamic-library search path on the device
char* mic_library_path = 0;

// MIC_PROXY_IO: whether device-side I/O is proxied through the host
bool mic_proxy_io = true;

// MIC_PROXY_FS_ROOT: host directory used as the proxy filesystem root
char* mic_proxy_fs_root = 0;

// Threshold for creating buffers with large pages. Buffer is created
// with large pages hint if its size exceeds the threshold value.
// By default large pages are disabled right now (by setting default
// value for threshold to MAX) due to HSD 4114629.
uint64_t __offload_use_2mb_buffers = 0xffffffffffffffffULL;
static const char *mic_use_2mb_buffers_envname =
    "MIC_USE_2MB_BUFFERS";

// Minimum transfer sizes above which buffer writes/reads go asynchronous.
static uint64_t __offload_use_async_buffer_write = 2 * 1024 * 1024;
static const char *mic_use_async_buffer_write_envname =
    "MIC_USE_ASYNC_BUFFER_WRITE";
static uint64_t __offload_use_async_buffer_read = 2 * 1024 * 1024;
static const char *mic_use_async_buffer_read_envname =
    "MIC_USE_ASYNC_BUFFER_READ";

// device initialization type
OffloadInitType __offload_init_type = c_init_on_offload_all;
static const char *offload_init_envname = "OFFLOAD_INIT";

// active wait: spin rather than block while waiting for offloads
static bool __offload_active_wait = true;
static const char *offload_active_wait_envname = "OFFLOAD_ACTIVE_WAIT";

// OMP_DEFAULT_DEVICE: default device number for OpenMP target constructs
int __omp_device_num = 0;
static const char *omp_device_num_envname = "OMP_DEFAULT_DEVICE";

// The list of pending target libraries
static bool __target_libs;                 // true when libraries are pending
static TargetImageList __target_libs_list; // images awaiting load
static mutex_t __target_libs_lock;         // guards the two fields above
static mutex_t stack_alloc_lock;           // guards stack-buffer bookkeeping

// Target executable
TargetImage* __target_exe;
  162. static char * offload_get_src_base(void * ptr, uint8_t type)
  163. {
  164. char *base;
  165. if (VAR_TYPE_IS_PTR(type)) {
  166. base = *static_cast<char**>(ptr);
  167. }
  168. else if (VAR_TYPE_IS_SCALAR(type)) {
  169. base = static_cast<char*>(ptr);
  170. }
  171. else if (VAR_TYPE_IS_DV_DATA_SLICE(type) || VAR_TYPE_IS_DV_DATA(type)) {
  172. ArrDesc *dvp;
  173. if (VAR_TYPE_IS_DV_DATA_SLICE(type)) {
  174. const arr_desc *ap = static_cast<const arr_desc*>(ptr);
  175. dvp = (type == c_dv_data_slice) ?
  176. reinterpret_cast<ArrDesc*>(ap->base) :
  177. *reinterpret_cast<ArrDesc**>(ap->base);
  178. }
  179. else {
  180. dvp = (type == c_dv_data) ?
  181. static_cast<ArrDesc*>(ptr) :
  182. *static_cast<ArrDesc**>(ptr);
  183. }
  184. base = reinterpret_cast<char*>(dvp->Base);
  185. }
  186. else {
  187. base = NULL;
  188. }
  189. return base;
  190. }
// Report a fatal COI failure for this offload and terminate the process.
// msg - identifies which COI operation failed (selects the message template)
// res - the COI status code returned by the failed call
// This function never returns: all paths end in exit(1).
void OffloadDescriptor::report_coi_error(error_types msg, COIRESULT res)
{
    // special case for the 'process died' error
    if (res == COI_PROCESS_DIED) {
        // Target process is gone: run the device's process-death teardown
        // instead of printing a per-operation message here.
        m_device.fini_process(true);
    }
    else {
        switch (msg) {
            case c_buf_create:
                // Buffer-creation failures caused by memory exhaustion get
                // a more specific message before falling through.
                if (res == COI_OUT_OF_MEMORY) {
                    msg = c_buf_create_out_of_mem;
                }
                /* fallthru */

            // These messages include the logical device index.
            case c_buf_create_from_mem:
            case c_buf_get_address:
            case c_pipeline_create:
            case c_pipeline_run_func:
                LIBOFFLOAD_ERROR(msg, m_device.get_logical_index(), res);
                break;

            // These messages carry only the COI result code.
            case c_buf_read:
            case c_buf_write:
            case c_buf_copy:
            case c_buf_map:
            case c_buf_unmap:
            case c_buf_destroy:
            case c_buf_set_state:
                LIBOFFLOAD_ERROR(msg, res);
                break;

            default:
                break;
        }
    }
    exit(1);
}
  225. _Offload_result OffloadDescriptor::translate_coi_error(COIRESULT res) const
  226. {
  227. switch (res) {
  228. case COI_SUCCESS:
  229. return OFFLOAD_SUCCESS;
  230. case COI_PROCESS_DIED:
  231. return OFFLOAD_PROCESS_DIED;
  232. case COI_OUT_OF_MEMORY:
  233. return OFFLOAD_OUT_OF_MEMORY;
  234. default:
  235. return OFFLOAD_ERROR;
  236. }
  237. }
// Create (or look up) the host<->device buffer association for a pointer
// variable and return it through ptr_data.
//   ptr_data   - [out] the association entry (new or existing)
//   base       - host base address of the data
//   disp       - displacement of the transferred region within base
//   size       - size of the transferred region; total length is disp + size
//   alloc_disp - displacement of the original allocation start
//   align      - requested target-side alignment (power of 2 expected)
// Returns false on buffer-creation/state failure (status recorded in
// m_status when present; fatal via report_coi_error when mandatory),
// true otherwise. Note: insert_ptr_data is presumed to return a new entry
// with its alloc_ptr_data_lock held — every exit path below unlocks it.
bool OffloadDescriptor::alloc_ptr_data(
    PtrData* &ptr_data,
    void *base,
    int64_t disp,
    int64_t size,
    int64_t alloc_disp,
    int align
)
{
    // total length of base
    int64_t length = disp + size;
    bool is_new;

    OFFLOAD_TRACE(3, "Creating association for data: addr %p, length %lld\n",
                  base, length);

    // add new entry
    ptr_data = m_device.insert_ptr_data(base, length, is_new);
    if (is_new) {
        OFFLOAD_TRACE(3, "Added new association\n");

        if (length > 0) {
            OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);
            COIRESULT res;

            // align should be a power of 2
            if (align > 0 && (align & (align - 1)) == 0) {
                // offset within mic_buffer. Can do offset optimization
                // only when source address alignment satisfies requested
                // alignment on the target (cq172736).
                if ((reinterpret_cast<intptr_t>(base) & (align - 1)) == 0) {
                    // Keep the sub-page offset so the device mapping has the
                    // same alignment within a 4K page as the host address.
                    ptr_data->mic_offset = reinterpret_cast<intptr_t>(base) & 4095;
                }
            }

            // buffer size and flags
            uint64_t buffer_size = length + ptr_data->mic_offset;
            uint32_t buffer_flags = 0;

            // create buffer with large pages if data length exceeds
            // large page threshold
            if (length >= __offload_use_2mb_buffers) {
                buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
            }

            // create CPU buffer
            OFFLOAD_DEBUG_TRACE_1(3,
                      GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_create_buf_host,
                      "Creating buffer from source memory %p, "
                      "length %lld\n", base, length);

            // result is not checked because we can continue without cpu
            // buffer. In this case we will use COIBufferRead/Write instead
            // of COIBufferCopy.
            COI::BufferCreateFromMemory(length,
                                        COI_BUFFER_NORMAL,
                                        0,
                                        base,
                                        1,
                                        &m_device.get_process(),
                                        &ptr_data->cpu_buf);

            OFFLOAD_DEBUG_TRACE_1(3,
                      GET_OFFLOAD_NUMBER(get_timer_data()),
                      c_offload_create_buf_mic,
                      "Creating buffer for sink: size %lld, offset %d, "
                      "flags =0x%x\n", buffer_size - alloc_disp,
                      ptr_data->mic_offset, buffer_flags);

            // create MIC buffer
            res = COI::BufferCreate(buffer_size - alloc_disp,
                                    COI_BUFFER_NORMAL,
                                    buffer_flags,
                                    0,
                                    1,
                                    &m_device.get_process(),
                                    &ptr_data->mic_buf);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    // Caller asked for a status object: record and continue.
                    m_status->result = translate_coi_error(res);
                }
                else if (m_is_mandatory) {
                    // Mandatory offload with no status sink: fatal.
                    report_coi_error(c_buf_create, res);
                }
                ptr_data->alloc_ptr_data_lock.unlock();
                return false;
            }

            // make buffer valid on the device.
            res = COI::BufferSetState(ptr_data->mic_buf,
                                      m_device.get_process(),
                                      COI_BUFFER_VALID,
                                      COI_BUFFER_NO_MOVE,
                                      0, 0, 0);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                }
                else if (m_is_mandatory) {
                    report_coi_error(c_buf_set_state, res);
                }
                ptr_data->alloc_ptr_data_lock.unlock();
                return false;
            }

            // Mark the buffer invalid on the host side so the runtime knows
            // the authoritative copy lives on the device.
            res = COI::BufferSetState(ptr_data->mic_buf,
                                      COI_PROCESS_SOURCE,
                                      COI_BUFFER_INVALID,
                                      COI_BUFFER_NO_MOVE,
                                      0, 0, 0);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                }
                else if (m_is_mandatory) {
                    report_coi_error(c_buf_set_state, res);
                }
                ptr_data->alloc_ptr_data_lock.unlock();
                return false;
            }
        }

        ptr_data->alloc_disp = alloc_disp;
        ptr_data->alloc_ptr_data_lock.unlock();
    }
    else {
        // Existing entry: take the lock for the duration of the validation.
        mutex_locker_t locker(ptr_data->alloc_ptr_data_lock);

        OFFLOAD_TRACE(3, "Found existing association: addr %p, length %lld, "
                      "is_static %d\n",
                      ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
                      ptr_data->is_static);

        // This is not a new entry. Make sure that provided address range fits
        // into existing one.
        MemRange addr_range(base, length - ptr_data->alloc_disp);
        if (!ptr_data->cpu_addr.contains(addr_range)) {
            LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
            exit(1);
        }

        // if the entry is associated with static data it may not have buffers
        // created because they are created on demand.
        if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
            return false;
        }
    }

    return true;
}
// Look up an existing host<->device association for a pointer variable.
//   ptr_data     - [out] the found entry, or 0 when absent/mismatched
//   base         - host base address of the data
//   disp, size   - region within base; total length is disp + size
//   report_error - when true, a missing or mismatched association is fatal
// Returns false only when on-demand buffer creation for a static entry
// fails; a missing association with report_error == false still returns
// true (caller must check ptr_data for 0).
bool OffloadDescriptor::find_ptr_data(
    PtrData* &ptr_data,
    void *base,
    int64_t disp,
    int64_t size,
    bool report_error
)
{
    // total length of base
    int64_t length = disp + size;

    OFFLOAD_TRACE(3, "Looking for association for data: addr %p, "
                  "length %lld\n", base, length);

    // find existing association in pointer table
    ptr_data = m_device.find_ptr_data(base);
    if (ptr_data == 0) {
        if (report_error) {
            LIBOFFLOAD_ERROR(c_no_ptr_data, base);
            exit(1);
        }
        OFFLOAD_TRACE(3, "Association does not exist\n");
        return true;
    }

    OFFLOAD_TRACE(3, "Found association: base %p, length %lld, is_static %d\n",
                  ptr_data->cpu_addr.start(), ptr_data->cpu_addr.length(),
                  ptr_data->is_static);

    // make sure that provided address range fits into existing one
    MemRange addr_range(base, length);
    if (!ptr_data->cpu_addr.contains(addr_range)) {
        if (report_error) {
            LIBOFFLOAD_ERROR(c_bad_ptr_mem_range);
            exit(1);
        }
        OFFLOAD_TRACE(3, "Existing association partially overlaps with "
                      "data address range\n");
        // Partial overlap: treat as "not found" for the caller.
        ptr_data = 0;
        return true;
    }

    // if the entry is associated with static data it may not have buffers
    // created because they are created on demand.
    if (ptr_data->is_static && !init_static_ptr_data(ptr_data)) {
        return false;
    }

    return true;
}
// Lazily create the host- and device-side COI buffers for an association
// that refers to static data (buffers for static data are created on
// demand rather than at association time). Idempotent: each buffer is
// created only if still 0. Returns false when creation fails and m_status
// is set; otherwise failures are fatal via report_coi_error.
bool OffloadDescriptor::init_static_ptr_data(PtrData *ptr_data)
{
    OffloadTimer timer(get_timer_data(), c_offload_host_alloc_buffers);

    if (ptr_data->cpu_buf == 0) {
        // Wrap the existing host memory range in a COI buffer.
        OFFLOAD_TRACE(3, "Creating buffer from source memory %llx\n",
                      ptr_data->cpu_addr.start());

        COIRESULT res = COI::BufferCreateFromMemory(
            ptr_data->cpu_addr.length(),
            COI_BUFFER_NORMAL,
            0,
            const_cast<void*>(ptr_data->cpu_addr.start()),
            1, &m_device.get_process(),
            &ptr_data->cpu_buf);

        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
                return false;
            }
            report_coi_error(c_buf_create_from_mem, res);
        }
    }

    if (ptr_data->mic_buf == 0) {
        // Wrap the already-known device-side address (COI_SINK_MEMORY) in a
        // COI buffer.
        OFFLOAD_TRACE(3, "Creating buffer from sink memory %llx\n",
                      ptr_data->mic_addr);

        COIRESULT res = COI::BufferCreateFromMemory(
            ptr_data->cpu_addr.length(),
            COI_BUFFER_NORMAL,
            COI_SINK_MEMORY,
            reinterpret_cast<void*>(ptr_data->mic_addr),
            1, &m_device.get_process(),
            &ptr_data->mic_buf);

        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
                return false;
            }
            report_coi_error(c_buf_create_from_mem, res);
        }
    }

    return true;
}
  457. bool OffloadDescriptor::init_mic_address(PtrData *ptr_data)
  458. {
  459. if (ptr_data->mic_buf != 0 && ptr_data->mic_addr == 0) {
  460. COIRESULT res = COI::BufferGetSinkAddress(ptr_data->mic_buf,
  461. &ptr_data->mic_addr);
  462. if (res != COI_SUCCESS) {
  463. if (m_status != 0) {
  464. m_status->result = translate_coi_error(res);
  465. }
  466. else if (m_is_mandatory) {
  467. report_coi_error(c_buf_get_address, res);
  468. }
  469. return false;
  470. }
  471. }
  472. return true;
  473. }
  474. bool OffloadDescriptor::nullify_target_stack(
  475. COIBUFFER targ_buf,
  476. uint64_t size
  477. )
  478. {
  479. char * ptr = (char*)malloc(size);
  480. if (ptr == NULL)
  481. LIBOFFLOAD_ERROR(c_malloc);
  482. COIRESULT res;
  483. memset(ptr, 0, size);
  484. res = COI::BufferWrite(
  485. targ_buf,
  486. 0,
  487. ptr,
  488. size,
  489. COI_COPY_UNSPECIFIED,
  490. 0, 0, 0);
  491. free(ptr);
  492. if (res != COI_SUCCESS) {
  493. if (m_status != 0) {
  494. m_status->result = translate_coi_error(res);
  495. return false;
  496. }
  497. report_coi_error(c_buf_write, res);
  498. }
  499. return true;
  500. }
// Manage the per-device list of persistent stack buffers.
//
// Walks m_device.m_persist_list (kept sorted by CPU stack address) looking
// for an entry matching stack_begin/routine_id. Entries whose stack address
// is below stack_begin belong to frames that have already been popped on the
// CPU; they are queued for destruction and erased. On an exact match with
// the same routine_id the existing buffer is reused; otherwise a new
// persistent stack buffer of buf_size bytes is created on the device,
// invalidated on the source, and zero-filled.
//
// stack_begin - CPU address marking the beginning of the offloaded stack
// routine_id  - identifier of the offloading routine
// buf_size    - size of the stack buffer to create (bytes)
// align       - requested alignment (currently unused in this path)
// is_new      - out: set to true only when a fresh buffer was created
//
// Returns false if any COI call fails (with m_status filled in or a fatal
// report, per the usual convention); true otherwise.
bool OffloadDescriptor::offload_stack_memory_manager(
    const void * stack_begin,
    int routine_id,
    int buf_size,
    int align,
    bool *is_new)
{
    // Serialize all persistent-stack bookkeeping.
    mutex_locker_t locker(stack_alloc_lock);

    PersistData * new_el;
    PersistDataList::iterator it_begin = m_device.m_persist_list.begin();
    // it_end is only read when erase > 0, which guarantees it was assigned.
    PersistDataList::iterator it_end;
    int erase = 0;

    *is_new = false;

    for (PersistDataList::iterator it = m_device.m_persist_list.begin();
         it != m_device.m_persist_list.end(); it++) {
        PersistData cur_el = *it;

        if (stack_begin > it->stack_cpu_addr) {
            // This entry's CPU frame is gone (stack grew past it):
            // this stack data must be destroyed.
            m_destroy_stack.push_front(cur_el.stack_ptr_data);
            it_end = it;
            erase++;
        }
        else if (stack_begin == it->stack_cpu_addr) {
            if (routine_id != it->routine_id) {
                // Same address but a different routine:
                // this stack data must be destroyed.
                m_destroy_stack.push_front(cur_el.stack_ptr_data);
                it_end = it;
                erase++;
                break;
            }
            else {
                // Exact match: stack data is reused.
                m_stack_ptr_data = it->stack_ptr_data;
                if (erase > 0) {
                    // All obsolete stack sections must be erased from the
                    // list. ++it_end makes the erase range inclusive of the
                    // last obsolete entry.
                    m_device.m_persist_list.erase(it_begin, ++it_end);
                    // Account for the extra sink addresses sent to the
                    // target. NOTE: sizeof() is unevaluated, so using the
                    // not-yet-assigned new_el here is well-defined.
                    m_in_datalen +=
                        erase * sizeof(new_el->stack_ptr_data->mic_addr);
                }
                OFFLOAD_TRACE(3, "Reuse of stack buffer with addr %p\n",
                              m_stack_ptr_data->mic_addr);
                return true;
            }
        }
        else if (stack_begin < it->stack_cpu_addr) {
            // List is ordered; no match can follow.
            break;
        }
    }

    if (erase > 0) {
        // All obsolete stack sections must be erased from the list
        // (no reusable entry was found during the scan).
        m_device.m_persist_list.erase(it_begin, ++it_end);
        // sizeof() is unevaluated — see the note above.
        m_in_datalen += erase * sizeof(new_el->stack_ptr_data->mic_addr);
    }

    // No reusable entry: a new stack table is created.
    new_el = new PersistData(stack_begin, routine_id, buf_size);

    // Create the MIC (device) buffer.
    COIRESULT res;
    uint32_t buffer_flags = 0;

    // Create buffer with large pages if data length exceeds the
    // large-page threshold.
    if (buf_size >= __offload_use_2mb_buffers) {
        buffer_flags = COI_OPTIMIZE_HUGE_PAGE_SIZE;
    }
    res = COI::BufferCreate(buf_size,
                            COI_BUFFER_NORMAL,
                            buffer_flags,
                            0,
                            1,
                            &m_device.get_process(),
                            &new_el->stack_ptr_data->mic_buf);
    if (res != COI_SUCCESS) {
        if (m_status != 0) {
            m_status->result = translate_coi_error(res);
        }
        else if (m_is_mandatory) {
            report_coi_error(c_buf_create, res);
        }
        return false;
    }

    // Make the buffer valid on the device.
    res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
                              m_device.get_process(),
                              COI_BUFFER_VALID,
                              COI_BUFFER_NO_MOVE,
                              0, 0, 0);
    if (res != COI_SUCCESS) {
        if (m_status != 0) {
            m_status->result = translate_coi_error(res);
        }
        else if (m_is_mandatory) {
            report_coi_error(c_buf_set_state, res);
        }
        return false;
    }

    // Invalidate the buffer on the source so the device copy is the only
    // valid instance.
    res = COI::BufferSetState(new_el->stack_ptr_data->mic_buf,
                              COI_PROCESS_SOURCE,
                              COI_BUFFER_INVALID,
                              COI_BUFFER_NO_MOVE,
                              0, 0, 0);
    if (res != COI_SUCCESS) {
        if (m_status != 0) {
            m_status->result = translate_coi_error(res);
        }
        else if (m_is_mandatory) {
            report_coi_error(c_buf_set_state, res);
        }
        return false;
    }

    // The persistence algorithm requires the target stack initially to be
    // nullified.
    if (!nullify_target_stack(new_el->stack_ptr_data->mic_buf, buf_size)) {
        return false;
    }

    m_stack_ptr_data = new_el->stack_ptr_data;
    init_mic_address(m_stack_ptr_data);
    OFFLOAD_TRACE(3, "Allocating stack buffer with addr %p\n",
                  m_stack_ptr_data->mic_addr);
    m_device.m_persist_list.push_front(*new_el);
    init_mic_address(new_el->stack_ptr_data);
    *is_new = true;

    return true;
}
  622. bool OffloadDescriptor::setup_descriptors(
  623. VarDesc *vars,
  624. VarDesc2 *vars2,
  625. int vars_total,
  626. int entry_id,
  627. const void *stack_addr
  628. )
  629. {
  630. COIRESULT res;
  631. OffloadTimer timer(get_timer_data(), c_offload_host_setup_buffers);
  632. // make a copy of variable descriptors
  633. m_vars_total = vars_total;
  634. if (vars_total > 0) {
  635. m_vars = (VarDesc*) malloc(m_vars_total * sizeof(VarDesc));
  636. if (m_vars == NULL)
  637. LIBOFFLOAD_ERROR(c_malloc);
  638. memcpy(m_vars, vars, m_vars_total * sizeof(VarDesc));
  639. m_vars_extra = (VarExtra*) malloc(m_vars_total * sizeof(VarExtra));
  640. if (m_vars_extra == NULL)
  641. LIBOFFLOAD_ERROR(c_malloc);
  642. }
  643. // dependencies
  644. m_in_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * (m_vars_total + 1));
  645. if (m_in_deps == NULL)
  646. LIBOFFLOAD_ERROR(c_malloc);
  647. if (m_vars_total > 0) {
  648. m_out_deps = (COIEVENT*) malloc(sizeof(COIEVENT) * m_vars_total);
  649. if (m_out_deps == NULL)
  650. LIBOFFLOAD_ERROR(c_malloc);
  651. }
  652. // copyin/copyout data length
  653. m_in_datalen = 0;
  654. m_out_datalen = 0;
  655. // First pass over variable descriptors
  656. // - Calculate size of the input and output non-pointer data
  657. // - Allocate buffers for input and output pointers
  658. for (int i = 0; i < m_vars_total; i++) {
  659. void* alloc_base = NULL;
  660. int64_t alloc_disp = 0;
  661. int64_t alloc_size;
  662. bool src_is_for_mic = (m_vars[i].direction.out ||
  663. m_vars[i].into == NULL);
  664. const char *var_sname = "";
  665. if (vars2 != NULL && i < vars_total) {
  666. if (vars2[i].sname != NULL) {
  667. var_sname = vars2[i].sname;
  668. }
  669. }
  670. OFFLOAD_TRACE(2, " VarDesc %d, var=%s, %s, %s\n",
  671. i, var_sname,
  672. vardesc_direction_as_string[m_vars[i].direction.bits],
  673. vardesc_type_as_string[m_vars[i].type.src]);
  674. if (vars2 != NULL && i < vars_total && vars2[i].dname != NULL) {
  675. OFFLOAD_TRACE(2, " into=%s, %s\n", vars2[i].dname,
  676. vardesc_type_as_string[m_vars[i].type.dst]);
  677. }
  678. OFFLOAD_TRACE(2,
  679. " type_src=%d, type_dstn=%d, direction=%d, "
  680. "alloc_if=%d, free_if=%d, align=%d, mic_offset=%d, flags=0x%x, "
  681. "offset=%lld, size=%lld, count/disp=%lld, ptr=%p, into=%p\n",
  682. m_vars[i].type.src,
  683. m_vars[i].type.dst,
  684. m_vars[i].direction.bits,
  685. m_vars[i].alloc_if,
  686. m_vars[i].free_if,
  687. m_vars[i].align,
  688. m_vars[i].mic_offset,
  689. m_vars[i].flags.bits,
  690. m_vars[i].offset,
  691. m_vars[i].size,
  692. m_vars[i].count,
  693. m_vars[i].ptr,
  694. m_vars[i].into);
  695. if (m_vars[i].alloc != NULL) {
  696. // array descriptor
  697. const arr_desc *ap =
  698. static_cast<const arr_desc*>(m_vars[i].alloc);
  699. // debug dump
  700. __arr_desc_dump(" ", "ALLOC", ap, 0);
  701. __arr_data_offset_and_length(ap, alloc_disp, alloc_size);
  702. alloc_base = reinterpret_cast<void*>(ap->base);
  703. }
  704. m_vars_extra[i].cpu_disp = 0;
  705. m_vars_extra[i].cpu_offset = 0;
  706. m_vars_extra[i].src_data = 0;
  707. m_vars_extra[i].read_rng_src = 0;
  708. m_vars_extra[i].read_rng_dst = 0;
  709. // flag is_arr_ptr_el is 1 only for var_descs generated
  710. // for c_data_ptr_array type
  711. if (i < vars_total) {
  712. m_vars_extra[i].is_arr_ptr_el = 0;
  713. }
  714. switch (m_vars[i].type.src) {
  715. case c_data_ptr_array:
  716. {
  717. const arr_desc *ap;
  718. const VarDesc3 *vd3 =
  719. static_cast<const VarDesc3*>(m_vars[i].ptr);
  720. int flags = vd3->array_fields;
  721. OFFLOAD_TRACE(2,
  722. " pointer array flags = %04x\n", flags);
  723. OFFLOAD_TRACE(2,
  724. " pointer array type is %s\n",
  725. vardesc_type_as_string[flags & 0x3f]);
  726. ap = static_cast<const arr_desc*>(vd3->ptr_array);
  727. __arr_desc_dump(" ", "ptr array", ap, 0);
  728. if (m_vars[i].into) {
  729. ap = static_cast<const arr_desc*>(m_vars[i].into);
  730. __arr_desc_dump(
  731. " ", "into array", ap, 0);
  732. }
  733. if ((flags & (1<<flag_align_is_array)) != 0) {
  734. ap = static_cast<const arr_desc*>(vd3->align_array);
  735. __arr_desc_dump(
  736. " ", "align array", ap, 0);
  737. }
  738. if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
  739. ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
  740. __arr_desc_dump(
  741. " ", "alloc_if array", ap, 0);
  742. }
  743. if ((flags & (1<<flag_free_if_is_array)) != 0) {
  744. ap = static_cast<const arr_desc*>(vd3->free_if_array);
  745. __arr_desc_dump(
  746. " ", "free_if array", ap, 0);
  747. }
  748. if ((flags & (1<<flag_extent_start_is_array)) != 0) {
  749. ap = static_cast<const arr_desc*>(vd3->extent_start);
  750. __arr_desc_dump(
  751. " ", "extent_start array", ap, 0);
  752. } else if ((flags &
  753. (1<<flag_extent_start_is_scalar)) != 0) {
  754. OFFLOAD_TRACE(2,
  755. " extent_start scalar = %d\n",
  756. (int64_t)vd3->extent_start);
  757. }
  758. if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
  759. ap = static_cast<const arr_desc*>
  760. (vd3->extent_elements);
  761. __arr_desc_dump(
  762. " ", "extent_elements array", ap, 0);
  763. } else if ((flags &
  764. (1<<flag_extent_elements_is_scalar)) != 0) {
  765. OFFLOAD_TRACE(2,
  766. " extent_elements scalar = %d\n",
  767. (int64_t)vd3->extent_elements);
  768. }
  769. if ((flags & (1<<flag_into_start_is_array)) != 0) {
  770. ap = static_cast<const arr_desc*>(vd3->into_start);
  771. __arr_desc_dump(
  772. " ", "into_start array", ap, 0);
  773. } else if ((flags &
  774. (1<<flag_into_start_is_scalar)) != 0) {
  775. OFFLOAD_TRACE(2,
  776. " into_start scalar = %d\n",
  777. (int64_t)vd3->into_start);
  778. }
  779. if ((flags & (1<<flag_into_elements_is_array)) != 0) {
  780. ap = static_cast<const arr_desc*>(vd3->into_elements);
  781. __arr_desc_dump(
  782. " ", "into_elements array", ap, 0);
  783. } else if ((flags &
  784. (1<<flag_into_elements_is_scalar)) != 0) {
  785. OFFLOAD_TRACE(2,
  786. " into_elements scalar = %d\n",
  787. (int64_t)vd3->into_elements);
  788. }
  789. if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
  790. ap = static_cast<const arr_desc*>(vd3->alloc_start);
  791. __arr_desc_dump(
  792. " ", "alloc_start array", ap, 0);
  793. } else if ((flags &
  794. (1<<flag_alloc_start_is_scalar)) != 0) {
  795. OFFLOAD_TRACE(2,
  796. " alloc_start scalar = %d\n",
  797. (int64_t)vd3->alloc_start);
  798. }
  799. if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
  800. ap = static_cast<const arr_desc*>(vd3->alloc_elements);
  801. __arr_desc_dump(
  802. " ", "alloc_elements array", ap, 0);
  803. } else if ((flags &
  804. (1<<flag_alloc_elements_is_scalar)) != 0) {
  805. OFFLOAD_TRACE(2,
  806. " alloc_elements scalar = %d\n",
  807. (int64_t)vd3->alloc_elements);
  808. }
  809. }
  810. if (!gen_var_descs_for_pointer_array(i)) {
  811. return false;
  812. }
  813. break;
  814. case c_data:
  815. case c_void_ptr:
  816. case c_cean_var:
  817. // In all uses later
  818. // VarDesc.size will have the length of the data to be
  819. // transferred
  820. // VarDesc.disp will have an offset from base
  821. if (m_vars[i].type.src == c_cean_var) {
  822. // array descriptor
  823. const arr_desc *ap =
  824. static_cast<const arr_desc*>(m_vars[i].ptr);
  825. // debug dump
  826. __arr_desc_dump("", "IN/OUT", ap, 0);
  827. // offset and length are derived from the array descriptor
  828. __arr_data_offset_and_length(ap, m_vars[i].disp,
  829. m_vars[i].size);
  830. if (!is_arr_desc_contiguous(ap)) {
  831. m_vars[i].flags.is_noncont_src = 1;
  832. m_vars_extra[i].read_rng_src =
  833. init_read_ranges_arr_desc(ap);
  834. }
  835. // all necessary information about length and offset is
  836. // transferred in var descriptor. There is no need to send
  837. // array descriptor to the target side.
  838. m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
  839. }
  840. else {
  841. m_vars[i].size *= m_vars[i].count;
  842. m_vars[i].disp = 0;
  843. }
  844. if (m_vars[i].direction.bits) {
  845. // make sure that transfer size > 0
  846. if (m_vars[i].size <= 0) {
  847. LIBOFFLOAD_ERROR(c_zero_or_neg_transfer_size);
  848. exit(1);
  849. }
  850. if (m_vars[i].flags.is_static) {
  851. PtrData *ptr_data;
  852. // find data associated with variable
  853. if (!find_ptr_data(ptr_data,
  854. m_vars[i].ptr,
  855. m_vars[i].disp,
  856. m_vars[i].size,
  857. false)) {
  858. return false;
  859. }
  860. if (ptr_data != 0) {
  861. // offset to base from the beginning of the buffer
  862. // memory
  863. m_vars[i].offset =
  864. (char*) m_vars[i].ptr -
  865. (char*) ptr_data->cpu_addr.start();
  866. }
  867. else {
  868. m_vars[i].flags.is_static = false;
  869. if (m_vars[i].into == NULL) {
  870. m_vars[i].flags.is_static_dstn = false;
  871. }
  872. }
  873. m_vars_extra[i].src_data = ptr_data;
  874. }
  875. if (m_is_openmp) {
  876. if (m_vars[i].flags.is_static) {
  877. // Static data is transferred only by omp target
  878. // update construct which passes zeros for
  879. // alloc_if and free_if.
  880. if (m_vars[i].alloc_if || m_vars[i].free_if) {
  881. m_vars[i].direction.bits = c_parameter_nocopy;
  882. }
  883. }
  884. else {
  885. AutoData *auto_data;
  886. if (m_vars[i].alloc_if) {
  887. auto_data = m_device.insert_auto_data(
  888. m_vars[i].ptr, m_vars[i].size);
  889. auto_data->add_reference();
  890. }
  891. else {
  892. // TODO: what should be done if var is not in
  893. // the table?
  894. auto_data = m_device.find_auto_data(
  895. m_vars[i].ptr);
  896. }
  897. // For automatic variables data is transferred
  898. // only if alloc_if == 0 && free_if == 0
  899. // or reference count is 1
  900. if ((m_vars[i].alloc_if || m_vars[i].free_if) &&
  901. auto_data != 0 &&
  902. auto_data->get_reference() != 1) {
  903. m_vars[i].direction.bits = c_parameter_nocopy;
  904. }
  905. // save data for later use
  906. m_vars_extra[i].auto_data = auto_data;
  907. }
  908. }
  909. if (m_vars[i].direction.in &&
  910. !m_vars[i].flags.is_static) {
  911. m_in_datalen += m_vars[i].size;
  912. // for non-static target destination defined as CEAN
  913. // expression we pass to target its size and dist
  914. if (m_vars[i].into == NULL &&
  915. m_vars[i].type.src == c_cean_var) {
  916. m_in_datalen += 2 * sizeof(uint64_t);
  917. }
  918. m_need_runfunction = true;
  919. }
  920. if (m_vars[i].direction.out &&
  921. !m_vars[i].flags.is_static) {
  922. m_out_datalen += m_vars[i].size;
  923. m_need_runfunction = true;
  924. }
  925. }
  926. break;
  927. case c_dv:
  928. if (m_vars[i].direction.bits ||
  929. m_vars[i].alloc_if ||
  930. m_vars[i].free_if) {
  931. ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].ptr);
  932. // debug dump
  933. __dv_desc_dump("IN/OUT", dvp);
  934. // send dope vector contents excluding base
  935. m_in_datalen += m_vars[i].size - sizeof(uint64_t);
  936. m_need_runfunction = true;
  937. }
  938. break;
  939. case c_string_ptr:
  940. if ((m_vars[i].direction.bits ||
  941. m_vars[i].alloc_if ||
  942. m_vars[i].free_if) &&
  943. m_vars[i].size == 0) {
  944. m_vars[i].size = 1;
  945. m_vars[i].count =
  946. strlen(*static_cast<char**>(m_vars[i].ptr)) + 1;
  947. }
  948. /* fallthru */
  949. case c_data_ptr:
  950. if (m_vars[i].flags.is_stack_buf &&
  951. !m_vars[i].direction.bits &&
  952. m_vars[i].alloc_if) {
  953. // this var_desc is for stack buffer
  954. bool is_new;
  955. if (!offload_stack_memory_manager(
  956. stack_addr, entry_id,
  957. m_vars[i].count, m_vars[i].align, &is_new)) {
  958. return false;
  959. }
  960. if (is_new) {
  961. m_compute_buffers.push_back(
  962. m_stack_ptr_data->mic_buf);
  963. m_device.m_persist_list.front().cpu_stack_addr =
  964. static_cast<char*>(m_vars[i].ptr);
  965. }
  966. else {
  967. m_vars[i].flags.sink_addr = 1;
  968. m_in_datalen += sizeof(m_stack_ptr_data->mic_addr);
  969. }
  970. m_vars[i].size = m_destroy_stack.size();
  971. m_vars_extra[i].src_data = m_stack_ptr_data;
  972. // need to add reference for buffer
  973. m_need_runfunction = true;
  974. break;
  975. }
  976. /* fallthru */
  977. case c_cean_var_ptr:
  978. case c_dv_ptr:
  979. if (m_vars[i].type.src == c_cean_var_ptr) {
  980. // array descriptor
  981. const arr_desc *ap =
  982. static_cast<const arr_desc*>(m_vars[i].ptr);
  983. // debug dump
  984. __arr_desc_dump("", "IN/OUT", ap, 1);
  985. // offset and length are derived from the array descriptor
  986. __arr_data_offset_and_length(ap, m_vars[i].disp,
  987. m_vars[i].size);
  988. if (!is_arr_desc_contiguous(ap)) {
  989. m_vars[i].flags.is_noncont_src = 1;
  990. m_vars_extra[i].read_rng_src =
  991. init_read_ranges_arr_desc(ap);
  992. }
  993. // all necessary information about length and offset is
  994. // transferred in var descriptor. There is no need to send
  995. // array descriptor to the target side.
  996. m_vars[i].ptr = reinterpret_cast<void*>(ap->base);
  997. }
  998. else if (m_vars[i].type.src == c_dv_ptr) {
  999. // need to send DV to the device unless it is 'nocopy'
  1000. if (m_vars[i].direction.bits ||
  1001. m_vars[i].alloc_if ||
  1002. m_vars[i].free_if) {
  1003. ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].ptr);
  1004. // debug dump
  1005. __dv_desc_dump("IN/OUT", dvp);
  1006. m_vars[i].direction.bits = c_parameter_in;
  1007. }
  1008. // no displacement
  1009. m_vars[i].disp = 0;
  1010. }
  1011. else {
  1012. // c_data_ptr or c_string_ptr
  1013. m_vars[i].size *= m_vars[i].count;
  1014. m_vars[i].disp = 0;
  1015. }
  1016. if (m_vars[i].direction.bits ||
  1017. m_vars[i].alloc_if ||
  1018. m_vars[i].free_if) {
  1019. PtrData *ptr_data;
  1020. // check that buffer length >= 0
  1021. if (m_vars[i].alloc_if &&
  1022. m_vars[i].disp + m_vars[i].size < 0) {
  1023. LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
  1024. exit(1);
  1025. }
  1026. // base address
  1027. void *base = *static_cast<void**>(m_vars[i].ptr);
  1028. // allocate buffer if we have no INTO and don't need
  1029. // allocation for the ptr at target
  1030. if (src_is_for_mic) {
  1031. if (m_vars[i].flags.is_stack_buf) {
  1032. // for stack persistent objects ptr data is created
  1033. // by var_desc with number 0.
  1034. // Its ptr_data is stored at m_stack_ptr_data
  1035. ptr_data = m_stack_ptr_data;
  1036. m_vars[i].flags.sink_addr = 1;
  1037. }
  1038. else if (m_vars[i].alloc_if) {
  1039. // add new entry
  1040. if (!alloc_ptr_data(
  1041. ptr_data,
  1042. base,
  1043. (alloc_base != NULL) ?
  1044. alloc_disp : m_vars[i].disp,
  1045. (alloc_base != NULL) ?
  1046. alloc_size : m_vars[i].size,
  1047. alloc_disp,
  1048. (alloc_base != NULL) ?
  1049. 0 : m_vars[i].align)) {
  1050. return false;
  1051. }
  1052. if (ptr_data->add_reference() == 0 &&
  1053. ptr_data->mic_buf != 0) {
  1054. // add buffer to the list of buffers that
  1055. // are passed to dispatch call
  1056. m_compute_buffers.push_back(
  1057. ptr_data->mic_buf);
  1058. }
  1059. else {
  1060. // will send buffer address to device
  1061. m_vars[i].flags.sink_addr = 1;
  1062. }
  1063. if (!ptr_data->is_static) {
  1064. // need to add reference for buffer
  1065. m_need_runfunction = true;
  1066. }
  1067. }
  1068. else {
  1069. bool error_if_not_found = true;
  1070. if (m_is_openmp) {
  1071. // For omp target update variable is ignored
  1072. // if it does not exist.
  1073. if (!m_vars[i].alloc_if &&
  1074. !m_vars[i].free_if) {
  1075. error_if_not_found = false;
  1076. }
  1077. }
  1078. // use existing association from pointer table
  1079. if (!find_ptr_data(ptr_data,
  1080. base,
  1081. m_vars[i].disp,
  1082. m_vars[i].size,
  1083. error_if_not_found)) {
  1084. return false;
  1085. }
  1086. if (m_is_openmp) {
  1087. // make var nocopy if it does not exist
  1088. if (ptr_data == 0) {
  1089. m_vars[i].direction.bits =
  1090. c_parameter_nocopy;
  1091. }
  1092. }
  1093. if (ptr_data != 0) {
  1094. m_vars[i].flags.sink_addr = 1;
  1095. }
  1096. }
  1097. if (ptr_data != 0) {
  1098. if (m_is_openmp) {
  1099. // data is transferred only if
  1100. // alloc_if == 0 && free_if == 0
  1101. // or reference count is 1
  1102. if ((m_vars[i].alloc_if ||
  1103. m_vars[i].free_if) &&
  1104. ptr_data->get_reference() != 1) {
  1105. m_vars[i].direction.bits =
  1106. c_parameter_nocopy;
  1107. }
  1108. }
  1109. if (ptr_data->alloc_disp != 0) {
  1110. m_vars[i].flags.alloc_disp = 1;
  1111. m_in_datalen += sizeof(alloc_disp);
  1112. }
  1113. if (m_vars[i].flags.sink_addr) {
  1114. // get buffers's address on the sink
  1115. if (!init_mic_address(ptr_data)) {
  1116. return false;
  1117. }
  1118. m_in_datalen += sizeof(ptr_data->mic_addr);
  1119. }
  1120. if (!ptr_data->is_static && m_vars[i].free_if) {
  1121. // need to decrement buffer reference on target
  1122. m_need_runfunction = true;
  1123. }
  1124. // offset to base from the beginning of the buffer
  1125. // memory
  1126. m_vars[i].offset = (char*) base -
  1127. (char*) ptr_data->cpu_addr.start();
  1128. // copy other pointer properties to var descriptor
  1129. m_vars[i].mic_offset = ptr_data->mic_offset;
  1130. m_vars[i].flags.is_static = ptr_data->is_static;
  1131. }
  1132. }
  1133. else {
  1134. if (!find_ptr_data(ptr_data,
  1135. base,
  1136. m_vars[i].disp,
  1137. m_vars[i].size,
  1138. false)) {
  1139. return false;
  1140. }
  1141. if (ptr_data) {
  1142. m_vars[i].offset =
  1143. (char*) base -
  1144. (char*) ptr_data->cpu_addr.start();
  1145. }
  1146. }
  1147. // save pointer data
  1148. m_vars_extra[i].src_data = ptr_data;
  1149. }
  1150. break;
  1151. case c_func_ptr:
  1152. if (m_vars[i].direction.in) {
  1153. m_in_datalen += __offload_funcs.max_name_length();
  1154. }
  1155. if (m_vars[i].direction.out) {
  1156. m_out_datalen += __offload_funcs.max_name_length();
  1157. }
  1158. m_need_runfunction = true;
  1159. break;
  1160. case c_dv_data:
  1161. case c_dv_ptr_data:
  1162. case c_dv_data_slice:
  1163. case c_dv_ptr_data_slice:
  1164. ArrDesc *dvp;
  1165. if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
  1166. const arr_desc *ap;
  1167. ap = static_cast<const arr_desc*>(m_vars[i].ptr);
  1168. dvp = (m_vars[i].type.src == c_dv_data_slice) ?
  1169. reinterpret_cast<ArrDesc*>(ap->base) :
  1170. *reinterpret_cast<ArrDesc**>(ap->base);
  1171. }
  1172. else {
  1173. dvp = (m_vars[i].type.src == c_dv_data) ?
  1174. static_cast<ArrDesc*>(m_vars[i].ptr) :
  1175. *static_cast<ArrDesc**>(m_vars[i].ptr);
  1176. }
  1177. // if allocatable dope vector isn't allocated don't
  1178. // transfer its data
  1179. if (!__dv_is_allocated(dvp)) {
  1180. m_vars[i].direction.bits = c_parameter_nocopy;
  1181. m_vars[i].alloc_if = 0;
  1182. m_vars[i].free_if = 0;
  1183. }
  1184. if (m_vars[i].direction.bits ||
  1185. m_vars[i].alloc_if ||
  1186. m_vars[i].free_if) {
  1187. const arr_desc *ap;
  1188. if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
  1189. ap = static_cast<const arr_desc*>(m_vars[i].ptr);
  1190. // debug dump
  1191. __arr_desc_dump("", "IN/OUT", ap, 0);
  1192. }
  1193. if (!__dv_is_contiguous(dvp)) {
  1194. m_vars[i].flags.is_noncont_src = 1;
  1195. m_vars_extra[i].read_rng_src =
  1196. init_read_ranges_dv(dvp);
  1197. }
  1198. // size and displacement
  1199. if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src)) {
  1200. // offset and length are derived from the
  1201. // array descriptor
  1202. __arr_data_offset_and_length(ap,
  1203. m_vars[i].disp,
  1204. m_vars[i].size);
  1205. if (m_vars[i].direction.bits) {
  1206. if (!is_arr_desc_contiguous(ap)) {
  1207. if (m_vars[i].flags.is_noncont_src) {
  1208. LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
  1209. return false;
  1210. }
  1211. m_vars[i].flags.is_noncont_src = 1;
  1212. m_vars_extra[i].read_rng_src =
  1213. init_read_ranges_arr_desc(ap);
  1214. }
  1215. }
  1216. }
  1217. else {
  1218. if (m_vars[i].flags.has_length) {
  1219. m_vars[i].size =
  1220. __dv_data_length(dvp, m_vars[i].count);
  1221. }
  1222. else {
  1223. m_vars[i].size = __dv_data_length(dvp);
  1224. }
  1225. m_vars[i].disp = 0;
  1226. }
  1227. // check that length >= 0
  1228. if (m_vars[i].alloc_if &&
  1229. (m_vars[i].disp + m_vars[i].size < 0)) {
  1230. LIBOFFLOAD_ERROR(c_zero_or_neg_ptr_len);
  1231. exit(1);
  1232. }
  1233. // base address
  1234. void *base = reinterpret_cast<void*>(dvp->Base);
  1235. PtrData *ptr_data;
  1236. // allocate buffer if we have no INTO and don't need
  1237. // allocation for the ptr at target
  1238. if (src_is_for_mic) {
  1239. if (m_vars[i].alloc_if) {
  1240. // add new entry
  1241. if (!alloc_ptr_data(
  1242. ptr_data,
  1243. base,
  1244. (alloc_base != NULL) ?
  1245. alloc_disp : m_vars[i].disp,
  1246. (alloc_base != NULL) ?
  1247. alloc_size : m_vars[i].size,
  1248. alloc_disp,
  1249. (alloc_base != NULL) ?
  1250. 0 : m_vars[i].align)) {
  1251. return false;
  1252. }
  1253. if (ptr_data->add_reference() == 0 &&
  1254. ptr_data->mic_buf != 0) {
  1255. // add buffer to the list of buffers
  1256. // that are passed to dispatch call
  1257. m_compute_buffers.push_back(
  1258. ptr_data->mic_buf);
  1259. }
  1260. else {
  1261. // will send buffer address to device
  1262. m_vars[i].flags.sink_addr = 1;
  1263. }
  1264. if (!ptr_data->is_static) {
  1265. // need to add reference for buffer
  1266. m_need_runfunction = true;
  1267. }
  1268. }
  1269. else {
  1270. bool error_if_not_found = true;
  1271. if (m_is_openmp) {
  1272. // For omp target update variable is ignored
  1273. // if it does not exist.
  1274. if (!m_vars[i].alloc_if &&
  1275. !m_vars[i].free_if) {
  1276. error_if_not_found = false;
  1277. }
  1278. }
  1279. // use existing association from pointer table
  1280. if (!find_ptr_data(ptr_data,
  1281. base,
  1282. m_vars[i].disp,
  1283. m_vars[i].size,
  1284. error_if_not_found)) {
  1285. return false;
  1286. }
  1287. if (m_is_openmp) {
  1288. // make var nocopy if it does not exist
  1289. if (ptr_data == 0) {
  1290. m_vars[i].direction.bits =
  1291. c_parameter_nocopy;
  1292. }
  1293. }
  1294. if (ptr_data != 0) {
  1295. // need to update base in dope vector on device
  1296. m_vars[i].flags.sink_addr = 1;
  1297. }
  1298. }
  1299. if (ptr_data != 0) {
  1300. if (m_is_openmp) {
  1301. // data is transferred only if
  1302. // alloc_if == 0 && free_if == 0
  1303. // or reference count is 1
  1304. if ((m_vars[i].alloc_if ||
  1305. m_vars[i].free_if) &&
  1306. ptr_data->get_reference() != 1) {
  1307. m_vars[i].direction.bits =
  1308. c_parameter_nocopy;
  1309. }
  1310. }
  1311. if (ptr_data->alloc_disp != 0) {
  1312. m_vars[i].flags.alloc_disp = 1;
  1313. m_in_datalen += sizeof(alloc_disp);
  1314. }
  1315. if (m_vars[i].flags.sink_addr) {
  1316. // get buffers's address on the sink
  1317. if (!init_mic_address(ptr_data)) {
  1318. return false;
  1319. }
  1320. m_in_datalen += sizeof(ptr_data->mic_addr);
  1321. }
  1322. if (!ptr_data->is_static && m_vars[i].free_if) {
  1323. // need to decrement buffer reference on target
  1324. m_need_runfunction = true;
  1325. }
  1326. // offset to base from the beginning of the buffer
  1327. // memory
  1328. m_vars[i].offset =
  1329. (char*) base -
  1330. (char*) ptr_data->cpu_addr.start();
  1331. // copy other pointer properties to var descriptor
  1332. m_vars[i].mic_offset = ptr_data->mic_offset;
  1333. m_vars[i].flags.is_static = ptr_data->is_static;
  1334. }
  1335. }
  1336. else { // !src_is_for_mic
  1337. if (!find_ptr_data(ptr_data,
  1338. base,
  1339. m_vars[i].disp,
  1340. m_vars[i].size,
  1341. false)) {
  1342. return false;
  1343. }
  1344. m_vars[i].offset = !ptr_data ? 0 :
  1345. (char*) base -
  1346. (char*) ptr_data->cpu_addr.start();
  1347. }
  1348. // save pointer data
  1349. m_vars_extra[i].src_data = ptr_data;
  1350. }
  1351. break;
  1352. default:
  1353. LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
  1354. LIBOFFLOAD_ABORT;
  1355. }
  1356. if (m_vars[i].type.src == c_data_ptr_array) {
  1357. continue;
  1358. }
  1359. if (src_is_for_mic && m_vars[i].flags.is_stack_buf) {
  1360. m_vars[i].offset = static_cast<char*>(m_vars[i].ptr) -
  1361. m_device.m_persist_list.front().cpu_stack_addr;
  1362. }
  1363. // if source is used at CPU save its offset and disp
  1364. if (m_vars[i].into == NULL || m_vars[i].direction.in) {
  1365. m_vars_extra[i].cpu_offset = m_vars[i].offset;
  1366. m_vars_extra[i].cpu_disp = m_vars[i].disp;
  1367. }
  1368. // If "into" is define we need to do the similar work for it
  1369. if (!m_vars[i].into) {
  1370. continue;
  1371. }
  1372. int64_t into_disp =0, into_offset = 0;
  1373. switch (m_vars[i].type.dst) {
  1374. case c_data_ptr_array:
  1375. break;
  1376. case c_data:
  1377. case c_void_ptr:
  1378. case c_cean_var: {
  1379. int64_t size = m_vars[i].size;
  1380. if (m_vars[i].type.dst == c_cean_var) {
  1381. // array descriptor
  1382. const arr_desc *ap =
  1383. static_cast<const arr_desc*>(m_vars[i].into);
  1384. // debug dump
  1385. __arr_desc_dump(" ", "INTO", ap, 0);
  1386. // offset and length are derived from the array descriptor
  1387. __arr_data_offset_and_length(ap, into_disp, size);
  1388. if (!is_arr_desc_contiguous(ap)) {
  1389. m_vars[i].flags.is_noncont_dst = 1;
  1390. m_vars_extra[i].read_rng_dst =
  1391. init_read_ranges_arr_desc(ap);
  1392. if (!cean_ranges_match(
  1393. m_vars_extra[i].read_rng_src,
  1394. m_vars_extra[i].read_rng_dst)) {
  1395. LIBOFFLOAD_ERROR(c_ranges_dont_match);
  1396. exit(1);
  1397. }
  1398. }
  1399. m_vars[i].into = reinterpret_cast<void*>(ap->base);
  1400. }
  1401. int64_t size_src = m_vars_extra[i].read_rng_src ?
  1402. cean_get_transf_size(m_vars_extra[i].read_rng_src) :
  1403. m_vars[i].size;
  1404. int64_t size_dst = m_vars_extra[i].read_rng_dst ?
  1405. cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
  1406. size;
  1407. // It's supposed that "into" size must be not less
  1408. // than src size
  1409. if (size_src > size_dst) {
  1410. LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
  1411. size_src, size_dst);
  1412. exit(1);
  1413. }
  1414. if (m_vars[i].direction.bits) {
  1415. if (m_vars[i].flags.is_static_dstn) {
  1416. PtrData *ptr_data;
  1417. // find data associated with variable
  1418. if (!find_ptr_data(ptr_data, m_vars[i].into,
  1419. into_disp, size, false)) {
  1420. return false;
  1421. }
  1422. if (ptr_data != 0) {
  1423. // offset to base from the beginning of the buffer
  1424. // memory
  1425. into_offset =
  1426. (char*) m_vars[i].into -
  1427. (char*) ptr_data->cpu_addr.start();
  1428. }
  1429. else {
  1430. m_vars[i].flags.is_static_dstn = false;
  1431. }
  1432. m_vars_extra[i].dst_data = ptr_data;
  1433. }
  1434. }
  1435. if (m_vars[i].direction.in &&
  1436. !m_vars[i].flags.is_static_dstn) {
  1437. m_in_datalen += m_vars[i].size;
  1438. // for non-static target destination defined as CEAN
  1439. // expression we pass to target its size and dist
  1440. if (m_vars[i].type.dst == c_cean_var) {
  1441. m_in_datalen += 2 * sizeof(uint64_t);
  1442. }
  1443. m_need_runfunction = true;
  1444. }
  1445. break;
  1446. }
  1447. case c_dv:
  1448. if (m_vars[i].direction.bits ||
  1449. m_vars[i].alloc_if ||
  1450. m_vars[i].free_if) {
  1451. ArrDesc *dvp = static_cast<ArrDesc*>(m_vars[i].into);
  1452. // debug dump
  1453. __dv_desc_dump("INTO", dvp);
  1454. // send dope vector contents excluding base
  1455. m_in_datalen += m_vars[i].size - sizeof(uint64_t);
  1456. m_need_runfunction = true;
  1457. }
  1458. break;
  1459. case c_string_ptr:
  1460. case c_data_ptr:
  1461. case c_cean_var_ptr:
  1462. case c_dv_ptr: {
  1463. int64_t size = m_vars[i].size;
  1464. if (m_vars[i].type.dst == c_cean_var_ptr) {
  1465. // array descriptor
  1466. const arr_desc *ap =
  1467. static_cast<const arr_desc*>(m_vars[i].into);
  1468. // debug dump
  1469. __arr_desc_dump(" ", "INTO", ap, 1);
  1470. // offset and length are derived from the array descriptor
  1471. __arr_data_offset_and_length(ap, into_disp, size);
  1472. if (!is_arr_desc_contiguous(ap)) {
  1473. m_vars[i].flags.is_noncont_src = 1;
  1474. m_vars_extra[i].read_rng_dst =
  1475. init_read_ranges_arr_desc(ap);
  1476. if (!cean_ranges_match(
  1477. m_vars_extra[i].read_rng_src,
  1478. m_vars_extra[i].read_rng_dst)) {
  1479. LIBOFFLOAD_ERROR(c_ranges_dont_match);
  1480. }
  1481. }
  1482. m_vars[i].into = reinterpret_cast<char**>(ap->base);
  1483. }
  1484. else if (m_vars[i].type.dst == c_dv_ptr) {
  1485. // need to send DV to the device unless it is 'nocopy'
  1486. if (m_vars[i].direction.bits ||
  1487. m_vars[i].alloc_if ||
  1488. m_vars[i].free_if) {
  1489. ArrDesc *dvp = *static_cast<ArrDesc**>(m_vars[i].into);
  1490. // debug dump
  1491. __dv_desc_dump("INTO", dvp);
  1492. m_vars[i].direction.bits = c_parameter_in;
  1493. }
  1494. }
  1495. int64_t size_src = m_vars_extra[i].read_rng_src ?
  1496. cean_get_transf_size(m_vars_extra[i].read_rng_src) :
  1497. m_vars[i].size;
  1498. int64_t size_dst = m_vars_extra[i].read_rng_dst ?
  1499. cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
  1500. size;
  1501. // It's supposed that "into" size must be not less than
  1502. // src size
  1503. if (size_src > size_dst) {
  1504. LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
  1505. size_src, size_dst);
  1506. exit(1);
  1507. }
  1508. if (m_vars[i].direction.bits) {
  1509. PtrData *ptr_data;
  1510. // base address
  1511. void *base = *static_cast<void**>(m_vars[i].into);
  1512. if (m_vars[i].direction.in) {
  1513. // allocate buffer
  1514. if (m_vars[i].flags.is_stack_buf) {
  1515. // for stack persistent objects ptr data is created
  1516. // by var_desc with number 0.
  1517. // Its ptr_data is stored at m_stack_ptr_data
  1518. ptr_data = m_stack_ptr_data;
  1519. m_vars[i].flags.sink_addr = 1;
  1520. }
  1521. else if (m_vars[i].alloc_if) {
  1522. // add new entry
  1523. if (!alloc_ptr_data(
  1524. ptr_data,
  1525. base,
  1526. (alloc_base != NULL) ?
  1527. alloc_disp : into_disp,
  1528. (alloc_base != NULL) ?
  1529. alloc_size : size,
  1530. alloc_disp,
  1531. (alloc_base != NULL) ?
  1532. 0 : m_vars[i].align)) {
  1533. return false;
  1534. }
  1535. if (ptr_data->add_reference() == 0 &&
  1536. ptr_data->mic_buf != 0) {
  1537. // add buffer to the list of buffers that
  1538. // are passed to dispatch call
  1539. m_compute_buffers.push_back(
  1540. ptr_data->mic_buf);
  1541. }
  1542. else {
  1543. // will send buffer address to device
  1544. m_vars[i].flags.sink_addr = 1;
  1545. }
  1546. if (!ptr_data->is_static) {
  1547. // need to add reference for buffer
  1548. m_need_runfunction = true;
  1549. }
  1550. }
  1551. else {
  1552. // use existing association from pointer table
  1553. if (!find_ptr_data(ptr_data, base, into_disp, size)) {
  1554. return false;
  1555. }
  1556. m_vars[i].flags.sink_addr = 1;
  1557. }
  1558. if (ptr_data->alloc_disp != 0) {
  1559. m_vars[i].flags.alloc_disp = 1;
  1560. m_in_datalen += sizeof(alloc_disp);
  1561. }
  1562. if (m_vars[i].flags.sink_addr) {
  1563. // get buffers's address on the sink
  1564. if (!init_mic_address(ptr_data)) {
  1565. return false;
  1566. }
  1567. m_in_datalen += sizeof(ptr_data->mic_addr);
  1568. }
  1569. if (!ptr_data->is_static && m_vars[i].free_if) {
  1570. // need to decrement buffer reference on target
  1571. m_need_runfunction = true;
  1572. }
  1573. // copy other pointer properties to var descriptor
  1574. m_vars[i].mic_offset = ptr_data->mic_offset;
  1575. m_vars[i].flags.is_static_dstn = ptr_data->is_static;
  1576. }
  1577. else {
  1578. if (!find_ptr_data(ptr_data,
  1579. base,
  1580. into_disp,
  1581. m_vars[i].size,
  1582. false)) {
  1583. return false;
  1584. }
  1585. }
  1586. if (ptr_data) {
  1587. into_offset = ptr_data ?
  1588. (char*) base -
  1589. (char*) ptr_data->cpu_addr.start() :
  1590. 0;
  1591. }
  1592. // save pointer data
  1593. m_vars_extra[i].dst_data = ptr_data;
  1594. }
  1595. break;
  1596. }
  1597. case c_func_ptr:
  1598. break;
  1599. case c_dv_data:
  1600. case c_dv_ptr_data:
  1601. case c_dv_data_slice:
  1602. case c_dv_ptr_data_slice:
  1603. if (m_vars[i].direction.bits ||
  1604. m_vars[i].alloc_if ||
  1605. m_vars[i].free_if) {
  1606. const arr_desc *ap;
  1607. ArrDesc *dvp;
  1608. PtrData *ptr_data;
  1609. int64_t disp;
  1610. int64_t size;
  1611. if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
  1612. ap = static_cast<const arr_desc*>(m_vars[i].into);
  1613. // debug dump
  1614. __arr_desc_dump(" ", "INTO", ap, 0);
  1615. dvp = (m_vars[i].type.dst == c_dv_data_slice) ?
  1616. reinterpret_cast<ArrDesc*>(ap->base) :
  1617. *reinterpret_cast<ArrDesc**>(ap->base);
  1618. }
  1619. else {
  1620. dvp = (m_vars[i].type.dst == c_dv_data) ?
  1621. static_cast<ArrDesc*>(m_vars[i].into) :
  1622. *static_cast<ArrDesc**>(m_vars[i].into);
  1623. }
  1624. if (!__dv_is_contiguous(dvp)) {
  1625. m_vars[i].flags.is_noncont_dst = 1;
  1626. m_vars_extra[i].read_rng_dst =
  1627. init_read_ranges_dv(dvp);
  1628. }
  1629. // size and displacement
  1630. if (VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
  1631. // offset and length are derived from the array
  1632. // descriptor
  1633. __arr_data_offset_and_length(ap, into_disp, size);
  1634. if (m_vars[i].direction.bits) {
  1635. if (!is_arr_desc_contiguous(ap)) {
  1636. if (m_vars[i].flags.is_noncont_dst) {
  1637. LIBOFFLOAD_ERROR(c_slice_of_noncont_array);
  1638. return false;
  1639. }
  1640. m_vars[i].flags.is_noncont_dst = 1;
  1641. m_vars_extra[i].read_rng_dst =
  1642. init_read_ranges_arr_desc(ap);
  1643. if (!cean_ranges_match(
  1644. m_vars_extra[i].read_rng_src,
  1645. m_vars_extra[i].read_rng_dst)) {
  1646. LIBOFFLOAD_ERROR(c_ranges_dont_match);
  1647. }
  1648. }
  1649. }
  1650. }
  1651. else {
  1652. if (m_vars[i].flags.has_length) {
  1653. size = __dv_data_length(dvp, m_vars[i].count);
  1654. }
  1655. else {
  1656. size = __dv_data_length(dvp);
  1657. }
  1658. disp = 0;
  1659. }
  1660. int64_t size_src =
  1661. m_vars_extra[i].read_rng_src ?
  1662. cean_get_transf_size(m_vars_extra[i].read_rng_src) :
  1663. m_vars[i].size;
  1664. int64_t size_dst =
  1665. m_vars_extra[i].read_rng_dst ?
  1666. cean_get_transf_size(m_vars_extra[i].read_rng_dst) :
  1667. size;
  1668. // It's supposed that "into" size must be not less
  1669. // than src size
  1670. if (size_src > size_dst) {
  1671. LIBOFFLOAD_ERROR(c_different_src_and_dstn_sizes,
  1672. size_src, size_dst);
  1673. exit(1);
  1674. }
  1675. // base address
  1676. void *base = reinterpret_cast<void*>(dvp->Base);
  1677. // allocate buffer
  1678. if (m_vars[i].direction.in) {
  1679. if (m_vars[i].alloc_if) {
  1680. // add new entry
  1681. if (!alloc_ptr_data(
  1682. ptr_data,
  1683. base,
  1684. (alloc_base != NULL) ?
  1685. alloc_disp : into_disp,
  1686. (alloc_base != NULL) ?
  1687. alloc_size : size,
  1688. alloc_disp,
  1689. (alloc_base != NULL) ?
  1690. 0 : m_vars[i].align)) {
  1691. return false;
  1692. }
  1693. if (ptr_data->add_reference() == 0 &&
  1694. ptr_data->mic_buf !=0) {
  1695. // add buffer to the list of buffers
  1696. // that are passed to dispatch call
  1697. m_compute_buffers.push_back(
  1698. ptr_data->mic_buf);
  1699. }
  1700. else {
  1701. // will send buffer address to device
  1702. m_vars[i].flags.sink_addr = 1;
  1703. }
  1704. if (!ptr_data->is_static) {
  1705. // need to add reference for buffer
  1706. m_need_runfunction = true;
  1707. }
  1708. }
  1709. else {
  1710. // use existing association from pointer table
  1711. if (!find_ptr_data(ptr_data, base, into_disp, size)) {
  1712. return false;
  1713. }
  1714. // need to update base in dope vector on device
  1715. m_vars[i].flags.sink_addr = 1;
  1716. }
  1717. if (ptr_data->alloc_disp != 0) {
  1718. m_vars[i].flags.alloc_disp = 1;
  1719. m_in_datalen += sizeof(alloc_disp);
  1720. }
  1721. if (m_vars[i].flags.sink_addr) {
  1722. // get buffers's address on the sink
  1723. if (!init_mic_address(ptr_data)) {
  1724. return false;
  1725. }
  1726. m_in_datalen += sizeof(ptr_data->mic_addr);
  1727. }
  1728. if (!ptr_data->is_static && m_vars[i].free_if) {
  1729. // need to decrement buffer reference on target
  1730. m_need_runfunction = true;
  1731. }
  1732. // offset to base from the beginning of the buffer
  1733. // memory
  1734. into_offset =
  1735. (char*) base - (char*) ptr_data->cpu_addr.start();
  1736. // copy other pointer properties to var descriptor
  1737. m_vars[i].mic_offset = ptr_data->mic_offset;
  1738. m_vars[i].flags.is_static_dstn = ptr_data->is_static;
  1739. }
  1740. else { // src_is_for_mic
  1741. if (!find_ptr_data(ptr_data,
  1742. base,
  1743. into_disp,
  1744. size,
  1745. false)) {
  1746. return false;
  1747. }
  1748. into_offset = !ptr_data ?
  1749. 0 :
  1750. (char*) base - (char*) ptr_data->cpu_addr.start();
  1751. }
  1752. // save pointer data
  1753. m_vars_extra[i].dst_data = ptr_data;
  1754. }
  1755. break;
  1756. default:
  1757. LIBOFFLOAD_ERROR(c_unknown_var_type, m_vars[i].type.src);
  1758. LIBOFFLOAD_ABORT;
  1759. }
  1760. // if into is used at CPU save its offset and disp
  1761. if (m_vars[i].direction.out) {
  1762. m_vars_extra[i].cpu_offset = into_offset;
  1763. m_vars_extra[i].cpu_disp = into_disp;
  1764. }
  1765. else {
  1766. if (m_vars[i].flags.is_stack_buf) {
  1767. into_offset = static_cast<char*>(m_vars[i].into) -
  1768. m_device.m_persist_list.front().cpu_stack_addr;
  1769. }
  1770. m_vars[i].offset = into_offset;
  1771. m_vars[i].disp = into_disp;
  1772. }
  1773. }
  1774. return true;
  1775. }
  1776. bool OffloadDescriptor::setup_misc_data(const char *name)
  1777. {
  1778. OffloadTimer timer(get_timer_data(), c_offload_host_setup_misc_data);
  1779. // we can skip run functon call together with wait if offloaded
  1780. // region is empty and there is no user defined non-pointer IN/OUT data
  1781. if (m_need_runfunction) {
  1782. // variable descriptors are sent as input data
  1783. m_in_datalen += m_vars_total * sizeof(VarDesc);
  1784. // timer data is sent as a part of the output data
  1785. m_out_datalen += OFFLOAD_TIMER_DATALEN();
  1786. // max from input data and output data length
  1787. uint64_t data_len = m_in_datalen > m_out_datalen ? m_in_datalen :
  1788. m_out_datalen;
  1789. // Misc data has the following layout
  1790. // <Function Descriptor>
  1791. // <Function Name>
  1792. // <In/Out Data> (optional)
  1793. //
  1794. // We can transfer copyin/copyout data in misc/return data which can
  1795. // be passed to run function call if its size does not exceed
  1796. // COI_PIPELINE_MAX_IN_MISC_DATA_LEN. Otherwise we have to allocate
  1797. // buffer for it.
  1798. m_func_desc_size = sizeof(FunctionDescriptor) + strlen(name) + 1;
  1799. m_func_desc_size = (m_func_desc_size + 7) & ~7;
  1800. int misc_data_offset = 0;
  1801. int misc_data_size = 0;
  1802. if (data_len > 0) {
  1803. if (m_func_desc_size +
  1804. m_in_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN &&
  1805. m_out_datalen <= COI_PIPELINE_MAX_IN_MISC_DATA_LEN) {
  1806. // use misc/return data for copyin/copyout
  1807. misc_data_offset = m_func_desc_size;
  1808. misc_data_size = data_len;
  1809. }
  1810. else {
  1811. OffloadTimer timer_buf(get_timer_data(),
  1812. c_offload_host_alloc_data_buffer);
  1813. // send/receive data using buffer
  1814. COIRESULT res = COI::BufferCreate(data_len,
  1815. COI_BUFFER_NORMAL,
  1816. 0, 0,
  1817. 1, &m_device.get_process(),
  1818. &m_inout_buf);
  1819. if (res != COI_SUCCESS) {
  1820. if (m_status != 0) {
  1821. m_status->result = translate_coi_error(res);
  1822. return false;
  1823. }
  1824. report_coi_error(c_buf_create, res);
  1825. }
  1826. m_compute_buffers.push_back(m_inout_buf);
  1827. m_destroy_buffers.push_back(m_inout_buf);
  1828. }
  1829. }
  1830. // initialize function descriptor
  1831. m_func_desc = (FunctionDescriptor*) malloc(m_func_desc_size +
  1832. misc_data_size);
  1833. if (m_func_desc == NULL)
  1834. LIBOFFLOAD_ERROR(c_malloc);
  1835. m_func_desc->console_enabled = console_enabled;
  1836. m_func_desc->timer_enabled =
  1837. timer_enabled || (offload_report_level && offload_report_enabled);
  1838. m_func_desc->offload_report_level = offload_report_level;
  1839. m_func_desc->offload_number = GET_OFFLOAD_NUMBER(get_timer_data());
  1840. m_func_desc->in_datalen = m_in_datalen;
  1841. m_func_desc->out_datalen = m_out_datalen;
  1842. m_func_desc->vars_num = m_vars_total;
  1843. m_func_desc->data_offset = misc_data_offset;
  1844. // append entry name
  1845. strcpy(m_func_desc->data, name);
  1846. }
  1847. return true;
  1848. }
  1849. bool OffloadDescriptor::wait_dependencies(
  1850. const void **waits,
  1851. int num_waits
  1852. )
  1853. {
  1854. OffloadTimer timer(get_timer_data(), c_offload_host_wait_deps);
  1855. bool ret = true;
  1856. for (int i = 0; i < num_waits; i++) {
  1857. OffloadDescriptor *task = m_device.find_signal(waits[i], true);
  1858. if (task == 0) {
  1859. LIBOFFLOAD_ERROR(c_offload1, m_device.get_logical_index(),
  1860. waits[i]);
  1861. LIBOFFLOAD_ABORT;
  1862. }
  1863. if (!task->offload_finish()) {
  1864. ret = false;
  1865. }
  1866. task->cleanup();
  1867. delete task;
  1868. }
  1869. return ret;
  1870. }
  1871. bool OffloadDescriptor::offload(
  1872. const char *name,
  1873. bool is_empty,
  1874. VarDesc *vars,
  1875. VarDesc2 *vars2,
  1876. int vars_total,
  1877. const void **waits,
  1878. int num_waits,
  1879. const void **signal,
  1880. int entry_id,
  1881. const void *stack_addr
  1882. )
  1883. {
  1884. if (signal == 0) {
  1885. OFFLOAD_DEBUG_TRACE_1(1,
  1886. GET_OFFLOAD_NUMBER(get_timer_data()),
  1887. c_offload_init_func,
  1888. "Offload function %s, is_empty=%d, #varDescs=%d, "
  1889. "#waits=%d, signal=none\n",
  1890. name, is_empty, vars_total, num_waits);
  1891. OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
  1892. c_offload_sent_pointer_data,
  1893. "#Wait : %d \n", num_waits);
  1894. OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
  1895. c_offload_signal,
  1896. "none %d\n", 0);
  1897. }
  1898. else {
  1899. OFFLOAD_DEBUG_TRACE_1(1,
  1900. GET_OFFLOAD_NUMBER(get_timer_data()),
  1901. c_offload_init_func,
  1902. "Offload function %s, is_empty=%d, #varDescs=%d, "
  1903. "#waits=%d, signal=%p\n",
  1904. name, is_empty, vars_total, num_waits,
  1905. *signal);
  1906. OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
  1907. c_offload_signal,
  1908. "%d\n", signal);
  1909. }
  1910. OFFLOAD_REPORT(3, GET_OFFLOAD_NUMBER(get_timer_data()),
  1911. c_offload_wait,
  1912. "#Wait : %d %p\n", num_waits, waits);
  1913. if (m_status != 0) {
  1914. m_status->result = OFFLOAD_SUCCESS;
  1915. m_status->device_number = m_device.get_logical_index();
  1916. }
  1917. m_need_runfunction = !is_empty;
  1918. // wait for dependencies to finish
  1919. if (!wait_dependencies(waits, num_waits)) {
  1920. cleanup();
  1921. return false;
  1922. }
  1923. // setup buffers
  1924. if (!setup_descriptors(vars, vars2, vars_total, entry_id, stack_addr)) {
  1925. cleanup();
  1926. return false;
  1927. }
  1928. // initiate send for pointers. Want to do it as early as possible.
  1929. if (!send_pointer_data(signal != 0)) {
  1930. cleanup();
  1931. return false;
  1932. }
  1933. // setup misc data for run function
  1934. if (!setup_misc_data(name)) {
  1935. cleanup();
  1936. return false;
  1937. }
  1938. // gather copyin data into buffer
  1939. if (!gather_copyin_data()) {
  1940. cleanup();
  1941. return false;
  1942. }
  1943. // Start the computation
  1944. if (!compute()) {
  1945. cleanup();
  1946. return false;
  1947. }
  1948. // initiate receive for pointers
  1949. if (!receive_pointer_data(signal != 0)) {
  1950. cleanup();
  1951. return false;
  1952. }
  1953. // if there is a signal save descriptor for the later use.
  1954. if (signal != 0) {
  1955. m_device.add_signal(*signal, this);
  1956. return true;
  1957. }
  1958. // wait for the offload to finish.
  1959. if (!offload_finish()) {
  1960. cleanup();
  1961. return false;
  1962. }
  1963. cleanup();
  1964. return true;
  1965. }
  1966. bool OffloadDescriptor::offload_finish()
  1967. {
  1968. COIRESULT res;
  1969. // wait for compute dependencies to become signaled
  1970. if (m_in_deps_total > 0) {
  1971. OffloadTimer timer(get_timer_data(), c_offload_host_wait_compute);
  1972. if (__offload_active_wait) {
  1973. // keep CPU busy
  1974. do {
  1975. res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
  1976. }
  1977. while (res == COI_TIME_OUT_REACHED);
  1978. }
  1979. else {
  1980. res = COI::EventWait(m_in_deps_total, m_in_deps, -1, 1, 0, 0);
  1981. }
  1982. if (res != COI_SUCCESS) {
  1983. if (m_status != 0) {
  1984. m_status->result = translate_coi_error(res);
  1985. return false;
  1986. }
  1987. report_coi_error(c_event_wait, res);
  1988. }
  1989. }
  1990. // scatter copyout data received from target
  1991. if (!scatter_copyout_data()) {
  1992. return false;
  1993. }
  1994. // wait for receive dependencies to become signaled
  1995. if (m_out_deps_total > 0) {
  1996. OffloadTimer timer(get_timer_data(), c_offload_host_wait_buffers_reads);
  1997. if (__offload_active_wait) {
  1998. // keep CPU busy
  1999. do {
  2000. res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
  2001. }
  2002. while (res == COI_TIME_OUT_REACHED);
  2003. }
  2004. else {
  2005. res = COI::EventWait(m_out_deps_total, m_out_deps, -1, 1, 0, 0);
  2006. }
  2007. if (res != COI_SUCCESS) {
  2008. if (m_status != 0) {
  2009. m_status->result = translate_coi_error(res);
  2010. return false;
  2011. }
  2012. report_coi_error(c_event_wait, res);
  2013. }
  2014. }
  2015. // destroy buffers
  2016. {
  2017. OffloadTimer timer(get_timer_data(), c_offload_host_destroy_buffers);
  2018. for (BufferList::const_iterator it = m_destroy_buffers.begin();
  2019. it != m_destroy_buffers.end(); it++) {
  2020. res = COI::BufferDestroy(*it);
  2021. if (res != COI_SUCCESS) {
  2022. if (m_status != 0) {
  2023. m_status->result = translate_coi_error(res);
  2024. return false;
  2025. }
  2026. report_coi_error(c_buf_destroy, res);
  2027. }
  2028. }
  2029. }
  2030. return true;
  2031. }
// Final bookkeeping for this offload: release the device reservation,
// stop the total-offload timer, and emit the offload report epilog.
// Note: call order matters — the timer must be stopped before the report
// reads the timer data.
void OffloadDescriptor::cleanup()
{
    // release device in orsl
    ORSL::release(m_device.get_logical_index());

    OFFLOAD_TIMER_STOP(get_timer_data(), c_offload_host_total_offload);

    // report stuff
    Offload_Report_Epilog(get_timer_data());
}
  2040. bool OffloadDescriptor::is_signaled()
  2041. {
  2042. bool signaled = true;
  2043. COIRESULT res;
  2044. // check compute and receive dependencies
  2045. if (m_in_deps_total > 0) {
  2046. res = COI::EventWait(m_in_deps_total, m_in_deps, 0, 1, 0, 0);
  2047. signaled = signaled && (res == COI_SUCCESS);
  2048. }
  2049. if (m_out_deps_total > 0) {
  2050. res = COI::EventWait(m_out_deps_total, m_out_deps, 0, 1, 0, 0);
  2051. signaled = signaled && (res == COI_SUCCESS);
  2052. }
  2053. return signaled;
  2054. }
// Send pointer data when the source, the destination, or both are
// noncontiguous. There is a guarantee that the destination is long enough
// for the transferred data (checked earlier in descriptor setup).
//
// The loop walks the source and destination contiguous ranges in lockstep:
// "send_size" is the smaller of the two range sizes, and each iteration
// transfers one send_size chunk, advancing whichever side still has room
// in its current range and fetching the next range for whichever side is
// exhausted. Transfers go via COI::BufferCopy when the source lives in a
// CPU-side COI buffer, otherwise via COI::BufferWrite from host memory.
bool OffloadDescriptor::send_noncontiguous_pointer_data(
    int i,
    PtrData* src_data,
    PtrData* dst_data,
    COIEVENT *event
)
{
    int64_t offset_src, offset_dst;
    int64_t length_src, length_dst;
    int64_t length_src_cur, length_dst_cur;
    int64_t send_size, data_sent = 0;
    COIRESULT res;
    bool dst_is_empty = true;   // true => fetch the next destination range
    bool src_is_empty = true;   // true => fetch the next source range

    // Set length_src and length_dst: the contiguous-range size when range
    // descriptors exist, otherwise the whole variable size. With no "into",
    // the destination mirrors the source.
    length_src = (m_vars_extra[i].read_rng_src) ?
        m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
    length_dst = !m_vars[i].into ? length_src :
                     (m_vars_extra[i].read_rng_dst) ?
                     m_vars_extra[i].read_rng_dst->range_size :
                     m_vars[i].size;
    send_size = (length_src < length_dst) ? length_src : length_dst;

    // consequently get contiguous ranges,
    // define corresponded destination offset and send data
    do {
        if (src_is_empty) {
            if (m_vars_extra[i].read_rng_src) {
                if (!get_next_range(m_vars_extra[i].read_rng_src,
                                    &offset_src)) {
                    // source ranges are over - nothing to send
                    break;
                }
            }
            else if (data_sent == 0) {
                // contiguous source: single pass starting at its disp
                offset_src = m_vars_extra[i].cpu_disp;
            }
            else {
                // contiguous source already sent
                break;
            }
            length_src_cur = length_src;
        }
        else {
            // if source is contiguous or its contiguous range is greater
            // than destination one
            offset_src += send_size;
        }
        length_src_cur -= send_size;
        src_is_empty = length_src_cur == 0;

        if (dst_is_empty) {
            if (m_vars[i].into) {
                if (m_vars_extra[i].read_rng_dst) {
                    if (!get_next_range(m_vars_extra[i].read_rng_dst,
                                        &offset_dst)) {
                        // destination ranges are over
                        LIBOFFLOAD_ERROR(c_destination_is_over);
                        return false;
                    }
                }
                // into is contiguous.
                else {
                    offset_dst = m_vars[i].disp;
                }
                length_dst_cur = length_dst;
            }
            // same as source
            else {
                offset_dst = offset_src;
                length_dst_cur = length_src;
            }
        }
        else {
            // if destination is contiguous or its contiguous range is greater
            // than source one
            offset_dst += send_size;
        }
        length_dst_cur -= send_size;
        dst_is_empty = length_dst_cur == 0;

        if (src_data != 0 && src_data->cpu_buf != 0) {
            // source data lives in a CPU-side COI buffer: buffer-to-buffer
            res = COI::BufferCopy(
                dst_data->mic_buf,
                src_data->cpu_buf,
                m_vars[i].mic_offset - dst_data->alloc_disp +
                m_vars[i].offset + offset_dst,
                m_vars_extra[i].cpu_offset + offset_src,
                send_size,
                COI_COPY_UNSPECIFIED,
                0, 0,
                event);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_copy, res);
            }
        }
        else {
            // plain host memory: write directly into the target buffer
            char *base = offload_get_src_base(m_vars[i].ptr,
                                              m_vars[i].type.src);
            res = COI::BufferWrite(
                dst_data->mic_buf,
                m_vars[i].mic_offset - dst_data->alloc_disp +
                m_vars[i].offset + offset_dst,
                base + offset_src,
                send_size,
                COI_COPY_UNSPECIFIED,
                0, 0,
                event);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_write, res);
            }
        }
        // NOTE(review): accumulates the full source range length rather than
        // send_size, even though only send_size bytes were transferred this
        // iteration — confirm this is intended (data_sent is only used to
        // detect the second pass over a contiguous source).
        data_sent += length_src;
    }
    while (true);
    return true;
}
  2178. bool OffloadDescriptor::send_pointer_data(bool is_async)
  2179. {
  2180. OffloadTimer timer(get_timer_data(), c_offload_host_send_pointers);
  2181. uint64_t ptr_sent = 0;
  2182. COIRESULT res;
  2183. // Initiate send for pointer data
  2184. for (int i = 0; i < m_vars_total; i++) {
  2185. switch (m_vars[i].type.dst) {
  2186. case c_data_ptr_array:
  2187. break;
  2188. case c_data:
  2189. case c_void_ptr:
  2190. case c_cean_var:
  2191. if (m_vars[i].direction.in &&
  2192. m_vars[i].flags.is_static_dstn) {
  2193. COIEVENT *event =
  2194. (is_async ||
  2195. m_vars[i].size >= __offload_use_async_buffer_write) ?
  2196. &m_in_deps[m_in_deps_total++] : 0;
  2197. PtrData* dst_data = m_vars[i].into ?
  2198. m_vars_extra[i].dst_data :
  2199. m_vars_extra[i].src_data;
  2200. PtrData* src_data =
  2201. VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
  2202. VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
  2203. m_vars[i].flags.is_static ?
  2204. m_vars_extra[i].src_data : 0;
  2205. if (m_vars[i].flags.is_noncont_src ||
  2206. m_vars[i].flags.is_noncont_dst) {
  2207. if (!send_noncontiguous_pointer_data(
  2208. i, src_data, dst_data, event)) {
  2209. return false;
  2210. }
  2211. }
  2212. else if (src_data != 0 && src_data->cpu_buf != 0) {
  2213. res = COI::BufferCopy(
  2214. dst_data->mic_buf,
  2215. src_data->cpu_buf,
  2216. m_vars[i].mic_offset - dst_data->alloc_disp +
  2217. m_vars[i].offset + m_vars[i].disp,
  2218. m_vars_extra[i].cpu_offset +
  2219. m_vars_extra[i].cpu_disp,
  2220. m_vars[i].size,
  2221. COI_COPY_UNSPECIFIED,
  2222. 0, 0,
  2223. event);
  2224. if (res != COI_SUCCESS) {
  2225. if (m_status != 0) {
  2226. m_status->result = translate_coi_error(res);
  2227. return false;
  2228. }
  2229. report_coi_error(c_buf_copy, res);
  2230. }
  2231. }
  2232. else {
  2233. char *base = offload_get_src_base(m_vars[i].ptr,
  2234. m_vars[i].type.src);
  2235. res = COI::BufferWrite(
  2236. dst_data->mic_buf,
  2237. m_vars[i].mic_offset - dst_data->alloc_disp +
  2238. m_vars[i].offset + m_vars[i].disp,
  2239. base + m_vars_extra[i].cpu_disp,
  2240. m_vars[i].size,
  2241. COI_COPY_UNSPECIFIED,
  2242. 0, 0,
  2243. event);
  2244. if (res != COI_SUCCESS) {
  2245. if (m_status != 0) {
  2246. m_status->result = translate_coi_error(res);
  2247. return false;
  2248. }
  2249. report_coi_error(c_buf_write, res);
  2250. }
  2251. }
  2252. ptr_sent += m_vars[i].size;
  2253. }
  2254. break;
  2255. case c_string_ptr:
  2256. case c_data_ptr:
  2257. case c_cean_var_ptr:
  2258. case c_dv_ptr:
  2259. if (m_vars[i].direction.in && m_vars[i].size > 0) {
  2260. COIEVENT *event =
  2261. (is_async ||
  2262. m_vars[i].size >= __offload_use_async_buffer_write) ?
  2263. &m_in_deps[m_in_deps_total++] : 0;
  2264. PtrData* dst_data = m_vars[i].into ?
  2265. m_vars_extra[i].dst_data :
  2266. m_vars_extra[i].src_data;
  2267. PtrData* src_data =
  2268. VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
  2269. VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
  2270. m_vars[i].flags.is_static ?
  2271. m_vars_extra[i].src_data : 0;
  2272. if (m_vars[i].flags.is_noncont_src ||
  2273. m_vars[i].flags.is_noncont_dst) {
  2274. send_noncontiguous_pointer_data(
  2275. i, src_data, dst_data, event);
  2276. }
  2277. else if (src_data != 0 && src_data->cpu_buf != 0) {
  2278. res = COI::BufferCopy(
  2279. dst_data->mic_buf,
  2280. src_data->cpu_buf,
  2281. m_vars[i].mic_offset - dst_data->alloc_disp +
  2282. m_vars[i].offset + m_vars[i].disp,
  2283. m_vars_extra[i].cpu_offset +
  2284. m_vars_extra[i].cpu_disp,
  2285. m_vars[i].size,
  2286. COI_COPY_UNSPECIFIED,
  2287. 0, 0,
  2288. event);
  2289. if (res != COI_SUCCESS) {
  2290. if (m_status != 0) {
  2291. m_status->result = translate_coi_error(res);
  2292. return false;
  2293. }
  2294. report_coi_error(c_buf_copy, res);
  2295. }
  2296. }
  2297. else {
  2298. char *base = offload_get_src_base(m_vars[i].ptr,
  2299. m_vars[i].type.src);
  2300. res = COI::BufferWrite(
  2301. dst_data->mic_buf,
  2302. m_vars[i].mic_offset - dst_data->alloc_disp +
  2303. m_vars[i].offset + m_vars[i].disp,
  2304. base + m_vars_extra[i].cpu_disp,
  2305. m_vars[i].size,
  2306. COI_COPY_UNSPECIFIED,
  2307. 0, 0,
  2308. event);
  2309. if (res != COI_SUCCESS) {
  2310. if (m_status != 0) {
  2311. m_status->result = translate_coi_error(res);
  2312. return false;
  2313. }
  2314. report_coi_error(c_buf_write, res);
  2315. }
  2316. }
  2317. ptr_sent += m_vars[i].size;
  2318. }
  2319. break;
  2320. case c_dv_data:
  2321. case c_dv_ptr_data:
  2322. if (m_vars[i].direction.in &&
  2323. m_vars[i].size > 0) {
  2324. PtrData *ptr_data = m_vars[i].into ?
  2325. m_vars_extra[i].dst_data :
  2326. m_vars_extra[i].src_data;
  2327. PtrData* src_data = m_vars_extra[i].src_data;
  2328. COIEVENT *event =
  2329. (is_async ||
  2330. m_vars[i].size >= __offload_use_async_buffer_write) ?
  2331. &m_in_deps[m_in_deps_total++] : 0;
  2332. if (m_vars[i].flags.is_noncont_src ||
  2333. m_vars[i].flags.is_noncont_dst) {
  2334. send_noncontiguous_pointer_data(
  2335. i, src_data, ptr_data, event);
  2336. }
  2337. else if (src_data && src_data->cpu_buf != 0) {
  2338. res = COI::BufferCopy(
  2339. ptr_data->mic_buf,
  2340. src_data->cpu_buf,
  2341. m_vars[i].offset + ptr_data->mic_offset -
  2342. ptr_data->alloc_disp +
  2343. m_vars[i].disp,
  2344. m_vars_extra[i].cpu_offset +
  2345. m_vars_extra[i].cpu_disp,
  2346. m_vars[i].size,
  2347. COI_COPY_UNSPECIFIED,
  2348. 0, 0,
  2349. event);
  2350. if (res != COI_SUCCESS) {
  2351. if (m_status != 0) {
  2352. m_status->result = translate_coi_error(res);
  2353. return false;
  2354. }
  2355. report_coi_error(c_buf_copy, res);
  2356. }
  2357. }
  2358. else {
  2359. char *base = offload_get_src_base(m_vars[i].ptr,
  2360. m_vars[i].type.src);
  2361. res = COI::BufferWrite(
  2362. ptr_data->mic_buf,
  2363. ptr_data->mic_offset - ptr_data->alloc_disp +
  2364. m_vars[i].offset + m_vars[i].disp,
  2365. base + m_vars_extra[i].cpu_disp,
  2366. m_vars[i].size,
  2367. COI_COPY_UNSPECIFIED,
  2368. 0, 0,
  2369. event);
  2370. if (res != COI_SUCCESS) {
  2371. if (m_status != 0) {
  2372. m_status->result = translate_coi_error(res);
  2373. return false;
  2374. }
  2375. report_coi_error(c_buf_write, res);
  2376. }
  2377. }
  2378. ptr_sent += m_vars[i].size;
  2379. }
  2380. break;
  2381. case c_dv_data_slice:
  2382. case c_dv_ptr_data_slice:
  2383. if (m_vars[i].direction.in &&
  2384. m_vars[i].size > 0) {
  2385. PtrData *dst_data = m_vars[i].into ?
  2386. m_vars_extra[i].dst_data :
  2387. m_vars_extra[i].src_data;
  2388. PtrData* src_data =
  2389. (VAR_TYPE_IS_PTR(m_vars[i].type.src) ||
  2390. VAR_TYPE_IS_DV_DATA(m_vars[i].type.src) ||
  2391. VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) ||
  2392. VAR_TYPE_IS_SCALAR(m_vars[i].type.src) &&
  2393. m_vars[i].flags.is_static) ?
  2394. m_vars_extra[i].src_data : 0;
  2395. COIEVENT *event =
  2396. (is_async ||
  2397. m_vars[i].size >= __offload_use_async_buffer_write) ?
  2398. &m_in_deps[m_in_deps_total++] : 0;
  2399. if (m_vars[i].flags.is_noncont_src ||
  2400. m_vars[i].flags.is_noncont_dst) {
  2401. send_noncontiguous_pointer_data(
  2402. i, src_data, dst_data, event);
  2403. }
  2404. else if (src_data && src_data->cpu_buf != 0) {
  2405. res = COI::BufferCopy(
  2406. dst_data->mic_buf,
  2407. src_data->cpu_buf,
  2408. m_vars[i].offset - dst_data->alloc_disp +
  2409. dst_data->mic_offset +
  2410. m_vars[i].disp,
  2411. m_vars_extra[i].cpu_offset +
  2412. m_vars_extra[i].cpu_disp,
  2413. m_vars[i].size,
  2414. COI_COPY_UNSPECIFIED,
  2415. 0, 0,
  2416. event);
  2417. if (res != COI_SUCCESS) {
  2418. if (m_status != 0) {
  2419. m_status->result = translate_coi_error(res);
  2420. return false;
  2421. }
  2422. report_coi_error(c_buf_copy, res);
  2423. }
  2424. }
  2425. else {
  2426. char *base = offload_get_src_base(m_vars[i].ptr,
  2427. m_vars[i].type.src);
  2428. res = COI::BufferWrite(
  2429. dst_data->mic_buf,
  2430. dst_data->mic_offset - dst_data->alloc_disp +
  2431. m_vars[i].offset + m_vars[i].disp,
  2432. base + m_vars_extra[i].cpu_disp,
  2433. m_vars[i].size,
  2434. COI_COPY_UNSPECIFIED,
  2435. 0, 0,
  2436. event);
  2437. if (res != COI_SUCCESS) {
  2438. if (m_status != 0) {
  2439. m_status->result = translate_coi_error(res);
  2440. return false;
  2441. }
  2442. report_coi_error(c_buf_write, res);
  2443. }
  2444. }
  2445. ptr_sent += m_vars[i].size;
  2446. }
  2447. break;
  2448. default:
  2449. break;
  2450. }
  2451. // alloc field isn't used at target.
  2452. // We can reuse it for offset of array pointers.
  2453. if (m_vars_extra[i].is_arr_ptr_el) {
  2454. m_vars[i].ptr_arr_offset = m_vars_extra[i].ptr_arr_offset;
  2455. }
  2456. }
  2457. if (m_status) {
  2458. m_status->data_sent += ptr_sent;
  2459. }
  2460. OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), ptr_sent);
  2461. OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
  2462. c_offload_sent_pointer_data,
  2463. "Total pointer data sent to target: [%lld] bytes\n",
  2464. ptr_sent);
  2465. return true;
  2466. }
// Gather all copy-in ("in") data into the single input transfer area:
// either the mapped m_inout_buf COI buffer or the trailing data area of the
// function descriptor (when m_func_desc->data_offset != 0).  The layout is
// the VarDesc array first, then per-variable payloads marshalled by m_in in
// variable order — the target unmarshals in the same order.
// Returns false (with m_status->result set) on COI failure when a status
// object is present; otherwise report_coi_error terminates with a fatal error.
bool OffloadDescriptor::gather_copyin_data()
{
    OffloadTimer timer(get_timer_data(), c_offload_host_gather_inputs);

    if (m_need_runfunction && m_in_datalen > 0) {
        COIMAPINSTANCE map_inst;
        char *data;

        // init marshaller
        // NOTE(review): the map below is guarded by (m_inout_buf != 0) but
        // the unmap at the end by (m_func_desc->data_offset == 0).  These
        // look intended to be equivalent (the separate in/out buffer exists
        // exactly when the data does not fit in the misc area) — TODO
        // confirm; otherwise map_inst could be used uninitialized.
        if (m_inout_buf != 0) {
            OffloadTimer timer_map(get_timer_data(),
                                   c_offload_host_map_in_data_buffer);

            COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_in_datalen,
                                           COI_MAP_WRITE_ENTIRE_BUFFER,
                                           0, 0, 0, &map_inst,
                                           reinterpret_cast<void**>(&data));
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_map, res);
            }
        }
        else {
            // Small payload: marshal directly into the function descriptor's
            // embedded data area.
            data = (char*) m_func_desc + m_func_desc->data_offset;
        }

        // send variable descriptors
        memcpy(data, m_vars, m_vars_total * sizeof(VarDesc));
        data += m_vars_total * sizeof(VarDesc);

        // init marshaller
        m_in.init_buffer(data, m_in_datalen);

        // Gather copy data into buffer
        for (int i = 0; i < m_vars_total; i++) {
            // When data flows out (or there is no separate 'into'), the
            // association of record is the source's; otherwise the dest's.
            bool src_is_for_mic = (m_vars[i].direction.out ||
                                   m_vars[i].into == NULL);
            PtrData* ptr_data = src_is_for_mic ?
                                m_vars_extra[i].src_data :
                                m_vars_extra[i].dst_data;
            if (m_vars[i].flags.alloc_disp) {
                m_in.send_data(&ptr_data->alloc_disp,
                               sizeof(ptr_data->alloc_disp));
            }

            // send sink address to the target
            if (m_vars[i].flags.sink_addr) {
                m_in.send_data(&ptr_data->mic_addr,
                               sizeof(ptr_data->mic_addr));
            }

            switch (m_vars[i].type.dst) {
                case c_data_ptr_array:
                    break;
                case c_data:
                case c_void_ptr:
                case c_cean_var:
                    // Non-static scalar/CEAN input: copy the bytes inline.
                    if (m_vars[i].direction.in &&
                        !m_vars[i].flags.is_static_dstn) {
                        char *ptr = offload_get_src_base(m_vars[i].ptr,
                                                         m_vars[i].type.src);
                        if (m_vars[i].type.dst == c_cean_var) {
                            // offset and length are derived from the array
                            // descriptor
                            int64_t size = m_vars[i].size;
                            int64_t disp = m_vars[i].disp;
                            m_in.send_data(reinterpret_cast<char*>(&size),
                                           sizeof(int64_t));
                            m_in.send_data(reinterpret_cast<char*>(&disp),
                                           sizeof(int64_t));
                        }
                        m_in.send_data(ptr + m_vars_extra[i].cpu_disp,
                                       m_vars[i].size);
                    }
                    break;

                case c_dv:
                    if (m_vars[i].direction.bits ||
                        m_vars[i].alloc_if ||
                        m_vars[i].free_if) {
                        // send dope vector excluding base
                        char *ptr = static_cast<char*>(m_vars[i].ptr);
                        m_in.send_data(ptr + sizeof(uint64_t),
                                       m_vars[i].size - sizeof(uint64_t));
                    }
                    break;

                case c_data_ptr:
                    // send to target addresses of obsolete
                    // stacks to be released
                    if (m_vars[i].flags.is_stack_buf &&
                        !m_vars[i].direction.bits &&
                        m_vars[i].alloc_if &&
                        m_vars[i].size != 0) {
                        for (PtrDataList::iterator it =
                                 m_destroy_stack.begin();
                             it != m_destroy_stack.end(); it++) {
                            PtrData * ptr_data = *it;
                            m_in.send_data(&(ptr_data->mic_addr),
                                           sizeof(ptr_data->mic_addr));
                        }
                    }
                    break;

                case c_func_ptr:
                    if (m_vars[i].direction.in) {
                        m_in.send_func_ptr(*((const void**) m_vars[i].ptr));
                    }
                    break;

                default:
                    break;
            }
        }

        if (m_status) {
            m_status->data_sent += m_in.get_tfr_size();
        }

        if (m_func_desc->data_offset == 0) {
            OffloadTimer timer_unmap(get_timer_data(),
                                     c_offload_host_unmap_in_data_buffer);

            COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_unmap, res);
            }
        }
    }

    OFFLOAD_TIMER_HOST_SDATA(get_timer_data(), m_in.get_tfr_size());
    OFFLOAD_DEBUG_TRACE_1(1,
                  GET_OFFLOAD_NUMBER(get_timer_data()), c_offload_copyin_data,
                  "Total copyin data sent to target: [%lld] bytes\n",
                  m_in.get_tfr_size());

    return true;
}
// Dispatch the offloaded function ("run function") to the target device,
// passing the function descriptor (and, when embedded, the copy-in data) as
// misc data and reserving the embedded data area for the return payload.
// The compute task waits on all previously recorded input-transfer events;
// on success those events are replaced by the single compute-completion
// event, so later readbacks depend only on compute finishing.
// Returns false (with m_status->result set) on COI failure when a status
// object is present; otherwise report_coi_error terminates.
bool OffloadDescriptor::compute()
{
    OffloadTimer timer(get_timer_data(), c_offload_host_start_compute);

    if (m_need_runfunction) {
        OFFLOAD_DEBUG_TRACE_1(2, GET_OFFLOAD_NUMBER(get_timer_data()),
                              c_offload_compute, "Compute task on MIC\n");

        void* misc = m_func_desc;
        int   misc_len = m_func_desc_size;
        void* ret = 0;
        int   ret_len = 0;

        // data_offset != 0 means copyin data travels inside the function
        // descriptor; the same embedded area doubles as the return buffer.
        if (m_func_desc->data_offset != 0) {
            misc_len += m_in_datalen;

            if (m_out_datalen > 0) {
                ret = (char*) m_func_desc + m_func_desc->data_offset;
                ret_len = m_out_datalen;
            }
        }

        // dispatch task
        COIRESULT res;
        COIEVENT event;
        res = m_device.compute(m_compute_buffers,
                               misc, misc_len,
                               ret, ret_len,
                               m_in_deps_total,
                               m_in_deps_total > 0 ? m_in_deps : 0,
                               &event);
        if (res != COI_SUCCESS) {
            if (m_status != 0) {
                m_status->result = translate_coi_error(res);
                return false;
            }
            report_coi_error(c_pipeline_run_func, res);
        }

        // Collapse the dependency list to the single compute event.
        m_in_deps_total = 1;
        m_in_deps[0] = event;
    }

    return true;
}
// recieve pointer data if source or destination or both of them are
// noncontiguous. There is guarantee that length of destination enough for
// transfered data.
// (Note: "recieve" spelling is historical and part of the public name.)
//
// Walks the source (target-side) and destination (host-side) contiguous
// ranges in lock-step, transferring min(range_src, range_dst) bytes per
// iteration.  Each side keeps a cursor (offset_*) and the remaining length
// of its current contiguous run (length_*_cur); when a run is exhausted the
// next range is fetched from its CeanReadRanges iterator (or the walk ends
// for a contiguous side).
// Parameters: i       - variable index into m_vars/m_vars_extra;
//             base    - raw host destination address, used when dst_buf is 0;
//             dst_buf - host-side COI buffer, or 0 to BufferRead into 'base';
//             event   - optional completion event for the COI transfer.
// Returns false on COI error (status mode) or when destination ranges run
// out before the source's (c_destination_is_over).
bool OffloadDescriptor::recieve_noncontiguous_pointer_data(
    int i,
    char* base,
    COIBUFFER dst_buf,
    COIEVENT *event
)
{
    int64_t offset_src, offset_dst;
    int64_t length_src, length_dst;
    int64_t length_src_cur, length_dst_cur;
    int64_t recieve_size, data_recieved = 0;
    COIRESULT res;
    bool dst_is_empty = true;
    bool src_is_empty = true;

    // Set length_src and length_dst
    length_src = (m_vars_extra[i].read_rng_src) ?
        m_vars_extra[i].read_rng_src->range_size : m_vars[i].size;
    length_dst = !m_vars[i].into ? length_src :
                     (m_vars_extra[i].read_rng_dst) ?
                     m_vars_extra[i].read_rng_dst->range_size : m_vars[i].size;
    // Per-iteration chunk: limited by the shorter contiguous run.
    recieve_size = (length_src < length_dst) ? length_src : length_dst;

    // consequently get contiguous ranges,
    // define corresponded destination offset and recieve data
    do {
        // get sorce offset
        if (src_is_empty) {
            if (m_vars_extra[i].read_rng_src) {
                if (!get_next_range(m_vars_extra[i].read_rng_src,
                         &offset_src)) {
                    // source ranges are over - nothing to send
                    break;
                }
            }
            else if (data_recieved == 0) {
                // Contiguous source: single range starting at 0.
                offset_src = 0;
            }
            else {
                // Contiguous source fully consumed.
                break;
            }
            length_src_cur = length_src;
        }
        else {
            // if source is contiguous or its contiguous range is greater
            // than destination one
            offset_src += recieve_size;
        }
        length_src_cur -= recieve_size;
        src_is_empty = length_src_cur == 0;

        // get destination offset
        if (dst_is_empty) {
            if (m_vars[i].into) {
                if (m_vars_extra[i].read_rng_dst) {
                    if (!get_next_range(m_vars_extra[i].read_rng_dst,
                             &offset_dst)) {
                        // destination ranges are over
                        LIBOFFLOAD_ERROR(c_destination_is_over);
                        return false;
                    }
                }
                // destination is contiguous.
                else {
                    offset_dst = m_vars_extra[i].cpu_disp;
                }
                length_dst_cur = length_dst;
            }
            // same as source
            else {
                // No 'into': destination mirrors the source layout.
                offset_dst = offset_src;
                length_dst_cur = length_src;
            }
        }
        else {
            // if destination is contiguous or its contiguous range is greater
            // than source one
            offset_dst += recieve_size;
        }
        length_dst_cur -= recieve_size;
        dst_is_empty = length_dst_cur == 0;

        if (dst_buf != 0) {
            // Host destination is a COI buffer: device->host buffer copy.
            res = COI::BufferCopy(
                dst_buf,
                m_vars_extra[i].src_data->mic_buf,
                m_vars_extra[i].cpu_offset + offset_dst,
                m_vars[i].offset + offset_src +
                m_vars[i].mic_offset -
                m_vars_extra[i].src_data->alloc_disp,
                recieve_size,
                COI_COPY_UNSPECIFIED,
                m_in_deps_total,
                m_in_deps_total > 0 ? m_in_deps : 0,
                event);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_copy, res);
            }
        }
        else {
            // Raw host memory destination: read straight out of the device
            // buffer into base + offset_dst.
            res = COI::BufferRead(
                m_vars_extra[i].src_data->mic_buf,
                m_vars[i].offset + offset_src +
                m_vars[i].mic_offset -
                m_vars_extra[i].src_data->alloc_disp,
                base + offset_dst,
                recieve_size,
                COI_COPY_UNSPECIFIED,
                m_in_deps_total,
                m_in_deps_total > 0 ? m_in_deps : 0,
                event);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_read, res);
            }
        }
        data_recieved += recieve_size;
    }
    while (true);
    return true;
}
// Read back ("copyout") pointer-based data from the target after compute,
// and tear down buffers/associations whose free_if condition fired.
// All COI transfers wait on m_in_deps (the compute event after compute()
// ran); completion events are recorded in m_out_deps when the transfer is
// asynchronous, ordered after compute, or large.
// Parameter: is_async - true for an asynchronous offload; forces events.
// Returns false (with m_status->result set) on COI failure when a status
// object is present; otherwise report_coi_error terminates.
bool OffloadDescriptor::receive_pointer_data(bool is_async)
{
    OffloadTimer timer(get_timer_data(), c_offload_host_start_buffers_reads);

    uint64_t ptr_received = 0;
    COIRESULT res;

    for (int i = 0; i < m_vars_total; i++) {
        switch (m_vars[i].type.src) {
            case c_data_ptr_array:
                break;
            case c_data:
            case c_void_ptr:
            case c_cean_var:
                // Static host variables transferred out via their buffer
                // association.
                if (m_vars[i].direction.out &&
                    m_vars[i].flags.is_static) {
                    COIEVENT *event =
                        (is_async ||
                         m_in_deps_total > 0 ||
                         m_vars[i].size >= __offload_use_async_buffer_read) ?
                        &m_out_deps[m_out_deps_total++] : 0;
                    PtrData *ptr_data = NULL;
                    COIBUFFER dst_buf = NULL; // buffer at host
                    char *base;

                    // Pick the destination association: 'into' if present,
                    // else the source variable itself.
                    if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
                        ptr_data = m_vars[i].into ?
                                   m_vars_extra[i].dst_data :
                                   m_vars_extra[i].src_data;
                    }
                    else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
                        if (m_vars[i].flags.is_static_dstn) {
                            ptr_data = m_vars[i].into ?
                                       m_vars_extra[i].dst_data :
                                       m_vars_extra[i].src_data;
                        }
                    }
                    dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
                    if (dst_buf == NULL) {
                        // No host-side COI buffer: BufferRead into the raw
                        // host address instead.
                        base = offload_get_src_base(
                            m_vars[i].into ?
                            static_cast<char*>(m_vars[i].into) :
                            static_cast<char*>(m_vars[i].ptr),
                            m_vars[i].type.dst);
                    }

                    if (m_vars[i].flags.is_noncont_src ||
                        m_vars[i].flags.is_noncont_dst) {
                        recieve_noncontiguous_pointer_data(
                            i, base, dst_buf, event);
                    }
                    else if (dst_buf != 0) {
                        res = COI::BufferCopy(
                            dst_buf,
                            m_vars_extra[i].src_data->mic_buf,
                            m_vars_extra[i].cpu_offset +
                            m_vars_extra[i].cpu_disp,
                            m_vars[i].offset + m_vars[i].disp,
                            m_vars[i].size,
                            COI_COPY_UNSPECIFIED,
                            m_in_deps_total,
                            m_in_deps_total > 0 ? m_in_deps : 0,
                            event);
                        if (res != COI_SUCCESS) {
                            if (m_status != 0) {
                                m_status->result = translate_coi_error(res);
                                return false;
                            }
                            report_coi_error(c_buf_copy, res);
                        }
                    }
                    else {
                        res = COI::BufferRead(
                            m_vars_extra[i].src_data->mic_buf,
                            m_vars[i].offset + m_vars[i].disp,
                            base + m_vars_extra[i].cpu_offset +
                            m_vars_extra[i].cpu_disp,
                            m_vars[i].size,
                            COI_COPY_UNSPECIFIED,
                            m_in_deps_total,
                            m_in_deps_total > 0 ? m_in_deps : 0,
                            event);
                        if (res != COI_SUCCESS) {
                            if (m_status != 0) {
                                m_status->result = translate_coi_error(res);
                                return false;
                            }
                            report_coi_error(c_buf_read, res);
                        }
                    }
                    ptr_received += m_vars[i].size;
                }
                break;

            case c_string_ptr:
            case c_data_ptr:
            case c_cean_var_ptr:
            case c_dv_data:
            case c_dv_ptr_data:
            case c_dv_data_slice:
            case c_dv_ptr_data_slice:
            case c_dv_ptr: {
                COIBUFFER dst_buf = NULL; // buffer on host
                if (m_vars[i].direction.out && m_vars[i].size > 0) {
                    COIEVENT *event =
                        (is_async ||
                         m_in_deps_total > 0 ||
                         m_vars[i].size >= __offload_use_async_buffer_read) ?
                        &m_out_deps[m_out_deps_total++] : 0;

                    uint64_t dst_offset = 0;
                    char *base = static_cast<char*>(m_vars[i].ptr);

                    // Resolve host destination (buffer + offset, or a raw
                    // address) by destination variable kind.
                    if (VAR_TYPE_IS_PTR(m_vars[i].type.dst)) {
                        PtrData *ptr_data = m_vars[i].into ?
                                            m_vars_extra[i].dst_data :
                                            m_vars_extra[i].src_data;
                        dst_buf = ptr_data ? ptr_data->cpu_buf : NULL;
                        if (dst_buf == NULL) {
                            // Pointer variable with no buffer: dereference
                            // to get the pointee's host address.
                            base = m_vars[i].into ?
                                   *static_cast<char**>(m_vars[i].into) :
                                   *static_cast<char**>(m_vars[i].ptr);
                        }
                        dst_offset = m_vars_extra[i].cpu_offset +
                                     m_vars_extra[i].cpu_disp;
                    }
                    else if (VAR_TYPE_IS_SCALAR(m_vars[i].type.dst)) {
                        if (m_vars[i].flags.is_static_dstn) {
                            dst_buf = m_vars[i].into ?
                                      m_vars_extra[i].dst_data->cpu_buf :
                                      m_vars_extra[i].src_data->cpu_buf;
                        }
                        if (dst_buf == NULL) {
                            base = offload_get_src_base(
                                m_vars[i].into ?
                                static_cast<char*>(m_vars[i].into) :
                                static_cast<char*>(m_vars[i].ptr),
                                m_vars[i].type.dst);
                        }
                        dst_offset = m_vars_extra[i].cpu_offset +
                                     m_vars_extra[i].cpu_disp;
                    }
                    else if (VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst) ||
                             VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst)) {
                        PtrData *ptr_data = m_vars[i].into != 0 ?
                                            m_vars_extra[i].dst_data :
                                            m_vars_extra[i].src_data;
                        dst_buf = ptr_data != 0 ? ptr_data->cpu_buf : 0;
                        if (dst_buf == NULL) {
                            base = offload_get_src_base(
                                m_vars[i].into ?
                                static_cast<char*>(m_vars[i].into) :
                                static_cast<char*>(m_vars[i].ptr),
                                m_vars[i].type.dst);
                        }
                        dst_offset = m_vars_extra[i].cpu_offset +
                                     m_vars_extra[i].cpu_disp;
                    }

                    if (m_vars[i].flags.is_noncont_src ||
                        m_vars[i].flags.is_noncont_dst) {
                        recieve_noncontiguous_pointer_data(
                            i, base, dst_buf, event);
                    }
                    else if (dst_buf != 0) {
                        res = COI::BufferCopy(
                            dst_buf,
                            m_vars_extra[i].src_data->mic_buf,
                            dst_offset,
                            m_vars[i].offset + m_vars[i].disp +
                            m_vars[i].mic_offset -
                            m_vars_extra[i].src_data->alloc_disp,
                            m_vars[i].size,
                            COI_COPY_UNSPECIFIED,
                            m_in_deps_total,
                            m_in_deps_total > 0 ? m_in_deps : 0,
                            event);
                        if (res != COI_SUCCESS) {
                            if (m_status != 0) {
                                m_status->result = translate_coi_error(res);
                                return false;
                            }
                            report_coi_error(c_buf_copy, res);
                        }
                    }
                    else {
                        res = COI::BufferRead(
                            m_vars_extra[i].src_data->mic_buf,
                            m_vars[i].offset + m_vars[i].disp +
                            m_vars[i].mic_offset -
                            m_vars_extra[i].src_data->alloc_disp,
                            base + dst_offset,
                            m_vars[i].size,
                            COI_COPY_UNSPECIFIED,
                            m_in_deps_total,
                            m_in_deps_total > 0 ? m_in_deps : 0,
                            event);
                        if (res != COI_SUCCESS) {
                            if (m_status != 0) {
                                m_status->result = translate_coi_error(res);
                                return false;
                            }
                            report_coi_error(c_buf_read, res);
                        }
                    }
                    ptr_received += m_vars[i].size;
                }
                break;
            }

            default:
                break;
        }

        // destroy buffers for obsolete stacks
        if (m_destroy_stack.size() != 0) {
            for (PtrDataList::iterator it = m_destroy_stack.begin();
                 it != m_destroy_stack.end(); it++) {
                PtrData *ptr_data = *it;
                m_destroy_buffers.push_back(ptr_data->mic_buf);
                OFFLOAD_TRACE(3, "Removing stack buffer with addr %p\n",
                              ptr_data->mic_addr);
            }
            m_destroy_stack.clear();
        }

        if (m_vars[i].free_if) {
            // remove association for automatic variables
            if (m_is_openmp && !m_vars[i].flags.is_static &&
                (m_vars[i].type.src == c_data ||
                 m_vars[i].type.src == c_void_ptr ||
                 m_vars[i].type.src == c_cean_var)) {
                AutoData *auto_data = m_vars_extra[i].auto_data;
                if (auto_data != 0 && auto_data->remove_reference() == 0) {
                    m_device.remove_auto_data(auto_data->cpu_addr.start());
                }
            }

            // destroy buffers
            if (m_vars[i].direction.out || m_vars[i].into == NULL) {
                if (!VAR_TYPE_IS_PTR(m_vars[i].type.src) &&
                    !VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.src) &&
                    !VAR_TYPE_IS_DV_DATA(m_vars[i].type.src)) {
                    continue;
                }

                PtrData *ptr_data = m_vars_extra[i].src_data;
                if (ptr_data->remove_reference() == 0) {
                    // destroy buffers
                    // Buffers are only queued for destruction here; actual
                    // destruction happens later (after transfers complete).
                    if (ptr_data->cpu_buf != 0) {
                        m_destroy_buffers.push_back(ptr_data->cpu_buf);
                    }
                    if (ptr_data->mic_buf != 0) {
                        m_destroy_buffers.push_back(ptr_data->mic_buf);
                    }
                    OFFLOAD_TRACE(3, "Removing association for addr %p\n",
                                  ptr_data->cpu_addr.start());

                    // remove association from map
                    m_device.remove_ptr_data(ptr_data->cpu_addr.start());
                }
            }
            else if (VAR_TYPE_IS_PTR(m_vars[i].type.dst) ||
                     VAR_TYPE_IS_DV_DATA_SLICE(m_vars[i].type.dst) ||
                     VAR_TYPE_IS_DV_DATA(m_vars[i].type.dst)) {
                PtrData *ptr_data = m_vars_extra[i].dst_data;

                if (ptr_data->remove_reference() == 0) {
                    // destroy buffers
                    if (ptr_data->cpu_buf != 0) {
                        m_destroy_buffers.push_back(ptr_data->cpu_buf);
                    }
                    if (ptr_data->mic_buf != 0) {
                        m_destroy_buffers.push_back(ptr_data->mic_buf);
                    }
                    OFFLOAD_TRACE(3, "Removing association for addr %p\n",
                                  ptr_data->cpu_addr.start());

                    // remove association from map
                    m_device.remove_ptr_data(ptr_data->cpu_addr.start());
                }
            }
        }
    }

    if (m_status) {
        m_status->data_received += ptr_received;
    }

    OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), ptr_received);
    OFFLOAD_DEBUG_TRACE_1(1, GET_OFFLOAD_NUMBER(get_timer_data()),
                  c_offload_received_pointer_data,
                  "Total pointer data received from target: [%lld] bytes\n",
                  ptr_received);

    return true;
}
// Scatter the copy-out payload produced by the target back into host
// variables.  The output area (either the mapped m_inout_buf or the function
// descriptor's embedded data area) starts with target timer data, followed by
// per-variable payloads unmarshalled by m_out in variable order — the mirror
// of gather_copyin_data()'s layout.
// Returns false (with m_status->result set) on COI failure when a status
// object is present; otherwise report_coi_error terminates.
bool OffloadDescriptor::scatter_copyout_data()
{
    OffloadTimer timer(get_timer_data(), c_offload_host_scatter_outputs);

    if (m_need_runfunction && m_out_datalen > 0) {

        // total size that need to be transferred from target to host
        COIMAPINSTANCE map_inst;
        COIRESULT res;
        char *data;

        // output data buffer
        if (m_func_desc->data_offset == 0) {
            // Data came back in the separate in/out buffer: map it read-only.
            OffloadTimer timer_map(get_timer_data(),
                                   c_offload_host_map_out_data_buffer);

            COIRESULT res = COI::BufferMap(m_inout_buf, 0, m_out_datalen,
                                           COI_MAP_READ_ONLY, 0, 0, 0,
                                           &map_inst,
                                           reinterpret_cast<void**>(&data));
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_map, res);
            }
        }
        else {
            // Data returned inline in the function descriptor.
            data = (char*) m_func_desc + m_func_desc->data_offset;
        }

        // get timing data
        OFFLOAD_TIMER_TARGET_DATA(get_timer_data(), data);
        data += OFFLOAD_TIMER_DATALEN();

        // initialize output marshaller
        m_out.init_buffer(data, m_out_datalen);

        for (int i = 0; i < m_vars_total; i++) {
            switch (m_vars[i].type.src) {
                case c_data_ptr_array:
                    break;
                case c_data:
                case c_void_ptr:
                case c_cean_var:
                    // Non-static outputs travel inline in the copyout area.
                    if (m_vars[i].direction.out &&
                        !m_vars[i].flags.is_static) {

                        if (m_vars[i].into) {
                            char *ptr = offload_get_src_base(
                                static_cast<char*>(m_vars[i].into),
                                m_vars[i].type.dst);
                            m_out.receive_data(ptr + m_vars_extra[i].cpu_disp,
                                               m_vars[i].size);
                        }
                        else {
                            m_out.receive_data(
                                static_cast<char*>(m_vars[i].ptr) +
                                    m_vars_extra[i].cpu_disp,
                                m_vars[i].size);
                        }
                    }
                    break;

                case c_func_ptr:
                    if (m_vars[i].direction.out) {
                        m_out.receive_func_ptr((const void**) m_vars[i].ptr);
                    }
                    break;

                default:
                    break;
            }
        }

        if (m_status) {
            m_status->data_received += m_out.get_tfr_size();
        }

        if (m_func_desc->data_offset == 0) {
            OffloadTimer timer_unmap(get_timer_data(),
                                     c_offload_host_unmap_out_data_buffer);

            COIRESULT res = COI::BufferUnmap(map_inst, 0, 0, 0);
            if (res != COI_SUCCESS) {
                if (m_status != 0) {
                    m_status->result = translate_coi_error(res);
                    return false;
                }
                report_coi_error(c_buf_unmap, res);
            }
        }
    }

    OFFLOAD_TIMER_HOST_RDATA(get_timer_data(), m_out.get_tfr_size());
    OFFLOAD_TRACE(1, "Total copyout data received from target: [%lld] bytes\n",
                  m_out.get_tfr_size());

    return true;
}
  3124. void get_arr_desc_numbers(
  3125. const arr_desc *ap,
  3126. int64_t el_size,
  3127. int64_t &offset,
  3128. int64_t &size,
  3129. int &el_number,
  3130. CeanReadRanges* &ptr_ranges
  3131. )
  3132. {
  3133. if (is_arr_desc_contiguous(ap)) {
  3134. ptr_ranges = NULL;
  3135. __arr_data_offset_and_length(ap, offset, size);
  3136. el_number = size / el_size;
  3137. }
  3138. else {
  3139. ptr_ranges = init_read_ranges_arr_desc(ap);
  3140. el_number = (ptr_ranges->range_size / el_size) *
  3141. ptr_ranges->range_max_number;
  3142. size = ptr_ranges->range_size;
  3143. }
  3144. }
  3145. arr_desc * make_arr_desc(
  3146. void* ptr_val,
  3147. int64_t extent_start_val,
  3148. int64_t extent_elements_val,
  3149. int64_t size
  3150. )
  3151. {
  3152. arr_desc *res;
  3153. res = (arr_desc *)malloc(sizeof(arr_desc));
  3154. if (res == NULL)
  3155. LIBOFFLOAD_ERROR(c_malloc);
  3156. res->base = reinterpret_cast<int64_t>(ptr_val);
  3157. res->rank = 1;
  3158. res->dim[0].size = size;
  3159. res->dim[0].lindex = 0;
  3160. res->dim[0].lower = extent_start_val;
  3161. res->dim[0].upper = extent_elements_val + extent_start_val - 1;
  3162. res->dim[0].stride = 1;
  3163. return res;
  3164. }
  3165. bool OffloadDescriptor::gen_var_descs_for_pointer_array(int i)
  3166. {
  3167. int pointers_number;
  3168. int tmp_val;
  3169. int new_index = m_vars_total;
  3170. const arr_desc *ap;
  3171. const VarDesc3 *vd3 = static_cast<const VarDesc3*>(m_vars[i].ptr);
  3172. int flags = vd3->array_fields;
  3173. bool src_is_for_mic = (m_vars[i].direction.out ||
  3174. m_vars[i].into == NULL);
  3175. ReadArrElements<void *> ptr;
  3176. ReadArrElements<void *> into;
  3177. ReadArrElements<int64_t> ext_start;
  3178. ReadArrElements<int64_t> ext_elements;
  3179. ReadArrElements<int64_t> align;
  3180. ReadArrElements<int64_t> alloc_if;
  3181. ReadArrElements<int64_t> free_if;
  3182. ReadArrElements<int64_t> into_start;
  3183. ReadArrElements<int64_t> into_elem;
  3184. ReadArrElements<int64_t> alloc_start;
  3185. ReadArrElements<int64_t> alloc_elem;
  3186. ap = static_cast<const arr_desc*>(vd3->ptr_array);
  3187. // "pointers_number" for total number of transfered pointers.
  3188. // For each of them we create new var_desc and put it at the bottom
  3189. // of the var_desc's array
  3190. get_arr_desc_numbers(ap, sizeof(void *), ptr.offset, ptr.size,
  3191. pointers_number, ptr.ranges);
  3192. ptr.base = reinterpret_cast<char*>(ap->base);
  3193. // 2. prepare memory for new var_descs
  3194. m_vars_total += pointers_number;
  3195. m_vars = (VarDesc*)realloc(m_vars, m_vars_total * sizeof(VarDesc));
  3196. if (m_vars == NULL)
  3197. LIBOFFLOAD_ERROR(c_malloc);
  3198. m_vars_extra =
  3199. (VarExtra*)realloc(m_vars_extra, m_vars_total * sizeof(VarExtra));
  3200. if (m_vars_extra == NULL)
  3201. LIBOFFLOAD_ERROR(c_malloc);
  3202. m_in_deps =
  3203. (COIEVENT*)realloc(m_in_deps, sizeof(COIEVENT) * (m_vars_total + 1));
  3204. if (m_in_deps == NULL)
  3205. LIBOFFLOAD_ERROR(c_malloc);
  3206. m_out_deps =
  3207. (COIEVENT*)realloc(m_out_deps, sizeof(COIEVENT) * m_vars_total);
  3208. if (m_out_deps == NULL)
  3209. LIBOFFLOAD_ERROR(c_malloc);
  3210. // 3. Prepare for reading new var_desc's fields
  3211. // EXTENT START
  3212. if ((flags & (1<<flag_extent_start_is_array)) != 0) {
  3213. ap = static_cast<const arr_desc*>(vd3->extent_start);
  3214. get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, ext_start.offset,
  3215. ext_start.size, tmp_val, ext_start.ranges);
  3216. ext_start.base = reinterpret_cast<char*>(ap->base);
  3217. ext_start.el_size = ap->dim[ap->rank - 1].size;
  3218. if (tmp_val < pointers_number) {
  3219. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
  3220. return false;
  3221. }
  3222. }
  3223. else if ((flags & (1<<flag_extent_start_is_scalar)) != 0) {
  3224. ext_start.val = (int64_t)vd3->extent_start;
  3225. }
  3226. else {
  3227. ext_start.val = 0;
  3228. }
  3229. // EXTENT ELEMENTS NUMBER
  3230. if ((flags & (1<<flag_extent_elements_is_array)) != 0) {
  3231. ap = static_cast<const arr_desc*>(vd3->extent_elements);
  3232. get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
  3233. ext_elements.offset, ext_elements.size,
  3234. tmp_val, ext_elements.ranges);
  3235. ext_elements.base = reinterpret_cast<char*>(ap->base);
  3236. ext_elements.el_size = ap->dim[ap->rank - 1].size;
  3237. if (tmp_val < pointers_number) {
  3238. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
  3239. return false;
  3240. }
  3241. }
  3242. else if ((flags & (1<<flag_extent_elements_is_scalar)) != 0) {
  3243. ext_elements.val = (int64_t)vd3->extent_elements;
  3244. }
  3245. else {
  3246. ext_elements.val = m_vars[i].count;
  3247. }
  3248. // ALLOC_IF
  3249. if ((flags & (1<<flag_alloc_if_is_array)) != 0) {
  3250. ap = static_cast<const arr_desc*>(vd3->alloc_if_array);
  3251. get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_if.offset,
  3252. alloc_if.size, tmp_val, alloc_if.ranges);
  3253. alloc_if.base = reinterpret_cast<char*>(ap->base);
  3254. alloc_if.el_size = ap->dim[ap->rank - 1].size;
  3255. if (tmp_val < pointers_number) {
  3256. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
  3257. return false;
  3258. }
  3259. }
  3260. else {
  3261. alloc_if.val = m_vars[i].count;
  3262. }
  3263. // FREE_IF
  3264. if ((flags & (1<<flag_free_if_is_array)) != 0) {
  3265. ap = static_cast<const arr_desc*>(vd3->free_if_array);
  3266. get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, free_if.offset,
  3267. free_if.size, tmp_val, free_if.ranges);
  3268. free_if.base = reinterpret_cast<char*>(ap->base);
  3269. free_if.el_size = ap->dim[ap->rank - 1].size;
  3270. if (tmp_val < pointers_number) {
  3271. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
  3272. return false;
  3273. }
  3274. }
  3275. else {
  3276. free_if.val = m_vars[i].count;
  3277. }
  3278. // ALIGN
  3279. if ((flags & (1<<flag_align_is_array)) != 0) {
  3280. ap = static_cast<const arr_desc*>(vd3->align_array);
  3281. get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, align.offset,
  3282. align.size, tmp_val, align.ranges);
  3283. align.base = reinterpret_cast<char*>(ap->base);
  3284. align.el_size = ap->dim[ap->rank - 1].size;
  3285. if (tmp_val < pointers_number) {
  3286. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
  3287. return false;
  3288. }
  3289. }
  3290. else {
  3291. align.val = m_vars[i].align;
  3292. }
  3293. // 3.1 INTO
  3294. if (m_vars[i].into) {
  3295. ap = static_cast<const arr_desc*>(m_vars[i].into);
  3296. get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into.offset,
  3297. into.size, tmp_val, into.ranges);
  3298. into.base = reinterpret_cast<char*>(ap->base);
  3299. if (tmp_val < pointers_number) {
  3300. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
  3301. return false;
  3302. }
  3303. }
  3304. // 3.2 INTO_START
  3305. if ((flags & (1<<flag_into_start_is_array)) != 0) {
  3306. ap = static_cast<const arr_desc*>(vd3->into_start);
  3307. get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_start.offset,
  3308. into_start.size, tmp_val, into_start.ranges);
  3309. into_start.base = reinterpret_cast<char*>(ap->base);
  3310. into_start.el_size = ap->dim[ap->rank - 1].size;
  3311. if (tmp_val < pointers_number) {
  3312. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
  3313. return false;
  3314. }
  3315. }
  3316. else if ((flags & (1<<flag_into_start_is_scalar)) != 0) {
  3317. into_start.val = (int64_t)vd3->into_start;
  3318. }
  3319. else {
  3320. into_start.val = 0;
  3321. }
  3322. // 3.3 INTO_ELEMENTS
  3323. if ((flags & (1<<flag_into_elements_is_array)) != 0) {
  3324. ap = static_cast<const arr_desc*>(vd3->into_elements);
  3325. get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, into_elem.offset,
  3326. into_elem.size, tmp_val, into_elem.ranges);
  3327. into_elem.base = reinterpret_cast<char*>(ap->base);
  3328. into_elem.el_size = ap->dim[ap->rank - 1].size;
  3329. if (tmp_val < pointers_number) {
  3330. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
  3331. return false;
  3332. }
  3333. }
  3334. else if ((flags & (1<<flag_into_elements_is_scalar)) != 0) {
  3335. into_elem.val = (int64_t)vd3->into_elements;
  3336. }
  3337. else {
  3338. into_elem.val = m_vars[i].count;
  3339. }
  3340. // alloc_start
  3341. if ((flags & (1<<flag_alloc_start_is_array)) != 0) {
  3342. ap = static_cast<const arr_desc*>(vd3->alloc_start);
  3343. get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size,
  3344. alloc_start.offset, alloc_start.size, tmp_val,
  3345. alloc_start.ranges);
  3346. alloc_start.base = reinterpret_cast<char*>(ap->base);
  3347. alloc_start.el_size = ap->dim[ap->rank - 1].size;
  3348. if (tmp_val < pointers_number) {
  3349. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
  3350. return false;
  3351. }
  3352. }
  3353. else if ((flags & (1<<flag_alloc_start_is_scalar)) != 0) {
  3354. alloc_start.val = (int64_t)vd3->alloc_start;
  3355. }
  3356. else {
  3357. alloc_start.val = 0;
  3358. }
  3359. // alloc_elem
  3360. if ((flags & (1<<flag_alloc_elements_is_array)) != 0) {
  3361. ap = static_cast<const arr_desc*>(vd3->alloc_elements);
  3362. get_arr_desc_numbers(ap, ap->dim[ap->rank - 1].size, alloc_elem.offset,
  3363. alloc_elem.size, tmp_val, alloc_elem.ranges);
  3364. alloc_elem.base = reinterpret_cast<char*>(ap->base);
  3365. alloc_elem.el_size = ap->dim[ap->rank - 1].size;
  3366. if (tmp_val < pointers_number) {
  3367. LIBOFFLOAD_ERROR(c_pointer_array_mismatch,
  3368. "alloc_extent elements");
  3369. return false;
  3370. }
  3371. }
  3372. else if ((flags & (1<<flag_alloc_elements_is_scalar)) != 0) {
  3373. alloc_elem.val = (int64_t)vd3->alloc_elements;
  3374. }
  3375. else {
  3376. alloc_elem.val = 0;
  3377. }
  3378. for (int k = 0; k < pointers_number; k++) {
  3379. int type = flags & 0x3f;
  3380. int type_src, type_dst;
  3381. // Get new values
  3382. // type_src, type_dst
  3383. type_src = type_dst = (type == c_data_ptr_array) ?
  3384. c_data_ptr : (type == c_func_ptr_array) ?
  3385. c_func_ptr : (type == c_void_ptr_array) ?
  3386. c_void_ptr : (type == c_string_ptr_array) ?
  3387. c_string_ptr : 0;
  3388. // Get ptr val
  3389. if (!ptr.read_next(true)) {
  3390. break;
  3391. }
  3392. else {
  3393. ptr.val = (void*)(ptr.base + ptr.offset);
  3394. }
  3395. // !!! If we got error at phase of reading - it's an internal
  3396. // !!! error, as we must detect mismatch before
  3397. // Get into val
  3398. if (m_vars[i].into) {
  3399. if (!into.read_next(true)) {
  3400. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into");
  3401. LIBOFFLOAD_ABORT;
  3402. }
  3403. else {
  3404. into.val = (void*)(into.base + into.offset);
  3405. }
  3406. }
  3407. // Get other components of the clause
  3408. if (!ext_start.read_next(flags & (1<<flag_extent_start_is_array))) {
  3409. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent start");
  3410. LIBOFFLOAD_ABORT;
  3411. }
  3412. if (!ext_elements.read_next(
  3413. flags & (1<<flag_extent_elements_is_array))) {
  3414. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "extent elements");
  3415. LIBOFFLOAD_ABORT;
  3416. }
  3417. if (!alloc_if.read_next(flags & (1<<flag_alloc_if_is_array))) {
  3418. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_if");
  3419. LIBOFFLOAD_ABORT;
  3420. }
  3421. if (!free_if.read_next(flags & (1<<flag_free_if_is_array))) {
  3422. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "free_if");
  3423. LIBOFFLOAD_ABORT;
  3424. }
  3425. if (!align.read_next(flags & (1<<flag_align_is_array))) {
  3426. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "align");
  3427. LIBOFFLOAD_ABORT;
  3428. }
  3429. if (!into_start.read_next(flags & (1<<flag_into_start_is_array))) {
  3430. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent start");
  3431. LIBOFFLOAD_ABORT;
  3432. }
  3433. if (!into_elem.read_next(flags & (1<<flag_into_elements_is_array))) {
  3434. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "into_extent elements");
  3435. LIBOFFLOAD_ABORT;
  3436. }
  3437. if (!alloc_start.read_next(flags & (1<<flag_alloc_start_is_array))) {
  3438. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent start");
  3439. LIBOFFLOAD_ABORT;
  3440. }
  3441. if (!alloc_elem.read_next(
  3442. flags & (1<<flag_alloc_elements_is_array))) {
  3443. LIBOFFLOAD_ERROR(c_pointer_array_mismatch, "alloc_extent elements");
  3444. LIBOFFLOAD_ABORT;
  3445. }
  3446. m_vars[new_index + k].direction.bits = m_vars[i].direction.bits;
  3447. m_vars[new_index + k].alloc_if = alloc_if.val;
  3448. m_vars[new_index + k].free_if = free_if.val;
  3449. m_vars[new_index + k].align = align.val;
  3450. m_vars[new_index + k].mic_offset = 0;
  3451. m_vars[new_index + k].flags.bits = m_vars[i].flags.bits;
  3452. m_vars[new_index + k].offset = 0;
  3453. m_vars[new_index + k].size = m_vars[i].size;
  3454. if (ext_start.val == 0) {
  3455. m_vars[new_index + k].count = ext_elements.val;
  3456. m_vars[new_index + k].ptr = ptr.val;
  3457. if (type_src == c_string_ptr) {
  3458. m_vars[new_index + k].size = 0;
  3459. }
  3460. }
  3461. else {
  3462. m_vars[new_index + k].count = 0;
  3463. m_vars[new_index + k].ptr =
  3464. static_cast<void*>(make_arr_desc(
  3465. ptr.val,
  3466. ext_start.val,
  3467. ext_elements.val,
  3468. m_vars[i].size));
  3469. type_src = type_src == c_data_ptr ? c_cean_var_ptr :
  3470. c_string_ptr ? c_cean_var_ptr :
  3471. type_src;
  3472. if (!m_vars[i].into) {
  3473. type_dst = type_src;
  3474. }
  3475. }
  3476. if (m_vars[i].into && into_elem.val != 0) {
  3477. m_vars[new_index + k].into =
  3478. static_cast<void*>(make_arr_desc(
  3479. into.val,
  3480. into_start.val,
  3481. into_elem.val,
  3482. m_vars[i].size));
  3483. type_dst = (type == c_data_ptr_array) ? c_cean_var_ptr :
  3484. (type == c_string_ptr_array) ? c_cean_var_ptr :
  3485. type_src;
  3486. }
  3487. else {
  3488. m_vars[new_index + k].into = NULL;
  3489. }
  3490. if (alloc_elem.val != 0) {
  3491. m_vars[new_index + k].alloc =
  3492. static_cast<void*>(make_arr_desc(
  3493. ptr.val,
  3494. alloc_start.val,
  3495. alloc_elem.val,
  3496. m_vars[i].size));
  3497. }
  3498. else {
  3499. m_vars[new_index + k].alloc = NULL;
  3500. }
  3501. m_vars[new_index + k].type.src = type_src;
  3502. m_vars[new_index + k].type.dst = type_dst;
  3503. m_vars_extra[new_index + k].is_arr_ptr_el = 1;
  3504. m_vars_extra[new_index + k].ptr_arr_offset =
  3505. src_is_for_mic ? ptr.offset : into.offset;
  3506. }
  3507. // count and alloc fields are useless at target. They can be reused
  3508. // for pointer arrays.
  3509. m_vars[i].count = pointers_number;
  3510. m_vars[i].ptr_arr_offset = new_index;
  3511. return true;
  3512. }
  3513. static void __offload_fini_library(void)
  3514. {
  3515. OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ...\n");
  3516. if (mic_engines_total > 0) {
  3517. delete[] mic_engines;
  3518. if (mic_proxy_fs_root != 0) {
  3519. free(mic_proxy_fs_root);
  3520. mic_proxy_fs_root = 0;
  3521. }
  3522. if (mic_library_path != 0) {
  3523. free(mic_library_path);
  3524. mic_library_path = 0;
  3525. }
  3526. // destroy thread key
  3527. thread_key_delete(mic_thread_key);
  3528. }
  3529. // unload COI library
  3530. if (COI::is_available) {
  3531. COI::fini();
  3532. }
  3533. OFFLOAD_DEBUG_TRACE(2, "Cleanup offload library ... done\n");
  3534. }
// One-time initialization of the offload runtime. Invoked through
// __offload_run_once() from __offload_init_library(), so it needs no
// locking of its own. The sequence is strictly ordered:
//   1) configure tracing / reporting / timing from environment variables,
//   2) bring up COI and enumerate MIC devices,
//   3) build the logical->physical engine table,
//   4) read the remaining MIC_* / OFFLOAD_* tuning variables,
//   5) initialize ORSL.
// Any early 'return' leaves mic_engines_total == 0, which callers treat
// as "offload unavailable".
static void __offload_init_library_once(void)
{
    COIRESULT res;
    uint32_t num_devices;
    std::bitset<MIC_ENGINES_MAX> devices;

    prefix = report_get_message_str(c_report_host);

    // initialize trace level; only the low 4 bits of the value are used
    const char *env_var = getenv(htrace_envname);
    if (env_var != 0 && *env_var != '\0') {
        int64_t new_val;
        if (__offload_parse_int_string(env_var, new_val)) {
            console_enabled = new_val & 0x0f;
        }
    }

    // offload report level: only the three documented levels are accepted;
    // a malformed or out-of-range value produces an error message
    env_var = getenv(offload_report_envname);
    if (env_var != 0 && *env_var != '\0') {
        int64_t env_val;
        if (__offload_parse_int_string(env_var, env_val)) {
            if (env_val == OFFLOAD_REPORT_1 ||
                env_val == OFFLOAD_REPORT_2 ||
                env_val == OFFLOAD_REPORT_3) {
                offload_report_level = env_val;
            }
            else {
                LIBOFFLOAD_ERROR(c_invalid_env_report_value,
                                 offload_report_envname);
            }
        }
        else {
            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
                             offload_report_envname);
        }
    }
    // the timer env var is consulted only when reporting is off
    else if (!offload_report_level) {
        env_var = getenv(timer_envname);
        if (env_var != 0 && *env_var != '\0') {
            timer_enabled = atoi(env_var);
        }
    }

    // initialize COI; without it offload cannot work at all
    if (!COI::init()) {
        return;
    }

    // get number of devices installed in the system
    res = COI::EngineGetCount(COI_ISA_KNC, &num_devices);
    if (res != COI_SUCCESS) {
        return;
    }

    // clamp to the compile-time engine table capacity
    if (num_devices > MIC_ENGINES_MAX) {
        num_devices = MIC_ENGINES_MAX;
    }

    // fill in the list of devices that can be used for offloading
    env_var = getenv("OFFLOAD_DEVICES");
    if (env_var != 0) {
        // OFFLOAD_DEVICES=none disables all devices
        if (strcasecmp(env_var, "none") != 0) {
            // value is composed of comma separated physical device indexes;
            // strdup because strtok_r modifies its input in place
            char *buf = strdup(env_var);
            char *str, *ptr;
            for (str = strtok_r(buf, ",", &ptr); str != 0;
                 str = strtok_r(0, ",", &ptr)) {
                // convert string to an int
                int64_t num;
                if (!__offload_parse_int_string(str, num)) {
                    LIBOFFLOAD_ERROR(c_mic_init5);
                    // fallback to using all installed devices
                    devices.reset();
                    // NOTE(review): int vs uint32_t comparison; safe here
                    // because num_devices was clamped to MIC_ENGINES_MAX
                    for (int i = 0; i < num_devices; i++) {
                        devices.set(i);
                    }
                    break;
                }
                // out-of-range entries are reported and skipped, not fatal
                if (num < 0 || num >= num_devices) {
                    LIBOFFLOAD_ERROR(c_mic_init6, num);
                    continue;
                }
                devices.set(num);
            }
            free(buf);
        }
    }
    else {
        // use all available devices (those for which a handle is obtainable)
        for (int i = 0; i < num_devices; i++) {
            COIENGINE engine;
            res = COI::EngineGetHandle(COI_ISA_KNC, i, &engine);
            if (res == COI_SUCCESS) {
                devices.set(i);
            }
        }
    }

    mic_engines_total = devices.count();
    // no need to continue if there are no devices to offload to
    if (mic_engines_total <= 0) {
        return;
    }

    // initialize indexes for available devices: logical index l_idx is
    // assigned in ascending physical-index order
    mic_engines = new Engine[mic_engines_total];
    for (int p_idx = 0, l_idx = 0; p_idx < num_devices; p_idx++) {
        if (devices[p_idx]) {
            mic_engines[l_idx].set_indexes(l_idx, p_idx);
            l_idx++;
        }
    }

    // library search path for device binaries (freed in __offload_fini_library)
    env_var = getenv("MIC_LD_LIBRARY_PATH");
    if (env_var != 0) {
        mic_library_path = strdup(env_var);
    }

    // memory size reserved for COI buffers
    env_var = getenv("MIC_BUFFERSIZE");
    if (env_var != 0) {
        uint64_t new_size;
        if (__offload_parse_size_string(env_var, new_size)) {
            mic_buffer_size = new_size;
        }
        else {
            LIBOFFLOAD_ERROR(c_invalid_env_var_value, "MIC_BUFFERSIZE");
        }
    }

    // determine stacksize for the pipeline on the device;
    // must be at least 16KB and a multiple of 4KB (page size)
    env_var = getenv("MIC_STACKSIZE");
    if (env_var != 0 && *env_var != '\0') {
        uint64_t new_size;
        if (__offload_parse_size_string(env_var, new_size) &&
            (new_size >= 16384) && ((new_size & 4095) == 0)) {
            mic_stack_size = new_size;
        }
        else {
            LIBOFFLOAD_ERROR(c_mic_init3);
        }
    }

    // proxy I/O
    env_var = getenv("MIC_PROXY_IO");
    if (env_var != 0 && *env_var != '\0') {
        int64_t new_val;
        if (__offload_parse_int_string(env_var, new_val)) {
            mic_proxy_io = new_val;
        }
        else {
            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value, "MIC_PROXY_IO");
        }
    }
    // proxy filesystem root (freed in __offload_fini_library)
    env_var = getenv("MIC_PROXY_FS_ROOT");
    if (env_var != 0 && *env_var != '\0') {
        mic_proxy_fs_root = strdup(env_var);
    }

    // Prepare environment for the target process using the following
    // rules
    // - If MIC_ENV_PREFIX is set then any environment variable on the
    //   host which has that prefix are copied to the device without
    //   the prefix.
    //   All other host environment variables are ignored.
    // - If MIC_ENV_PREFIX is not set or if MIC_ENV_PREFIX="" then host
    //   environment is duplicated.
    env_var = getenv("MIC_ENV_PREFIX");
    if (env_var != 0 && *env_var != '\0') {
        mic_env_vars.set_prefix(env_var);

        int len = strlen(env_var);
        for (int i = 0; environ[i] != 0; i++) {
            // skip MIC_LD_LIBRARY_PATH (handled above) and exact-name
            // matches ("PREFIX=..." with nothing after the prefix)
            if (strncmp(environ[i], env_var, len) == 0 &&
                strncmp(environ[i], "MIC_LD_LIBRARY_PATH", 19) != 0 &&
                environ[i][len] != '=') {
                mic_env_vars.analyze_env_var(environ[i]);
            }
        }
    }

    // create key for thread data; a nonzero return means failure
    if (thread_key_create(&mic_thread_key, Engine::destroy_thread_data)) {
        LIBOFFLOAD_ERROR(c_mic_init4, errno);
        return;
    }

    // cpu frequency, used for timing/report conversions
    cpu_frequency = COI::PerfGetCycleFrequency();

    // use 2MB (huge page) buffers for allocations of at least this size
    env_var = getenv(mic_use_2mb_buffers_envname);
    if (env_var != 0 && *env_var != '\0') {
        uint64_t new_size;
        if (__offload_parse_size_string(env_var, new_size)) {
            __offload_use_2mb_buffers = new_size;
        }
        else {
            LIBOFFLOAD_ERROR(c_invalid_env_var_value,
                             mic_use_2mb_buffers_envname);
        }
    }

    // async buffer write threshold; parse errors are silently ignored here
    env_var = getenv(mic_use_async_buffer_write_envname);
    if (env_var != 0 && *env_var != '\0') {
        uint64_t new_size;
        if (__offload_parse_size_string(env_var, new_size)) {
            __offload_use_async_buffer_write = new_size;
        }
    }

    // async buffer read threshold; parse errors are silently ignored here
    env_var = getenv(mic_use_async_buffer_read_envname);
    if (env_var != 0 && *env_var != '\0') {
        uint64_t new_size;
        if (__offload_parse_size_string(env_var, new_size)) {
            __offload_use_async_buffer_read = new_size;
        }
    }

    // mic initialization type: on_offload, on_offload_all or
    // (non-Windows only) on_start
    env_var = getenv(offload_init_envname);
    if (env_var != 0 && *env_var != '\0') {
        if (strcmp(env_var, "on_offload") == 0) {
            __offload_init_type = c_init_on_offload;
        }
        else if (strcmp(env_var, "on_offload_all") == 0) {
            __offload_init_type = c_init_on_offload_all;
        }
#ifndef TARGET_WINNT
        else if (strcmp(env_var, "on_start") == 0) {
            __offload_init_type = c_init_on_start;
        }
#endif // TARGET_WINNT
        else {
            LIBOFFLOAD_ERROR(c_invalid_env_var_value, offload_init_envname);
        }
    }

    // active wait
    env_var = getenv(offload_active_wait_envname);
    if (env_var != 0 && *env_var != '\0') {
        int64_t new_val;
        if (__offload_parse_int_string(env_var, new_val)) {
            __offload_active_wait = new_val;
        }
        else {
            LIBOFFLOAD_ERROR(c_invalid_env_var_int_value,
                             offload_active_wait_envname);
        }
    }

    // omp device num (must be non-negative)
    env_var = getenv(omp_device_num_envname);
    if (env_var != 0 && *env_var != '\0') {
        int64_t new_val;
        if (__offload_parse_int_string(env_var, new_val) && new_val >= 0) {
            __omp_device_num = new_val;
        }
        else {
            LIBOFFLOAD_ERROR(c_omp_invalid_device_num_env,
                             omp_device_num_envname);
        }
    }

    // init ORSL
    ORSL::init();
}
  3778. extern int __offload_init_library(void)
  3779. {
  3780. // do one time intialization
  3781. static OffloadOnceControl ctrl = OFFLOAD_ONCE_CONTROL_INIT;
  3782. __offload_run_once(&ctrl, __offload_init_library_once);
  3783. // offload is available if COI is available and the number of devices > 0
  3784. bool is_available = COI::is_available && (mic_engines_total > 0);
  3785. // register pending libraries if there are any
  3786. if (is_available && __target_libs) {
  3787. mutex_locker_t locker(__target_libs_lock);
  3788. for (TargetImageList::iterator it = __target_libs_list.begin();
  3789. it != __target_libs_list.end(); it++) {
  3790. // Register library in COI
  3791. COI::ProcessRegisterLibraries(1, &it->data, &it->size,
  3792. &it->origin, &it->offset);
  3793. // add lib to all engines
  3794. for (int i = 0; i < mic_engines_total; i++) {
  3795. mic_engines[i].add_lib(*it);
  3796. }
  3797. }
  3798. __target_libs = false;
  3799. __target_libs_list.clear();
  3800. }
  3801. return is_available;
  3802. }
  3803. extern "C" void __offload_register_image(const void *target_image)
  3804. {
  3805. const struct Image *image = static_cast<const struct Image*>(target_image);
  3806. // decode image
  3807. const char *name = image->data;
  3808. const void *data = image->data + strlen(image->data) + 1;
  3809. uint64_t size = image->size;
  3810. const char *origin = 0;
  3811. uint64_t offset = 0;
  3812. // our actions depend on the image type
  3813. const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
  3814. switch (hdr->e_type) {
  3815. case ET_EXEC:
  3816. // Each offload application is supposed to have only one target
  3817. // image representing target executable.
  3818. // No thread synchronization is required here as the initialization
  3819. // code is always executed in a single thread.
  3820. if (__target_exe != 0) {
  3821. LIBOFFLOAD_ERROR(c_multiple_target_exes);
  3822. exit(1);
  3823. }
  3824. __target_exe = new TargetImage(name, data, size, origin, offset);
  3825. // Registration code for execs is always called from the context
  3826. // of main and thus we can safely call any function here,
  3827. // including LoadLibrary API on windows. This is the place where
  3828. // we do the offload library initialization.
  3829. if (__offload_init_library()) {
  3830. // initialize engine if init_type is on_start
  3831. if (__offload_init_type == c_init_on_start) {
  3832. for (int i = 0; i < mic_engines_total; i++) {
  3833. mic_engines[i].init();
  3834. }
  3835. }
  3836. }
  3837. break;
  3838. case ET_DYN:
  3839. // Registration code for libraries is called from the DllMain
  3840. // context (on windows) and thus we cannot do anything usefull
  3841. // here. So we just add it to the list of pending libraries for
  3842. // the later use.
  3843. __target_libs_lock.lock();
  3844. __target_libs = true;
  3845. __target_libs_list.push_back(TargetImage(name, data, size,
  3846. origin, offset));
  3847. __target_libs_lock.unlock();
  3848. break;
  3849. default:
  3850. // something is definitely wrong, issue an error and exit
  3851. LIBOFFLOAD_ERROR(c_unknown_binary_type);
  3852. exit(1);
  3853. }
  3854. }
  3855. extern "C" void __offload_unregister_image(const void *target_image)
  3856. {
  3857. // Target image is packed as follows:
  3858. // 8 bytes - size of the target binary
  3859. // null-terminated string - binary name
  3860. // <size> bytes - binary contents
  3861. const struct Image {
  3862. int64_t size;
  3863. char data[];
  3864. } *image = static_cast<const struct Image*>(target_image);
  3865. // decode image
  3866. const char *name = image->data;
  3867. const void *data = image->data + strlen(image->data) + 1;
  3868. // our actions depend on the image type
  3869. const Elf64_Ehdr *hdr = static_cast<const Elf64_Ehdr*>(data);
  3870. if (hdr->e_type == ET_EXEC) {
  3871. // We are executing exec's desctructors.
  3872. // It is time to do a library cleanup.
  3873. if (timer_enabled) {
  3874. Offload_Timer_Print();
  3875. }
  3876. #ifdef MYO_SUPPORT
  3877. __offload_myoFini();
  3878. #endif // MYO_SUPPORT
  3879. __offload_fini_library();
  3880. }
  3881. }
  3882. // Runtime trace interface for user programs
  3883. void __offload_console_trace(int level)
  3884. {
  3885. console_enabled = level;
  3886. }
  3887. // User-visible offload API
  3888. int _Offload_number_of_devices(void)
  3889. {
  3890. __offload_init_library();
  3891. return mic_engines_total;
  3892. }
  3893. int _Offload_get_device_number(void)
  3894. {
  3895. return -1;
  3896. }
  3897. int _Offload_get_physical_device_number(void)
  3898. {
  3899. return -1;
  3900. }
  3901. int _Offload_signaled(int index, void *signal)
  3902. {
  3903. __offload_init_library();
  3904. // check index value
  3905. if (index < 0 || mic_engines_total <= 0) {
  3906. LIBOFFLOAD_ERROR(c_offload_signaled1, index);
  3907. LIBOFFLOAD_ABORT;
  3908. }
  3909. // find associated async task
  3910. OffloadDescriptor *task =
  3911. mic_engines[index % mic_engines_total].find_signal(signal, false);
  3912. if (task == 0) {
  3913. LIBOFFLOAD_ERROR(c_offload_signaled2, signal);
  3914. LIBOFFLOAD_ABORT;
  3915. }
  3916. return task->is_signaled();
  3917. }
  3918. void _Offload_report(int val)
  3919. {
  3920. if (val == OFFLOAD_REPORT_ON ||
  3921. val == OFFLOAD_REPORT_OFF) {
  3922. offload_report_enabled = val;
  3923. }
  3924. }
// IDB support
// Interface consumed by the Intel debugger; the names, types and
// initial values below form an external contract (API version 1.0) -
// do not rename or reorder them.
int __dbg_is_attached = 0;                          // nonzero once a debugger attaches (presumably set by the debugger itself - TODO confirm)
int __dbg_target_id = -1;                           // device id being debugged; -1 = none
pid_t __dbg_target_so_pid = -1;                     // pid of the target-side process; -1 = none
char __dbg_target_exe_name[MAX_TARGET_NAME] = {0};  // target executable name buffer
const int __dbg_api_major_version = 1;              // debugger API major version
const int __dbg_api_minor_version = 0;              // debugger API minor version
// Intentionally empty - NOTE(review): presumably a breakpoint anchor the
// debugger sets to observe target shared-object load events; confirm
// against the debugger-side protocol.
void __dbg_target_so_loaded()
{
}
// Intentionally empty - NOTE(review): presumably a breakpoint anchor the
// debugger sets to observe target shared-object unload events; confirm
// against the debugger-side protocol.
void __dbg_target_so_unloaded()
{
}