123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472 |
- /* Overlay manager for SPU.
- Copyright (C) 2006-2015 Free Software Foundation, Inc.
- This file is part of the GNU Binutils.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston,
- MA 02110-1301, USA. */
- /* MFC DMA definitions: values the overlay load path (do_load) writes
- to the MFC channels. */
- #define MFC_GET_CMD 0x40 /* Written to $MFC_Cmd to queue each transfer. */
- #define MFC_MAX_DMA_SIZE 0x4000 /* Largest size written to $MFC_Size; bigger overlays are split. */
- #define MFC_TAG_UPDATE_ALL 2 /* Written to $MFC_WrTagUpdate before reading $MFC_RdTagStat. */
- #define MFC_TAG_ID 0 /* Written to $MFC_TagId; also picks the tag-mask bit waited on. */
- /* Register usage.
- Each working value in this file is a preprocessor alias for one of nine
- registers. reserved1-reserved5 ($75-$79) are written without being
- saved first. save1-save3 ($74-$72) are stored below $sp before they are
- clobbered and reloaded before control leaves this file; save4 ($71) is
- saved and used only when OVLY_IRQ_SAVE is defined. Aliases of the same
- register share storage, so two of them must not be live at once. */
- #define reserved1 $75
- #define parm $75 /* stub argument read by the OVL_STUB_SIZE == 8 entry of __ovly_load */
- #define tab1 reserved1
- #define tab2 reserved1
- #define vma reserved1 /* _ovly_table entry; slot 0 doubles as the DMA local-store address */
- #define oldvma reserved1
- #define newmask reserved1
- #define map reserved1
- #define reserved2 $76
- #define off1 reserved2
- #define off2 reserved2
- #define present1 reserved2
- #define present2 reserved2
- #define sz reserved2
- #define cmp reserved2
- #define add64 reserved2
- #define cgbits reserved2
- #define off3 reserved2
- #define off4 reserved2
- #define addr4 reserved2
- #define off5 reserved2
- #define tagstat reserved2
- #define reserved3 $77
- #define size1 reserved3
- #define size2 reserved3
- #define rv3 reserved3
- #define ealo reserved3
- #define cmd reserved3
- #define off64 reserved3
- #define tab3 reserved3
- #define tab4 reserved3
- #define tab5 reserved3
- #define reserved4 $78
- #define ovl reserved4 /* overlay index being entered or returned to */
- #define rv2 reserved4
- #define rv5 reserved4
- #define cgshuf reserved4
- #define newovl reserved4
- #define irqtmp1 reserved4
- #define irqtmp2 reserved4
- #define reserved5 $79
- #define target reserved5 /* final branch destination; the only alias of $79 */
- #define save1 $74 /* saved at -16($sp) */
- #define rv4 save1
- #define rv7 save1
- #define tagid save1
- #define maxsize save1
- #define pbyte save1
- #define pbit save1
- #define save2 $73 /* saved at -32($sp) */
- #define cur save2
- #define rv6 save2
- #define osize save2
- #define zovl save2
- #define oldovl save2
- #define newvma save2
- #define save3 $72 /* saved at -48($sp) */
- #define rv1 save3
- #define ea64 save3
- #define buf3 save3
- #define genwi save3
- #define newmap save3
- #define oldmask save3
- #define save4 $71 /* saved at -64($sp), OVLY_IRQ_SAVE builds only */
- #define irq_stat save4
- .text /* The three 16-byte objects below are placed in .text with the
- code; the code reaches them with lqr/stqr. */
- .align 4
- .type __rv_pattern, @object
- .size __rv_pattern, 16
- __rv_pattern: /* shufb control used by __ovly_load to start building the
- new $lr: word 0 comes from word 0 of the first source operand, word 1
- from word 0 of the second, and words 2-3 become zero. */
- .word 0x00010203, 0x10111213, 0x80808080, 0x80808080
- .type __cg_pattern, @object
- .size __cg_pattern, 16
- __cg_pattern: /* shufb control used by the transfer loop: copies word 1
- of the source (the carry out of the low 32 bits) into word 0 and zeroes
- the rest, so addx can propagate it into the high word. */
- .word 0x04050607, 0x80808080, 0x80808080, 0x80808080
- .type __ovly_current, @object
- .size __ovly_current, 16
- __ovly_current: /* Quadword holding the current overlay index; written
- with stqr by __ovly_return and __ovly_load, read back by __ovly_load and
- do_load. Initially zero. */
- .space 16
- /*
- * __ovly_return - stub for returning from overlay functions.
- *
- * On entry the four slots of $lr are:
- * __ovly_return, prev ovl index, caller return addr, undefined.
- *
- * Load the previous overlay and jump to the caller return address.
- * Updates __ovly_current.
- *
- * The _ovly_table entry of the previous overlay is fetched and the low
- * bit of its size word tested. If the bit is set the code branches
- * straight to the caller return address; if clear it branches to
- * do_load, having first stored save1-save3 (and save4 when
- * OVLY_IRQ_SAVE is defined) below $sp for do_load to clobber and reload.
- *
- * NOTE(review): the trailing "# p,l c" annotations look like issue
- * pipeline, latency and cycle, and the commented-out nop/lnop lines
- * like unused issue slots - confirm against the SPU pipeline docs.
- */
- .align 4
- .global __ovly_return
- .type __ovly_return, @function
- __ovly_return:
- ila tab1, _ovly_table - 16 # 0,2 0 -- table base less one 16-byte entry, so index 1 selects entry 0
- shlqbyi ovl, $lr, 4 # 1,4 0 -- slot 1 of $lr (prev ovl index) into slot 0
- #nop
- shlqbyi target, $lr, 8 # 1,4 1 -- slot 2 of $lr (caller return addr) into slot 0
- #nop; lnop
- #nop; lnop
- shli off1, ovl, 4 # 0,4 4 -- byte offset of the entry: index * 16
- #lnop
- #nop
- hbr ovly_ret9, target # 1,15 5 -- hint for the bi at ovly_ret9
- #nop; lnop
- #nop; lnop
- #nop
- lqx vma, tab1, off1 # 1,6 8 -- fetch the _ovly_table entry
- #ifdef OVLY_IRQ_SAVE
- nop
- stqd save4, -64($sp) # 1,6 9 -- do_load uses save4 as irq_stat
- #else
- #nop; lnop
- #endif
- #nop; lnop
- #nop; lnop
- #nop; lnop
- #nop; lnop
- #nop
- rotqbyi size1, vma, 4 # 1,4 14 -- size word of the entry into slot 0
- #nop
- stqd save3, -48($sp) # 1,6 15 -- saved for do_load; not modified on the fast path
- #nop
- stqd save2, -32($sp) # 1,6 16
- #nop
- stqd save1, -16($sp) # 1,6 17
- andi present1, size1, 1 # 0,2 18 -- low bit of size: set when the overlay is loaded
- stqr ovl, __ovly_current # 1,6 18 -- record the overlay being returned to
- #nop; lnop
- #nop
- brz present1, do_load # 1,4 20 -- not loaded: go and fetch it
- ovly_ret9:
- #nop
- bi target # 1,4 21 -- already loaded: return to the caller
- /*
- * __ovly_load - copy an overlay partition to local store.
- *
- * On entry $75 points to a word consisting of the overlay index in
- * the top 14 bits, and the target address in the bottom 18 bits.
- *
- * Sets up $lr to return via __ovly_return. If $lr is already set
- * to return via __ovly_return, don't change it. In that case we
- * have a tail call from one overlay function to another.
- * Updates __ovly_current.
- *
- * The $75 convention above is what the OVL_STUB_SIZE == 8 variant
- * decodes. The other variant never writes ovl ($78) or target ($79)
- * before using them - presumably the 16-byte stub loads both before
- * branching here; confirm against the stub generator.
- *
- * The new $lr is built as
- * __ovly_return, __ovly_current on entry, old $lr slot 0, old $lr slot 1
- * which is the layout __ovly_return expects. save1-save3 are used as
- * temporaries and reloaded from the stack before the final branch; the
- * stack copies are left in place for do_load.
- */
- .align 3
- .global __ovly_load
- .type __ovly_load, @function
- __ovly_load:
- #if OVL_STUB_SIZE == 8
- ########
- #nop
- lqd target, 0(parm) # 1,6 -11 -- quadword containing the stub word
- #nop; lnop
- #nop; lnop
- #nop; lnop
- #nop; lnop
- #nop; lnop
- #nop
- rotqby target, target, parm # 1,4 -5 -- rotate the addressed word into slot 0
- ila tab2, _ovly_table - 16 # 0,2 -4 -- table base less one entry (parm is dead from here)
- stqd save3, -48($sp) # 1,6 -4
- #nop
- stqd save2, -32($sp) # 1,6 -3
- #nop
- stqd save1, -16($sp) # 1,6 -2
- rotmi ovl, target, -18 # 0,4 -1 -- logical shift right 18: the top 14 bits, the overlay index
- hbr ovly_load9, target # 1,15 -1 -- hint for the bi at ovly_load9
- ila rv1, __ovly_return # 0,2 0
- #lnop
- #nop; lnop
- #nop
- lqr cur, __ovly_current # 1,6 2 -- overlay we are being called from
- shli off2, ovl, 4 # 0,4 3 -- byte offset of the entry: index * 16
- stqr ovl, __ovly_current # 1,6 3 -- record the overlay being entered
- ceq rv2, $lr, rv1 # 0,2 4 -- slot 0 all ones when $lr already returns via __ovly_return
- lqr rv3, __rv_pattern # 1,6 4
- #nop; lnop
- #nop; lnop
- #nop
- lqx vma, tab2, off2 # 1,6 7 -- fetch the _ovly_table entry
- ########
- #else /* OVL_STUB_SIZE == 16 */
- ########
- ila tab2, _ovly_table - 16 # 0,2 0 -- table base less one entry
- stqd save3, -48($sp) # 1,6 0
- ila rv1, __ovly_return # 0,2 1
- stqd save2, -32($sp) # 1,6 1
- shli off2, ovl, 4 # 0,4 2 -- byte offset of the entry: index * 16
- lqr cur, __ovly_current # 1,6 2 -- overlay we are being called from
- nop
- stqr ovl, __ovly_current # 1,6 3 -- record the overlay being entered
- ceq rv2, $lr, rv1 # 0,2 4 -- slot 0 all ones when $lr already returns via __ovly_return
- lqr rv3, __rv_pattern # 1,6 4
- #nop
- hbr ovly_load9, target # 1,15 5 -- hint for the bi at ovly_load9
- #nop
- lqx vma, tab2, off2 # 1,6 6 -- fetch the _ovly_table entry
- #nop
- stqd save1, -16($sp) # 1,6 7
- ########
- #endif
- #nop; lnop
- #nop; lnop
- #nop
- shufb rv4, rv1, cur, rv3 # 1,4 10 -- rv4 = __ovly_return, cur, 0, 0
- #nop
- fsmb rv5, rv2 # 1,4 11 -- widen the compare result to a whole-quadword select mask
- #nop
- rotqmbyi rv6, $lr, -8 # 1,4 12 -- shift right 8 bytes: old $lr slots 0-1 into slots 2-3
- #nop
- rotqbyi size2, vma, 4 # 1,4 13 -- size word of the entry into slot 0
- #nop
- lqd save3, -48($sp) # 1,6 14 -- rv1 no longer needed, restore $72
- #nop; lnop
- or rv7, rv4, rv6 # 0,2 16 -- candidate replacement for $lr
- lqd save2, -32($sp) # 1,6 16 -- cur and rv6 consumed, restore $73
- andi present2, size2, 1 # 0,2 17 -- low bit of size: set when the overlay is loaded
- #ifdef OVLY_IRQ_SAVE
- stqd save4, -64($sp) # 1,6 17 -- do_load uses save4 as irq_stat
- #else
- lnop # 1,0 17
- #endif
- selb $lr, rv7, $lr, rv5 # 0,2 18 -- keep $lr where the mask is set, else take rv7
- lqd save1, -16($sp) # 1,6 18 -- rv7 consumed, restore $74
- #nop
- brz present2, do_load # 1,4 19 -- not loaded: go and fetch it
- ovly_load9:
- #nop
- bi target # 1,4 20 -- already loaded: enter the overlay function
- /* If we get here, we are about to load a new overlay.
- * "vma" contains the relevant entry from _ovly_table[].
- * extern struct {
- * u32 vma;
- * u32 size;
- * u32 file_offset;
- * u32 buf;
- * } _ovly_table[];
- *
- * Reached from __ovly_return and __ovly_load with target holding the
- * final branch destination and save1-save3 (plus save4 when
- * OVLY_IRQ_SAVE is defined) already stored below $sp. This prologue
- * pulls the file_offset and size fields out of the entry and loads the
- * base address and carry pattern for the transfer loop that follows.
- * With OVLY_IRQ_SAVE it first reads the machine status into irq_stat
- * and disables interrupts; they are re-enabled near do_load90 when the
- * low bit of irq_stat was set.
- */
- .align 3
- .global __ovly_load_event
- .type __ovly_load_event, @function
- __ovly_load_event:
- do_load:
- #ifdef OVLY_IRQ_SAVE
- ila irqtmp1, do_load10 # 0,2 -5 -- address of the next instruction after the bid
- rotqbyi sz, vma, 8 # 1,4 -5 -- file_offset field into slot 0
- #nop
- rdch irq_stat, $SPU_RdMachStat # 1,6 -4 -- remember the machine status for later
- #nop
- bid irqtmp1 # 1,4 -3 -- branch to do_load10 with interrupts disabled
- do_load10:
- nop
- #else
- #nop
- rotqbyi sz, vma, 8 # 1,4 0 -- file_offset field into slot 0
- #endif
- rotqbyi osize, vma, 4 # 1,4 1 -- size field into slot 0: amount still to transfer
- #nop
- lqa ea64, _EAR_ # 1,6 2 -- 64-bit base address in slots 0-1; presumably the overlay image in main storage
- #nop
- lqr cgshuf, __cg_pattern # 1,6 3 -- shuffle control for the carry in the 64-bit add
- /* We could predict the branch at the end of this loop by adding a few
- instructions, and there are plenty of free cycles to do so without
- impacting loop execution time. However, it doesn't make a great
- deal of sense since we need to wait for the dma to complete anyway.
- Each pass advances the 64-bit address in ea64 by the value in slot 0
- of sz (the file offset on the first pass, the size of the previous
- chunk afterwards), queues one transfer of min(osize, MFC_MAX_DMA_SIZE)
- to local-store address vma, then advances vma and reduces osize by
- that amount. The loop ends when osize reaches zero. Completion is not
- awaited here. */
- __ovly_xfer_loop:
- #nop
- rotqmbyi off64, sz, -4 # 1,4 4 -- shift right 4 bytes: 64-bit addend with a zero high word
- #nop; lnop
- #nop; lnop
- #nop; lnop
- cg cgbits, ea64, off64 # 0,2 8 -- per-word carry out of ea64 + off64
- #lnop
- #nop; lnop
- #nop
- shufb add64, cgbits, cgbits, cgshuf # 1,4 10 -- move the low-word carry up to slot 0
- #nop; lnop
- #nop; lnop
- #nop; lnop
- addx add64, ea64, off64 # 0,2 14 -- ea64 + off64 + carry, giving a 64-bit sum in slots 0-1
- #lnop
- ila maxsize, MFC_MAX_DMA_SIZE # 0,2 15
- lnop
- ori ea64, add64, 0 # 0,2 16 -- copy the sum back: new running address
- rotqbyi ealo, add64, 4 # 1,4 16 -- low 32 bits of the address into slot 0
- cgt cmp, osize, maxsize # 0,2 17 -- is more than one chunk left?
- wrch $MFC_LSA, vma # 1,6 17 -- local-store destination
- #nop; lnop
- selb sz, osize, maxsize, cmp # 0,2 19 -- sz = min(osize, maxsize)
- wrch $MFC_EAH, ea64 # 1,6 19 -- high 32 bits of the source address
- ila tagid, MFC_TAG_ID # 0,2 20
- wrch $MFC_EAL, ealo # 1,6 20 -- low 32 bits of the source address
- ila cmd, MFC_GET_CMD # 0,2 21
- wrch $MFC_Size, sz # 1,6 21
- sf osize, sz, osize # 0,2 22 -- osize = osize - sz
- wrch $MFC_TagId, tagid # 1,6 22
- a vma, vma, sz # 0,2 23 -- advance the destination for the next chunk
- wrch $MFC_Cmd, cmd # 1,6 23 -- queue the transfer
- #nop
- brnz osize, __ovly_xfer_loop # 1,4 24 -- more to transfer
- /* Now update our data structures while waiting for DMA to complete.
- Low bit of .size needs to be cleared on the _ovly_table entry
- corresponding to the evicted overlay, and set on the entry for the
- newly loaded overlay. Note that no overlay may in fact be evicted
- as _ovly_buf_table[] starts with all zeros. Don't zap .size entry
- for zero index! Also of course update the _ovly_buf_table entry.
- This section sets the bit on the new overlay's entry and replaces the
- _ovly_buf_table word for its buffer with the new overlay index,
- leaving the index previously stored there in oldovl. The evicted
- entry's bit is cleared later, after the DMA wait. _ovly_buf_table is
- indexed by the buf field times 4, from a base biased by one word. */
- #nop
- lqr newovl, __ovly_current # 1,6 25 -- index of the overlay being loaded
- #nop; lnop
- #nop; lnop
- #nop; lnop
- #nop; lnop
- #nop; lnop
- shli off3, newovl, 4 # 0,4 31 -- byte offset of its entry: index * 16
- #lnop
- ila tab3, _ovly_table - 16 # 0,2 32 -- table base less one entry
- #lnop
- #nop
- fsmbi pbyte, 0x100 # 1,4 33 -- 0xff in byte 7 only, the low byte of the size word
- #nop; lnop
- #nop
- lqx vma, tab3, off3 # 1,6 35 -- reload the entry; the loop advanced the old copy
- #nop; lnop
- andi pbit, pbyte, 1 # 0,2 37 -- value 1 in the size word, zero elsewhere
- lnop
- #nop; lnop
- #nop; lnop
- #nop; lnop
- or newvma, vma, pbit # 0,2 41 -- entry with the low bit of size set
- rotqbyi buf3, vma, 12 # 1,4 41 -- buf field into slot 0
- #nop; lnop
- #nop
- stqx newvma, tab3, off3 # 1,6 43 -- write the entry back
- #nop; lnop
- shli off4, buf3, 2 # 1,4 45 -- byte offset into _ovly_buf_table: buf * 4
- #lnop
- ila tab4, _ovly_buf_table - 4 # 0,2 46 -- table base less one word
- #lnop
- #nop; lnop
- #nop; lnop
- #nop
- lqx map, tab4, off4 # 1,6 49 -- quadword containing the buffer word
- #nop
- cwx genwi, tab4, off4 # 1,4 50 -- shuffle control that inserts a word at that address
- a addr4, tab4, off4 # 0,2 51 -- address of the buffer word
- #lnop
- #nop; lnop
- #nop; lnop
- #nop; lnop
- #nop
- rotqby oldovl, map, addr4 # 1,4 55 -- index previously recorded for this buffer, into slot 0
- #nop
- shufb newmap, newovl, map, genwi # 0,4 56 -- same quadword with the buffer word replaced by newovl
- #if MFC_TAG_ID < 16
- ila newmask, 1 << MFC_TAG_ID # 0,2 57 -- tag mask selecting only the overlay tag
- #else
- ilhu newmask, 1 << (MFC_TAG_ID - 16) # 0,2 57 -- same mask, for a bit in the upper halfword
- #endif
- #lnop
- #nop; lnop
- #nop; lnop
- stqd newmap, 0(addr4) # 1,6 60 -- store the updated _ovly_buf_table quadword
- /* Save app's tagmask, wait for DMA complete, restore mask.
- After the wait this section clears the low bit of .size on the entry
- of the evicted overlay (oldovl), unless oldovl is zero, in which case
- the quadword is written back unchanged. It then reloads save1-save3
- (and save4 when OVLY_IRQ_SAVE is defined) from the stack and branches
- to target. */
- ila tagstat, MFC_TAG_UPDATE_ALL # 0,2 61
- rdch oldmask, $MFC_RdTagMask # 1,6 61 -- remember the tag mask in use
- #nop
- wrch $MFC_WrTagMask, newmask # 1,6 62 -- watch only the overlay tag
- #nop
- wrch $MFC_WrTagUpdate, tagstat # 1,6 63 -- request a tag status update
- #nop
- rdch tagstat, $MFC_RdTagStat # 1,6 64 -- read the status; the value itself is not used
- #nop
- sync # 1,4 65 -- presumably so later fetches see the newly loaded code; confirm
- /* Any hint prior to the sync is lost. A hint here allows the branch
- to complete 15 cycles after the hint. With no hint the branch will
- take 18 or 19 cycles. */
- ila tab5, _ovly_table - 16 # 0,2 66 -- table base less one entry
- hbr do_load99, target # 1,15 66 -- hint for the final bi
- shli off5, oldovl, 4 # 0,4 67 -- byte offset of the evicted entry
- wrch $MFC_WrTagMask, oldmask # 1,6 67 -- put the saved tag mask back
- ceqi zovl, oldovl, 0 # 0,2 68 -- slot 0 all ones when nothing was evicted
- #lnop
- #nop; lnop
- #nop
- fsm zovl, zovl # 1,4 70 -- widen that result to every word of the quadword
- #nop
- lqx oldvma, tab5, off5 # 1,6 71 -- entry of the evicted overlay
- #nop
- lqd save3, -48($sp) # 1,6 72 -- oldmask consumed, restore $72
- #nop; lnop
- andc pbit, pbit, zovl # 0,2 74 -- drop the bit to clear when oldovl is zero
- lqd save2, -32($sp) # 1,6 74 -- oldovl and zovl consumed, restore $73
- #ifdef OVLY_IRQ_SAVE
- ila irqtmp2, do_load90 # 0,2 75 -- destination for the interrupt-enabling branch
- #lnop
- andi irq_stat, irq_stat, 1 # 0,2 76 -- keep the low status bit; presumably the interrupt-enable state, confirm
- #lnop
- #else
- #nop; lnop
- #nop; lnop
- #endif
- andc oldvma, oldvma, pbit # 0,2 77 -- clear the low bit of size on the evicted entry
- lqd save1, -16($sp) # 1,6 77 -- pbit consumed, restore $74
- nop # 0,0 78
- #lnop
- #nop
- stqx oldvma, tab5, off5 # 1,6 79 -- write the evicted entry back
- #nop
- #ifdef OVLY_IRQ_SAVE
- binze irq_stat, irqtmp2 # 1,4 80 -- if the saved bit was set, go to do_load90 enabling interrupts
- do_load90:
- #nop
- lqd save4, -64($sp) # 1,6 84 -- irq_stat consumed, restore $71
- #else
- #nop; lnop
- #endif
- .global _ovly_debug_event
- .type _ovly_debug_event, @function
- _ovly_debug_event: /* Global label on a nop reached once the load and
- the table updates are done; presumably a breakpoint location for a
- debugger - confirm. */
- nop
- /* Branch to target address. */
- do_load99:
- bi target # 1,4 81/85
- .size __ovly_load, . - __ovly_load
|