// matha.s
  1. /*
  2. ===========================================================================
  3. Copyright (C) 1999-2005 Id Software, Inc.
  4. This file is part of Quake III Arena source code.
  5. Quake III Arena source code is free software; you can redistribute it
  6. and/or modify it under the terms of the GNU General Public License as
  7. published by the Free Software Foundation; either version 2 of the License,
  8. or (at your option) any later version.
  9. Quake III Arena source code is distributed in the hope that it will be
  10. useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. GNU General Public License for more details.
  13. You should have received a copy of the GNU General Public License
along with Quake III Arena source code; if not, write to the Free Software
  15. Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  16. ===========================================================================
  17. */
  18. //
  19. // math.s
  20. // x86 assembly-language math routines.
  21. #define GLQUAKE 1 // don't include unneeded defs
  22. #include "qasm.h"
  23. #if id386
  24. .data
  25. .align 4
  26. Ljmptab: .long Lcase0, Lcase1, Lcase2, Lcase3
  27. .long Lcase4, Lcase5, Lcase6, Lcase7
  28. .text
  29. // TODO: rounding needed?
  30. // stack parameter offset
  31. #define val 4
  32. .globl C(Invert24To16)
  33. C(Invert24To16):
  34. movl val(%esp),%ecx
  35. movl $0x100,%edx // 0x10000000000 as dividend
  36. cmpl %edx,%ecx
  37. jle LOutOfRange
  38. subl %eax,%eax
  39. divl %ecx
  40. ret
  41. LOutOfRange:
  42. movl $0xFFFFFFFF,%eax
  43. ret
  44. #if 0
  45. #define in 4
  46. #define out 8
  47. .align 2
  48. .globl C(TransformVector)
  49. C(TransformVector):
  50. movl in(%esp),%eax
  51. movl out(%esp),%edx
  52. flds (%eax) // in[0]
  53. fmuls C(vright) // in[0]*vright[0]
  54. flds (%eax) // in[0] | in[0]*vright[0]
  55. fmuls C(vup) // in[0]*vup[0] | in[0]*vright[0]
  56. flds (%eax) // in[0] | in[0]*vup[0] | in[0]*vright[0]
  57. fmuls C(vpn) // in[0]*vpn[0] | in[0]*vup[0] | in[0]*vright[0]
  58. flds 4(%eax) // in[1] | ...
  59. fmuls C(vright)+4 // in[1]*vright[1] | ...
  60. flds 4(%eax) // in[1] | in[1]*vright[1] | ...
  61. fmuls C(vup)+4 // in[1]*vup[1] | in[1]*vright[1] | ...
  62. flds 4(%eax) // in[1] | in[1]*vup[1] | in[1]*vright[1] | ...
  63. fmuls C(vpn)+4 // in[1]*vpn[1] | in[1]*vup[1] | in[1]*vright[1] | ...
  64. fxch %st(2) // in[1]*vright[1] | in[1]*vup[1] | in[1]*vpn[1] | ...
  65. faddp %st(0),%st(5) // in[1]*vup[1] | in[1]*vpn[1] | ...
  66. faddp %st(0),%st(3) // in[1]*vpn[1] | ...
  67. faddp %st(0),%st(1) // vpn_accum | vup_accum | vright_accum
  68. flds 8(%eax) // in[2] | ...
  69. fmuls C(vright)+8 // in[2]*vright[2] | ...
  70. flds 8(%eax) // in[2] | in[2]*vright[2] | ...
  71. fmuls C(vup)+8 // in[2]*vup[2] | in[2]*vright[2] | ...
  72. flds 8(%eax) // in[2] | in[2]*vup[2] | in[2]*vright[2] | ...
  73. fmuls C(vpn)+8 // in[2]*vpn[2] | in[2]*vup[2] | in[2]*vright[2] | ...
  74. fxch %st(2) // in[2]*vright[2] | in[2]*vup[2] | in[2]*vpn[2] | ...
  75. faddp %st(0),%st(5) // in[2]*vup[2] | in[2]*vpn[2] | ...
  76. faddp %st(0),%st(3) // in[2]*vpn[2] | ...
  77. faddp %st(0),%st(1) // vpn_accum | vup_accum | vright_accum
  78. fstps 8(%edx) // out[2]
  79. fstps 4(%edx) // out[1]
  80. fstps (%edx) // out[0]
  81. ret
  82. #endif
  83. #define EMINS 4+4
  84. #define EMAXS 4+8
  85. #define P 4+12
  86. .align 2
  87. .globl C(BoxOnPlaneSide)
  88. C(BoxOnPlaneSide):
  89. pushl %ebx
  90. movl P(%esp),%edx
  91. movl EMINS(%esp),%ecx
  92. xorl %eax,%eax
  93. movl EMAXS(%esp),%ebx
  94. movb pl_signbits(%edx),%al
  95. cmpb $8,%al
  96. jge Lerror
  97. flds pl_normal(%edx) // p->normal[0]
  98. fld %st(0) // p->normal[0] | p->normal[0]
  99. // bk000422 - warning: missing prefix `*' in absolute indirect address, maybe misassembled!
  100. // bk001129 - fix from Andrew Henderson, was: Ljmptab(,%eax,4)
  101. jmp *Ljmptab(,%eax,4)
  102. //dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
  103. //dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
  104. Lcase0:
  105. fmuls (%ebx) // p->normal[0]*emaxs[0] | p->normal[0]
  106. flds pl_normal+4(%edx) // p->normal[1] | p->normal[0]*emaxs[0] |
  107. // p->normal[0]
  108. fxch %st(2) // p->normal[0] | p->normal[0]*emaxs[0] |
  109. // p->normal[1]
  110. fmuls (%ecx) // p->normal[0]*emins[0] |
  111. // p->normal[0]*emaxs[0] | p->normal[1]
  112. fxch %st(2) // p->normal[1] | p->normal[0]*emaxs[0] |
  113. // p->normal[0]*emins[0]
  114. fld %st(0) // p->normal[1] | p->normal[1] |
  115. // p->normal[0]*emaxs[0] |
  116. // p->normal[0]*emins[0]
  117. fmuls 4(%ebx) // p->normal[1]*emaxs[1] | p->normal[1] |
  118. // p->normal[0]*emaxs[0] |
  119. // p->normal[0]*emins[0]
  120. flds pl_normal+8(%edx) // p->normal[2] | p->normal[1]*emaxs[1] |
  121. // p->normal[1] | p->normal[0]*emaxs[0] |
  122. // p->normal[0]*emins[0]
  123. fxch %st(2) // p->normal[1] | p->normal[1]*emaxs[1] |
  124. // p->normal[2] | p->normal[0]*emaxs[0] |
  125. // p->normal[0]*emins[0]
  126. fmuls 4(%ecx) // p->normal[1]*emins[1] |
  127. // p->normal[1]*emaxs[1] |
  128. // p->normal[2] | p->normal[0]*emaxs[0] |
  129. // p->normal[0]*emins[0]
  130. fxch %st(2) // p->normal[2] | p->normal[1]*emaxs[1] |
  131. // p->normal[1]*emins[1] |
  132. // p->normal[0]*emaxs[0] |
  133. // p->normal[0]*emins[0]
  134. fld %st(0) // p->normal[2] | p->normal[2] |
  135. // p->normal[1]*emaxs[1] |
  136. // p->normal[1]*emins[1] |
  137. // p->normal[0]*emaxs[0] |
  138. // p->normal[0]*emins[0]
  139. fmuls 8(%ebx) // p->normal[2]*emaxs[2] |
  140. // p->normal[2] |
  141. // p->normal[1]*emaxs[1] |
  142. // p->normal[1]*emins[1] |
  143. // p->normal[0]*emaxs[0] |
  144. // p->normal[0]*emins[0]
  145. fxch %st(5) // p->normal[0]*emins[0] |
  146. // p->normal[2] |
  147. // p->normal[1]*emaxs[1] |
  148. // p->normal[1]*emins[1] |
  149. // p->normal[0]*emaxs[0] |
  150. // p->normal[2]*emaxs[2]
  151. faddp %st(0),%st(3) //p->normal[2] |
  152. // p->normal[1]*emaxs[1] |
  153. // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
  154. // p->normal[0]*emaxs[0] |
  155. // p->normal[2]*emaxs[2]
  156. fmuls 8(%ecx) //p->normal[2]*emins[2] |
  157. // p->normal[1]*emaxs[1] |
  158. // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
  159. // p->normal[0]*emaxs[0] |
  160. // p->normal[2]*emaxs[2]
  161. fxch %st(1) //p->normal[1]*emaxs[1] |
  162. // p->normal[2]*emins[2] |
  163. // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
  164. // p->normal[0]*emaxs[0] |
  165. // p->normal[2]*emaxs[2]
  166. faddp %st(0),%st(3) //p->normal[2]*emins[2] |
  167. // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
  168. // p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
  169. // p->normal[2]*emaxs[2]
  170. fxch %st(3) //p->normal[2]*emaxs[2] +
  171. // p->normal[1]*emins[1]+p->normal[0]*emins[0]|
  172. // p->normal[0]*emaxs[0]+p->normal[1]*emaxs[1]|
  173. // p->normal[2]*emins[2]
  174. faddp %st(0),%st(2) //p->normal[1]*emins[1]+p->normal[0]*emins[0]|
  175. // dist1 | p->normal[2]*emins[2]
  176. jmp LSetSides
  177. //dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
  178. //dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
  179. Lcase1:
  180. fmuls (%ecx) // emins[0]
  181. flds pl_normal+4(%edx)
  182. fxch %st(2)
  183. fmuls (%ebx) // emaxs[0]
  184. fxch %st(2)
  185. fld %st(0)
  186. fmuls 4(%ebx) // emaxs[1]
  187. flds pl_normal+8(%edx)
  188. fxch %st(2)
  189. fmuls 4(%ecx) // emins[1]
  190. fxch %st(2)
  191. fld %st(0)
  192. fmuls 8(%ebx) // emaxs[2]
  193. fxch %st(5)
  194. faddp %st(0),%st(3)
  195. fmuls 8(%ecx) // emins[2]
  196. fxch %st(1)
  197. faddp %st(0),%st(3)
  198. fxch %st(3)
  199. faddp %st(0),%st(2)
  200. jmp LSetSides
  201. //dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
  202. //dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
  203. Lcase2:
  204. fmuls (%ebx) // emaxs[0]
  205. flds pl_normal+4(%edx)
  206. fxch %st(2)
  207. fmuls (%ecx) // emins[0]
  208. fxch %st(2)
  209. fld %st(0)
  210. fmuls 4(%ecx) // emins[1]
  211. flds pl_normal+8(%edx)
  212. fxch %st(2)
  213. fmuls 4(%ebx) // emaxs[1]
  214. fxch %st(2)
  215. fld %st(0)
  216. fmuls 8(%ebx) // emaxs[2]
  217. fxch %st(5)
  218. faddp %st(0),%st(3)
  219. fmuls 8(%ecx) // emins[2]
  220. fxch %st(1)
  221. faddp %st(0),%st(3)
  222. fxch %st(3)
  223. faddp %st(0),%st(2)
  224. jmp LSetSides
  225. //dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
  226. //dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
  227. Lcase3:
  228. fmuls (%ecx) // emins[0]
  229. flds pl_normal+4(%edx)
  230. fxch %st(2)
  231. fmuls (%ebx) // emaxs[0]
  232. fxch %st(2)
  233. fld %st(0)
  234. fmuls 4(%ecx) // emins[1]
  235. flds pl_normal+8(%edx)
  236. fxch %st(2)
  237. fmuls 4(%ebx) // emaxs[1]
  238. fxch %st(2)
  239. fld %st(0)
  240. fmuls 8(%ebx) // emaxs[2]
  241. fxch %st(5)
  242. faddp %st(0),%st(3)
  243. fmuls 8(%ecx) // emins[2]
  244. fxch %st(1)
  245. faddp %st(0),%st(3)
  246. fxch %st(3)
  247. faddp %st(0),%st(2)
  248. jmp LSetSides
  249. //dist1= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
  250. //dist2= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
  251. Lcase4:
  252. fmuls (%ebx) // emaxs[0]
  253. flds pl_normal+4(%edx)
  254. fxch %st(2)
  255. fmuls (%ecx) // emins[0]
  256. fxch %st(2)
  257. fld %st(0)
  258. fmuls 4(%ebx) // emaxs[1]
  259. flds pl_normal+8(%edx)
  260. fxch %st(2)
  261. fmuls 4(%ecx) // emins[1]
  262. fxch %st(2)
  263. fld %st(0)
  264. fmuls 8(%ecx) // emins[2]
  265. fxch %st(5)
  266. faddp %st(0),%st(3)
  267. fmuls 8(%ebx) // emaxs[2]
  268. fxch %st(1)
  269. faddp %st(0),%st(3)
  270. fxch %st(3)
  271. faddp %st(0),%st(2)
  272. jmp LSetSides
  273. //dist1= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emins[2];
  274. //dist2= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emaxs[2];
  275. Lcase5:
  276. fmuls (%ecx) // emins[0]
  277. flds pl_normal+4(%edx)
  278. fxch %st(2)
  279. fmuls (%ebx) // emaxs[0]
  280. fxch %st(2)
  281. fld %st(0)
  282. fmuls 4(%ebx) // emaxs[1]
  283. flds pl_normal+8(%edx)
  284. fxch %st(2)
  285. fmuls 4(%ecx) // emins[1]
  286. fxch %st(2)
  287. fld %st(0)
  288. fmuls 8(%ecx) // emins[2]
  289. fxch %st(5)
  290. faddp %st(0),%st(3)
  291. fmuls 8(%ebx) // emaxs[2]
  292. fxch %st(1)
  293. faddp %st(0),%st(3)
  294. fxch %st(3)
  295. faddp %st(0),%st(2)
  296. jmp LSetSides
  297. //dist1= p->normal[0]*emaxs[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
  298. //dist2= p->normal[0]*emins[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
  299. Lcase6:
  300. fmuls (%ebx) // emaxs[0]
  301. flds pl_normal+4(%edx)
  302. fxch %st(2)
  303. fmuls (%ecx) // emins[0]
  304. fxch %st(2)
  305. fld %st(0)
  306. fmuls 4(%ecx) // emins[1]
  307. flds pl_normal+8(%edx)
  308. fxch %st(2)
  309. fmuls 4(%ebx) // emaxs[1]
  310. fxch %st(2)
  311. fld %st(0)
  312. fmuls 8(%ecx) // emins[2]
  313. fxch %st(5)
  314. faddp %st(0),%st(3)
  315. fmuls 8(%ebx) // emaxs[2]
  316. fxch %st(1)
  317. faddp %st(0),%st(3)
  318. fxch %st(3)
  319. faddp %st(0),%st(2)
  320. jmp LSetSides
  321. //dist1= p->normal[0]*emins[0] + p->normal[1]*emins[1] + p->normal[2]*emins[2];
  322. //dist2= p->normal[0]*emaxs[0] + p->normal[1]*emaxs[1] + p->normal[2]*emaxs[2];
  323. Lcase7:
  324. fmuls (%ecx) // emins[0]
  325. flds pl_normal+4(%edx)
  326. fxch %st(2)
  327. fmuls (%ebx) // emaxs[0]
  328. fxch %st(2)
  329. fld %st(0)
  330. fmuls 4(%ecx) // emins[1]
  331. flds pl_normal+8(%edx)
  332. fxch %st(2)
  333. fmuls 4(%ebx) // emaxs[1]
  334. fxch %st(2)
  335. fld %st(0)
  336. fmuls 8(%ecx) // emins[2]
  337. fxch %st(5)
  338. faddp %st(0),%st(3)
  339. fmuls 8(%ebx) // emaxs[2]
  340. fxch %st(1)
  341. faddp %st(0),%st(3)
  342. fxch %st(3)
  343. faddp %st(0),%st(2)
  344. LSetSides:
  345. // sides = 0;
  346. // if (dist1 >= p->dist)
  347. // sides = 1;
  348. // if (dist2 < p->dist)
  349. // sides |= 2;
  350. faddp %st(0),%st(2) // dist1 | dist2
  351. fcomps pl_dist(%edx)
  352. xorl %ecx,%ecx
  353. fnstsw %ax
  354. fcomps pl_dist(%edx)
  355. andb $1,%ah
  356. xorb $1,%ah
  357. addb %ah,%cl
  358. fnstsw %ax
  359. andb $1,%ah
  360. addb %ah,%ah
  361. addb %ah,%cl
  362. // return sides;
  363. popl %ebx
  364. movl %ecx,%eax // return status
  365. ret
  366. Lerror:
  367. movl 1, %eax
  368. ret
  369. #endif // id386