/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ASM_SH_UNALIGNED_SH4A_H
#define __ASM_SH_UNALIGNED_SH4A_H

/*
 * SH-4A has support for unaligned 32-bit loads, and 32-bit loads only.
 * Support for 64-bit accesses is provided by combining two 32-bit loads,
 * shifted and masked relative to the endianness. Unaligned stores are
 * not supported by the instruction encoding, so these continue to use
 * the packed struct.
 *
 * The same note as with the movli.l/movco.l pair applies here: as long
 * as the load is guaranteed to be inlined, nothing else will hook into
 * r0 and we get the return value for free.
 *
 * NOTE: Because we require the r0 encoding, care should be taken to
 * avoid mixing these heavily with other r0 consumers, such as the atomic
 * ops. Failure to adhere to this can result in the compiler running out
 * of spill registers and blowing up when building at low optimization
 * levels. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34777.
 */
#include <linux/unaligned/packed_struct.h>
#include <linux/types.h>
#include <asm/byteorder.h>
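
/*
 * Illustrative sketch, not part of this header's interface: movua.l
 * performs a 32-bit load from an arbitrarily aligned address into r0,
 * whereas a plain mov.l from an address that is not 4-byte aligned
 * raises an address error. Roughly:
 *
 *     mov.l   @r4, r0     ! faults when r4 is not 4-byte aligned
 *     movua.l @r4, r0     ! tolerates any alignment of r4
 *
 * The helpers below wrap the movua.l form in inline assembly so the
 * compiler can still inline and schedule the load.
 */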

static inline u16 sh4a_get_unaligned_cpu16(const u8 *p)
{
#ifdef __LITTLE_ENDIAN
        return p[0] | p[1] << 8;
#else
        return p[0] << 8 | p[1];
#endif
}

static __always_inline u32 sh4a_get_unaligned_cpu32(const u8 *p)
{
        unsigned long unaligned;

        /* The "z" constraint pins the output to r0, the only destination
         * register that movua.l accepts. */
        __asm__ __volatile__ (
                "movua.l @%1, %0\n\t"
                 : "=z" (unaligned)
                 : "r" (p)
        );

        return unaligned;
}

/*
 * Even though movua.l supports auto-increment on the read side, it can
 * only store to r0 due to instruction encoding constraints, so just let
 * the compiler sort it out on its own.
 */
static inline u64 sh4a_get_unaligned_cpu64(const u8 *p)
{
#ifdef __LITTLE_ENDIAN
        return (u64)sh4a_get_unaligned_cpu32(p + 4) << 32 |
                    sh4a_get_unaligned_cpu32(p);
#else
        return (u64)sh4a_get_unaligned_cpu32(p) << 32 |
                    sh4a_get_unaligned_cpu32(p + 4);
#endif
}
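
/*
 * Worked example (values are purely illustrative): with the bytes
 * { 0x44, 0x33, 0x22, 0x11, 0x88, 0x77, 0x66, 0x55 } at p on a
 * little-endian build, sh4a_get_unaligned_cpu32(p) yields 0x11223344
 * and sh4a_get_unaligned_cpu32(p + 4) yields 0x55667788, so the 64-bit
 * result is 0x5566778811223344, the same value a native 64-bit load of
 * those bytes would produce.
 */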

static inline u16 get_unaligned_le16(const void *p)
{
        return le16_to_cpu(sh4a_get_unaligned_cpu16(p));
}

static inline u32 get_unaligned_le32(const void *p)
{
        return le32_to_cpu(sh4a_get_unaligned_cpu32(p));
}

static inline u64 get_unaligned_le64(const void *p)
{
        return le64_to_cpu(sh4a_get_unaligned_cpu64(p));
}

static inline u16 get_unaligned_be16(const void *p)
{
        return be16_to_cpu(sh4a_get_unaligned_cpu16(p));
}

static inline u32 get_unaligned_be32(const void *p)
{
        return be32_to_cpu(sh4a_get_unaligned_cpu32(p));
}

static inline u64 get_unaligned_be64(const void *p)
{
        return be64_to_cpu(sh4a_get_unaligned_cpu64(p));
}

static inline void nonnative_put_le16(u16 val, u8 *p)
{
        *p++ = val;
        *p++ = val >> 8;
}

static inline void nonnative_put_le32(u32 val, u8 *p)
{
        nonnative_put_le16(val, p);
        nonnative_put_le16(val >> 16, p + 2);
}

static inline void nonnative_put_le64(u64 val, u8 *p)
{
        nonnative_put_le32(val, p);
        nonnative_put_le32(val >> 32, p + 4);
}

static inline void nonnative_put_be16(u16 val, u8 *p)
{
        *p++ = val >> 8;
        *p++ = val;
}

static inline void nonnative_put_be32(u32 val, u8 *p)
{
        nonnative_put_be16(val >> 16, p);
        nonnative_put_be16(val, p + 2);
}

static inline void nonnative_put_be64(u64 val, u8 *p)
{
        nonnative_put_be32(val >> 32, p);
        nonnative_put_be32(val, p + 4);
}
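
/*
 * Worked example (value is purely illustrative): nonnative_put_le32(0x11223344, p)
 * stores p[0] = 0x44, p[1] = 0x33, p[2] = 0x22, p[3] = 0x11, one byte at
 * a time. Byte stores have no alignment requirement, which is why the
 * non-native put path never needs an unaligned store instruction.
 */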

static inline void put_unaligned_le16(u16 val, void *p)
{
#ifdef __LITTLE_ENDIAN
        __put_unaligned_cpu16(val, p);
#else
        nonnative_put_le16(val, p);
#endif
}

static inline void put_unaligned_le32(u32 val, void *p)
{
#ifdef __LITTLE_ENDIAN
        __put_unaligned_cpu32(val, p);
#else
        nonnative_put_le32(val, p);
#endif
}

static inline void put_unaligned_le64(u64 val, void *p)
{
#ifdef __LITTLE_ENDIAN
        __put_unaligned_cpu64(val, p);
#else
        nonnative_put_le64(val, p);
#endif
}

static inline void put_unaligned_be16(u16 val, void *p)
{
#ifdef __BIG_ENDIAN
        __put_unaligned_cpu16(val, p);
#else
        nonnative_put_be16(val, p);
#endif
}

static inline void put_unaligned_be32(u32 val, void *p)
{
#ifdef __BIG_ENDIAN
        __put_unaligned_cpu32(val, p);
#else
        nonnative_put_be32(val, p);
#endif
}

static inline void put_unaligned_be64(u64 val, void *p)
{
#ifdef __BIG_ENDIAN
        __put_unaligned_cpu64(val, p);
#else
        nonnative_put_be64(val, p);
#endif
}
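
/*
 * Note on dispatch, with a worked value that is purely illustrative: on a
 * little-endian build, put_unaligned_le32() goes through the packed-struct
 * __put_unaligned_cpu32(), while put_unaligned_be16(0x1122, p) takes the
 * byte-store path and writes p[0] = 0x11, p[1] = 0x22. On a big-endian
 * build the roles are reversed.
 */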

/*
 * While it's a bit non-obvious, even though the generic le/be wrappers
 * use the __get/put_xxx prefixing, they actually wrap into the
 * non-prefixed get/put_xxx variants as provided above.
 */
#include <linux/unaligned/generic.h>

#ifdef __LITTLE_ENDIAN
# define get_unaligned __get_unaligned_le
# define put_unaligned __put_unaligned_le
#else
# define get_unaligned __get_unaligned_be
# define put_unaligned __put_unaligned_be
#endif
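
/*
 * Usage sketch, for illustration only; the function name and field
 * offsets below are hypothetical. The buffer may sit at any alignment,
 * as the accessors above handle the unaligned cases.
 */
static inline u32 sh4a_unaligned_usage_sketch(void *buf)
{
        /* read a 32-bit little-endian field at byte offset 2 */
        u32 len = get_unaligned_le32((u8 *)buf + 2);

        /* store it back as big-endian at byte offset 6 */
        put_unaligned_be32(len, (u8 *)buf + 6);

        /* native-endian access through the generic wrapper */
        return get_unaligned((u32 *)buf);
}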

#endif /* __ASM_SH_UNALIGNED_SH4A_H */