page.c 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  /*
   * Copyright (c) 2006 Oracle.  All rights reserved.
   *
   * This software is available to you under a choice of one of two
   * licenses.  You may choose to be licensed under the terms of the GNU
   * General Public License (GPL) Version 2, available from the file
   * COPYING in the main directory of this source tree, or the
   * OpenIB.org BSD license below:
   *
   *     Redistribution and use in source and binary forms, with or
   *     without modification, are permitted provided that the following
   *     conditions are met:
   *
   *      - Redistributions of source code must retain the above
   *        copyright notice, this list of conditions and the following
   *        disclaimer.
   *
   *      - Redistributions in binary form must reproduce the above
   *        copyright notice, this list of conditions and the following
   *        disclaimer in the documentation and/or other materials
   *        provided with the distribution.
   *
   * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   * SOFTWARE.
   *
   */
  33. #include <linux/highmem.h>
  34. #include <linux/gfp.h>
  35. #include <linux/cpu.h>
  36. #include <linux/export.h>
  37. #include "rds.h"
  /*
   * Bookkeeping for the unused tail of a partially consumed page.
   * rds_page_remainder_alloc() hands out fragments of r_page starting at
   * r_offset and advances r_offset as it goes.
   */
  struct rds_page_remainder {
      struct page *r_page;      /* cached page, or NULL when nothing is cached */
      unsigned long r_offset;   /* byte offset in r_page of the next fragment */
  };
  42. static
  43. DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
  44. /**
  45. * rds_page_remainder_alloc - build up regions of a message.
  46. *
  47. * @scat: Scatter list for message
  48. * @bytes: the number of bytes needed.
  49. * @gfp: the waiting behaviour of the allocation
  50. *
  51. * @gfp is always ored with __GFP_HIGHMEM. Callers must be prepared to
  52. * kmap the pages, etc.
  53. *
  54. * If @bytes is at least a full page then this just returns a page from
  55. * alloc_page().
  56. *
  57. * If @bytes is a partial page then this stores the unused region of the
  58. * page in a per-cpu structure. Future partial-page allocations may be
  59. * satisfied from that cached region. This lets us waste less memory on
  60. * small allocations with minimal complexity. It works because the transmit
  61. * path passes read-only page regions down to devices. They hold a page
  62. * reference until they are done with the region.
  63. */
  64. int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
  65. gfp_t gfp)
  66. {
  67. struct rds_page_remainder *rem;
  68. unsigned long flags;
  69. struct page *page;
  70. int ret;
  71. gfp |= __GFP_HIGHMEM;
  72. /* jump straight to allocation if we're trying for a huge page */
  73. if (bytes >= PAGE_SIZE) {
  74. page = alloc_page(gfp);
  75. if (!page) {
  76. ret = -ENOMEM;
  77. } else {
  78. sg_set_page(scat, page, PAGE_SIZE, 0);
  79. ret = 0;
  80. }
  81. goto out;
  82. }
  83. rem = &per_cpu(rds_page_remainders, get_cpu());
  84. local_irq_save(flags);
  85. while (1) {
  86. /* avoid a tiny region getting stuck by tossing it */
  87. if (rem->r_page && bytes > (PAGE_SIZE - rem->r_offset)) {
  88. rds_stats_inc(s_page_remainder_miss);
  89. __free_page(rem->r_page);
  90. rem->r_page = NULL;
  91. }
  92. /* hand out a fragment from the cached page */
  93. if (rem->r_page && bytes <= (PAGE_SIZE - rem->r_offset)) {
  94. sg_set_page(scat, rem->r_page, bytes, rem->r_offset);
  95. get_page(sg_page(scat));
  96. if (rem->r_offset != 0)
  97. rds_stats_inc(s_page_remainder_hit);
  98. rem->r_offset += ALIGN(bytes, 8);
  99. if (rem->r_offset >= PAGE_SIZE) {
  100. __free_page(rem->r_page);
  101. rem->r_page = NULL;
  102. }
  103. ret = 0;
  104. break;
  105. }
  106. /* alloc if there is nothing for us to use */
  107. local_irq_restore(flags);
  108. put_cpu();
  109. page = alloc_page(gfp);
  110. rem = &per_cpu(rds_page_remainders, get_cpu());
  111. local_irq_save(flags);
  112. if (!page) {
  113. ret = -ENOMEM;
  114. break;
  115. }
  116. /* did someone race to fill the remainder before us? */
  117. if (rem->r_page) {
  118. __free_page(page);
  119. continue;
  120. }
  121. /* otherwise install our page and loop around to alloc */
  122. rem->r_page = page;
  123. rem->r_offset = 0;
  124. }
  125. local_irq_restore(flags);
  126. put_cpu();
  127. out:
  128. rdsdebug("bytes %lu ret %d %p %u %u\n", bytes, ret,
  129. ret ? NULL : sg_page(scat), ret ? 0 : scat->offset,
  130. ret ? 0 : scat->length);
  131. return ret;
  132. }
  133. EXPORT_SYMBOL_GPL(rds_page_remainder_alloc);
  134. void rds_page_exit(void)
  135. {
  136. unsigned int cpu;
  137. for_each_possible_cpu(cpu) {
  138. struct rds_page_remainder *rem;
  139. rem = &per_cpu(rds_page_remainders, cpu);
  140. rdsdebug("cpu %u\n", cpu);
  141. if (rem->r_page)
  142. __free_page(rem->r_page);
  143. rem->r_page = NULL;
  144. }
  145. }