// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048
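
/* With 4 KiB pages, this lower bound combined with the power-of-2 and
 * chunk_size <= PAGE_SIZE checks in xdp_umem_reg() leaves exactly two
 * legal chunk sizes: 2048 and 4096 bytes.
 */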

void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	if (!xs->tx)
		return;

	spin_lock_irqsave(&umem->xsk_list_lock, flags);
	list_add_rcu(&xs->list, &umem->xsk_list);
	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}

void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
{
	unsigned long flags;

	if (!xs->tx)
		return;

	spin_lock_irqsave(&umem->xsk_list_lock, flags);
	list_del_rcu(&xs->list);
	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
}
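
/* The Tx socket list is RCU-protected because readers may traverse it
 * without taking xsk_list_lock. An illustrative (hypothetical) reader,
 * e.g. a completion path waking every Tx socket bound to the umem:
 *
 *	rcu_read_lock();
 *	list_for_each_entry_rcu(xs, &umem->xsk_list, list)
 *		xs->sk.sk_write_space(&xs->sk);
 *	rcu_read_unlock();
 */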

int xdp_umem_query(struct net_device *dev, u16 queue_id)
{
	struct netdev_bpf bpf;

	ASSERT_RTNL();

	memset(&bpf, 0, sizeof(bpf));
	bpf.command = XDP_QUERY_XSK_UMEM;
	bpf.xsk.queue_id = queue_id;

	if (!dev->netdev_ops->ndo_bpf)
		return 0;
	return dev->netdev_ops->ndo_bpf(dev, &bpf) ?: !!bpf.xsk.umem;
}
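
/* Return convention: a negative value is an ndo_bpf error, 1 means the
 * queue already has a umem bound, 0 means the queue is free. On the
 * driver side, answering the query might look like this (illustrative
 * sketch; xsk_umem_of() is a hypothetical per-driver lookup):
 *
 *	case XDP_QUERY_XSK_UMEM:
 *		bpf->xsk.umem = xsk_umem_of(dev, bpf->xsk.queue_id);
 *		return 0;
 */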

int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
			u32 queue_id, u16 flags)
{
	bool force_zc, force_copy;
	struct netdev_bpf bpf;
	int err;

	force_zc = flags & XDP_ZEROCOPY;
	force_copy = flags & XDP_COPY;

	if (force_zc && force_copy)
		return -EINVAL;

	if (force_copy)
		return 0;

	if (!dev->netdev_ops->ndo_bpf || !dev->netdev_ops->ndo_xsk_async_xmit)
		return force_zc ? -EOPNOTSUPP : 0; /* fail or fallback */

	rtnl_lock();
	err = xdp_umem_query(dev, queue_id);
	if (err) {
		err = err < 0 ? -EOPNOTSUPP : -EBUSY;
		goto err_rtnl_unlock;
	}

	bpf.command = XDP_SETUP_XSK_UMEM;
	bpf.xsk.umem = umem;
	bpf.xsk.queue_id = queue_id;

	err = dev->netdev_ops->ndo_bpf(dev, &bpf);
	if (err)
		goto err_rtnl_unlock;
	rtnl_unlock();

	dev_hold(dev);
	umem->dev = dev;
	umem->queue_id = queue_id;
	umem->zc = true;
	return 0;

err_rtnl_unlock:
	rtnl_unlock();
	return force_zc ? err : 0; /* fail or fallback */
}
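
/* Usage sketch (assuming this is called from the XDP socket bind path,
 * with flags taken from struct sockaddr_xdp::sxdp_flags):
 *
 *	err = xdp_umem_assign_dev(umem, dev, qid, sxdp_flags);
 *	if (err)
 *		return err;
 *
 * On success, umem->zc reports whether zero-copy was actually enabled;
 * err == 0 with umem->zc == false means the copy-mode fallback was taken.
 */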

static void xdp_umem_clear_dev(struct xdp_umem *umem)
{
	struct netdev_bpf bpf;
	int err;

	if (umem->dev) {
		bpf.command = XDP_SETUP_XSK_UMEM;
		bpf.xsk.umem = NULL;
		bpf.xsk.queue_id = umem->queue_id;

		rtnl_lock();
		err = umem->dev->netdev_ops->ndo_bpf(umem->dev, &bpf);
		rtnl_unlock();

		if (err)
			WARN(1, "failed to disable umem!\n");

		dev_put(umem->dev);
		umem->dev = NULL;
	}
}
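
/* Driver-side counterpart (illustrative sketch): bpf->xsk.umem == NULL
 * in an XDP_SETUP_XSK_UMEM command asks the driver to tear down its
 * zero-copy state for that queue:
 *
 *	case XDP_SETUP_XSK_UMEM:
 *		return my_setup_umem(dev, bpf->xsk.umem, bpf->xsk.queue_id);
 *
 * where my_setup_umem() is hypothetical and treats a NULL umem as
 * "disable".
 */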

static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unsigned int i;

	for (i = 0; i < umem->npgs; i++) {
		struct page *page = umem->pgs[i];

		set_page_dirty_lock(page);
		put_page(page);
	}

	kfree(umem->pgs);
	umem->pgs = NULL;
}

static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

static void xdp_umem_release(struct xdp_umem *umem)
{
	xdp_umem_clear_dev(umem);

	if (umem->fq) {
		xskq_destroy(umem->fq);
		umem->fq = NULL;
	}

	if (umem->cq) {
		xskq_destroy(umem->cq);
		umem->cq = NULL;
	}

	xdp_umem_unpin_pages(umem);

	kfree(umem->pages);
	umem->pages = NULL;

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}
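
/* Note that teardown runs in reverse order of setup: detach from the
 * device first (which may sleep on rtnl_lock), then destroy the fill and
 * completion rings, unpin and unaccount the user pages, and finally free
 * the umem itself.
 */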

static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

void xdp_put_umem(struct xdp_umem *umem)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		INIT_WORK(&umem->work, xdp_umem_release_deferred);
		schedule_work(&umem->work);
	}
}
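
/* Typical pairing (illustrative): each socket using the umem takes one
 * reference with xdp_get_umem() and drops it with xdp_put_umem() from its
 * destructor; the last put schedules xdp_umem_release(). Release is
 * deferred to a workqueue because the final put can happen in a context
 * where sleeping (rtnl_lock(), put_page()) is not allowed.
 */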

static int xdp_umem_pin_pages(struct xdp_umem *umem)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
			    GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	down_write(&current->mm->mmap_sem);
	npgs = get_user_pages(umem->address, umem->npgs,
			      gup_flags, &umem->pgs[0], NULL);
	up_write(&current->mm->mmap_sem);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}
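
/* Note on the partial-success path above: get_user_pages() may pin fewer
 * pages than requested. umem->npgs is clamped to the number actually
 * pinned so that xdp_umem_unpin_pages() releases exactly those pages
 * before the array is freed.
 */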

static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}
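
/* Worked example (assuming a 64 MiB RLIMIT_MEMLOCK and 4 KiB pages):
 * lock_limit = 64 MiB >> 12 = 16384 pages. Registering a 16 MiB umem
 * adds 4096 pages to user->locked_vm; a second registration under the
 * same uid may then lock at most 12288 further pages. The cmpxchg loop
 * makes the check-and-add atomic against concurrent registrations.
 */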

static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	unsigned int chunks, chunks_per_page;
	u64 addr = mr->addr, size = mr->len;
	int size_chk, err, i;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (!is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* The memory area has to be page size aligned, for
		 * simplicity. This might change in the future.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	chunks = (unsigned int)div_u64(size, chunk_size);
	if (chunks == 0)
		return -EINVAL;

	chunks_per_page = PAGE_SIZE / chunk_size;
	if (chunks < chunks_per_page || chunks % chunks_per_page)
		return -EINVAL;

	headroom = ALIGN(headroom, 64);

	size_chk = chunk_size - headroom - XDP_PACKET_HEADROOM;
	if (size_chk < 0)
		return -EINVAL;

	umem->address = (unsigned long)addr;
	umem->props.chunk_mask = ~((u64)chunk_size - 1);
	umem->props.size = size;
	umem->headroom = headroom;
	umem->chunk_size_nohr = chunk_size - headroom;
	umem->npgs = size / PAGE_SIZE;
	umem->pgs = NULL;
	umem->user = NULL;

	INIT_LIST_HEAD(&umem->xsk_list);
	spin_lock_init(&umem->xsk_list_lock);

	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		return err;

	err = xdp_umem_pin_pages(umem);
	if (err)
		goto out_account;

	umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL);
	if (!umem->pages) {
		err = -ENOMEM;
		goto out_pin;
	}

	for (i = 0; i < umem->npgs; i++)
		umem->pages[i].addr = page_address(umem->pgs[i]);

	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
	return err;
}
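
/* Worked example of the validation above (4 KiB pages): a 64 MiB area
 * with chunk_size = 2048 yields chunks = 32768 and chunks_per_page = 2,
 * so both divisibility checks pass. With headroom = 100, ALIGN(100, 64)
 * gives 128, and size_chk = 2048 - 128 - XDP_PACKET_HEADROOM (256) =
 * 1664 usable bytes per chunk.
 */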

struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = xdp_umem_reg(umem, mr);
	if (err) {
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}
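
/* Illustrative caller (assuming the XDP_UMEM_REG setsockopt handler):
 *
 *	struct xdp_umem_reg mr;
 *	struct xdp_umem *umem;
 *
 *	if (copy_from_user(&mr, optval, sizeof(mr)))
 *		return -EFAULT;
 *	umem = xdp_umem_create(&mr);
 *	if (IS_ERR(umem))
 *		return PTR_ERR(umem);
 */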

bool xdp_umem_validate_queues(struct xdp_umem *umem)
{
	return umem->fq && umem->cq;
}
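
/* A umem is only usable once user space has created both the fill ring
 * (fq) and the completion ring (cq); the bind path checks this helper
 * before attaching the umem to a queue.
 */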