123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527 |
- /* Copyright (c) 2013 Coraid, Inc. See COPYING for GPL terms. */
- /*
- * aoedev.c
- * AoE device utility functions; maintains device list.
- */
- #include <linux/hdreg.h>
- #include <linux/blkdev.h>
- #include <linux/netdevice.h>
- #include <linux/delay.h>
- #include <linux/slab.h>
- #include <linux/bitmap.h>
- #include <linux/kdev_t.h>
- #include <linux/moduleparam.h>
- #include <linux/string.h>
- #include "aoe.h"
- static void dummy_timer(ulong);
- static void freetgt(struct aoedev *d, struct aoetgt *t);
- static void skbpoolfree(struct aoedev *d);
- static int aoe_dyndevs = 1;
- module_param(aoe_dyndevs, int, 0644);
- MODULE_PARM_DESC(aoe_dyndevs, "Use dynamic minor numbers for devices.");
- static struct aoedev *devlist;
- static DEFINE_SPINLOCK(devlist_lock);
- /* Because some systems will have one, many, or no
- * - partitions,
- * - slots per shelf,
- * - or shelves,
- * we need some flexibility in the way the minor numbers
- * are allocated. So they are dynamic.
- */
- #define N_DEVS ((1U<<MINORBITS)/AOE_PARTITIONS)
- static DEFINE_SPINLOCK(used_minors_lock);
- static DECLARE_BITMAP(used_minors, N_DEVS);
- static int
- minor_get_dyn(ulong *sysminor)
- {
- ulong flags;
- ulong n;
- int error = 0;
- spin_lock_irqsave(&used_minors_lock, flags);
- n = find_first_zero_bit(used_minors, N_DEVS);
- if (n < N_DEVS)
- set_bit(n, used_minors);
- else
- error = -1;
- spin_unlock_irqrestore(&used_minors_lock, flags);
- *sysminor = n * AOE_PARTITIONS;
- return error;
- }
- static int
- minor_get_static(ulong *sysminor, ulong aoemaj, int aoemin)
- {
- ulong flags;
- ulong n;
- int error = 0;
- enum {
- /* for backwards compatibility when !aoe_dyndevs,
- * a static number of supported slots per shelf */
- NPERSHELF = 16,
- };
- if (aoemin >= NPERSHELF) {
- pr_err("aoe: %s %d slots per shelf\n",
- "static minor device numbers support only",
- NPERSHELF);
- error = -1;
- goto out;
- }
- n = aoemaj * NPERSHELF + aoemin;
- if (n >= N_DEVS) {
- pr_err("aoe: %s with e%ld.%d\n",
- "cannot use static minor device numbers",
- aoemaj, aoemin);
- error = -1;
- goto out;
- }
- spin_lock_irqsave(&used_minors_lock, flags);
- if (test_bit(n, used_minors)) {
- pr_err("aoe: %s %lu\n",
- "existing device already has static minor number",
- n);
- error = -1;
- } else
- set_bit(n, used_minors);
- spin_unlock_irqrestore(&used_minors_lock, flags);
- *sysminor = n * AOE_PARTITIONS;
- out:
- return error;
- }
- static int
- minor_get(ulong *sysminor, ulong aoemaj, int aoemin)
- {
- if (aoe_dyndevs)
- return minor_get_dyn(sysminor);
- else
- return minor_get_static(sysminor, aoemaj, aoemin);
- }
- static void
- minor_free(ulong minor)
- {
- ulong flags;
- minor /= AOE_PARTITIONS;
- BUG_ON(minor >= N_DEVS);
- spin_lock_irqsave(&used_minors_lock, flags);
- BUG_ON(!test_bit(minor, used_minors));
- clear_bit(minor, used_minors);
- spin_unlock_irqrestore(&used_minors_lock, flags);
- }
- /*
- * Users who grab a pointer to the device with aoedev_by_aoeaddr
- * automatically get a reference count and must be responsible
- * for performing a aoedev_put. With the addition of async
- * kthread processing I'm no longer confident that we can
- * guarantee consistency in the face of device flushes.
- *
- * For the time being, we only bother to add extra references for
- * frames sitting on the iocq. When the kthreads finish processing
- * these frames, they will aoedev_put the device.
- */
- void
- aoedev_put(struct aoedev *d)
- {
- ulong flags;
- spin_lock_irqsave(&devlist_lock, flags);
- d->ref--;
- spin_unlock_irqrestore(&devlist_lock, flags);
- }
- static void
- dummy_timer(ulong vp)
- {
- struct aoedev *d;
- d = (struct aoedev *)vp;
- if (d->flags & DEVFL_TKILL)
- return;
- d->timer.expires = jiffies + HZ;
- add_timer(&d->timer);
- }
- static void
- aoe_failip(struct aoedev *d)
- {
- struct request *rq;
- struct bio *bio;
- unsigned long n;
- aoe_failbuf(d, d->ip.buf);
- rq = d->ip.rq;
- if (rq == NULL)
- return;
- while ((bio = d->ip.nxbio)) {
- bio->bi_error = -EIO;
- d->ip.nxbio = bio->bi_next;
- n = (unsigned long) rq->special;
- rq->special = (void *) --n;
- }
- if ((unsigned long) rq->special == 0)
- aoe_end_request(d, rq, 0);
- }
- static void
- downdev_frame(struct list_head *pos)
- {
- struct frame *f;
- f = list_entry(pos, struct frame, head);
- list_del(pos);
- if (f->buf) {
- f->buf->nframesout--;
- aoe_failbuf(f->t->d, f->buf);
- }
- aoe_freetframe(f);
- }
- void
- aoedev_downdev(struct aoedev *d)
- {
- struct aoetgt *t, **tt, **te;
- struct list_head *head, *pos, *nx;
- struct request *rq;
- int i;
- d->flags &= ~DEVFL_UP;
- /* clean out active and to-be-retransmitted buffers */
- for (i = 0; i < NFACTIVE; i++) {
- head = &d->factive[i];
- list_for_each_safe(pos, nx, head)
- downdev_frame(pos);
- }
- head = &d->rexmitq;
- list_for_each_safe(pos, nx, head)
- downdev_frame(pos);
- /* reset window dressings */
- tt = d->targets;
- te = tt + d->ntargets;
- for (; tt < te && (t = *tt); tt++) {
- aoecmd_wreset(t);
- t->nout = 0;
- }
- /* clean out the in-process request (if any) */
- aoe_failip(d);
- /* fast fail all pending I/O */
- if (d->blkq) {
- while ((rq = blk_peek_request(d->blkq))) {
- blk_start_request(rq);
- aoe_end_request(d, rq, 1);
- }
- }
- if (d->gd)
- set_capacity(d->gd, 0);
- }
- /* return whether the user asked for this particular
- * device to be flushed
- */
- static int
- user_req(char *s, size_t slen, struct aoedev *d)
- {
- const char *p;
- size_t lim;
- if (!d->gd)
- return 0;
- p = kbasename(d->gd->disk_name);
- lim = sizeof(d->gd->disk_name);
- lim -= p - d->gd->disk_name;
- if (slen < lim)
- lim = slen;
- return !strncmp(s, p, lim);
- }
- static void
- freedev(struct aoedev *d)
- {
- struct aoetgt **t, **e;
- int freeing = 0;
- unsigned long flags;
- spin_lock_irqsave(&d->lock, flags);
- if (d->flags & DEVFL_TKILL
- && !(d->flags & DEVFL_FREEING)) {
- d->flags |= DEVFL_FREEING;
- freeing = 1;
- }
- spin_unlock_irqrestore(&d->lock, flags);
- if (!freeing)
- return;
- del_timer_sync(&d->timer);
- if (d->gd) {
- aoedisk_rm_debugfs(d);
- aoedisk_rm_sysfs(d);
- del_gendisk(d->gd);
- put_disk(d->gd);
- blk_cleanup_queue(d->blkq);
- }
- t = d->targets;
- e = t + d->ntargets;
- for (; t < e && *t; t++)
- freetgt(d, *t);
- if (d->bufpool)
- mempool_destroy(d->bufpool);
- skbpoolfree(d);
- minor_free(d->sysminor);
- spin_lock_irqsave(&d->lock, flags);
- d->flags |= DEVFL_FREED;
- spin_unlock_irqrestore(&d->lock, flags);
- }
/* Values for the "exiting" argument of flush(). */
enum flush_parms {
	NOT_EXITING,	/* 0: flush on user request; busy devices may be skipped */
	EXITING,	/* 1: flush at driver exit; every device is taken down */
};
- static int
- flush(const char __user *str, size_t cnt, int exiting)
- {
- ulong flags;
- struct aoedev *d, **dd;
- char buf[16];
- int all = 0;
- int specified = 0; /* flush a specific device */
- unsigned int skipflags;
- skipflags = DEVFL_GDALLOC | DEVFL_NEWSIZE | DEVFL_TKILL;
- if (!exiting && cnt >= 3) {
- if (cnt > sizeof buf)
- cnt = sizeof buf;
- if (copy_from_user(buf, str, cnt))
- return -EFAULT;
- all = !strncmp(buf, "all", 3);
- if (!all)
- specified = 1;
- }
- flush_scheduled_work();
- /* pass one: without sleeping, do aoedev_downdev */
- spin_lock_irqsave(&devlist_lock, flags);
- for (d = devlist; d; d = d->next) {
- spin_lock(&d->lock);
- if (exiting) {
- /* unconditionally take each device down */
- } else if (specified) {
- if (!user_req(buf, cnt, d))
- goto cont;
- } else if ((!all && (d->flags & DEVFL_UP))
- || d->flags & skipflags
- || d->nopen
- || d->ref)
- goto cont;
- aoedev_downdev(d);
- d->flags |= DEVFL_TKILL;
- cont:
- spin_unlock(&d->lock);
- }
- spin_unlock_irqrestore(&devlist_lock, flags);
- /* pass two: call freedev, which might sleep,
- * for aoedevs marked with DEVFL_TKILL
- */
- restart:
- spin_lock_irqsave(&devlist_lock, flags);
- for (d = devlist; d; d = d->next) {
- spin_lock(&d->lock);
- if (d->flags & DEVFL_TKILL
- && !(d->flags & DEVFL_FREEING)) {
- spin_unlock(&d->lock);
- spin_unlock_irqrestore(&devlist_lock, flags);
- freedev(d);
- goto restart;
- }
- spin_unlock(&d->lock);
- }
- /* pass three: remove aoedevs marked with DEVFL_FREED */
- for (dd = &devlist, d = *dd; d; d = *dd) {
- struct aoedev *doomed = NULL;
- spin_lock(&d->lock);
- if (d->flags & DEVFL_FREED) {
- *dd = d->next;
- doomed = d;
- } else {
- dd = &d->next;
- }
- spin_unlock(&d->lock);
- if (doomed)
- kfree(doomed->targets);
- kfree(doomed);
- }
- spin_unlock_irqrestore(&devlist_lock, flags);
- return 0;
- }
- int
- aoedev_flush(const char __user *str, size_t cnt)
- {
- return flush(str, cnt, NOT_EXITING);
- }
- /* This has been confirmed to occur once with Tms=3*1000 due to the
- * driver changing link and not processing its transmit ring. The
- * problem is hard enough to solve by returning an error that I'm
- * still punting on "solving" this.
- */
- static void
- skbfree(struct sk_buff *skb)
- {
- enum { Sms = 250, Tms = 30 * 1000};
- int i = Tms / Sms;
- if (skb == NULL)
- return;
- while (atomic_read(&skb_shinfo(skb)->dataref) != 1 && i-- > 0)
- msleep(Sms);
- if (i < 0) {
- printk(KERN_ERR
- "aoe: %s holds ref: %s\n",
- skb->dev ? skb->dev->name : "netif",
- "cannot free skb -- memory leaked.");
- return;
- }
- skb->truesize -= skb->data_len;
- skb_shinfo(skb)->nr_frags = skb->data_len = 0;
- skb_trim(skb, 0);
- dev_kfree_skb(skb);
- }
- static void
- skbpoolfree(struct aoedev *d)
- {
- struct sk_buff *skb, *tmp;
- skb_queue_walk_safe(&d->skbpool, skb, tmp)
- skbfree(skb);
- __skb_queue_head_init(&d->skbpool);
- }
- /* find it or allocate it */
- struct aoedev *
- aoedev_by_aoeaddr(ulong maj, int min, int do_alloc)
- {
- struct aoedev *d;
- int i;
- ulong flags;
- ulong sysminor = 0;
- spin_lock_irqsave(&devlist_lock, flags);
- for (d=devlist; d; d=d->next)
- if (d->aoemajor == maj && d->aoeminor == min) {
- spin_lock(&d->lock);
- if (d->flags & DEVFL_TKILL) {
- spin_unlock(&d->lock);
- d = NULL;
- goto out;
- }
- d->ref++;
- spin_unlock(&d->lock);
- break;
- }
- if (d || !do_alloc || minor_get(&sysminor, maj, min) < 0)
- goto out;
- d = kcalloc(1, sizeof *d, GFP_ATOMIC);
- if (!d)
- goto out;
- d->targets = kcalloc(NTARGETS, sizeof(*d->targets), GFP_ATOMIC);
- if (!d->targets) {
- kfree(d);
- d = NULL;
- goto out;
- }
- d->ntargets = NTARGETS;
- INIT_WORK(&d->work, aoecmd_sleepwork);
- spin_lock_init(&d->lock);
- skb_queue_head_init(&d->skbpool);
- init_timer(&d->timer);
- d->timer.data = (ulong) d;
- d->timer.function = dummy_timer;
- d->timer.expires = jiffies + HZ;
- add_timer(&d->timer);
- d->bufpool = NULL; /* defer to aoeblk_gdalloc */
- d->tgt = d->targets;
- d->ref = 1;
- for (i = 0; i < NFACTIVE; i++)
- INIT_LIST_HEAD(&d->factive[i]);
- INIT_LIST_HEAD(&d->rexmitq);
- d->sysminor = sysminor;
- d->aoemajor = maj;
- d->aoeminor = min;
- d->rttavg = RTTAVG_INIT;
- d->rttdev = RTTDEV_INIT;
- d->next = devlist;
- devlist = d;
- out:
- spin_unlock_irqrestore(&devlist_lock, flags);
- return d;
- }
- static void
- freetgt(struct aoedev *d, struct aoetgt *t)
- {
- struct frame *f;
- struct list_head *pos, *nx, *head;
- struct aoeif *ifp;
- for (ifp = t->ifs; ifp < &t->ifs[NAOEIFS]; ++ifp) {
- if (!ifp->nd)
- break;
- dev_put(ifp->nd);
- }
- head = &t->ffree;
- list_for_each_safe(pos, nx, head) {
- list_del(pos);
- f = list_entry(pos, struct frame, head);
- skbfree(f->skb);
- kfree(f);
- }
- kfree(t);
- }
- void
- aoedev_exit(void)
- {
- flush_scheduled_work();
- flush(NULL, 0, EXITING);
- }
- int __init
- aoedev_init(void)
- {
- return 0;
- }
|