Skip to content
3 changes: 2 additions & 1 deletion block/bfq-cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -940,7 +940,8 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
list_for_each_entry(blkg, &bfqd->queue->blkg_list, q_node) {
struct bfq_group *bfqg = blkg_to_bfqg(blkg);

bfq_end_wr_async_queues(bfqd, bfqg);
if (bfqg)
bfq_end_wr_async_queues(bfqd, bfqg);
}
bfq_end_wr_async_queues(bfqd, bfqd->root_group);
}
Expand Down
6 changes: 6 additions & 0 deletions block/bfq-iosched.c
Original file line number Diff line number Diff line change
Expand Up @@ -2645,6 +2645,9 @@ static void bfq_end_wr(struct bfq_data *bfqd)
struct bfq_queue *bfqq;
int i;

#ifdef CONFIG_BFQ_GROUP_IOSCHED
mutex_lock(&bfqd->queue->blkcg_mutex);
#endif
spin_lock_irq(&bfqd->lock);

for (i = 0; i < bfqd->num_actuators; i++) {
Expand All @@ -2656,6 +2659,9 @@ static void bfq_end_wr(struct bfq_data *bfqd)
bfq_end_wr_async(bfqd);

spin_unlock_irq(&bfqd->lock);
#ifdef CONFIG_BFQ_GROUP_IOSCHED
mutex_unlock(&bfqd->queue->blkcg_mutex);
#endif
}

static sector_t bfq_io_struct_pos(void *io_struct, bool request)
Expand Down
205 changes: 67 additions & 138 deletions block/blk-cgroup.c
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,7 @@ static void blkg_destroy_all(struct gendisk *disk)
int i;

restart:
mutex_lock(&q->blkcg_mutex);
spin_lock_irq(&q->queue_lock);
list_for_each_entry(blkg, &q->blkg_list, q_node) {
struct blkcg *blkcg = blkg->blkcg;
Expand All @@ -592,6 +593,7 @@ static void blkg_destroy_all(struct gendisk *disk)
if (!(--count)) {
count = BLKG_DESTROY_BATCH_SIZE;
spin_unlock_irq(&q->queue_lock);
mutex_unlock(&q->blkcg_mutex);
cond_resched();
goto restart;
}
Expand All @@ -611,6 +613,7 @@ static void blkg_destroy_all(struct gendisk *disk)

q->root_blkg = NULL;
spin_unlock_irq(&q->queue_lock);
mutex_unlock(&q->blkcg_mutex);
}

static void blkg_iostat_set(struct blkg_iostat *dst, struct blkg_iostat *src)
Expand Down Expand Up @@ -799,49 +802,15 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
return -ENODEV;
}

mutex_lock(&bdev->bd_queue->rq_qos_mutex);
if (!disk_live(bdev->bd_disk)) {
blkdev_put_no_open(bdev);
mutex_unlock(&bdev->bd_queue->rq_qos_mutex);
return -ENODEV;
}

ctx->body = input;
ctx->bdev = bdev;
return 0;
}
/*
* Similar to blkg_conf_open_bdev, but additionally freezes the queue,
* ensures the correct locking order between freeze queue and q->rq_qos_mutex.
*
* This function returns negative error on failure. On success it returns
* memflags which must be saved and later passed to blkg_conf_exit_frozen
* for restoring the memalloc scope.
*/
unsigned long __must_check blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx)
{
int ret;
unsigned long memflags;

if (ctx->bdev)
return -EINVAL;

ret = blkg_conf_open_bdev(ctx);
if (ret < 0)
return ret;
/*
* At this point, we haven’t started protecting anything related to QoS,
* so we release q->rq_qos_mutex here, which was first acquired in blkg_
* conf_open_bdev. Later, we re-acquire q->rq_qos_mutex after freezing
* the queue to maintain the correct locking order.
*/
mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex);

memflags = blk_mq_freeze_queue(ctx->bdev->bd_queue);
mutex_lock(&ctx->bdev->bd_queue->rq_qos_mutex);

return memflags;
}

/**
* blkg_conf_prep - parse and prepare for per-blkg config update
Expand Down Expand Up @@ -975,36 +944,20 @@ EXPORT_SYMBOL_GPL(blkg_conf_prep);
*/
void blkg_conf_exit(struct blkg_conf_ctx *ctx)
__releases(&ctx->bdev->bd_queue->queue_lock)
__releases(&ctx->bdev->bd_queue->rq_qos_mutex)
{
if (ctx->blkg) {
spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock);
ctx->blkg = NULL;
}

if (ctx->bdev) {
mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex);
blkdev_put_no_open(ctx->bdev);
ctx->body = NULL;
ctx->bdev = NULL;
}
}
EXPORT_SYMBOL_GPL(blkg_conf_exit);

/*
* Similar to blkg_conf_exit, but also unfreezes the queue. Should be used
* when blkg_conf_open_bdev_frozen is used to open the bdev.
*/
void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags)
{
if (ctx->bdev) {
struct request_queue *q = ctx->bdev->bd_queue;

blkg_conf_exit(ctx);
blk_mq_unfreeze_queue(q, memflags);
}
}

static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src)
{
int i;
Expand Down Expand Up @@ -1559,6 +1512,31 @@ struct cgroup_subsys io_cgrp_subsys = {
};
EXPORT_SYMBOL_GPL(io_cgrp_subsys);

/*
 * Tear down per-blkg policy data for @pol on @q.
 *
 * Walks every blkg currently linked on @q->blkg_list and, for each one
 * that has policy data installed for @pol, offlines it (if it was online
 * and the policy provides ->pd_offline_fn), frees it via ->pd_free_fn,
 * and clears the blkg's slot so later lookups see no pd.
 *
 * Locking: each blkg's policy-data slot is updated under its owning
 * blkcg->lock.  NOTE(review): the blkg_list walk itself is not locked
 * here — per the comment in the activate path, q->blkcg_mutex protects
 * blkg_list iteration, so callers are expected to hold it (both call
 * sites visible in this change do).
 */
static void blkcg_policy_teardown_pds(struct request_queue *q,
				      const struct blkcg_policy *pol)
{
	struct blkcg_gq *blkg;

	list_for_each_entry(blkg, &q->blkg_list, q_node) {
		struct blkcg *blkcg = blkg->blkcg;
		struct blkg_policy_data *pd;

		/* blkcg->lock serializes against other pd[] updates */
		spin_lock(&blkcg->lock);
		pd = blkg->pd[pol->plid];
		if (pd) {
			/* offline before freeing, but only if it went online */
			if (pd->online && pol->pd_offline_fn)
				pol->pd_offline_fn(pd);
			pd->online = false;
			pol->pd_free_fn(pd);
			/* clear the slot so the pd can't be found again */
			blkg->pd[pol->plid] = NULL;
		}
		spin_unlock(&blkcg->lock);
	}
}

/**
* blkcg_activate_policy - activate a blkcg policy on a gendisk
* @disk: gendisk of interest
Expand All @@ -1578,8 +1556,7 @@ EXPORT_SYMBOL_GPL(io_cgrp_subsys);
int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
{
struct request_queue *q = disk->queue;
struct blkg_policy_data *pd_prealloc = NULL;
struct blkcg_gq *blkg, *pinned_blkg = NULL;
struct blkcg_gq *blkg;
unsigned int memflags;
int ret;

Expand All @@ -1594,99 +1571,65 @@ int blkcg_activate_policy(struct gendisk *disk, const struct blkcg_policy *pol)
if (WARN_ON_ONCE(!pol->pd_alloc_fn || !pol->pd_free_fn))
return -EINVAL;

if (queue_is_mq(q))
memflags = blk_mq_freeze_queue(q);
retry:
spin_lock_irq(&q->queue_lock);

/* blkg_list is pushed at the head, reverse walk to initialize parents first */
/*
* Allocate all pds before freezing queue. Some policies like iocost
* and iolatency do percpu allocation in pd_alloc_fn(), which can
* deadlock with queue frozen because percpu memory reclaim may issue
* IO. blkcg_mutex protects q->blkg_list iteration.
*/
mutex_lock(&q->blkcg_mutex);
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
struct blkg_policy_data *pd;

if (blkg->pd[pol->plid])
/* Skip dying blkg */
if (hlist_unhashed(&blkg->blkcg_node))
continue;

/* If prealloc matches, use it; otherwise try GFP_NOWAIT */
if (blkg == pinned_blkg) {
pd = pd_prealloc;
pd_prealloc = NULL;
} else {
pd = pol->pd_alloc_fn(disk, blkg->blkcg,
GFP_NOWAIT);
}

pd = pol->pd_alloc_fn(disk, blkg->blkcg, GFP_KERNEL);
if (!pd) {
/*
* GFP_NOWAIT failed. Free the existing one and
* prealloc for @blkg w/ GFP_KERNEL.
*/
if (pinned_blkg)
blkg_put(pinned_blkg);
blkg_get(blkg);
pinned_blkg = blkg;

spin_unlock_irq(&q->queue_lock);

if (pd_prealloc)
pol->pd_free_fn(pd_prealloc);
pd_prealloc = pol->pd_alloc_fn(disk, blkg->blkcg,
GFP_KERNEL);
if (pd_prealloc)
goto retry;
else
goto enomem;
ret = -ENOMEM;
goto err_teardown;
}

spin_lock(&blkg->blkcg->lock);

pd->blkg = blkg;
pd->plid = pol->plid;
pd->online = false;
blkg->pd[pol->plid] = pd;
}

/* Now freeze queue and initialize/online all pds */
if (queue_is_mq(q))
memflags = blk_mq_freeze_queue(q);

spin_lock_irq(&q->queue_lock);
list_for_each_entry_reverse(blkg, &q->blkg_list, q_node) {
struct blkg_policy_data *pd = blkg->pd[pol->plid];

/* Skip dying blkg */
if (hlist_unhashed(&blkg->blkcg_node))
continue;

spin_lock(&blkg->blkcg->lock);
if (pol->pd_init_fn)
pol->pd_init_fn(pd);

if (pol->pd_online_fn)
pol->pd_online_fn(pd);
pd->online = true;

spin_unlock(&blkg->blkcg->lock);
}

__set_bit(pol->plid, q->blkcg_pols);
ret = 0;

spin_unlock_irq(&q->queue_lock);
out:

if (queue_is_mq(q))
blk_mq_unfreeze_queue(q, memflags);
if (pinned_blkg)
blkg_put(pinned_blkg);
if (pd_prealloc)
pol->pd_free_fn(pd_prealloc);
return ret;

enomem:
/* alloc failed, take down everything */
spin_lock_irq(&q->queue_lock);
list_for_each_entry(blkg, &q->blkg_list, q_node) {
struct blkcg *blkcg = blkg->blkcg;
struct blkg_policy_data *pd;
mutex_unlock(&q->blkcg_mutex);
return 0;

spin_lock(&blkcg->lock);
pd = blkg->pd[pol->plid];
if (pd) {
if (pd->online && pol->pd_offline_fn)
pol->pd_offline_fn(pd);
pd->online = false;
pol->pd_free_fn(pd);
blkg->pd[pol->plid] = NULL;
}
spin_unlock(&blkcg->lock);
}
spin_unlock_irq(&q->queue_lock);
ret = -ENOMEM;
goto out;
err_teardown:
blkcg_policy_teardown_pds(q, pol);
mutex_unlock(&q->blkcg_mutex);
return ret;
}
EXPORT_SYMBOL_GPL(blkcg_activate_policy);

Expand All @@ -1702,38 +1645,24 @@ void blkcg_deactivate_policy(struct gendisk *disk,
const struct blkcg_policy *pol)
{
struct request_queue *q = disk->queue;
struct blkcg_gq *blkg;
unsigned int memflags;

if (!blkcg_policy_enabled(q, pol))
return;

/* Same locking order as blkcg_activate_policy(): mutex -> freeze */
mutex_lock(&q->blkcg_mutex);
if (queue_is_mq(q))
memflags = blk_mq_freeze_queue(q);

mutex_lock(&q->blkcg_mutex);
spin_lock_irq(&q->queue_lock);

__clear_bit(pol->plid, q->blkcg_pols);

list_for_each_entry(blkg, &q->blkg_list, q_node) {
struct blkcg *blkcg = blkg->blkcg;

spin_lock(&blkcg->lock);
if (blkg->pd[pol->plid]) {
if (blkg->pd[pol->plid]->online && pol->pd_offline_fn)
pol->pd_offline_fn(blkg->pd[pol->plid]);
pol->pd_free_fn(blkg->pd[pol->plid]);
blkg->pd[pol->plid] = NULL;
}
spin_unlock(&blkcg->lock);
}

blkcg_policy_teardown_pds(q, pol);
spin_unlock_irq(&q->queue_lock);
mutex_unlock(&q->blkcg_mutex);

if (queue_is_mq(q))
blk_mq_unfreeze_queue(q, memflags);
mutex_unlock(&q->blkcg_mutex);
}
EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);

Expand Down
2 changes: 0 additions & 2 deletions block/blk-cgroup.h
Original file line number Diff line number Diff line change
Expand Up @@ -219,11 +219,9 @@ struct blkg_conf_ctx {

void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input);
int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx);
unsigned long blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx);
int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol,
struct blkg_conf_ctx *ctx);
void blkg_conf_exit(struct blkg_conf_ctx *ctx);
void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags);

/**
* bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg
Expand Down
Loading