62 changes: 56 additions & 6 deletions fs/fuse/dev_uring.c
@@ -22,6 +22,12 @@ MODULE_PARM_DESC(enable_uring,
#define FUSE_RING_HEADER_PG 0
#define FUSE_RING_PAYLOAD_PG 1

/* Threshold that determines whether a less busy queue should be searched for */
#define FUSE_URING_Q_THRESHOLD 2

/* Number of (re)tries to find a better queue */
#define FUSE_URING_Q_TRIES 3

/* redfs only, to allow patch backports */
#define IO_URING_F_TASK_DEAD (1 << 13)

@@ -1456,13 +1462,23 @@ static void fuse_uring_send_in_task(struct io_uring_cmd *cmd,
fuse_uring_send(ent, cmd, err, issue_flags);
}

static struct fuse_ring_queue *fuse_uring_select_queue(struct fuse_ring *ring)
static struct fuse_ring_queue *fuse_uring_select_queue(struct fuse_ring *ring,
bool background)
{
unsigned int qid;
int node;
int node, tries = 0;
unsigned int nr_queues;
unsigned int cpu = task_cpu(current);
struct fuse_ring_queue *queue, *primary_queue = NULL;

/*
* Background requests result in better performance on a different
* CPU, unless CPUs are already busy.
*/
if (background)
cpu++;

retry:
cpu = cpu % ring->max_nr_queues;

/* numa local registered queue bitmap */
@@ -1475,16 +1491,50 @@ static struct fuse_ring_queue *fuse_uring_select_queue(struct fuse_ring *ring)

nr_queues = READ_ONCE(ring->numa_q_map[node].nr_queues);
if (nr_queues) {
/* prefer the queue that corresponds to the current cpu */
queue = READ_ONCE(ring->queues[cpu]);
if (queue) {
if (queue->nr_reqs <= FUSE_URING_Q_THRESHOLD)
return queue;
primary_queue = queue;
}

qid = ring->numa_q_map[node].cpu_to_qid[cpu];
if (WARN_ON_ONCE(qid >= ring->max_nr_queues))
return NULL;
return READ_ONCE(ring->queues[qid]);
if (qid != cpu) {
queue = READ_ONCE(ring->queues[qid]);

/* Might happen on teardown */
if (unlikely(!queue))
return NULL;

if (queue->nr_reqs <= FUSE_URING_Q_THRESHOLD)
return queue;
}

/* Retries help with load balancing */
if (tries < FUSE_URING_Q_TRIES && tries + 1 < nr_queues) {
if (!primary_queue)
primary_queue = queue;

/* Increase cpu, assuming it will map to a different qid */
cpu++;
tries++;
goto retry;
}
}

/* Retries exceeded, take the primary target queue */
if (primary_queue)
return primary_queue;

/* global registered queue bitmap */
qid = ring->q_map.cpu_to_qid[cpu];
if (WARN_ON_ONCE(qid >= ring->max_nr_queues))
if (WARN_ON_ONCE(qid >= ring->max_nr_queues)) {
/* Might happen on teardown */
return NULL;
}
return READ_ONCE(ring->queues[qid]);
}
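Taken together, the selection logic above probes the queue mapped to the current CPU, accepts it when its load is at or below FUSE_URING_Q_THRESHOLD, and otherwise probes up to FUSE_URING_Q_TRIES neighbouring CPUs before falling back to the first candidate. A minimal userspace sketch of that heuristic; struct queue, select_queue and the sample loads are hypothetical stand-ins, not the kernel code:

#include <stdbool.h>
#include <stdio.h>

#define Q_THRESHOLD 2	/* queue is "good enough" at or below this load */
#define Q_TRIES     3	/* bounded number of neighbours to probe */

struct queue {
	int nr_reqs;	/* requests currently queued */
};

static struct queue *select_queue(struct queue *queues, unsigned int nr_queues,
				  unsigned int cpu, bool background)
{
	struct queue *primary = NULL;
	unsigned int tries = 0;

	/* Background requests start one CPU over, as in the patch. */
	if (background)
		cpu++;

	for (;;) {
		struct queue *q = &queues[cpu % nr_queues];

		if (q->nr_reqs <= Q_THRESHOLD)
			return q;	/* lightly loaded: take it */
		if (!primary)
			primary = q;	/* remember the first candidate */
		if (tries >= Q_TRIES || tries + 1 >= nr_queues)
			break;		/* searched enough */
		cpu++;			/* probe the next queue */
		tries++;
	}
	return primary;			/* everything busy: fall back */
}

int main(void)
{
	struct queue queues[4] = { { 5 }, { 4 }, { 1 }, { 6 } };
	struct queue *q = select_queue(queues, 4, 0, false);

	printf("picked queue with %d pending requests\n", q->nr_reqs);
	return 0;
}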

@@ -1525,7 +1575,7 @@ void fuse_uring_queue_fuse_req(struct fuse_iqueue *fiq, struct fuse_req *req)
int err;

err = -EINVAL;
queue = fuse_uring_select_queue(ring);
queue = fuse_uring_select_queue(ring, false);
if (!queue)
goto err;

@@ -1567,7 +1617,7 @@ bool fuse_uring_queue_bq_req(struct fuse_req *req)
struct fuse_ring_queue *queue;
struct fuse_ring_ent *ent = NULL;

queue = fuse_uring_select_queue(ring);
queue = fuse_uring_select_queue(ring, true);
if (!queue)
return false;

88 changes: 58 additions & 30 deletions fs/fuse/file.c
@@ -758,6 +758,18 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
struct inode *inode = file_inode(io->iocb->ki_filp);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
struct address_space *mapping = io->iocb->ki_filp->f_mapping;

/*
* As in generic_file_direct_write(), invalidate after the
* write, to invalidate read-ahead cache that may have competed
* with the write.
*/
if (io->write && res && mapping->nrpages) {
invalidate_inode_pages2_range(mapping,
io->offset >> PAGE_SHIFT,
(io->offset + res - 1) >> PAGE_SHIFT);
}

spin_lock(&fi->lock);
fi->attr_version = atomic64_inc_return(&fc->attr_version);
@@ -1132,9 +1144,11 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
{
struct kiocb *iocb = ia->io->iocb;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct fuse_file *ff = file->private_data;
struct fuse_mount *fm = ff->fm;
struct fuse_write_in *inarg = &ia->write.in;
ssize_t written;
ssize_t err;

fuse_write_args_fill(ia, ff, pos, count);
@@ -1148,10 +1162,26 @@ static ssize_t fuse_send_write(struct fuse_io_args *ia, loff_t pos,
return fuse_async_req_send(fm, ia, count);

err = fuse_simple_request(fm, &ia->ap.args);
if (!err && ia->write.out.size > count)
written = ia->write.out.size;
if (!err && written > count)
err = -EIO;

return err ?: ia->write.out.size;
/*
* Without FOPEN_DIRECT_IO, generic_file_direct_write() does the
* invalidation for us.
*/
if (!err && written && mapping->nrpages &&
(ff->open_flags & FOPEN_DIRECT_IO)) {
/*
* As in generic_file_direct_write(), invalidate after the
* write, to invalidate read-ahead cache that may have competed
* with the write.
*/
invalidate_inode_pages2_range(mapping, pos >> PAGE_SHIFT,
(pos + written - 1) >> PAGE_SHIFT);
}

return err ?: written;
}
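The range handed to invalidate_inode_pages2_range() above is simply the span of page indices the write overlapped. A tiny standalone sketch of the index arithmetic, assuming 4 KiB pages (the values are illustrative only):

#include <stdio.h>

#define PAGE_SHIFT 12	/* 4 KiB pages assumed for this example */

int main(void)
{
	long long pos = 6144;		/* write starts in the middle of page 1 */
	long long written = 8192;	/* two pages' worth of data */

	long long first = pos >> PAGE_SHIFT;			/* 1 */
	long long last = (pos + written - 1) >> PAGE_SHIFT;	/* 3 */

	/*
	 * invalidate_inode_pages2_range(mapping, first, last) would then
	 * drop cached pages 1..3, i.e. every page the write overlapped.
	 */
	printf("invalidate pages %lld..%lld\n", first, last);
	return 0;
}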

bool fuse_write_update_attr(struct inode *inode, loff_t pos, ssize_t written)
@@ -1676,15 +1706,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
if (res > 0)
*ppos = pos;

if (res > 0 && write && fopen_direct_io) {
/*
* As in generic_file_direct_write(), invalidate after the
* write, to invalidate read-ahead cache that may have competed
* with the write.
*/
invalidate_inode_pages2_range(mapping, idx_from, idx_to);
}

return res > 0 ? res : err;
}
EXPORT_SYMBOL_GPL(fuse_direct_io);
@@ -2036,21 +2057,6 @@ static int fuse_writepage_locked(struct page *page)
return error;
}

static int fuse_writepage(struct page *page, struct writeback_control *wbc)
{
struct fuse_conn *fc = get_fuse_conn(page->mapping->host);
int err;

if (wbc->sync_mode == WB_SYNC_NONE &&
fc->num_background >= fc->congestion_threshold)
return AOP_WRITEPAGE_ACTIVATE;

err = fuse_writepage_locked(page);
unlock_page(page);

return err;
}

struct fuse_fill_wb_data {
struct fuse_writepage_args *wpa;
struct fuse_file *ff;
@@ -2099,8 +2105,9 @@ static void fuse_writepages_send(struct fuse_fill_wb_data *data)


static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
struct fuse_args_pages *ap,
struct fuse_fill_wb_data *data)
struct fuse_args_pages *ap,
struct fuse_fill_wb_data *data,
struct writeback_control *wbc)
{
WARN_ON(!ap->num_pages);

@@ -2120,6 +2127,17 @@ static bool fuse_writepage_need_send(struct fuse_conn *fc, struct page *page,
if (ap->num_pages == data->max_pages && !fuse_pages_realloc(data))
return true;

/* Reached an alignment boundary */
if (fc->alignment_pages && !(page->index % fc->alignment_pages)) {
/* The write is aligned at this point; check whether the
 * next alignment boundary can still be reached. */
if (page->index + fc->alignment_pages > wbc->range_end)
return true;

if (ap->num_pages + fc->alignment_pages > fc->max_pages)
return true;
}

return false;
}
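With a concrete value such as fc->alignment_pages = 256 (1 MiB of 4 KiB pages), the new check cuts the request whenever the next page sits on a 256-page boundary and the following boundary can no longer be reached, either because the writeback range ends first or because adding another aligned chunk would exceed max_pages. A small standalone model of that decision; struct params, cut_at_alignment and the sample numbers are hypothetical:

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the connection fields the check relies on. */
struct params {
	unsigned long alignment_pages;	/* fc->alignment_pages */
	unsigned long max_pages;	/* fc->max_pages */
};

static bool cut_at_alignment(const struct params *p, unsigned long index,
			     unsigned long num_pages, unsigned long range_end)
{
	if (!p->alignment_pages || (index % p->alignment_pages))
		return false;		/* not on an alignment boundary */

	/* the next boundary lies beyond the writeback range */
	if (index + p->alignment_pages > range_end)
		return true;

	/* a further aligned chunk would not fit into this request */
	if (num_pages + p->alignment_pages > p->max_pages)
		return true;

	return false;
}

int main(void)
{
	struct params p = { .alignment_pages = 256, .max_pages = 256 };

	/*
	 * 100 pages collected so far, page index 256 is a boundary and
	 * another 256 pages would exceed max_pages -> send what we have.
	 */
	printf("%d\n", cut_at_alignment(&p, 256, 100, 1024));
	return 0;
}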

Expand All @@ -2141,7 +2159,7 @@ static int fuse_writepages_fill(struct folio *folio,
goto out_unlock;
}

if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data)) {
if (wpa && fuse_writepage_need_send(fc, &folio->page, ap, data, wbc)) {
fuse_writepages_send(data);
data->wpa = NULL;
}
@@ -3184,6 +3202,16 @@ static ssize_t fuse_copy_file_range(struct file *src_file, loff_t src_off,
return ret;
}

#ifdef CONFIG_MIGRATION
int fuse_migrate_folio(struct address_space *mapping, struct folio *dst,
struct folio *src, enum migrate_mode mode)
{
if (folio_test_writeback(src))
return -EBUSY;
return filemap_migrate_folio(mapping, dst, src, mode);
}
#endif

static const struct file_operations fuse_file_operations = {
.llseek = fuse_file_llseek,
.read_iter = fuse_file_read_iter,
@@ -3208,10 +3236,10 @@ static const struct address_space_operations fuse_file_aops = {
static const struct address_space_operations fuse_file_aops = {
.read_folio = fuse_read_folio,
.readahead = fuse_readahead,
.writepage = fuse_writepage,
.writepages = fuse_writepages,
.launder_folio = fuse_launder_folio,
.dirty_folio = filemap_dirty_folio,
.migrate_folio = fuse_migrate_folio,
.bmap = fuse_bmap,
.direct_IO = fuse_direct_IO,
.write_begin = fuse_write_begin,
10 changes: 10 additions & 0 deletions fs/fuse/fuse_i.h
@@ -931,6 +931,10 @@ struct fuse_conn {
/** uring connection information*/
struct fuse_ring *ring;
#endif

/* The file offset alignment for optimal IO, in units of pages */
unsigned int alignment_pages;

};

/*
Expand Down Expand Up @@ -1473,6 +1477,12 @@ struct fuse_file *fuse_file_open(struct fuse_mount *fm, u64 nodeid,
bool isdir);
void fuse_file_release(struct inode *inode, struct fuse_file *ff,
unsigned int open_flags, fl_owner_t id, bool isdir);
#ifdef CONFIG_MIGRATION
int fuse_migrate_folio(struct address_space *mapping, struct folio *dst,
struct folio *src, enum migrate_mode mode);
#else
#define fuse_migrate_folio NULL
#endif

8 changes: 8 additions & 0 deletions fs/fuse/inode.c
@@ -1429,6 +1429,14 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
fc->direct_io_allow_mmap = 1;
if (flags & FUSE_OVER_IO_URING && fuse_uring_enabled())
fc->io_uring = 1;

if (flags & FUSE_ALIGN_PG_ORDER) {
if (arg->align_page_order > 0) {
fc->alignment_pages = (1UL << arg->align_page_order) >> PAGE_SHIFT;
}
}
if (flags & FUSE_NO_EXPORT_SUPPORT)
fm->sb->s_export_op = &fuse_export_fid_operations;
if (flags & FUSE_INVAL_INODE_ENTRY)
10 changes: 9 additions & 1 deletion include/uapi/linux/fuse.h
@@ -429,6 +429,8 @@ struct fuse_file_lock {
* FUSE_OVER_IO_URING: Indicate that client supports io-uring
* FUSE_INVAL_INODE_ENTRY: invalidate inode aliases when doing inode invalidation
* FUSE_EXPIRE_INODE_ENTRY: expire inode aliases when doing inode invalidation
* FUSE_ALIGN_PG_ORDER: init_out.align_page_order gives the page order (power-of-two
* exponent of the number of pages) for optimal IO-size alignment
* FUSE_URING_REDUCED_Q: Client (kernel) supports fewer queues - the server is free
* to register between 1 and nr-core io-uring queues
*/
@@ -903,6 +905,9 @@ struct fuse_init_in {
#define FUSE_COMPAT_INIT_OUT_SIZE 8
#define FUSE_COMPAT_22_INIT_OUT_SIZE 24

/*
* align_page_order: page order (log2 of the number of pages) for optimal IO;
* IO should be aligned to that size or to a multiple of it
*/
struct fuse_init_out {
uint32_t major;
uint32_t minor;
Expand All @@ -915,7 +920,10 @@ struct fuse_init_out {
uint16_t max_pages;
uint16_t map_alignment;
uint32_t flags2;
uint32_t unused[7];
uint32_t max_stack_depth;
uint8_t align_page_order;
uint8_t padding[3];
uint32_t unused[5];
};

#define CUSE_INIT_INFO_MAX 4096
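Assuming align_page_order is the power-of-two exponent of the number of pages, as described above, a server that wants 1 MiB-aligned IO on a 4 KiB-page system would report log2(1 MiB / 4 KiB) = 8. A hypothetical server-side helper for picking the value (not part of the kernel or libfuse API):

#include <stdint.h>
#include <stdio.h>

static uint8_t align_page_order_for(size_t optimal_io_bytes, size_t page_size)
{
	size_t pages = optimal_io_bytes / page_size;
	uint8_t order = 0;

	while (((size_t)1 << order) < pages)
		order++;
	return order;
}

int main(void)
{
	/* 1 MiB optimal IO with 4 KiB pages -> 256 pages -> order 8 */
	printf("align_page_order = %u\n",
	       (unsigned)align_page_order_for(1 << 20, 4096));
	return 0;
}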