Skip to content

Commit

Permalink
ext4: use transaction reservation for extent conversion in ext4_end_io
Browse files Browse the repository at this point in the history
Later we would like to clear PageWriteback bit only after extent
conversion from unwritten to written extents is performed.  However it
is not possible to start a transaction after PageWriteback is set
because that violates lock ordering (and is easy to deadlock).  So we
have to reserve a transaction before locking pages and sending them
for IO and later we use the transaction for extent conversion from
ext4_end_io().

Reviewed-by: Zheng Liu <wenqing.lz@taobao.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
  • Loading branch information
jankara authored and tytso committed Jun 4, 2013
1 parent 3613d22 commit 6b523df
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 24 deletions.
12 changes: 9 additions & 3 deletions fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,13 @@ struct ext4_map_blocks {
#define EXT4_IO_END_DIRECT 0x0004

/*
* For converting uninitialized extents on a work queue.
* For converting uninitialized extents on a work queue. 'handle' is used for
* buffered writeback.
*/
typedef struct ext4_io_end {
struct list_head list; /* per-file finished IO list */
handle_t *handle; /* handle reserved for extent
* conversion */
struct inode *inode; /* file being written to */
unsigned int flag; /* unwritten or not */
loff_t offset; /* offset in the file */
Expand Down Expand Up @@ -1322,6 +1325,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode,
struct ext4_io_end *io_end)
{
if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
/* Writeback has to have coversion transaction reserved */
WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle &&
!(io_end->flag & EXT4_IO_END_DIRECT));
io_end->flag |= EXT4_IO_END_UNWRITTEN;
atomic_inc(&EXT4_I(inode)->i_unwritten);
}
Expand Down Expand Up @@ -2591,8 +2597,8 @@ extern void ext4_ext_init(struct super_block *);
extern void ext4_ext_release(struct super_block *);
extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
loff_t len);
extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
ssize_t len);
extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
loff_t offset, ssize_t len);
extern int ext4_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map, int flags);
extern int ext4_ext_calc_metadata_amount(struct inode *inode,
Expand Down
5 changes: 3 additions & 2 deletions fs/ext4/ext4_jbd2.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode)
#define EXT4_HT_MIGRATE 8
#define EXT4_HT_MOVE_EXTENTS 9
#define EXT4_HT_XATTR 10
#define EXT4_HT_MAX 11
#define EXT4_HT_EXT_CONVERT 11
#define EXT4_HT_MAX 12

/**
* struct ext4_journal_cb_entry - Base structure for callback information.
Expand Down Expand Up @@ -319,7 +320,7 @@ static inline handle_t *__ext4_journal_start(struct inode *inode,
#define ext4_journal_stop(handle) \
__ext4_journal_stop(__func__, __LINE__, (handle))

#define ext4_journal_start_reserve(handle, type) \
#define ext4_journal_start_reserved(handle, type) \
__ext4_journal_start_reserved((handle), __LINE__, (type))

handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
Expand Down
40 changes: 29 additions & 11 deletions fs/ext4/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -4566,10 +4566,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
* function, to convert the fallocated extents after IO is completed.
* Returns 0 on success.
*/
int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
ssize_t len)
int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode,
loff_t offset, ssize_t len)
{
handle_t *handle;
unsigned int max_blocks;
int ret = 0;
int ret2 = 0;
Expand All @@ -4584,16 +4583,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) -
map.m_lblk);
/*
* credits to insert 1 extent into extent tree
* This is somewhat ugly but the idea is clear: When transaction is
* reserved, everything goes into it. Otherwise we rather start several
* smaller transactions for conversion of each extent separately.
*/
credits = ext4_chunk_trans_blocks(inode, max_blocks);
if (handle) {
handle = ext4_journal_start_reserved(handle,
EXT4_HT_EXT_CONVERT);
if (IS_ERR(handle))
return PTR_ERR(handle);
credits = 0;
} else {
/*
* credits to insert 1 extent into extent tree
*/
credits = ext4_chunk_trans_blocks(inode, max_blocks);
}
while (ret >= 0 && ret < max_blocks) {
map.m_lblk += ret;
map.m_len = (max_blocks -= ret);
handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
break;
if (credits) {
handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
break;
}
}
ret = ext4_map_blocks(handle, inode, &map,
EXT4_GET_BLOCKS_IO_CONVERT_EXT);
Expand All @@ -4604,10 +4619,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
inode->i_ino, map.m_lblk,
map.m_len, ret);
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
if (ret <= 0 || ret2 )
if (credits)
ret2 = ext4_journal_stop(handle);
if (ret <= 0 || ret2)
break;
}
if (!credits)
ret2 = ext4_journal_stop(handle);
return ret > 0 ? ret2 : ret;
}

Expand Down
25 changes: 20 additions & 5 deletions fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -1410,6 +1410,7 @@ static void ext4_da_page_release_reservation(struct page *page,
struct mpage_da_data {
struct inode *inode;
struct writeback_control *wbc;

pgoff_t first_page; /* The first page to write */
pgoff_t next_page; /* Current page to examine */
pgoff_t last_page; /* Last page to examine */
Expand Down Expand Up @@ -2108,8 +2109,14 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
if (err < 0)
return err;
if (map->m_flags & EXT4_MAP_UNINIT)
if (map->m_flags & EXT4_MAP_UNINIT) {
if (!mpd->io_submit.io_end->handle &&
ext4_handle_valid(handle)) {
mpd->io_submit.io_end->handle = handle->h_rsv_handle;
handle->h_rsv_handle = NULL;
}
ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end);
}

BUG_ON(map->m_len == 0);
if (map->m_flags & EXT4_MAP_NEW) {
Expand Down Expand Up @@ -2351,7 +2358,7 @@ static int ext4_da_writepages(struct address_space *mapping,
handle_t *handle = NULL;
struct mpage_da_data mpd;
struct inode *inode = mapping->host;
int needed_blocks, ret = 0;
int needed_blocks, rsv_blocks = 0, ret = 0;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
bool done;
struct blk_plug plug;
Expand Down Expand Up @@ -2379,6 +2386,14 @@ static int ext4_da_writepages(struct address_space *mapping,
if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED))
return -EROFS;

if (ext4_should_dioread_nolock(inode)) {
/*
* We may need to convert upto one extent per block in
* the page and we may dirty the inode.
*/
rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
}

/*
* If we have inline data and arrive here, it means that
* we will soon create the block for the 1st page, so
Expand Down Expand Up @@ -2438,8 +2453,8 @@ static int ext4_da_writepages(struct address_space *mapping,
needed_blocks = ext4_da_writepages_trans_blocks(inode);

/* start a new transaction */
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
needed_blocks);
handle = ext4_journal_start_with_reserve(inode,
EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: "
Expand Down Expand Up @@ -3120,7 +3135,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
* for non AIO case, since the IO is already
* completed, we could do the conversion right here
*/
err = ext4_convert_unwritten_extents(inode,
err = ext4_convert_unwritten_extents(NULL, inode,
offset, ret);
if (err < 0)
ret = err;
Expand Down
11 changes: 8 additions & 3 deletions fs/ext4/page-io.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
{
BUG_ON(!list_empty(&io_end->list));
BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
WARN_ON(io_end->handle);

if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
wake_up_all(ext4_ioend_wq(io_end->inode));
Expand All @@ -92,13 +93,15 @@ static int ext4_end_io(ext4_io_end_t *io)
struct inode *inode = io->inode;
loff_t offset = io->offset;
ssize_t size = io->size;
handle_t *handle = io->handle;
int ret = 0;

ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
"list->prev 0x%p\n",
io, inode->i_ino, io->list.next, io->list.prev);

ret = ext4_convert_unwritten_extents(inode, offset, size);
io->handle = NULL; /* Following call will use up the handle */
ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
if (ret < 0) {
ext4_msg(inode->i_sb, KERN_EMERG,
"failed to convert unwritten extents to written "
Expand Down Expand Up @@ -228,8 +231,10 @@ int ext4_put_io_end(ext4_io_end_t *io_end)

if (atomic_dec_and_test(&io_end->count)) {
if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
err = ext4_convert_unwritten_extents(io_end->inode,
io_end->offset, io_end->size);
err = ext4_convert_unwritten_extents(io_end->handle,
io_end->inode, io_end->offset,
io_end->size);
io_end->handle = NULL;
ext4_clear_io_unwritten_flag(io_end);
}
ext4_release_io_end(io_end);
Expand Down

0 comments on commit 6b523df

Please sign in to comment.