Skip to content

Commit

Permalink
ext4: Wait for proper transaction commit on fsync
Browse files Browse the repository at this point in the history
We cannot rely on buffer dirty bits during fsync because pdflush can come
before fsync is called and clear dirty bits without forcing a transaction
commit. What we do is that we track which transaction has last changed
the inode and which transaction last changed allocation and force it to
disk on fsync.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
  • Loading branch information
jankara authored and tytso committed Dec 9, 2009
1 parent 194074a commit b436b9b
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 31 deletions.
7 changes: 7 additions & 0 deletions fs/ext4/ext4.h
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,13 @@ struct ext4_inode_info {
struct list_head i_aio_dio_complete_list;
/* current io_end structure for async DIO write*/
ext4_io_end_t *cur_aio_dio;

/*
* Transactions that contain inode's metadata needed to complete
* fsync and fdatasync, respectively.
*/
tid_t i_sync_tid;
tid_t i_datasync_tid;
};

/*
Expand Down
13 changes: 13 additions & 0 deletions fs/ext4/ext4_jbd2.h
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
return 0;
}

static inline void ext4_update_inode_fsync_trans(handle_t *handle,
struct inode *inode,
int datasync)
{
struct ext4_inode_info *ei = EXT4_I(inode);

if (ext4_handle_valid(handle)) {
ei->i_sync_tid = handle->h_transaction->t_tid;
if (datasync)
ei->i_datasync_tid = handle->h_transaction->t_tid;
}
}

/* super.c */
int ext4_force_commit(struct super_block *sb);

Expand Down
14 changes: 12 additions & 2 deletions fs/ext4/extents.c
Original file line number Diff line number Diff line change
Expand Up @@ -3058,6 +3058,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
ret = ext4_convert_unwritten_extents_dio(handle, inode,
path);
if (ret >= 0)
ext4_update_inode_fsync_trans(handle, inode, 1);
goto out2;
}
/* buffered IO case */
Expand Down Expand Up @@ -3085,6 +3087,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
ret = ext4_ext_convert_to_initialized(handle, inode,
path, iblock,
max_blocks);
if (ret >= 0)
ext4_update_inode_fsync_trans(handle, inode, 1);
out:
if (ret <= 0) {
err = ret;
Expand Down Expand Up @@ -3323,10 +3327,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
allocated = ext4_ext_get_actual_len(&newex);
set_buffer_new(bh_result);

/* Cache only when it is _not_ an uninitialized extent */
if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0)
/*
* Cache the extent and update transaction to commit on fdatasync only
* when it is _not_ an uninitialized extent.
*/
if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
EXT4_EXT_CACHE_EXTENT);
ext4_update_inode_fsync_trans(handle, inode, 1);
} else
ext4_update_inode_fsync_trans(handle, inode, 0);
out:
if (allocated > max_blocks)
allocated = max_blocks;
Expand Down
46 changes: 17 additions & 29 deletions fs/ext4/fsync.c
Original file line number Diff line number Diff line change
Expand Up @@ -51,25 +51,30 @@
int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
{
struct inode *inode = dentry->d_inode;
struct ext4_inode_info *ei = EXT4_I(inode);
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
int err, ret = 0;
int ret;
tid_t commit_tid;

J_ASSERT(ext4_journal_current_handle() == NULL);

trace_ext4_sync_file(file, dentry, datasync);

if (inode->i_sb->s_flags & MS_RDONLY)
return 0;

ret = flush_aio_dio_completed_IO(inode);
if (ret < 0)
return ret;

if (!journal)
return simple_fsync(file, dentry, datasync);

/*
* data=writeback:
* data=writeback,ordered:
* The caller's filemap_fdatawrite()/wait will sync the data.
* sync_inode() will sync the metadata
*
* data=ordered:
* The caller's filemap_fdatawrite() will write the data and
* sync_inode() will write the inode if it is dirty. Then the caller's
* filemap_fdatawait() will wait on the pages.
* Metadata is in the journal, we wait for proper transaction to
* commit here.
*
* data=journal:
* filemap_fdatawrite won't do anything (the buffers are clean).
Expand All @@ -82,27 +87,10 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
if (ext4_should_journal_data(inode))
return ext4_force_commit(inode->i_sb);

if (!journal)
ret = sync_mapping_buffers(inode->i_mapping);

if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
goto out;

/*
* The VFS has written the file data. If the inode is unaltered
* then we need not start a commit.
*/
if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0, /* sys_fsync did this */
};
err = sync_inode(inode, &wbc);
if (ret == 0)
ret = err;
}
out:
if (journal && (journal->j_flags & JBD2_BARRIER))
commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
if (jbd2_log_start_commit(journal, commit_tid))
jbd2_log_wait_commit(journal, commit_tid);
else if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
return ret;
}
29 changes: 29 additions & 0 deletions fs/ext4/inode.c
Original file line number Diff line number Diff line change
Expand Up @@ -983,6 +983,8 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
goto cleanup;

set_buffer_new(bh_result);

ext4_update_inode_fsync_trans(handle, inode, 1);
got_it:
map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
if (count > blocks_to_boundary)
Expand Down Expand Up @@ -4738,6 +4740,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
struct ext4_inode *raw_inode;
struct ext4_inode_info *ei;
struct inode *inode;
journal_t *journal = EXT4_SB(sb)->s_journal;
long ret;
int block;

Expand Down Expand Up @@ -4802,6 +4805,31 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ei->i_data[block] = raw_inode->i_block[block];
INIT_LIST_HEAD(&ei->i_orphan);

/*
* Set transaction id's of transactions that have to be committed
* to finish f[data]sync. We set them to currently running transaction
* as we cannot be sure that the inode or some of its metadata isn't
* part of the transaction - the inode could have been reclaimed and
* now it is reread from disk.
*/
if (journal) {
transaction_t *transaction;
tid_t tid;

spin_lock(&journal->j_state_lock);
if (journal->j_running_transaction)
transaction = journal->j_running_transaction;
else
transaction = journal->j_committing_transaction;
if (transaction)
tid = transaction->t_tid;
else
tid = journal->j_commit_sequence;
spin_unlock(&journal->j_state_lock);
ei->i_sync_tid = tid;
ei->i_datasync_tid = tid;
}

if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
Expand Down Expand Up @@ -5056,6 +5084,7 @@ static int ext4_do_update_inode(handle_t *handle,
err = rc;
ei->i_state &= ~EXT4_STATE_NEW;

ext4_update_inode_fsync_trans(handle, inode, 0);
out_brelse:
brelse(bh);
ext4_std_error(inode->i_sb, err);
Expand Down
2 changes: 2 additions & 0 deletions fs/ext4/super.c
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&(ei->i_block_reservation_lock));
INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
ei->cur_aio_dio = NULL;
ei->i_sync_tid = 0;
ei->i_datasync_tid = 0;

return &ei->vfs_inode;
}
Expand Down

0 comments on commit b436b9b

Please sign in to comment.