-
Notifications
You must be signed in to change notification settings - Fork 119
Description
Issue
When fallocating a file, in some situations (e.g., fallocating to a larger size, or fallocating after creating a snapshot), NOVA allocates new data blocks and increments i_blocks, so i_blocks becomes larger than the block count corresponding to the file size. After a umount and remount, when the file is stat'ed, NOVA scans the log and rebuilds the inode. When the rebuild finishes, i_blocks is set to the block count corresponding to the file size, which differs from the value before the umount.
The truncate function does not have this issue. The reason is explained in the Reason section.
Reproduce
The case with snapshots.
insmod nova.ko metadata_csum=1 data_csum=1 data_parity=1 dram_struct_csum=1
mount -t NOVA -o init,dbgmask=255 /dev/pmem0 /mnt/pmem0
touch /mnt/pmem0/foo
echo 1 > /mnt/pmem0/foo
# the stat shows `i_blocks` is 8
stat /mnt/pmem0/foo
# create a snapshot
echo 1 > /proc/fs/NOVA/pmem0/create_snapshot
# fallocate the file with the keep-size option
fallocate -n -o 0 -l 1024 /mnt/pmem0/foo
# the stat shows `i_blocks` is 16
stat /mnt/pmem0/foo
# umount and remount
umount /mnt/pmem0
mount -t NOVA -o dbgmask=255 /dev/pmem0 /mnt/pmem0
# the stat shows `i_blocks` is 8
stat /mnt/pmem0/foo

The case without snapshots.
insmod nova.ko metadata_csum=1 data_csum=1 data_parity=1 dram_struct_csum=1
mount -t NOVA -o init,dbgmask=255 /dev/pmem0 /mnt/pmem0
touch /mnt/pmem0/foo
dd if=/dev/random of=/mnt/pmem0/foo bs=4096 count=1
# the stat shows `i_blocks` is 8
stat /mnt/pmem0/foo
# fallocate the file to 8192 with the keep-size option
fallocate -n -o 4096 -l 4096 /mnt/pmem0/foo
# the stat shows `i_blocks` is 16
stat /mnt/pmem0/foo
# umount and remount
umount /mnt/pmem0
mount -t NOVA -o dbgmask=255 /dev/pmem0 /mnt/pmem0
# the stat shows `i_blocks` is 8
stat /mnt/pmem0/foo

Reason
After an inode is rebuilt, i_blocks is set according to the file size, as the code snippet below shows:
Lines 498 to 501 in 976a4d1
| ret = nova_rebuild_inode_finish(sb, pi, sih, reb, curr_p); | |
| sih->i_blocks = sih->i_size >> data_bits; | |
| if (sih->i_size % (1 << data_bits) > 0) | |
| ++sih->i_blocks; |
As the code snippet below shows, when fallocating a file in some situations (e.g., increasing the size, or different epochs), NOVA allocates a new data block (Line 266), increments the total blocks (Line 300), and sets the new i_blocks of sih and inode (Line 307). Next, NOVA updates the file tree (Line 322).
Lines 266 to 326 in 976a4d1
| allocated = nova_new_data_blocks(sb, sih, &blocknr, start_blk, | |
| ent_blks, ALLOC_INIT_ZERO, ANY_CPU, | |
| ALLOC_FROM_HEAD); | |
| nova_dbgv("%s: alloc %d blocks @ %lu\n", __func__, | |
| allocated, blocknr); | |
| if (allocated <= 0) { | |
| nova_dbg("%s alloc %lu blocks failed!, %d\n", | |
| __func__, ent_blks, allocated); | |
| ret = allocated; | |
| goto out; | |
| } | |
| /* Handle hole fill write */ | |
| nova_init_file_write_entry(sb, sih, &entry_data, epoch_id, | |
| start_blk, allocated, blocknr, | |
| time, new_size); | |
| ret = nova_append_file_write_entry(sb, pi, inode, | |
| &entry_data, &update); | |
| if (ret) { | |
| nova_dbg("%s: append inode entry failed\n", __func__); | |
| ret = -ENOSPC; | |
| goto out; | |
| } | |
| entry = nova_get_block(sb, update.curr_entry); | |
| nova_reset_csum_parity_range(sb, sih, entry, start_blk, | |
| start_blk + allocated, 1, 0); | |
| update_log = true; | |
| if (begin_tail == 0) | |
| begin_tail = update.curr_entry; | |
| total_blocks += allocated; | |
| next: | |
| num_blocks -= allocated; | |
| start_blk += allocated; | |
| } | |
| data_bits = blk_type_to_shift[sih->i_blk_type]; | |
| sih->i_blocks += (total_blocks << (data_bits - sb->s_blocksize_bits)); | |
| inode->i_blocks = sih->i_blocks; | |
| if (update_log) { | |
| sih->log_tail = update.tail; | |
| sih->alter_log_tail = update.alter_tail; | |
| nova_memunlock_inode(sb, pi, &irq_flags); | |
| nova_update_tail(pi, update.tail); | |
| if (metadata_csum) | |
| nova_update_alter_tail(pi, update.alter_tail); | |
| nova_memlock_inode(sb, pi, &irq_flags); | |
| /* Update file tree */ | |
| ret = nova_reassign_file_tree(sb, sih, begin_tail); | |
| if (ret) | |
| goto out; | |
| } |
If the fallocate operation is executed after a snapshot, the execution path for updating the file tree is: nova_reassign_file_tree -> nova_assign_write_entry -> nova_free_old_entry. In nova_free_old_entry, as the code snippet below shows, NOVA first invokes nova_append_data_to_snapshot (Line 156) and then nova_invalidate_write_entry (Line 159). Since the data block is snapshotted, NOVA will not free it. At Line 171, NOVA decrements sih->i_blocks. However, inode->i_blocks and sih->i_blocks have already been updated before the file tree is updated. Either this update is incorrect or the previous update is incorrect.
Lines 155 to 171 in 976a4d1
| if (!delete_dead) { | |
| ret = nova_append_data_to_snapshot(sb, entryc, old_nvmm, | |
| num_free, epoch_id); | |
| if (ret == 0) { | |
| nova_invalidate_write_entry(sb, entry, 1, 0); | |
| goto out; | |
| } | |
| nova_invalidate_write_entry(sb, entry, 1, num_free); | |
| } | |
| nova_dbgv("%s: pgoff %lu, free %u blocks\n", | |
| __func__, pgoff, num_free); | |
| nova_free_data_blocks(sb, sih, old_nvmm, num_free); | |
| out: | |
| sih->i_blocks -= num_free; |
When truncating a file, as the code below shows, NOVA first updates the file tree, then updates inode->i_blocks and sih->i_blocks. Thus, the number consistently corresponds to the file size, regardless of snapshots.
Lines 420 to 426 in 976a4d1
| freed = nova_delete_file_tree(sb, sih, first_blocknr, | |
| last_blocknr, true, false, epoch_id); | |
| inode->i_blocks -= (freed * (1 << (data_bits - | |
| sb->s_blocksize_bits))); | |
| sih->i_blocks = inode->i_blocks; |
Fix
I do not know what the expected i_blocks after fallocate is: the number of physically allocated blocks, or the number corresponding to the file size.
Consistent with the behavior of the truncate function, updating inode->i_blocks and sih->i_blocks after updating the file tree should be correct.
However, when fallocating a file to a larger size, NOVA does not free blocks in the nova_reassign_file_tree function, leading to a consistent physical i_blocks number. On recovery, i_blocks will again be set to the number corresponding to the file size. Thus, NOVA should settle on a consistent definition of i_blocks before patching.