-
Notifications
You must be signed in to change notification settings - Fork 119
Description
Issue
When allocating a page for an inode, if a crash occurs the alter pages are not persisted and the inode's checksum is updated, a segment fault will occur since alter_log_entry computes the alter log address via the address to the later page.
Reason
Allocating a new page for a newly created file has such processes:
- allocate a page for the main log, update inode (Line 1216)
- allocate a page for the alternative log, update inode (Line 1221)
- set the alter page address for both main and alter pages, flush them but no fence. (Line 1226)
- update the inode checksum (Line 1232)
- fence after updating the inode checksum (Within the function at Line 1232)
Lines 1215 to 1235 in 976a4d1
| if (curr_p == 0) { | |
| ret = nova_initialize_inode_log(sb, pi, sih, MAIN_LOG); | |
| if (ret) | |
| return 0; | |
| if (metadata_csum) { | |
| ret = nova_initialize_inode_log(sb, pi, sih, ALTER_LOG); | |
| if (ret) | |
| return 0; | |
| nova_memunlock_inode(sb, pi, &irq_flags); | |
| nova_update_alter_pages(sb, pi, sih->log_head, | |
| sih->alter_log_head); | |
| nova_memlock_inode(sb, pi, &irq_flags); | |
| } | |
| nova_memunlock_inode(sb, pi, &irq_flags); | |
| nova_update_inode_checksum(pi); | |
| nova_memlock_inode(sb, pi, &irq_flags); | |
| return sih->log_head; |
Since there are no fences before updating the checksum, Step 3 could be issued after Step 4. If a crash occurs, after recovery, this issue will cause segment fault. In th nova_verify_entry_csum function, NOVA reads the alter log by the alter page address (Line 328), leading to the bug (Line 335).
Lines 327 to 336 in 976a4d1
| entry_off = nova_get_addr_off(sbi, entry); | |
| alter_off = alter_log_entry(sb, entry_off); | |
| if (alter_off == 0) { | |
| nova_err(sb, "%s: log page tail error detected\n", __func__); | |
| goto fail; | |
| } | |
| alter = (void *) nova_get_block(sb, alter_off); | |
| ret = nova_get_entry_copy(sb, alter, &alter_csum, &alter_size, | |
| alter_copy); |
Reproduce
if (curr_p == 0) {
ret = nova_initialize_inode_log(sb, pi, sih, MAIN_LOG);
if (ret)
return 0;
if (metadata_csum) {
ret = nova_initialize_inode_log(sb, pi, sih, ALTER_LOG);
if (ret)
return 0;
nova_memunlock_inode(sb, pi, &irq_flags);
if (pi->nova_ino == 33) {
// simulate the out-of-order execution.
nova_dbg("-->> %s: skip set alter page. ino: %llu, addr: %llu, %llu\n", __func__, pi->nova_ino, sih->log_head, sih->alter_log_head);
} else {
nova_update_alter_pages(sb, pi, sih->log_head,
sih->alter_log_head);
}
nova_memlock_inode(sb, pi, &irq_flags);
}
// info where we are
nova_dbg("-->> %s: ino: %llu, addr: %llu, %llu\n", __func__, pi->nova_ino, sih->log_head, sih->alter_log_head);
nova_memunlock_inode(sb, pi, &irq_flags);
nova_update_inode_checksum(pi);
if (pi->nova_ino == 33) {
// simulate a crash after updating checksum
nova_dbg("-->> %s: bug on: ino: %llu, addr: %llu, %llu\n", __func__, pi->nova_ino, sih->log_head, sih->alter_log_head);
BUG();
}
nova_memlock_inode(sb, pi, &irq_flags);
return sih->log_head;
}The command to reproduce the bug
# zero out the PM device
dd if=/dev/zero of=/dev/pmem0 bs=1048576 count=100
# make the modified code
make
insmod nova.ko metadata_csum=1 data_csum=1 data_parity=1 dram_struct_csum=1
mount -t NOVA -o init,data_cow,dbgmask=255 /dev/pmem0 /mnt/pmem0
# do not use touch to create a file, since touch will call two functions, one for creating, another for updating attributes (this will allocate pages and append a log entry)
> /mnt/pmem0/foo
# execute any command to trigger the page allocation
dd if=/dev/zero of=/mnt/pmem0/foo bs=8192 count=1 oflag=direct
# The inserted bug will be triggered here, cat the image for recovery
cat /dev/pmem0 > image
#### reboot the system
# revert the source code modification and then make
make
# restore the image
cat image > /dev/pmem0
# recover it
insmod nova.ko metadata_csum=1 data_csum=1 data_parity=1 dram_struct_csum=1
mount -t NOVA -o data_cow,dbgmask=255 /dev/pmem0 /mnt/pmem0
# stat can show the file is good
# execute command to trigger the `nova_verify_entry_csum` function
dd if=/dev/zero of=/mnt/pmem0/foo bs=8192 count=1 oflag=direc
# bug occurs, syslog will show the FS stoped at `nova_get_entry_copy`Fix
Both flushing and fencing when setting the alter address of log pages.