Skip to content

Out-of-order execution bug when setting alter pages #154

@iaoing

Description

@iaoing

Issue

When allocating a page for an inode, if a crash occurs the alter pages are not persisted and the inode's checksum is updated, a segment fault will occur since alter_log_entry computes the alter log address via the address to the later page.

Reason

Allocating a new page for a newly created file has such processes:

  1. allocate a page for the main log, update inode (Line 1216)
  2. allocate a page for the alternative log, update inode (Line 1221)
  3. set the alter page address for both main and alter pages, flush them but no fence. (Line 1226)
  4. update the inode checksum (Line 1232)
  5. fence after updating the inode checksum (Within the function at Line 1232)

linux-nova/fs/nova/log.c

Lines 1215 to 1235 in 976a4d1

if (curr_p == 0) {
ret = nova_initialize_inode_log(sb, pi, sih, MAIN_LOG);
if (ret)
return 0;
if (metadata_csum) {
ret = nova_initialize_inode_log(sb, pi, sih, ALTER_LOG);
if (ret)
return 0;
nova_memunlock_inode(sb, pi, &irq_flags);
nova_update_alter_pages(sb, pi, sih->log_head,
sih->alter_log_head);
nova_memlock_inode(sb, pi, &irq_flags);
}
nova_memunlock_inode(sb, pi, &irq_flags);
nova_update_inode_checksum(pi);
nova_memlock_inode(sb, pi, &irq_flags);
return sih->log_head;

Since there are no fences before updating the checksum, Step 3 could be issued after Step 4. If a crash occurs, after recovery, this issue will cause segment fault. In th nova_verify_entry_csum function, NOVA reads the alter log by the alter page address (Line 328), leading to the bug (Line 335).

entry_off = nova_get_addr_off(sbi, entry);
alter_off = alter_log_entry(sb, entry_off);
if (alter_off == 0) {
nova_err(sb, "%s: log page tail error detected\n", __func__);
goto fail;
}
alter = (void *) nova_get_block(sb, alter_off);
ret = nova_get_entry_copy(sb, alter, &alter_csum, &alter_size,
alter_copy);

Reproduce

if (curr_p == 0) {
	ret = nova_initialize_inode_log(sb, pi, sih, MAIN_LOG);
	if (ret)
		return 0;

	if (metadata_csum) {
		ret = nova_initialize_inode_log(sb, pi, sih, ALTER_LOG);
		if (ret)
			return 0;

		nova_memunlock_inode(sb, pi, &irq_flags);
		if (pi->nova_ino == 33) {
                        // simulate the out-of-order execution.
			nova_dbg("-->> %s: skip set alter page. ino: %llu, addr: %llu, %llu\n", __func__, pi->nova_ino, sih->log_head, sih->alter_log_head);
		} else {
			nova_update_alter_pages(sb, pi, sih->log_head,
							sih->alter_log_head);
		}
		nova_memlock_inode(sb, pi, &irq_flags);
	}
        
        // info where we are
	nova_dbg("-->> %s: ino: %llu, addr: %llu, %llu\n", __func__, pi->nova_ino, sih->log_head, sih->alter_log_head);

	nova_memunlock_inode(sb, pi, &irq_flags);
	nova_update_inode_checksum(pi);
	if (pi->nova_ino == 33) {
                // simulate a crash after updating checksum
		nova_dbg("-->> %s: bug on: ino: %llu, addr: %llu, %llu\n", __func__, pi->nova_ino, sih->log_head, sih->alter_log_head);
		BUG();		
	}
	nova_memlock_inode(sb, pi, &irq_flags);

	return sih->log_head;
}

The command to reproduce the bug

# zero out the PM device
dd if=/dev/zero of=/dev/pmem0 bs=1048576 count=100
# make the modified code
make
insmod nova.ko metadata_csum=1 data_csum=1 data_parity=1 dram_struct_csum=1
mount -t NOVA -o init,data_cow,dbgmask=255 /dev/pmem0 /mnt/pmem0
# do not use touch to create a file, since touch will call two functions, one for creating, another for updating attributes (this will allocate pages and append a log entry)
> /mnt/pmem0/foo
# execute any command to trigger the page allocation
dd if=/dev/zero of=/mnt/pmem0/foo bs=8192 count=1 oflag=direct
# The inserted bug will be triggered here, cat the image for recovery
cat /dev/pmem0 > image

#### reboot the system

# revert the source code modification and then make
make
# restore the image
cat image > /dev/pmem0
# recover it
insmod nova.ko metadata_csum=1 data_csum=1 data_parity=1 dram_struct_csum=1
mount -t NOVA -o data_cow,dbgmask=255 /dev/pmem0 /mnt/pmem0
# stat can show the file is good
# execute command to trigger the `nova_verify_entry_csum` function
dd if=/dev/zero of=/mnt/pmem0/foo bs=8192 count=1 oflag=direc
# bug occurs, syslog will show the FS stoped at `nova_get_entry_copy`

Fix

Both flushing and fencing when setting the alter address of log pages.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions