1. 程式人生 > >f2fs系列文章fsck(五)

f2fs系列文章fsck(五)

    fsck_verify通過前面的檢查結果來修正元資料。

    首先是對nid的檢查情況進行檢視,f2fs_fsck中的nat_area_bitmap在一開始通過讀取f2fs_nat_block中所有的f2fs_nat_entry來記錄所有有效的nid,但是在遍歷的過程中在呼叫sanity_check_nid的時候已經將所有正常的nid都給clear掉了,所以在檢查這個點陣圖的時候,如果發現還有些位是有效的,那麼證明有錯誤發生。然後是f2fs_fsck中記錄硬連結連結串列的hard_link_list_head,正常情況下應該是NULL,如果不是,說明也是出錯誤了。接著f2fs_fsck的main_area_bitmap記錄了在遍歷過程中所訪問到的所有的block,也就是記錄了所有的有效塊,所以這個理論上應該跟f2fs_fsck的sit_area_bitmap是一致的,所以不一致代表著錯誤的發生。還有就是f2fs_fsck的check_result的valid_blk_cnt、valid_node_cnt、valid_nat_entry_cnt、valid_inode_cnt、sit_free_segs也要跟f2fs_checkpoint保持一致。然後是呼叫check_curseg_offset對current segment進行檢查,next_blkoff對應的block必須是空閒的。對於LFS寫的,該segment剩下的block必須全部都是空閒的。另外在遍歷過程中對seg_entry的type也是進行了一定的修改,這裡也是要與原始的type進行比對。

/*
 * Excerpt from the body of fsck_verify: consistency checks and report.
 * Each failed check sets ret = EXIT_ERR_CODE and/or c.bug_on = 1; nothing
 * is written to the device in this part.
 */

/* Every bit still set in nat_area_bitmap is reported as an unreachable nid. */
for (i = 0; i < fsck->nr_nat_entries; i++) {
	if (f2fs_test_bit(i, fsck->nat_area_bitmap) != 0) {
		printf("NID[0x%x] is unreachable\n", i);
		nr_unref_nid++;
	}
}
	
/* A non-empty hard-link list is printed node by node and flagged as a bug. */
if (fsck->hard_link_list_head != NULL) {
	node = fsck->hard_link_list_head;
	while (node) {
		printf("NID[0x%x] has [0x%x] more unreachable links\n", node->nid, node->links);
		node = node->next;
	}
	c.bug_on = 1;
}

/* Report the number of unreachable nids counted above. */
printf("[FSCK] Unreachable nat entries                       ");
if (nr_unref_nid == 0x0) {
	printf(" [Ok..] [0x%x]\n", nr_unref_nid);
} else {
	printf(" [Fail] [0x%x]\n", nr_unref_nid);
	ret = EXIT_ERR_CODE;
	c.bug_on = 1;
}

/* sit_area_bitmap and main_area_bitmap must be byte-identical. */
printf("[FSCK] SIT valid block bitmap checking                ");
if (memcmp(fsck->sit_area_bitmap, fsck->main_area_bitmap, fsck->sit_area_bitmap_sz) == 0x0) {
	printf("[Ok..]\n");
} else {
	printf("[Fail]\n");
	ret = EXIT_ERR_CODE;
	c.bug_on = 1;
}

/* The hard-link list must be empty; the multi-link file count is printed either way. */
printf("[FSCK] Hard link checking for regular file           ");
if (fsck->hard_link_list_head == NULL) {
	printf(" [Ok..] [0x%x]\n", fsck->chk.multi_hard_link_files);
} else {
	printf(" [Fail] [0x%x]\n", fsck->chk.multi_hard_link_files);
	ret = EXIT_ERR_CODE;
	c.bug_on = 1;
}

/* Counters gathered in fsck->chk are compared against the values held in sbi. */
printf("[FSCK] valid_block_count matching with CP            ");
if (sbi->total_valid_block_count == fsck->chk.valid_blk_cnt) {
	printf(" [Ok..] [0x%x]\n", (u32)fsck->chk.valid_blk_cnt);
} else {
	printf(" [Fail] [0x%x]\n", (u32)fsck->chk.valid_blk_cnt);
	ret = EXIT_ERR_CODE;
	c.bug_on = 1;
}

/* Node count as recorded in chk.valid_node_cnt. */
printf("[FSCK] valid_node_count matcing with CP (de lookup)  ");
if (sbi->total_valid_node_count == fsck->chk.valid_node_cnt) {
	printf(" [Ok..] [0x%x]\n", fsck->chk.valid_node_cnt);
} else {
	printf(" [Fail] [0x%x]\n", fsck->chk.valid_node_cnt);
	ret = EXIT_ERR_CODE;
	c.bug_on = 1;
}

/* Node count as recorded in chk.valid_nat_entry_cnt. */
printf("[FSCK] valid_node_count matcing with CP (nat lookup) ");
if (sbi->total_valid_node_count == fsck->chk.valid_nat_entry_cnt) {
	printf(" [Ok..] [0x%x]\n", fsck->chk.valid_nat_entry_cnt);
} else {
	printf(" [Fail] [0x%x]\n", fsck->chk.valid_nat_entry_cnt);
	ret = EXIT_ERR_CODE;
	c.bug_on = 1;
}

/* Inode count. */
printf("[FSCK] valid_inode_count matched with CP             ");
if (sbi->total_valid_inode_count == fsck->chk.valid_inode_cnt) {
	printf(" [Ok..] [0x%x]\n", fsck->chk.valid_inode_cnt);
} else {
	printf(" [Fail] [0x%x]\n", fsck->chk.valid_inode_cnt);
	ret = EXIT_ERR_CODE;
	c.bug_on = 1;
}

/* Free segment count read from the checkpoint vs. chk.sit_free_segs. */
printf("[FSCK] free segment_count matched with CP            ");
if (le32_to_cpu(F2FS_CKPT(sbi)->free_segment_count) == fsck->chk.sit_free_segs) {
	printf(" [Ok..] [0x%x]\n", fsck->chk.sit_free_segs);
} else {
	printf(" [Fail] [0x%x]\n", fsck->chk.sit_free_segs);
	ret = EXIT_ERR_CODE;
	c.bug_on = 1;
}

/* check_curseg_offset returns non-zero when a current segment's write pointer is invalid. */
printf("[FSCK] next block offset is free                     ");
if (check_curseg_offset(sbi) == 0) {
	printf(" [Ok..]\n");
} else {
	printf(" [Fail]\n");
	ret = EXIT_ERR_CODE;
	c.bug_on = 1;
}

/* A non-zero result from check_sit_types forces the repair path below to run. */
printf("[FSCK] fixing SIT types\n");
if (check_sit_types(sbi) != 0)
	force = 1;

/* Final summary line: fails if c.bug_on was set by any check, here or earlier. */
printf("[FSCK] other corrupted bugs                          ");
if (c.bug_on == 0) {
	printf(" [Ok..]\n");
} else {
	printf(" [Fail]\n");
	ret = EXIT_ERR_CODE;
}

    以上只是對這些資料的一致性問題進行了檢查和列印。下面開始真正的修復工作。硬連結的問題由fix_hard_links來完成,nat的問題由fix_nat_entries來完成,sit的問題是由函式rewrite_sit_area_bitmap解決。move_curseg_info、write_curseg_info、flush_curseg_sit_entries共同完成current segment的問題,最後fix_checkpoint完成上述的統計資料到f2fs_checkpoint的修復工作。

/*
 * Excerpt from the body of fsck_verify: repair dispatch.
 * Runs when a fix was forced, or when fixing is enabled (c.fix_on) and the
 * run is not read-only (c.ro).
 */
if (force || (c.fix_on && !c.ro)) {
	struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);

	if (force || c.bug_on) {
		/* Repair order: hard links, NAT entries, SIT bitmaps, then current segments. */
		fix_hard_links(sbi);
		fix_nat_entries(sbi);
		rewrite_sit_area_bitmap(sbi);
		/* Current segments are relocated only if they still fail the offset check. */
		if (check_curseg_offset(sbi)) {
			move_curseg_info(sbi, SM_I(sbi)->main_blkaddr);
			write_curseg_info(sbi);
			flush_curseg_sit_entries(sbi);
		}
		/* Finally write the corrected counters and flags into the checkpoint. */
		fix_checkpoint(sbi);
	} else if (is_set_ckpt_flags(cp, CP_FSCK_FLAG)) {
		/* No bug found, but CP_FSCK_FLAG is set in the checkpoint: write a checkpoint. */
		write_checkpoint(sbi);
	}
}

    fix_hard_links:如果f2fs_fsck的硬連結連結串列hard_link_list_head是NULL,那就直接返回,否則遍歷這個連結串列的節點,對每個節點的ino進行基本的sanity_check_nid檢查,然後將對應的f2fs_inode的連結數修復為記錄在連結串列節點中的實際的連結數actual_links。最後將修改之後的f2fs_inode寫回。

/*
 * fix_hard_links - rewrite i_links for every inode left on the hard-link list.
 *
 * Walks fsck->hard_link_list_head. For each node it re-reads the inode via
 * sanity_check_nid(), stores node->actual_links into i_links and writes the
 * node block back to ni.blk_addr. Every list node is freed, and the list head
 * is reset to NULL so that no dangling pointer to freed nodes remains.
 *
 * If sanity_check_nid() fails for an inode, that inode is skipped: node_blk
 * and ni cannot be trusted in that case, and writing to ni.blk_addr could
 * overwrite an unrelated block.
 */
static void fix_hard_links(struct f2fs_sb_info *sbi)
{
	struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
	struct hard_link_node *tmp, *node;
	struct f2fs_node *node_blk = NULL;
	struct node_info ni;
	int ret;

	if (fsck->hard_link_list_head == NULL)
		return;
	node_blk = (struct f2fs_node *)calloc(BLOCK_SZ, 1);
	ASSERT(node_blk != NULL);

	node = fsck->hard_link_list_head;
	while (node) {
		if (sanity_check_nid(sbi, node->nid, node_blk, F2FS_FT_MAX, TYPE_INODE, &ni)) {
			/* Do not patch or write a block that failed validation. */
			FIX_MSG("Failed to fix, rerun fsck.f2fs");
		} else {
			node_blk->i.i_links = cpu_to_le32(node->actual_links);
			FIX_MSG("File: 0x%x i_links= 0x%x -> 0x%x", node->nid, node->links, node->actual_links);
			ret = dev_write_block(node_blk, ni.blk_addr);
			ASSERT(ret >= 0);
		}
		tmp = node;
		node = node->next;
		free(tmp);
	}
	/* All nodes were freed above; make sure nobody walks the stale list. */
	fsck->hard_link_list_head = NULL;
	free(node_blk);
}

    fix_nat_entries:前面提過,執行到fsck_verify中,f2fs_fsck中的nat_area_bitmap正常情況下應該是將所有正常的nid的bit全部clear掉了。所以剩下的置了位所對應的nid都應該是無效的。這個函式就是完成這個功能,它逐位檢查f2fs_fsck中的nat_area_bitmap,發現置位了的,就呼叫nullify_nat_entry來將對應的nid無效掉。

/*
 * fix_nat_entries - invalidate every nid whose bit is still set in
 * fsck->nat_area_bitmap by calling nullify_nat_entry() on it.
 */
static void fix_nat_entries(struct f2fs_sb_info *sbi)
{
	struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
	u32 nid = 0;

	while (nid < fsck->nr_nat_entries) {
		if (f2fs_test_bit(nid, fsck->nat_area_bitmap))
			nullify_nat_entry(sbi, nid);
		nid++;
	}
}

    nullify_nat_entry:這個函式完成將特定的nid無效掉,這個需要將記錄最新的nid對應的nat清空就行。記錄最新nat可能存在兩個地方,一個是在current segment的nat_journal中,還有一個就是記錄在裝置上的f2fs_nat_entry。所以nullify_nat_entry首先在nat_journal中查詢相應的nid,如果找到了就將相應的nat_journal的f2fs_nat_entry清空。否則需要讀取對應的f2fs_nat_block,找到nid的f2fs_nat_entry,將其清空並寫回。

/*
 * nullify_nat_entry - invalidate the NAT entry of @nid.
 *
 * The NAT journal kept in the CURSEG_HOT_DATA summary block is searched
 * first; if @nid is found there, the journal entry is zeroed in memory and
 * the function returns. Otherwise the NAT block holding @nid is read from
 * the device, the entry is cleared and the block is written back. For the
 * node and meta inode numbers only block_addr is changed (set to 0x1); the
 * rest of the entry is left as it was.
 */
void nullify_nat_entry(struct f2fs_sb_info *sbi, u32 nid)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = &curseg->sum_blk->journal;
	struct f2fs_nat_block *nat_block;
	pgoff_t nat_addr;
	int slot;
	int ret;
	int idx;

	/* Journal lookup: a hit is fixed in place and ends the function. */
	for (idx = 0; idx < nats_in_cursum(journal); idx++) {
		if (le32_to_cpu(nid_in_journal(journal, idx)) != nid)
			continue;

		memset(&nat_in_journal(journal, idx), 0, sizeof(struct f2fs_nat_entry));
		FIX_MSG("Remove nid [0x%x] in nat journal", nid);
		return;
	}

	/* Not journalled: patch the NAT block on the device. */
	nat_block = (struct f2fs_nat_block *)calloc(BLOCK_SZ, 1);
	ASSERT(nat_block);

	slot = nid % NAT_ENTRY_PER_BLOCK;
	nat_addr = current_nat_addr(sbi, nid);

	ret = dev_read_block(nat_block, nat_addr);
	ASSERT(ret >= 0);

	if (nid != F2FS_NODE_INO(sbi) && nid != F2FS_META_INO(sbi)) {
		memset(&nat_block->entries[slot], 0, sizeof(struct f2fs_nat_entry));
		FIX_MSG("Remove nid [0x%x] in NAT", nid);
	} else {
		FIX_MSG("nid [0x%x] block_addr= 0x%x -> 0x1", nid,
			le32_to_cpu(nat_block->entries[slot].block_addr));
		nat_block->entries[slot].block_addr = cpu_to_le32(0x1);
	}

	ret = dev_write_block(nat_block, nat_addr);
	ASSERT(ret >= 0);

	free(nat_block);
}

 

    rewrite_sit_area_bitmap:這個函式主要完成f2fs_fsck中記錄遍歷過程中的真實有效塊的點陣圖main_area_bitmap到f2fs_sit_entry的同步。首先遍歷所有的segno,將segno對應的f2fs_sit_block讀取進來,然後找到相應的f2fs_sit_entry,然後用main_area_bitmap中segno對應的位置的點陣圖替代f2fs_sit_entry中的點陣圖,然後根據這個點陣圖更新其中的有效塊數,還有就是將更新後的seg_entry中的segment的type也同步到f2fs_sit_entry,最後將修復後的f2fs_sit_entry寫回。

 

/*
 * rewrite_sit_area_bitmap - make every SIT entry match fsck->main_area_bitmap.
 *
 * The SIT journal count in the CURSEG_COLD_DATA summary block is reset to 0.
 * Then, for each segment, the segment's slice of main_area_bitmap is copied
 * into both the on-disk SIT entry and the in-memory seg_entry, the valid
 * block count is recomputed from that bitmap, and the SIT block is written
 * back. A segment type >= NO_CHECK_TYPE is reported and stored as type 0.
 */
void rewrite_sit_area_bitmap(struct f2fs_sb_info *sbi)
{
	struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct sit_info *sit_i = SIT_I(sbi);
	char *src = fsck->main_area_bitmap;
	unsigned int segno;

	curseg->sum_blk->journal.n_sits = 0;

	/* src advances by one per-segment bitmap on every iteration. */
	for (segno = 0; segno < TOTAL_SEGS(sbi); segno++, src += SIT_VBLOCK_MAP_SIZE) {
		struct f2fs_sit_block *sit_blk = get_current_sit_page(sbi, segno);
		struct f2fs_sit_entry *sit = &sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, segno)];
		struct seg_entry *se;
		u16 valid_blocks = 0;
		u16 type;
		int byte;

		memcpy(sit->valid_map, src, SIT_VBLOCK_MAP_SIZE);

		/* Recount valid blocks from the bitmap just installed. */
		byte = 0;
		while (byte < SIT_VBLOCK_MAP_SIZE) {
			valid_blocks += get_bits_in_byte(sit->valid_map[byte]);
			byte++;
		}

		se = get_seg_entry(sbi, segno);
		memcpy(se->cur_valid_map, src, SIT_VBLOCK_MAP_SIZE);
		se->valid_blocks = valid_blocks;

		type = se->type;
		if (type >= NO_CHECK_TYPE) {
			ASSERT_MSG("Invalide type and valid blocks=%x,%x", segno, valid_blocks);
			type = 0;
		}

		sit->vblocks = cpu_to_le16((type << SIT_VBLOCKS_SHIFT) | valid_blocks);
		rewrite_current_sit_page(sbi, segno, sit_blk);
		free(sit_blk);
	}
}

    之前提到過,函式check_curseg_offset檢查current segment是否出現了問題,這裡也是通過這個函式來檢查是不是出了問題,有問題就通過後面的move_curseg_info、write_curseg_info、flush_curseg_sit_entries進行修復。check_curseg_offset主要檢查next_blkoff對應的block必須是空閒的。對於LFS寫的,該segment剩下的block必須全部都是空閒的。

/*
 * check_curseg_offset - validate the write pointer of every current segment.
 *
 * For each of the NO_CHECK_TYPE current segments:
 *   - next_blkoff must fall inside the per-segment valid-block bitmap;
 *   - the block at next_blkoff must not be marked valid in cur_valid_map;
 *   - for segments not allocated in SSR mode, every block after next_blkoff
 *     must be free as well.
 *
 * Returns 0 if all current segments pass, -EINVAL on the first violation.
 */
int check_curseg_offset(struct f2fs_sb_info *sbi)
{
	int i;

	for (i = 0; i < NO_CHECK_TYPE; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		struct seg_entry *se;
		int j, nblocks;

		/* Reject an offset that would index past the bitmap. */
		if ((curseg->next_blkoff >> 3) >= SIT_VBLOCK_MAP_SIZE)
			return -EINVAL;
		se = get_seg_entry(sbi, curseg->segno);
		if (f2fs_test_bit(curseg->next_blkoff, (const char *)se->cur_valid_map)) {
			ASSERT_MSG("Next block offset is not free, type:%d", i);
			return -EINVAL;
		}

		/*
		 * The trailing-blocks check only applies to non-SSR segments.
		 * Move on to the next current segment instead of returning:
		 * returning here would leave the remaining segments unchecked.
		 */
		if (curseg->alloc_type == SSR)
			continue;

		nblocks = sbi->blocks_per_seg;
		for (j = curseg->next_blkoff + 1; j < nblocks; j++) {
			if (f2fs_test_bit(j, (const char *)se->cur_valid_map)) {
				ASSERT_MSG("LFS must have free section:%d", i);
				return -EINVAL;
			}
		}
	}
	return 0;
}

    move_curseg_info:對NO_CHECK_TYPE種current segment進行遍歷,然後是呼叫函式find_next_free_block在main area中找到相應的seg_entry與遍歷的current segment有著相同型別的segment中的空閒塊或者整個segment空閒的起始塊,然後返回其segno,將這個segno替換該型別對應的current segment,然後修改current segment中的欄位segno、next_blkoff、alloc_type改為SSR(洞寫)、sum_blk。然後呼叫函式reset_curseg根據current segment的type來設定curseg_info中sum_blk中的summary_footer的型別,由於剛才找空閒塊的時候如果是空閒segment,那麼這個segment的type可能跟需要查詢的型別是不對應的,所以reset_curseg也完成對seg_entry的型別的修改。

/*
 * move_curseg_info - relocate every current segment to a free block found
 * at or after @from.
 *
 * For each current segment type: the present summary block is written to its
 * SSA location, find_next_free_block() picks the new position, segno and
 * next_blkoff are updated, alloc_type is set to SSR, the summary entries of
 * the new segment are loaded from the device, and reset_curseg() is called
 * for that type.
 */
void move_curseg_info(struct f2fs_sb_info *sbi, u64 from)
{
	int type, ret;

	for (type = 0; type < NO_CHECK_TYPE; type++) {
		struct curseg_info *curseg = CURSEG_I(sbi, type);
		struct f2fs_summary_block new_sum;
		u64 sum_addr;
		u64 target = from;
		u32 prev_segno;

		/* Save the summary block of the segment being left. */
		sum_addr = GET_SUM_BLKADDR(sbi, curseg->segno);
		ret = dev_write_block(curseg->sum_blk, sum_addr);
		ASSERT(ret >= 0);

		/* Pick the new position for this segment type. */
		ret = find_next_free_block(sbi, &target, 0, type);
		ASSERT(ret == 0);

		prev_segno = curseg->segno;
		curseg->segno = GET_SEGNO(sbi, target);
		curseg->next_blkoff = OFFSET_IN_SEG(sbi, target);
		curseg->alloc_type = SSR;

		/* Load the summary entries that belong to the new segment. */
		sum_addr = GET_SUM_BLKADDR(sbi, curseg->segno);
		ret = dev_read_block(&new_sum, sum_addr);
		ASSERT(ret >= 0);
		memcpy(curseg->sum_blk, &new_sum, SUM_ENTRIES_SIZE);

		reset_curseg(sbi, type);

		DBG(1, "Move curseg[%d] %x -> %x after %"PRIx64"\n", type, prev_segno, curseg->segno, from);
	}
}

    write_curseg_info:將修改後的current segment的segno和blkoff修改到f2fs_checkpoint中的cur_data_segno(cur_node_segno)、cur_data_blkoff(cur_node_blkoff),還有分配的型別alloc_type也進行更新。

void write_curseg_info(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
	int i;

	for (i = 0; i < NO_CHECK_TYPE; i++) {
		cp->alloc_type[i] = CURSEG_I(sbi, i)->alloc_type;
		if (i < CURSEG_HOT_NODE) {
			set_cp(cur_data_segno[i], CURSEG_I(sbi, i)->segno);
			set_cp(cur_data_blkoff[i], CURSEG_I(sbi, i)->next_blkoff);
		} else {
			int n = i - CURSEG_HOT_NODE;
			set_cp(cur_node_segno[n], CURSEG_I(sbi, i)->segno);
			set_cp(cur_node_blkoff[n], CURSEG_I(sbi, i)->next_blkoff);
		}
	}
}

    flush_curseg_sit_entries:之前的move_curseg_info呼叫函式reset_curseg的過程中可能對seg_entry進行了修改,這個函式將current的seg_entry同步到f2fs_sit_entry中寫回。

/*
 * flush_curseg_sit_entries - write type and valid-block count of each
 * current segment's seg_entry into its SIT entry on the device.
 *
 * Only the vblocks field of the SIT entry is updated here.
 */
static void flush_curseg_sit_entries(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	int type = 0;

	while (type < NO_CHECK_TYPE) {
		struct curseg_info *curseg = CURSEG_I(sbi, type);
		struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
		struct f2fs_sit_block *sit_blk = get_current_sit_page(sbi, curseg->segno);
		struct f2fs_sit_entry *entry;

		entry = &sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, curseg->segno)];
		entry->vblocks = cpu_to_le16((se->type << SIT_VBLOCKS_SHIFT) | se->valid_blocks);

		rewrite_current_sit_page(sbi, curseg->segno, sit_blk);
		free(sit_blk);
		type++;
	}
}

    fix_checkpoint:首先將f2fs_fsck中的check_result中的統計結果同步到f2fs_checkpoint中,這些資料包括ckpt_flags、free_segment_count、valid_block_count、valid_node_count、valid_inode_count。然後按照cp pack中的順序跳過orphan inode進行寫回。

static void fix_checkpoint(struct f2fs_sb_info *sbi)
{
	struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
	struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
	unsigned long long cp_blk_no;
	u32 flags = CP_UMOUNT_FLAG;
	block_t orphan_blks = 0;
	u32 i;
	int ret;
	u_int32_t crc = 0;

	if (is_set_ckpt_flags(cp, CP_ORPHAN_PRESENT_FLAG)) {
		orphan_blks = __start_sum_addr(sbi) - 1;
		flags |= CP_ORPHAN_PRESENT_FLAG;
	}

	set_cp(cp_pack_total_block_count, 8 + orphan_blks + get_sb(cp_payload));
	flags = update_nat_bits_flags(sb, cp, flags);
	flags |= CP_NOCRC_RECOVERY_FLAG;
	set_cp(ckpt_flags, flags);
	set_cp(free_segment_count, get_free_segments(sbi));
	set_cp(valid_block_count, fsck->chk.valid_blk_cnt);
	set_cp(valid_node_count, fsck->chk.valid_node_cnt);
	set_cp(valid_inode_count, fsck->chk.valid_inode_cnt);
	crc = f2fs_cal_crc32(F2FS_SUPER_MAGIC, cp, CHECKSUM_OFFSET);
	*((__le32 *)((unsigned char *)cp + CHECKSUM_OFFSET)) = cpu_to_le32(crc);

	cp_blk_no = get_sb(cp_blkaddr);
	if (sbi->cur_cp == 2)
		cp_blk_no += 1 << get_sb(log_blocks_per_seg);
	ret = dev_write_block(cp, cp_blk_no++);
	ASSERT(ret >= 0);
	for (i = 0; i < get_sb(cp_payload); i++) {
		ret = dev_write_block(((unsigned char *)cp) + i * F2FS_BLKSIZE, cp_blk_no++);
		ASSERT(ret >= 0);
	}
	cp_blk_no += orphan_blks;
	for (i = 0; i < NO_CHECK_TYPE; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);

		ret = dev_write_block(curseg->sum_blk, cp_blk_no++);
		ASSERT(ret >= 0);
	}

	ret = dev_write_block(cp, cp_blk_no++);
	ASSERT(ret >= 0);

	if (flags & CP_NAT_BITS_FLAG)
		write_nat_bits(sbi, sb, cp, sbi->cur_cp);
}