1. 程式人生 > >【ADNI】資料預處理(3)CNNs

【ADNI】資料預處理(3)CNNs

ADNI Series

1、【ADNI】資料預處理(1)SPM,CAT12

2、【ADNI】資料預處理(2)獲取 subject slices

3、【ADNI】資料預處理(3)CNNs

4、【ADNI】資料預處理(4)Get top k slices according to CNNs

5、【ADNI】資料預處理(5)Get top k slices (pMCI_sMCI) according to CNNs

6、【ADNI】資料預處理(6)ADNI_slice_dataloader ||| show image


Idea:

 

已有資料:AD_NC_ALL_SLICE

AD_NC_ALL_SLICE:分別對ADNI下載而來的nii資料(121x145x121)沿x,y,z軸方向進行切片,得到121+145+121=387張切片圖,分別以AD/NC(199/230)為類別儲存在如下目錄:

 

基於上述資料形式,進一步處理:

1)目的:分別對每個切片位置組成的資料進行分類,篩選出具有區分能力的切片位置;

2)方法:將每個位置的切片圖單獨儲存在一個目錄下,總共得到387個目錄,每個目錄下有429張(AD=199張,NC=230張)切片資料;按照8:2的比例劃分 train set 和 validation set;然後分別使用AlexNet進行訓練,記錄 best_val_acc作為評判依據;

處理後的資料形式如下所示:

以 slice_X10 為例:

1)該目錄下有2個子目錄,分別為 train 和 validation;

2)train 和 validation 目錄下也分別有2個子目錄對應2個類別:AD 和 NC;

3)樣本比例:train(AD:NC=159:184);validation(AD:NC=40:46)

 

原始碼:

step1:注意該指令碼所在的目錄(指令碼中的 ./AD_NC_ALL_SLICE/... 為相對路徑,需在 AD_NC_ALL_SLICE 的上層目錄下執行)

#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import re
import time
import datetime

import shutil

# Base ADNI working directory. The __main__ block of this script joins it with
# the dataset name to build the target directory that receives the copied slices.
root_path = "/home/reserch/documents/deeplearning/alzheimers_disease_DL/ADNI"

def specified_subject_move_to_fold(slice_path_txt_list, target_path, label):
    """Copy every listed .jpg slice into ``target_path/<slice position>/<label>/``.

    Args:
        slice_path_txt_list: Path of a text file holding one slice directory
            per line. Backslashes are normalised to ``/``; blank lines are
            skipped.
        target_path: Root directory that receives one sub-directory per slice
            position, each with a ``<label>`` sub-directory.
        label: Class label (the callers in this script pass ``'AD'`` or
            ``'NC'``). It selects the per-directory index file
            ``entropy_value_<label>_gray_matter_Slices.txt`` and is embedded
            in the destination file names.

    Each line of the index file is read up to the first comma and treated as
    a slice file name. Only names whose second dot-separated part is ``jpg``
    and that exist on disk are copied. The copy is named
    ``<slice position>_<subject id>_GM<label>.jpg``, where the subject id is
    the fifth ``/``-separated component of the slice directory path (empty,
    with a ``...xx...`` notice, when the path is too short).

    Returns:
        None. Progress and two summary lines are printed to stdout.

    Raises:
        IOError/OSError: if the path list or an index file cannot be opened.
            A failure to create a folder or copy a single slice is reported
            on stdout and that slice is skipped.
    """
    copied_count = 0
    directory_count = 0
    entropy_value_txt_name = "entropy_value_" + label + "_gray_matter_Slices.txt"

    with open(slice_path_txt_list, "r") as path_list_file:
        for raw_line in path_list_file:
            slice_dir = raw_line.replace("\n", "").replace("\r", "").replace("\\", "/")
            if not slice_dir.strip():
                # A blank line would otherwise resolve to an index file in the
                # current directory and abort the whole run.
                continue
            directory_count += 1

            path_parts = slice_dir.split("/")
            if len(path_parts) > 4:
                subject_id = path_parts[4]
            else:
                subject_id = ""
                print("...xx...")

            index_file_path = os.path.join(slice_dir, entropy_value_txt_name)
            with open(index_file_path, "r") as index_file:
                for entry in index_file:
                    slice_name = entry.split(",")[0]
                    name_parts = slice_name.split(".")
                    if len(name_parts) < 2 or name_parts[1] != "jpg":
                        # Not a jpg entry (header line, other file type, ...).
                        continue

                    slice_position = name_parts[0]
                    slice_path = os.path.join(slice_dir, slice_name)
                    if not os.path.exists(slice_path):
                        continue

                    class_dir = os.path.join(target_path, slice_position, label)
                    new_slice_name = slice_position + "_" + subject_id + "_" + "GM" + label + ".jpg"
                    new_name = os.path.join(class_dir, new_slice_name)
                    try:
                        if not os.path.exists(class_dir):
                            print("Create dir = {}".format(class_dir))
                            os.makedirs(class_dir)
                        shutil.copyfile(slice_path, new_name)
                    except (IOError, OSError) as copy_error:
                        # Keep going, but make the failure visible instead of
                        # silently dropping the slice.
                        print("[error] could not copy {} to {}: {}".format(
                            slice_path, new_name, copy_error))
                        continue

                    copied_count += 1
                    print("copied the image to {}".format(new_name))

    ### directory_count // 3 --> each subject contributes 3 directories (X, Y, Z)
    print("subject_num = {}".format(directory_count // 3))
    print("total slice num = {}".format(copied_count))

### For each slice directory listed in <label>_gray_matter_Slices_path.txt, copy its .jpg slices
### into <root_path>/<dataset>/<slice position>/<label>/ (one folder per slice position, one sub-folder per class: AD / NC)

if __name__=="__main__":

    dataset = 'dataset3'
    target_path = os.path.join(root_path, dataset)

    # (label, path-list file) pairs, processed NC first and then AD.
    # The list files are given relative to the current working directory.
    labelled_path_lists = (
        ('NC', './AD_NC_ALL_SLICE/NC_gray_matter_Slices_ALL/NC_gray_matter_Slices_path.txt'),
        ('AD', './AD_NC_ALL_SLICE/AD_gray_matter_Slices_ALL/AD_gray_matter_Slices_path.txt'),
    )
    for label, slice_path_txt_list in labelled_path_lists:
        specified_subject_move_to_fold(slice_path_txt_list, target_path, label)

step2:將每個 slice 目錄下的 AD / NC 切片按 8:2 的比例劃分到 train 和 validation 子目錄

#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import re
import time
import datetime

import shutil
import random
from hcq_lib import *

# Fraction of each class that goes to the training set. The split function
# below sends the first int(count * train_percentage) shuffled slices to
# "train" and all remaining ones to "validation".
train_percentage = 0.8
# NOTE(review): val_percentage and test_percentage are not read by the code
# visible in this script, and the three fractions sum to 1.1 -- confirm whether
# they are still needed.
val_percentage = 0.2
test_percentage = 0.1

# Number of slices expected per class in every slice folder (AD + NC = 199 + 230).
len_slice_list_CascadeCNNs_AD = 199 ## 199 + 230
len_slice_list_CascadeCNNs_NC = 230 ## 199 + 230
# One random permutation of the indices 0..count-1 per class, drawn once at
# import time and reused for every slice folder handled in this run.
rondom_list_AD = random.sample(range(0, len_slice_list_CascadeCNNs_AD), len_slice_list_CascadeCNNs_AD)
rondom_list_NC = random.sample(range(0, len_slice_list_CascadeCNNs_NC), len_slice_list_CascadeCNNs_NC)
path_backup_random_list = "/home/reserch/documents/deeplearning/alzheimers_disease_DL/ADNI/backup_random_list/rondom_list.txt"
# hcq_backup_txt_rename(path_backup_random_list)

# Presumably records both permutations in the backup file so the split can be
# reproduced later; hcq_write comes from hcq_lib and the meaning of its two
# boolean flags is not visible here -- TODO confirm.
hcq_write(path_backup_random_list, True, True, rondom_list_AD)
hcq_write(path_backup_random_list, True, True, rondom_list_NC)



def _list_class_slices(class_dir, expected_count):
    """Return the sorted file names of class_dir, requiring exactly expected_count of them."""
    # os.listdir returns names in arbitrary order; sorting makes a given index
    # of the shared permutation select the same position in the name ordering
    # for every slice folder and on every run.
    slice_names = sorted(os.listdir(class_dir))
    if len(slice_names) != expected_count:
        raise ValueError(
            "{} holds {} slices but {} were expected".format(
                class_dir, len(slice_names), expected_count))
    return slice_names


def _move_class_slices(class_dir, slice_names, shuffled_indices, num_train,
                       train_class_dir, val_class_dir):
    """Move slices in shuffled order: the first num_train to train, the rest to validation."""
    moved_train = 0
    moved_val = 0
    for rank, slice_index in enumerate(shuffled_indices):
        slice_name = slice_names[slice_index]
        if rank < num_train:
            moved_train += 1
            destination_dir = train_class_dir
        else:
            moved_val += 1
            destination_dir = val_class_dir
        shutil.move(os.path.join(class_dir, slice_name),
                    os.path.join(destination_dir, slice_name))
    return moved_train, moved_val


def get_slice_train_val_test(root_path, slice_folder_path):
    """Split one slice folder's AD/NC images into train and validation sets.

    The slices under ``root_path/slice_folder_path/AD`` and ``.../NC`` are
    moved into ``.../train/<class>`` and ``.../validation/<class>``. The
    module-level permutations ``rondom_list_AD`` / ``rondom_list_NC`` decide
    the order: the first ``int(count * train_percentage)`` slices of each
    class go to train, the remainder to validation.

    Args:
        root_path: Dataset root, e.g.
            "/home/reserch/documents/deeplearning/alzheimers_disease_DL/ADNI/dataset3".
        slice_folder_path: Name of one slice folder below it, e.g. "slice_X44".

    Returns:
        None. The four per-class counts are printed to stdout.

    Raises:
        ValueError: if a class folder does not contain exactly the expected
            number of slices (``len_slice_list_CascadeCNNs_AD`` /
            ``len_slice_list_CascadeCNNs_NC``). This is checked for both
            classes before any file is moved.
    """
    slice_dir = os.path.join(root_path, slice_folder_path)
    train_target_path = os.path.join(slice_dir, "train")
    val_target_path = os.path.join(slice_dir, "validation")

    # (class folder, shared permutation, expected slice count)
    class_plan = (
        ("AD", rondom_list_AD, len_slice_list_CascadeCNNs_AD),
        ("NC", rondom_list_NC, len_slice_list_CascadeCNNs_NC),
    )

    # The class is taken from the folder being listed rather than re-parsed
    # from each file name, so the split does not depend on the naming scheme.
    listings = {}
    for class_name, _, expected_count in class_plan:
        listings[class_name] = _list_class_slices(
            os.path.join(slice_dir, class_name), expected_count)

    # hcq_create_dir comes from hcq_lib; presumably it creates the directory
    # when it is missing -- TODO confirm.
    hcq_create_dir(os.path.join(train_target_path, "AD"))
    hcq_create_dir(os.path.join(train_target_path, "NC"))
    hcq_create_dir(os.path.join(val_target_path, "AD"))
    hcq_create_dir(os.path.join(val_target_path, "NC"))

    counts = {}
    for class_name, shuffled_indices, expected_count in class_plan:
        counts[class_name] = _move_class_slices(
            os.path.join(slice_dir, class_name),
            listings[class_name],
            shuffled_indices[:expected_count],
            int(expected_count * train_percentage),
            os.path.join(train_target_path, class_name),
            os.path.join(val_target_path, class_name))

    print("num_train_AD = {}".format(counts["AD"][0]))
    print("num_train_NC = {}".format(counts["NC"][0]))
    print("num_val_AD = {}".format(counts["AD"][1]))
    print("num_val_NC = {}".format(counts["NC"][1]))

    ### delete the now-empty source folders: AD, NC
    hcq_rmdir(os.path.join(root_path, slice_folder_path, "AD"))
    hcq_rmdir(os.path.join(root_path, slice_folder_path, "NC"))


if __name__=="__main__":

    root_path = "/home/reserch/documents/deeplearning/alzheimers_disease_DL/ADNI/dataset3"

    # Sorted so the folders are processed in the same order on every run.
    for slice_folder_path in sorted(os.listdir(root_path)):
        slice_dir = os.path.join(root_path, slice_folder_path)
        has_class_dirs = all(
            os.path.isdir(os.path.join(slice_dir, class_name))
            for class_name in ("AD", "NC"))
        if not has_class_dirs:
            # Stray files and folders that were already split have no AD/NC
            # class folders left; skipping them keeps the script re-runnable.
            print("skip {} (no AD/NC class folders to split)".format(slice_dir))
            continue
        get_slice_train_val_test(root_path, slice_folder_path)