1. 程式人生 > >【ADNI】資料預處理(4)Get top k slices according to CNNs

【ADNI】資料預處理(4)Get top k slices according to CNNs

ADNI Series

1、【ADNI】資料預處理(1)SPM,CAT12

2、【ADNI】資料預處理(2)獲取 subject slices

3、【ADNI】資料預處理(3)CNNs

4、【ADNI】資料預處理(4)Get top k slices according to CNNs

5、【ADNI】資料預處理(5)Get top k slices (pMCI_sMCI) according to CNNs

6、【ADNI】資料預處理(6)ADNI_slice_dataloader ||| show image


## rules ##
## old_name = root_path + subject_id + top_k_slices_id
## subject_id: random select

## top_k_slices_id: majority select by CNNs

 

What you need to prepare:

1) All slices have been put into the corresponding subject_id folder

[email protected]:~/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id/AD_NC_GM_subject_id_20180403$ ls
AD_GM_subject_id  NC_GM_subject_id
[email protected]:~/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id/AD_NC_GM_subject_id_20180403$ tree -L 2
.
├── AD_GM_subject_id
│   ├── 002_S_0619
│   ├── 002_S_0816
│   ├── 002_S_0938
│   ├── 002_S_0955
│   ├── 002_S_1018
│   ├── 003_S_1059
│   ├── 003_S_1257
│   ├── 005_S_0221
│   ├── 005_S_0814
│   ├── 005_S_0929
│   ├── 005_S_1341
│   ├── 006_S_0547
│   ├── 006_S_0653
│   ├── 007_S_0316
│   ├── 007_S_1248
│   ├── 007_S_1304
│   ├── 007_S_1339
│   ├── 009_S_1334
│   ├── 009_S_1354
│   ├── 010_S_0786
│   ├── 010_S_0829
│   ├── 011_S_0003
│   ├── 011_S_0010
│   ├── 011_S_0053
│   ├── 011_S_0183
│   ├── 012_S_0689
│   ├── 012_S_0712
│   ├── 012_S_0720
│   ├── 012_S_0803
│   ├── 013_S_0592
│   ├── 013_S_0699
│   ├── 013_S_0996
│   ├── 013_S_1161
│   ├── 013_S_1205
│   ├── 014_S_0328
│   ├── 014_S_0356
│   ├── 014_S_0357
│   ├── 014_S_1095
│   ├── 016_S_0991
│   ├── 016_S_1263
│   ├── 018_S_0277
│   ├── 018_S_0286
│   ├── 018_S_0335
│   ├── 018_S_0633
│   ├── 018_S_0682
│   ├── 020_S_0213
│   ├── 021_S_0343
│   ├── 021_S_0642
│   ├── 021_S_0753
│   ├── 021_S_1109
│   ├── 022_S_0007
│   ├── 022_S_0129
│   ├── 022_S_0219
│   ├── 022_S_0543
│   ├── 023_S_0083
│   ├── 023_S_0084
│   ├── 023_S_0093
│   ├── 023_S_0139
│   ├── 023_S_0916
│   ├── 023_S_1262
│   ├── 023_S_1289
│   ├── 024_S_1171
│   ├── 024_S_1307
│   ├── 027_S_0404
│   ├── 027_S_0850
│   ├── 027_S_1081
│   ├── 027_S_1082
│   ├── 027_S_1254
│   ├── 027_S_1385
│   ├── 029_S_0836
│   ├── 029_S_0999
│   ├── 029_S_1056
│   ├── 029_S_1184
│   ├── 031_S_0321
│   ├── 031_S_0554
│   ├── 031_S_0773
│   ├── 031_S_1209
│   ├── 032_S_0147
│   ├── 032_S_0400
│   ├── 032_S_1037
│   ├── 032_S_1101
│   ├── 033_S_0724
│   ├── 033_S_0733
│   ├── 033_S_0739
│   ├── 033_S_0888
│   ├── 033_S_0889
│   ├── 033_S_1087
│   ├── 033_S_1281
│   ├── 033_S_1283
│   ├── 033_S_1285
│   ├── 033_S_1308
│   ├── 035_S_0341
│   ├── 036_S_0577
│   ├── 036_S_0759
│   ├── 036_S_0760
│   ├── 036_S_1001
│   ├── 037_S_0627
│   ├── 041_S_1368
│   ├── 041_S_1391
│   ├── 041_S_1435
│   ├── 051_S_1296
│   ├── 053_S_1044
│   ├── 057_S_0474
│   ├── 057_S_1371
│   ├── 057_S_1373
│   ├── 057_S_1379
│   ├── 062_S_0535
│   ├── 062_S_0690
│   ├── 062_S_0730
│   ├── 062_S_0793
│   ├── 067_S_0020
│   ├── 067_S_0029
│   ├── 067_S_0076
│   ├── 067_S_0110
│   ├── 067_S_0812
│   ├── 067_S_0828
│   ├── 067_S_1185
│   ├── 067_S_1253
│   ├── 068_S_0109
│   ├── 073_S_0565
│   ├── 073_S_1207
│   ├── 082_S_1079
│   ├── 082_S_1377
│   ├── 094_S_1027
│   ├── 094_S_1090
│   ├── 094_S_1102
│   ├── 094_S_1164
│   ├── 094_S_1397
│   ├── 094_S_1402
│   ├── 098_S_0149
│   ├── 098_S_0884
│   ├── 099_S_0372
│   ├── 099_S_0470
│   ├── 099_S_0492
│   ├── 099_S_1144
│   ├── 100_S_0743
│   ├── 100_S_0747
│   ├── 100_S_0893
│   ├── 100_S_1062
│   ├── 100_S_1113
│   ├── 109_S_0777
│   ├── 109_S_1157
│   ├── 109_S_1192
│   ├── 114_S_0228
│   ├── 114_S_0374
│   ├── 114_S_0979
│   ├── 116_S_0370
│   ├── 116_S_0392
│   ├── 116_S_0487
│   ├── 116_S_1083
│   ├── 121_S_0953
│   ├── 123_S_0088
│   ├── 123_S_0091
│   ├── 123_S_0094
│   ├── 123_S_0162
│   ├── 126_S_0606
│   ├── 126_S_0784
│   ├── 126_S_0891
│   ├── 126_S_1221
│   ├── 127_S_0431
│   ├── 127_S_0754
│   ├── 127_S_0844
│   ├── 127_S_1382
│   ├── 128_S_0167
│   ├── 128_S_0216
│   ├── 128_S_0266
│   ├── 128_S_0310
│   ├── 128_S_0517
│   ├── 128_S_0528
│   ├── 128_S_0701
│   ├── 128_S_0740
│   ├── 128_S_0805
│   ├── 128_S_1409
│   ├── 128_S_1430
│   ├── 130_S_0956
│   ├── 130_S_1201
│   ├── 130_S_1290
│   ├── 130_S_1337
│   ├── 131_S_0457
│   ├── 131_S_0497
│   ├── 131_S_0691
│   ├── 133_S_1055
│   ├── 133_S_1170
│   ├── 136_S_0194
│   ├── 136_S_0299
│   ├── 136_S_0300
│   ├── 137_S_0366
│   ├── 137_S_0438
│   ├── 137_S_0796
│   ├── 137_S_0841
│   ├── 137_S_1041
│   ├── 141_S_0340
│   ├── 141_S_0696
│   ├── 141_S_0790
│   ├── 141_S_0852
│   ├── 141_S_0853
│   ├── 141_S_1024
│   ├── 141_S_1137
│   └── 141_S_1152
└── NC_GM_subject_id
    ├── 002_S_0295
    ├── 002_S_0413
    ├── 002_S_0559
    ├── 002_S_0685
    ├── 002_S_1261
    ├── 002_S_1280
    ├── 003_S_0907
    ├── 003_S_0931
    ├── 003_S_0981
    ├── 003_S_1021
    ├── 005_S_0223
    ├── 005_S_0553
    ├── 005_S_0602
    ├── 005_S_0610
    ├── 006_S_0484
    ├── 006_S_0498
    ├── 006_S_0681
    ├── 006_S_0731
    ├── 007_S_0068
    ├── 007_S_0070
    ├── 007_S_1206
    ├── 007_S_1222
    ├── 009_S_0751
    ├── 009_S_0842
    ├── 009_S_0862
    ├── 010_S_0067
    ├── 010_S_0419
    ├── 010_S_0420
    ├── 010_S_0472
    ├── 011_S_0002
    ├── 011_S_0005
    ├── 011_S_0008
    ├── 011_S_0016
    ├── 011_S_0021
    ├── 011_S_0022
    ├── 011_S_0023
    ├── 012_S_0637
    ├── 012_S_1009
    ├── 012_S_1133
    ├── 012_S_1212
    ├── 013_S_0502
    ├── 013_S_0575
    ├── 013_S_1035
    ├── 013_S_1276
    ├── 014_S_0519
    ├── 014_S_0520
    ├── 014_S_0548
    ├── 014_S_0558
    ├── 016_S_0359
    ├── 016_S_0538
    ├── 018_S_0043
    ├── 018_S_0055
    ├── 018_S_0369
    ├── 018_S_0425
    ├── 020_S_0097
    ├── 020_S_0883
    ├── 020_S_0899
    ├── 020_S_1288
    ├── 021_S_0159
    ├── 021_S_0337
    ├── 021_S_0647
    ├── 021_S_0984
    ├── 022_S_0014
    ├── 022_S_0066
    ├── 022_S_0096
    ├── 022_S_0130
    ├── 023_S_0031
    ├── 023_S_0058
    ├── 023_S_0061
    ├── 023_S_0081
    ├── 023_S_0926
    ├── 023_S_0963
    ├── 023_S_1190
    ├── 023_S_1306
    ├── 024_S_0985
    ├── 024_S_1063
    ├── 027_S_0074
    ├── 027_S_0118
    ├── 027_S_0120
    ├── 027_S_0403
    ├── 029_S_0824
    ├── 029_S_0843
    ├── 029_S_0845
    ├── 029_S_0866
    ├── 031_S_0618
    ├── 032_S_0095
    ├── 032_S_0479
    ├── 032_S_0677
    ├── 032_S_1169
    ├── 033_S_0516
    ├── 033_S_0734
    ├── 033_S_0741
    ├── 033_S_0920
    ├── 033_S_0923
    ├── 033_S_1016
    ├── 033_S_1086
    ├── 033_S_1098
    ├── 035_S_0048
    ├── 035_S_0156
    ├── 035_S_0555
    ├── 036_S_0576
    ├── 036_S_0672
    ├── 036_S_0813
    ├── 036_S_1023
    ├── 037_S_0303
    ├── 037_S_0327
    ├── 037_S_0454
    ├── 037_S_0467
    ├── 041_S_0125
    ├── 041_S_0262
    ├── 041_S_0898
    ├── 041_S_1002
    ├── 051_S_1123
    ├── 052_S_0951
    ├── 052_S_1250
    ├── 052_S_1251
    ├── 057_S_0643
    ├── 057_S_0779
    ├── 057_S_0818
    ├── 057_S_0934
    ├── 062_S_0578
    ├── 062_S_0768
    ├── 062_S_1099
    ├── 067_S_0019
    ├── 067_S_0024
    ├── 067_S_0056
    ├── 067_S_0059
    ├── 067_S_0177
    ├── 067_S_0257
    ├── 068_S_0127
    ├── 068_S_0210
    ├── 068_S_1191
    ├── 072_S_0315
    ├── 073_S_0089
    ├── 073_S_0311
    ├── 073_S_0312
    ├── 073_S_0386
    ├── 082_S_0304
    ├── 082_S_0363
    ├── 082_S_0640
    ├── 082_S_0761
    ├── 082_S_1256
    ├── 094_S_0489
    ├── 094_S_0526
    ├── 094_S_0692
    ├── 094_S_0711
    ├── 094_S_1241
    ├── 094_S_1267
    ├── 098_S_0171
    ├── 098_S_0172
    ├── 098_S_0896
    ├── 099_S_0040
    ├── 099_S_0090
    ├── 099_S_0352
    ├── 099_S_0533
    ├── 099_S_0534
    ├── 100_S_0015
    ├── 100_S_0035
    ├── 100_S_0047
    ├── 100_S_0069
    ├── 100_S_1286
    ├── 109_S_0840
    ├── 109_S_0876
    ├── 109_S_0967
    ├── 109_S_1013
    ├── 109_S_1014
    ├── 114_S_0166
    ├── 114_S_0173
    ├── 114_S_0416
    ├── 114_S_0601
    ├── 116_S_0360
    ├── 116_S_0382
    ├── 116_S_0648
    ├── 116_S_0657
    ├── 116_S_1232
    ├── 116_S_1249
    ├── 123_S_0072
    ├── 123_S_0106
    ├── 123_S_0113
    ├── 123_S_0298
    ├── 126_S_0405
    ├── 126_S_0506
    ├── 126_S_0605
    ├── 126_S_0680
    ├── 127_S_0259
    ├── 127_S_0260
    ├── 127_S_0622
    ├── 127_S_0684
    ├── 128_S_0229
    ├── 128_S_0230
    ├── 128_S_0245
    ├── 128_S_0272
    ├── 128_S_0500
    ├── 128_S_0522
    ├── 128_S_0545
    ├── 128_S_0863
    ├── 128_S_1242
    ├── 129_S_0778
    ├── 130_S_0232
    ├── 130_S_0886
    ├── 130_S_0969
    ├── 130_S_1200
    ├── 131_S_0123
    ├── 131_S_0319
    ├── 131_S_0436
    ├── 131_S_0441
    ├── 131_S_1301
    ├── 133_S_0433
    ├── 133_S_0488
    ├── 133_S_0493
    ├── 133_S_0525
    ├── 136_S_0086
    ├── 136_S_0184
    ├── 136_S_0186
    ├── 136_S_0196
    ├── 137_S_0283
    ├── 137_S_0459
    ├── 137_S_0686
    ├── 137_S_0972
    ├── 141_S_0717
    ├── 141_S_0726
    ├── 141_S_0767
    ├── 141_S_0810
    ├── 141_S_1094
    ├── 941_S_1194
    ├── 941_S_1195
    ├── 941_S_1197
    ├── 941_S_1202
    └── 941_S_1203

430 directories, 0 files
[email protected]:~/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id/AD_NC_GM_subject_id_20180403/AD_GM_subject_id/002_S_0619$ ls
XSlice  YSlice  ZSlice

2) top_k_slices.txt, which lists the top k slices majority-selected by CNNs (AlexNet).

slice_Z27|||82.29|||96.88
slice_X32|||81.25|||95.74
slice_X74|||81.25|||95.74
slice_Y76|||81.25|||96.88
slice_Z42|||81.25|||96.59
slice_X48|||81.25|||96.02
slice_X43|||80.21|||96.02
slice_Y81|||80.21|||97.16
slice_Y69|||80.21|||96.02
slice_Y64|||80.21|||95.17
slice_Z30|||80.21|||96.88
slice_Y80|||79.17|||96.31
slice_X45|||79.17|||96.88
slice_Z40|||79.17|||96.31
slice_Z29|||79.17|||95.74
slice_X39|||79.17|||96.31
slice_Y62|||79.17|||96.88
slice_Y73|||79.17|||96.88
slice_Z41|||79.17|||96.88
slice_Y77|||79.17|||97.44

slice_id ||| val_acc ||| train_acc

 

What you will get:

Prepared train/validation/test folders for training

[email protected]:~/alzheimer_disease/ADNI_825/experiments_FineTunning/majority_select_slices_folder_01$ tree -L 2
.
├── test
│   ├── AD  ## 1089
│   └── NC  ## 1287
├── train
│   ├── AD  ## 14751
│   └── NC  ## 16929
└── validation
    ├── AD  ## 3861
    └── NC  ## 4455

9 directories, 0 files

Steps:

step1: get the subject ids and partition the subjects into train/val/test folders with the ratio 0.75 : 0.2 : 0.05 (i.e. 7.5 : 2 : 0.5)

step2: according to top_k_silces_id_txt, majority select top k slices

 

Script:

#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import re
import time
import datetime

import shutil
import random

from hcq_lib import *

# Fractions of each class's subjects assigned to the train and validation
# splits by partition_slice_train_val_test.
train_percentage = 0.75
val_percentage = 0.2
# NOTE: not read anywhere in the visible part of this script; the test split
# simply receives the subjects left over after the train and validation
# quotas are filled.
test_percentage = 0.05

# Earlier approach with hard-coded subject counts, kept for reference only.
# len_slice_list_CascadeCNNs_AD = 199 ## 199 + 230
# len_slice_list_CascadeCNNs_NC = 230 ## 199 + 230
# rondom_list_AD = random.sample(range(0, len_slice_list_CascadeCNNs_AD), len_slice_list_CascadeCNNs_AD)
# rondom_list_NC = random.sample(range(0, len_slice_list_CascadeCNNs_NC), len_slice_list_CascadeCNNs_NC)


# Root folder that contains the per-class subject folders
# (AD_GM_subject_id / NC_GM_subject_id are joined onto it in the main block).
dataset_path = "/home/hcq/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id/AD_NC_GM_subject_id_20180403"
# Folder that holds the input text files and the log directory.
root_txt_path = "/home/hcq/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id"
# Slice list read by move_slice; the first "|||"-separated field of each line
# is used as the slice id.
top_k_silces_id_txt = os.path.join(root_txt_path, "top_k_slices.txt")

# Log file passed to every hcq_write call in this script.
log_path = os.path.join(root_txt_path, "log", "log.txt")

def partition_slice_train_val_test(silce_txt, dataset_dir, label):
	"""Randomly split the subjects of one class and copy their selected slices.

	The unique subject ids are read from ``silce_txt``, shuffled, and divided
	into train / validation / test groups. ``move_slice`` is then called once
	per group to copy the slices of those subjects.

	Args:
		silce_txt: path of a text file with one backslash-separated path per
			line; the 4th component (index 3) of each line is taken as the
			subject id. Blank lines are ignored.
		dataset_dir: folder containing one sub-folder per subject id;
			forwarded unchanged to ``move_slice``.
		label: class name (the script passes "AD" or "NC"); used in the log
			and forwarded to ``move_slice``.

	Raises:
		IndexError: if a non-blank line has fewer than four
			backslash-separated components.
	"""

	## rules ##
	## old_name = root_path + subject_id + top_k_slices_id
	## subject_id: random select
	## top_k_slices_id: majority select by CNNs


	## step1: get the subject ids and partition the subjects into
	## train/val/test according to train_percentage / val_percentage
	## (0.75 : 0.2, the remainder goes to test)
	## added by hcq 20180404

	# Collect the unique subject ids in first-seen order. The set gives O(1)
	# duplicate detection; the list keeps the order that the shuffled indices
	# below refer to.
	subject_id_list = []
	seen_subject_ids = set()
	with open(silce_txt, "r") as silce_txt_list:
		for item in silce_txt_list:
			item = item.replace("\n", "").replace("\r", "")
			if not item.strip():
				# A blank line (typically a trailing newline at the end of
				# the file) has no subject id; indexing it would raise
				# IndexError.
				continue
			subject_id = item.split('\\')[3]
			if subject_id not in seen_subject_ids:
				seen_subject_ids.add(subject_id)
				subject_id_list.append(subject_id)

	len_slice_list = len(subject_id_list)

	# Random permutation of the subject indices; logged so that the split
	# can be traced afterwards.
	rondom_list = random.sample(range(0, len_slice_list), len_slice_list)
	hcq_write(log_path, True, True, "rondom_list [{}]".format(label))
	hcq_write(log_path, False, False, rondom_list)

	shuffled_subject_ids = [subject_id_list[random_id] for random_id in rondom_list]

	# Train and validation quotas are rounded down; every remaining subject
	# goes to the test split.
	train_quota = int(len_slice_list * train_percentage)
	val_quota = int(len_slice_list * val_percentage)

	train_subject_id = shuffled_subject_ids[:train_quota]
	val_subject_id = shuffled_subject_ids[train_quota:train_quota + val_quota]
	test_subject_id = shuffled_subject_ids[train_quota + val_quota:]

	num_train = len(train_subject_id)
	num_val = len(val_subject_id)
	num_test = len(test_subject_id)

	hcq_write(log_path, True, True, "[len_slice_list] {}".format(len_slice_list))
	hcq_write(log_path, True, True, "[num_train] {}".format(num_train))
	hcq_write(log_path, True, True, "[num_val] {}".format(num_val))
	hcq_write(log_path, True, True, "[num_test] {}".format(num_test))


	### step2: according to top_k_silces_id_txt, majority select top k slices;
	### added by hcq 20180404

	move_slice(train_subject_id, dataset_dir, "train", label)
	move_slice(val_subject_id, dataset_dir, "validation", label)
	move_slice(test_subject_id, dataset_dir, "test", label)
	


def move_slice(subject_id_folder_list, dataset_dir, folder_name, label):
	"""Copy the selected slices of the given subjects into one split folder.

	Despite the name, files are copied (``shutil.copyfile``), not moved.
	For every subject and every slice id listed in ``top_k_silces_id_txt``
	the source image
	``<dataset_dir>/<subject_id>/<X|Y|Z>Slice/<slice_id>.jpg`` is copied to
	``<root_new_path>/<dataset_name>/<folder_name>/<label>/<subject_id>_<slice_id>.jpg``.

	Args:
		subject_id_folder_list: list of subject ids whose slices are copied.
		dataset_dir: folder containing one sub-folder per subject id.
		folder_name: split name used as output sub-folder (the script passes
			"train", "validation" or "test").
		label: class name used as the innermost output folder.

	Raises:
		ValueError: if a non-blank line of the slice list yields a slice id
			containing none of "X", "Y", "Z".
	"""

	root_new_path = "/home/hcq/alzheimer_disease/ADNI_825/experiments_FineTunning/"
	dataset_name = "majority_select_slices_folder_01"
	new_name_path = os.path.join(root_new_path, dataset_name, folder_name, label)
	hcq_create_dir(new_name_path)

	if not subject_id_folder_list:
		# Nothing to copy; as before, the slice list is not opened at all.
		return

	# Parse the slice list once instead of re-reading the file per subject.
	# Each entry is (axis sub-folder, slice file name).
	selected_slices = []
	with open(top_k_silces_id_txt, "r") as top_k_silces_id_txt_list:
		for item in top_k_silces_id_txt_list:
			item = item.replace("\n", "").replace("\r", "")
			if not item.strip():
				# Skip blank lines: previously they produced ".jpg", matched
				# no axis, and reused the source path of the previous slice.
				continue

			slice_id = item.split('|||')[0] + ".jpg"

			# Same precedence as the original if/elif chain: X, then Y, then Z.
			for axis in ("X", "Y", "Z"):
				if axis in slice_id:
					selected_slices.append((axis + "Slice", slice_id))
					break
			else:
				raise ValueError(
					"Cannot determine slice axis (X/Y/Z) from line {!r} in {}".format(
						item, top_k_silces_id_txt))

	for subject_id in subject_id_folder_list:
		for axis_folder, slice_id in selected_slices:
			old_name = os.path.join(dataset_dir, subject_id, axis_folder, slice_id)

			# Prefix with the subject id so slices of different subjects do
			# not collide in the shared output folder.
			slice_name = subject_id + "_" + slice_id
			new_name = os.path.join(new_name_path, slice_name)
			hcq_write(log_path, True, True, new_name)

			shutil.copyfile(old_name, new_name)



if __name__ == "__main__":

	# One job per class: (subject listing file, subject folder root, label).
	# The listing file is what partition_slice_train_val_test reads the
	# subject ids from; the folder root is where the slices are copied from.
	jobs = []
	for class_label, listing_name, folder in (
		("AD", "AD_GM_subject_id_path.txt", "AD_GM_subject_id"),
		("NC", "NC_GM_subject_id_path.txt", "NC_GM_subject_id"),
	):
		jobs.append((
			os.path.join(root_txt_path, listing_name),
			os.path.join(dataset_path, folder),
			class_label,
		))

	# Echo both listing paths before any work starts.
	print("AD_silce_txt = {}".format(jobs[0][0]))
	print("NC_silce_txt = {}".format(jobs[1][0]))

	# Process AD first, then NC.
	for listing_path, subject_root, class_label in jobs:
		partition_slice_train_val_test(listing_path, subject_root, class_label)