【ADNI】資料預處理(4)Get top k slices according to CNNs
ADNI Series
1、【ADNI】資料預處理(1)SPM,CAT12
2、【ADNI】資料預處理(2)獲取 subject slices
3、【ADNI】資料預處理(3)CNNs
4、【ADNI】資料預處理(4)Get top k slices according to CNNs
5、【ADNI】資料預處理(5)Get top k slices (pMCI_sMCI) according to CNNs
6、【ADNI】資料預處理(6)ADNI_slice_dataloader ||| show image
## rules ##
## old_name = root_path + subject_id + top_k_slices_id
## subject_id: random select
## top_k_slices_id: majority select by CNNs
What you need to prepare:
1) All slices placed in the folder of the subject_id they belong to:
[email protected]:~/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id/AD_NC_GM_subject_id_20180403$ ls AD_GM_subject_id NC_GM_subject_id [email protected]:~/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id/AD_NC_GM_subject_id_20180403$ tree -L 2 . ├── AD_GM_subject_id │ ├── 002_S_0619 │ ├── 002_S_0816 │ ├── 002_S_0938 │ ├── 002_S_0955 │ ├── 002_S_1018 │ ├── 003_S_1059 │ ├── 003_S_1257 │ ├── 005_S_0221 │ ├── 005_S_0814 │ ├── 005_S_0929 │ ├── 005_S_1341 │ ├── 006_S_0547 │ ├── 006_S_0653 │ ├── 007_S_0316 │ ├── 007_S_1248 │ ├── 007_S_1304 │ ├── 007_S_1339 │ ├── 009_S_1334 │ ├── 009_S_1354 │ ├── 010_S_0786 │ ├── 010_S_0829 │ ├── 011_S_0003 │ ├── 011_S_0010 │ ├── 011_S_0053 │ ├── 011_S_0183 │ ├── 012_S_0689 │ ├── 012_S_0712 │ ├── 012_S_0720 │ ├── 012_S_0803 │ ├── 013_S_0592 │ ├── 013_S_0699 │ ├── 013_S_0996 │ ├── 013_S_1161 │ ├── 013_S_1205 │ ├── 014_S_0328 │ ├── 014_S_0356 │ ├── 014_S_0357 │ ├── 014_S_1095 │ ├── 016_S_0991 │ ├── 016_S_1263 │ ├── 018_S_0277 │ ├── 018_S_0286 │ ├── 018_S_0335 │ ├── 018_S_0633 │ ├── 018_S_0682 │ ├── 020_S_0213 │ ├── 021_S_0343 │ ├── 021_S_0642 │ ├── 021_S_0753 │ ├── 021_S_1109 │ ├── 022_S_0007 │ ├── 022_S_0129 │ ├── 022_S_0219 │ ├── 022_S_0543 │ ├── 023_S_0083 │ ├── 023_S_0084 │ ├── 023_S_0093 │ ├── 023_S_0139 │ ├── 023_S_0916 │ ├── 023_S_1262 │ ├── 023_S_1289 │ ├── 024_S_1171 │ ├── 024_S_1307 │ ├── 027_S_0404 │ ├── 027_S_0850 │ ├── 027_S_1081 │ ├── 027_S_1082 │ ├── 027_S_1254 │ ├── 027_S_1385 │ ├── 029_S_0836 │ ├── 029_S_0999 │ ├── 029_S_1056 │ ├── 029_S_1184 │ ├── 031_S_0321 │ ├── 031_S_0554 │ ├── 031_S_0773 │ ├── 031_S_1209 │ ├── 032_S_0147 │ ├── 032_S_0400 │ ├── 032_S_1037 │ ├── 032_S_1101 │ ├── 033_S_0724 │ ├── 033_S_0733 │ ├── 033_S_0739 │ ├── 033_S_0888 │ ├── 033_S_0889 │ ├── 033_S_1087 │ ├── 033_S_1281 │ ├── 033_S_1283 │ ├── 033_S_1285 │ ├── 033_S_1308 │ ├── 035_S_0341 │ ├── 036_S_0577 │ ├── 036_S_0759 │ ├── 036_S_0760 │ ├── 036_S_1001 │ ├── 037_S_0627 │ ├── 041_S_1368 │ ├── 041_S_1391 │ ├── 041_S_1435 │ ├── 051_S_1296 │ ├── 053_S_1044 │ 
├── 057_S_0474 │ ├── 057_S_1371 │ ├── 057_S_1373 │ ├── 057_S_1379 │ ├── 062_S_0535 │ ├── 062_S_0690 │ ├── 062_S_0730 │ ├── 062_S_0793 │ ├── 067_S_0020 │ ├── 067_S_0029 │ ├── 067_S_0076 │ ├── 067_S_0110 │ ├── 067_S_0812 │ ├── 067_S_0828 │ ├── 067_S_1185 │ ├── 067_S_1253 │ ├── 068_S_0109 │ ├── 073_S_0565 │ ├── 073_S_1207 │ ├── 082_S_1079 │ ├── 082_S_1377 │ ├── 094_S_1027 │ ├── 094_S_1090 │ ├── 094_S_1102 │ ├── 094_S_1164 │ ├── 094_S_1397 │ ├── 094_S_1402 │ ├── 098_S_0149 │ ├── 098_S_0884 │ ├── 099_S_0372 │ ├── 099_S_0470 │ ├── 099_S_0492 │ ├── 099_S_1144 │ ├── 100_S_0743 │ ├── 100_S_0747 │ ├── 100_S_0893 │ ├── 100_S_1062 │ ├── 100_S_1113 │ ├── 109_S_0777 │ ├── 109_S_1157 │ ├── 109_S_1192 │ ├── 114_S_0228 │ ├── 114_S_0374 │ ├── 114_S_0979 │ ├── 116_S_0370 │ ├── 116_S_0392 │ ├── 116_S_0487 │ ├── 116_S_1083 │ ├── 121_S_0953 │ ├── 123_S_0088 │ ├── 123_S_0091 │ ├── 123_S_0094 │ ├── 123_S_0162 │ ├── 126_S_0606 │ ├── 126_S_0784 │ ├── 126_S_0891 │ ├── 126_S_1221 │ ├── 127_S_0431 │ ├── 127_S_0754 │ ├── 127_S_0844 │ ├── 127_S_1382 │ ├── 128_S_0167 │ ├── 128_S_0216 │ ├── 128_S_0266 │ ├── 128_S_0310 │ ├── 128_S_0517 │ ├── 128_S_0528 │ ├── 128_S_0701 │ ├── 128_S_0740 │ ├── 128_S_0805 │ ├── 128_S_1409 │ ├── 128_S_1430 │ ├── 130_S_0956 │ ├── 130_S_1201 │ ├── 130_S_1290 │ ├── 130_S_1337 │ ├── 131_S_0457 │ ├── 131_S_0497 │ ├── 131_S_0691 │ ├── 133_S_1055 │ ├── 133_S_1170 │ ├── 136_S_0194 │ ├── 136_S_0299 │ ├── 136_S_0300 │ ├── 137_S_0366 │ ├── 137_S_0438 │ ├── 137_S_0796 │ ├── 137_S_0841 │ ├── 137_S_1041 │ ├── 141_S_0340 │ ├── 141_S_0696 │ ├── 141_S_0790 │ ├── 141_S_0852 │ ├── 141_S_0853 │ ├── 141_S_1024 │ ├── 141_S_1137 │ └── 141_S_1152 └── NC_GM_subject_id ├── 002_S_0295 ├── 002_S_0413 ├── 002_S_0559 ├── 002_S_0685 ├── 002_S_1261 ├── 002_S_1280 ├── 003_S_0907 ├── 003_S_0931 ├── 003_S_0981 ├── 003_S_1021 ├── 005_S_0223 ├── 005_S_0553 ├── 005_S_0602 ├── 005_S_0610 ├── 006_S_0484 ├── 006_S_0498 ├── 006_S_0681 ├── 006_S_0731 ├── 007_S_0068 ├── 007_S_0070 ├── 007_S_1206 ├── 007_S_1222 
├── 009_S_0751 ├── 009_S_0842 ├── 009_S_0862 ├── 010_S_0067 ├── 010_S_0419 ├── 010_S_0420 ├── 010_S_0472 ├── 011_S_0002 ├── 011_S_0005 ├── 011_S_0008 ├── 011_S_0016 ├── 011_S_0021 ├── 011_S_0022 ├── 011_S_0023 ├── 012_S_0637 ├── 012_S_1009 ├── 012_S_1133 ├── 012_S_1212 ├── 013_S_0502 ├── 013_S_0575 ├── 013_S_1035 ├── 013_S_1276 ├── 014_S_0519 ├── 014_S_0520 ├── 014_S_0548 ├── 014_S_0558 ├── 016_S_0359 ├── 016_S_0538 ├── 018_S_0043 ├── 018_S_0055 ├── 018_S_0369 ├── 018_S_0425 ├── 020_S_0097 ├── 020_S_0883 ├── 020_S_0899 ├── 020_S_1288 ├── 021_S_0159 ├── 021_S_0337 ├── 021_S_0647 ├── 021_S_0984 ├── 022_S_0014 ├── 022_S_0066 ├── 022_S_0096 ├── 022_S_0130 ├── 023_S_0031 ├── 023_S_0058 ├── 023_S_0061 ├── 023_S_0081 ├── 023_S_0926 ├── 023_S_0963 ├── 023_S_1190 ├── 023_S_1306 ├── 024_S_0985 ├── 024_S_1063 ├── 027_S_0074 ├── 027_S_0118 ├── 027_S_0120 ├── 027_S_0403 ├── 029_S_0824 ├── 029_S_0843 ├── 029_S_0845 ├── 029_S_0866 ├── 031_S_0618 ├── 032_S_0095 ├── 032_S_0479 ├── 032_S_0677 ├── 032_S_1169 ├── 033_S_0516 ├── 033_S_0734 ├── 033_S_0741 ├── 033_S_0920 ├── 033_S_0923 ├── 033_S_1016 ├── 033_S_1086 ├── 033_S_1098 ├── 035_S_0048 ├── 035_S_0156 ├── 035_S_0555 ├── 036_S_0576 ├── 036_S_0672 ├── 036_S_0813 ├── 036_S_1023 ├── 037_S_0303 ├── 037_S_0327 ├── 037_S_0454 ├── 037_S_0467 ├── 041_S_0125 ├── 041_S_0262 ├── 041_S_0898 ├── 041_S_1002 ├── 051_S_1123 ├── 052_S_0951 ├── 052_S_1250 ├── 052_S_1251 ├── 057_S_0643 ├── 057_S_0779 ├── 057_S_0818 ├── 057_S_0934 ├── 062_S_0578 ├── 062_S_0768 ├── 062_S_1099 ├── 067_S_0019 ├── 067_S_0024 ├── 067_S_0056 ├── 067_S_0059 ├── 067_S_0177 ├── 067_S_0257 ├── 068_S_0127 ├── 068_S_0210 ├── 068_S_1191 ├── 072_S_0315 ├── 073_S_0089 ├── 073_S_0311 ├── 073_S_0312 ├── 073_S_0386 ├── 082_S_0304 ├── 082_S_0363 ├── 082_S_0640 ├── 082_S_0761 ├── 082_S_1256 ├── 094_S_0489 ├── 094_S_0526 ├── 094_S_0692 ├── 094_S_0711 ├── 094_S_1241 ├── 094_S_1267 ├── 098_S_0171 ├── 098_S_0172 ├── 098_S_0896 ├── 099_S_0040 ├── 099_S_0090 ├── 099_S_0352 ├── 099_S_0533 ├── 
099_S_0534 ├── 100_S_0015 ├── 100_S_0035 ├── 100_S_0047 ├── 100_S_0069 ├── 100_S_1286 ├── 109_S_0840 ├── 109_S_0876 ├── 109_S_0967 ├── 109_S_1013 ├── 109_S_1014 ├── 114_S_0166 ├── 114_S_0173 ├── 114_S_0416 ├── 114_S_0601 ├── 116_S_0360 ├── 116_S_0382 ├── 116_S_0648 ├── 116_S_0657 ├── 116_S_1232 ├── 116_S_1249 ├── 123_S_0072 ├── 123_S_0106 ├── 123_S_0113 ├── 123_S_0298 ├── 126_S_0405 ├── 126_S_0506 ├── 126_S_0605 ├── 126_S_0680 ├── 127_S_0259 ├── 127_S_0260 ├── 127_S_0622 ├── 127_S_0684 ├── 128_S_0229 ├── 128_S_0230 ├── 128_S_0245 ├── 128_S_0272 ├── 128_S_0500 ├── 128_S_0522 ├── 128_S_0545 ├── 128_S_0863 ├── 128_S_1242 ├── 129_S_0778 ├── 130_S_0232 ├── 130_S_0886 ├── 130_S_0969 ├── 130_S_1200 ├── 131_S_0123 ├── 131_S_0319 ├── 131_S_0436 ├── 131_S_0441 ├── 131_S_1301 ├── 133_S_0433 ├── 133_S_0488 ├── 133_S_0493 ├── 133_S_0525 ├── 136_S_0086 ├── 136_S_0184 ├── 136_S_0186 ├── 136_S_0196 ├── 137_S_0283 ├── 137_S_0459 ├── 137_S_0686 ├── 137_S_0972 ├── 141_S_0717 ├── 141_S_0726 ├── 141_S_0767 ├── 141_S_0810 ├── 141_S_1094 ├── 941_S_1194 ├── 941_S_1195 ├── 941_S_1197 ├── 941_S_1202 └── 941_S_1203 430 directories, 0 files
[email protected]:~/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id/AD_NC_GM_subject_id_20180403/AD_GM_subject_id/002_S_0619$ ls
XSlice YSlice ZSlice
2) top_k_slices.txt, which lists the top-k slices selected by majority vote of the CNNs (AlexNet):
slice_Z27|||82.29|||96.88 slice_X32|||81.25|||95.74 slice_X74|||81.25|||95.74 slice_Y76|||81.25|||96.88 slice_Z42|||81.25|||96.59 slice_X48|||81.25|||96.02 slice_X43|||80.21|||96.02 slice_Y81|||80.21|||97.16 slice_Y69|||80.21|||96.02 slice_Y64|||80.21|||95.17 slice_Z30|||80.21|||96.88 slice_Y80|||79.17|||96.31 slice_X45|||79.17|||96.88 slice_Z40|||79.17|||96.31 slice_Z29|||79.17|||95.74 slice_X39|||79.17|||96.31 slice_Y62|||79.17|||96.88 slice_Y73|||79.17|||96.88 slice_Z41|||79.17|||96.88 slice_Y77|||79.17|||97.44
Format of each entry: slice_id ||| val_acc ||| train_acc
What you will get:
Prepared train/validation/test folders, ready for training:
[email protected]:~/alzheimer_disease/ADNI_825/experiments_FineTunning/majority_select_slices_folder_01$ tree -L 2
.
├── test
│ ├── AD ## 1089
│ └── NC ## 1287
├── train
│ ├── AD ## 14751
│ └── NC ## 16929
└── validation
├── AD ## 3861
└── NC ## 4455
9 directories, 0 files
Steps:
step1: collect the subject ids and partition the subjects into train/val/test folders with the ratio 0.75 : 0.2 : 0.05 (i.e. 7.5 : 2 : 0.5)
step2: according to top_k_silces_id_txt, majority select top k slices
Script:
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import re
import time
import datetime
import shutil
import random
from hcq_lib import *
# ---------------------------------------------------------------------------
# Input locations
# ---------------------------------------------------------------------------
# Folder holding the subject-path lists, top_k_slices.txt and the log folder.
root_txt_path = "/home/hcq/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id"
# Folder holding the per-class, per-subject slice directories.
dataset_path = "/home/hcq/alzheimer_disease/ADNI_825/AD_NC_GM_subject_id/AD_NC_GM_subject_id_20180403"

# Text file whose lines start with a slice id followed by "|||"-separated fields.
top_k_silces_id_txt = os.path.join(root_txt_path, "top_k_slices.txt")
# Log file handed to hcq_write by the functions in this script.
log_path = os.path.join(root_txt_path, "log", "log.txt")

# ---------------------------------------------------------------------------
# Split fractions (per subject, not per slice)
# ---------------------------------------------------------------------------
# Only the train and validation fractions are read by the partitioning code in
# this script; the test split receives whatever subjects are left over.
train_percentage, val_percentage, test_percentage = 0.75, 0.2, 0.05

# Historical note: the random subject order used to be precomputed here from
# hard-coded subject counts (199 AD, 230 NC). It is now drawn inside
# partition_slice_train_val_test from the actual number of subjects found.
def partition_slice_train_val_test(silce_txt, dataset_dir, label):
    """Randomly split the subjects of one class into train/validation/test.

    Step 1 reads ``silce_txt`` and collects the unique subject ids, then
    shuffles them and assigns the first ``int(n * train_percentage)`` subjects
    to train, the next ``int(n * val_percentage)`` to validation and all
    remaining subjects to test (0.75 / 0.2 / remainder with the module
    defaults).  The split is per subject, so every slice of a subject lands in
    the same folder.

    Step 2 hands each subject list to ``move_slice``, which copies the
    selected slices into the corresponding output folder.

    Args:
        silce_txt: Path of a text file with one backslash-separated path per
            line; the component at index 3 of each line is taken as the
            subject id.  Blank lines are ignored.
        dataset_dir: Directory passed through to ``move_slice`` as the root
            of the per-subject slice folders.
        label: Class label (the script uses "AD" and "NC"); written to the
            log and passed through to ``move_slice``.

    Raises:
        IndexError: If a non-blank line has fewer than four
            backslash-separated components.
    """
    # ---- step 1a: unique subject ids, in first-seen order -----------------
    subject_id_list = []
    seen_subject_ids = set()  # O(1) duplicate check; the list keeps the order
    with open(silce_txt, "r") as silce_txt_list:
        for raw_line in silce_txt_list:
            item = raw_line.replace("\n", "").replace("\r", "")
            if not item.strip():
                # A blank line (typically the trailing newline at end of
                # file) carries no path and would otherwise raise IndexError.
                continue
            subject_id = item.split('\\')[3]
            if subject_id not in seen_subject_ids:
                seen_subject_ids.add(subject_id)
                subject_id_list.append(subject_id)

    # ---- step 1b: random order, recorded in the log -----------------------
    len_slice_list = len(subject_id_list)
    rondom_list = random.sample(range(0, len_slice_list), len_slice_list)
    # hcq_write comes from hcq_lib; its two flags are passed exactly as the
    # script always passed them (their meaning is not visible in this file).
    hcq_write(log_path, True, True, "rondom_list [{}]".format(label))
    hcq_write(log_path, False, False, rondom_list)

    # ---- step 1c: cut the shuffled list into three consecutive pieces -----
    shuffled_subject_ids = [subject_id_list[index] for index in rondom_list]
    train_end = int(len_slice_list * train_percentage)
    val_end = train_end + int(len_slice_list * val_percentage)
    train_subject_id = shuffled_subject_ids[:train_end]
    val_subject_id = shuffled_subject_ids[train_end:val_end]
    # Everything not claimed by train/validation becomes the test split.
    test_subject_id = shuffled_subject_ids[val_end:]

    num_train = len(train_subject_id)
    num_val = len(val_subject_id)
    num_test = len(test_subject_id)
    hcq_write(log_path, True, True, "[len_slice_list] {}".format(len_slice_list))
    hcq_write(log_path, True, True, "[num_train] {}".format(num_train))
    hcq_write(log_path, True, True, "[num_val] {}".format(num_val))
    hcq_write(log_path, True, True, "[num_test] {}".format(num_test))

    # ---- step 2: copy the selected slices of every subject ----------------
    move_slice(train_subject_id, dataset_dir, "train", label)
    move_slice(val_subject_id, dataset_dir, "validation", label)
    move_slice(test_subject_id, dataset_dir, "test", label)
def move_slice(subject_id_folder_list, dataset_dir, folder_name, label,
               root_new_path="/home/hcq/alzheimer_disease/ADNI_825/experiments_FineTunning/",
               dataset_name="majority_select_slices_folder_01"):
    """Copy the selected slices of each subject into one split/class folder.

    The slice ids are read from the file named by the module-level
    ``top_k_silces_id_txt``: on every non-blank line the text before the
    first ``|||`` is the slice id, and ``.jpg`` is appended to form the file
    name.  The id must contain ``X``, ``Y`` or ``Z`` (checked in that order),
    which selects the ``XSlice`` / ``YSlice`` / ``ZSlice`` sub-folder of the
    subject.  Each file is copied (not moved, despite the function name) to
    ``<root_new_path>/<dataset_name>/<folder_name>/<label>/<subject_id>_<slice file>``
    and the destination path is written to the log.

    Args:
        subject_id_folder_list: Subject ids whose slices should be copied.
        dataset_dir: Directory containing one folder per subject id.
        folder_name: Split name used as output sub-folder (the script passes
            "train", "validation" and "test").
        label: Class name used as the innermost output folder.
        root_new_path: Root directory of the generated datasets.  Defaults to
            the location the script has always used.
        dataset_name: Name of the dataset folder created under
            ``root_new_path``.  Defaults to the name the script has always
            used.

    Raises:
        ValueError: If a slice id contains none of ``X``, ``Y`` or ``Z``.
            Previously such a line either failed with an unbound-variable
            error or silently re-copied the previous slice under a new name.
    """
    new_name_path = os.path.join(root_new_path, dataset_name, folder_name, label)
    # hcq_create_dir comes from hcq_lib; presumably it creates the directory
    # if it does not exist yet -- confirm against hcq_lib.
    hcq_create_dir(new_name_path)

    if not subject_id_folder_list:
        # Nothing to copy, so there is no need to touch the slice list.
        return

    # Parse the slice list once; it is identical for every subject.
    selected_slices = []  # (axis sub-folder, slice file name)
    with open(top_k_silces_id_txt, "r") as top_k_silces_id_txt_list:
        for raw_line in top_k_silces_id_txt_list:
            item = raw_line.replace("\n", "").replace("\r", "")
            if not item.strip():
                # Blank lines (e.g. a trailing newline) name no slice.
                continue
            slice_id = item.split('|||')[0] + ".jpg"
            for axis in ("X", "Y", "Z"):
                if axis in slice_id:
                    axis_folder = axis + "Slice"
                    break
            else:
                raise ValueError(
                    "cannot determine slice axis (X/Y/Z) from {!r} in {}".format(
                        slice_id, top_k_silces_id_txt))
            selected_slices.append((axis_folder, slice_id))

    for subject_id in subject_id_folder_list:
        for axis_folder, slice_id in selected_slices:
            old_name = os.path.join(dataset_dir, subject_id, axis_folder, slice_id)
            # Prefix with the subject id so slices of different subjects do
            # not overwrite each other in the shared output folder.
            slice_name = subject_id + "_" + slice_id
            new_name = os.path.join(new_name_path, slice_name)
            hcq_write(log_path, True, True, new_name)
            shutil.copyfile(old_name, new_name)
if __name__ == "__main__":
    # Text files listing the slice paths of every AD / NC subject.
    AD_silce_txt = os.path.join(root_txt_path, "AD_GM_subject_id_path.txt")
    NC_silce_txt = os.path.join(root_txt_path, "NC_GM_subject_id_path.txt")
    print("AD_silce_txt = {}".format(AD_silce_txt))
    print("NC_silce_txt = {}".format(NC_silce_txt))

    # One entry per class: (label, subject-path list, per-subject slice root).
    partition_jobs = (
        ("AD", AD_silce_txt, os.path.join(dataset_path, "AD_GM_subject_id")),
        ("NC", NC_silce_txt, os.path.join(dataset_path, "NC_GM_subject_id")),
    )
    # AD is processed before NC, each class with its own random split.
    for class_label, class_txt, class_dataset_dir in partition_jobs:
        partition_slice_train_val_test(class_txt, class_dataset_dir, class_label)