From: https://liudongdong1.github.io/

0. 基礎配置

0.1. 設定隨機種子

  1. def set_seeds(seed, cuda):
  2. """ Set Numpy and PyTorch seeds.
  3. """
  4. np.random.seed(seed)
  5. torch.manual_seed(seed)
  6. if cuda:
  7. torch.cuda.manual_seed_all(seed)
  8. print ("==> Set NumPy and PyTorch seeds.")

0.2. 張量處理與轉化

  1. tensor.type() # Data type
  2. tensor.size() # Shape of the tensor. It is a subclass of Python tuple
  3. tensor.dim() # Number of dimensions.
  4. # Type convertions.
  5. tensor = tensor.cuda()
  6. tensor = tensor.cpu()
  7. tensor = tensor.float()
  8. tensor = tensor.long()
  9. #tensor 與python資料型別轉化
  10. #Tensor ----> 單個Python資料,使用data.item(),data為Tensor變數且只能為包含單個數據
  11. #Tensor ----> Python list,使用data.tolist(),data為Tensor變數,返回shape相同的可巢狀的list
  12. #CPU&GPU 位置
  13. #CPU張量 ----> GPU張量,使用data.cuda()
  14. #GPU張量 ----> CPU張量,使用data.cpu()
  15. #tensor 與np.ndarray
  16. ndarray = tensor.cpu().numpy()
  17. ndarray = tensor.numpy()
  18. tensor.cpu().detach().numpy().tolist()[0]
  19. # np.ndarray -> torch.Tensor.
  20. tensor = torch.from_numpy(ndarray).float()
  21. tensor = torch.from_numpy(ndarray.copy()).float() # If ndarray has negative stride
  22. # torch.Tensor -> PIL.Image.
  23. image = PIL.Image.fromarray(torch.clamp(tensor * 255, min=0, max=255
  24. ).byte().permute(1, 2, 0).cpu().numpy())
  25. image = torchvision.transforms.functional.to_pil_image(tensor) # Equivalent way
  26. # PIL.Image -> torch.Tensor.
  27. tensor = torch.from_numpy(np.asarray(PIL.Image.open(path))
  28. ).permute(2, 0, 1).float() / 255
  29. tensor = torchvision.transforms.functional.to_tensor(PIL.Image.open(path)) # Equivalent way
  30. # np.ndarray -> PIL.Image.
  31. image = PIL.Image.fromarray(ndarray.astype(np.uint8))
  32. # PIL.Image -> np.ndarray.
  33. ndarray = np.asarray(PIL.Image.open(path))
  34. #複製張量
  35. # Operation | New/Shared memory | Still in computation graph |
  36. tensor.clone() # | New | Yes |
  37. tensor.detach() # | Shared | No |
  38. tensor.detach().clone() # | New | No |
  39. #reshape 操作
  40. tensor = torch.reshape(tensor, shape)
  41. # Expand tensor of shape 64*512 to shape 64*512*7*7.
  42. torch.reshape(tensor, (64, 512, 1, 1)).expand(64, 512, 7, 7)
  43. #向量拼接 注意torch.cat和torch.stack的區別在於torch.cat沿著給定的維度拼接,而torch.stack會新增一維。例如當引數是3個10×5的張量,torch.cat的結果是30×5的張量,而torch.stack的結果是3×10×5的張量。
  44. tensor = torch.cat(list_of_tensors, dim=0)
  45. tensor = torch.stack(list_of_tensors, dim=0)
  46. #得到0/非0 元素
  47. torch.nonzero(tensor) # Index of non-zero elements
  48. torch.nonzero(tensor == 0) # Index of zero elements
  49. torch.nonzero(tensor).size(0) # Number of non-zero elements
  50. torch.nonzero(tensor == 0).size(0) # Number of zero elements
  51. #向量乘法
  52. # Matrix multiplication: (m*n) * (n*p) -> (m*p).
  53. result = torch.mm(tensor1, tensor2)
  54. # Batch matrix multiplication: (b*m*n) * (b*n*p) -> (b*m*p).
  55. result = torch.bmm(tensor1, tensor2)
  56. # Element-wise multiplication.
  57. result = tensor1 * tensor2
  58. #計算兩組資料之間的兩兩歐式距離
  59. # X1 is of shape m*d.
  60. X1 = torch.unsqueeze(X1, dim=1).expand(m, n, d)
  61. # X2 is of shape n*d.
  62. X2 = torch.unsqueeze(X2, dim=0).expand(m, n, d)
  63. # dist is of shape m*n, where dist[i][j] = sqrt(|X1[i, :] - X2[j, :]|^2)
  64. dist = torch.sqrt(torch.sum((X1 - X2) ** 2, dim=2))
  65. #卷積核
  66. conv = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True)

0.3. pytorch 版本

  1. torch.__version__ # PyTorch version
  2. torch.version.cuda # Corresponding CUDA version
  3. torch.backends.cudnn.version() # Corresponding cuDNN version
  4. torch.cuda.get_device_name(0) # GPU type

0.4. GPU指定

  1. torch.cuda.is_available()
  2. os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

1. 資料載入分割

1.0. Transform 變化

其中ToTensor操作會將PIL.Image或形狀為H×W×D,數值範圍為[0, 255]的np.ndarray轉換為形狀為D×H×W,數值範圍為[0.0, 1.0]的torch.Tensor。 Normalize 需要注意資料的維度,否則容易報錯。

  1. train_transform = torchvision.transforms.Compose([
  2. torchvision.transforms.RandomResizedCrop(size=224,
  3. scale=(0.08, 1.0)),
  4. torchvision.transforms.RandomHorizontalFlip(),
  5. torchvision.transforms.ToTensor(),
  6. torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
  7. std=(0.229, 0.224, 0.225)),
  8. ])
  9. val_transform = torchvision.transforms.Compose([
  10. torchvision.transforms.Resize(256),
  11. torchvision.transforms.CenterCrop(224),
  12. torchvision.transforms.ToTensor(),
  13. torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
  14. std=(0.229, 0.224, 0.225)),
  15. ])

1.1. 自定義dataset類

  1. class CharDataset(Dataset):
  2. def __init__(self, csv_file, root_dir, transform = None):
  3. # args: path to csv file with keypoint data, directory with images, transform to be applied
  4. self.key_pts_frame = pd.read_csv(csv_file)
  5. self.root_dir = root_dir
  6. self.transform = transform
  7. def __len__(self):
  8. # return size of dataset
  9. return len(self.key_pts_frame)
  10. def __getitem__(self, idx):
  11. image_name = os.path.join(self.root_dir, self.key_pts_frame.iloc[idx, 0])
  12. image = mpimg.imread(image_name)
  13. # removing alpha color channel if present
  14. if image.shape[2] == 4:
  15. image = image[:, :, 0:3]
  16. key_pts = self.key_pts_frame.iloc[idx, 1:].values
  17. key_pts = key_pts.astype('float').reshape(-1, 2)
  18. sample = {'image': image, 'keypoints': key_pts}
  19. # apply transform
  20. if self.transform:
  21. sample = self.transform(sample)
  22. return sample
  23. if __name__ == "__main__":
  24. chardata=CharDataset("D:\\Model\\CharPointDetection\\data\\test\\")
  25. print(len(chardata)) #1198
  26. print(chardata[0].get("image").shape) #(96, 96) 最大值1, 最小值0
  • dataset
  1. import json
  2. import matplotlib.pyplot as plt
  3. import numpy as np
  4. from torch.utils.data import Dataset,DataLoader
  5. import matplotlib.pyplot as plt
  6. from torchvision import transforms, utils
  7. import cv2
  8. from util.imageUtil import *
  9. from util.config import *
  10. class DatasetCustom(Dataset):
  11. def __init__(self, rootcsv, imgroot,train=True, transform = None,ratio=0.7):
  12. self.train = train
  13. self.transform = transform
  14. self.allItem=self.readcsv(rootcsv)
  15. self.imgroot=imgroot
  16. #todo 新增打亂操作 訓練和測試資料集進行分割處理
  17. if self.train :
  18. self.labelItem=self.allItem[:int(len(self.allItem)*ratio)]
  19. else:
  20. self.labelItem=self.allItem[int(len(self.allItem)*ratio)+1:]
  21. def readcsv(self,filename):
  22. '''
  23. 讀取CSV中clothdata資料
  24. '''
  25. with open(filename,encoding = 'utf-8') as f:
  26. data = np.loadtxt(f,str,delimiter = ",", skiprows = 1)
  27. data=data[::2,:] #或取csv 檔案資料
  28. return data
  29. def __getitem__(self, index):
  30. index=index%self.__len__()
  31. img_name = self.labelItem[index][0].split('_') # 或取圖片對於路徑
  32. imgpath="{}/camera{}_{}_{}_{}.jpg".format(self.imgroot,img_name[0],img_name[1],0-int(img_name[1]),img_name[2])
  33. ratioW,ratioH,img=imageloadCV(imgpath,RESIZE) #圖片大小進行了resize處理,對於x,y也進行縮放處理
  34. keypoints = self.labelCoordinateHandle(self.labelItem[index][10:],ratioW,ratioH)
  35. if self.transform is not None:
  36. img = self.transform(img)
  37. # return img, keypoints 對於這種列舉方式:for step ,(b_x,b_y) in enumerate(train_loader):
  38. # return {
  39. # 'image': torch.tensor(img, dtype=torch.float),
  40. # 'keypoints': torch.tensor(keypoints, dtype=torch.float),
  41. # }
  42. # 對應程式碼列舉方式
  43. # for i, data in tqdm(enumerate(dataloader), total=num_batches):
  44. # image, keypoints = data['image'].to(DEVICE), data['keypoints'].to(DEVICE)
  45. return {
  46. 'image': img,
  47. 'keypoints': keypoints,
  48. }
  49. def labelCoordinateHandle(self,data,ratioW,ratioH):
  50. '''
  51. 對圖片的長寬進行了相應的縮放處理
  52. '''
  53. data=[float(i) for i in data]
  54. data[0]=data[0]*ratioW
  55. data[1]=data[1]*ratioH
  56. data[3]=data[3]*ratioW
  57. data[4]=data[4]*ratioH
  58. return np.array(data, dtype='float32')
  59. def __len__(self):
  60. return len(self.labelItem)
  61. if __name__ == '__main__':
  62. train_dataset =DatasetCustom(rootcsv=ROOT_CSV,imgroot=IMG_ROOT,train=True,transform=transforms.ToTensor(),ratio=0.7)
  63. test_dataset = DatasetCustom(rootcsv=ROOT_CSV,imgroot=IMG_ROOT,train=False,transform=transforms.ToTensor(),ratio=0.7)
  64. #single record
  65. data= train_dataset.__getitem__(1) #toTensor中進行了轉化 img = torch.from_numpy(pic.transpose((2, 0, 1)))
  66. img, label = data['image'], data['keypoints']
  67. img = np.transpose(img.numpy(),(1,2,0))
  68. plt.imshow(img)
  69. plt.show()
  70. print("label",label)
  71. #DataLoader檢視
  72. train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
  73. test_loader = DataLoader(dataset=test_dataset, batch_size=6, shuffle=False)
  74. def imshow(img):
  75. npimg = img.numpy()
  76. plt.imshow(np.transpose(npimg, (1, 2, 0)))
  77. print('num_of_trainData:', len(train_loader))
  78. print('num_of_testData:', len(test_loader))
  79. #顯示要給batch 中圖片內容
  80. for step ,(b_x,b_y) in enumerate(train_loader):
  81. #print("step:",step)
  82. if step < 1:
  83. imgs = utils.make_grid(b_x)
  84. print(imgs.shape)
  85. imgs = np.transpose(imgs,(1,2,0))
  86. print(imgs.shape)
  87. plt.imshow(imgs)
  88. plt.show()
  89. break

1.2. 資料分割獲取

  1. Dataset = CharDataset(rootdir) # 自定義的dataset 類
  2. l=Dataset.__len__()
  3. test_percent=5
  4. torch.manual_seed(1)
  5. indices = torch.randperm(len(Dataset)).tolist()
  6. dataset = torch.utils.data.Subset(Dataset, indices[:-int(np.ceil(l*test_percent/100))])
  7. dataset_test = torch.utils.data.Subset(Dataset, indices[int(-np.ceil(l*test_percent/100)):])
  8. # define training and validation data loaders
  9. import utils
  10. data_loader = torch.utils.data.DataLoader(
  11. dataset, batch_size=2, shuffle=True,
  12. collate_fn=utils.collate_fn)
  13. data_loader_test = torch.utils.data.DataLoader(
  14. dataset_test, batch_size=(1), shuffle=False,
  15. collate_fn=utils.collate_fn)
  16. for batch_i, data in enumerate(data_loader):
  17. images = data['image']
  18. key_pts = data['keypoints']

1.3. 視訊影象資料

  1. import cv2
  2. video = cv2.VideoCapture(mp4_path)
  3. height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
  4. width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
  5. num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
  6. fps = int(video.get(cv2.CAP_PROP_FPS))
  7. video.release()

1.4. ImageFolder等類

  1. import torchvision.datasets as dset
  2. dataset = dset.ImageFolder('./data/dogcat_2') #沒有transform,先看看取得的原始影象資料
  3. print(dataset.classes) #根據分的資料夾的名字來確定的類別
  4. print(dataset.class_to_idx) #按順序為這些類別定義索引為0,1...
  5. print(dataset.imgs) #返回從所有資料夾中得到的圖片的路徑以及其類別
  6. #獲取圖片
  7. datalength=min(len(os.listdir(os.path.join(imageFolder,'protectivesuit'))),len(os.listdir(os.path.join(imageFolder,'whitecoat'))))
  8. print("資料劃分:",[int(datalength*0.7), int(datalength*0.2), int(datalength*0.1)])
  9. all_dataset = datasets.ImageFolder(root=DATA_PATH_TRAIN, transform=trans)
  10. # 使用random_split實現資料集的劃分,lengths是一個list,按照對應的數量返回資料個數。
  11. # 這兒需要注意的是,lengths的資料量總和等於all_dataset中的資料個數,這兒不是按比例劃分的
  12. train, test, valid = torch.utils.data.random_split(dataset= all_dataset, lengths=[int(datalength*0.7), int(datalength*0.2), int(datalength*0.1)])
  13. # 接著按照正常方式使用DataLoader讀取資料,返回的是DataLoader物件
  14. train = DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=num_of_workers)
  15. test = DataLoader(test, batch_size=batch_size, shuffle=True, num_workers=num_of_workers)
  16. valid = DataLoader(valid, batch_size=batch_size, shuffle=True, num_workers=num_of_workers)
  17. print(train.classes) #根據分的資料夾的名字來確定的類別
  18. print(train.class_to_idx) #按順序為這些類別定義索引為0,1...
  19. print(train.imgs) #返回從所有資料夾中得到的圖片的路徑以及其類別

1.5. OneHot 編碼

  1. # pytorch的標記預設從0開始
  2. tensor = torch.tensor([0, 2, 1, 3])
  3. N = tensor.size(0)
  4. num_classes = 4
  5. one_hot = torch.zeros(N, num_classes).long()
  6. one_hot.scatter_(dim=1, index=torch.unsqueeze(tensor, dim=1), src=torch.ones(N, num_classes).long())

2. 訓練基本框架

  1. for epoch in range(2): # loop over the dataset multiple times
  2. running_loss = 0.0
  3. for i, data in enumerate(trainloader, 0):
  4. # get the inputs
  5. inputs, labels = data
  6. # zero the parameter gradients
  7. optimizer.zero_grad()
  8. # forward + backward + optimize
  9. outputs = net(inputs)
  10. loss = criterion(outputs, labels) #這裡以及進行了平均處理
  11. loss.backward()
  12. optimizer.step()
  13. # print statistics
  14. running_loss += loss.item()
  15. if i % 2000 == 1999: # print every 2000 mini-batches
  16. print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
  17. running_loss = 0.0
  18. print('Finished Training')
  1. for t in epoch(80):
  2. for images, labels in tqdm.tqdm(train_loader, desc='Epoch %3d' % (t + 1)):
  3. images, labels = images.cuda(), labels.cuda()
  4. scores = model(images)
  5. loss = loss_function(scores, labels)
  6. optimizer.zero_grad()
  7. loss.backward()
  8. optimizer.step()
  9. #計算 softmax 輸出準確率
  10. score = model(images)
  11. prediction = torch.argmax(score, dim=1) # 按行 返回每行最大值在的該行索引, 如果沒有dim 則按照一維陣列計算
  12. num_correct = torch.sum(prediction == labels).item()
  13. accuruacy = num_correct / labels.size(0)
  • Label One-hot編碼時
  1. for images, labels in train_loader:
  2. images, labels = images.cuda(), labels.cuda()
  3. N = labels.size(0)
  4. # C is the number of classes.
  5. smoothed_labels = torch.full(size=(N, C), fill_value=0.1 / (C - 1)).cuda()
  6. smoothed_labels.scatter_(dim=1, index=torch.unsqueeze(labels, dim=1), value=0.9)
  7. score = model(images)
  8. log_prob = torch.nn.functional.log_softmax(score, dim=1)
  9. loss = -torch.sum(log_prob * smoothed_labels) / N
  10. optimizer.zero_grad()
  11. loss.backward()
  12. optimizer.step()

3. 模型儲存與載入

注意,torch.load函式要指定載入到的裝置位置:map_location='cpu'

torch.save有兩種方式:

  • 儲存權重和模型,但是檔案結構不能改變,否則報錯
  • 儲存權重,載入時,先初始化類,然後載入權重資訊。
  1. # 儲存整個網路
  2. torch.save(net, PATH)
  3. # 儲存網路中的引數, 速度快,佔空間少
  4. torch.save(net.state_dict(),PATH)
  5. #--------------------------------------------------
  6. #針對上面一般的儲存方法,載入的方法分別是:
  7. model_dict=torch.load(PATH)
  8. model_dict=model.load_state_dict(torch.load(PATH))
  9. mlp_mixer.load_state_dict(torch.load(Config.MLPMIXER_WEIGHT,map_location='cpu'))
  10. #save model
  11. def save_models(tempmodel,save_path):
  12. torch.save(tempmodel.state_dict(), "./model/"+save_path)
  13. print("Checkpoint saved")
  14. # load model
  15. model=Net() #模型的結構
  16. model.load_state_dict(torch.load(Path("./model/95.model")))
  17. model.eval() #執行推理之前,必須先呼叫以將退出和批處理規範化層設定為評估模式。不這樣做將產生不一致的推斷結果。
  18. #斷點儲存
  19. # Save checkpoint.
  20. is_best = current_acc > best_acc
  21. best_acc = max(best_acc, current_acc)
  22. checkpoint = {
  23. 'best_acc': best_acc,
  24. 'epoch': t + 1,
  25. 'model': model.state_dict(),
  26. 'optimizer': optimizer.state_dict(),
  27. }
  28. model_path = os.path.join('model', 'checkpoint.pth.tar')
  29. torch.save(checkpoint, model_path)
  30. if is_best:
  31. shutil.copy(model_path, os.path.join('model', 'best_checkpoint.pth.tar'))
  32. # Load checkpoint.
  33. if resume:
  34. model_path = os.path.join('model', 'checkpoint.pth.tar')
  35. assert os.path.isfile(model_path)
  36. checkpoint = torch.load(model_path)
  37. best_acc = checkpoint['best_acc']
  38. start_epoch = checkpoint['epoch']
  39. model.load_state_dict(checkpoint['model'])
  40. optimizer.load_state_dict(checkpoint['optimizer'])
  41. print('Load checkpoint at epoch %d.' % start_epoch)

4. 計算準確率,查準率,查全率

  1. # data['label'] and data['prediction'] are groundtruth label and prediction
  2. # for each image, respectively.
  3. accuracy = np.mean(data['label'] == data['prediction']) * 100
  4. # Compute precision and recall for each class.
  5. for c in range(num_classes):
  6. tp = np.dot((data['label'] == c).astype(int),
  7. (data['prediction'] == c).astype(int))
  8. tp_fp = np.sum(data['prediction'] == c)
  9. tp_fn = np.sum(data['label'] == c)
  10. precision = tp / tp_fp * 100
  11. recall = tp / tp_fn * 100
  12. # data['label'] and data['prediction'] are groundtruth label and prediction
  13. # for each image, respectively.
  14. accuracy = np.mean(data['label'] == data['prediction']) * 100
  15. # Compute precision and recall for each class.
  16. for c in range(num_classes):
  17. tp = np.dot((data['label'] == c).astype(int),
  18. (data['prediction'] == c).astype(int))
  19. tp_fp = np.sum(data['prediction'] == c)
  20. tp_fn = np.sum(data['label'] == c)
  21. precision = tp / tp_fp * 100
  22. recall = tp / tp_fn * 100

建議有引數的層和匯合(pooling)層使用torch.nn模組定義,啟用函式直接使用torch.nn.functional。torch.nn模組和torch.nn.functional的區別在於,torch.nn模組在計算時底層呼叫了torch.nn.functional,但torch.nn模組包括該層引數,還可以應對訓練和測試兩種網路狀態。model(x)前用model.train()和model.eval()切換網路狀態。loss.backward()前用optimizer.zero_grad()清除累積梯度。optimizer.zero_grad()和model.zero_grad()效果一樣。

5. 視覺化部分

有 Facebook 自己開發的 Visdom 和 Tensorboard 兩個選擇。

https://github.com/facebookresearch/visdom

https://github.com/lanpa/tensorboardX

  1. # Example using Visdom.
  2. vis = visdom.Visdom(env='Learning curve', use_incoming_socket=False)
  3. assert self._visdom.check_connection()
  4. self._visdom.close()
  5. options = collections.namedtuple('Options', ['loss', 'acc', 'lr'])(
  6. loss={'xlabel': 'Epoch', 'ylabel': 'Loss', 'showlegend': True},
  7. acc={'xlabel': 'Epoch', 'ylabel': 'Accuracy', 'showlegend': True},
  8. lr={'xlabel': 'Epoch', 'ylabel': 'Learning rate', 'showlegend': True})
  9. for t in epoch(80):
  10. tran(...)
  11. val(...)
  12. vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([train_loss]),
  13. name='train', win='Loss', update='append', opts=options.loss)
  14. vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([val_loss]),
  15. name='val', win='Loss', update='append', opts=options.loss)
  16. vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([train_acc]),
  17. name='train', win='Accuracy', update='append', opts=options.acc)
  18. vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([val_acc]),
  19. name='val', win='Accuracy', update='append', opts=options.acc)
  20. vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([lr]),
  21. win='Learning rate', update='append', opts=options.lr)
  • pytorch graphviz

pip install torchviz

  1. model = nn.Sequential()
  2. model.add_module('W0', nn.Linear(8, 16))
  3. model.add_module('tanh', nn.Tanh())
  4. model.add_module('W1', nn.Linear(16, 1))
  5. x = torch.randn(1, 8)
  6. y = model(x)
  7. make_dot(y.mean(), params=dict(model.named_parameters()), show_attrs=True, show_saved=True)

  • 顯示圖片中的關鍵點
  1. def show_landmarks(image, landmarks):
  2. """Show image with landmarks"""
  3. plt.imshow(image)
  4. plt.scatter(landmarks[:, 0], landmarks[:, 1], s=10, marker='.', c='r')
  5. plt.pause(0.001) # pause a bit so that plots are updated
  6. plt.figure()
  7. show_landmarks(io.imread(os.path.join('data/faces/', img_name)),
  8. landmarks)
  9. plt.show()