From: https://liudongdong1.github.io/
0. 基礎配置
0.1. 設定隨機種子
def set_seeds(seed, cuda):
    """Seed NumPy and PyTorch RNGs for reproducible runs.

    Args:
        seed: integer seed applied to NumPy's global RNG and to PyTorch.
        cuda: when truthy, additionally seeds every visible CUDA device.
    """
    for seeder in (np.random.seed, torch.manual_seed):
        seeder(seed)
    if cuda:
        torch.cuda.manual_seed_all(seed)
    print("==> Set NumPy and PyTorch seeds.")
0.2. 張量處理與轉化
# --- Basic tensor inspection ---
tensor.type()  # Data type
tensor.size()  # Shape of the tensor; a subclass of Python tuple
tensor.dim()   # Number of dimensions
# Type conversions.
tensor = tensor.cuda()
tensor = tensor.cpu()
tensor = tensor.float()
tensor = tensor.long()
# Tensor <-> native Python types:
#   Tensor -> single Python scalar: data.item() (tensor must hold exactly one element)
#   Tensor -> Python list: data.tolist() (nested list with the same shape)
# CPU <-> GPU placement:
#   CPU tensor -> GPU tensor: data.cuda()
#   GPU tensor -> CPU tensor: data.cpu()
# torch.Tensor -> np.ndarray.
ndarray = tensor.cpu().numpy()
ndarray = tensor.numpy()
tensor.cpu().detach().numpy().tolist()[0]
# np.ndarray -> torch.Tensor.
tensor = torch.from_numpy(ndarray).float()
tensor = torch.from_numpy(ndarray.copy()).float()  # If ndarray has negative stride
# torch.Tensor -> PIL.Image.
image = PIL.Image.fromarray(torch.clamp(tensor * 255, min=0, max=255
                                        ).byte().permute(1, 2, 0).cpu().numpy())
image = torchvision.transforms.functional.to_pil_image(tensor)  # Equivalent way
# PIL.Image -> torch.Tensor.
tensor = torch.from_numpy(np.asarray(PIL.Image.open(path))
                          ).permute(2, 0, 1).float() / 255
tensor = torchvision.transforms.functional.to_tensor(PIL.Image.open(path))  # Equivalent way
# np.ndarray -> PIL.Image.
image = PIL.Image.fromarray(ndarray.astype(np.uint8))  # fixed typo: was .astypde()
# PIL.Image -> np.ndarray.
ndarray = np.asarray(PIL.Image.open(path))
# Copying a tensor:
# Operation               | New/Shared memory | Still in computation graph |
tensor.clone()            # |      New        |            Yes            |
tensor.detach()           # |     Shared      |            No             |
tensor.detach().clone()   # |      New        |            No             | (fixed: was tensor.detach.clone()())
# Reshaping.
tensor = torch.reshape(tensor, shape)
# Expand a tensor of shape 64*512 to shape 64*512*7*7.
torch.reshape(tensor, (64, 512, 1, 1)).expand(64, 512, 7, 7)
# Concatenation. Note: torch.cat joins along an existing dim while torch.stack
# adds a new one. E.g. for three 10x5 tensors, cat(dim=0) yields 30x5 whereas
# stack yields 3x10x5.
tensor = torch.cat(list_of_tensors, dim=0)
tensor = torch.stack(list_of_tensors, dim=0)
# Zero / non-zero elements.
torch.nonzero(tensor)               # Indices of non-zero elements
torch.nonzero(tensor == 0)          # Indices of zero elements
torch.nonzero(tensor).size(0)       # Number of non-zero elements
torch.nonzero(tensor == 0).size(0)  # Number of zero elements
# Multiplication.
# Matrix multiplication: (m*n) * (n*p) -> (m*p).
result = torch.mm(tensor1, tensor2)
# Batch matrix multiplication: (b*m*n) * (b*n*p) -> (b*m*p).
result = torch.bmm(tensor1, tensor2)
# Element-wise multiplication.
result = tensor1 * tensor2
# Pairwise Euclidean distances between two point sets.
# X1 is of shape m*d.
X1 = torch.unsqueeze(X1, dim=1).expand(m, n, d)
# X2 is of shape n*d.
X2 = torch.unsqueeze(X2, dim=0).expand(m, n, d)
# dist is of shape m*n, where dist[i][j] = sqrt(|X1[i, :] - X2[j, :]|^2)
dist = torch.sqrt(torch.sum((X1 - X2) ** 2, dim=2))
# Convolution layer.
conv = torch.nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=True)
0.3. pytorch 版本
# PyTorch / CUDA environment introspection.
torch.__version__ # PyTorch version
torch.version.cuda # Corresponding CUDA version
torch.backends.cudnn.version() # Corresponding cuDNN version
torch.cuda.get_device_name(0) # GPU type
0.4. GPU指定
# Check CUDA availability and restrict which GPUs PyTorch may use.
torch.cuda.is_available()
# NOTE: must be set before CUDA is first initialized to take effect.
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
1. 資料載入分割
1.0. Transform 變化
其中
ToTensor操作會將PIL.Image或形狀為H×W×D,數值範圍為[0, 255]的np.ndarray轉換為形狀為D×H×W
,數值範圍為[0.0, 1.0]的torch.Tensor。 Normalize 需要注意資料的維度,否則容易報錯。
# Training pipeline: random crop + horizontal flip for augmentation, then
# normalization with ImageNet channel statistics. Normalize expects a
# (C, H, W) tensor — dimension mismatches are a common source of errors here.
train_transform = torchvision.transforms.Compose([
    torchvision.transforms.RandomResizedCrop(size=224,
                                             scale=(0.08, 1.0)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                     std=(0.229, 0.224, 0.225)),
])
# Validation pipeline: deterministic resize + center crop (no augmentation).
val_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                     std=(0.229, 0.224, 0.225)),
])
1.1. 自定義dataset類
class CharDataset(Dataset):
    """Keypoint dataset backed by a CSV index file.

    CSV layout (per row): column 0 is the image file name; the remaining
    columns are the flattened (x, y) keypoint coordinates.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        # csv_file: path to the CSV with keypoint data
        # root_dir: directory containing the images
        # transform: optional callable applied to each sample dict
        self.key_pts_frame = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform

    def __len__(self):
        # Fixed: the original returned len(self.key_pts_frame.shape), i.e. the
        # number of DataFrame dimensions (always 2), not the number of samples.
        return len(self.key_pts_frame)

    def __getitem__(self, idx):
        image_name = os.path.join(self.root_dir, self.key_pts_frame.iloc[idx, 0])
        image = mpimg.imread(image_name)
        # Remove the alpha channel if present.
        if image.shape[2] == 4:
            image = image[:, :, 0:3]
        # Fixed: .values is a property, not a method (was called as .values()).
        key_pts = self.key_pts_frame.iloc[idx, 1:].values
        key_pts = key_pts.astype('float').reshape(-1, 2)
        sample = {'image': image, 'keypoints': key_pts}
        # Apply the optional transform to the whole sample dict.
        if self.transform:
            sample = self.transform(sample)
        return sample
if __name__ == "__main__":
    data_dir = "D:\\Model\\CharPointDetection\\data\\test\\"
    # Fixed: CharDataset takes both csv_file and root_dir; the original passed
    # only one positional argument (TypeError).
    # TODO(review): confirm the actual CSV file name used by this project.
    chardata = CharDataset(csv_file=data_dir + "keypoints.csv", root_dir=data_dir)
    print(len(chardata))  # 1198
    print(chardata[0].get("image").shape)  # (96, 96); values in [0, 1]
- dataset
import json
import matplotlib.pyplot as plt
import numpy as np
from torch.utils.data import Dataset,DataLoader
import matplotlib.pyplot as plt
from torchvision import transforms, utils
import cv2
from util.imageUtil import *
from util.config import *
class DatasetCustom(Dataset):
    """Keypoint dataset built from a CSV index file and an image directory.

    Rows are split deterministically into a train partition (the first
    `ratio` fraction) and a test partition (the remainder).
    """

    def __init__(self, rootcsv, imgroot, train=True, transform=None, ratio=0.7):
        # rootcsv: CSV index file; imgroot: directory holding the images
        # train: select the train (first `ratio`) or test (rest) partition
        self.train = train
        self.transform = transform
        self.allItem = self.readcsv(rootcsv)
        self.imgroot = imgroot
        # TODO: shuffle before splitting into train/test partitions.
        split = int(len(self.allItem) * ratio)
        if self.train:
            self.labelItem = self.allItem[:split]
        else:
            # Fixed off-by-one: the original used [split + 1:], which silently
            # dropped the sample at index `split` from both partitions.
            self.labelItem = self.allItem[split:]

    def readcsv(self, filename):
        """Read the CSV index as strings, keeping every other row.

        NOTE(review): the [::2] stride presumably means each record spans two
        CSV lines — confirm against the data file format.
        """
        with open(filename, encoding='utf-8') as f:
            data = np.loadtxt(f, str, delimiter=",", skiprows=1)
        data = data[::2, :]
        return data

    def __getitem__(self, index):
        index = index % self.__len__()
        # Column 0 encodes the image identity; split it to rebuild the path.
        img_name = self.labelItem[index][0].split('_')
        imgpath = "{}/camera{}_{}_{}_{}.jpg".format(self.imgroot, img_name[0], img_name[1], 0 - int(img_name[1]), img_name[2])
        # The image is resized; x/y coordinates are rescaled by the same ratios.
        ratioW, ratioH, img = imageloadCV(imgpath, RESIZE)
        keypoints = self.labelCoordinateHandle(self.labelItem[index][10:], ratioW, ratioH)
        if self.transform is not None:
            img = self.transform(img)
        # Returned as a dict; iterate as:
        #   for i, data in enumerate(dataloader):
        #       image, keypoints = data['image'].to(DEVICE), data['keypoints'].to(DEVICE)
        return {
            'image': img,
            'keypoints': keypoints,
        }

    def labelCoordinateHandle(self, data, ratioW, ratioH):
        """Rescale the coordinates at indices (0, 1) and (3, 4) by the resize
        ratios; all other entries are converted to float but left unscaled."""
        data = [float(i) for i in data]
        data[0] = data[0] * ratioW
        data[1] = data[1] * ratioH
        data[3] = data[3] * ratioW
        data[4] = data[4] * ratioH
        return np.array(data, dtype='float32')

    def __len__(self):
        return len(self.labelItem)
if __name__ == '__main__':
    # Build train/test splits over the same CSV index (70% / 30%).
    train_dataset =DatasetCustom(rootcsv=ROOT_CSV,imgroot=IMG_ROOT,train=True,transform=transforms.ToTensor(),ratio=0.7)
    test_dataset = DatasetCustom(rootcsv=ROOT_CSV,imgroot=IMG_ROOT,train=False,transform=transforms.ToTensor(),ratio=0.7)
    # Single record. ToTensor converts HWC -> CHW internally:
    # img = torch.from_numpy(pic.transpose((2, 0, 1)))
    data= train_dataset.__getitem__(1)
    img, label = data['image'], data['keypoints']
    img = np.transpose(img.numpy(),(1,2,0))  # CHW -> HWC for matplotlib
    plt.imshow(img)
    plt.show()
    print("label",label)
    # Inspect the data through DataLoaders.
    train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
    test_loader = DataLoader(dataset=test_dataset, batch_size=6, shuffle=False)
    def imshow(img):
        # Helper: show a (C, H, W) tensor as an (H, W, C) image.
        npimg = img.numpy()
        plt.imshow(np.transpose(npimg, (1, 2, 0)))
    # len(loader) is the number of batches, not samples.
    print('num_of_trainData:', len(train_loader))
    print('num_of_testData:', len(test_loader))
    # Display the images of one batch as a grid.
    # NOTE(review): __getitem__ returns a dict, so (b_x, b_y) would unpack to
    # the dict KEYS here; this likely needs data['image'] / data['keypoints']
    # — confirm against DatasetCustom's return value.
    for step ,(b_x,b_y) in enumerate(train_loader):
        #print("step:",step)
        if step < 1:
            imgs = utils.make_grid(b_x)
            print(imgs.shape)
            imgs = np.transpose(imgs,(1,2,0))
            print(imgs.shape)
            plt.imshow(imgs)
            plt.show()
            break
1.2. 資料分割獲取
# Split a dataset into train / test subsets via a shuffled index permutation.
Dataset = CharDataset(rootdir)  # custom dataset instance (name shadows the class)
l=Dataset.__len__()
test_percent=5  # hold out 5% for testing
torch.manual_seed(1)  # fixed seed so the split is reproducible
indices = torch.randperm(len(Dataset)).tolist()
# Last ceil(5%) of the shuffled indices go to the test subset.
dataset = torch.utils.data.Subset(Dataset, indices[:-int(np.ceil(l*test_percent/100))])
dataset_test = torch.utils.data.Subset(Dataset, indices[int(-np.ceil(l*test_percent/100)):])
# define training and validation data loaders
import utils
data_loader = torch.utils.data.DataLoader(
    dataset, batch_size=2, shuffle=True,
    collate_fn=utils.collate_fn)
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=(1), shuffle=False,
    collate_fn=utils.collate_fn)
# Each batch is a dict produced by the dataset's __getitem__.
for batch_i, data in enumerate(data_loader):
    images = data['image']
    key_pts = data['keypoints']
1.3. 視訊影象資料
import cv2
# Read basic properties of a video file with OpenCV.
video = cv2.VideoCapture(mp4_path)
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))  # frame height in pixels
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))  # frame width in pixels
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))  # total frame count
fps = int(video.get(cv2.CAP_PROP_FPS))  # frames per second
video.release()  # release the underlying file handle
1.4. ImageFolder等類
import torchvision.datasets as dset
# Inspect an ImageFolder without any transform first.
dataset = dset.ImageFolder('./data/dogcat_2')
print(dataset.classes)       # class names, taken from the sub-folder names
print(dataset.class_to_idx)  # class name -> index (0, 1, ...)
print(dataset.imgs)          # list of (image path, class index) pairs
# Balance the two classes by the smaller folder's size.
datalength=min(len(os.listdir(os.path.join(imageFolder,'protectivesuit'))),len(os.listdir(os.path.join(imageFolder,'whitecoat'))))
print("資料劃分:",[int(datalength*0.7), int(datalength*0.2), int(datalength*0.1)])
all_dataset = datasets.ImageFolder(root=DATA_PATH_TRAIN, transform=trans)
# random_split partitions by absolute counts, not ratios.
# NOTE(review): the lengths must sum exactly to len(all_dataset), otherwise
# random_split raises — these int-truncated counts may not.
train, test, valid = torch.utils.data.random_split(dataset= all_dataset, lengths=[int(datalength*0.7), int(datalength*0.2), int(datalength*0.1)])
# Wrap the subsets in DataLoaders as usual.
train = DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=num_of_workers)
test = DataLoader(test, batch_size=batch_size, shuffle=True, num_workers=num_of_workers)
valid = DataLoader(valid, batch_size=batch_size, shuffle=True, num_workers=num_of_workers)
# Fixed: DataLoader has no .classes/.class_to_idx/.imgs attributes — those
# live on the underlying ImageFolder, so query all_dataset instead.
print(all_dataset.classes)
print(all_dataset.class_to_idx)
print(all_dataset.imgs)
1.5. OneHot 編碼
# PyTorch class labels are 0-based by default.
tensor = torch.tensor([0, 2, 1, 3])
N = tensor.size(0)
num_classes = 4
# Scatter ones into a zero matrix: row i gets a 1 at column tensor[i].
one_hot = torch.zeros(N, num_classes, dtype=torch.long)
one_hot.scatter_(dim=1, index=tensor.unsqueeze(dim=1), src=torch.ones(N, num_classes, dtype=torch.long))
2. 訓練基本框架
for epoch in range(2):  # loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs
        inputs, labels = data
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)  # criterion already averages over the batch
        loss.backward()
        optimizer.step()
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finished Training')
# Generic training loop with a tqdm progress bar.
# Fixed: `for t in epoch(80)` — `epoch` is not callable; iterate range(80).
for t in range(80):
    for images, labels in tqdm.tqdm(train_loader, desc='Epoch %3d' % (t + 1)):
        images, labels = images.cuda(), labels.cuda()
        scores = model(images)
        loss = loss_function(scores, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
# Accuracy of a softmax classifier.
score = model(images)
prediction = torch.argmax(score, dim=1)  # per-row argmax; without dim the tensor is flattened first
num_correct = torch.sum(prediction == labels).item()
accuracy = num_correct / labels.size(0)  # fixed variable-name typo (was `accuruacy`)
- Label One-hot編碼時
# Label smoothing: the target is 0.9 for the true class and 0.1/(C-1) elsewhere.
for images, labels in train_loader:
    images, labels = images.cuda(), labels.cuda()
    N = labels.size(0)
    # C is the number of classes.
    smoothed_labels = torch.full(size=(N, C), fill_value=0.1 / (C - 1)).cuda()
    smoothed_labels.scatter_(dim=1, index=torch.unsqueeze(labels, dim=1), value=0.9)
    score = model(images)
    log_prob = torch.nn.functional.log_softmax(score, dim=1)
    # Cross entropy against the smoothed distribution, averaged over the batch.
    loss = -torch.sum(log_prob * smoothed_labels) / N
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
3. 模型儲存與載入
注意,torch.load函式要確定儲存的位置:map_location='cpu'
torch.save有兩種方式:
儲存權重和模型,但是檔案結果不能改變,否則報錯
;儲存權重,載入時,先初始化類,然後載入權重資訊。
# Save the whole network (architecture + weights); loading it later requires
# the original class definition to be importable unchanged.
torch.save(net, PATH)
# Save only the parameters (state_dict): faster and smaller on disk.
torch.save(net.state_dict(),PATH)
#--------------------------------------------------
# Corresponding loading for the two approaches above:
model_dict=torch.load(PATH)
model_dict=model.load_state_dict(torch.load(PATH))
# map_location='cpu' lets GPU-saved weights load on a CPU-only machine.
mlp_mixer.load_state_dict(torch.load(Config.MLPMIXER_WEIGHT,map_location='cpu'))
def save_models(tempmodel, save_path):
    """Persist a model's state_dict to `save_path`.

    Fixed: the original wrote torch.save("./model/" + tempmodel.state_dict(),
    save_path) — concatenating a str with an OrderedDict raises TypeError.
    Any directory prefix belongs in `save_path` itself.
    """
    torch.save(tempmodel.state_dict(), save_path)
    print("Checkpoint saved")
# load model: instantiate the architecture first, then restore the weights.
model=Net()  # model architecture
model.load_state_dict(torch.load(Path("./model/95.model")))
# eval() switches dropout / batch-norm layers to inference mode; skipping it
# before inference produces inconsistent results.
model.eval()
#斷點儲存
# Save checkpoint, keeping a separate copy of the best one so far.
is_best = current_acc > best_acc
best_acc = max(best_acc, current_acc)
checkpoint = {
    'best_acc': best_acc,
    'epoch': t + 1,
    'model': model.state_dict(),
    'optimizer': optimizer.state_dict(),
}
model_path = os.path.join('model', 'checkpoint.pth.tar')
torch.save(checkpoint, model_path)
if is_best:
    # Fixed: copy FROM the freshly written checkpoint TO a dedicated "best"
    # file. The original copied a CWD file onto model_path, clobbering the
    # checkpoint it had just saved.
    shutil.copy(model_path, os.path.join('model', 'model_best.pth.tar'))
# Load checkpoint.
if resume:
    model_path = os.path.join('model', 'checkpoint.pth.tar')
    # NOTE(review): assert is stripped under `python -O`; raising
    # FileNotFoundError would be sturdier for input validation.
    assert os.path.isfile(model_path)
    checkpoint = torch.load(model_path)
    # Restore training state so the run resumes where it stopped.
    best_acc = checkpoint['best_acc']
    start_epoch = checkpoint['epoch']
    model.load_state_dict(checkpoint['model'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    print('Load checkpoint at epoch %d.' % start_epoch)
4. 計算準確率,查準率,查全率
# data['label'] and data['prediction'] are groundtruth label and prediction
# for each image, respectively (arrays of class indices).
accuracy = np.mean(data['label'] == data['prediction']) * 100
# Compute precision and recall for each class.
# Fixed: `num_classes` is a count (an int), so iterate range(num_classes);
# the original range(len(num_classes)) raises TypeError on an int.
# (The original paragraph was also duplicated verbatim; deduplicated here.)
for c in range(num_classes):
    # tp = number of samples with both label and prediction equal to c.
    tp = np.dot((data['label'] == c).astype(int),
                (data['prediction'] == c).astype(int))
    tp_fp = np.sum(data['prediction'] == c)
    tp_fn = np.sum(data['label'] == c)
    # NOTE(review): divides by zero when class c never occurs in the
    # predictions / labels — guard if that can happen.
    precision = tp / tp_fp * 100
    recall = tp / tp_fn * 100
建議有引數的層和匯合(pooling)層使用torch.nn模組定義,啟用函式直接使用torch.nn.functional。torch.nn模組和torch.nn.functional的區別在於,torch.nn模組在計算時底層呼叫了torch.nn.functional,但torch.nn模組包括該層引數,還可以應對訓練和測試兩種網路狀態。model(x)前用model.train()和model.eval()切換網路狀態。loss.backward()前用optimizer.zero_grad()清除累積梯度。optimizer.zero_grad()和model.zero_grad()效果一樣。
5. 視覺化部分
有 Facebook 自己開發的 Visdom 和 TensorFlow 生態的 TensorBoard 兩個選擇。
https://github.com/facebookresearch/visdom
https://github.com/lanpa/tensorboardX
# Example using Visdom.
vis = visdom.Visdom(env='Learning curve', use_incoming_socket=False)
# Fixed: the original mixed `vis` with an undefined `self._visdom`;
# use the single `vis` handle throughout.
assert vis.check_connection()
vis.close()
options = collections.namedtuple('Options', ['loss', 'acc', 'lr'])(
    loss={'xlabel': 'Epoch', 'ylabel': 'Loss', 'showlegend': True},
    acc={'xlabel': 'Epoch', 'ylabel': 'Accuracy', 'showlegend': True},
    lr={'xlabel': 'Epoch', 'ylabel': 'Learning rate', 'showlegend': True})
for t in range(80):  # fixed: `epoch(80)` is not callable
    train(...)       # fixed typo: was `tran(...)`
    val(...)
    # Append one point per epoch to each curve window.
    vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([train_loss]),
             name='train', win='Loss', update='append', opts=options.loss)
    vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([val_loss]),
             name='val', win='Loss', update='append', opts=options.loss)
    vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([train_acc]),
             name='train', win='Accuracy', update='append', opts=options.acc)
    vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([val_acc]),
             name='val', win='Accuracy', update='append', opts=options.acc)
    vis.line(X=torch.Tensor([t + 1]), Y=torch.Tensor([lr]),
             win='Learning rate', update='append', opts=options.lr)
- pytorch graphviz
pip install torchviz
# Visualize the autograd graph of a small model with torchviz's make_dot.
model = nn.Sequential()
model.add_module('W0', nn.Linear(8, 16))
model.add_module('tanh', nn.Tanh())
model.add_module('W1', nn.Linear(16, 1))
x = torch.randn(1, 8)
y = model(x)
# show_attrs/show_saved additionally render node attributes and saved tensors.
make_dot(y.mean(), params=dict(model.named_parameters()), show_attrs=True, show_saved=True)
- 顯示圖片中的關鍵點
def show_landmarks(image, landmarks):
    """Show image with landmarks overlaid as red dots.

    Args:
        image: array displayable by plt.imshow.
        landmarks: array-like of shape (num_points, 2) with (x, y) columns.
    """
    plt.imshow(image)
    plt.scatter(landmarks[:, 0], landmarks[:, 1], s=10, marker='.', c='r')
    plt.pause(0.001)  # pause a bit so that plots are updated
# Display one sample's image with its landmarks overlaid.
plt.figure()
show_landmarks(io.imread(os.path.join('data/faces/', img_name)),
               landmarks)
plt.show()