Pytorch yolov3 多GPU 訓練

阿新 • • 發佈：2019-02-07

pytorch 多gpu訓練：

# -*- coding:utf-8 -*-
from __future__ import division

import datetime
from collections import defaultdict

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from PIL import Image
from torch.autograd import Variable

from utils.parse_config import *
from utils.utils import build_targets

def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs.

    Args:
        module_defs: list of dicts describing the network. The first entry
            holds the network hyper-parameters ('channels', 'height', ...)
            and is removed from the list in place (callers rely on this so
            that the remaining entries line up with the returned modules).
            Every other entry describes one layer through its 'type' key.

    Returns:
        (hyperparams, module_list): the popped hyper-parameter dict and an
        ``nn.ModuleList`` holding one ``nn.Sequential`` per layer definition.
    """
    # Build the YOLOv3 network structure from the parsed cfg definitions.
    hyperparams = module_defs.pop(0)
    # output_filters[0] is the input channel count; entry k + 1 is the
    # output channel count of layer k.
    output_filters = [int(hyperparams['channels'])]
    module_list = nn.ModuleList()
    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()
        # Layers that do not alter the channel count (upsample, yolo)
        # simply propagate the channel count of their input.
        filters = output_filters[-1]
        # Per-layer view without the input-channel entry, so absolute
        # (non-negative) layer indices address the right layer.
        layer_filters = output_filters[1:]

        if module_def['type'] == 'convolutional':
            bn = int(module_def['batch_normalize'])
            filters = int(module_def['filters'])
            kernel_size = int(module_def['size'])
            pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
            modules.add_module(
                'conv_%d' % i,
                nn.Conv2d(in_channels=output_filters[-1],
                          out_channels=filters,
                          kernel_size=kernel_size,
                          stride=int(module_def['stride']),
                          padding=pad,
                          bias=not bn))  # BatchNorm supplies the bias
            if bn:
                modules.add_module('batch_norm_%d' % i, nn.BatchNorm2d(filters))
            if module_def['activation'] == 'leaky':
                modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))

        elif module_def['type'] == 'upsample':
            upsample = nn.Upsample(scale_factor=int(module_def['stride']),
                                   mode='nearest')
            modules.add_module('upsample_%d' % i, upsample)

        elif module_def['type'] == 'route':
            layers = [int(x) for x in module_def["layers"].split(',')]
            # Concatenation along channels: output channels add up.
            filters = sum(layer_filters[layer_i] for layer_i in layers)
            modules.add_module('route_%d' % i, EmptyLayer())

        elif module_def['type'] == 'shortcut':
            filters = layer_filters[int(module_def['from'])]
            modules.add_module("shortcut_%d" % i, EmptyLayer())

        elif module_def["type"] == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract the anchors selected by this layer's mask.
            # NOTE(review): assumes module_def["anchors"] is already an
            # indexable sequence of (w, h) pairs - confirm against the caller.
            anchors = module_def["anchors"]
            anchors = [anchors[idx] for idx in anchor_idxs]
            num_classes = int(module_def['classes'])
            img_height = int(hyperparams['height'])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_height)
            modules.add_module('yolo_%d' % i, yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list

class EmptyLayer(nn.Module):
    """Placeholder for 'route' and 'shortcut' layers.

    The module itself performs no computation; it only reserves a slot in
    the module list for layers whose work is done elsewhere.
    """

    def __init__(self):
        super(EmptyLayer, self).__init__()

class YOLOLayer(nn.Module):
    """Detection layer.

    Decodes a raw feature map into bounding boxes, objectness and class
    scores. When targets are supplied it returns the loss terms instead.
    """

    def __init__(self, anchors, num_classes, image_dim):
        """
        Args:
            anchors: sequence of (width, height) anchor pairs for this layer.
            num_classes: number of object classes.
            image_dim: input image size used to derive the grid stride.
        """
        super(YOLOLayer, self).__init__()
        self.anchors = anchors
        self.scaled_anchors = None
        self.num_anchors = len(anchors)
        self.num_classes = num_classes
        self.bbox_attrs = 5 + num_classes  # x, y, w, h, conf + class scores
        self.image_dim = image_dim
        self.ignore_thres = 0.5
        self.coord_scale = 1
        self.noobject_scale = 1
        self.object_scale = 5
        self.class_scale = 1
        self.seen = 0  # running count of samples passed through forward()
        self.mse_loss = nn.MSELoss()
        self.bce_loss = nn.BCELoss()

    def forward(self, x, targets=None):
        """Run the detection head.

        Args:
            x: feature map viewed as (batch, num_anchors * bbox_attrs, g, g).
            targets: ground-truth tensor forwarded to ``build_targets``;
                ``None`` selects inference mode.

        Returns:
            Training (targets given): tuple
            ``(loss, loss_x, loss_y, loss_w, loss_h, loss_conf, loss_cls,
            recall)`` where ``loss`` is a tensor and the rest are Python
            numbers.
            Inference: tensor of shape
            ``(batch, num_anchors * g * g, 5 + num_classes)`` with box
            coordinates scaled by the grid stride.
        """
        bs = x.size(0)
        g_dim = x.size(2)
        stride = self.image_dim / g_dim
        # Tensor constructors matching the device type of the input
        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor

        # (bs, A, attrs, g, g) -> (bs, A, g, g, attrs)
        prediction = x.view(bs, self.num_anchors, self.bbox_attrs, g_dim, g_dim).permute(0, 1, 3, 4, 2).contiguous()

        # Get outputs
        x = torch.sigmoid(prediction[..., 0])          # Center x
        y = torch.sigmoid(prediction[..., 1])          # Center y
        w = prediction[..., 2]                         # Width
        h = prediction[..., 3]                         # Height
        conf = torch.sigmoid(prediction[..., 4])       # Conf
        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.

        # Calculate offsets for each grid cell
        grid_x = torch.linspace(0, g_dim - 1, g_dim).repeat(g_dim, 1).repeat(bs * self.num_anchors, 1, 1).view(x.shape).type(FloatTensor)
        grid_y = torch.linspace(0, g_dim - 1, g_dim).repeat(g_dim, 1).t().repeat(bs * self.num_anchors, 1, 1).view(y.shape).type(FloatTensor)
        # Express the anchors in grid-cell units
        scaled_anchors = [(a_w / stride, a_h / stride) for a_w, a_h in self.anchors]
        anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
        anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
        anchor_w = anchor_w.repeat(bs, 1).repeat(1, 1, g_dim * g_dim).view(w.shape)
        anchor_h = anchor_h.repeat(bs, 1).repeat(1, 1, g_dim * g_dim).view(h.shape)

        # Add offset and scale with anchors (detached from the graph)
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

        self.seen += prediction.size(0)

        # Inference: return the decoded predictions
        if targets is None:
            output = torch.cat((pred_boxes.view(bs, -1, 4) * stride,
                                conf.view(bs, -1, 1),
                                pred_cls.view(bs, -1, self.num_classes)), -1)
            return output.data

        # Training: target assignment is done on CPU copies
        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(
            pred_boxes.cpu().data,
            targets.cpu().data,
            scaled_anchors,
            self.num_anchors,
            self.num_classes,
            g_dim,
            self.ignore_thres)

        recall = float(nCorrect / nGT) if nGT else 1

        # Move the targets and masks to the device type of the predictions
        tx = tx.type(FloatTensor)
        ty = ty.type(FloatTensor)
        tw = tw.type(FloatTensor)
        th = th.type(FloatTensor)
        tconf = tconf.type(FloatTensor)
        tcls = tcls[cls_mask == 1].type(FloatTensor)
        coord_mask = coord_mask.type(FloatTensor)
        conf_mask = conf_mask.type(FloatTensor)

        loss_x = self.coord_scale * self.mse_loss(x[coord_mask == 1], tx[coord_mask == 1]) / 2
        loss_y = self.coord_scale * self.mse_loss(y[coord_mask == 1], ty[coord_mask == 1]) / 2
        loss_w = self.coord_scale * self.mse_loss(w[coord_mask == 1], tw[coord_mask == 1]) / 2
        loss_h = self.coord_scale * self.mse_loss(h[coord_mask == 1], th[coord_mask == 1]) / 2
        loss_conf = self.bce_loss(conf[conf_mask == 1], tconf[conf_mask == 1])
        loss_cls = self.class_scale * self.bce_loss(pred_cls[cls_mask == 1], tcls)
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        return (loss, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(),
                loss_conf.item(), loss_cls.item(), recall)


class Darknet(nn.Module):
    """YOLOv3 object detection model"""

    def __init__(self, module_defs, img_size=416):
        """
        Args:
            module_defs: parsed cfg blocks; the leading hyper-parameter
                block is popped off this list by ``create_modules``.
            img_size: input image size, stored on the instance.
        """
        super(Darknet, self).__init__()
        self.module_defs = module_defs
        # Build the network from the config; returns the hyper-parameters
        # and the torch module list.
        self.hyperparams, self.module_list = create_modules(self.module_defs)
        self.img_size = img_size
        # Defaults so save_weights() also works when no weights were loaded.
        self.seen = 0
        self.header_info = np.array([0, 0, 0, self.seen, 0], dtype=np.int32)
        self.loss_names = ['x', 'y', 'w', 'h', 'conf', 'cls', 'recall']
        self.losses = defaultdict(float)

    def forward(self, x, targets=None):
        """Run the network.

        Returns the summed loss of all detection layers as a 1-element
        tensor when ``targets`` is given, otherwise the detections of all
        detection layers concatenated along dimension 1. Per-term losses of
        the last call are kept in ``self.losses``.

        NOTE(review): when wrapped in DataParallel the replicas appear to
        share ``self.losses``, so logged values may mix GPUs - confirm.
        """
        is_training = targets is not None
        output = []
        for name in self.loss_names:
            self.losses[name] = 0
        layer_outputs = []
        for module_def, module in zip(self.module_defs, self.module_list):
            layer_type = module_def['type']
            if layer_type in ('convolutional', 'upsample'):
                x = module(x)
            elif layer_type == 'route':
                indices = [int(idx) for idx in module_def['layers'].split(',')]
                x = torch.cat([layer_outputs[idx] for idx in indices], 1)
            elif layer_type == 'shortcut':
                source = int(module_def['from'])
                x = layer_outputs[-1] + layer_outputs[source]
            elif layer_type == 'yolo':
                if is_training:
                    # Train phase: get loss
                    x, *losses = module[0](x, targets)
                    for name, loss in zip(self.loss_names, losses):
                        self.losses[name] += loss
                else:
                    # Test phase: get detections
                    x = module(x)
                output.append(x)
            layer_outputs.append(x)
        # Average the recall over the detection layers that contributed.
        self.losses['recall'] /= max(len(output), 1)
        if is_training:
            # 1-element tensor so per-replica results can be gathered.
            return sum(output).view(-1)
        return torch.cat(output, 1)

    def load_weights(self, weights_path, is_training=False):
        """Parses and loads the weights stored in 'weights_path'.

        The file starts with five int32 header values followed by float32
        weights. ``is_training`` is accepted for call compatibility and is
        not used.
        """
        with open(weights_path, "rb") as fp:
            header = np.fromfile(fp, dtype=np.int32, count=5)  # First five are header values
            weights = np.fromfile(fp, dtype=np.float32)        # The rest are weights
        # Needed to write header when saving weights
        self.header_info = header
        self.seen = header[3]

        ptr = 0
        for module_def, module in zip(self.module_defs, self.module_list):
            if module_def['type'] != 'convolutional':
                continue
            conv_layer = module[0]
            # int() so that a textual '0' from the cfg is treated as false,
            # matching how the layers were built.
            if int(module_def['batch_normalize']):
                # Load BN bias, weights, running mean and running variance
                bn_layer = module[1]
                num_b = bn_layer.bias.numel()  # Number of biases
                for target in (bn_layer.bias, bn_layer.weight,
                               bn_layer.running_mean, bn_layer.running_var):
                    values = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(target)
                    target.data.copy_(values)
                    ptr += num_b
            else:
                # Load conv. bias
                num_b = conv_layer.bias.numel()
                conv_b = torch.from_numpy(weights[ptr:ptr + num_b]).view_as(conv_layer.bias)
                conv_layer.bias.data.copy_(conv_b)
                ptr += num_b
            # Load conv. weights
            num_w = conv_layer.weight.numel()
            conv_w = torch.from_numpy(weights[ptr:ptr + num_w]).view_as(conv_layer.weight)
            conv_layer.weight.data.copy_(conv_w)  # copy weights into the layer
            ptr += num_w

    def save_weights(self, path, cutoff=-1):
        """Write the header and convolutional/BN weights to ``path``.

        @:param path    - path of the new weights file
        @:param cutoff  - save layers between 0 and cutoff (cutoff = -1 -> all are saved)
        """
        layers = list(zip(self.module_defs, self.module_list))
        if cutoff != -1:
            layers = layers[:cutoff]

        with open(path, 'wb') as fp:
            self.header_info[3] = self.seen
            self.header_info.tofile(fp)

            # Iterate through layers
            for module_def, module in layers:
                if module_def['type'] != 'convolutional':
                    continue
                conv_layer = module[0]
                if int(module_def['batch_normalize']):
                    # With batch norm, the BN tensors are written first
                    bn_layer = module[1]
                    bn_layer.bias.data.cpu().numpy().tofile(fp)
                    bn_layer.weight.data.cpu().numpy().tofile(fp)
                    bn_layer.running_mean.data.cpu().numpy().tofile(fp)
                    bn_layer.running_var.data.cpu().numpy().tofile(fp)
                else:
                    # Without batch norm, write the conv bias
                    conv_layer.bias.data.cpu().numpy().tofile(fp)
                # Write conv weights
                conv_layer.weight.data.cpu().numpy().tofile(fp)

train程式碼：關鍵詞

optimizer.module.zero_grad()

model.module.save_weights

loss = model(imgs, targets)

torch.sum(loss).backward()

optimizer.module.step()

# Training-loop excerpt. It expects `opt`, `dataloader`, `model`, `optimizer`,
# `Tensor` and `adjust_learning_rate` to be defined by the surrounding script.
# Resolve the underlying objects so the loop works whether or not the model
# and optimizer are wrapped in an object exposing them as `.module`.
net = getattr(model, 'module', model)
stepper = getattr(optimizer, 'module', optimizer)
total_loss = 0
last_total_loss = 0
for epoch in range(opt.epochs):
    for batch_i, (_, imgs, targets) in enumerate(dataloader):
        imgs = Variable(imgs.type(Tensor))
        targets = Variable(targets.type(Tensor), requires_grad=False)
        stepper.zero_grad()
        loss = model(imgs, targets)
        # The model returns one loss per replica; reduce before backprop.
        batch_loss = torch.sum(loss)
        batch_loss.backward()
        stepper.step()
        # Plain float for bookkeeping so no autograd state is kept alive.
        loss_value = batch_loss.item()
        now = datetime.datetime.now()
        strftime = now.strftime("%H:%M:%S")
        print(strftime, epoch, opt.epochs, batch_i, len(dataloader), loss)
        if batch_i % 40 == 39:
            # Every 40 batches: decay the learning rate when the
            # accumulated loss grew by more than 1% over the last window.
            if last_total_loss > 0 and total_loss > last_total_loss * 1.01:
                print("total_loss", total_loss)
                adjust_learning_rate(optimizer)
            else:
                print("total_loss", total_loss, last_total_loss)
                last_total_loss = total_loss
            total_loss = loss_value
        elif batch_i == 0:
            total_loss = loss_value
        else:
            total_loss += loss_value

        print('%s [Epoch %d/%d, Batch %d/%d Losses: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f, recall: %.5f]' %
              (strftime, epoch, opt.epochs, batch_i, len(dataloader),
               net.losses['x'], net.losses['y'], net.losses['w'],
               net.losses['h'], net.losses['conf'], net.losses['cls'],
               loss_value, net.losses['recall']))

    if epoch % opt.checkpoint_interval == 0:
        net.save_weights('%s/%d.weights' % (opt.checkpoint_dir, epoch))

# -*- coding:utf-8 -*-
from __future__ import division

from models import *
from utils.utils import *
from utils.datasets import *
from utils.parse_config import *
from logger import Logger
import os
import sys
import time
import datetime
import argparse

import torch
from torch.utils.data import DataLoader

from torch.autograd import Variable
import torch.optim as optim

# Command-line options for the training script.
parser = argparse.ArgumentParser()
parser.add_argument('--epochs', type=int, default=2001, help='number of epochs')
parser.add_argument('--image_folder', type=str, default='data/samples', help='path to dataset')
parser.add_argument('--batch_size', type=int, default=4, help='size of each image batch')
parser.add_argument('--learning_rate', type=float, default=0.01, help='learning_rate')
parser.add_argument('--train_dir', type=str, default=r'E:\team-CV\dataset\tiny_data\VOC2007/',help='train_dir')
parser.add_argument('--model_config_path', type=str, default='config/yolov3_2cls.cfg', help='path to model config file')
parser.add_argument('--data_config_path', type=str, default='config/coco.data', help='path to data config file')
parser.add_argument('--weights_path', type=str, default='weights/yolov3.weights', help='path to weights file')
# parser.add_argument('--weights_path', type=str, default='checkpoints/40.weights', help='path to weights file')
parser.add_argument('--class_path', type=str, default='data/coco_2cls.names', help='path to class label file')
parser.add_argument('--conf_thres', type=float, default=0.8, help='object confidence threshold')
parser.add_argument('--nms_thres', type=float, default=0.4, help='iou thresshold for non-maximum suppression')
parser.add_argument('--n_cpu', type=int, default=0, help='number of cpu threads to use during batch generation')
parser.add_argument('--img_size', type=int, default=416, help='size of each image dimension')
parser.add_argument('--checkpoint_interval', type=int, default=4, help='interval between saving model weights')
parser.add_argument('--checkpoint_dir', type=str, default='checkpoints', help='directory where model checkpoints are saved')
opt = parser.parse_args()
print(opt)

os.makedirs('output', exist_ok=True)
# Create the directory that checkpoints are actually written to, so a
# non-default --checkpoint_dir does not fail at the first save.
os.makedirs(opt.checkpoint_dir, exist_ok=True)
def adjust_learning_rate(optimizer, decay_rate=0.5):
    """Multiply the learning rate of every parameter group by ``decay_rate``.

    Groups whose learning rate is already at or below 1e-8 are left alone.
    ``optimizer`` may be a plain optimizer or a wrapper exposing the real
    optimizer as ``.module``. The resulting optimizer state is printed.
    """
    target = getattr(optimizer, 'module', optimizer)
    for param_group in target.param_groups:
        if param_group['lr'] > 1e-8:
            param_group['lr'] = param_group['lr'] * decay_rate
    print(target)


# is_available must be *called*; the bare function object is always truthy.
cuda = torch.cuda.is_available()
classes = load_classes(opt.class_path)

module_defs = parse_model_config(opt.model_config_path)
hyperparams = module_defs[0]
# Turn the flat "w0,h0,w1,h1,..." anchor string into (w, h) pairs.
anchors = hyperparams["anchors"]
anchors = [int(x) for x in anchors.split(",")]
anchors = [(anchors[i], anchors[i + 1]) for i in range(0, len(anchors), 2)]
# Hand the parsed anchors to every detection layer instead of relying on
# fixed layer positions in the config.
for module_def in module_defs:
    if module_def.get('type') == 'yolo':
        module_def["anchors"] = anchors
batch_size      = opt.batch_size  # int(hyperparams['batch'])
subdivisions    = int(hyperparams['subdivisions'])
sub_batch       = batch_size // subdivisions
learning_rate   = opt.learning_rate
momentum        = float(hyperparams['momentum'])
decay           = float(hyperparams['decay'])
burn_in         = int(hyperparams['burn_in'])
hyperparams['height'] = hyperparams['width'] = opt.img_size

if __name__ == '__main__':
    dataloader = torch.utils.data.DataLoader(
        ListDataset(opt.train_dir, img_size=opt.img_size, is_training=1, data_size=10000),
        batch_size=batch_size, shuffle=True, num_workers=opt.n_cpu)

    model = Darknet(module_defs, img_size=opt.img_size)
    model.load_weights(opt.weights_path, is_training=True)
    # model.apply(weights_init_normal)

    # Use however many GPUs are actually present; fall back to CPU.
    ngpus = torch.cuda.device_count() if cuda else 0
    device = torch.device("cuda" if ngpus >= 1 else "cpu")
    if ngpus > 1:
        model = torch.nn.DataParallel(model)
    model = model.to(device)
    # The underlying Darknet instance (for .losses and .save_weights),
    # regardless of whether the model was wrapped.
    net = model.module if isinstance(model, torch.nn.DataParallel) else model

    model.train()
    Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor
    # The optimizer is used directly: DataParallel is for modules only.
    optimizer = optim.Adam(model.parameters(), lr=learning_rate / batch_size,
                           weight_decay=decay * batch_size)
    print("subdivisions", subdivisions)
    logger = Logger('./logs')
    total_loss = 0
    last_total_loss = 0
    for epoch in range(opt.epochs):
        for batch_i, (_, imgs, targets) in enumerate(dataloader):
            imgs = imgs.type(Tensor)
            targets = targets.type(Tensor)
            optimizer.zero_grad()
            loss = model(imgs, targets)
            # One loss per replica comes back; reduce before backprop.
            batch_loss = torch.sum(loss)
            batch_loss.backward()
            optimizer.step()
            # Plain float for bookkeeping so no autograd state is kept alive.
            loss_value = batch_loss.item()
            strftime = datetime.datetime.now().strftime("%H:%M:%S")
            if batch_i % 40 == 39:
                # Every 40 batches: decay the learning rate when the
                # accumulated loss grew by more than 1% over the last window.
                if last_total_loss > 0 and total_loss > last_total_loss * 1.01:
                    print("total_loss", total_loss)
                    adjust_learning_rate(optimizer)
                else:
                    last_total_loss = total_loss
                total_loss = loss_value
            elif batch_i == 0:
                total_loss = loss_value
            else:
                total_loss += loss_value

            print('%s [Epoch %d/%d, Batch %d/%d Losses: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f, recall: %.5f]' %
                  (strftime, epoch, opt.epochs, batch_i, len(dataloader),
                   net.losses['x'], net.losses['y'], net.losses['w'],
                   net.losses['h'], net.losses['conf'], net.losses['cls'],
                   loss_value, net.losses['recall']))

        if epoch % opt.checkpoint_interval == 0:
            net.save_weights('%s/%d.weights' % (opt.checkpoint_dir, epoch))

Pytorch yolov3 多GPU 訓練

pytorch 多gpu訓練：# -*- coding:utf-8 -*- from __future__ import division import datetime import torch import torch.nn as nn import torch.nn.

pyTorch 使用多GPU訓練

1.在pyTorch中模型使用GPU訓練很方便，直接使用model.cuda()。 2.使用多GPU訓練，model = nn.DataParallel(model) 3.注意訓練/測試過程中 inputs和labels均需載入到GPU中 inputs, l

pytorch使用多GPU訓練MNIST

下面的程式碼引數沒有除錯，可能準確率不高，僅僅供參考程式碼格式。 import argparse import torch import torch.nn as nn import torch.optim as optim import torch.nn.

pytorch 多GPU訓練

當一臺伺服器有多張GPU時，執行程式預設在一張GPU上執行。通過多GPU訓練，可以增大batchsize，加快訓練速度。 from torch.nn import DataParallel num_gp

pytorch多GPU訓練例項與效能對比

以下實驗是我在百度公司實習的時候做的，記錄下來留個小經驗。多GPU訓練 cifar10_97.23 使用 run.sh 檔案開始訓練 cifar10_97.50 使用 run.4GPU.sh 開始訓練在叢集中改變GPU呼叫個數修改 run.sh 檔案 nohup

Keras多GPU訓練以及載入權重無效的問題

目錄 1、資料並行 1.1、單GPU或者無GPU訓練的程式碼如下： 1.2、資料並行的多GPU 訓練 2、裝置並行參考連結本文講簡單的探討Keras中使用多GPU訓練的方法以及需要注意的地方。有兩種方法可

Caffe 多GPU訓練問題，以及batch_size 選擇的問題

1. 多GPU訓練時，速度沒有變得更快。使用多GPU訓練時，每個GPU都會執行一個 Caffe 模型的例項。比如當使用 n n

pytorch使用指定GPU訓練

本文適合多GPU的機器，並且每個使用者需要單獨使用GPU訓練。雖然pytorch提供了指定gpu的幾種方式，但是使用不當的話會遇到out of memory的問題，主要是因為pytorch會在第0塊gpu上初始化，並且會佔用一定空間的視訊記憶體。這種情況下，經常會出現指定的gpu明明是

【TensorFlow】多GPU訓練：示例程式碼解析

使用多GPU有助於提升訓練速度和調參效率。本文主要對tensorflow的示例程式碼進行註釋解析：cifar10_multi_gpu_train.py 1080Ti下加速效果如下（batch=128）單卡：兩個GPU比單個GPU加速了近一倍：

使用Keras進行多GPU訓練 multi_gpu_model

使用Keras訓練具有多個GPU的深度神經網路（照片來源：Nor-Tech.com）。摘要在今天的部落格文章中，我們學習瞭如何使用多個GPU來訓練基於Keras的深度神經網路。使用多個GPU使我們能夠獲得準線性加速。為了驗證這一點，我們在CIFAR-10資料集上訓練了MiniGoog

使用估算器、tf.keras 和 tf.data 進行多 GPU 訓練

文 / Zalando Research 研究科學家 Kashif Rasul 來源 | TensorFlow 公眾號與大多數 AI 研究部門一樣，Zalando Research 也意識到了對創意進行嘗試和快速原型設計的重要性。隨著資料集變得越來越龐大，

pytorch DataParallel 多GPU使用

單GPU： import os os.environ["CUDA_VISIBLE_DEVICES"] = "0" 多GPU： device_ids = [0,1,2,3] model

keras 多GPU訓練，單GPU預測

多GPU訓練 keras自帶模組 multi_gpu_model，此方式為資料並行的方式，將將目標模型在多個裝置上各複製一份，並使用每個裝置上的複製品處理整個資料集的不同部分資料，最高支援在8片GPU上並行。使用方式： from keras.utils imp

tensorflow 多gpu訓練

當使用多個gpu訓練時,輸入資料為batch_size*num_gpu,這樣模型訓練時間可以大大較小. tensorflow中使用制定gpu可以通過tf.device()實現.例如我想使用0號顯示卡: gpu_ind=0 with tf.device("/g

『TensorFlow』分布式訓練_其二_多GPU並行demo分析（待續）

print all set represent proto copyright keys 20M runners 建議比對『MXNet』第七彈_多GPU並行程序設計 models/tutorials/image/cifar10/cifer10_multi_gpu-trai

tensorflow1.12 多GPU協同訓練報錯tensorflow.python.framework.errors_impl.NotFoundError: libnccl.so.2

tensroflow為了提高多模型訓練速度，需要多個GPU同時工作，而且我們一般使用的工作站都是8塊tesla K80，如果能將8塊顯示卡的計算力充分利用起來，將會大大提高模型訓練的速度，縮短模型訓練時間。這幾天看到tensorflow的mor

DarkNet-YoloV3 使用教程訓練自己的資料 GPU版本

一 Darknet-Yolov3下載與安裝下載解壓完後，將darknet-master主資料夾的名字改為darknet 之後修改Makefile，因為是使用GPU版本，將GPU=0改為 GPU=1 然後使用make指令執行Makefile。下載完放到主目錄

Pytorch 多GPU執行

self.net = netword() n_gpu = 1 if n_gpu==1: self.net = torch.nn.DataParallel(self.net).cuda(device=0) else: gpus = [] for i in range(n

tensorflow、多GPU、多執行緒訓練VGG19來做cifar-10分類

背景：幾天前需要寫個多GPU訓練的演算法模型，翻來覆去在tensorflow的官網上看到cifar-10的官方程式碼，花了三天時間去掉程式碼的冗餘部分和改寫成自己的風格。程式碼共有6部分構成：1、data_input.py 由於cifar-10官方訓練集和驗證集都是.bin格

tensorflow視訊記憶體管理、tensorflow使用多個gpu訓練

通常在程式開始之前並不知道需要多大的視訊記憶體，程式會去申請GPU的視訊記憶體的50% 比如一個8G的記憶體，被佔用了2G，那麼程式會申請4G的視訊記憶體（因為有足夠的剩餘視訊記憶體）如果此時視訊記憶體被佔用7G，那麼程式會申請剩下的所有的1G的視訊記憶體。也許你的程

Pytorch yolov3 多GPU 訓練

pytorch 多gpu訓練：

相關推薦