torch.__version__               # PyTorch version
torch.version.cuda              # Corresponding CUDA version
torch.backends.cudnn.version()  # Corresponding cuDNN version
torch.cuda.get_device_name(0)   # GPU type
torch.cuda.is_available()       # Whether CUDA is available
torch.nonzero(tensor)               # Indices of non-zero elements
torch.nonzero(tensor == 0)          # Indices of zero elements
torch.nonzero(tensor).size(0)       # Number of non-zero elements
torch.nonzero(tensor == 0).size(0)  # Number of zero elements
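On PyTorch 1.7 and later, torch.count_nonzero returns these counts directly; a minimal equivalent of the last two lines above:

torch.count_nonzero(tensor)                   # Number of non-zero elements
tensor.numel() - torch.count_nonzero(tensor)  # Number of zero elements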
Tensor expansion
# Expand a tensor of shape 64*512 to shape 64*512*7*7.
torch.reshape(tensor, (64, 512, 1, 1)).expand(64, 512, 7, 7)
class FeatureExtractor(torch.nn.Module):
    """Helper class to extract several convolution features from the given
    pre-trained model.

    Attributes:
        _model, torch.nn.Module.
        _layers_to_extract, list<str> or set<str>

    Example:
        >>> model = torchvision.models.resnet152(pretrained=True)
        >>> model = torch.nn.Sequential(collections.OrderedDict(
                list(model.named_children())[:-1]))
        >>> conv_representation = FeatureExtractor(
                pretrained_model=model,
                layers_to_extract={'layer1', 'layer2', 'layer3', 'layer4'})(image)
    """

    def __init__(self, pretrained_model, layers_to_extract):
        torch.nn.Module.__init__(self)
        self._model = pretrained_model
        self._model.eval()
        self._layers_to_extract = set(layers_to_extract)
    def forward(self, x):
        with torch.no_grad():
            conv_representation = []
            for name, layer in self._model.named_children():
                x = layer(x)
                if name in self._layers_to_extract:
                    conv_representation.append(x)
            return conv_representation
model = torchvision.models.resnet18(pretrained=True)
for param in model.parameters():
    param.requires_grad = False
model.fc = nn.Linear(512, 100)  # Replace the last fc layer
optimizer = torch.optim.SGD(model.fc.parameters(), lr=1e-2, momentum=0.9,
                            weight_decay=1e-4)
Fine-tune the fully connected layer with a larger learning rate and the convolutional layers with a smaller learning rate
model = torchvision.models.resnet18(pretrained=True)
finetuned_parameters = list(map(id, model.fc.parameters()))
conv_parameters = (p for p in model.parameters()
                   if id(p) not in finetuned_parameters)
parameters = [{'params': conv_parameters, 'lr': 1e-3},
              {'params': model.fc.parameters()}]
optimizer = torch.optim.SGD(parameters, lr=1e-2, momentum=0.9,
                            weight_decay=1e-4)
# Start with main code
if __name__ == '__main__':
    # argparse for additional flags for experiment
    parser = argparse.ArgumentParser(description="Train a network for ...")
    ...
    opt = parser.parse_args()
    # add code for datasets (we always use train and validation/test sets)
    data_transforms = transforms.Compose([
        transforms.Resize((opt.img_size, opt.img_size)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    # load checkpoint if needed/wanted
    start_n_iter = 0
    start_epoch = 0
    if opt.resume:
        ckpt = load_checkpoint(opt.path_to_checkpoint)  # custom method for loading last checkpoint
        net.load_state_dict(ckpt['net'])
        start_epoch = ckpt['epoch']
        start_n_iter = ckpt['n_iter']
        optim.load_state_dict(ckpt['optim'])
        print("last checkpoint restored")
    ...

    # if we want to run the experiment on multiple GPUs we move the models there
    net = torch.nn.DataParallel(net)
    ...

    # typically we use tensorboardX to keep track of experiments
    writer = SummaryWriter(...)

    # now we start the main loop
    n_iter = start_n_iter
    for epoch in range(start_epoch, opt.epochs):
        # set models to train mode
        net.train()
        ...

        # use prefetch_generator and tqdm for iterating through data
        pbar = tqdm(enumerate(BackgroundGenerator(train_data_loader, ...)),
                    total=len(train_data_loader))
        start_time = time.time()

        # for loop going through dataset
        for i, data in pbar:
            # data preparation
            img, label = data
            if use_cuda:
                img = img.cuda()
                label = label.cuda()
            ...

            # It's very good practice to keep track of preparation time and
            # computation time using tqdm to find any issues in your dataloader
            prepare_time = time.time() - start_time

            # forward and backward pass
            optim.zero_grad()
            ...
            loss.backward()
            optim.step()
            ...
            # update tensorboardX
            writer.add_scalar(..., n_iter)
            ...

            # compute computation time and *compute_efficiency*
            process_time = time.time() - start_time - prepare_time
            pbar.set_description("Compute efficiency: {:.2f}, epoch: {}/{}:".format(
                process_time / (process_time + prepare_time), epoch, opt.epochs))
            start_time = time.time()

        # maybe do a test pass every x epochs
        if epoch % x == x - 1:
            # bring models to evaluation mode
            net.eval()
            ...
            # do some tests
            pbar = tqdm(enumerate(BackgroundGenerator(test_data_loader, ...)),
                        total=len(test_data_loader))
            for i, data in pbar:
                ...
            # save checkpoint if needed
            ...
for t in range(80):
    for images, labels in tqdm.tqdm(train_loader, desc='Epoch %3d' % (t + 1)):
        images, labels = images.cuda(), labels.cuda()
        scores = model(images)
        loss = loss_function(scores, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
Label smoothing
for images, labels in train_loader:
    images, labels = images.cuda(), labels.cuda()
    N = labels.size(0)
    # C is the number of classes.
    smoothed_labels = torch.full(size=(N, C), fill_value=0.1 / (C - 1)).cuda()
    smoothed_labels.scatter_(dim=1, index=torch.unsqueeze(labels, dim=1), value=0.9)
    score = model(images)
    log_prob = torch.nn.functional.log_softmax(score, dim=1)
    loss = -torch.sum(log_prob * smoothed_labels) / N
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
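PyTorch 1.10 and later also ship label smoothing built into the criterion. A minimal sketch of the equivalent loop; note the built-in puts mass ε/C on every class, a slightly different parameterization from the manual targets above:

# Built-in variant (PyTorch >= 1.10): the criterion smooths the targets
# internally, so plain integer labels can be used directly.
criterion = torch.nn.CrossEntropyLoss(label_smoothing=0.1)
for images, labels in train_loader:
    images, labels = images.cuda(), labels.cuda()
    loss = criterion(model(images), labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()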
Mixup
beta_distribution = torch.distributions.beta.Beta(alpha, alpha)
for images, labels in train_loader:
    images, labels = images.cuda(), labels.cuda()
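    # NOTE: the original snippet breaks off above; the rest of this loop body
    # is a minimal sketch of the standard mixup step, assuming a cross-entropy
    # `loss_function` as in the earlier training loop.
    # Sample a mixing coefficient and a random permutation of the batch,
    # then blend pairs of images (mixup, Zhang et al. 2018).
    lambda_ = beta_distribution.sample([]).item()
    index = torch.randperm(images.size(0)).cuda()
    mixed_images = lambda_ * images + (1 - lambda_) * images[index]

    # The loss is the same convex combination of the losses for both label sets.
    scores = model(mixed_images)
    loss = (lambda_ * loss_function(scores, labels)
            + (1 - lambda_) * loss_function(scores, labels[index]))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()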
L1 regularization

# Add the L1 norm of all parameters to the standard loss.
loss = ...  # Standard cross-entropy loss
for param in model.parameters():
    loss = loss + torch.sum(torch.abs(param))
loss.backward()
Do not apply L2 regularization / weight decay to bias terms
bias_list = (param for name, param in model.named_parameters()
             if name[-4:] == 'bias')
others_list = (param for name, param in model.named_parameters()
               if name[-4:] != 'bias')
parameters = [{'params': bias_list, 'weight_decay': 0},
              {'params': others_list}]
optimizer = torch.optim.SGD(parameters, lr=1e-2, momentum=0.9,
                            weight_decay=1e-4)
# data['label'] and data['prediction'] are groundtruth labels and predictions
# for each image, respectively.
accuracy = np.mean(data['label'] == data['prediction']) * 100
# Compute precision and recall for each class.
for c in range(num_classes):
    tp = np.dot((data['label'] == c).astype(int),
                (data['prediction'] == c).astype(int))
    tp_fp = np.sum(data['prediction'] == c)
    tp_fn = np.sum(data['label'] == c)
    # Note: tp_fp (or tp_fn) can be zero if class c is never predicted
    # (or never occurs), which makes the division below undefined.
    precision = tp / tp_fp * 100
    recall = tp / tp_fn * 100