Neural Networks

Speeding up the training of custom network models using pre-trained models’ weights

Pre-trained model to custom neural network model


In most ML tasks that use neural networks, starting from a generally available pre-trained model greatly reduces training time, because the pre-trained weights would otherwise take a very long time to learn from scratch. This is not always possible, especially when using a custom network model. Even then, it is sometimes possible to reuse the pre-trained weights for some of the layers (if not all of them). The question then becomes: how do you copy the pre-trained weights (here, from vgg16) into just those few layers? Looking at the model details, it is not that hard. The example below shows just that.


This code was written with PyTorch 1.0.1 (the latest version available at the time of writing this post).

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models

class myCustomNet(nn.Module):
    def __init__(self, load_weights=False):
        super(myCustomNet, self).__init__()
        # layers to be initialized with vgg16 weights
        self.pretrained_layers_list = [64, 64, 'M', 128, 128]
        self.pretrained_layers = create_layers(self.pretrained_layers_list)
        # custom head: the front end above ends with 128 channels
        self.custom_layer = nn.Conv2d(128, 1, kernel_size=1)
        if not load_weights:
            pretrained_model = models.vgg16(pretrained=True)
            # first initialize the weights with random values
            self._initialize_weights()
            # then overwrite them with the vgg16 weights wherever the shapes match
            own_items = list(self.pretrained_layers.state_dict().items())
            vgg_items = list(pretrained_model.state_dict().items())
            for i in range(len(own_items)):
                if own_items[i][1].shape == vgg_items[i][1].shape:
                    own_items[i][1].data[:] = vgg_items[i][1].data[:]
                    print("loaded layer with shape:", vgg_items[i][1].shape)
                else:
                    print("not loaded with pretrained weights")

    def forward(self, x):
        x = self.pretrained_layers(x)
        x = F.relu(self.custom_layer(x))
        return x

    # to initialize the weights with random values
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

def create_layers(cfg, in_channels=3, ks=3, batch_norm=False, dilation=False):
    # dilated convolutions enlarge the receptive field without pooling
    if dilation:
        d_rate = 2
    else:
        d_rate = 1
    layers = []
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=ks,
                               padding=d_rate, dilation=d_rate)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)

Instantiate the model:
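A minimal sketch, using the class defined above: with the default load_weights=False, the matching vgg16 weights are copied in at construction time (torchvision downloads them on the first run).

# instantiating the model triggers the weight copy in __init__,
# printing one "loaded layer ..." line per copied tensor
model = myCustomNet()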

Visualize the model:
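Printing the model is the simplest way to inspect it; the listing makes it easy to check that the vgg16 front end and the custom 1x1 head are wired together as intended. A quick forward pass with a dummy input is also a useful sanity check (the shapes below assume a 3-channel 224x224 input):

# list every layer in order
print(model)

# dummy batch of one 3-channel 224x224 image
dummy = torch.randn(1, 3, 224, 224)
out = model(dummy)
print(out.shape)   # torch.Size([1, 1, 112, 112]) - the single 'M' pooling halves the spatial size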

By loading the pre-trained weights into the new model, the training time should be much less than when training from randomly initialized weights.
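A further optional speed-up, not shown in the code above, is to freeze the vgg16-initialized layers so that only the custom layers are trained; a sketch, assuming the model instantiated above:

# freeze the pre-trained front end; only the custom head will be updated
for p in model.pretrained_layers.parameters():
    p.requires_grad = False

# hand the optimizer only the parameters that still require gradients
optimizer = torch.optim.SGD(
    (p for p in model.parameters() if p.requires_grad), lr=0.01
)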

