# 涨点明显 | 港中文等提出SplitNet结合Co-Training提升Backbone性能(附源码和论文)

2020/12/03 23:52

## 3、本文方法

### 3.1 分割模型

#### 3.1.2、如何切分网络？

1)切分ResNet

# Shrink the base width of the wide-ResNet variants when the network is
# divided into split_factor == 2 small networks.
# The below is the same as max(widen_factor / (split_factor ** 0.5) + 0.4, 1.0)
if split_factor == 2:
    if arch == 'wide_resnet50_2':
        self.inplanes = 64
        width_per_group = 64
        print('INFO:PyTorch: Dividing wide_resnet50_2, change base_width from {} '
              'to {}.'.format(64 * 2, 64))
    elif arch == 'wide_resnet50_3':
        self.inplanes = 64
        width_per_group = 64 * 2
        print('INFO:PyTorch: Dividing wide_resnet50_3, change base_width from {} '
              'to {}.'.format(64 * 3, 64 * 2))

2)切分ResNeXt

self.dropout = None
# Dropout applies to every ImageNet model, but only to a few of the
# larger CIFAR architectures.
use_dropout = False
if 'cifar' in dataset:
    use_dropout = arch in ('resnext29_16x64d', 'resnext29_8x64d',
                           'wide_resnet16_8', 'wide_resnet40_10')
elif 'imagenet' in dataset:
    use_dropout = True
if use_dropout and dropout_p is not None:
    # Scale the drop ratio down since each of the split_factor small
    # networks is narrower than the undivided backbone.
    dropout_p = dropout_p / split_factor
    # You can also use the below code.
    # dropout_p = dropout_p / (split_factor ** 0.5)
    print('INFO:PyTorch: Using dropout with ratio {}'.format(dropout_p))
    self.dropout = nn.Dropout(dropout_p)

### 3.2 联合训练

# CIFAR training augmentation: pad-and-crop, horizontal flip, AutoAugment
# policy, tensorize, per-channel normalize, then random erasing.
_aug_steps = [
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    CIFAR10Policy(),  # presumably the AutoAugment policy for CIFAR-10 — confirm against its definition
    transforms.ToTensor(),
    transforms.Normalize(
        (0.4914, 0.4822, 0.4465),  # CIFAR-10 channel means
        (0.2023, 0.1994, 0.2010),  # CIFAR-10 channel stds
    ),
    transforms.RandomErasing(
        p=erase_p,
        scale=(0.125, 0.2),
        ratio=(0.99, 1.0),
        value=0,
        inplace=False,
    ),
]
train_transform = transforms.Compose(_aug_steps)

### 3.3 联合训练损失函数

def _co_training_loss(self, outputs, loss_choose, epoch=0):  """calculate the co-training loss between outputs of different small networks  """  weight_now = self.cot_weight  if self.is_cot_weight_warm_up and epoch < self.cot_weight_warm_up_epochs:   weight_now = max(self.cot_weight * epoch / self.cot_weight_warm_up_epochs, 0.005)  if loss_choose == 'js_divergence':   # the Jensen-Shannon divergence between p(x1), p(x2), p(x3)...   # https://en.wikipedia.org/wiki/Jensen%E2%80%93Shannon_divergence   outputs_all = torch.stack(outputs, dim=0)   p_all = F.softmax(outputs_all, dim=-1)   p_mean = torch.mean(p_all, dim=0)   H_mean = (- p_mean * torch.log(p_mean)).sum(-1).mean()   H_sep = (- p_all * F.log_softmax(outputs_all, dim=-1)).sum(-1).mean()   cot_loss = weight_now * (H_mean - H_sep)  else:   raise NotImplementedError  return cot_loss  

## 4、实验

### 参考：

[1].SplitNet: Divide and Co-training
[2].https://github.com/mzhaoshuai/SplitNet-Divide-and-Co-training

0
1 收藏

0 评论
1 收藏
0