dawn-bench-models/pytorch/CIFAR10/benchmark/models/densenet.py

109 lines
3.5 KiB
Python
Raw Normal View History

2017-08-17 12:43:17 -06:00
'''DenseNet in PyTorch.'''
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
class Bottleneck(nn.Module):
def __init__(self, in_planes, growth_rate):
super(Bottleneck, self).__init__()
self.bn1 = nn.BatchNorm2d(in_planes)
self.conv1 = nn.Conv2d(in_planes, 4 * growth_rate, kernel_size=1, bias=False)
self.bn2 = nn.BatchNorm2d(4 * growth_rate)
self.conv2 = nn.Conv2d(4 * growth_rate, growth_rate, kernel_size=3, padding=1, bias=False)
def forward(self, x):
out = self.conv1(F.relu(self.bn1(x)))
out = self.conv2(F.relu(self.bn2(out)))
out = torch.cat([out, x], 1)
return out
class Transition(nn.Module):
def __init__(self, in_planes, out_planes, last=False, pool_size=2):
super(Transition, self).__init__()
self.last = last
self.pool_size = pool_size
self.bn = nn.BatchNorm2d(in_planes)
if not self.last:
self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)
def forward(self, x):
out = F.relu(self.bn(x))
if not self.last:
out = self.conv(out)
out = F.avg_pool2d(out, self.pool_size)
return out
class DenseNet(nn.Module):
def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
super(DenseNet, self).__init__()
# TODO: Add drop for CIFAR10 without data augmentation
self.growth_rate = growth_rate
num_planes = 2 * growth_rate
self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)
self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
num_planes += nblocks[0] * growth_rate
out_planes = int(math.floor(num_planes*reduction))
self.trans1 = Transition(num_planes, out_planes)
num_planes = out_planes
self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
num_planes += nblocks[1] * growth_rate
out_planes = int(math.floor(num_planes*reduction))
self.trans2 = Transition(num_planes, out_planes)
num_planes = out_planes
self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
num_planes += nblocks[2] * growth_rate
self.trans3 = Transition(num_planes, num_planes, last=True, pool_size=8)
self.linear = nn.Linear(num_planes, num_classes)
for m in self.modules():
if isinstance(m, nn.Conv2d):
n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
m.weight.data.normal_(0, math.sqrt(2. / n))
elif isinstance(m, nn.BatchNorm2d):
m.weight.data.fill_(1)
m.bias.data.zero_()
def _make_dense_layers(self, block, in_planes, nblock):
layers = []
for i in range(nblock):
layers.append(block(in_planes, self.growth_rate))
in_planes += self.growth_rate
return nn.Sequential(*layers)
def forward(self, x):
out = self.conv1(x)
out = self.trans1(self.dense1(out))
out = self.trans2(self.dense2(out))
out = self.trans3(self.dense3(out))
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def DenseNetBC(L, k):
assert (L - 4) % 6 == 0
num_blocks = int((L - 4) / 6)
return DenseNet(Bottleneck, [num_blocks] * 3, growth_rate=k, reduction=0.5)
def DenseNetBC100():
return DenseNetBC(100, 12)
def DenseNetBC250():
return DenseNetBC(250, 24)
def DenseNetBC190():
return DenseNetBC(190, 40)