作者大大您好:
谢谢开源可以学习!!
看到您实现的 ShuffleNet block 好像和论文里描述的不一样,这个会对结果有影响吗?
def forward(self, old_x):
    """Run the block on ``old_x`` and concatenate both branch outputs on dim 1.

    * ``stride == 1``: ``self.channel_shuffle(old_x)`` yields a pair; the
      first element is passed through untouched and the second goes through
      ``self.branch_main``.
    * ``stride == 2``: the full input feeds both ``self.branch_proj`` and
      ``self.branch_main``.

    Any other stride falls through and returns ``None``.
    """
    if self.stride == 1:
        # NOTE(review): channel_shuffle is defined outside this snippet;
        # presumably it shuffles and splits the channels - confirm.
        x_proj, x = self.channel_shuffle(old_x)
        return torch.cat((x_proj, self.branch_main(x)), 1)
    elif self.stride == 2:
        # Both branches consume the same, unsplit input.
        return torch.cat((self.branch_proj(old_x), self.branch_main(old_x)), 1)
下面是我之前从别的 repo 拿来的实现(具体出处已经不记得了),貌似和论文是一致的 :-)
`import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from collections import OrderedDict
from torch.nn import init
import math
def conv_bn(inp, oup, stride):
    """Return a 3x3 convolution -> batch norm -> ReLU stack.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        stride: stride of the 3x3 convolution (padding is fixed at 1).

    Returns:
        An ``nn.Sequential`` with the three layers; the convolution has no
        bias and the ReLU operates in place.
    """
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU(inplace=True),
    )
def conv_1x1_bn(inp, oup):
    """Return a 1x1 convolution -> batch norm -> ReLU stack.

    Args:
        inp: number of input channels.
        oup: number of output channels.

    Returns:
        An ``nn.Sequential`` with the three layers; the convolution uses
        stride 1, no padding and no bias, and the ReLU operates in place.
    """
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU(inplace=True),
    )
def channel_shuffle(x, groups):
    """Interleave the channels of ``x`` across ``groups`` groups.

    The channel axis is viewed as ``(groups, channels_per_group)``, the two
    axes are swapped, and the result is flattened back. For six channels and
    ``groups=2`` the channel order ``0 1 2 3 4 5`` becomes ``0 3 1 4 2 5``.

    Args:
        x: 4-D tensor unpacked as ``(batch, channels, height, width)``.
        groups: number of groups; the ``view`` below fails unless it divides
            the channel count.

    Returns:
        A tensor with the same shape as ``x`` and shuffled channels.
    """
    # ``x.size()`` replaces the legacy ``x.data.size()``; the shape is the same.
    batchsize, num_channels, height, width = x.size()
    channels_per_group = num_channels // groups

    # reshape: split the channel axis into (groups, channels_per_group)
    x = x.view(batchsize, groups, channels_per_group, height, width)

    # swap the two channel axes; contiguous() is required before the next view
    x = torch.transpose(x, 1, 2).contiguous()

    # flatten back to (batch, channels, height, width)
    x = x.view(batchsize, -1, height, width)
    return x
class InvertedResidual(nn.Module):
    """Two-branch unit whose outputs are concatenated and channel-shuffled.

    Two variants are selected by ``benchmodel``:

    * ``1``: the input is split in half along the channel axis; the first
      half is passed through unchanged and the second half goes through
      ``banch2``. ``inp`` is not used to size any layer in this variant.
    * ``2``: the whole input goes through both ``banch1`` and ``banch2``.

    Each branch produces ``oup // 2`` channels.

    Args:
        inp: number of input channels (only used when ``benchmodel == 2``).
        oup: number of output channels of the unit.
        stride: stride of the depthwise 3x3 convolutions; must be 1 or 2.
        benchmodel: variant selector, 1 or 2.

    Raises:
        AssertionError: if ``stride`` is not 1 or 2.
        ValueError: if ``benchmodel`` is not 1 or 2.
    """

    def __init__(self, inp, oup, stride, benchmodel):
        super(InvertedResidual, self).__init__()
        self.benchmodel = benchmodel
        self.stride = stride
        assert stride in [1, 2]
        if benchmodel not in (1, 2):
            # forward() only knows these two variants; fail at build time
            # instead of hitting an unbound local on the first forward pass.
            raise ValueError(
                "benchmodel must be 1 or 2, got {!r}".format(benchmodel))

        oup_inc = oup // 2

        # NOTE: the attribute names "banch1"/"banch2" (sic) are kept as-is
        # because they are part of the module's state_dict keys.
        if self.benchmodel == 1:
            # Operates on the second half of the input, i.e. oup_inc channels.
            self.banch2 = self._pw_dw_pw(oup_inc, oup_inc, stride)
        else:
            self.banch1 = nn.Sequential(
                # dw
                nn.Conv2d(inp, inp, 3, stride, 1, groups=inp, bias=False),
                nn.BatchNorm2d(inp),
                # pw (followed by ReLU)
                nn.Conv2d(inp, oup_inc, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup_inc),
                nn.ReLU(inplace=True),
            )
            self.banch2 = self._pw_dw_pw(inp, oup_inc, stride)

    @staticmethod
    def _pw_dw_pw(in_channels, out_channels, stride):
        """Build the pointwise -> depthwise 3x3 -> pointwise branch."""
        return nn.Sequential(
            # pw
            nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            # dw
            nn.Conv2d(out_channels, out_channels, 3, stride, 1,
                      groups=out_channels, bias=False),
            nn.BatchNorm2d(out_channels),
            # pw (followed by ReLU)
            nn.Conv2d(out_channels, out_channels, 1, 1, 0, bias=False),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        )

    @staticmethod
    def _concat(x, out):
        """Concatenate ``x`` and ``out`` along the channel axis (dim 1)."""
        return torch.cat((x, out), 1)

    def forward(self, x):
        """Apply the unit to ``x`` and shuffle the result with 2 groups."""
        if self.benchmodel == 1:
            half = x.shape[1] // 2
            passthrough = x[:, :half, :, :]
            processed = self.banch2(x[:, half:, :, :])
            out = self._concat(passthrough, processed)
        else:
            out = self._concat(self.banch1(x), self.banch2(x))
        return channel_shuffle(out, 2)
class ShuffleNetV2(nn.Module):
    """ShuffleNet V2 style classifier that can also return stage outputs.

    Layout: ``conv1`` (3x3, stride 2) -> ``maxpool`` (stride 2) ->
    ``stage2`` / ``stage3`` / ``stage4`` (each opens with a stride-2 unit) ->
    ``conv_last`` (1x1) -> global average pool -> linear classifier.

    Args:
        n_class: number of classifier outputs.
        input_size: spatial input size; must be a multiple of 32 because it
            sizes the final average-pool kernel (``input_size // 32``).
        width_mult: one of 0.5, 1.0, 1.5, 2.0; selects the channel plan.
        back_flag: when true, ``forward`` returns the list of intermediate
            feature maps instead of class scores.

    Raises:
        AssertionError: if ``input_size`` is not a multiple of 32.
        ValueError: if ``width_mult`` is not one of the supported values.
    """

    # Channel plan per width multiplier. Index 0 is invalid and should never
    # be used; it only exists for indexing convenience.
    # NOTE(review): the 2.0x plan uses 224 for stage2, whereas the paper and
    # torchvision use 244. Left unchanged so existing checkpoints still load;
    # confirm which one is intended.
    _STAGE_OUT_CHANNELS = {
        0.5: [-1, 24, 48, 96, 192, 1024],
        1.0: [-1, 24, 116, 232, 464, 1024],
        1.5: [-1, 24, 176, 352, 704, 1024],
        2.0: [-1, 24, 224, 488, 976, 2048],
    }

    def __init__(self, n_class=1000, input_size=224, width_mult=1., back_flag=False):
        super(ShuffleNetV2, self).__init__()

        assert input_size % 32 == 0

        self.stage_repeats = [4, 8, 4]
        self.back_flag = back_flag

        try:
            channel_plan = self._STAGE_OUT_CHANNELS[width_mult]
        except (KeyError, TypeError):
            raise ValueError(
                "width_mult={!r} is not supported; "
                "choose one of 0.5, 1.0, 1.5, 2.0".format(width_mult)
            ) from None
        # Copy so that per-instance edits never touch the class-level table.
        self.stage_out_channels = list(channel_plan)

        # building first layer
        input_channel = self.stage_out_channels[1]
        self.conv1 = conv_bn(3, input_channel, 2)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # building the three stages used by forward()
        stages = []
        for repeats, output_channel in zip(self.stage_repeats,
                                           self.stage_out_channels[2:]):
            blocks = self._make_stage(input_channel, output_channel, repeats)
            stages.append(nn.Sequential(*blocks))
            input_channel = output_channel
        self.stage2, self.stage3, self.stage4 = stages

        # NOTE(review): ``features`` is never called by forward(), and its
        # first unit is built with the channel count left over from stage4,
        # so it cannot consume the maxpool output. It is kept only so the
        # module's parameters and state_dict keys stay as they were; consider
        # removing it.
        feature_blocks = []
        for repeats, output_channel in zip(self.stage_repeats,
                                           self.stage_out_channels[2:]):
            feature_blocks.extend(
                self._make_stage(input_channel, output_channel, repeats))
            input_channel = output_channel
        self.features = nn.Sequential(*feature_blocks)

        # building last several layers
        self.conv_last = conv_1x1_bn(input_channel, self.stage_out_channels[-1])
        self.globalpool = nn.Sequential(nn.AvgPool2d(input_size // 32))

        # building classifier
        self.classifier = nn.Sequential(
            nn.Linear(self.stage_out_channels[-1], n_class))

    @staticmethod
    def _make_stage(input_channel, output_channel, repeats):
        """Return the units of one stage: a stride-2 unit, then stride-1 units."""
        blocks = []
        for index in range(repeats):
            if index == 0:
                # inp, oup, stride, benchmodel
                blocks.append(InvertedResidual(input_channel, output_channel, 2, 2))
            else:
                blocks.append(InvertedResidual(output_channel, output_channel, 1, 1))
        return blocks

    def forward(self, x):
        """Return class scores, or the feature-map list when ``back_flag`` is set.

        The list holds, in order, the outputs of ``maxpool``, ``stage2``,
        ``stage3`` and ``stage4``.
        """
        y = []
        conv1 = self.conv1(x)
        maxpool = self.maxpool(conv1)
        y.append(maxpool)
        out3 = self.stage2(maxpool)
        y.append(out3)
        out4 = self.stage3(out3)
        y.append(out4)
        out5 = self.stage4(out4)
        y.append(out5)

        if self.back_flag:
            return y

        x = self.conv_last(out5)
        x = self.globalpool(x)
        # Flatten to (batch, channels) for the linear classifier.
        x = x.view(-1, self.stage_out_channels[-1])
        x = self.classifier(x)
        return x
def shufflenetv2(width_mult=1.):
    """Build a ``ShuffleNetV2`` with the given width multiplier.

    All other constructor arguments keep their defaults.
    """
    model = ShuffleNetV2(width_mult=width_mult)
    return model
if __name__ == "__main__":
    # Smoke test: build the default network and print its layer structure.
    model = ShuffleNetV2()
    print(model)
`