The input image is a 64x64x3 patch, and the ground truth is the 16x16x3 center part of the input image.
I found another paper that used this approach with a VGG model, so I used the VGG framework and implemented it again.
Below is my model-initialization code.
function net = init_building(varargin)
% INIT_BUILDING  Build a VGG-16-style MatConvNet network for 64x64x3 patches.
%
%   net = INIT_BUILDING(...) returns a dagnn.DagNN object. The network is
%   first defined in SimpleNN form (five VGG-16 conv blocks, each ended by
%   2x2/stride-2 max pooling, followed by three "fully connected" layers
%   implemented as convolutions), then converted with
%   dagnn.DagNN.fromSimpleNN. The placeholder softmax loss is removed and
%   replaced by the project-specific SegmentationLoss_building and
%   SegmentationAccuracy2 layers.
%
%   Spatial sizes with a 64x64 input: 64 -> 32 -> 16 -> 8 -> 4 -> 2 after
%   the five pooling layers; the 2x2 pad-0 conv in block 6 then collapses
%   the 2x2x512 map to 1x1x4096.
%
%   NOTE(review): fc8 emits 512 channels while the ground truth is a
%   16x16x3 crop — presumably SegmentationLoss_building reshapes or
%   interprets the prediction accordingly; confirm against that layer.

opts.networkType = 'dagnn' ;
opts = vl_argparse(opts, varargin) ;

lr = [.1 .2] ;  % learning-rate multipliers for [filters, biases]

net.layers = {} ;

% Blocks 1-5: the VGG-16 conv stack. Every conv is 3x3, stride 1, pad 1
% (spatial size preserved). Each cell lists the channel progression of
% one block: blocks 1-2 have two convs, blocks 3-5 have three.
blocks = { [3 64 64], ...
           [64 128 128], ...
           [128 256 256 256], ...
           [256 512 512 512], ...
           [512 512 512 512] } ;
for b = 1:numel(blocks)
  chans = blocks{b} ;
  for c = 1:numel(chans)-1
    net.layers{end+1} = conv_layer(3, chans(c), chans(c+1), lr, [1 1 1 1]) ;
    net.layers{end+1} = struct('type', 'relu') ;
  end
  net.layers{end+1} = max_pool() ;  % Emulate caffe
end

% Block 6: "fully connected" layers implemented as convolutions.
% The 2x2 conv (pad 0) reduces the 2x2x512 feature map to 1x1x4096.
net.layers{end+1} = conv_layer(2, 512, 4096, lr, [0 0 0 0]) ;
net.layers{end+1} = struct('type', 'relu') ;
net.layers{end+1} = struct('name', 'dropout1', 'type', 'dropout', 'rate', 0.5) ;
net.layers{end+1} = conv_layer(1, 4096, 4096, lr, [0 0 0 0]) ;
net.layers{end+1} = struct('type', 'relu') ;
% (a second dropout after this layer was tried and deliberately disabled)
fc8 = conv_layer(1, 4096, 512, lr, [0 0 0 0]) ;
fc8.name = 'fc8' ;  % named so it is easy to locate after DagNN conversion
net.layers{end+1} = fc8 ;

% Placeholder loss layer; removed after the DagNN conversion below.
net.layers{end+1} = struct('name', 'prob', 'type', 'softmaxloss') ;

% Meta parameters.
% BUG FIX: inputSize was [16 16 3], but the network consumes 64x64x3
% patches — five 2x2 poolings of a 16x16 input would underflow before
% block 6. 16x16x3 is the ground-truth crop size, not the input size.
net.meta.inputSize = [64 64 3] ;
net.meta.trainOpts.learningRate = 0.0001 * ones(1,50) ;
net.meta.trainOpts.weightDecay = 0.0005 ;
net.meta.trainOpts.batchSize = 100 ;
net.meta.trainOpts.numEpochs = numel(net.meta.trainOpts.learningRate) ;

% Convert to DagNN, drop the placeholder softmax, and attach the
% project's segmentation loss and accuracy layers.
net = dagnn.DagNN.fromSimpleNN(net, 'canonicalNames', true) ;
net.removeLayer('prob') ;
net.addLayer('objective', ...
SegmentationLoss_building('loss', 'softmaxlog'), ...
{'prediction', 'label'}, 'objective') ;
net.addLayer('accuracy', ...
SegmentationAccuracy2(), ...
{'prediction', 'label'}, 'accuracy') ;

function layer = conv_layer(k, in, out, lr, pad)
% CONV_LAYER  kxk convolution layer, stride 1, N(0, 0.01) weight init.
layer = struct('type', 'conv', ...
'weights', {{0.01*randn(k,k,in,out, 'single'), zeros(1,out,'single')}}, ...
'learningRate', lr, ...
'stride', [1 1], ...
'pad', pad) ;

function layer = max_pool()
% MAX_POOL  2x2 max pooling with stride 2 and no padding (emulates Caffe).
layer = struct('type', 'pool', ...
'method', 'max', ...
'pool', [2 2], ...
'stride', [2 2], ...
'pad', [0 0 0 0]) ;