I trained boxinst on a customised dataset containing 1 category and got loss 'nan', but I have

<a class="user-mention notranslate" data-hovercard-type="user" data-hovercard-url="/us

loss 'nan',Customised datasets with 1 category in boxinst about boxinstseg HOT 2 OPEN

shanghangjiang commented on September 28, 2024

loss 'nan',Customised datasets with 1 category in boxinst

from boxinstseg.

Comments (2)

shanghangjiang commented on September 28, 2024

I also used the default settings to train boxinst on COCO, and the loss is still 'nan'.

from boxinstseg.

LiWentomng commented on September 28, 2024

@shanghangjiang
Hello, I have tested the default settings for boxinst. They work well.

The training logs are listed below:

2023-03-17 20:44:14,061 - mmdet - INFO - Distributed training: True
2023-03-17 20:44:14,929 - mmdet - INFO - Config:
log_config = dict(interval=50, hooks=[dict(type='TextLoggerHook')])
custom_hooks = [dict(type='NumClassCheckHook')]
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
opencv_num_threads = 0
mp_start_method = 'fork'
auto_scale_lr = dict(enable=False, base_batch_size=16)
model = dict(
type='CondInst',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
zero_init_residual=False,
norm_cfg=dict(type='BN', requires_grad=True),
norm_eval=True,
style='pytorch',
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
neck=dict(
type='FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
start_level=1,
add_extra_convs='on_output',
num_outs=5,
relu_before_extra_convs=True),
bbox_head=dict(
type='CondInstBoxHead',
num_classes=80,
in_channels=256,
center_sampling=True,
center_sample_radius=1.5,
norm_on_bbox=True,
stacked_convs=4,
feat_channels=256,
strides=[8, 16, 32, 64, 128],
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='GIoULoss', loss_weight=1.0),
loss_centerness=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
mask_branch=dict(
type='CondInstMaskBranch',
in_channels=256,
in_indices=[0, 1, 2],
strides=[8, 16, 32],
branch_convs=4,
branch_channels=128,
branch_out_channels=16),
mask_head=dict(
type='CondInstMaskHead',
in_channels=16,
in_stride=8,
out_stride=4,
dynamic_convs=3,
dynamic_channels=8,
disable_rel_coors=False,
bbox_head_channels=256,
sizes_of_interest=[64, 128, 256, 512, 1024],
max_proposals=-1,
topk_per_img=64,
boxinst_enabled=True,
bottom_pixels_removed=10,
pairwise_size=3,
pairwise_dilation=2,
pairwise_color_thresh=0.3,
pairwise_warmup=10000),
train_cfg=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.4,
min_pos_iou=0,
ignore_iof_thr=-1),
allowed_border=-1,
pos_weight=-1,
debug=False),
test_cfg=dict(
nms_pre=2000,
min_bbox_size=0,
score_thr=0.05,
nms=dict(type='nms', iou_threshold=0.5),
max_per_img=2000,
output_segm=False))
dataset_type = 'CocoDataset'
data_root = '/mnt/SSD/lwt_workdir/data/coco/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
dict(
type='Resize',
img_scale=[(1333, 800), (1333, 768), (1333, 736), (1333, 704),
(1333, 672), (1333, 640)],
multiscale_mode='value',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type='CocoDataset',
ann_file=
'/mnt/SSD/lwt_workdir/data/coco/annotations/instances_train2017.json',
img_prefix='/mnt/SSD/lwt_workdir/data/coco/train2017/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True, with_mask=False),
dict(
type='Resize',
img_scale=[(1333, 800), (1333, 768), (1333, 736), (1333, 704),
(1333, 672), (1333, 640)],
multiscale_mode='value',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]),
val=dict(
type='CocoDataset',
ann_file=
'/mnt/SSD/lwt_workdir/data/coco/annotations/instances_val2017.json',
img_prefix='/mnt/SSD/lwt_workdir/data/coco/val2017/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]),
test=dict(
type='CocoDataset',
ann_file=
'/mnt/SSD/lwt_workdir/data/coco/annotations/instances_val2017.json',
img_prefix='/mnt/SSD/lwt_workdir/data/coco/val2017/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 800),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img'])
])
]))
evaluation = dict(interval=1, metric=['bbox', 'segm'])
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[27, 33])
runner = dict(type='EpochBasedRunner', max_epochs=36)
checkpoint_config = dict(interval=1)
work_dir = './work_dirs/boxinst_coco_3x'
auto_resume = False
gpu_ids = range(0, 2)

2023-03-17 20:44:14,930 - mmdet - INFO - Set random seed to 0, deterministic: False
2023-03-17 20:44:15,342 - mmdet - INFO - initialize ResNet with init_cfg {'type': 'Pretrained', 'checkpoint': 'torchvision://resnet50'}
2023-03-17 20:44:15,342 - mmcv - INFO - load model from: torchvision://resnet50
2023-03-17 20:44:15,342 - mmcv - INFO - load checkpoint from torchvision path: torchvision://resnet50
2023-03-17 20:44:15,554 - mmcv - WARNING - The model and loaded state dict do not match exactly

unexpected key in source state_dict: fc.weight, fc.bias

2023-03-17 20:44:15,588 - mmdet - INFO - initialize FPN with init_cfg {'type': 'Xavier', 'layer': 'Conv2d', 'distribution': 'uniform'}
2023-03-17 20:44:15,615 - mmdet - INFO - initialize CondInstBoxHead with init_cfg {'type': 'Normal', 'layer': 'Conv2d', 'std': 0.01, 'override': {'type': 'Normal', 'name': 'conv_cls', 'std': 0.01, 'bias_prob': 0.01}}
2023-03-17 20:44:15,674 - mmdet - INFO - initialize CondInstMaskBranch with init_cfg {'type': 'Kaiming', 'layer': 'Conv2d', 'distribution': 'uniform', 'a': 1, 'mode': 'fan_in', 'nonlinearity': 'leaky_relu'}
2023-03-17 20:44:15,690 - mmdet - INFO - initialize CondInstMaskHead with init_cfg {'type': 'Normal', 'layer': 'Conv2d', 'std': 0.01, 'bias': 0}
loading annotations into memory...
loading annotations into memory...
Done (t=14.38s)
creating index...
index created!
Done (t=15.70s)
creating index...
index created!
fatal: not a git repository (or any parent up to mount point /mnt)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).
fatal: not a git repository (or any parent up to mount point /mnt)
Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set).
2023-03-17 20:44:34,908 - mmdet - INFO - Automatic scaling of learning rate (LR) has been disabled.
loading annotations into memory...
loading annotations into memory...
Done (t=0.46s)
creating index...
Done (t=0.50s)
creating index...
index created!
index created!
2023-03-17 20:44:35,477 - mmdet - INFO - Start running, host: lwt@ps, work_dir: /mnt/SSD/lwt_workdir/new_code/BoxInstSeg/work_dirs/boxinst_coco_3x
2023-03-17 20:44:35,477 - mmdet - INFO - Hooks will be executed in the following order:
before_run:
(VERY_HIGH ) StepLrUpdaterHook
(NORMAL ) CheckpointHook
(LOW ) DistEvalHook
(VERY_LOW ) TextLoggerHook
before_train_epoch:
(VERY_HIGH ) StepLrUpdaterHook
(NORMAL ) NumClassCheckHook
(NORMAL ) DistSamplerSeedHook
(LOW ) IterTimerHook
(LOW ) DistEvalHook
(VERY_LOW ) TextLoggerHook
before_train_iter:
(VERY_HIGH ) StepLrUpdaterHook
(LOW ) IterTimerHook
(LOW ) DistEvalHook
after_train_iter:
(ABOVE_NORMAL) OptimizerHook
(NORMAL ) CheckpointHook
(LOW ) IterTimerHook
(LOW ) DistEvalHook
(VERY_LOW ) TextLoggerHook
after_train_epoch:
(NORMAL ) CheckpointHook
(LOW ) DistEvalHook
(VERY_LOW ) TextLoggerHook
before_val_epoch:
(NORMAL ) NumClassCheckHook
(NORMAL ) DistSamplerSeedHook
(LOW ) IterTimerHook
(VERY_LOW ) TextLoggerHook
before_val_iter:
(LOW ) IterTimerHook
after_val_iter:
(LOW ) IterTimerHook
after_val_epoch:
(VERY_LOW ) TextLoggerHook
after_run:
(VERY_LOW ) TextLoggerHook

2023-03-17 20:44:35,478 - mmdet - INFO - workflow: [('train', 1)], max: 36 epochs
2023-03-17 20:44:35,478 - mmdet - INFO - Checkpoints will be saved to /mnt/SSD/lwt_workdir/new_code/BoxInstSeg/work_dirs/boxinst_coco_3x by HardDiskBackend.
2023-03-17 20:44:41,941 - mmcv - INFO - Reducer buckets have been rebuilt in this iteration.
2023-03-17 20:45:02,344 - mmdet - INFO - Epoch [1][50/29317] lr: 9.890e-04, eta: 6 days, 13:22:14, time: 0.537, data_time: 0.114, memory: 5067, loss_cls: 1.0509, loss_bbox: 0.8177, loss_centerness: 0.6783, loss_prj: 0.8812, loss_pairwise: 0.0009, loss: 3.4290
2023-03-17 20:45:23,345 - mmdet - INFO - Epoch [1][100/29317] lr: 1.988e-03, eta: 5 days, 20:11:35, time: 0.420, data_time: 0.005, memory: 5207, loss_cls: 0.9549, loss_bbox: 0.5695, loss_centerness: 0.6804, loss_prj: 0.4198, loss_pairwise: 0.0016, loss: 2.6263
2023-03-17 20:45:43,824 - mmdet - INFO - Epoch [1][150/29317] lr: 2.987e-03, eta: 5 days, 13:31:37, time: 0.410, data_time: 0.005, memory: 5207, loss_cls: 0.9264, loss_bbox: 0.5596, loss_centerness: 0.6768, loss_prj: 0.3729, loss_pairwise: 0.0025, loss: 2.5382
2023-03-17 20:46:04,404 - mmdet - INFO - Epoch [1][200/29317] lr: 3.986e-03, eta: 5 days, 10:18:03, time: 0.412, data_time: 0.005, memory: 5207, loss_cls: 0.9353, loss_bbox: 0.5364, loss_centerness: 0.6692, loss_prj: 0.3599, loss_pairwise: 0.0035, loss: 2.5043
2023-03-17 20:46:24,665 - mmdet - INFO - Epoch [1][250/29317] lr: 4.985e-03, eta: 5 days, 7:58:27, time: 0.405, data_time: 0.005, memory: 5207, loss_cls: 0.8976, loss_bbox: 0.5471, loss_centerness: 0.6722, loss_prj: 0.3683, loss_pairwise: 0.0039, loss: 2.4890
2023-03-17 20:46:45,612 - mmdet - INFO - Epoch [1][300/29317] lr: 5.984e-03, eta: 5 days, 7:06:07, time: 0.419, data_time: 0.005, memory: 5207, loss_cls: 0.9304, loss_bbox: 0.5266, loss_centerness: 0.6694, loss_prj: 0.3763, loss_pairwise: 0.0052, loss: 2.5079
2023-03-17 20:47:06,033 - mmdet - INFO - Epoch [1][350/29317] lr: 6.983e-03, eta: 5 days, 6:03:05, time: 0.409, data_time: 0.005, memory: 5207, loss_cls: 0.8870, loss_bbox: 0.5204, loss_centerness: 0.6681, loss_prj: 0.3518, loss_pairwise: 0.0054, loss: 2.4326
2023-03-17 20:47:26,702 - mmdet - INFO - Epoch [1][400/29317] lr: 7.982e-03, eta: 5 days, 5:25:24, time: 0.413, data_time: 0.005, memory: 5207, loss_cls: 0.8060, loss_bbox: 0.5170, loss_centerness: 0.6693, loss_prj: 0.3397, loss_pairwise: 0.0062, loss: 2.3382
2023-03-17 20:47:47,521 - mmdet - INFO - Epoch [1][450/29317] lr: 8.981e-03, eta: 5 days, 5:02:25, time: 0.416, data_time: 0.005, memory: 5207, loss_cls: 0.8213, loss_bbox: 0.5000, loss_centerness: 0.6686, loss_prj: 0.3202, loss_pairwise: 0.0066, loss: 2.3167
2023-03-17 20:48:08,123 - mmdet - INFO - Epoch [1][500/29317] lr: 9.980e-03, eta: 5 days, 4:36:39, time: 0.412, data_time: 0.006, memory: 5207, loss_cls: 0.8300, loss_bbox: 0.4803, loss_centerness: 0.6600, loss_prj: 0.3168, loss_pairwise: 0.0073, loss: 2.2944

Besides, the annotations of your dataset seem correct, judging from the sample you provided.

from boxinstseg.

loss 'nan',Customised datasets with 1 category in boxinst about boxinstseg HOT 2 OPEN

Comments (2)

Related Issues (20)

Recommend Projects

React

Vue.js

Typescript

TensorFlow

Django

Laravel

D3

Recommend Topics

javascript

web

server

Machine learning

Visualization

Game

Recommend Org

Facebook

Microsoft

Google

Alibaba

D3

Tencent

Jobs