Comments (23)
@hasanirtiza I solved the problem with this modification. I think it could be a version problem. Thanks for your reply.
from pedestron.
Which dataset are you trying to train on ? Did you change anything in the configuration file ? Could you paste your config file here ?
from pedestron.
Thank you for your reply. I did not change anything in the configuration except the pretrained setting and the paths. The config file is as follows:
# model settings
model = dict(
type='CascadeRCNN',
num_stages=3,
pretrained=None,
backbone=dict(
type='HRNet',
extra=dict(
stage1=dict(
num_modules=1,
num_branches=1,
block='BOTTLENECK',
num_blocks=(4, ),
num_channels=(64, )),
stage2=dict(
num_modules=1,
num_branches=2,
block='BASIC',
num_blocks=(4, 4),
num_channels=(32, 64)),
stage3=dict(
num_modules=4,
num_branches=3,
block='BASIC',
num_blocks=(4, 4, 4),
num_channels=(32, 64, 128)),
stage4=dict(
num_modules=3,
num_branches=4,
block='BASIC',
num_blocks=(4, 4, 4, 4),
num_channels=(32, 64, 128, 256))
),
#frozen_stages=-1,
norm_eval=False,
),
neck=dict(
type='HRFPN',
in_channels=[32, 64, 128, 256],
out_channels=256),
rpn_head=dict(
type='RPNHead',
in_channels=256,
feat_channels=256,
anchor_scales=[8],
anchor_ratios=[0.5, 1.0, 2.0],
anchor_strides=[4, 8, 16, 32, 64],
target_means=[.0, .0, .0, .0],
target_stds=[1.0, 1.0, 1.0, 1.0],
loss_cls=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(
type='RoIAlign',
out_size=7,
sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
bbox_head=[
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=2,
target_means=[0., 0., 0., 0.],
target_stds=[0.1, 0.1, 0.2, 0.2],
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0,
loss_weight=1.0)),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=2,
target_means=[0., 0., 0., 0.],
target_stds=[0.05, 0.05, 0.1, 0.1],
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0,
loss_weight=1.0)),
dict(
type='SharedFCBBoxHead',
num_fcs=2,
in_channels=256,
fc_out_channels=1024,
roi_feat_size=7,
num_classes=2,
target_means=[0., 0., 0., 0.],
target_stds=[0.033, 0.033, 0.067, 0.067],
reg_class_agnostic=True,
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0,
loss_weight=1.0)),
],
mask_roi_extractor=dict(
type='SingleRoIExtractor',
roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2),
out_channels=256,
featmap_strides=[4, 8, 16, 32]),
mask_head=dict(
type='FCNMaskHead',
num_convs=4,
in_channels=256,
conv_out_channels=256,
num_classes=2,
loss_mask=dict(
type='CrossEntropyLoss', use_mask=True, loss_weight=1))
)
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
min_pos_iou=0.3,
ignore_iof_thr=0.7),
sampler=dict(
type='RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
add_gt_as_proposals=False),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_across_levels=False,
nms_pre=2000,
nms_post=2000,
max_num=2000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=[
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
min_pos_iou=0.5,
ignore_iof_thr=0.7),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.6,
neg_iou_thr=0.6,
min_pos_iou=0.6,
ignore_iof_thr=0.7),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False),
dict(
assigner=dict(
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.7,
min_pos_iou=0.7,
ignore_iof_thr=0.7),
sampler=dict(
type='RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
add_gt_as_proposals=True),
mask_size=28,
pos_weight=-1,
debug=False)
],
stage_loss_weights=[1, 0.5, 0.25])
test_cfg = dict(
rpn=dict(
nms_across_levels=False,
nms_pre=1000,
nms_post=1000,
max_num=1000,
nms_thr=0.7,
min_bbox_size=0),
rcnn=dict(
score_thr=0.001,
nms=dict(type='soft_nms', iou_thr=0.5),
max_per_img=100,
mask_thr_binary=0.5),
keep_all_stages=False)
# dataset settings
dataset_type = 'CocoDataset'
data_root = 'datasets/CrowdHuman/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=True)
data = dict(
imgs_per_gpu=1,
workers_per_gpu=5,
train=dict(
type=dataset_type,
# ann_file=[data_root + 'train.json',
# data_root + 'val.json'],
# img_prefix=[data_root + 'Images',
# data_root + 'Images_val'],
ann_file=data_root + 'train.json',
img_prefix=data_root + 'Images',
img_scale=[(1216, 608),(2048, 1024)],
multiscale_mode='range',
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0.5,
with_mask=True,
with_crowd=True,
with_label=True,
extra_aug=dict(
photo_metric_distortion=dict(brightness_delta=180, contrast_range=(0.5, 1.5),
saturation_range=(0.5, 1.5), hue_delta=18),
random_crop=dict(min_ious=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9), min_crop_size=0.1),
),
),
test=dict(
type=dataset_type,
ann_file=data_root + 'val.json',
img_prefix=data_root + 'Images_val',
img_scale=(2048, 1024),
img_norm_cfg=img_norm_cfg,
size_divisor=32,
flip_ratio=0,
with_mask=False,
with_label=False,
test_mode=True))
# optimizer
mean_teacher=True
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2), mean_teacher = dict(alpha=0.999))
# learning policy
lr_config = dict(
policy='cosine',
warmup='linear',
warmup_iters=500,
warmup_ratio=1.0 / 3,
step=[110, 160])
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
# dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 240
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/cityperson_cascade_rcnn_hrnetv2p_w32'
load_from = None
resume_from = None
workflow = [('train', 1)]
from pedestron.
Config seems fine and I cannot reproduce this issue locally. Now can you please paste your PyTorch etc. versions ?
from pedestron.
My env as follows:
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
_libgcc_mutex=0.1=main
_pytorch_select=0.2=gpu_0
blas=1.0=mkl
ca-certificates=2020.1.1=0
certifi=2020.4.5.1=py37_0
cffi=1.14.0=py37h2e261b9_0
cudatoolkit=10.0.130=0
cudnn=7.6.5=cuda10.0_0
cycler=0.10.0=py37_0
cython=0.29.17=py37he6710b0_0
freetype=2.9.1=h8a8886c_1
intel-openmp=2020.0=166
jpeg=9b=h024ee3a_2
libedit=3.1.20181209=hc058e9b_0
libffi=3.2.1=hd88cf55_4
libgcc-ng=9.1.0=hdf63c60_0
libgfortran-ng=7.3.0=hdf63c60_0
libpng=1.6.37=hbc83047_0
libstdcxx-ng=9.1.0=hdf63c60_0
libtiff=4.1.0=h2733197_0
mkl=2020.0=166
mkl-service=2.3.0=py37he904b0f_0
mkl_fft=1.0.15=py37ha843d7b_0
mkl_random=1.1.0=py37hd6b4f25_0
ncurses=6.2=he6710b0_1
ninja=1.9.0=py37hfd86e86_0
numpy=1.18.1=py37h4f9e942_0
numpy-base=1.18.1=py37hde5b4d6_1
olefile=0.46=py_0
openssl=1.0.2u=h7b6447c_0
pillow=7.1.2=py37hb39fc2d_0
pip=20.0.2=py37_1
pycparser=2.20=py_0
pyparsing=2.4.7=py_0
python=3.7.0=h6e4f718_3
pytorch=1.2.0=cuda100py37h938c94c_0
readline=7.0=h7b6447c_5
setuptools=46.1.3=py37_0
six=1.14.0=py37_0
sqlite=3.31.1=h62c20be_1
tk=8.6.8=hbc83047_0
torchvision=0.4.0=cuda100py37hecfc37a_0
wheel=0.34.2=py37_0
xz=5.2.5=h7b6447c_0
zlib=1.2.11=h7b6447c_3
zstd=1.3.7=h0b5b093_0
from pedestron.
Can you run the demo.py (using any of the pre-trained model) ?
from pedestron.
I can run demo.py (using the CrowdHuman pre-trained model). I only want to train on CrowdHuman from scratch.
from pedestron.
I cannot reproduce the error. I have just cloned the repo and tried training on CrowdHuman from scratch. It is running fine on my system (500 iterations so far). So it is either an installation issue (did it compile normally after running `python setup.py develop`?) or an incompatibility between the libraries. One more caveat: if you are also running the mmdetection framework for other projects, make sure you have a separate environment for Pedestron. mmdetection has since been upgraded to a newer version (mmdet and mmcv), and at least in my case that caused some issues.
from pedestron.
Thank you, there is no error in the compilation.
I have created an independent conda environment.
I do not run the mmdetection framework for any other project.
Can you provide a full environment file?
I also have a question: num_classes is set to 2 in the config file. Why is it not set to 3 (person, mask, and background)?
from pedestron.
# packages in environment at envs/Pedestron:
#
# Name Version Build Channel
_libgcc_mutex 0.1 main
addict 2.2.1 pypi_0 pypi
attrdict 2.0.1 pypi_0 pypi
blas 1.0 mkl
bzip2 1.0.6 h9a117a8_4
ca-certificates 2019.11.27 0
caffe2-cuda8.0-cudnn7 0.8.dev py36_2018.05.14 caffe2
cairo 1.14.12 h7636065_2
certifi 2019.11.28 py36_0
cffi 1.11.5 py36h9745a5d_0
chardet 3.0.4 pypi_0 pypi
cudatoolkit 9.0 h13b8566_0
cycler 0.10.0 py36h93f1223_0
cython 0.29.13 pypi_0 pypi
dbus 1.13.2 h714fa37_1
decorator 4.4.1 pypi_0 pypi
expat 2.2.5 he0dffb1_0
ffmpeg 3.4 h7264315_0
fontconfig 2.12.6 h49f89f6_0
freeglut 2.8.1 0 https://repo.continuum.io/pkgs/free
freetype 2.8 hab7d2ae_1
future 0.16.0 py36_1 https://repo.continuum.io/pkgs/free
gflags 2.2.1 hf484d3e_0
glib 2.56.1 h000015b_0
glog 0.3.5 hf484d3e_1
graphite2 1.3.11 hf63cedd_1
gst-plugins-base 1.14.0 hbbd80ab_1
gstreamer 1.14.0 hb453b48_1
h5py 2.8.0 py36hca9c191_0
harfbuzz 1.7.6 h5f0a787_1
hdf5 1.8.18 h6792536_1
icu 58.2 h9c2bf20_1
idna 2.8 pypi_0 pypi
imageio 2.6.1 pypi_0 pypi
intel-openmp 2018.0.0 8
jasper 2.0.14 h07fcdf6_0
jpeg 9b h024ee3a_2
kiwisolver 1.0.1 py36h764f252_0
libedit 3.1 heed3624_0
libffi 3.2.1 hd88cf55_4
libgcc-ng 7.2.0 hdf63c60_3
libgfortran-ng 7.2.0 hdf63c60_3
libglu 9.0.0 h0c0bdc1_1
libopus 1.2.1 hb9ed12e_0
libpng 1.6.34 hb9fc6fc_0
libprotobuf 3.5.2 h6f1eeef_0
libstdcxx-ng 7.2.0 hdf63c60_3
libtiff 4.0.9 h28f6b97_0
libvpx 1.6.1 h888fd40_0
libxcb 1.13 h1bed415_1
libxml2 2.9.8 hf84eae3_0
matplotlib 2.2.2 py36h0e671d2_1
mkl 2018.0.2 1
mkl_fft 1.0.1 py36h3010b51_0
mkl_random 1.0.1 py36h629b387_0
mmcv 0.2.14 pypi_0 pypi
mmdet 0.6.0+47c8e78 dev_0 <develop>
ncurses 6.0 h9df7e31_2
networkx 2.4 pypi_0 pypi
ninja 1.8.2 py36h6bb024c_1
numpy 1.14.3 py36hcd700cb_1
numpy-base 1.14.3 py36h9be14a7_1
olefile 0.45.1 py36_0
opencv 3.3.1 py36h9248ab4_2
opencv-python 4.1.1.26 pypi_0 pypi
openssl 1.0.2p h14c3975_0
pandas 0.23.4 py36h04863e7_0
patsy 0.5.1 pypi_0 pypi
pcre 8.42 h439df22_0
pillow 5.1.0 py36h3deb7b8_0
pip 19.2.3 pypi_0 pypi
pixman 0.34.0 hceecf20_3
protobuf 3.5.2 py36_0 conda-forge
psutil 5.4.6 py36h14c3975_0 anaconda
pycocotools 2.0 pypi_0 pypi
pycparser 2.18 py36hf9f622e_1
pyparsing 2.2.0 py36hee85983_1
pyqt 5.9.2 py36h751905a_0
python 3.6.5 hc3d631a_2
python-dateutil 2.7.2 py36_0
pytorch 1.1.0 py3.6_cuda9.0.176_cudnn7.5.1_0 pytorch
pytz 2018.4 py36_0
pywavelets 1.1.1 pypi_0 pypi
pyyaml 3.12 py36hafb9ca4_1
qt 5.9.5 h7e424d6_0
readline 7.0 ha6073c6_4
requests 2.22.0 pypi_0 pypi
scikit-image 0.16.2 pypi_0 pypi
scikit-learn 0.19.1 py36h7aa7ec6_0
scipy 1.1.0 py36hfc37229_0
seaborn 0.9.0 pyh91ea838_1
setuptools 39.1.0 py36_0
sip 4.19.8 py36hf484d3e_0
six 1.11.0 py36h372c433_1
sqlite 3.23.1 he433501_0
statsmodels 0.10.1 pypi_0 pypi
tbb 2018.0.4 h6bb024c_1 anaconda
tbb4py 2018.0.4 py36h6bb024c_1 anaconda
tensorboardx 1.4 pypi_0 pypi
tk 8.6.7 hc745277_3
torchvision 0.3.0 py36_cu9.0.176_1 pytorch
tornado 5.0.2 py36_0
tqdm 4.23.0 py36_0
urllib3 1.25.7 pypi_0 pypi
wheel 0.31.0 py36_0
xz 5.2.3 h5e939de_4
yaml 0.1.7 had09818_2
zlib 1.2.11 ha838bed_2
Lots of unwanted packages as well, ignore them.
from pedestron.
Thank you, there is no error in the compilation.
I have created an independent conda environment.
I do not run the mmdetection framework for any other project.
Can you provide a full environment file?
I also have a question: num_classes is set to 2 in the config file. Why is it not set to 3 (person, mask, and background)?
The reason is that Pedestron generates masks on the fly (see #13). Therefore, we are not really interested in the masks. You can change it if you want.
from pedestron.
Thank you, I have copied your environment and it has run successfully.
How can I load your pre-trained model and continue training it on the CrowdHuman dataset?
Also, can you tell me what mAP it achieves on the CrowdHuman dataset?
from pedestron.
In the config file, you have `load_from = None`. Change `None` to the model you want to load from, for example, `load_from = 'my_path/epoch_1.pth'`. I think the published model has an AP of around 84.
from pedestron.
When I try to load your pre-trained model (epoch_19.pth.stu), it reported an error.
As follows:
OSError: epoch_19.pth is not a checkpoint file
from pedestron.
Yes, you actually need a teacher model not a student one (not .stu but .pth). Can you try simply making a copy of epoch_19.pth.stu and name it epoch_19.pth and pass this .pth model. See if it works.
from pedestron.
You are right!
Thank you for your patient reply!
from pedestron.
from pedestron.
what is the difference between teacher model and student model?
from pedestron.
The weights of the two models are saved slightly differently. Mean teacher is a method that smooths the weights during training, and the smoothed weights are helpful. You can read more about it in this NIPS paper
from pedestron.
What is the cause of this problem? How did you solve it? @muzishen
from pedestron.
According to the author's reply: "Yes, you actually need a teacher model not a student one (not .stu but .pth). Can you try simply making a copy of epoch_19.pth.stu and name it epoch_19.pth and pass this .pth model. See if it works."
from pedestron.
Sorry, I don't think I made myself clear. I mean this problem: "RuntimeError: Expected object of scalar type Byte but got scalar type Bool for argument #2 'other'". @muzishen
from pedestron.
@yuanvq Actually this is an error that I have also reproduced locally. Most probably it is down to the environment (mmdet, mmcv torch etc), make sure versions are as suggested by the repo. Another issue is that, if you are also running mmdetection (newer versions) or any other repo that uses mmdetection and you do not have separate environments, you might also encounter this problem.
By the way did you manage to solve it ?
from pedestron.
Related Issues (20)
- Impact of mean_teacher on the training process HOT 1
- CSP pretrained weights HOT 1
- Reproduce resutls on Caltech dataset HOT 3
- Information regarding the training HOT 1
- Image_scale of Caltech while training HOT 6
- ImportError: /pedestron/tools/../mmdet/ops/dcn/deform_conv_cuda.cpython-36m-x86_64-linux-gnu.so: undefined symbol: _ZN6caffe26detail37_typeMetaDataInstance_preallocated_32E HOT 1
- RuntimeError: Expected cudaMemcpy(&mask_host[0], mask_dev, sizeof(unsigned long long) * boxes_num * col_blocks, cudaMemcpyDeviceToHost) == cudaSuccess to be true, but got false. (Could this error message be improved? If so, please report an enhancement request to PyTorch.) HOT 1
- Training citypersons with all the instances? HOT 1
- Welcome update to OpenMMLab 2.0
- Runtime error - Training with EuroCity persons HOT 1
- Training Caltech using CSP HOT 1
- What's ignore_other_vru in ECP evaluation? HOT 2
- result has no bbox HOT 5
- mmdet/ops/roi_align/src/roi_align_kernel.cu(145): error: identifier "THCudaCheck" is undefined HOT 1
- Update instruction for 2023 HOT 1
- custom dataset test HOT 1
- Evaluate different pre-trained model
- A request for advice on pedestrian tracking work. HOT 2
- a bug when converting crowdhuman to coco format
- Testing a CP trained model on ECP val dataset HOT 3
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from pedestron.