Comments (5)
@kimys1084 Can you post your demo_webcam.py code?
from pelee.
from future import print_function
import caffe
from caffe.model_libs import *
from caffe import layers as L
from caffe import params as P
from caffe.model_libs import check_if_exist, make_if_not_exist, make_if_not_exist
#from feature_extractor import CreateMultiBoxHead
from caffe.proto import caffe_pb2
from google.protobuf import text_format
from feature_extractor import VGG_RUN, VGG_SSD, Pelee
import math
import os
import shutil
import stat
import subprocess
import sys
model_meta = {
'pelee':Pelee,
'ssd':VGG_SSD,
'run':VGG_RUN
}
# Add extra layers on top of a "base" network (e.g. VGGNet or Inception).
def AddExtraLayers(net, use_batchnorm=True, lr_mult=1):
    """Append the extra SSD feature-map convolutions (conv6_* .. conv9_*).

    Each pair is a 1x1 channel-reduction conv followed by a 3x3 conv that
    either halves the resolution (stride 2) or shrinks it (pad 0), yielding
    the 10x10, 5x5, 3x3 and 1x1 prediction sources on top of a 19x19 input.

    Args:
        net: caffe.NetSpec to extend; the layer added last to ``net`` is
            used as the input of the first extra conv.
        use_batchnorm: if True, add batch norm to every new conv.
        lr_mult: learning-rate multiplier forwarded to ConvBNLayer.

    Returns:
        The same ``net``, mutated in place.
    """
    use_relu = True
    # Start from whatever layer was added last (e.g. fc7, 19 x 19).
    from_layer = net.keys()[-1]

    # (name, num_output, kernel_size, pad, stride) for each extra conv.
    conv_specs = [
        ("conv6_1", 256, 1, 0, 1),
        ("conv6_2", 512, 3, 1, 2),  # 19 x 19 -> 10 x 10
        ("conv7_1", 128, 1, 0, 1),
        ("conv7_2", 256, 3, 1, 2),  # 10 x 10 -> 5 x 5
        ("conv8_1", 128, 1, 0, 1),
        ("conv8_2", 256, 3, 0, 1),  # 5 x 5 -> 3 x 3
        ("conv9_1", 128, 1, 0, 1),
        ("conv9_2", 256, 3, 0, 1),  # 3 x 3 -> 1 x 1
    ]
    for out_layer, num_output, kernel, pad, stride in conv_specs:
        ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu,
                    num_output, kernel, pad, stride, lr_mult=lr_mult)
        from_layer = out_layer
    return net
# Modify the following parameters accordingly.

# The directory which contains the caffe code.
# We assume you are running the script at the CAFFE_ROOT.
caffe_root = os.getcwd()

# Set true if you want to start training right after generating all files.
run_soon = True

# The parameters for the webcam demo.
# The device id for the webcam.
webcam_id = 0
# Number of frames to be skipped.
skip_frames = 0

# Key parameters used in training.
# If true, use batch norm for all newly added layers.
# Currently only the non batch norm version has been tested.
use_batchnorm = False
num_classes = 21
share_location = True
background_label_id = 0
conf_loss_type = P.MultiBoxLoss.SOFTMAX
code_type = P.PriorBox.CENTER_SIZE
lr_mult = 1.

# Stores LabelMapItem.
label_map_file = "data/VOC0712/labelmap_voc.prototxt"

# The resized image size.
resize_width = 304
resize_height = 304

# Parameters needed for test.
# Set the number of test iterations to the maximum integer number.
test_iter = int(math.pow(2, 29) - 1)
# Use GPU or CPU.
solver_mode = P.Solver.GPU
# Defining which GPUs to use.
gpus = "0"
# Number of frames to be processed per batch.
test_batch_size = 1
# Only display high quality detections whose scores are higher than a threshold.
visualize_threshold = 0.4
# Size of webcam image.
webcam_width = 640
webcam_height = 480
# Scale the image size for display.
scale = 1.5

# Hopefully you don't need to change the following.
resize = "{}x{}".format(resize_width, resize_height)
video_data_param = {
    'video_type': P.VideoData.WEBCAM,
    'device_id': webcam_id,
    'skip_frames': skip_frames,
}
# Transform applied to frames fed into the network (mean-subtract + warp
# resize to the network input size).
test_transform_param = {
    'mean_value': [103.94, 116.78, 123.68],
    'resize_param': {
        'prob': 1,
        'resize_mode': P.Resize.WARP,
        'height': resize_height,
        'width': resize_width,
        'interp_mode': [P.Resize.LINEAR],
    },
}
# Transform applied to the frames used for display (scaled webcam size).
output_transform_param = {
    'mean_value': [104, 117, 123],
    'resize_param': {
        'prob': 1,
        'resize_mode': P.Resize.WARP,
        'height': int(webcam_height * scale),
        'width': int(webcam_width * scale),
        'interp_mode': [P.Resize.LINEAR],
    },
}
# Parameters for generating detection output.
det_out_param = {
    'num_classes': num_classes,
    'share_location': share_location,
    'background_label_id': background_label_id,
    'nms_param': {'nms_threshold': 0.2, 'top_k': 400},
    'save_output_param': {
        'label_map_file': label_map_file,
    },
    'keep_top_k': 200,
    'confidence_threshold': 0.01,
    'code_type': code_type,
    'visualize': True,
    'visualize_threshold': visualize_threshold,
}
# The job name should be same as the name used in examples/ssd/ssd_pascal.py.
job_name = "pelee_{}".format(resize)
# The name of the model. Modify it if you want.
model_name = "{}".format(job_name)
# Directory which stores the model .prototxt file.
save_dir = "models/pelee/VOC0712/{}_webcam".format(job_name)
# Directory which stores the snapshot of trained models.
# snapshot_dir = "models/VGGNet/VOC0712/{}".format(job_name)
snapshot_dir = "examples/Pelee/models/pelee/VOC0712/{}".format(job_name)
# Directory which stores the job script and log file.
job_dir = "jobs/VGGNet/VOC0712/{}_webcam".format(job_name)
# Model definition file.
test_net_file = "{}/test.prototxt".format(save_dir)
# Snapshot prefix.
# model_name = " peleenet_304X304"
snapshot_prefix = "{}/{}".format(snapshot_dir, model_name)
# Job script path.
job_file = "{}/{}.sh".format(job_dir, model_name)

# Find most recent snapshot.
# NOTE(review): ``basename`` is computed but never used — vestigial from the
# upstream ssd_pascal_webcam.py, which derived the latest iteration number
# here. The resume model below just uses the bare snapshot prefix.
for file in os.listdir(snapshot_dir):
    if file.endswith(".caffemodel"):
        basename = os.path.splitext(file)[0]

# The resume model.
pretrain_model = "{}.caffemodel".format(snapshot_prefix)
# Parameters for generating priors.
# Minimum dimension of input image.
min_dim = 300
# Feature maps used as multibox sources; for reference, the VGG-SSD ones:
#   conv4_3 ==> 38 x 38
#   fc7     ==> 19 x 19
#   conv6_2 ==> 10 x 10
#   conv7_2 ==> 5 x 5
#   conv8_2 ==> 3 x 3
#   conv9_2 ==> 1 x 1
# mbox_source_layers = ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']
NetBuilder = model_meta['pelee']
mbox_source_layers = NetBuilder.mbox_source_layers
# Prior-box scale range, in percent of min_dim.
min_ratio = 20
max_ratio = 90
step = int(math.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - 2)))
min_sizes = []
max_sizes = []
for ratio in xrange(min_ratio, max_ratio + 1, step):
    min_sizes.append(min_dim * ratio / 100.)
    max_sizes.append(min_dim * (ratio + step) / 100.)
# Prepend the 10%/20% sizes for the first (highest-resolution) source layer.
min_sizes = [min_dim * 10 / 100.] + min_sizes
max_sizes = [min_dim * 20 / 100.] + max_sizes
steps = [8, 16, 32, 64, 100, 300]
# aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
aspect_ratios = [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
# normalizations = None
# L2 normalize conv4_3.
normalizations = [20, -1, -1, -1, -1, -1]
# Variance used to encode/decode prior bboxes.
if code_type == P.PriorBox.CENTER_SIZE:
    prior_variance = [0.1, 0.1, 0.2, 0.2]
else:
    prior_variance = [0.1]
flip = True
clip = False

# Check files and create output directories.
check_if_exist(label_map_file)
check_if_exist(pretrain_model)
make_if_not_exist(save_dir)
make_if_not_exist(job_dir)
make_if_not_exist(snapshot_dir)
# Create test net.
net = caffe.NetSpec()
net.data = L.VideoData(video_data_param=video_data_param,
                       data_param=dict(batch_size=test_batch_size),
                       transform_param=test_transform_param)
# Build the backbone on top of the data layer (Pelee here; the extra SSD
# source layers are added inside the builder, so AddExtraLayers is unused).
# Upstream alternative:
#   VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True,
#              dilated=True, dropout=False)
NetBuilder(net, from_layer='data', use_batchnorm=False)
# AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)

# Attach the multibox heads (loc/conf/priorbox) to every source layer.
mbox_layers = CreateMultiBoxHead(net, data_layer='data', from_layers=mbox_source_layers,
    use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes,
    aspect_ratios=aspect_ratios, steps=steps, normalizations=normalizations,
    num_classes=num_classes, share_location=share_location, flip=flip, clip=clip,
    prior_variance=prior_variance, kernel_size=3, pad=1, lr_mult=lr_mult)

# Turn raw confidence scores into per-class probabilities for inference.
conf_name = "mbox_conf"
if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
    reshape_name = "{}_reshape".format(conf_name)
    net[reshape_name] = L.Reshape(net[conf_name], shape=dict(dim=[0, -1, num_classes]))
    softmax_name = "{}_softmax".format(conf_name)
    net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
    flatten_name = "{}_flatten".format(conf_name)
    net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
    mbox_layers[1] = net[flatten_name]
elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
    sigmoid_name = "{}_sigmoid".format(conf_name)
    net[sigmoid_name] = L.Sigmoid(net[conf_name])
    mbox_layers[1] = net[sigmoid_name]

# The data blob is passed so DetectionOutput can visualize on the frames.
mbox_layers.append(net.data)
net.detection_out = L.DetectionOutput(*mbox_layers,
    detection_output_param=det_out_param,
    transform_param=output_transform_param,
    include=dict(phase=caffe_pb2.Phase.Value('TEST')))
# "slience" (sic) kept as-is: renaming would change the generated layer name.
net.slience = L.Silence(net.detection_out, ntop=0,
    include=dict(phase=caffe_pb2.Phase.Value('TEST')))

# Write out the test prototxt and mirror it into the job directory.
with open(test_net_file, 'w') as f:
    print('name: "{}_test"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(test_net_file, job_dir)
shutil.copy(test_net_file, job_dir)
Create job file.
with open(job_file, 'w') as f:
f.write('cd {}\n'.format(caffe_root))
f.write('./build/tools/caffe test \\n')
f.write('--model="{}" \\n'.format(test_net_file))
f.write('--weights="{}" \\n'.format(pretrain_model))
f.write('--iterations="{}" \\n'.format(test_iter))
if solver_mode == P.Solver.GPU:
f.write('--gpu {}\n'.format(gpus))
Copy the python script to job_dir.
py_file = os.path.abspath(file)
shutil.copy(py_file, job_dir)
Run the job.
os.chmod(job_file, stat.S_IRWXU)
if run_soon:
subprocess.call(job_file, shell=True)
from pelee.
This is the whole code; I referred to this: https://github.com/weiliu89/caffe/tree/ssd
ssd_pascal_webcam.py
thank you
from pelee.
I will check the code, @kimys1084 — please wait!
from pelee.
@kimys1084 Are you able to run the webcam now? I am also debugging it.
from pelee.
Related Issues (20)
- fine tuning with different number of classes HOT 1
- peele-SSD add_extra_layers_pelee
- About the stanford dog dataset. HOT 3
- 2-way dense layer in code and paper seems mismatch. HOT 2
- how can i get the fps=120 on nvidia tx2? please help me HOT 1
- Calculation of number of parameter, macc, and flops HOT 3
- pytorch pretained model
- max_iter
- Does it support 512 or bigger input size? HOT 1
- question for iteration HOT 1
- can not download the pretrained PeleeNet model
- one question
- Question for peleeNet structure
- How can I train my own model?
- train error
- peleeNet speed in GTX1080ti HOT 1
- Question about 1x1 convolutional kernels to reduce computational cost
- the paper was accepted two years ago???
- Pelee input resolution problem
- question about merge bn?
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from pelee.