Comments (5)
@kimys1084 Can you post your demo_webcam.py code?
from pelee.
from future import print_function
import caffe
from caffe.model_libs import *
from caffe import layers as L
from caffe import params as P
from caffe.model_libs import check_if_exist, make_if_not_exist, make_if_not_exist
#from feature_extractor import CreateMultiBoxHead
from caffe.proto import caffe_pb2
from google.protobuf import text_format
from feature_extractor import VGG_RUN, VGG_SSD, Pelee
import math
import os
import shutil
import stat
import subprocess
import sys
model_meta = {
'pelee':Pelee,
'ssd':VGG_SSD,
'run':VGG_RUN
}
# Add extra layers on top of a "base" network (e.g. VGGNet or Inception).
def AddExtraLayers(net, use_batchnorm=True, lr_mult=1):
    """Append the extra SSD feature-map convolutions (conv6_* .. conv9_*).

    Each pair is a 1x1 channel-reduction conv followed by a 3x3 conv that
    either halves the resolution (stride 2) or shrinks it (pad 0), yielding
    the 10x10, 5x5, 3x3 and 1x1 prediction sources on top of a 19x19 input.

    Args:
        net: caffe.NetSpec to extend; the layer added last to ``net`` is
            used as the input of the first extra conv.
        use_batchnorm: if True, add batch norm to every new conv.
        lr_mult: learning-rate multiplier forwarded to ConvBNLayer.

    Returns:
        The same ``net``, mutated in place.
    """
    use_relu = True
    # Start from whatever layer was added last (e.g. fc7, 19 x 19).
    from_layer = net.keys()[-1]

    # (name, num_output, kernel_size, pad, stride) for each extra conv.
    conv_specs = [
        ("conv6_1", 256, 1, 0, 1),
        ("conv6_2", 512, 3, 1, 2),  # 19 x 19 -> 10 x 10
        ("conv7_1", 128, 1, 0, 1),
        ("conv7_2", 256, 3, 1, 2),  # 10 x 10 -> 5 x 5
        ("conv8_1", 128, 1, 0, 1),
        ("conv8_2", 256, 3, 0, 1),  # 5 x 5 -> 3 x 3
        ("conv9_1", 128, 1, 0, 1),
        ("conv9_2", 256, 3, 0, 1),  # 3 x 3 -> 1 x 1
    ]
    for out_layer, num_output, kernel, pad, stride in conv_specs:
        ConvBNLayer(net, from_layer, out_layer, use_batchnorm, use_relu,
                    num_output, kernel, pad, stride, lr_mult=lr_mult)
        from_layer = out_layer
    return net
# Modify the following parameters accordingly.

# The directory which contains the caffe code.
# We assume you are running the script at the CAFFE_ROOT.
caffe_root = os.getcwd()

# Set true if you want to start training right after generating all files.
run_soon = True

# The parameters for the webcam demo.
# The device id for the webcam.
webcam_id = 0
# Number of frames to be skipped.
skip_frames = 0

# Key parameters used in training.
# If true, use batch norm for all newly added layers.
# Currently only the non batch norm version has been tested.
use_batchnorm = False
num_classes = 21
share_location = True
background_label_id = 0
conf_loss_type = P.MultiBoxLoss.SOFTMAX
code_type = P.PriorBox.CENTER_SIZE
lr_mult = 1.

# Stores LabelMapItem.
label_map_file = "data/VOC0712/labelmap_voc.prototxt"

# The resized image size.
resize_width = 304
resize_height = 304

# Parameters needed for test.
# Set the number of test iterations to the maximum integer number.
test_iter = int(math.pow(2, 29) - 1)
# Use GPU or CPU.
solver_mode = P.Solver.GPU
# Defining which GPUs to use.
gpus = "0"
# Number of frames to be processed per batch.
test_batch_size = 1
# Only display high quality detections whose scores are higher than a threshold.
visualize_threshold = 0.4
# Size of webcam image.
webcam_width = 640
webcam_height = 480
# Scale the image size for display.
scale = 1.5

# Hopefully you don't need to change the following.
resize = "{}x{}".format(resize_width, resize_height)
video_data_param = {
    'video_type': P.VideoData.WEBCAM,
    'device_id': webcam_id,
    'skip_frames': skip_frames,
}
# Transform applied to frames fed into the network (mean-subtract + warp
# resize to the network input size).
test_transform_param = {
    'mean_value': [103.94, 116.78, 123.68],
    'resize_param': {
        'prob': 1,
        'resize_mode': P.Resize.WARP,
        'height': resize_height,
        'width': resize_width,
        'interp_mode': [P.Resize.LINEAR],
    },
}
# Transform applied to the frames used for display (scaled webcam size).
output_transform_param = {
    'mean_value': [104, 117, 123],
    'resize_param': {
        'prob': 1,
        'resize_mode': P.Resize.WARP,
        'height': int(webcam_height * scale),
        'width': int(webcam_width * scale),
        'interp_mode': [P.Resize.LINEAR],
    },
}
# Parameters for generating detection output.
det_out_param = {
    'num_classes': num_classes,
    'share_location': share_location,
    'background_label_id': background_label_id,
    'nms_param': {'nms_threshold': 0.2, 'top_k': 400},
    'save_output_param': {
        'label_map_file': label_map_file,
    },
    'keep_top_k': 200,
    'confidence_threshold': 0.01,
    'code_type': code_type,
    'visualize': True,
    'visualize_threshold': visualize_threshold,
}
# The job name should be same as the name used in examples/ssd/ssd_pascal.py.
job_name = "pelee_{}".format(resize)
# The name of the model. Modify it if you want.
model_name = "{}".format(job_name)
# Directory which stores the model .prototxt file.
save_dir = "models/pelee/VOC0712/{}_webcam".format(job_name)
# Directory which stores the snapshot of trained models.
# snapshot_dir = "models/VGGNet/VOC0712/{}".format(job_name)
snapshot_dir = "examples/Pelee/models/pelee/VOC0712/{}".format(job_name)
# Directory which stores the job script and log file.
job_dir = "jobs/VGGNet/VOC0712/{}_webcam".format(job_name)
# Model definition file.
test_net_file = "{}/test.prototxt".format(save_dir)
# Snapshot prefix.
# model_name = " peleenet_304X304"
snapshot_prefix = "{}/{}".format(snapshot_dir, model_name)
# Job script path.
job_file = "{}/{}.sh".format(job_dir, model_name)

# Find most recent snapshot.
# NOTE(review): ``basename`` is computed but never used — vestigial from the
# upstream ssd_pascal_webcam.py, which derived the latest iteration number
# here. The resume model below just uses the bare snapshot prefix.
for file in os.listdir(snapshot_dir):
    if file.endswith(".caffemodel"):
        basename = os.path.splitext(file)[0]

# The resume model.
pretrain_model = "{}.caffemodel".format(snapshot_prefix)
# Parameters for generating priors.
# Minimum dimension of input image.
min_dim = 300
# Feature maps used as multibox sources; for reference, the VGG-SSD ones:
#   conv4_3 ==> 38 x 38
#   fc7     ==> 19 x 19
#   conv6_2 ==> 10 x 10
#   conv7_2 ==> 5 x 5
#   conv8_2 ==> 3 x 3
#   conv9_2 ==> 1 x 1
# mbox_source_layers = ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']
NetBuilder = model_meta['pelee']
mbox_source_layers = NetBuilder.mbox_source_layers
# Prior-box scale range, in percent of min_dim.
min_ratio = 20
max_ratio = 90
step = int(math.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - 2)))
min_sizes = []
max_sizes = []
for ratio in xrange(min_ratio, max_ratio + 1, step):
    min_sizes.append(min_dim * ratio / 100.)
    max_sizes.append(min_dim * (ratio + step) / 100.)
# Prepend the 10%/20% sizes for the first (highest-resolution) source layer.
min_sizes = [min_dim * 10 / 100.] + min_sizes
max_sizes = [min_dim * 20 / 100.] + max_sizes
steps = [8, 16, 32, 64, 100, 300]
# aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
aspect_ratios = [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]
# normalizations = None
# L2 normalize conv4_3.
normalizations = [20, -1, -1, -1, -1, -1]
# Variance used to encode/decode prior bboxes.
if code_type == P.PriorBox.CENTER_SIZE:
    prior_variance = [0.1, 0.1, 0.2, 0.2]
else:
    prior_variance = [0.1]
flip = True
clip = False

# Check files and create output directories.
check_if_exist(label_map_file)
check_if_exist(pretrain_model)
make_if_not_exist(save_dir)
make_if_not_exist(job_dir)
make_if_not_exist(snapshot_dir)
# Create test net.
net = caffe.NetSpec()
net.data = L.VideoData(video_data_param=video_data_param,
                       data_param=dict(batch_size=test_batch_size),
                       transform_param=test_transform_param)
# Build the backbone on top of the data layer (Pelee here; the extra SSD
# source layers are added inside the builder, so AddExtraLayers is unused).
# Upstream alternative:
#   VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True,
#              dilated=True, dropout=False)
NetBuilder(net, from_layer='data', use_batchnorm=False)
# AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)

# Attach the multibox heads (loc/conf/priorbox) to every source layer.
mbox_layers = CreateMultiBoxHead(net, data_layer='data', from_layers=mbox_source_layers,
    use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes,
    aspect_ratios=aspect_ratios, steps=steps, normalizations=normalizations,
    num_classes=num_classes, share_location=share_location, flip=flip, clip=clip,
    prior_variance=prior_variance, kernel_size=3, pad=1, lr_mult=lr_mult)

# Turn raw confidence scores into per-class probabilities for inference.
conf_name = "mbox_conf"
if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
    reshape_name = "{}_reshape".format(conf_name)
    net[reshape_name] = L.Reshape(net[conf_name], shape=dict(dim=[0, -1, num_classes]))
    softmax_name = "{}_softmax".format(conf_name)
    net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
    flatten_name = "{}_flatten".format(conf_name)
    net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
    mbox_layers[1] = net[flatten_name]
elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
    sigmoid_name = "{}_sigmoid".format(conf_name)
    net[sigmoid_name] = L.Sigmoid(net[conf_name])
    mbox_layers[1] = net[sigmoid_name]

# The data blob is passed so DetectionOutput can visualize on the frames.
mbox_layers.append(net.data)
net.detection_out = L.DetectionOutput(*mbox_layers,
    detection_output_param=det_out_param,
    transform_param=output_transform_param,
    include=dict(phase=caffe_pb2.Phase.Value('TEST')))
# "slience" (sic) kept as-is: renaming would change the generated layer name.
net.slience = L.Silence(net.detection_out, ntop=0,
    include=dict(phase=caffe_pb2.Phase.Value('TEST')))

# Write out the test prototxt and mirror it into the job directory.
with open(test_net_file, 'w') as f:
    print('name: "{}_test"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(test_net_file, job_dir)
shutil.copy(test_net_file, job_dir)
Create job file.
with open(job_file, 'w') as f:
f.write('cd {}\n'.format(caffe_root))
f.write('./build/tools/caffe test \\n')
f.write('--model="{}" \\n'.format(test_net_file))
f.write('--weights="{}" \\n'.format(pretrain_model))
f.write('--iterations="{}" \\n'.format(test_iter))
if solver_mode == P.Solver.GPU:
f.write('--gpu {}\n'.format(gpus))
Copy the python script to job_dir.
py_file = os.path.abspath(file)
shutil.copy(py_file, job_dir)
Run the job.
os.chmod(job_file, stat.S_IRWXU)
if run_soon:
subprocess.call(job_file, shell=True)
from pelee.
This is the whole code; I referred to this: https://github.com/weiliu89/caffe/tree/ssd
ssd_pascal_webcam.py
thank you
from pelee.
I will check the code, @kimys1084 — please wait!
from pelee.
@kimys1084 Are you able to run the webcam now? I am also debugging it.
from pelee.
Related Issues (20)
- fine tuning with different number of classes HOT 1
- peele-SSD add_extra_layers_pelee
- About the stanford dog dataset. HOT 3
- 2-way dense layer in code and paper seems mismatch. HOT 2
- how can i get the fps=120 on nvidia tx2? please help me HOT 1
- Calculation of number of parameter, macc, and flops HOT 3
- pytorch pretained model
- max_iter
- Does it support 512 or bigger input size? HOT 1
- question for iteration HOT 1
- can not download the pretrained PeleeNet model
- one question
- Question for peleeNet structure
- How can I train my own model?
- train error
- peleeNet speed in GTX1080ti HOT 1
- Question about 1x1 convolutional kernels to reduce computational cost
- the paper was accepted two years ago???
- Pelee input resolution problem
- question about merge bn?
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from pelee.