GithubHelp home page GithubHelp logo

Comments (5)

ujsyehao avatar ujsyehao commented on July 24, 2024

@kimys1084 Can you post your demo_webcam.py code?

from pelee.

kimys1084 avatar kimys1084 commented on July 24, 2024

# NOTE: this is Python 2 code (print_function import here, xrange used below).
# The scrape had mangled `__future__` to `future`; restored.
from __future__ import print_function
import caffe
from caffe.model_libs import *
from caffe import layers as L
from caffe import params as P
from caffe.model_libs import check_if_exist, make_if_not_exist
#from feature_extractor import CreateMultiBoxHead
from caffe.proto import caffe_pb2

from google.protobuf import text_format
from feature_extractor import VGG_RUN, VGG_SSD, Pelee

import math
import os
import shutil
import stat
import subprocess
import sys

# Map a model key to the builder callable that constructs its backbone network.
model_meta = {
    'pelee': Pelee,
    'ssd': VGG_SSD,
    'run': VGG_RUN,
}

def AddExtraLayers(net, use_batchnorm=True, lr_mult=1):
    """Add extra layers on top of a "base" network (e.g. VGGNet or Inception).

    Appends the SSD conv6..conv9 feature pyramid after the last layer
    currently in `net` and returns the modified net.
    """
    use_relu = True

    # Additional convolutional layers start from the last layer in the
    # net so far (the 19 x 19 feature map).
    prev = net.keys()[-1]

    # TODO(weiliu89): Construct the name using the last layer to avoid duplication.
    # (layer name, num_output, kernel_size, pad, stride); comments give the
    # resulting feature-map resolution.
    extra_convs = [
        ("conv6_1", 256, 1, 0, 1),  # 10 x 10
        ("conv6_2", 512, 3, 1, 2),
        ("conv7_1", 128, 1, 0, 1),  # 5 x 5
        ("conv7_2", 256, 3, 1, 2),
        ("conv8_1", 128, 1, 0, 1),  # 3 x 3
        ("conv8_2", 256, 3, 0, 1),
        ("conv9_1", 128, 1, 0, 1),  # 1 x 1
        ("conv9_2", 256, 3, 0, 1),
    ]
    for name, num_output, kernel, pad, stride in extra_convs:
        ConvBNLayer(net, prev, name, use_batchnorm, use_relu,
                    num_output, kernel, pad, stride, lr_mult=lr_mult)
        prev = name

    return net

# Modify the following parameters accordingly

# The directory which contains the caffe code.
# We assume you are running the script at the CAFFE_ROOT.
caffe_root = os.getcwd()

# Set true if you want to start training right after generating all files.
run_soon = True

# The device id for webcam
webcam_id = 0

# Number of frames to be skipped.
skip_frames = 0

# The parameters for the webcam demo

# Key parameters used in training
# If true, use batch norm for all newly added layers.
# Currently only the non batch norm version has been tested.
use_batchnorm = False
num_classes = 21
share_location = True
background_label_id = 0
conf_loss_type = P.MultiBoxLoss.SOFTMAX
code_type = P.PriorBox.CENTER_SIZE
lr_mult = 1.

# Stores LabelMapItem.
label_map_file = "data/VOC0712/labelmap_voc.prototxt"

# The resized image size
resize_width = 304
resize_height = 304

# Parameters needed for test.
# Set the number of test iterations to the maximum integer number.
test_iter = int(math.pow(2, 29) - 1)

# Use GPU or CPU
solver_mode = P.Solver.GPU

# Defining which GPUs to use.
gpus = "0"

# Number of frames to be processed per batch.
test_batch_size = 1

# Only display high quality detections whose scores are higher than a threshold.
visualize_threshold = 0.4

# Size of webcam image.
webcam_width = 640
webcam_height = 480

# Scale the image size for display.
scale = 1.5

# Hopefully you don't need to change the following
resize = "{}x{}".format(resize_width, resize_height)

# Input source: grab frames straight from the webcam device.
video_data_param = {
    'video_type': P.VideoData.WEBCAM,
    'device_id': webcam_id,
    'skip_frames': skip_frames,
}
# Preprocessing applied to each frame before it enters the network.
test_transform_param = {
    'mean_value': [103.94, 116.78, 123.68],
    'resize_param': {
        'prob': 1,
        'resize_mode': P.Resize.WARP,
        'height': resize_height,
        'width': resize_width,
        'interp_mode': [P.Resize.LINEAR],
    }
}
# Transform used to map detections back onto the (scaled) display frame.
output_transform_param = {
    'mean_value': [104, 117, 123],
    'resize_param': {
        'prob': 1,
        'resize_mode': P.Resize.WARP,
        'height': int(webcam_height * scale),
        'width': int(webcam_width * scale),
        'interp_mode': [P.Resize.LINEAR],
    },
}

# parameters for generating detection output.
det_out_param = {
    'num_classes': num_classes,
    'share_location': share_location,
    'background_label_id': background_label_id,
    'nms_param': {'nms_threshold': 0.2, 'top_k': 400},
    'save_output_param': {
        'label_map_file': label_map_file,
    },
    'keep_top_k': 200,
    'confidence_threshold': 0.01,
    'code_type': code_type,
    'visualize': True,
    'visualize_threshold': visualize_threshold,
}

# The job name should be same as the name used in examples/ssd/ssd_pascal.py.
job_name = "pelee_{}".format(resize)
# The name of the model. Modify it if you want.
model_name = "{}".format(job_name)

# Directory which stores the model .prototxt file.
save_dir = "models/pelee/VOC0712/{}_webcam".format(job_name)
# Directory which stores the snapshot of trained models.
#snapshot_dir = "models/VGGNet/VOC0712/{}".format(job_name)
snapshot_dir = "examples/Pelee/models/pelee/VOC0712/{}".format(job_name)
# Directory which stores the job script and log file.
job_dir = "jobs/VGGNet/VOC0712/{}_webcam".format(job_name)

# model definition files.
test_net_file = "{}/test.prototxt".format(save_dir)
# snapshot prefix.
#model_name = " peleenet_304X304"
snapshot_prefix = "{}/{}".format(snapshot_dir, model_name)
# job script path.
job_file = "{}/{}.sh".format(job_dir, model_name)

# Find most recent snapshot.
# NOTE(review): `basename` is computed but never used afterwards; the resume
# model below is derived from `snapshot_prefix` instead. Kept for parity with
# the upstream script. Loop variable renamed so it no longer shadows the
# Python 2 builtin `file`.
for snapshot_file in os.listdir(snapshot_dir):
    if snapshot_file.endswith(".caffemodel"):
        basename = os.path.splitext(snapshot_file)[0]

# The resume model.
pretrain_model = "{}.caffemodel".format(snapshot_prefix)

# parameters for generating priors.
# minimum dimension of input image
min_dim = 300

# Feature-map resolutions of the candidate source layers:
# conv4_3 ==> 38 x 38
# fc7 ==> 19 x 19
# conv6_2 ==> 10 x 10
# conv7_2 ==> 5 x 5
# conv8_2 ==> 3 x 3
# conv9_2 ==> 1 x 1
#mbox_source_layers = ['conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2']
NetBuilder = model_meta['pelee']
mbox_source_layers = NetBuilder.mbox_source_layers

# in percent %
min_ratio = 20
max_ratio = 90
step = int(math.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - 2)))
min_sizes = []
max_sizes = []
for ratio in xrange(min_ratio, max_ratio + 1, step):
    min_sizes.append(min_dim * ratio / 100.)
    max_sizes.append(min_dim * (ratio + step) / 100.)
# Prepend the sizes for the first (highest-resolution) source layer.
min_sizes = [min_dim * 10 / 100.] + min_sizes
max_sizes = [min_dim * 20 / 100.] + max_sizes
steps = [8, 16, 32, 64, 100, 300]

#aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
aspect_ratios = [[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]]

#normalizations = None
# L2 normalize conv4_3.
normalizations = [20, -1, -1, -1, -1, -1]

# variance used to encode/decode prior bboxes.
if code_type == P.PriorBox.CENTER_SIZE:
    prior_variance = [0.1, 0.1, 0.2, 0.2]
else:
    prior_variance = [0.1]
flip = True
clip = False

# Check file.
check_if_exist(label_map_file)
check_if_exist(pretrain_model)
make_if_not_exist(save_dir)
make_if_not_exist(job_dir)
make_if_not_exist(snapshot_dir)

# Create test net.
net = caffe.NetSpec()
net.data = L.VideoData(video_data_param=video_data_param,
        data_param=dict(batch_size=test_batch_size),
        transform_param=test_transform_param)
'''
VGGNetBody(net, from_layer='data', fully_conv=True, reduced=True, dilated=True,
    dropout=False)
'''
#Pelee(net, from_layer = 'data', use_batchnorm = False)
NetBuilder(net, from_layer='data', use_batchnorm=False)

#AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)

#'''
mbox_layers = CreateMultiBoxHead(net, data_layer='data', from_layers=mbox_source_layers,
        use_batchnorm=use_batchnorm, min_sizes=min_sizes, max_sizes=max_sizes,
        aspect_ratios=aspect_ratios, steps=steps, normalizations=normalizations,
        num_classes=num_classes, share_location=share_location, flip=flip, clip=clip,
        prior_variance=prior_variance, kernel_size=3, pad=1, lr_mult=lr_mult)
#'''
'''
mbox_layers = CreateMultiBoxHead(net, data_layer='data', from_layers=mbox_source_layers,
        use_batchnorm=False, min_sizes=min_sizes, max_sizes=max_sizes, normalizations=normalizations,
        aspect_ratios=aspect_ratios, num_classes=num_classes, share_location=share_location,
        flip=flip, clip=clip, prior_variance=prior_variance, kernel_size=3, pad=1)
'''

# Apply the confidence activation matching the loss the model was trained with.
conf_name = "mbox_conf"
if conf_loss_type == P.MultiBoxLoss.SOFTMAX:
    reshape_name = "{}_reshape".format(conf_name)
    net[reshape_name] = L.Reshape(net[conf_name], shape=dict(dim=[0, -1, num_classes]))
    softmax_name = "{}_softmax".format(conf_name)
    net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
    flatten_name = "{}_flatten".format(conf_name)
    net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
    mbox_layers[1] = net[flatten_name]
elif conf_loss_type == P.MultiBoxLoss.LOGISTIC:
    sigmoid_name = "{}_sigmoid".format(conf_name)
    net[sigmoid_name] = L.Sigmoid(net[conf_name])
    mbox_layers[1] = net[sigmoid_name]

# Feed the raw frame in as well so DetectionOutput can visualize onto it.
mbox_layers.append(net.data)

net.detection_out = L.DetectionOutput(*mbox_layers,
    detection_output_param=det_out_param,
    transform_param=output_transform_param,
    include=dict(phase=caffe_pb2.Phase.Value('TEST')))
# "slience" [sic]: misspelled layer name kept for parity with upstream ssd scripts.
net.slience = L.Silence(net.detection_out, ntop=0,
    include=dict(phase=caffe_pb2.Phase.Value('TEST')))

with open(test_net_file, 'w') as f:
    print('name: "{}_test"'.format(model_name), file=f)
    print(net.to_proto(), file=f)
shutil.copy(test_net_file, job_dir)

# Create job file.
with open(job_file, 'w') as f:
    f.write('cd {}\n'.format(caffe_root))
    # Each option ends with backslash + newline ('\\\n') so the generated
    # shell script continues the command across lines. (The pasted code had
    # '\\n', which emits a literal backslash-n instead of a continuation.)
    f.write('./build/tools/caffe test \\\n')
    f.write('--model="{}" \\\n'.format(test_net_file))
    f.write('--weights="{}" \\\n'.format(pretrain_model))
    f.write('--iterations="{}" \\\n'.format(test_iter))
    if solver_mode == P.Solver.GPU:
        f.write('--gpu {}\n'.format(gpus))

# Copy the python script to job_dir.
# Must be __file__ (this script); the paste had lost the dunder underscores,
# leaving `file`, which at this point is a stale loop variable.
py_file = os.path.abspath(__file__)
shutil.copy(py_file, job_dir)

# Run the job.
os.chmod(job_file, stat.S_IRWXU)
if run_soon:
    subprocess.call(job_file, shell=True)

from pelee.

kimys1084 avatar kimys1084 commented on July 24, 2024

This is the whole code; I referred to this: https://github.com/weiliu89/caffe/tree/ssd
(ssd_pascal_webcam.py)

thank you

from pelee.

ujsyehao avatar ujsyehao commented on July 24, 2024

I will check code @kimys1084, please wait!

from pelee.

cui-shaowei avatar cui-shaowei commented on July 24, 2024

@kimys1084 Are you able to run the webcam now? I am also debugging it.

from pelee.

Related Issues (20)

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Some thing interesting about web. New door for the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Some thing interesting about game, make everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.