cysu / open-reid Goto Github PK
View Code? Open in Web Editor NEWOpen source person re-identification library in python
Home Page: https://cysu.github.io/open-reid/
License: MIT License
Open source person re-identification library in python
Home Page: https://cysu.github.io/open-reid/
License: MIT License
Hi, Tong Xiao. I found that your splitting of the evaluation set of Market1501 is different from the standard manner.
In your splits, the statistics look as follows:
Market1501 dataset loaded
subset | # ids | # images
---------------------------
train | 651 | 11387
val | 100 | 1549
trainval | 751 | 12936
query | 750 | 16483
gallery | 751 | 19281
While the standard one looks like:
Market1501 dataset loaded
subset | # ids | # images
---------------------------
train | 651 | 11387
val | 100 | 1549
trainval | 751 | 12936
query | 750 | 3368
gallery | 751 | 15913
The difference resides in the number of query and gallery images. In your partitioning, for each query id, the query set gathers all its images. While in the standard partitioning, for each query id, only some selected images (at most one image for one camera) are placed in the query set.
It makes some difference in the evaluation results. In the triplet loss example, using the same trained model, the open-reid way of partitioning gives results:
Mean AP: 67.6%
CMC Scores allshots cuhk03 market1501
top-1 42.7% 70.1% 84.5%
top-5 59.3% 91.0% 94.0%
top-10 67.2% 95.0% 96.5%
while on the standard eval splits, it gives:
Mean AP: 66.5%
CMC Scores allshots cuhk03 market1501
top-1 44.0% 70.0% 82.5%
top-5 61.9% 91.1% 93.0%
top-10 69.9% 95.1% 95.7%
May I ask whether it's just your original intention or it's a little mistake?
After diving into your code, I made some modification to the reid/datasets/market1501.py
and let it do the standard splitting:
from __future__ import print_function, absolute_import
import os.path as osp
import numpy as np
from ..utils.data import Dataset
from ..utils.osutils import mkdir_if_missing
from ..utils.serialization import read_json
from ..utils.serialization import write_json
########################
# Added
def _pluck(identities, indices, relabel=False):
"""Extract im names of given pids.
Args:
identities: containing im names
indices: pids
relabel: whether to transform pids to classification labels
"""
ret = []
for index, pid in enumerate(indices):
pid_images = identities[pid]
for camid, cam_images in enumerate(pid_images):
for fname in cam_images:
name = osp.splitext(fname)[0]
x, y, _ = map(int, name.split('_'))
assert pid == x and camid == y
if relabel:
ret.append((fname, index, camid))
else:
ret.append((fname, pid, camid))
return ret
########################
class Market1501(Dataset):
    """Market1501 dataset whose query / gallery sets are fixed image lists.

    ``download`` records the image names registered from the raw ``query``
    and ``bounding_box_test`` folders in ``meta.json``; ``load`` then builds
    ``self.query`` and ``self.gallery`` from those lists instead of gathering
    every image of the query / gallery identities.
    """

    url = 'https://drive.google.com/file/d/0B8-rUzbwVRk0c054eEozWG9COHM/view'
    md5 = '65005ab7d12ec1c44de4eeafe813e68a'

    def __init__(self, root, split_id=0, num_val=100, download=True):
        """Prepare the dataset under ``root`` and load the requested split.

        Args:
            root: dataset root directory (forwarded to ``Dataset``).
            split_id: index of the split to load from ``splits.json``.
            num_val: number (int) or fraction (float) of trainval identities
                held out for validation; forwarded to ``load``.
            download: if true, ``download()`` is run first.

        Raises:
            RuntimeError: if the integrity check fails.
        """
        super(Market1501, self).__init__(root, split_id=split_id)

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError("Dataset not found or corrupted. " +
                               "You can use download=True to download it.")

        self.load(num_val)

    def download(self):
        """Extract and re-format the raw Market1501 zip file.

        The archive is not fetched automatically: it must already be present
        at ``<root>/raw/Market-1501-v15.09.15.zip`` with the expected md5.
        Images are copied to ``<root>/images`` under normalized names, and
        ``meta.json`` / ``splits.json`` are written to ``<root>``.

        Raises:
            RuntimeError: if the zip file is missing or its md5 differs.
        """
        if self._check_integrity():
            print("Files already downloaded and verified")
            return

        import re
        import hashlib
        import shutil
        from glob import glob
        from zipfile import ZipFile

        raw_dir = osp.join(self.root, 'raw')
        mkdir_if_missing(raw_dir)

        # Verify the manually downloaded raw zip file. The file is read inside
        # a ``with`` block so the handle is always closed.
        fpath = osp.join(raw_dir, 'Market-1501-v15.09.15.zip')
        digest = None
        if osp.isfile(fpath):
            with open(fpath, 'rb') as f:
                digest = hashlib.md5(f.read()).hexdigest()
        if digest == self.md5:
            print("Using downloaded file: " + fpath)
        else:
            raise RuntimeError("Please download the dataset manually from {} "
                               "to {}".format(self.url, fpath))

        # Extract the file
        exdir = osp.join(raw_dir, 'Market-1501-v15.09.15')
        if not osp.isdir(exdir):
            print("Extracting zip file")
            with ZipFile(fpath) as z:
                z.extractall(path=raw_dir)

        # Format
        images_dir = osp.join(self.root, 'images')
        mkdir_if_missing(images_dir)

        # 1501 identities (+1 for background) with 6 camera views each
        identities = [[[] for _ in range(6)] for _ in range(1502)]

        def register(subdir, pattern=re.compile(r'([-\d]+)_c(\d)')):
            """Copy the images of ``subdir`` and index them by (pid, cam).

            Returns:
                ``(pids, fnames)``: the set of person ids seen, and the new
                names of the copied images in sorted source-path order.
            """
            fnames = []  # Added: names of the images in the new directory
            fpaths = sorted(glob(osp.join(exdir, subdir, '*.jpg')))
            pids = set()
            for fpath in fpaths:
                fname = osp.basename(fpath)
                pid, cam = map(int, pattern.search(fname).groups())
                if pid == -1:
                    continue  # junk images are just ignored
                assert 0 <= pid <= 1501  # pid == 0 means background
                assert 1 <= cam <= 6
                cam -= 1
                pids.add(pid)
                fname = ('{:08d}_{:02d}_{:04d}.jpg'
                         .format(pid, cam, len(identities[pid][cam])))
                identities[pid][cam].append(fname)
                shutil.copy(fpath, osp.join(images_dir, fname))
                fnames.append(fname)  # Added
            return pids, fnames

        trainval_pids, _ = register('bounding_box_train')
        gallery_pids, gallery_fnames = register('bounding_box_test')
        query_pids, query_fnames = register('query')
        assert query_pids <= gallery_pids
        assert trainval_pids.isdisjoint(gallery_pids)

        # Save meta information into a json file
        meta = {'name': 'Market1501', 'shot': 'multiple', 'num_cameras': 6,
                'identities': identities,
                'query_fnames': query_fnames,  # Added
                'gallery_fnames': gallery_fnames}  # Added
        write_json(meta, osp.join(self.root, 'meta.json'))

        # Save the only training / test split
        splits = [{
            'trainval': sorted(list(trainval_pids)),
            'query': sorted(list(query_pids)),
            'gallery': sorted(list(gallery_pids))}]
        write_json(splits, osp.join(self.root, 'splits.json'))

    @staticmethod
    def _entries_from_fnames(fnames):
        """Turn ``<pid>_<cam>_<index>.jpg`` names into (fname, pid, cam)."""
        entries = []
        for fname in fnames:
            name = osp.splitext(fname)[0]
            pid, cam, _ = map(int, name.split('_'))
            entries.append((fname, pid, cam))
        return entries

    # Added
    def load(self, num_val=0.3, verbose=True):
        """Load the split and build the train/val/trainval/query/gallery sets.

        Args:
            num_val: number of trainval identities used for validation; a
                float is interpreted as a fraction of all trainval
                identities.
            verbose: if true, print a table of subset statistics.

        Raises:
            ValueError: if ``self.split_id`` is not a valid split index, or
                if ``num_val`` is negative or not smaller than the number of
                trainval identities.
        """
        splits = read_json(osp.join(self.root, 'splits.json'))
        if self.split_id >= len(splits):
            raise ValueError("split_id exceeds total splits {}"
                             .format(len(splits)))
        self.split = splits[self.split_id]

        # Randomly split train / val
        trainval_pids = np.asarray(self.split['trainval'])
        np.random.shuffle(trainval_pids)
        num = len(trainval_pids)
        if isinstance(num_val, float):
            num_val = int(round(num * num_val))
        if num_val >= num or num_val < 0:
            raise ValueError("num_val exceeds total identities {}"
                             .format(num))
        # Slice with an explicit non-negative index: ``[:-num_val]`` and
        # ``[-num_val:]`` would swap the two sets when num_val == 0.
        num_train = num - num_val
        train_pids = sorted(trainval_pids[:num_train])
        val_pids = sorted(trainval_pids[num_train:])

        self.meta = read_json(osp.join(self.root, 'meta.json'))
        identities = self.meta['identities']
        self.train = _pluck(identities, train_pids, relabel=True)
        self.val = _pluck(identities, val_pids, relabel=True)
        self.trainval = _pluck(identities, trainval_pids, relabel=True)
        self.num_train_ids = len(train_pids)
        self.num_val_ids = len(val_pids)
        self.num_trainval_ids = len(trainval_pids)

        # Added: query / gallery come from the recorded image lists, so only
        # the images registered from the raw folders are used.
        self.query = self._entries_from_fnames(self.meta['query_fnames'])
        self.gallery = self._entries_from_fnames(self.meta['gallery_fnames'])

        if verbose:
            print(self.__class__.__name__, "dataset loaded")
            print(" subset | # ids | # images")
            print(" ---------------------------")
            print(" train | {:5d} | {:8d}"
                  .format(self.num_train_ids, len(self.train)))
            print(" val | {:5d} | {:8d}"
                  .format(self.num_val_ids, len(self.val)))
            print(" trainval | {:5d} | {:8d}"
                  .format(self.num_trainval_ids, len(self.trainval)))
            print(" query | {:5d} | {:8d}"
                  .format(len(self.split['query']), len(self.query)))
            print(" gallery | {:5d} | {:8d}"
                  .format(len(self.split['gallery']), len(self.gallery)))
########################
If it's your original intention, just ignore my issue, and IMHO, you may add some notes in the doc.
Thank you again for your code!
Hey,
I just downloaded and ran the code today:
python examples/triplet_loss.py -d market1501 -a resnet50 --combine-trainval --logs-dir examples/logs/triplet-loss/market1501-resnet50 -b 64
The only change I made is batch size because I have only 1 GPU, however the result right now is quite different from what's reported on the benchmark websites:
Mean AP: 78.6%
CMC Scores allshots cuhk03 market1501
top-1 57.0% 78.1% 87.3%
top-5 72.5% 97.0% 95.4%
top-10 79.8% 98.7% 97.9%
Hi, Tong Xiao, very grateful for your complete and self-contained ReID library. I have a small question to consult you about. I failed to reproduce the performance using triplet loss on Market1501.
I think my script is effectively the same as provided in your github.io page:
CUDA_VISIBLE_DEVICES=0,1,2,3 python examples/triplet_loss.py -d market1501 -a resnet50 --combine-trainval --logs-dir examples/logs/triplet-loss/market1501-resnet50-4gpu-batch-size-256 -b 256 --print-freq 20
The performance it achieved is quite worse than reported on your github.io page:
Mean AP: 60.3%
CMC Scores allshots cuhk03 market1501
top-1 34.8% 63.1% 80.4%
top-5 51.4% 87.4% 92.1%
top-10 59.7% 92.7% 95.1%
Do you have anything not updated on the github.io page? Waiting for your kind response and thank you very much.
Anyone else who has run this code and notices this issue is also highly appreciated to share your results.
I've noticed that there are some import statements in reid.metric_learning.euclidean, as well as in reid.metric_learning.kissme and reid.metric_learning, that import the module metric_learn or metric_learn.base_metric.
However, I fail to locate this module in your package. Does this belong to some reference libraries I overlooked?
Hi!
I want to know if you can provide the trained models or weights. I'm a beginner here, and it's too hard for me to train these models from scratch due to my personal computer's poor computing performance and small memory. Still, I want to evaluate the performance of these models. Thus, I'd appreciate it if you could provide your best trained checkpoint files for the three different loss functions.
Thanks, anyway.
CMC is currently taking a long time to compute, so long that for me it takes longer on dukemtmc to do validation than to calculate the epoch.
I noticed that CMC is currently single-threaded, is it possible to make it multi-threaded?
e.g. can I pool this loop:
for i in range(m):
# Filter out the same id and same camera
valid = ((gallery_ids[indices[i]] != query_ids[i]) |
(gallery_cams[indices[i]] != query_cams[i]))
if separate_camera_set:
# Filter out samples from same camera
valid &= (gallery_cams[indices[i]] != query_cams[i])
if not np.any(matches[i, valid]):
continue
if single_gallery_shot:
repeat = 10
gids = gallery_ids[indices[i][valid]]
inds = np.where(valid)[0]
ids_dict = defaultdict(list)
for j, x in zip(inds, gids):
ids_dict[x].append(j)
else:
repeat = 1
for _ in range(repeat):
if single_gallery_shot:
# Randomly choose one instance for each id
sampled = (valid & _unique_sample(ids_dict, len(valid)))
index = np.nonzero(matches[i, sampled])[0]
else:
index = np.nonzero(matches[i, valid])[0]
delta = 1. / (len(index) * repeat)
for j, k in enumerate(index):
if k - j >= topk:
break
if first_match_break:
ret[k - j] += 1
break
ret[k - j] += delta
num_valid_queries += 1
?
Hi @Cysu
I got triplet_loss error:
open-reid$ python examples/triplet_loss.py -d cuhk03 -b 64 -j 2 -a resnet50 --logs-dir logs/triplet-loss/cuhk03-resnet50
Files already downloaded and verified
CUHK03 dataset loaded
subset | # ids | # images
train | 1267 | 24345
val | 100 | 1918
trainval | 1367 | 26263
query | 100 | 1930
gallery | 100 | 1930
Traceback (most recent call last):
File "examples/triplet_loss.py", line 217, in
main(parser.parse_args())
File "examples/triplet_loss.py", line 145, in main
trainer.train(epoch, train_loader, optimizer)
File "open-reid/reid/trainers.py", line 31, in train
loss, prec1 = self._forward(inputs, targets)
File "open-reid/reid/trainers.py", line 80, in _forward
loss, prec = self.criterion(outputs, targets)
File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 224, in call
result = self.forward(*input, **kwargs)
File "open-reid/reid/loss/triplet.py", line 26, in forward
dist_an.append(dist[i][1 - mask[i]].min())
File "/usr/local/lib/python2.7/dist-packages/torch/autograd/variable.py", line 826, in rsub
return SubConstant.apply(other, self)
File "/usr/local/lib/python2.7/dist-packages/torch/autograd/functions/basic_ops.py", line 130, in forward
return tensor.neg().add(constant)
AttributeError: 'torch.cuda.ByteTensor' object has no attribute 'neg'
how to compute CMC and rank?
When installing PyTorch, I am getting this error. They recommend to install with miniconda.
Processing torch-0.1.2.post1.tar.gz
Writing /tmp/easy_install-d9jpUw/torch-0.1.2.post1/setup.cfg
Running torch-0.1.2.post1/setup.py -q bdist_egg --dist-dir /tmp/easy_install-d9jpUw/torch-0.1.2.post1/egg-dist-tmp-xjdlXA
Traceback (most recent call last):
File "setup.py", line 22, in <module>
......
RuntimeError: PyTorch does not currently provide packages for PyPI (see status at https://github.com/pytorch/pytorch/issues/566).
Please follow the instructions at http://pytorch.org/ to install with miniconda instead.
I installed it using pip3 with CUDA 8 on Ubuntu 16, according to their official instructions (and then all remaining dependencies). Now, I am getting
No module named reid.
My dataset doesn't have camera lable. In order to satisfy the format of dataset, I assume all of them are from same camera (camera 0). And no problem happened during training. But error happened at evaluating, File "examples/softmax_loss.py", line 150, in main top1 = evaluator.evaluate(val_loader, dataset.val, dataset.val) File "/workspace/gjc/anaconda2/envs/py35/lib/python3.5/site-packages/open_reid-0.2.0-py3.5.egg/reid/evaluators.py", line 120, in evaluate File "/workspace/gjc/anaconda2/envs/py35/lib/python3.5/site-packages/open_reid-0.2.0-py3.5.egg/reid/evaluators.py", line 98, in evaluate_all File "/workspace/gjc/anaconda2/envs/py35/lib/python3.5/site-packages/open_reid-0.2.0-py3.5.egg/reid/evaluators.py", line 98, in <dictcomp> File "/workspace/gjc/anaconda2/envs/py35/lib/python3.5/site-packages/open_reid-0.2.0-py3.5.egg/reid/evaluation_metrics/ranking.py", line 78, in cmc RuntimeError: No valid query
To simplify, I have deleted code about mAP and cuhk03.(Same error happened when I haven't modified code)
Since I don't have camera labels, how should I evaluate the model in such a situation? Can anyone help me?
@Cysu @zydou
I am installing open-reid on MacOS by running setup.py install.
I have the following:
System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/numpy/core/include/numpy/npy_1_7_deprecated_api.h:15:2: warning:
"Using deprecated NumPy API, disable it by " "#defining NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION"
[-W#warnings]
#warning "Using deprecated NumPy API, disable it by " \
^
In file included from /tmp/easy_install-TjMlGE/h5py-2.7.1/h5py/defs.c:569:
./h5py/api_compat.h:27:10: fatal error: 'hdf5.h' file not found
#include "hdf5.h"
^~~~~~~~
1 warning and 1 error generated.
error: Setup script exited with error: command 'cc' failed with exit status 1
Do you know if the cuhk01-03 datasets overlap? I have tried to check the papers, but I couldn't figure it out.
Hi, your library is very convenient to use and I have a question,
I use market1501 dataset to train a resnet50. Is it able to fine-tune the same model using cuhk03 ?
Thanks,
Great library!
The implementation for RandomPairSampler is missing from utils/data/sampler.py. Will it be implemented any time soon? It sounds like something useful for loading input data for a verification task.
I'd love to help out. Are there any ways we can contribute to Open-Reid?
Hello!
I have a few questions; maybe you could tell me a simple solution so that I wouldn't have to reinvent the wheel...
If you have any answers I'll be happy to get them as a help in my research. Also if you have any suggestions how to train it for maximal precision - it would be just great (I've seen your examples, but cannot get precision higher that 80% - I have only one gtx 1060 and cannot repeat experiments with batch = 256)
How can I calculate Accuracy for multiple queries?
Hi, recently, the pytorch has updated and the code does not run smoothly in the new pytorch version. The following error occurs, could you please update a new version of the code ?
Traceback (most recent call last):
File "examples/triplet_loss.py", line 217, in
main(parser.parse_args())
File "examples/triplet_loss.py", line 145, in main
trainer.train(epoch, train_loader, optimizer)
File "build/bdist.linux-x86_64/egg/reid/trainers.py", line 31, in train
File "build/bdist.linux-x86_64/egg/reid/trainers.py", line 80, in _forward
File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 224, in call
result = self.forward(*input, **kwargs)
File "build/bdist.linux-x86_64/egg/reid/loss/triplet.py", line 26, in forward
File "/usr/local/lib/python2.7/dist-packages/torch/autograd/variable.py", line 826, in rsub
return SubConstant.apply(other, self)
File "/usr/local/lib/python2.7/dist-packages/torch/autograd/functions/basic_ops.py", line 130, in forward
return tensor.neg().add(constant)
AttributeError: 'torch.cuda.ByteTensor' object has no attribute 'neg'
Hi!
I have successfully downloaded 'Market-1501-v15.09.15.zip' from google drive, but the url of 'Duke.tar.gz' in the code showed 'Error 404'. Any help?
By the way, I have downloaded a file named 'DukeMTMC_reID.zip' before. Is it same to 'Duke.tar.gz'?
Thanks!
Hi there~
I see that the OIM Loss is a little different to that described in the paper. This implementation omitted the Circular Queue(CQ) part. May I ask what's the consideration behind this change?
Thank you.
Hi, Tong Xiao.
I find that sklearn.metrics.average_precision_score
has changed its behavior since version 0.19
. Previous versions (I have only tested 0.18.1) generate mAP identical to the code of Market1501, while newer versions (I have only tested 0.19.1) generate higher mAP.
I provide a test case for this, link.
Thank you!
Hello.
I find evaluation process a little bit confusing. I used the script author provided for network training:
python examples/triplet_loss.py -d cuhk03 -b 32 -a resnet50 --logs-dir logs/triplet_loss/cuhk03-resnet50
I downloaded cuhk03 dataset, so training process went well and produced evaluation results for allshots, cuhk03 and market1501, which I didn't download. How come?
Hi, Tong Xiao, Thank you for your great work. There is a question about evaluation as in title. What is the difference between first_match_break==True and first_match_break==False?
Thank you very much!
Hi!
"LinAlgError: Matrix is not positive definite" occurred when training when set 'dist-metric' to 'kissme'. Any help? Thanks!
I can not find any other accessible website to download cuhk dataset.I wonder if you can offer a baiduyun link.Thanks!
Hello!
First, I would like to thank you, your system works pretty good and its a great job you are doing!
My main question is... is there a simple way to convert weights to tensorflow (or keras)? I use the resnet50 in your system, and layers that you had added are complicating the convertation - no open-source tools can deal with it (at least I couldnt find the one who does). And I am a bit scared to do that manually, because I am a beginner in deep learning, and have no any experience in torch also...
Also, I cannot improve performance by using deeper resnets. resnet50 show best result, do you think it can be due to insufficiency of training data?
Do you know how to use the losses defined in oim.py and triplet.py?
thank you
Market 1501 evaluation protocol uses fixed 3368 high-quality images as query. It is because we can easily draw high quality bounding box for query pedestrian.
In this sense, reid.datasets.market1501
did not follow the official evaluation protocol.
def register(subdir, pattern=re.compile(r'([-\d]+)_c(\d)')):
    """Copy the images of ``subdir`` and index them by (pid, cam).

    Reads ``exdir``, ``images_dir`` and ``identities`` from the enclosing
    scope. Every ``*.jpg`` under ``exdir/subdir`` whose pid is not -1 is
    copied to ``images_dir`` under a normalized name and appended to
    ``identities[pid][cam]``.

    Returns:
        The set of person ids seen in ``subdir``. Note that the individual
        image names of the sub-directory are not returned.
    """
    pids = set()
    for fpath in sorted(glob(osp.join(exdir, subdir, '*.jpg'))):
        fname = osp.basename(fpath)
        pid, cam = map(int, pattern.search(fname).groups())
        if pid == -1:
            continue  # junk images are just ignored
        assert 0 <= pid <= 1501  # pid == 0 means background
        assert 1 <= cam <= 6
        cam -= 1
        pids.add(pid)
        # The running count of images for this (pid, cam) makes names unique.
        fname = ('{:08d}_{:02d}_{:04d}.jpg'
                 .format(pid, cam, len(identities[pid][cam])))
        identities[pid][cam].append(fname)
        shutil.copy(fpath, osp.join(images_dir, fname))
    return pids
trainval_pids = register('bounding_box_train')
gallery_pids = register('bounding_box_test')
query_pids = register('query')
This may explain why the demo code python examples/triplet_loss.py -d market1501
only has 67.9 mAP, much lower than the state of the art. For a visual case,
hi,first thanks for this toolkit for re-id. There is a problem when I run the triplet-loss for market-1501,
python examples/resnet.py -d market1501 -b 64 -j 2 --loss triplet --logs-dir logs/resnet-triplet-test2 --num-instances 4 --optimizer adam
I only get 2.6% top1 accuracy. I find, finally the difference of hard-negative and hard-positive Almost goes to zero, that's not what we want. Did I have some operational errors? Or the code need to be modified?
Hi, @Cysu !
I think your work is wonderful, and this repository really helps a lot in person reid! But I have a few question.
First, I think all your metric is based on Euclidean distance, which is obvious in the function "pairwise_distance" in .reid.evaluator. But as far as I know, cos similarity is also a popular metric.
Second, I noticed that your feature extracted by resnet is the logit calculated by Resnet.classifier, which confused me a lot since I think the feature should be the output of the avgpool layer, which I think is consistent with other author's work. By the way, I noticed the "cut_at_pooling" option in Resnet, which I think is the solution to this problem but should be after
x = F.avg_pool2d(x, x.size()[2:]) x = x.view(x.size(0), -1)
instead of before them. I'm really interested in your work, hoping your reply!
Hi,
I have 3 questions here
batch size ( Rank1 )
64 ( 77.4% )
96 ( 79.5% )
108 ( 79.8% )
Is this the expected results? Running a batch size of 256 will get me ~84% rank 1?
Also, there seems no different in the result whether image normalization is used or not. True?
I have memory leakage problem. Maximum epoch run before OS terminate the program is about 18-20, depending on batch size. Not sure if this is a problem of running Pytorch under Windows 10, or python code issues?
Thanks
Hi all
After I executed the command
python examples/resnet.py -d viper -b 64 -j 2 --loss oim --logs-dir logs/resnet-viper-oim
I encountered the following errors:
Process Process-4:
Traceback (most recent call last):
File "/root/miniconda2/lib/python2.7/multiprocessing/process.py", line 258, in _bootstrap
self.run()
File "/root/miniconda2/lib/python2.7/multiprocessing/process.py", line 114, in run
self._target(*self._args, **self._kwargs)
File "/root/miniconda2/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 45, in _worker_loop
data_queue.put((idx, samples))
File "/root/miniconda2/lib/python2.7/multiprocessing/queues.py", line 392, in put
return send(obj)
File "/root/miniconda2/lib/python2.7/site-packages/torch/multiprocessing/queue.py", line 17, in send
ForkingPickler(buf, pickle.HIGHEST_PROTOCOL).dump(obj)
File "/root/miniconda2/lib/python2.7/pickle.py", line 224, in dump
self.save(obj)
File "/root/miniconda2/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/root/miniconda2/lib/python2.7/pickle.py", line 554, in save_tuple
save(element)
File "/root/miniconda2/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/root/miniconda2/lib/python2.7/pickle.py", line 606, in save_list
self._batch_appends(iter(obj))
File "/root/miniconda2/lib/python2.7/pickle.py", line 639, in _batch_appends
save(x)
File "/root/miniconda2/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/root/miniconda2/lib/python2.7/multiprocessing/forking.py", line 67, in dispatcher
self.save_reduce(obj=obj, *rv)
File "/root/miniconda2/lib/python2.7/pickle.py", line 401, in save_reduce
save(args)
File "/root/miniconda2/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/root/miniconda2/lib/python2.7/pickle.py", line 554, in save_tuple
save(element)
File "/root/miniconda2/lib/python2.7/pickle.py", line 286, in save
f(self, obj) # Call unbound method with explicit self
File "/root/miniconda2/lib/python2.7/multiprocessing/forking.py", line 66, in dispatcher
rv = reduce(obj)
File "/root/miniconda2/lib/python2.7/site-packages/torch/multiprocessing/reductions.py", line 113, in reduce_storage
fd, size = storage.share_fd()
RuntimeError: unable to write to file </torch_29225_1654046705> at /py/conda-bld/pytorch_1493669264383/work/torch/lib/TH/THAllocator.c:267
When switch to the xentropy loss with
python examples/resnet.py -d viper -b 64 -j 1 --loss xentropy --logs-dir logs/resnet-viper-xentropy
The following error occured:
Exception in thread Thread-1:
Traceback (most recent call last):
File "/root/miniconda2/lib/python2.7/threading.py", line 801, in __bootstrap_inner
self.run()
File "/root/miniconda2/lib/python2.7/threading.py", line 754, in run
self.__target(*self.__args, **self.__kwargs)
File "/root/miniconda2/lib/python2.7/site-packages/torch/utils/data/dataloader.py", line 51, in _pin_memory_loop
r = in_queue.get()
File "/root/miniconda2/lib/python2.7/multiprocessing/queues.py", line 378, in get
return recv()
File "/root/miniconda2/lib/python2.7/site-packages/torch/multiprocessing/queue.py", line 22, in recv
return pickle.loads(buf)
File "/root/miniconda2/lib/python2.7/pickle.py", line 1388, in loads
return Unpickler(file).load()
File "/root/miniconda2/lib/python2.7/pickle.py", line 864, in load
dispatchkey
File "/root/miniconda2/lib/python2.7/pickle.py", line 1139, in load_reduce
value = func(*args)
File "/root/miniconda2/lib/python2.7/site-packages/torch/multiprocessing/reductions.py", line 68, in rebuild_storage_fd
fd = multiprocessing.reduction.rebuild_handle(df)
File "/root/miniconda2/lib/python2.7/multiprocessing/reduction.py", line 155, in rebuild_handle
conn = Client(address, authkey=current_process().authkey)
File "/root/miniconda2/lib/python2.7/multiprocessing/connection.py", line 169, in Client
c = SocketClient(address)
File "/root/miniconda2/lib/python2.7/multiprocessing/connection.py", line 308, in SocketClient
s.connect(address)
File "/root/miniconda2/lib/python2.7/socket.py", line 228, in meth
return getattr(self._sock,name)(*args)
error: [Errno 111] Connection refused
In both situations, the terminal is frozen after these errors prompt. I have to kill the corresponding Python process in order to exit.
Any suggestions to solve this?
I find there are three benchmarks including 'cmc all shots', 'cmc cuhk03', 'cmc market1501'. I want to know what meaning do they represent seperately ? Does cmc cuhk03 mean single-shot and cmc market1501 mean multi-shot? Lastly, I know the new split method for cuhk03 training set in https://github.com/zhunzhong07/person-re-ranking#the-new-trainingtesting-protocol-for-cuhk03 and your open-reid does not use it. And the performance seems quite different in baseline method?
Any reply will be appreciated.
Thank you.
From the script, when training with triplet loss the batch size is not given explicitly, so the default batch size of 256 is used (I'm not quite sure)? But in the paper the number is 18? Maybe that's the reason?
hi,i have trained a resnet50 on my dataset.and i want to extract the feature that on pool5,how to use the parameters that on function of outputs = extract_cnn_feature(model, imgs, ?? ) . what should the third parameters look like ?
Hi Tong,
Thanks very much for this great project! I believe this will significantly facilitate the research in person reid.
I have a small suggestion regarding the feature extraction stage in the evaluation mode. When the evaluator.evaluate()
is executed, the first step is to extract features followed by construction of the distance matrix used to compute mAPs. However, the features extracted in model.eval()
remains the same as those from the training phase, i.e. both are pre-logits produced by the x = self.classifier(x)
. This is not expected as we want the features to come out from the 'real' feature extractor (resulting in 2048-d vectors in the resnet) instead of the 751-d pre-logit vectors from the classifier (in case of market1501). Therefore, I suggest to modify the def forward(self, x)
that will look like:
def forward(self, x):
    """Return extractor features in eval mode, classifier output in training.

    Args:
        x: input passed to ``self.feature_extractor``.

    Returns:
        The output of ``self.feature_extractor`` when ``self.training`` is
        false; otherwise that output passed through ``self.classifier``.
    """
    x = self.feature_extractor(x)
    if not self.training:
        # self.training decides whether the network is in model.train() or
        # model.eval()
        return x
    x = self.classifier(x)
    return x
Hi, Tong Xiao.
In the forward
function of ResNet
or InceptionNet
, I am confused by the following two lines:
if self.norm:
x = F.normalize(x)
elif self.has_embedding:
x = F.relu(x)
Why should normalize
and relu
be exclusive to each other? Should the elif
here be if
instead?
Waiting for your response. Thanks a lot!
hi!
in the open-reid/reid/evaluators.py
def pairwise_distance(features, query=None, gallery=None, metric=None):
if query is None and gallery is None:
n = len(features)
x = torch.cat(list(features.values()))
x = x.view(n, -1)
if metric is not None:
x = metric.transform(x)
dist = torch.pow(x, 2).sum(1) * 2
dist = dist.expand(n, n) - 2 * torch.mm(x, x.t())
return dist
the result seems not euclidean metric?
in this 'pairwise_distance' function,the second part is computing euclidean metric,but the first part looks not right?
why they are different?
thanks!
I cloned the latest version open-reid (latest commit is a1df21b).
First, I run the example code:
python examples/softmax_loss.py -d viper -b 64 -j 2 -a resnet50 --logs-dir logs/softmax-loss/viper-resnet50
The result is:
Mean AP: 15.5%
CMC Scores allshots cuhk03 market1501
top-1 7.1% 12.2% 7.1%
top-5 23.6% 35.6% 23.6%
top-10 32.9% 47.3% 32.9%
Then, I run the same code again on the same machine:
python examples/softmax_loss.py -d viper -b 64 -j 2 -a resnet50 --logs-dir logs/softmax-loss/viper-resnet50
The result is:
Mean AP: 15.6%
CMC Scores allshots cuhk03 market1501
top-1 7.9% 13.0% 7.9%
top-5 20.9% 32.8% 20.9%
top-10 30.9% 44.8% 30.9%
It's weird that they are different. It seems that these two lines do not work:
open-reid/examples/softmax_loss.py
Lines 71 to 72 in a1df21b
train_transformer
use RandomSizedRectCrop
and RandomHorizontalFlip
:open-reid/examples/softmax_loss.py
Lines 36 to 41 in a1df21b
RandomSizedRectCrop
and RandomHorizontalFlip
use python built-in random module other than numpy.random.open-reid/reid/utils/data/transforms.py
Lines 19 to 42 in a1df21b
class RandomHorizontalFlip(object):
    """Horizontally flip the given PIL.Image randomly with a probability of 0.5."""

    def __call__(self, img):
        """
        Args:
            img (PIL.Image): Image to be flipped.

        Returns:
            PIL.Image: Randomly flipped image.
        """
        # Uses Python's built-in ``random`` module, so only ``random.seed``
        # affects this draw.
        if random.random() < 0.5:
            return img.transpose(Image.FLIP_LEFT_RIGHT)
        return img
(Note: RandomHorizontalFlip
source code at here)
So in examples/softmax_loss.py
, I import random
and change:
def main(args):
np.random.seed(args.seed)
torch.manual_seed(args.seed)
to:
def main(args):
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)
Then I run the same example code twice. The results are still different.
Then, in reid/utils/data/transforms.py
, I change:
open-reid/reid/utils/data/transforms.py
Lines 26 to 29 in a1df21b
for attempt in range(10):
area = img.size[0] * img.size[1]
target_area = random.uniform(0.64, 1.0) * area
print(target_area)
aspect_ratio = random.uniform(2, 3)
Then run the example code twice. The target_area
differs between the first and second runs, indicating that random.seed(args.seed)
does not work.
So I rewrite the reid/utils/data/transforms.py
with numpy.random. The final reid/utils/data/transforms.py
is:
from __future__ import absolute_import
from torchvision.transforms import *
import numpy as np
class RandomHorizontalFlip(object):
    """Horizontally flip the given PIL.Image randomly with a probability of 0.5."""

    def __call__(self, img):
        """
        Args:
            img (PIL.Image): Image to be flipped.

        Returns:
            PIL.Image: Randomly flipped image.
        """
        # Draws from numpy's global random state, so ``np.random.seed``
        # affects this draw.
        if np.random.random() < 0.5:
            return img.transpose(Image.FLIP_LEFT_RIGHT)
        return img
class RectScale(object):
def __init__(self, height, width, interpolation=Image.BILINEAR):
self.height = height
self.width = width
self.interpolation = interpolation
def __call__(self, img):
w, h = img.size
if h == self.height and w == self.width:
return img
return img.resize((self.width, self.height), self.interpolation)
class RandomSizedRectCrop(object):
def __init__(self, height, width, interpolation=Image.BILINEAR):
self.height = height
self.width = width
self.interpolation = interpolation
def __call__(self, img):
for attempt in range(10):
area = img.size[0] * img.size[1]
target_area = np.random.uniform(0.64, 1.0) * area
print(target_area)
aspect_ratio = np.random.uniform(2, 3)
h = int(round(math.sqrt(target_area * aspect_ratio)))
w = int(round(math.sqrt(target_area / aspect_ratio)))
if w <= img.size[0] and h <= img.size[1]:
x1 = np.random.randint(0, img.size[0] - w + 1)
y1 = np.random.randint(0, img.size[1] - h + 1)
img = img.crop((x1, y1, x1 + w, y1 + h))
assert(img.size == (w, h))
return img.resize((self.width, self.height), self.interpolation)
# Fallback
scale = RectScale(self.height, self.width,
interpolation=self.interpolation)
return scale(img)
Then I ran the example code twice. The target_area
is the same in the first run and the second run, but the final results (mAP, CMC) are still different.
I'm wondering what's wrong with the code. Could you check the code and answer my question?
I want to train my model using softmax
loss and triplet loss together. How can I combine the two losses in one network? Any reply will be appreciated.
Thanks for your work. When I run the softmax example, I get this error: "Traceback (most recent call last):
File "softmax_loss.py", line 12, in <module>
from reid import datasets
File "/home/yuepan/code/pytorch_open_reid/reid/__init__.py", line 7, in <module>
from . import metric_learning
File "/home/yuepan/code/pytorch_open_reid/reid/metric_learning/__init__.py", line 3, in <module>
from metric_learn import (ITML_Supervised, LMNN, LSML_Supervised,
ImportError: No module named metric_learn"
and I cannot find any metric_learn.py in the folder.
Line 99 in example/tripletloss.py num_classes should be the value above rather than args.num_features
使用open-reid 来训练 PRW 数据集遇到了问题,我的做法是把 PRW 数据制作成 Market-1501 的那种格式,包括bouding_box_test/train 甚至 PRW 的名字和里面各个文件名我都修改成跟 Market-1501 一样,我也修改过 reid/datasets/init.py 和 market1501.py 文件,运行始终出现如下的错误:Please download the dataset manually from https://drive.google.com/file/d/0B8-rUzbwVRk0c054eEozWG9COHM/view to /home/test/work_space/open-reid master/examples/data/market1501/raw/Market-1501-v15.09.15.zip,我发现导致这个问题的缘由在 market1501.py 文件中的这行代码出错: if osp.isfile(fpath) and hashlib.md5(open(fpath, 'rb').read()).hexdigest() == self.md5: 因为我制作的PRW数据在网络上根本找不到对应的数据下载的网址,我尝试修改成:hashlib.md5(open(fpath, 'rb').read()).hexdigest() != self.md5: (或者去掉这个代码),结果还是出现同样的错误,然后我调试程序,我发现程序真正执行的_init_.py 和 market1501.py 中的代码根本没有改变,我觉得好奇怪:不管我始终怎样修改market1501.py代码,每次修改我都做了保存,但是实际上程序依然执行的原来的_init_.py 和 market1501.py 代码,具体我也不清楚到底是哪里错了问题,希望能得到作者的帮助,谢谢啦!
Hi the author,
Have you tried the triplet on the MARS dataset?
I ran triplet_loss.py on market1501 as described in the benchmark, but I got worse results: every item is about 5% lower than what is presented in the benchmark. BTW, I didn't change the code. I do not understand why. Can you help me? Thanks!
Hi,
I couldn't find in the code where you select PK images for mini-batch.
Which variation of triplet loss is implemented here?
thanks,
Aytac
Hi @Cysu,
When I run
open-reid$ python examples/oim_loss.py -d viper -b 64 -j 2 -a resnet50 --logs-dir logs/oim-loss/viper-resnet50
I got the following error:
Files already downloaded and verified
VIPeR dataset loaded
subset | # ids | # images
train | 216 | 432
val | 100 | 200
trainval | 316 | 632
query | 316 | 632
gallery | 316 | 632
Traceback (most recent call last):
File "examples/oim_loss.py", line 222, in
main(parser.parse_args())
File "examples/oim_loss.py", line 151, in main
trainer.train(epoch, train_loader, optimizer)
File "open-reid/reid/trainers.py", line 31, in train
loss, prec1 = self._forward(inputs, targets)
File "open-reid/reid/trainers.py", line 70, in _forward
outputs = self.model(*inputs)
File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 224, in call
result = self.forward(*input, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/torch/nn/parallel/data_parallel.py", line 58, in forward
return self.module(*inputs[0], **kwargs[0])
File "/usr/local/lib/python2.7/dist-packages/torch/nn/modules/module.py", line 224, in call
result = self.forward(*input, **kwargs)
File "/home/deep307/PROJECT/open-reid/reid/models/resnet.py", line 81, in forward
x = x / x.norm(2, 1).expand_as(x)
File "/usr/local/lib/python2.7/dist-packages/torch/autograd/variable.py", line 725, in expand_as
return Expand.apply(self, (tensor.size(),))
File "/usr/local/lib/python2.7/dist-packages/torch/autograd/_functions/tensor.py", line 111, in forward
result = i.expand(*new_size)
RuntimeError: The expanded size of the tensor (128) must match the existing size (64) at non-singleton dimension 1. at /pytorch/torch/lib/THC/generic/THCTensor.c:323
Hi, I'm trying to do a reid project with a new dataset, but the sizes of the images differ from each other. What should I do to train with such a dataset? Just convert it to the unified format? What should I do to split the images into "trainval, gallery, query"? I only have one dataset with person id labels, and camera ids weren't provided.
And when I run the example in the readme on a server with a GPU, an error occurs:
TypeError: sum received an invalid combination of arguments -got(dim=int, keepdim=bool) but expected one of no arguments *(int dim)
It happened at reid/evaluation_metrics/classification.py line 17. I haven't solved it.
But I have tried cpu version on my laptop by modifying some part of the code(delete cuda()
and let pin_memory=False
), then I got train data of epoch[0] like CMC score. Although I stopped training due to the bad performance of laptop, could the data printed verify that I can run with cpu? Will the change that I made in the code have influence on the training result? After training epoch[0],I got
Mean AP:4.5%
CMC Scores allshots cuhk03 market1501
top-1 0.5% 3.5% 0.5%
top-5 5.0% 10.0% 5.0%
top-10 10.5% 21.0% 10.5%
It's quite low. My PC can't support continued training. Will the result get better by training on the rest of the data? Or is the bad result due to my changes to the code? Could someone tell me?
Another question: how can I get the probability ranking of each image in the gallery if I use a certain image as the query?
I have built a similar project for person reid with pytorch, but somehow my baseline's rank-1 precision is not satisfying. Could someone tell me the precision of this code, for example when evaluated on market1501?
hi xiaotong:
i change some place in the code in https://github.com/Cysu/open-reid/blob/master/reid/utils/data/dataset.py
self.meta = read_json(osp.join(self.root, 'meta.json')) identities = self.meta['identities'] self.train = _pluck(identities, train_pids, relabel=True) self.val = _pluck(identities, val_pids, relabel=True) self.trainval = _pluck(identities, trainval_pids, relabel=True) self.query = _pluck(identities, self.split['query']) self.gallery = _pluck(identities, self.split['gallery']) self.num_train_ids = len(train_pids) self.num_val_ids = len(val_pids) self.num_trainval_ids = len(trainval_pids)
I changed "relabel=True" to "relabel=False", and errors occurred when I ran the code. Debugging the code, I find that the parameters show "unable to get repr for ..." when computing the loss at
def _forward(self, inputs, targets): outputs = self.model(*inputs) if isinstance(self.criterion, torch.nn.CrossEntropyLoss): loss = self.criterion(outputs, targets) prec, = accuracy(outputs.data, targets.data) prec = prec[0]
I couldn't find out why the error occurs. Can you help? @Cysu @wk910930 @zydou
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Something interesting about the web. A new door to the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.