Comments (20)
I think you can set the waitKey delay to a smaller value.
https://github.com/Linaom1214/tensorrt-python/blob/08bb095c022044e254361db18a2f4f4040976c23/utils/utils.py#L78
You can try setting it to 1.
from tensorrt-for-yolo-series.
Please select a smaller model; the current model is too large.
from tensorrt-for-yolo-series.
I trained my model on yolov6s.pt
Cuda version 11.4 and cudnn 8.2.4
from tensorrt-for-yolo-series.
I trained my model on yolov6s.pt
Cuda version 11.4 and cudnn 8.2.4
can you show your code? so I can see more.
from tensorrt-for-yolo-series.
which python script do I have to share?
for e.g trt.py, or train.py
from tensorrt-for-yolo-series.
which python script do I have to share? for e.g trt.py, or train.py
trt.py
from tensorrt-for-yolo-series.
#trt.py
import sys
sys.path.append('../')
from trt_utils import preproc, vis
from trt_utils import BaseEngine
import numpy as np
import cv2
import time
import os
class Predictor(BaseEngine):
    """Predictor for a single-class ('license') detection engine."""

    # NOTE: the constructor must be named ``__init__``; the dunder underscores
    # were stripped when this snippet was pasted, leaving a plain ``init``
    # method that ``Predictor(engine_path=...)`` would never call.
    def __init__(self, engine_path, imgsz=(640, 640)):
        """Load the engine and record the input size and class metadata.

        Args:
            engine_path: Path to the serialized TensorRT engine file.
            imgsz: Input size stored on the instance as ``self.imgsz``.
        """
        super(Predictor, self).__init__(engine_path)
        self.imgsz = imgsz
        self.n_classes = 1
        self.class_names = ['license']
# Script entry point. The guard must compare ``__name__`` with '__main__';
# the underscores were lost when the snippet was pasted.
if __name__ == '__main__':
    pred = Predictor(engine_path='./yolov6_3.trt')
    # Single-image inference, kept for reference:
    # img_path = '../src/3.jpg'
    # origin_img = pred.inference(img_path)
    # cv2.imwrite("%s_yolov6.jpg" % os.path.splitext(
    #     os.path.split(img_path)[-1])[0], origin_img)
    pred.detect_video('./720.mp4')  # pass 0 instead of a path to use a webcam
    # pred.detect_video(0)  # webcam variant
    pred.get_fps()
from tensorrt-for-yolo-series.
#trt.py
import sys sys.path.append('../') from trt_utils import preproc, vis from trt_utils import BaseEngine import numpy as np import cv2 import time import os
class Predictor(BaseEngine):
    """Predictor for a single-class ('license') detection engine."""

    # NOTE: the constructor must be named ``__init__``; the dunder underscores
    # were stripped when this snippet was pasted, leaving a plain ``init``
    # method that ``Predictor(engine_path=...)`` would never call.
    def __init__(self, engine_path, imgsz=(640, 640)):
        """Load the engine and record the input size and class metadata.

        Args:
            engine_path: Path to the serialized TensorRT engine file.
            imgsz: Input size stored on the instance as ``self.imgsz``.
        """
        super(Predictor, self).__init__(engine_path)
        self.imgsz = imgsz
        self.n_classes = 1
        self.class_names = ['license']
# Script entry point. The guard must compare ``__name__`` with '__main__';
# the underscores were lost when the snippet was pasted.
if __name__ == '__main__':
    pred = Predictor(engine_path='./yolov6_3.trt')
    # Single-image inference, kept for reference:
    # img_path = '../src/3.jpg'
    # origin_img = pred.inference(img_path)
    # cv2.imwrite("%s_yolov6.jpg" % os.path.splitext(
    #     os.path.split(img_path)[-1])[0], origin_img)
    pred.detect_video('./720.mp4')  # pass 0 instead of a path to use a webcam
    # pred.detect_video(0)  # webcam variant
    pred.get_fps()
Is the output of the pred.get_fps() function also 23-27 FPS?
from tensorrt-for-yolo-series.
#trt utils
import tensorrt as trt
import pycuda.autoinit
import pycuda.driver as cuda
import numpy as np
import cv2
import time
class BaseEngine(object):
    """Thin TensorRT inference wrapper for a YOLO-style detection engine.

    The constructor deserializes an engine file and allocates one page-locked
    host buffer plus one device buffer per engine binding. ``infer`` runs a
    single forward pass; ``inference`` and ``detect_video`` add pre/post
    processing and drawing for an image file or a video stream; ``get_fps``
    is a small benchmark of ``infer`` alone.
    """

    # NOTE: must be ``__init__`` (the pasted snippet lost the underscores), or
    # ``BaseEngine(path)`` never runs this code.
    def __init__(self, engine_path, imgsz=(640, 640)):
        """Deserialize the engine and allocate I/O buffers.

        Args:
            engine_path: Path to the serialized TensorRT engine file.
            imgsz: Model input size, passed to ``preproc`` as its
                ``input_size`` and indexed as ``(imgsz[0], imgsz[1])`` when
                building the benchmark tensor in ``get_fps``.
        """
        self.imgsz = imgsz
        self.mean = None
        self.std = None
        self.n_classes = 1
        self.class_names = ['license']

        logger = trt.Logger(trt.Logger.WARNING)
        trt.init_libnvinfer_plugins(logger, '')
        runtime = trt.Runtime(logger)
        with open(engine_path, "rb") as f:
            serialized_engine = f.read()
        engine = runtime.deserialize_cuda_engine(serialized_engine)
        self.context = engine.create_execution_context()

        # Allocate all buffers required by the engine, i.e. host/device
        # inputs/outputs, and record the device pointers in binding order.
        self.inputs, self.outputs, self.bindings = [], [], []
        self.stream = cuda.Stream()
        for binding in engine:
            size = trt.volume(engine.get_binding_shape(binding))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            self.bindings.append(int(device_mem))
            buffers = {'host': host_mem, 'device': device_mem}
            if engine.binding_is_input(binding):
                self.inputs.append(buffers)
            else:
                self.outputs.append(buffers)

    def infer(self, img):
        """Run one forward pass and return the list of host output arrays.

        Args:
            img: Array holding the preprocessed input; it is flattened and
                must contain exactly as many elements as the first input
                binding.

        Returns:
            A list with one flat host array per output binding. The arrays
            are the engine's own host buffers and are overwritten by the
            next call.
        """
        # Copy INTO the page-locked buffer allocated in __init__ rather than
        # rebinding the dict entry to a fresh pageable array: this keeps the
        # async host->device copy on pinned memory and keeps the transfer size
        # and dtype equal to what the device buffer was allocated for.
        np.copyto(self.inputs[0]['host'], np.ravel(img))
        # transfer data to the gpu
        for inp in self.inputs:
            cuda.memcpy_htod_async(inp['device'], inp['host'], self.stream)
        # run inference
        self.context.execute_async_v2(
            bindings=self.bindings,
            stream_handle=self.stream.handle)
        # fetch outputs from gpu
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out['host'], out['device'], self.stream)
        # wait for all queued copies and the inference to finish
        self.stream.synchronize()
        return [out['host'] for out in self.outputs]

    def detect_video(self, video_path, wait_ms=25):
        """Run detection on every frame of a video and display the result.

        Args:
            video_path: Anything accepted by ``cv2.VideoCapture`` (a file
                path, or 0 for a webcam).
            wait_ms: Delay passed to ``cv2.waitKey`` after each frame. The
                default of 25 ms keeps the original pacing but caps the loop
                below 40 FPS; pass 1 to display frames as fast as they are
                processed. Press 'q' to stop.
        """
        cap = cv2.VideoCapture(video_path)
        prev_time = time.time()
        try:
            while True:
                ret, frame = cap.read()
                # Stop BEFORE touching the frame: at end of stream ``frame``
                # is None and drawing on it would raise.
                if not ret:
                    break

                blob, ratio = preproc(frame, self.imgsz, self.mean, self.std)
                data = self.infer(blob)
                predictions = np.reshape(
                    data, (1, -1, int(5 + self.n_classes)))[0]
                dets = self.postprocess(predictions, ratio)
                if dets is not None:
                    final_boxes = dets[:, :4]
                    final_scores = dets[:, 4]
                    final_cls_inds = dets[:, 5]
                    frame = vis(frame, final_boxes, final_scores,
                                final_cls_inds, conf=0.5,
                                class_names=self.class_names)

                # Loop rate (read + inference + display). The overlay is
                # drawn after inference so the text is never fed to the model.
                now = time.time()
                elapsed = now - prev_time
                prev_time = now
                fps = 1.0 / elapsed if elapsed > 0 else 0.0
                fps_text = "FPS : {:.2f}".format(fps)
                cv2.putText(frame, fps_text, (5, 30),
                            cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 255), 1)

                cv2.imshow('frame', frame)
                if cv2.waitKey(wait_ms) & 0xFF == ord('q'):
                    break
        finally:
            # Release the capture and windows even if a frame fails.
            cap.release()
            cv2.destroyAllWindows()

    def inference(self, img_path, conf=0.5):
        """Detect objects in an image file and return the annotated image.

        Args:
            img_path: Path of the image to read with ``cv2.imread``.
            conf: Minimum score for a detection to be drawn.

        Returns:
            The image with detections drawn on it.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        origin_img = cv2.imread(img_path)
        if origin_img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(
                "could not read image: {}".format(img_path))
        img, ratio = preproc(origin_img, self.imgsz, self.mean, self.std)
        data = self.infer(img)
        predictions = np.reshape(data, (1, -1, int(5 + self.n_classes)))[0]
        dets = self.postprocess(predictions, ratio)
        if dets is not None:
            final_boxes = dets[:, :4]
            final_scores = dets[:, 4]
            final_cls_inds = dets[:, 5]
            origin_img = vis(origin_img, final_boxes, final_scores,
                             final_cls_inds, conf=conf,
                             class_names=self.class_names)
        return origin_img

    @staticmethod
    def postprocess(predictions, ratio):
        """Convert raw predictions to NMS-filtered detections.

        Args:
            predictions: 2-D array whose columns are
                ``[cx, cy, w, h, objectness, class scores...]``.
            ratio: Resize ratio returned by ``preproc``; boxes are divided by
                it to map them back to the original image.

        Returns:
            Whatever ``multiclass_nms`` returns for the converted boxes
            (score threshold 0.1, NMS threshold 0.45).
        """
        boxes = predictions[:, :4]
        # Per-class score = objectness * class probability.
        scores = predictions[:, 4:5] * predictions[:, 5:]

        # Center/size -> corner format.
        half_w = boxes[:, 2] / 2.
        half_h = boxes[:, 3] / 2.
        boxes_xyxy = np.ones_like(boxes)
        boxes_xyxy[:, 0] = boxes[:, 0] - half_w
        boxes_xyxy[:, 1] = boxes[:, 1] - half_h
        boxes_xyxy[:, 2] = boxes[:, 0] + half_w
        boxes_xyxy[:, 3] = boxes[:, 1] + half_h
        boxes_xyxy /= ratio
        return multiclass_nms(boxes_xyxy, scores, nms_thr=0.45, score_thr=0.1)

    def get_fps(self):
        """Print the throughput of one ``infer`` call after 20 warm-up runs.

        Only ``infer`` is timed, so the figure excludes video decoding,
        preprocessing, NMS and display.
        """
        img = np.ones((1, 3, self.imgsz[0], self.imgsz[1]))
        img = np.ascontiguousarray(img, dtype=np.float32)
        # warmup
        for _ in range(20):
            self.infer(img)
        t1 = time.perf_counter()
        self.infer(img)
        # Guard against a zero reading from a coarse timer.
        elapsed = max(time.perf_counter() - t1, 1e-9)
        print(1 / elapsed, 'FPS')
def nms(boxes, scores, nms_thr):
    """Single class NMS implemented in Numpy.

    Args:
        boxes: ``(N, 4)`` array of ``[x1, y1, x2, y2]`` corner boxes.
        scores: ``(N,)`` array of scores, one per box.
        nms_thr: A box is dropped when its IoU with an already kept,
            higher-scoring box is greater than this value.

    Returns:
        List of indices into ``boxes`` of the kept boxes, highest score
        first. Empty when ``boxes`` is empty.
    """
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]

    # The "+ 1" treats coordinates as inclusive pixel indices.
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        rest = order[1:]  # sliced once instead of once per use
        keep.append(i)

        # Intersection of the current best box with every remaining box.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h

        ovr = inter / (areas[i] + areas[rest] - inter)
        # Continue only with boxes that do not overlap the kept one too much.
        order = rest[ovr <= nms_thr]

    return keep
def multiclass_nms(boxes, scores, nms_thr, score_thr):
    """Multiclass NMS implemented in Numpy.

    Runs ``nms`` independently for every class column of ``scores``.

    Args:
        boxes: ``(N, 4)`` array of ``[x1, y1, x2, y2]`` boxes.
        scores: ``(N, num_classes)`` array of per-class scores.
        nms_thr: IoU threshold forwarded to ``nms``.
        score_thr: Only scores strictly greater than this are considered.

    Returns:
        ``(M, 6)`` array with rows ``[x1, y1, x2, y2, score, class_index]``,
        or ``None`` when no box passes the score threshold.
    """
    final_dets = []
    num_classes = scores.shape[1]
    for cls_ind in range(num_classes):
        cls_scores = scores[:, cls_ind]
        valid_score_mask = cls_scores > score_thr
        if not valid_score_mask.any():
            continue

        valid_scores = cls_scores[valid_score_mask]
        valid_boxes = boxes[valid_score_mask]
        keep = nms(valid_boxes, valid_scores, nms_thr)
        if len(keep) > 0:
            cls_inds = np.ones((len(keep), 1)) * cls_ind
            dets = np.concatenate(
                [valid_boxes[keep], valid_scores[keep, None], cls_inds], 1
            )
            final_dets.append(dets)

    if len(final_dets) == 0:
        return None
    return np.concatenate(final_dets, 0)
def preproc(image, input_size, mean, std, swap=(2, 0, 1)):
    """Letterbox-resize an image and convert it to a network input tensor.

    The image is resized with its aspect ratio preserved, placed in the
    top-left corner of a canvas filled with 114, channel-reversed, scaled to
    [0, 1], optionally normalized, and transposed with ``swap``.

    Args:
        image: ``(H, W, 3)`` image, or ``(H, W)`` grayscale image.
        input_size: ``(height, width)`` of the canvas.
        mean: Value subtracted after scaling, or ``None`` to skip.
        std: Value divided by after mean subtraction, or ``None`` to skip.
        swap: Axis order applied at the end (default HWC -> CHW).

    Returns:
        Tuple ``(tensor, r)``: a contiguous float32 array and the resize
        ratio that was applied to the image.
    """
    img = np.array(image)
    if img.ndim == 2:
        # The channel reversal and the 3-axis transpose below need an
        # (H, W, 3) array, so replicate a grayscale image across 3 channels
        # instead of failing on a 2-D canvas.
        img = np.stack((img,) * 3, axis=-1)

    padded_img = np.ones((input_size[0], input_size[1], 3)) * 114.0

    r = min(input_size[0] / img.shape[0], input_size[1] / img.shape[1])
    new_h = int(img.shape[0] * r)
    new_w = int(img.shape[1] * r)
    resized_img = cv2.resize(
        img,
        (new_w, new_h),  # cv2.resize takes (width, height)
        interpolation=cv2.INTER_LINEAR,
    ).astype(np.float32)
    padded_img[:new_h, :new_w] = resized_img

    # Reverse the channel axis (presumably BGR -> RGB for frames read with
    # OpenCV; confirm against the model's training pipeline).
    padded_img = padded_img[:, :, ::-1]
    padded_img /= 255.0
    if mean is not None:
        padded_img -= mean
    if std is not None:
        padded_img /= std
    padded_img = padded_img.transpose(swap)
    padded_img = np.ascontiguousarray(padded_img, dtype=np.float32)
    return padded_img, r
# Color palette used by vis(): a flat list of 3-component colors with values
# in [0, 1], reshaped below to an (N, 3) float32 array. vis() indexes it by
# class id and multiplies by 255 to obtain drawing colors.
_COLORS = np.array(
[
0.000, 0.447, 0.741,
0.850, 0.325, 0.098,
0.929, 0.694, 0.125,
0.494, 0.184, 0.556,
0.466, 0.674, 0.188,
0.301, 0.745, 0.933,
0.635, 0.078, 0.184,
0.300, 0.300, 0.300,
0.600, 0.600, 0.600,
1.000, 0.000, 0.000,
1.000, 0.500, 0.000,
0.749, 0.749, 0.000,
0.000, 1.000, 0.000,
0.000, 0.000, 1.000,
0.667, 0.000, 1.000,
0.333, 0.333, 0.000,
0.333, 0.667, 0.000,
0.333, 1.000, 0.000,
0.667, 0.333, 0.000,
0.667, 0.667, 0.000,
0.667, 1.000, 0.000,
1.000, 0.333, 0.000,
1.000, 0.667, 0.000,
1.000, 1.000, 0.000,
0.000, 0.333, 0.500,
0.000, 0.667, 0.500,
0.000, 1.000, 0.500,
0.333, 0.000, 0.500,
0.333, 0.333, 0.500,
0.333, 0.667, 0.500,
0.333, 1.000, 0.500,
0.667, 0.000, 0.500,
0.667, 0.333, 0.500,
0.667, 0.667, 0.500,
0.667, 1.000, 0.500,
1.000, 0.000, 0.500,
1.000, 0.333, 0.500,
1.000, 0.667, 0.500,
1.000, 1.000, 0.500,
0.000, 0.333, 1.000,
0.000, 0.667, 1.000,
0.000, 1.000, 1.000,
0.333, 0.000, 1.000,
0.333, 0.333, 1.000,
0.333, 0.667, 1.000,
0.333, 1.000, 1.000,
0.667, 0.000, 1.000,
0.667, 0.333, 1.000,
0.667, 0.667, 1.000,
0.667, 1.000, 1.000,
1.000, 0.000, 1.000,
1.000, 0.333, 1.000,
1.000, 0.667, 1.000,
0.333, 0.000, 0.000,
0.500, 0.000, 0.000,
0.667, 0.000, 0.000,
0.833, 0.000, 0.000,
1.000, 0.000, 0.000,
0.000, 0.167, 0.000,
0.000, 0.333, 0.000,
0.000, 0.500, 0.000,
0.000, 0.667, 0.000,
0.000, 0.833, 0.000,
0.000, 1.000, 0.000,
0.000, 0.000, 0.167,
0.000, 0.000, 0.333,
0.000, 0.000, 0.500,
0.000, 0.000, 0.667,
0.000, 0.000, 0.833,
0.000, 0.000, 1.000,
0.000, 0.000, 0.000,
0.143, 0.143, 0.143,
0.286, 0.286, 0.286,
0.429, 0.429, 0.429,
0.571, 0.571, 0.571,
0.714, 0.714, 0.714,
0.857, 0.857, 0.857,
0.000, 0.447, 0.741,
0.314, 0.717, 0.741,
0.50, 0.5, 0
]
).astype(np.float32).reshape(-1, 3)
def vis(img, boxes, scores, cls_ids, conf=0.5, class_names=None):
    """Draw detection boxes and labels onto ``img`` and return it.

    Args:
        img: Image to draw on; it is modified in place.
        boxes: Sequence of ``[x0, y0, x1, y1]`` boxes.
        scores: Sequence of scores, one per box.
        cls_ids: Sequence of class indices, one per box.
        conf: Detections scoring below this value are skipped.
        class_names: Optional sequence mapping class index to a label. When
            ``None`` the numeric class index is used as the label.

    Returns:
        The same ``img`` object with the drawings applied.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    for box, score, cls_id in zip(boxes, scores, cls_ids):
        if score < conf:
            continue
        cls_id = int(cls_id)
        x0, y0, x1, y1 = (int(v) for v in box[:4])

        # Wrap around so a class index beyond the palette still gets a color.
        base_color = _COLORS[cls_id % len(_COLORS)]
        color = (base_color * 255).astype(np.uint8).tolist()
        txt_bk_color = (base_color * 255 * 0.7).astype(np.uint8).tolist()
        # Dark text on light colors, light text on dark colors.
        txt_color = (0, 0, 0) if np.mean(base_color) > 0.5 else (255, 255, 255)

        # The default class_names=None must not crash the label lookup.
        label = class_names[cls_id] if class_names is not None else str(cls_id)
        text = '{}:{:.1f}%'.format(label, score * 100)
        txt_size = cv2.getTextSize(text, font, 0.4, 1)[0]

        cv2.rectangle(img, (x0, y0), (x1, y1), color, 2)
        # Filled background behind the label, then the label itself.
        cv2.rectangle(
            img,
            (x0, y0 + 1),
            (x0 + txt_size[0] + 1, y0 + int(1.5 * txt_size[1])),
            txt_bk_color,
            -1
        )
        cv2.putText(img, text, (x0, y0 + txt_size[1]), font, 0.4, txt_color, thickness=1)
    return img
from tensorrt-for-yolo-series.
def detect_video(self, video_path): ======> from this function i am getting fps 23-27
pred.video() gives overall frames of a video more than 500
from tensorrt-for-yolo-series.
def detect_video(self, video_path): ======> from this function i am getting fps 23-27
pred.video() gives overall frames of a video more than 500
the result of get_fps() function is ???
from tensorrt-for-yolo-series.
Is your platform a 1080Ti?
from tensorrt-for-yolo-series.
I am using RTX 3070 8gb
from tensorrt-for-yolo-series.
I am using RTX 3070 8gb
The get_fps() result is correct. I think detect_video may need a warm-up; this is only a simple demo, and I will update the code later.
You can also test the model with your camera.
from tensorrt-for-yolo-series.
Sure, I am eagerly waiting for the updates on the camera test model.
from tensorrt-for-yolo-series.
Sure, I am eagerly waiting for the updates on the camera test model.
try this solution
from tensorrt-for-yolo-series.
Thanks, for the solution.
FPS increased to 90-98 fps
Cuda utilisation is still 354 MB
Is there any solution to utilizing full gpu/cuda.
from tensorrt-for-yolo-series.
So, I can increase my fps more.
from tensorrt-for-yolo-series.
Thanks, for the solution.
FPS increased to 90-98 fps
Cuda utilisation is still 354 MB
Is there any solution to utilizing full gpu/cuda.
OpenCV frame updates are slower than the inference time, so your model may in fact be running faster than the displayed FPS.
Using a larger batch size may increase GPU usage.
from tensorrt-for-yolo-series.
Kindly provide me some links or sources to implement multiple batches for video inference.
.
from tensorrt-for-yolo-series.
Related Issues (20)
- YOLOv7 Tensorrt converted model inference is equal to PyTorch model HOT 3
- int8 vs fp16 加速倍数能有多少? HOT 1
- How to use engine in a process or a thread HOT 4
- how to deploy in multiple nvidia card, such as a computer with 8 3060 card?
- Add dynamic batch support for converting from onnx to .engine?
- auto in_dims = engine->getBindingDimensions(engine->getBindingIndex("image_arrays")); HOT 1
- En715 Jetson xaiver Nx Yolov7.trt Not detect HOT 2
- yolov7,official,int8,onnx-> trt报错 HOT 3
- c++ endtoend 关于预测的置信度绘制 HOT 4
- memory leak: Destroy function does not work
- Detection duplicates with fp16 on Jetson Nano (TensorRT v8.2.1.8) HOT 2
- Support for windows?
- License? HOT 4
- 关于V8 tensorrt 出现乱框的情况 HOT 33
- TensorRT Conversion Issue "TypeError: pybind11::init(): factory function returned nullptr" HOT 2
- about tensort version question or other question? HOT 5
- yolov8 inference HOT 3
- Error Code 1: Serialization (Serialization assertion creator failed.Cannot deserialize plugin since corresponding IPluginCreator not found in Plugin Registry) HOT 2
- wrong confidence score (negative confidence score) on Jetson Nano inference HOT 3
- usage example for image_batch.py HOT 2
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from tensorrt-for-yolo-series.