// Object Detection
// YOLOP/toolkits/deploy/main.cpp
// Commit 67bb36a: "First model version" (Riser)
#include "yolov5.hpp"
#include "zedcam.hpp"
#include <csignal>
// Loop flag polled by main(); non-zero while the program should keep running.
// It is written from a signal handler, so it must be `volatile std::sig_atomic_t`:
// that is the only plain object type the C++ standard guarantees can be safely
// assigned from an asynchronous signal handler.
static volatile std::sig_atomic_t keep_running = 1;

/**
 * Signal handler that requests a clean shutdown.
 *
 * @param sig Signal number delivered by the runtime. Only SIGINT clears
 *            `keep_running`; any other signal is ignored.
 */
void keyboard_handler(int sig) {
    // handle keyboard interrupt
    if (sig == SIGINT) {
        keep_running = 0;
    }
}
int main(int argc, char** argv) {
signal(SIGINT, keyboard_handler);
cudaSetDevice(DEVICE);
// CUcontext ctx;
// CUdevice device;
// cuInit(0);
// cuDeviceGet(&device, 0);
// cuCtxCreate(&ctx, 0, device);
std::string engine_name = "../mcnet.engine";
// deserialize the .engine and run inference
std::ifstream file(engine_name, std::ios::binary);
if (!file.good()) {
std::cerr << "read " << engine_name << " error!" << std::endl;
return -1;
}
char *trtModelStream = nullptr;
size_t size = 0;
file.seekg(0, file.end);
size = file.tellg();
file.seekg(0, file.beg);
trtModelStream = new char[size];
assert(trtModelStream);
file.read(trtModelStream, size);
file.close();
// prepare data ---------------------------
static float det_out[BATCH_SIZE * OUTPUT_SIZE];
static int seg_out[BATCH_SIZE * IMG_H * IMG_W];
static int lane_out[BATCH_SIZE * IMG_H * IMG_W];
IRuntime* runtime = createInferRuntime(gLogger);
assert(runtime != nullptr);
ICudaEngine* engine = runtime->deserializeCudaEngine(trtModelStream, size);
assert(engine != nullptr);
IExecutionContext* context = engine->createExecutionContext();
assert(context != nullptr);
delete[] trtModelStream;
assert(engine->getNbBindings() == 4);
void* buffers[4];
// In order to bind the buffers, we need to know the names of the input and output tensors.
// Note that indices are guaranteed to be less than IEngine::getNbBindings()
const int inputIndex = engine->getBindingIndex(INPUT_BLOB_NAME);
const int output_det_index = engine->getBindingIndex(OUTPUT_DET_NAME);
const int output_seg_index = engine->getBindingIndex(OUTPUT_SEG_NAME);
const int output_lane_index = engine->getBindingIndex(OUTPUT_LANE_NAME);
assert(inputIndex == 0);
assert(output_det_index == 1);
assert(output_seg_index == 2);
assert(output_lane_index == 3);
// Create GPU buffers on device
CUDA_CHECK(cudaMalloc(&buffers[inputIndex], BATCH_SIZE * 3 * INPUT_H * INPUT_W * sizeof(float)));
CUDA_CHECK(cudaMalloc(&buffers[output_det_index], BATCH_SIZE * OUTPUT_SIZE * sizeof(float)));
CUDA_CHECK(cudaMalloc(&buffers[output_seg_index], BATCH_SIZE * IMG_H * IMG_W * sizeof(int)));
CUDA_CHECK(cudaMalloc(&buffers[output_lane_index], BATCH_SIZE * IMG_H * IMG_W * sizeof(int)));
// Create stream
cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));
// create zed
auto zed = create_camera();
sl::Resolution image_size = zed->getCameraInformation().camera_configuration.resolution;
sl::Mat img_zed(image_size.width, image_size.height, sl::MAT_TYPE::U8_C4, sl::MEM::GPU);
cv::cuda::GpuMat img_ocv = slMat2cvMatGPU(img_zed);
cv::cuda::GpuMat cvt_img(image_size.height, image_size.width, CV_8UC3);
// store seg results
cv::Mat tmp_seg(IMG_H, IMG_W, CV_32S, seg_out);
// sotore lane results
cv::Mat tmp_lane(IMG_H, IMG_W, CV_32S, lane_out);
cv::Mat seg_res(image_size.height, image_size.width, CV_32S);
cv::Mat lane_res(image_size.height, image_size.width, CV_32S);
char key = ' ';
while (keep_running and key != 'q') {
// retrieve img
if (zed->grab() != sl::ERROR_CODE::SUCCESS) continue;
zed->retrieveImage(img_zed, sl::VIEW::LEFT, sl::MEM::GPU);
cudaSetDevice(DEVICE);
cv::cuda::cvtColor(img_ocv, cvt_img, cv::COLOR_BGRA2BGR);
// preprocess ~3ms
preprocess_img_gpu(cvt_img, (float*)buffers[inputIndex], INPUT_W, INPUT_H); // letterbox
// buffers[inputIndex] = pr_img.data;
// Run inference
auto start = std::chrono::system_clock::now();
// cuCtxPushCurrent(ctx);
doInference(*context, stream, buffers, det_out, seg_out, lane_out, BATCH_SIZE);
// cuCtxPopCurrent(&ctx);
auto end = std::chrono::system_clock::now();
std::cout << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;
// postprocess ~0ms
std::vector<Yolo::Detection> batch_res;
nms(batch_res, det_out, CONF_THRESH, NMS_THRESH);
cv::resize(tmp_seg, seg_res, seg_res.size(), 0, 0, cv::INTER_NEAREST);
cv::resize(tmp_lane, lane_res, lane_res.size(), 0, 0, cv::INTER_NEAREST);
// show results
//std::cout << res.size() << std::endl;
visualization(cvt_img, seg_res, lane_res, batch_res, key);
}
// destroy windows
#ifdef SHOW_IMG
cv::destroyAllWindows();
#endif
// close camera
img_zed.free();
zed->close();
delete zed;
// Release stream and buffers
cudaStreamDestroy(stream);
CUDA_CHECK(cudaFree(buffers[inputIndex]));
CUDA_CHECK(cudaFree(buffers[output_det_index]));
CUDA_CHECK(cudaFree(buffers[output_seg_index]));
CUDA_CHECK(cudaFree(buffers[output_lane_index]));
// Destroy the engine
context->destroy();
engine->destroy();
runtime->destroy();
return 0;
}