main.cpp
/**
* Copyright (c) 2020 Nobuo Tsukamoto
*
* This software is released under the MIT License.
* See the LICENSE file in the project root for more information.
*/
#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include <boost/algorithm/string.hpp>
#include "object_detector.h"
const cv::String kKeys =
"{help h usage ? | | show help command.}"
"{n thread |2 | num of thread to set tf-lite interpreter.}"
"{s score |0.5 | score threshold.}"
"{l label |. | path to label file.}"
"{@input | | path to tf-lite model file.}"
;
const cv::String kWindowName = "Object detection example.";
const cv::Scalar kWhiteColor = cv::Scalar(246, 250, 250);
const cv::Scalar kBlueColor = cv::Scalar(255, 209, 0);
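// Reads a label file of "id name" lines into a map from class id to label string.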
std::unique_ptr<std::map<long, std::string>> ReadLabelFile(const std::string& label_path)
{
auto labels = std::make_unique<std::map<long, std::string>>();
std::ifstream ifs(label_path);
if (ifs.is_open())
{
std::string label = "";
while (std::getline(ifs, label))
{
std::vector<std::string> result;
boost::algorithm::split(result, label, boost::is_any_of(" "), boost::algorithm::token_compress_on); // Split by spaces, collapsing repeats.
if (result.size() < 2)
{
std::cout << "Expected at least two fields per label line, got " << result.size() << "." << std::endl;
continue;
}
auto label_string = result[1];
for (size_t i = 2; i < result.size(); i++)
{
label_string += " " + result[i];
}
auto id = std::stol(result[0]);
//std::cout << "id: " << id << ", name: " << label_string << ", " << result.size() << std::endl;
labels->insert(std::make_pair(id, label_string));
}
}
else
{
std::cout << "Label file not found. : " << label_path << std::endl;
}
return labels;
}
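// Draws caption text with a dark outline and a white fill so it stays readable on any background.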
void DrawCaption(
cv::Mat& im,
const cv::Point& point,
const std::string& caption)
{
cv::putText(im, caption, point, cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(0, 0, 0), 2);
cv::putText(im, caption, point, cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(255, 255, 255), 1);
}
int main(int argc, char* argv[]) try
{
// Argument parsing
cv::String model_path;
cv::CommandLineParser parser(argc, argv, kKeys);
if (parser.has("h"))
{
parser.printMessage();
return 0;
}
auto num_of_threads = parser.get<unsigned int>("thread");
auto score_threshold = parser.get<float>("score");
auto label_path = parser.get<cv::String>("label");
if (parser.has("@input"))
{
model_path = parser.get<cv::String>("@input");
}
else
{
std::cout << "No model file path." << std::endl;
return 0;
}
if (!parser.check()) {
parser.printErrors();
return 1;
}
std::cout << "model path : " << model_path << std::endl;
std::cout << "label path : " << label_path << std::endl;
std::cout << "threads : " << num_of_threads << std::endl;
std::cout << "score threshold : " << score_threshold << std::endl;
// Create Object detector
auto detector = std::make_unique<ObjectDetector>(score_threshold);
if (!detector->BuildInterpreter(model_path, num_of_threads))
{
std::cerr << "Failed to build the interpreter." << std::endl;
return EXIT_FAILURE;
}
auto width = detector->Width();
auto height = detector->Height();
// Load label file
auto labels = ReadLabelFile(label_path);
// Window setting
cv::namedWindow(kWindowName,
cv::WINDOW_GUI_NORMAL | cv::WINDOW_AUTOSIZE | cv::WINDOW_KEEPRATIO);
cv::moveWindow(kWindowName, 100, 100);
// Videocapture setting.
cv::VideoCapture cap(0);
auto cap_width = cap.get(cv::CAP_PROP_FRAME_WIDTH);
auto cap_height = cap.get(cv::CAP_PROP_FRAME_HEIGHT);
std::cout << "Start capture." << " isOpened: " << std::boolalpha << cap.isOpened() << std::endl;
while(cap.isOpened())
{
const auto& start_time = std::chrono::steady_clock::now();
cv::Mat frame, input_im;
cap >> frame;
if (frame.empty())
{
break; // No more frames (camera disconnected or stream ended).
}
// Create input data.
// camera resolution => input_im tensor size
cv::resize(frame, input_im, cv::Size(width, height));
std::vector<uint8_t> input_data(input_im.data, input_im.data + (input_im.cols * input_im.rows * input_im.elemSize()));
// Run inference.
std::chrono::duration<double, std::milli> inference_time_span;
const auto& result = detector->RunInference(input_data, inference_time_span);
for (const auto& object : *result)
{
auto x = int(object.x * cap_width);
auto y = int(object.y * cap_height);
auto w = int(object.width * cap_width);
auto h = int(object.height * cap_height);
// Draw bounding box
cv::rectangle(frame, cv::Rect(x, y, w, h), kBlueColor, 2);
// Draw Caption
std::ostringstream caption;
auto it = labels->find(object.class_id);
if (it != std::end(*labels))
{
caption << it->second;
}
else
{
caption << "ID: " << std::to_string(object.class_id);
}
caption << "(" << std::fixed << std::setprecision(2) << object.scores << ")";
DrawCaption(frame, cv::Point(x, y), caption.str());
}
// Calc fps and draw fps and inference time.
std::chrono::duration<double, std::milli> time_span = std::chrono::steady_clock::now() - start_time;
std::ostringstream time_caption;
time_caption << std::fixed << std::setprecision(2) << inference_time_span.count() << " ms, " << 1000.0 / time_span.count() << " FPS";
DrawCaption(frame, cv::Point(10, 60), time_caption.str());
cv::imshow(kWindowName, frame);
// Handle the keyboard before moving to the next frame
const int key = cv::waitKey(1);
if (key == 27 || key == 'q')
{
break; // Exit on ESC or 'q'.
}
}
return EXIT_SUCCESS;
}
catch (const cv::Exception& e)
{
std::cerr << "OpenCV error calling :\n " << e.what() << std::endl;
return EXIT_FAILURE;
}
catch (const std::exception& e)
{
std::cerr << e.what() << std::endl;
return EXIT_FAILURE;
}
object_detector.h
/**
* Copyright (c) 2020 Nobuo Tsukamoto
*
* This software is released under the MIT License.
* See the LICENSE file in the project root for more information.
*/
#ifndef OBJECT_DETECTOR_H_
#define OBJECT_DETECTOR_H_
#include <chrono>
#include <memory>
#include <string>
#include "edgetpu.h"
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#include <tensorflow/lite/model.h>
#include <tensorflow/lite/optional_debug_tools.h>
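// A single detection result. Box coordinates are normalized to [0, 1] relative to the input image.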
class BoundingBox
{
public:
int class_id = 0;
float scores = 0.0f;
float x = 0.0f;
float y = 0.0f;
float width = 0.0f;
float height = 0.0f;
float center_x = 0.0f;
float center_y = 0.0f;
};
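// Wraps a TF-Lite detection model (CPU or Edge TPU) and decodes its post-processed outputs into BoundingBox results.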
class ObjectDetector
{
public:
ObjectDetector(const float score_threshold);
bool BuildInterpreter(
const std::string& model_path,
const unsigned int num_of_threads = 1);
std::unique_ptr<std::vector<BoundingBox>> RunInference(
const std::vector<uint8_t>& input_data,
std::chrono::duration<double, std::milli>& time_span);
int Width() const;
int Height() const;
int Channels() const;
private:
std::unique_ptr<tflite::FlatBufferModel> model_;
std::unique_ptr<tflite::ops::builtin::BuiltinOpResolver> resolver_;
std::shared_ptr<edgetpu::EdgeTpuContext> edgetpu_context_;
std::unique_ptr<tflite::Interpreter> interpreter_;
TfLiteTensor* output_locations_ = nullptr;
TfLiteTensor* output_classes_ = nullptr;
TfLiteTensor* output_scores_ = nullptr;
TfLiteTensor* num_detections_ = nullptr;
float score_threshold_ = 0.5f;
int input_width_ = 0;
int input_height_ = 0;
int input_channels_ = 0;
std::vector<int> input_tensor_shape;
size_t input_array_size = 1;
bool BuildInterpreterInternal(const unsigned int num_of_threads);
bool BuildEdgeTpuInterpreterInternal(std::string model_path, const unsigned int num_of_threads);
float* GetTensorData(TfLiteTensor& tensor, const int index = 0);
TfLiteFloatArray* TfLiteFloatArrayCopy(const TfLiteFloatArray* src);
};
#endif /* OBJECT_DETECTOR_H_ */
object_detector.cpp
/**
* Copyright (c) 2020 Nobuo Tsukamoto
*
* This software is released under the MIT License.
* See the LICENSE file in the project root for more information.
*/
#include <algorithm>
#include <chrono>
#include <cstring>
#include <iostream>
#include <iterator>
#include <memory>
#include <sstream>
#include <vector>
#include <edgetpu.h>
#include "object_detector.h"
ObjectDetector::ObjectDetector(const float score_threshold)
: score_threshold_(score_threshold)
{
}
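// Loads the model file and builds either a CPU or an Edge TPU interpreter, chosen by whether the file name contains "edgetpu".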
bool ObjectDetector::BuildInterpreter(
const std::string& model_path,
const unsigned int num_of_threads)
{
auto is_edgetpu = false;
auto result = false;
// Split model name and check edge tpu model.
if (model_path.find("edgetpu") != std::string::npos)
{
is_edgetpu = true;
}
// Load Model
model_ = tflite::FlatBufferModel::BuildFromFile(model_path.c_str());
if (model_ == nullptr)
{
std::cerr << "Fail to build FlatBufferModel from file: " << model_path << std::endl;
return result;
}
if (is_edgetpu)
{
result = BuildEdgeTpuInterpreterInternal(model_path, num_of_threads);
}
else
{
result = BuildInterpreterInternal(num_of_threads);
}
return result;
}
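// Builds a plain CPU interpreter, allocates tensors, and caches the input size and output tensor handles.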
bool ObjectDetector::BuildInterpreterInternal(
const unsigned int num_of_threads)
{
std::cout << "Build TF-Lite Interpreter." << std::endl;
// Build interpreter
tflite::ops::builtin::BuiltinOpResolver resolver;
if (tflite::InterpreterBuilder(*model_, resolver)(&interpreter_) != kTfLiteOk) {
std::cerr << "Failed to build interpreter." << std::endl;
return false;
}
// Set Thread option.
interpreter_->SetNumThreads(num_of_threads);
// Allocate tensor buffers.
if (interpreter_->AllocateTensors() != kTfLiteOk) {
std::cerr << "Failed to allocate tensors." << std::endl;
return false;
}
// Get input tensor size.
const auto& dimensions = interpreter_->tensor(interpreter_->inputs()[0])->dims;
input_height_ = dimensions->data[1];
input_width_ = dimensions->data[2];
input_channels_ = dimensions->data[3];
// Get output tensor
output_locations_ = interpreter_->tensor(interpreter_->outputs()[0]);
output_classes_ = interpreter_->tensor(interpreter_->outputs()[1]);
output_scores_ = interpreter_->tensor(interpreter_->outputs()[2]);
num_detections_ = interpreter_->tensor(interpreter_->outputs()[3]);
return true;
}
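// Opens an Edge TPU device, registers the edgetpu custom op, and builds an interpreter bound to that context.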
bool ObjectDetector::BuildEdgeTpuInterpreterInternal(
std::string model_path,
const unsigned int num_of_threads)
{
std::cout << "Build EdgeTpu Interpreter." << model_path << std::endl;
// Create the EdgeTpuContext.
edgetpu_context_ = edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
if (edgetpu_context_ == nullptr)
{
std::cerr << "Fail create edge tpu context." << std::endl;
return false;
}
// Build interpreter
resolver_ = std::make_unique<tflite::ops::builtin::BuiltinOpResolver>();
resolver_->AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
if (tflite::InterpreterBuilder(*model_, *resolver_)(&interpreter_) != kTfLiteOk) {
std::cerr << "Failed to build interpreter." << std::endl;
return false;
}
// Bind given context with interpreter.
interpreter_->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context_.get());
// Set thread option. Note: this path pins the interpreter to a single thread regardless of num_of_threads.
interpreter_->SetNumThreads(1);
// Allocate tensor buffers.
if (interpreter_->AllocateTensors() != kTfLiteOk) {
std::cerr << "Failed to allocate tensors." << std::endl;
return false;
}
std::cout << "Success AllocateTensors" << std::endl;
// Get input tensor size.
const auto& dimensions = interpreter_->tensor(interpreter_->inputs()[0])->dims;
input_height_ = dimensions->data[1];
input_width_ = dimensions->data[2];
input_channels_ = dimensions->data[3];
input_tensor_shape.resize(dimensions->size);
for (auto i = 0; i < dimensions->size; i++)
{
input_tensor_shape[i] = dimensions->data[i];
input_array_size *= input_tensor_shape[i];
}
std::ostringstream input_string_stream;
std::copy(input_tensor_shape.begin(), input_tensor_shape.end(), std::ostream_iterator<int>(input_string_stream, " "));
std::cout << "input shape: " << input_string_stream.str() << std::endl;
std::cout << "input array size: " << input_array_size << std::endl;
// Get output tensor
output_locations_ = interpreter_->tensor(interpreter_->outputs()[0]);
output_classes_ = interpreter_->tensor(interpreter_->outputs()[1]);
output_scores_ = interpreter_->tensor(interpreter_->outputs()[2]);
num_detections_ = interpreter_->tensor(interpreter_->outputs()[3]);
return true;
}
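// Copies the uint8 input image into tensor 0, invokes the interpreter, and converts detections above the
// score threshold into normalized BoundingBox results. Output boxes are decoded as [ymin, xmin, ymax, xmax].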
std::unique_ptr<std::vector<BoundingBox>> ObjectDetector::RunInference(
const std::vector<uint8_t>& input_data,
std::chrono::duration<double, std::milli>& time_span)
{
const auto& start_time = std::chrono::steady_clock::now();
/*
const int input_tensor_index = interpreter_->inputs()[0];
const TfLiteTensor* input_tensor = interpreter_->tensor(input_tensor_index);
const TfLiteType input_type = input_tensor->type;
const char* input_name = input_tensor->name;
std::vector<int> input_dims(
input_tensor->dims->data,
input_tensor->dims->data + input_tensor->dims->size);
if (input_tensor->quantization.type == kTfLiteNoQuantization)
{
std::cout << "Deal with legacy model with old quantization parameters." << std::endl;
interpreter_ ->SetTensorParametersReadOnly(
input_tensor_index,
input_type,
input_name,
input_dims,
input_tensor->params,
reinterpret_cast<const char*>(input_data.data()),
std::min(input_data.size(), input_array_size));
}
else
{
std::cout << "For models with new quantization parameters, deep copy the parameters." << std::endl;
TfLiteQuantization input_quant_clone = input_tensor->quantization;
const TfLiteAffineQuantization* input_quant_params = reinterpret_cast<TfLiteAffineQuantization*>(
input_tensor->quantization.params);
// |input_quant_params_clone| will be owned by |input_quant_clone|, and will
// be deallocated by free(). Therefore malloc is used to allocate its
// memory here.
TfLiteAffineQuantization* input_quant_params_clone = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
input_quant_params_clone->scale = TfLiteFloatArrayCopy(input_quant_params->scale);
input_quant_params_clone->zero_point = TfLiteIntArrayCopy(input_quant_params->zero_point);
input_quant_params_clone->quantized_dimension = input_quant_params->quantized_dimension;
input_quant_clone.params = input_quant_params_clone;
interpreter_->SetTensorParametersReadOnly(
input_tensor_index, input_type, input_name,
input_dims, input_quant_clone,
reinterpret_cast<const char*>(input_data.data()),
std::min(input_data.size(), input_array_size));
}
*/
uint8_t* input = interpreter_->typed_input_tensor<uint8_t>(0);
std::memcpy(input, input_data.data(), input_data.size());
auto results = std::make_unique<std::vector<BoundingBox>>();
if (interpreter_->Invoke() != kTfLiteOk)
{
std::cerr << "Failed to invoke interpreter." << std::endl;
time_span = std::chrono::steady_clock::now() - start_time;
return results;
}
const float* locations = GetTensorData(*output_locations_);
const float* classes = GetTensorData(*output_classes_);
const float* scores = GetTensorData(*output_scores_);
const int num_detections = (int)*GetTensorData(*num_detections_);
for (auto i = 0; i < num_detections; i++)
{
if (scores[i] >= score_threshold_)
{
auto bounding_box = std::make_unique<BoundingBox>();
auto y0 = locations[4 * i + 0];
auto x0 = locations[4 * i + 1];
auto y1 = locations[4 * i + 2];
auto x1 = locations[4 * i + 3];
bounding_box->class_id = (int)classes[i];
bounding_box->scores = scores[i];
bounding_box->x = x0;
bounding_box->y = y0;
bounding_box->width = x1 - x0;
bounding_box->height = y1 - y0;
bounding_box->center_x = bounding_box->x + (bounding_box->width / 2.0f);
bounding_box->center_y = bounding_box->y + (bounding_box->height / 2.0f);
#if 0
std::cout << "class_id: " << bounding_box->class_id << std::endl;
std::cout << "scores : " << bounding_box->scores << std::endl;
std::cout << "x : " << bounding_box->x << std::endl;
std::cout << "y : " << bounding_box->y << std::endl;
std::cout << "width : " << bounding_box->width << std::endl;
std::cout << "height : " << bounding_box->height << std::endl;
std::cout << "center : " << bounding_box->center_x << ", " << bounding_box->center_y << std::endl;
std::cout << "y : " << bounding_box->y << std::endl;
#endif
results->emplace_back(std::move(*bounding_box));
}
}
time_span =
std::chrono::steady_clock::now() - start_time;
return results;
}
int ObjectDetector::Width() const
{
return input_width_;
}
int ObjectDetector::Height() const
{
return input_height_;
}
int ObjectDetector::Channels() const
{
return input_channels_;
}
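// Returns a pointer to the float data for one batch entry of the tensor (only kTfLiteFloat32 is supported).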
float* ObjectDetector::GetTensorData(TfLiteTensor& tensor, const int index)
{
float* result = nullptr;
auto nelems = 1;
for (auto i = 1; i < tensor.dims->size; i++)
{
nelems *= tensor.dims->data[i];
}
switch (tensor.type)
{
case kTfLiteFloat32:
result = tensor.data.f + nelems * index;
break;
default:
std::cerr << "Unsupported tensor type." << std::endl;
break;
}
return result;
}
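// Deep-copies a TfLiteFloatArray with malloc so the copy can be owned (and later freed) by a TfLiteQuantization clone.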
TfLiteFloatArray* ObjectDetector::TfLiteFloatArrayCopy(const TfLiteFloatArray* src)
{
if (src == nullptr)
{
return nullptr;
}
TfLiteFloatArray* ret = static_cast<TfLiteFloatArray*>(malloc(TfLiteFloatArrayGetSizeInBytes(src->size)));
ret->size = src->size;
std::memcpy(ret->data, src->data, src->size * sizeof(float));
return ret;
}