main.cpp
/**
* Copyright (c) 2020 Nobuo Tsukamoto
*
* This software is released under the MIT License.
* See the LICENSE file in the project root for more information.
*/
#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
#include <opencv2/opencv.hpp>
#include <boost/algorithm/string.hpp>
#include "object_detector.h"
const cv::String kKeys =
"{help h usage ? | | show help command.}"
"{n thread |2 | num of thread to set tf-lite interpreter.}"
"{s score |0.5 | score threshold.}"
"{l label |. | path to label file.}"
"{@input | | path to tf-lite model file.}"
;
const cv::String kWindowName = "Object detection example.";
const cv::Scalar kWhiteColor = cv::Scalar(246, 250, 250);
const cv::Scalar kBlueColor = cv::Scalar(255, 209, 0);
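// Reads a label file of "id name" lines into a map from class id to label string.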
std::unique_ptr<std::map<long, std::string>> ReadLabelFile(const std::string& label_path)
{
auto labels = std::make_unique<std::map<long, std::string>>();
std::ifstream ifs(label_path);
if (ifs.is_open())
{
std::string label = "";
while (std::getline(ifs, label))
{
std::vector<std::string> result;
boost::algorithm::split(result, label, boost::is_any_of(" "), boost::algorithm::token_compress_on); // Split by spaces, collapsing repeats.
if (result.size() < 2)
{
std::cout << "Expected at least two fields per label line, got " << result.size() << "." << std::endl;
continue;
}
auto label_string = result[1];
for (size_t i = 2; i < result.size(); i++)
{
label_string += " " + result[i];
}
auto id = std::stol(result[0]);
//std::cout << "id: " << id << ", name: " << label_string << ", " << result.size() << std::endl;
labels->insert(std::make_pair(id, label_string));
}
}
else
{
std::cout << "Label file not found. : " << label_path << std::endl;
}
return labels;
}
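// Draws caption text with a dark outline and a white fill so it stays readable on any background.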
void DrawCaption(
cv::Mat& im,
const cv::Point& point,
const std::string& caption)
{
cv::putText(im, caption, point, cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(0, 0, 0), 2);
cv::putText(im, caption, point, cv::FONT_HERSHEY_SIMPLEX, 0.8, cv::Scalar(255, 255, 255), 1);
}
int main(int argc, char* argv[]) try
{
// Argument parsing
cv::String model_path;
cv::CommandLineParser parser(argc, argv, kKeys);
if (parser.has("h"))
{
parser.printMessage();
return 0;
}
auto num_of_threads = parser.get<unsigned int>("thread");
auto score_threshold = parser.get<float>("score");
auto label_path = parser.get<cv::String>("label");
if (parser.has("@input"))
{
model_path = parser.get<cv::String>("@input");
}
else
{
std::cout << "No model file path." << std::endl;
return 0;
}
if (!parser.check()) {
parser.printErrors();
return 1;
}
std::cout << "model path : " << model_path << std::endl;
std::cout << "label path : " << label_path << std::endl;
std::cout << "threads : " << num_of_threads << std::endl;
std::cout << "score threshold : " << score_threshold << std::endl;
// Create Object detector
auto detector = std::make_unique<ObjectDetector>(score_threshold);
if (!detector->BuildInterpreter(model_path, num_of_threads))
{
std::cerr << "Failed to build the interpreter." << std::endl;
return EXIT_FAILURE;
}
auto width = detector->Width();
auto height = detector->Height();
// Load label file
auto labels = ReadLabelFile(label_path);
// Window setting
cv::namedWindow(kWindowName,
cv::WINDOW_GUI_NORMAL | cv::WINDOW_AUTOSIZE | cv::WINDOW_KEEPRATIO);
cv::moveWindow(kWindowName, 100, 100);
// Videocapture setting.
cv::VideoCapture cap(0);
auto cap_width = cap.get(cv::CAP_PROP_FRAME_WIDTH);
auto cap_height = cap.get(cv::CAP_PROP_FRAME_HEIGHT);
std::cout << "Start capture." << " isOpened: " << std::boolalpha << cap.isOpened() << std::endl;
while(cap.isOpened())
{
const auto& start_time = std::chrono::steady_clock::now();
cv::Mat frame, input_im;
cap >> frame;
if (frame.empty())
{
break; // No more frames (camera disconnected or stream ended).
}
// Create input data.
// camera resolution => input_im tensor size
cv::resize(frame, input_im, cv::Size(width, height));
std::vector<uint8_t> input_data(input_im.data, input_im.data + (input_im.cols * input_im.rows * input_im.elemSize()));
// Run inference.
std::chrono::duration<double, std::milli> inference_time_span;
const auto& result = detector->RunInference(input_data, inference_time_span);
for (const auto& object : *result)
{
auto x = int(object.x * cap_width);
auto y = int(object.y * cap_height);
auto w = int(object.width * cap_width);
auto h = int(object.height * cap_height);
// Draw bounding box
cv::rectangle(frame, cv::Rect(x, y, w, h), kBlueColor, 2);
// Draw Caption
std::ostringstream caption;
auto it = labels->find(object.class_id);
if (it != std::end(*labels))
{
caption << it->second;
}
else
{
caption << "ID: " << std::to_string(object.class_id);
}
caption << "(" << std::fixed << std::setprecision(2) << object.scores << ")";
DrawCaption(frame, cv::Point(x, y), caption.str());
}
// Calc fps and draw fps and inference time.
std::chrono::duration<double, std::milli> time_span = std::chrono::steady_clock::now() - start_time;
std::ostringstream time_caption;
time_caption << std::fixed << std::setprecision(2) << inference_time_span.count() << " ms, " << 1000.0 / time_span.count() << " FPS";
DrawCaption(frame, cv::Point(10, 60), time_caption.str());
cv::imshow(kWindowName, frame);
// Handle the keyboard before moving to the next frame
const int key = cv::waitKey(1);
if (key == 27 || key == 'q')
{
break; // Exit on ESC or 'q'.
}
}
return EXIT_SUCCESS;
}
catch (const cv::Exception& e)
{
std::cerr << "OpenCV error calling :\n " << e.what() << std::endl;
return EXIT_FAILURE;
}
catch (const std::exception& e)
{
std::cerr << e.what() << std::endl;
return EXIT_FAILURE;
}
object_detector.h
/**
* Copyright (c) 2020 Nobuo Tsukamoto
*
* This software is released under the MIT License.
* See the LICENSE file in the project root for more information.
*/
#ifndef OBJECT_DETECTOR_H_
#define OBJECT_DETECTOR_H_
#include <chrono>
#include <memory>
#include <string>
#include "edgetpu.h"
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#include <tensorflow/lite/model.h>
#include <tensorflow/lite/optional_debug_tools.h>
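// A single detection result. Box coordinates are normalized to [0, 1] relative to the input image.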
class BoundingBox
{
public:
int class_id = 0;
float scores = 0.0f;
float x = 0.0f;
float y = 0.0f;
float width = 0.0f;
float height = 0.0f;
float center_x = 0.0f;
float center_y = 0.0f;
};
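// Wraps a TF-Lite detection model (CPU or Edge TPU) and decodes its post-processed outputs into BoundingBox results.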
class ObjectDetector
{
public:
ObjectDetector(const float score_threshold);
bool BuildInterpreter(
const std::string& model_path,
const unsigned int num_of_threads = 1);
std::unique_ptr<std::vector<BoundingBox>> RunInference(
const std::vector<uint8_t>& input_data,
std::chrono::duration<double, std::milli>& time_span);
int Width() const;
int Height() const;
int Channels() const;
private:
std::unique_ptr<tflite::FlatBufferModel> model_;
std::unique_ptr<tflite::ops::builtin::BuiltinOpResolver> resolver_;
std::shared_ptr<edgetpu::EdgeTpuContext> edgetpu_context_;
std::unique_ptr<tflite::Interpreter> interpreter_;
TfLiteTensor* output_locations_ = nullptr;
TfLiteTensor* output_classes_ = nullptr;
TfLiteTensor* output_scores_ = nullptr;
TfLiteTensor* num_detections_ = nullptr;
float score_threshold_ = 0.5f;
int input_width_ = 0;
int input_height_ = 0;
int input_channels_ = 0;
std::vector<int> input_tensor_shape;
size_t input_array_size = 1;
bool BuildInterpreterInternal(const unsigned int num_of_threads);
bool BuildEdgeTpuInterpreterInternal(std::string model_path, const unsigned int num_of_threads);
float* GetTensorData(TfLiteTensor& tensor, const int index = 0);
TfLiteFloatArray* TfLiteFloatArrayCopy(const TfLiteFloatArray* src);
};
#endif /* OBJECT_DETECTOR_H_ */
object_detector.cpp
/**
* Copyright (c) 2020 Nobuo Tsukamoto
*
* This software is released under the MIT License.
* See the LICENSE file in the project root for more information.
*/
#include <algorithm>
#include <chrono>
#include <cstring>
#include <iostream>
#include <iterator>
#include <memory>
#include <sstream>
#include <vector>
#include <edgetpu.h>
#include "object_detector.h"
ObjectDetector::ObjectDetector(const float score_threshold)
: score_threshold_(score_threshold)
{
}
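// Loads the model file and builds either a CPU or an Edge TPU interpreter, chosen by whether the file name contains "edgetpu".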
bool ObjectDetector::BuildInterpreter(
const std::string& model_path,
const unsigned int num_of_threads)
{
auto is_edgetpu = false;
auto result = false;
// Split model name and check edge tpu model.
if (model_path.find("edgetpu") != std::string::npos)
{
is_edgetpu = true;
}
// Load Model
model_ = tflite::FlatBufferModel::BuildFromFile(model_path.c_str());
if (model_ == nullptr)
{
std::cerr << "Fail to build FlatBufferModel from file: " << model_path << std::endl;
return result;
}
if (is_edgetpu)
{
result = BuildEdgeTpuInterpreterInternal(model_path, num_of_threads);
}
else
{
result = BuildInterpreterInternal(num_of_threads);
}
return result;
}
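// Builds a plain CPU interpreter, allocates tensors, and caches the input size and output tensor handles.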
bool ObjectDetector::BuildInterpreterInternal(
const unsigned int num_of_threads)
{
std::cout << "Build TF-Lite Interpreter." << std::endl;
// Build interpreter
tflite::ops::builtin::BuiltinOpResolver resolver;
if (tflite::InterpreterBuilder(*model_, resolver)(&interpreter_) != kTfLiteOk) {
std::cerr << "Failed to build interpreter." << std::endl;
return false;
}
// Set Thread option.
interpreter_->SetNumThreads(num_of_threads);
// Allocate tensor buffers.
if (interpreter_->AllocateTensors() != kTfLiteOk) {
std::cerr << "Failed to allocate tensors." << std::endl;
return false;
}
// Get input tensor size.
const auto& dimensions = interpreter_->tensor(interpreter_->inputs()[0])->dims;
input_height_ = dimensions->data[1];
input_width_ = dimensions->data[2];
input_channels_ = dimensions->data[3];
// Get output tensor
output_locations_ = interpreter_->tensor(interpreter_->outputs()[0]);
output_classes_ = interpreter_->tensor(interpreter_->outputs()[1]);
output_scores_ = interpreter_->tensor(interpreter_->outputs()[2]);
num_detections_ = interpreter_->tensor(interpreter_->outputs()[3]);
return true;
}
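// Opens an Edge TPU device, registers the edgetpu custom op, and builds an interpreter bound to that context.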
bool ObjectDetector::BuildEdgeTpuInterpreterInternal(
std::string model_path,
const unsigned int num_of_threads)
{
std::cout << "Build EdgeTpu Interpreter." << model_path << std::endl;
// Create the EdgeTpuContext.
edgetpu_context_ = edgetpu::EdgeTpuManager::GetSingleton()->OpenDevice();
if (edgetpu_context_ == nullptr)
{
std::cerr << "Fail create edge tpu context." << std::endl;
return false;
}
// Build interpreter
resolver_ = std::make_unique<tflite::ops::builtin::BuiltinOpResolver>();
resolver_->AddCustom(edgetpu::kCustomOp, edgetpu::RegisterCustomOp());
if (tflite::InterpreterBuilder(*model_, *resolver_)(&interpreter_) != kTfLiteOk) {
std::cerr << "Failed to build interpreter." << std::endl;
return false;
}
// Bind given context with interpreter.
interpreter_->SetExternalContext(kTfLiteEdgeTpuContext, edgetpu_context_.get());
// Set thread option. Note: this path pins the interpreter to a single thread regardless of num_of_threads.
interpreter_->SetNumThreads(1);
// Allocate tensor buffers.
if (interpreter_->AllocateTensors() != kTfLiteOk) {
std::cerr << "Failed to allocate tensors." << std::endl;
return false;
}
std::cout << "Success AllocateTensors" << std::endl;
// Get input tensor size.
const auto& dimensions = interpreter_->tensor(interpreter_->inputs()[0])->dims;
input_height_ = dimensions->data[1];
input_width_ = dimensions->data[2];
input_channels_ = dimensions->data[3];
input_tensor_shape.resize(dimensions->size);
for (auto i = 0; i < dimensions->size; i++)
{
input_tensor_shape[i] = dimensions->data[i];
input_array_size *= input_tensor_shape[i];
}
std::ostringstream input_string_stream;
std::copy(input_tensor_shape.begin(), input_tensor_shape.end(), std::ostream_iterator<int>(input_string_stream, " "));
std::cout << "input shape: " << input_string_stream.str() << std::endl;
std::cout << "input array size: " << input_array_size << std::endl;
// Get output tensor
output_locations_ = interpreter_->tensor(interpreter_->outputs()[0]);
output_classes_ = interpreter_->tensor(interpreter_->outputs()[1]);
output_scores_ = interpreter_->tensor(interpreter_->outputs()[2]);
num_detections_ = interpreter_->tensor(interpreter_->outputs()[3]);
return true;
}
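// Copies the uint8 input image into tensor 0, invokes the interpreter, and converts detections above the
// score threshold into normalized BoundingBox results. Output boxes are decoded as [ymin, xmin, ymax, xmax].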
std::unique_ptr<std::vector<BoundingBox>> ObjectDetector::RunInference(
const std::vector<uint8_t>& input_data,
std::chrono::duration<double, std::milli>& time_span)
{
const auto& start_time = std::chrono::steady_clock::now();
/*
const int input_tensor_index = interpreter_->inputs()[0];
const TfLiteTensor* input_tensor = interpreter_->tensor(input_tensor_index);
const TfLiteType input_type = input_tensor->type;
const char* input_name = input_tensor->name;
std::vector<int> input_dims(
input_tensor->dims->data,
input_tensor->dims->data + input_tensor->dims->size);
if (input_tensor->quantization.type == kTfLiteNoQuantization)
{
std::cout << "Deal with legacy model with old quantization parameters." << std::endl;
interpreter_ ->SetTensorParametersReadOnly(
input_tensor_index,
input_type,
input_name,
input_dims,
input_tensor->params,
reinterpret_cast<const char*>(input_data.data()),
std::min(input_data.size(), input_array_size));
}
else
{
std::cout << "For models with new quantization parameters, deep copy the parameters." << std::endl;
TfLiteQuantization input_quant_clone = input_tensor->quantization;
const TfLiteAffineQuantization* input_quant_params = reinterpret_cast<TfLiteAffineQuantization*>(
input_tensor->quantization.params);
// |input_quant_params_clone| will be owned by |input_quant_clone|, and will
// be deallocated by free(). Therefore malloc is used to allocate its
// memory here.
TfLiteAffineQuantization* input_quant_params_clone = reinterpret_cast<TfLiteAffineQuantization*>(
malloc(sizeof(TfLiteAffineQuantization)));
input_quant_params_clone->scale = TfLiteFloatArrayCopy(input_quant_params->scale);
input_quant_params_clone->zero_point = TfLiteIntArrayCopy(input_quant_params->zero_point);
input_quant_params_clone->quantized_dimension = input_quant_params->quantized_dimension;
input_quant_clone.params = input_quant_params_clone;
interpreter_->SetTensorParametersReadOnly(
input_tensor_index, input_type, input_name,
input_dims, input_quant_clone,
reinterpret_cast<const char*>(input_data.data()),
std::min(input_data.size(), input_array_size));
}
*/
uint8_t* input = interpreter_->typed_input_tensor<uint8_t>(0);
std::memcpy(input, input_data.data(), input_data.size());
auto results = std::make_unique<std::vector<BoundingBox>>();
if (interpreter_->Invoke() != kTfLiteOk)
{
std::cerr << "Failed to invoke interpreter." << std::endl;
time_span = std::chrono::steady_clock::now() - start_time;
return results;
}
const float* locations = GetTensorData(*output_locations_);
const float* classes = GetTensorData(*output_classes_);
const float* scores = GetTensorData(*output_scores_);
const int num_detections = (int)*GetTensorData(*num_detections_);
for (auto i = 0; i < num_detections; i++)
{
if (scores[i] >= score_threshold_)
{
auto bounding_box = std::make_unique<BoundingBox>();
auto y0 = locations[4 * i + 0];
auto x0 = locations[4 * i + 1];
auto y1 = locations[4 * i + 2];
auto x1 = locations[4 * i + 3];
bounding_box->class_id = (int)classes[i];
bounding_box->scores = scores[i];
bounding_box->x = x0;
bounding_box->y = y0;
bounding_box->width = x1 - x0;
bounding_box->height = y1 - y0;
bounding_box->center_x = bounding_box->x + (bounding_box->width / 2.0f);
bounding_box->center_y = bounding_box->y + (bounding_box->height / 2.0f);
#if 0
std::cout << "class_id: " << bounding_box->class_id << std::endl;
std::cout << "scores : " << bounding_box->scores << std::endl;
std::cout << "x : " << bounding_box->x << std::endl;
std::cout << "y : " << bounding_box->y << std::endl;
std::cout << "width : " << bounding_box->width << std::endl;
std::cout << "height : " << bounding_box->height << std::endl;
std::cout << "center : " << bounding_box->center_x << ", " << bounding_box->center_y << std::endl;
std::cout << "y : " << bounding_box->y << std::endl;
#endif
results->emplace_back(std::move(*bounding_box));
}
}
time_span =
std::chrono::steady_clock::now() - start_time;
return results;
}
int ObjectDetector::Width() const
{
return input_width_;
}
int ObjectDetector::Height() const
{
return input_height_;
}
int ObjectDetector::Channels() const
{
return input_channels_;
}
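// Returns a pointer to the float data for one batch entry of the tensor (only kTfLiteFloat32 is supported).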
float* ObjectDetector::GetTensorData(TfLiteTensor& tensor, const int index)
{
float* result = nullptr;
auto nelems = 1;
for (auto i = 1; i < tensor.dims->size; i++)
{
nelems *= tensor.dims->data[i];
}
switch (tensor.type)
{
case kTfLiteFloat32:
result = tensor.data.f + nelems * index;
break;
default:
std::cerr << "Unsupported tensor type." << std::endl;
break;
}
return result;
}
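// Deep-copies a TfLiteFloatArray with malloc so the copy can be owned (and later freed) by a TfLiteQuantization clone.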
TfLiteFloatArray* ObjectDetector::TfLiteFloatArrayCopy(const TfLiteFloatArray* src)
{
if (src == nullptr)
{
return nullptr;
}
TfLiteFloatArray* ret = static_cast<TfLiteFloatArray*>(malloc(TfLiteFloatArrayGetSizeInBytes(src->size)));
ret->size = src->size;
std::memcpy(ret->data, src->data, src->size * sizeof(float));
return ret;
}