前言:本文不会讲解如何搭建环境等操作;工程使用 C++ 实现。
CANN开发环境:5.0.3.alpha005
CANN运行环境:任意版本(如果你还在使用DDK当我没说)
输入是一张192(宽)*256(高)矩形图,根据对原工程的打印,以及测试发现,任一检测模型输出的人形图均可作为输入,但要保证输入图像在resize过程中不变形
(1)由于AlphaPose输出的特殊性,测试过了,在5.0.3.alpha003以前的版本会报错,直接升级到最高版本就可以解决这个问题;
(2)转换命令如下:atc --model=256_192_alphaPose17_v0_sim.onnx --framework=5 --output=opencv_rgb_256_192_alphaPose17_v0 --soc_version=Ascend310
(3)模型获取链接:
链接:https://pan.baidu.com/s/1elKNNbN2hEjQcfI0UHxj-A
提取码:ira3
(1)由于我的工程是用 C++ 写的,想适配 Python 版本的读者可以自行实现。从原工程可知,预处理如下:归一化(1/255),减均值(R-0.406,G-0.457,B-0.480);
(2)Opencv:板端我一般使用opencv读取jpg图片,并且模型输入为rgb,所以需要对读入的图片转换通道(BGR->RGB),再去做上述的预处理操作。代码如下:
Result AlphaPose::Preprocess(cv::Mat& input, cv::Mat& resizeImg)
{
    // Letterbox-resize the crop to the model input size, convert BGR->RGB,
    // normalize to [0,1] and subtract the per-channel mean, then copy the
    // planar float data into the model's input buffer.
    // Returns SUCCESS on success, FAILED on allocation/copy failure.
    resizeImg = Utils::ResizeWithPadding(input, modelWidth_, modelHeight_);
    cv::Mat rgb;
    cv::cvtColor(resizeImg, rgb, cv::COLOR_BGR2RGB);
    // Per-channel means in R, G, B order (matches the original Python preprocessing).
    const std::vector<float> meanValue{0.406f, 0.457f, 0.480f};
    std::vector<cv::Mat> rgbChannels(3); // fix: renamed — these are RGB planes after cvtColor
    cv::split(rgb, rgbChannels);
    for (size_t i = 0; i < rgbChannels.size(); i++)
    {
        // Scale by 1/255 (0.00392157) and subtract the channel mean.
        // fix: each plane is single-channel, so the target depth is CV_32FC1
        // (convertTo only uses the depth, so behavior is unchanged).
        rgbChannels[i].convertTo(rgbChannels[i], CV_32FC1, 0.00392157, -meanValue[i]);
    }
    cv::merge(rgbChannels, rgb);
    float* inputBuffer = new(std::nothrow) float[RGBU8_IMAGE_SIZE(modelWidth_, modelHeight_)];
    if (inputBuffer == nullptr) // fix: nothrow allocation was never checked
    {
        ERROR_LOG("Copy resized image data to device failed.");
        return FAILED;
    }
    Utils::CopyDataToBuffer(rgb, inputBuffer);
    // Copy the data into the cache of the input dataset
    aclrtMemcpyKind policy = (ResourceManager::GetInstance()->GetRunMode() == ACL_HOST) ?
        ACL_MEMCPY_HOST_TO_DEVICE : ACL_MEMCPY_DEVICE_TO_DEVICE;
    aclError ret = aclrtMemcpy(imageDataBuf_, imageDataSize_, inputBuffer, imageDataSize_, policy);
    delete[] inputBuffer; // fix: the buffer was leaked on every call in the original code
    if (ret != ACL_ERROR_NONE)
    {
        ERROR_LOG("Copy resized image data to device failed.");
        return FAILED;
    }
    return SUCCESS;
}
其中使用到的函数实现如下:
cv::Mat Utils::ResizeWithPadding(cv::Mat frame, int dstWidth, int dstHeight)
{
    // Pad the image with black borders until its aspect ratio matches
    // dstWidth:dstHeight, then resize — so the person is not distorted.
    int width = frame.cols;
    int height = frame.rows;
    int padVertical = 0, padHorizontal = 0;
    if (width * dstHeight / dstWidth > height)
    {
        // Image is too wide for the target ratio: pad top/bottom.
        padVertical = width * dstHeight / dstWidth - height;
    }
    else
    {
        // Image is too tall for the target ratio: pad left/right.
        padHorizontal = height * dstWidth / dstHeight - width;
    }
    // fix: the original applied pad/2 to both sides, dropping one row/column
    // whenever the padding amount was odd; give the remainder to one side.
    int top = padVertical / 2;
    int bottom = padVertical - top;
    int left = padHorizontal / 2;
    int right = padHorizontal - left;
    cv::Mat paddingMat;
    cv::copyMakeBorder(frame, paddingMat, top, bottom, left, right,
                       cv::BORDER_CONSTANT, cv::Scalar(0, 0, 0));
    cv::Mat resizeMat;
    cv::resize(paddingMat, resizeMat, cv::Size(dstWidth, dstHeight));
    return resizeMat;
}
int Utils::CopyDataToBuffer(cv::Mat frame, float* buffer)
{
    // Copy an interleaved 3-channel float image into `buffer` in planar
    // (channel-major) layout, as expected by the model input.
    // Returns the number of floats written, or FAILED on a memcpy error.
    vector<cv::Mat> planes(3);
    cv::split(frame, planes);
    int copiedFloats = 0;
    for (size_t ch = 0; ch < planes.size(); ++ch)
    {
        const int planeFloats = planes[ch].rows * planes[ch].cols;
        const int planeBytes = planeFloats * sizeof(float);
        int ret = Utils::memcpy_s(buffer + copiedFloats, planeBytes, planes[ch].ptr<float>(0), planeBytes);
        if (ret != SUCCESS)
        {
            ERROR_LOG("memory operation failed in the feature mask's CopyDataToBuffer, error=%d", ret);
            return FAILED;
        }
        copiedFloats += planeFloats;
    }
    return copiedFloats;
}
(3)AIPP:这个没什么好说的,输入格式为 RGB888_U8,并打开 R/B 通道交换开关(rbuv_swap_switch : true)
(1)模型输出只有一个,所以不用担心代码不适配,直接复制即可:
namespace
{
// Post-processing constants for the 17-keypoint AlphaPose model.
static int keyPointNum = 17;      // heatmap channels (keypoints) per person
static int outputHeightSize = 64; // heatmap height (model input 256 / stride 4)
static int outputWidthSize = 48;  // heatmap width  (model input 192 / stride 4)
// Skeleton edges drawn between keypoint indices.
// NOTE(review): pairs such as {14, 16}, {0, 15} and {15, 17} reference index 17,
// which is out of range for a 17-keypoint model (valid indices 0..16); this
// table looks like an 18-point OpenPose-style skeleton — verify it against the
// model's keypoint layout before enabling the drawing code that consumes it.
static int keyPointPair[17][2] = { {1, 2}, {1, 5}, {2, 3}, {3, 4}, {5, 6}, {6, 7}, {1, 8}, {8, 9}, {9, 10},
{1, 11}, {11, 12}, {12, 13}, {1, 0}, {0, 14}, {14, 16}, {0, 15}, {15, 17} };
};
Result AlphaPose::Postprocess(cv::Mat& resizeImg, aclmdlDataset* modelOutput)
{
    // Decode the model's single output: one 48x64 heatmap per keypoint.
    // For each keypoint, take the heatmap argmax, refine by a quarter pixel
    // towards the larger neighbour, and scale back to model-input coordinates.
    uint32_t dataSize = 0;
    float* keyPoint = (float*)GetInferenceOutputItem(dataSize, modelOutput, 0);
    if (keyPoint == nullptr)
    {
        return FAILED;
    }
    uint32_t personNum = (dataSize / sizeof(float)) / (keyPointNum * outputWidthSize * outputHeightSize);
    printf("Exist %u Groups KeyPoint \n", personNum); // fix: %u for uint32_t
    if (personNum > 1)
    {
        // fix: the original logged an empty message and leaked the output buffer here
        ERROR_LOG("Postprocess only supports a single person, got %u groups", personNum);
        if (ResourceManager::GetInstance()->GetRunMode() == ACL_HOST)
        {
            delete[] ((uint8_t*)keyPoint);
        }
        return FAILED;
    }
    float location[keyPointNum][2];
    float confidence[keyPointNum]; // fix: was int, silently truncating the float peak value
    for (uint32_t aa = 0; aa < personNum; aa++) // only one person's output is handled; extend if personNum > 1
    {
        for (int bb = 0; bb < keyPointNum; bb++)
        {
            float* v = keyPoint + bb * outputWidthSize * outputHeightSize;
            // Peak value and its flat index within this keypoint's heatmap
            // (single scan instead of the original's two max_element calls).
            float* maxIt = max_element(v, v + (outputWidthSize * outputHeightSize));
            float max = *maxIt;
            int maxIndex = maxIt - v;
            confidence[bb] = max;
            // A non-positive peak means the keypoint was not detected.
            if (max <= 0)
            {
                location[bb][0] = 0.0;
                location[bb][1] = 0.0;
                continue;
            }
            // Equivalent to px, py in the original Python project.
            int px = maxIndex % outputWidthSize;
            int py = maxIndex / outputWidthSize;
            location[bb][0] = px;
            location[bb][1] = py;
            // Quarter-pixel refinement: shift towards the larger neighbour.
            // fix: the original chained comparison `1 < x < (w - 1)` evaluated as
            // `(1 < x) < (w - 1)` (always true), and the neighbour indices were
            // computed as py*(px±1) instead of row-major py*width + (px±1).
            if (px > 1 && px < (outputWidthSize - 1) && py > 1 && py < (outputHeightSize - 1))
            {
                int x = Utils::Sign(v[py * outputWidthSize + px + 1] - v[py * outputWidthSize + px - 1]);
                int y = Utils::Sign(v[(py + 1) * outputWidthSize + px] - v[(py - 1) * outputWidthSize + px]);
                location[bb][0] += x * 0.25;
                location[bb][1] += y * 0.25;
            }
            // Map heatmap coordinates back to model-input coordinates (stride 4).
            location[bb][0] *= 4;
            location[bb][1] *= 4;
        }
    }
#if 0
    // NOTE(review): keyPointPair contains index 17, which is out of range for the
    // 17-element location array — fix the pair table before enabling this block.
    for (int aa = 0; aa < keyPointNum; aa++)
    {
        printf("%02d: x: %f, y: %f \n", aa, location[aa][0], location[aa][1]);
        if (location[keyPointPair[aa][0]][0] > 0 && location[keyPointPair[aa][1]][0] > 0)
        {
            cv::line(resizeImg, cv::Point(location[keyPointPair[aa][0]][0], location[keyPointPair[aa][0]][1]),
                cv::Point(location[keyPointPair[aa][1]][0], location[keyPointPair[aa][1]][1]),
                cv::Scalar(255, 200, 100), 2, cv::LINE_AA);
            // fix: (0, 0, 255) was the comma operator (== 255), not a color
            cv::circle(resizeImg, cv::Point(int(location[aa][0]), int(location[aa][1])), 1, cv::Scalar(0, 0, 255), -1);
        }
    }
    cv::imwrite("./alphaPoseResult.jpg", resizeImg);
#endif
    // Release the host copy of the output (use your own release mechanism if different).
    if (ResourceManager::GetInstance()->GetRunMode() == ACL_HOST)
    {
        delete[] ((uint8_t*)keyPoint);
    }
    return SUCCESS;
}