Leptonica PIX 与 Qt QImage 的相互转换

马梓

2023-12-01

最近有个项目需要用到 OCR 功能，研究了一下，感觉 Tesseract 还不错，准备在项目中试试。但是发现 Tesseract 底层的图像数据用的是 Leptonica 的 PIX 结构，而我的程序准备用 Qt C++ 来写。这里就涉及一个问题：如何把 QImage 图像数据传递给 Tesseract。

花了两天时间，写了两个 Leptonica PIX 与 Qt QImage 的相互转换函数。

为了写这段代码，首先需要搞明白 PIX 都有哪些部分、如何构造。Leptonica 的文档不多，搞懂这些问题主要还是靠读源码。按照 Linus 的说法就是：Read the fucking source code。

PIX 的结构如下：

/*!
 * Basic Pix: the image header (size, depth, samples per pixel, row stride,
 * resolution, I/O hints) together with pointers to the optional text string,
 * the optional colormap and the raster data.
 */
struct Pix
{
    l_uint32             w;         /*!< width in pixels                   */
    l_uint32             h;         /*!< height in pixels                  */
    l_uint32             d;         /*!< depth in bits (bpp)               */
    l_uint32             spp;       /*!< number of samples per pixel       */
    l_uint32             wpl;       /*!< 32-bit words/line                 */
                                    /*!< (row stride in bytes is wpl * 4)  */
    l_uint32             refcount;  /*!< reference count (1 if no clones)  */
    l_int32              xres;      /*!< image res (ppi) in x direction    */
                                    /*!< (use 0 if unknown)                */
    l_int32              yres;      /*!< image res (ppi) in y direction    */
                                    /*!< (use 0 if unknown)                */
    l_int32              informat;  /*!< input file format, IFF_*          */
    l_int32              special;   /*!< special instructions for I/O, etc */
    char                *text;      /*!< text string associated with pix   */
    struct PixColormap  *colormap;  /*!< colormap (may be null)            */
    l_uint32            *data;      /*!< the image data                    */
                                    /*!< (typed as 32-bit words whatever   */
                                    /*!< the pixel depth is)               */
};
typedef struct Pix PIX;

其中有几个字段需要简单介绍一下：

wpl 记录图像每行数据占用多少个 32 bit word（words per line）。QImage 里与之对应的是 QImage::bytesPerLine()，准确地说是 bytesPerLine / 4。
从这里我们还可以知道 PIX 中每行数据占用的空间都是 4字节的整数倍。也就是每行的首地址是四字节对齐的。
PIX 支持 colormap，在 QImage 中叫做 QImage::colorTable()。
data 指向真正的数据，虽然是 uint32 * 类型，但是实际数据有可能是单字节、双字节、三字节（RGB24）或者四字节（RGB32 或 RGBA32）。

PIX 强制要求每行数据 4 字节对齐；QImage 自己分配的缓冲区每行同样按 32 位对齐，但用外部缓冲区构造 QImage 时可以自行指定 bytesPerLine，行跨度不一定与 PIX 一致。再加上两者的像素字节序和 R、G、B 三个分量的顺序不同，PIX 很难和 QImage 共享数据。因此我下面的代码都是新分配数据空间之后再复制数据，没有去考虑数据共享问题。这样会有些效率损失，但是一般情况下是够用的。如果真到了需要考虑这个转换过程耗时问题的时候，可以考虑再单独写一个函数来实现。不过由于 PIX 和 QImage 的数据中 R、G、B 三个分量的顺序不同，共享数据时肯定会遇到各种麻烦。
先来看看 PIX 到 QImage 的转换：

#include <leptonica/allheaders.h>
#include <QImage>
#include <QBuffer>
#include <QtDebug>
/**
 * @brief PIX2QImage Converts a Leptonica PIX into a QImage. The returned
 *        QImage owns its pixels and shares no data with pixImage.
 *
 * Depth mapping:
 *   -  1 bpp -> QImage::Format_Mono
 *   -  8 bpp -> QImage::Format_Indexed8
 *   - 24 bpp -> QImage::Format_RGB888
 *   - 32 bpp -> QImage::Format_RGB32 (only the R, G and B samples are used)
 *   - any other depth (2, 4, 16 bpp) is first expanded to 32 bpp with
 *     pixConvertTo32(), because QImage has no matching pixel format.
 *
 * A PIX colormap becomes the QImage color table. Without a colormap, 1 bpp
 * images get a white/black table (Leptonica treats 1 as black) and 8 bpp
 * images get a gray ramp, so the QImage renders the way Leptonica
 * interprets the data.
 *
 * @param pixImage Source image. It is not modified and the caller keeps
 *                 ownership.
 * @return The converted image, or a null QImage when pixImage is nullptr,
 *         carries no pixel data, or a required allocation failed.
 */
QImage PIX2QImage(PIX *pixImage)
{
    if (pixImage == nullptr)
    {
        qDebug() << "***Invalid format!!!";
        return QImage();
    }

    const l_int32 width = pixGetWidth(pixImage);
    const l_int32 height = pixGetHeight(pixImage);
    const l_int32 depth = pixGetDepth(pixImage);

    QImage::Format format = QImage::Format_Invalid;
    switch (depth)
    {
    case 1:
        format = QImage::Format_Mono;
        break;
    case 8:
        format = QImage::Format_Indexed8;
        break;
    case 24:
        format = QImage::Format_RGB888;
        break;
    case 32:
        format = QImage::Format_RGB32;
        break;
    default:
    {
        // Reading 2/4/16 bpp rows as if they held one 32-bit word per pixel
        // would run past the end of the buffer, so expand to 32 bpp first.
        PIX *expanded = pixConvertTo32(pixImage);
        if (expanded == nullptr)
        {
            return QImage();
        }
        QImage converted;
        if (pixGetDepth(expanded) == 32)
        {
            converted = PIX2QImage(expanded);
        }
        pixDestroy(&expanded);
        return converted;
    }
    }

    l_uint32 *data = pixGetData(pixImage);
    const l_int32 wpl = pixGetWpl(pixImage);
    if (data == nullptr)
    {
        // Header-only PIX: there is nothing to copy.
        return QImage();
    }

    QImage result(width, height, format);
    if (result.isNull())
    {
        // Zero-sized image or allocation failure inside QImage.
        return QImage();
    }

    if (depth == 32)
    {
        qDebug() << "QImage::Format_RGB32";
        for (int y = 0; y < height; ++y)
        {
            QRgb *dst = reinterpret_cast<QRgb *>(result.scanLine(y));
            const l_uint32 *src = data + static_cast<size_t>(wpl) * y;
            for (int x = 0; x < width; ++x)
            {
                // Go through the accessor functions so neither library's
                // in-memory channel order has to be known here.
                l_int32 rval = 0;
                l_int32 gval = 0;
                l_int32 bval = 0;
                extractRGBValues(src[x], &rval, &gval, &bval);
                dst[x] = qRgb(rval, gval, bval);
            }
        }
    }
    else if (depth == 24)
    {
        // 24 bpp rows are plain r,g,b byte triplets; copy only the pixel
        // bytes so a differing row padding can never overflow either buffer.
        const size_t rowBytes = static_cast<size_t>(width) * 3;
        for (int y = 0; y < height; ++y)
        {
            memcpy(result.scanLine(y), data + static_cast<size_t>(wpl) * y, rowBytes);
        }
    }
    else
    {
        // 1 and 8 bpp: Leptonica packs pixels MSB-first inside 32-bit words,
        // so on little-endian hosts the bytes of each word are reversed in
        // memory. GET_DATA_BYTE returns them in pixel order on any host.
        const int rowBytes = (depth == 1) ? (width + 7) / 8 : width;
        for (int y = 0; y < height; ++y)
        {
            uchar *dst = result.scanLine(y);
            l_uint32 *src = data + static_cast<size_t>(wpl) * y;
            for (int k = 0; k < rowBytes; ++k)
            {
                dst[k] = static_cast<uchar>(GET_DATA_BYTE(src, k));
            }
        }
    }

    PIXCMAP *pixcmap = pixGetColormap(pixImage);
    if (pixcmap != nullptr)
    {
        qDebug() << "generate colorTable";
        // Use the colormap accessors rather than the struct fields, which
        // are not visible in every Leptonica release.
        const l_int32 count = pixcmapGetCount(pixcmap);
        QVector<QRgb> colorTable;
        colorTable.reserve(count);
        for (l_int32 i = 0; i < count; ++i)
        {
            l_int32 rval = 0;
            l_int32 gval = 0;
            l_int32 bval = 0;
            if (pixcmapGetColor(pixcmap, i, &rval, &gval, &bval) != 0)
            {
                break;
            }
            colorTable.append(qRgb(rval, gval, bval));
        }
        result.setColorTable(colorTable);
    }
    else if (depth == 1)
    {
        // Leptonica convention without a colormap: 0 is white, 1 is black.
        QVector<QRgb> colorTable;
        colorTable.append(qRgb(255, 255, 255));
        colorTable.append(qRgb(0, 0, 0));
        result.setColorTable(colorTable);
    }
    else if (depth == 8)
    {
        // 8 bpp without a colormap is grayscale.
        QVector<QRgb> colorTable;
        colorTable.reserve(256);
        for (int v = 0; v < 256; ++v)
        {
            colorTable.append(qRgb(v, v, v));
        }
        result.setColorTable(colorTable);
    }

    return result;
}

为了解决 R、G、B 顺序问题，用到了下面两个函数。这样效率可能会稍微低一点，但是代码易读性会好很多，而且也不怕 PIX 或 QImage 改变底层数据表示。

extractRGBValues(lines[j], &rval, &gval, &bval);
lined[j] = qRgb(rval, gval, bval);

然后是 QImage 到 PIX 的代码：

/**
 * @brief QImage2Pix Converts a QImage into a newly allocated Leptonica PIX.
 *        No pixel data is shared with image.
 *
 * Format mapping:
 *   - Format_Mono     ->  1 bpp (color table copied into a colormap)
 *   - Format_Indexed8 ->  8 bpp (color table copied into a colormap)
 *   - Format_RGB888   -> 24 bpp (note: Leptonica support for 24 bpp is limited)
 *   - Format_RGB32 / Format_ARGB32 -> 32 bpp; the alpha channel is not copied
 *   - every other format is converted to Format_ARGB32 first and then
 *     handled as 32 bpp.
 *
 * @param image Source image.
 * @return A new PIX owned by the caller (release it with pixDestroy()), or
 *         nullptr if image is null or the PIX could not be allocated.
 */
PIX* QImage2Pix(const QImage &image)
{
    // Validate before allocating so nothing is leaked on the error path.
    if (image.isNull())
    {
        qDebug() << "image is null";
        return nullptr;
    }

    switch (image.format())
    {
    case QImage::Format_Mono:
    case QImage::Format_Indexed8:
    case QImage::Format_RGB888:
    case QImage::Format_RGB32:
    case QImage::Format_ARGB32:
        break;
    default:
        // Formats without a direct PIX equivalent would otherwise yield an
        // all-zero PIX; normalise them to a format handled below. The
        // recursion ends because the converted image is ARGB32 or null.
        return QImage2Pix(image.convertToFormat(QImage::Format_ARGB32));
    }

    const int width = image.width();
    const int height = image.height();
    const int depth = image.depth();

    PIX *pix = pixCreate(width, height, depth);
    if (pix == nullptr)
    {
        return nullptr;
    }
    l_uint32 *data = pixGetData(pix);
    const l_int32 wpl = pixGetWpl(pix);
    if (data == nullptr)
    {
        pixDestroy(&pix);
        return nullptr;
    }

    if (depth <= 8 && image.colorCount() > 0)
    {
        // The colormap depth has to match the image depth, and it can hold
        // at most 2^depth entries.
        PIXCMAP *map = pixcmapCreate(depth);
        if (map != nullptr)
        {
            const QVector<QRgb> table = image.colorTable();
            const int capacity = 1 << depth;
            const int n = (table.size() < capacity) ? static_cast<int>(table.size()) : capacity;
            for (int i = 0; i < n; ++i)
            {
                pixcmapAddColor(map, qRed(table[i]), qGreen(table[i]), qBlue(table[i]));
            }
            // pix takes ownership of map.
            pixSetColormap(pix, map);
        }
    }

    if (depth == 32)
    {
        qDebug() << "QImage::Format_RGB32";
        for (int y = 0; y < height; ++y)
        {
            const QRgb *src = reinterpret_cast<const QRgb *>(image.constScanLine(y));
            l_uint32 *dst = data + static_cast<size_t>(wpl) * y;
            for (int x = 0; x < width; ++x)
            {
                // Go through the accessor functions so neither library's
                // in-memory channel order has to be known here.
                l_uint32 pixel = 0;
                composeRGBPixel(qRed(src[x]), qGreen(src[x]), qBlue(src[x]), &pixel);
                dst[x] = pixel;
            }
        }
    }
    else if (depth == 24)
    {
        // Copy only the pixel bytes: QImage::bytesPerLine() may be larger
        // than the PIX row (custom stride) and would overflow it.
        const size_t rowBytes = static_cast<size_t>(width) * 3;
        for (int y = 0; y < height; ++y)
        {
            memcpy(data + static_cast<size_t>(wpl) * y, image.constScanLine(y), rowBytes);
        }
    }
    else
    {
        // 1 and 8 bpp: Leptonica packs pixels MSB-first inside 32-bit words,
        // so on little-endian hosts a raw memcpy would scramble each group
        // of four bytes. SET_DATA_BYTE stores them in pixel order.
        const int rowBytes = (depth == 1) ? (width + 7) / 8 : width;
        for (int y = 0; y < height; ++y)
        {
            const uchar *src = image.constScanLine(y);
            l_uint32 *dst = data + static_cast<size_t>(wpl) * y;
            for (int k = 0; k < rowBytes; ++k)
            {
                SET_DATA_BYTE(dst, k, src[k]);
            }
        }
    }

    return pix;
}

这两个函数都没有覆盖所有的图像格式，但是我们常见的 8 bit、24 bit、32 bit 图像格式基本都支持了。8 bit 图像没有测试，24 bit 和 32 bit 图像都测试通过。

如果不考虑效率，这段代码还可以写得更简单些，比如 QImage 转换到 PIX 可以写为：

/**
 * @brief makePIXFromQImage Converts a QImage to a PIX by encoding it as BMP
 *        into a memory buffer and letting Leptonica decode that buffer.
 *        Simple, but slower than copying the pixels directly.
 * @param image Source image.
 * @return A new PIX owned by the caller (release it with pixDestroy()), or
 *         nullptr if the image could not be encoded or decoded.
 */
PIX* makePIXFromQImage(const QImage &image)
{
    QByteArray ba;
    QBuffer buf(&ba);
    // A failed open or save (e.g. a null image) leaves the buffer empty or
    // partial; do not hand that to the BMP reader.
    if (!buf.open(QIODevice::WriteOnly) || !image.save(&buf, "BMP"))
    {
        return nullptr;
    }
    return pixReadMemBmp(reinterpret_cast<const l_uint8 *>(ba.constData()),
                         static_cast<size_t>(ba.size()));
}

PIX 转换为 QImage 也可以类似的转换，这里就不介绍了。
希望上面的代码对大家有用。

Leptonica PIX 与 Qt QImage 的相互转换

相关阅读

相关文章

相关问答

相关文档