当前位置: 首页 > 工具软件 > MXML > 使用案例 >

mxml中文乱码的解决

祁远
2023-12-01

字符编码格式

mxml 内部按 UTF-8 处理字符串,如果直接写入 GBK 编码的中文就会出现乱码,因此写入前需要先判断字符串的编码格式,必要时转换为 UTF-8。判断字符编码格式可参考:https://blog.csdn.net/thedarkfairytale/article/details/73457200

/*
 * Check whether a NUL-terminated byte string is structurally valid UTF-8.
 *
 * The check is purely structural: every lead byte must be followed by the
 * number of continuation bytes (10xxxxxx) that it announces.  For backward
 * compatibility the historical 5- and 6-byte forms (lead bytes 0xF8-0xFD)
 * are still accepted; overlong encodings and surrogate ranges are not
 * examined.
 *
 * str: NUL-terminated string to examine; NULL is treated as "not UTF-8".
 *
 * Returns 1 if the string is well-formed (pure ASCII and the empty string
 * both count as UTF-8), 0 otherwise.
 */
int IsUTF8(const char* str)
{
	unsigned int nPending = 0; /* continuation bytes still expected */

	if (!str) {
		return 0;
	}

	for (const unsigned char *p = (const unsigned char *)str; *p != '\0'; ++p) {
		const unsigned char chr = *p;

		if (nPending > 0) {
			/* Inside a multi-byte sequence: the byte must look like 10xxxxxx. */
			if ((chr & 0xC0) != 0x80) {
				return 0;
			}
			nPending--;
			continue;
		}

		if (chr < 0x80) {
			/* Plain ASCII, 0xxxxxxx. */
			continue;
		}

		/* Lead byte: derive the number of continuation bytes that follow. */
		if (chr >= 0xFE) {
			/* 0xFE and 0xFF never occur in UTF-8; they used to be
			 * mistaken for 5-byte lead bytes. */
			return 0;
		}
		else if (chr >= 0xFC) {
			nPending = 5;
		}
		else if (chr >= 0xF8) {
			nPending = 4;
		}
		else if (chr >= 0xF0) {
			nPending = 3;
		}
		else if (chr >= 0xE0) {
			nPending = 2;
		}
		else if (chr >= 0xC0) {
			nPending = 1;
		}
		else {
			/* 0x80-0xBF is a continuation byte with no lead byte before it. */
			return 0;
		}
	}

	/* A sequence cut short by the terminator violates the encoding rules. */
	return nPending == 0;
}

/*
 * Check whether a NUL-terminated byte string is structurally valid GBK.
 *
 * GBK encodes ASCII as a single byte (0x00-0x7F) and every other character
 * as two bytes: a lead byte in 0x81-0xFE followed by a trail byte in
 * 0x40-0xFE, excluding 0x7F.
 *
 * The check is purely structural, so byte strings in other encodings can
 * also satisfy it; a result of 1 means "could be GBK", not "is certainly
 * GBK".
 *
 * str: NUL-terminated string to examine; NULL is treated as "not GBK".
 *
 * Returns 1 if every byte fits the GBK layout (pure ASCII and the empty
 * string both count as GBK), 0 otherwise.
 */
int IsGBK(const char* str)
{
	int bNeedTrail = 0; /* non-zero while the second byte of a pair is expected */

	if (!str) {
		return 0;
	}

	for (const unsigned char *p = (const unsigned char *)str; *p != '\0'; ++p) {
		const unsigned char chr = *p;

		if (bNeedTrail) {
			/* Trail byte: 0x40-0xFE, and 0x7F is explicitly not allowed. */
			if (chr < 0x40 || chr > 0xFE || chr == 0x7F) {
				return 0;
			}
			bNeedTrail = 0;
			continue;
		}

		if (chr < 0x80) {
			/* Single-byte ASCII. */
			continue;
		}

		/* Lead byte of a two-byte character: 0x81-0xFE only. */
		if (chr < 0x81 || chr > 0xFE) {
			return 0;
		}
		bNeedTrail = 1;
	}

	/* A lead byte left without its trail byte violates the encoding rules. */
	return !bNeedTrail;
}

XmlAddElemStr的修改部分

/*
 * Append a new element named szElemName under ptRoot and give it szValue as
 * its text content.
 *
 * When IsGBK() accepts szValue, the text is first converted with
 * GB2312ToUTF8() into a temporary heap buffer and the converted text is
 * stored instead; otherwise szValue is stored as-is.  The temporary buffer
 * is released on every exit path.
 *
 * ptRoot:     parent node that receives the new element.
 * szElemName: name of the element to create.
 * szValue:    text content; NULL is rejected.
 * pptElem:    optional out-parameter; on success receives the new element.
 *
 * Returns 0 on success, -1 on failure (NULL value, allocation failure, or
 * a failing mxmlNewElement()/mxmlNewText() call).
 *
 * NOTE(review): IsGBK() is a structural check and also accepts some valid
 * UTF-8 input (for example the six bytes of UTF-8 "中文" form three valid
 * GBK pairs), so text that is already UTF-8 may be converted a second time.
 * Consider skipping the conversion when IsUTF8() accepts the value.
 */
int XmlAddElemStr(mxml_node_t *ptRoot, const int8 *szElemName, const int8 *szValue, mxml_node_t **pptElem)
{
	int nRet = -1;
	int8 *pConverted = NULL;      /* owned UTF-8 copy; stays NULL when no conversion happens */
	const int8 *pText = szValue;  /* text actually handed to mxml */
	mxml_node_t *ptElem = NULL;
	mxml_node_t *ptText = NULL;

	if (!szValue)
	{
		return -1;
	}

	if (IsGBK(szValue))
	{
		int tmpLen = (int)strlen(szValue);
		/* Three output bytes per input byte plus the terminator, computed in
		 * size_t so the multiplication cannot overflow int. */
		size_t bufSize = (size_t)tmpLen * 3 + 1;

		pConverted = malloc(bufSize);
		if (!pConverted)
		{
			return -1;
		}
		/* Zero the buffer only after the allocation is known to be valid. */
		memset(pConverted, 0, bufSize);
		GB2312ToUTF8(pConverted, szValue, tmpLen);
		pText = pConverted;
	}

	ptElem = mxmlNewElement(ptRoot, szElemName);
	if (NULL == ptElem)
	{
		printf("mxmlNewElement() failed, {%s}\n", szElemName);
		goto cleanup;
	}

	ptText = mxmlNewText(ptElem, 0, pText);
	if (NULL == ptText)
	{
		/* Log while the text is still valid, i.e. before it is freed. */
		printf("mxmlNewText() failed, Elem{%s}, value{%s}\n", szElemName, pText);
		goto cleanup;
	}

	if (pptElem)
	{
		*pptElem = ptElem;
	}
	nRet = 0;

cleanup:
	/* Releasing the buffer here presumes mxmlNewText() keeps its own copy of
	 * the string - TODO confirm against the Mini-XML version in use. */
	free(pConverted);
	return nRet;
}

编码格式转换

 类似资料: