一个简单的xml如下:
<xml>
<node attr1="value1" attr2="value2">aaa</node>
</xml>
其中:
<xml>和<node>分别是开始标签(或者叫开始节点);
<xml>没有属性,<node>有两个属性(attr1和attr2);
</xml>和</node>是结束标签;
aaa是<node>节点的值(文本)。
xml有多种解析方式,最常见的是DOM方式和SAX方式。下面是两种解析方式的对比。
SAX(Simple API for XML)
DOM(Document Object Model)
GLib库提供的解析方式为SAX,官方文档建议在解析简单xml时使用,如果解析非常复杂的xml时,可选用别的xml解析器。
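GLib对xml数据格式的约定(限制)如下:只支持UTF-8编码;不支持用户自定义实体;处理指令、注释和doctype声明会被原样透传(passthrough),不做任何解释;不支持DTD,也不做校验。
GLib库支持的xml数据格式如下:元素;属性;五个标准实体;字符引用;CDATA段。
五个标准实体为 &amp; &lt; &gt; &quot; &apos;,分别对应字符 & < > " '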
GLib库的SAX解析器的核心是GMarkupParser结构体,该结构体内有五个函数指针
struct GMarkupParser {
void (*start_element)(xxx);
void (*end_element)(xxx);
void (*text)(xxx);
void (*passthrough)(xxx);
void (*error)(xxx);
}
gchar * g_markup_escape_text ()
gchar * g_markup_printf_escaped ()
gchar * g_markup_vprintf_escaped ()
GMarkupParseContext * g_markup_parse_context_new ()
gboolean g_markup_parse_context_parse ()
gboolean g_markup_parse_context_end_parse ()
void g_markup_parse_context_free ()
void g_markup_parse_context_get_position ()
const gchar * g_markup_parse_context_get_element ()
const GSList * g_markup_parse_context_get_element_stack ()
gpointer g_markup_parse_context_get_user_data ()
void g_markup_parse_context_push ()
gpointer g_markup_parse_context_pop ()
GMarkupParseContext * g_markup_parse_context_ref ()
void g_markup_parse_context_unref ()
gboolean g_markup_collect_attributes ()
// 创建解析器
GMarkupParseContext * g_markup_parse_context_new ()
// 释放解析器
void g_markup_parse_context_free ()
// 解析
gboolean g_markup_parse_context_parse ()
gboolean g_markup_parse_context_end_parse ()
void g_markup_parse_context_push ()
gpointer g_markup_parse_context_pop ()
// 引用解引用
GMarkupParseContext * g_markup_parse_context_ref ()
void g_markup_parse_context_unref ()
// 属性收集
gboolean g_markup_collect_attributes ()
// 解析位置获取
void g_markup_parse_context_get_position ()
// 标签获取
const gchar * g_markup_parse_context_get_element ()
// 标签栈获取
const GSList * g_markup_parse_context_get_element_stack ()
// 用户数据获取
gpointer g_markup_parse_context_get_user_data ()
// 转义字符处理
gchar * g_markup_escape_text ()
gchar * g_markup_printf_escaped ()
gchar * g_markup_vprintf_escaped ()
待解析xml内容:
<xml>
<head/>
<element_num>2</element_num>
<elements>
<element_name attribute_name1="attribute_value1" attribute_name2="attribute_value2">text100</element_name>
<element_name attribute_name3="attribute_value3" attribute_name4="attribute_value4">text200</element_name>
</elements>
</xml>
本xml的elements节点下有两个子节点,每个节点又有两个属性及属性值,使用GLib对该xml解析,程序如下。
解析程序:
源码见glib_examples\glib_markup\glib_markup_basic
#include <glib.h>
static void start( GMarkupParseContext *context,
const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
gpointer user_data,
GError ** error )
{
const gchar **name_cursor = attribute_names;
const gchar **value_cursor = attribute_values;
g_printf("(start)element_name: %s \n", element_name);
while (*name_cursor) {
g_printf("(start)name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
static void end( GMarkupParseContext *context,
const gchar *element_name,
gpointer user_data,
GError **error )
{
g_printf("(end)element_name: %s \n", element_name);
}
static void text( GMarkupParseContext *context,
const gchar *text,
gsize text_len,
gpointer user_data,
GError **error )
{
g_printf("(text)text:%s \n", text);
}
/*
 * error callback: prints the message carried by the GError that the
 * parser reports. context and user_data are not used.
 */
static void err(GMarkupParseContext *context,
                GError              *error,
                gpointer             user_data)
{
    const gchar *reason = error->message;

    (void) context;
    (void) user_data;
    g_print("(err)parse xml failed: %s \n", reason);
}
GMarkupParser parser = {
. start_element = start,
. end_element = end,
. text = text,
. passthrough = NULL,
. error = err
};
int main( )
{
GMarkupParseContext *context;
gchar *buf = NULL;
gsize length = 0;
g_file_get_contents("xml_basic.xml" , &buf, &length, NULL);
g_print( "%s \n" , buf) ;
context = g_markup_parse_context_new(&parser, 0, NULL, NULL);
if(NULL == context) {
g_print("context new failed! \n");
return -1;
}
if(FALSE == g_markup_parse_context_parse(context, buf, length, NULL)) {
g_print("context parse failed! \n");
}
g_markup_parse_context_free(context);
return 0;
}
运行结果:
[root@centos7_6 glib_markup_basic]# ./glib_markup_basic
<xml>
<head/>
<element_num>2</element_num>
<elements>
<element_name attribute_name1="attribute_value1" attribute_name2="attribute_value2">text100</element_name>
<element_name attribute_name3="attribute_value3" attribute_name4="attribute_value4">text200</element_name>
</elements>
</xml>
(start)element_name: xml
(text)text:
(start)element_name: head
(end)element_name: head
(text)text:
(start)element_name: element_num
(text)text:2
(end)element_name: element_num
(text)text:
(start)element_name: elements
(text)text:
(start)element_name: element_name
(start)name_cursor:attribute_name1, value_cursor:attribute_value1
(start)name_cursor:attribute_name2, value_cursor:attribute_value2
(text)text:text100
(end)element_name: element_name
(text)text:
(start)element_name: element_name
(start)name_cursor:attribute_name3, value_cursor:attribute_value3
(start)name_cursor:attribute_name4, value_cursor:attribute_value4
(text)text:text200
(end)element_name: element_name
(text)text:
(end)element_name: elements
(text)text:
(end)element_name: xml
以简单节点<element_num>2</element_num>为例,说明一下SAX方式解析xml的过程。
解析到开始标签(<element_num>),调用start回调函数开始解析;
解析到节点值(2),调用text回调函数;
解析到结束标签(</element_num>),调用end回调函数,解析结束。
elements下面有两个子节点,节点名均为element_name,每个子节点都会被解析到,当解析到子节点时,会取出子节点的属性名(attribute_namex)、属性值(attribute_valuex)和子节点值(两个子节点的值分别为text100和text200)。
待解析xml内容:
<Glib>
<CoreApplications>
<Module version="2.48" name="Main Event Loop">manages all available sources of events</Module>
<Module version="2.48" name="Thread Pools">pools of threads to execute work concurrently</Module>
</CoreApplications>
<Utilities>
<Module version="2.48" name="File Utilities">various file-related functions</Module>
<Module version="2.48" name="Simple XML Subset Parser">parses a subset of XML</Module>
</Utilities>
<DataTypes>
<Module version="2.48" name="Sequences">scalable lists</Module>
<Module version="2.48" name="Byte Arrays">arrays of bytes</Module>
</DataTypes>
</Glib>
上面xml,CoreApplications、Utilities和DataTypes三个节点下面的子节点名字一模一样,只有name属性的值不一样,最终想要的结果是解析各Module子节点的name属性值,然后解析各Module子节点的节点值,将两者一一对应。下面是对该xml解析的程序示例。
解析程序:
源码见glib_examples\glib_markup\glib_markup_same_node_name
#include <stdio.h>
#include <glib.h>
/* Element names the callbacks compare against. */
#define TEST_GMARKUP_CORE_APPS "CoreApplications"
#define TEST_GMARKUP_UTILITIES "Utilities"
#define TEST_GMARKUP_DATATYPES "DataTypes"
#define TEST_GMARKUP_MODULE "Module"
/*
 * Section flags: start() sets a flag to TRUE when the matching section
 * element opens and end() resets it to FALSE when it closes. start() and
 * text() read them to label what they print.
 */
gboolean g_core_flag = FALSE;
gboolean g_util_flag = FALSE;
gboolean g_data_flag = FALSE;
static void start( GMarkupParseContext *context,
const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
gpointer user_data,
GError ** error )
{
const gchar **name_cursor = attribute_names;
const gchar **value_cursor = attribute_values;
#if 0
g_printf("(start)element_name: %s \n", element_name);
while (*name_cursor) {
g_printf("(start)name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
#endif
#if 1
if(0 == strcmp(element_name, TEST_GMARKUP_CORE_APPS)) {
g_print("(start)start parse of core \n");
g_core_flag = TRUE;
}
if(0 == strcmp(element_name, TEST_GMARKUP_UTILITIES)) {
g_print("(start)start parse of util \n");
g_util_flag = TRUE;
}
if(0 == strcmp(element_name, TEST_GMARKUP_DATATYPES)) {
g_print("(start)start parse of data \n");
g_data_flag = TRUE;
}
if(0 == strcmp(element_name, TEST_GMARKUP_MODULE)){
if(g_core_flag) {
while (*name_cursor) {
g_print("(start)core: name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
if(g_util_flag) {
while (*name_cursor) {
g_print("(start)util: name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
if(g_data_flag) {
while (*name_cursor) {
g_print("(start)data: name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
}
#endif
}
static void end( GMarkupParseContext *context,
const gchar *element_name,
gpointer user_data,
GError **error )
{
#if 0
g_printf("(end)element_name: %s \n", element_name);
#endif
#if 1
if(0 == strcmp(element_name, TEST_GMARKUP_CORE_APPS)) {
g_print("(end)end parse of core \n");
g_core_flag = FALSE;
}
if(0 == strcmp(element_name, TEST_GMARKUP_UTILITIES)) {
g_print("(end)end parse of util \n");
g_util_flag = FALSE;
}
if(0 == strcmp(element_name, TEST_GMARKUP_DATATYPES)) {
g_print("(end)end parse of data \n");
g_data_flag = FALSE;
}
#endif
}
static void text( GMarkupParseContext *context,
const gchar *text,
gsize text_len,
gpointer user_data,
GError **error )
{
#if 0
g_printf("(text)text:%s \n", text);
#endif
#if 1
if(g_core_flag) {
g_print("(text)core text:%s \n", text);
}
if(g_util_flag) {
g_print("(text)util text:%s \n", text);
}
if(g_data_flag) {
g_print("(text)data text:%s \n", text);
}
#endif
}
/*
 * error callback: prints the message carried by the GError that the
 * parser reports. context and user_data are not used.
 */
static void err(GMarkupParseContext *context,
                GError              *error,
                gpointer             user_data)
{
    const gchar *reason = error->message;

    (void) context;
    (void) user_data;
    g_print("(err)parse xml failed: %s \n", reason);
}
GMarkupParser parser = {
. start_element = start,
. end_element = end,
. text = text,
. passthrough = NULL,
. error = err
};
int main( )
{
GMarkupParseContext *context;
gchar *buf = NULL;
gsize length = 0;
g_file_get_contents("xml_same_node_name.xml" , &buf, &length, NULL);
g_print( "%s \n" , buf) ;
context = g_markup_parse_context_new(&parser, 0, NULL, NULL);
if(NULL == context) {
g_print("context new failed! \n");
return -1;
}
if(FALSE == g_markup_parse_context_parse(context, buf, length, NULL)) {
g_print("context parse failed! \n");
}
g_markup_parse_context_free(context);
return 0;
}
运行结果:
[root@centos7_6 glib_markup_same_node_name]# ./glib_markup_same_node_name
<Glib>
<CoreApplications>
<Module version="2.48" name="Main Event Loop">manages all available sources of events</Module>
<Module version="2.48" name="Thread Pools">pools of threads to execute work concurrently</Module>
</CoreApplications>
<Utilities>
<Module version="2.48" name="File Utilities">various file-related functions</Module>
<Module version="2.48" name="Simple XML Subset Parser">parses a subset of XML</Module>
</Utilities>
<DataTypes>
<Module version="2.48" name="Sequences">scalable lists</Module>
<Module version="2.48" name="Byte Arrays">arrays of bytes</Module>
</DataTypes>
</Glib>
(start)start parse of core
(text)core text:
(start)core: name_cursor:version, value_cursor:2.48
(start)core: name_cursor:name, value_cursor:Main Event Loop
(text)core text:manages all available sources of events
(text)core text:
(start)core: name_cursor:version, value_cursor:2.48
(start)core: name_cursor:name, value_cursor:Thread Pools
(text)core text:pools of threads to execute work concurrently
(text)core text:
(end)end parse of core
(start)start parse of util
(text)util text:
(start)util: name_cursor:version, value_cursor:2.48
(start)util: name_cursor:name, value_cursor:File Utilities
(text)util text:various file-related functions
(text)util text:
(start)util: name_cursor:version, value_cursor:2.48
(start)util: name_cursor:name, value_cursor:Simple XML Subset Parser
(text)util text:parses a subset of XML
(text)util text:
(end)end parse of util
(start)start parse of data
(text)data text:
(start)data: name_cursor:version, value_cursor:2.48
(start)data: name_cursor:name, value_cursor:Sequences
(text)data text:scalable lists
(text)data text:
(start)data: name_cursor:version, value_cursor:2.48
(start)data: name_cursor:name, value_cursor:Byte Arrays
(text)data text:arrays of bytes
(text)data text:
(end)end parse of data
一个节点的解析被分为了三个回调函数(start、end和text),因此,本程序使用全局变量的方式在start和text之间传递消息,使节点的属性值和节点自身值可以正确关联。
本文把xml中以下三种形式统称为"注释":单行注释、多行注释和CDATA段。严格来说CDATA不是注释,而是不做标记解析的字符数据段,里面可以是任意内容,可以包含数据、注释、甚至是转义字符;GLib对它和注释一样,原样交给passthrough回调函数处理。
待解析xml内容:
<xml>
<head/>
<element_num>2</element_num>
<!-- single line comment -->
<elements>
<!--
multiple lines comment 1
multiple lines comment 2
-->
<element_name attribute_name1="attribute_value1" attribute_name2="attribute_value2">text</element_name>
<![CDATA[
<!-- include other comment format -->
multiple cdata comment 1
multiple cdata comment 2
<!--
comment line 1
comment line 2
-->
]]>
<element_name attribute_name1="attribute_value1" attribute_name2="attribute_value2">text</element_name>
</elements>
</xml>
GLib的xml注释解析在passthrough函数完成。
解析程序:
源码见glib_examples\glib_markup\glib_markup_comment
#include <glib.h>
static void start( GMarkupParseContext *context,
const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
gpointer user_data,
GError ** error )
{
const gchar **name_cursor = attribute_names;
const gchar **value_cursor = attribute_values;
g_printf("(start)element_name: %s \n", element_name);
while (*name_cursor) {
g_printf("(start)name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
static void end( GMarkupParseContext *context,
const gchar *element_name,
gpointer user_data,
GError **error )
{
g_printf("(end)element_name: %s \n", element_name);
}
static void text( GMarkupParseContext *context,
const gchar *text,
gsize text_len,
gpointer user_data,
GError **error )
{
g_printf("(text)text:%s \n", text);
}
static void passthrough( GMarkupParseContext *context,
const gchar *passthrough_text,
gsize text_len,
gpointer user_data,
GError **error )
{
g_printf("(passthrough)passthrough:%s \n", passthrough_text);
}
/*
 * error callback: prints the message carried by the GError that the
 * parser reports. context and user_data are not used.
 */
static void err(GMarkupParseContext *context,
                GError              *error,
                gpointer             user_data)
{
    const gchar *reason = error->message;

    (void) context;
    (void) user_data;
    g_print("(err)parse xml failed: %s \n", reason);
}
GMarkupParser parser = {
. start_element = start,
. end_element = end,
. text = text,
. passthrough = passthrough,
. error = err
};
int main( )
{
GMarkupParseContext *context;
gchar *buf = NULL;
gsize length = 0;
g_file_get_contents("xml_comment.xml" , &buf, &length, NULL);
g_print( "%s \n" , buf);
context = g_markup_parse_context_new(&parser, 0, NULL, NULL);
if(NULL == context) {
g_print("context new failed! \n");
return -1;
}
if(FALSE == g_markup_parse_context_parse(context, buf, length, NULL)) {
g_print("context parse failed! \n");
}
g_markup_parse_context_free(context);
return 0;
}
运行结果:
[root@centos7_6 glib_markup_comment]# ./glib_markup_comment
<xml>
<head/>
<element_num>2</element_num>
<!-- single line comment -->
<elements>
<!--
multiple lines comment 1
multiple lines comment 2
-->
<element_name attribute_name1="attribute_value1" attribute_name2="attribute_value2">text</element_name>
<![CDATA[
<!-- include other comment format -->
multiple cdata comment 1
multiple cdata comment 2
<!--
comment line 1
comment line 2
-->
]]>
<element_name attribute_name1="attribute_value1" attribute_name2="attribute_value2">text</element_name>
</elements>
</xml>
(start)element_name: xml
(text)text:
(start)element_name: head
(end)element_name: head
(text)text:
(start)element_name: element_num
(text)text:2
(end)element_name: element_num
(text)text:
(passthrough)passthrough:<!-- single line comment -->
(text)text:
(start)element_name: elements
(text)text:
(passthrough)passthrough:<!--
multiple lines comment 1
multiple lines comment 2
-->
(text)text:
(start)element_name: element_name
(start)name_cursor:attribute_name1, value_cursor:attribute_value1
(start)name_cursor:attribute_name2, value_cursor:attribute_value2
(text)text:text
(end)element_name: element_name
(text)text:
(passthrough)passthrough:<![CDATA[
<!-- include other comment format -->
multiple cdata comment 1
multiple cdata comment 2
<!--
comment line 1
comment line 2
-->
]]>
(text)text:
(start)element_name: element_name
(start)name_cursor:attribute_name1, value_cursor:attribute_value1
(start)name_cursor:attribute_name2, value_cursor:attribute_value2
(text)text:text
(end)element_name: element_name
(text)text:
(end)element_name: elements
(text)text:
(end)element_name: xml
解析过程中可能会出现解析失败的情况,此时可以在error回调函数里打印出错信息。
待解析xml内容:
<xml>
<head/>
<element_num>2</element>
<elements>
<element_name>text1</element_name>
<element_name>text2</element_name>
</elements>
</xml>
解析程序:
源码见glib_examples\glib_markup\glib_markup_error
#include <glib.h>
static void start( GMarkupParseContext *context,
const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
gpointer user_data,
GError ** error )
{
const gchar **name_cursor = attribute_names;
const gchar **value_cursor = attribute_values;
g_printf("(start)element_name: %s \n", element_name);
while (*name_cursor) {
g_printf("(start)name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
static void end( GMarkupParseContext *context,
const gchar *element_name,
gpointer user_data,
GError **error )
{
g_printf("(end)element_name: %s \n", element_name);
}
static void text( GMarkupParseContext *context,
const gchar *text,
gsize text_len,
gpointer user_data,
GError **error )
{
g_printf("(text)text:%s \n", text);
}
/*
 * error callback: prints the message carried by the GError that the
 * parser reports. context and user_data are not used.
 */
static void err(GMarkupParseContext *context,
                GError              *error,
                gpointer             user_data)
{
    const gchar *reason = error->message;

    (void) context;
    (void) user_data;
    g_print("(err)parse xml failed: %s \n", reason);
}
GMarkupParser parser = {
. start_element = start,
. end_element = end,
. text = text,
. passthrough = NULL,
. error = err
};
int main( )
{
GMarkupParseContext *context;
gchar *buf = NULL;
gsize length = 0;
g_file_get_contents("xml_error.xml" , &buf, &length, NULL);
g_print( "%s \n" , buf) ;
context = g_markup_parse_context_new(&parser, 0, NULL, NULL);
if(NULL == context) {
g_print("context new failed! \n");
return -1;
}
if(FALSE == g_markup_parse_context_parse(context, buf, length, NULL)) {
g_print("context parse failed! \n");
}
g_markup_parse_context_free(context);
return 0;
}
运行结果:
[root@centos7_6 glib_markup_error]# ./glib_markup_error
<xml>
<head/>
<element_num>2</element>
<elements>
<element_name>text1</element_name>
<element_name>text2</element_name>
</elements>
</xml>
(start)element_name: xml
(text)text:
(start)element_name: head
(end)element_name: head
(text)text:
(start)element_name: element_num
(text)text:2
(err)parse xml failed: Error on line 3 char 25: Element 'element' was closed, but the currently open element is 'element_num'
context parse failed!
error回调函数有xml解析失败的详细原因,如果不设置,则解析失败时只提示失败,不会给解析错误原因。
将上面的例子,parser变量的error成员值置空,再次运行,可以看到,没有解析错误原因。
[root@centos7_6 glib_markup_error]# ./glib_markup_error
<xml>
<head/>
<element_num>2</element>
<elements>
<element_name>text1</element_name>
<element_name>text2</element_name>
</elements>
</xml>
(start)element_name: xml
(text)text:
(start)element_name: head
(end)element_name: head
(text)text:
(start)element_name: element_num
(text)text:2
context parse failed!
解析器支持嵌套,使用g_markup_parse_context_push
和g_markup_parse_context_pop
函数可以将当前解析器入栈,此时就可以开始新的解析器了。以下面xml为例,当解析到<Utilities>
节点时,在start回调函数将当前解析器上下文压栈,使用新的解析器开始解析Utilities的子节点。Utilities解析完,在end回调函数将当前解析器上下文出栈。待解析xml和程序见下。
待解析xml内容:
<Glib>
<CoreApplications>
<Module version="2.48" name="Main Event Loop">manages all available sources of events</Module>
<Module version="2.48" name="Thread Pools">pools of threads to execute work concurrently</Module>
</CoreApplications>
<Utilities>
<Module version="2.48" name="File Utilities">various file-related functions</Module>
<Module version="2.48" name="Simple XML Subset Parser">parses a subset of XML</Module>
</Utilities>
<DataTypes>
<Module version="2.48" name="Sequences">scalable lists</Module>
<Module version="2.48" name="Byte Arrays">arrays of bytes</Module>
</DataTypes>
</Glib>
解析程序:
源码见glib_examples\glib_markup\glib_markup_sub_parse
#include <stdio.h>
#include <glib.h>
/* Element names the callbacks compare against. */
#define TEST_GMARKUP_CORE_APPS "CoreApplications"
#define TEST_GMARKUP_UTILITIES "Utilities"
#define TEST_GMARKUP_DATATYPES "DataTypes"
#define TEST_GMARKUP_MODULE "Module"
/*
 * Section flags: start() sets a flag to TRUE when the matching section
 * element opens and end() resets it to FALSE when it closes. start() and
 * text() read them to label what they print.
 */
gboolean g_core_flag = FALSE;
gboolean g_util_flag = FALSE;
gboolean g_data_flag = FALSE;
static void sub_start( GMarkupParseContext *context,
const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
gpointer user_data,
GError ** error )
{
const gchar **name_cursor = attribute_names;
const gchar **value_cursor = attribute_values;
#if 1
g_print("(sub_start)element_name: %s \n", element_name);
while (*name_cursor) {
g_print("(sub_start)name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
#endif
}
static void sub_end( GMarkupParseContext *context,
const gchar *element_name,
gpointer user_data,
GError **error )
{
#if 1
g_print("(sub_end)element_name: %s \n", element_name);
#endif
}
static void sub_text( GMarkupParseContext *context,
const gchar *text,
gsize text_len,
gpointer user_data,
GError **error )
{
#if 1
g_print("(sub_text)text:%s \n", text);
#endif
}
GMarkupParser sub_parser = {
. start_element = sub_start,
. end_element = sub_end,
. text = sub_text,
. passthrough = NULL ,
. error = NULL
};
static void start( GMarkupParseContext *context,
const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
gpointer user_data,
GError ** error )
{
const gchar **name_cursor = attribute_names;
const gchar **value_cursor = attribute_values;
#if 0
g_printf("(start)element_name: %s \n", element_name);
while (*name_cursor) {
g_printf("(start)name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
#endif
#if 1
if(0 == strcmp(element_name, TEST_GMARKUP_CORE_APPS)) {
g_print("(start)start parse of core \n");
g_core_flag = TRUE;
}
if(0 == strcmp(element_name, TEST_GMARKUP_UTILITIES)) {
g_print("(start)start parse of util \n");
g_util_flag = TRUE;
g_markup_parse_context_push (context, &sub_parser, NULL);
}
if(0 == strcmp(element_name, TEST_GMARKUP_DATATYPES)) {
g_print("(start)start parse of data \n");
g_data_flag = TRUE;
}
if(0 == strcmp(element_name, TEST_GMARKUP_MODULE)){
if(g_core_flag) {
while (*name_cursor) {
g_print("(start)core: name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
if(g_util_flag) {
while (*name_cursor) {
g_print("(start)util: name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
if(g_data_flag) {
while (*name_cursor) {
g_print("(start)data: name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
}
#endif
}
static void end( GMarkupParseContext *context,
const gchar *element_name,
gpointer user_data,
GError **error )
{
#if 0
g_printf("(end)element_name: %s \n", element_name);
#endif
#if 1
if(0 == strcmp(element_name, TEST_GMARKUP_CORE_APPS)) {
g_print("(end)end parse of core \n");
g_core_flag = FALSE;
}
if(0 == strcmp(element_name, TEST_GMARKUP_UTILITIES)) {
g_print("(end)end parse of util \n");
g_util_flag = FALSE;
g_markup_parse_context_pop(context);
}
if(0 == strcmp(element_name, TEST_GMARKUP_DATATYPES)) {
g_print("(end)end parse of data \n");
g_data_flag = FALSE;
}
#endif
}
static void text( GMarkupParseContext *context,
const gchar *text,
gsize text_len,
gpointer user_data,
GError **error )
{
#if 0
g_printf("(text)text:%s \n", text);
#endif
#if 1
if(g_core_flag) {
g_print("(text)core text:%s \n", text);
}
if(g_util_flag) {
g_print("(text)util text:%s \n", text);
}
if(g_data_flag) {
g_print("(text)data text:%s \n", text);
}
#endif
}
/*
 * error callback: prints the message carried by the GError that the
 * parser reports. context and user_data are not used.
 */
static void err(GMarkupParseContext *context,
                GError              *error,
                gpointer             user_data)
{
    const gchar *reason = error->message;

    (void) context;
    (void) user_data;
    g_print("(err)parse xml failed: %s \n", reason);
}
GMarkupParser parser = {
. start_element = start,
. end_element = end,
. text = text,
. passthrough = NULL,
. error = err
};
int main( )
{
GMarkupParseContext *context;
gchar *buf = NULL;
gsize length = 0;
g_file_get_contents("xml_sub_parser.xml" , &buf, &length, NULL);
g_print( "%s \n" , buf) ;
context = g_markup_parse_context_new(&parser, 0, NULL, NULL);
if(NULL == context) {
g_print("context new failed! \n");
return -1;
}
if(FALSE == g_markup_parse_context_parse(context, buf, length, NULL)) {
g_print("context parse failed! \n");
}
g_markup_parse_context_free(context);
return 0;
}
运行结果:
[root@centos7_6 glib_markup_sub_parse]# ./glib_markup_sub_parse
<Glib>
<CoreApplications>
<Module version="2.48" name="Main Event Loop">manages all available sources of events</Module>
<Module version="2.48" name="Thread Pools">pools of threads to execute work concurrently</Module>
</CoreApplications>
<Utilities>
<Module version="2.48" name="File Utilities">various file-related functions</Module>
<Module version="2.48" name="Simple XML Subset Parser">parses a subset of XML</Module>
</Utilities>
<DataTypes>
<Module version="2.48" name="Sequences">scalable lists</Module>
<Module version="2.48" name="Byte Arrays">arrays of bytes</Module>
</DataTypes>
</Glib>
(start)start parse of core
(text)core text:
(start)core: name_cursor:version, value_cursor:2.48
(start)core: name_cursor:name, value_cursor:Main Event Loop
(text)core text:manages all available sources of events
(text)core text:
(start)core: name_cursor:version, value_cursor:2.48
(start)core: name_cursor:name, value_cursor:Thread Pools
(text)core text:pools of threads to execute work concurrently
(text)core text:
(end)end parse of core
(start)start parse of util
(sub_text)text:
(sub_start)element_name: Module
(sub_start)name_cursor:version, value_cursor:2.48
(sub_start)name_cursor:name, value_cursor:File Utilities
(sub_text)text:various file-related functions
(sub_end)element_name: Module
(sub_text)text:
(sub_start)element_name: Module
(sub_start)name_cursor:version, value_cursor:2.48
(sub_start)name_cursor:name, value_cursor:Simple XML Subset Parser
(sub_text)text:parses a subset of XML
(sub_end)element_name: Module
(sub_text)text:
(end)end parse of util
(start)start parse of data
(text)data text:
(start)data: name_cursor:version, value_cursor:2.48
(start)data: name_cursor:name, value_cursor:Sequences
(text)data text:scalable lists
(text)data text:
(start)data: name_cursor:version, value_cursor:2.48
(start)data: name_cursor:name, value_cursor:Byte Arrays
(text)data text:arrays of bytes
(text)data text:
(end)end parse of data
以上结果可以看出,在解析到Utilities节点时,该节点内部的子节点使用的是子解析器。
g_markup_collect_attributes
是一个变参函数,可以根据属性名获取到对应的属性值。当然,遍历name_cursor
和value_cursor
也可以得到属性名和属性值,但如果属性值有很多又知道属性名的情况下,可以使用此函数快速获取到属性值。
属性收集有一个重要的参数为GMarkupCollectType
,当设置G_MARKUP_COLLECT_OPTIONAL
时,参数为可选,即使找不到该参数,也不会报错,只会返回NULL。如果不设置该参数,则会报解析失败。
待解析xml内容:
<xml>
<head/>
<element_num>2</element_num>
<elements>
<element_name attribute_name1="attribute_value1" attribute_name2="attribute_value2">text</element_name>
</elements>
</xml>
解析程序:
源码见glib_examples\glib_markup\glib_markup_attributes
#include <glib.h>
static void start( GMarkupParseContext *context,
const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
gpointer user_data,
GError ** error )
{
const gchar **name_cursor = attribute_names;
const gchar **value_cursor = attribute_values;
char *value1 = NULL;
char *value2 = NULL;
g_print("(start)element_name: %s \n", element_name);
if (g_markup_collect_attributes (element_name,
attribute_names,
attribute_values,
error,
G_MARKUP_COLLECT_STRING | G_MARKUP_COLLECT_OPTIONAL, "attribute_name1", &value1,
G_MARKUP_COLLECT_STRING | G_MARKUP_COLLECT_OPTIONAL, "attribute_name2", &value2,
G_MARKUP_COLLECT_INVALID) ) {
g_print("value1: %s \n", value1);
g_print("value2: %s \n", value2);
} else {
g_print("no such attribute \n");
}
while (*name_cursor) {
g_print("(start)name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
static void end( GMarkupParseContext *context,
const gchar *element_name,
gpointer user_data,
GError **error )
{
g_print("(end)element_name: %s \n", element_name);
}
static void text( GMarkupParseContext *context,
const gchar *text,
gsize text_len,
gpointer user_data,
GError **error )
{
g_printf("(text)text:%s \n", text);
}
static void passthrough( GMarkupParseContext *context,
const gchar *passthrough_text,
gsize text_len,
gpointer user_data,
GError **error )
{
g_print("(passthrough)passthrough:%s \n", passthrough_text);
}
/*
 * error callback: prints the message carried by the GError that the
 * parser reports. context and user_data are not used.
 */
static void err(GMarkupParseContext *context,
                GError              *error,
                gpointer             user_data)
{
    const gchar *reason = error->message;

    (void) context;
    (void) user_data;
    g_print("(err)parse xml failed: %s \n", reason);
}
GMarkupParser parser = {
. start_element = start,
. end_element = end,
. text = text,
. passthrough = passthrough,
. error = err
};
int main( )
{
GMarkupParseContext *context;
gchar *buf = NULL;
gsize length = 0;
g_file_get_contents("xml_collect_attributes.xml" , &buf, &length, NULL);
g_print( "%s \n" , buf) ;
context = g_markup_parse_context_new(&parser, 0, NULL, NULL);
if(NULL == context) {
g_print("context new failed! \n");
return -1;
}
if(FALSE == g_markup_parse_context_parse(context, buf, length, NULL)) {
g_print("context parse failed! \n");
}
g_markup_parse_context_free(context);
return 0;
}
运行结果:
[root@centos7_6 glib_markup_attributes]# ./glib_markup_attributes
<xml>
<head/>
<element_num>2</element_num>
<elements>
<element_name attribute_name1="attribute_value1" attribute_name2="attribute_value2">text</element_name>
</elements>
</xml>
(start)element_name: xml
value1: (null)
value2: (null)
(text)text:
(start)element_name: head
value1: (null)
value2: (null)
(end)element_name: head
(text)text:
(start)element_name: element_num
value1: (null)
value2: (null)
(text)text:2
(end)element_name: element_num
(text)text:
(start)element_name: elements
value1: (null)
value2: (null)
(text)text:
(start)element_name: element_name
value1: attribute_value1
value2: attribute_value2
(start)name_cursor:attribute_name1, value_cursor:attribute_value1
(start)name_cursor:attribute_name2, value_cursor:attribute_value2
(text)text:text
(end)element_name: element_name
(text)text:
(end)element_name: elements
(text)text:
(end)element_name: xml
使用g_markup_parse_context_get_position
函数可以获取到当前解析到的位置信息。
待解析xml内容:
<xml>
<head/>
<element_num>2</element_num>
<elements>
<element_name1 attribute_name1="attribute_value1" attribute_name2="attribute_value2">text</element_name1>
<element_name2 attribute_name3="attribute_value3" attribute_name4="attribute_value4">text</element_name2>
</elements>
</xml>
解析程序:
源码见glib_examples\glib_markup\glib_markup_get_position
#include <glib.h>
static void start( GMarkupParseContext *context,
const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
gpointer user_data,
GError ** error )
{
const gchar **name_cursor = attribute_names;
const gchar **value_cursor = attribute_values;
gint line_number = 0;
gint char_number = 0;
g_print("(start)element_name: %s \n", element_name);
g_markup_parse_context_get_position(context, &line_number, &char_number);
g_print("(get_position)line_number:%d, char_number:%d \n", line_number, char_number);
while (*name_cursor) {
g_print("(start)name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
static void end( GMarkupParseContext *context,
const gchar *element_name,
gpointer user_data,
GError **error )
{
g_print("(end)element_name: %s \n", element_name);
}
static void text( GMarkupParseContext *context,
const gchar *text,
gsize text_len,
gpointer user_data,
GError **error )
{
g_printf("(text)text:%s \n", text);
}
static void passthrough( GMarkupParseContext *context,
const gchar *passthrough_text,
gsize text_len,
gpointer user_data,
GError **error )
{
g_print("(passthrough)passthrough:%s \n", passthrough_text);
}
/*
 * error callback: prints the message carried by the GError that the
 * parser reports. context and user_data are not used.
 */
static void err(GMarkupParseContext *context,
                GError              *error,
                gpointer             user_data)
{
    const gchar *reason = error->message;

    (void) context;
    (void) user_data;
    g_print("(err)parse xml failed: %s \n", reason);
}
GMarkupParser parser = {
. start_element = start,
. end_element = end,
. text = text,
. passthrough = passthrough,
. error = err
};
int main( )
{
GMarkupParseContext *context;
gchar *buf = NULL;
gsize length = 0;
g_file_get_contents("xml_get_position.xml" , &buf, &length, NULL);
g_print( "%s \n" , buf) ;
context = g_markup_parse_context_new(&parser, 0, NULL, NULL);
if(NULL == context) {
g_print("context new failed! \n");
return -1;
}
if(FALSE == g_markup_parse_context_parse(context, buf, length, NULL)) {
g_print("context parse failed! \n");
}
g_markup_parse_context_free(context);
return 0;
}
运行结果:
[root@centos7_6 build]# ./glib_markup_get_position
<xml>
<head/>
<element_num>2</element_num>
<elements>
<element_name1 attribute_name1="attribute_value1" attribute_name2="attribute_value2">text</element_name1>
<element_name2 attribute_name3="attribute_value3" attribute_name4="attribute_value4">text</element_name2>
</elements>
</xml>
(start)element_name: xml
(get_position)line_number:2, char_number:1
(text)text:
(start)element_name: head
(get_position)line_number:2, char_number:8
(end)element_name: head
(text)text:
(start)element_name: element_num
(get_position)line_number:3, char_number:15
(text)text:2
(end)element_name: element_num
(text)text:
(start)element_name: elements
(get_position)line_number:5, char_number:1
(text)text:
(start)element_name: element_name1
(get_position)line_number:5, char_number:87
(start)name_cursor:attribute_name1, value_cursor:attribute_value1
(start)name_cursor:attribute_name2, value_cursor:attribute_value2
(text)text:text
(end)element_name: element_name1
(text)text:
(start)element_name: element_name2
(get_position)line_number:6, char_number:87
(start)name_cursor:attribute_name3, value_cursor:attribute_value3
(start)name_cursor:attribute_name4, value_cursor:attribute_value4
(text)text:text
(end)element_name: element_name2
(text)text:
(end)element_name: elements
(text)text:
(end)element_name: xml
g_markup_parse_context_get_element
函数可以获取当前正在解析的标签。在start回调函数中,element_name本身就是正在处理的标签。
待解析xml内容:
<xml>
<head/>
<element_num>2</element_num>
<elements>
<element_name1 attribute_name1="attribute_value1" attribute_name2="attribute_value2">text</element_name1>
<element_name2 attribute_name3="attribute_value3" attribute_name4="attribute_value4">text</element_name2>
</elements>
</xml>
解析程序:
源码见glib_examples\glib_markup\glib_markup_element
#include <glib.h>
static void start( GMarkupParseContext *context,
const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
gpointer user_data,
GError ** error )
{
const gchar **name_cursor = attribute_names;
const gchar **value_cursor = attribute_values;
gchar *element;
g_print("(start)element_name: %s \n", element_name);
element = (gchar *)g_markup_parse_context_get_element(context);
if(NULL != element) {
g_print("(get_element)get: %s \n", element);
}
while (*name_cursor) {
g_print("(start)name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
static void end( GMarkupParseContext *context,
const gchar *element_name,
gpointer user_data,
GError **error )
{
g_print("(end)element_name: %s \n", element_name);
}
static void text( GMarkupParseContext *context,
const gchar *text,
gsize text_len,
gpointer user_data,
GError **error )
{
g_printf("(text)text:%s \n", text);
}
static void passthrough( GMarkupParseContext *context,
const gchar *passthrough_text,
gsize text_len,
gpointer user_data,
GError **error )
{
g_print("(passthrough)passthrough:%s \n", passthrough_text);
}
/*
 * error callback of this example: prints the message carried by the GError
 * passed in. context and user_data are not used.
 */
static void
err(GMarkupParseContext *context,
    GError *error,
    gpointer user_data)
{
    const gchar *reason = error->message;

    (void)context;
    (void)user_data;

    g_print("(err)parse xml failed: %s \n", reason);
}
GMarkupParser parser = {
. start_element = start,
. end_element = end,
. text = text,
. passthrough = passthrough,
. error = err
};
int main( )
{
GMarkupParseContext *context;
gchar *buf = NULL;
gsize length = 0;
g_file_get_contents("xml_element.xml" , &buf, &length, NULL);
g_print( "%s \n" , buf) ;
context = g_markup_parse_context_new(&parser, 0, NULL, NULL);
if(NULL == context) {
g_print("context new failed! \n");
return -1;
}
if(FALSE == g_markup_parse_context_parse(context, buf, length, NULL)) {
g_print("context parse failed! \n");
}
g_markup_parse_context_free(context);
return 0;
}
运行结果:
[root@centos7_6 glib_markup_element]# ./glib_markup_element
<xml>
<head/>
<element_num>2</element_num>
<elements>
<element_name1 attribute_name1="attribute_value1" attribute_name2="attribute_value2">text</element_name1>
<element_name2 attribute_name3="attribute_value3" attribute_name4="attribute_value4">text</element_name2>
</elements>
</xml>
(start)element_name: xml
(get_element)get: xml
(text)text:
(start)element_name: head
(get_element)get: head
(end)element_name: head
(text)text:
(start)element_name: element_num
(get_element)get: element_num
(text)text:2
(end)element_name: element_num
(text)text:
(start)element_name: elements
(get_element)get: elements
(text)text:
(start)element_name: element_name1
(get_element)get: element_name1
(start)name_cursor:attribute_name1, value_cursor:attribute_value1
(start)name_cursor:attribute_name2, value_cursor:attribute_value2
(text)text:text
(end)element_name: element_name1
(text)text:
(start)element_name: element_name2
(get_element)get: element_name2
(start)name_cursor:attribute_name3, value_cursor:attribute_value3
(start)name_cursor:attribute_name4, value_cursor:attribute_value4
(text)text:text
(end)element_name: element_name2
(text)text:
(end)element_name: elements
(text)text:
(end)element_name: xml
g_markup_parse_context_get_element_stack函数返回一个标签链表,该链表包含当前标签及其所有的父标签,从当前标签开始,一直向上到根标签。
举例:<html><h1><text>aaa</text></h1></html>
,当从text
标签获取标签栈时,链表中的元素依次为text->h1->html
。
待解析xml内容:
<xml>
<head/>
<element_num>2</element_num>
<elements>
<element_name attribute_name1="attribute_value1" attribute_name2="attribute_value2">text</element_name>
<element_name attribute_name3="attribute_value3" attribute_name4="attribute_value4">text</element_name>
</elements>
</xml>
解析程序:
源码见glib_examples\glib_markup\glib_markup_element_stack
#include <glib.h>
/*
 * GFunc used with g_slist_foreach() on the element stack: prints one stack
 * entry.
 *
 * data:      the list element, printed as a string (a tag name on the stack).
 * user_data: printed as a string prefix; the caller in this file passes the
 *            name of the tag whose start callback is running.
 */
static void _slist_print(gpointer data, gpointer user_data)
{
    /* Give the untyped pointers their string type so "%s" gets char pointers. */
    const gchar *stack_entry = data;
    const gchar *current_tag = user_data;

    g_print("(stack)%s: %s \n", current_tag, stack_entry);
}
static void start( GMarkupParseContext *context,
const gchar *element_name,
const gchar **attribute_names,
const gchar **attribute_values,
gpointer user_data,
GError ** error )
{
const gchar **name_cursor = attribute_names;
const gchar **value_cursor = attribute_values;
GSList *stack;
g_print("(start)element_name: %s \n", element_name);
stack = (GSList *) g_markup_parse_context_get_element_stack (context);
if(g_slist_length(stack) > 0) {
g_slist_foreach(stack, _slist_print, (gpointer)element_name);
} else {
g_print("(stack)%s: no element stack \n", element_name);
}
while (*name_cursor) {
g_print("(start)name_cursor:%s, value_cursor:%s \n", *name_cursor, *value_cursor);
name_cursor++;
value_cursor++;
}
}
static void end( GMarkupParseContext *context,
const gchar *element_name,
gpointer user_data,
GError **error )
{
g_print("(end)element_name: %s \n", element_name);
}
static void text( GMarkupParseContext *context,
const gchar *text,
gsize text_len,
gpointer user_data,
GError **error )
{
g_printf("(text)text:%s \n", text);
}
static void passthrough( GMarkupParseContext *context,
const gchar *passthrough_text,
gsize text_len,
gpointer user_data,
GError **error )
{
g_print("(passthrough)passthrough:%s \n", passthrough_text);
}
/*
 * error callback of this example: prints the message carried by the GError
 * passed in. context and user_data are not used.
 */
static void
err(GMarkupParseContext *context,
    GError *error,
    gpointer user_data)
{
    const gchar *reason = error->message;

    (void)context;
    (void)user_data;

    g_print("(err)parse xml failed: %s \n", reason);
}
GMarkupParser parser = {
. start_element = start,
. end_element = end,
. text = text,
. passthrough = passthrough,
. error = err
};
int main( )
{
GMarkupParseContext *context;
gchar *buf = NULL;
gsize length = 0;
g_file_get_contents("xml_get_element_stack.xml" , &buf, &length, NULL);
g_print( "%s \n" , buf);
context = g_markup_parse_context_new(&parser, 0, NULL, NULL);
if(NULL == context) {
g_print("context new failed! \n");
return -1;
}
if(FALSE == g_markup_parse_context_parse(context, buf, length, NULL)) {
g_print("context parse failed! \n");
}
g_markup_parse_context_free(context);
return 0;
}
运行结果:
[root@centos7_6 glib_markup_element_stack]# ./glib_markup_element_stack
<xml>
<head/>
<element_num>2</element_num>
<elements>
<element_name attribute_name1="attribute_value1" attribute_name2="attribute_value2">text</element_name>
<element_name attribute_name3="attribute_value3" attribute_name4="attribute_value4">text</element_name>
</elements>
</xml>
(start)element_name: xml
(stack)xml: xml
(text)text:
(start)element_name: head
(stack)head: head
(stack)head: xml
(end)element_name: head
(text)text:
(start)element_name: element_num
(stack)element_num: element_num
(stack)element_num: xml
(text)text:2
(end)element_name: element_num
(text)text:
(start)element_name: elements
(stack)elements: elements
(stack)elements: xml
(text)text:
(start)element_name: element_name
(stack)element_name: element_name
(stack)element_name: elements
(stack)element_name: xml
(start)name_cursor:attribute_name1, value_cursor:attribute_value1
(start)name_cursor:attribute_name2, value_cursor:attribute_value2
(text)text:text
(end)element_name: element_name
(text)text:
(start)element_name: element_name
(stack)element_name: element_name
(stack)element_name: elements
(stack)element_name: xml
(start)name_cursor:attribute_name3, value_cursor:attribute_value3
(start)name_cursor:attribute_name4, value_cursor:attribute_value4
(text)text:text
(end)element_name: element_name
(text)text:
(end)element_name: elements
(text)text:
(end)element_name: xml
当xml的文本或属性值中包含&、<、>、'、"
这几个特殊字符时,直接书写可能导致xml无法解析,需要先转换成相应的实体(可使用g_markup_escape_text函数完成转换)。
下面是五个在XML文件中预定义好的实体:
&amp; -- & 和号
&lt; -- < 小于号
&gt; -- > 大于号
&apos; -- ' 单引号
&quot; -- " 双引号