当前位置: 首页 > 工具软件 > GML4J > 使用案例 >

(GML2Pajek) GML到Pajek文件的转换,GML转net,GML转Pajek网络文件

封烈
2023-12-01

Pajek的网络输入数据格式为*.net,而今天想下的一些数据都是*.gml格式的。

http://www-personal.umich.edu/~mejn/netdata/ 

Mark Newman在提供这些开源的数据的同时,提供了他写的一个简单GML分析器的C源码。我修改了一点,加了个主函数用来生成*.net文件。

如果*.gml文件提供更多信息,如节点的分类等,也可以修改这个简单的GML2Pajek来增加更多内容。


如果需要读取*.gml中更多信息,需要增加的地方:

1. 数据结构

2. 对应GML分析器中的函数

3. 将NETWORK输出到Pajek支持格式的函数(可能还要打开新文件,如*.vec)


我修改了Mark Newman提供的分析器的一部分。但是我认为Mark Newman放出这个程序的时候一定是有经过测试的,我不知道为什么我不修改就不能用了(不能转换他主页上提供的gml格式数据)。可能和一些我不知道的原因有关。

同时由于Newman提供的部分数据带有结点的value,我对每个*.gml都会生成对应的*.vec文件。如果*.gml不带有value,这个*.vec是多余的,我没有做判断。

下面是源代码,Newman提供的gml parser我做了一些修改。


gml2pajek.c

// Program to read a network stored in a GML file into a NETWORK struct
//
// Yi-Shi Lin 12 AUG 03
//
// A simple parser in C that will read the files into a data structure is needed
// It is provided by Mark Newman:
//      http://www-personal.umich.edu/~mejn/netdata/readgml.zip
//
// If more infomation in *.gml is needed
// Please modify the data structure defined in network.h
// and the corresponding functions in readgml.c and gml2pajek.c

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "readgml.h"

#define FILENAMELEN 100

// Function to open *.gml and *.net

int open_file(FILE **gml_file, FILE **net_file, FILE **vec_file)
{
    char filename[FILENAMELEN];
    int len;
    printf("\ninput *.gml\n");
    scanf("%s", filename);
    len=strlen(filename);
    if (((*gml_file)=fopen(filename, "r"))==NULL)
    {
        printf("fail to open \"%s\"\n", filename);
        return 0;
    }
    printf("%s has been opened...\n", filename);
    filename[len-3]='n';
    filename[len-2]='e';
    filename[len-1]='t';
    if (((*net_file)=fopen(filename, "w"))==NULL)
    {
        printf("fail to open \"%s\"\n", filename);
        return 0;
    }
    filename[len-3]='v';
    filename[len-2]='e';
    filename[len-1]='c';
    if (((*vec_file)=fopen(filename, "w"))==NULL)
    {
        printf("fail to open \"%s\"\n", filename);
        return 0;
    }
    return 1;
}

// Function to close *.gml and *.net

void close_file(FILE *gml_file, FILE *net_file, FILE *vec_file)
{
    fclose(gml_file);
    fclose(net_file);
    fclose(vec_file);
}

// Function to write the network into *.net

void write_network(NETWORK *network, FILE *net_file, FILE *vec_file)
{
    int i,j;
    fprintf(net_file, "*Vertices %d\n", network->nvertices);
    for (i=0; i<network->nvertices; i++)
    {
        fprintf(net_file, "%d ", i+1);
        if (network->vertex[i].label)
            fprintf(net_file, "\"%s\"", network->vertex[i].label);
        fprintf(net_file, "\n");
    }
    fprintf(net_file, "*Edges\n");
    for (i=0; i<network->nvertices; i++)
    {
        for (j=0; j<network->vertex[i].degree; j++)
        {
            EDGE *tmp_edge=&(network->vertex[i].edge[j]);
            fprintf(net_file, "%d %d %.lf\n", i+1, tmp_edge->target+1, tmp_edge->weight);
        }
    }
    printf("*.net has been generated\n");
    fprintf(vec_file, "*Vertices %d\n", network->nvertices);
    for (i=0; i<network->nvertices; i++)
    {
        fprintf(vec_file, "%lf\n", network->vertex[i].value);
    }
    printf("*.vec has been generated\n");
}

int main()
{
    NETWORK network;
    FILE *gml_file, *net_file, *vec_file;

    while (1)
    {
        if (open_file(&gml_file, &net_file, &vec_file)==0)
        {
            continue;
        }
        if (read_network(&network, gml_file)!=0)
        {
            printf("fail to read the network!\n");
            continue;
        }
        write_network(&network, net_file, vec_file);
        close_file(gml_file, net_file, vec_file);
        free_network(&network);
    }

    return 0;
}


network.h

// Header file for VERTEX, EDGE, and NETWORK data structures
//
// Mark Newman  11 AUG 06

#ifndef _NETWORK_H
#define _NETWORK_H

typedef struct
{
    int target;        // Index in the vertex[] array of neighboring vertex.
    // (Note that this is not necessarily equal to the GML
    // ID of the neighbor if IDs are nonconsecutive or do
    // not start at zero.)
    double weight;     // Weight of edge.  1 if no weight is specified.
} EDGE;

typedef struct
{
    int id;            // GML ID number of vertex
    int degree;        // Degree of vertex (out-degree for directed nets)
    double value;      //  Value of vertex
    char *label;       // GML label of vertex.  NULL if no label specified
    EDGE *edge;        // Array of EDGE structs, one for each neighbor
} VERTEX;

typedef struct
{
    int nvertices;     // Number of vertices in network
    int directed;      // 1 = directed network, 0 = undirected
    char has_value;
    VERTEX *vertex;    // Array of VERTEX structs, one for each vertex
} NETWORK;

#endif


readgml.h

// Header file for the parser for GML files
//
// Mark Newman  11 AUG 06

#ifndef _READGML_H
#define _READGML_H

#include <stdio.h>
#include "network.h"

int read_network(NETWORK *network, FILE *stream);
void free_network(NETWORK *network);

#endif


readgml.c

// Functions to read a network stored in a GML file into a NETWORK struct
//
// Mark Newman  11 AUG 06
//
// To use this software, #include "readgml.h" at the head of your program
// and then call the following.
//
// Function calls:
//   int read_network(NETWORK *network, FILE *stream)
//     -- Reads a network from the FILE pointed to by "stream" into the
//        structure "network".  For the format of NETWORK structs see file
//        "network.h".  Returns 0 if read was successful.
//   void free_network(NETWORK *network)
//     -- Destroys a NETWORK struct again, freeing up the memory


// Inclusions

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include "network.h"

// Constants

#define LINELENGTH 1000

// Types

typedef struct line
{
    char *str;
    struct line *ptr;
} LINE;

// Globals

LINE *first;
LINE *current;


// Function to read one line from a specified stream.  Return value is
// 1 if an EOF was encountered.  Otherwise 0.

int read_line(FILE *stream, char line[LINELENGTH])
{
    if (fgets(line,LINELENGTH,stream)==NULL) return 1;
    line[strlen(line)-1] = '\0';   // Erase the terminating NEWLINE
    return 0;
}


// Function to read in the whole file into a linked-list buffer, so that we
// can do several passes on it, as required to read the GML format
// efficiently

int fill_buffer(FILE *stream)
{
    int length;
    char line[LINELENGTH];
    LINE *previous;
    if (stream==NULL || read_line(stream,line)!=0)
    {
        first = NULL;// Indicates empty buffer
        return 1;
    }
    length = strlen(line) + 1;
    first = malloc(sizeof(LINE));
    first->str = malloc(length*sizeof(char));
    strcpy(first->str,line);

    previous = first;
    while (read_line(stream,line)==0)
    {
        length = strlen(line) + 1;
        previous->ptr = malloc(sizeof(LINE));
        previous = previous->ptr;
        previous->str = malloc(length*sizeof(char));
        strcpy(previous->str,line);
    }
    previous->ptr = NULL;          // Indicates last line
    return 0;
}


// Function to free up the buffer again

void free_buffer()
{
    LINE *thisptr;
    LINE *nextptr;

    thisptr = first;
    while (thisptr!=NULL)
    {
        nextptr = thisptr->ptr;
        free(thisptr->str);
        free(thisptr);
        thisptr = nextptr;
    }
}


// Function to reset to the start of the buffer again

void reset_buffer()
{
    current = first;
}


// Function to get the next line in the buffer.  Returns 0 if there was
// a line or 1 if we've reached the end of the buffer.

int next_line(char line[LINELENGTH])
{
    if (current==NULL) return 1;
    strcpy(line,current->str);
    current = current->ptr;
    return 0;
}



// Function to establish whether the network read from a given stream is
// directed or not.  Returns 1 for a directed network, and 0 otherwise.  If
// the GML file contains no "directed" line then the graph is assumed to be
// undirected, which is the GML default behavior.

int is_directed()
{
    int result=0;
    char *ptr;
    char line[LINELENGTH];

    reset_buffer();

    while (next_line(line)==0)
    {
        ptr = strstr(line,"directed");
        if (ptr==NULL) continue;
        sscanf(ptr,"directed %i",&result);
        break;
    }

    return result;
}


// Function to count the vertices in a GML file.  Returns number of vertices.

int count_vertices()
{
    int result=0;
    char *ptr;
    char line[LINELENGTH];

    reset_buffer();

    while (next_line(line)==0) {
        ptr = strstr(line,"node");
        if (ptr!=NULL) {
            ptr = strstr(line,"label \"");
            if (ptr==NULL) result++;
    }
  }

    return result;
}


// Function to compare the IDs of two vertices

int cmpid(VERTEX *v1p, VERTEX *v2p)
{
    if (v1p->id>v2p->id) return 1;
    if (v1p->id<v2p->id) return -1;
    return 0;
}


// Function to allocate space for a network structure stored in a GML file
// and determine the parameters (id, label) of each of the vertices.

void create_network(NETWORK *network)
{
    int i;
    int length;
    char *ptr;
    char *start,*stop;
    char line[LINELENGTH];
    char label[LINELENGTH];

    // Determine whether the network is directed

    network->directed = is_directed();
    // Count the vertices

    network->nvertices = count_vertices();
    // Make space for the vertices

    network->vertex = calloc(network->nvertices,sizeof(VERTEX));

    // Go through the file reading the details of each vertex one by one

    reset_buffer();

    for (i=0; i<network->nvertices; i++)
    {

        // Skip to next "node" entry

        do
        {
            next_line(line);
        }
        while (strstr(line,"node")==NULL);

        // Read in the details of this vertex

        // initial value
        network->vertex[i].value=0.0;

        do
        {

            // Look for ID

            ptr = strstr(line,"id");
            if (ptr!=NULL) sscanf(ptr,"id %i",&network->vertex[i].id);
            // Look for label

            ptr = (strstr(line,"label"));
            if (ptr!=NULL)
            {
                start = strchr(line,'"');
                if (start==NULL)
                {
                    sscanf(ptr,"label %s",&label);
                }
                else
                {
                    stop = strchr(++start,'"');
                    if (stop==NULL) length = strlen(line) - (start-line);
                    else length = stop - start;
                    strncpy(label,start,length);
                    label[length] = '\0';
                    network->vertex[i].label = malloc((length+1)*sizeof(char));
                    strcpy(network->vertex[i].label,label);
                }
            }

            // Look for value

            ptr = (strstr(line, "value"));
            if (ptr!=NULL)
            {
                sscanf(ptr, "value %lf", &network->vertex[i].value);
            }

            // If we see a closing square bracket we are done

            if (strstr(line,"]")!=NULL) break;

        }
        while (next_line(line)==0);

    }
    // Sort the vertices in increasing order of their IDs so we can find them
    // quickly later

    qsort(network->vertex,network->nvertices,sizeof(VERTEX),(void*)cmpid);
}


// Function to find a vertex with a specified ID using binary search.
// Returns the element in the vertex[] array holding the vertex in question,
// or -1 if no vertex was found.

int find_vertex(int id, NETWORK *network)
{
    int top,bottom,split;
    int idsplit;

    top = network->nvertices;
    if (top<1) return -1;
    bottom = 0;
    split = top/2;

    do
    {
        idsplit = network->vertex[split].id;
        if (id>idsplit)
        {
            bottom = split + 1;
            split = (top+bottom)/2;
        }
        else if (id<idsplit)
        {
            top = split;
            split = (top+bottom)/2;
        }
        else return split;
    }
    while (top>bottom);

    return -1;
}


// Function to determine the degrees of all the vertices by going through
// the edge data

void get_degrees(NETWORK *network)
{
    int s,t;
    int vs,vt;
    char *ptr;
    char line[LINELENGTH];

    reset_buffer();

    while (next_line(line)==0)
    {

        // Find the next edge entry

        ptr = strstr(line,"edge");
        if (ptr==NULL) continue;

        // Read the source and target of the edge

        s = t = -1;

        do
        {

            ptr = strstr(line,"source");
            if (ptr!=NULL) sscanf(ptr,"source %i",&s);
            ptr = strstr(line,"target");
            if (ptr!=NULL) sscanf(ptr,"target %i",&t);

            // If we see a closing square bracket we are done

            if (strstr(line,"]")!=NULL) break;

        }
        while (next_line(line)==0);

        // Increment the degrees of the appropriate vertex or vertices

        if ((s>=0)&&(t>=0))
        {
            vs = find_vertex(s,network);
            network->vertex[vs].degree++;
            if (network->directed==0)
            {
                vt = find_vertex(t,network);
                network->vertex[vt].degree++;
            }
        }

    }

    return;
}


// Function to read in the edges

void read_edges(NETWORK *network)
{
    int i;
    int s,t;
    int vs,vt;
    int *count;
    double w;
    char *ptr;
    char line[LINELENGTH];

    // Malloc space for the edges and temporary space for the edge counts
    // at each vertex

    for (i=0; i<network->nvertices; i++)
    {
        network->vertex[i].edge = malloc(network->vertex[i].degree*sizeof(EDGE));
    }
    count = calloc(network->nvertices,sizeof(int));

    // Read in the data

    reset_buffer();

    while (next_line(line)==0)
    {

        // Find the next edge entry

        ptr = strstr(line,"edge");
        if (ptr==NULL) continue;

        // Read the source and target of the edge and the edge weight

        s = t = -1;
        w = 1.0;

        do
        {

            ptr = strstr(line,"source");
            if (ptr!=NULL) sscanf(ptr,"source %i",&s);
            ptr = strstr(line,"target");
            if (ptr!=NULL) sscanf(ptr,"target %i",&t);
            ptr = strstr(line,"value");
            if (ptr!=NULL) sscanf(ptr,"value %lf",&w);

            // If we see a closing square bracket we are done

            if (strstr(line,"]")!=NULL) break;

        }
        while (next_line(line)==0);

        // Add these edges to the appropriate vertices

        if ((s>=0)&&(t>=0))
        {
            vs = find_vertex(s,network);
            vt = find_vertex(t,network);
            network->vertex[vs].edge[count[vs]].target = vt;
            network->vertex[vs].edge[count[vs]].weight = w;
            count[vs]++;
            if (network->directed==0)
            {
                network->vertex[vt].edge[count[vt]].target = vs;
                network->vertex[vt].edge[count[vt]].weight = w;
                count[vt]++;
            }
        }

    }

    free(count);
    return;
}


// Function to read a complete network

int read_network(NETWORK *network, FILE *stream)
{
    fill_buffer(stream);
    create_network(network);
    get_degrees(network);
    read_edges(network);
    free_buffer();
    return 0;
}


// Function to free the memory used by a network again

void free_network(NETWORK *network)
{
    int i;

    for (i=0; i<network->nvertices; i++)
    {
        free(network->vertex[i].edge);
        free(network->vertex[i].label);
    }
    free(network->vertex);
}


 类似资料: