已有一个数据库 myjql 的框架,现在在这个框架中写一棵组织在文件里的 B+ 树,以此实现数据库基本功能。
数据库文件要可持久化存储,也就说退出数据库后会留下数据库文件,再次打开时数据不会丢失。
数据库中每个条目由 16 个字节组成:A(int) + B(12字节字符串)。
按照 B 为关键字建立索引。
要求支持如下操作:
select
: 列出所有的元组select s
: 列出所有 B 为 s 的元组insert i s
: 插入一个 A = i
, B = s
的条目delete s
: 删除所有 B 为 s 的条目.exit
: 退出数据库测试集:大约有 6e6 级别的条目。
时间限制:180 s
内存限制:6 MB
硬盘空间限制:无
注意:不能把索引存在内存里,否则一定超出内存空间限制。
B+树的条目存在叶子结点上,因此文件里可以只存放一棵B+树。可以按照结点为单位在文件中存储B+树。只要知道每个结点在文件中的首位置,就可以用fseek
函数在文件中定位结点并且读取,因此我们用一个整数类型来表示一个结点在文件中的首位置,把这个整数称为node_ptr
类。
除了结点,为了方便管理文件信息,还需要一些元数据。
typedef unsigned int node_ptr;
struct FILE_INFO{
node_ptr root;
unsigned int lenth;
unsigned int entry_num;
};
root
:存下B+树的根结点
lenth
:B+树文件的总长度,可以据此在文件内分配新的结点
entry_num
:树内条目总数
新建结点并且返回起始位置的代码:
node_ptr New_NODE()
{
node_ptr ret = finfo.lenth;
finfo.lenth += sizeof(struct NODE);
return ret;
}
struct NODE{
node_ptr lsibling,rsibling;
int is_leaf;
int key_number;
char data[IDX_SIZE];
};
lsibling,rsibling
:左右邻居结点,用来进行合并、迁移,对于叶子结点可以用来有序遍历。
is_leaf
:标记该结点是否是叶子结点。
key_number
:用来表示结点现存的键值的数量。
data
:char数组,用来存放键值和指针(或对于叶子来说,条目的A值)。4 个字节放一个整数,接着 12 字节放一个字符串,这样无论是叶子结点上条目的A值+B值,还是非叶子结点上的子节点号+键值,都可以用 16个连续的char类型来表示。
因为一个页空间的大小是 4096 字节,因此我将一个结点直接设置为一个页的大小,并且根据计算,一个B+树的结点大概最多能存放252个条目(或键值与子结点号),也就是说这棵树的度(Degree)大概在 126 ,也就是说除了根结点,所有结点应该是 126~252 叉的,应当具有至少50%的占有率。
定义了一些宏来表示这些大小信息:
#define PAGE_SIZE 4096
#define NODE_SIZE 4096
#define DEGREE ((NODE_SIZE-16-16-4)/32)
#define IDX_SIZE (NODE_SIZE-16)
myjql
需要能够读取识别上面构造的数据库文件,并且在增删元素的时候修改文件,退出的时候留下文件。
void open_file(const char* filename) {
/* open file */
if(!(fp = fopen(filename,"r+")))//如果文件不存在,新建一个文件,初始化元信息和B+树
{
fp = fopen(filename,"w+");
finfo.entry_num=0;
finfo.lenth=sizeof(struct FILE_INFO);
finfo.root=New_NODE();
struct NODE node;//建立一个结点,并且插入一条哨兵条目
memset(&node,0,sizeof(node));
node.is_leaf=1;
node.key_number=1;
np2char(0,node.data);
memcpy(node.data+4,"\0\0\0\0\0\0\0\0\0\0\0",12);
write_node(finfo.root,&node);
}
else//如果存在,直接把元数据读出来
{
fseek(fp,0,SEEK_SET);
fread(&finfo,sizeof(struct FILE_INFO),1,fp);
}
}
void exit_nicely(int code) {
/* do clean work */
fseek(fp,0,SEEK_SET);
fwrite(&finfo,sizeof(struct FILE_INFO),1,fp);
exit(code);
}
struct NODE node;
fseek(fp,n,SEEK_SET);
fread(&node,sizeof(struct NODE),1,fp);
//从文件把以n为起始位置的结点读到node
void write_node(node_ptr n,struct NODE* node)
{
fseek(fp,n,SEEK_SET);
fwrite(node,sizeof(struct NODE),1,fp);
}
在这个项目中,要求键值相同的条目按照时间后先排序,也就是说相同键值,后到者排在前面。这个看似需要用时间戳维护,但其实不然。当我们插入一个条目 (A,B)
的时候,我们认为此时B+树中所有小于 B
的值都比 B
小,而B+树中所有大于等于 B
的值都比当前插入的这个条目大。这样自然而然,插入条目虽然没有存时间戳的信息,但天然按照时间戳的顺序进行了排序,不需要维护额外的时间戳信息,可谓大道至简。
递归到叶子进行插入,每递归一层,从文件里把对应结点的信息读入到内存。
如果当前结点满出,则分裂结点,回溯的时候在父亲结点插入新分裂出来的子节点。
结束当前层的时候要注意写回内存。
注意各种细节即可。
node_ptr tree_insert(node_ptr n,char * retkey)
{
assert(n);
struct NODE node;
char newkey[12];
fseek(fp,n,SEEK_SET);
fread(&node,sizeof(struct NODE),1,fp);
//printf("\n node:%d key_number:%d\n",n,node.key_number);
int i;
for(i = 0;i < node.key_number;i++ )
{
if(strlessequal(statement.row.b,node.data+(i*16+4)))break;
}
node_ptr newchildentry;
if(!node.is_leaf)newchildentry = tree_insert(char2np(node.data+(i*16)),newkey);
else {newchildentry = statement.row.a;memcpy(newkey,statement.row.b,12);}
//printf("\nchildentry:%d\n",newchildentry);
if(!node.is_leaf && !newchildentry)return 0;
if(!node.is_leaf)i++;
for(int j=node.key_number;j>=i;j--)
memcpy(node.data+((j+1)*16),node.data+(j*16),16);
if(!node.is_leaf)
{
memcpy(node.data+(i*16+4),node.data+(i*16-12),12);
memcpy(node.data+(i*16-12),newkey,12);
np2char(newchildentry,node.data+(i*16));
}
else
{
np2char(newchildentry,node.data+(i*16));
memcpy(node.data+(i*16+4),newkey,12);
}
node.key_number++;
if(node.key_number <= 2*DEGREE)
{
write_node(n,&node);
return 0;
}
else
{
struct NODE new_node;
memset(&new_node,0,sizeof(new_node));
node_ptr new_id = New_NODE();
if(node.rsibling)
{
struct NODE rnode;
memset(&rnode,0,sizeof(rnode));
fseek(fp,node.rsibling,SEEK_SET);
fread(&rnode,sizeof(struct NODE),1,fp);
rnode.lsibling = new_id;
write_node(node.rsibling,&rnode);
}
new_node.rsibling = node.rsibling;
new_node.is_leaf = node.is_leaf;
new_node.lsibling = n;
node.rsibling = new_id;
if(!node.is_leaf)
{
memcpy(new_node.data,node.data+((DEGREE+1)*16),DEGREE*16+4);
memset(node.data+((DEGREE+1)*16),0,DEGREE*16+4);
new_node.key_number = node.key_number = DEGREE;
memcpy(retkey,node.data+(DEGREE*16+4),12);//UNCLEAR
//printf("\nretkey:%s\n",retkey);
}
else
{
memcpy(new_node.data,node.data+((DEGREE)*16),(DEGREE+1)*16);
new_node.key_number = DEGREE + 1;
node.key_number = DEGREE;
memcpy(retkey,new_node.data+4,12);//UNCLEAR
}
write_node(n,&node);
write_node(new_id,&new_node);
if(n==finfo.root)
{
struct NODE new_root;
memset(&new_root,0,sizeof(new_root));
int root_id = New_NODE();
finfo.root = root_id;
new_root.is_leaf = 0;
new_root.key_number = 1;
new_root.lsibling = new_root.rsibling = 0;
np2char(n,new_root.data);
memcpy(new_root.data+4,retkey,12);
np2char(new_id,new_root.data+16);
write_node(root_id,&new_root);
}
return new_id;
}
}
递归到叶子进行删除,每递归一层,从文件里把对应结点的信息读入到内存。
如果删除之后当前结点占有率低于50%,从左右兄弟选一个平均分配一下所存条目,如果两者条目之和能够用一个结点存储,就合并结点。回溯的时候在父亲结点删除合并消去的子节点。
返回是否找到了要删的目标结点。
注意各种细节即可。(长码警告)
int upd;
char updkey[12];
int tree_delete(node_ptr fa,struct NODE *fa_node,node_ptr n,uint32_t loc,node_ptr *childentry)
{
struct NODE node;
node_ptr oldchildenty=0;
fseek(fp,n,SEEK_SET);
fread(&node,sizeof(struct NODE),1,fp);
int i;
for(i = 0;i < node.key_number;i++ )
{
if(strless(statement.row.b,node.data+(i*16+4)))break;
}
int exist=0;
if(!node.is_leaf)exist = tree_delete(n,&node,char2np(node.data+(i*16)),i,&oldchildenty);
else exist = i && !strcmp(statement.row.b,node.data+(i*16-12));
//if(statement.row.b[0]=='z')printf("i:%d key_num:%d exist:%d node:%d\n",i,node.key_number,exist,n);
if(upd)
{
if(fa && loc)
{
memcpy(fa_node->data+(loc*16-12),updkey,12);
upd=0;
write_node(fa,fa_node);
}
}
if(!node.is_leaf && !oldchildenty){*childentry = 0;return exist;}
if(node.is_leaf && !exist){*childentry = 0;return exist;}
node.key_number--;
if(oldchildenty)i=oldchildenty;
if(!i)
{
if(fa && loc)
{
//printf("George node:%d key:%s fakey:%s\n",n,statement.row.b,fa_node->data+(loc*16-12));
memcpy(fa_node->data+(loc*16-12),node.data+4,12);
write_node(fa,fa_node);
}
for(int j=i;j<=node.key_number;j++)
memcpy(node.data+(j*16),node.data+((j+1)*16),16);
}
else
{
for(int j=i;j<=node.key_number;j++)
if(!node.is_leaf) memcpy(node.data+(j*16-12),node.data+((j+1)*16-12),16);
else memcpy(node.data+(j*16-16),node.data+((j+1)*16-16),16);
if(node.is_leaf && i==1)
{
if(fa && loc)
{
//printf("George node:%d key:%s fakey:%s\n",n,statement.row.b,fa_node->data+(loc*16-12));
memcpy(fa_node->data+(loc*16-12),node.data+4,12);
//printf("George node:%d key:%s fakey:%s\n",n,statement.row.b,fa_node->data+(loc*16-12));
write_node(fa,fa_node);
}
if(fa && !loc)
{
upd=1;
memcpy(updkey,node.data+4,12);
}
}
}
//不想写左sibling的合并了,太麻烦了QAQ
//算了还是写吧……
if(fa && fa_node->key_number!=loc)//有右兄弟
{
if(node.key_number < DEGREE)
{
//printf("move!\n");
struct NODE sb_node;
memset(&sb_node,0,sizeof(sb_node));
fseek(fp,node.rsibling,SEEK_SET);
fread(&sb_node,sizeof(struct NODE),1,fp);
if(node.key_number+sb_node.key_number < 2*DEGREE)//合并
{
*childentry = loc + 1;
if(!node.is_leaf)
{
memcpy(node.data+(node.key_number*16+4),fa_node->data+(loc*16+4),12);
node.key_number++;
}
for(int j=0;j<=sb_node.key_number;j++)
memcpy(node.data+((node.key_number+j)*16),sb_node.data+(j*16),16);
node.key_number += sb_node.key_number;
if(sb_node.rsibling)//维护链表
{
struct NODE sbsb_node;
memset(&sbsb_node,0,sizeof(sbsb_node));
fseek(fp,sb_node.rsibling,SEEK_SET);
fread(&sbsb_node,sizeof(struct NODE),1,fp);
sbsb_node.lsibling = n;
write_node(sb_node.rsibling,&sbsb_node);
}//懒得处理被删掉的空间了
node.rsibling = sb_node.rsibling;
write_node(n,&node);
return exist;
}
else //均分
{
if(!node.is_leaf)
{
int move_num=0;
while(sb_node.key_number - node.key_number > 1)
{
memcpy(node.data+(node.key_number*16+4),fa_node->data+(loc*16+4),12);
node.key_number++;
memcpy(node.data+(node.key_number*16),sb_node.data+(move_num*16),4);
memcpy(fa_node->data+(loc*16+4),sb_node.data+(move_num*16+4),12);
sb_node.key_number--;
move_num++;
}
for(int j=0;j<=sb_node.key_number;j++)
memcpy(sb_node.data+(j*16),sb_node.data+((move_num+j)*16),16);
}
else
{
int move_num=0;
while(sb_node.key_number - node.key_number > 1)
{
memcpy(node.data+(node.key_number*16),sb_node.data+(move_num*16),16);
node.key_number++;
sb_node.key_number--;
move_num++;
}
for(int j=0;j<=sb_node.key_number;j++)
memcpy(sb_node.data+(j*16),sb_node.data+((move_num+j)*16),16);
memcpy(fa_node->data+(loc*16+4),sb_node.data+4,12);
}
write_node(n,&node);
write_node(node.rsibling,&sb_node);
write_node(fa,fa_node);
return exist;
}
}
}
else if(fa && loc)//有左兄弟
{
if(node.key_number < DEGREE)
{
//printf("move!\n");
struct NODE sb_node;
memset(&sb_node,0,sizeof(sb_node));
fseek(fp,node.lsibling,SEEK_SET);
fread(&sb_node,sizeof(struct NODE),1,fp);
if(node.key_number+sb_node.key_number < 2*DEGREE)//合并
{
*childentry = loc;
if(!node.is_leaf)
{
memcpy(sb_node.data+(sb_node.key_number*16+4),fa_node->data+(loc*16-12),12);
sb_node.key_number++;
}
for(int j=0;j<=node.key_number;j++)
memcpy(sb_node.data+((sb_node.key_number+j)*16),node.data+(j*16),16);
sb_node.key_number += node.key_number;
//printf("node.l:%d node:%d node.r:%d ll:%d\n",node.lsibling,n,node.rsibling,sb_node.lsibling);
if(node.rsibling)//维护链表
{
struct NODE r_node;
memset(&r_node,0,sizeof(r_node));
fseek(fp,node.rsibling,SEEK_SET);
fread(&r_node,sizeof(struct NODE),1,fp);
r_node.lsibling = node.lsibling;
write_node(node.rsibling,&r_node);
}//懒得处理被删掉的空间了
sb_node.rsibling = node.rsibling;
write_node(node.lsibling,&sb_node);
return exist;
}
else //均分
{
if(!node.is_leaf)
{
int move_num=(sb_node.key_number - node.key_number)/2;
for(int j=node.key_number;j>=0;j--)
memcpy(node.data+((j+move_num)*16),node.data+(j*16),16);
while(move_num)
{
move_num--;
memcpy(node.data+(move_num*16+4),fa_node->data+(loc*16-12),12);
memcpy(node.data+(move_num*16),sb_node.data+(sb_node.key_number*16),4);
node.key_number++;
sb_node.key_number--;
memcpy(fa_node->data+(loc*16-12),sb_node.data+(sb_node.key_number*16+4),12);
}
}
else
{
int move_num=(sb_node.key_number - node.key_number)/2;
for(int j=node.key_number;j>=0;j--)
memcpy(node.data+((j+move_num)*16),node.data+(j*16),16);
while(move_num)
{
move_num--;
sb_node.key_number--;
memcpy(node.data+(move_num*16),sb_node.data+(sb_node.key_number*16),16);
node.key_number++;
}
memcpy(fa_node->data+(loc*16-12),node.data+4,12);
}
write_node(n,&node);
write_node(node.lsibling,&sb_node);
write_node(fa,fa_node);
return exist;
}
}
}
if(node.key_number == 0)
{
finfo.root=char2np(node.data);
return exist;
}//可以用哨兵避免整个树空的发生。
write_node(n,&node);
return exist;
}
void tree_search(node_ptr n,node_ptr *nid,unsigned int *offset)
{
struct NODE node;
fseek(fp,n,SEEK_SET);
fread(&node,sizeof(struct NODE),1,fp);
//printf("sn:%d\n",n);
int i;
for(i = 0;i < node.key_number;i++ )
{
if(strlessequal(statement.row.b,node.data+(i*16+4)))break;
}
//printf(" i=%d\n",i);
if(!node.is_leaf)
{
tree_search(char2np(node.data+(i*16)),nid,offset);
}
else
{
if(i==node.key_number)
{
*nid=node.rsibling;
*offset=0;
}
else
{
*nid=n;
*offset=i;
}
}
}
因为要删除所有键值B
为s
的条目,可以多次去删除知道找不到键值为B
的条目为止。一次操作的复杂度可能很大,但是总复杂度等价于插入操作的总复杂度。
void b_tree_delete() {
/* delete row(s) */
struct NODE tmp;
node_ptr tmp2;
while(tree_delete(0,&tmp,finfo.root,0,&tmp2))
{
finfo.entry_num--;
}
}
先在B+树找到键值 B
的起点,然后在叶子上遍历输出即可。
void b_tree_search() {
/* print selected rows */
node_ptr n;
unsigned int offset;
tree_search(finfo.root,&n,&offset);
int empty=1;
struct NODE node;
fseek(fp,n,SEEK_SET);
if(n)fread(&node,sizeof(struct NODE),1,fp);
while(n && !strcmp(statement.row.b,node.data+(offset*16+4)))
{
empty=0;
printf("(%d, %s)\n",char2np(node.data+(offset*16)),node.data+(offset*16+4));
offset++;
if(offset==node.key_number)
{
n=node.rsibling;
if(!n)break;
offset=0;
fseek(fp,n,SEEK_SET);
fread(&node,sizeof(struct NODE),1,fp);
}
}
if(empty)
{
printf("(Empty)\n");
}
}
本次项目实现了B+树为基础的数据库,在本次项目中我加深了对B+树结构的认识,了解到了B+树的应用场景,亲手编写并调试来实现了一棵B+树的基本操作。
深刻认识到B+树不同于我们在内存运算中常用的数据结构,这是专门针对IO的低速性质和IO一次读取一个页的特性而设计的数据结构。
(才不是因为是ACM选手所以喜欢把所有代码写在一个文件里)
(这么做不是方便助教老师编译验收吗? )
/* You may refer to: https://cstack.github.io/db_tutorial/ */
/* Compile: gcc -o myjql myjql.c -O3 */
/* Test: /usr/bin/time -v ./myjql myjql.db < in.txt > out.txt */
/* Compare: diff out.txt ans.txt */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <assert.h>
#define PAGE_SIZE 4096
#define NODE_SIZE 4096
#define DEGREE ((NODE_SIZE-16-16-4)/32)
#define IDX_SIZE (NODE_SIZE-16)
typedef unsigned int node_ptr;
struct FILE_INFO{
node_ptr root;
unsigned int lenth;
unsigned int entry_num;
}finfo;
struct NODE{
node_ptr lsibling,rsibling;
int is_leaf;
int key_number;
char data[IDX_SIZE];
};
/* shell IO */
#define INPUT_BUFFER_SIZE 31
struct {
char buffer[INPUT_BUFFER_SIZE + 1];
size_t length;
} input_buffer;
typedef enum {
INPUT_SUCCESS,
INPUT_TOO_LONG
} InputResult;
void print_prompt() { printf("myjql> "); }
InputResult read_input() {
/* we read the entire line as the input */
input_buffer.length = 0;
while (input_buffer.length <= INPUT_BUFFER_SIZE
&& (input_buffer.buffer[input_buffer.length++] = getchar()) != '\n'
&& input_buffer.buffer[input_buffer.length - 1] != EOF);
if (input_buffer.buffer[input_buffer.length - 1] == EOF)
exit(EXIT_SUCCESS);
input_buffer.length--;
/* if the last character is not new-line, the input is considered too long,
the remaining characters are discarded */
if (input_buffer.length == INPUT_BUFFER_SIZE
&& input_buffer.buffer[input_buffer.length] != '\n') {
while (getchar() != '\n');
return INPUT_TOO_LONG;
}
input_buffer.buffer[input_buffer.length] = 0;
return INPUT_SUCCESS;
}
FILE *fp;
node_ptr New_NODE();
void np2char(node_ptr,char*);
void write_node(unsigned int,struct NODE *);
void open_file(const char* filename) {
/* open file */
if(!(fp = fopen(filename,"r+")))
{
fp = fopen(filename,"w+");
finfo.entry_num=0;
finfo.lenth=sizeof(struct FILE_INFO);
finfo.root=New_NODE();
struct NODE node;
memset(&node,0,sizeof(node));
node.is_leaf=1;
node.key_number=1;
np2char(0,node.data);
memcpy(node.data+4,"\0\0\0\0\0\0\0\0\0\0\0",12);
write_node(finfo.root,&node);
}
else
{
fseek(fp,0,SEEK_SET);
fread(&finfo,sizeof(struct FILE_INFO),1,fp);
}
}
void exit_nicely(int code) {
/* do clean work */
fseek(fp,0,SEEK_SET);
fwrite(&finfo,sizeof(struct FILE_INFO),1,fp);
exit(code);
}
void exit_success() {
printf("bye~\n");
exit_nicely(EXIT_SUCCESS);
}
/* specialization of data structure */
#define COLUMN_B_SIZE 11
typedef struct {
uint32_t a;
char b[COLUMN_B_SIZE + 1];
} Row;
void print_row(Row* row) {
printf("(%d, %s)\n", row->a, row->b);
}
/* statement */
typedef enum {
STATEMENT_INSERT,
STATEMENT_SELECT,
STATEMENT_DELETE
} StatementType;
struct {
StatementType type;
Row row;
uint8_t flag; /* whether row.a, row.b have valid values */
} statement;
/* B-Tree operations */
/* B+Tree*/
int strless(char *a,char *b)
{
return strcmp(a,b)<0;
}
int strlessequal(char *a,char *b)
{
return strcmp(a,b)<=0;
}
node_ptr char2np(char *s)
{
node_ptr ret=0;
ret |= 0xff & s[3];ret <<= 8;
ret |= 0xff & s[2];ret <<= 8;
ret |= 0xff & s[1];ret <<= 8;
ret |= 0xff & s[0];
return ret;
}
void np2char(node_ptr x,char *s)
{
s[0] = x & 0xff; x >>= 8;
s[1] = x & 0xff; x >>= 8;
s[2] = x & 0xff; x >>= 8;
s[3] = x & 0xff;
}
node_ptr New_NODE()
{
node_ptr ret = finfo.lenth;
finfo.lenth += sizeof(struct NODE);
return ret;
}
void write_node(node_ptr n,struct NODE* node)
{
fseek(fp,n,SEEK_SET);
fwrite(node,sizeof(struct NODE),1,fp);
}
node_ptr tree_insert(node_ptr n,char * retkey)
{
assert(n);
struct NODE node;
char newkey[12];
fseek(fp,n,SEEK_SET);
fread(&node,sizeof(struct NODE),1,fp);
//printf("\n node:%d key_number:%d\n",n,node.key_number);
int i;
for(i = 0;i < node.key_number;i++ )
{
if(strlessequal(statement.row.b,node.data+(i*16+4)))break;
}
node_ptr newchildentry;
if(!node.is_leaf)newchildentry = tree_insert(char2np(node.data+(i*16)),newkey);
else {newchildentry = statement.row.a;memcpy(newkey,statement.row.b,12);}
//printf("\nchildentry:%d\n",newchildentry);
if(!node.is_leaf && !newchildentry)return 0;
if(!node.is_leaf)i++;
for(int j=node.key_number;j>=i;j--)
memcpy(node.data+((j+1)*16),node.data+(j*16),16);
if(!node.is_leaf)
{
memcpy(node.data+(i*16+4),node.data+(i*16-12),12);
memcpy(node.data+(i*16-12),newkey,12);
np2char(newchildentry,node.data+(i*16));
}
else
{
np2char(newchildentry,node.data+(i*16));
memcpy(node.data+(i*16+4),newkey,12);
}
node.key_number++;
if(node.key_number <= 2*DEGREE)
{
write_node(n,&node);
return 0;
}
else
{
struct NODE new_node;
memset(&new_node,0,sizeof(new_node));
node_ptr new_id = New_NODE();
if(node.rsibling)
{
struct NODE rnode;
memset(&rnode,0,sizeof(rnode));
fseek(fp,node.rsibling,SEEK_SET);
fread(&rnode,sizeof(struct NODE),1,fp);
rnode.lsibling = new_id;
write_node(node.rsibling,&rnode);
}
new_node.rsibling = node.rsibling;
new_node.is_leaf = node.is_leaf;
new_node.lsibling = n;
node.rsibling = new_id;
if(!node.is_leaf)
{
memcpy(new_node.data,node.data+((DEGREE+1)*16),DEGREE*16+4);
memset(node.data+((DEGREE+1)*16),0,DEGREE*16+4);
new_node.key_number = node.key_number = DEGREE;
memcpy(retkey,node.data+(DEGREE*16+4),12);//UNCLEAR
//printf("\nretkey:%s\n",retkey);
}
else
{
memcpy(new_node.data,node.data+((DEGREE)*16),(DEGREE+1)*16);
new_node.key_number = DEGREE + 1;
node.key_number = DEGREE;
memcpy(retkey,new_node.data+4,12);//UNCLEAR
}
write_node(n,&node);
write_node(new_id,&new_node);
if(n==finfo.root)
{
struct NODE new_root;
memset(&new_root,0,sizeof(new_root));
int root_id = New_NODE();
finfo.root = root_id;
new_root.is_leaf = 0;
new_root.key_number = 1;
new_root.lsibling = new_root.rsibling = 0;
np2char(n,new_root.data);
memcpy(new_root.data+4,retkey,12);
np2char(new_id,new_root.data+16);
write_node(root_id,&new_root);
}
return new_id;
}
}
int upd;
char updkey[12];
int tree_delete(node_ptr fa,struct NODE *fa_node,node_ptr n,uint32_t loc,node_ptr *childentry)
{
struct NODE node;
node_ptr oldchildenty=0;
fseek(fp,n,SEEK_SET);
fread(&node,sizeof(struct NODE),1,fp);
int i;
for(i = 0;i < node.key_number;i++ )
{
if(strless(statement.row.b,node.data+(i*16+4)))break;
}
int exist=0;
if(!node.is_leaf)exist = tree_delete(n,&node,char2np(node.data+(i*16)),i,&oldchildenty);
else exist = i && !strcmp(statement.row.b,node.data+(i*16-12));
//if(statement.row.b[0]=='z')printf("i:%d key_num:%d exist:%d node:%d\n",i,node.key_number,exist,n);
if(upd)
{
if(fa && loc)
{
memcpy(fa_node->data+(loc*16-12),updkey,12);
upd=0;
write_node(fa,fa_node);
}
}
if(!node.is_leaf && !oldchildenty){*childentry = 0;return exist;}
if(node.is_leaf && !exist){*childentry = 0;return exist;}
node.key_number--;
if(oldchildenty)i=oldchildenty;
if(!i)
{
if(fa && loc)
{
//printf("George node:%d key:%s fakey:%s\n",n,statement.row.b,fa_node->data+(loc*16-12));
memcpy(fa_node->data+(loc*16-12),node.data+4,12);
write_node(fa,fa_node);
}
for(int j=i;j<=node.key_number;j++)
memcpy(node.data+(j*16),node.data+((j+1)*16),16);
}
else
{
for(int j=i;j<=node.key_number;j++)
if(!node.is_leaf) memcpy(node.data+(j*16-12),node.data+((j+1)*16-12),16);
else memcpy(node.data+(j*16-16),node.data+((j+1)*16-16),16);
if(node.is_leaf && i==1)
{
if(fa && loc)
{
//printf("George node:%d key:%s fakey:%s\n",n,statement.row.b,fa_node->data+(loc*16-12));
memcpy(fa_node->data+(loc*16-12),node.data+4,12);
//printf("George node:%d key:%s fakey:%s\n",n,statement.row.b,fa_node->data+(loc*16-12));
write_node(fa,fa_node);
}
if(fa && !loc)
{
upd=1;
memcpy(updkey,node.data+4,12);
}
}
}
//不想写左sibling的合并了,太麻烦了QAQ
//算了还是写吧……
if(fa && fa_node->key_number!=loc)//有右兄弟
{
if(node.key_number < DEGREE)
{
//printf("move!\n");
struct NODE sb_node;
memset(&sb_node,0,sizeof(sb_node));
fseek(fp,node.rsibling,SEEK_SET);
fread(&sb_node,sizeof(struct NODE),1,fp);
if(node.key_number+sb_node.key_number < 2*DEGREE)//合并
{
*childentry = loc + 1;
if(!node.is_leaf)
{
memcpy(node.data+(node.key_number*16+4),fa_node->data+(loc*16+4),12);
node.key_number++;
}
for(int j=0;j<=sb_node.key_number;j++)
memcpy(node.data+((node.key_number+j)*16),sb_node.data+(j*16),16);
node.key_number += sb_node.key_number;
if(sb_node.rsibling)//维护链表
{
struct NODE sbsb_node;
memset(&sbsb_node,0,sizeof(sbsb_node));
fseek(fp,sb_node.rsibling,SEEK_SET);
fread(&sbsb_node,sizeof(struct NODE),1,fp);
sbsb_node.lsibling = n;
write_node(sb_node.rsibling,&sbsb_node);
}//懒得处理被删掉的空间了
node.rsibling = sb_node.rsibling;
write_node(n,&node);
return exist;
}
else //均分
{
if(!node.is_leaf)
{
int move_num=0;
while(sb_node.key_number - node.key_number > 1)
{
memcpy(node.data+(node.key_number*16+4),fa_node->data+(loc*16+4),12);
node.key_number++;
memcpy(node.data+(node.key_number*16),sb_node.data+(move_num*16),4);
memcpy(fa_node->data+(loc*16+4),sb_node.data+(move_num*16+4),12);
sb_node.key_number--;
move_num++;
}
for(int j=0;j<=sb_node.key_number;j++)
memcpy(sb_node.data+(j*16),sb_node.data+((move_num+j)*16),16);
}
else
{
int move_num=0;
while(sb_node.key_number - node.key_number > 1)
{
memcpy(node.data+(node.key_number*16),sb_node.data+(move_num*16),16);
node.key_number++;
sb_node.key_number--;
move_num++;
}
for(int j=0;j<=sb_node.key_number;j++)
memcpy(sb_node.data+(j*16),sb_node.data+((move_num+j)*16),16);
memcpy(fa_node->data+(loc*16+4),sb_node.data+4,12);
}
write_node(n,&node);
write_node(node.rsibling,&sb_node);
write_node(fa,fa_node);
return exist;
}
}
}
else if(fa && loc)//有左兄弟
{
if(node.key_number < DEGREE)
{
//printf("move!\n");
struct NODE sb_node;
memset(&sb_node,0,sizeof(sb_node));
fseek(fp,node.lsibling,SEEK_SET);
fread(&sb_node,sizeof(struct NODE),1,fp);
if(node.key_number+sb_node.key_number < 2*DEGREE)//合并
{
*childentry = loc;
if(!node.is_leaf)
{
memcpy(sb_node.data+(sb_node.key_number*16+4),fa_node->data+(loc*16-12),12);
sb_node.key_number++;
}
for(int j=0;j<=node.key_number;j++)
memcpy(sb_node.data+((sb_node.key_number+j)*16),node.data+(j*16),16);
sb_node.key_number += node.key_number;
//printf("node.l:%d node:%d node.r:%d ll:%d\n",node.lsibling,n,node.rsibling,sb_node.lsibling);
if(node.rsibling)//维护链表
{
struct NODE r_node;
memset(&r_node,0,sizeof(r_node));
fseek(fp,node.rsibling,SEEK_SET);
fread(&r_node,sizeof(struct NODE),1,fp);
r_node.lsibling = node.lsibling;
write_node(node.rsibling,&r_node);
}//懒得处理被删掉的空间了
sb_node.rsibling = node.rsibling;
write_node(node.lsibling,&sb_node);
return exist;
}
else //均分
{
if(!node.is_leaf)
{
int move_num=(sb_node.key_number - node.key_number)/2;
for(int j=node.key_number;j>=0;j--)
memcpy(node.data+((j+move_num)*16),node.data+(j*16),16);
while(move_num)
{
move_num--;
memcpy(node.data+(move_num*16+4),fa_node->data+(loc*16-12),12);
memcpy(node.data+(move_num*16),sb_node.data+(sb_node.key_number*16),4);
node.key_number++;
sb_node.key_number--;
memcpy(fa_node->data+(loc*16-12),sb_node.data+(sb_node.key_number*16+4),12);
}
}
else
{
int move_num=(sb_node.key_number - node.key_number)/2;
for(int j=node.key_number;j>=0;j--)
memcpy(node.data+((j+move_num)*16),node.data+(j*16),16);
while(move_num)
{
move_num--;
sb_node.key_number--;
memcpy(node.data+(move_num*16),sb_node.data+(sb_node.key_number*16),16);
node.key_number++;
}
memcpy(fa_node->data+(loc*16-12),node.data+4,12);
}
write_node(n,&node);
write_node(node.lsibling,&sb_node);
write_node(fa,fa_node);
return exist;
}
}
}
if(node.key_number == 0)
{
finfo.root=char2np(node.data);
return exist;
}//可以用哨兵避免整个树空的发生。
write_node(n,&node);
return exist;
}
void tree_search(node_ptr n,node_ptr *nid,unsigned int *offset)
{
struct NODE node;
fseek(fp,n,SEEK_SET);
fread(&node,sizeof(struct NODE),1,fp);
//printf("sn:%d\n",n);
int i;
for(i = 0;i < node.key_number;i++ )
{
if(strlessequal(statement.row.b,node.data+(i*16+4)))break;
}
//printf(" i=%d\n",i);
if(!node.is_leaf)
{
tree_search(char2np(node.data+(i*16)),nid,offset);
}
else
{
if(i==node.key_number)
{
*nid=node.rsibling;
*offset=0;
}
else
{
*nid=n;
*offset=i;
}
}
}
/* the key to select is stored in `statement.row.b` */
void b_tree_search() {
/* print selected rows */
node_ptr n;
unsigned int offset;
tree_search(finfo.root,&n,&offset);
int empty=1;
struct NODE node;
fseek(fp,n,SEEK_SET);
if(n)fread(&node,sizeof(struct NODE),1,fp);
while(n && !strcmp(statement.row.b,node.data+(offset*16+4)))
{
empty=0;
printf("(%d, %s)\n",char2np(node.data+(offset*16)),node.data+(offset*16+4));
offset++;
if(offset==node.key_number)
{
n=node.rsibling;
if(!n)break;
offset=0;
fseek(fp,n,SEEK_SET);
fread(&node,sizeof(struct NODE),1,fp);
}
}
if(empty)
{
printf("(Empty)\n");
}
//printf("[INFO] select: %s\n", statement.row.b);
}
/* the row to insert is stored in `statement.row` */
void b_tree_insert() {
/* insert a row */
char tmp[12];
tree_insert(finfo.root,tmp);
//printf("[INFO] insert: ");
//print_row(&statement.row);
finfo.entry_num++;
}
/* the key to delete is stored in `statement.row.b` */
void b_tree_delete() {
/* delete row(s) */
struct NODE tmp;
node_ptr tmp2;
while(tree_delete(0,&tmp,finfo.root,0,&tmp2))
{
//if(statement.row.b[0]=='z')printf("delete_ok\n");
finfo.entry_num--;
}
//("[INFO] delete: %s\n", statement.row.b);
}
void b_tree_traverse() {
/* print all rows */
memcpy(statement.row.b,"0\0\0\0\0\0\0\0\0\0\0",12);
node_ptr n;
unsigned int offset;
tree_search(finfo.root,&n,&offset);
int empty=1;
struct NODE node;
fseek(fp,n,SEEK_SET);
if(n)fread(&node,sizeof(struct NODE),1,fp);
//printf("George::: root:%d id:%d off:%d keynum:%d ls:%d\n",finfo.root,n,offset,node.key_number,node.lsibling);
while(n && offset!=node.key_number)
{
empty=0;
//printf("%d\n",n);
printf("(%d, %s)\n",char2np(node.data+(offset*16)),node.data+(offset*16+4));
offset++;
if(offset==node.key_number)
{
n=node.rsibling;
if(!n)break;
offset=0;
fseek(fp,n,SEEK_SET);
fread(&node,sizeof(struct NODE),1,fp);
}
}
//printf("num:%d\n",finfo.entry_num);
if(empty)
{
printf("(Empty)\n");
}
//printf("[INFO] traverse\n");
}
/* logic starts */
typedef enum {
EXECUTE_SUCCESS
} ExecuteResult;
typedef enum {
META_COMMAND_SUCCESS,
META_COMMAND_UNRECOGNIZED_COMMAND
} MetaCommandResult;
typedef enum {
PREPARE_SUCCESS,
PREPARE_NEGATIVE_VALUE,
PREPARE_STRING_TOO_LONG,
PREPARE_SYNTAX_ERROR,
PREPARE_UNRECOGNIZED_STATEMENT,
PREPARE_EMPTY_STATEMENT
} PrepareResult;
MetaCommandResult do_meta_command() {
if (strcmp(input_buffer.buffer, ".exit") == 0) {
exit(EXIT_SUCCESS);
} else {
return META_COMMAND_UNRECOGNIZED_COMMAND;
}
}
PrepareResult prepare_insert() {
statement.type = STATEMENT_INSERT;
char* keyword = strtok(input_buffer.buffer, " ");
char* a = strtok(NULL, " ");
char* b = strtok(NULL, " ");
int x;
if (a == NULL || b == NULL)
return PREPARE_SYNTAX_ERROR;
x = atoi(a);
if (x < 0)
return PREPARE_NEGATIVE_VALUE;
if (strlen(b) > COLUMN_B_SIZE)
return PREPARE_STRING_TOO_LONG;
statement.row.a = x;
strcpy(statement.row.b, b);
return PREPARE_SUCCESS;
}
PrepareResult prepare_condition() {
statement.flag = 0;
char* keyword = strtok(input_buffer.buffer, " ");
char* b = strtok(NULL, " ");
char* c = strtok(NULL, " ");
if (b == NULL) return PREPARE_SUCCESS;
if (c != NULL) return PREPARE_SYNTAX_ERROR;
if (strlen(b) > COLUMN_B_SIZE)
return PREPARE_STRING_TOO_LONG;
strcpy(statement.row.b, b);
statement.flag |= 2;
return PREPARE_SUCCESS;
}
PrepareResult prepare_select() {
statement.type = STATEMENT_SELECT;
return prepare_condition();
}
PrepareResult prepare_delete() {
statement.type = STATEMENT_DELETE;
PrepareResult result = prepare_condition();
if (result == PREPARE_SUCCESS && statement.flag == 0)
return PREPARE_SYNTAX_ERROR;
return result;
}
PrepareResult prepare_statement() {
if (strlen(input_buffer.buffer) == 0) {
return PREPARE_EMPTY_STATEMENT;
} else if (strncmp(input_buffer.buffer, "insert", 6) == 0) {
return prepare_insert();
} else if (strncmp(input_buffer.buffer, "select", 6) == 0) {
return prepare_select();
} else if (strncmp(input_buffer.buffer, "delete", 6) == 0) {
return prepare_delete();
}
return PREPARE_UNRECOGNIZED_STATEMENT;
}
ExecuteResult execute_select() {
printf("\n");
if (statement.flag == 0) {
b_tree_traverse();
} else {
b_tree_search();
}
return EXECUTE_SUCCESS;
}
ExecuteResult execute_statement() {
switch (statement.type) {
case STATEMENT_INSERT:
b_tree_insert();
return EXECUTE_SUCCESS;
case STATEMENT_SELECT:
return execute_select();
case STATEMENT_DELETE:
b_tree_delete();
return EXECUTE_SUCCESS;
}
}
void sigint_handler(int signum) {
printf("\n");
exit(EXIT_SUCCESS);
}
int main(int argc, char* argv[]) {
if (argc < 2) {
printf("Must supply a database filename.\n");
exit(EXIT_FAILURE);
}
atexit(&exit_success);
signal(SIGINT, &sigint_handler);
open_file(argv[1]);
while (1) {
print_prompt();
switch (read_input()) {
case INPUT_SUCCESS:
break;
case INPUT_TOO_LONG:
printf("Input is too long.\n");
continue;
}
if (input_buffer.buffer[0] == '.') {
switch (do_meta_command()) {
case META_COMMAND_SUCCESS:
continue;
case META_COMMAND_UNRECOGNIZED_COMMAND:
printf("Unrecognized command '%s'.\n", input_buffer.buffer);
continue;
}
}
switch (prepare_statement()) {
case PREPARE_SUCCESS:
break;
case PREPARE_EMPTY_STATEMENT:
continue;
case PREPARE_NEGATIVE_VALUE:
printf("Column `a` must be positive.\n");
continue;
case PREPARE_STRING_TOO_LONG:
printf("String for column `b` is too long.\n");
continue;
case PREPARE_SYNTAX_ERROR:
printf("Syntax error. Could not parse statement.\n");
continue;
case PREPARE_UNRECOGNIZED_STATEMENT:
printf("Unrecognized keyword at start of '%s'.\n",
input_buffer.buffer);
continue;
}
switch (execute_statement()) {
case EXECUTE_SUCCESS:
printf("\nExecuted.\n\n");
break;
}
//printf("%d\n",finfo.entry_num);
}
}