B+树实现程序

6 月 14th, 2010 | Posted by | Filed under 程序设计

本文内容遵从CC版权协议, 可以随意转载, 但必须以超链接形式标明文章原始出处和作者信息及版权声明
网址: http://www.penglixun.com/tech/program/b_plus_tree_program.html

在我的另一篇文章数据库算法与数据结构系列——B树相关中我介绍了B树的结构,PPT上的东西很理论,这里补充一下演示的程序和B+树实现的代码。

下面是从网上找的一个B-树的图形演示程序。

  B-Tree_Demo.zip (68.6 KiB, 3,093 hits)

下面是用C++实现B+树的代码,没做什么优化,全部都是操作文件,所以相对较慢。

/*
操作说明
1.插入操作,"i 关键字" -- 若存在,则提示;若不存在,则插入关键字。 
2.删除操作,"d 关键字"。
3.输出B+树操作,"p 1或2" -- 两种输出模式,模式1输出整个树结构,模式2按从小到大顺序输出所有关键字。
4.测试,"t 1或2" -- 1测试插入,2测试查找
*/

#include 
#include 
#include 
#include 
#include 
#include 
#include 

using namespace std;

/*计时器*/
class Timer {
public :
    //构造函数
    Timer ();
    //析构函数
    ~Timer ();
    //开始计时
    void begin();
    //计时结束
    void end();
    //获取时间,ms
    double get_time();
private :
    clock_t start, finish;
    double time;
};

Timer::Timer () {
    start = 0;
    finish = 0;
}

Timer::~Timer () {
    start = 0;
    finish = 0;
}

void Timer::begin () {
    start = clock();
}

void Timer::end () {
    finish = clock();
}

double Timer::get_time() {
    time = (double)(finish-start)/CLOCKS_PER_SEC*1000;
    return time;
}


// 定义B+树的阶数
#define M 4
// B+树结点定义
struct BPlusNode
{
    int  amount;      // 该结点中已保存的键值个数
    long key[M];      // 键值数组
    long children[M]; // 子结点位置数组
    long father;      // 父结点位置
    long left;        // 同一层中的左结点位置
    long right;       // 同一层中的右结点位置
    bool isactive;    // 有效位
    bool isleave;     // 标志该结点是否叶结点
};
// 函数头定义
bool findkey   (long key, long &position); // 查找包含给定键值的结点位置
bool insertkey (long key, long position);  // 插入一个新键值
bool deletekey (long key, long position);  // 删除给定的键值
void printkey  (long mode);                // 按照给定模式输出B+树
void testBtree (int type) ;    //测试B树
// 全局变量
static long pre;             // 上一个被访问的结点
static long cur;             // 当前指向的结点
static long root;            // 根结点位置
static long smallest;        // 保存最小关键值的叶结点
static long nodenewposition; // 新插入的结点位置

// 主函数
int main() {
    Timer     timer;
    BPlusNode node;
    char    command;
    fstream     iofile;
    long keynumber;
    long position = -1;
    
    // 检测索引文件是否存在,若不存在则创建一个初始化的索引文件,其中包含根结点位置,最小键值位置和一个空结点
    fstream infile ("BPlusTreeData.dat", ios::binary|ios::in);
    if (!infile) {    
        node.amount = 1;//如果不存在,则创建根结点,且阶数M=4
        for (int i = 0; i < M; ++i) {
            node.key[i] = 0;
            node.children[i] = 0;
        }
        node.father = 0;
        node.left = 0;
        node.right = 0;
        node.isactive = true;
        node.isleave = true;
        root = 8;//root为什么=8?
        smallest = 8;
        fstream outfile ("BPlusTreeData.dat", ios::binary|ios::out);//输出文件BPlusTreeData.dat
        outfile.seekp(0, ios::beg);//指针移到begin处,文件开头
        outfile.write((char *)&root, sizeof(long));//输出根结点
        outfile.write((char *)&smallest, sizeof(long));//输出最小关键字
        outfile.write((char *)&node, sizeof(BPlusNode));//输出头指针?
        outfile.close();
    }
    infile.close();
    // 循环获取命令行,执行给定的命令
    while (true) {
        cin >> command >> keynumber;
        // 插入一个新的键值,该值不能与已有关键值重复
        switch (command) {
        case 'i' :
            timer.begin();
            if (findkey(keynumber, position)) {
                cout << keynumber << " is already in this B+ tree" << endl;
            } else if (insertkey(keynumber, position)) {
                timer.end();
                cout << "Successful inserted in " << timer.get_time() << " ms" << endl;
            } else 
                cout << "Action falled" << endl;
            break;
        case 'd' :  // 删除给定的键值
            if (deletekey(keynumber, position))
               cout << "Successful deleted" << endl;
            break;
        case 'p' :
            // 按照指定模式输出B+数
            // 模式“1”输出整棵树结构
            // 模式“2”按照从小到大的顺序输出所有键值
            timer.begin();
            printkey(keynumber);
            timer.end();
            cout << "Printed in " << timer.get_time() << " ms" << endl;
            break;
        case 't':
            timer.begin();
            testBtree(keynumber);
            timer.end();
            cout << "Test in " << timer.get_time() << " ms" << endl;
            break;
        case 'e':
            return 0;
        default :
            cout << "Please make sure the command is correct" << endl;
            continue;
        }
    }
}

// 查找包含给定键值的结点位置,若找到则返回“true”
// “position”保存最后访问的结点位置
bool findkey (long key, long &position) {
    BPlusNode node;
    long point;
    fstream iofile ("BPlusTreeData.dat", ios::binary|ios::in|ios::out);
    iofile.seekp(0, ios::beg);
    iofile.read((char *)&root, sizeof(long));
    iofile.seekp(root, ios::beg);
    while (true) {
        cur = iofile.tellp();
        iofile.read((char *)&node, sizeof(BPlusNode));
        if(!node.isactive) 
            continue;

        // B+树只在叶结点保存记录信息
        // 所以查找必须执行到叶结点
        if(!node.isleave) { // 如果该结点不是叶结点,则根据键值决定访问哪个子结点
            for (int i = 1; i < node.amount; ++i) {
                point = -1;
                if (node.key[i] > key) {
                    point = node.children[i-1];
                    break;
                }
            }
            if (point == -1) 
                point = node.children[node.amount-1];
            iofile.seekp(point, ios::beg);
            pre = cur;
        } else { // 如果该结点是叶结点,则顺序访问结点中的键值
            for (int i = 1; i < node.amount; ++i) {
                if (node.key[i] == key) {
                    position = cur;
                    iofile.close();
                    return true;
                }
            }
            position = cur;
            iofile.close();
            return false;
        }
    }
}

// 按照给定格式输出B+数
void printkey (long mode) {
    BPlusNode node;
    int i = 1, k = 1;
    fstream iofile("BPlusTreeData.dat", ios::binary|ios::in|ios::out);

    // 从根结点开始广度遍历,输出整棵B+树结构
    if (mode == 1) {
        iofile.seekp(0, ios::beg);
        iofile.read((char *)&root, sizeof(long));
        iofile.seekp(root, ios::beg);
        do {
            cur = iofile.tellp();
            cout << "level " << k << ":";
            do {
                iofile.read((char *)&node, sizeof(BPlusNode));
                cout << "     node " << i << ": ";
                for (int j = 1; j < node.amount; ++j)
                    cout << node.key[j] << " ";
                if (node.right == 0) {
                    i = 1;
                    cout << endl;
                    break;
                }
                iofile.seekp(node.right, ios::beg);
                ++i;
            } while(true);
            iofile.seekp(cur, ios::beg);
            iofile.read((char *)&node, sizeof(BPlusNode));
            if (node.children[0] == 0)
                break;
            iofile.seekp(node.children[0], ios::beg);
            ++k;
        } while (true);
       iofile.close();
    } else if (mode == 2) { // 从包含最小键值的叶结点开始按照从小到大的顺序输出所有键值
        iofile.seekp(4, ios::beg);
        iofile.read((char *)&smallest, sizeof(long));
        iofile.seekp(smallest, ios::beg);
        do {
            iofile.read((char *)&node, sizeof(BPlusNode));
            for (int l = 1; l < node.amount; ++l)
                cout << node.key[l] << " ";
            if (node.right == 0) {
                cout << endl;
                break;
            }
            iofile.seekp(node.right, ios::beg);
        } while(true);
        iofile.close();
     }
}

// 在位于“position”的结点中插入一个新键值“key”
// 按照B+树的规则,根据情况分裂结点 
bool insertkey (long key, long position) {
    fstream iofile;
    iofile.open("BPlusTreeData.dat", ios::binary|ios::in|ios::out);
    BPlusNode node;
    BPlusNode nodenew;
    BPlusNode nodetemp1, nodetemp2;
    long keytemp[M];
    long childrentemp[M+1];
    iofile.seekp(0, ios::end);
    long posEnd = iofile.tellp();
    //根节点分裂之后新建根节点
    if (position == 0) {
        iofile.seekp(0, ios::beg);
        iofile.read((char *)&root, sizeof(long));
        iofile.seekp(root, ios::beg);
        iofile.read((char *)&node, sizeof(BPlusNode));
        nodenew.amount = 2;
        nodenew.key[1] = key;
        nodenew.children[0] = root;
        nodenew.children[1] = nodenewposition;
        nodenew.father = 0;
        nodenew.left = 0;
        nodenew.right = 0;
        nodenew.isactive = true;
        nodenew.isleave = false;
        iofile.seekp(8, ios::beg);
        do {
            cur = iofile.tellp();
            iofile.read((char *)&nodetemp2, sizeof(BPlusNode));
        } while(nodetemp2.isactive && iofile.tellp() < posEnd);
        if (nodetemp2.isactive) {
            nodenewposition = iofile.tellp();
            iofile.write((char *)&nodenew, sizeof(BPlusNode));
        } else {
            iofile.seekp(cur, ios::beg);
            nodenewposition = cur;
            iofile.write((char *)&nodenew, sizeof(BPlusNode));
        }
        root = nodenewposition;
        iofile.seekp(0, ios::beg);
        iofile.write((char *)&root, sizeof(long));
        for (int i = 0; i <= 1; ++i) {
            iofile.seekp(nodenew.children[i], ios::beg);
            iofile.read((char *)&nodetemp1, sizeof(BPlusNode));
            nodetemp1.father = nodenewposition;
            iofile.seekp(nodenew.children[i], ios::beg);
            iofile.write((char *)&nodetemp1, sizeof(BPlusNode));
        }
        iofile.close();
        return true;
    } else { // 没有分裂到根结点
        int insertposition = 0;
        iofile.seekp(position, ios::beg);
        iofile.read((char *)&node, sizeof(BPlusNode));
        if (node.amount < M) { // 结点中还有空位保存新插入的键值,不需要分裂
            // 按照从小到大的顺序重新排列原有键值和新插入的键值
            bool issort1 = false;
            for (int i = 1; i < node.amount; ++i) {
                if (node.key[i] > key) {
                    for (int j = node.amount - i, k = 0; j > 0; --j, ++k)
                        node.key[node.amount-k] = node.key[node.amount-k-1];
                    node.key[i] = key;
                    insertposition = i;
                    issort1 = true;
                    break;
                }
            }
            if (!issort1) {
                node.key[node.amount] = key;
                insertposition = node.amount;
            }
            node.amount++;
            if (!node.isleave) {
                for (int p = node.amount-1; p > insertposition; --p)
                    node.children[p] = node.children[p-1];
                node.children[insertposition] = nodenewposition;
            }
            iofile.seekp(position, ios::beg);
            iofile.write((char *)&node, sizeof(BPlusNode));
            iofile.close();
            return true;
        } else { // 结点中没有空位保存新插入的键值,必须分裂成两个结点
            long nextinsertkey = 0;
            // 按照从小到大的顺序重新排列原有键值和新插入的键值
            // 并且按照键值的顺序重新排列保存子结点位置的数组
            bool issort2 = false;
            for (int m = 1, n = 0, o = 0; m < M; ++m, ++o) {
                if (node.key[m] < key || issort2) {
                    keytemp[m-1+n] = node.key[m];
                    childrentemp[o+n] = node.children[o];
                } else {
                    keytemp[m-1] = key;
                    childrentemp[o] = node.children[o];
                    childrentemp[o+1] = nodenewposition;
                    n = 1;
                    --m;
                    issort2 = true;
                }
            }
            if (!issort2) {
                keytemp[M-1] = key;
                childrentemp[M-1] = node.children[M-1];
                childrentemp[M] = nodenewposition;
            }
            node.amount = M/2+1;
            if (!node.isleave) { // 按照内部结点的方式创建新结点
                nodenew.amount = M/2;
                for (int i = 0, j = 1; i <= M; ++i) {
                    if (i < M/2) {
                        node.key[i+1] = keytemp[i];
                        node.children[i] = childrentemp[i];
                    } else if (i == M/2) {
                        nextinsertkey = keytemp[i];
                        node.children[i] = childrentemp[i];
                    } else if (i < M) {
                        nodenew.key[j] = keytemp[i];
                        nodenew.children[j-1] = childrentemp[i];
                        node.key[i] = 0;
                        ++j;
                    } else if (i == M) { 
                        nodenew.children[j-1] = childrentemp[i];
                    }
                }
                nodenew.isleave = false;
            } else { // 按照叶结点的方式创建新结点
                nodenew.amount = M/2+1;
                for (int i = 0, j = 1; i < M; ++i) {
                    if (i < M/2) {
                        node.key[i+1] = keytemp[i];
                    } else {
                        nodenew.key[j] = keytemp[i];
                        if (i < M-1)
                            node.key[i+1] = 0;
                        ++j;
                    }
                }
                nextinsertkey = nodenew.key[1];
                nodenew.isleave = true;
                for (int n = 0; n < M; ++n)
                    nodenew.children[n] = 0;
            }
            nodenew.key[0] = 0;
            nodenew.father = node.father;
            nodenew.left = position;
            nodenew.right = node.right;
            nodenew.isactive = true;
            // 查找新结点的插入位置
            // 若索引文件中存在一个曾经被删除的结点,则用新结点覆盖掉这个结点
            // 若不存在这样的结点,则将新结点添加到索引文件尾部
            iofile.seekp(8, ios::beg);
            do{
                cur = iofile.tellp();
                iofile.read((char *)&nodetemp2, sizeof(BPlusNode));
            } while(nodetemp2.isactive && iofile.tellp() < posEnd);
            if (nodetemp2.isactive) {
                nodenewposition = iofile.tellp();
                iofile.write((char *)&nodenew, sizeof(BPlusNode));
            } else {
                iofile.seekp(cur, ios::beg);
                nodenewposition = cur;
                iofile.write((char *)&nodenew, sizeof(BPlusNode));
            }
            node.right = nodenewposition;
            iofile.seekp(position, ios::beg);
            iofile.write((char *)&node, sizeof(BPlusNode));
            iofile.close();
            if (insertkey(nextinsertkey, nodenew.father))  // 递归调用插入算法将分裂后需要插入到父结点的键值插入到父结点中
                return true;
            else 
                return false;
        }
    }
}

// 删除给定的键值
// 该算法不符合B+树的删除规则
// 只是简单地将除被删除键值外的其它键值重新插入一遍
bool deletekey (long key, long position) {
    fstream iofile;
    iofile.open("BPlusTreeData.dat", ios::binary|ios::in|ios::out);
    BPlusNode node;
    long *keynumbertemp = new long[1000];
    long number = 0;
    long posEnd;
    iofile.seekp(0, ios::end);
    posEnd = iofile.tellp();
    iofile.seekp(4, ios::beg);
    iofile.read((char *)&smallest, sizeof(long));
    iofile.seekp(smallest, ios::beg);
    do {
        iofile.read((char *)&node, sizeof(BPlusNode));
        for (int i = 1; i < node.amount; ++i) {
            keynumbertemp[number] = node.key[i];
            ++number;
        }
        if (node.right == 0) {
            --number;
            break;
        }
        iofile.seekp(node.right, ios::beg);
    } while(true);
    node.amount = 1;
    for (int j = 0; j < M; ++j) {
        node.key[j] = 0;
        node.children[j] = 0;
    }
    node.father = 0;
    node.left = 0;
    node.right = 0;
    node.isactive = true;
    node.isleave = true;
    root = 8;
    smallest = 8;
    iofile.seekp(0, ios::beg);
    iofile.write((char *)&root, sizeof(long));
    iofile.write((char *)&smallest, sizeof(long));
    iofile.write((char *)&node, sizeof(BPlusNode));
    for (;iofile.tellp() < posEnd;){
        iofile.read((char *)&node, sizeof(BPlusNode));
        node.isactive = false;
        iofile.seekp(-long(sizeof(BPlusNode)), ios::cur);
        iofile.write((char *)&node, sizeof(BPlusNode));
    }
    iofile.close();
    for (int k = 0; k <= number; ++k) {
        if (keynumbertemp[k] == key)
            continue;
        findkey(keynumbertemp[k], position);
        insertkey(keynumbertemp[k], position);
    }
    return true;
}

// 测试模块
void testBtree (int type) {
    srand(time(0));
    long keynumber, position;
    switch (type) {
    case 1:
        for (int i = 0; i < 5000; ++i) {
            if (i % 1000 == 0) {
                cout << "Insert " << i << " Nodes" << endl;
            }
            keynumber = rand() % LONG_MAX;
            if (! findkey(keynumber, position)) {
                insertkey(keynumber, position);
            }
        }
        break;
    case 2:
        for (int i = 1; i <= 10000; ++i) {
            if (i % 1000 == 0) {
                cout << "Select " << i << " Nodes" << endl;
            }
            keynumber = rand() % LONG_MAX;
            findkey(keynumber, position);
        }
        break;
    }
}

我的测试结果:
plx@plinux-Laptop:~/Dropbox/Code$ ./btree
t 1
Insert 0 Nodes
Insert 1000 Nodes
Insert 2000 Nodes
Insert 3000 Nodes
Insert 4000 Nodes
Test in 40290 ms
t 2
Select 1000 Nodes
Select 2000 Nodes
Select 3000 Nodes
Select 4000 Nodes
Select 5000 Nodes
Select 6000 Nodes
Select 7000 Nodes
Select 8000 Nodes
Select 9000 Nodes
Select 10000 Nodes
Test in 540 ms

查找速度还是非常快的,如果全部载入内存操作,估计1/100的时间

标签: , ,
  1. jackmore
    5 月 12th, 201117:22

    LZ,是用32位机子跑的吗?

    root = 8;//root为什么=8?
    是不是因为32位机子? 因为两个long(一个root,一个smallest),和起来占用8个字节。
    所以root从第8字节开始。

    [回复]

  2. cm
    6 月 1st, 201115:36

    这个b+树有问题,在叶子节点不是顺序递增的

    [回复]