跳跃表的实现和测试
本文内容遵从CC版权协议, 可以随意转载, 但必须以超链接形式标明文章原始出处和作者信息及版权声明网址: http://www.penglixun.com/tech/program/skip_list_leveldb.html
LevelDB中一个核心的数据结构就是跳跃表,它是一个类似单向链表的结构但增加了多层指针进行跳跃,可以获得近似平衡树的效率,但是代码远远没有AVL等平衡二叉树实现复杂,所以尽管理论上跳跃表不是一个好算法,但是实现简单令他很多地方都很实用。
这面是一个跳跃表的结构。
这是实现代码和测试代码,非常简单,相比平衡树那是简单了多了去了。
发现一些内存泄露和内存越界,补上fix。
#include
#include
#include
#include
#include
#include
#include
#define TIME(A,B) (double)(B-A)/CLOCKS_PER_SEC*1000
/* Skip List Level Limit, Level begin wth 0 */
#define MAX_SKIP_LEVEL 6
/* Level Weight for Calc node level */
#define LEVEL_W 6
/* Skip List Struct */
typedef struct skip_list_struct skip_list_t;
struct skip_list_struct {
int key;
int value;
int level;
/* Level[i] have (i+1) pointer to */
skip_list_t* forward[MAX_SKIP_LEVEL];
};
/* Rand Node Level */
int rand_level()
{
int level= 0;
int i= 0;
/* Node which level more bigger, more less */
for(; i< MAX_SKIP_LEVEL-1; ++i)
//for(; i< MAX_SKIP_LEVEL; ++i) 这里不对,可能生成超过MAX_SKIP_LEVEL的数
{
level+= rand()%10> LEVEL_W? 1: 0;
}
return level;
}
/* Make a new node and init it */
skip_list_t* init_skip_list_node (int level, int key, int value)
{
skip_list_t* node= (skip_list_t *)malloc(sizeof(skip_list_t));
node->level= level;
node->key= key;
node->value= value;
int i= 0;
for (; i< MAX_SKIP_LEVEL; ++i)
{
node->forward[i]= NULL;
}
return node;
}
/* Insert or Update a value on Skip List */
int skip_list_write (skip_list_t* skip_list, int key, int value)
{
skip_list_t* update_node[MAX_SKIP_LEVEL];
skip_list_t* node= skip_list;
int i= skip_list->level;
for(; i>=0; --i)
{
while (node->forward[i]!= NULL &&
key> node->forward[i]->key)
{
node= node->forward[i];
}
update_node[i]= node;
}
node= node->forward[0]== NULL? node: node->forward[0];
if (key== node->key)
{
node->value= value;
}
else
{
int level= rand_level();
node= init_skip_list_node(level, key, value);
for (i= 0; i<= level; ++i)
{
node->forward[i]= update_node[i]->forward[i];
update_node[i]->forward[i]= node;
}
}
}
/* Delete a node on Skip List */
int skip_list_delete (skip_list_t* skip_list, int key)
{
skip_list_t* update_node[MAX_SKIP_LEVEL];
skip_list_t* node= skip_list;
int i= skip_list->level;
for(; i>=0; --i)
{
while (node->forward[i]!= NULL &&
key> node->forward[i]->key)
{
node= node->forward[i];
}
update_node[i]= node;
}
node= node->forward[0]== NULL? node: node->forward[0];
if (key== node->key)
{
for (i= 0; i<= skip_list->level; ++i)
{
if (update_node[i]->forward[i] != node)
{
break;
}
update_node[i]->forward[i]= node->forward[i];
}
free(node);
return 0; // SUCCESS
}
else
{
return 1; // NO FOUND
}
}
/* Search a key from Skip List */
int skip_list_search (skip_list_t* skip_list, int key)
{
skip_list_t* node= skip_list;
int level= node->level;
int i= level;
for (; i>= 0; --i)
{
while (node->forward[i]!= NULL &&
key> node->forward[i]->key)
{
node= node->forward[i];
}
}
node= node->forward[0]== NULL? node: node->forward[0];
if (key== node->key)
{
return node->value;
}
else
{
return INT_MIN;
}
}
int print_skip_list (skip_list_t* skip_list)
{
skip_list_t* node;
int i= 0;
for(; i< MAX_SKIP_LEVEL; ++i)
{
node= skip_list->forward[i];
printf("Level[%d]: ", i);
while(node!= NULL)
{
printf("%d -> ", node->key);
node= node->forward[i];
}
printf("NULL\n");
}
}
// 增加释放内存的操作,避免内存泄露
/* Free All Nodes */
int free_skip_list (skip_list_t* skip_list) {
skip_list_t* node= skip_list->forward[0];
skip_list_t* next_node;
while (node!= NULL)
{
next_node= node->forward[0];
free(node);
node= next_node;
}
free(skip_list);
}
int main(int argc, char *argv[])
{
srand((unsigned)time(0));
int count= 0;
int i= 0;
/* Function Test */
printf("#### Function Test ####\n");
count= 20;
printf("== Init Skip List ==\n");
skip_list_t* skip_list= init_skip_list_node(MAX_SKIP_LEVEL-1, INT_MIN, INT_MIN);
// skip_list_t* skip_list= init_skip_list_node(MAX_SKIP_LEVEL, INT_MIN, INT_MIN); 多了一层所以会越界
for (i= 0; i
这是测试结果:
### Function Test ####
== Init Skip List ==
== Print Skip List ==
Level[0]: 0 -> 1 -> 2 -> 3 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9 -> 10 -> 11 -> 12 -> 13 -> 14 -> 15 -> 16 -> 17 -> 18 -> 19 -> NULL
Level[1]: 1 -> 2 -> 3 -> 4 -> 5 -> 6 -> 7 -> 8 -> 9 -> 10 -> 13 -> 15 -> 16 -> 17 -> 18 -> 19 -> NULL
Level[2]: 1 -> 2 -> 3 -> 5 -> 6 -> 7 -> 8 -> 9 -> 10 -> 13 -> 15 -> 16 -> 17 -> 18 -> NULL
Level[3]: 1 -> 2 -> 6 -> 7 -> 8 -> 10 -> 13 -> 15 -> NULL
Level[4]: 1 -> 2 -> 7 -> 8 -> 10 -> NULL
Level[5]: NULL
== Search Key ==
Search [4]: 4
Search [13]: 13
Search [9]: 9
Search [23]: -2147483648
Search [8]: 8
Search [7]: 7
Search [22]: -2147483648
Search [5]: 5
Search [3]: 3
Search [15]: 15
Search [13]: 13
Search [15]: 15
Search [6]: 6
Search [22]: -2147483648
Search [7]: 7
Search [13]: 13
Search [14]: 14
Search [2]: 2
Search [6]: 6
Search [17]: 17
== Delete Key ==
Delete [15]: SUCCESS
Delete [5]: SUCCESS
Delete [13]: SUCCESS
Delete [10]: SUCCESS
Delete [20]: NO FOUND
Delete [11]: SUCCESS
Delete [14]: SUCCESS
Delete [16]: SUCCESS
Delete [10]: NO FOUND
Delete [16]: NO FOUND
Delete [9]: SUCCESS
Delete [23]: NO FOUND
Delete [6]: SUCCESS
Delete [8]: SUCCESS
Delete [21]: NO FOUND
Delete [4]: SUCCESS
Delete [9]: NO FOUND
Delete [22]: NO FOUND
Delete [20]: NO FOUND
Delete [1]: SUCCESS
== Print Skip List ==
Level[0]: 0 -> 2 -> 3 -> 7 -> 12 -> 17 -> 18 -> 19 -> NULL
Level[1]: 2 -> 3 -> 7 -> 17 -> 18 -> 19 -> NULL
Level[2]: 2 -> 3 -> 7 -> 17 -> 18 -> NULL
Level[3]: 2 -> 7 -> NULL
Level[4]: 2 -> 7 -> NULL
Level[5]: NULL
#### Performance Test ####
== Insert 10^5 Items (6 Level) ==
Time: 1196.923950 ms, Speed: 83547.500000 Node/s
== Search 10^5 Items (6 Level) ==
Time: 1196.629028 ms, Speed: 83568.085938 Node/s
调整MAX_SKIP_LEVEL和LEVEL_W两个常量,可以明显的观察到Speed的变化,自己实现一遍,比看很多遍代码理解深刻多了。
修改后的代码无泄露无越界了:
==1631==
==1631== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 5 from 1)
==1631== malloc/free: in use at exit: 0 bytes in 0 blocks.
==1631== malloc/free: 100,010 allocs, 100,010 frees, 6,400,640 bytes allocated.
==1631== For counts of detected errors, rerun with: -v
==1631== All heap blocks were freed -- no leaks are possible.
探讨下:
rand_level 函数会不会有性能问题, 一定要调用随机函数MAX_SKIP_LEVEL -1 次么?
运行多少次跟随机函数返回值有关吧
[回复]
写完才看到不是Dancing link,各种数据结果解决问题的方式都有局限性,但都有启发性,优化的意义都不大,这里是DL的连接:
http://blog.csdn.net/test4ever/article/details/4126868
[回复]
感觉效率还是有点差啊
[回复]