一个InnoDB性能超过Oracle的调优Case

1 月 23rd, 2012 | Posted by P.Linux | Filed under 未分类

年前抽空到兄弟公司支援了一下Oracle迁移MySQL的测试，本想把MySQL调优到接近Oracle的性能即可，但经过 @何_登成 @淘宝丁奇 @淘宝褚霸 @淘伯松诸位大牛的指导和帮助（排名不分先后，仅按第一次为此CASE而骚扰的时间排序），不断修正方案，最终获得了比Oracle更好的性能，虽然是个特殊场景，但是我觉得意义是很广泛的，值得参考，遂记录于此。
所有涉及表结构和具体业务模型的部分全部略去，也请勿咨询，不能透露，敬请谅解。

目录 (Contents)

一、测试模型：

包含12张业务表，每个事务包含12个SQL，每个SQL向一张表做INSERT，做完12个SQL即完成一个事务。

用一个C API编写的程序连接MySQL，不断执行如下操作

开始事务：START TRANSACTION;
每张表插入一行：INSERT INTO xxx VALUES (val1,val2,…); #一共12次
提交事务：COMMIT;

通过一个Shell脚本来启动32个测试程序并发测试

二、测试环境：

1. 机型：

R510
CPU：Intel(R) Xeon(R) CPU E5645 @ 2.40GHz 双路24线程
内存：6 * 8G 48G
存储：FusionIO 320G MLC

R910
CPU：Intel(R) Xeon(R) CPU E7530 @ 1.87GHz 四路48线程
内存：32* 4G 128G
存储：FusionIO 640G MLC

2. Linux配置：

单实例启动数据库：/boot/grub/menu.lst修改kernel启动参数增加numa=off
多实例启动数据库：numactl --cpunodebind=$BIND_NO --localalloc $MYSQLD

RHEL 5.4 with 2.6.18内置内核
RHEL 6.1 with 2.6.32淘宝版内核

fs.aio-max-nr = 1048576 #调整系统允许的最大异步IO队列长度
vm.nr_hugepages = 18000 #大页页数
vm.hugetlb_shm_group = 601 #允许使用大页的用户id，即mysql用户
vm.swappiness = 0 #不倾向使用SWAP

3. FusionIO配置：

启动配置：
/etc/modprobe.d/iomemory-vsl.conf
options iomemory-vsl use_workqueue=0 # 忽略Linux IO调度
options iomemory-vsl disable-msi=0 # 开启MSI中断
options iomemory-vsl use_large_pcie_rx_buffer=1 # 打开PCIE缓冲
options iomemory-vsl preallocate_memory=SN号 # 预分配管理内存

格式化配置：
fio-format -b 4K /dev/fct0 # 格式化设备为4K匹配NAND芯片页大小
mkfs.xfs -f -i attr=2 -l lazy-count=1,sectsize=4096 -b size=4096 -d sectsize=4096 -L data /dev/fioa # 调整XFS与FusionIO 4K页匹配，比较激进，需要更多稳定性测试才能认为这组参数充分安全

mount配置：
/dev/fioa on /data type xfs (rw,noatime,nodiratime,noikeep,nobarrier,allocsize=100M,attr2,largeio,inode64,swalloc) # FusionIO的逻辑Block是100M，所以设为100M的预扩展

4. MySQL版本和通用配置：

Percona 5.1.60-13.1 原版
Percona 5.1.60-13.1 修改版
* 允许自定义InnoDB AIO队列申请长度 (5.5_change_aio_io_limit.patch)
Percona 5.5.19-24.0 原版
* 允许innodb_flush_neighbor_pages=2来合并真正相邻的脏页
* Group Commit
Percona 5.5.18-23.0 修改版
* 允许自定义InnoDB AIO队列申请长度 (5.5_change_aio_io_limit.patch)
* 允许预先扩展数据文件 (5.5_innodb_extent_tablespace.patch，@淘宝丁奇贡献)
* Group Commit

innodb_buffer_pool_size=20G
sync_binlog=1
innodb_flush_log_at_trx_commit=1

测试并发：32

5. 修改补丁

#cat 5.5_change_aio_io_limit.patch

--- Percona-Server-5.5.18-23.0/storage/innobase/handler/ha_innodb.cc	2011-12-20 06:38:58.000000000 +0800
+++ Percona-Server-5.5.18-23.0-debug/storage/innobase/handler/ha_innodb.cc	2012-01-17 10:13:41.000000000 +0800
@@ -146,6 +146,7 @@
 static ulong innobase_commit_concurrency = 0;
 static ulong innobase_read_io_threads;
 static ulong innobase_write_io_threads;
+static ulong innobase_aio_pending_ios_per_thread; // Change AIO io_limit By P.Linux
 static long innobase_buffer_pool_instances = 1;

 static ulong innobase_page_size;
@@ -2870,6 +2871,7 @@
 	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
 	srv_n_read_io_threads = (ulint) innobase_read_io_threads;
 	srv_n_write_io_threads = (ulint) innobase_write_io_threads;
+	srv_n_aio_pending_ios_per_thread = (ulint) innobase_aio_pending_ios_per_thread;

 	srv_read_ahead &= 3;
 	srv_adaptive_flushing_method %= 3;
@@ -12282,6 +12284,11 @@
   "Number of background write I/O threads in InnoDB.",
   NULL, NULL, 4, 1, 64, 0);

+static MYSQL_SYSVAR_ULONG(aio_pending_ios_per_thread, innobase_aio_pending_ios_per_thread,
+  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+  "Number of AIO pending IOS per-thread in InnoDB.",
+  NULL, NULL, 4, 32, 4096, 0);
+
 static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Helps to save your data in case the disk image of the database becomes corrupt.",
--- Percona-Server-5.5.18-23.0/storage/innobase/srv/srv0srv.c	2011-12-20 06:38:57.000000000 +0800
+++ Percona-Server-5.5.18-23.0-debug/storage/innobase/srv/srv0srv.c	2012-01-17 10:23:35.000000000 +0800
@@ -242,6 +242,7 @@
 UNIV_INTERN ulint	srv_n_file_io_threads	= ULINT_MAX;
 UNIV_INTERN ulint	srv_n_read_io_threads	= ULINT_MAX;
 UNIV_INTERN ulint	srv_n_write_io_threads	= ULINT_MAX;
+UNIV_INTERN ulint   srv_n_aio_pending_ios_per_thread = ULINT_MAX; // Change AIO io_limit By P.Linux

 /* Switch to enable random read ahead. */
 UNIV_INTERN my_bool	srv_random_read_ahead	= FALSE;
--- Percona-Server-5.5.18-23.0/storage/innobase/srv/srv0start.c	2011-12-20 06:38:57.000000000 +0800
+++ Percona-Server-5.5.18-23.0-debug/storage/innobase/srv/srv0start.c	2012-01-17 10:25:12.000000000 +0800
@@ -1475,14 +1475,16 @@

 	ut_a(srv_n_file_io_threads

#cat 5.5_innodb_extent_tablespace.patch

--- Percona-Server-5.5.18-23.0/sql/sql_yacc.yy	2011-12-20 06:38:58.000000000 +0800
+++ Percona-Server-5.5.18-23.0-debug/sql/sql_yacc.yy	2012-01-17 14:45:47.000000000 +0800
@@ -3878,6 +3878,14 @@
           { 
             Lex->alter_tablespace_info->ts_alter_tablespace_type= ALTER_TABLESPACE_DROP_FILE; 
           }
+        /* innodb_extent_tablespace By P.Linux */
+        | tablespace_name
+          SET
+          opt_ts_extent_size
+          {
+            Lex->alter_tablespace_info->ts_alter_tablespace_type= ALTER_TABLESPACE_ALTER_FILE;
+          }
+        /* End */
         ;

 logfile_group_info:
--- Percona-Server-5.5.18-23.0/sql/handler.h	2011-12-20 06:38:58.000000000 +0800
+++ Percona-Server-5.5.18-23.0-debug/sql/handler.h	2012-01-17 14:29:17.000000000 +0800
@@ -501,7 +501,8 @@
 {
   TS_ALTER_TABLESPACE_TYPE_NOT_DEFINED = -1,
   ALTER_TABLESPACE_ADD_FILE = 1,
-  ALTER_TABLESPACE_DROP_FILE = 2
+  ALTER_TABLESPACE_DROP_FILE = 2,
+  ALTER_TABLESPACE_ALTER_FILE = 3 // innodb_extent_tablespace By P.Linux
 };

 enum tablespace_access_mode
--- Percona-Server-5.5.18-23.0/storage/innobase/fil/fil0fil.c	2011-12-20 06:38:57.000000000 +0800
+++ Percona-Server-5.5.18-23.0-debug/storage/innobase/fil/fil0fil.c	2012-01-17 14:31:40.000000000 +0800
@@ -368,7 +368,8 @@
 Checks if a single-table tablespace for a given table name exists in the
 tablespace memory cache.
 @return	space id, ULINT_UNDEFINED if not found */
-static
+//static
+UNIV_INTERN // innodb_extent_tablespace By P.Linux
 ulint
 fil_get_space_id_for_table(
 /*=======================*/
@@ -4676,7 +4677,8 @@
 Checks if a single-table tablespace for a given table name exists in the
 tablespace memory cache.
 @return	space id, ULINT_UNDEFINED if not found */
-static
+//static
+UNIV_INTERN // innodb_extent_tablespace By P.Linux
 ulint
 fil_get_space_id_for_table(
 /*=======================*/
--- Percona-Server-5.5.18-23.0/storage/innobase/handler/ha_innodb.cc	2011-12-20 06:38:58.000000000 +0800
+++ Percona-Server-5.5.18-23.0-debug/storage/innobase/handler/ha_innodb.cc	2012-01-17 14:37:49.000000000 +0800
@@ -433,6 +434,12 @@
 /*=======================*/
 	uint	flags);

+/****************************************************************//**
+Alter tablespace supported in an InnoDB table. Allow setting extent space. */
+int innobase_alter_tablespace(handlerton *hton,
+                                THD* thd, st_alter_tablespace *alter_info);
+/* innodb_extent_tablespace By P.Linux */
+
 static const char innobase_hton_name[]= "InnoDB";

 /*************************************************************//**
@@ -2489,6 +2496,7 @@
         innobase_hton->flags=HTON_NO_FLAGS;
         innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
 	innobase_hton->alter_table_flags = innobase_alter_table_flags;
+	innobase_hton->alter_tablespace= innobase_alter_tablespace; // innodb_extent_tablespace By P.Linux

 	ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);

@@ -3146,6 +3155,33 @@
 		| HA_INPLACE_ADD_PK_INDEX_NO_READ_WRITE);
 }

+/****************************************************************//**
+Alter tablespace supported in an InnoDB table. Allow setting extent space. */
+int innobase_alter_tablespace(handlerton *hton,
+                                THD* thd, st_alter_tablespace *alter_info)
+{
+       if (alter_info->ts_alter_tablespace_type != ALTER_TABLESPACE_ALTER_FILE)
+       {
+               return HA_ADMIN_NOT_IMPLEMENTED;
+       }
+
+       ulint table_space= fil_get_space_id_for_table(alter_info->tablespace_name);
+
+       if (table_space == ULINT_UNDEFINED)
+       {
+               my_error(ER_WRONG_TABLE_NAME, MYF(0), alter_info->tablespace_name);
+               return EE_FILENOTFOUND;
+       }
+
+       ulint extent_size= alter_info->extent_size;
+       
+       ulint actual_size=0;
+       fil_extend_space_to_desired_size(&actual_size, table_space, extent_size);
+
+       return 0;
+}
+/* innodb_extent_tablespace By P.Linux */
+
 /*****************************************************************//**
 Commits a transaction in an InnoDB database. */
 static
--- Percona-Server-5.5.18-23.0/storage/innobase/include/fil0fil.h	2011-12-20 06:38:57.000000000 +0800
+++ Percona-Server-5.5.18-23.0-debug/storage/innobase/include/fil0fil.h	2012-01-17 14:39:20.000000000 +0800
@@ -744,6 +744,18 @@
 /*============================*/
 	ulint		id);	/*!< in: space id */

+/*******************************************************************//**
+Checks if a single-table tablespace for a given table name exists in the
+tablespace memory cache.
+@return        space id, ULINT_UNDEFINED if not found */
+UNIV_INTERN
+ulint
+fil_get_space_id_for_table(
+/*=======================*/
+       const char*     name);  /*!< in: table name in the standard
+                               'databasename/tablename' format */
+/* innodb_extent_tablespace By P.Linux */
+
 /*************************************************************************
 Return local hash table informations. */

三、测试结果：

1. R910 Oracle单实例

测试人：童家旺，支付宝
TPS：稳定值2000,峰值2600 （我没参与测试，也没有报告，无法确定详情）
我的补充：Oracle已经是调优过的，请相信我们的Oracle DBA不是吃素的。我把听Oracle DBA描述的只言片语随便写下，Oracle跑到后面TPS也是有所下降，不是能一直100%稳定，最后CPU已经吃尽了，所以基本上再怎么优化提升的幅度会比较小。

2. R910 MySQL单实例 Percona 5.1.59 原版

测试人：帝俊，支付宝
TPS：峰值1500，无法稳定（具体不详）
测试人描述：
目前的测试数据显示，由于MySQL在checkpoint上处理跟不上，不足以持续支持1.5K/s的事务数，10MB/s的redo量下的交易创建。该负载下，FIO的写出速度为160～190MB/s，写IOPS为2～2.3k，测试FIO的写吞吐量可以到600MB/s，写IOPS有8K+，需要进一步研究如何进一步提升系统的吞吐量。

3. R910 MySQL多实例 Percona 5.1.60-13.1原版

测试人：彭立勋，B2B
TPS：峰值500*4（无法稳定），谷值100，均值450*4
重要配置：
innodb_page_size=4K # 修改数据页大小与FusionIO匹配
innodb_log_block_size=4K # 修改日志页大小与FusionIO匹配
innodb_log_file_size=1G
innodb_log_files_in_group=3
innodb_buffer_pool_size=20G
innodb_max_dirty_pages_pct=75
innodb_flush_method=ALL_O_DIRECT # 修改文件写入方式全部为O_DIRECT
innodb_read_io_threads=2
innodb_write_io_threads=10
innodb_io_capacity=20000
innodb_extra_rsegments=16
innodb_use_purge_thread=4
innodb_adaptive_flushing_method=3 # 采用Keep_average刷新方式
innodb_flush_neighbor_pages=0 # 不为了凑顺序IO刷相邻未修改的页
测试人描述：
每颗物理CPU绑定一个MySQL实例，四个实例同时接受测试。可以看到在测试过程中，IOPS抖动很大，在4K～17K之间抖动，可以判定，是Checkpoint机制不完善导致刷新间歇性繁忙，在IO闲置的时候不能充分发挥性能。但多实例可以提升整体TPS接近Oracle的均值，说明MySQL内部可能某些常量设置不合理，或者锁定粒度太粗导致单实例不能充分发挥单机性能。

4. R910 MySQL多实例 Percona 5.1.60-13.1 修改版

测试人：彭立勋，B2B
TPS：峰值1200*4，谷值0，均值950*4
重要配置：（在测试3的基础上）
innodb_aio_pending_ios_per_thread=1024
测试人描述：
经过对测试3的分析，可以发现，InnoDB已经标记了很多Page到Flush_list，但是并没有被及时地回写，可以在INNODB_BUFFER_POOL_PAGES系统表中发现很多页flush_type=2，即在Flush_list中。
经过review代码，发现InnoDB申请的AIO队列的长度只有256，由常量OS_AIO_N_PENDING_IOS_PER_THREAD（os0file.h）定义。将此常量修改为InnoDB的参数后，重新测试，可以使FusionIO的IOPS达到7K～18K，IO利用率得以提升，整体性能已经超越Oracle，但存在严重的低谷，大约每10s一次。

5.R510 MySQL单实例 Percona 5.5.18-23.0 修改版

测试人：彭立勋,B2B
TPS：峰值2800，谷值2300，均值2500
重要配置：（在测试3的基础上）
innodb_aio_pending_ios_per_thread=512
alter tablespace `trade/xxx` set extent_size=5000000; # 预先扩展数据文件
测试人描述：
根据测试4的结果进行分析，需要解决的主要问题就是抖动，抖动可能是两个原因导致的，一个是Checkpoint机制不完善，一个是数据文件扩展。Checkpoint机制不完善这个暂时无法改进，只能先解决数据文件扩展上的问题，采用淘宝丁奇的方法，对MySQL增加预先扩展文件的功能，在测试前先将文件扩展至测试写满需要的大小，使测试过程中无需扩展文件。
实例测试中发现非常有效，抖动范围在2300～2800之间，可以接受。但是Buffer Pool一旦脏页写满，为了控制脏页量InnoDB就会加大刷新量，影响到TPS。实际上在脏页未满的时候，IOPS就没有用完，但是InnoDB计算刷新量并没有考虑操作系统反馈的影响信息，只是根据自己的redo产生量计算。

同时观察CPU还发现，2.6.18内核会将所有软中断发送到Core0上处理，这可能也是瓶颈之一。（当时忘记拷贝状态，这是后来确认内核问题看的，可以看这篇文章，一样的，CPU软中断实践）
03:05:17 PM CPU %user %nice %sys %iowait %irq %soft %steal %idle intr/s
03:05:18 PM all 0.00 0.00 0.00 0.00 0.00 0.00 0.00 100.00 1014.00
03:05:18 PM 0 0.00 0.00 0.00 0.00 0.00 0.00 0.00 100.00 1000.00

6. R510 MySQL单实例 Percona 5.5.19-24.0 原版

测试人：彭立勋，B2B
TPS：峰值3100，谷值2400，均值2700
重要配置：（在测试3的基础上）
替换内核版本为2.6.32淘宝版，使用IO中断负载均衡。
innodb_adaptive_flushing_method = 2
innodb_flush_neighbor_pages = cont
测试人描述：
采用淘宝版内核后，可以发现每个CPU都被用的比较满：(部分)
06:27:26 PM  CPU    %usr   %nice    %sys %iowait    %irq   %soft  %steal  %guest   %idle
06:27:27 PM  all   69.80    0.00   18.68    0.51    0.00    0.17    0.00    0.00   10.84
06:27:27 PM    0   74.75    0.00   17.17    0.00    0.00    0.00    0.00    0.00    8.08
06:27:27 PM    1   73.96    0.00   16.67    1.04    0.00    0.00    0.00    0.00    8.33
06:27:27 PM    2   73.20    0.00   17.53    1.03    0.00    0.00    0.00    0.00    8.25
06:27:27 PM    3   71.72    0.00   19.19    1.01    0.00    0.00    0.00    0.00    8.08
06:27:27 PM    4   71.43    0.00   18.37    1.02    0.00    0.00    0.00    0.00    9.18
06:27:27 PM    5   70.71    0.00   19.19    1.01    0.00    0.00    0.00    0.00    9.09

这是个好现象，说明CPU被充分用起来了，在脏页未满之前，TPS可以比较稳定的维持在3000以上。但还是老问题，脏页一满，速度就下降，到测试结束时下降为2400。

四、测试结论：

MySQL的调优与操作系统结合非常紧密，需要整体联动才能获得好的效果，InnoDB锁粒度较粗的缺陷，在代码实现简单的情况下，实际上对并发的影响不是很明显。
目前MySQL对高速硬件的利用主要缺陷是，不少常量写死，Checkpoint机制不完善，Checkpoint刷新脏页->InnoDB AIO队列->操作系统IO队列->存储设备，中间任何一环存在问题，都可能导致性能下降。
InnoDB AIO队列可以通过补丁开放参数设置，这个瓶颈已经消除。
操作系统IO队列可以通过淘宝的内核补丁将中断分散到每个核上处理来解决。
目前存在最大的问题就是Checkpoint刷新脏页的机制，仅仅依赖redo产生的速度，其实硬件IO还有很多余量，但InnoDB并不知道。
如果能限定一种机型，限定一种操作系统，在MySQL内获取操作系统报告的硬件状态，自适应的决策自己的行为，这样可以充分利用系统资源，例如IO util%并不高的时候，即使脏页还没到阈值，也可以加大刷新量，充分利用IO，这样可能系统根本就达不到脏页阈值，可以一直保持高TPS，至少可以延缓TPS下降的趋势。
抖动问题则是Oracle和MySQL都存在的问题，扩展数据文件的瞬间必然导致TPS下降，淘宝丁奇的方法可以完美解决，Oracle也是类似的方法通过预先分配表空间文件解决。

五、测试缺陷：

测试CASE不全，没有在R910上测试5.5（虽然已经超了Oracle，但R910上应该还能猛一点），没有测试5.5多实例下可以获得何种性能，没有测试5.1在2.6.32内核下的表现，没有测试不同的页大小对InnoDB的影响。
没有稳定性测试，原版+多实例属于稳定方案，其他改动是否100%不影响稳定，尚需测试。
在R910上的测试没有监控系统，也就没有图，坑爹了。

六、后续Action

在InnoDB控制刷脏页量的地方加入对系统diskstat的监控，当系统IO util%<80%的时候，增加(IO_CAPACITY-当前系统IO数-redo计算的刷新量)个页的刷新，在系统不忙的时候提前加大刷新量，期望保持TPS稳定。

七、随意补充

为什么读为主的应用不用担心IO用不完？因为读操作是同步IO，一旦请求就被发送到磁盘，所以只要并发够多，总能把IO压爆。但是写为了加速，几乎所有数据库都是先写到内存，再异步写到磁盘，当然你要是搞最大保护模式，应该也是有数据库可以直接同步写磁盘的，但是对大部分数据库都是先写内存，再异步到磁盘，所以如果异步IO这里存在设计上的瓶颈，不管加多少并发，都是徒劳，内存一旦写满，连接线程就都堵住了，要等异步IO消化完才能继续，所以对于写为主的应用，这个CASE都是很有参考价值的。

标签: 数据库, AIO, InnoDB, Kernel, Linux, MySQL, Percona, XtraDB

类似的文章

自己动手实现Multi-Master Replication 在Server层实现Kill Idle Transaction

zedware
1 月 23rd, 201213:47

回复 | 引用 | #1

唉。建议做国产数据库评测的兄弟们找你们切磋一下。他们做类似测试的经验比较丰富。

[回复]
jametong
3 月 29th, 201218:01

回复 | 引用 | #2

结论太早了，从帖子里面的内容来说，不足以证明比Oracle的性能好。

Oracle的数据库我们是没有做极限优化的。只是很普通的使用，因为目标是要跑业务的，不是玩。

另外，可能需要考虑将备库的问题也考虑上，毕竟生产环境是不能不考虑故障切换的问题。这一点，目前的MySQL测试显示结果是比较差的。

[回复]
dblover 回复:
10 10 月, 2015 at 16:20
@jametong,
这几项是关键信息啊：
R910
CPU：Intel(R) Xeon(R) CPU E7530 @ 1.87GHz 四路48线程
内存：32* 4G 128G
存储：FusionIO 640G MLC

通过一个Shell脚本来启动32个测试程序并发测试

Oracle跑到后面TPS也是有所下降，不是能一直100%稳定，最后CPU已经吃尽了，所以基本上再怎么优化提升的幅度会比较小。

这个结果应该还是可信的，简单的几个操作我相信mysql真的可以跑过oracle。
但是，如果出现下面的情况，估计结果可能会不太好看：
1、表数据量变大，业务存在复杂查询的情况；
2、并发数增大，比如从32上升到64或者128；
3、redo日志和binlog最好还是放在磁盘设备上面；
4、主备高可用部署环境下；

[回复]
digoal
11 月 21st, 201308:21

回复 | 引用 | #3

PostgreSQL 9.3下的测试结果, 可以参考一下.
服务器DELL R610
CPU E5504 2.0 降频到1.6GHZ
DISK OCZ Revodrive3x2 240G
测试16个并发连接, 测试表一共9个字段, 1个主键, 无其他索引.
TPS 8023.

CREATE OR REPLACE FUNCTION public.f_test1()
RETURNS void
LANGUAGE plpgsql
STRICT
AS $function$
declare
begin
insert into t1(c1,c2,c3,c4,c5,c6,c7) values ('test1','test2','test3','test4','test5','test6','test7');
insert into t2(c1,c2,c3,c4,c5,c6,c7) values ('test1','test2','test3','test4','test5','test6','test7');
insert into t3(c1,c2,c3,c4,c5,c6,c7) values ('test1','test2','test3','test4','test5','test6','test7');
insert into t4(c1,c2,c3,c4,c5,c6,c7) values ('test1','test2','test3','test4','test5','test6','test7');
insert into t5(c1,c2,c3,c4,c5,c6,c7) values ('test1','test2','test3','test4','test5','test6','test7');
insert into t6(c1,c2,c3,c4,c5,c6,c7) values ('test1','test2','test3','test4','test5','test6','test7');
insert into t7(c1,c2,c3,c4,c5,c6,c7) values ('test1','test2','test3','test4','test5','test6','test7');
insert into t8(c1,c2,c3,c4,c5,c6,c7) values ('test1','test2','test3','test4','test5','test6','test7');
insert into t9(c1,c2,c3,c4,c5,c6,c7) values ('test1','test2','test3','test4','test5','test6','test7');
insert into t10(c1,c2,c3,c4,c5,c6,c7) values ('test1','test2','test3','test4','test5','test6','test7');
insert into t11(c1,c2,c3,c4,c5,c6,c7) values ('test1','test2','test3','test4','test5','test6','test7');
insert into t12(c1,c2,c3,c4,c5,c6,c7) values ('test1','test2','test3','test4','test5','test6','test7');
return;
exception when others then
return;
end;
$function$;

create table t1 (id serial4 primary key, c1 text, c2 text, c3 text, c4 text, c5 text, c6 text, c7 text, c8 timestamp default now());
create table t2 (like t1 including all);
create table t3 (like t1 including all);
create table t4 (like t1 including all);
create table t5 (like t1 including all);
create table t6 (like t1 including all);
create table t7 (like t1 including all);
create table t8 (like t1 including all);
create table t9 (like t1 including all);
create table t10 (like t1 including all);
create table t11 (like t1 including all);
create table t12 (like t1 including all);

digoal=# select f_test1();
f_test1
———

(1 row)

digoal=# \timing
Timing is on.
digoal=# select f_test1();
f_test1
———

(1 row)

Time: 1.155 ms
digoal=# select * from t1;
id | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
—-+——-+——-+——-+——-+——-+——-+——-+—————————-
1 | test1 | test2 | test3 | test4 | test5 | test6 | test7 | 2013-11-21 08:16:41.385535
13 | test1 | test2 | test3 | test4 | test5 | test6 | test7 | 2013-11-21 08:16:43.6705
(2 rows)

Time: 0.477 ms
digoal=# select * from t2;
id | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
—-+——-+——-+——-+——-+——-+——-+——-+—————————-
2 | test1 | test2 | test3 | test4 | test5 | test6 | test7 | 2013-11-21 08:16:41.385535
14 | test1 | test2 | test3 | test4 | test5 | test6 | test7 | 2013-11-21 08:16:43.6705
(2 rows)

Time: 0.424 ms
digoal=# select * from t12;
id | c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8
—-+——-+——-+——-+——-+——-+——-+——-+—————————-
12 | test1 | test2 | test3 | test4 | test5 | test6 | test7 | 2013-11-21 08:16:41.385535
24 | test1 | test2 | test3 | test4 | test5 | test6 | test7 | 2013-11-21 08:16:43.6705
(2 rows)

Time: 0.414 ms

pg93@db-172-16-3-150-> pgbench -M prepared -n -r -f ./test.sql -c 16 -j 4 -T 60 -h $PGDATA -p 1921 -U postgres digoal
transaction type: Custom query
scaling factor: 1
query mode: prepared
number of clients: 16
number of threads: 4
duration: 60 s
number of transactions actually processed: 481755
tps = 8023.940959 (including connections establishing)
tps = 8025.920647 (excluding connections establishing)
statement latencies in milliseconds:
1.990496 select f_test1();

[回复]
digoal 回复:
21 11 月, 2013 at 08:24
@digoal,
digoal=# select count(*) from t1;
count
——–
481757
(1 row)

digoal=# select count(*) from t2;
count
——–
481757
(1 row)

digoal=# select count(*) from t3;
count
——–
481757
(1 row)

digoal=# select count(*) from t4;
count
——–
481757
(1 row)

digoal=# select count(*) from t5;
count
——–
481757
(1 row)

digoal=# select count(*) from t6;
count
——–
481757
(1 row)

digoal=# select count(*) from t7;
count
——–
481757
(1 row)

digoal=# select count(*) from t12;
count
——–
481757
(1 row)

[回复]
digoal 回复:
21 11 月, 2013 at 08:36
@digoal,
优化后可以达到1.5W tps.
pg93@db-172-16-3-150-> pgbench -M prepared -n -r -f ./test.sql -c 16 -j 4 -T 60 -h $PGDATA -p 1921 -U postgres digoal
transaction type: Custom query
scaling factor: 1
query mode: prepared
number of clients: 16
number of threads: 4
duration: 60 s
number of transactions actually processed: 909280
tps = 15151.787574 (including connections establishing)
tps = 15155.568739 (excluding connections establishing)
statement latencies in milliseconds:
1.053459 select f_test1();

[回复]

P.Linux Laboratory