手打不易,如果转摘,请注明出处!
注明原文:https://zhangxiaofan.blog.csdn.net/article/details/121351546
目录
非事务(不开启事务),循环单次单条插入——实际开发,禁止这种写法
批量操作-BEGIN END (单次多条 insert语句 批量插入)
批量操作-foreach batch(单次单条 insert语句 批量插入)
近期项目要转数据库,需要转移几个亿的数据。用的Mybatis框架,结果开发人员发现 foreach batch写法,在大量数据插入的时候效率很低,甚至超过2000条后,还不如循环单条插入(带事务)。而且还出现了 ORA-04036(PGA 内存超出限制)错误。最后用的JDBC batch对4个亿的数据进行了迁移。
那在大量数据操作的情况,用哪种方式效率最高呢? 我这里单独用了半天的时间研究了一下。
声明:效率跟表大小、字段长度有关系,这里的测试条数不具有普遍性,仅供参考!
-- Benchmark fixture: recreate the STUDENT table used by every test below.
-- Oracle dialect (NUMBER types, tablespace clause, COMMENT ON COLUMN).
-- NOTE(review): DROP fails with ORA-00942 if the table does not exist —
-- Oracle has no DROP TABLE IF EXISTS before 23c; run order matters.
DROP TABLE STUDENT ;
CREATE TABLE student
(
id number(20) PRIMARY KEY,
name varchar(20) not NULL,
name2 char(20) DEFAULT 'default' NOT NULL,
age number(5)
) tablespace TBS_CUR_DAT;
COMMENT ON COLUMN student.name IS '姓名';
COMMENT ON COLUMN student.name2 IS '姓名2';
COMMENT ON COLUMN student.age IS '年龄';
-- Seed row. NAME2 is omitted on purpose: it has DEFAULT 'default' NOT NULL.
-- NOTE(review): qualified as TEST.STUDENT here but unqualified in the DDL above —
-- assumes the DDL ran as the TEST schema user; confirm.
INSERT INTO TEST.STUDENT (ID, NAME, NAME2, AGE) VALUES(1, '1', '1', 1);
default-executor-type:
simple:
SimpleExecutor, 单个 sqlsession 内, 每次操作,都开启一个 Statement 对象。
用完立刻关闭 Statement 对象
batch:
BatchExecutor, 单个 sqlsession 内,每次操作复用已有 Statement 对象, addBatch()汇总,然后统一执行executeBatch()
因此 Mybatis 官方写明它无法返回行数(BATCH executor is in use, the update counts are being lost.)
reuse:
ReuseExecutor, 应用实例内, 全局共享 Statement对象(Map<String, Statement>), 存在则复用
/**
 * Benchmark: insert {@code num} rows one at a time, each statement
 * auto-committed (no surrounding transaction). Baseline only — this
 * pattern is forbidden in real code.
 *
 * @param num number of rows to insert
 * @return "ok" when all inserts have completed
 */
@GetMapping(value = "/effective/student/insert/for")
public String insertFori(int num) {
    long start = System.currentTimeMillis();
    // BUG FIX: getMaxId() returns a Long that is null on an empty table;
    // the old `studentService.getMaxId() + 1` auto-unboxed it and threw NPE.
    // Null-guard the same way the other benchmark endpoints in this file do.
    Long maxId = studentService.getMaxId();
    long tempMaxId = (maxId == null) ? 0 : maxId + 1;
    for (int i = 0; i < num; i++) {
        long pId = tempMaxId + i;
        Student student = getStudent(pId + "");
        studentService.insert(student);
    }
    long end = System.currentTimeMillis();
    System.out.println("循环插入-无事务 执行时间:" + (end - start));
    return "ok";
}
<!-- Single-row insert used by the per-row benchmark endpoints.
     NAME2 is deliberately omitted: the column is DEFAULT 'default' NOT NULL,
     so the database fills it in. -->
<insert id="insert" parameterType="com.batch.entity.Student">
INSERT INTO STUDENT(ID,
NAME,
AGE)
VALUES (#{id,jdbcType=NUMERIC},
#{name,jdbcType=VARCHAR},
#{age,jdbcType=DECIMAL})
</insert>
/**
 * Benchmark: insert {@code num} rows one at a time inside a single
 * transaction ({@code @Transactional} wraps the whole request).
 *
 * @param num number of rows to insert
 * @return "ok" when the transaction commits
 */
@Transactional
@GetMapping(value = "/effective/student/insert/fortrans")
public String insertListTrans(int num) {
    long start = System.currentTimeMillis();
    Long maxId = studentService.getMaxId();
    // BUG FIX: reuse the value already fetched above; the old code issued a
    // second studentService.getMaxId() round-trip (also a race window).
    long tempMaxId = (maxId == null) ? 0 : maxId + 1;
    for (int i = 0; i < num; i++) {
        long pId = tempMaxId + i;
        Student student = getStudent(pId + "");
        studentService.insert(student);
    }
    long end = System.currentTimeMillis();
    logger.info("循环插入-有事务 执行时间:" + (end - start));
    return "ok";
}
/**
 * Benchmark: build the whole row list in memory, then send ONE statement
 * containing many INSERTs wrapped in an Oracle BEGIN ... END; block.
 *
 * @param num number of rows to insert
 * @return "ok" when the batch statement returns
 */
@GetMapping(value = "/effective/student/insert/beginend")
public String insertListBeginEnd(int num) {
    long start = System.currentTimeMillis();
    Long maxId = studentService.getMaxId();
    // BUG FIX: reuse the value already fetched above; the old code issued a
    // second studentService.getMaxId() round-trip (also a race window).
    long tempMaxId = (maxId == null) ? 0 : maxId + 1;
    List<Student> studentList = new ArrayList<>();
    for (int i = 0; i < num; i++) {
        long pId = tempMaxId + i;
        Student student = getStudent("" + pId);
        studentList.add(student);
    }
    logger.info("Mybatis SQL return :" + studentService.insertListBeginEnd(studentList));
    long end = System.currentTimeMillis();
    logger.info("单次多条insert批量插入(BEGIN END) 执行时间:" + (end - start));
    return "ok";
}
<!-- Oracle-only: wraps N separate INSERT statements in one anonymous
     PL/SQL block (BEGIN ... ; END;) so they travel in a single round-trip.
     NOTE(review): update counts from an anonymous block are not reliable —
     see the BATCH-executor remark earlier in this article. -->
<insert id="insertListBeginEnd">
<foreach collection="studentList" item="item" index="index" open="begin" close=";end;" separator=";">
insert into STUDENT(
ID,
NAME,
AGE
)values(
#{item.id},
#{item.name},
#{item.age}
)
</foreach>
</insert>
/**
 * Benchmark: single multi-row INSERT built with a MyBatis foreach
 * (INSERT ... SELECT ... FROM dual UNION ALL ...).
 *
 * @param num number of rows to insert
 * @return "ok" when the batch statement returns
 */
@Transactional // 对速度基本无影响
@GetMapping(value = "/effective/student/insert/batch")
public String insertList2(int num) {
    long start = System.currentTimeMillis();
    Long maxId = studentService.getMaxId();
    // BUG FIX: reuse the value already fetched above; the old code issued a
    // second studentService.getMaxId() round-trip (also a race window).
    long tempMaxId = (maxId == null) ? 0 : maxId + 1;
    List<Student> studentList = new ArrayList<>();
    for (int i = 0; i < num; i++) {
        long pId = tempMaxId + i;
        Student student = getStudent(pId + "");
        studentList.add(student);
    }
    logger.info("Mybatis SQL return :" + studentService.insertListBatch(studentList));
    long end = System.currentTimeMillis();
    logger.info("批量插入 执行时间:" + (end - start));
    return "ok";
}
<!-- Oracle multi-row insert: INSERT ... SELECT literals FROM dual, with one
     SELECT per row glued together by UNION ALL (UNION ALL, not UNION — no
     dedup pass is wanted here). This is the "foreach batch" variant whose
     performance collapses at large row counts (see timings below).
     NOTE(review): #{item.id} lacks an explicit jdbcType, unlike the other
     two bindings — add jdbcType=NUMERIC for consistency / null handling. -->
<insert id="insertListBatch">
insert into STUDENT(
ID,
NAME,
AGE
)
<foreach collection="studentList" item="item" index="index" separator="union all">
(
select
#{item.id},
#{item.name,jdbcType=VARCHAR},
#{item.age,jdbcType=DECIMAL}
from dual
)
</foreach>
</insert>
....
/**
 * Worker thread: inserts {@code this.num} rows into TEST.STUDENT with raw
 * JDBC addBatch()/executeBatch() inside one explicit transaction, then
 * prints the per-thread elapsed time.
 */
@Override
public void run() {
    Connection connection = null;
    try {
        connection = oracleConnect.getConnection();
        // Disable auto-commit once, up front, so the whole batch commits atomically.
        connection.setAutoCommit(false);
    } catch (Exception e) {
        e.printStackTrace();
        throw new RuntimeException("Error");
    }
    // 开始时间
    long begin = System.currentTimeMillis();
    String sqlTemplate = "INSERT INTO TEST.STUDENT(ID, NAME, AGE) VALUES (?,?,?)";
    PreparedStatement insertStmt = null;
    try {
        insertStmt = connection.prepareStatement(sqlTemplate);
    } catch (SQLException e) {
        // BUG FIX: this early throw used to bypass the finally below and leak
        // the pooled connection — return it before propagating.
        oracleConnect.returnConnection(connection);
        e.printStackTrace();
        throw new RuntimeException("创建statement失败.");
    }
    try {
        // (FIX: the redundant second setAutoCommit(false) was removed — it is
        // already done right after obtaining the connection.)
        long tempMaxId = (maxId == null) ? 0 : maxId + 1;
        for (int i = 0; i < this.num; i++) {
            long pId = tempMaxId + i;
            Student student = getStudent(pId + "");
            insertStmt.setBigDecimal(1, student.getId());
            insertStmt.setString(2, student.getName());
            insertStmt.setBigDecimal(3, student.getAge());
            insertStmt.addBatch();
        }
        /**
         * executeBatch 返回的是 int[] 数组,和批处理中的执行的SQL一一对应,值代表影响的行数。
         * 元素>=0,影响的行数。
         * 元素=-2,执行成功,但无法获取影响的行数 (Statement.SUCCESS_NO_INFO)。
         * 元素=-3,执行失败 (Statement.EXECUTE_FAILED)。
         */
        int[] result = insertStmt.executeBatch();
        // NOTE(review): getUpdateCount() after executeBatch() only reflects the
        // last result, not the batch total — kept for the article's comparison.
        int updateCount = insertStmt.getUpdateCount();
        System.out.println("result= " + JSON.toJSONString(result));
        System.out.println("updateCount= " + updateCount);
        // 提交事务 (auto-commit was disabled above)
        connection.commit();
    } catch (SQLException e) {
        DbUtil.rollback(connection);
        e.printStackTrace();
        throw new RuntimeException("SQL执行错误");
    } finally {
        // 关闭 Statement
        DbUtil.close(insertStmt);
        // 归还连接 connect
        oracleConnect.returnConnection(connection);
    }
    // 结束时间
    long end = System.currentTimeMillis();
    System.out.println("线程" + Thread.currentThread().getName() + "数据插入耗时: " + (end - begin) + " ms");
}
.....
每个都进行三次测试, 单位是毫秒(ms)
items | 1000 条 | 2500 条 | 5000 条 |
非事务,循环单次单条插入 | 449,331,744,109 | 113,911,108,813,093 | - |
有事务,循环单次单条插入(Simple模式 ) | 195,613,441,224 | 343,733,933,460 | 596,757,005,844 |
批量操作-BEGIN END | 819,103,102 | 3,344,219,237 | 18,312,652,462 |
批量操作-foreach batch | 4,534,539 | 406,339,681 | 35,399,174,195 |
数据量很大的情况下,上面的方式都不适合了,耗时很长。用下面两个batch方式。
items | 1万条 | 5万条 | 10万条 |
JDBC原生batch-prepared | 281,1297,299 | 11,321,065,735 | 713,758,886 |
有事务,循环单次单条插入(Batch模式) | 227,163,168 | 1,007,892,829 | 200,416,991,739 |
一句话:追求效率用JDBC原生batch-prepared。
再次声明:效率跟表大小、字段长度有关系,这里的测试条数不具有普遍性,仅供参考!
转摘还请注明出处,感谢!