Nested Loops Join 请确保用在局部扫描的OLTP场景:

驱动表的限制条件有索引

/*
结论: Nested Loops Join连接优化,驱动表的限制条件有索引!
*/

--环境构造
--研究Nested Loops Join访问次数前准备工作
DROP TABLE t1 CASCADE CONSTRAINTS PURGE;
DROP TABLE t2 CASCADE CONSTRAINTS PURGE;
CREATE TABLE t1 (
id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
CREATE TABLE t2 (
id NUMBER NOT NULL,
t1_id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
execute dbms_random.seed(0);
INSERT INTO t1
SELECT rownum, rownum, dbms_random.string('a', 50)
FROM dual
CONNECT BY level <= 100
ORDER BY dbms_random.random;
INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000
ORDER BY dbms_random.random;
COMMIT;
select count(*) from t1;
select count(*) from t2;


--Nested Loops Join两表无索引试验
set linesize 1000
set autotrace off
alter session set statistics_level=all ;
SELECT /*+ leading(t1) use_nl(t2) */ *
FROM t1, t2
WHERE t1.id = t2.t1_id
AND t1.n = 19;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
-------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
-------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 1014 |
| 1 | NESTED LOOPS | | 1 | 1 | 1 |00:00:00.01 | 1014 |
|* 2 | TABLE ACCESS FULL| T1 | 1 | 1 | 1 |00:00:00.01 | 8 |
|* 3 | TABLE ACCESS FULL| T2 | 1 | 1 | 1 |00:00:00.01 | 1006 |
-------------------------------------------------------------------------------------
2 - filter("T1"."N"=19)
3 - filter("T1"."ID"="T2"."T1_ID")

----两表无索引场合如果不用HINT,一般走Hash Join
alter session set statistics_level=all ;
SELECT *
FROM t1, t2
WHERE t1.id = t2.t1_id
AND t1.n = 19;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
----------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
----------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.06 | 1013 | | | |
|* 1 | HASH JOIN | | 1 | 1 | 1 |00:00:00.06 | 1013 | 742K| 742K| 376K (0)|
|* 2 | TABLE ACCESS FULL| T1 | 1 | 1 | 1 |00:00:00.01 | 7 | | | |
| 3 | TABLE ACCESS FULL| T2 | 1 | 100K| 100K|00:00:00.02 | 1006 | | | |
----------------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
1 - access("T1"."ID"="T2"."T1_ID")
2 - filter("T1"."N"=19)



---对t1表的限制条件建索引
CREATE INDEX t1_n ON t1 (n);
---有了限制条件的索引,Nested Loops Join性能略有提升
set linesize 1000
alter session set statistics_level=all ;
SELECT /*+ leading(t1) use_nl(t2) */ *
FROM t1, t2
WHERE t1.id = t2.t1_id
AND t1.n = 19;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));

--------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | Reads |
--------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.06 | 1009 | 1007 |
| 1 | NESTED LOOPS | | 1 | 1 | 1 |00:00:00.06 | 1009 | 1007 |
| 2 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | 6 |
|* 3 | INDEX RANGE SCAN | T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | 1 |
|* 4 | TABLE ACCESS FULL | T2 | 1 | 1 | 1 |00:00:00.05 | 1006 | 1001 |
--------------------------------------------------------------------------------------------------------
3 - access("T1"."N"=19)
4 - filter("T1"."ID"="T2"."T1_ID")


---不过发现,增加了索引后Oracle不用HINT,还是走HASH连接。
alter session set statistics_level=all ;
SELECT *
FROM t1, t2
WHERE t1.id = t2.t1_id
AND t1.n = 19;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
--------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
--------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.05 | 1008 | | | |
|* 1 | HASH JOIN | | 1 | 1 | 1 |00:00:00.05 | 1008 | 742K| 742K| 350K (0)|
| 2 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 2 | | | |
|* 3 | INDEX RANGE SCAN | T1_N | 1 | 1 | 1 |00:00:00.01 | 1 | | | |
| 4 | TABLE ACCESS FULL | T2 | 1 | 89127 | 100K|00:00:00.02 | 1006 | | | |
--------------------------------------------------------------------------------------------------------------------------
1 - access("T1"."ID"="T2"."T1_ID")
3 - access("T1"."N"=19)

/*
原因在于,这个T1表总记录也不过100条,所以用索引效果并没有很明显,如果这个T1表记录有几十万上百万条,那检索一条记录出来
,用索引效果就非常明显了!
*/

 被驱动表限制条件有索引  

/*
结论: 给Nested Loops Join连接优化,被驱动表的连接条件有索引!
*/

--环境构造
--研究Nested Loops Join访问次数前准备工作
DROP TABLE t1 CASCADE CONSTRAINTS PURGE;
DROP TABLE t2 CASCADE CONSTRAINTS PURGE;
CREATE TABLE t1 (
id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
CREATE TABLE t2 (
id NUMBER NOT NULL,
t1_id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
execute dbms_random.seed(0);
INSERT INTO t1
SELECT rownum, rownum, dbms_random.string('a', 50)
FROM dual
CONNECT BY level <= 100
ORDER BY dbms_random.random;
INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000
ORDER BY dbms_random.random;
COMMIT;
select count(*) from t1;
select count(*) from t2;

--(对驱动表t1表的限制条件建索引),如下
CREATE INDEX t1_n ON t1 (n);

--(对被驱动表t2表的连接条件建索引),如下:
CREATE INDEX t2_t1_id ON t2(t1_id);

----这下表连接性能有了大幅度提升
alter session set statistics_level=all ;
SELECT /*+ leading(t1) use_nl(t2) */ *
FROM t1, t2
WHERE t1.id = t2.t1_id
AND t1.n = 19;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));

----------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | Reads|
-----------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 7 | 4 |
| 1 | NESTED LOOPS | | 1 | | 1 |00:00:00.01 | 7 | 4 |
| 2 | NESTED LOOPS | | 1 | 1 | 1 |00:00:00.01 | 6 | 4 |
| 3 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | 0 |
|* 4 | INDEX RANGE SCAN | T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | 0 |
|* 5 | INDEX RANGE SCAN | T2_T1_ID | 1 | 1 | 1 |00:00:00.01 | 3 | 4 |
| 6 | TABLE ACCESS BY INDEX ROWID | T2 | 1 | 1 | 1 |00:00:00.01 | 1 | 0 |
-----------------------------------------------------------------------------------------------------------
4 - access("T1"."N"=19)
5 - access("T1"."ID"="T2"."T1_ID")
--性能有了大幅度提升,BUFFERS居然只有7


---增加了索引后Oracle不用HINT,终于自己去选择Nested Loops Join
alter session set statistics_level=all ;
SELECT *
FROM t1, t2
WHERE t1.id = t2.t1_id
AND t1.n = 19;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
----------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
----------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.01 | 7 |
| 1 | NESTED LOOPS | | 1 | | 1 |00:00:00.01 | 7 |
| 2 | NESTED LOOPS | | 1 | 1 | 1 |00:00:00.01 | 6 |
| 3 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 |
|* 4 | INDEX RANGE SCAN | T1_N | 1 | 1 | 1 |00:00:00.01 | 2 |
|* 5 | INDEX RANGE SCAN | T2_T1_ID | 1 | 1 | 1 |00:00:00.01 | 3 |
| 6 | TABLE ACCESS BY INDEX ROWID | T2 | 1 | 1 | 1 |00:00:00.01 | 1 |
----------------------------------------------------------------------------------------------------
4 - access("T1"."N"=19)
5 - access("T1"."ID"="T2"."T1_ID")

  确保小结果集先驱动

/*
结论: Nested Loops Join连接优化,注意驱动表的结果集是否是小的
(在统计信息不准确的时候,经常会出现将大的结果集驱动的情况,需要我们介入判断分析)
*/

--环境构造
DROP TABLE t1 CASCADE CONSTRAINTS PURGE;
DROP TABLE t2 CASCADE CONSTRAINTS PURGE;
CREATE TABLE t1 (
id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
CREATE TABLE t2 (
id NUMBER NOT NULL,
t1_id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
CREATE INDEX t1_n ON t1 (n);
CREATE INDEX t2_t1_id ON t2(t1_id);

--然后继续进入SESSION,执行
execute dbms_random.seed(0);
INSERT INTO t1
SELECT rownum, rownum, dbms_random.string('a', 50)
FROM dual
CONNECT BY level <= 10000
ORDER BY dbms_random.random;

INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000
ORDER BY dbms_random.random;
COMMIT;
exec dbms_stats.gather_table_stats(ownname => user,tabname => 'T1',estimate_percent => 10,method_opt=> 'for all indexed columns',cascade=>TRUE) ;
exec dbms_stats.gather_table_stats(ownname => user,tabname => 'T2',estimate_percent => 10,method_opt=> 'for all indexed columns',cascade=>TRUE) ;
select count(*) from t1;
select count(*) from t2;

----开始试验(正常是小的结果集先访问):
set linesize 1000
alter session set statistics_level=all ;
SELECT *
FROM t1, t2
WHERE t1.id = t2.t1_id
AND t1.n <= 19;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
----------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers |
----------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 19 |00:00:00.01 | 53 |
| 1 | NESTED LOOPS | | 1 | | 19 |00:00:00.01 | 53 |
| 2 | NESTED LOOPS | | 1 | 16 | 19 |00:00:00.01 | 34 |
| 3 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 16 | 19 |00:00:00.01 | 23 |
|* 4 | INDEX RANGE SCAN | T1_N | 1 | 16 | 19 |00:00:00.01 | 4 |
|* 5 | INDEX RANGE SCAN | T2_T1_ID | 19 | 1 | 19 |00:00:00.01 | 11 |
| 6 | TABLE ACCESS BY INDEX ROWID | T2 | 19 | 1 | 19 |00:00:00.01 | 19 |
----------------------------------------------------------------------------------------------------
4 - access("T1"."N"<=19)
5 - access("T1"."ID"="T2"."T1_ID")

/*
构造如下:
假如oracle的统计信息不准确
故意错乱颠倒,欺骗Oracle说T1表是小表而T2表是大表。
*/
EXEC dbms_stats.SET_table_stats(user, 'T1', numrows => 20000000 ,numblks => 1000000);
EXEC dbms_stats.SET_table_stats(user, 'T2', numrows => 1 ,numblks => 1);
--结果顺序颠倒了,性能大幅度下降!
set linesize 1000
alter session set statistics_level=all ;
SELECT *
FROM t1, t2
WHERE t1.id t2.t1_id
AND t1.n <= 19;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
--------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | Reads |
--------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 19 |00:00:03.93 | 1801K| 1031 |
| 1 | NESTED LOOPS | | 1 | | 19 |00:00:03.93 | 1801K| 1031 |
| 2 | NESTED LOOPS | | 1 | 1 | 1900K|00:00:01.17 | 1019 | 1006 |
| 3 | TABLE ACCESS FULL | T2 | 1 | 1 | 100K|00:00:00.08 | 1007 | 1001 |
|* 4 | INDEX RANGE SCAN | T1_N | 100K| 10000 | 1900K|00:00:00.63 | 12 | 5 |
|* 5 | TABLE ACCESS BY INDEX ROWID| T1 | 1900K| 1 | 19 |00:00:02.22 | 1800K| 25 |
--------------------------------------------------------------------------------------------------------
4 - access("T1"."N"<=19)
5 - filter("T1"."ID"="T2"."T1_ID")

  Hash Join确保在全表扫描的OLAP场景:

两表限制条件有索引

/*
结论: Hash Join连接优,两表的限制条件有索引(注:针对索引条件返回记录很少的情况)!
*/

--环境构造
DROP TABLE t1 CASCADE CONSTRAINTS PURGE;
DROP TABLE t2 CASCADE CONSTRAINTS PURGE;
CREATE TABLE t1 (
id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
CREATE TABLE t2 (
id NUMBER NOT NULL,
t1_id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
execute dbms_random.seed(0);
INSERT INTO t1
SELECT rownum, rownum, dbms_random.string('a', 50)
FROM dual
CONNECT BY level <= 10000
ORDER BY dbms_random.random;
INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000
ORDER BY dbms_random.random;
COMMIT;
select count(*) from t1;
select count(*) from t2;

--Hash连接优化第1式,两边的限制条件有索引

--首先测试Hash Join两表的限制条件皆无索引的情况

alter session set statistics_level=all ;
set linesize 1000
SELECT /*+ leading(t2) use_hash(t1)*/ *
FROM t1, t2
WHERE t1.id = t2.t1_id
and t1.n=19
and t2.n=12;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
----------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
----------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 0 |00:00:00.01 | 1104 | | | |
|* 1 | HASH JOIN | | 1 | 1 | 0 |00:00:00.01 | 1104 | 738K| 738K| 342K (0)|
|* 2 | TABLE ACCESS FULL| T2 | 1 | 11 | 1 |00:00:00.01 | 1005 | | | |
|* 3 | TABLE ACCESS FULL| T1 | 1 | 1 | 1 |00:00:00.01 | 99 | | | |
----------------------------------------------------------------------------------------------------------------
1 - access("T1"."ID"="T2"."T1_ID")
2 - filter("T2"."N"=12)
3 - filter("T1"."N"=19)


---在两表的限制条件建索引后,发现索引都可以用到,性能大幅度提升
--首先在t1表的限制条件建索引的情况,测试发现性能果然有提升!

create index idx_t1_n on t1(n);
SELECT /*+ leading(t2) use_hash(t1)*/ *
FROM t1, t2
WHERE t1.id = t2.t1_id
and t1.n=19
and t2.n=12;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));

------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 0 |00:00:00.01 | 1008 | | | |
|* 1 | HASH JOIN | | 1 | 1 | 0 |00:00:00.01 | 1008 | 738K| 738K| 345K (0)|
|* 2 | TABLE ACCESS FULL | T2 | 1 | 11 | 1 |00:00:00.01 | 1005 | | | |
| 3 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | | | |
|* 4 | INDEX RANGE SCAN | IDX_T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | | | |
------------------------------------------------------------------------------------------------------------------------------
1 - access("T1"."ID"="T2"."T1_ID")
2 - filter("T2"."N"=12)
--以上是create index idx_t1_n on t1(n)后的情况

--接下来在t2表的限制条件再建索引,又更快了!
create index idx_t2_n on t2(n);

SELECT /*+ leading(t2) use_hash(t1)*/ *
FROM t1, t2
WHERE t1.id = t2.t1_id
and t1.n=19
and t2.n=12;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));

------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 0 |00:00:00.01 | 6 | | | |
|* 1 | HASH JOIN | | 1 | 1 | 0 |00:00:00.01 | 6 | 738K| 738K| 367K (0)|
| 2 | TABLE ACCESS BY INDEX ROWID| T2 | 1 | 1 | 1 |00:00:00.01 | 3 | | | |
|* 3 | INDEX RANGE SCAN | IDX_T2_N | 1 | 1 | 1 |00:00:00.01 | 2 | | | |
| 4 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | | | |
|* 5 | INDEX RANGE SCAN | IDX_T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | | | |
------------------------------------------------------------------------------------------------------------------------------
1 - access("T1"."ID"="T2"."T1_ID")
3 - access("T2"."N"=12)
5 - access("T1"."N"=19)
--以上是create index idx_t2_n on t2(n)后的情况

  小结果集驱动

/*
结论: Hash Join连接优化,注意驱动表的结果集是否是小的
(在统计信息不准确的时候,经常会出现将大的结果集驱动的情况,需要我们介入判断分析)
*/
DROP TABLE t1 CASCADE CONSTRAINTS PURGE;
DROP TABLE t2 CASCADE CONSTRAINTS PURGE;
CREATE TABLE t1 (
id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
CREATE TABLE t2 (
id NUMBER NOT NULL,
t1_id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
execute dbms_random.seed(0);
INSERT INTO t1
SELECT rownum, rownum, dbms_random.string('a', 50)
FROM dual
CONNECT BY level <= 100
ORDER BY dbms_random.random;
INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000
ORDER BY dbms_random.random;
COMMIT;

exec dbms_stats.gather_table_stats(ownname => user,tabname => 'T1',estimate_percent => 10,method_opt=> 'for all indexed columns',cascade=>TRUE) ;
exec dbms_stats.gather_table_stats(ownname => user,tabname => 'T2',estimate_percent => 10,method_opt=> 'for all indexed columns',cascade=>TRUE) ;
select count(*) from t1;
select count(*) from t2;

--在无索引,且是全扫描的情况下,一般走HASH连接,看下面性能
set linesize 1000
alter session set statistics_level=all;
SELECT *
FROM t1, t2
WHERE t1.id = t2.t1_id;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));

----------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
----------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 100 |00:00:00.07 | 1019 | | | |
|* 1 | HASH JOIN | | 1 | 100 | 100 |00:00:00.07 | 1019 | 742K| 742K| 1202K (0)|
| 2 | TABLE ACCESS FULL| T1 | 1 | 100 | 100 |00:00:00.01 | 7 | | | |
| 3 | TABLE ACCESS FULL| T2 | 1 | 89127 | 100K|00:00:00.02 | 1012 | | | |
----------------------------------------------------------------------------------------------------------------
1 - access("T1"."ID"="T2"."T1_ID")

/*
以下是经常出现的案例由于统计信息的错误导致执行计划的错误,我们构造如下:
假如oracle的统计信息不准确
以下故意错乱颠倒,欺骗Oracle说T1表是小表而T2表是大表。
*/
EXEC dbms_stats.SET_table_stats(user, 'T1', numrows => 20000000 ,numblks => 1000000);
EXEC dbms_stats.SET_table_stats(user, 'T2', numrows => 1 ,numblks => 1);
set linesize 1000
alter session set statistics_level=all;
SELECT *
FROM t1, t2
WHERE t1.id = t2.t1_id;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
----------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
----------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 100 |00:00:00.10 | 1019 | | | |
|* 1 | HASH JOIN | | 1 | 20M| 100 |00:00:00.10 | 1019 | 9472K| 1956K| 9M (0)|
| 2 | TABLE ACCESS FULL| T2 | 1 | 1 | 100K|00:00:00.02 | 1005 | | | |
| 3 | TABLE ACCESS FULL| T1 | 1 | 20M| 100 |00:00:00.01 | 14 | | | |
----------------------------------------------------------------------------------------------------------------
1 - access("T1"."ID"="T2"."T1_ID")

  确保在PGA完成HASH运算的尺寸

 

这里的场景是hash连接占用HASH  AREA内存区过多时,这时候我们可以考虑增大PGA

如果是oracle11g,默认是直接增大memory_target

Hash Join算法
第1步:判定小表是否能够全部存放在hash area内存中,如果可以,则做内存hash join。如果不行,转第二步。
第2步:决定fan-out数。(Number of Partitions) * C<= Favm *M 其中C为Cluster size,
其值为DB_BLOCK_SIZE*HASH_MULTIBLOCK_IO_COUNT;Favm为hash area内存可以使用的百分比,一般为0.8左右;M为Hash_area_size的大小。
第3步: 读取部分小表S,采用内部hash函数(这里称为hash_fun_1),将连接键值映射至某个分区,同时采用hash_fun_2函数对连接键值产生另外一个hash值,这个hash值用于创建hash table用,并且与连接键值存放在一起。
第4步: 对build input建立位图向量。
第5步: 如果内存中没有空间了,则将分区写至磁盘上。
第6步: 读取小表S的剩余部分,重复第三步,直至小表S全部读完。
第7步: 将分区按大小排序,选取几个分区建立hash table(这里选取分区的原则是使选取的数量最多)。
第8步: 根据前面用hash_fun_2函数计算好的hash值,建立hash table。
第9步: 读取表B,采用位图向量进行位图向量过滤。
第10步:对通过过滤的数据采用hash_fun_1函数将数据映射到相应的分区中去,并计算hash_fun_2的hash值。
第11步:如果所落的分区在内存中,则将前面通过hash_fun_2函数计算所得的hash值与内存中已存在的hash table做连接,将结果写到磁盘上。如果所落的分区不在内存中,则将相应的值与表S相应的分区放在一起。
第12步:继续读取表B,重复第9步,直至表B读取完毕。
第13步:读取相应的(Si,Bi)做hash连接。在这里会发生动态角色互换。
第14步:如果分区过后,最小的分区也比内存大,则发生nested- loop hash join。

  Merge Sort Join优化:

两表限制条件有索引

/*
结论: Merge Sort Join连接优化,两表的限制条件有索引(注:针对索引条件返回记录很少的情况)!
*/
--环境构造
DROP TABLE t1 CASCADE CONSTRAINTS PURGE;
DROP TABLE t2 CASCADE CONSTRAINTS PURGE;
CREATE TABLE t1 (
id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
CREATE TABLE t2 (
id NUMBER NOT NULL,
t1_id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
execute dbms_random.seed(0);
INSERT INTO t1
SELECT rownum, rownum, dbms_random.string('a', 50)
FROM dual
CONNECT BY level <= 10000
ORDER BY dbms_random.random;
INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000
ORDER BY dbms_random.random;
COMMIT;
select count(*) from t1;
select count(*) from t2;

--两边的限制条件有索引

--首先是,两表限制条件皆无索引的情况,如下
alter session set statistics_level=all ;
set linesize 1000
SELECT /*+ leading(t2) use_merge(t1)*/ *
FROM t1, t2
WHERE t1.id = t2.t1_id
and t1.n=19
and t2.n=12;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
-----------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
-----------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 0 |00:00:00.01 | 1104 | | | |
| 1 | MERGE JOIN | | 1 | 1 | 0 |00:00:00.01 | 1104 | | | |
| 2 | SORT JOIN | | 1 | 11 | 1 |00:00:00.01 | 1005 | 2048 | 2048 | 2048 (0)|
|* 3 | TABLE ACCESS FULL| T2 | 1 | 11 | 1 |00:00:00.01 | 1005 | | | |
|* 4 | SORT JOIN | | 1 | 1 | 0 |00:00:00.01 | 99 | 2048 | 2048 | 2048 (0)|
|* 5 | TABLE ACCESS FULL| T1 | 1 | 1 | 1 |00:00:00.01 | 99 | | | |
-----------------------------------------------------------------------------------------------------------------
3 - filter("T2"."N"=12)
4 - access("T1"."ID"="T2"."T1_ID")
filter("T1"."ID"="T2"."T1_ID")
5 - filter("T1"."N"=19)

---在两表的限制条件建索引后,发现索引都可以用到,性能大幅度提升
--首先对t1表的限制条件建索引,发现如下Merge Sort Join快了。
create index idx_t1_n on t1(n);
SELECT /*+ leading(t2) use_merge(t1)*/ *
FROM t1, t2
WHERE t1.id = t2.t1_id
and t1.n=19
and t2.n=12;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
-------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
-------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 0 |00:00:00.01 | 1008 | | | |
| 1 | MERGE JOIN | | 1 | 1 | 0 |00:00:00.01 | 1008 | | | |
| 2 | SORT JOIN | | 1 | 11 | 1 |00:00:00.01 | 1005 | 2048 | 2048 | 2048 (0)|
|* 3 | TABLE ACCESS FULL | T2 | 1 | 11 | 1 |00:00:00.01 | 1005 | | | |
|* 4 | SORT JOIN | | 1 | 1 | 0 |00:00:00.01 | 3 | 2048 | 2048 | 2048 (0)|
| 5 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | | | |
|* 6 | INDEX RANGE SCAN | IDX_T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | | | |
-------------------------------------------------------------------------------------------------------------------------------
3 - filter("T2"."N"=12)
4 - access("T1"."ID"="T2"."T1_ID")
filter("T1"."ID"="T2"."T1_ID")
6 - access("T1"."N"=19)

--接下来对t2表的限制条件建索引,发现如下Merge Sort Join更快了。
create index idx_t2_n on t2(n);
SELECT /*+ leading(t2) use_merge(t1)*/ *
FROM t1, t2
WHERE t1.id = t2.t1_id
and t1.n=19
and t2.n=12;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));

-------------------------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
-------------------------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 0 |00:00:00.01 | 6 | | | |
| 1 | MERGE JOIN | | 1 | 1 | 0 |00:00:00.01 | 6 | | | |
| 2 | SORT JOIN | | 1 | 1 | 1 |00:00:00.01 | 3 | 2048 | 2048 | 2048 (0)|
| 3 | TABLE ACCESS BY INDEX ROWID| T2 | 1 | 1 | 1 |00:00:00.01 | 3 | | | |
|* 4 | INDEX RANGE SCAN | IDX_T2_N | 1 | 1 | 1 |00:00:00.01 | 2 | | | |
|* 5 | SORT JOIN | | 1 | 1 | 0 |00:00:00.01 | 3 | 2048 | 2048 | 2048 (0)|
| 6 | TABLE ACCESS BY INDEX ROWID| T1 | 1 | 1 | 1 |00:00:00.01 | 3 | | | |
|* 7 | INDEX RANGE SCAN | IDX_T1_N | 1 | 1 | 1 |00:00:00.01 | 2 | | | |
-------------------------------------------------------------------------------------------------------------------------------
4 - access("T2"."N"=12)
5 - access("T1"."ID"="T2"."T1_ID")
filter("T1"."ID"="T2"."T1_ID")
7 - access("T1"."N"=19)

  连接条件索引消除排序

/*
结论: Merge Sort Join连接优化,在连接条件字段上建索引,用以消除排序合并连接的排序动作!
*/

--环境构造

DROP TABLE t1 CASCADE CONSTRAINTS PURGE;
DROP TABLE t2 CASCADE CONSTRAINTS PURGE;
CREATE TABLE t1 (
id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
CREATE TABLE t2 (
id NUMBER NOT NULL,
t1_id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
execute dbms_random.seed(0);
INSERT INTO t1
SELECT rownum, rownum, dbms_random.string('a', 50)
FROM dual
CONNECT BY level <= 100
ORDER BY dbms_random.random;
INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000
ORDER BY dbms_random.random;
COMMIT;
select count(*) from t1;
select count(*) from t2;

--争取利用索引来消除排序(可惜的是,ORACLE算法的限制,只能避免一次排序)

---首先看两表的连接条件都无索引的情况,如下,有两次排序:
set linesize 1000
set autotrace traceonly
SELECT /*+ leading(t1) use_merge(t2)*/ *
FROM t1, t2
WHERE t1.id = t2.t1_id;

执行计划
------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes |TempSpc| Cost (%CPU)| Time |
------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 123 | | 1852 (1)| 00:00:23 |
| 1 | MERGE JOIN | | 1 | 123 | | 1852 (1)| 00:00:23 |
| 2 | SORT JOIN | | 1 | 57 | | 4 (25)| 00:00:01 |
|* 3 | TABLE ACCESS FULL| T1 | 1 | 57 | | 3 (0)| 00:00:01 |
|* 4 | SORT JOIN | | 100K| 6445K| 15M| 1848 (1)| 00:00:23 |
| 5 | TABLE ACCESS FULL| T2 | 100K| 6445K| | 273 (1)| 00:00:04 |
------------------------------------------------------------------------------------
3 - filter("T1"."N"=19)
4 - access("T1"."ID"="T2"."T1_ID")
filter("T1"."ID"="T2"."T1_ID")

统计信息
----------------------------------------------------------
0 recursive calls
0 db block gets
1012 consistent gets
0 physical reads
0 redo size
880 bytes sent via SQL*Net to client
415 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
2 sorts (memory)
0 sorts (disk)
1 rows processed

--接下来在t1表建索引,发现排序消除了一个。
CREATE INDEX idx_t1_id ON t1(id);
set linesize 1000
set autotrace traceonly
SELECT /*+ leading(t1) use_merge(t2)*/ *
FROM t1, t2
WHERE t1.id = t2.t1_id;
--------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes |TempSpc| Cost (%CPU)| Time |
--------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 100 | 397K| | 47930 (1)| 00:09:36 |
| 1 | MERGE JOIN | | 100 | 397K| | 47930 (1)| 00:09:36 |
| 2 | TABLE ACCESS BY INDEX ROWID| T1 | 100 | 198K| | 2 (0)| 00:00:01 |
| 3 | INDEX FULL SCAN | IDX_T1_ID | 100 | | | 1 (0)| 00:00:01 |
|* 4 | SORT JOIN | | 111K| 217M| 582M| 47928 (1)| 00:09:36 |
| 5 | TABLE ACCESS FULL | T2 | 111K| 217M| | 274 (1)| 00:00:04 |
--------------------------------------------------------------------------------------------------
4 - access("T1"."ID"="T2"."T1_ID")
filter("T1"."ID"="T2"."T1_ID")

统计信息
----------------------------------------------------------
0 recursive calls
0 db block gets
1021 consistent gets
0 physical reads
0 redo size
13432 bytes sent via SQL*Net to client
481 bytes received via SQL*Net from client
8 SQL*Net roundtrips to/from client
1 sorts (memory)
0 sorts (disk)
100 rows processed

--接下来在t2表的连接条件建索引,发现排序依然有一个,无法消除。
CREATE INDEX idx_t2_t1_id ON t2(t1_id);
set linesize 1000
set autotrace traceonly
SELECT /*+ leading(t1) use_merge(t2)*/ *
FROM t1, t2
WHERE t1.id = t2.t1_id;
--------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes |TempSpc| Cost (%CPU)| Time |
--------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 100 | 397K| | 38263 (1)| 00:07:40 |
| 1 | MERGE JOIN | | 100 | 397K| | 38263 (1)| 00:07:40 |
| 2 | TABLE ACCESS BY INDEX ROWID| T1 | 100 | 198K| | 2 (0)| 00:00:01 |
| 3 | INDEX FULL SCAN | IDX_T1_ID | 100 | | | 1 (0)| 00:00:01 |
|* 4 | SORT JOIN | | 89127 | 173M| 464M| 38261 (1)| 00:07:40 |
| 5 | TABLE ACCESS FULL | T2 | 89127 | 173M| | 273 (1)| 00:00:04 |
--------------------------------------------------------------------------------------------------
4 - access("T1"."ID"="T2"."T1_ID")
filter("T1"."ID"="T2"."T1_ID")
统计信息
----------------------------------------------------------
0 recursive calls
0 db block gets
1021 consistent gets
0 physical reads
0 redo size
13432 bytes sent via SQL*Net to client
482 bytes received via SQL*Net from client
8 SQL*Net roundtrips to/from client
1 sorts (memory)
0 sorts (disk)
100 rows processed

--T2表如果一定要索引扫描呢,比如如下语句,字段只取t2表的t1_id
SELECT /*+ leading(t1) use_merge(t2)*/ t2.t1_id
FROM t1, t2
WHERE t1.id = t2.t1_id;
-----------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes |TempSpc| Cost (%CPU)| Time |
-----------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 100 | 2600 | | 494 (2)| 00:00:06 |
| 1 | MERGE JOIN | | 100 | 2600 | | 494 (2)| 00:00:06 |
| 2 | INDEX FULL SCAN | IDX_T1_ID | 100 | 1300 | | 1 (0)| 00:00:01 |
|* 3 | SORT JOIN | | 89127 | 1131K| 3512K| 493 (2)| 00:00:06 |
| 4 | INDEX FAST FULL SCAN| IDX_T2_T1_ID | 89127 | 1131K| | 66 (2)| 00:00:01 |
-----------------------------------------------------------------------------------------------
3 - access("T1"."ID"="T2"."T1_ID")
filter("T1"."ID"="T2"."T1_ID")
统计信息
----------------------------------------------------------
7 recursive calls
0 db block gets
311 consistent gets
222 physical reads
0 redo size
1686 bytes sent via SQL*Net to client
482 bytes received via SQL*Net from client
8 SQL*Net roundtrips to/from client
1 sorts (memory)
0 sorts (disk)
100 rows processed

--发现无论如何都不能同时消除排序合并连接两边的排序,

  避免取多余列致排序尺寸过大

/*
结论: Merge Sort Join连接优化,避免多余列致排序尺寸过大
*/

--环境构造
DROP TABLE t1 CASCADE CONSTRAINTS PURGE;
DROP TABLE t2 CASCADE CONSTRAINTS PURGE;
CREATE TABLE t1 (
id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
CREATE TABLE t2 (
id NUMBER NOT NULL,
t1_id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
execute dbms_random.seed(0);
INSERT INTO t1
SELECT rownum, rownum, dbms_random.string('a', 50)
FROM dual
CONNECT BY level <= 100
ORDER BY dbms_random.random;
INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000
ORDER BY dbms_random.random;
COMMIT;
select count(*) from t1;
select count(*) from t2;

--Merge Sort Join取所有字段的情况
alter session set statistics_level=all ;
set linesize 1000
SELECT /*+ leading(t2) use_merge(t1)*/ *
FROM t1, t2
WHERE t1.id = t2.t1_id
and t1.n=19;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
-----------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
-----------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.14 | 1012 | | | |
| 1 | MERGE JOIN | | 1 | 1 | 1 |00:00:00.14 | 1012 | | | |
| 2 | SORT JOIN | | 1 | 89127 | 20 |00:00:00.13 | 1005 | 9266K| 1184K| 8236K (0)|
| 3 | TABLE ACCESS FULL| T2 | 1 | 89127 | 100K|00:00:00.03 | 1005 | | | |
|* 4 | SORT JOIN | | 20 | 1 | 1 |00:00:00.01 | 7 | 2048 | 2048 | 2048 (0)|
|* 5 | TABLE ACCESS FULL| T1 | 1 | 1 | 1 |00:00:00.01 | 7 | | | |
-----------------------------------------------------------------------------------------------------------------
4 - access("T1"."ID"="T2"."T1_ID")
filter("T1"."ID"="T2"."T1_ID")
5 - filter("T1"."N"=19)

---Merge Sort Join取部分字段的情况
SELECT /*+ leading(t2) use_merge(t1)*/ t1.id
FROM t1, t2
WHERE t1.id = t2.t1_id
and t1.n=19;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
-----------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
-----------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 1 |00:00:00.11 | 1012 | | | |
| 1 | MERGE JOIN | | 1 | 1 | 1 |00:00:00.11 | 1012 | | | |
| 2 | SORT JOIN | | 1 | 89127 | 20 |00:00:00.11 | 1005 | 1895K| 658K| 1684K (0)|
| 3 | TABLE ACCESS FULL| T2 | 1 | 89127 | 100K|00:00:00.03 | 1005 | | | |
|* 4 | SORT JOIN | | 20 | 1 | 1 |00:00:00.01 | 7 | 2048 | 2048 | 2048 (0)|
|* 5 | TABLE ACCESS FULL| T1 | 1 | 1 | 1 |00:00:00.01 | 7 | | | |
-----------------------------------------------------------------------------------------------------------------
4 - access("T1"."ID"="T2"."T1_ID")
filter("T1"."ID"="T2"."T1_ID")
5 - filter("T1"."N"=19)

  保证PGA尺寸:

这里的场景是参与排序合并连接的尺寸过大的时候,这时候我们可以考虑增大PGA

如果是oracle11g,默认是直接增大memory_target

 HASH连接和NL连接只取部分列基本不会有什么性能提升:

/*
结论: HASH连接和NL连接只取部分列基本不会有什么性能提升。
*/

--环境构造

DROP TABLE t1 CASCADE CONSTRAINTS PURGE;
DROP TABLE t2 CASCADE CONSTRAINTS PURGE;
CREATE TABLE t1 (
id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
CREATE TABLE t2 (
id NUMBER NOT NULL,
t1_id NUMBER NOT NULL,
n NUMBER,
contents VARCHAR2(4000)
)
;
execute dbms_random.seed(0);
INSERT INTO t1
SELECT rownum, rownum, dbms_random.string('a', 50)
FROM dual
CONNECT BY level <= 100
ORDER BY dbms_random.random;
INSERT INTO t2 SELECT rownum, rownum, rownum, dbms_random.string('b', 50) FROM dual CONNECT BY level <= 100000
ORDER BY dbms_random.random;
COMMIT;
select count(*) from t1;
select count(*) from t2;

--第3式,取部分字段,减少排序尺寸!
--Merge Sort Join取所有字段的情况
alter session set statistics_level=all ;
set linesize 1000
SELECT /*+ leading(t1) use_hash(t2)*/ *
FROM t1, t2
WHERE t1.id = t2.t1_id;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
----------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
----------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 100 |00:00:00.06 | 1006 | | | |
|* 1 | HASH JOIN | | 1 | 100 | 100 |00:00:00.06 | 1006 | 742K| 742K| 1199K (0)|
| 2 | TABLE ACCESS FULL| T1 | 1 | 100 | 100 |00:00:00.01 | 7 | | | |
| 3 | TABLE ACCESS FULL| T2 | 1 | 116K| 100K|00:00:00.02 | 999 | | | |
----------------------------------------------------------------------------------------------------------------
1 - access("T1"."ID"="T2"."T1_ID")



SELECT /*+ leading(t1) use_hash(t2)*/ t1.id
FROM t1, t2
WHERE t1.id = t2.t1_id;
select * from table(dbms_xplan.display_cursor(null,null,'allstats last'));
----------------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Starts | E-Rows | A-Rows | A-Time | Buffers | OMem | 1Mem | Used-Mem |
----------------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | | 100 |00:00:00.07 | 1006 | | | |
|* 1 | HASH JOIN | | 1 | 100 | 100 |00:00:00.07 | 1006 | 1066K| 1066K| 1223K (0)|
| 2 | TABLE ACCESS FULL| T1 | 1 | 100 | 100 |00:00:00.01 | 7 | | | |
| 3 | TABLE ACCESS FULL| T2 | 1 | 116K| 100K|00:00:00.02 | 999 | | | |
----------------------------------------------------------------------------------------------------------------
1 - access("T1"."ID"="T2"."T1_ID")