表大小

SQL> select count(*) from agent.TB_AGENT_INFO;

  COUNT(*)
----------
      1751

SQL> select count(*) from TB_CHANNEL_INFO ;

  COUNT(*)
----------
      1807

SQL> select count(*) from TB_USER_CHANNEL;

  COUNT(*)
----------
      7269

SQL> select count(*) from OSS_USER_STATION;

  COUNT(*)
----------
      2149

SQL> select count(*) from tb_user_zgy ;

  COUNT(*)
----------
  43

SQL> select count(*) from act.tb_user_agent_relat;

  COUNT(*)
----------
     29612

SQL> select count(*) from agent.base_data_user_info ;

  COUNT(*)
----------
     30005

SQL> select count(*) from agent.base_data_invest_info;

  COUNT(*)
----------
   3530163

慢的sql

select a.city,
       a.agent_id,
       a.username,
       a.real_name,
       phone,
       zgy_name,
       login_count,
       user_count,
       count(distinct b.invest_id) user_invested,
       sum(b.order_amount / 100) invest_amount
  from (select a.city,
               a.agent_id,
               a.username,
               a.real_name, -- 业主姓名
               a.phone, -- 业主手机号
               d.real_name zgy_name, -- 所属专管员
               count(distinct case
                       when c.str_day <= '20160821' then
                        c.login_name
                     end) login_count,
               count(distinct case
                       when c.str_day <= '20160821' then
                        decode(c.status, 1, c.invest_id, null)
                     end) user_count
          from (select agent_id, city, username, real_name, phone
                   from agent.TB_AGENT_INFO
                  where agent_id in
                        (SELECT agent_id
                           FROM (SELECT distinct *
                                   FROM TB_CHANNEL_INFO t
                                  START WITH t.CHANNEL_ID in
                                             (select CHANNEL_ID
                                                from TB_USER_CHANNEL
                                               where USER_ID = 596)
                                 CONNECT BY PRIOR
                                             t.CHANNEL_ID = t.PARENT_CHANNEL_ID)
                          WHERE agent_id IS NOT NULL)) a
          left join oss_user_station e
            on a.agent_id = e.agent_id
           and e.user_type = 0
          left join tb_user_zgy d
            on e.username = d.username
          left join act.tb_user_agent_relat  c
            on a.agent_id = c.agent_id
         group by a.city,
                  a.username,
                  a.real_name,
                  a.phone,
                  d.real_name,
                  a.agent_id) a
  left join (select invest_id, order_amount, agent_id, str_day
               from agent.base_data_invest_info
              where str_day >= '20150801' and str_day<='20160821') b
    on a.agent_id = b.agent_id
 group by a.city,
          a.agent_id,
          a.username,
          a.real_name,
          a.phone,
          a.zgy_name,
          a.login_count,
          a.user_count
这个查询可以看成两部分,第一部分一堆小表关联的a和唯一的一个大表再做关联

man
----------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                                        | Name                  | Rows  | Bytes |TempSpc| Cost (%CPU)| Time    |
----------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                                 |                       |    55M|  6616M|       |  3934K  (1)| 13:06:52 |
|   1 |  HASH GROUP BY                                   |                       |    55M|  6616M|       |  3934K  (1)| 13:06:52 |
|   2 |   VIEW                                           | VW_DAG_1              |    55M|  6616M|       |  3934K  (1)| 13:06:52 |
|   3 |    HASH GROUP BY                                 |                       |    55M|  6301M|  7681M|  3934K  (1)| 13:06:52 |
|   4 |     VIEW                                         | VM_NWVW_0             |    55M|  6301M|       |  2456K  (1)| 08:11:15 |
|   5 |      SORT GROUP BY                               |                       |    55M|    10G|    11G|  2456K  (1)| 08:11:15 |
|*  6 |       HASH JOIN RIGHT OUTER                      |                       |    55M|    10G|       | 21643   (2)| 00:04:20 |
|   7 |        TABLE ACCESS FULL                         | TB_USER_AGENT_RELAT   | 27937 |  1200K|       |   102   (0)| 00:00:02 |
|*  8 |        HASH JOIN OUTER                           |                       |  3374K|   511M|       | 21392   (1)| 00:04:17 |
|*  9 |         HASH JOIN SEMI                           |                       |  1712 |   188K|       |  2007   (1)| 00:00:25 |
|* 10 |          HASH JOIN RIGHT OUTER                   |                       |  1712 |   173K|       |    32   (0)| 00:00:01 |
|  11 |           TABLE ACCESS FULL                      | TB_USER_ZGY           |    43 |   903 |       |     3   (0)| 00:00:01 |
|* 12 |           HASH JOIN RIGHT OUTER                  |                       |  1712 |   138K|       |    29   (0)| 00:00:01 |
|* 13 |            TABLE ACCESS FULL                     | OSS_USER_STATION      |  1075 | 25800 |       |     6   (0)| 00:00:01 |
|  14 |            TABLE ACCESS FULL                     | TB_AGENT_INFO         |  1712 |    98K|       |    23   (0)| 00:00:01 |
|  15 |          VIEW                                    | VW_NSO_1              | 16271 |   143K|       |  1975   (1)| 00:00:24 |
|* 16 |           VIEW                                   |                       | 16271 |   143K|       |  1975   (1)| 00:00:24 |
|  17 |            HASH UNIQUE                           |                       | 16271 |  8882K|    10M|  1975   (1)| 00:00:24 |
|* 18 |             CONNECT BY WITHOUT FILTERING (UNIQUE)|                       |       |       |       |            |          |
|* 19 |              HASH JOIN RIGHT SEMI                |                       |   530 |   146K|       |    29   (0)| 00:00:01 |
|* 20 |               TABLE ACCESS FULL                  | TB_USER_CHANNEL       |   600 |  7800 |       |     7   (0)| 00:00:01 |
|  21 |               TABLE ACCESS FULL                  | TB_CHANNEL_INFO       |  1807 |   476K|       |    22   (0)| 00:00:01 |
|  22 |              TABLE ACCESS FULL                   | TB_CHANNEL_INFO       |  1807 |   476K|       |    22   (0)| 00:00:01 |
|* 23 |         TABLE ACCESS FULL                        | BASE_DATA_INVEST_INFO |  3374K|   148M|       | 19375   (1)| 00:03:53 |
----------------------------------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   6 - access("AGENT_ID"="C"."AGENT_ID"(+))
   8 - access("AGENT_ID"="AGENT_ID"(+))
   9 - access("AGENT_ID"="AGENT_ID")
  10 - access("C"."USERNAME"="D"."USERNAME"(+))
  12 - access("AGENT_ID"="C"."AGENT_ID"(+))
  13 - filter("C"."USER_TYPE"(+)=0)
  16 - filter("AGENT_ID" IS NOT NULL)
  18 - access("T"."PARENT_CHANNEL_ID"=PRIOR "T"."CHANNEL_ID")
  19 - access("T"."CHANNEL_ID"="CHANNEL_ID")
  20 - filter("USER_ID"=596)
  23 - filter("STR_DAY"(+)>='20150801' AND "STR_DAY"(+)<='20160821')

尝试单独跑 a,很快

(select a.city,
               a.agent_id,
               a.username,
               a.real_name, -- 业主姓名
               a.phone, -- 业主手机号
               d.real_name zgy_name, -- 所属专管员
               count(distinct case
                       when c.str_day <= '20160821' then
                        c.login_name
                     end) login_count,
               count(distinct case
                       when c.str_day <= '20160821' then
                        decode(c.status, 1, c.invest_id, null)
                     end) user_count
          from (select agent_id, city, username, real_name, phone
                   from agent.TB_AGENT_INFO
                  where agent_id in
                        (SELECT agent_id
                           FROM (SELECT distinct *
                                   FROM TB_CHANNEL_INFO t
                                  START WITH t.CHANNEL_ID in
                                             (select CHANNEL_ID
                                                from TB_USER_CHANNEL
                                               where USER_ID = 596)
                                 CONNECT BY PRIOR
                                             t.CHANNEL_ID = t.PARENT_CHANNEL_ID)
                          WHERE agent_id IS NOT NULL)) a
          left join oss_user_station e
            on a.agent_id = e.agent_id
           and e.user_type = 0
          left join tb_user_zgy d
            on e.username = d.username
          left join act.tb_user_agent_relat  c
            on a.agent_id = c.agent_id
         group by a.city,
                  a.username,
                  a.real_name,
                  a.phone,
                  d.real_name,
                  a.agent_id) a

单独跑a很快,和b合在一起就很慢,那么怀疑是由于视图合并,导致了a内部的表提前去和b关联,引发了性能问题。
尝试禁止视图合并可以使用rownum>0,或no_merge hint

select a.city,
       a.agent_id,
       a.username,
       a.real_name,
       phone,
       zgy_name,
       login_count,
       user_count,
       count(distinct b.invest_id) user_invested,
       sum(b.order_amount / 100) invest_amount
  from (select * from (select a.city,
               a.agent_id,
               a.username,
               a.real_name, -- 业主姓名
               a.phone, -- 业主手机号
               d.real_name zgy_name, -- 所属专管员
               count(distinct case
                       when c.str_day <= '20160821' then
                        c.login_name
                     end) login_count,
               count(distinct case
                       when c.str_day <= '20160821' then
                        decode(c.status, 1, c.invest_id, null)
                     end) user_count
          from (select agent_id, city, username, real_name, phone
                   from agent.TB_AGENT_INFO
                  where agent_id in
                        (SELECT agent_id
                           FROM (SELECT distinct *
                                   FROM TB_CHANNEL_INFO t
                                  START WITH t.CHANNEL_ID in
                                             (select CHANNEL_ID
                                                from TB_USER_CHANNEL
                                               where USER_ID = 596)
                                 CONNECT BY PRIOR
                                             t.CHANNEL_ID = t.PARENT_CHANNEL_ID)
                          WHERE agent_id IS NOT NULL)) a
          left join oss_user_station e
            on a.agent_id = e.agent_id
           and e.user_type = 0
          left join tb_user_zgy d
            on e.username = d.username
          left join act.tb_user_agent_relat  c
            on a.agent_id = c.agent_id
         group by a.city,
                  a.username,
                  a.real_name,
                  a.phone,
                  d.real_name,
                  a.agent_id) where rownum>0)a
  left join (select invest_id, order_amount, agent_id, str_day
               from agent.base_data_invest_info
              where str_day >= '20150801' and str_day<='20160821') b
    on a.agent_id = b.agent_id
 group by a.city,
          a.agent_id,
          a.username,
          a.real_name,
          a.phone,
          a.zgy_name,
          a.login_count,
          a.user_count

kuai
-----------------------------------------------------------------------------------------------------------------------------------
| Id  | Operation                                         | Name                  | Rows  | Bytes |TempSpc| Cost (%CPU)| Time    |
-----------------------------------------------------------------------------------------------------------------------------------
|   0 | SELECT STATEMENT                                  |                       |   823M|    96G|       |    23M  (1)| 78:59:52 |
|   1 |  HASH GROUP BY                                    |                       |   823M|    96G|       |    23M  (1)| 78:59:52 |
|   2 |   VIEW                                            | VW_DAG_0              |   823M|    96G|       |    23M  (1)| 78:59:52 |
|   3 |    HASH GROUP BY                                  |                       |   823M|    98G|   112G|    23M  (1)| 78:59:52 |
|*  4 |     HASH JOIN OUTER                               |                       |   823M|    98G|    26M| 41358   (6)| 00:08:17 |
|   5 |      VIEW                                         |                       |   259K|    23M|       | 11090   (1)| 00:02:14 |
|   6 |       COUNT                                       |                       |       |       |       |            |          |
|*  7 |        FILTER                                     |                       |       |       |       |            |          |
|   8 |         VIEW                                      |                       |   259K|    23M|       | 11090   (1)| 00:02:14 |
|   9 |          SORT GROUP BY                            |                       |   259K|    38M|    41M| 11090   (1)| 00:02:14 |
|* 10 |           HASH JOIN                               |                       |   259K|    38M|       |  2111   (1)| 00:00:26 |
|* 11 |            VIEW                                   |                       | 16271 |   143K|       |  1975   (1)| 00:00:24 |
|  12 |             HASH UNIQUE                           |                       | 16271 |  8882K|    10M|  1975   (1)| 00:00:24 |
|* 13 |              CONNECT BY WITHOUT FILTERING (UNIQUE)|                       |       |       |       |            |          |
|* 14 |               HASH JOIN RIGHT SEMI                |                       |   530 |   146K|       |    29   (0)| 00:00:01 |
|* 15 |                TABLE ACCESS FULL                  | TB_USER_CHANNEL       |   600 |  7800 |       |     7   (0)| 00:00:01 |
|  16 |                TABLE ACCESS FULL                  | TB_CHANNEL_INFO       |  1807 |   476K|       |    22   (0)| 00:00:01 |
|  17 |               TABLE ACCESS FULL                   | TB_CHANNEL_INFO       |  1807 |   476K|       |    22   (0)| 00:00:01 |
|* 18 |            HASH JOIN OUTER                        |                       | 27937 |  4037K|       |   134   (0)| 00:00:02 |
|* 19 |             HASH JOIN RIGHT OUTER                 |                       |  1712 |   173K|       |    32   (0)| 00:00:01 |
|  20 |              TABLE ACCESS FULL                    | TB_USER_ZGY           |    43 |   903 |       |     3   (0)| 00:00:01 |
|* 21 |              HASH JOIN RIGHT OUTER                |                       |  1712 |   138K|       |    29   (0)| 00:00:01 |
|* 22 |               TABLE ACCESS FULL                   | OSS_USER_STATION      |  1075 | 25800 |       |     6   (0)| 00:00:01 |
|  23 |               TABLE ACCESS FULL                   | TB_AGENT_INFO         |  1712 |    98K|       |    23   (0)| 00:00:01 |
|  24 |             TABLE ACCESS FULL                     | TB_USER_AGENT_RELAT   | 27937 |  1200K|       |   102   (0)| 00:00:02 |
|* 25 |      TABLE ACCESS FULL                            | BASE_DATA_INVEST_INFO |  3374K|   109M|       | 19375   (1)| 00:03:53 |
-----------------------------------------------------------------------------------------------------------------------------------

Predicate Information (identified by operation id):
---------------------------------------------------

   4 - access("A"."AGENT_ID"="AGENT_ID"(+))
   7 - filter(ROWNUM>0)
  10 - access("AGENT_ID"="AGENT_ID")
  11 - filter("AGENT_ID" IS NOT NULL)
  13 - access("T"."PARENT_CHANNEL_ID"=PRIOR "T"."CHANNEL_ID")
  14 - access("T"."CHANNEL_ID"="CHANNEL_ID")
  15 - filter("USER_ID"=596)
  18 - access("AGENT_ID"="C"."AGENT_ID"(+))
  19 - access("C"."USERNAME"="D"."USERNAME"(+))
  21 - access("AGENT_ID"="C"."AGENT_ID"(+))
  22 - filter("C"."USER_TYPE"(+)=0)
  25 - filter("STR_DAY"(+)>='20150801' AND "STR_DAY"(+)<='20160821')

用no_merge hint禁止视图合并也可以

select a.city,
       a.agent_id,
       a.username,
       a.real_name,
       phone,
       zgy_name,
       login_count,
       user_count,
       count(distinct b.invest_id) user_invested,
       sum(b.order_amount / 100) invest_amount
  from (select /*+ no_merge */
               a.city,
               a.agent_id,
               a.username,
               a.real_name, -- 业主姓名
               a.phone, -- 业主手机号
               d.real_name zgy_name, -- 所属专管员
               count(distinct case
                       when c.str_day <= '20160821' then
                        c.login_name
                     end) login_count,
               count(distinct case
                       when c.str_day <= '20160821' then
                        decode(c.status, 1, c.invest_id, null)
                     end) user_count
          from (select  /*+ qb_name(sb) */ agent_id, city, username, real_name, phone
                   from agent.TB_AGENT_INFO
                  where agent_id in
                        (SELECT agent_id
                           FROM (SELECT distinct *
                                   FROM TB_CHANNEL_INFO t
                                  START WITH t.CHANNEL_ID in
                                             (select CHANNEL_ID
                                                from TB_USER_CHANNEL
                                               where USER_ID = 596)
                                 CONNECT BY PRIOR
                                             t.CHANNEL_ID = t.PARENT_CHANNEL_ID)
                          WHERE agent_id IS NOT NULL)) a
          left join oss_user_station e
            on a.agent_id = e.agent_id
           and e.user_type = 0
          left join tb_user_zgy d
            on e.username = d.username
          left join (select * from act.tb_user_agent_relat c) c
            on a.agent_id = c.agent_id
         group by a.city,
                  a.username,
                  a.real_name,
                  a.phone,
                  d.real_name,
                  a.agent_id) a
  left join (select invest_id, order_amount, agent_id, str_day
               from agent.base_data_invest_info
              where str_day >= '20150801' and str_day<='20160821') b
    on a.agent_id = b.agent_id
 group by a.city,
          a.agent_id,
          a.username,
          a.real_name,
          a.phone,
          a.zgy_name,
          a.login_count,
          a.user_count

cdh hdfs 分布不均匀 hash分布不均匀_ci

至此sql从一个小时都跑不完,到最后两秒跑完,工作已经完成,但是单从慢的执行计划中并没有看出什么问题。有聚合函数group by走hash没有错,虽然有全表扫描带*但是要么过滤性太差,要么不是性能瓶颈。那为什么总共300多w就跑不完了呢

慢的执行计划做一个10046

Number of plan statistics captured: 1

Rows (1st) Rows (avg) Rows (max)  Row Source Operation
---------- ---------- ----------  ---------------------------------------------------
         0          0          0  HASH GROUP BY (cr=0 pr=0 pw=0 time=278 us cost=3934270 size=6937507584 card=55059584)
         0          0          0   VIEW  VW_DAG_1 (cr=0 pr=0 pw=0 time=111 us cost=3934270 size=6937507584 card=55059584)
         0          0          0    HASH GROUP BY (cr=0 pr=0 pw=0 time=108 us cost=3934270 size=6607150080 card=55059584)
         0          0          0     VIEW  VM_NWVW_0 (cr=0 pr=0 pw=0 time=32 us cost=2456206 size=6607150080 card=55059584)
         0          0          0      SORT GROUP BY (cr=0 pr=0 pw=0 time=31 us cost=2456206 size=11177095552 card=55059584)
 148234852  148234852  148234852       HASH JOIN RIGHT OUTER (cr=34882 pr=0 pw=0 time=34098445 us cost=21643 size=11177095552 card=55059584)
     29651      29651      29651        TABLE ACCESS FULL TB_USER_AGENT_RELAT (cr=332 pr=0 pw=0 time=8201 us cost=102 size=1229228 card=27937)
    703556     703556     703556        HASH JOIN OUTER (cr=34550 pr=0 pw=0 time=1518631 us cost=21392 size=536480628 card=3374092)
       612        612        612         HASH JOIN SEMI (cr=272 pr=0 pw=0 time=31359 us cost=2007 size=193456 card=1712)
      1751       1751       1751          HASH JOIN RIGHT OUTER (cr=100 pr=0 pw=0 time=11404 us cost=32 size=178048 card=1712)
        43         43         43           TABLE ACCESS FULL TB_USER_ZGY (cr=2 pr=0 pw=0 time=103 us cost=3 size=903 card=43)
      1751       1751       1751           HASH JOIN RIGHT OUTER (cr=98 pr=0 pw=0 time=6664 us cost=29 size=142096 card=1712)
      1312       1312       1312            TABLE ACCESS FULL OSS_USER_STATION (cr=15 pr=0 pw=0 time=420 us cost=6 size=25800 card=1075)
      1751       1751       1751            TABLE ACCESS FULL TB_AGENT_INFO (cr=83 pr=0 pw=0 time=1804 us cost=23 size=101008 card=1712)
       612        612        612          VIEW  VW_NSO_1 (cr=172 pr=0 pw=0 time=19720 us cost=1975 size=146439 card=16271)
       612        612        612           VIEW  (cr=172 pr=0 pw=0 time=19351 us cost=1975 size=146439 card=16271)
       613        613        613            HASH UNIQUE (cr=172 pr=0 pw=0 time=19224 us cost=1975 size=9095489 card=16271)
      1215       1215       1215             CONNECT BY WITHOUT FILTERING (UNIQUE) (cr=172 pr=0 pw=0 time=16687 us)
       603        603        603              HASH JOIN RIGHT SEMI (cr=97 pr=0 pw=0 time=4922 us cost=29 size=149990 card=530)
       603        603        603               TABLE ACCESS FULL TB_USER_CHANNEL (cr=22 pr=0 pw=0 time=550 us cost=7 size=7800 card=600)
      1807       1807       1807               TABLE ACCESS FULL TB_CHANNEL_INFO (cr=75 pr=0 pw=0 time=1615 us cost=22 size=487890 card=1807)
      1807       1807       1807              TABLE ACCESS FULL TB_CHANNEL_INFO (cr=75 pr=0 pw=0 time=1133 us cost=22 size=487890 card=1807)
   1631878    1631878    1631878         TABLE ACCESS FULL BASE_DATA_INVEST_INFO (cr=34278 pr=0 pw=0 time=950767 us cost=19375 size=155208232 card=3374092)

id 6 1亿4千多万,一个多小时也没跑出来
并且temp撑爆了
第 43 行出现错误:
ORA-01652: 无法通过 128 (在表空间 TEMP 中) 扩展 temp 段
一亿四千多万,b表才300万,sql group by之前也不过一百多万的结果

根据 6 -
access(“AGENT_ID”=”C”.”AGENT_ID”(+)) 查看c和b表agent_id数据分布

select agent_id,count(*) from act.tb_user_agent_relat group by agent_id order by 2 desc

cdh hdfs 分布不均匀 hash分布不均匀_ci_02

最多的6827行,最少的1行

select agent_id,count(*) from agent.base_data_invest_info group by agent_id order by 2 desc

cdh hdfs 分布不均匀 hash分布不均匀_SQL_03

最多50w,最少1行
又一次进了hash join链接列数据分布不均匀的坑,hash join只适合数据分布均匀的列做链接条件

做个oradebug short_stack

SQL> select unique sid from v$mystat;

       SID
----------
      1132

SQL> select p.spid from v$process p ,v$session s where s.paddr=p.addr and s.sid=1132;

SPID
------------------------------------------------
28539

oradebug setospid 28539

SQL> oradebug short_stack
ksedsts()+465<-ksdxfstk()+32<-ksdxcb()+1927<-sspuser()+112<-__sighandler()<-io_submit()+7<-skgfqio()+1275<-ksfd_skgfqio()+894<-ksfdgo()+423<-ksfdaio()+2290<-kcflbi()+906<-kcbldio()+3104<-kcblsltio()+530<-stsIssueWrite()+118<-stsGetBlock()+442<-sdbinb()+135<-sdbput()+1042<-smbwrt()+247<-smbput()+2503<-sorput()+93<-qesaEvaAndPutDistAggOpns()+590<-qergsRowP()+430<-qerhjWalkHashBucket()+397<-qerhjGenProbeHashTable()+1571<-qerhjGenProbeHashTable()+718<-kdstf11011010000km()+673<-kdsttgr()+153241<-qertbFetch()+2455<-rwsfcd()+103<-qerhjFetch()+1661<-rwsfcd()+103<-qerhjFetch()+1661<-qergsFetch()+757<-qervwFetch()+139<-qerghFetch()+315<-qervwFetch()+139<-qerghFetch()+315<-opifch2()+2766<-kpoal8()+2833<-opiodr()+917<-ttcpip()+2183<-opitsk()+1710<-opiino()+969<-opiodr()+917<-opidrv()+570<-sou2o()+103<-opimai_real()+133<-ssthrdmain()+265<-main()+201<-__libc_start_main()+244
SQL> 
SQL> 
SQL> 
SQL> 
SQL> oradebug short_stack
ksedsts()+465<-ksdxfstk()+32<-ksdxcb()+1927<-sspuser()+112<-__sighandler()<-io_submit()+7<-skgfqio()+1275<-ksfd_skgfqio()+894<-ksfdgo()+423<-ksfdaio()+2290<-kcflbi()+906<-kcbldio()+3104<-kcblsltio()+530<-stsIssueWrite()+118<-stsGetBlock()+442<-sdbinb()+135<-sdbput()+1042<-smbwrt()+247<-smbput()+2503<-sorput()+93<-qesaEvaAndPutDistAggOpns()+590<-qergsRowP()+430<-qerhjWalkHashBucket()+397<-qerhjGenProbeHashTable()+1571<-qerhjWalkHashBucket()+397<-qerhjGenProbeHashTable()+1571<-kdstf11011010000km()+673<-kdsttgr()+153241<-qertbFetch()+2455<-rwsfcd()+103<-qerhjFetch()+1661<-rwsfcd()+103<-qerhjFetch()+1661<-qergsFetch()+757<-qervwFetch()+139<-qerghFetch()+315<-qervwFetch()+139<-qerghFetch()+315<-opifch2()+2766<-kpoal8()+2833<-opiodr()+917<-ttcpip()+2183<-opitsk()+1710<-opiino()+969<-opiodr()+917<-opidrv()+570<-sou2o()+103<-opimai_real()+133<-ssthrdmain()+265<-main()+201<-__libc_start_main()+244
SQL> 
SQL> 
SQL> 
SQL> 

SQL> oradebug short_stack
ksedsts()+465<-ksdxfstk()+32<-ksdxcb()+1927<-sspuser()+112<-__sighandler()<-qergsRowP()+2161<-qerhjWalkHashBucket()+397<-qerhjGenProbeHashTable()+1571<-qerhjWalkHashBucket()+397<-qerhjGenProbeHashTable()+1571<-kdstf11011010000km()+673<-kdsttgr()+153241<-qertbFetch()+2455<-rwsfcd()+103<-qerhjFetch()+1661<-rwsfcd()+103<-qerhjFetch()+1661<-qergsFetch()+757<-qervwFetch()+139<-qerghFetch()+315<-qervwFetch()+139<-qerghFetch()+315<-opifch2()+2766<-kpoal8()+2833<-opiodr()+917<-ttcpip()+2183<-opitsk()+1710<-opiino()+969<-opiodr()+917<-opidrv()+570<-sou2o()+103<-opimai_real()+133<-ssthrdmain()+265<-main()+201<-__libc_start_main()+244
SQL> oradebug short_stack
ksedsts()+465<-ksdxfstk()+32<-ksdxcb()+1927<-sspuser()+112<-__sighandler()<-lmebco()+63<-qesaSimpleCompare()+73<-smbput()+913<-sorput()+93<-qergsRowP()+1067<-qerhjWalkHashBucket()+397<-qerhjGenProbeHashTable()+1571<-qerhjGenProbeHashTable()+718<-kdstf11011010000km()+673<-kdsttgr()+153241<-qertbFetch()+2455<-rwsfcd()+103<-qerhjFetch()+1661<-rwsfcd()+103<-qerhjFetch()+1661<-qergsFetch()+757<-qervwFetch()+139<-qerghFetch()+315<-qervwFetch()+139<-qerghFetch()+315<-opifch2()+2766<-kpoal8()+2833<-opiodr()+917<-ttcpip()+2183<-opitsk()+1710<-opiino()+969<-opiodr()+917<-opidrv()+570<-sou2o()+103<-opimai_real()+133<-ssthrdmain()+265<-main()+201<-__libc_start_main()+244

可以看到qerhjWalkHashBucket
qerhjWalkHashBucket就表示在做hash join的过程中需要遍历hash bucket中的数据,当链接列数据分布不均,某些值特别多时,遍历其hash bucket的成本也就非常高,如果pga放不下了,就会放到temp进行磁盘io,这就是性能瓶颈的原因,这个例子把30g的temp表空间都撑爆了,可见hash bucket有多大!

做个SQL MONITOR,也可以看出,瓶颈在id 6。如果做一个sql rpt也可以发现sql执行过程中的每妙逻辑读实际并不高,因为时间都花在了遍历hash bucket中

cdh hdfs 分布不均匀 hash分布不均匀_cdh hdfs 分布不均匀_04