1. 背景

导入150w数据到mysql,数据格式为tab分隔csv文件。测试索引对导入速度的影响


2. 无索引,导入数据后重建

CREATE TABLE `tab` (
`ct_id` char(36) DEFAULT NULL COMMENT 'id',
`ct_title` varchar(200) DEFAULT NULL COMMENT '标题',
`bd_id` int(11) DEFAULT NULL COMMENT 'bdid',
`org_unit` int(11) DEFAULT NULL COMMENT '',
`poi_id` int(11) DEFAULT NULL,
`poi_name` varchar(200) DEFAULT NULL COMMENT 'poi名称'
) ENGINE=InnoDB DEFAULT CHARSET=utf8
mysql > load data infile '/tmp/ct.txt' into table tab;
Query OK, 1928611 rows affected (15.83 sec)
Records: 1928611  Deleted: 0  Skipped: 0  Warnings: 0

重建索引

mysql >  alter table tab add  UNIQUE index `uk_ct_poi` (`ct_id`,`poi_id`),

    ->   add index `idx_orgunit` (`org_unit`),

    ->   add index `idx_poi_orgunit` (`poi_id`,`org_unit`),

    ->   add index idx_bd_id(bd_id),

    ->   add index idx_poi_id(poi_id),

    ->   add index idx_poi_name(poi_name),

    ->   add index idx_all(ct_id,ct_title,bd_id,org_unit,poi_name),

    ->   add index idx_test(ct_title,bd_id,org_unit,poi_name);

Query OK, 0 rows affected (1 min 24.56 sec)

Records: 0  Duplicates: 0  Warnings: 0


3. 表建好索引导入数据

mysql > CREATE TABLE `tab` (
    -> `ct_id` char(36) DEFAULT NULL COMMENT 'id',
    -> `ct_title` varchar(200) DEFAULT NULL COMMENT '标题',
    -> `bd_id` int(11) DEFAULT NULL COMMENT 'id',
    -> `org_unit` int(11) DEFAULT NULL COMMENT '',
    -> `poi_id` int(11) DEFAULT NULL,
    -> `poi_name` varchar(200) DEFAULT NULL COMMENT 'poi名称',
    ->   UNIQUE KEY `uk_ct_poi` (`ct_id`,`poi_id`),
    ->   KEY `idx_orgunit` (`org_unit`),
    ->   KEY `idx_poi_orgunit` (`poi_id`,`org_unit`),
    ->   key idx_bd_id(bd_id),
    ->   key idx_poi_id(poi_id),
    ->   key idx_poi_name(poi_name),
    ->   key idx_all(ct_id,ct_title,bd_id,org_unit,poi_name),
    ->   key idx_test(ct_title,bd_id,org_unit,poi_name)
    -> ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
mysql  > load data infile '/tmp/ct.txt' into table mixed_poi_mt_ct;
Query OK, 1928611 rows affected (13 min 8.88 sec)
Records: 1928611  Deleted: 0  Skipped: 0  Warnings: 0


4. 结论

大数据导入导出先导入数据再重建索引


5. 选择性插入

无where

mysql> insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)

    ->         select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()

    ->         from mixed_poi_mt_ct ct

    ->         join poi_crm poi on ct.poi_id=poi.poi_id

    ->         join orgunit_geo_area oga on poi.geo_area_id=oga.geo_area_id

    ->         limit 100000

    -> ;

Query OK, 69517 rows affected (5.02 sec)

Records: 100000  Duplicates: 30483  Warnings: 0


where字段有string类型索引

mysql> insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)

    ->         select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()

    ->         from mixed_poi_mt_ct ct

    ->         join poi_crm poi on ct.poi_id=poi.poi_id

    ->         join orgunit_geo_area oga on poi.geo_area_id=oga.geo_area_id

    ->         where ct.ct_id>'0'

    ->         limit 100000

    -> ;

Query OK, 69517 rows affected (45.76 sec)

Records: 100000  Duplicates: 30483  Warnings: 0


where字段有int类型索引

mysql> insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)

    ->         select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()

    ->         from mixed_poi_mt_ct ct

    ->         join poi_crm poi on ct.poi_id=poi.poi_id

    ->         join orgunit_geo_area oga on poi.geo_area_id=oga.geo_area_id

    ->         where ct.poi_id>0

    ->         limit 100000;

Query OK, 69517 rows affected (5.72 sec)

Records: 100000  Duplicates: 30483  Warnings: 0


where字段无索引

mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)

    ->         select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()

    ->         from mixed_poi_mt_ct ct

    ->         join poi_crm poi on ct.poi_id=poi.poi_id

    ->         join orgunit_geo_area oga on poi.geo_area_id=oga.geo_area_id

    ->         where poi.close_status=0

    ->         limit 100000;

Query OK, 69355 rows affected (23 min 21.16 sec)

Records: 100000  Duplicates: 30645  Warnings: 0


join字段使用了函数

mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)         

   ->    select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()        ->    from mixed_poi_mt_ct ct        

   ->    join poi_crm poi on ct.poi_id=poi.poi_id         

   ->    join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id                 

   ->    limit 100000;

Query OK, 77348 rows affected (5 min 25.86 sec)


mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)         select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()         from mixed_poi_mt_ct ct         join poi_crm poi on ct.poi_id=poi.poi_id         join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id         where ct.ct_id>'0'         limit 100000;

Query OK, 77348 rows affected (7 min 11.24 sec)


mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)

    ->         select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()

    ->         from mixed_poi_mt_ct ct

    ->         join poi_crm poi on ct.poi_id=poi.poi_id

    ->         join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id

    ->         where poi.poi_id>0

    ->         limit 100000;

Query OK, 64770 rows affected (41.99 sec)

Records: 100000  Duplicates: 35230  Warnings: 0


???  线下库不稳定,过了段时间的数据

mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)

    ->         select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()

    ->         from mixed_poi_mt_ct ct

    ->         join poi_crm poi on ct.poi_id=poi.poi_id

    ->         join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id

    ->         where poi.poi_id>0

    ->         limit 100000

    -> ;

Query OK, 64770 rows affected (25.93 sec)


mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)

    ->         select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()

    ->         from mixed_poi_mt_ct ct

    ->         join poi_crm poi on ct.poi_id=poi.poi_id

    ->         join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id

    ->         where poi.close_status=0

    ->         limit 100000

    -> ;

Query OK, 77186 rows affected (5 min 0.31 sec)

Records: 100000  Duplicates: 22814  Warnings: 0