1. 背景
导入150w数据到mysql,数据格式为tab分隔csv文件。测试索引对导入速度的影响
2. 无索引,导入数据后重建
CREATE TABLE `tab` ( `ct_id` char(36) DEFAULT NULL COMMENT 'id', `ct_title` varchar(200) DEFAULT NULL COMMENT '标题', `bd_id` int(11) DEFAULT NULL COMMENT 'bdid', `org_unit` int(11) DEFAULT NULL COMMENT '', `poi_id` int(11) DEFAULT NULL, `poi_name` varchar(200) DEFAULT NULL COMMENT 'poi名称' ) ENGINE=InnoDB DEFAULT CHARSET=utf8
mysql > load data infile '/tmp/ct.txt' into table tab; Query OK, 1928611 rows affected (15.83 sec) Records: 1928611 Deleted: 0 Skipped: 0 Warnings: 0
重建索引
mysql > alter table tab add UNIQUE index `uk_ct_poi` (`ct_id`,`poi_id`),
-> add index `idx_orgunit` (`org_unit`),
-> add index `idx_poi_orgunit` (`poi_id`,`org_unit`),
-> add index idx_bd_id(bd_id),
-> add index idx_poi_id(poi_id),
-> add index idx_poi_name(poi_name),
-> add index idx_all(ct_id,ct_title,bd_id,org_unit,poi_name),
-> add index idx_test(ct_title,bd_id,org_unit,poi_name);
Query OK, 0 rows affected (1 min 24.56 sec)
Records: 0 Duplicates: 0 Warnings: 0
3. 表建好索引导入数据
mysql > CREATE TABLE `tab` ( -> `ct_id` char(36) DEFAULT NULL COMMENT 'id', -> `ct_title` varchar(200) DEFAULT NULL COMMENT '标题', -> `bd_id` int(11) DEFAULT NULL COMMENT 'id', -> `org_unit` int(11) DEFAULT NULL COMMENT '', -> `poi_id` int(11) DEFAULT NULL, -> `poi_name` varchar(200) DEFAULT NULL COMMENT 'poi名称', -> UNIQUE KEY `uk_ct_poi` (`ct_id`,`poi_id`), -> KEY `idx_orgunit` (`org_unit`), -> KEY `idx_poi_orgunit` (`poi_id`,`org_unit`), -> key idx_bd_id(bd_id), -> key idx_poi_id(poi_id), -> key idx_poi_name(poi_name), -> key idx_all(ct_id,ct_title,bd_id,org_unit,poi_name), -> key idx_test(ct_title,bd_id,org_unit,poi_name) -> ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
mysql > load data infile '/tmp/ct.txt' into table mixed_poi_mt_ct; Query OK, 1928611 rows affected (13 min 8.88 sec) Records: 1928611 Deleted: 0 Skipped: 0 Warnings: 0
4. 结论
大数据导入导出先导入数据再重建索引
5. 选择性插入
无where
mysql> insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on poi.geo_area_id=oga.geo_area_id
-> limit 100000
-> ;
Query OK, 69517 rows affected (5.02 sec)
Records: 100000 Duplicates: 30483 Warnings: 0
where字段有string类型索引
mysql> insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on poi.geo_area_id=oga.geo_area_id
-> where ct.ct_id>'0'
-> limit 100000
-> ;
Query OK, 69517 rows affected (45.76 sec)
Records: 100000 Duplicates: 30483 Warnings: 0
where字段有int类型索引
mysql> insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on poi.geo_area_id=oga.geo_area_id
-> where ct.poi_id>0
-> limit 100000;
Query OK, 69517 rows affected (5.72 sec)
Records: 100000 Duplicates: 30483 Warnings: 0
where字段无索引
mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on poi.geo_area_id=oga.geo_area_id
-> where poi.close_status=0
-> limit 100000;
Query OK, 69355 rows affected (23 min 21.16 sec)
Records: 100000 Duplicates: 30645 Warnings: 0
join字段使用了函数
mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now() -> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id
-> limit 100000;
Query OK, 77348 rows affected (5 min 25.86 sec)
mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time) select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now() from mixed_poi_mt_ct ct join poi_crm poi on ct.poi_id=poi.poi_id join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id where ct.ct_id>'0' limit 100000;
Query OK, 77348 rows affected (7 min 11.24 sec)
mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id
-> where poi.poi_id>0
-> limit 100000;
Query OK, 64770 rows affected (41.99 sec)
Records: 100000 Duplicates: 35230 Warnings: 0
??? 线下库不稳定,过了段时间的数据
mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id
-> where poi.poi_id>0
-> limit 100000
-> ;
Query OK, 64770 rows affected (25.93 sec)
mysql > insert IGNORE into mixed_contract(contract_id,contract_name,old_orgunit_id,new_orgunit_id,region_manual_type,sub_orgunit_id,old_bd_id,pending_status,update_time,create_time)
-> select ct.ct_id,ct.ct_title,ct.org_unit,if(1=2,0,oga.orgunit_id),2,oga.orgunit_id,ct.bd_id,0,now(),now()
-> from mixed_poi_mt_ct ct
-> join poi_crm poi on ct.poi_id=poi.poi_id
-> join orgunit_geo_area oga on if(merge_id!=0,(select t.geo_area_id from poi_crm t where t.poi_id=poi.merge_id),poi.geo_area_id)=oga.geo_area_id
-> where poi.close_status=0
-> limit 100000
-> ;
Query OK, 77186 rows affected (5 min 0.31 sec)
Records: 100000 Duplicates: 22814 Warnings: 0