假如当前有一条数据
id emails
1 baidu@qq.com,baidu@163.com,baidu@139.com
现在想要分别取出baidu@qq.com,baidu@163.com,baidu@139.com 存入另外一个表中 表只有俩个字段id,email
如果用java程序,那么很简单,直接根据ID查询出这条记录,然后对这个字符做split以逗号分割就可以了,但是sql中没有split函数,如果实现,
以下是用sql写的处理数据的存储过程
DROP PROCEDURE IF EXISTS dealEmail;

DELIMITER $$

-- Splits the comma-separated t_author.emails values into single addresses and
-- stores every address that is not yet present in t_email.email.
--
-- totalCount: number of t_author rows (with a non-empty emails value) to
--             process, normally the COUNT(*) of those rows.
CREATE PROCEDURE dealEmail(IN totalCount INT)
BEGIN
    DECLARE limitcount INT DEFAULT 0;  -- zero-based offset of the row being processed
    DECLARE v_emails VARCHAR(1000);    -- raw comma-separated list of the current row
    DECLARE piece_total INT;           -- number of addresses in v_emails (commas + 1)
    DECLARE piece_no INT;              -- 1-based position of the address being extracted
    DECLARE ema VARCHAR(500);          -- single address to store in t_email

    WHILE totalCount > 0 DO
        -- Reset first: SELECT ... INTO leaves the variable untouched when the
        -- offset is past the last row, which would re-process stale data.
        SET v_emails = NULL;

        -- ORDER BY keeps the offset walk deterministic between iterations.
        -- NOTE(review): assumes pid is the unique key of t_author -- confirm.
        SELECT emails
        INTO v_emails
        FROM t_author
        WHERE emails <> ''
        ORDER BY pid
        LIMIT limitcount, 1;

        IF v_emails IS NOT NULL THEN
            SET piece_total = CHAR_LENGTH(v_emails)
                            - CHAR_LENGTH(REPLACE(v_emails, ',', '')) + 1;
            SET piece_no = 1;

            WHILE piece_no <= piece_total DO
                -- Pick the n-th field by position. Stripping the already
                -- consumed prefix with REPLACE() would also delete any later
                -- occurrence of that text (e.g. 'b,ab,c' would yield 'ac').
                SET ema = SUBSTRING_INDEX(
                              SUBSTRING_INDEX(v_emails, ',', piece_no), ',', -1);

                -- Only insert addresses that are not stored yet.
                IF NOT EXISTS (SELECT 1 FROM t_email WHERE email = ema) THEN
                    INSERT INTO t_email (email) VALUES (ema);
                END IF;

                SET piece_no = piece_no + 1;
            END WHILE;
        END IF;

        SET totalCount = totalCount - 1;
        SET limitcount = limitcount + 1;
    END WHILE;
END$$

DELIMITER ;

CALL dealEmail(568);
使用的是mysql5.5
[b][b]允许对上面代码进行修剪[/b][/b]
DROP PROCEDURE IF EXISTS dealEmail;

DELIMITER $$

-- Splits the comma-separated t_author.emails value of every row whose emails
-- is not NULL and inserts one t_email row (trimmed address plus the author's
-- trimmed researchname) per address. No duplicate check is performed.
CREATE PROCEDURE dealEmail()
BEGIN
    DECLARE limitcount INT DEFAULT 0;  -- zero-based offset of the row being processed
    DECLARE totalCount INT DEFAULT 0;  -- rows still to process
    DECLARE v_emails VARCHAR(1000);    -- raw comma-separated list of the current row
    DECLARE searchname VARCHAR(500);   -- researchname of the current row
    DECLARE piece_total INT;           -- number of addresses in v_emails (commas + 1)
    DECLARE piece_no INT;              -- 1-based position of the address being extracted
    DECLARE ema VARCHAR(500);          -- single address to store in t_email

    SELECT COUNT(*)
    INTO totalCount
    FROM t_author
    WHERE emails IS NOT NULL;

    WHILE totalCount > 0 DO
        -- Reset first: SELECT ... INTO leaves the variables untouched when no
        -- row is found, which would insert the previous row a second time.
        SET v_emails = NULL;
        SET searchname = NULL;

        -- ORDER BY keeps the offset walk deterministic between iterations.
        -- NOTE(review): assumes pid is the unique key of t_author -- confirm.
        SELECT emails, researchname
        INTO v_emails, searchname
        FROM t_author
        WHERE emails IS NOT NULL
        ORDER BY pid
        LIMIT limitcount, 1;

        IF v_emails IS NOT NULL THEN
            SET piece_total = CHAR_LENGTH(v_emails)
                            - CHAR_LENGTH(REPLACE(v_emails, ',', '')) + 1;
            SET piece_no = 1;

            WHILE piece_no <= piece_total DO
                -- Pick the n-th field by position. Stripping the already
                -- consumed prefix with REPLACE() would also delete any later
                -- occurrence of that text (e.g. 'b,ab,c' would yield 'ac').
                SET ema = SUBSTRING_INDEX(
                              SUBSTRING_INDEX(v_emails, ',', piece_no), ',', -1);

                INSERT INTO t_email (email, researchname)
                VALUES (TRIM(ema), TRIM(searchname));

                SET piece_no = piece_no + 1;
            END WHILE;
        END IF;

        SET totalCount = totalCount - 1;
        SET limitcount = limitcount + 1;
    END WHILE;
END$$

DELIMITER ;

CALL dealEmail();
[b][b]如果数据量大,mysql会执行很长时间,现在提供一种快速的办法,再次更新,嘻嘻
使用的时游标,网上很多说使用游标处理数据不要超过1W,否则会很慢,但是我现在还不知道更好的办法,只能先用这个,后续有好的,再次更新,然后修改mysql不自动提交,这样效率会更好。不多说了,贴代码[/b][/b]
-- Disable autocommit so all inserts are committed once, at the end of the
-- procedure, instead of once per row.
SET autocommit = 0;
DROP PROCEDURE IF EXISTS dealEmail;

DELIMITER $$

-- Walks t_author with a cursor, splits each non-NULL comma-separated emails
-- value and inserts one t_email row (trimmed address plus the author's trimmed
-- researchname) per address, then commits.
CREATE PROCEDURE dealEmail()
BEGIN
    DECLARE done INT DEFAULT 0;        -- set to 1 by the handler when the cursor is exhausted
    DECLARE v_emails VARCHAR(1600);    -- raw comma-separated list of the current row
    DECLARE searchname VARCHAR(500);   -- researchname of the current row
    DECLARE piece_total INT;           -- number of addresses in v_emails (commas + 1)
    DECLARE piece_no INT;              -- 1-based position of the address being extracted
    DECLARE ema VARCHAR(500);          -- single address to store in t_email

    -- Fetch the needed columns directly so no per-row lookup by pid is required.
    -- Rows with NULL emails produce no output, so they are filtered out here.
    DECLARE cur CURSOR FOR
        SELECT emails, researchname
        FROM t_author
        WHERE emails IS NOT NULL;

    -- A dedicated flag marks the end of the cursor; using a fetched key value
    -- as the sentinel would stop the loop early on a row whose key is 0.
    DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = 1;

    OPEN cur;
    FETCH cur INTO v_emails, searchname;

    WHILE done = 0 DO
        SET piece_total = CHAR_LENGTH(v_emails)
                        - CHAR_LENGTH(REPLACE(v_emails, ',', '')) + 1;
        SET piece_no = 1;

        WHILE piece_no <= piece_total DO
            -- Pick the n-th field by position. Stripping the already consumed
            -- prefix with REPLACE() would also delete any later occurrence of
            -- that text (e.g. 'b,ab,c' would yield 'ac').
            SET ema = SUBSTRING_INDEX(
                          SUBSTRING_INDEX(v_emails, ',', piece_no), ',', -1);

            INSERT INTO t_email (email, researchname)
            VALUES (TRIM(ema), TRIM(searchname));

            SET piece_no = piece_no + 1;
        END WHILE;

        FETCH cur INTO v_emails, searchname;
    END WHILE;

    CLOSE cur;
    COMMIT;
END$$

DELIMITER ;
-- Empty the target table (intentionally no WHERE clause), rebuild it by
-- running the procedure, then report how many rows were produced.
DELETE FROM t_email;
COMMIT;
CALL dealEmail();
SELECT COUNT(*) FROM t_email;