- 由于导出数据之前对数据的内容了解不多,准备工作略显不足,没有考虑到表中部分字段中会出现特殊字符,比如:单引号('),双引号("),中文等等;所以通过select 查询出来的数据包含了未转义的特殊字符,再将这些字段封装进sql语句的values中作为值插入mysql数据库时,由于特殊字符 单引号(') 的存在,将sql插入语句的键值对的映射关系打乱了,导致数据导入出现异常,进而数据导入中断;
解决方案:在查询出数据,将数据封装进插入语句时进行特殊字符的转义,例如:
string mysql_escape_string ( string $unescaped_string )
string mysql_escape_string ( string $unescaped_string )
这样就可以将特殊字符转义;
2.由于数据量很大,并且线上数据库一直有用户访问,为了不影响线上数据库的性能,所以查询语句必须用批量,分页查询,使用mysql的limit关键字,限制一次查询时读取的数据条数,并且在一次查询后sleep(1)一下,进而降低对线上数据库的压力;
每次查询出的数据经过过滤后可能没有需要查询数据库的记录,这就导致sql语句的values字段为空,进而出现插入异常,因此需要判断一下values是否为空,然后将sql语句写入sql文件;
public function ActionDumpData()
{
$sql = "select count(*) as num from server_operate_log";
$counts = Yii::app()->db->createCommand($sql)->queryAll();
$one_dump = 5000;
$pages = intval($counts[0]['num'] / $one_dump) +1;
$max_id = 0;
$fp = fopen("/home/work/wwwroot/operate_log_data.sql","a+");
$res = array();
for($i = 0;$i < 256;$i++)
{
$prefix = $this->getHashTable($i);
$res[$prefix] = array();
}
try
{
for($start = 0;$start < $pages;$start++)
{
$sql1 = "select id,server_id,date,operator,table_name,field_name,src,dst,list_id,list_type,`desc` from server_operate_log limit ".$start * $one_dump.",".$one_dump;
$data = array();
$data = Yii::app()->db->createCommand($sql1)->queryAll();
foreach($data as $d)
{
//filter useless log
if(isset($d['operator']) && $d['operator'] == 'rest_interface' && empty($d['src']) && $d['dst'] > 0 && stristr($d['desc'],'rest') && stristr($d['desc'],'noah_product_id 0'))
continue;
$max_id = $d['id'];
$suffix = $this->getHashTable($d['server_id']);
$value = '';
if(isset($d['server_id']))
$value .= $d['server_id'];
else
$value .= 0;
if(isset($d['date']))
$value .= ",'".mysql_escape_string($d['date'])."'";
else
$value .= ",NULL";
if(isset($d['operator']))
$value .= ",'".mysql_escape_string($d['operator'])."'";
else
$value .= ",NULL";
if(isset($d['table_name']))
$value .= ",'".mysql_escape_string($d['table_name'])."'";
else
$value .= ",NULL";
if(isset($d['field_name']))
$value .= ",'".mysql_escape_string($d['field_name'])."'";
else
$value .= ",NULL";
if(isset($d['src']))
$value .= ",'".mysql_escape_string($d['src'])."'";
else
$value .= ",NULL";
if(isset($d['dst']))
$value .= ",'".mysql_escape_string($d['dst'])."'";
else
$value .= ",NULL";
if(isset($d['list_id']))
$value .= ",".$d['list_id'];
else
$value .= ",0";
if(isset($d['list_type']))
$value .= ",'".mysql_escape_string($d['list_type'])."'";
else
$value .= ",'NULL'";
if(isset($d['desc']))
$value .= ",'".mysql_escape_string($d['desc'])."'";
else
$value .= ",NULL";
$value .= ",'server_operate_log'";
$arr = array($value);
$res[$suffix][] = '('.$value.')';
}
foreach($res as $k => $v)
{
if(!empty($v))
{
$sql2 = "insert into operate_log_".$k."(object_id,date,operator,table_name,field_name,src,dst,list_id,list_type,`desc`,log_type) values ".implode(",",$res[$k]);
fwrite($fp,$sql2);
fwrite($fp,";\n");
unset($res[$k]);
$res[$k] = array();
}
}
sleep(1);
}
}
catch (Exception $e)
{
fclose($fp);
$fp = fopen("/home/work/wwwroot/max_id.sql","a+");
fwrite($fp,$max_id);
fwrite($fp,"\n Exception;\n");
fclose($fp);
}
fclose($fp);
$fp = fopen("/home/work/wwwroot/max_id.sql","a+");
fwrite($fp,$max_id);
fwrite($fp,"\n Finished;\n");
fclose($fp);
echo "data dump finished";
}
public function ActionDumpData()
{
$sql = "select count(*) as num from server_operate_log";
$counts = Yii::app()->db->createCommand($sql)->queryAll();
$one_dump = 5000;
$pages = intval($counts[0]['num'] / $one_dump) +1;
$max_id = 0;
$fp = fopen("/home/work/wwwroot/operate_log_data.sql","a+");
$res = array();
for($i = 0;$i < 256;$i++)
{
$prefix = $this->getHashTable($i);
$res[$prefix] = array();
}
try
{
for($start = 0;$start < $pages;$start++)
{
$sql1 = "select id,server_id,date,operator,table_name,field_name,src,dst,list_id,list_type,`desc` from server_operate_log limit ".$start * $one_dump.",".$one_dump;
$data = array();
$data = Yii::app()->db->createCommand($sql1)->queryAll();
foreach($data as $d)
{
//filter useless log
if(isset($d['operator']) && $d['operator'] == 'rest_interface' && empty($d['src']) && $d['dst'] > 0 && stristr($d['desc'],'rest') && stristr($d['desc'],'noah_product_id 0'))
continue;
$max_id = $d['id'];
$suffix = $this->getHashTable($d['server_id']);
$value = '';
if(isset($d['server_id']))
$value .= $d['server_id'];
else
$value .= 0;
if(isset($d['date']))
$value .= ",'".mysql_escape_string($d['date'])."'";
else
$value .= ",NULL";
if(isset($d['operator']))
$value .= ",'".mysql_escape_string($d['operator'])."'";
else
$value .= ",NULL";
if(isset($d['table_name']))
$value .= ",'".mysql_escape_string($d['table_name'])."'";
else
$value .= ",NULL";
if(isset($d['field_name']))
$value .= ",'".mysql_escape_string($d['field_name'])."'";
else
$value .= ",NULL";
if(isset($d['src']))
$value .= ",'".mysql_escape_string($d['src'])."'";
else
$value .= ",NULL";
if(isset($d['dst']))
$value .= ",'".mysql_escape_string($d['dst'])."'";
else
$value .= ",NULL";
if(isset($d['list_id']))
$value .= ",".$d['list_id'];
else
$value .= ",0";
if(isset($d['list_type']))
$value .= ",'".mysql_escape_string($d['list_type'])."'";
else
$value .= ",'NULL'";
if(isset($d['desc']))
$value .= ",'".mysql_escape_string($d['desc'])."'";
else
$value .= ",NULL";
$value .= ",'server_operate_log'";
$arr = array($value);
$res[$suffix][] = '('.$value.')';
}
foreach($res as $k => $v)
{
if(!empty($v))
{
$sql2 = "insert into operate_log_".$k."(object_id,date,operator,table_name,field_name,src,dst,list_id,list_type,`desc`,log_type) values ".implode(",",$res[$k]);
fwrite($fp,$sql2);
fwrite($fp,";\n");
unset($res[$k]);
$res[$k] = array();
}
}
sleep(1);
}
}
catch (Exception $e)
{
fclose($fp);
$fp = fopen("/home/work/wwwroot/max_id.sql","a+");
fwrite($fp,$max_id);
fwrite($fp,"\n Exception;\n");
fclose($fp);
}
fclose($fp);
$fp = fopen("/home/work/wwwroot/max_id.sql","a+");
fwrite($fp,$max_id);
fwrite($fp,"\n Finished;\n");
fclose($fp);
echo "data dump finished";
}
由于线上数据不断更新,因此导出是记录的最大的id信息,以备其他服务上线后进行数据同步;
3.根据object_id来对原始数据记录进行hash,分别录入32张表中,hash的字段要选择数据区分度大,尽量将原始数据均匀划分的字段:
private function getHashTable($u, $n = 32)
{
$h = sprintf("%u", crc32($u));
$h1 = intval($h / $n);
$h2 = $h1 % $n;
$h3 = base_convert($h2, 10, 16);
$h4 = sprintf("%02s", $h3);
return $h4;
}
private function getHashTable($u, $n = 32)
{
$h = sprintf("%u", crc32($u));
$h1 = intval($h / $n);
$h2 = $h1 % $n;
$h3 = base_convert($h2, 10, 16);
$h4 = sprintf("%02s", $h3);
return $h4;
}
4. 由于数据导入时线上数据库是主从备份,因此为了减轻从库读取主库记录变更日志时对主库造成的压力,在每次批量录入几百条数据后,需要让mysql数据插入语句暂停等待一下:
select sleep(1);
select sleep(1);
5. 数据写入时一条insert 语句批量写入几百条记录,这样可以提高数据插入效率,降低对数据库的压力,但由于未知原因,一些insert语句在中间断开,成为两条语句,在sql文件中出现了换行,这样导入时就会出现异常,最初设想可以通过去掉非法insert语句后的换行,将两行合并为一行:
mixed str_replace ( mixed $search , mixed $replace , mixed $subject [, int &$count ] )
mixed str_replace ( mixed $search , mixed $replace , mixed $subject [, int &$count ] )
但始终无法去掉非法insert语句后的换行符,通过拼接还是无法合成一条insert语句。。最终通过计算非法insert语句的字符数,用php截取insert语句从0到len-1的字符,这样将换行符去掉,合称为一条insert语句:
public function actionMergeLine()
{
$fp = fopen("/home/work/wwwroot/log_correct.sql","r");
$fp2 = fopen("/home/work/wwwroot/log_correct_second.sql","a+");
while(($lines = fgets($fp,1048576)) !== false)
{
//匹配合法结束的insert语句
preg_match_all("/\)\;/",$lines,$n);
if(!count($n[0]) && strlen($lines)>50)
{
$line = fgets($fp,1048576);
$lines = substr($lines,0,strlen($lines)-1);
$lines .= $line;
}
fwrite($fp2,$lines);
}
fclose($fp2);
fclose($fp);
}
public function actionMergeLine()
{
$fp = fopen("/home/work/wwwroot/log_correct.sql","r");
$fp2 = fopen("/home/work/wwwroot/log_correct_second.sql","a+");
while(($lines = fgets($fp,1048576)) !== false)
{
//匹配合法结束的insert语句
preg_match_all("/\)\;/",$lines,$n);
if(!count($n[0]) && strlen($lines)>50)
{
$line = fgets($fp,1048576);
$lines = substr($lines,0,strlen($lines)-1);
$lines .= $line;
}
fwrite($fp2,$lines);
}
fclose($fp2);
fclose($fp);
}
6. 由于在非法断开的insert语句处,最后一条记录的value值偶尔不完整,因此需要去掉最后非法的value,通过php进行字符串查找,查找非法insert语句中:"),(" 的最后一次出现的位置,因为"(" 后面即为不完整的value,需要将后面的value去掉,即截取insert语句前面的字符,然后做处理加上";",作为一条合法的insert语句而结束:
public function actionSplitLines()
{
$fp = fopen("/home/work/wwwroot/log_test.sql","r");
$fp2 = fopen("/home/work/wwwroot/log_test2.sql","a+");
while(($lines = fgets($fp,1048576)) !== false)
{
preg_match_all("/insert into operate_log/",$lines,$n);
if(count($n[0]) > 1)//一条insert语句中出现两次insert,非法
{
$pos = stripos($lines,"insert into operate_log",60);
$str1 = substr($lines,0,$pos-1);
$pos2 = strrpos($str1,"),(");
$str1 = substr($str1,0,$pos2+1);
$str1 .=";";
fwrite($fp2,$str1);
$str2 = substr($lines,$pos,strlen($lines)-1);
fwrite($fp2,$str2);
$str1 = '';
$str2 = '';
}
}
fclose($fp);
fclose($fp2);
}
public function actionSplitLines()
{
$fp = fopen("/home/work/wwwroot/log_test.sql","r");
$fp2 = fopen("/home/work/wwwroot/log_test2.sql","a+");
while(($lines = fgets($fp,1048576)) !== false)
{
preg_match_all("/insert into operate_log/",$lines,$n);
if(count($n[0]) > 1)//一条insert语句中出现两次insert,非法
{
$pos = stripos($lines,"insert into operate_log",60);
$str1 = substr($lines,0,$pos-1);
$pos2 = strrpos($str1,"),(");
$str1 = substr($str1,0,$pos2+1);
$str1 .=";";
fwrite($fp2,$str1);
$str2 = substr($lines,$pos,strlen($lines)-1);
fwrite($fp2,$str2);
$str1 = '';
$str2 = '';
}
}
fclose($fp);
fclose($fp2);
}
7.最后在导入数据时需要将导入数据的mysql进程压入后台运行,否则终端关闭后导入工作就会停止:
nohup mysql -u user -h mysql_server_address -P3306 -p password database_name < /home/work/wwwroot/log_data.sql;
nohup mysql -u user -h mysql_server_address -P3306 -p password database_name < /home/work/wwwroot/log_data.sql;