问题描述
应用:x86 centos7
数据库:1-2-128-22.08.12-166927-20005-ENT
测试数据库有两个相互隔离的IP,应用可通过两个IP连接数据库,当断开正在连接的一个网卡后,不能立即切换,程序卡住时间过长问题。应用部署于windows时正常。
如上图所示客户端初始连接ip为192.168.44.130 当关闭改网卡后程序卡住12分钟。
问题原因分析
通过设置连接超时等参数均未解决,通过咨询专家提供TCP/IP连接排查方向。通过程序跟踪,确定确为与TCP连接相关,测试修改TCP相关内核参数,最终确定与linux tcp 断网重连net.ipv4.tcp_retries2参数相关
具体分析过程
- 创建测试用户和表
CREATE USER "USER01" IDENTIFIED BY "Dameng88.";
CREATE TABLE "USER01"."TAB02"
(
"C1" INT,
"C2" VARCHAR(50));
- 配置服务名连接
vim /etc/dm_svc.config
TIME_ZONE=(480)
LANGUAGE=(cn)
DMTEST=(192.168.183.128:5236,192.168.44.128:5236)
[DMTEST]
AUTO_RECONNECT=(1)
CONNECT_TIMEOUT=(500)
- 编写DCI测试用例
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <malloc.h>
#include <unistd.h>
#include "DCI.h"
/* 声明句柄 */
OCIEnv* envhp; /* 环境句柄 */
OCISvcCtx* svchp; /* 服务环境句柄 */
OCIServer* srvhp; /* 服务器句柄 */
OCISession* authp; /* 会话句柄 */
OCIStmt* stmthp; /* 语句句柄 */
OCIDescribe* dschp; /* 描述句柄 */
OCIError* errhp; /* 错误句柄 */
OCIDefine* defhp[3]; /* 定义句柄 */
OCIBind* bidhp[4]; /* 绑定句柄 */
sb2 ind[3]; /* 指示符变量 */
/* 绑定select结果集的参数 */
int szc1; /* 存储personid列 */
text szc2[30]; /* 存储sex列 */
char sql[256]; /* 存储执行的sql语句*/
void connDB(){
char strServerName[50];
char strUserName[50];
char strPassword[50];
int ret;
text errbuf[100];
/* 设置服务器,用户名和密码 */
strcpy(strServerName, "DMTEST");
strcpy(strUserName, "SYSDBA");
strcpy(strPassword, "SYSDBA");
/* 初始化OCI应用环境*/
OCIInitialize(OCI_DEFAULT, NULL, NULL, NULL, NULL);
/* 初始化环境句柄 */
OCIEnvInit(&envhp, OCI_DEFAULT, 0, 0);
/* 分配句柄 */
OCIHandleAlloc(envhp, (dvoid**)&svchp, OCI_HTYPE_SVCCTX, 0, 0); /*服务器环境句柄*/
OCIHandleAlloc(envhp, (dvoid**)&srvhp, OCI_HTYPE_SERVER, 0, 0); /* 服务器句柄*/
OCIHandleAlloc(envhp, (dvoid**)&authp, OCI_HTYPE_SESSION, 0, 0); /* 会话句柄 */
OCIHandleAlloc(envhp, (dvoid**)&errhp, OCI_HTYPE_ERROR, 0, 0); /* 错误句柄 */
OCIHandleAlloc(envhp, (dvoid**)&dschp, OCI_HTYPE_DESCRIBE, 0, 0); /*描述符句柄*/
/* 连接服务器 */
OCIServerAttach(srvhp, errhp, (text*)strServerName, (sb4)strlen(strServerName), OCI_DEFAULT);
/* 设置用户名和密码 */
OCIAttrSet(authp, OCI_HTYPE_SESSION, (text*)strUserName, (ub4)strlen(strUserName), OCI_ATTR_USERNAME, errhp);
OCIAttrSet(authp, OCI_HTYPE_SESSION, (text*)strPassword, (ub4)strlen(strPassword), OCI_ATTR_PASSWORD, errhp);
/* 设置服务器环境句柄属性 */
OCIAttrSet((dvoid*)svchp, (ub4)OCI_HTYPE_SVCCTX,(dvoid*)srvhp, (ub4)0, OCI_ATTR_SERVER, errhp);
OCIAttrSet(svchp, OCI_HTYPE_SVCCTX, (dvoid*)authp,0, OCI_ATTR_SESSION, errhp);
/* 创建并开始一个用户会话 */
ret=OCISessionBegin(svchp, errhp, authp, OCI_CRED_RDBMS, OCI_DEFAULT);
while(ret != 0)
{
OCIErrorGet(errhp, 1, NULL, &ret, (OraText*)errbuf, sizeof(errbuf), OCI_HTYPE_ERROR);
printf("\n%d,%s\n",ret,errbuf);
sleep(2);
ret=OCIServerAttach(srvhp, errhp, (text*)strServerName, (sb4)strlen(strServerName), OCI_DEFAULT);
}
}
void closeDB(){
//结束会话
OCISessionEnd(svchp, errhp, authp, (ub4)0);
//断开与数据库的连接
OCIServerDetach(srvhp, errhp, OCI_DEFAULT);
//释放OCI句柄
OCIHandleFree((dvoid*)dschp, OCI_HTYPE_DESCRIBE);
OCIHandleFree((dvoid*)stmthp, OCI_HTYPE_STMT);
OCIHandleFree((dvoid*)errhp, OCI_HTYPE_ERROR);
OCIHandleFree((dvoid*)authp, OCI_HTYPE_SESSION);
OCIHandleFree((dvoid*)svchp, OCI_HTYPE_SVCCTX);
OCIHandleFree((dvoid*)srvhp, OCI_HTYPE_SERVER);
}
int main(int argc, char* argv[])
{
char strServerName[50];
char strUserName[50];
char strPassword[50];
int ret;
text errbuf[100];
connDB();
int i=1;
for(i=1;i<100;i++){
/* 分配和初始化句柄 */
ret=OCIHandleAlloc(envhp, (dvoid**)&stmthp, OCI_HTYPE_STMT, 0, 0);
if (ret != 0){
OCIErrorGet(errhp, 1, NULL, &ret, (OraText*)errbuf, sizeof(errbuf), OCI_HTYPE_ERROR);
printf("\n%d,%s\n",ret,errbuf);
OCISessionEnd(svchp, errhp, authp, (ub4)0);
sleep(2);
closeDB();
connDB();
ret=OCIHandleAlloc(envhp, (dvoid**)&stmthp, OCI_HTYPE_STMT, 0, 0);
}
memset(sql, 0, sizeof(sql));
strcpy(sql, "delete from user01.tab02");
// 准备SQL语句
OCIStmtPrepare(stmthp, errhp, (text*)sql, strlen(sql), OCI_NTV_SYNTAX, OCI_DEFAULT);
// 执行SQL语句
ret = OCIStmtExecute(svchp, stmthp, errhp, (ub4)1, (ub4)0, (CONST OCISnapshot*) 0, (OCISnapshot*)0, (ub4)OCI_DEFAULT);
if (ret != 0)
{
OCIErrorGet(errhp, 1, NULL, &ret, (OraText*)errbuf, sizeof(errbuf), OCI_HTYPE_ERROR);
printf("\n%s\n", errbuf);
sleep(2);
continue;
}
else
{
//提交到数据库
OCITransCommit(svchp, errhp, OCI_DEFAULT);
printf("Delete OK!\n");
}
memset(sql, 0, sizeof(sql));
printf("Insert ...\n");
strcpy(sql,"insert into user01.tab02 values(1,'test'||sysdate)");
OCIStmtPrepare(stmthp, errhp, (text*)sql, strlen(sql), OCI_NTV_SYNTAX, OCI_DEFAULT);
ret=OCIStmtExecute(svchp, stmthp, errhp, (ub4)1, (ub4)0, (CONST OCISnapshot*) 0, (OCISnapshot*)0, (ub4)OCI_DEFAULT);
if(ret!=0){
OCIErrorGet(errhp, 1, NULL, &ret, (OraText*)errbuf, sizeof(errbuf), OCI_HTYPE_ERROR);
printf("\n%s\n", errbuf);
sleep(2);
continue;
}
else{
// 提交到数据库
OCITransCommit(svchp, errhp, OCI_DEFAULT);
printf("Insert ok!\n");
}
///************************************************************************/
///* 查询表 */
///************************************************************************/
memset(sql, 0, sizeof(sql));
printf("Select ...\n");
strcpy(sql, "select C1,C2 from USER01.TAB02 union select SESS_ID as C1,CLNT_IP AS C2 from v$sessions where state='ACTIVE'");
/* 准备SQL语句 */
OCIStmtPrepare(stmthp, errhp, (text*)sql, strlen(sql), OCI_NTV_SYNTAX, OCI_DEFAULT);
/* 绑定输出列 */
OCIDefineByPos(stmthp, &defhp[0], errhp, 1, &szc1,sizeof(szc1), SQLT_INT, &ind[0], 0, 0,OCI_DEFAULT);
OCIDefineByPos(stmthp, &defhp[1], errhp, 2, (ub1*)szc2,sizeof(szc2), SQLT_STR, &ind[1], 0, 0, OCI_DEFAULT);
//OCIDefineByPos(stmthp, &defhp[2], errhp, 3, (ub1*)szphone,sizeof(szphone), SQLT_STR, &ind[2], 0, 0, OCI_DEFAULT);
/* 执行SQL语句 */
ret = OCIStmtExecute(svchp, stmthp, errhp, (ub4)0, 0, NULL, NULL, OCI_DEFAULT);
if (ret != 0)
{
OCIErrorGet(errhp, 1, NULL, &ret, (OraText*)errbuf, sizeof(errbuf), OCI_HTYPE_ERROR);
printf("%s\n", errbuf);
sleep(2);
continue;
}
else{
printf("%-10s%-10s\n", "C1", "C2");
while ((OCIStmtFetch(stmthp, errhp, 1, OCI_FETCH_NEXT, OCI_DEFAULT)) != OCI_NO_DATA)
{
printf("%-8d", szc1);
printf("%s\n", szc2);
// printf("%-10s\n", szphone);
}
printf("\nSelect ok!\n");
}
sleep(2);
}//结束循环
closeDB();
return 0;
- 编译脚本并运行
[root@test OCI]# g++ -o testOci testOci.cpp -I/home/dmdba/dmdbms/include -L/home/dmdba/dmdbms/bin -ldmoci
执行
./testOci
- 断网卡住后进行跟踪
pstack查看
通过跟踪发现tcp连接已建立,但一直未断开
解决办法
修改应用端内核参数
vim /etc/sysctl.conf
net.ipv4.tcp_retries2=3
修改后sysctl -p生效
再次执行脚本,断网测试,时间缩短为12s,确定断网程序卡住时间过长是因为内核参数导致
tcp_retries2 :INTEGER
默认值为15
在丢弃激活(已建立通讯状况)的TCP连接之前﹐需要进行多少次重试。默认值为15,根据RTO的值来决定,相当于13-30分钟(RFC1122规定,必须大于100秒).