问题描述

应用:x86 centos7

数据库:1-2-128-22.08.12-166927-20005-ENT

测试数据库有两个相互隔离的IP,应用可通过两个IP连接数据库,当断开正在连接的一个网卡后,不能立即切换,程序卡住时间过长问题。应用部署于windows时正常。

达梦数据库 支持mysql_句柄

如上图所示客户端初始连接ip为192.168.44.130 当关闭改网卡后程序卡住12分钟。

问题原因分析

        通过设置连接超时等参数均未解决,通过咨询专家提供TCP/IP连接排查方向。通过程序跟踪,确定确为与TCP连接相关,测试修改TCP相关内核参数,最终确定与linux tcp 断网重连net.ipv4.tcp_retries2参数相关

具体分析过程

  • 创建测试用户和表
CREATE USER "USER01" IDENTIFIED BY "Dameng88.";
CREATE TABLE "USER01"."TAB02"
(
"C1" INT,
"C2" VARCHAR(50));
  • 配置服务名连接
vim /etc/dm_svc.config
TIME_ZONE=(480)
LANGUAGE=(cn)
DMTEST=(192.168.183.128:5236,192.168.44.128:5236)

[DMTEST]
AUTO_RECONNECT=(1)
CONNECT_TIMEOUT=(500)
  • 编写DCI测试用例
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <malloc.h>
#include <unistd.h>
#include "DCI.h"
/* 声明句柄 */
OCIEnv* envhp; /* 环境句柄 */
OCISvcCtx* svchp; /* 服务环境句柄 */
OCIServer* srvhp; /* 服务器句柄 */
OCISession* authp; /* 会话句柄 */
OCIStmt* stmthp; /* 语句句柄 */
OCIDescribe* dschp; /* 描述句柄 */
OCIError* errhp; /* 错误句柄 */
OCIDefine* defhp[3]; /* 定义句柄 */
OCIBind* bidhp[4]; /* 绑定句柄 */
sb2 ind[3]; /* 指示符变量 */
/* 绑定select结果集的参数 */
int szc1; /* 存储personid列 */
text szc2[30]; /* 存储sex列 */
char sql[256]; /* 存储执行的sql语句*/

void connDB(){
        char strServerName[50];
        char strUserName[50];
        char strPassword[50];
        int ret;
        text errbuf[100];
        /* 设置服务器,用户名和密码 */
        strcpy(strServerName, "DMTEST");
        strcpy(strUserName, "SYSDBA");
        strcpy(strPassword, "SYSDBA");
        /* 初始化OCI应用环境*/
        OCIInitialize(OCI_DEFAULT, NULL, NULL, NULL, NULL);
        /* 初始化环境句柄 */
        OCIEnvInit(&envhp, OCI_DEFAULT, 0, 0);
        /* 分配句柄 */
        OCIHandleAlloc(envhp, (dvoid**)&svchp, OCI_HTYPE_SVCCTX, 0, 0); /*服务器环境句柄*/
        OCIHandleAlloc(envhp, (dvoid**)&srvhp, OCI_HTYPE_SERVER, 0, 0); /* 服务器句柄*/
        OCIHandleAlloc(envhp, (dvoid**)&authp, OCI_HTYPE_SESSION, 0, 0); /* 会话句柄 */
        OCIHandleAlloc(envhp, (dvoid**)&errhp, OCI_HTYPE_ERROR, 0, 0); /* 错误句柄 */
        OCIHandleAlloc(envhp, (dvoid**)&dschp, OCI_HTYPE_DESCRIBE, 0, 0); /*描述符句柄*/
        /* 连接服务器 */
        OCIServerAttach(srvhp, errhp, (text*)strServerName, (sb4)strlen(strServerName), OCI_DEFAULT);
        /* 设置用户名和密码 */
        OCIAttrSet(authp, OCI_HTYPE_SESSION, (text*)strUserName, (ub4)strlen(strUserName), OCI_ATTR_USERNAME, errhp);
        OCIAttrSet(authp, OCI_HTYPE_SESSION, (text*)strPassword, (ub4)strlen(strPassword), OCI_ATTR_PASSWORD, errhp);
        /* 设置服务器环境句柄属性 */
        OCIAttrSet((dvoid*)svchp, (ub4)OCI_HTYPE_SVCCTX,(dvoid*)srvhp, (ub4)0, OCI_ATTR_SERVER, errhp);
        OCIAttrSet(svchp, OCI_HTYPE_SVCCTX, (dvoid*)authp,0, OCI_ATTR_SESSION, errhp);
        /* 创建并开始一个用户会话 */
        ret=OCISessionBegin(svchp, errhp, authp, OCI_CRED_RDBMS, OCI_DEFAULT);
        while(ret != 0)
        {
                OCIErrorGet(errhp, 1, NULL, &ret, (OraText*)errbuf, sizeof(errbuf), OCI_HTYPE_ERROR);
                printf("\n%d,%s\n",ret,errbuf);
                sleep(2);
                ret=OCIServerAttach(srvhp, errhp, (text*)strServerName, (sb4)strlen(strServerName), OCI_DEFAULT);
        }

}

void closeDB(){
        //结束会话
        OCISessionEnd(svchp, errhp, authp, (ub4)0);
        //断开与数据库的连接
        OCIServerDetach(srvhp, errhp, OCI_DEFAULT);
        //释放OCI句柄
        OCIHandleFree((dvoid*)dschp, OCI_HTYPE_DESCRIBE);
        OCIHandleFree((dvoid*)stmthp, OCI_HTYPE_STMT);
        OCIHandleFree((dvoid*)errhp, OCI_HTYPE_ERROR);
        OCIHandleFree((dvoid*)authp, OCI_HTYPE_SESSION);
        OCIHandleFree((dvoid*)svchp, OCI_HTYPE_SVCCTX);
        OCIHandleFree((dvoid*)srvhp, OCI_HTYPE_SERVER);
}

int main(int argc, char* argv[])
{
        char strServerName[50];
        char strUserName[50];
        char strPassword[50];
        int ret;
        text errbuf[100];
        connDB();       
	int i=1;
	for(i=1;i<100;i++){
                /* 分配和初始化句柄 */
                ret=OCIHandleAlloc(envhp, (dvoid**)&stmthp, OCI_HTYPE_STMT, 0, 0);
                if (ret != 0){
                        OCIErrorGet(errhp, 1, NULL, &ret, (OraText*)errbuf, sizeof(errbuf), OCI_HTYPE_ERROR);
                        printf("\n%d,%s\n",ret,errbuf);
                        OCISessionEnd(svchp, errhp, authp, (ub4)0);
                        sleep(2);
                        closeDB();
                        connDB();
                        ret=OCIHandleAlloc(envhp, (dvoid**)&stmthp, OCI_HTYPE_STMT, 0, 0);
                }
        	memset(sql, 0, sizeof(sql));
                strcpy(sql, "delete from user01.tab02");
                // 准备SQL语句
                OCIStmtPrepare(stmthp, errhp, (text*)sql, strlen(sql), OCI_NTV_SYNTAX, OCI_DEFAULT);
        	// 执行SQL语句
                ret = OCIStmtExecute(svchp, stmthp, errhp, (ub4)1, (ub4)0, (CONST OCISnapshot*) 0, (OCISnapshot*)0, (ub4)OCI_DEFAULT);
        	if (ret != 0)
                {
                        OCIErrorGet(errhp, 1, NULL, &ret, (OraText*)errbuf, sizeof(errbuf), OCI_HTYPE_ERROR);
                        printf("\n%s\n", errbuf);
                        sleep(2);
        		continue;
                }
                else
                {
        		//提交到数据库
        		OCITransCommit(svchp, errhp, OCI_DEFAULT);
                        printf("Delete OK!\n");
                }
        	memset(sql, 0, sizeof(sql));
        	printf("Insert ...\n");
        	strcpy(sql,"insert into user01.tab02 values(1,'test'||sysdate)");
        	OCIStmtPrepare(stmthp, errhp, (text*)sql, strlen(sql), OCI_NTV_SYNTAX, OCI_DEFAULT);
        	ret=OCIStmtExecute(svchp, stmthp, errhp, (ub4)1, (ub4)0, (CONST OCISnapshot*) 0, (OCISnapshot*)0, (ub4)OCI_DEFAULT);
        	if(ret!=0){
        	   OCIErrorGet(errhp, 1, NULL, &ret, (OraText*)errbuf, sizeof(errbuf), OCI_HTYPE_ERROR);
        	   printf("\n%s\n", errbuf);
                   sleep(2);
        	   continue;
        	}
        	else{
        	   // 提交到数据库
        	   OCITransCommit(svchp, errhp, OCI_DEFAULT);
        	   printf("Insert ok!\n");
        	}
                ///************************************************************************/
                ///* 查询表 */
                ///************************************************************************/
                memset(sql, 0, sizeof(sql));
        	printf("Select ...\n");
                strcpy(sql, "select C1,C2 from USER01.TAB02 union select SESS_ID as C1,CLNT_IP AS C2 from v$sessions where state='ACTIVE'");
                /* 准备SQL语句 */
                OCIStmtPrepare(stmthp, errhp, (text*)sql, strlen(sql), OCI_NTV_SYNTAX, OCI_DEFAULT);
                /* 绑定输出列 */
                OCIDefineByPos(stmthp, &defhp[0], errhp, 1, &szc1,sizeof(szc1), SQLT_INT, &ind[0], 0, 0,OCI_DEFAULT);
                OCIDefineByPos(stmthp, &defhp[1], errhp, 2, (ub1*)szc2,sizeof(szc2), SQLT_STR, &ind[1], 0, 0, OCI_DEFAULT);
                //OCIDefineByPos(stmthp, &defhp[2], errhp, 3, (ub1*)szphone,sizeof(szphone), SQLT_STR, &ind[2], 0, 0, OCI_DEFAULT);
                /* 执行SQL语句 */
                ret = OCIStmtExecute(svchp, stmthp, errhp, (ub4)0, 0, NULL, NULL, OCI_DEFAULT);
                if (ret != 0)
                {
                        OCIErrorGet(errhp, 1, NULL, &ret, (OraText*)errbuf, sizeof(errbuf), OCI_HTYPE_ERROR);
                        printf("%s\n", errbuf);
                        sleep(2);
        		continue;
                }
        	else{
                	printf("%-10s%-10s\n", "C1", "C2");
                	while ((OCIStmtFetch(stmthp, errhp, 1, OCI_FETCH_NEXT, OCI_DEFAULT)) != OCI_NO_DATA)
                	{
                        	printf("%-8d", szc1);
                        	printf("%s\n", szc2);
                 	//       printf("%-10s\n", szphone);
                	}
        		printf("\nSelect ok!\n");
        	}
                sleep(2);
	}//结束循环
	closeDB();
        return 0;
  • 编译脚本并运行
[root@test OCI]# g++ -o testOci testOci.cpp -I/home/dmdba/dmdbms/include -L/home/dmdba/dmdbms/bin -ldmoci

执行
./testOci
  • 断网卡住后进行跟踪

pstack查看

达梦数据库 支持mysql_linux_02

通过跟踪发现tcp连接已建立,但一直未断开

达梦数据库 支持mysql_句柄_03

 

达梦数据库 支持mysql_服务器_04

 解决办法

修改应用端内核参数

vim /etc/sysctl.conf

net.ipv4.tcp_retries2=3

修改后sysctl -p生效

再次执行脚本,断网测试,时间缩短为12s,确定断网程序卡住时间过长是因为内核参数导致

达梦数据库 支持mysql_linux_05

tcp_retries2 :INTEGER

默认值为15

在丢弃激活(已建立通讯状况)的TCP连接之前﹐需要进行多少次重试。默认值为15,根据RTO的值来决定,相当于13-30分钟(RFC1122规定,必须大于100秒).