需求:线上运行的job,有时候可能因为数据库异常、内存不足或者是内部其他异常导致整个进程退出,是偶发事件,但是如果进程停止,业务数据没处理积压起来,会影响业务。为了能自动监控并启动这种意外停止的进程,写了一个程序监控,每分钟检查一遍,然后自动处理,实际上是非常有用的。

贴代码如下:

//主要逻辑

var runday = DateTime.Today;
            ConfigFile.Instanse.fileName = CommonFunctions.GetAbsolutePath("Kulv.YCF.KeepTaskRun.ini"); // absolute path of the config file
            string ExeFile = "";
            string ServiceName = "";
            var configIndex = 1;
            RunTaskAgain(() =>
            {
                // Walk the numbered config entries (ExeFile1, ExeFile2, ...) until the first missing one.
                // The index is reset on every invocation: it is captured from the enclosing scope, so
                // without the reset a second invocation of this callback would start past the last
                // entry and check nothing. The loop increment runs even when an iteration throws, so a
                // failing entry can no longer be retried forever in a tight loop.
                for (configIndex = 1; ; configIndex++)
                {
                    try
                    {
                        // Cleared first so the catch block can tell "config could not be read"
                        // apart from "a later step failed".
                        ExeFile = "";
                        ExeFile = ConfigFile.Instanse["ExeFile" + configIndex];
                        if (string.IsNullOrEmpty(ExeFile))
                        {
                            break; // no more configured programs
                        }
                        Logger.Info("………………………………………………………… ExeFile" + configIndex + " Start……………………………………………………………");
                        ServiceName = ConfigFile.Instanse["ServiceName" + configIndex];

                        var isRun = CommonFunctions.IsProgramRun(ExeFile); // is the exe among the running processes?
                        // NOTE(review): runday is only written here; nothing in the visible code reads it.
                        if (DateTime.Today != runday)
                        {
                            runday = DateTime.Today;
                        }

                        Logger.Info(string.Format("ExeFile:{0},ServiceName:{1}", ExeFile, ServiceName));
                        if (isRun)
                        {
                            Logger.Info("程序正在运行中");
                        }
                        else
                        {
                            Logger.Info("程序未运行,尝试启动服务");
                            // Start the Windows service through "sc start" and log its output.
                            var startResult = CommonFunctions.RunCmd(string.Format("sc start \"{0}\"", ServiceName));
                            startResult = Regex.Replace(startResult, "(\r\n)+", "$1"); // collapse runs of line breaks into one
                            Logger.Info("\r\n" + startResult);
                            if (!startResult.Contains("失败"))
                            {
                                Logger.Info("启动服务成功!");
                            }

                            var phonestr = ConfigFile.Instanse["CellPhone" + configIndex];
                            FinanceApiInvoke.ApiDomain = ConfigFile.Instanse["MapApiAddress" + configIndex];
                            if (string.IsNullOrEmpty(phonestr))
                            {
                                Logger.Info("短信接收人配置" + configIndex + "为空!");
                            }
                            else if (string.IsNullOrEmpty(FinanceApiInvoke.ApiDomain))
                            {
                                Logger.Info("短信发送API配置" + configIndex + "为空!");
                            }
                            else
                            {
                                // Throttle: at most one SMS batch every 5 minutes, tracked via the cache.
                                // NOTE(review): the cache key is shared by all config entries, so an alert
                                // for one service suppresses alerts for the others during that window.
                                DateTime lastSent = DataCache.GetCache<DateTime>("LastSendMsgTime");
                                if ((DateTime.Now - lastSent).TotalMinutes >= 5)
                                {
                                    var phones = phonestr.Split(new char[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
                                    List<SMSForSendIModel> msgList = new List<SMSForSendIModel>();
                                    foreach (var phone in phones)
                                    {
                                        msgList.Add(new SMSForSendIModel()
                                        {
                                            CompanyId = CompanyEnum.YaoChufa,
                                            Phone = phone,
                                            SendBy = "KeepTaskRun",
                                            UserId = 0,
                                            TemplateCode = "NOTICE-COMMON0",
                                            UserType = UserType.SystemUser,
                                            // NOTE(review): the service name in this text is hard-coded and does
                                            // not follow ServiceName for the other config entries.
                                            Content = "库存服务YCF_STOCK_TASK处于停止状态,监控程序已在尝试启动服务,如果自动启动失败,需要人工处理!如正在发布请忽略此信息。"
                                        });
                                    }
                                    var sendRet = FinanceApiInvoke.SendSmsToWithEncryptionBatch(msgList, true); // hand the messages to the internal SMS API
                                    Logger.InfoFormat("短信返回:{0}", JsonUtility.ToJson(sendRet));
                                    DataCache.Set<DateTime>("LastSendMsgTime", DateTime.Now, 60 * 5);
                                    Logger.InfoFormat("job监控发短信成功");
                                }
                            }
                        }
                        Logger.Info("……………………………………………………………ExeFile" + configIndex + " End………………………………………………………………");
                    }
                    catch (Exception ex)
                    {
                        Logger.Info("配置" + configIndex + ",ErrorMessage:" + ExceptionMessage.GetOnlyMessage(ex));
                        if (string.IsNullOrEmpty(ExeFile))
                        {
                            // The ExeFile entry itself could not be read; later indexes would fail the
                            // same way, so stop instead of looping without end.
                            break;
                        }
                        // Otherwise fall through to the loop increment and continue with the next entry.
                    }
                }
            }, Logger, this.GetType().Name);



//配置文件-Kulv.YCF.KeepTaskRun.ini


[配置1]
ExeFile1=D:\Task\StockTask\YCF.Stock.Task.exe
ServiceName1=YCF_Stock_Task
CellPhone1=15920522222,15920522223
MapApiAddress1=
[配置2]
ExeFile2=D:\Task\StockTask\YCF.Stock.Task2.exe
ServiceName2=YCF_Stock_Task2
CellPhone2=15920522222,15920522223
MapApiAddress2=
[配置3]
ExeFile3=D:\Task\StockTask\YCF.Stock.Task3.exe
ServiceName3=YCF_Stock_Task3
CellPhone3=15920522222,15920522223
MapApiAddress3=



//判断方法:


/// <summary>
        /// Determines whether any running process was started from the given executable file.
        /// </summary>
        /// <param name="exefile">
        /// Full path of the executable, e.g. D:\FinancePartTask\Kulv.YCF.Task.exe.
        /// Compared case-insensitively against each process's main module path.
        /// </param>
        /// <returns>
        /// <c>true</c> if a process whose main module path equals <paramref name="exefile"/> is found;
        /// otherwise <c>false</c> (including when <paramref name="exefile"/> is null or empty).
        /// </returns>
        public static bool IsProgramRun(string exefile)
        {
            if (string.IsNullOrEmpty(exefile))
            {
                return false;
            }

            bool found = false;
            foreach (Process proc in Process.GetProcesses())
            {
                // Every Process returned by GetProcesses holds OS resources, so dispose all of them,
                // including the ones after a match has already been found.
                using (proc)
                {
                    if (found)
                    {
                        continue;
                    }

                    try
                    {
                        // Ignore case: the configured path and the path reported by the OS
                        // may differ only in letter casing.
                        found = string.Equals(proc.MainModule.FileName, exefile, StringComparison.OrdinalIgnoreCase);
                    }
                    catch (Exception)
                    {
                        // Best effort: the main module of some processes cannot be read
                        // (the access throws); such processes are skipped.
                    }
                }
            }
            return found;
        }

        /// <summary>
        /// Runs the given command line in a hidden cmd.exe session and returns what the
        /// session wrote to standard output.
        /// </summary>
        /// <param name="cmd">The command line to execute.</param>
        /// <param name="cmdCurrentDir">
        /// Optional directory to switch to (via "cd /d") before the command runs.
        /// </param>
        /// <returns>The complete standard output of the cmd.exe session.</returns>
        public static string RunCmd(string cmd, string cmdCurrentDir = null)
        {
            if (!string.IsNullOrEmpty(cmdCurrentDir))
            {
                cmd = "cd /d " + cmdCurrentDir + " & " + cmd;
            }
            // Always chain "exit" so cmd.exe terminates whether or not the command succeeds;
            // otherwise ReadToEnd() would wait forever.
            cmd = cmd + " &exit";

            using (var p = new Process())
            {
                p.StartInfo.FileName = "cmd.exe";
                p.StartInfo.UseShellExecute = false;        // required for stream redirection
                p.StartInfo.RedirectStandardInput = true;   // the command is fed through stdin
                p.StartInfo.RedirectStandardOutput = true;  // stdout is captured as the result
                p.StartInfo.RedirectStandardError = true;   // stderr is redirected and drained below
                p.StartInfo.CreateNoWindow = true;          // no console window
                p.ErrorDataReceived += delegate { };        // stderr content is not part of the result
                p.Start();

                // Drain stderr asynchronously: a redirected stream that is never read blocks the
                // child process once the pipe buffer is full.
                p.BeginErrorReadLine();

                // Enable auto-flush before writing, then close stdin so cmd.exe sees end-of-input.
                p.StandardInput.AutoFlush = true;
                p.StandardInput.WriteLine(cmd);
                p.StandardInput.Close();

                // Read stdout to the end BEFORE waiting for exit. Waiting first deadlocks when the
                // child fills the stdout pipe buffer, because it can then never finish.
                string ret = p.StandardOutput.ReadToEnd();
                p.WaitForExit();
                return ret;
            }
        }




这样,业务task异常退出之后,程序会自动启动服务,对task正常运行多了一层保障。

有人会问,监控程序本身也会异常退出呀,根据运行情况来看,没有异常退出过,因为程序内部与数据库等等都没交互,不会引发不明情况的异常退出,所以基本上不会异常退出。