本节重点介绍 :

  • 自定义一个模块的二开方法
  • 自定义一个errLog模块,统计/var/log/message 中的错误日志

自定义一个模块的二开方法

  • collect/目录下新建一个 errlog.go
  • 定义一个结构体体errLogCollector
type errLogCollector struct {
	logger log.Logger
}
  • 写一个new xxCollector的工厂函数,一个参数为 log.logger
func NewErrLogCollector(logger log.Logger) (Collector, error) {
	return &errLogCollector{logger}, nil
}
  • 写一个 init方法调用 registerCollector 注册自己
const (
	errLogSubsystem = "errlog"
)
func init() {
	registerCollector(errLogSubsystem, defaultEnabled, NewErrLogCollector)
}
  • 给这个结构体绑定一个Update方法,签名如下
func (c *xxCollector) Update(ch chan<- prometheus.Metric) error {}

完成这个Update方法

流程说明

  • 分析 日志文件
  • $5是app的名字
  • 有info error等level字段
Aug 15 09:54:02 prome-master01 containerd: time="2021-08-15T09:54:02.839718531+08:00" level=info msg="ExecSync for \"cedb3c6d71c0422dfe95d16b242fd08e78096606f1f9614e945cc99581b92f92\" returns with exit code 1"
  • 执行awk可以得到一个日志文件中错误日志按app name进行分布的结果
grep -i error /var/log/messages-20210814 |awk '{a[$5]++}END{for(i in a) print i,a[i]}'
telegraf: 3872
pushgateway: 2
kubelet: 16822
containerd: 9350
kernel: 5
grafana-server: 10

新增一个执行shell命令的函数

func errLogGrep() string {
	errLogCmd := `grep -i error /var/log/messages |awk '{a[$5]++}END{for(i in a) print i,a[i]}'`
	cmd := exec.Command("sh", "-c", errLogCmd)
	output, _ := cmd.CombinedOutput()
	return string(output)

}

然后在Update中按行遍历

  • 按行遍历之后再按 :分割就能得到 appname 和value
  • 然后将name中的 - 替换为_
  • value 字符串转换为int
  • 然后构建一个 metric对象塞入ch中即可
func (c *errLogCollector) Update(ch chan<- prometheus.Metric) error {
	var metricType prometheus.ValueType
	metricType = prometheus.GaugeValue
	output := errLogGrep()
	for _, line := range strings.Split(output, "\n") {
		l := strings.Split(line, ":")
		if len(l) != 2 {
			continue
		}
		name := strings.TrimSpace(l[0])
		value := strings.TrimSpace(l[1])
		v, _ := strconv.Atoi(value)
		name = strings.Replace(name, "-", "_", -1)
		level.Debug(c.logger).Log("msg", "Set errLog", "name", name, "value", value)

		ch <- prometheus.MustNewConstMetric(
			prometheus.NewDesc(
				prometheus.BuildFQName(namespace, errLogSubsystem, name),
				fmt.Sprintf("/var/log/message err log %s.", name),
				nil, nil,
			),
			metricType, float64(v),
		)
	}
	return nil
}

运行我们的程序

  • 打包
  • 编译 go build -v node_exporter.go
  • 然后运行 ./node_exporter --web.listen-address=“:9101”
  • 查询errlog metrics
[root@prome-master01 tgzs]# curl -s localhost:9101/metrics |grep node_errlog
# HELP node_errlog_containerd /var/log/message err log containerd.
# TYPE node_errlog_containerd gauge
node_errlog_containerd 9350
# HELP node_errlog_grafana_server /var/log/message err log grafana_server.
# TYPE node_errlog_grafana_server gauge
node_errlog_grafana_server 10
# HELP node_errlog_kernel /var/log/message err log kernel.
# TYPE node_errlog_kernel gauge
node_errlog_kernel 5
# HELP node_errlog_kubelet /var/log/message err log kubelet.
# TYPE node_errlog_kubelet gauge
node_errlog_kubelet 16822
# HELP node_errlog_pushgateway /var/log/message err log pushgateway.
# TYPE node_errlog_pushgateway gauge
node_errlog_pushgateway 2
# HELP node_errlog_telegraf /var/log/message err log telegraf.
# TYPE node_errlog_telegraf gauge
node_errlog_telegraf 3872

完整的errlog.go

// Copyright 2015 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// +build darwin linux openbsd
// +build !nomeminfo

package collector

import (
	"fmt"
	"os/exec"
	"strconv"
	"strings"

	"github.com/go-kit/log"
	"github.com/go-kit/log/level"
	"github.com/prometheus/client_golang/prometheus"
)

const (
	errLogSubsystem = "errlog"
)

type errLogCollector struct {
	logger log.Logger
}

func init() {
	registerCollector(errLogSubsystem, defaultEnabled, NewErrLogCollector)
}

// NewMeminfoCollector returns a new Collector exposing memory stats.
func NewErrLogCollector(logger log.Logger) (Collector, error) {
	return &errLogCollector{logger}, nil
}

func errLogGrep() string {
	errLogCmd := `grep -i error /var/log/messages |awk '{a[$5]++}END{for(i in a) print i,a[i]}'`
	cmd := exec.Command("sh", "-c", errLogCmd)
	output, _ := cmd.CombinedOutput()
	return string(output)

}

// Update calls (*meminfoCollector).getMemInfo to get the platform specific
// memory metrics.
func (c *errLogCollector) Update(ch chan<- prometheus.Metric) error {
	var metricType prometheus.ValueType
	metricType = prometheus.GaugeValue
	output := errLogGrep()
	for _, line := range strings.Split(output, "\n") {
		l := strings.Split(line, ":")
		if len(l) != 2 {
			continue
		}
		name := strings.TrimSpace(l[0])
		value := strings.TrimSpace(l[1])
		v, _ := strconv.Atoi(value)
		name = strings.Replace(name, "-", "_", -1)
		level.Debug(c.logger).Log("msg", "Set errLog", "name", name, "value", value)

		ch <- prometheus.MustNewConstMetric(
			prometheus.NewDesc(
				prometheus.BuildFQName(namespace, errLogSubsystem, name),
				fmt.Sprintf("/var/log/message err log %s.", name),
				nil, nil,
			),
			metricType, float64(v),
		)
	}
	return nil
}

本节重点总结 :

  • 自定义一个模块的二开方法
  • 自定义一个errLog模块,统计/var/log/message 中的错误日志