一、普罗米修斯监控服务
1、普罗米修斯监控分为两种:
- 携带metrics接口的服务
- 不携带metrics接口的服务
2、普罗米修斯监控携带metrics接口的服务的流程:
- 通过endpoint获取需要监控的etcd的地址
- 创建service,给集群内部的servicemonitor使用
- 创建servicemonitor部署需要的证书
- 重启普罗米修斯监控pod,载入监控项
1.监控携带metrics接口的服务(etcd)
携带metrics接口的服务就表示可以通过metrics接口获取服务的监控项和监控信息。
kubeadm方法安装的集群,etcd在集群内部;二进制方法安装的集群,etcd在集群外部。
- 如果etcd在集群外部,只能通过endpoint指定IP的方法获取
- 如果etcd在集群内部,既可以通过指定IP的方法,也可以通过service别名(ExternalName)获取
1)测试etcd服务的metrics接口
#1.查看etcd的端口
[root@gdx1 ~]# netstat -lntp |grep etcd
tcp 0 0 192.168.12.11:2379 0.0.0.0:* LISTEN 85425/etcd
tcp 0 0 127.0.0.1:2379 0.0.0.0:* LISTEN 85425/etcd
tcp 0 0 192.168.12.11:2380 0.0.0.0:* LISTEN 85425/etcd
tcp 0 0 127.0.0.1:2381 0.0.0.0:* LISTEN 85425/etcd
#2379是用于集群外部连接etcd集群的端口
#2380是etcd集群内部端口:etcd集群是高可用的分布式集群,相当于多台主机组合成一台,集群成员之间通过该端口相互通信
#2.查看etcd服务的metrics接口,也就是可用的监控项
[root@gdx1 ~]# curl -k --cert /etc/kubernetes/pki/apiserver-etcd-client.crt --key /etc/kubernetes/pki/apiserver-etcd-client.key https://127.0.0.1:2379/metrics
# TYPE promhttp_metric_handler_requests_total counter
promhttp_metric_handler_requests_total{code="200"} 0
promhttp_metric_handler_requests_total{code="500"} 0
promhttp_metric_handler_requests_total{code="503"} 0
#-k:忽略证书校验(不验证服务端证书)
# /etc/kubernetes/pki/apiserver-etcd-client.crt:连接etcd证书
#/etc/kubernetes/pki/apiserver-etcd-client.key:etcd证书私钥
#https://127.0.0.1:2379:url+端口
2)通过endpoint获取需要监控的etcd的地址
#1.编辑配置清单
[root@gdx1 yaml]# vim etcd-endpoints.yaml
kind: Endpoints
apiVersion: v1
metadata:
namespace: kube-system #注意命名空间
name: etcd-monitor
labels:
k8s: etcd
subsets:
- addresses:
- ip: "192.168.12.11" #etcd节点对应的主机ip,有几台就写几台
ports:
- port: 2379 #etcd集群的端口号
protocol: TCP
name: etcd
#2.部署
[root@gdx1 yaml]# kubectl apply -f etcd-endpoints.yaml
endpoints/etcd-monitor created
#3.查看结果
[root@gdx1 yaml]# kubectl get endpoints -n kube-system
NAME ENDPOINTS AGE
etcd-monitor 192.168.12.11:2379 33s
3)创建service,给集群内部的servicemonitor使用
#1.编写service配置清单
[root@gdx1 yaml]# vim etcd-service.yaml
kind: Service
apiVersion: v1
metadata:
namespace: kube-system
name: etcd-monitor
labels:
k8s: etcd
spec:
ports:
- port: 2379
targetPort: 2379
name: etcd
protocol: TCP
#2.部署
[root@gdx1 yaml]# kubectl apply -f etcd-service.yaml
service/etcd-monitor created
#3.查看分配的IP地址
[root@gdx1 yaml]# kubectl get svc -n kube-system
NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
etcd-monitor ClusterIP 10.110.7.57 <none> 2379/TCP 16s
kube-dns ClusterIP 10.96.0.10 <none> 53/UDP,53/TCP,9153/TCP 6d22h
kubelet ClusterIP None <none> 10250/TCP,10255/TCP,4194/TCP 27h
#4.测试连接结果,通过分配的集群内部的IP地址
[root@gdx1 yaml]# curl -k --cert /etc/kubernetes/pki/apiserver-etcd-client.crt --key /etc/kubernetes/pki/apiserver-etcd-client.key https://10.110.7.57:2379/metrics
# TYPE promhttp_metric_handler_requests_total counter
promhttp_metric_handler_requests_total{code="200"} 1
promhttp_metric_handler_requests_total{code="500"} 0
promhttp_metric_handler_requests_total{code="503"} 0
#注:获取到可用的监控项就说明上述操作没有问题
4)创建servicemonitor,并指定访问etcd需要的证书
#1.编辑配置清单
[root@gdx1 yaml]# vim etcd-servicemontior.yaml
kind: ServiceMonitor
apiVersion: monitoring.coreos.com/v1
metadata:
namespace: monitoring
name: etcd-monitor
labels:
k8s: etcd
spec:
endpoints:
- interval: 3s
port: etcd
scheme: https
tlsConfig:
caFile: /etc/prometheus/secrets/etcd-certs/ca.crt
certFile: /etc/prometheus/secrets/etcd-certs/peer.crt
keyFile: /etc/prometheus/secrets/etcd-certs/peer.key
insecureSkipVerify: true
selector:
matchLabels:
k8s: etcd
namespaceSelector:
matchNames:
- "kube-system"
#2.部署
[root@gdx1 yaml]# kubectl apply -f etcd-servicemontior.yaml
servicemonitor.monitoring.coreos.com/etcd-monitor created
#3.查看部署结果
[root@gdx1 yaml]# kubectl get servicemonitors -n monitoring
NAME AGE
etcd-monitor 110s
5)创建一个secrets,用来保存prometheus监控的etcd的证书
[root@gdx1 yaml]# kubectl create secret generic etcd-certs -n monitoring --from-file=/etc/kubernetes/pki/etcd/ca.crt --from-file=/etc/kubernetes/pki/etcd/peer.crt --from-file=/etc/kubernetes/pki/etcd/peer.key
secret/etcd-certs created
6)重启普罗米修斯监控pod(prometheus-k8s-0),载入监控项
#1.编辑配置清单
[root@gdx1 yaml]# vim prometheus-k8s.yaml
kind: Prometheus
apiVersion: monitoring.coreos.com/v1
metadata:
labels:
prometheus: k8s
name: k8s
namespace: monitoring
spec:
alerting:
alertmanagers:
- name: alertmanager-main
namespace: monitoring
port: web
- name: alertmanager-main-etcd
namespace: kube-system
port: etcd
image: quay.io/prometheus/prometheus:v2.15.2
nodeSelector:
kubernetes.io/os: linux
podMonitorNamespaceSelector: {}
podMonitorSelector: {}
replicas: 2
resources:
requests:
memory: 400Mi
ruleSelector:
matchLabels:
prometheus: k8s
role: alert-rules
securityContext:
fsGroup: 2000
runAsNonRoot: true
runAsUser: 1000
serviceAccountName: prometheus-k8s
serviceMonitorNamespaceSelector: {}
serviceMonitorSelector: {}
version: v2.15.2
secrets:
- etcd-certs
#2.部署
[root@gdx1 yaml]# kubectl apply -f prometheus-k8s.yaml
prometheus.monitoring.coreos.com/k8s configured
#3.查看
[root@gdx1 yaml]# kubectl get pods -n monitoring
NAME READY STATUS RESTARTS AGE
prometheus-k8s-0 3/3 Running 1 16s
prometheus-k8s-1 3/3 Running 1 16s
7)测试是否监控成功
8)grafana出图
选择一个dashboard模板