A while ago I read Ma Ge's new Kubernetes book; the last few chapters cover EFK. I tried deploying it and ran into quite a few problems, so here are my notes, with some improvements.

Preparation

All components are deployed with Helm 3, so first add a couple of repositories.

helm repo add bitnami https://charts.bitnami.com/bitnami
helm repo add fluent https://fluent.github.io/helm-charts

Storage uses an NFS-backed StorageClass; provisioning that is up to you.

The Kubernetes cluster has three worker nodes with 4 GB of RAM each.
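
All of the helm install commands below target a dedicated logging namespace, so create it up front and refresh the repository index first (a minimal sketch):

kubectl create namespace logging
helm repo update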

Deploy Elasticsearch

Adjust the dynamic PV (StorageClass) and Prometheus-related settings to match your environment; it is also fine if you do not have Prometheus installed.
 

[root@bjzb-lecar-ops-jenkins-master-33 cluster-log]# cat bitnami-elasticsearch-values.yaml
clusterDomain: cluster.local   # Kubernetes cluster domain
name: elasticsearch            # Elasticsearch cluster name
master:                        # master-eligible node settings
  name: master
  replicas: 2                  # number of instances
  heapSize: 512m               # JVM heap size
  resources:
    limits: {}
    #   cpu: 1000m
    #   memory: 2048Mi
    requests:
      cpu: 200m
      memory: 512Mi
  persistence:                 # persistent volume settings
    enabled: true              # when disabled, an emptyDir volume is used instead
    storageClass: "managed-nfs-storage"   # dynamically provision PVs from this StorageClass
    # existingClaim: my-persistent-volume-claim   # use an existing PVC
    # existingVolume: my-persistent-volume        # use an existing PV
    accessModes:
      - ReadWriteOnce
    size: 8Gi
  service:                     # service settings
    type: ClusterIP
    port: 9300                 # port for inter-node transport traffic
coordinating:                  # coordinating-only node settings
  replicas: 2                  # number of instances
  heapSize: 128m
  resources:
    requests:
      cpu: 250m
      memory: 512Mi
  service:                     # coordinating-only service; this is the entry point for Elasticsearch client requests
    type: ClusterIP
    port: 9200
    # nodePort:
    # loadBalancerIP:
data:                          # data node settings
  name: data
  replicas: 2
  heapSize: 512m
  resources:                   # data nodes are CPU- and IO-intensive; set requests and limits carefully
    limits: {}
    #   cpu: 100m
    #   memory: 2176Mi
    requests:
      cpu: 250m
      memory: 512Mi
  persistence:
    enabled: true
    storageClass: "managed-nfs-storage"
    # existingClaim: my-persistent-volume-claim
    # existingVolume: my-persistent-volume
    accessModes:
      - ReadWriteOnce
    size: 10Gi
ingest:                        # ingest node settings
  enabled: false               # disabled by default
  name: ingest
  replicas: 2
  heapSize: 128m
  resources:
    limits: {}
    #   cpu: 100m
    #   memory: 384Mi
    requests:
      cpu: 500m
      memory: 512Mi
  service:
    type: ClusterIP
    port: 9300
curator:                       # curator settings
  enabled: false
  name: curator
  cronjob:                     # schedule and related job settings
    # At 01:00 every day
    schedule: "0 1 * * *"
    concurrencyPolicy: ""
    failedJobsHistoryLimit: ""
    successfulJobsHistoryLimit: ""
    jobRestartPolicy: Never
metrics:                       # exporter that exposes metrics
  enabled: true
  name: metrics
  service:
    type: ClusterIP
    annotations:               # annotations for metrics scraping
      prometheus.io/scrape: "true"
      prometheus.io/port: "9114"
  resources:
    limits: {}
    #   cpu: 100m
    #   memory: 128Mi
    requests:
      cpu: 100m
      memory: 128Mi
  podAnnotations:              # Pod annotations that enable metrics scraping
    prometheus.io/scrape: "true"
    prometheus.io/port: "8080"
  serviceMonitor:              # ServiceMonitor settings
    enabled: false
    namespace: monitoring
    interval: 10s
    scrapeTimeout: 10s
helm install es -f bitnami-elasticsearch-values.yaml  bitnami/elasticsearch -n logging
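
Before moving on, confirm that the pods come up and that the cluster reports a healthy status. A rough check (the coordinating service name is the same one referenced in the fluent-bit config below):

kubectl get pods -n logging
kubectl run es-check -n logging --rm -it --restart=Never --image=curlimages/curl -- \
    curl -s "http://es-elasticsearch-coordinating-only:9200/_cluster/health?pretty"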

This step is where all kinds of problems show up: slow image pulls, insufficient cluster resources (I had already dialed the resource requests in the YAML down very low), and storage permission issues. Just be careful here.
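
When a pod is stuck in Pending or CrashLoopBackOff, the pod events usually point at the cause (image pull, unbound PVC, insufficient resources). For example (<pod-name> is a placeholder):

kubectl describe pod <pod-name> -n logging
kubectl get events -n logging --sort-by=.lastTimestamp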

Deploy fluent-bit

[root@bj-k8s-master efk]# cat fluent-fluent-bit-values.yaml
# kind -- DaemonSet or Deployment
kind: DaemonSet

image:
  repository: fluent/fluent-bit
  pullPolicy: IfNotPresent

service:
  type: ClusterIP
  port: 2020
  annotations:
    prometheus.io/path: "/api/v1/metrics/prometheus"
    prometheus.io/port: "2020"
    prometheus.io/scrape: "true"

resources: {}
  # limits:
  #   cpu: 100m
  #   memory: 128Mi
  # requests:
  #   cpu: 100m
  #   memory: 128Mi

tolerations:
  - key: node-role.kubernetes.io/master
    effect: NoSchedule

config:
  service: |
    [SERVICE]
        Flush 3
        Daemon Off
        #Log_Level info
        Log_Level debug
        Parsers_File custom_parsers.conf
        Parsers_File parsers.conf
        HTTP_Server On
        HTTP_Listen 0.0.0.0
        HTTP_Port 2020

  inputs: |
    [INPUT]
        Name tail
        Path /var/log/containers/*.log
        Parser docker
        Tag kube.*
        Mem_Buf_Limit 5MB
        Skip_Long_Lines On
        Refresh_Interval 10
    [INPUT]
        Name tail
        Path /var/log/containers/nginx-demo*.log
        Parser docker
        Tag nginx-demo.*
        Mem_Buf_Limit 5MB
        Skip_Long_Lines On
        Refresh_Interval 10
    [INPUT]
        Name tail
        Path /var/log/containers/ingress-nginx-controller*.log
        Parser docker
        Tag ingress-nginx-controller.*
        Mem_Buf_Limit 5MB
        Skip_Long_Lines On
        Refresh_Interval 10

  filters: |
    [FILTER]
        Name kubernetes
        Match kube.*
        Kube_URL https://kubernetes.default.svc:443
        Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token
        Kube_Tag_Prefix kube.var.log.containers.
        Merge_Log On
        Keep_Log Off
        K8S-Logging.Exclude On
        K8S-Logging.Parser On
    [FILTER]
        Name kubernetes
        Match ingress-nginx-controller.*
        Kube_URL https://kubernetes.default.svc:443
        Kube_CA_File /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        Kube_Token_File /var/run/secrets/kubernetes.io/serviceaccount/token
        Kube_Tag_Prefix kube.var.log.containers.
        Merge_Log On
        Merge_Parser ingress-nginx
        Keep_Log Off
        K8S-Logging.Exclude On
        K8S-Logging.Parser On

  outputs: |
    [OUTPUT]
        Name es
        Match kube.*
        Host es-elasticsearch-coordinating-only.logging.svc.cluster.local.
        Logstash_Format On
        Logstash_Prefix k8s-cluster
        Type flb_type
        Replace_Dots On
    [OUTPUT]
        Name es
        Match nginx-demo.*
        Host es-elasticsearch-coordinating-only.logging.svc.cluster.local.
        Logstash_Format On
        Logstash_Prefix nginx-demo
        Type flb_type
        Replace_Dots On
    [OUTPUT]
        Name es
        Match ingress-nginx-controller.*
        Host es-elasticsearch-coordinating-only.logging.svc.cluster.local.
        Logstash_Format On
        Logstash_Prefix ingress-nginx-controller
        Type flb_type
        Replace_Dots On

  customParsers: |
    [PARSER]
        Name docker_no_time
        Format json
        Time_Keep Off
        Time_Key time
        Time_Format %Y-%m-%dT%H:%M:%S.%L
    [PARSER]
        Name ingress-nginx
        Format regex
        Regex ^(?<message>(?<remote>[^ ]*) - (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*) "(?<referer>[^\"]*)" "(?<agent>[^\"]*)" (?<request_length>[^ ]*) (?<request_time>[^ ]*) \[(?<proxy_upstream_name>[^ ]*)\] \[(?<proxy_alternative_upstream_name>[^ ]*)\] (?<upstream_addr>[^ ]*) (?<upstream_response_length>[^ ]*) (?<upstream_response_time>[^ ]*) (?<upstream_status>[^ ]*) (?<req_id>[^ ]*).*)$
        Time_Key time
        Time_Format %d/%b/%Y:%H:%M:%S %z
helm install fb -f fluent-fluent-bit-values.yaml fluent/fluent-bit -n logging
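
Check that the DaemonSet scheduled a pod on every node and that fluent-bit connects to Elasticsearch without errors. A quick sketch (fb-fluent-bit is the workload name the chart typically generates for the release name fb; adjust if yours differs):

kubectl get daemonset,pods -n logging -l app.kubernetes.io/instance=fb
kubectl logs -n logging daemonset/fb-fluent-bit --tail=20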

Mind the Elasticsearch host address: if your namespace differs from mine, remember to change it — I got burned by this. The config in the book writes everything into a single index and does not parse the nginx-ingress logs at all. After digging into fluent-bit for quite a while, this is roughly where I landed: use the different Docker log file names to tag the streams, so each application gets its own index in Elasticsearch. Of course, if the volume is small, writing everything into one index and filtering a given application's logs by label fields works too.
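
To confirm that each application really lands in its own index, list the indices; the names should start with the Logstash_Prefix values set above plus a date suffix (a quick sketch):

kubectl run idx-check -n logging --rm -it --restart=Never --image=curlimages/curl -- \
    curl -s "http://es-elasticsearch-coordinating-only:9200/_cat/indices?v"
# expect entries like k8s-cluster-<date>, nginx-demo-<date>, ingress-nginx-controller-<date>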

Deploy Kibana

[root@bj-k8s-master efk]# cat bitnami-kibana-values.yaml
replicaCount: 1

updateStrategy:
  type: RollingUpdate

plugins:
  - https://github.com/pjhampton/kibana-prometheus-exporter/releases/download/7.8.1/kibana-prometheus-exporter-7.8.1.zip

persistence:
  enabled: true
  storageClass: "managed-nfs-storage"
  # existingClaim: your-claim
  accessMode: ReadWriteOnce
  size: 10Gi

service:
  port: 5601
  type: ClusterIP
  # nodePort:
  externalTrafficPolicy: Cluster
  annotations: {}
  # loadBalancerIP:
  # extraPorts:

ingress:
  enabled: true
  certManager: false
  annotations:
    kubernetes.io/ingress.class: nginx
  hostname: kibana.ilinux.io
  path: /
  tls: false
  # tlsHosts:
  #   - www.kibana.local
  #   - kibana.local
  # tlsSecret: kibana.local-tls

configuration:
  server:
    basePath: ""
    rewriteBasePath: false

metrics:
  enabled: true
  service:
    annotations:
      prometheus.io/scrape: "true"
      prometheus.io/port: "80"
      prometheus.io/path: "_prometheus/metrics"
  serviceMonitor:
    enabled: false
    # namespace: monitoring
    # interval: 10s
    # scrapeTimeout: 10s
    # selector:
    #   prometheus: my-prometheus

elasticsearch:
  hosts:
    - es-elasticsearch-coordinating-only.logging.svc.cluster.local.
    # - elasticsearch-2
  port: 9200
helm install kib -f bitnami-kibana-values.yaml bitnami/kibana -n logging
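
A quick status check before opening the UI (a sketch; the label selector assumes the chart's standard labels):

kubectl get pods,svc,ingress -n logging -l app.kubernetes.io/name=kibana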

As above, if your namespace differs from mine, remember to change the Elasticsearch address. The book has a pitfall here: after pulling the chart and inspecting it, the values.yaml does not match what the book shows, presumably because the chart version was different when the book was written.
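
Because of that, it helps to dump the default values of the exact chart version you are installing and compare them with the book (or with these notes) before editing:

helm show values bitnami/kibana > kibana-default-values.yaml
helm show values bitnami/elasticsearch > es-default-values.yaml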

Configure Kibana

Add a hosts entry resolving the hostname to the ingress address, then open Kibana.
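
On the machine you browse from, point the hostname from the ingress section at a node running the ingress controller (the IP below is a placeholder):

# /etc/hosts
192.168.1.100   kibana.ilinux.io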

Add matching index patterns

Each application's logs end up in their own index

 

The ingress-nginx logs are parsed into structured fields

Fluent Bit official documentation

https://docs.fluentbit.io/

Online regex testing

https://rubular.com/