75,497
社区成员




一、架构组件
Loki 是受 Prometheus 启发的水平可扩展、高可用、多租户日志聚合系统。非常适合采集 Kubernetes Pod 的日志,关键 Loki 还易于操作且更加轻量级(相比 ELK/EFK/EFLK )。 • Loki:相当于 EFK 中的 ElasticSearch ,用于存储日志和处理查询。 • Promtail:相当于 EFK 中的 Filebeat/Fluentd ,用于采集日志并将其发送给 Loki 。 • Distributor 分发器:日志数据传输的“第一站”,Distributor 分发器接收到日志数据后,根据元数据和 hash 算法,将日志分批并行地发送到多个 Ingester 接收器上 • Ingester 接收器:接收器是一个有状态的组件,在日志进入时对其进行 gzip 压缩操作,并负责构建和刷新 chunk 块,当 chunk 块达到一定的数量或者时间后,就会刷新 chunk 块和对应的 Index 索引存储到数据库中 • Querier 查询器:给定一个时间范围和标签选择器,Querier 查询器可以从数据库中查看 Index 索引以确定哪些 chunk 块匹配,并通过 greps 将结果显示出来,它还会直接从 Ingester 接收器获取尚未刷新的最新数据 • Query frontend 查询前端:查询前端是一个可选的组件,运行在 Querier 查询器之前,起到缓存,均衡调度的功能,用于加速日志查询
二、部署
1、创建 loki-rbac
# vim loki-rbac.yaml
# ServiceAccount + Role + RoleBinding granting Loki use of its PodSecurityPolicy.
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: loki
  namespace: monitor
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: loki
  namespace: monitor
rules:
  # NOTE(review): `extensions` is the legacy apiGroup for podsecuritypolicies;
  # on clusters >= 1.16 the group is `policy` (PSP removed entirely in 1.25) — verify against target cluster.
  - apiGroups:
      - extensions
    resourceNames:
      - loki
    resources:
      - podsecuritypolicies
    verbs:
      - use
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: loki
  namespace: monitor
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: loki
subjects:
  - kind: ServiceAccount
    name: loki
2、创建 loki-configmap.yaml
# vim loki-configmap.yaml
# ConfigMap holding the Loki server configuration (loki.yaml) and one ruler
# alerting-rule file (ruler.yaml).
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: loki
  namespace: monitor
  labels:
    app: loki
data:
  loki.yaml: |
    auth_enabled: false
    ingester:
      chunk_idle_period: 3m        # flush a chunk after this long without updates, even if not full
      chunk_block_size: 262144
      chunk_retain_period: 1m      # keep a chunk in memory this long after it has been flushed
      max_transfer_retries: 0
      lifecycler:                  # ingester lifecycle and where it registers for discovery
        ring:
          kvstore:
            store: inmemory        # ring backend: consul, etcd or inmemory
          replication_factor: 1    # ingesters to write to / read from; at least 1 (default 3 for redundancy)
    limits_config:
      retention_period: 240h       # how long before log data expires
      enforce_metric_name: false
      reject_old_samples: true           # whether to reject out-of-order/old log entries
      reject_old_samples_max_age: 168h   # maximum age beyond which old entries are rejected
    schema_config:                 # which index schema applies from which date onward
      configs:
        - from: "2020-10-24"       # start date for this schema; use a past date if it is the only entry
          store: boltdb-shipper    # index store, e.g. cassandra, bigtable, dynamodb or boltdb
          object_store: filesystem # chunk store, e.g. gcs, s3, inmemory, filesystem, cassandra; defaults to `store`
          schema: v11
          index:                   # how the index tables are built and rotated
            prefix: index_         # prefix for all period tables
            period: 24h            # time range covered by each table
    server:
      http_listen_port: 3100
    storage_config:                # one or more backing stores for index and chunks
      boltdb_shipper:
        active_index_directory: /data/loki/boltdb-shipper-active
        cache_location: /data/loki/boltdb-shipper-cache
        cache_ttl: 24h
        shared_store: filesystem
      filesystem:
        directory: /data/loki/chunks
    chunk_store_config:            # chunk caching and look-back behaviour
      max_look_back_period: 0s     # query look-back limit; 0s disables it; must be <= table_manager.retention_period
    table_manager:
      retention_deletes_enabled: true  # enable table-based retention deletion
      retention_period: 48h            # retention period; must be a multiple of the index/chunk table period
    compactor:
      working_directory: /data/loki/boltdb-shipper-compactor
      shared_store: filesystem
      compaction_interval: 10m          # how often compaction runs
      retention_enabled: true           # enable retention enforcement via the compactor
      retention_delete_delay: 5m        # delay before expired data is deleted
      retention_delete_worker_count: 150  # number of deletion workers
    ruler:
      alertmanager_refresh_interval: 1m
      disable_rule_group_label: false
      evaluation_interval: 1m0s
      for_grace_period: 20m0s
      for_outage_tolerance: 1h0s
      notification_queue_capacity: 10000
      notification_timeout: 4s
      poll_interval: 10m0s
      query_stats_enabled: true
      remote_write:
        config_refresh_period: 10s
        enabled: false
      resend_delay: 2m0s
      search_pending_for: 5m0s
      sharding_strategy: default
      storage:
        type: local
        local:
          directory: /data/rules   # directory holding ruler alert-rule files
      rule_path: /data/rules-temp  # scratch directory for the ruler
      alertmanager_url: http://alertmanager:9093
      enable_alertmanager_v2: true
      enable_api: true
      enable_sharding: true
      wal_cleaner:
        period: 240h
        min_age: 12h0m0s
      wal:
        dir: /data/loki/ruler_wal
        max_age: 4h0m0s
        min_age: 5m0s
        truncate_frequency: 1h0m0s
      ring:
        kvstore:
          store: inmemory
      flush_period: 1m
  ruler.yaml: |
    groups:
      - name: test-prod-log
        rules:
          - alert: export-server-Error-Log
            expr: |
              sum(rate({job="kube-system/kube-apiserver",namespace="kube-system",pod="kube-apiserver-master"}|~ "passthrough"[5m])) > 0
            for: 10m
            labels:
              severity: log
            annotations:
              description: "Error log \r\n >Message: {{ $labels.message }} \r\n >App: {{ $labels.app }} \r\n >Job: {{ $labels.job }}"
3、创建loki
# vim loki.yaml
# In-cluster Service, NodePort Service for external access, and the Loki
# StatefulSet with a PVC template for /data.
---
apiVersion: v1
kind: Service
metadata:
  name: loki
  namespace: monitor
  labels:
    app: loki
spec:
  type: ClusterIP
  ports:
    - port: 3100
      protocol: TCP
      name: http-metrics
      targetPort: http-metrics
  selector:
    app: loki
---
apiVersion: v1
kind: Service
metadata:
  name: loki-outer
  namespace: monitor
  labels:
    app: loki
spec:
  type: NodePort
  ports:
    - port: 3100
      protocol: TCP
      name: http-metrics
      targetPort: http-metrics
      nodePort: 32537
  selector:
    app: loki
---
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: loki
  namespace: monitor
  labels:
    app: loki
spec:
  podManagementPolicy: OrderedReady
  replicas: 1
  selector:
    matchLabels:
      app: loki
  serviceName: loki
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: loki
    spec:
      serviceAccountName: loki
      initContainers: []
      containers:
        - name: loki
          image: grafana/loki:2.3.0
          imagePullPolicy: IfNotPresent
          args:
            - -config.file=/etc/loki/loki.yaml
          volumeMounts:
            - name: loki-config
              mountPath: /etc/loki
            # NOTE(review): ruler.yaml lands in /data/rules/rule-temp/, while the
            # ruler config uses /data/rules (storage) and /data/rules-temp (rule_path);
            # with local ruler storage the subdirectory is treated as a tenant name — confirm intended.
            - name: loki-ruler
              mountPath: /data/rules/rule-temp
            - name: storage
              mountPath: /data
          ports:
            - name: http-metrics
              containerPort: 3100
              protocol: TCP
          livenessProbe:
            httpGet:
              path: /ready
              port: http-metrics
              scheme: HTTP
            initialDelaySeconds: 45
            timeoutSeconds: 1
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 3
          readinessProbe:
            httpGet:
              path: /ready
              port: http-metrics
              scheme: HTTP
            initialDelaySeconds: 45
            timeoutSeconds: 1
            periodSeconds: 10
            successThreshold: 1
            failureThreshold: 3
          securityContext:
            readOnlyRootFilesystem: true
      terminationGracePeriodSeconds: 4800
      volumes:
        - name: loki-ruler
          configMap:
            defaultMode: 420
            name: loki
            items:
              - key: ruler.yaml
                path: ruler.yaml
        - name: loki-config
          configMap:
            defaultMode: 420
            name: loki
            items:
              - key: loki.yaml
                path: loki.yaml
  volumeClaimTemplates:
    - metadata:
        name: storage
      spec:
        storageClassName: managed-nfs-storage
        accessModes:
          - ReadWriteMany
        resources:
          requests:
            storage: 1Gi
4、创建promtail-rbac.yaml
# vim promtail-rbac.yaml
# ServiceAccount + ClusterRole + ClusterRoleBinding so Promtail can discover
# pods/nodes/services/endpoints cluster-wide.
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: loki-promtail
  namespace: monitor
  labels:
    app: promtail
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: promtail-clusterrole
  # NOTE(review): namespace is ignored on cluster-scoped objects; harmless but redundant.
  namespace: monitor
  labels:
    app: promtail
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "watch", "list"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: promtail-clusterrolebinding
  namespace: monitor
  labels:
    app: promtail
subjects:
  - kind: ServiceAccount
    name: loki-promtail
    namespace: monitor
roleRef:
  kind: ClusterRole
  name: promtail-clusterrole
  apiGroup: rbac.authorization.k8s.io
5、创建promtail-cm.yaml
# vim promtail-cm.yaml
# Promtail configuration: Loki client settings plus six scrape jobs that cover
# host logs, and pods addressed by name label, app label, direct controllers,
# indirect (hashed) controllers, and static/mirror pods.
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: loki-promtail
  namespace: monitor
  labels:
    app: promtail
data:
  promtail.yaml: |
    client:                    # how Promtail connects to the Loki instance
      backoff_config:          # retry behaviour when a push to Loki fails
        max_period: 5m
        max_retries: 10
        min_period: 500ms
      batchsize: 1048576       # maximum batch size sent to Loki, in bytes
      batchwait: 1s            # maximum wait before sending a batch even if not full
      external_labels: {}      # static labels added to all logs sent to Loki
      timeout: 10s             # maximum time to wait for a server response
    positions:
      filename: /run/promtail/positions.yaml
    server:
      http_listen_port: 3101
    target_config:
      sync_period: 10s
    scrape_configs:
      # Host-level logs from /var/log on each node.
      - job_name: system
        pipeline_stages:
        static_configs:
          - targets:
              - localhost
            labels:
              job: varlogs
              __path__: /var/log/*.log
      # Pods carrying a `name` label.
      - job_name: kubernetes-pods-name
        pipeline_stages:
          - docker: {}
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels:
              - __meta_kubernetes_pod_label_name
            target_label: __service__
          - source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: __host__
          - action: drop
            regex: ''
            source_labels:
              - __service__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            replacement: $1
            separator: /
            source_labels:
              - __meta_kubernetes_namespace
              - __service__
            target_label: job
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: pod
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container
          - replacement: /var/log/pods/*$1/*.log
            separator: /
            source_labels:
              - __meta_kubernetes_pod_uid
              - __meta_kubernetes_pod_container_name
            target_label: __path__
      # Pods carrying an `app` label (but no `name` label).
      - job_name: kubernetes-pods-app
        pipeline_stages:
          - docker: {}
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: drop
            regex: .+
            source_labels:
              - __meta_kubernetes_pod_label_name
          - source_labels:
              - __meta_kubernetes_pod_label_app
            target_label: __service__
          - source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: __host__
          - action: drop
            regex: ''
            source_labels:
              - __service__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            replacement: $1
            separator: /
            source_labels:
              - __meta_kubernetes_namespace
              - __service__
            target_label: job
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: pod
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container
          - replacement: /var/log/pods/*$1/*.log
            separator: /
            source_labels:
              - __meta_kubernetes_pod_uid
              - __meta_kubernetes_pod_container_name
            target_label: __path__
      # Pods owned directly by a controller (controller name has no hash suffix).
      - job_name: kubernetes-pods-direct-controllers
        pipeline_stages:
          - docker: {}
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: drop
            regex: .+
            separator: ''
            source_labels:
              - __meta_kubernetes_pod_label_name
              - __meta_kubernetes_pod_label_app
          - action: drop
            regex: '[0-9a-z-.]+-[0-9a-f]{8,10}'
            source_labels:
              - __meta_kubernetes_pod_controller_name
          - source_labels:
              - __meta_kubernetes_pod_controller_name
            target_label: __service__
          - source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: __host__
          - action: drop
            regex: ''
            source_labels:
              - __service__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            replacement: $1
            separator: /
            source_labels:
              - __meta_kubernetes_namespace
              - __service__
            target_label: job
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: pod
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container
          - replacement: /var/log/pods/*$1/*.log
            separator: /
            source_labels:
              - __meta_kubernetes_pod_uid
              - __meta_kubernetes_pod_container_name
            target_label: __path__
      # Pods owned via an intermediate controller (e.g. ReplicaSet with hash suffix).
      - job_name: kubernetes-pods-indirect-controller
        pipeline_stages:
          - docker: {}
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: drop
            regex: .+
            separator: ''
            source_labels:
              - __meta_kubernetes_pod_label_name
              - __meta_kubernetes_pod_label_app
          - action: keep
            regex: '[0-9a-z-.]+-[0-9a-f]{8,10}'
            source_labels:
              - __meta_kubernetes_pod_controller_name
          - action: replace
            regex: '([0-9a-z-.]+)-[0-9a-f]{8,10}'
            source_labels:
              - __meta_kubernetes_pod_controller_name
            target_label: __service__
          - source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: __host__
          - action: drop
            regex: ''
            source_labels:
              - __service__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            replacement: $1
            separator: /
            source_labels:
              - __meta_kubernetes_namespace
              - __service__
            target_label: job
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: pod
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container
          - replacement: /var/log/pods/*$1/*.log
            separator: /
            source_labels:
              - __meta_kubernetes_pod_uid
              - __meta_kubernetes_pod_container_name
            target_label: __path__
      # Static (mirror) pods, identified by the kubernetes.io/config.mirror annotation.
      - job_name: kubernetes-pods-static
        pipeline_stages:
          - docker: {}
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - action: drop
            regex: ''
            source_labels:
              - __meta_kubernetes_pod_annotation_kubernetes_io_config_mirror
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_label_component
            target_label: __service__
          - source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: __host__
          - action: drop
            regex: ''
            source_labels:
              - __service__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            replacement: $1
            separator: /
            source_labels:
              - __meta_kubernetes_namespace
              - __service__
            target_label: job
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: namespace
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: pod
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container
          - replacement: /var/log/pods/*$1/*.log
            separator: /
            source_labels:
              - __meta_kubernetes_pod_annotation_kubernetes_io_config_mirror
              - __meta_kubernetes_pod_container_name
            target_label: __path__
6、创建promtail-ds.yaml
# vim promtail-ds.yaml
# Promtail DaemonSet: one collector per node, reading host container logs
# read-only and shipping them to the in-cluster Loki service.
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: loki-promtail
  namespace: monitor
  labels:
    app: promtail
spec:
  selector:
    matchLabels:
      app: promtail
  updateStrategy:
    rollingUpdate:
      maxUnavailable: 1
    type: RollingUpdate
  template:
    metadata:
      labels:
        app: promtail
    spec:
      serviceAccountName: loki-promtail
      containers:
        - name: promtail
          image: grafana/promtail:2.3.0
          imagePullPolicy: IfNotPresent
          args:
            - -config.file=/etc/promtail/promtail.yaml
            - -client.url=http://loki:3100/loki/api/v1/push
          env:
            - name: HOSTNAME
              valueFrom:
                fieldRef:
                  apiVersion: v1
                  fieldPath: spec.nodeName
          volumeMounts:
            - mountPath: /etc/promtail
              name: config
            - mountPath: /run/promtail
              name: run
            - mountPath: /var/lib/docker/containers
              name: docker
              readOnly: true
            - mountPath: /var/log/pods
              name: pods
              readOnly: true
          ports:
            - containerPort: 3101
              name: http-metrics
              protocol: TCP
          securityContext:
            readOnlyRootFilesystem: true
            runAsGroup: 0
            runAsUser: 0
          readinessProbe:
            failureThreshold: 5
            httpGet:
              path: /ready
              port: http-metrics
              scheme: HTTP
            initialDelaySeconds: 10
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 1
      volumes:
        - name: config
          configMap:
            defaultMode: 420
            name: loki-promtail
        - name: run
          hostPath:
            path: /run/promtail
            type: ""
        - name: docker
          hostPath:
            path: /var/lib/docker/containers
        - name: pods
          hostPath:
            path: /var/log/pods