Skip to content

Latest commit

 

History

History
executable file
·
202 lines (151 loc) · 4.41 KB

File metadata and controls

executable file
·
202 lines (151 loc) · 4.41 KB

監控 Docker 狀態

現狀

# Status of the local host
docker stats

# Status of multiple containers (Compose)
docker compose stats

長期

node-exporter

docker host 狀態

services:
  node_exporter:
    container_name: node_exporter
    image: 'quay.io/prometheus/node-exporter:latest'

    # Bring the container back automatically when it exits or when the
    # Docker service is restarted.
    restart: always

    # Host networking: lets Node Exporter collect the host's network metrics
    # correctly and serve directly on the host's port 9100.
    network_mode: 'host'

    # Host PID namespace: lets Node Exporter collect process counts, system
    # load and similar metrics correctly.
    pid: 'host'

    # Mount the host root filesystem read-only at /host so the exporter can
    # read key system files such as /proc and /sys.
    volumes:
      - '/:/host:ro,rslave'

    # Tell the exporter where the host root filesystem is mounted.
    command: ['--path.rootfs=/host']

cadvisor

container 狀態

services:
  cadvisor:
    image: ghcr.io/google/cadvisor:0.56.2
    container_name: cadvisor
    volumes:
      # Mounts cAdvisor requires. All of them are read-only: /var/run was
      # previously mounted read-write, which granted write access the
      # collector does not need (upstream run instructions use :ro as well).
      - /:/rootfs:ro
      - /var/run:/var/run:ro
      - /sys:/sys:ro
      - /var/lib/docker/:/var/lib/docker:ro
      - /dev/disk/:/dev/disk:ro
    # Host networking is used, so no port needs to be published.
    network_mode: host
    # Alternatives kept for reference:
    # pid: host
    # Publish port 8080, the endpoint Prometheus scrapes:
    # ports:
    #   - '8080:8080'
    restart: always
    privileged: true
    devices:
      - /dev/kmsg

prometheus

收集數據：指標的收集與儲存

services:
  prometheus:
    container_name: prometheus
    image: 'prom/prometheus:latest'
    restart: unless-stopped

    # Host networking is used; the published-port and dedicated-network
    # alternatives are kept below for reference.
    network_mode: host
    # ports:
    #   - "9090:9090"  # expose the Prometheus UI port
    # networks:
    #   - monitor-net

    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      # Keep collected data for 90 days.
      - '--storage.tsdb.retention.time=90d'

    volumes:
      # Configuration file, mounted read-only.
      - './prometheus.yml:/etc/prometheus/prometheus.yml:ro'
      # Named volume so the data persists.
      - 'prometheus_data:/prometheus'

volumes:
  prometheus_data: {}

prometheus.yml

# File: prometheus/prometheus.yml
global:
  # Pull metrics from the targets once every 15 seconds.
  scrape_interval: 15s
  external_labels:
    monitor: docker-cluster-monitor

scrape_configs:
  # Prometheus scraping its own metrics (used as a self health check).
  - job_name: prometheus
    static_configs:
      - targets:
          - 'localhost:9090'

  # Docker host metrics (Node Exporter).
  - job_name: docker_nodes
    static_configs:
      - targets:
          - '192.168.50.123:9100'
          # ... other hosts

  # Additional job that collects the cAdvisor metrics.
  - job_name: cadvisor
    metrics_path: /metrics
    static_configs:
      - targets:
          - '192.168.50.123:8080'
          # ... other hosts

!!! 修改 prometheus.yml 後，需要讓 Prometheus 重新載入（reload）設定

# Send SIGHUP to the Prometheus container so it reloads prometheus.yml
docker kill -s HUP <container_id>

!!! 檢查 prometheus.yml 語法

# Check the syntax of prometheus.yml using promtool inside the prometheus service container
docker compose exec prometheus promtool check config /etc/prometheus/prometheus.yml

grafana

數據檢視

services:
  grafana:
    image: grafana/grafana:latest
    container_name: grafana
    volumes:
      # Persist Grafana state: users, settings and edited dashboards.
      - grafana_data:/var/lib/grafana
    environment:
      # Initial administrator account.
      - GF_SECURITY_ADMIN_USER=admin
      # The password is intentionally NOT stored in this file. Define
      # GF_SECURITY_ADMIN_PASSWORD in the shell or in an untracked .env file
      # next to this compose file; Compose refuses to start when it is unset
      # or empty. Any password that was previously committed here should be
      # treated as leaked and rotated.
      - "GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:?set GF_SECURITY_ADMIN_PASSWORD in .env}"
      # Provisioning directory, meant to let Grafana connect to Prometheus
      # automatically at startup (optional; manual setup is usually advised).
      # NOTE(review): no provisioning files are mounted by this snippet -
      # confirm whether a volume for this path is expected.
      - GF_PATHS_PROVISIONING=/etc/grafana/provisioning
    depends_on:
      - prometheus  # make sure Prometheus is started first
    # Host networking is used; the published-port and dedicated-network
    # alternatives are kept below for reference.
    # ports:
    #   - "3000:3000"  # expose the Grafana UI port
    # networks:
    #   - monitor-net
    network_mode: host
    restart: unless-stopped
volumes:
  grafana_data: {}

檢視模版

https://grafana.com/grafana/dashboards/

!!! node-exporter

https://grafana.com/grafana/dashboards/1860-node-exporter-full/

https://grafana.com/grafana/dashboards/16098-node-exporter-dashboard-20240520-job/

!!! cadvisor

https://grafana.com/grafana/dashboards/19724-y0nei-s-cadvisor-exporter/