diff --git a/.codacy.yml b/.codacy.yml
index a358bf92739100..cebc95f8193627 100644
--- a/.codacy.yml
+++ b/.codacy.yml
@@ -10,9 +10,16 @@ exclude_paths:
- web/gui/lib/**
- web/gui/old/**
- web/gui/src/**
+ - web/gui/v1/**
- web/gui/v2/**
- web/gui/main.js
- tests/**
- aclk/tests/**
- libnetdata/libjudy/**
-
+ - database/sqlite/sqlite3.c
+ - ml/dlib/**
+ - web/server/h2o/libh2o/**
+ - build/**
+ - build_external/**
+ - libnetdata/dyn_conf/tests/**
+ - packaging/**
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index bc4fd0b32988cd..7f368ceb7e4bed 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -19,11 +19,11 @@ collectors/cups.plugin/ @thiagoftsm
exporting/ @thiagoftsm
daemon/ @thiagoftsm @vkalintiris
database/ @thiagoftsm @vkalintiris
-docs/ @tkatsoulas @andrewm4894 @Ancairon
+docs/ @tkatsoulas @Ancairon
health/ @thiagoftsm @vkalintiris @MrZammler
health/health.d/ @thiagoftsm @MrZammler
health/notifications/ @Ferroin @thiagoftsm @MrZammler
-ml/ @andrewm4894 @vkalintiris
+ml/ @vkalintiris
libnetdata/ @thiagoftsm @vkalintiris
packaging/ @Ferroin @tkatsoulas
registry/ @novykh
@@ -32,11 +32,12 @@ system/ @Ferroin @tkatsoulas
tests/ @Ferroin @vkalintiris @tkatsoulas
web/ @thiagoftsm @vkalintiris
web/gui/ @novykh
+logsmanagement/ @Dim-P @thiagoftsm
# Ownership by filetype (overwrites ownership by directory)
*.am @Ferroin @tkatsoulas
-*.md @tkatsoulas @andrewm4894 @Ancairon
-*.mdx @tkatsoulas @andrewm4894 @Ancairon
+*.md @tkatsoulas @Ancairon
+*.mdx @tkatsoulas @Ancairon
Dockerfile* @Ferroin @tkatsoulas
# Ownership of specific files
diff --git a/.github/data/distros.yml b/.github/data/distros.yml
index cdd0faf06f80c5..9175a5c7308731 100644
--- a/.github/data/distros.yml
+++ b/.github/data/distros.yml
@@ -44,11 +44,6 @@ include:
support_type: Intermediate
notes: ''
eol_check: true
- - <<: *alpine
- version: "3.15"
- support_type: Intermediate
- notes: ''
- eol_check: true
- distro: archlinux
version: latest
@@ -60,34 +55,6 @@ include:
test:
ebpf-core: true
- - &alma
- distro: almalinux
- version: "9"
- support_type: Core
- notes: ''
- jsonc_removal: |
- dnf remove -y json-c-devel
- eol_check: true
- packages: &alma_packages
- type: rpm
- repo_distro: el/9
- alt_links:
- - el/9Server
- - el/9Client
- arches:
- - x86_64
- - aarch64
- test:
- ebpf-core: true
- - <<: *alma
- version: "8"
- packages:
- <<: *alma_packages
- repo_distro: el/8
- alt_links:
- - el/8Server
- - el/8Client
-
- &amzn
distro: amazonlinux
version: "2"
@@ -108,7 +75,6 @@ include:
<<: *amzn_packages
repo_distro: amazonlinux/2023
-
- distro: centos
version: "7"
support_type: Core
@@ -125,6 +91,30 @@ include:
test:
ebpf-core: false
+ - &centos_stream
+ distro: centos-stream
+ base_image: 'quay.io/centos/centos:stream9'
+ version: '9'
+ support_type: 'Community'
+ notes: ''
+ jsonc_removal: |
+ dnf remove -y json-c-devel
+ eol_check: true
+ packages: &cs_packages
+ type: rpm
+ repo_distro: el/c9s
+ arches:
+ - x86_64
+ - aarch64
+ test:
+ ebpf-core: true
+ - <<: *centos_stream
+ version: '8'
+ base_image: 'quay.io/centos/centos:stream8'
+ packages:
+ <<: *cs_packages
+ repo_distro: el/c8s
+
- &debian
distro: debian
version: "12"
@@ -165,7 +155,7 @@ include:
- &fedora
distro: fedora
- version: "38"
+ version: "39"
support_type: Core
notes: ''
eol_check: true
@@ -173,12 +163,19 @@ include:
dnf remove -y json-c-devel
packages: &fedora_packages
type: rpm
- repo_distro: fedora/38
+ repo_distro: fedora/39
arches:
- x86_64
- aarch64
test:
ebpf-core: true
+ - <<: *fedora
+ version: "38"
+ packages:
+ <<: *fedora_packages
+ repo_distro: fedora/38
+ test:
+ ebpf-core: true
- <<: *fedora
version: "37"
packages:
@@ -198,7 +195,7 @@ include:
zypper rm -y libjson-c-devel
packages: &opensuse_packages
type: rpm
- repo_distro: opensuse/leap:15.5
+ repo_distro: opensuse/15.5
arches:
- x86_64
- aarch64
@@ -235,6 +232,36 @@ include:
<<: *oracle_packages
repo_distro: ol/9
+ - &rocky
+ distro: rockylinux
+ version: "9"
+ support_type: Core
+ notes: ''
+ jsonc_removal: |
+ dnf remove -y json-c-devel
+ eol_check: true
+ packages: &rocky_packages
+ type: rpm
+ repo_distro: el/9
+ alt_links:
+ - el/9Server
+ - el/9Client
+ - el/9RedHatVirtualizationHost
+ arches:
+ - x86_64
+ - aarch64
+ test:
+ ebpf-core: true
+ - <<: *rocky
+ version: "8"
+ packages:
+ <<: *rocky_packages
+ repo_distro: el/8
+ alt_links:
+ - el/8Server
+ - el/8Client
+ - el/8RedHatVirtualizationHost
+
- &ubuntu
distro: ubuntu
version: "22.04"
@@ -254,6 +281,11 @@ include:
- arm64
test:
ebpf-core: true
+ - <<: *ubuntu
+ version: "23.10"
+ packages:
+ <<: *ubuntu_packages
+ repo_distro: ubuntu/mantic
- <<: *ubuntu
version: "23.04"
packages:
diff --git a/.github/labeler.yml b/.github/labeler.yml
index 1dd4d472a18b5a..0ea825ef4bc074 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -153,3 +153,6 @@ area/tests:
area/web:
- web/**
+
+area/logs-management:
+ - logsmanagement/**
diff --git a/.github/scripts/check_latest_versions.py b/.github/scripts/check_latest_versions.py
new file mode 100755
index 00000000000000..67b11f8d54e614
--- /dev/null
+++ b/.github/scripts/check_latest_versions.py
@@ -0,0 +1,33 @@
+import sys
+import os
+import modules.version_manipulation as ndvm
+import modules.github_actions as cigh
+
+
+def main(command_line_args):
+    """
+    Stage a marker file for every given version that needs to be published.
+
+    Inputs: Single version or multiple versions
+    Outputs:
+        Creates files with the versions that need an update under
+        <TMPDIR or /tmp>/staging-new-releases, using the relative path and
+        filename returned by ndvm.get_release_path_and_filename().
+        When running as a GitHub Action and at least one version needs an
+        update, sets the GitHub output 'versions_needs_update' to 'true'.
+    """
+    versions = [str(arg) for arg in command_line_args]
+    # Create a temp output folder for the releases that need an update
+    staging = os.path.join(os.environ.get('TMPDIR', '/tmp'), 'staging-new-releases')
+    os.makedirs(staging, exist_ok=True)
+    needs_update = False
+    for version in versions:
+        # compare_version_with_remote() returns a falsy value when nothing has to be done
+        if not ndvm.compare_version_with_remote(version):
+            continue
+        needs_update = True
+        path, filename = ndvm.get_release_path_and_filename(version)
+        release_path = os.path.join(staging, path)
+        os.makedirs(release_path, exist_ok=True)
+        file_release_path = os.path.join(release_path, filename)
+        with open(file_release_path, "w", encoding="utf-8") as file:
+            print("Creating local copy of the release version update at: ", file_release_path)
+            file.write(version)
+    # Report once per run; the output file is append-only, so repeating the
+    # same key for every version would only add duplicate entries.
+    if needs_update and cigh.run_as_github_action():
+        cigh.update_github_output("versions_needs_update", "true")
+
+
+if __name__ == "__main__":
+ main(sys.argv[1:])
diff --git a/.github/scripts/check_latest_versions_per_channel.py b/.github/scripts/check_latest_versions_per_channel.py
new file mode 100644
index 00000000000000..885e5a98cc4a5c
--- /dev/null
+++ b/.github/scripts/check_latest_versions_per_channel.py
@@ -0,0 +1,9 @@
+import check_latest_versions
+import modules.version_manipulation as ndvm
+import sys
+
+if __name__ == "__main__":
+    # Usage: check_latest_versions_per_channel.py <channel>
+    if len(sys.argv) < 2:
+        sys.exit("Usage: check_latest_versions_per_channel.py <stable|nightly>")
+    channel = sys.argv[1]
+    sorted_agents_by_major = ndvm.sort_and_grouby_major_agents_of_channel(channel)
+    # The helper returns None when the GitHub API request fails; stop with a
+    # clear message instead of failing on None.values() below.
+    if sorted_agents_by_major is None:
+        sys.exit(f"Unable to retrieve the releases of the '{channel}' channel")
+    # Every list is ordered newest first, so element 0 is the latest release of its major version
+    latest_per_major = [values[0] for values in sorted_agents_by_major.values()]
+    check_latest_versions.main(latest_per_major)
diff --git a/.github/scripts/ci-support-pkgs.sh b/.github/scripts/ci-support-pkgs.sh
index 9ba11b68ee87f7..5cedbf3b9e02ca 100755
--- a/.github/scripts/ci-support-pkgs.sh
+++ b/.github/scripts/ci-support-pkgs.sh
@@ -9,7 +9,8 @@ set -e
case "${ID}" in
amzn|almalinux|centos|fedora)
- dnf install -y procps-ng cronie cronie-anacron || yum install -y procps-ng cronie cronie-anacron
+ dnf install -y procps-ng cronie cronie-anacron || \
+ yum install -y procps-ng cronie cronie-anacron
;;
arch)
pacman -S --noconfirm cronie
diff --git a/.github/scripts/get-static-cache-key.sh b/.github/scripts/get-static-cache-key.sh
index 3b07088f474f09..5093b332796332 100755
--- a/.github/scripts/get-static-cache-key.sh
+++ b/.github/scripts/get-static-cache-key.sh
@@ -2,13 +2,14 @@
arch="${1}"
platform="$(packaging/makeself/uname2platform.sh "${arch}")"
+builder_rev="v1"
-docker pull --platform "${platform}" netdata/static-builder
+docker pull --platform "${platform}" netdata/static-builder:${builder_rev}
# shellcheck disable=SC2046
cat $(find packaging/makeself/jobs -type f ! -regex '.*\(netdata\|-makeself\).*') > /tmp/static-cache-key-data
-docker run -it --rm --platform "${platform}" netdata/static-builder sh -c 'apk list -I 2>/dev/null' >> /tmp/static-cache-key-data
+docker run -it --rm --platform "${platform}" netdata/static-builder:${builder_rev} sh -c 'apk list -I 2>/dev/null' >> /tmp/static-cache-key-data
h="$(sha256sum /tmp/static-cache-key-data | cut -f 1 -d ' ')"
diff --git a/.github/scripts/modules/github_actions.py b/.github/scripts/modules/github_actions.py
new file mode 100644
index 00000000000000..1d653a77b01005
--- /dev/null
+++ b/.github/scripts/modules/github_actions.py
@@ -0,0 +1,27 @@
+import os
+
+
+def update_github_env(key, value):
+    """
+    Append a `key=value` entry to the file named by the GITHUB_ENV environment variable.
+
+    Errors are reported on stdout and never raised, so a failure here does not abort the caller.
+    :param key: name of the environment variable to define
+    :param value: value to assign to it
+    """
+    env_file = os.getenv('GITHUB_ENV')
+    print(env_file)
+    if not env_file:
+        print("Error updating GITHUB_ENV. Error: GITHUB_ENV is not set")
+        return
+    try:
+        with open(env_file, "a") as file:
+            # Entries are newline-delimited; without the terminator consecutive
+            # entries would be glued together on a single line.
+            file.write(f"{key}={value}\n")
+        print(f"Updated GITHUB_ENV with {key}={value}")
+    except OSError as e:
+        print(f"Error updating GITHUB_ENV. Error: {e}")
+
+
+def update_github_output(key, value):
+    """
+    Append a `key=value` entry to the file named by the GITHUB_OUTPUT environment variable.
+
+    Errors are reported on stdout and never raised, so a failure here does not abort the caller.
+    :param key: name of the output to set
+    :param value: value to assign to it
+    """
+    output_file = os.getenv('GITHUB_OUTPUT')
+    print(output_file)
+    if not output_file:
+        print("Error updating GITHUB_OUTPUT. Error: GITHUB_OUTPUT is not set")
+        return
+    try:
+        with open(output_file, "a") as file:
+            # Entries are newline-delimited; without the terminator consecutive
+            # entries would be glued together on a single line.
+            file.write(f"{key}={value}\n")
+        print(f"Updated GITHUB_OUTPUT with {key}={value}")
+    except OSError as e:
+        print(f"Error updating GITHUB_OUTPUT. Error: {e}")
+
+
+def run_as_github_action():
+    """Return True only when the GITHUB_ACTIONS environment variable is exactly 'true'."""
+    flag = os.getenv('GITHUB_ACTIONS')
+    return flag == 'true'
diff --git a/.github/scripts/modules/requirements.txt b/.github/scripts/modules/requirements.txt
new file mode 100644
index 00000000000000..fbec796fa5d698
--- /dev/null
+++ b/.github/scripts/modules/requirements.txt
@@ -0,0 +1 @@
+PyGithub==2.1.1
diff --git a/.github/scripts/modules/version_manipulation.py b/.github/scripts/modules/version_manipulation.py
new file mode 100644
index 00000000000000..cc346fb54d28da
--- /dev/null
+++ b/.github/scripts/modules/version_manipulation.py
@@ -0,0 +1,141 @@
+import os
+import re
+import requests
+from itertools import groupby
+from github import Github
+from github.GithubException import GithubException
+
+# GitHub repository that hosts the releases of each channel
+repos_URL = {
+    "stable": "netdata/netdata",
+    "nightly": "netdata/netdata-nightlies"
+}
+
+# The token is optional: it is handed to Github() as-is further down in this
+# module, so only warn when it is missing or empty.
+GH_TOKEN = os.getenv("GH_TOKEN")
+if not GH_TOKEN:
+    print("Token is not defined or empty, continuing with limitation on requests per sec towards Github API")
+
+
+def identify_channel(_version):
+    """
+    Classify a version string as a nightly or a stable release.
+
+    The nightly pattern is tried first because the stable pattern also
+    matches the leading part of a nightly version.
+    :param _version: version string such as "v1.2.3" or "v1.2.3-45-nightly"
+    :return: tuple (channel, regex pattern), or None when neither pattern matches
+    """
+    candidates = (
+        ("nightly", r'v(\d+)\.(\d+)\.(\d+)-(\d+)-nightly'),
+        ("stable", r'v(\d+)\.(\d+)\.(\d+)'),
+    )
+    for label, regex in candidates:
+        if re.match(regex, _version):
+            return label, regex
+    print("Invalid version format.")
+    return None
+
+
+def padded_version(item):
+    """
+    Build a single integer sort key from a tuple of version components.
+
+    The first component is skipped; every remaining one is zero-padded to
+    five digits and appended to the fixed prefix '10000'.
+    :param item: tuple of ints, e.g. (1, 42, 0) or (1, 42, 0, 15)
+    :return: int obtained from the concatenated digits
+    """
+    digits = ''.join(f'{component:05}' for component in item[1:])
+    return int('10000' + digits)
+
+
+def extract_version(title):
+    """
+    Parse the numeric components out of a version string.
+
+    :param title: version string such as "v1.2.3" or "v1.2.3-45-nightly"
+    :return: tuple of ints (3 items for a stable version, 4 for a nightly),
+             or None when the title is not a string or is not a recognised version
+    """
+    # Guard against a missing title, which would make re.match() raise
+    if not isinstance(title, str):
+        return None
+    # Classify once and reuse the result instead of matching the title twice
+    channel_info = identify_channel(title)
+    if channel_info is None:
+        return None
+    _, _pattern = channel_info
+    match = re.match(_pattern, title)
+    if match is None:
+        return None
+    return tuple(map(int, match.groups()))
+
+
+def get_release_path_and_filename(_version):
+    """
+    Map a version string to the relative path and filename used for its channel and major version.
+
+    :param _version: version string such as "v1.2.3" or "v1.2.3-45-nightly"
+    :return: tuple (_path, _filename); _path is "nightly" or "stable", _filename is "v<major>"
+    :raises SystemExit: with status 1 when the version matches neither format
+    """
+    nightly_pattern = r'v(\d+)\.(\d+)\.(\d+)-(\d+)-nightly'
+    stable_pattern = r'v(\d+)\.(\d+)\.(\d+)'
+    # Nightly is tested first: the stable pattern also matches the prefix of a nightly version
+    if match := re.match(nightly_pattern, _version):
+        _path = "nightly"
+    elif match := re.match(stable_pattern, _version):
+        _path = "stable"
+    else:
+        print("Invalid version format.")
+        # exit() is a helper injected by the site module for interactive use;
+        # raising SystemExit has the same effect without depending on it.
+        raise SystemExit(1)
+    return (_path, f"v{match.group(1)}")
+
+
+def compare_version_with_remote(version):
+    """
+    Decide whether the version published remotely for this channel and major has to be replaced.
+
+    Downloads <prefix>/<path>/<filename>, where path and filename come from
+    get_release_path_and_filename(version), and compares its content with `version`.
+    :param version: any version of the agent, e.g. "v1.2.3" or "v1.2.3-45-nightly"
+    :return: `version` when the remote copy is missing, unreadable as a version
+             or older; None when the remote copy is the same or newer.
+    """
+
+    prefix = "https://packages.netdata.cloud/releases"
+    path, filename = get_release_path_and_filename(version)
+
+    remote_url = f"{prefix}/{path}/{filename}"
+    # Without a timeout a stalled connection would block the job indefinitely
+    response = requests.get(remote_url, timeout=30)
+
+    if response.status_code != 200:
+        # Remote version not found
+        print(f"Version in the remote not found, updating the predefined latest path with the version: {version}")
+        return version
+
+    version_remote = response.text.rstrip()
+
+    version_components = extract_version(version)
+    remote_version_components = extract_version(version_remote)
+
+    if remote_version_components is None:
+        # The remote file exists but does not hold a parsable version; replace it
+        print(f"Version in the remote: {version_remote!r} is not a valid version, updating it with: {version}")
+        return version
+
+    absolute_version = padded_version(version_components)
+    absolute_remote_version = padded_version(remote_version_components)
+
+    if absolute_version > absolute_remote_version:
+        print(f"Version in the remote: {version_remote}, is older than the current: {version}, I need to update")
+        return version
+    print(f"Version in the remote: {version_remote}, is the same or newer than the current: {version}, "
+          "no action needed")
+    return None
+
+
+def sort_and_grouby_major_agents_of_channel(channel):
+    """
+    Fetch all the releases of the channel's repository (netdata/netdata or netdata/netdata-nightlies)
+    through the GitHub API and group them by major release number.
+    Every k,v in the returned dictionary is in the form; "vX": [descending ordered list of Agents in
+    this major release].
+    Release titles that are not valid versions of the requested channel are ignored.
+    :param channel: "nightly" or "stable"
+    :return: None on failure, or dict() with the Agents grouped by major version # (vX)
+    """
+    try:
+        G = Github(GH_TOKEN)
+        repo = G.get_repo(repos_URL[channel])
+        # The release list is fetched page by page while iterating, so consume
+        # it here to keep every API error inside this try block.
+        titles = [item.title for item in repo.get_releases()]
+    except GithubException as e:
+        print(f"GitHub API request failed: {e}")
+        return None
+
+    except Exception as e:
+        print(f"An unexpected error occurred: {e}")
+        return None
+
+    # Stable versions parse to (major, minor, patch); nightly ones carry one
+    # extra numeric component. Anything else cannot be formatted below.
+    expected_len = 3 if channel == "stable" else 4
+    extracted_titles = []
+    for title in titles:
+        if not isinstance(title, str):
+            continue
+        components = extract_version(title)
+        if components is not None and len(components) == expected_len:
+            extracted_titles.append(components)
+
+    # Necessary sorting for implement the group by
+    extracted_titles.sort(key=lambda x: x[0])
+    result_dict = {}
+    for major, group in groupby(extracted_titles, key=lambda x: x[0]):
+        # Newest release first within each major version
+        ordered = sorted(group, key=padded_version, reverse=True)
+        # Transform them in the correct form
+        if channel == "stable":
+            result_dict[f"v{major}"] = [f"v{a}.{b}.{c}" for a, b, c in ordered]
+        else:
+            result_dict[f"v{major}"] = [f"v{a}.{b}.{c}-{d}-nightly" for a, b, c, d in ordered]
+    return result_dict
diff --git a/.github/scripts/pkg-test.sh b/.github/scripts/pkg-test.sh
index 85e8b2e8d2cdaa..35767bf2ef2e4c 100755
--- a/.github/scripts/pkg-test.sh
+++ b/.github/scripts/pkg-test.sh
@@ -14,7 +14,9 @@ install_debian_like() {
# Install Netdata
# Strange quoting is required here so that glob matching works.
- apt-get install -y $(find /netdata/artifacts -type f -name 'netdata*.deb' ! -name '*dbgsym*' ! -name '*cups*' ! -name '*freeipmi*') || exit 3
+ # shellcheck disable=SC2046
+ apt-get install -y $(find /netdata/artifacts -type f -name 'netdata*.deb' \
+! -name '*dbgsym*' ! -name '*cups*' ! -name '*freeipmi*') || exit 3
# Install testing tools
apt-get install -y --no-install-recommends curl "${netcat}" jq || exit 1
@@ -32,10 +34,10 @@ install_fedora_like() {
# Install Netdata
# Strange quoting is required here so that glob matching works.
- "$PKGMGR" install -y /netdata/artifacts/netdata*.rpm || exit 1
+ "${PKGMGR}" install -y /netdata/artifacts/netdata*.rpm || exit 1
# Install testing tools
- "$PKGMGR" install -y curl nc jq || exit 1
+ "${PKGMGR}" install -y curl nc jq || exit 1
}
install_centos() {
@@ -49,15 +51,15 @@ install_centos() {
fi
# Install EPEL (needed for `jq`
- "$PKGMGR" install -y epel-release || exit 1
+ "${PKGMGR}" install -y epel-release || exit 1
# Install Netdata
# Strange quoting is required here so that glob matching works.
- "$PKGMGR" install -y /netdata/artifacts/netdata*.rpm || exit 1
+ "${PKGMGR}" install -y /netdata/artifacts/netdata*.rpm || exit 1
# Install testing tools
# shellcheck disable=SC2086
- "$PKGMGR" install -y ${opts} curl nc jq || exit 1
+ "${PKGMGR}" install -y ${opts} curl nc jq || exit 1
}
install_amazon_linux() {
@@ -69,11 +71,11 @@ install_amazon_linux() {
# Install Netdata
# Strange quoting is required here so that glob matching works.
- "$PKGMGR" install -y /netdata/artifacts/netdata*.rpm || exit 1
+ "${PKGMGR}" install -y /netdata/artifacts/netdata*.rpm || exit 1
# Install testing tools
# shellcheck disable=SC2086
- "$PKGMGR" install -y ${opts} curl nc jq || exit 1
+ "${PKGMGR}" install -y ${opts} curl nc jq || exit 1
}
install_suse_like() {
@@ -130,7 +132,7 @@ case "${DISTRO}" in
fedora | oraclelinux)
install_fedora_like
;;
- centos | rockylinux | almalinux)
+ centos| centos-stream | rockylinux | almalinux)
install_centos
;;
amazonlinux)
diff --git a/.github/scripts/upload-new-version-tags.sh b/.github/scripts/upload-new-version-tags.sh
new file mode 100755
index 00000000000000..a9b0cd303099dc
--- /dev/null
+++ b/.github/scripts/upload-new-version-tags.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+#
+# Copy the contents of every directory found under the local staging area
+# (${TMPDIR:-/tmp}/staging-new-releases) to the directory of the same name
+# below ${prefix} on the remote host, using scp.
+
+# Abort on the first failing command (including a failed scp).
+set -e
+
+# Remote host and account used for the upload.
+host="packages.netdata.cloud"
+user="netdatabot"
+
+# Destination root on the remote host and local staging root.
+prefix="/var/www/html/releases"
+staging="${TMPDIR:-/tmp}/staging-new-releases"
+
+# Make sure the staging directory exists before globbing its contents.
+mkdir -p "${staging}"
+
+# Only directories are uploaded; any other entry in the staging area is skipped.
+for source_dir in "${staging}"/*; do
+ if [ -d "${source_dir}" ]; then
+ base_name=$(basename "${source_dir}")
+ scp -r "${source_dir}"/* "${user}@${host}:${prefix}/${base_name}"
+ fi
+done
diff --git a/.github/workflows/build-dummy.yml b/.github/workflows/build-dummy.yml
deleted file mode 100644
index 6bf327e2d09c00..00000000000000
--- a/.github/workflows/build-dummy.yml
+++ /dev/null
@@ -1,127 +0,0 @@
----
-# Ci code for building release artifacts.
-#
-# This workflow exists so we can require these checks to pass, but skip
-# them on PRs that have nothing to do with the source code.
-name: Build
-on:
- pull_request: # PR checks only validate the build and generate artifacts for testing.
- paths-ignore: # This MUST be kept in-sync with the paths-ignore key for the build-dummy.yml workflow.
- - '**.c'
- - '**.cc'
- - '**.h'
- - '**.hh'
- - '**.in'
- - '!netdata.spec.in'
- - 'configure.ac'
- - 'netdata-installer.sh'
- - '**/Makefile*'
- - 'Makefile*'
- - '.github/workflows/build.yml'
- - '.github/scripts/build-static.sh'
- - '.github/scripts/get-static-cache-key.sh'
- - '.github/scripts/gen-matrix-build.py'
- - '.github/scripts/run-updater-check.sh'
- - 'build/**'
- - 'packaging/makeself/**'
- - 'packaging/installer/**'
- - 'aclk/aclk-schemas/'
- - 'ml/dlib/'
- - 'mqtt_websockets'
- - 'web/server/h2o/libh2o'
- - '!**.md'
-concurrency: # This keeps multiple instances of the job from running concurrently for the same ref and event type.
- group: build-${{ github.ref }}-${{ github.event_name }}
- cancel-in-progress: true
-jobs:
- build-dist: # Build the distribution tarball and store it as an artifact.
- name: Build Distribution Tarball
- runs-on: ubuntu-latest
- steps:
- - run: echo 'NOT REQUIRED'
-
- build-static: # Build the static binary archives, and store them as artifacts.
- name: Build Static
- runs-on: ubuntu-latest
- strategy:
- matrix:
- arch:
- - x86_64
- - armv7l
- - aarch64
- - ppc64le
- steps:
- - run: echo 'NOT REQUIRED'
-
- matrix: # Generate the shared build matrix for our build tests.
- name: Prepare Build Matrix
- runs-on: ubuntu-latest
- outputs:
- matrix: ${{ steps.set-matrix.outputs.matrix }}
- steps:
- - name: Checkout
- id: checkout
- uses: actions/checkout@v3
- - name: Prepare tools
- id: prepare
- run: |
- sudo apt-get update && sudo apt-get install -y python3-ruamel.yaml
- - name: Read build matrix
- id: set-matrix
- run: |
- matrix="$(.github/scripts/gen-matrix-build.py)"
- echo "Generated matrix: ${matrix}"
- echo "matrix=${matrix}" >> "${GITHUB_OUTPUT}"
-
- prepare-test-images: # Prepare the test environments for our build checks. This also checks dependency handling code for each tested environment.
- name: Prepare Test Environments
- runs-on: ubuntu-latest
- needs:
- - matrix
- env:
- RETRY_DELAY: 300
- strategy:
- fail-fast: false
- matrix: ${{ fromJson(needs.matrix.outputs.matrix) }}
- steps:
- - run: echo 'NOT REQUIRED'
-
- source-build: # Test various source build arrangements.
- name: Test Source Build
- runs-on: ubuntu-latest
- needs:
- - matrix
- strategy:
- fail-fast: false
- matrix: ${{ fromJson(needs.matrix.outputs.matrix) }}
- steps:
- - run: echo 'NOT REQUIRED'
-
- updater-check: # Test the generated dist archive using the updater code.
- name: Test Generated Distfile and Updater Code
- runs-on: ubuntu-latest
- needs:
- - matrix
- strategy:
- fail-fast: false
- matrix: ${{ fromJson(needs.matrix.outputs.matrix) }}
- steps:
- - run: echo 'NOT REQUIRED'
-
- prepare-upload: # Consolidate the artifacts for uploading or releasing.
- name: Prepare Artifacts
- runs-on: ubuntu-latest
- steps:
- - run: echo 'NOT REQUIRED'
-
- artifact-verification-dist: # Verify the regular installer works with the consolidated artifacts.
- name: Test Consolidated Artifacts (Source)
- runs-on: ubuntu-latest
- steps:
- - run: echo 'NOT REQUIRED'
-
- artifact-verification-static: # Verify the static installer works with the consolidated artifacts.
- name: Test Consolidated Artifacts (Static)
- runs-on: ubuntu-latest
- steps:
- - run: echo 'NOT REQUIRED'
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 180574a3c299fa..4a6debc4696857 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -5,31 +5,7 @@ on:
push: # Master branch checks only validate the build and generate artifacts for testing.
branches:
- master
- pull_request: # PR checks only validate the build and generate artifacts for testing.
- paths: # This MUST be kept in-sync with the paths-ignore key for the build-dummy.yml workflow.
- - '**.c'
- - '**.cc'
- - '**.h'
- - '**.hh'
- - '**.in'
- - '!netdata.spec.in'
- - 'configure.ac'
- - 'netdata-installer.sh'
- - '**/Makefile*'
- - 'Makefile*'
- - '.github/workflows/build.yml'
- - '.github/scripts/build-static.sh'
- - '.github/scripts/get-static-cache-key.sh'
- - '.github/scripts/gen-matrix-build.py'
- - '.github/scripts/run-updater-check.sh'
- - 'build/**'
- - 'packaging/makeself/**'
- - 'packaging/installer/**'
- - 'aclk/aclk-schemas/'
- - 'ml/dlib/'
- - 'mqtt_websockets'
- - 'web/server/h2o/libh2o'
- - '!**.md'
+ pull_request: null # PR checks only validate the build and generate artifacts for testing.
workflow_dispatch: # Dispatch runs build and validate, then push to the appropriate storage location.
inputs:
type:
@@ -44,30 +20,90 @@ concurrency: # This keeps multiple instances of the job from running concurrentl
group: build-${{ github.ref }}-${{ github.event_name }}
cancel-in-progress: true
jobs:
+ file-check: # Check what files changed if we’re being run in a PR or on a push.
+ name: Check Modified Files
+ runs-on: ubuntu-latest
+ outputs:
+ run: ${{ steps.check-run.outputs.run }}
+ steps:
+ - name: Checkout
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ submodules: recursive
+ - name: Check files
+ id: check-files
+ uses: tj-actions/changed-files@v40
+ with:
+ since_last_remote_commit: ${{ github.event_name != 'pull_request' }}
+ files: |
+ **.c
+ **.cc
+ **.h
+ **.hh
+ **.in
+ configure.ac
+ netdata-installer.sh
+ **/Makefile*
+ Makefile*
+ .github/data/distros.yml
+ .github/workflows/build.yml
+ .github/scripts/build-static.sh
+ .github/scripts/get-static-cache-key.sh
+ .github/scripts/gen-matrix-build.py
+ .github/scripts/run-updater-check.sh
+ build/**
+ packaging/makeself/**
+ packaging/installer/**
+ aclk/aclk-schemas/
+ ml/dlib/
+ mqtt_websockets
+ web/server/h2o/libh2o
+ files_ignore: |
+ netdata.spec.in
+ **.md
+ - name: Check Run
+ id: check-run
+ run: |
+ if [ "${{ steps.check-files.outputs.any_modified }}" == "true" ] || [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
+ echo 'run=true' >> "${GITHUB_OUTPUT}"
+ else
+ echo 'run=false' >> "${GITHUB_OUTPUT}"
+ fi
+
build-dist: # Build the distribution tarball and store it as an artifact.
name: Build Distribution Tarball
runs-on: ubuntu-latest
+ needs:
+ - file-check
outputs:
distfile: ${{ steps.build.outputs.distfile }}
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: recursive
- name: Fix tags
id: fix-tags
- if: github.event_name != 'push'
+ if: github.event_name != 'push' && needs.file-check.outputs.run == 'true'
run: |
git fetch --tags --force
- name: Mark Stable
id: channel
- if: github.event_name == 'workflow_dispatch' && github.event.inputs.type != 'nightly'
+ if: github.event_name == 'workflow_dispatch' && github.event.inputs.type != 'nightly' && needs.file-check.outputs.run == 'true'
run: |
sed -i 's/^RELEASE_CHANNEL="nightly"/RELEASE_CHANNEL="stable"/' netdata-installer.sh
- name: Build
id: build
+ if: needs.file-check.outputs.run == 'true'
run: |
git describe
mkdir -p artifacts
@@ -85,6 +121,7 @@ jobs:
cp netdata-*.tar.gz artifacts/
- name: Store
id: store
+ if: needs.file-check.outputs.run == 'true'
uses: actions/upload-artifact@v3
with:
name: dist-tarball
@@ -112,11 +149,14 @@ jobs:
&& startsWith(github.ref, 'refs/heads/master')
&& github.event_name != 'pull_request'
&& github.repository == 'netdata/netdata'
+ && needs.file-check.outputs.run == 'true'
}}
build-static: # Build the static binary archives, and store them as artifacts.
name: Build Static
runs-on: ubuntu-latest
+ needs:
+ - file-check
strategy:
matrix:
arch:
@@ -125,38 +165,43 @@ jobs:
- aarch64
- ppc64le
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: recursive
- name: Fix tags
id: fix-tags
- if: github.event_name != 'push'
+ if: github.event_name != 'push' && needs.file-check.outputs.run == 'true'
run: |
git fetch --tags --force
- name: Mark Stable
id: channel
- if: github.event_name == 'workflow_dispatch' && github.event.inputs.type != 'nightly'
+ if: github.event_name == 'workflow_dispatch' && github.event.inputs.type != 'nightly' && needs.file-check.outputs.run == 'true'
run: |
sed -i 's/^RELEASE_CHANNEL="nightly"/RELEASE_CHANNEL="stable"/' netdata-installer.sh packaging/makeself/install-or-update.sh
- name: Get Cache Key
- if: github.event_name != 'pull_request' || ! contains(github.event.pull_request.labels.*.name, 'run-ci/no-cache')
+ if: (github.event_name != 'pull_request' || ! contains(github.event.pull_request.labels.*.name, 'run-ci/no-cache')) && needs.file-check.outputs.run == 'true'
id: cache-key
run: .github/scripts/get-static-cache-key.sh ${{ matrix.arch }} "${{ contains(github.event.pull_request.labels.*.name, 'run-ci/no-cache') }}"
- name: Cache
- if: github.event_name != 'pull_request' || ! contains(github.event.pull_request.labels.*.name, 'run-ci/no-cache')
+ if: (github.event_name != 'pull_request' || ! contains(github.event.pull_request.labels.*.name, 'run-ci/no-cache')) && needs.file-check.outputs.run == 'true'
id: cache
uses: actions/cache@v3
with:
path: artifacts/cache
key: ${{ steps.cache-key.outputs.key }}
- name: Build
- if: github.event_name != 'workflow_dispatch' # Don’t use retries on PRs.
+ if: github.event_name != 'workflow_dispatch' && needs.file-check.outputs.run == 'true' # Don’t use retries on PRs.
run: .github/scripts/build-static.sh ${{ matrix.arch }}
- name: Build
- if: github.event_name == 'workflow_dispatch'
+ if: github.event_name == 'workflow_dispatch' && needs.file-check.outputs.run == 'true'
id: build
uses: nick-fields/retry@v2
with:
@@ -165,6 +210,7 @@ jobs:
command: .github/scripts/build-static.sh ${{ matrix.arch }}
- name: Store
id: store
+ if: needs.file-check.outputs.run == 'true'
uses: actions/upload-artifact@v3
with:
name: static-archive
@@ -192,6 +238,7 @@ jobs:
&& startsWith(github.ref, 'refs/heads/master')
&& github.event_name != 'pull_request'
&& github.repository == 'netdata/netdata'
+ && needs.file-check.outputs.run == 'true'
}}
matrix: # Generate the shared build matrix for our build tests.
@@ -203,7 +250,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Prepare tools
id: prepare
run: |
@@ -252,13 +299,13 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Setup Buildx
id: buildx
- uses: docker/setup-buildx-action@v2
+ uses: docker/setup-buildx-action@v3
- name: Build test environment
id: build1
- uses: docker/build-push-action@v4
+ uses: docker/build-push-action@v5
continue-on-error: true # We retry 3 times at 5 minute intervals if there is a failure here.
with:
push: false
@@ -276,7 +323,7 @@ jobs:
- name: Build test environment (attempt 2)
if: ${{ steps.build1.outcome == 'failure' }}
id: build2
- uses: docker/build-push-action@v4
+ uses: docker/build-push-action@v5
continue-on-error: true # We retry 3 times at 5 minute intervals if there is a failure here.
with:
push: false
@@ -294,7 +341,7 @@ jobs:
- name: Build test environment (attempt 3)
if: ${{ steps.build1.outcome == 'failure' && steps.build2.outcome == 'failure' }}
id: build3
- uses: docker/build-push-action@v4
+ uses: docker/build-push-action@v5
with:
push: false
load: false
@@ -344,42 +391,53 @@ jobs:
needs:
- matrix
- prepare-test-images
+ - file-check
strategy:
fail-fast: false
max-parallel: 8
matrix: ${{ fromJson(needs.matrix.outputs.matrix) }}
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
with:
submodules: recursive
- name: Fetch test environment
id: fetch
+ if: needs.file-check.outputs.run == 'true'
uses: actions/download-artifact@v3
with:
name: ${{ matrix.artifact_key }}-test-env
- name: Load test environment
id: load
+ if: needs.file-check.outputs.run == 'true'
run: docker load --input image.tar
- name: Regular build on ${{ matrix.distro }}
id: build-basic
+ if: needs.file-check.outputs.run == 'true'
run: |
docker run --security-opt seccomp=unconfined -w /netdata test:${{ matrix.artifact_key }} \
/bin/sh -c 'autoreconf -ivf && ./configure --disable-dependency-tracking && make -j2'
- name: netdata-installer on ${{ matrix.distro }}, disable cloud
id: build-no-cloud
+ if: needs.file-check.outputs.run == 'true'
run: |
docker run --security-opt seccomp=unconfined -w /netdata test:${{ matrix.artifact_key }} \
/bin/sh -c './netdata-installer.sh --dont-wait --dont-start-it --disable-cloud --one-time-build'
- name: netdata-installer on ${{ matrix.distro }}, require cloud
id: build-cloud
+ if: needs.file-check.outputs.run == 'true'
run: |
docker run --security-opt seccomp=unconfined -w /netdata test:${{ matrix.artifact_key }} \
/bin/sh -c './netdata-installer.sh --dont-wait --dont-start-it --require-cloud --one-time-build'
- name: netdata-installer on ${{ matrix.distro }}, require cloud, no JSON-C
id: build-no-jsonc
- if: matrix.jsonc_removal != ''
+ if: matrix.jsonc_removal != '' && needs.file-check.outputs.run == 'true'
run: |
docker run --security-opt seccomp=unconfined -w /netdata test:${{ matrix.artifact_key }} \
/bin/sh -c '/rmjsonc.sh && ./netdata-installer.sh --dont-wait --dont-start-it --require-cloud --one-time-build'
@@ -407,6 +465,7 @@ jobs:
&& startsWith(github.ref, 'refs/heads/master')
&& github.event_name != 'pull_request'
&& github.repository == 'netdata/netdata'
+ && needs.file-check.outputs.run == 'true'
}}
updater-check: # Test the generated dist archive using the updater code.
@@ -417,6 +476,7 @@ jobs:
- build-dist
- matrix
- prepare-test-images
+ - file-check
strategy:
fail-fast: false
max-parallel: 8
@@ -429,17 +489,24 @@ jobs:
volumes:
- ${{ github.workspace }}:/usr/local/apache2/htdocs/
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
- name: Fetch dist tarball artifacts
id: fetch-tarball
+ if: needs.file-check.outputs.run == 'true'
uses: actions/download-artifact@v3
with:
name: dist-tarball
path: dist-tarball
- name: Prepare artifact directory
id: prepare
+ if: needs.file-check.outputs.run == 'true'
run: |
mkdir -p artifacts/download/latest || exit 1
echo "9999.0.0-0" > artifacts/download/latest/latest-version.txt || exit 1
@@ -450,14 +517,17 @@ jobs:
cat sha256sums.txt
- name: Fetch test environment
id: fetch-test-environment
+ if: needs.file-check.outputs.run == 'true'
uses: actions/download-artifact@v3
with:
name: ${{ matrix.artifact_key }}-test-env
- name: Load test environment
id: load
+ if: needs.file-check.outputs.run == 'true'
run: docker load --input image.tar
- name: Install netdata and run the updater on ${{ matrix.distro }}
id: updater-check
+ if: needs.file-check.outputs.run == 'true'
run: |
docker run --security-opt seccomp=unconfined -e DISABLE_TELEMETRY=1 --network host -w /netdata test:${{ matrix.artifact_key }} \
/netdata/.github/scripts/run-updater-check.sh
@@ -484,6 +554,7 @@ jobs:
&& startsWith(github.ref, 'refs/heads/master')
&& github.event_name != 'pull_request'
&& github.repository == 'netdata/netdata'
+ && needs.file-check.outputs.run == 'true'
}}
prepare-upload: # Consolidate the artifacts for uploading or releasing.
@@ -492,27 +563,37 @@ jobs:
needs:
- build-dist
- build-static
+ - file-check
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
- name: Prepare Environment
id: prepare
+ if: needs.file-check.outputs.run == 'true'
run: mkdir -p artifacts
- name: Retrieve Dist Tarball
id: fetch-dist
+ if: needs.file-check.outputs.run == 'true'
uses: actions/download-artifact@v3
with:
name: dist-tarball
path: dist-tarball
- name: Retrieve Static Build Artifacts
id: fetch-static
+ if: needs.file-check.outputs.run == 'true'
uses: actions/download-artifact@v3
with:
name: static-archive
path: static-archive
- name: Prepare Artifacts
id: consolidate
+ if: needs.file-check.outputs.run == 'true'
working-directory: ./artifacts/
run: |
mv ../dist-tarball/* . || exit 1
@@ -524,6 +605,7 @@ jobs:
cat sha256sums.txt
- name: Store Artifacts
id: store
+ if: needs.file-check.outputs.run == 'true'
uses: actions/upload-artifact@v3
with:
name: final-artifacts
@@ -552,6 +634,7 @@ jobs:
&& startsWith(github.ref, 'refs/heads/master')
&& github.event_name != 'pull_request'
&& github.repository == 'netdata/netdata'
+ && needs.file-check.outputs.run == 'true'
}}
artifact-verification-dist: # Verify the regular installer works with the consolidated artifacts.
@@ -559,6 +642,7 @@ jobs:
runs-on: ubuntu-latest
needs:
- prepare-upload
+ - file-check
services:
apache: # This gets used to serve the dist tarball for the updater script.
image: httpd:2.4
@@ -567,22 +651,30 @@ jobs:
volumes:
- ${{ github.workspace }}:/usr/local/apache2/htdocs/
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
- name: Fetch artifacts
id: fetch
+ if: needs.file-check.outputs.run == 'true'
uses: actions/download-artifact@v3
with:
name: final-artifacts
path: artifacts
- name: Prepare artifacts directory
id: prepare
+ if: needs.file-check.outputs.run == 'true'
run: |
mkdir -p download/latest
mv artifacts/* download/latest
- name: Verify that artifacts work with installer
id: verify
+ if: needs.file-check.outputs.run == 'true'
env:
NETDATA_TARBALL_BASEURL: http://localhost:8080/
run: packaging/installer/kickstart.sh --build-only --dont-start-it --disable-telemetry --dont-wait
@@ -606,6 +698,7 @@ jobs:
&& startsWith(github.ref, 'refs/heads/master')
&& github.event_name != 'pull_request'
&& github.repository == 'netdata/netdata'
+ && needs.file-check.outputs.run == 'true'
}}
artifact-verification-static: # Verify the static installer works with the consolidated artifacts.
@@ -613,6 +706,7 @@ jobs:
runs-on: ubuntu-latest
needs:
- prepare-upload
+ - file-check
services:
apache: # This gets used to serve the static archives.
image: httpd:2.4
@@ -621,22 +715,30 @@ jobs:
volumes:
- ${{ github.workspace }}:/usr/local/apache2/htdocs/
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
- name: Fetch artifacts
id: fetch-artifacts
+ if: needs.file-check.outputs.run == 'true'
uses: actions/download-artifact@v3
with:
name: final-artifacts
path: artifacts
- name: Prepare artifacts directory
id: prepare
+ if: needs.file-check.outputs.run == 'true'
run: |
mkdir -p download/latest
mv artifacts/* download/latest
- name: Verify that artifacts work with installer
id: verify
+ if: needs.file-check.outputs.run == 'true'
env:
NETDATA_TARBALL_BASEURL: http://localhost:8080/
run: packaging/installer/kickstart.sh --static-only --dont-start-it --disable-telemetry
@@ -660,6 +762,7 @@ jobs:
&& startsWith(github.ref, 'refs/heads/master')
&& github.event_name != 'pull_request'
&& github.repository == 'netdata/netdata'
+ && needs.file-check.outputs.run == 'true'
}}
upload-nightly: # Upload the nightly build artifacts to GCS.
@@ -725,12 +828,12 @@ jobs:
steps:
- name: Checkout Main Repo
id: checkout-main
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
path: main
- name: Checkout Nightly Repo
id: checkout-nightly
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
repository: netdata/netdata-nightlies
path: nightlies
@@ -762,6 +865,37 @@ jobs:
makeLatest: true
tag: ${{ steps.version.outputs.version }}
token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }}
+ - name: Checkout netdata main Repo # Check out netdata/netdata again to update the latest packaged versions
+ id: checkout-netdata
+ uses: actions/checkout@v4
+ with:
+ token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }}
+ - name: Init python environment for publish release metadata
+ uses: actions/setup-python@v4
+ id: init-python
+ with:
+ python-version: "3.12"
+ - name: Setup python environment
+ id: setup-python
+ run: |
+ pip install -r .github/scripts/modules/requirements.txt
+ - name: Check if the version is latest and published
+ id: check-latest-version
+ run: |
+ python .github/scripts/check_latest_versions.py ${{ steps.version.outputs.version }}
+ - name: SSH setup
+ id: ssh-setup
+ if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata' && steps.check-latest-version.outputs.versions_needs_update == 'true'
+ uses: shimataro/ssh-key-action@v2
+ with:
+ key: ${{ secrets.NETDATABOT_PACKAGES_SSH_KEY }}
+ name: id_ecdsa
+ known_hosts: ${{ secrets.PACKAGES_KNOWN_HOSTS }}
+ - name: Sync newer releases
+ id: sync-releases
+ if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata' && steps.check-latest-version.outputs.versions_needs_update == 'true'
+ run: |
+ .github/scripts/upload-new-version-tags.sh
- name: Failure Notification
uses: rtCamp/action-slack-notify@v2
env:
@@ -777,6 +911,12 @@ jobs:
Fetch artifacts: ${{ steps.fetch.outcome }}
Prepare version info: ${{ steps.version.outcome }}
Create release: ${{ steps.create-release.outcome }}
+ Checkout back netdata/netdata: ${{ steps.checkout-netdata.outcome }}
+ Init python environment: ${{ steps.init-python.outcome }}
+ Setup python environment: ${{ steps.setup-python.outcome }}
+ Check the newly published release against the advertised one: ${{ steps.check-latest-version.outcome }}
+ Setup ssh: ${{ steps.ssh-setup.outcome }}
+ Sync with the releases: ${{ steps.sync-releases.outcome }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }}
if: >-
${{
@@ -811,7 +951,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Retrieve Artifacts
id: fetch
uses: actions/download-artifact@v3
diff --git a/.github/workflows/checks-dummy.yml b/.github/workflows/checks-dummy.yml
deleted file mode 100644
index 369d70ff91d15b..00000000000000
--- a/.github/workflows/checks-dummy.yml
+++ /dev/null
@@ -1,42 +0,0 @@
----
-name: Checks
-on:
- pull_request:
- paths-ignore: # This MUST be kept in sync with the paths key for the checks.yml workflow.
- - '**.c'
- - '**.cc'
- - '**.h'
- - '**.hh'
- - '**.in'
- - '!netdata.spec.in'
- - 'configure.ac'
- - '**/Makefile*'
- - 'Makefile*'
- - '.gitignore'
- - '.github/workflows/checks.yml'
- - 'build/**'
- - 'aclk/aclk-schemas/'
- - 'ml/dlib/'
- - 'mqtt_websockets'
- - 'web/server/h2o/libh2o'
-env:
- DISABLE_TELEMETRY: 1
-concurrency:
- group: checks-${{ github.ref }}
- cancel-in-progress: true
-jobs:
- libressl-checks:
- name: LibreSSL
- runs-on: ubuntu-latest
- steps:
- - run: "echo 'NOT REQUIRED'"
- clang-checks:
- name: Clang
- runs-on: ubuntu-latest
- steps:
- - run: "echo 'NOT REQUIRED'"
- gitignore-check:
- name: .gitignore
- runs-on: ubuntu-latest
- steps:
- - run: "echo 'NOT REQUIRED'"
diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml
index 4c892ffce80898..1308f45fa59721 100644
--- a/.github/workflows/checks.yml
+++ b/.github/workflows/checks.yml
@@ -2,58 +2,77 @@
name: Checks
on:
push:
- paths:
- - '**.c'
- - '**.cc'
- - '**.h'
- - '**.hh'
- - '**.in'
- - '!netdata.spec.in'
- - 'configure.ac'
- - '**/Makefile*'
- - 'Makefile*'
- - '.gitignore'
- - '.github/workflows/checks.yml'
- - 'build/**'
- - 'aclk/aclk-schemas/'
- - 'ml/dlib/'
- - 'mqtt_websockets'
- - 'web/server/h2o/libh2o'
branches:
- master
- pull_request:
- paths: # This MUST be kept in-sync with the paths-ignore key for the checks-dummy.yml workflow.
- - '**.c'
- - '**.cc'
- - '**.h'
- - '**.hh'
- - '**.in'
- - '!netdata.spec.in'
- - 'configure.ac'
- - '**/Makefile*'
- - 'Makefile*'
- - '.gitignore'
- - '.github/workflows/checks.yml'
- - 'build/**'
- - 'aclk/aclk-schemas/'
- - 'ml/dlib/'
- - 'mqtt_websockets'
- - 'web/server/h2o/libh2o'
+ pull_request: null
env:
DISABLE_TELEMETRY: 1
concurrency:
group: checks-${{ github.ref }}
cancel-in-progress: true
jobs:
+ file-check: # Check what files changed if we’re being run in a PR or on a push.
+ name: Check Modified Files
+ runs-on: ubuntu-latest
+ outputs:
+ run: ${{ steps.check-run.outputs.run }}
+ steps:
+ - name: Checkout
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ submodules: recursive
+ - name: Check files
+ id: check-files
+ uses: tj-actions/changed-files@v40
+ with:
+ since_last_remote_commit: ${{ github.event_name != 'pull_request' }}
+ files: |
+ **.c
+ **.cc
+ **.h
+ **.hh
+ **.in
+ configure.ac
+ **/Makefile*
+ Makefile*
+ .gitignore
+ .github/workflows/checks.yml
+ build/**
+ aclk/aclk-schemas/
+ ml/dlib/
+ mqtt_websockets
+ web/server/h2o/libh2o
+ files_ignore: |
+ netdata.spec.in
+ **.md
+ - name: Check Run
+ id: check-run
+ run: |
+ if [ "${{ steps.check-files.outputs.any_modified }}" == "true" ] || [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
+ echo 'run=true' >> "${GITHUB_OUTPUT}"
+ else
+ echo 'run=false' >> "${GITHUB_OUTPUT}"
+ fi
+
libressl-checks:
name: LibreSSL
+ needs:
+ - file-check
runs-on: ubuntu-latest
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
with:
submodules: recursive
- name: Build
+ if: needs.file-check.outputs.run == 'true'
run: >
docker run -v "$PWD":/netdata -w /netdata alpine:latest /bin/sh -c
'apk add bash;
@@ -63,30 +82,49 @@ jobs:
autoreconf -ivf;
./configure --disable-dependency-tracking;
make;'
+
clang-checks:
name: Clang
+ needs:
+ - file-check
runs-on: ubuntu-latest
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
with:
submodules: recursive
- name: Build
- run: |
- docker build -f .github/dockerfiles/Dockerfile.clang .
+ if: needs.file-check.outputs.run == 'true'
+ run: docker build -f .github/dockerfiles/Dockerfile.clang .
+
gitignore-check:
name: .gitignore
+ needs:
+ - file-check
runs-on: ubuntu-latest
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
with:
submodules: recursive
- name: Prepare environment
+ if: needs.file-check.outputs.run == 'true'
run: ./packaging/installer/install-required-packages.sh --dont-wait --non-interactive netdata
- name: Build netdata
+ if: needs.file-check.outputs.run == 'true'
run: ./netdata-installer.sh --dont-start-it --disable-telemetry --dont-wait --install-prefix /tmp/install --one-time-build
- name: Check that repo is clean
+ if: needs.file-check.outputs.run == 'true'
run: |
git status --porcelain=v1 > /tmp/porcelain
if [ -s /tmp/porcelain ]; then
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
index 174f650eacf01f..ae5818afc05cf2 100644
--- a/.github/workflows/codeql.yml
+++ b/.github/workflows/codeql.yml
@@ -23,7 +23,7 @@ jobs:
python: ${{ steps.python.outputs.run }}
steps:
- name: Clone repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0
@@ -76,7 +76,7 @@ jobs:
security-events: write
steps:
- name: Git clone repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0
@@ -103,7 +103,7 @@ jobs:
security-events: write
steps:
- name: Git clone repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0
diff --git a/.github/workflows/coverity.yml b/.github/workflows/coverity.yml
index 8a1ee2486e997d..eb68c302b2ffec 100644
--- a/.github/workflows/coverity.yml
+++ b/.github/workflows/coverity.yml
@@ -19,7 +19,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
id: checkout
with:
submodules: recursive
diff --git a/.github/workflows/dashboard-pr.yml b/.github/workflows/dashboard-pr.yml
index ac414da10920fa..f02cfb69dd036a 100644
--- a/.github/workflows/dashboard-pr.yml
+++ b/.github/workflows/dashboard-pr.yml
@@ -21,7 +21,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Update Files
id: update
run: |
diff --git a/.github/workflows/docker-dummy.yml b/.github/workflows/docker-dummy.yml
deleted file mode 100644
index 64131dac569fdc..00000000000000
--- a/.github/workflows/docker-dummy.yml
+++ /dev/null
@@ -1,51 +0,0 @@
----
-name: Docker
-on:
- pull_request:
- paths-ignore: # This MUST be kept in-sync with the paths key for the dummy.yml workflow.
- - '**.c'
- - '**.cc'
- - '**.h'
- - '**.hh'
- - '**.in'
- - '!netdata.spec.in'
- - '.dockerignore'
- - 'configure.ac'
- - 'netdata-installer.sh'
- - '**/Makefile*'
- - 'Makefile*'
- - '.github/workflows/docker.yml'
- - '.github/scripts/docker-test.sh'
- - 'build/**'
- - 'packaging/docker/**'
- - 'packaging/installer/**'
- - 'aclk/aclk-schemas/'
- - 'ml/dlib/'
- - 'mqtt_websockets'
- - 'web/server/h2o/libh2o'
- - '!**.md'
-env:
- DISABLE_TELEMETRY: 1
-concurrency:
- group: docker-${{ github.ref }}-${{ github.event_name }}
- cancel-in-progress: true
-jobs:
- docker-test:
- name: Docker Runtime Test
- runs-on: ubuntu-latest
- steps:
- - run: echo 'NOT REQUIRED'
-
- docker-ci:
- name: Docker Alt Arch Builds
- needs: docker-test
- runs-on: ubuntu-latest
- strategy:
- matrix:
- platforms:
- - linux/i386
- - linux/arm/v7
- - linux/arm64
- - linux/ppc64le
- steps:
- - run: echo 'NOT REQUIRED'
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
index aad83ced578767..b7fe0a8666fabb 100644
--- a/.github/workflows/docker.yml
+++ b/.github/workflows/docker.yml
@@ -4,29 +4,7 @@ on:
push:
branches:
- master
- pull_request:
- paths: # This MUST be kept in-sync with the paths-ignore key for the docker-dummy.yml workflow.
- - '**.c'
- - '**.cc'
- - '**.h'
- - '**.hh'
- - '**.in'
- - '!netdata.spec.in'
- - '.dockerignore'
- - 'configure.ac'
- - 'netdata-installer.sh'
- - '**/Makefile*'
- - 'Makefile*'
- - '.github/workflows/docker.yml'
- - '.github/scripts/docker-test.sh'
- - 'build/**'
- - 'packaging/docker/**'
- - 'packaging/installer/**'
- - 'aclk/aclk-schemas/'
- - 'ml/dlib/'
- - 'mqtt_websockets'
- - 'web/server/h2o/libh2o'
- - '!**.md'
+ pull_request: null
workflow_dispatch:
inputs:
version:
@@ -39,27 +17,86 @@ concurrency:
group: docker-${{ github.ref }}-${{ github.event_name }}
cancel-in-progress: true
jobs:
+ file-check: # Check what files changed if we’re being run in a PR or on a push.
+ name: Check Modified Files
+ runs-on: ubuntu-latest
+ outputs:
+ run: ${{ steps.check-run.outputs.run }}
+ steps:
+ - name: Checkout
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ submodules: recursive
+ - name: Check files
+ id: file-check
+ uses: tj-actions/changed-files@v40
+ with:
+ since_last_remote_commit: ${{ github.event_name != 'pull_request' }}
+ files: |
+ **.c
+ **.cc
+ **.h
+ **.hh
+ **.in
+ .dockerignore
+ configure.ac
+ netdata-installer.sh
+ **/Makefile*
+ Makefile*
+ .github/workflows/docker.yml
+ .github/scripts/docker-test.sh
+ build/**
+ packaging/docker/**
+ packaging/installer/**
+ aclk/aclk-schemas/
+ ml/dlib/
+ mqtt_websockets
+ web/server/h2o/libh2o
+ files_ignore: |
+ netdata.spec.in
+ **.md
+ - name: Check Run
+ id: check-run
+ run: |
+ if [ "${{ steps.file-check.outputs.any_modified }}" == "true" ] || [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
+ echo 'run=true' >> "${GITHUB_OUTPUT}"
+ else
+ echo 'run=false' >> "${GITHUB_OUTPUT}"
+ fi
+
docker-test:
name: Docker Runtime Test
+ needs:
+ - file-check
runs-on: ubuntu-latest
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
with:
submodules: recursive
- name: Setup Buildx
id: prepare
- uses: docker/setup-buildx-action@v2
+ if: needs.file-check.outputs.run == 'true'
+ uses: docker/setup-buildx-action@v3
- name: Test Build
id: build
- uses: docker/build-push-action@v4
+ if: needs.file-check.outputs.run == 'true'
+ uses: docker/build-push-action@v5
with:
load: true
push: false
tags: netdata/netdata:test
- name: Test Image
id: test
+ if: needs.file-check.outputs.run == 'true'
run: .github/scripts/docker-test.sh
- name: Failure Notification
uses: rtCamp/action-slack-notify@v2
@@ -82,12 +119,15 @@ jobs:
&& github.event_name != 'pull_request'
&& startsWith(github.ref, 'refs/heads/master')
&& github.repository == 'netdata/netdata'
+ && needs.file-check.outputs.run == 'true'
}}
docker-ci:
if: github.event_name != 'workflow_dispatch'
name: Docker Alt Arch Builds
- needs: docker-test
+ needs:
+ - docker-test
+ - file-check
runs-on: ubuntu-latest
strategy:
matrix:
@@ -97,21 +137,28 @@ jobs:
- linux/arm64
- linux/ppc64le
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
with:
submodules: recursive
- name: Setup QEMU
id: qemu
- if: matrix.platforms != 'linux/i386'
- uses: docker/setup-qemu-action@v2
+ if: matrix.platforms != 'linux/i386' && needs.file-check.outputs.run == 'true'
+ uses: docker/setup-qemu-action@v3
- name: Setup Buildx
id: buildx
- uses: docker/setup-buildx-action@v2
+ if: needs.file-check.outputs.run == 'true'
+ uses: docker/setup-buildx-action@v3
- name: Build
id: build
- uses: docker/build-push-action@v4
+ if: needs.file-check.outputs.run == 'true'
+ uses: docker/build-push-action@v5
with:
platforms: ${{ matrix.platforms }}
load: false
@@ -138,6 +185,7 @@ jobs:
&& github.event_name != 'pull_request'
&& startsWith(github.ref, 'refs/heads/master')
&& github.repository == 'netdata/netdata'
+ && needs.file-check.outputs.run == 'true'
}}
normalize-tag: # Fix the release tag if needed
@@ -166,7 +214,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
- name: Determine which tags to use
@@ -186,21 +234,21 @@ jobs:
run: echo "OFFICIAL_IMAGE=true" >> "${GITHUB_ENV}"
- name: Setup QEMU
id: qemu
- uses: docker/setup-qemu-action@v2
+ uses: docker/setup-qemu-action@v3
- name: Setup Buildx
id: buildx
- uses: docker/setup-buildx-action@v2
+ uses: docker/setup-buildx-action@v3
- name: Docker Hub Login
id: docker-hub-login
if: github.repository == 'netdata/netdata'
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_PASSWORD }}
- name: GitHub Container Registry Login
id: ghcr-login
if: github.repository == 'netdata/netdata'
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
@@ -208,14 +256,14 @@ jobs:
- name: Quay.io Login
id: quay-login
if: github.repository == 'netdata/netdata'
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ secrets.NETDATABOT_QUAY_USERNAME }}
password: ${{ secrets.NETDATABOT_QUAY_TOKEN }}
- name: Docker Build
id: build
- uses: docker/build-push-action@v4
+ uses: docker/build-push-action@v5
with:
platforms: linux/amd64,linux/i386,linux/arm/v7,linux/arm64,linux/ppc64le
push: ${{ github.repository == 'netdata/netdata' }}
@@ -278,7 +326,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
- name: Determine which tags to use
@@ -298,21 +346,21 @@ jobs:
run: echo "OFFICIAL_IMAGE=true" >> "${GITHUB_ENV}"
- name: Setup QEMU
id: qemu
- uses: docker/setup-qemu-action@v2
+ uses: docker/setup-qemu-action@v3
- name: Setup Buildx
id: buildx
- uses: docker/setup-buildx-action@v2
+ uses: docker/setup-buildx-action@v3
- name: Docker Hub Login
id: docker-hub-login
if: github.repository == 'netdata/netdata'
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_PASSWORD }}
- name: GitHub Container Registry Login
id: ghcr-login
if: github.repository == 'netdata/netdata'
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
@@ -320,14 +368,14 @@ jobs:
- name: Quay.io Login
id: quay-login
if: github.repository == 'netdata/netdata'
- uses: docker/login-action@v2
+ uses: docker/login-action@v3
with:
registry: quay.io
username: ${{ secrets.NETDATABOT_QUAY_USERNAME }}
password: ${{ secrets.NETDATABOT_QUAY_TOKEN }}
- name: Docker Build
id: build
- uses: docker/build-push-action@v4
+ uses: docker/build-push-action@v5
with:
platforms: linux/amd64,linux/i386,linux/arm/v7,linux/arm64,linux/ppc64le
push: ${{ github.repository == 'netdata/netdata' }}
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index 69fda40c3ee706..a0554b16779383 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -17,7 +17,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
- name: Run link check
diff --git a/.github/workflows/generate-integrations.yml b/.github/workflows/generate-integrations.yml
index 599cefbc0b88c1..4128e992561bd2 100644
--- a/.github/workflows/generate-integrations.yml
+++ b/.github/workflows/generate-integrations.yml
@@ -1,6 +1,5 @@
---
-# CI workflow used to regenerate `integrations/integrations.js` when
-# relevant source files are changed.
+# CI workflow used to regenerate `integrations/integrations.js` and accompanying documentation when relevant source files are changed.
name: Generate Integrations
on:
push:
@@ -28,7 +27,7 @@ jobs:
steps:
- name: Checkout Agent
id: checkout-agent
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
fetch-depth: 1
submodules: recursive
@@ -37,7 +36,7 @@ jobs:
run: echo "go_ref=$(cat packaging/go.d.version)" >> "${GITHUB_ENV}"
- name: Checkout Go
id: checkout-go
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
fetch-depth: 1
path: go.d.plugin
@@ -55,6 +54,14 @@ jobs:
run: |
source ./virtualenv/bin/activate
python3 integrations/gen_integrations.py
+ - name: Generate Integrations Documentation
+ id: generate-integrations-documentation
+ run: |
+ python3 integrations/gen_docs_integrations.py
+ - name: Generate collectors/COLLECTORS.md
+ id: generate-collectors-md
+ run: |
+ python3 integrations/gen_doc_collector_page.py
- name: Clean Up Temporary Data
id: clean
run: rm -rf go.d.plugin virtualenv
@@ -67,7 +74,7 @@ jobs:
branch: integrations-regen
title: Regenerate integrations.js
body: |
- Regenerate `integrations/integrations.js` based on the
+ Regenerate `integrations/integrations.js` and the accompanying documentation based on the
latest code.
This PR was auto-generated by
@@ -87,6 +94,8 @@ jobs:
Checkout Go: ${{ steps.checkout-go.outcome }}
Prepare Dependencies: ${{ steps.prep-deps.outcome }}
Generate Integrations: ${{ steps.generate.outcome }}
+ Generate Integrations Documentation: ${{ steps.generate-integrations-documentation.outcome }}
+ Generate collectors/COLLECTORS.md: ${{ steps.generate-collectors-md.outcome }}
Clean Up Temporary Data: ${{ steps.clean.outcome }}
Create PR: ${{ steps.create-pr.outcome }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }}
diff --git a/.github/workflows/monitor-releases.yml b/.github/workflows/monitor-releases.yml
new file mode 100644
index 00000000000000..649cf68aab7bac
--- /dev/null
+++ b/.github/workflows/monitor-releases.yml
@@ -0,0 +1,72 @@
+---
+name: Monitor-releases
+
+on:
+ release:
+ types: [released, deleted]
+ workflow_dispatch:
+ inputs:
+ channel:
+ description: 'Specify the release channel'
+ required: true
+ default: 'stable'
+
+
+concurrency: # This keeps multiple instances of the job from running concurrently for the same ref and event type.
+ group: monitor-${{ github.event.inputs.channel }}-releases-${{ github.ref }}-${{ github.event_name }}
+ cancel-in-progress: true
+
+jobs:
+ update-stable-agents-metadata:
+ name: update-stable-agents-metadata
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ token: ${{ secrets.NETDATABOT_GITHUB_TOKEN }}
+ - name: Init python environment
+ uses: actions/setup-python@v4
+ id: init-python
+ with:
+ python-version: "3.12"
+ - name: Setup python environment
+ id: setup-python
+ run: |
+ pip install -r .github/scripts/modules/requirements.txt
+ - name: Check for newer versions
+ id: check-newer-releases
+ run: |
+ python .github/scripts/check_latest_versions_per_channel.py "${{ github.event.inputs.channel }}"
+ - name: SSH setup
+ id: ssh-setup
+ if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata' && steps.check-newer-releases.outputs.versions_needs_update == 'true'
+ uses: shimataro/ssh-key-action@v2
+ with:
+ key: ${{ secrets.NETDATABOT_PACKAGES_SSH_KEY }}
+ name: id_ecdsa
+ known_hosts: ${{ secrets.PACKAGES_KNOWN_HOSTS }}
+ - name: Sync newer releases
+ id: sync-releases
+ if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata' && steps.check-newer-releases.outputs.versions_needs_update == 'true'
+ run: |
+ .github/scripts/upload-new-version-tags.sh
+ - name: Failure Notification
+ uses: rtCamp/action-slack-notify@v2
+ env:
+ SLACK_COLOR: 'danger'
+ SLACK_FOOTER: ''
+ SLACK_ICON_EMOJI: ':github-actions:'
+ SLACK_TITLE: 'Failed to update Agent release metadata:'
+ SLACK_USERNAME: 'GitHub Actions'
+ SLACK_MESSAGE: |-
+ ${{ github.repository }}: Failed to update stable Agent's metadata.
+ Checkout: ${{ steps.checkout.outcome }}
+ Init python: ${{ steps.init-python.outcome }}
+ Setup python: ${{ steps.setup-python.outcome }}
+ Check for newer stable releases: ${{ steps.check-newer-releases.outcome }}
+ Setup ssh: ${{ steps.ssh-setup.outcome }}
+ Syncing newer releases to packages.netdata.cloud: ${{ steps.sync-releases.outcome }}
+ SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_URL }}
+ if: failure()
diff --git a/.github/workflows/packagecloud.yml b/.github/workflows/packagecloud.yml
index ba70c177bc0451..3c427756a4ba2c 100644
--- a/.github/workflows/packagecloud.yml
+++ b/.github/workflows/packagecloud.yml
@@ -20,7 +20,7 @@ jobs:
- devel
steps:
- name: Checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
id: checkout
with:
submodules: recursive
diff --git a/.github/workflows/packaging-dummy.yml b/.github/workflows/packaging-dummy.yml
deleted file mode 100644
index 653227e1c8a30f..00000000000000
--- a/.github/workflows/packaging-dummy.yml
+++ /dev/null
@@ -1,80 +0,0 @@
----
-# Handles building of binary packages for the agent.
-#
-# This workflow exists so that we can make these required checks but
-# still skip running them on PRs where they are not relevant.
-name: Packages
-on:
- pull_request:
- types:
- - opened
- - reopened
- - labeled
- - synchronize
- paths-ignore: # This MUST be kept in-sync with the paths key for the packaging.yml workflow.
- - '**.c'
- - '**.cc'
- - '**.h'
- - '**.hh'
- - '**.in'
- - 'netdata.spec.in'
- - 'configure.ac'
- - '**/Makefile*'
- - 'Makefile*'
- - '.github/workflows/packaging.yml'
- - '.github/scripts/gen-matrix-packaging.py'
- - '.github/scripts/pkg-test.sh'
- - 'build/**'
- - 'packaging/*.sh'
- - 'packaging/*.checksums'
- - 'packaging/*.version'
- - 'contrib/debian/**'
- - 'aclk/aclk-schemas/'
- - 'ml/dlib/'
- - 'mqtt_websockets'
- - 'web/server/h2o/libh2o'
- - '!**.md'
-env:
- DISABLE_TELEMETRY: 1
- REPO_PREFIX: netdata/netdata
-concurrency:
- group: packages-${{ github.ref }}-${{ github.event_name }}
- cancel-in-progress: true
-jobs:
- matrix:
- name: Prepare Build Matrix
- runs-on: ubuntu-latest
- outputs:
- matrix: ${{ steps.set-matrix.outputs.matrix }}
- steps:
- - name: Checkout
- id: checkout
- uses: actions/checkout@v3
- - name: Prepare tools
- id: prepare
- run: |
- sudo apt-get update && sudo apt-get install -y python3-ruamel.yaml
- - name: Read build matrix
- id: set-matrix
- run: |
- if [ "${{ github.event_name }}" = "pull_request" ] && \
- [ "${{ !contains(github.event.pull_request.labels.*.name, 'run-ci/packaging') }}" = "true" ]; then
- matrix="$(.github/scripts/gen-matrix-packaging.py 1)"
- else
- matrix="$(.github/scripts/gen-matrix-packaging.py 0)"
- fi
- echo "Generated matrix: ${matrix}"
- echo "matrix=${matrix}" >> "${GITHUB_OUTPUT}"
-
- build:
- name: Build
- runs-on: ubuntu-latest
- env:
- DOCKER_CLI_EXPERIMENTAL: enabled
- needs:
- - matrix
- strategy:
- matrix: ${{ fromJson(needs.matrix.outputs.matrix) }}
- fail-fast: false
- steps:
- - run: echo 'NOT REQUIRED'
diff --git a/.github/workflows/packaging.yml b/.github/workflows/packaging.yml
index 7e8c7e527a5925..eb936c4d902bbd 100644
--- a/.github/workflows/packaging.yml
+++ b/.github/workflows/packaging.yml
@@ -8,31 +8,6 @@ on:
- reopened
- labeled
- synchronize
- paths: # This MUST be kept in-sync with the paths-ignore key for the packaging-dummy.yml workflow.
- - '**.c'
- - '**.cc'
- - '**.h'
- - '**.hh'
- - '**.in'
- - 'netdata.spec.in'
- - 'configure.ac'
- - '**/Makefile*'
- - 'Makefile*'
- - '.github/workflows/packaging.yml'
- - '.github/scripts/gen-matrix-packaging.py'
- - '.github/scripts/pkg-test.sh'
- - 'build/**'
- - 'packaging/*.sh'
- - 'packaging/*.checksums'
- - 'packaging/*.version'
- - 'contrib/debian/**'
- - 'aclk/aclk-schemas/'
- - 'ml/dlib/'
- - 'mqtt_websockets'
- - 'web/server/h2o/libh2o'
- - '!**.md'
- branches:
- - master
push:
branches:
- master
@@ -52,6 +27,57 @@ concurrency:
group: packages-${{ github.ref }}-${{ github.event_name }}
cancel-in-progress: true
jobs:
+ file-check: # Check what files changed if we’re being run in a PR or on a push.
+ name: Check Modified Files
+ runs-on: ubuntu-latest
+ outputs:
+ run: ${{ steps.check-run.outputs.run }}
+ steps:
+ - name: Checkout
+ id: checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+ submodules: recursive
+ - name: Check files
+ id: file-check
+ uses: tj-actions/changed-files@v40
+ with:
+ since_last_remote_commit: ${{ github.event_name != 'pull_request' }}
+ files: |
+ **.c
+ **.cc
+ **.h
+ **.hh
+ **.in
+ netdata.spec.in
+ configure.ac
+ **/Makefile*
+ Makefile*
+ .github/data/distros.yml
+ .github/workflows/packaging.yml
+ .github/scripts/gen-matrix-packaging.py
+ .github/scripts/pkg-test.sh
+ build/**
+ packaging/*.sh
+ packaging/*.checksums
+ packaging/*.version
+ contrib/debian/**
+ aclk/aclk-schemas/
+ ml/dlib/
+ mqtt_websockets
+ web/server/h2o/libh2o
+ files_ignore: |
+ **.md
+ - name: Check Run
+ id: check-run
+ run: |
+ if [ "${{ steps.file-check.outputs.any_modified }}" == "true" ] || [ "${{ github.event_name }}" == "workflow_dispatch" ]; then
+ echo 'run=true' >> "${GITHUB_OUTPUT}"
+ else
+ echo 'run=false' >> "${GITHUB_OUTPUT}"
+ fi
+
matrix:
name: Prepare Build Matrix
runs-on: ubuntu-latest
@@ -60,7 +86,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Prepare tools
id: prepare
run: |
@@ -107,7 +133,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Check Version
id: check-version
run: |
@@ -161,6 +187,7 @@ jobs:
needs:
- matrix
- version-check
+ - file-check
strategy:
matrix: ${{ fromJson(needs.matrix.outputs.matrix) }}
# We intentionally disable the fail-fast behavior so that a
@@ -169,24 +196,31 @@ jobs:
fail-fast: false
max-parallel: 8
steps:
+ - name: Skip Check
+ id: skip
+ if: needs.file-check.outputs.run != 'true'
+ run: echo "SKIPPED"
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ if: needs.file-check.outputs.run == 'true'
+ uses: actions/checkout@v4
with:
fetch-depth: 0 # We need full history for versioning
submodules: recursive
- name: Setup QEMU
id: qemu
- if: matrix.platform != 'linux/amd64' && matrix.platform != 'linux/i386'
- uses: docker/setup-qemu-action@v2
+ if: matrix.platform != 'linux/amd64' && matrix.platform != 'linux/i386' && needs.file-check.outputs.run == 'true'
+ uses: docker/setup-qemu-action@v3
- name: Prepare Docker Environment
id: docker-config
+ if: needs.file-check.outputs.run == 'true'
shell: bash
run: |
echo '{"cgroup-parent": "actions-job.slice", "experimental": true}' | sudo tee /etc/docker/daemon.json 2>/dev/null
sudo service docker restart
- name: Fetch images
id: fetch-images
+ if: needs.file-check.outputs.run == 'true'
uses: nick-invision/retry@v2
with:
max_attempts: 3
@@ -194,15 +228,17 @@ jobs:
timeout_seconds: 900
command: |
docker pull --platform ${{ matrix.platform }} ${{ matrix.base_image }}
- docker pull --platform ${{ matrix.platform }} netdata/package-builders:${{ matrix.distro }}${{ matrix.version }}
+ docker pull --platform ${{ matrix.platform }} netdata/package-builders:${{ matrix.distro }}${{ matrix.version }}-v1
- name: Build Packages
id: build
+ if: needs.file-check.outputs.run == 'true'
shell: bash
run: |
docker run --security-opt seccomp=unconfined -e DISABLE_TELEMETRY=1 -e VERSION=${{ needs.version-check.outputs.version }} \
- --platform=${{ matrix.platform }} -v "$PWD":/netdata netdata/package-builders:${{ matrix.distro }}${{ matrix.version }}
+ --platform=${{ matrix.platform }} -v "$PWD":/netdata netdata/package-builders:${{ matrix.distro }}${{ matrix.version }}-v1
- name: Save Packages
id: artifacts
+ if: needs.file-check.outputs.run == 'true'
continue-on-error: true
uses: actions/upload-artifact@v3
with:
@@ -210,6 +246,7 @@ jobs:
path: ${{ github.workspace }}/artifacts/*
- name: Test Packages
id: test
+ if: needs.file-check.outputs.run == 'true'
shell: bash
run: |
docker run --security-opt seccomp=unconfined -e DISABLE_TELEMETRY=1 -e DISTRO=${{ matrix.distro }} \
@@ -218,7 +255,7 @@ jobs:
/netdata/.github/scripts/pkg-test.sh
- name: Upload to PackageCloud
id: upload
- if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata'
+ if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata' && needs.file-check.outputs.run == 'true'
continue-on-error: true
shell: bash
env:
@@ -232,7 +269,7 @@ jobs:
done
- name: SSH setup
id: ssh-setup
- if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata'
+ if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata' && needs.file-check.outputs.run == 'true'
uses: shimataro/ssh-key-action@v2
with:
key: ${{ secrets.NETDATABOT_PACKAGES_SSH_KEY }}
@@ -240,7 +277,7 @@ jobs:
known_hosts: ${{ secrets.PACKAGES_KNOWN_HOSTS }}
- name: Upload to packages.netdata.cloud
id: package-upload
- if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata'
+ if: github.event_name == 'workflow_dispatch' && github.repository == 'netdata/netdata' && needs.file-check.outputs.run == 'true'
run: |
.github/scripts/package-upload.sh \
${{ matrix.repo_distro }} \
@@ -272,4 +309,5 @@ jobs:
&& github.event_name != 'pull_request'
&& startsWith(github.ref, 'refs/heads/master')
&& github.repository == 'netdata/netdata'
+ && needs.file-check.outputs.run == 'true'
}}
diff --git a/.github/workflows/platform-eol-check.yml b/.github/workflows/platform-eol-check.yml
index d1f4416cde6820..ae290a973ef974 100644
--- a/.github/workflows/platform-eol-check.yml
+++ b/.github/workflows/platform-eol-check.yml
@@ -22,7 +22,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Prepare tools
id: prepare
run: |
@@ -66,7 +66,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
# Actually check the EOL date for the platform.
- name: Check EOL Date
id: check
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index e675d789fb3bc2..2fa51cc52f5327 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -29,7 +29,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
fetch-depth: 0
submodules: recursive
@@ -116,7 +116,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
ref: ${{ needs.update-changelogs.outputs.ref }}
- name: Trigger build
@@ -151,7 +151,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
ref: ${{ needs.update-changelogs.outputs.ref }}
- name: Trigger build
@@ -186,7 +186,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
ref: ${{ needs.update-changelogs.outputs.ref }}
- name: Trigger build
diff --git a/.github/workflows/repoconfig-packages.yml b/.github/workflows/repoconfig-packages.yml
index e2b41570fa39c7..df8fac2044661c 100644
--- a/.github/workflows/repoconfig-packages.yml
+++ b/.github/workflows/repoconfig-packages.yml
@@ -27,7 +27,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
- name: Prepare tools
id: prepare
run: |
@@ -77,7 +77,7 @@ jobs:
steps:
- name: Checkout
id: checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
# Unlike normally, we do not need a deep clone or submodules for this.
- name: Fetch base image
id: fetch-images
diff --git a/.github/workflows/review.yml b/.github/workflows/review.yml
index 7e76717edf581b..8cb61f2cf8fcc9 100644
--- a/.github/workflows/review.yml
+++ b/.github/workflows/review.yml
@@ -23,7 +23,7 @@ jobs:
yamllint: ${{ steps.yamllint.outputs.run }}
steps:
- name: Clone repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0
@@ -112,7 +112,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Git clone repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0
@@ -129,7 +129,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Git clone repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: false
fetch-depth: 0
@@ -162,7 +162,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Git clone repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0
@@ -182,7 +182,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Git clone repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0
@@ -203,7 +203,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Git clone repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Run hadolint
@@ -219,7 +219,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Git clone repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0
@@ -234,6 +234,7 @@ jobs:
./.git/*
packaging/makeself/makeself.sh
packaging/makeself/makeself-header.sh
+ ./fluent-bit/*
yamllint:
name: yamllint
@@ -242,7 +243,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Git clone repository
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
fetch-depth: 0
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 5f83a440598336..46384ffc5e1fec 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -25,7 +25,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout
- uses: actions/checkout@v3
+ uses: actions/checkout@v4
with:
submodules: recursive
- name: Prepare environment
diff --git a/.gitignore b/.gitignore
index daa55c0300af35..a53025c3be353e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,6 +18,7 @@ Makefile.in
.*.swp
*.old
*.log
+!collectors/log2journal/tests.d/*.log
*.pyc
Makefile
@@ -41,6 +42,9 @@ sha256sums.txt
# netdata binaries
netdata
netdatacli
+systemd-cat-native
+log2journal
+!log2journal/
!netdata/
upload/
artifacts/
@@ -83,6 +87,9 @@ debugfs.plugin
systemd-journal.plugin
!systemd-journal.plugin/
+logs-management.plugin
+!logs-management.plugin/
+
# protoc generated files
*.pb.cc
*.pb.h
@@ -142,6 +149,8 @@ daemon/get-kubernetes-labels.sh
health/notifications/alarm-notify.sh
claim/netdata-claim.sh
+collectors/cgroups.plugin/cgroup-name.sh
+collectors/cgroups.plugin/cgroup-network-helper.sh
collectors/tc.plugin/tc-qos-helper.sh
collectors/charts.d.plugin/charts.d.plugin
collectors/python.d.plugin/python.d.plugin
@@ -149,6 +158,8 @@ collectors/ioping.plugin/ioping.plugin
collectors/go.d.plugin
web/netdata-switch-dashboard.sh
+logsmanagement/stress_test/stress_test
+
# installer generated files
/netdata-uninstaller.sh
/netdata-updater.sh
diff --git a/.gitmodules b/.gitmodules
index 2dae4a1dd237bc..ac8e131f5d482c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -13,3 +13,8 @@
path = web/server/h2o/libh2o
url = https://github.com/h2o/h2o.git
ignore = untracked
+[submodule "fluent-bit"]
+ path = fluent-bit
+ url = https://github.com/fluent/fluent-bit.git
+ shallow = true
+ ignore = dirty
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7055f4af179d9e..2fde81e9f7518d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,129 +1,447 @@
# Changelog
-## [v1.42.4](https://github.com/netdata/netdata/tree/v1.42.4) (2023-09-18)
+## [v1.44.1](https://github.com/netdata/netdata/tree/v1.44.1) (2023-12-12)
-[Full Changelog](https://github.com/netdata/netdata/compare/v1.42.3...v1.42.4)
+[Full Changelog](https://github.com/netdata/netdata/compare/v1.44.0...v1.44.1)
+
+**Merged pull requests:**
+
+- uninstaller remove log2journal and systemd-cat-native [\#16585](https://github.com/netdata/netdata/pull/16585) ([ilyam8](https://github.com/ilyam8))
+- Handle coverity issues related to Y2K38\_SAFETY [\#16583](https://github.com/netdata/netdata/pull/16583) ([stelfrag](https://github.com/stelfrag))
+- Add Alpine Linux 3.19 to CI. [\#16579](https://github.com/netdata/netdata/pull/16579) ([Ferroin](https://github.com/Ferroin))
+- make debugfs exit on sigpipe [\#16569](https://github.com/netdata/netdata/pull/16569) ([ilyam8](https://github.com/ilyam8))
+- Fix memory leak during host chart label cleanup [\#16568](https://github.com/netdata/netdata/pull/16568) ([stelfrag](https://github.com/stelfrag))
+- fix cpu arch/ram/disk values in buildinfo [\#16567](https://github.com/netdata/netdata/pull/16567) ([ilyam8](https://github.com/ilyam8))
+- Resolve issue on startup in servers with 1 core [\#16565](https://github.com/netdata/netdata/pull/16565) ([stelfrag](https://github.com/stelfrag))
+- Fix release metadata workflow [\#16563](https://github.com/netdata/netdata/pull/16563) ([tkatsoulas](https://github.com/tkatsoulas))
+- Make the systemd-journal mandatory package on Centos 7 and Amazon linux 2 [\#16562](https://github.com/netdata/netdata/pull/16562) ([tkatsoulas](https://github.com/tkatsoulas))
+- Fix for AMD GPU drm different format proc file [\#16561](https://github.com/netdata/netdata/pull/16561) ([MrZammler](https://github.com/MrZammler))
+- Revert "remove discourse badge from readme" [\#16560](https://github.com/netdata/netdata/pull/16560) ([ilyam8](https://github.com/ilyam8))
+- Change the workflow on how we set the right permissions for perf-plugin [\#16558](https://github.com/netdata/netdata/pull/16558) ([tkatsoulas](https://github.com/tkatsoulas))
+- Add README for gorilla [\#16553](https://github.com/netdata/netdata/pull/16553) ([vkalintiris](https://github.com/vkalintiris))
+- Bump new version on the release changelog GHA [\#16551](https://github.com/netdata/netdata/pull/16551) ([tkatsoulas](https://github.com/tkatsoulas))
+- set "HOME" after switching to netdata user [\#16548](https://github.com/netdata/netdata/pull/16548) ([ilyam8](https://github.com/ilyam8))
+- wip documentation about functions table [\#16535](https://github.com/netdata/netdata/pull/16535) ([ktsaou](https://github.com/ktsaou))
+- Remove openSUSE 15.4 from CI [\#16449](https://github.com/netdata/netdata/pull/16449) ([tkatsoulas](https://github.com/tkatsoulas))
+- Remove fedora 37 from CI [\#16422](https://github.com/netdata/netdata/pull/16422) ([tkatsoulas](https://github.com/tkatsoulas))
+
+## [v1.44.0](https://github.com/netdata/netdata/tree/v1.44.0) (2023-12-06)
+
+[Full Changelog](https://github.com/netdata/netdata/compare/v1.43.2...v1.44.0)
**Merged pull requests:**
-- Regenerate integrations.js [\#15985](https://github.com/netdata/netdata/pull/15985) ([netdatabot](https://github.com/netdatabot))
-- Re-store rrdvars on late dimensions [\#15984](https://github.com/netdata/netdata/pull/15984) ([MrZammler](https://github.com/MrZammler))
-- Functions: allow collectors to be restarted [\#15983](https://github.com/netdata/netdata/pull/15983) ([ktsaou](https://github.com/ktsaou))
-- Metadata fixes for some collectors [\#15982](https://github.com/netdata/netdata/pull/15982) ([Ancairon](https://github.com/Ancairon))
-- update go.d.plugin to v0.55.0 [\#15981](https://github.com/netdata/netdata/pull/15981) ([ilyam8](https://github.com/ilyam8))
-- bump UI to v6.37.1 [\#15980](https://github.com/netdata/netdata/pull/15980) ([ilyam8](https://github.com/ilyam8))
-- apps.plugin function is not thread safe [\#15978](https://github.com/netdata/netdata/pull/15978) ([ktsaou](https://github.com/ktsaou))
-- Facets: fixes 5 [\#15976](https://github.com/netdata/netdata/pull/15976) ([ktsaou](https://github.com/ktsaou))
-- facets histogram: do not send db retention for facets [\#15974](https://github.com/netdata/netdata/pull/15974) ([ktsaou](https://github.com/ktsaou))
-- extend ml default training from ~24 to ~48 hours [\#15971](https://github.com/netdata/netdata/pull/15971) ([andrewm4894](https://github.com/andrewm4894))
-- facets histogram when empty [\#15970](https://github.com/netdata/netdata/pull/15970) ([ktsaou](https://github.com/ktsaou))
-- facets: do not shadow local variable [\#15968](https://github.com/netdata/netdata/pull/15968) ([ktsaou](https://github.com/ktsaou))
-- Skip trying to preserve file owners when bundling external code. [\#15966](https://github.com/netdata/netdata/pull/15966) ([Ferroin](https://github.com/Ferroin))
-- fix using undefined var when loading job statuses in python.d [\#15965](https://github.com/netdata/netdata/pull/15965) ([ilyam8](https://github.com/ilyam8))
-- facets: data-only queries [\#15961](https://github.com/netdata/netdata/pull/15961) ([ktsaou](https://github.com/ktsaou))
-- Clarifying the possible installation types [\#15958](https://github.com/netdata/netdata/pull/15958) ([tkatsoulas](https://github.com/tkatsoulas))
-- fix journal direction parsing [\#15957](https://github.com/netdata/netdata/pull/15957) ([ktsaou](https://github.com/ktsaou))
-- facets and journal improvements [\#15956](https://github.com/netdata/netdata/pull/15956) ([ktsaou](https://github.com/ktsaou))
-- Fix CID 400366 [\#15953](https://github.com/netdata/netdata/pull/15953) ([stelfrag](https://github.com/stelfrag))
-- Update descriptions. [\#15952](https://github.com/netdata/netdata/pull/15952) ([thiagoftsm](https://github.com/thiagoftsm))
-- Update slabinfo metadata [\#15951](https://github.com/netdata/netdata/pull/15951) ([thiagoftsm](https://github.com/thiagoftsm))
-- Disk Labels [\#15949](https://github.com/netdata/netdata/pull/15949) ([ktsaou](https://github.com/ktsaou))
-- streaming logs [\#15948](https://github.com/netdata/netdata/pull/15948) ([ktsaou](https://github.com/ktsaou))
-- Regenerate integrations.js [\#15946](https://github.com/netdata/netdata/pull/15946) ([netdatabot](https://github.com/netdatabot))
-- Integrations: Add a note to enable the collectors [\#15945](https://github.com/netdata/netdata/pull/15945) ([MrZammler](https://github.com/MrZammler))
-- Integrations: Add a note to install charts.d plugin [\#15943](https://github.com/netdata/netdata/pull/15943) ([MrZammler](https://github.com/MrZammler))
-- Add description about packages [\#15941](https://github.com/netdata/netdata/pull/15941) ([thiagoftsm](https://github.com/thiagoftsm))
-- facets optimizations [\#15940](https://github.com/netdata/netdata/pull/15940) ([ktsaou](https://github.com/ktsaou))
-- improved facets info [\#15936](https://github.com/netdata/netdata/pull/15936) ([ktsaou](https://github.com/ktsaou))
-- feat: Adds access control configuration for ntfy [\#15932](https://github.com/netdata/netdata/pull/15932) ([miversen33](https://github.com/miversen33))
-- fix memory leak on prometheus exporter and code cleanup [\#15929](https://github.com/netdata/netdata/pull/15929) ([ktsaou](https://github.com/ktsaou))
-- systemd-journal and facets: info and sources [\#15928](https://github.com/netdata/netdata/pull/15928) ([ktsaou](https://github.com/ktsaou))
-- systemd-journal and facets improvements [\#15926](https://github.com/netdata/netdata/pull/15926) ([ktsaou](https://github.com/ktsaou))
-- add specific info on how to access the dashboards [\#15925](https://github.com/netdata/netdata/pull/15925) ([hugovalente-pm](https://github.com/hugovalente-pm))
-- Reduce workload during cleanup [\#15919](https://github.com/netdata/netdata/pull/15919) ([stelfrag](https://github.com/stelfrag))
-- Replace \_ with spaces for name variable for ntfy [\#15909](https://github.com/netdata/netdata/pull/15909) ([MAH69IK](https://github.com/MAH69IK))
-- python.d/sensors: Increase voltage limits 127 -\> 400 [\#15905](https://github.com/netdata/netdata/pull/15905) ([kylemanna](https://github.com/kylemanna))
-- Assorted Dockerfile cleanup. [\#15902](https://github.com/netdata/netdata/pull/15902) ([Ferroin](https://github.com/Ferroin))
-- Improve shutdown of the metadata thread [\#15901](https://github.com/netdata/netdata/pull/15901) ([stelfrag](https://github.com/stelfrag))
-- bump ui to v6.32.0 [\#15897](https://github.com/netdata/netdata/pull/15897) ([andrewm4894](https://github.com/andrewm4894))
-- Update change-metrics-storage.md [\#15896](https://github.com/netdata/netdata/pull/15896) ([Ancairon](https://github.com/Ancairon))
-- make `anomaly_detection.type_anomaly_rate` stacked [\#15895](https://github.com/netdata/netdata/pull/15895) ([andrewm4894](https://github.com/andrewm4894))
-- Update pfsense.md [\#15894](https://github.com/netdata/netdata/pull/15894) ([Ancairon](https://github.com/Ancairon))
-- Initial tooling for Integrations Documentation [\#15893](https://github.com/netdata/netdata/pull/15893) ([Ancairon](https://github.com/Ancairon))
-- Reset the obsolete flag on service thread [\#15892](https://github.com/netdata/netdata/pull/15892) ([MrZammler](https://github.com/MrZammler))
-- Add better recovery for corrupted metadata [\#15891](https://github.com/netdata/netdata/pull/15891) ([stelfrag](https://github.com/stelfrag))
-- Add index to ACLK table to improve update statements [\#15890](https://github.com/netdata/netdata/pull/15890) ([stelfrag](https://github.com/stelfrag))
-- Limit atomic operations for statistics [\#15887](https://github.com/netdata/netdata/pull/15887) ([ktsaou](https://github.com/ktsaou))
-- Properly document issues with installing on hosts without IPv4. [\#15882](https://github.com/netdata/netdata/pull/15882) ([Ferroin](https://github.com/Ferroin))
-- allow any field to be a facet [\#15880](https://github.com/netdata/netdata/pull/15880) ([ktsaou](https://github.com/ktsaou))
-- Regenerate integrations.js [\#15879](https://github.com/netdata/netdata/pull/15879) ([netdatabot](https://github.com/netdatabot))
-- use the newer XXH3 128bits algorithm, instead of the classic XXH128 [\#15878](https://github.com/netdata/netdata/pull/15878) ([ktsaou](https://github.com/ktsaou))
-- Skip copying environment/install-type files when checking existing installs. [\#15876](https://github.com/netdata/netdata/pull/15876) ([Ferroin](https://github.com/Ferroin))
-- ML add new `delete old models param` to readme [\#15873](https://github.com/netdata/netdata/pull/15873) ([andrewm4894](https://github.com/andrewm4894))
-- Update SQLITE version to 3.42.0 [\#15870](https://github.com/netdata/netdata/pull/15870) ([stelfrag](https://github.com/stelfrag))
-- Regenerate integrations.js [\#15867](https://github.com/netdata/netdata/pull/15867) ([netdatabot](https://github.com/netdatabot))
-- Add a fail reason to pinpoint exactly what went wrong [\#15866](https://github.com/netdata/netdata/pull/15866) ([stelfrag](https://github.com/stelfrag))
-- Add plugin and module information to collector integrations. [\#15864](https://github.com/netdata/netdata/pull/15864) ([Ferroin](https://github.com/Ferroin))
-- Regenerate integrations.js [\#15862](https://github.com/netdata/netdata/pull/15862) ([netdatabot](https://github.com/netdatabot))
-- Explicitly depend on version-matched plugins in native packages. [\#15861](https://github.com/netdata/netdata/pull/15861) ([Ferroin](https://github.com/Ferroin))
-- Apply a label prefix for netdata labels [\#15860](https://github.com/netdata/netdata/pull/15860) ([kevin-fwu](https://github.com/kevin-fwu))
-- fix proc meminfo cached calculation [\#15859](https://github.com/netdata/netdata/pull/15859) ([ilyam8](https://github.com/ilyam8))
-- Fix compilation warnings [\#15858](https://github.com/netdata/netdata/pull/15858) ([stelfrag](https://github.com/stelfrag))
-- packaging cleanup after \#15842 [\#15857](https://github.com/netdata/netdata/pull/15857) ([ilyam8](https://github.com/ilyam8))
-- Add a chart that groups anomaly rate by chart type. [\#15856](https://github.com/netdata/netdata/pull/15856) ([vkalintiris](https://github.com/vkalintiris))
-- fix packaging static build openssl 32bit [\#15855](https://github.com/netdata/netdata/pull/15855) ([ilyam8](https://github.com/ilyam8))
-- fix packaging mark stable static build [\#15854](https://github.com/netdata/netdata/pull/15854) ([ilyam8](https://github.com/ilyam8))
-- eBPF socket function [\#15850](https://github.com/netdata/netdata/pull/15850) ([thiagoftsm](https://github.com/thiagoftsm))
-- Facets histograms [\#15846](https://github.com/netdata/netdata/pull/15846) ([ktsaou](https://github.com/ktsaou))
-- reworked pluginsd caching of RDAs to avoid crashes [\#15845](https://github.com/netdata/netdata/pull/15845) ([ktsaou](https://github.com/ktsaou))
-- Fix static build SSL [\#15842](https://github.com/netdata/netdata/pull/15842) ([ktsaou](https://github.com/ktsaou))
-- bump bundled ui to v6.29.0 [\#15841](https://github.com/netdata/netdata/pull/15841) ([ilyam8](https://github.com/ilyam8))
-- Fix configure: WARNING: unrecognized options: --with-zlib [\#15840](https://github.com/netdata/netdata/pull/15840) ([stelfrag](https://github.com/stelfrag))
-- Fix compilation warning [\#15839](https://github.com/netdata/netdata/pull/15839) ([stelfrag](https://github.com/stelfrag))
-- Fix warning when compiling with -flto [\#15838](https://github.com/netdata/netdata/pull/15838) ([stelfrag](https://github.com/stelfrag))
-- workaround for systems that do not have SD\_JOURNAL\_OS\_ROOT [\#15837](https://github.com/netdata/netdata/pull/15837) ([ktsaou](https://github.com/ktsaou))
-- added ilove.html [\#15836](https://github.com/netdata/netdata/pull/15836) ([ktsaou](https://github.com/ktsaou))
-- Fix CID 382964: Code maintainability issues \(SIZEOF\_MISMATCH\) [\#15833](https://github.com/netdata/netdata/pull/15833) ([stelfrag](https://github.com/stelfrag))
-- Fix coverity 393052: API usage errors \(LOCK\) [\#15832](https://github.com/netdata/netdata/pull/15832) ([stelfrag](https://github.com/stelfrag))
-- systemd-journal in containers [\#15830](https://github.com/netdata/netdata/pull/15830) ([ktsaou](https://github.com/ktsaou))
-- RPM: fixed attrs for conf.d dirs [\#15828](https://github.com/netdata/netdata/pull/15828) ([k0ste](https://github.com/k0ste))
-- Avoid resource leak [\#15827](https://github.com/netdata/netdata/pull/15827) ([stelfrag](https://github.com/stelfrag))
-- Release fd if setsockopt or bind fails [\#15826](https://github.com/netdata/netdata/pull/15826) ([stelfrag](https://github.com/stelfrag))
-- Fix use after free [\#15825](https://github.com/netdata/netdata/pull/15825) ([stelfrag](https://github.com/stelfrag))
-- Improve dyncfg exit [\#15824](https://github.com/netdata/netdata/pull/15824) ([underhood](https://github.com/underhood))
-- Release job message status to avoid memory leak [\#15822](https://github.com/netdata/netdata/pull/15822) ([stelfrag](https://github.com/stelfrag))
-- ML improve init [\#15819](https://github.com/netdata/netdata/pull/15819) ([stelfrag](https://github.com/stelfrag))
-- Update cmakelist [\#15817](https://github.com/netdata/netdata/pull/15817) ([stelfrag](https://github.com/stelfrag))
-- added /api/v2/ilove.svg endpoint [\#15815](https://github.com/netdata/netdata/pull/15815) ([ktsaou](https://github.com/ktsaou))
-- systemd-journal fixes [\#15814](https://github.com/netdata/netdata/pull/15814) ([ktsaou](https://github.com/ktsaou))
-- fix packaging: link health.log to stdout [\#15813](https://github.com/netdata/netdata/pull/15813) ([ilyam8](https://github.com/ilyam8))
-- docs rename alarm to alert [\#15812](https://github.com/netdata/netdata/pull/15812) ([ilyam8](https://github.com/ilyam8))
-- bump ui to v6.28.0 [\#15810](https://github.com/netdata/netdata/pull/15810) ([ilyam8](https://github.com/ilyam8))
-- return 412 instead of 403 when a bearer token is required [\#15808](https://github.com/netdata/netdata/pull/15808) ([ktsaou](https://github.com/ktsaou))
-- installer setuid fallback for perf and slabinfo plugins [\#15807](https://github.com/netdata/netdata/pull/15807) ([ilyam8](https://github.com/ilyam8))
-- fix api v1 mgmt/health [\#15806](https://github.com/netdata/netdata/pull/15806) ([underhood](https://github.com/underhood))
-- Fix systemd journal build deps in DEB packages. [\#15805](https://github.com/netdata/netdata/pull/15805) ([Ferroin](https://github.com/Ferroin))
-- Clean up python deps for RPM packages. [\#15804](https://github.com/netdata/netdata/pull/15804) ([Ferroin](https://github.com/Ferroin))
-- Add proper SUID fallback for DEB plugin packages. [\#15803](https://github.com/netdata/netdata/pull/15803) ([Ferroin](https://github.com/Ferroin))
-- nfacct.plugin increase restart time from 4 hours to 1 day [\#15801](https://github.com/netdata/netdata/pull/15801) ([ilyam8](https://github.com/ilyam8))
-- Function systemd-journal: always have a nd\_journal\_process [\#15798](https://github.com/netdata/netdata/pull/15798) ([ktsaou](https://github.com/ktsaou))
-- prevent reporting negative retention when the db is empty [\#15796](https://github.com/netdata/netdata/pull/15796) ([ktsaou](https://github.com/ktsaou))
-- Fix typo in Readme [\#15794](https://github.com/netdata/netdata/pull/15794) ([shyamvalsan](https://github.com/shyamvalsan))
-- fix hpssa handle unassigned drives [\#15793](https://github.com/netdata/netdata/pull/15793) ([ilyam8](https://github.com/ilyam8))
-- count functions as collections, to restart plugins [\#15787](https://github.com/netdata/netdata/pull/15787) ([ktsaou](https://github.com/ktsaou))
-- Set correct path for ansible-playbook in deployment tutorial [\#15786](https://github.com/netdata/netdata/pull/15786) ([novotnyJiri](https://github.com/novotnyJiri))
-- minor Dyncfg mvp0 fixes [\#15785](https://github.com/netdata/netdata/pull/15785) ([underhood](https://github.com/underhood))
-- fix docker-compose example [\#15784](https://github.com/netdata/netdata/pull/15784) ([zhqu1148980644](https://github.com/zhqu1148980644))
-- mark integrations milestones as completed in README.md [\#15783](https://github.com/netdata/netdata/pull/15783) ([tkatsoulas](https://github.com/tkatsoulas))
-- Update an oversight on the openSUSE 15.5 packages [\#15781](https://github.com/netdata/netdata/pull/15781) ([tkatsoulas](https://github.com/tkatsoulas))
-- Bump openssl version of static builds to 1.1.1v [\#15779](https://github.com/netdata/netdata/pull/15779) ([tkatsoulas](https://github.com/tkatsoulas))
-- fix: the cleanup was not performed during the kickstart.sh dry run [\#15775](https://github.com/netdata/netdata/pull/15775) ([ilyam8](https://github.com/ilyam8))
-- don't return `-1` if the socket was closed [\#15771](https://github.com/netdata/netdata/pull/15771) ([moonbreon](https://github.com/moonbreon))
-- Increase alert snapshot chunk size [\#15748](https://github.com/netdata/netdata/pull/15748) ([MrZammler](https://github.com/MrZammler))
-- Added CentOS-Stream to distros [\#15742](https://github.com/netdata/netdata/pull/15742) ([k0ste](https://github.com/k0ste))
-- Unconditionally delete very old models. [\#15720](https://github.com/netdata/netdata/pull/15720) ([vkalintiris](https://github.com/vkalintiris))
-- Misc code cleanup [\#15665](https://github.com/netdata/netdata/pull/15665) ([stelfrag](https://github.com/stelfrag))
-- Metadata cleanup improvements [\#15462](https://github.com/netdata/netdata/pull/15462) ([stelfrag](https://github.com/stelfrag))
+- update bundled UI to v6.66.1 [\#16554](https://github.com/netdata/netdata/pull/16554) ([ilyam8](https://github.com/ilyam8))
+- Improve page validity check during database extent load [\#16552](https://github.com/netdata/netdata/pull/16552) ([stelfrag](https://github.com/stelfrag))
+- Proper Learn-friendly links [\#16547](https://github.com/netdata/netdata/pull/16547) ([Ancairon](https://github.com/Ancairon))
+- docs required for release [\#16546](https://github.com/netdata/netdata/pull/16546) ([ktsaou](https://github.com/ktsaou))
+- Add option to change page type for tier 0 to gorilla [\#16545](https://github.com/netdata/netdata/pull/16545) ([vkalintiris](https://github.com/vkalintiris))
+- fix alpine deps [\#16543](https://github.com/netdata/netdata/pull/16543) ([tkatsoulas](https://github.com/tkatsoulas))
+- change level to debug "took too long to be updated" [\#16540](https://github.com/netdata/netdata/pull/16540) ([ilyam8](https://github.com/ilyam8))
+- apps: fix uptime for groups with 0 processes [\#16538](https://github.com/netdata/netdata/pull/16538) ([ilyam8](https://github.com/ilyam8))
+- Regenerate integrations.js [\#16536](https://github.com/netdata/netdata/pull/16536) ([netdatabot](https://github.com/netdatabot))
+- Reorg kickstart guide's steps [\#16534](https://github.com/netdata/netdata/pull/16534) ([tkatsoulas](https://github.com/tkatsoulas))
+- update go.d plugin to v0.57.2 [\#16533](https://github.com/netdata/netdata/pull/16533) ([ilyam8](https://github.com/ilyam8))
+- Update getting-started-light-poc.md [\#16532](https://github.com/netdata/netdata/pull/16532) ([M4itee](https://github.com/M4itee))
+- Acquire receiver\_lock to avoid race condition [\#16531](https://github.com/netdata/netdata/pull/16531) ([stelfrag](https://github.com/stelfrag))
+- link aclk.log to stdout in docker [\#16529](https://github.com/netdata/netdata/pull/16529) ([ilyam8](https://github.com/ilyam8))
+- Update getting-started.md [\#16528](https://github.com/netdata/netdata/pull/16528) ([Ancairon](https://github.com/Ancairon))
+- Make image available to Learn + add a category overview page for new … [\#16527](https://github.com/netdata/netdata/pull/16527) ([Ancairon](https://github.com/Ancairon))
+- logs-management: Disable logs management monitoring section [\#16525](https://github.com/netdata/netdata/pull/16525) ([Dim-P](https://github.com/Dim-P))
+- log method = none is not respected [\#16523](https://github.com/netdata/netdata/pull/16523) ([ktsaou](https://github.com/ktsaou))
+- include more cases for megacli degraded state [\#16522](https://github.com/netdata/netdata/pull/16522) ([ClaraCrazy](https://github.com/ClaraCrazy))
+- update bundled UI to v6.65.0 [\#16520](https://github.com/netdata/netdata/pull/16520) ([ilyam8](https://github.com/ilyam8))
+- log2journal improvements 5 [\#16519](https://github.com/netdata/netdata/pull/16519) ([ktsaou](https://github.com/ktsaou))
+- change log level to debug for dbengine routine operations on start [\#16518](https://github.com/netdata/netdata/pull/16518) ([ilyam8](https://github.com/ilyam8))
+- remove system info logging [\#16517](https://github.com/netdata/netdata/pull/16517) ([ilyam8](https://github.com/ilyam8))
+- python.d: logger: remove timestamp when logging to journald. [\#16516](https://github.com/netdata/netdata/pull/16516) ([ilyam8](https://github.com/ilyam8))
+- python.d: mute stock jobs logging during check\(\) [\#16515](https://github.com/netdata/netdata/pull/16515) ([ilyam8](https://github.com/ilyam8))
+- logs-management: Add prefix to chart names [\#16514](https://github.com/netdata/netdata/pull/16514) ([Dim-P](https://github.com/Dim-P))
+- docs: add with-systemd-units-monitoring example to docker [\#16513](https://github.com/netdata/netdata/pull/16513) ([ilyam8](https://github.com/ilyam8))
+- apps: fix "has aggregated" debug output [\#16512](https://github.com/netdata/netdata/pull/16512) ([ilyam8](https://github.com/ilyam8))
+- log2journal improvements 4 [\#16510](https://github.com/netdata/netdata/pull/16510) ([ktsaou](https://github.com/ktsaou))
+- journal improvements part 3 [\#16509](https://github.com/netdata/netdata/pull/16509) ([ktsaou](https://github.com/ktsaou))
+- convert some error messages to info [\#16508](https://github.com/netdata/netdata/pull/16508) ([ilyam8](https://github.com/ilyam8))
+- Resolve coverity issue 410232 [\#16507](https://github.com/netdata/netdata/pull/16507) ([stelfrag](https://github.com/stelfrag))
+- convert some error messages to info [\#16505](https://github.com/netdata/netdata/pull/16505) ([ilyam8](https://github.com/ilyam8))
+- diskspace/diskstats: don't create runtime disk config by default [\#16503](https://github.com/netdata/netdata/pull/16503) ([ilyam8](https://github.com/ilyam8))
+- Fix CID 410152 Dereference after null check [\#16502](https://github.com/netdata/netdata/pull/16502) ([stelfrag](https://github.com/stelfrag))
+- proc\_net\_dev: don't create runtime device config by default [\#16501](https://github.com/netdata/netdata/pull/16501) ([ilyam8](https://github.com/ilyam8))
+- Regenerate integrations.js [\#16500](https://github.com/netdata/netdata/pull/16500) ([netdatabot](https://github.com/netdatabot))
+- remove discourse badge from readme [\#16499](https://github.com/netdata/netdata/pull/16499) ([ilyam8](https://github.com/ilyam8))
+- add curl example to create\_netdata\_conf\(\) [\#16498](https://github.com/netdata/netdata/pull/16498) ([ilyam8](https://github.com/ilyam8))
+- add /var/log mount to docker [\#16496](https://github.com/netdata/netdata/pull/16496) ([ilyam8](https://github.com/ilyam8))
+- Fix occasional shutdown deadlock [\#16495](https://github.com/netdata/netdata/pull/16495) ([stelfrag](https://github.com/stelfrag))
+- Log2journal improvements part2 [\#16494](https://github.com/netdata/netdata/pull/16494) ([ktsaou](https://github.com/ktsaou))
+- proc\_net\_dev: remove device config section [\#16492](https://github.com/netdata/netdata/pull/16492) ([ilyam8](https://github.com/ilyam8))
+- Spelling fixes to documentation [\#16490](https://github.com/netdata/netdata/pull/16490) ([M4itee](https://github.com/M4itee))
+- Fix builds on macOS due to missing endianness functions [\#16489](https://github.com/netdata/netdata/pull/16489) ([vkalintiris](https://github.com/vkalintiris))
+- log2journal: added missing yaml elements [\#16488](https://github.com/netdata/netdata/pull/16488) ([ktsaou](https://github.com/ktsaou))
+- When unregistering an ephemeral host, delete its chart labels [\#16486](https://github.com/netdata/netdata/pull/16486) ([stelfrag](https://github.com/stelfrag))
+- logs-management: Add option to submit logs to system journal [\#16485](https://github.com/netdata/netdata/pull/16485) ([Dim-P](https://github.com/Dim-P))
+- logs-management: Add function cancellability [\#16484](https://github.com/netdata/netdata/pull/16484) ([Dim-P](https://github.com/Dim-P))
+- Fix incorrect DEB package build dep. [\#16483](https://github.com/netdata/netdata/pull/16483) ([Ferroin](https://github.com/Ferroin))
+- Bump new version to cov-analysis tool [\#16482](https://github.com/netdata/netdata/pull/16482) ([tkatsoulas](https://github.com/tkatsoulas))
+- log2journal moved to collectors [\#16481](https://github.com/netdata/netdata/pull/16481) ([ktsaou](https://github.com/ktsaou))
+- Disable netdata monitoring section by default [\#16480](https://github.com/netdata/netdata/pull/16480) ([MrZammler](https://github.com/MrZammler))
+- Log2journal yaml configuration support [\#16479](https://github.com/netdata/netdata/pull/16479) ([ktsaou](https://github.com/ktsaou))
+- log alarm notifications to health.log [\#16476](https://github.com/netdata/netdata/pull/16476) ([ktsaou](https://github.com/ktsaou))
+- journals management improvements [\#16475](https://github.com/netdata/netdata/pull/16475) ([ktsaou](https://github.com/ktsaou))
+- SEO changes for Collector names [\#16473](https://github.com/netdata/netdata/pull/16473) ([sashwathn](https://github.com/sashwathn))
+- Check context post processing queue before sending status to cloud [\#16472](https://github.com/netdata/netdata/pull/16472) ([stelfrag](https://github.com/stelfrag))
+- fix charts.d plugin loading configuration [\#16471](https://github.com/netdata/netdata/pull/16471) ([ilyam8](https://github.com/ilyam8))
+- Fix error limit to respect the log every [\#16469](https://github.com/netdata/netdata/pull/16469) ([stelfrag](https://github.com/stelfrag))
+- Journal better estimations and watcher [\#16467](https://github.com/netdata/netdata/pull/16467) ([ktsaou](https://github.com/ktsaou))
+- update go.d plugin version to v0.57.1 [\#16465](https://github.com/netdata/netdata/pull/16465) ([ilyam8](https://github.com/ilyam8))
+- Add option to disable ML. [\#16463](https://github.com/netdata/netdata/pull/16463) ([vkalintiris](https://github.com/vkalintiris))
+- fix analytics logs [\#16462](https://github.com/netdata/netdata/pull/16462) ([ktsaou](https://github.com/ktsaou))
+- fix logs bashism [\#16461](https://github.com/netdata/netdata/pull/16461) ([ktsaou](https://github.com/ktsaou))
+- fix log2journal incorrect log [\#16460](https://github.com/netdata/netdata/pull/16460) ([ktsaou](https://github.com/ktsaou))
+- fixes for logging [\#16459](https://github.com/netdata/netdata/pull/16459) ([ktsaou](https://github.com/ktsaou))
+- when the namespace socket does not work, continue trying [\#16458](https://github.com/netdata/netdata/pull/16458) ([ktsaou](https://github.com/ktsaou))
+- set journal path for logging [\#16457](https://github.com/netdata/netdata/pull/16457) ([ktsaou](https://github.com/ktsaou))
+- add sbindir\_POST to PATH of bash scripts that use `systemd-cat-native` [\#16456](https://github.com/netdata/netdata/pull/16456) ([ilyam8](https://github.com/ilyam8))
+- add LogNamespace to systemd units [\#16454](https://github.com/netdata/netdata/pull/16454) ([ilyam8](https://github.com/ilyam8))
+- Update non-zero uuid key + child conf. [\#16452](https://github.com/netdata/netdata/pull/16452) ([vkalintiris](https://github.com/vkalintiris))
+- Add missing argument. [\#16451](https://github.com/netdata/netdata/pull/16451) ([vkalintiris](https://github.com/vkalintiris))
+- log flood protection to 1000 log lines / 1 minute [\#16450](https://github.com/netdata/netdata/pull/16450) ([ilyam8](https://github.com/ilyam8))
+- Code cleanup [\#16448](https://github.com/netdata/netdata/pull/16448) ([stelfrag](https://github.com/stelfrag))
+- fix: link daemon.log to stderr in docker [\#16447](https://github.com/netdata/netdata/pull/16447) ([ilyam8](https://github.com/ilyam8))
+- Doc change: Curl no longer supports spaces in the URL. [\#16446](https://github.com/netdata/netdata/pull/16446) ([luisj1983](https://github.com/luisj1983))
+- journal estimations [\#16445](https://github.com/netdata/netdata/pull/16445) ([ktsaou](https://github.com/ktsaou))
+- journal startup [\#16443](https://github.com/netdata/netdata/pull/16443) ([ktsaou](https://github.com/ktsaou))
+- Regenerate integrations.js [\#16442](https://github.com/netdata/netdata/pull/16442) ([netdatabot](https://github.com/netdatabot))
+- Fix icon filename [\#16441](https://github.com/netdata/netdata/pull/16441) ([shyamvalsan](https://github.com/shyamvalsan))
+- On-Prem documentation full and light [\#16440](https://github.com/netdata/netdata/pull/16440) ([M4itee](https://github.com/M4itee))
+- Minor: Small health docs typo fix [\#16439](https://github.com/netdata/netdata/pull/16439) ([MrZammler](https://github.com/MrZammler))
+- Removes Observabilitycon banner README.md [\#16434](https://github.com/netdata/netdata/pull/16434) ([Aliki92](https://github.com/Aliki92))
+- Journal sampling [\#16433](https://github.com/netdata/netdata/pull/16433) ([ktsaou](https://github.com/ktsaou))
+- Regenerate integrations.js [\#16431](https://github.com/netdata/netdata/pull/16431) ([netdatabot](https://github.com/netdatabot))
+- Regenerate integrations.js [\#16430](https://github.com/netdata/netdata/pull/16430) ([netdatabot](https://github.com/netdatabot))
+- proc\_net\_dev: keep nic\_speed\_max in kilobits [\#16429](https://github.com/netdata/netdata/pull/16429) ([ilyam8](https://github.com/ilyam8))
+- update go.d plugin to v0.57.0 [\#16427](https://github.com/netdata/netdata/pull/16427) ([ilyam8](https://github.com/ilyam8))
+- Adds config info for Telegram cloud notification [\#16424](https://github.com/netdata/netdata/pull/16424) ([juacker](https://github.com/juacker))
+- Minor: Remove backtick from doc [\#16423](https://github.com/netdata/netdata/pull/16423) ([MrZammler](https://github.com/MrZammler))
+- Update netdata-functions.md [\#16421](https://github.com/netdata/netdata/pull/16421) ([shyamvalsan](https://github.com/shyamvalsan))
+- disable socket port reuse [\#16420](https://github.com/netdata/netdata/pull/16420) ([ilyam8](https://github.com/ilyam8))
+- fix proc net dev: keep iface speed chart var in Mbits [\#16418](https://github.com/netdata/netdata/pull/16418) ([ilyam8](https://github.com/ilyam8))
+- Don't print errors from reading filtered alerts [\#16417](https://github.com/netdata/netdata/pull/16417) ([MrZammler](https://github.com/MrZammler))
+- /api/v1/charts: bring back chart id to `title` [\#16416](https://github.com/netdata/netdata/pull/16416) ([ilyam8](https://github.com/ilyam8))
+- fix: don't count reused connections as new [\#16414](https://github.com/netdata/netdata/pull/16414) ([ilyam8](https://github.com/ilyam8))
+- Add support for installing a specific major version of the agent on install. [\#16413](https://github.com/netdata/netdata/pull/16413) ([Ferroin](https://github.com/Ferroin))
+- Remove queue limit from ACLK sync event loop [\#16411](https://github.com/netdata/netdata/pull/16411) ([stelfrag](https://github.com/stelfrag))
+- Regenerate integrations.js [\#16409](https://github.com/netdata/netdata/pull/16409) ([netdatabot](https://github.com/netdatabot))
+- Improve handling around EPEL requirement for RPM packages. [\#16406](https://github.com/netdata/netdata/pull/16406) ([Ferroin](https://github.com/Ferroin))
+- Fix typo in metadata \(eBPF\) [\#16405](https://github.com/netdata/netdata/pull/16405) ([thiagoftsm](https://github.com/thiagoftsm))
+- docker: use /host/etc/hostname if mounted [\#16401](https://github.com/netdata/netdata/pull/16401) ([ilyam8](https://github.com/ilyam8))
+- adaptec\_raid: fix parsing PD without NCQ status [\#16400](https://github.com/netdata/netdata/pull/16400) ([ilyam8](https://github.com/ilyam8))
+- eBPF apps order [\#16395](https://github.com/netdata/netdata/pull/16395) ([thiagoftsm](https://github.com/thiagoftsm))
+- fix systemd-units func expiration time [\#16393](https://github.com/netdata/netdata/pull/16393) ([ilyam8](https://github.com/ilyam8))
+- docker: mount /etc/localtime [\#16392](https://github.com/netdata/netdata/pull/16392) ([ilyam8](https://github.com/ilyam8))
+- fix "differ in signedness" warn in cgroup [\#16391](https://github.com/netdata/netdata/pull/16391) ([ilyam8](https://github.com/ilyam8))
+- fix v0 dashboard [\#16389](https://github.com/netdata/netdata/pull/16389) ([ilyam8](https://github.com/ilyam8))
+- Regenerate integrations.js [\#16386](https://github.com/netdata/netdata/pull/16386) ([netdatabot](https://github.com/netdatabot))
+- skip spaces when reading cpuset [\#16385](https://github.com/netdata/netdata/pull/16385) ([ilyam8](https://github.com/ilyam8))
+- Regenerate integrations.js [\#16384](https://github.com/netdata/netdata/pull/16384) ([netdatabot](https://github.com/netdatabot))
+- use pre-configured message\_ids to identify common logs [\#16383](https://github.com/netdata/netdata/pull/16383) ([ktsaou](https://github.com/ktsaou))
+- Handle ephemeral hosts [\#16381](https://github.com/netdata/netdata/pull/16381) ([stelfrag](https://github.com/stelfrag))
+- docs: remove 'families' from health reference [\#16380](https://github.com/netdata/netdata/pull/16380) ([ilyam8](https://github.com/ilyam8))
+- fix cloud aws sns notification meta [\#16379](https://github.com/netdata/netdata/pull/16379) ([ilyam8](https://github.com/ilyam8))
+- Regenerate integrations.js [\#16378](https://github.com/netdata/netdata/pull/16378) ([netdatabot](https://github.com/netdatabot))
+- update bundled UI to v6.59.0 [\#16377](https://github.com/netdata/netdata/pull/16377) ([ilyam8](https://github.com/ilyam8))
+- health guides: remove guides for alerts that don't exist in the repo [\#16375](https://github.com/netdata/netdata/pull/16375) ([ilyam8](https://github.com/ilyam8))
+- add pids current to cgroups meta [\#16374](https://github.com/netdata/netdata/pull/16374) ([ilyam8](https://github.com/ilyam8))
+- Regenerate integrations.js [\#16373](https://github.com/netdata/netdata/pull/16373) ([netdatabot](https://github.com/netdatabot))
+- docs: remove unused cloud notification methods mds [\#16372](https://github.com/netdata/netdata/pull/16372) ([ilyam8](https://github.com/ilyam8))
+- Add configuration documentation for Cloud AWS SNS [\#16371](https://github.com/netdata/netdata/pull/16371) ([car12o](https://github.com/car12o))
+- packaging: add zstd dev to install-required-packages [\#16370](https://github.com/netdata/netdata/pull/16370) ([ilyam8](https://github.com/ilyam8))
+- cgroups: collect pids/pids.current [\#16369](https://github.com/netdata/netdata/pull/16369) ([ilyam8](https://github.com/ilyam8))
+- docs: Correct time unit for tier 2 explanation [\#16368](https://github.com/netdata/netdata/pull/16368) ([sepek](https://github.com/sepek))
+- cgroups: fix throttle\_duration chart context [\#16367](https://github.com/netdata/netdata/pull/16367) ([ilyam8](https://github.com/ilyam8))
+- Introduce agent release metadata pipelines [\#16366](https://github.com/netdata/netdata/pull/16366) ([tkatsoulas](https://github.com/tkatsoulas))
+- fix system.net when inside lxc [\#16364](https://github.com/netdata/netdata/pull/16364) ([ilyam8](https://github.com/ilyam8))
+- collectors/freeipmi: add ipmi-sensors function [\#16363](https://github.com/netdata/netdata/pull/16363) ([ilyam8](https://github.com/ilyam8))
+- Add assorted improvements to the version policy draft. [\#16362](https://github.com/netdata/netdata/pull/16362) ([Ferroin](https://github.com/Ferroin))
+- Add an apcupsd status code metric [\#16361](https://github.com/netdata/netdata/pull/16361) ([thomasbeaudry](https://github.com/thomasbeaudry))
+- Switch alarm\_log to use the buffer json functions [\#16360](https://github.com/netdata/netdata/pull/16360) ([stelfrag](https://github.com/stelfrag))
+- Switch charts / chart to use buffer json functions [\#16359](https://github.com/netdata/netdata/pull/16359) ([stelfrag](https://github.com/stelfrag))
+- health: put guides into subdirs [\#16358](https://github.com/netdata/netdata/pull/16358) ([ilyam8](https://github.com/ilyam8))
+- New logging layer [\#16357](https://github.com/netdata/netdata/pull/16357) ([ktsaou](https://github.com/ktsaou))
+- Import alert guides from Netdata Assistant [\#16355](https://github.com/netdata/netdata/pull/16355) ([ralphm](https://github.com/ralphm))
+- update bundle UI to v6.58.5 [\#16354](https://github.com/netdata/netdata/pull/16354) ([ilyam8](https://github.com/ilyam8))
+- Update CODEOWNERS [\#16353](https://github.com/netdata/netdata/pull/16353) ([Ancairon](https://github.com/Ancairon))
+- Copy outdated alert guides to health/guides [\#16352](https://github.com/netdata/netdata/pull/16352) ([Ancairon](https://github.com/Ancairon))
+- Replace rrdset\_is\_obsolete & rrdset\_isnot\_obsolete [\#16351](https://github.com/netdata/netdata/pull/16351) ([MrZammler](https://github.com/MrZammler))
+- fix zstd in static build [\#16349](https://github.com/netdata/netdata/pull/16349) ([ilyam8](https://github.com/ilyam8))
+- add rrddim\_get\_last\_stored\_value to simplify function code in internal collectors [\#16348](https://github.com/netdata/netdata/pull/16348) ([ilyam8](https://github.com/ilyam8))
+- change defaults for functions [\#16347](https://github.com/netdata/netdata/pull/16347) ([ktsaou](https://github.com/ktsaou))
+- give the streaming function to nightly users [\#16346](https://github.com/netdata/netdata/pull/16346) ([ktsaou](https://github.com/ktsaou))
+- diskspace: add mount-points function [\#16345](https://github.com/netdata/netdata/pull/16345) ([ilyam8](https://github.com/ilyam8))
+- Update packaging instructions [\#16344](https://github.com/netdata/netdata/pull/16344) ([tkatsoulas](https://github.com/tkatsoulas))
+- Better database corruption detection during runtime [\#16343](https://github.com/netdata/netdata/pull/16343) ([stelfrag](https://github.com/stelfrag))
+- Improve agent to cloud status update process [\#16342](https://github.com/netdata/netdata/pull/16342) ([stelfrag](https://github.com/stelfrag))
+- h2o add api/v2 support [\#16340](https://github.com/netdata/netdata/pull/16340) ([underhood](https://github.com/underhood))
+- proc/diskstats: add block-devices function [\#16338](https://github.com/netdata/netdata/pull/16338) ([ilyam8](https://github.com/ilyam8))
+- network-interfaces function: add UsedBy field [\#16337](https://github.com/netdata/netdata/pull/16337) ([ilyam8](https://github.com/ilyam8))
+- Network-interfaces function small improvements [\#16336](https://github.com/netdata/netdata/pull/16336) ([ilyam8](https://github.com/ilyam8))
+- proc netstat: add network interface statistics function [\#16334](https://github.com/netdata/netdata/pull/16334) ([ilyam8](https://github.com/ilyam8))
+- systemd-units improvements [\#16333](https://github.com/netdata/netdata/pull/16333) ([ktsaou](https://github.com/ktsaou))
+- cleanup systemd unit files After [\#16332](https://github.com/netdata/netdata/pull/16332) ([ilyam8](https://github.com/ilyam8))
+- fix: check for null rrddim in cgroup functions [\#16331](https://github.com/netdata/netdata/pull/16331) ([ilyam8](https://github.com/ilyam8))
+- Regenerate integrations.js [\#16330](https://github.com/netdata/netdata/pull/16330) ([netdatabot](https://github.com/netdatabot))
+- Improve unittests [\#16329](https://github.com/netdata/netdata/pull/16329) ([stelfrag](https://github.com/stelfrag))
+- fix coverity warnings in cgroups [\#16328](https://github.com/netdata/netdata/pull/16328) ([ilyam8](https://github.com/ilyam8))
+- Fix readme images [\#16327](https://github.com/netdata/netdata/pull/16327) ([Ancairon](https://github.com/Ancairon))
+- integrations: fix nightly tag in helm deploy [\#16326](https://github.com/netdata/netdata/pull/16326) ([ilyam8](https://github.com/ilyam8))
+- rename newly added functions [\#16325](https://github.com/netdata/netdata/pull/16325) ([ktsaou](https://github.com/ktsaou))
+- Added section Blog posts README.md [\#16323](https://github.com/netdata/netdata/pull/16323) ([Aliki92](https://github.com/Aliki92))
+- Keep precompiled statements for alarm log queries to improve performance [\#16321](https://github.com/netdata/netdata/pull/16321) ([stelfrag](https://github.com/stelfrag))
+- Fix README images [\#16320](https://github.com/netdata/netdata/pull/16320) ([Ancairon](https://github.com/Ancairon))
+- Fix journal file index when collision is detected [\#16319](https://github.com/netdata/netdata/pull/16319) ([stelfrag](https://github.com/stelfrag))
+- Systemd units function [\#16318](https://github.com/netdata/netdata/pull/16318) ([ktsaou](https://github.com/ktsaou))
+- Optimize database before agent shutdown [\#16317](https://github.com/netdata/netdata/pull/16317) ([stelfrag](https://github.com/stelfrag))
+- `tcp_v6_connect` monitoring [\#16316](https://github.com/netdata/netdata/pull/16316) ([thiagoftsm](https://github.com/thiagoftsm))
+- Improve shutdown when collectors are active [\#16315](https://github.com/netdata/netdata/pull/16315) ([stelfrag](https://github.com/stelfrag))
+- cgroup-top function [\#16314](https://github.com/netdata/netdata/pull/16314) ([ktsaou](https://github.com/ktsaou))
+- Add a note for the docker deployment alongside with cetus [\#16312](https://github.com/netdata/netdata/pull/16312) ([tkatsoulas](https://github.com/tkatsoulas))
+- Update ObservabilityCon README.md [\#16311](https://github.com/netdata/netdata/pull/16311) ([Aliki92](https://github.com/Aliki92))
+- update docker swarm deploy info [\#16308](https://github.com/netdata/netdata/pull/16308) ([ilyam8](https://github.com/ilyam8))
+- Regenerate integrations.js [\#16306](https://github.com/netdata/netdata/pull/16306) ([netdatabot](https://github.com/netdatabot))
+- Use proper icons for deploy integrations [\#16305](https://github.com/netdata/netdata/pull/16305) ([Ancairon](https://github.com/Ancairon))
+- bump openssl for static in 3.1.4 [\#16303](https://github.com/netdata/netdata/pull/16303) ([tkatsoulas](https://github.com/tkatsoulas))
+- claim.sh: use echo instead of /bin/echo [\#16300](https://github.com/netdata/netdata/pull/16300) ([ilyam8](https://github.com/ilyam8))
+- update journal sources once per minute [\#16298](https://github.com/netdata/netdata/pull/16298) ([ktsaou](https://github.com/ktsaou))
+- Fix label copy [\#16297](https://github.com/netdata/netdata/pull/16297) ([stelfrag](https://github.com/stelfrag))
+- fix missing labels from parents [\#16296](https://github.com/netdata/netdata/pull/16296) ([ktsaou](https://github.com/ktsaou))
+- do not propagate upstream internal label sources [\#16295](https://github.com/netdata/netdata/pull/16295) ([ktsaou](https://github.com/ktsaou))
+- fix various issues identified by coverity [\#16294](https://github.com/netdata/netdata/pull/16294) ([ktsaou](https://github.com/ktsaou))
+- fix missing labels from parents [\#16293](https://github.com/netdata/netdata/pull/16293) ([ktsaou](https://github.com/ktsaou))
+- fix renames in freebsd [\#16292](https://github.com/netdata/netdata/pull/16292) ([ktsaou](https://github.com/ktsaou))
+- Regenerate integrations.js [\#16291](https://github.com/netdata/netdata/pull/16291) ([netdatabot](https://github.com/netdatabot))
+- fix retention loading [\#16290](https://github.com/netdata/netdata/pull/16290) ([ktsaou](https://github.com/ktsaou))
+- integrations: yes/no instead of True/False in tables [\#16289](https://github.com/netdata/netdata/pull/16289) ([ilyam8](https://github.com/ilyam8))
+- typo fixed in gen\_docs\_integrations.py [\#16288](https://github.com/netdata/netdata/pull/16288) ([khalid586](https://github.com/khalid586))
+- Brotli streaming compression [\#16287](https://github.com/netdata/netdata/pull/16287) ([ktsaou](https://github.com/ktsaou))
+- Apcupsd selftest metric [\#16286](https://github.com/netdata/netdata/pull/16286) ([thomasbeaudry](https://github.com/thomasbeaudry))
+- Fix 404s in markdown files [\#16285](https://github.com/netdata/netdata/pull/16285) ([Ancairon](https://github.com/Ancairon))
+- Regenerate integrations.js [\#16284](https://github.com/netdata/netdata/pull/16284) ([netdatabot](https://github.com/netdatabot))
+- Small optimization of alert queries [\#16282](https://github.com/netdata/netdata/pull/16282) ([MrZammler](https://github.com/MrZammler))
+- update go.d version to 0.56.4 [\#16281](https://github.com/netdata/netdata/pull/16281) ([ilyam8](https://github.com/ilyam8))
+- update bundled UI to v6.57.0 [\#16277](https://github.com/netdata/netdata/pull/16277) ([ilyam8](https://github.com/ilyam8))
+- Remove semicolons from strings [\#16276](https://github.com/netdata/netdata/pull/16276) ([Ancairon](https://github.com/Ancairon))
+- Prevent wrong optimization armv7l static build [\#16274](https://github.com/netdata/netdata/pull/16274) ([stelfrag](https://github.com/stelfrag))
+- local\_listeners: add cmd args for reading specific files [\#16273](https://github.com/netdata/netdata/pull/16273) ([ilyam8](https://github.com/ilyam8))
+- DYNCFG fix REPORT\_JOB\_STATUS streaming [\#16272](https://github.com/netdata/netdata/pull/16272) ([underhood](https://github.com/underhood))
+- fix sources match [\#16271](https://github.com/netdata/netdata/pull/16271) ([ktsaou](https://github.com/ktsaou))
+- Add an obsoletion time for statsd private charts [\#16269](https://github.com/netdata/netdata/pull/16269) ([MrZammler](https://github.com/MrZammler))
+- ZSTD and GZIP/DEFLATE streaming support [\#16268](https://github.com/netdata/netdata/pull/16268) ([ktsaou](https://github.com/ktsaou))
+- journal minor updates [\#16267](https://github.com/netdata/netdata/pull/16267) ([ktsaou](https://github.com/ktsaou))
+- Regenerate integrations.js [\#16266](https://github.com/netdata/netdata/pull/16266) ([netdatabot](https://github.com/netdatabot))
+- Fix coverity issue 403725 [\#16265](https://github.com/netdata/netdata/pull/16265) ([stelfrag](https://github.com/stelfrag))
+- SUBSTRING simple patterns fix [\#16264](https://github.com/netdata/netdata/pull/16264) ([ktsaou](https://github.com/ktsaou))
+- QUERIES: use tiers only when they have useful data [\#16263](https://github.com/netdata/netdata/pull/16263) ([ktsaou](https://github.com/ktsaou))
+- Improve dimension ML model load [\#16262](https://github.com/netdata/netdata/pull/16262) ([stelfrag](https://github.com/stelfrag))
+- cgroup: add net container\_device label [\#16261](https://github.com/netdata/netdata/pull/16261) ([ilyam8](https://github.com/ilyam8))
+- Replace distutils with packaging for version [\#16259](https://github.com/netdata/netdata/pull/16259) ([MrZammler](https://github.com/MrZammler))
+- Regenerate integrations.js [\#16258](https://github.com/netdata/netdata/pull/16258) ([netdatabot](https://github.com/netdatabot))
+- Fix Discord webhook payload [\#16257](https://github.com/netdata/netdata/pull/16257) ([luchaos](https://github.com/luchaos))
+- Fix HAProxy server status parsing and add MAINT status chart [\#16253](https://github.com/netdata/netdata/pull/16253) ([seniorquico](https://github.com/seniorquico))
+- Journal multiple sources [\#16252](https://github.com/netdata/netdata/pull/16252) ([ktsaou](https://github.com/ktsaou))
+- `most_popular` on markdown metadata for integrations [\#16251](https://github.com/netdata/netdata/pull/16251) ([Ancairon](https://github.com/Ancairon))
+- Dyncfg improvements [\#16250](https://github.com/netdata/netdata/pull/16250) ([ktsaou](https://github.com/ktsaou))
+- Fix label copy to correctly handle duplicate keys [\#16249](https://github.com/netdata/netdata/pull/16249) ([stelfrag](https://github.com/stelfrag))
+- added systemd-journal forward\_secure\_sealing [\#16247](https://github.com/netdata/netdata/pull/16247) ([ktsaou](https://github.com/ktsaou))
+- Terminate cgroups discovery thread faster during shutdown [\#16246](https://github.com/netdata/netdata/pull/16246) ([stelfrag](https://github.com/stelfrag))
+- python.d\(smartd\_log\): collect Total LBAs written/read [\#16245](https://github.com/netdata/netdata/pull/16245) ([watsonbox](https://github.com/watsonbox))
+- fix apps plugin metric names in meta [\#16243](https://github.com/netdata/netdata/pull/16243) ([ilyam8](https://github.com/ilyam8))
+- Drop an unused index from aclk\_alert table [\#16242](https://github.com/netdata/netdata/pull/16242) ([stelfrag](https://github.com/stelfrag))
+- add DYNCFG\_RESET [\#16241](https://github.com/netdata/netdata/pull/16241) ([underhood](https://github.com/underhood))
+- Reuse ML load prepared statement [\#16240](https://github.com/netdata/netdata/pull/16240) ([stelfrag](https://github.com/stelfrag))
+- update bundled UI to v6.53.0 [\#16239](https://github.com/netdata/netdata/pull/16239) ([ilyam8](https://github.com/ilyam8))
+- Regenerate integrations.js [\#16237](https://github.com/netdata/netdata/pull/16237) ([netdatabot](https://github.com/netdatabot))
+- Active journal centralization guide no encryption [\#16236](https://github.com/netdata/netdata/pull/16236) ([tkatsoulas](https://github.com/tkatsoulas))
+- journal: script to generate self-signed-certificates [\#16235](https://github.com/netdata/netdata/pull/16235) ([ktsaou](https://github.com/ktsaou))
+- Fix dimension HETEROGENEOUS check [\#16234](https://github.com/netdata/netdata/pull/16234) ([stelfrag](https://github.com/stelfrag))
+- uninstaller: remove /etc/cron.d/netdata-updater-daily [\#16233](https://github.com/netdata/netdata/pull/16233) ([ilyam8](https://github.com/ilyam8))
+- Add Erlang to Apps configuration [\#16231](https://github.com/netdata/netdata/pull/16231) ([andyundso](https://github.com/andyundso))
+- remove charts.d/nut [\#16230](https://github.com/netdata/netdata/pull/16230) ([ilyam8](https://github.com/ilyam8))
+- kickstart: rename auto-update-method to auto-update-type [\#16229](https://github.com/netdata/netdata/pull/16229) ([ilyam8](https://github.com/ilyam8))
+- update go.d plugin version to v0.56.3 [\#16228](https://github.com/netdata/netdata/pull/16228) ([ilyam8](https://github.com/ilyam8))
+- Add document outlining our versioning policy and public API. [\#16227](https://github.com/netdata/netdata/pull/16227) ([Ferroin](https://github.com/Ferroin))
+- Changes to `systemd-journal` docs [\#16225](https://github.com/netdata/netdata/pull/16225) ([Ancairon](https://github.com/Ancairon))
+- Fix statistics calculation in 32bit systems [\#16222](https://github.com/netdata/netdata/pull/16222) ([stelfrag](https://github.com/stelfrag))
+- Fix meta unittest [\#16221](https://github.com/netdata/netdata/pull/16221) ([stelfrag](https://github.com/stelfrag))
+- facets: minimize hashtable collisions [\#16215](https://github.com/netdata/netdata/pull/16215) ([ktsaou](https://github.com/ktsaou))
+- Removing support for Alpine 3.15 [\#16205](https://github.com/netdata/netdata/pull/16205) ([tkatsoulas](https://github.com/tkatsoulas))
+- Improve context load on startup [\#16203](https://github.com/netdata/netdata/pull/16203) ([stelfrag](https://github.com/stelfrag))
+- cgroup-network: don't log an error opening pid file if it doesn't exist [\#16196](https://github.com/netdata/netdata/pull/16196) ([ilyam8](https://github.com/ilyam8))
+- docker install: support for Proxmox vms/containers name resolution [\#16193](https://github.com/netdata/netdata/pull/16193) ([ilyam8](https://github.com/ilyam8))
+- Introduce workflow to always update bundled packages \(static builds\) into their latest release \(part1\) [\#16191](https://github.com/netdata/netdata/pull/16191) ([tkatsoulas](https://github.com/tkatsoulas))
+- Improvements for labels handling [\#16172](https://github.com/netdata/netdata/pull/16172) ([stelfrag](https://github.com/stelfrag))
+- Split charts \(eBPF \<-\> Apps integration\) [\#16139](https://github.com/netdata/netdata/pull/16139) ([thiagoftsm](https://github.com/thiagoftsm))
+- Faster parents [\#16127](https://github.com/netdata/netdata/pull/16127) ([ktsaou](https://github.com/ktsaou))
+- Update info about custom dashboards [\#16121](https://github.com/netdata/netdata/pull/16121) ([elizabyte8](https://github.com/elizabyte8))
+- Add info to native packages docs about mirroring our repos. [\#16069](https://github.com/netdata/netdata/pull/16069) ([Ferroin](https://github.com/Ferroin))
+- shutdown while waiting for collectors to finish [\#16023](https://github.com/netdata/netdata/pull/16023) ([ktsaou](https://github.com/ktsaou))
+
+## [v1.43.2](https://github.com/netdata/netdata/tree/v1.43.2) (2023-10-30)
+
+[Full Changelog](https://github.com/netdata/netdata/compare/v1.43.1...v1.43.2)
+
+## [v1.43.1](https://github.com/netdata/netdata/tree/v1.43.1) (2023-10-26)
+
+[Full Changelog](https://github.com/netdata/netdata/compare/v1.43.0...v1.43.1)
+
+## [v1.43.0](https://github.com/netdata/netdata/tree/v1.43.0) (2023-10-16)
+
+[Full Changelog](https://github.com/netdata/netdata/compare/v1.42.4...v1.43.0)
+
+**Merged pull requests:**
+
+- update bundled UI to v6.52.2 [\#16219](https://github.com/netdata/netdata/pull/16219) ([ilyam8](https://github.com/ilyam8))
+- dynamic meta queue size [\#16218](https://github.com/netdata/netdata/pull/16218) ([ktsaou](https://github.com/ktsaou))
+- update bundled UI to v6.52.1 [\#16217](https://github.com/netdata/netdata/pull/16217) ([ilyam8](https://github.com/ilyam8))
+- update bundled UI to v6.52.0 [\#16216](https://github.com/netdata/netdata/pull/16216) ([ilyam8](https://github.com/ilyam8))
+- disable logging to syslog by default [\#16214](https://github.com/netdata/netdata/pull/16214) ([ilyam8](https://github.com/ilyam8))
+- add summary to /alerts [\#16213](https://github.com/netdata/netdata/pull/16213) ([MrZammler](https://github.com/MrZammler))
+- registry action hello should always work [\#16212](https://github.com/netdata/netdata/pull/16212) ([ktsaou](https://github.com/ktsaou))
+- apps: fix divide by zero when calc avg uptime [\#16211](https://github.com/netdata/netdata/pull/16211) ([ilyam8](https://github.com/ilyam8))
+- allow patterns in journal queries [\#16210](https://github.com/netdata/netdata/pull/16210) ([ktsaou](https://github.com/ktsaou))
+- ui-6.51.0 [\#16208](https://github.com/netdata/netdata/pull/16208) ([ktsaou](https://github.com/ktsaou))
+- add order in available histograms [\#16204](https://github.com/netdata/netdata/pull/16204) ([ktsaou](https://github.com/ktsaou))
+- update ui to 6.50.2 again [\#16202](https://github.com/netdata/netdata/pull/16202) ([ktsaou](https://github.com/ktsaou))
+- update ui to 6.50.2 [\#16201](https://github.com/netdata/netdata/pull/16201) ([ktsaou](https://github.com/ktsaou))
+- Regenerate integrations.js [\#16200](https://github.com/netdata/netdata/pull/16200) ([netdatabot](https://github.com/netdatabot))
+- health: attach drops ratio alarms to net.drops [\#16199](https://github.com/netdata/netdata/pull/16199) ([ilyam8](https://github.com/ilyam8))
+- apps: always expose "other" group [\#16198](https://github.com/netdata/netdata/pull/16198) ([ilyam8](https://github.com/ilyam8))
+- journal timeout [\#16195](https://github.com/netdata/netdata/pull/16195) ([ktsaou](https://github.com/ktsaou))
+- systemd-journal timeout to 55 secs [\#16194](https://github.com/netdata/netdata/pull/16194) ([ktsaou](https://github.com/ktsaou))
+- update bundled UI to v6.49.0 [\#16192](https://github.com/netdata/netdata/pull/16192) ([ilyam8](https://github.com/ilyam8))
+- Faster facets [\#16190](https://github.com/netdata/netdata/pull/16190) ([ktsaou](https://github.com/ktsaou))
+- Journal updates [\#16189](https://github.com/netdata/netdata/pull/16189) ([ktsaou](https://github.com/ktsaou))
+- Add agent version on startup [\#16188](https://github.com/netdata/netdata/pull/16188) ([stelfrag](https://github.com/stelfrag))
+- Suppress "families" log [\#16186](https://github.com/netdata/netdata/pull/16186) ([stelfrag](https://github.com/stelfrag))
+- Fix access of memory after free [\#16185](https://github.com/netdata/netdata/pull/16185) ([stelfrag](https://github.com/stelfrag))
+- functions columns [\#16184](https://github.com/netdata/netdata/pull/16184) ([ktsaou](https://github.com/ktsaou))
+- disable \_go\_build in centos 8 & 9 [\#16183](https://github.com/netdata/netdata/pull/16183) ([tkatsoulas](https://github.com/tkatsoulas))
+- Regenerate integrations.js [\#16182](https://github.com/netdata/netdata/pull/16182) ([netdatabot](https://github.com/netdatabot))
+- update go.d to v0.56.2 [\#16181](https://github.com/netdata/netdata/pull/16181) ([ilyam8](https://github.com/ilyam8))
+- Add support for Fedora 39 native packages into our CI [\#16180](https://github.com/netdata/netdata/pull/16180) ([tkatsoulas](https://github.com/tkatsoulas))
+- Add support for Ubuntu 23.10 native packages into our CI [\#16179](https://github.com/netdata/netdata/pull/16179) ([tkatsoulas](https://github.com/tkatsoulas))
+- Update bundled static packages [\#16177](https://github.com/netdata/netdata/pull/16177) ([tkatsoulas](https://github.com/tkatsoulas))
+- Regenerate integrations.js [\#16176](https://github.com/netdata/netdata/pull/16176) ([netdatabot](https://github.com/netdatabot))
+- facets: do not corrupt the index when doubling the hashtable [\#16171](https://github.com/netdata/netdata/pull/16171) ([ktsaou](https://github.com/ktsaou))
+- Add icons to integrations markdown files [\#16169](https://github.com/netdata/netdata/pull/16169) ([Ancairon](https://github.com/Ancairon))
+- Fix netdata-uninstaller; blindly deletes NETDATA\_PREFIX env var [\#16167](https://github.com/netdata/netdata/pull/16167) ([tkatsoulas](https://github.com/tkatsoulas))
+- apps: remove mem\_private on FreeBSD [\#16166](https://github.com/netdata/netdata/pull/16166) ([ilyam8](https://github.com/ilyam8))
+- fix repo path for openSUSE 15.5 packages [\#16161](https://github.com/netdata/netdata/pull/16161) ([tkatsoulas](https://github.com/tkatsoulas))
+- Modify eBPF exit [\#16159](https://github.com/netdata/netdata/pull/16159) ([thiagoftsm](https://github.com/thiagoftsm))
+- Fix compilation warnings [\#16158](https://github.com/netdata/netdata/pull/16158) ([stelfrag](https://github.com/stelfrag))
+- Don't queue removed when there is a newer alert [\#16157](https://github.com/netdata/netdata/pull/16157) ([MrZammler](https://github.com/MrZammler))
+- docker: make chmod o+rX / non fatal [\#16156](https://github.com/netdata/netdata/pull/16156) ([ilyam8](https://github.com/ilyam8))
+- Batch ML model load commands [\#16155](https://github.com/netdata/netdata/pull/16155) ([stelfrag](https://github.com/stelfrag))
+- \[BUGFIX\] MQTT ARM fix [\#16154](https://github.com/netdata/netdata/pull/16154) ([underhood](https://github.com/underhood))
+- Rework guide, add SSL with self-signed certs [\#16153](https://github.com/netdata/netdata/pull/16153) ([tkatsoulas](https://github.com/tkatsoulas))
+- make io charts "write" negative in apps and cgroups \(systemd\) [\#16152](https://github.com/netdata/netdata/pull/16152) ([ilyam8](https://github.com/ilyam8))
+- journal: updates [\#16150](https://github.com/netdata/netdata/pull/16150) ([ktsaou](https://github.com/ktsaou))
+- uninstaller: remove ND systemd preset and tmp dir [\#16148](https://github.com/netdata/netdata/pull/16148) ([ilyam8](https://github.com/ilyam8))
+- fix `test -x` check for uninstaller script [\#16146](https://github.com/netdata/netdata/pull/16146) ([ilyam8](https://github.com/ilyam8))
+- health: don't log an unknown key error for "families" [\#16145](https://github.com/netdata/netdata/pull/16145) ([ilyam8](https://github.com/ilyam8))
+- Regenerate integrations.js [\#16144](https://github.com/netdata/netdata/pull/16144) ([netdatabot](https://github.com/netdatabot))
+- Update python.d./varnish/metadata.yaml [\#16143](https://github.com/netdata/netdata/pull/16143) ([Ancairon](https://github.com/Ancairon))
+- Bugfix in integrations/setup/template [\#16142](https://github.com/netdata/netdata/pull/16142) ([Ancairon](https://github.com/Ancairon))
+- Fixes in integration generation script [\#16141](https://github.com/netdata/netdata/pull/16141) ([Ancairon](https://github.com/Ancairon))
+- Introduce stringify function for integrations [\#16140](https://github.com/netdata/netdata/pull/16140) ([Ancairon](https://github.com/Ancairon))
+- Regenerate integrations.js [\#16138](https://github.com/netdata/netdata/pull/16138) ([netdatabot](https://github.com/netdatabot))
+- fix random crashes on pthread\_detach\(\) [\#16137](https://github.com/netdata/netdata/pull/16137) ([ktsaou](https://github.com/ktsaou))
+- fix journal help and mark debug keys in the output [\#16133](https://github.com/netdata/netdata/pull/16133) ([ktsaou](https://github.com/ktsaou))
+- Regenerate integrations.js [\#16132](https://github.com/netdata/netdata/pull/16132) ([netdatabot](https://github.com/netdatabot))
+- apps: change user\_group to usergroup [\#16131](https://github.com/netdata/netdata/pull/16131) ([ilyam8](https://github.com/ilyam8))
+- Retain a list structure instead of a set for data collection integrations categories [\#16130](https://github.com/netdata/netdata/pull/16130) ([Ancairon](https://github.com/Ancairon))
+- Add summary to alerts configurations [\#16129](https://github.com/netdata/netdata/pull/16129) ([MrZammler](https://github.com/MrZammler))
+- Remove multiple categories due to bug [\#16126](https://github.com/netdata/netdata/pull/16126) ([Ancairon](https://github.com/Ancairon))
+- Regenerate integrations.js [\#16125](https://github.com/netdata/netdata/pull/16125) ([netdatabot](https://github.com/netdatabot))
+- update UI to v6.45.0 [\#16124](https://github.com/netdata/netdata/pull/16124) ([ilyam8](https://github.com/ilyam8))
+- journal: fix the 1 second latency in play mode [\#16123](https://github.com/netdata/netdata/pull/16123) ([ktsaou](https://github.com/ktsaou))
+- fix proc netstat metrics [\#16122](https://github.com/netdata/netdata/pull/16122) ([ilyam8](https://github.com/ilyam8))
+- don't strip newlines when forwarding FUNCTION\_PAYLOAD [\#16120](https://github.com/netdata/netdata/pull/16120) ([underhood](https://github.com/underhood))
+- Do not force OOMKill [\#16115](https://github.com/netdata/netdata/pull/16115) ([thiagoftsm](https://github.com/thiagoftsm))
+- fix crash on parsing clabel command with no source [\#16114](https://github.com/netdata/netdata/pull/16114) ([ilyam8](https://github.com/ilyam8))
+- update UI to v6.43.0 [\#16112](https://github.com/netdata/netdata/pull/16112) ([ilyam8](https://github.com/ilyam8))
+- Regenerate integrations.js [\#16111](https://github.com/netdata/netdata/pull/16111) ([netdatabot](https://github.com/netdatabot))
+- journal: respect anchor on non-data-only queries [\#16109](https://github.com/netdata/netdata/pull/16109) ([ktsaou](https://github.com/ktsaou))
+- Fix in generate integrations docs script [\#16108](https://github.com/netdata/netdata/pull/16108) ([Ancairon](https://github.com/Ancairon))
+- journal: go up to stop anchor on data only queries [\#16107](https://github.com/netdata/netdata/pull/16107) ([ktsaou](https://github.com/ktsaou))
+- Update collectors/python.d.plugin/pandas/metadata.yaml [\#16106](https://github.com/netdata/netdata/pull/16106) ([Ancairon](https://github.com/Ancairon))
+- Code improvements [\#16104](https://github.com/netdata/netdata/pull/16104) ([stelfrag](https://github.com/stelfrag))
+- Regenerate integrations.js [\#16103](https://github.com/netdata/netdata/pull/16103) ([netdatabot](https://github.com/netdatabot))
+- Add integrations/cloud-notifications to cleanup [\#16102](https://github.com/netdata/netdata/pull/16102) ([Ancairon](https://github.com/Ancairon))
+- better journal logging [\#16101](https://github.com/netdata/netdata/pull/16101) ([ktsaou](https://github.com/ktsaou))
+- update UI to v6.42.2 [\#16100](https://github.com/netdata/netdata/pull/16100) ([ilyam8](https://github.com/ilyam8))
+- a simple journal optimization [\#16099](https://github.com/netdata/netdata/pull/16099) ([ktsaou](https://github.com/ktsaou))
+- journal: fix incremental queries [\#16098](https://github.com/netdata/netdata/pull/16098) ([ktsaou](https://github.com/ktsaou))
+- Update categories.yaml [\#16097](https://github.com/netdata/netdata/pull/16097) ([Ancairon](https://github.com/Ancairon))
+- Fix systemd-journal.plugin README and prepare it for Learn [\#16096](https://github.com/netdata/netdata/pull/16096) ([Ancairon](https://github.com/Ancairon))
+- Split apps charts [\#16095](https://github.com/netdata/netdata/pull/16095) ([thiagoftsm](https://github.com/thiagoftsm))
+- fix querying out of retention [\#16094](https://github.com/netdata/netdata/pull/16094) ([ktsaou](https://github.com/ktsaou))
+- Regenerate integrations.js [\#16093](https://github.com/netdata/netdata/pull/16093) ([netdatabot](https://github.com/netdatabot))
+- update go.d.plugin to v0.56.1 [\#16092](https://github.com/netdata/netdata/pull/16092) ([ilyam8](https://github.com/ilyam8))
+- update UI to v6.42.1 [\#16091](https://github.com/netdata/netdata/pull/16091) ([ilyam8](https://github.com/ilyam8))
+- don't use sd\_journal\_open\_files\_fd\(\) that is buggy on older libsystemd [\#16090](https://github.com/netdata/netdata/pull/16090) ([ktsaou](https://github.com/ktsaou))
+- external plugins: respect env NETDATA\_LOG\_SEVERITY\_LEVEL [\#16089](https://github.com/netdata/netdata/pull/16089) ([ilyam8](https://github.com/ilyam8))
+- update UI to v6.42.0 [\#16088](https://github.com/netdata/netdata/pull/16088) ([ilyam8](https://github.com/ilyam8))
+- functions: prevent a busy wait loop [\#16086](https://github.com/netdata/netdata/pull/16086) ([ktsaou](https://github.com/ktsaou))
+- charts.d: respect env NETDATA\_LOG\_SEVERITY\_LEVEL [\#16085](https://github.com/netdata/netdata/pull/16085) ([ilyam8](https://github.com/ilyam8))
+- python.d: respect env NETDATA\_LOG\_SEVERITY\_LEVEL [\#16084](https://github.com/netdata/netdata/pull/16084) ([ilyam8](https://github.com/ilyam8))
+- Address reported socket issue [\#16083](https://github.com/netdata/netdata/pull/16083) ([thiagoftsm](https://github.com/thiagoftsm))
+- Change @linuxnetdata to @netdatahq [\#16082](https://github.com/netdata/netdata/pull/16082) ([ralphm](https://github.com/ralphm))
+- \[Integrations Docs\] Add a badge for either netdata or community maintained [\#16073](https://github.com/netdata/netdata/pull/16073) ([Ancairon](https://github.com/Ancairon))
+- Skip database migration steps in new installation [\#16071](https://github.com/netdata/netdata/pull/16071) ([stelfrag](https://github.com/stelfrag))
+- Improve description about tc.plugin [\#16068](https://github.com/netdata/netdata/pull/16068) ([thiagoftsm](https://github.com/thiagoftsm))
+- Regenerate integrations.js [\#16062](https://github.com/netdata/netdata/pull/16062) ([netdatabot](https://github.com/netdatabot))
+- update go.d version to v0.56.0 [\#16061](https://github.com/netdata/netdata/pull/16061) ([ilyam8](https://github.com/ilyam8))
+- Bugfix on integrations/gen\_docs\_integrations.py [\#16059](https://github.com/netdata/netdata/pull/16059) ([Ancairon](https://github.com/Ancairon))
+- Fix coverity 402975 [\#16058](https://github.com/netdata/netdata/pull/16058) ([stelfrag](https://github.com/stelfrag))
+- Send alerts summary field to cloud [\#16056](https://github.com/netdata/netdata/pull/16056) ([MrZammler](https://github.com/MrZammler))
+- update bundled ui version to v6.41.1 [\#16054](https://github.com/netdata/netdata/pull/16054) ([ilyam8](https://github.com/ilyam8))
+- Update to use versioned base images for CI. [\#16053](https://github.com/netdata/netdata/pull/16053) ([Ferroin](https://github.com/Ferroin))
+- Fix missing find command when installing/updating on Rocky Linux systems. [\#16052](https://github.com/netdata/netdata/pull/16052) ([Ferroin](https://github.com/Ferroin))
+- Fix summary field in table [\#16050](https://github.com/netdata/netdata/pull/16050) ([MrZammler](https://github.com/MrZammler))
+- Switch to uint64\_t to avoid overflow in 32bit systems [\#16048](https://github.com/netdata/netdata/pull/16048) ([stelfrag](https://github.com/stelfrag))
+- Convert the ML database [\#16046](https://github.com/netdata/netdata/pull/16046) ([stelfrag](https://github.com/stelfrag))
+- Regenerate integrations.js [\#16044](https://github.com/netdata/netdata/pull/16044) ([netdatabot](https://github.com/netdatabot))
+- Doc about running a local dashboard through Cloudflare \(community\) [\#16043](https://github.com/netdata/netdata/pull/16043) ([Ancairon](https://github.com/Ancairon))
+- Have one documentation page about Netdata Charts [\#16042](https://github.com/netdata/netdata/pull/16042) ([Ancairon](https://github.com/Ancairon))
+- Remove discontinued Hangouts and StackPulse notification methods [\#16041](https://github.com/netdata/netdata/pull/16041) ([Ancairon](https://github.com/Ancairon))
+- systemd-Journal by file [\#16038](https://github.com/netdata/netdata/pull/16038) ([ktsaou](https://github.com/ktsaou))
+- health: add upsd alerts [\#16036](https://github.com/netdata/netdata/pull/16036) ([ilyam8](https://github.com/ilyam8))
+- Disable mongodb exporter builds where broken. [\#16033](https://github.com/netdata/netdata/pull/16033) ([Ferroin](https://github.com/Ferroin))
+- Run health queries from tier 0 [\#16032](https://github.com/netdata/netdata/pull/16032) ([MrZammler](https://github.com/MrZammler))
+- use `status` as units for `anomaly_detection.detector_events` [\#16028](https://github.com/netdata/netdata/pull/16028) ([andrewm4894](https://github.com/andrewm4894))
+- add description for Homebrew on Apple Silicon Mac \(netdata/learn/\#1789\) [\#16027](https://github.com/netdata/netdata/pull/16027) ([theggs](https://github.com/theggs))
+- Fix package builds on Rocky Linux. [\#16026](https://github.com/netdata/netdata/pull/16026) ([Ferroin](https://github.com/Ferroin))
+- Remove family from alerts [\#16025](https://github.com/netdata/netdata/pull/16025) ([MrZammler](https://github.com/MrZammler))
+- add systemd-journal.plugin to apps\_groups.conf [\#16024](https://github.com/netdata/netdata/pull/16024) ([ilyam8](https://github.com/ilyam8))
+- Fix handling of CI skipping. [\#16022](https://github.com/netdata/netdata/pull/16022) ([Ferroin](https://github.com/Ferroin))
+- update bundled UI to v6.39.0 [\#16020](https://github.com/netdata/netdata/pull/16020) ([ilyam8](https://github.com/ilyam8))
+- Update collector metadata for python collectors [\#16019](https://github.com/netdata/netdata/pull/16019) ([tkatsoulas](https://github.com/tkatsoulas))
+- fix crash on setting thread name [\#16016](https://github.com/netdata/netdata/pull/16016) ([ilyam8](https://github.com/ilyam8))
+- Systemd-Journal: fix crash when the uid or gid do not have names [\#16015](https://github.com/netdata/netdata/pull/16015) ([ktsaou](https://github.com/ktsaou))
+- Avoid duplicate keys in labels [\#16014](https://github.com/netdata/netdata/pull/16014) ([stelfrag](https://github.com/stelfrag))
+- remove the line length limit from pluginsd [\#16013](https://github.com/netdata/netdata/pull/16013) ([ktsaou](https://github.com/ktsaou))
+- Regenerate integrations.js [\#16011](https://github.com/netdata/netdata/pull/16011) ([netdatabot](https://github.com/netdatabot))
+- Simplify the script for generating documentation from integrations [\#16009](https://github.com/netdata/netdata/pull/16009) ([Ancairon](https://github.com/Ancairon))
+- some collector metadata improvements [\#16008](https://github.com/netdata/netdata/pull/16008) ([andrewm4894](https://github.com/andrewm4894))
+- Fix compilation warnings [\#16006](https://github.com/netdata/netdata/pull/16006) ([stelfrag](https://github.com/stelfrag))
+- Update CMakeLists.txt [\#16005](https://github.com/netdata/netdata/pull/16005) ([stelfrag](https://github.com/stelfrag))
+- eBPF socket: function with event loop [\#16004](https://github.com/netdata/netdata/pull/16004) ([thiagoftsm](https://github.com/thiagoftsm))
+- fix compilation warnings [\#16001](https://github.com/netdata/netdata/pull/16001) ([ktsaou](https://github.com/ktsaou))
+- Update integrations/gen\_docs\_integrations.py [\#15997](https://github.com/netdata/netdata/pull/15997) ([Ancairon](https://github.com/Ancairon))
+- Make collectors/COLLECTORS.md have its list autogenerated from integrations.js [\#15995](https://github.com/netdata/netdata/pull/15995) ([Ancairon](https://github.com/Ancairon))
+
+## [v1.42.4](https://github.com/netdata/netdata/tree/v1.42.4) (2023-09-18)
+
+[Full Changelog](https://github.com/netdata/netdata/compare/v1.42.3...v1.42.4)
## [v1.42.3](https://github.com/netdata/netdata/tree/v1.42.3) (2023-09-11)
@@ -141,291 +459,10 @@
[Full Changelog](https://github.com/netdata/netdata/compare/v1.41.0...v1.42.0)
-**Merged pull requests:**
-
-- ci: codacy exclude web/gui/v2/ [\#15780](https://github.com/netdata/netdata/pull/15780) ([ilyam8](https://github.com/ilyam8))
-- update UI to v6.27.0 [\#15778](https://github.com/netdata/netdata/pull/15778) ([ilyam8](https://github.com/ilyam8))
-- ci: fix labeler area/docs [\#15776](https://github.com/netdata/netdata/pull/15776) ([ilyam8](https://github.com/ilyam8))
-- fix claiming via UI for static build [\#15774](https://github.com/netdata/netdata/pull/15774) ([ilyam8](https://github.com/ilyam8))
-- extend the trimming window to avoid empty points at the end of queries [\#15773](https://github.com/netdata/netdata/pull/15773) ([ktsaou](https://github.com/ktsaou))
-- Regenerate integrations.js [\#15772](https://github.com/netdata/netdata/pull/15772) ([netdatabot](https://github.com/netdatabot))
-- Change FreeBSD / macOS system.swap\(io\) to mem.swap\(io\) [\#15769](https://github.com/netdata/netdata/pull/15769) ([Dim-P](https://github.com/Dim-P))
-- update ui to v6.26.3 [\#15767](https://github.com/netdata/netdata/pull/15767) ([ilyam8](https://github.com/ilyam8))
-- Fix CID 398318 [\#15766](https://github.com/netdata/netdata/pull/15766) ([underhood](https://github.com/underhood))
-- Fix coverity issues introduced via drm proc module [\#15765](https://github.com/netdata/netdata/pull/15765) ([Dim-P](https://github.com/Dim-P))
-- Regenerate integrations.js [\#15764](https://github.com/netdata/netdata/pull/15764) ([netdatabot](https://github.com/netdatabot))
-- meta update proc drm icon [\#15763](https://github.com/netdata/netdata/pull/15763) ([ilyam8](https://github.com/ilyam8))
-- Update metadata.yaml [\#15762](https://github.com/netdata/netdata/pull/15762) ([ktsaou](https://github.com/ktsaou))
-- Update metadata.yaml [\#15761](https://github.com/netdata/netdata/pull/15761) ([ktsaou](https://github.com/ktsaou))
-- Regenerate integrations.js [\#15760](https://github.com/netdata/netdata/pull/15760) ([netdatabot](https://github.com/netdatabot))
-- fix nvidia\_smi power\_readings for new drivers [\#15759](https://github.com/netdata/netdata/pull/15759) ([ilyam8](https://github.com/ilyam8))
-- update bundled UI to v2.26.2 [\#15758](https://github.com/netdata/netdata/pull/15758) ([ilyam8](https://github.com/ilyam8))
-- Regenerate integrations.js [\#15751](https://github.com/netdata/netdata/pull/15751) ([netdatabot](https://github.com/netdatabot))
-- ci labeler: remove integrations from area/docs [\#15750](https://github.com/netdata/netdata/pull/15750) ([ilyam8](https://github.com/ilyam8))
-- meta: align left metrics, alerts, and config options [\#15749](https://github.com/netdata/netdata/pull/15749) ([ilyam8](https://github.com/ilyam8))
-- Add dependencies for systemd journal plugin. [\#15747](https://github.com/netdata/netdata/pull/15747) ([Ferroin](https://github.com/Ferroin))
-- prefer cap over setuid for sysetmd-journal in installer [\#15741](https://github.com/netdata/netdata/pull/15741) ([ilyam8](https://github.com/ilyam8))
-- \[cloud-blocker\] https\_client add TLS ext. SNI + support chunked transfer encoding [\#15739](https://github.com/netdata/netdata/pull/15739) ([underhood](https://github.com/underhood))
-- Don't overwrite my vscode settings! [\#15738](https://github.com/netdata/netdata/pull/15738) ([underhood](https://github.com/underhood))
-- faster facets and journal fixes [\#15737](https://github.com/netdata/netdata/pull/15737) ([ktsaou](https://github.com/ktsaou))
-- Adjust namespace used for sd\_journal\_open [\#15736](https://github.com/netdata/netdata/pull/15736) ([stelfrag](https://github.com/stelfrag))
-- Update to latest copy of v2 dashboard. [\#15735](https://github.com/netdata/netdata/pull/15735) ([Ferroin](https://github.com/Ferroin))
-- Add netdata-plugin-systemd-journal package. [\#15733](https://github.com/netdata/netdata/pull/15733) ([Ferroin](https://github.com/Ferroin))
-- proc.plugin: dont log if pressure/irq does not exist [\#15732](https://github.com/netdata/netdata/pull/15732) ([ilyam8](https://github.com/ilyam8))
-- ci: run "Generate Integrations" only in netdata/netdata [\#15731](https://github.com/netdata/netdata/pull/15731) ([ilyam8](https://github.com/ilyam8))
-- Regenerate integrations.js [\#15728](https://github.com/netdata/netdata/pull/15728) ([netdatabot](https://github.com/netdatabot))
-- fix systemd-journal makefile [\#15727](https://github.com/netdata/netdata/pull/15727) ([ktsaou](https://github.com/ktsaou))
-- disable systemdunits alarms [\#15726](https://github.com/netdata/netdata/pull/15726) ([ilyam8](https://github.com/ilyam8))
-- Fix memory corruption [\#15724](https://github.com/netdata/netdata/pull/15724) ([stelfrag](https://github.com/stelfrag))
-- Revert "Refactor RRD code. \(\#15423\)" [\#15723](https://github.com/netdata/netdata/pull/15723) ([vkalintiris](https://github.com/vkalintiris))
-- Changes to the templates for integrations [\#15721](https://github.com/netdata/netdata/pull/15721) ([Ancairon](https://github.com/Ancairon))
-- fix the freez pointer of dyncfg [\#15719](https://github.com/netdata/netdata/pull/15719) ([ktsaou](https://github.com/ktsaou))
-- Update the bundled v2 dashboard to the latest release. [\#15718](https://github.com/netdata/netdata/pull/15718) ([Ferroin](https://github.com/Ferroin))
-- Regenerate integrations.js [\#15717](https://github.com/netdata/netdata/pull/15717) ([netdatabot](https://github.com/netdatabot))
-- fix meta deploy docker swarm NC env var [\#15716](https://github.com/netdata/netdata/pull/15716) ([ilyam8](https://github.com/ilyam8))
-- Regenerate integrations.js [\#15713](https://github.com/netdata/netdata/pull/15713) ([netdatabot](https://github.com/netdatabot))
-- Update metadata.yaml [\#15710](https://github.com/netdata/netdata/pull/15710) ([sashwathn](https://github.com/sashwathn))
-- Regenerate integrations.js [\#15709](https://github.com/netdata/netdata/pull/15709) ([netdatabot](https://github.com/netdatabot))
-- integrations: fix docker compose indent [\#15708](https://github.com/netdata/netdata/pull/15708) ([ilyam8](https://github.com/ilyam8))
-- Better cleanup of aclk alert table entries [\#15706](https://github.com/netdata/netdata/pull/15706) ([MrZammler](https://github.com/MrZammler))
-- Regenerate integrations.js [\#15705](https://github.com/netdata/netdata/pull/15705) ([netdatabot](https://github.com/netdatabot))
-- Fix typo in categories for beanstalk collector metadata. [\#15703](https://github.com/netdata/netdata/pull/15703) ([Ferroin](https://github.com/Ferroin))
-- Assorted fixes for integrations templates. [\#15702](https://github.com/netdata/netdata/pull/15702) ([Ferroin](https://github.com/Ferroin))
-- integrations: fix metrics availability [\#15701](https://github.com/netdata/netdata/pull/15701) ([ilyam8](https://github.com/ilyam8))
-- Fix handling of troubleshooting section in integrations. [\#15700](https://github.com/netdata/netdata/pull/15700) ([Ferroin](https://github.com/Ferroin))
-- update vscode yaml schemas association [\#15697](https://github.com/netdata/netdata/pull/15697) ([ilyam8](https://github.com/ilyam8))
-- Update categories.yaml [\#15696](https://github.com/netdata/netdata/pull/15696) ([sashwathn](https://github.com/sashwathn))
-- Regenerate integrations.js [\#15695](https://github.com/netdata/netdata/pull/15695) ([netdatabot](https://github.com/netdatabot))
-- Extend eBPF default shutdown [\#15694](https://github.com/netdata/netdata/pull/15694) ([thiagoftsm](https://github.com/thiagoftsm))
-- Fix integrations regen workflow [\#15693](https://github.com/netdata/netdata/pull/15693) ([Ferroin](https://github.com/Ferroin))
-- bump go.d.plugin v0.54.1 [\#15692](https://github.com/netdata/netdata/pull/15692) ([ilyam8](https://github.com/ilyam8))
-- Update names [\#15691](https://github.com/netdata/netdata/pull/15691) ([thiagoftsm](https://github.com/thiagoftsm))
-- Update metadata.yaml [\#15690](https://github.com/netdata/netdata/pull/15690) ([sashwathn](https://github.com/sashwathn))
-- Update categories.yaml [\#15689](https://github.com/netdata/netdata/pull/15689) ([sashwathn](https://github.com/sashwathn))
-- Update metadata.yaml [\#15688](https://github.com/netdata/netdata/pull/15688) ([sashwathn](https://github.com/sashwathn))
-- Update deploy.yaml [\#15687](https://github.com/netdata/netdata/pull/15687) ([sashwathn](https://github.com/sashwathn))
-- Update categories.yaml [\#15686](https://github.com/netdata/netdata/pull/15686) ([sashwathn](https://github.com/sashwathn))
-- Update categories.yaml [\#15685](https://github.com/netdata/netdata/pull/15685) ([sashwathn](https://github.com/sashwathn))
-- Update metadata.yaml [\#15684](https://github.com/netdata/netdata/pull/15684) ([sashwathn](https://github.com/sashwathn))
-- Update categories.yaml [\#15683](https://github.com/netdata/netdata/pull/15683) ([sashwathn](https://github.com/sashwathn))
-- Update categories.yaml [\#15682](https://github.com/netdata/netdata/pull/15682) ([sashwathn](https://github.com/sashwathn))
-- Update categories.yaml [\#15681](https://github.com/netdata/netdata/pull/15681) ([sashwathn](https://github.com/sashwathn))
-- Update metadata.yaml [\#15680](https://github.com/netdata/netdata/pull/15680) ([sashwathn](https://github.com/sashwathn))
-- Update metadata.yaml [\#15679](https://github.com/netdata/netdata/pull/15679) ([shyamvalsan](https://github.com/shyamvalsan))
-- Update metadata.yaml [\#15678](https://github.com/netdata/netdata/pull/15678) ([sashwathn](https://github.com/sashwathn))
-- Update Webhook icon [\#15677](https://github.com/netdata/netdata/pull/15677) ([sashwathn](https://github.com/sashwathn))
-- Update deploy.yaml to fix Docker and Kubernetes commands [\#15676](https://github.com/netdata/netdata/pull/15676) ([sashwathn](https://github.com/sashwathn))
-- meta MacOS =\> macOS [\#15675](https://github.com/netdata/netdata/pull/15675) ([ilyam8](https://github.com/ilyam8))
-- Adapt Cloud notifications to the new schema [\#15674](https://github.com/netdata/netdata/pull/15674) ([sashwathn](https://github.com/sashwathn))
-- Fix formatting [\#15673](https://github.com/netdata/netdata/pull/15673) ([shyamvalsan](https://github.com/shyamvalsan))
-- Fixing tables \(aws sns\) [\#15671](https://github.com/netdata/netdata/pull/15671) ([shyamvalsan](https://github.com/shyamvalsan))
-- Update metadata.yaml for Cloud Notifications [\#15670](https://github.com/netdata/netdata/pull/15670) ([sashwathn](https://github.com/sashwathn))
-- remove " Metrics" from linux categories [\#15669](https://github.com/netdata/netdata/pull/15669) ([ilyam8](https://github.com/ilyam8))
-- Fix table formatting \(custom exporter\) [\#15668](https://github.com/netdata/netdata/pull/15668) ([shyamvalsan](https://github.com/shyamvalsan))
-- Fix icon prometheus exporter icon [\#15666](https://github.com/netdata/netdata/pull/15666) ([hugovalente-pm](https://github.com/hugovalente-pm))
-- freeipmi change restart message to info [\#15664](https://github.com/netdata/netdata/pull/15664) ([ilyam8](https://github.com/ilyam8))
-- fix proc.plugin meta filename [\#15659](https://github.com/netdata/netdata/pull/15659) ([ilyam8](https://github.com/ilyam8))
-- small improvements to README.md [\#15658](https://github.com/netdata/netdata/pull/15658) ([ilyam8](https://github.com/ilyam8))
-- Fix icon for solarwinds [\#15657](https://github.com/netdata/netdata/pull/15657) ([hugovalente-pm](https://github.com/hugovalente-pm))
-- Fix Apps plugin icons [\#15655](https://github.com/netdata/netdata/pull/15655) ([hugovalente-pm](https://github.com/hugovalente-pm))
-- fix pandas category [\#15654](https://github.com/netdata/netdata/pull/15654) ([andrewm4894](https://github.com/andrewm4894))
-- Fix exporter icons [\#15652](https://github.com/netdata/netdata/pull/15652) ([shyamvalsan](https://github.com/shyamvalsan))
-- disable freeipmi in docker by default [\#15651](https://github.com/netdata/netdata/pull/15651) ([ilyam8](https://github.com/ilyam8))
-- Fixing FreeBSD icons [\#15650](https://github.com/netdata/netdata/pull/15650) ([shyamvalsan](https://github.com/shyamvalsan))
-- Fix exporter schema to support multiple entries per file. [\#15649](https://github.com/netdata/netdata/pull/15649) ([Ferroin](https://github.com/Ferroin))
-- Fixing icons in netdata/netdata repo [\#15647](https://github.com/netdata/netdata/pull/15647) ([shyamvalsan](https://github.com/shyamvalsan))
-- Fix name in the yaml of example python collector [\#15646](https://github.com/netdata/netdata/pull/15646) ([Ancairon](https://github.com/Ancairon))
-- Fix icons [\#15645](https://github.com/netdata/netdata/pull/15645) ([hugovalente-pm](https://github.com/hugovalente-pm))
-- Fix icons for notifications [\#15644](https://github.com/netdata/netdata/pull/15644) ([shyamvalsan](https://github.com/shyamvalsan))
-- convert collectors meta files from single to multi [\#15642](https://github.com/netdata/netdata/pull/15642) ([ilyam8](https://github.com/ilyam8))
-- fix edit-config for containerized Netdata when running from host [\#15641](https://github.com/netdata/netdata/pull/15641) ([ilyam8](https://github.com/ilyam8))
-- fix: 🐛 docker bind-mount stock files creation [\#15639](https://github.com/netdata/netdata/pull/15639) ([Leny1996](https://github.com/Leny1996))
-- The icon\_filename value was not in quotes - Fixed [\#15635](https://github.com/netdata/netdata/pull/15635) ([sashwathn](https://github.com/sashwathn))
-- Update graphite metadata.yaml [\#15634](https://github.com/netdata/netdata/pull/15634) ([shyamvalsan](https://github.com/shyamvalsan))
-- Debugfs yaml update [\#15633](https://github.com/netdata/netdata/pull/15633) ([thiagoftsm](https://github.com/thiagoftsm))
-- Update metadata.yaml [\#15632](https://github.com/netdata/netdata/pull/15632) ([shyamvalsan](https://github.com/shyamvalsan))
-- review images for integrations from security to windows systems [\#15630](https://github.com/netdata/netdata/pull/15630) ([hugovalente-pm](https://github.com/hugovalente-pm))
-- bump ui to v6.23.0 [\#15629](https://github.com/netdata/netdata/pull/15629) ([ilyam8](https://github.com/ilyam8))
-- Updated Cloud Notification Integrations with the new schema [\#15628](https://github.com/netdata/netdata/pull/15628) ([sashwathn](https://github.com/sashwathn))
-- Add additional variable section to instance data in schema. [\#15627](https://github.com/netdata/netdata/pull/15627) ([Ferroin](https://github.com/Ferroin))
-- fix icons for message brokers and hardware [\#15626](https://github.com/netdata/netdata/pull/15626) ([hugovalente-pm](https://github.com/hugovalente-pm))
-- Add key for notifications to control what global config options get displayed [\#15625](https://github.com/netdata/netdata/pull/15625) ([Ferroin](https://github.com/Ferroin))
-- fix icons for webservers integrations [\#15624](https://github.com/netdata/netdata/pull/15624) ([hugovalente-pm](https://github.com/hugovalente-pm))
-- Add notification metadata for agent notifications [\#15622](https://github.com/netdata/netdata/pull/15622) ([shyamvalsan](https://github.com/shyamvalsan))
-- fix icons for db integrations [\#15621](https://github.com/netdata/netdata/pull/15621) ([hugovalente-pm](https://github.com/hugovalente-pm))
-- Rename multi\_metadata.yaml to metadata.yaml [\#15619](https://github.com/netdata/netdata/pull/15619) ([shyamvalsan](https://github.com/shyamvalsan))
-- Rename multi\_metadata.yaml to metadata.yaml [\#15618](https://github.com/netdata/netdata/pull/15618) ([shyamvalsan](https://github.com/shyamvalsan))
-- Fix up notification schema to better support cloud notifications. [\#15616](https://github.com/netdata/netdata/pull/15616) ([Ferroin](https://github.com/Ferroin))
-- Updated all cloud notifications except generic webhook [\#15615](https://github.com/netdata/netdata/pull/15615) ([sashwathn](https://github.com/sashwathn))
-- prefer titles, families, units and priorities from collected charts [\#15614](https://github.com/netdata/netdata/pull/15614) ([ktsaou](https://github.com/ktsaou))
-- Update categories.yaml to add notifications [\#15613](https://github.com/netdata/netdata/pull/15613) ([sashwathn](https://github.com/sashwathn))
-- ci disable yamllint line-length check [\#15612](https://github.com/netdata/netdata/pull/15612) ([ilyam8](https://github.com/ilyam8))
-- Fix descriptions in config objects, make them single line [\#15610](https://github.com/netdata/netdata/pull/15610) ([Ancairon](https://github.com/Ancairon))
-- Update icons [\#15609](https://github.com/netdata/netdata/pull/15609) ([shyamvalsan](https://github.com/shyamvalsan))
-- Update icon [\#15608](https://github.com/netdata/netdata/pull/15608) ([shyamvalsan](https://github.com/shyamvalsan))
-- Update icon [\#15607](https://github.com/netdata/netdata/pull/15607) ([shyamvalsan](https://github.com/shyamvalsan))
-- Update documentation [\#15606](https://github.com/netdata/netdata/pull/15606) ([kiela](https://github.com/kiela))
-- fix potential crash bug. [\#15605](https://github.com/netdata/netdata/pull/15605) ([icy17](https://github.com/icy17))
-- FreeBSD yaml update [\#15603](https://github.com/netdata/netdata/pull/15603) ([thiagoftsm](https://github.com/thiagoftsm))
-- Macos yaml update [\#15602](https://github.com/netdata/netdata/pull/15602) ([thiagoftsm](https://github.com/thiagoftsm))
-- minor changes in README.md [\#15601](https://github.com/netdata/netdata/pull/15601) ([tkatsoulas](https://github.com/tkatsoulas))
-- reviewed icos for a bunch of integrations [\#15599](https://github.com/netdata/netdata/pull/15599) ([hugovalente-pm](https://github.com/hugovalente-pm))
-- Sample Cloud Notifications metadata for Discord [\#15597](https://github.com/netdata/netdata/pull/15597) ([sashwathn](https://github.com/sashwathn))
-- Updated icons in deploy section [\#15596](https://github.com/netdata/netdata/pull/15596) ([shyamvalsan](https://github.com/shyamvalsan))
-- 10 points per query min [\#15595](https://github.com/netdata/netdata/pull/15595) ([ktsaou](https://github.com/ktsaou))
-- CUPS yaml update [\#15594](https://github.com/netdata/netdata/pull/15594) ([thiagoftsm](https://github.com/thiagoftsm))
-- remove metrics.csv files [\#15593](https://github.com/netdata/netdata/pull/15593) ([ilyam8](https://github.com/ilyam8))
-- fix tomcat meta [\#15592](https://github.com/netdata/netdata/pull/15592) ([ilyam8](https://github.com/ilyam8))
-- Added a sample metadata.yaml for Alerta [\#15591](https://github.com/netdata/netdata/pull/15591) ([sashwathn](https://github.com/sashwathn))
-- remove the noise by silencing alerts that dont need to wake up people [\#15590](https://github.com/netdata/netdata/pull/15590) ([ktsaou](https://github.com/ktsaou))
-- Fix health query [\#15589](https://github.com/netdata/netdata/pull/15589) ([stelfrag](https://github.com/stelfrag))
-- Fix typo in notification schema. [\#15588](https://github.com/netdata/netdata/pull/15588) ([Ferroin](https://github.com/Ferroin))
-- Update icons for relevant integrations in proc.plugin [\#15587](https://github.com/netdata/netdata/pull/15587) ([sashwathn](https://github.com/sashwathn))
-- Update icon for power supply [\#15586](https://github.com/netdata/netdata/pull/15586) ([sashwathn](https://github.com/sashwathn))
-- Update Slabinfo Logo [\#15585](https://github.com/netdata/netdata/pull/15585) ([sashwathn](https://github.com/sashwathn))
-- fix cpu MHz from /proc/cpuinfo [\#15584](https://github.com/netdata/netdata/pull/15584) ([ilyam8](https://github.com/ilyam8))
-- small readme icon fix [\#15583](https://github.com/netdata/netdata/pull/15583) ([andrewm4894](https://github.com/andrewm4894))
-- update pandas collector metadata [\#15582](https://github.com/netdata/netdata/pull/15582) ([andrewm4894](https://github.com/andrewm4894))
-- Update zscores metadata yaml [\#15581](https://github.com/netdata/netdata/pull/15581) ([andrewm4894](https://github.com/andrewm4894))
-- Create metadata.yaml for MongoDB exporter [\#15580](https://github.com/netdata/netdata/pull/15580) ([shyamvalsan](https://github.com/shyamvalsan))
-- Create metadata.yaml for JSON exporter [\#15579](https://github.com/netdata/netdata/pull/15579) ([shyamvalsan](https://github.com/shyamvalsan))
-- Create metadata.yaml for Google PubSub exporter [\#15578](https://github.com/netdata/netdata/pull/15578) ([shyamvalsan](https://github.com/shyamvalsan))
-- Create metadata.yaml for AWS kinesis exporter [\#15577](https://github.com/netdata/netdata/pull/15577) ([shyamvalsan](https://github.com/shyamvalsan))
-- Create multi\_metadata.yaml for graphite exporters [\#15576](https://github.com/netdata/netdata/pull/15576) ([shyamvalsan](https://github.com/shyamvalsan))
-- Create multi\_metadata.yaml [\#15575](https://github.com/netdata/netdata/pull/15575) ([shyamvalsan](https://github.com/shyamvalsan))
-- Add missing file in CMakeLists.txt [\#15574](https://github.com/netdata/netdata/pull/15574) ([stelfrag](https://github.com/stelfrag))
-- comment out anomalies metadata and add note [\#15573](https://github.com/netdata/netdata/pull/15573) ([andrewm4894](https://github.com/andrewm4894))
-- Fixed deployment commands for Docker, Kubernetes and Linux [\#15572](https://github.com/netdata/netdata/pull/15572) ([sashwathn](https://github.com/sashwathn))
-- filter out systemd-udevd.service/udevd [\#15571](https://github.com/netdata/netdata/pull/15571) ([ilyam8](https://github.com/ilyam8))
-- Added FreeBSD integration and fixed Windows installation Steps [\#15570](https://github.com/netdata/netdata/pull/15570) ([sashwathn](https://github.com/sashwathn))
-- fix schema validation for some meta files [\#15569](https://github.com/netdata/netdata/pull/15569) ([ilyam8](https://github.com/ilyam8))
-- Drop duplicate / unused index [\#15568](https://github.com/netdata/netdata/pull/15568) ([stelfrag](https://github.com/stelfrag))
-- Xen yaml update [\#15567](https://github.com/netdata/netdata/pull/15567) ([thiagoftsm](https://github.com/thiagoftsm))
-- Timex yaml update [\#15565](https://github.com/netdata/netdata/pull/15565) ([thiagoftsm](https://github.com/thiagoftsm))
-- Create metadata.yaml for OpenTSDB Exporter [\#15563](https://github.com/netdata/netdata/pull/15563) ([shyamvalsan](https://github.com/shyamvalsan))
-- TC yaml update [\#15562](https://github.com/netdata/netdata/pull/15562) ([thiagoftsm](https://github.com/thiagoftsm))
-- Added Exporter and Notifications categories and removed them from Data Collection [\#15561](https://github.com/netdata/netdata/pull/15561) ([sashwathn](https://github.com/sashwathn))
-- Update slabinfo yaml [\#15560](https://github.com/netdata/netdata/pull/15560) ([thiagoftsm](https://github.com/thiagoftsm))
-- Update metadata.yaml for charts.d collectors [\#15559](https://github.com/netdata/netdata/pull/15559) ([MrZammler](https://github.com/MrZammler))
-- Perf yaml [\#15558](https://github.com/netdata/netdata/pull/15558) ([thiagoftsm](https://github.com/thiagoftsm))
-- detect the path the netdata-claim.sh script is in [\#15556](https://github.com/netdata/netdata/pull/15556) ([ktsaou](https://github.com/ktsaou))
-- Fixed typos in code blocks and added missing icons [\#15555](https://github.com/netdata/netdata/pull/15555) ([sashwathn](https://github.com/sashwathn))
-- Remove temporarily from the CI Tumbleweed support [\#15554](https://github.com/netdata/netdata/pull/15554) ([tkatsoulas](https://github.com/tkatsoulas))
-- fix ebpf.plugin system swapcalls [\#15553](https://github.com/netdata/netdata/pull/15553) ([ilyam8](https://github.com/ilyam8))
-- Fixes for `deploy.yaml`. [\#15551](https://github.com/netdata/netdata/pull/15551) ([Ferroin](https://github.com/Ferroin))
-- bump ui to v6.22.1 [\#15550](https://github.com/netdata/netdata/pull/15550) ([ilyam8](https://github.com/ilyam8))
-- Add schema and examples for notification method metadata. [\#15549](https://github.com/netdata/netdata/pull/15549) ([Ferroin](https://github.com/Ferroin))
-- Update python sensors metadata yaml [\#15548](https://github.com/netdata/netdata/pull/15548) ([andrewm4894](https://github.com/andrewm4894))
-- fix yamls [\#15547](https://github.com/netdata/netdata/pull/15547) ([Ancairon](https://github.com/Ancairon))
-- fix expiration dates for API responses [\#15546](https://github.com/netdata/netdata/pull/15546) ([ktsaou](https://github.com/ktsaou))
-- Add exporter integration schema. [\#15545](https://github.com/netdata/netdata/pull/15545) ([Ferroin](https://github.com/Ferroin))
-- postfix metadata.yaml - add links and some descriptions [\#15544](https://github.com/netdata/netdata/pull/15544) ([andrewm4894](https://github.com/andrewm4894))
-- Update metadata for multiple python collectors. [\#15543](https://github.com/netdata/netdata/pull/15543) ([tkatsoulas](https://github.com/tkatsoulas))
-- bump ui to v6.22.0 [\#15542](https://github.com/netdata/netdata/pull/15542) ([ilyam8](https://github.com/ilyam8))
-- Fill in yaml files for some python collectors [\#15541](https://github.com/netdata/netdata/pull/15541) ([Ancairon](https://github.com/Ancairon))
-- Fix deployment and categories [\#15540](https://github.com/netdata/netdata/pull/15540) ([sashwathn](https://github.com/sashwathn))
-- docs: fix apps fd badges and typos [\#15539](https://github.com/netdata/netdata/pull/15539) ([ilyam8](https://github.com/ilyam8))
-- change api.netdata.cloud to app.netdata.cloud [\#15538](https://github.com/netdata/netdata/pull/15538) ([ilyam8](https://github.com/ilyam8))
-- Update metadata.yaml for some python collectors - 2 [\#15537](https://github.com/netdata/netdata/pull/15537) ([MrZammler](https://github.com/MrZammler))
-- Change nvidia\_smi link to go version in COLLECTORS.md [\#15536](https://github.com/netdata/netdata/pull/15536) ([Ancairon](https://github.com/Ancairon))
-- Update nfacct yaml [\#15535](https://github.com/netdata/netdata/pull/15535) ([thiagoftsm](https://github.com/thiagoftsm))
-- Update ioping yaml [\#15534](https://github.com/netdata/netdata/pull/15534) ([thiagoftsm](https://github.com/thiagoftsm))
-- Freeimpi yaml [\#15533](https://github.com/netdata/netdata/pull/15533) ([thiagoftsm](https://github.com/thiagoftsm))
-- Updated all Linux distros, macOS and Docker [\#15532](https://github.com/netdata/netdata/pull/15532) ([sashwathn](https://github.com/sashwathn))
-- Update platform support info and add a schema. [\#15531](https://github.com/netdata/netdata/pull/15531) ([Ferroin](https://github.com/Ferroin))
-- added cloud status in registry?action=hello [\#15530](https://github.com/netdata/netdata/pull/15530) ([ktsaou](https://github.com/ktsaou))
-- update memcached metadata.yaml [\#15529](https://github.com/netdata/netdata/pull/15529) ([andrewm4894](https://github.com/andrewm4894))
-- Update python d varnish metadata [\#15528](https://github.com/netdata/netdata/pull/15528) ([andrewm4894](https://github.com/andrewm4894))
-- Update yaml description \(diskspace\) [\#15527](https://github.com/netdata/netdata/pull/15527) ([thiagoftsm](https://github.com/thiagoftsm))
-- wait for node\_id while claiming [\#15526](https://github.com/netdata/netdata/pull/15526) ([ktsaou](https://github.com/ktsaou))
-- add `diskquota` collector to third party collectors list [\#15524](https://github.com/netdata/netdata/pull/15524) ([andrewm4894](https://github.com/andrewm4894))
-- Add quick\_start key to deploy schema. [\#15522](https://github.com/netdata/netdata/pull/15522) ([Ferroin](https://github.com/Ferroin))
-- Add a schema for the categories.yaml file. [\#15521](https://github.com/netdata/netdata/pull/15521) ([Ferroin](https://github.com/Ferroin))
-- fix collector multi schema [\#15520](https://github.com/netdata/netdata/pull/15520) ([ilyam8](https://github.com/ilyam8))
-- Allow to create alert hashes with --disable-cloud [\#15519](https://github.com/netdata/netdata/pull/15519) ([MrZammler](https://github.com/MrZammler))
-- Python collector yaml updates [\#15517](https://github.com/netdata/netdata/pull/15517) ([Ancairon](https://github.com/Ancairon))
-- eBPF Yaml complement [\#15516](https://github.com/netdata/netdata/pull/15516) ([thiagoftsm](https://github.com/thiagoftsm))
-- Add AMD GPU collector [\#15515](https://github.com/netdata/netdata/pull/15515) ([Dim-P](https://github.com/Dim-P))
-- Update metadata.yaml for some python collectors [\#15513](https://github.com/netdata/netdata/pull/15513) ([MrZammler](https://github.com/MrZammler))
-- Update metadata.yaml for some python collectors [\#15510](https://github.com/netdata/netdata/pull/15510) ([andrewm4894](https://github.com/andrewm4894))
-- Add schema for deployment integrations and centralize integrations schemas. [\#15509](https://github.com/netdata/netdata/pull/15509) ([Ferroin](https://github.com/Ferroin))
-- update gitignore to include vscode settings for schema validation [\#15508](https://github.com/netdata/netdata/pull/15508) ([andrewm4894](https://github.com/andrewm4894))
-- Add Samba collector yaml [\#15507](https://github.com/netdata/netdata/pull/15507) ([Ancairon](https://github.com/Ancairon))
-- Fill in metadata for idlejitter plugin. [\#15506](https://github.com/netdata/netdata/pull/15506) ([Ferroin](https://github.com/Ferroin))
-- apps.plugin limits tracing [\#15504](https://github.com/netdata/netdata/pull/15504) ([ktsaou](https://github.com/ktsaou))
-- Allow manage/health api call to be used without bearer [\#15503](https://github.com/netdata/netdata/pull/15503) ([MrZammler](https://github.com/MrZammler))
-- Avoid an extra uuid\_copy when creating new MRG entries [\#15502](https://github.com/netdata/netdata/pull/15502) ([stelfrag](https://github.com/stelfrag))
-- freeipmi flush keepalive msgs [\#15499](https://github.com/netdata/netdata/pull/15499) ([ilyam8](https://github.com/ilyam8))
-- add required properties to multi-module schema [\#15496](https://github.com/netdata/netdata/pull/15496) ([ilyam8](https://github.com/ilyam8))
-- proc integrations [\#15494](https://github.com/netdata/netdata/pull/15494) ([ktsaou](https://github.com/ktsaou))
-- docs: clarify health percentage option [\#15492](https://github.com/netdata/netdata/pull/15492) ([ilyam8](https://github.com/ilyam8))
-- Fix resource leak - CID 396310 [\#15491](https://github.com/netdata/netdata/pull/15491) ([stelfrag](https://github.com/stelfrag))
-- Improve the update of the alert chart name in the database [\#15490](https://github.com/netdata/netdata/pull/15490) ([stelfrag](https://github.com/stelfrag))
-- PCI Advanced Error Reporting \(AER\) [\#15488](https://github.com/netdata/netdata/pull/15488) ([ktsaou](https://github.com/ktsaou))
-- Dynamic Config MVP0 [\#15486](https://github.com/netdata/netdata/pull/15486) ([underhood](https://github.com/underhood))
-- Add a machine distinct id to analytics [\#15485](https://github.com/netdata/netdata/pull/15485) ([MrZammler](https://github.com/MrZammler))
-- Add basic slabinfo metadata. [\#15484](https://github.com/netdata/netdata/pull/15484) ([Ferroin](https://github.com/Ferroin))
-- Update charts.d.plugin yaml [\#15483](https://github.com/netdata/netdata/pull/15483) ([Ancairon](https://github.com/Ancairon))
-- Make title reflect legacy agent dashboard [\#15479](https://github.com/netdata/netdata/pull/15479) ([Ancairon](https://github.com/Ancairon))
-- docs: note that health foreach works only with template [\#15478](https://github.com/netdata/netdata/pull/15478) ([ilyam8](https://github.com/ilyam8))
-- Yaml file updates [\#15477](https://github.com/netdata/netdata/pull/15477) ([Ancairon](https://github.com/Ancairon))
-- Rename most-popular to most\_popular in categories.yaml [\#15476](https://github.com/netdata/netdata/pull/15476) ([Ancairon](https://github.com/Ancairon))
-- Fix coverity issue [\#15475](https://github.com/netdata/netdata/pull/15475) ([stelfrag](https://github.com/stelfrag))
-- eBPF Yaml [\#15474](https://github.com/netdata/netdata/pull/15474) ([thiagoftsm](https://github.com/thiagoftsm))
-- Memory Controller \(MC\) and DIMM Error Detection And Correction \(EDAC\) [\#15473](https://github.com/netdata/netdata/pull/15473) ([ktsaou](https://github.com/ktsaou))
-- meta schema change multi-instance to multi\_instance [\#15470](https://github.com/netdata/netdata/pull/15470) ([ilyam8](https://github.com/ilyam8))
-- fix anchors [\#15469](https://github.com/netdata/netdata/pull/15469) ([Ancairon](https://github.com/Ancairon))
-- fix the calculation of incremental-sum [\#15468](https://github.com/netdata/netdata/pull/15468) ([ktsaou](https://github.com/ktsaou))
-- apps.plugin fds limits improvements [\#15467](https://github.com/netdata/netdata/pull/15467) ([ktsaou](https://github.com/ktsaou))
-- Add community key in schema [\#15465](https://github.com/netdata/netdata/pull/15465) ([Ancairon](https://github.com/Ancairon))
-- Overhaul deployment strategies documentation [\#15464](https://github.com/netdata/netdata/pull/15464) ([ralphm](https://github.com/ralphm))
-- Update debugfs plugin metadata. [\#15463](https://github.com/netdata/netdata/pull/15463) ([Ferroin](https://github.com/Ferroin))
-- Update proc plugin yaml [\#15460](https://github.com/netdata/netdata/pull/15460) ([Ancairon](https://github.com/Ancairon))
-- Macos yaml updates [\#15459](https://github.com/netdata/netdata/pull/15459) ([Ancairon](https://github.com/Ancairon))
-- Freeipmi yaml updates [\#15458](https://github.com/netdata/netdata/pull/15458) ([Ancairon](https://github.com/Ancairon))
-- Add short descriptions to cgroups yaml [\#15457](https://github.com/netdata/netdata/pull/15457) ([Ancairon](https://github.com/Ancairon))
-- readme: reorder cols in whats new and add links [\#15455](https://github.com/netdata/netdata/pull/15455) ([andrewm4894](https://github.com/andrewm4894))
-- Store and transmit chart\_name to cloud in alert events [\#15441](https://github.com/netdata/netdata/pull/15441) ([MrZammler](https://github.com/MrZammler))
-- Refactor RRD code. [\#15423](https://github.com/netdata/netdata/pull/15423) ([vkalintiris](https://github.com/vkalintiris))
-
## [v1.41.0](https://github.com/netdata/netdata/tree/v1.41.0) (2023-07-19)
[Full Changelog](https://github.com/netdata/netdata/compare/v1.40.1...v1.41.0)
-**Merged pull requests:**
-
-- Include license for web v2 [\#15453](https://github.com/netdata/netdata/pull/15453) ([tkatsoulas](https://github.com/tkatsoulas))
-- Updates to metadata.yaml [\#15452](https://github.com/netdata/netdata/pull/15452) ([shyamvalsan](https://github.com/shyamvalsan))
-- Add apps yaml [\#15451](https://github.com/netdata/netdata/pull/15451) ([Ancairon](https://github.com/Ancairon))
-- Add cgroups yaml [\#15450](https://github.com/netdata/netdata/pull/15450) ([Ancairon](https://github.com/Ancairon))
-- Fix multiline [\#15449](https://github.com/netdata/netdata/pull/15449) ([Ancairon](https://github.com/Ancairon))
-- bump v2 dashboard to v6.21.3 [\#15448](https://github.com/netdata/netdata/pull/15448) ([ilyam8](https://github.com/ilyam8))
-- fix alerts transitions search when something specific is asked for [\#15447](https://github.com/netdata/netdata/pull/15447) ([ktsaou](https://github.com/ktsaou))
-- collector meta: remove meta.alternative\_monitored\_instances [\#15445](https://github.com/netdata/netdata/pull/15445) ([ilyam8](https://github.com/ilyam8))
-- added missing fields to alerts instances [\#15442](https://github.com/netdata/netdata/pull/15442) ([ktsaou](https://github.com/ktsaou))
-- removed dup categories [\#15440](https://github.com/netdata/netdata/pull/15440) ([hugovalente-pm](https://github.com/hugovalente-pm))
-- Create netdata-assistant docs [\#15438](https://github.com/netdata/netdata/pull/15438) ([shyamvalsan](https://github.com/shyamvalsan))
-- apps.plugin fds limits improvements [\#15437](https://github.com/netdata/netdata/pull/15437) ([ktsaou](https://github.com/ktsaou))
-- disable apps\_group\_file\_descriptors\_utilization alarm [\#15435](https://github.com/netdata/netdata/pull/15435) ([ilyam8](https://github.com/ilyam8))
-- Add catch-all category entry in categories.yaml [\#15434](https://github.com/netdata/netdata/pull/15434) ([Ancairon](https://github.com/Ancairon))
-- Update CODEOWNERS [\#15433](https://github.com/netdata/netdata/pull/15433) ([andrewm4894](https://github.com/andrewm4894))
-- Remove duplicate category from categories.yaml [\#15432](https://github.com/netdata/netdata/pull/15432) ([Ancairon](https://github.com/Ancairon))
-- readme: add link for netdata cloud and sign-in cta [\#15431](https://github.com/netdata/netdata/pull/15431) ([andrewm4894](https://github.com/andrewm4894))
-- add chart id and name to alert instances and transitions [\#15430](https://github.com/netdata/netdata/pull/15430) ([ktsaou](https://github.com/ktsaou))
-- update v2 dashboard [\#15427](https://github.com/netdata/netdata/pull/15427) ([ilyam8](https://github.com/ilyam8))
-- fix unlocked registry access and add hostname to search response [\#15426](https://github.com/netdata/netdata/pull/15426) ([ktsaou](https://github.com/ktsaou))
-- Update README.md [\#15424](https://github.com/netdata/netdata/pull/15424) ([christophidesp](https://github.com/christophidesp))
-- Decode url before checking for question mark [\#15422](https://github.com/netdata/netdata/pull/15422) ([MrZammler](https://github.com/MrZammler))
-- use real-time clock for http response headers [\#15421](https://github.com/netdata/netdata/pull/15421) ([ktsaou](https://github.com/ktsaou))
-- Bugfix on alerts generation for yamls [\#15420](https://github.com/netdata/netdata/pull/15420) ([Ancairon](https://github.com/Ancairon))
-- Minor typo fix on consul.conf [\#15419](https://github.com/netdata/netdata/pull/15419) ([Ancairon](https://github.com/Ancairon))
-- monitor applications file descriptor limits [\#15417](https://github.com/netdata/netdata/pull/15417) ([ktsaou](https://github.com/ktsaou))
-- Update README.md [\#15416](https://github.com/netdata/netdata/pull/15416) ([ktsaou](https://github.com/ktsaou))
-- Update README.md [\#15414](https://github.com/netdata/netdata/pull/15414) ([ktsaou](https://github.com/ktsaou))
-- collector meta: restrict chart\_type to known values [\#15413](https://github.com/netdata/netdata/pull/15413) ([ilyam8](https://github.com/ilyam8))
-- Update README.md [\#15412](https://github.com/netdata/netdata/pull/15412) ([tkatsoulas](https://github.com/tkatsoulas))
-- add reference to cncf [\#15408](https://github.com/netdata/netdata/pull/15408) ([hugovalente-pm](https://github.com/hugovalente-pm))
-
## [v1.40.1](https://github.com/netdata/netdata/tree/v1.40.1) (2023-06-27)
[Full Changelog](https://github.com/netdata/netdata/compare/v1.40.0...v1.40.1)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index eb96b461c82acc..5d70d470bd7a1b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -465,6 +465,7 @@ set(LIBNETDATA_FILES
libnetdata/log/log.h
libnetdata/os.c
libnetdata/os.h
+ libnetdata/endian.h
libnetdata/onewayalloc/onewayalloc.c
libnetdata/onewayalloc/onewayalloc.h
libnetdata/popen/popen.c
@@ -488,9 +489,15 @@ set(LIBNETDATA_FILES
libnetdata/threads/threads.h
libnetdata/url/url.c
libnetdata/url/url.h
+ libnetdata/dyn_conf/dyn_conf.c
+ libnetdata/dyn_conf/dyn_conf.h
libnetdata/string/utf8.h
libnetdata/worker_utilization/worker_utilization.c
libnetdata/worker_utilization/worker_utilization.h
+ libnetdata/facets/facets.c
+ libnetdata/facets/facets.h
+ libnetdata/functions_evloop/functions_evloop.h
+ libnetdata/functions_evloop/functions_evloop.c
libnetdata/http/http_defs.h
)
@@ -711,6 +718,10 @@ set(STATSD_PLUGIN_FILES
collectors/statsd.plugin/statsd.c
)
+set(SYSTEMD_JOURNAL_PLUGIN_FILES
+ collectors/systemd-journal.plugin/systemd-journal.c
+ )
+
set(RRD_PLUGIN_FILES
database/contexts/api_v1.c
database/contexts/api_v2.c
@@ -735,6 +746,7 @@ set(RRD_PLUGIN_FILES
database/rrdfunctions.h
database/rrdhost.c
database/rrdlabels.c
+ database/rrdlabels.h
database/rrd.c
database/rrd.h
database/rrdset.c
@@ -764,6 +776,9 @@ set(RRD_PLUGIN_FILES
database/sqlite/sqlite_aclk_alert.h
database/sqlite/sqlite3.c
database/sqlite/sqlite3.h
+ database/sqlite/sqlite3recover.c
+ database/sqlite/sqlite3recover.h
+ database/sqlite/dbdata.c
database/engine/rrdengine.c
database/engine/rrdengine.h
database/engine/rrddiskprotocol.h
@@ -783,6 +798,10 @@ set(RRD_PLUGIN_FILES
database/engine/metric.h
database/engine/pdc.c
database/engine/pdc.h
+ database/engine/page.c
+ database/engine/page.h
+ database/engine/page_test.cc
+ database/engine/page_test.h
database/KolmogorovSmirnovDist.c
database/KolmogorovSmirnovDist.h
)
@@ -870,6 +889,7 @@ set(STREAMING_PLUGIN_FILES
streaming/sender.c
streaming/replication.c
streaming/replication.h
+ streaming/common.h
)
set(CLAIM_PLUGIN_FILES
@@ -1055,6 +1075,44 @@ ELSE()
message(STATUS "ML: disabled")
ENDIF()
+set(LOGSMANAGEMENT_FILES
+ logsmanagement/rrd_api/rrd_api_docker_ev.c
+ logsmanagement/rrd_api/rrd_api_docker_ev.h
+ logsmanagement/rrd_api/rrd_api_generic.c
+ logsmanagement/rrd_api/rrd_api_generic.h
+ logsmanagement/rrd_api/rrd_api_kernel.c
+ logsmanagement/rrd_api/rrd_api_kernel.h
+ logsmanagement/rrd_api/rrd_api_mqtt.c
+ logsmanagement/rrd_api/rrd_api_mqtt.h
+ logsmanagement/rrd_api/rrd_api_stats.c
+ logsmanagement/rrd_api/rrd_api_stats.h
+ logsmanagement/rrd_api/rrd_api_systemd.c
+ logsmanagement/rrd_api/rrd_api_systemd.h
+ logsmanagement/rrd_api/rrd_api_web_log.c
+ logsmanagement/rrd_api/rrd_api_web_log.h
+ logsmanagement/rrd_api/rrd_api.h
+ logsmanagement/unit_test/unit_test.c
+ logsmanagement/unit_test/unit_test.h
+ logsmanagement/circular_buffer.c
+ logsmanagement/circular_buffer.h
+ logsmanagement/db_api.c
+ logsmanagement/db_api.h
+ logsmanagement/file_info.h
+ logsmanagement/flb_plugin.c
+ logsmanagement/flb_plugin.h
+ logsmanagement/functions.c
+ logsmanagement/functions.h
+ logsmanagement/helper.h
+ logsmanagement/defaults.h
+ logsmanagement/logsmanag_config.c
+ logsmanagement/logsmanag_config.h
+ logsmanagement/logsmanagement.c
+ logsmanagement/parser.c
+ logsmanagement/parser.h
+ logsmanagement/query.c
+ logsmanagement/query.h
+ )
+
set(NETDATA_FILES
collectors/all.h
${DAEMON_FILES}
@@ -1067,6 +1125,7 @@ set(NETDATA_FILES
${RRD_PLUGIN_FILES}
${REGISTRY_PLUGIN_FILES}
${STATSD_PLUGIN_FILES}
+ ${SYSTEMD_JOURNAL_PLUGIN_FILES}
${STREAMING_PLUGIN_FILES}
${WEB_PLUGIN_FILES}
${CLAIM_PLUGIN_FILES}
@@ -1092,6 +1151,13 @@ add_definitions(
-DVARLIB_DIR="/var/lib/netdata"
)
+# -----------------------------------------------------------------------------
+# logs management
+
+IF(ENABLE_LOGSMANAGEMENT)
+ list(APPEND NETDATA_FILES ${LOGSMANAGEMENT_FILES})
+ENDIF()
+
# -----------------------------------------------------------------------------
# kinesis exporting connector
@@ -1728,7 +1794,6 @@ endif()
endif()
endif()
-
# generate config.h so that CMake becomes independent of automake
## netdata version
diff --git a/Makefile.am b/Makefile.am
index e0e85d2881e285..398c6fb20cec23 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -114,6 +114,7 @@ SUBDIRS += \
web \
claim \
spawn \
+ logsmanagement \
$(NULL)
AM_CFLAGS = \
@@ -128,6 +129,8 @@ AM_CFLAGS = \
$(OPTIONAL_CUPS_CFLAGS) \
$(OPTIONAL_XENSTAT_CFLAGS) \
$(OPTIONAL_BPF_CFLAGS) \
+ $(OPTIONAL_SYSTEMD_CFLAGS) \
+ $(OPTIONAL_GTEST_CFLAGS) \
$(NULL)
sbin_PROGRAMS =
@@ -144,18 +147,28 @@ LIBNETDATA_FILES = \
libnetdata/avl/avl.h \
libnetdata/buffer/buffer.c \
libnetdata/buffer/buffer.h \
+ libnetdata/buffered_reader/buffered_reader.c \
+ libnetdata/buffered_reader/buffered_reader.h \
libnetdata/circular_buffer/circular_buffer.c \
libnetdata/circular_buffer/circular_buffer.h \
libnetdata/clocks/clocks.c \
libnetdata/clocks/clocks.h \
libnetdata/completion/completion.c \
libnetdata/completion/completion.h \
+ libnetdata/datetime/iso8601.c \
+ libnetdata/datetime/iso8601.h \
+ libnetdata/datetime/rfc3339.c \
+ libnetdata/datetime/rfc3339.h \
+ libnetdata/datetime/rfc7231.c \
+ libnetdata/datetime/rfc7231.h \
libnetdata/dictionary/dictionary.c \
libnetdata/dictionary/dictionary.h \
libnetdata/eval/eval.c \
libnetdata/eval/eval.h \
libnetdata/facets/facets.c \
libnetdata/facets/facets.h \
+ libnetdata/functions_evloop/functions_evloop.c \
+ libnetdata/functions_evloop/functions_evloop.h \
libnetdata/gorilla/gorilla.h \
libnetdata/gorilla/gorilla.cc \
libnetdata/inlined.h \
@@ -164,8 +177,12 @@ LIBNETDATA_FILES = \
libnetdata/libnetdata.c \
libnetdata/libnetdata.h \
libnetdata/required_dummies.h \
+ libnetdata/line_splitter/line_splitter.c \
+ libnetdata/line_splitter/line_splitter.h \
libnetdata/locks/locks.c \
libnetdata/locks/locks.h \
+ libnetdata/log/journal.c \
+ libnetdata/log/journal.h \
libnetdata/log/log.c \
libnetdata/log/log.h \
libnetdata/onewayalloc/onewayalloc.c \
@@ -176,6 +193,7 @@ LIBNETDATA_FILES = \
libnetdata/procfile/procfile.h \
libnetdata/os.c \
libnetdata/os.h \
+ libnetdata/endian.h \
libnetdata/simple_pattern/simple_pattern.c \
libnetdata/simple_pattern/simple_pattern.h \
libnetdata/socket/socket.c \
@@ -192,6 +210,8 @@ LIBNETDATA_FILES = \
libnetdata/threads/threads.h \
libnetdata/url/url.c \
libnetdata/url/url.h \
+ libnetdata/uuid/uuid.c \
+ libnetdata/uuid/uuid.h \
libnetdata/json/json.c \
libnetdata/json/json.h \
libnetdata/json/jsmn.c \
@@ -201,9 +221,11 @@ LIBNETDATA_FILES = \
libnetdata/string/utf8.h \
libnetdata/worker_utilization/worker_utilization.c \
libnetdata/worker_utilization/worker_utilization.h \
+ libnetdata/xxhash.h \
libnetdata/http/http_defs.h \
libnetdata/dyn_conf/dyn_conf.c \
libnetdata/dyn_conf/dyn_conf.h \
+ libnetdata/simple_hashtable.h \
$(NULL)
if ENABLE_PLUGIN_EBPF
@@ -277,6 +299,10 @@ IDLEJITTER_PLUGIN_FILES = \
$(NULL)
CGROUPS_PLUGIN_FILES = \
+ collectors/cgroups.plugin/cgroup-internals.h \
+ collectors/cgroups.plugin/cgroup-discovery.c \
+ collectors/cgroups.plugin/cgroup-charts.c \
+ collectors/cgroups.plugin/cgroup-top.c \
collectors/cgroups.plugin/sys_fs_cgroup.c \
collectors/cgroups.plugin/sys_fs_cgroup.h \
$(NULL)
@@ -305,10 +331,84 @@ FREEIPMI_PLUGIN_FILES = \
$(NULL)
SYSTEMD_JOURNAL_PLUGIN_FILES = \
+ collectors/systemd-journal.plugin/systemd-internals.h \
+ collectors/systemd-journal.plugin/systemd-main.c \
+ collectors/systemd-journal.plugin/systemd-units.c \
collectors/systemd-journal.plugin/systemd-journal.c \
+ collectors/systemd-journal.plugin/systemd-journal-watcher.c \
+ collectors/systemd-journal.plugin/systemd-journal-annotations.c \
+ collectors/systemd-journal.plugin/systemd-journal-files.c \
+ collectors/systemd-journal.plugin/systemd-journal-fstat.c \
$(LIBNETDATA_FILES) \
$(NULL)
+SYSTEMD_CAT_NATIVE_FILES = \
+ libnetdata/log/systemd-cat-native.c \
+ libnetdata/log/systemd-cat-native.h \
+ $(LIBNETDATA_FILES) \
+ $(NULL)
+
+LOG2JOURNAL_FILES = \
+ collectors/log2journal/log2journal.h \
+ collectors/log2journal/log2journal.c \
+ collectors/log2journal/log2journal-help.c \
+ collectors/log2journal/log2journal-yaml.c \
+ collectors/log2journal/log2journal-json.c \
+ collectors/log2journal/log2journal-logfmt.c \
+ collectors/log2journal/log2journal-pcre2.c \
+ collectors/log2journal/log2journal-params.c \
+ collectors/log2journal/log2journal-inject.c \
+ collectors/log2journal/log2journal-pattern.c \
+ collectors/log2journal/log2journal-replace.c \
+ collectors/log2journal/log2journal-rename.c \
+ collectors/log2journal/log2journal-rewrite.c \
+ $(NULL)
+
+
+LOGSMANAGEMENT_FILES = \
+ logsmanagement/circular_buffer.c \
+ logsmanagement/circular_buffer.h \
+ logsmanagement/db_api.c \
+ logsmanagement/db_api.h \
+ logsmanagement/defaults.h \
+ logsmanagement/file_info.h \
+ logsmanagement/flb_plugin.c \
+ logsmanagement/flb_plugin.h \
+ logsmanagement/functions.c \
+ logsmanagement/functions.h \
+ logsmanagement/helper.h \
+ logsmanagement/logsmanag_config.c \
+ logsmanagement/logsmanag_config.h \
+ logsmanagement/logsmanagement.c \
+ logsmanagement/parser.c \
+ logsmanagement/parser.h \
+ logsmanagement/query.c \
+ logsmanagement/query.h \
+ logsmanagement/rrd_api/rrd_api_docker_ev.c \
+ logsmanagement/rrd_api/rrd_api_docker_ev.h \
+ logsmanagement/rrd_api/rrd_api_generic.c \
+ logsmanagement/rrd_api/rrd_api_generic.h \
+ logsmanagement/rrd_api/rrd_api_kernel.c \
+ logsmanagement/rrd_api/rrd_api_kernel.h \
+ logsmanagement/rrd_api/rrd_api_mqtt.c \
+ logsmanagement/rrd_api/rrd_api_mqtt.h \
+ logsmanagement/rrd_api/rrd_api_stats.c \
+ logsmanagement/rrd_api/rrd_api_stats.h \
+ logsmanagement/rrd_api/rrd_api_systemd.c \
+ logsmanagement/rrd_api/rrd_api_systemd.h \
+ logsmanagement/rrd_api/rrd_api_web_log.c \
+ logsmanagement/rrd_api/rrd_api_web_log.h \
+ logsmanagement/rrd_api/rrd_api.h \
+ database/sqlite/sqlite3.c \
+ database/sqlite/sqlite3.h \
+ $(LIBNETDATA_FILES) \
+ $(NULL)
+
+LOGSMANAGEMENT_TESTS_FILES = \
+ logsmanagement/unit_test/unit_test.c \
+ logsmanagement/unit_test/unit_test.h \
+ $(NULL)
+
CUPS_PLUGIN_FILES = \
collectors/cups.plugin/cups_plugin.c \
$(LIBNETDATA_FILES) \
@@ -472,6 +572,7 @@ RRD_PLUGIN_FILES = \
database/rrdfamily.c \
database/rrdhost.c \
database/rrdlabels.c \
+ database/rrdlabels.h \
database/rrd.c \
database/rrd.h \
database/rrdset.c \
@@ -503,6 +604,9 @@ RRD_PLUGIN_FILES = \
database/sqlite/sqlite_aclk_alert.h \
database/sqlite/sqlite3.c \
database/sqlite/sqlite3.h \
+ database/sqlite/sqlite3recover.c \
+ database/sqlite/sqlite3recover.h \
+ database/sqlite/dbdata.c \
database/KolmogorovSmirnovDist.c \
database/KolmogorovSmirnovDist.h \
$(NULL)
@@ -572,10 +676,14 @@ if ENABLE_DBENGINE
database/engine/cache.h \
database/engine/metric.c \
database/engine/metric.h \
+ database/engine/page.c \
+ database/engine/page.h \
+ database/engine/page_test.cc \
+ database/engine/page_test.h \
database/engine/pdc.c \
database/engine/pdc.h \
$(NULL)
-
+
RRD_PLUGIN_KSY_BUILTFILES = \
database/engine/journalfile_v2.ksy \
database/engine/journalfile_v2_virtmemb.ksy \
@@ -598,6 +706,8 @@ API_PLUGIN_FILES = \
web/api/exporters/allmetrics.h \
web/api/exporters/shell/allmetrics_shell.c \
web/api/exporters/shell/allmetrics_shell.h \
+ web/api/ilove/ilove.c \
+ web/api/ilove/ilove.h \
web/api/queries/average/average.c \
web/api/queries/average/average.h \
web/api/queries/countif/countif.c \
@@ -657,11 +767,21 @@ API_PLUGIN_FILES = \
STREAMING_PLUGIN_FILES = \
streaming/rrdpush.c \
streaming/compression.c \
+ streaming/compression.h \
+ streaming/compression_brotli.c \
+ streaming/compression_brotli.h \
+ streaming/compression_gzip.c \
+ streaming/compression_gzip.h \
+ streaming/compression_lz4.c \
+ streaming/compression_lz4.h \
+ streaming/compression_zstd.c \
+ streaming/compression_zstd.h \
streaming/sender.c \
streaming/receiver.c \
streaming/replication.h \
streaming/replication.c \
streaming/rrdpush.h \
+ streaming/common.h \
$(NULL)
REGISTRY_PLUGIN_FILES = \
@@ -702,8 +822,6 @@ CLAIM_FILES = \
if ENABLE_ACLK
ACLK_FILES = \
- aclk/aclk_util.c \
- aclk/aclk_util.h \
aclk/aclk_stats.c \
aclk/aclk_stats.h \
aclk/aclk_query.c \
@@ -716,8 +834,6 @@ ACLK_FILES = \
aclk/aclk_tx_msgs.h \
aclk/aclk_rx_msgs.c \
aclk/aclk_rx_msgs.h \
- aclk/https_client.c \
- aclk/https_client.h \
aclk/aclk_alarm_api.c \
aclk/aclk_alarm_api.h \
aclk/aclk_contexts_api.c \
@@ -765,15 +881,9 @@ libmqttwebsockets_a_SOURCES = \
mqtt_websockets/src/common_public.c \
mqtt_websockets/src/include/common_public.h \
mqtt_websockets/src/include/common_internal.h \
- mqtt_websockets/c-rbuf/src/ringbuffer.c \
- mqtt_websockets/c-rbuf/include/ringbuffer.h \
- mqtt_websockets/c-rbuf/src/ringbuffer_internal.h \
- mqtt_websockets/c_rhash/src/c_rhash.c \
- mqtt_websockets/c_rhash/include/c_rhash.h \
- mqtt_websockets/c_rhash/src/c_rhash_internal.h \
$(NULL)
-libmqttwebsockets_a_CFLAGS = $(CFLAGS) -DMQTT_WSS_CUSTOM_ALLOC -DRBUF_CUSTOM_MALLOC -DMQTT_WSS_CPUSTATS -I$(srcdir)/aclk/helpers -I$(srcdir)/mqtt_websockets/c_rhash/include
+libmqttwebsockets_a_CFLAGS = $(CFLAGS) -DMQTT_WSS_CUSTOM_ALLOC -DMQTT_WSS_CPUSTATS -I$(srcdir)/aclk/helpers -I$(srcdir)/mqtt_websockets/c_rhash/include
if MQTT_WSS_DEBUG
libmqttwebsockets_a_CFLAGS += -DMQTT_WSS_DEBUG
@@ -879,8 +989,25 @@ ACLK_ALWAYS_BUILD_FILES = \
aclk/aclk.h \
aclk/aclk_capas.c \
aclk/aclk_capas.h \
+ aclk/aclk_util.c \
+ aclk/aclk_util.h \
+ aclk/https_client.c \
+ aclk/https_client.h \
+ $(NULL)
+
+noinst_LIBRARIES += libcrutils.a
+
+libcrutils_a_SOURCES = \
+ mqtt_websockets/c-rbuf/src/ringbuffer.c \
+ mqtt_websockets/c-rbuf/include/ringbuffer.h \
+ mqtt_websockets/c-rbuf/src/ringbuffer_internal.h \
+ mqtt_websockets/c_rhash/src/c_rhash.c \
+ mqtt_websockets/c_rhash/include/c_rhash.h \
+ mqtt_websockets/c_rhash/src/c_rhash_internal.h \
$(NULL)
+libcrutils_a_CFLAGS = $(CFLAGS) -DRBUF_CUSTOM_MALLOC -I$(srcdir)/aclk/helpers -I$(abs_top_srcdir)/mqtt_websockets/c-rbuf/include -I$(srcdir)/mqtt_websockets/c_rhash/include
+
SPAWN_PLUGIN_FILES = \
spawn/spawn.c \
spawn/spawn_server.c \
@@ -968,6 +1095,10 @@ H2O_FILES = \
web/server/h2o/http_server.h \
web/server/h2o/h2o_utils.c \
web/server/h2o/h2o_utils.h \
+ web/server/h2o/streaming.c \
+ web/server/h2o/streaming.h \
+ web/server/h2o/connlist.c \
+ web/server/h2o/connlist.h \
$(NULL)
libh2o_a_SOURCES = \
@@ -1134,13 +1265,20 @@ NETDATA_COMMON_LIBS = \
$(OPTIONAL_MQTT_LIBS) \
$(OPTIONAL_UV_LIBS) \
$(OPTIONAL_LZ4_LIBS) \
+ $(OPTIONAL_CURL_LIBS) \
+ $(OPTIONAL_ZSTD_LIBS) \
+ $(OPTIONAL_BROTLIENC_LIBS) \
+ $(OPTIONAL_BROTLIDEC_LIBS) \
$(OPTIONAL_DATACHANNEL_LIBS) \
libjudy.a \
+ libcrutils.a \
$(OPTIONAL_SSL_LIBS) \
$(OPTIONAL_JSONC_LIBS) \
$(OPTIONAL_YAML_LIBS) \
$(OPTIONAL_ATOMIC_LIBS) \
$(OPTIONAL_DL_LIBS) \
+ $(OPTIONAL_SYSTEMD_LIBS) \
+ $(OPTIONAL_GTEST_LIBS) \
$(NULL)
if ENABLE_ACLK
@@ -1240,6 +1378,15 @@ if ENABLE_PLUGIN_FREEIPMI
$(NULL)
endif
+if ENABLE_LOG2JOURNAL
+ sbin_PROGRAMS += log2journal
+ log2journal_SOURCES = $(LOG2JOURNAL_FILES)
+ log2journal_LDADD = \
+ $(OPTIONAL_PCRE2_LIBS) \
+ $(OPTIONAL_YAML_LIBS) \
+ $(NULL)
+endif
+
if ENABLE_PLUGIN_SYSTEMD_JOURNAL
plugins_PROGRAMS += systemd-journal.plugin
systemd_journal_plugin_SOURCES = $(SYSTEMD_JOURNAL_PLUGIN_FILES)
@@ -1249,6 +1396,24 @@ if ENABLE_PLUGIN_SYSTEMD_JOURNAL
$(NULL)
endif
+sbin_PROGRAMS += systemd-cat-native
+systemd_cat_native_SOURCES = $(SYSTEMD_CAT_NATIVE_FILES)
+systemd_cat_native_LDADD = \
+ $(NETDATA_COMMON_LIBS) \
+ $(NULL)
+
+if ENABLE_LOGSMANAGEMENT
+ plugins_PROGRAMS += logs-management.plugin
+ logs_management_plugin_SOURCES = $(LOGSMANAGEMENT_FILES)
+if ENABLE_LOGSMANAGEMENT_TESTS
+ logs_management_plugin_SOURCES += $(LOGSMANAGEMENT_TESTS_FILES)
+endif
+ logs_management_plugin_LDADD = \
+ $(NETDATA_COMMON_LIBS) \
+ $(OPTIONAL_SYSTEMD_LIBS) \
+ $(NULL)
+endif
+
if ENABLE_PLUGIN_EBPF
plugins_PROGRAMS += ebpf.plugin
ebpf_plugin_SOURCES = $(EBPF_PLUGIN_FILES)
diff --git a/README.md b/README.md
index aa408f3ac4140d..1fb026992c3753 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@
-
+
-
+
Monitor your servers, containers, and applications,
in high-resolution and in real-time.
@@ -22,7 +22,6 @@
-
@@ -53,12 +52,40 @@ It scales nicely from just a single server to thousands of servers, even in comp
- :bell: **Out of box Alerts**
Comes with hundreds of alerts out of the box to detect common issues and pitfalls, revealing issues that can easily go unnoticed. It supports several notification methods to let you know when your attention is needed.
+- 📖 **systemd Journal Logs Explorer**
+ Provides a `systemd` journal logs explorer, to view, filter and analyze system and applications logs by directly accessing `systemd` journal files on individual hosts and infrastructure-wide logs centralization servers.
+
- :sunglasses: **Low Maintenance**
Fully automated in every aspect: automated dashboards, out-of-the-box alerts, auto-detection and auto-discovery of metrics, zero-touch machine-learning, easy scalability and high availability, and CI/CD friendly.
- :star: **Open and Extensible**
Netdata is a modular platform that can be extended in all possible ways and it also integrates nicely with other monitoring solutions.
+---
+
+**LATEST BLOG POST**:
+
+
+
+
+
+
+
+
+
+
+On the same workload, Netdata uses **35% less CPU**, **49% less RAM**, **12% less bandwidth**, **98% less disk I/O**, and is **75% more disk space efficient** on high resolution metrics storage, while providing more than a year of overall retention on the same disk footprint Prometheus offers 7 days of retention. [Read the full analysis in our blog](https://blog.netdata.cloud/netdata-vs-prometheus-performance-analysis/).
+
+---
+
+NEW: **Netdata and LOGS !** 🥳
+
+Check the [systemd-journal plugin of Netdata](https://github.com/netdata/netdata/tree/master/collectors/systemd-journal.plugin), that allows you to view, explore, analyze and query `systemd` journal logs!
+
+
+
+---
+
@@ -66,7 +93,7 @@ It scales nicely from just a single server to thousands of servers, even in comp
Netdata actively supports and is a member of the Cloud Native Computing Foundation (CNCF)
- ...and due to your love :heart:, it is the 3rd most :star:'d project in the CNCF landscape!
+ ...and due to your love :heart: and :star:, Netdata is leading the Observability category at the CNCF landscape!
@@ -98,9 +125,6 @@ It scales nicely from just a single server to thousands of servers, even in comp
## What's New and Coming?
-> **Spoiler!**
-> Netdata Integrations Marketplace is coming...
-
Click to see our immediate development plans and a summary view of the last 12 months' releases...
@@ -108,13 +132,13 @@ It scales nicely from just a single server to thousands of servers, even in comp
|:-----------------------------:|:---------------------------------------------------------------------------------------------------:|:------------:|:-------------------------------------------------------------------------------------------------------:|
| WebRTC | Browser to Agent communication via WebRTC. | later | POC |
| Advanced Troubleshooting | Expanded view of dashboard charts integrating Metrics Correlations, Anomaly Advisor, and many more. | later | interrupted |
-| Easy Custom
Dashboards | Drag and drop charts to create custom dashboards on the fly, while troubleshooting! | next | planned |
-| More Customizability | Set default settings for all charts and views! | next | planned |
-| SystemD Journal | View the SystemD Journal of your systems on the dashboard. | soon | in progress |
+| Easy Custom
Dashboards | Drag and drop charts to create custom dashboards on the fly, while troubleshooting! | soon | planned |
+| More Customizability | Set default settings for all charts and views! | soon | planned |
| UCUM Units | Migrate all metrics to the Unified Code for Units of Measure. | soon | in progress |
-| **Netdata Cloud
On-Prem** | **Netdata Cloud available for On-Prem installation!** | **soon** | **in progress** |
| Click to Activate | Configure Alerts and Data Collectors from the UI! | soon | in progress |
-| Integrations | Netdata Integrations Marketplace! | soon | finishing |
+| **Netdata Cloud
On-Prem** | **Netdata Cloud available for On-Prem installation!** | **available** | [fill this form](https://www.netdata.cloud/contact-us/?subject=on-prem) |
+| `systemd` journal | View the `systemd` journal logs of your systems on the dashboard. | Oct
2023 | [v1.43](https://github.com/netdata/netdata/releases/tag/v1.43.0) |
+| Integrations | Netdata Integrations Marketplace! | Aug
2023 | [v1.42](https://github.com/netdata/netdata/releases#v1420-integrations-marketplace) |
| New Agent UI | Now Netdata Cloud and Netdata Agent share the same dashboard! | Jul
2023 | [v1.41](https://github.com/netdata/netdata/releases/tag/v1.41.0#v1410-one-dashboard) |
| Summary Dashboards | High level tiles everywhere! | Jun
2023 | [v1.40](https://github.com/netdata/netdata/releases/tag/v1.40.0#v1400-visualization-summary-dashboards) |
| Machine Learning | Multiple ML models per metric. | Jun
2023 | [v1.40](https://github.com/netdata/netdata/releases/tag/v1.40.0#v1400-ml-extended-training) |
@@ -168,6 +192,9 @@ It scales nicely from just a single server to thousands of servers, even in comp
Check also the [Netdata Deployment Strategies](https://learn.netdata.cloud/docs/architecture/deployment-strategies) to decide how to deploy it in your infrastructure.
+ By default, a local dashboard is immediately available. Netdata starts a web server for its dashboard at port `19999`. Open up your web browser of choice and
+navigate to `http://NODE:19999`, replacing `NODE` with the IP address or hostname of your Agent. If installed on localhost, you can access it through `http://localhost:19999`.
+
### 2. **Configure Collectors** :boom:
Netdata auto-detects and auto-discovers most operating system data sources and applications. However, many data sources require some manual configuration, usually to allow Netdata to get access to the metrics.
@@ -180,7 +207,7 @@ It scales nicely from just a single server to thousands of servers, even in comp
Netdata comes with hundreds of pre-configured alerts, that automatically check your metrics, immediately after they start getting collected.
- Netdata can dispatch alert notifications to multiple third party systems, including: `email`, `Alerta`, `AWS SNS`, `Discord`, `Dynatrace`, `flock`, `gotify`, `IRC`, `Matrix`, `MessageBird`, `Microsoft Teams`, `ntfy`, `OPSgenie`, `PagerDuty`, `Prowl`, `PushBullet`, `PushOver`, `RocketChat`, `Slack`, `SMS tools`, `StackPulse`, `Syslog`, `Telegram`, `Twilio`.
+ Netdata can dispatch alert notifications to multiple third party systems, including: `email`, `Alerta`, `AWS SNS`, `Discord`, `Dynatrace`, `flock`, `gotify`, `IRC`, `Matrix`, `MessageBird`, `Microsoft Teams`, `ntfy`, `OPSgenie`, `PagerDuty`, `Prowl`, `PushBullet`, `PushOver`, `RocketChat`, `Slack`, `SMS tools`, `Syslog`, `Telegram`, `Twilio`.
By default, Netdata will send e-mail notifications, if there is a configured MTA on the system.
@@ -217,11 +244,12 @@ It scales nicely from just a single server to thousands of servers, even in comp
When your Netdata nodes are connected to Netdata Cloud, you can (on top of the above):
+ - Access your Netdata agents from anywhere
+ - Access sensitive Netdata agent features (like "Netdata Functions": processes, systemd-journal)
- Organize your infra in spaces and rooms
- Create, manage, and share **custom dashboards**
- Invite your team and assign roles to them (Role Based Access Control - RBAC)
- - Access Netdata Functions (processes top from the UI and more)
- - Get infinite horizontal scalability (multiple independent parents are viewed as one infra)
+ - Get infinite horizontal scalability (multiple independent Netdata Agents are viewed as one infra)
- Configure alerts from the UI (coming soon)
- Configure data collection from the UI (coming soon)
- Netdata Mobile App notifications (coming soon)
@@ -248,7 +276,7 @@ Each Netdata Agent can perform the following functions:
1. **`COLLECT` metrics from their sources**
Uses [internal](https://github.com/netdata/netdata/tree/master/collectors) and [external](https://github.com/netdata/go.d.plugin/tree/master/modules) plugins to collect data from their sources.
- Netdata auto-detects and collects almost everything from the operating system: including CPU, Interrupts, Memory, Disks, Mount Points, Filesystems, Network Stack, Network Interfaces, Containers, VMs, Processes, SystemD Units, Linux Performance Metrics, Linux eBPF, Hardware Sensors, IPMI, and more.
+ Netdata auto-detects and collects almost everything from the operating system: including CPU, Interrupts, Memory, Disks, Mount Points, Filesystems, Network Stack, Network Interfaces, Containers, VMs, Processes, `systemd` units, Linux Performance Metrics, Linux eBPF, Hardware Sensors, IPMI, and more.
It collects application metrics from applications: PostgreSQL, MySQL/MariaDB, Redis, MongoDB, Nginx, Apache, and hundreds more.
@@ -407,6 +435,8 @@ Yes, you can!
Netdata has been designed to spread disk writes across time. Each metric is flushed to disk every 17 minutes, but metrics are flushed evenly across time, at an almost constant rate. Also, metrics are packed into bigger blocks we call `extents` and are compressed with LZ4 before saving them, to minimize the number of I/O operations made.
+Netdata also employs direct I/O for all its database operations, ensuring optimized performance. By managing its own caches, Netdata avoids overburdening system caches, facilitating a harmonious coexistence with other applications.
+
Single node Agents (not Parents), should have a constant rate of about 50 KiB/s or less, with some spikes above that every minute (flushing of tier 1) and higher spikes every hour (flushing of tier 2).
Health Alerts and Machine-Learning run queries to evaluate their expressions and learn from the metrics' patterns. These are also spread over time, so there should be an almost constant read rate too.
@@ -427,6 +457,8 @@ Using the above, the Netdata Agent on your production system will not use a disk
Netdata is a "ready to use" monitoring solution. Prometheus and Grafana are tools to build your own monitoring solution.
+Netdata is also a lot faster, requires significantly fewer resources and puts almost no stress on the server it runs on. For a performance comparison check [this blog](https://blog.netdata.cloud/netdata-vs-prometheus-performance-analysis/).
+
Click to see detailed answer ...
@@ -442,6 +474,8 @@ So, the biggest difference of Netdata to Prometheus, and Grafana, is that we dec
Maintaining such an index is a challenge: first, because the raw metrics collected do not provide this information, so we have to add it, and second because we need to maintain this index for the lifetime of each metric, which with our current database retention, it is usually more than a year.
+ At the same time, Netdata provides better retention than Prometheus due to database tiering, scales easier than Prometheus due to streaming, supports anomaly detection and it has a metrics scoring engine to find the needle in the haystack when needed.
+
- When compared to Grafana, Netdata is fully automated. Grafana has more customization capabilities than Netdata, but Netdata presents fully functional dashboards by itself and most importantly it gives you the means to understand, analyze, filter, slice and dice the data without the need for you to edit queries or be aware of any peculiarities the underlying metrics may have.
Furthermore, to help you when you need to find the needle in the haystack, Netdata has advanced troubleshooting tools provided by the Netdata metrics scoring engine, that allows it to score metrics based on their anomaly rate, their differences or similarities for any given time frame.
@@ -545,12 +579,14 @@ Subscribing to Netdata Cloud is optional but many users find it enhances their e
The Netdata Agent dashboard and the Netdata Cloud dashboard are the same. Still, Netdata Cloud provides additional features, that the Netdata Agent is not capable of. These include:
- 1. Customizability (custom dashboards and other settings are persisted when you are signed in to Netdata Cloud)
- 2. Configuration of Alerts and Data Collection from the UI (coming soon)
- 3. Security (role-based access control - RBAC).
- 4. Horizontal Scalability ("blend" multiple independent parents in one uniform infrastructure)
- 5. Central Dispatch of Alert Notifications (even when multiple independent parents are involved)
- 6. Mobile App for Alert Notifications (coming soon)
+ 1. Access your infrastructure from anywhere.
+ 2. Have SSO to protect sensitive features.
+ 3. Customizability (custom dashboards and other settings are persisted when you are signed in to Netdata Cloud)
+ 4. Configuration of Alerts and Data Collection from the UI (coming soon)
+ 5. Security (role-based access control - RBAC).
+ 6. Horizontal Scalability ("blend" multiple independent parents in one uniform infrastructure)
+ 7. Central Dispatch of Alert Notifications (even when multiple independent parents are involved)
+ 8. Mobile App for Alert Notifications (coming soon)
So, although it is not required, you can get the most out of your Netdata setup by using Netdata Cloud.
@@ -595,9 +631,9 @@ Netdata is a widely adopted project...
Click to see detailed answer ...
-Browse the [Netdata stargazers on GitHub](https://github.com/netdata/netdata/stargazers) to discover users from renowned companies and enterprises, such as AMD, Amazon, Baidu, Cisco, Delta, Facebook, IBM, Intel, Netflix, Qualcomm, Riot Games, SAP, Samsung, Unity, Valve, and many others.
+Browse the [Netdata stargazers on GitHub](https://github.com/netdata/netdata/stargazers) to discover users from renowned companies and enterprises, such as ABN AMRO Bank, AMD, Amazon, Baidu, Booking.com, Cisco, Delta, Facebook, Google, IBM, Intel, Logitech, Netflix, Nokia, Qualcomm, Realtek Semiconductor Corp, Redhat, Riot Games, SAP, Samsung, Unity, Valve, and many others.
-Netdata also enjoys significant usage in academia, with notable institutions including New York University, Columbia University, New Jersey University, among several others.
+Netdata also enjoys significant usage in academia, with notable institutions including New York University, Columbia University, New Jersey University, Seoul National University, University College London, among several others.
And, Netdata is also used by numerous governmental organizations worldwide.
@@ -646,6 +682,39 @@ The Netdata Cloud UI is not open-source. But we thought that it is to the benefi
+### :moneybag: What is your monetization strategy?
+
+Netdata generates revenue through subscriptions to advanced features of Netdata Cloud and sales of on-premise and private versions of Netdata Cloud.
+
+Click to see detailed answer ...
+
+
+Netdata generates revenue from these activities:
+
+1. **Netdata Cloud Subscriptions**
+ Direct funding for our project's vision comes from users subscribing to Netdata Cloud's advanced features.
+
+2. **Netdata Cloud On-Prem or Private**
+ Purchasing the on-premises or private versions of Netdata Cloud supports our financial growth.
+
+Our Open-Source Community and the free access to Netdata Cloud, contribute to Netdata in the following ways:
+
+- **Netdata Cloud Community Use**
+ The free usage of Netdata Cloud demonstrates its market relevance. While this doesn't generate revenue, it reinforces trust among new users and aids in securing appropriate project funding.
+
+- **User Feedback**
+ Feedback, especially issues and bug reports, is invaluable. It steers us towards a more resilient and efficient product. This, too, isn't a revenue source but is pivotal for our project's evolution.
+
+- **Anonymous Telemetry Insights**
+ Users who keep anonymous telemetry enabled help us make data-informed decisions in refining and enhancing Netdata. This isn't a revenue stream, but knowing which features are used and how contributes to building a better product for everyone.
+
+We don't monetize, directly or indirectly, users' or "device heuristics" data. Any data collected from community members are exclusively used for the purposes stated above.
+
+Netdata grows financially when technology-intensive organizations and operators need - due to regulatory or business requirements - the entire Netdata suite (including Netdata Cloud) on-prem or private, bundled with top-tier support. It is a win-win case for all parties involved: these companies get a battle-tested, robust and reliable solution, while the broader community that helps us build this product enjoys it at no cost.
+
+
+
+
## :book: Documentation
Netdata's documentation is available at [**Netdata Learn**](https://learn.netdata.cloud).
@@ -675,7 +744,7 @@ Join the Netdata community:
> [Click here for the schedule](https://www.meetup.com/netdata/events/).
You can also find Netdata on:
-[Twitter](https://twitter.com/linuxnetdata) | [YouTube](https://www.youtube.com/c/Netdata) | [Reddit](https://www.reddit.com/r/netdata/) | [LinkedIn](https://www.linkedin.com/company/netdata-cloud/) | [StackShare](https://stackshare.io/netdata) | [Product Hunt](https://www.producthunt.com/posts/netdata-monitoring-agent/) | [Repology](https://repology.org/metapackage/netdata/versions) | [Facebook](https://www.facebook.com/linuxnetdata/)
+[Twitter](https://twitter.com/netdatahq) | [YouTube](https://www.youtube.com/c/Netdata) | [Reddit](https://www.reddit.com/r/netdata/) | [LinkedIn](https://www.linkedin.com/company/netdata-cloud/) | [StackShare](https://stackshare.io/netdata) | [Product Hunt](https://www.producthunt.com/posts/netdata-monitoring-agent/) | [Repology](https://repology.org/metapackage/netdata/versions) | [Facebook](https://www.facebook.com/linuxnetdata/)
## :pray: Contribute
@@ -702,7 +771,6 @@ General information about contributions:
- Check our [Security Policy](https://github.com/netdata/netdata/security/policy).
- Found a bug? Open a [GitHub issue](https://github.com/netdata/netdata/issues/new?assignees=&labels=bug%2Cneeds+triage&template=BUG_REPORT.yml&title=%5BBug%5D%3A+).
- Read our [Contributing Guide](https://github.com/netdata/.github/blob/main/CONTRIBUTING.md), which contains all the information you need to contribute to Netdata, such as improving our documentation, engaging in the community, and developing new features. We've made it as frictionless as possible, but if you need help, just ping us on our community forums!
-- We have a whole category dedicated to contributing and extending Netdata on our [community forums](https://community.netdata.cloud/c/agent-development/9)
Package maintainers should read the guide on [building Netdata from source](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/source.md) for
instructions on building each Netdata component from the source and preparing a package.
diff --git a/REDISTRIBUTED.md b/REDISTRIBUTED.md
index 76ee9fb766af0f..0ccc843f7507ab 100644
--- a/REDISTRIBUTED.md
+++ b/REDISTRIBUTED.md
@@ -108,7 +108,7 @@ connectivity is not available.
Copyright 2016, Hyunje Alex Jun and other contributors
[MIT License](https://github.com/noraesae/perfect-scrollbar/blob/master/LICENSE)
-- [FontAwesome](https://fortawesome.github.io/Font-Awesome/)
+- [FontAwesome](https://github.com/FortAwesome/Font-Awesome)
Created by Dave Gandy
Font license: [SIL OFL 1.1](http://scripts.sil.org/OFL)
@@ -141,7 +141,7 @@ connectivity is not available.
Copyright 2014, Pavel Rojtberg
[LGPL 2.1 License](http://opensource.org/licenses/LGPL-2.1)
-- [PyYAML](https://bitbucket.org/blackjack/pysensors)
+- [PyYAML](https://pypi.org/project/PyYAML/)
Copyright 2006, Kirill Simonov
[MIT License](https://github.com/yaml/pyyaml/blob/master/LICENSE)
@@ -190,4 +190,9 @@ connectivity is not available.
Copyright March 2010 by Université de Montréal, Richard Simard and Pierre L'Ecuyer
[GPL 3.0](https://www.gnu.org/licenses/gpl-3.0.en.html)
+- [xxHash](https://github.com/Cyan4973/xxHash)
+
+ Copyright (c) 2012-2021 Yann Collet
+ [BSD](https://github.com/Cyan4973/xxHash/blob/dev/LICENSE)
+
diff --git a/aclk/aclk.c b/aclk/aclk.c
index 312db076ff5a56..e95d7d6ab7c6b8 100644
--- a/aclk/aclk.c
+++ b/aclk/aclk.c
@@ -154,7 +154,9 @@ static int load_private_key()
static int wait_till_cloud_enabled()
{
- netdata_log_info("Waiting for Cloud to be enabled");
+ nd_log(NDLS_DAEMON, NDLP_INFO,
+ "Waiting for Cloud to be enabled");
+
while (!netdata_cloud_enabled) {
sleep_usec(USEC_PER_SEC * 1);
if (!service_running(SERVICE_ACLK))
@@ -233,17 +235,22 @@ void aclk_mqtt_wss_log_cb(mqtt_wss_log_type_t log_type, const char* str)
switch(log_type) {
case MQTT_WSS_LOG_ERROR:
case MQTT_WSS_LOG_FATAL:
+ nd_log(NDLS_DAEMON, NDLP_ERR, "%s", str);
+ return;
+
case MQTT_WSS_LOG_WARN:
- error_report("%s", str);
+ nd_log(NDLS_DAEMON, NDLP_WARNING, "%s", str);
return;
+
case MQTT_WSS_LOG_INFO:
- netdata_log_info("%s", str);
+ nd_log(NDLS_DAEMON, NDLP_INFO, "%s", str);
return;
+
case MQTT_WSS_LOG_DEBUG:
- netdata_log_debug(D_ACLK, "%s", str);
return;
+
default:
- netdata_log_error("Unknown log type from mqtt_wss");
+ nd_log(NDLS_DAEMON, NDLP_ERR, "Unknown log type from mqtt_wss");
}
}
@@ -297,7 +304,9 @@ static void puback_callback(uint16_t packet_id)
#endif
if (aclk_shared_state.mqtt_shutdown_msg_id == (int)packet_id) {
- netdata_log_info("Shutdown message has been acknowledged by the cloud. Exiting gracefully");
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "Shutdown message has been acknowledged by the cloud. Exiting gracefully");
+
aclk_shared_state.mqtt_shutdown_msg_rcvd = 1;
}
}
@@ -335,9 +344,11 @@ static int handle_connection(mqtt_wss_client client)
}
if (disconnect_req || aclk_kill_link) {
- netdata_log_info("Going to restart connection due to disconnect_req=%s (cloud req), aclk_kill_link=%s (reclaim)",
- disconnect_req ? "true" : "false",
- aclk_kill_link ? "true" : "false");
+ nd_log(NDLS_DAEMON, NDLP_NOTICE,
+ "Going to restart connection due to disconnect_req=%s (cloud req), aclk_kill_link=%s (reclaim)",
+ disconnect_req ? "true" : "false",
+ aclk_kill_link ? "true" : "false");
+
disconnect_req = 0;
aclk_kill_link = 0;
aclk_graceful_disconnect(client);
@@ -390,7 +401,9 @@ static inline void mqtt_connected_actions(mqtt_wss_client client)
void aclk_graceful_disconnect(mqtt_wss_client client)
{
- netdata_log_info("Preparing to gracefully shutdown ACLK connection");
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "Preparing to gracefully shutdown ACLK connection");
+
aclk_queue_lock();
aclk_queue_flush();
@@ -403,17 +416,22 @@ void aclk_graceful_disconnect(mqtt_wss_client client)
break;
}
if (aclk_shared_state.mqtt_shutdown_msg_rcvd) {
- netdata_log_info("MQTT App Layer `disconnect` message sent successfully");
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "MQTT App Layer `disconnect` message sent successfully");
break;
}
}
- netdata_log_info("ACLK link is down");
- netdata_log_access("ACLK DISCONNECTED");
+
+ nd_log(NDLS_DAEMON, NDLP_WARNING, "ACLK link is down");
+ nd_log(NDLS_ACCESS, NDLP_WARNING, "ACLK DISCONNECTED");
+
aclk_stats_upd_online(0);
last_disconnect_time = now_realtime_sec();
aclk_connected = 0;
- netdata_log_info("Attempting to gracefully shutdown the MQTT/WSS connection");
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "Attempting to gracefully shutdown the MQTT/WSS connection");
+
mqtt_wss_disconnect(client, 1000);
}
@@ -455,7 +473,9 @@ static int aclk_block_till_recon_allowed() {
next_connection_attempt = now_realtime_sec() + (recon_delay / MSEC_PER_SEC);
last_backoff_value = (float)recon_delay / MSEC_PER_SEC;
- netdata_log_info("Wait before attempting to reconnect in %.3f seconds", recon_delay / (float)MSEC_PER_SEC);
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "Wait before attempting to reconnect in %.3f seconds", recon_delay / (float)MSEC_PER_SEC);
+
// we want to wake up from time to time to check netdata_exit
while (recon_delay)
{
@@ -593,7 +613,9 @@ static int aclk_attempt_to_connect(mqtt_wss_client client)
return 1;
}
- netdata_log_info("Attempting connection now");
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "Attempting connection now");
+
memset(&base_url, 0, sizeof(url_t));
if (url_parse(aclk_cloud_base_url, &base_url)) {
aclk_status = ACLK_STATUS_INVALID_CLOUD_URL;
@@ -680,7 +702,9 @@ static int aclk_attempt_to_connect(mqtt_wss_client client)
error_report("Can't use encoding=proto without at least \"proto\" capability.");
continue;
}
- netdata_log_info("New ACLK protobuf protocol negotiated successfully (/env response).");
+
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "New ACLK protobuf protocol negotiated successfully (/env response).");
memset(&auth_url, 0, sizeof(url_t));
if (url_parse(aclk_env->auth_endpoint, &auth_url)) {
@@ -750,9 +774,9 @@ static int aclk_attempt_to_connect(mqtt_wss_client client)
if (!ret) {
last_conn_time_mqtt = now_realtime_sec();
- netdata_log_info("ACLK connection successfully established");
+ nd_log(NDLS_DAEMON, NDLP_INFO, "ACLK connection successfully established");
aclk_status = ACLK_STATUS_CONNECTED;
- netdata_log_access("ACLK CONNECTED");
+ nd_log(NDLS_ACCESS, NDLP_INFO, "ACLK CONNECTED");
mqtt_connected_actions(client);
return 0;
}
@@ -798,7 +822,9 @@ void *aclk_main(void *ptr)
netdata_thread_disable_cancelability();
#if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
- netdata_log_info("Killing ACLK thread -> cloud functionality has been disabled");
+ nd_log(NDLS_DAEMON, NDLP_INFO,
+ "Killing ACLK thread -> cloud functionality has been disabled");
+
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
return NULL;
#endif
@@ -857,7 +883,7 @@ void *aclk_main(void *ptr)
aclk_stats_upd_online(0);
last_disconnect_time = now_realtime_sec();
aclk_connected = 0;
- netdata_log_access("ACLK DISCONNECTED");
+ nd_log(NDLS_ACCESS, NDLP_WARNING, "ACLK DISCONNECTED");
}
} while (service_running(SERVICE_ACLK));
@@ -891,7 +917,7 @@ void *aclk_main(void *ptr)
return NULL;
}
-void aclk_host_state_update(RRDHOST *host, int cmd)
+void aclk_host_state_update(RRDHOST *host, int cmd, int queryable)
{
uuid_t node_id;
int ret = 0;
@@ -924,7 +950,9 @@ void aclk_host_state_update(RRDHOST *host, int cmd)
rrdhost_aclk_state_unlock(localhost);
create_query->data.bin_payload.topic = ACLK_TOPICID_CREATE_NODE;
create_query->data.bin_payload.msg_name = "CreateNodeInstance";
- netdata_log_info("Registering host=%s, hops=%u", host->machine_guid, host->system_info->hops);
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "Registering host=%s, hops=%u", host->machine_guid, host->system_info->hops);
+
aclk_queue_query(create_query);
return;
}
@@ -934,7 +962,7 @@ void aclk_host_state_update(RRDHOST *host, int cmd)
node_instance_connection_t node_state_update = {
.hops = host->system_info->hops,
.live = cmd,
- .queryable = 1,
+ .queryable = queryable,
.session_id = aclk_session_newarch
};
node_state_update.node_id = mallocz(UUID_STR_LEN);
@@ -947,8 +975,9 @@ void aclk_host_state_update(RRDHOST *host, int cmd)
query->data.bin_payload.payload = generate_node_instance_connection(&query->data.bin_payload.size, &node_state_update);
rrdhost_aclk_state_unlock(localhost);
- netdata_log_info("Queuing status update for node=%s, live=%d, hops=%u",(char*)node_state_update.node_id, cmd,
- host->system_info->hops);
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "Queuing status update for node=%s, live=%d, hops=%u, queryable=%d",
+ (char*)node_state_update.node_id, cmd, host->system_info->hops, queryable);
freez((void*)node_state_update.node_id);
query->data.bin_payload.msg_name = "UpdateNodeInstanceConnection";
query->data.bin_payload.topic = ACLK_TOPICID_NODE_CONN;
@@ -990,9 +1019,10 @@ void aclk_send_node_instances()
node_state_update.claim_id = localhost->aclk_state.claimed_id;
query->data.bin_payload.payload = generate_node_instance_connection(&query->data.bin_payload.size, &node_state_update);
rrdhost_aclk_state_unlock(localhost);
- netdata_log_info("Queuing status update for node=%s, live=%d, hops=%d",(char*)node_state_update.node_id,
- list->live,
- list->hops);
+
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "Queuing status update for node=%s, live=%d, hops=%d, queryable=1",
+ (char*)node_state_update.node_id, list->live, list->hops);
freez((void*)node_state_update.capabilities);
freez((void*)node_state_update.node_id);
@@ -1014,8 +1044,11 @@ void aclk_send_node_instances()
node_instance_creation.claim_id = localhost->aclk_state.claimed_id,
create_query->data.bin_payload.payload = generate_node_instance_creation(&create_query->data.bin_payload.size, &node_instance_creation);
rrdhost_aclk_state_unlock(localhost);
- netdata_log_info("Queuing registration for host=%s, hops=%d",(char*)node_instance_creation.machine_guid,
- list->hops);
+
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "Queuing registration for host=%s, hops=%d",
+ (char*)node_instance_creation.machine_guid, list->hops);
+
freez((void *)node_instance_creation.machine_guid);
aclk_queue_query(create_query);
}
@@ -1292,7 +1325,7 @@ char *aclk_state_json(void)
}
void add_aclk_host_labels(void) {
- DICTIONARY *labels = localhost->rrdlabels;
+ RRDLABELS *labels = localhost->rrdlabels;
#ifdef ENABLE_ACLK
rrdlabels_add(labels, "_aclk_available", "true", RRDLABEL_SRC_AUTO|RRDLABEL_SRC_ACLK);
@@ -1322,7 +1355,7 @@ void add_aclk_host_labels(void) {
void aclk_queue_node_info(RRDHOST *host, bool immediate)
{
- struct aclk_sync_host_config *wc = (struct aclk_sync_host_config *) host->aclk_sync_host_config;
+ struct aclk_sync_cfg_t *wc = host->aclk_config;
if (likely(wc))
wc->node_info_send_time = (host == localhost || immediate) ? 1 : now_realtime_sec();
}
diff --git a/aclk/aclk.h b/aclk/aclk.h
index 0badc1a628c6f1..72d1a2e119fd2f 100644
--- a/aclk/aclk.h
+++ b/aclk/aclk.h
@@ -75,7 +75,7 @@ extern struct aclk_shared_state {
int mqtt_shutdown_msg_rcvd;
} aclk_shared_state;
-void aclk_host_state_update(RRDHOST *host, int cmd);
+void aclk_host_state_update(RRDHOST *host, int cmd, int queryable);
void aclk_send_node_instances(void);
void aclk_send_bin_msg(char *msg, size_t msg_len, enum aclk_topics subtopic, const char *msgname);
diff --git a/aclk/aclk_otp.c b/aclk/aclk_otp.c
index 99b2adea272a88..207ca08cf0d16a 100644
--- a/aclk/aclk_otp.c
+++ b/aclk/aclk_otp.c
@@ -502,7 +502,7 @@ int aclk_get_mqtt_otp(RSA *p_key, char **mqtt_id, char **mqtt_usr, char **mqtt_p
}
// Decrypt Challenge / Get response
- unsigned char *response_plaintext;
+ unsigned char *response_plaintext = NULL;
int response_plaintext_bytes = private_decrypt(p_key, challenge, challenge_bytes, &response_plaintext);
if (response_plaintext_bytes < 0) {
netdata_log_error("Couldn't decrypt the challenge received");
diff --git a/aclk/aclk_query.c b/aclk/aclk_query.c
index 07d571be178b0e..da5385fdb83523 100644
--- a/aclk/aclk_query.c
+++ b/aclk/aclk_query.c
@@ -90,6 +90,12 @@ static bool aclk_web_client_interrupt_cb(struct web_client *w __maybe_unused, vo
}
static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query) {
+ ND_LOG_STACK lgs[] = {
+ ND_LOG_FIELD_TXT(NDF_SRC_TRANSPORT, "aclk"),
+ ND_LOG_FIELD_END(),
+ };
+ ND_LOG_STACK_PUSH(lgs);
+
int retval = 0;
BUFFER *local_buffer = NULL;
size_t size = 0;
@@ -110,9 +116,9 @@ static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query)
usec_t t;
web_client_timeout_checkpoint_set(w, query->timeout);
if(web_client_timeout_checkpoint_and_check(w, &t)) {
- netdata_log_access("QUERY CANCELED: QUEUE TIME EXCEEDED %llu ms (LIMIT %d ms)", t / USEC_PER_MS, query->timeout);
+ nd_log(NDLS_ACCESS, NDLP_ERR, "QUERY CANCELED: QUEUE TIME EXCEEDED %llu ms (LIMIT %d ms)", t / USEC_PER_MS, query->timeout);
retval = 1;
- w->response.code = HTTP_RESP_BACKEND_FETCH_FAILED;
+ w->response.code = HTTP_RESP_SERVICE_UNAVAILABLE;
aclk_http_msg_v2_err(query_thr->client, query->callback_topic, query->msg_id, w->response.code, CLOUD_EC_SND_TIMEOUT, CLOUD_EMSG_SND_TIMEOUT, NULL, 0);
goto cleanup;
}
@@ -217,25 +223,8 @@ static int http_api_v2(struct aclk_query_thread *query_thr, aclk_query_t query)
// send msg.
w->response.code = aclk_http_msg_v2(query_thr->client, query->callback_topic, query->msg_id, t, query->created, w->response.code, local_buffer->buffer, local_buffer->len);
- struct timeval tv;
-
cleanup:
- now_monotonic_high_precision_timeval(&tv);
- netdata_log_access("%llu: %d '[ACLK]:%d' '%s' (sent/all = %zu/%zu bytes %0.0f%%, prep/sent/total = %0.2f/%0.2f/%0.2f ms) %d '%s'",
- w->id
- , gettid()
- , query_thr->idx
- , "DATA"
- , sent
- , size
- , size > sent ? -(((size - sent) / (double)size) * 100.0) : ((size > 0) ? (((sent - size ) / (double)size) * 100.0) : 0.0)
- , dt_usec(&w->timings.tv_ready, &w->timings.tv_in) / 1000.0
- , dt_usec(&tv, &w->timings.tv_ready) / 1000.0
- , dt_usec(&tv, &w->timings.tv_in) / 1000.0
- , w->response.code
- , strip_control_characters((char *)buffer_tostring(w->url_as_received))
- );
-
+ web_client_log_completed_request(w, false);
web_client_release_to_cache(w);
pending_req_list_rm(query->msg_id);
diff --git a/aclk/aclk_rx_msgs.c b/aclk/aclk_rx_msgs.c
index 84ade2b34601a7..0e91e28c04f47c 100644
--- a/aclk/aclk_rx_msgs.c
+++ b/aclk/aclk_rx_msgs.c
@@ -108,7 +108,7 @@ static inline int aclk_v2_payload_get_query(const char *payload, char **query_ur
}
start = payload + 4;
- if(!(end = strstr(payload, " HTTP/1.1\x0D\x0A"))) {
+ if(!(end = strstr(payload, HTTP_1_1 HTTP_ENDL))) {
errno = 0;
netdata_log_error("Doesn't look like HTTP GET request.");
return 1;
@@ -449,13 +449,13 @@ int stop_streaming_contexts(const char *msg, size_t msg_len)
int cancel_pending_req(const char *msg, size_t msg_len)
{
- struct aclk_cancel_pending_req cmd;
+ struct aclk_cancel_pending_req cmd = {.request_id = NULL, .trace_id = NULL};
if(parse_cancel_pending_req(msg, msg_len, &cmd)) {
error_report("Error parsing CancelPendingReq");
return 1;
}
- netdata_log_access("ACLK CancelPendingRequest REQ: %s, cloud trace-id: %s", cmd.request_id, cmd.trace_id);
+ nd_log(NDLS_ACCESS, NDLP_NOTICE, "ACLK CancelPendingRequest REQ: %s, cloud trace-id: %s", cmd.request_id, cmd.trace_id);
if (mark_pending_req_cancelled(cmd.request_id))
error_report("CancelPending Request for %s failed. No such pending request.", cmd.request_id);
diff --git a/aclk/aclk_tx_msgs.c b/aclk/aclk_tx_msgs.c
index 26e20dfb2c8a2a..4102c818d39ee1 100644
--- a/aclk/aclk_tx_msgs.c
+++ b/aclk/aclk_tx_msgs.c
@@ -194,15 +194,16 @@ int aclk_http_msg_v2(mqtt_wss_client client, const char *topic, const char *msg_
int rc = aclk_send_message_with_bin_payload(client, msg, topic, payload, payload_len);
switch (rc) {
- case HTTP_RESP_FORBIDDEN:
- aclk_http_msg_v2_err(client, topic, msg_id, rc, CLOUD_EC_REQ_REPLY_TOO_BIG, CLOUD_EMSG_REQ_REPLY_TOO_BIG, NULL, 0);
- break;
- case HTTP_RESP_INTERNAL_SERVER_ERROR:
- aclk_http_msg_v2_err(client, topic, msg_id, rc, CLOUD_EC_FAIL_TOPIC, CLOUD_EMSG_FAIL_TOPIC, payload, payload_len);
- break;
- case HTTP_RESP_BACKEND_FETCH_FAILED:
- aclk_http_msg_v2_err(client, topic, msg_id, rc, CLOUD_EC_SND_TIMEOUT, CLOUD_EMSG_SND_TIMEOUT, payload, payload_len);
- break;
+ case HTTP_RESP_FORBIDDEN:
+ aclk_http_msg_v2_err(client, topic, msg_id, rc, CLOUD_EC_REQ_REPLY_TOO_BIG, CLOUD_EMSG_REQ_REPLY_TOO_BIG, NULL, 0);
+ break;
+ case HTTP_RESP_INTERNAL_SERVER_ERROR:
+ aclk_http_msg_v2_err(client, topic, msg_id, rc, CLOUD_EC_FAIL_TOPIC, CLOUD_EMSG_FAIL_TOPIC, payload, payload_len);
+ break;
+ case HTTP_RESP_GATEWAY_TIMEOUT:
+ case HTTP_RESP_SERVICE_UNAVAILABLE:
+ aclk_http_msg_v2_err(client, topic, msg_id, rc, CLOUD_EC_SND_TIMEOUT, CLOUD_EMSG_SND_TIMEOUT, payload, payload_len);
+ break;
}
return rc ? rc : http_code;
}
diff --git a/aclk/aclk_util.c b/aclk/aclk_util.c
index 00920e0690ee13..3bf2e3f188a2c4 100644
--- a/aclk/aclk_util.c
+++ b/aclk/aclk_util.c
@@ -1,6 +1,9 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include "aclk_util.h"
+
+#ifdef ENABLE_ACLK
+
#include "aclk_proxy.h"
#include "daemon/common.h"
@@ -437,6 +440,7 @@ void aclk_set_proxy(char **ohost, int *port, char **uname, char **pwd, enum mqtt
freez(proxy);
}
+#endif /* ENABLE_ACLK */
#if defined(OPENSSL_VERSION_NUMBER) && OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110
static EVP_ENCODE_CTX *EVP_ENCODE_CTX_new(void)
diff --git a/aclk/aclk_util.h b/aclk/aclk_util.h
index 6b7e4e9c2b3bbb..38ef5b0bcbf1ce 100644
--- a/aclk/aclk_util.h
+++ b/aclk/aclk_util.h
@@ -3,6 +3,8 @@
#define ACLK_UTIL_H
#include "libnetdata/libnetdata.h"
+
+#ifdef ENABLE_ACLK
#include "mqtt_wss_client.h"
#define CLOUD_EC_MALFORMED_NODE_ID 1
@@ -112,6 +114,7 @@ unsigned long int aclk_tbeb_delay(int reset, int base, unsigned long int min, un
#define aclk_tbeb_reset(x) aclk_tbeb_delay(1, 0, 0, 0)
void aclk_set_proxy(char **ohost, int *port, char **uname, char **pwd, enum mqtt_wss_proxy_type *type);
+#endif /* ENABLE_ACLK */
int base64_encode_helper(unsigned char *out, int *outl, const unsigned char *in, int in_len);
diff --git a/aclk/https_client.c b/aclk/https_client.c
index 623082027dc72c..5385786b82875d 100644
--- a/aclk/https_client.c
+++ b/aclk/https_client.c
@@ -4,20 +4,12 @@
#include "https_client.h"
-#include "mqtt_websockets/c-rbuf/include/ringbuffer.h"
-
#include "aclk_util.h"
#include "daemon/global_statistics.h"
#define DEFAULT_CHUNKED_RESPONSE_BUFFER_SIZE (4096)
-enum http_parse_state {
- HTTP_PARSE_INITIAL = 0,
- HTTP_PARSE_HEADERS,
- HTTP_PARSE_CONTENT
-};
-
static const char *http_req_type_to_str(http_req_type_t req) {
switch (req) {
case HTTP_REQ_GET:
@@ -33,39 +25,33 @@ static const char *http_req_type_to_str(http_req_type_t req) {
#define TRANSFER_ENCODING_CHUNKED (-2)
-typedef struct {
- enum http_parse_state state;
- int content_length;
- int http_code;
-
- // for chunked data only
- char *chunked_response;
- size_t chunked_response_size;
- size_t chunked_response_written;
-
- enum chunked_content_state {
- CHUNKED_CONTENT_CHUNK_SIZE = 0,
- CHUNKED_CONTENT_CHUNK_DATA,
- CHUNKED_CONTENT_CHUNK_END_CRLF,
- CHUNKED_CONTENT_FINAL_CRLF
- } chunked_content_state;
-
- size_t chunk_size;
- size_t chunk_got;
-} http_parse_ctx;
-
#define HTTP_PARSE_CTX_INITIALIZER { .state = HTTP_PARSE_INITIAL, .content_length = -1, .http_code = 0 }
-static inline void http_parse_ctx_clear(http_parse_ctx *ctx) {
+/*
+ * Prepare a parse context for parsing a new HTTP response.
+ *
+ * Resets the parser state, content length (-1) and HTTP code (0),
+ * allocates a new header hash and applies the default flags.
+ * The header hash allocated here is released by http_parse_ctx_destroy(),
+ * so every call should be paired with one.
+ */
+void http_parse_ctx_create(http_parse_ctx *ctx)
+{
ctx->state = HTTP_PARSE_INITIAL;
ctx->content_length = -1;
ctx->http_code = 0;
+ ctx->headers = c_rhash_new(0);
+ ctx->flags = HTTP_PARSE_FLAGS_DEFAULT;
+}
+
+/*
+ * Release what http_parse_ctx_create() and the header parser allocated:
+ * every stored header value (heap copies made with strdupz()) and then
+ * the header hash itself.
+ *
+ * Scalar members such as http_code and content_length are not touched,
+ * so they can still be read after this call.
+ */
+void http_parse_ctx_destroy(http_parse_ctx *ctx)
+{
+ c_rhash_iter_t iter;
+ const char *key;
+
+ c_rhash_iter_t_initialize(&iter);
+ while ( !c_rhash_iter_str_keys(ctx->headers, &iter, &key) ) {
+ void *val = NULL;
+ // a non-zero return means the lookup failed; never free a pointer
+ // the lookup did not set
+ if (!c_rhash_get_ptr_by_str(ctx->headers, key, &val))
+ freez(val);
+ }
+
+ c_rhash_destroy(ctx->headers);
}
#define POLL_TO_MS 100
-#define NEED_MORE_DATA 0
-#define PARSE_SUCCESS 1
-#define PARSE_ERROR -1
#define HTTP_LINE_TERM "\x0D\x0A"
#define RESP_PROTO "HTTP/1.1 "
#define HTTP_KEYVAL_SEPARATOR ": "
@@ -76,7 +62,7 @@ static int process_http_hdr(http_parse_ctx *parse_ctx, const char *key, const ch
{
// currently we care only about specific headers
// we can skip the rest
- if (!strcmp("content-length", key)) {
+ if (parse_ctx->content_length < 0 && !strcmp("content-length", key)) {
if (parse_ctx->content_length == TRANSFER_ENCODING_CHUNKED) {
netdata_log_error("Content-length and transfer-encoding: chunked headers are mutually exclusive");
return 1;
@@ -85,7 +71,7 @@ static int process_http_hdr(http_parse_ctx *parse_ctx, const char *key, const ch
netdata_log_error("Duplicate content-length header");
return 1;
}
- parse_ctx->content_length = atoi(val);
+ parse_ctx->content_length = str2u(val);
if (parse_ctx->content_length < 0) {
netdata_log_error("Invalid content-length %d", parse_ctx->content_length);
return 1;
@@ -102,9 +88,20 @@ static int process_http_hdr(http_parse_ctx *parse_ctx, const char *key, const ch
}
return 0;
}
+ char *val_cpy = strdupz(val);
+ c_rhash_insert_str_ptr(parse_ctx->headers, key, val_cpy);
return 0;
}
+/*
+ * Look up a header value stored in the parse context.
+ *
+ * Returns the stored value, or NULL when the lookup in ctx->headers fails.
+ * The returned string is owned by the context and is freed by
+ * http_parse_ctx_destroy(); callers must not free it or keep it past that.
+ *
+ * NOTE(review): the parser compares against lower-case names
+ * ("content-length"), so `name` presumably has to be lower-case as well -
+ * confirm against parse_http_hdr().
+ */
+const char *get_http_header_by_name(http_parse_ctx *ctx, const char *name)
+{
+ const char *ret;
+ // c_rhash_get_ptr_by_str() returns non-zero when the key is not found
+ if (c_rhash_get_ptr_by_str(ctx->headers, name, (void**)&ret))
+ return NULL;
+
+ return ret;
+}
+
static int parse_http_hdr(rbuf_t buf, http_parse_ctx *parse_ctx)
{
int idx, idx_end;
@@ -169,8 +166,8 @@ static int process_chunked_content(rbuf_t buf, http_parse_ctx *parse_ctx)
case CHUNKED_CONTENT_CHUNK_SIZE:
if (!rbuf_find_bytes(buf, HTTP_LINE_TERM, strlen(HTTP_LINE_TERM), &idx)) {
if (rbuf_bytes_available(buf) >= rbuf_get_capacity(buf))
- return PARSE_ERROR;
- return NEED_MORE_DATA;
+ return HTTP_PARSE_ERROR;
+ return HTTP_PARSE_NEED_MORE_DATA;
}
if (idx == 0) {
parse_ctx->chunked_content_state = CHUNKED_CONTENT_FINAL_CRLF;
@@ -178,7 +175,7 @@ static int process_chunked_content(rbuf_t buf, http_parse_ctx *parse_ctx)
}
if (idx >= HTTP_HDR_BUFFER_SIZE) {
netdata_log_error("Chunk size is too long");
- return PARSE_ERROR;
+ return HTTP_PARSE_ERROR;
}
char buf_size[HTTP_HDR_BUFFER_SIZE];
rbuf_pop(buf, buf_size, idx);
@@ -186,13 +183,13 @@ static int process_chunked_content(rbuf_t buf, http_parse_ctx *parse_ctx)
long chunk_size = strtol(buf_size, NULL, 16);
if (chunk_size < 0 || chunk_size == LONG_MAX) {
netdata_log_error("Chunk size out of range");
- return PARSE_ERROR;
+ return HTTP_PARSE_ERROR;
}
parse_ctx->chunk_size = chunk_size;
if (parse_ctx->chunk_size == 0) {
if (errno == EINVAL) {
netdata_log_error("Invalid chunk size");
- return PARSE_ERROR;
+ return HTTP_PARSE_ERROR;
}
parse_ctx->chunked_content_state = CHUNKED_CONTENT_CHUNK_END_CRLF;
continue;
@@ -204,7 +201,7 @@ static int process_chunked_content(rbuf_t buf, http_parse_ctx *parse_ctx)
// fallthrough
case CHUNKED_CONTENT_CHUNK_DATA:
if (!(bytes_to_copy = rbuf_bytes_available(buf)))
- return NEED_MORE_DATA;
+ return HTTP_PARSE_NEED_MORE_DATA;
if (bytes_to_copy > parse_ctx->chunk_size - parse_ctx->chunk_got)
bytes_to_copy = parse_ctx->chunk_size - parse_ctx->chunk_got;
rbuf_pop(buf, parse_ctx->chunked_response + parse_ctx->chunked_response_written, bytes_to_copy);
@@ -217,19 +214,19 @@ static int process_chunked_content(rbuf_t buf, http_parse_ctx *parse_ctx)
case CHUNKED_CONTENT_FINAL_CRLF:
case CHUNKED_CONTENT_CHUNK_END_CRLF:
if (rbuf_bytes_available(buf) < strlen(HTTP_LINE_TERM))
- return NEED_MORE_DATA;
+ return HTTP_PARSE_NEED_MORE_DATA;
char buf_crlf[strlen(HTTP_LINE_TERM)];
rbuf_pop(buf, buf_crlf, strlen(HTTP_LINE_TERM));
if (memcmp(buf_crlf, HTTP_LINE_TERM, strlen(HTTP_LINE_TERM))) {
netdata_log_error("CRLF expected");
- return PARSE_ERROR;
+ return HTTP_PARSE_ERROR;
}
if (parse_ctx->chunked_content_state == CHUNKED_CONTENT_FINAL_CRLF) {
if (parse_ctx->chunked_response_size != parse_ctx->chunked_response_written)
netdata_log_error("Chunked response size mismatch");
chunked_response_buffer_grow_by(parse_ctx, 1);
parse_ctx->chunked_response[parse_ctx->chunked_response_written] = 0;
- return PARSE_SUCCESS;
+ return HTTP_PARSE_SUCCESS;
}
if (parse_ctx->chunk_size == 0) {
parse_ctx->chunked_content_state = CHUNKED_CONTENT_FINAL_CRLF;
@@ -241,34 +238,34 @@ static int process_chunked_content(rbuf_t buf, http_parse_ctx *parse_ctx)
} while(1);
}
-static int parse_http_response(rbuf_t buf, http_parse_ctx *parse_ctx)
+http_parse_rc parse_http_response(rbuf_t buf, http_parse_ctx *parse_ctx)
{
int idx;
char rc[4];
do {
if (parse_ctx->state != HTTP_PARSE_CONTENT && !rbuf_find_bytes(buf, HTTP_LINE_TERM, strlen(HTTP_LINE_TERM), &idx))
- return NEED_MORE_DATA;
+ return HTTP_PARSE_NEED_MORE_DATA;
switch (parse_ctx->state) {
case HTTP_PARSE_INITIAL:
if (rbuf_memcmp_n(buf, RESP_PROTO, strlen(RESP_PROTO))) {
netdata_log_error("Expected response to start with \"%s\"", RESP_PROTO);
- return PARSE_ERROR;
+ return HTTP_PARSE_ERROR;
}
rbuf_bump_tail(buf, strlen(RESP_PROTO));
if (rbuf_pop(buf, rc, 4) != 4) {
netdata_log_error("Expected HTTP status code");
- return PARSE_ERROR;
+ return HTTP_PARSE_ERROR;
}
if (rc[3] != ' ') {
netdata_log_error("Expected space after HTTP return code");
- return PARSE_ERROR;
+ return HTTP_PARSE_ERROR;
}
rc[3] = 0;
parse_ctx->http_code = atoi(rc);
if (parse_ctx->http_code < 100 || parse_ctx->http_code >= 600) {
netdata_log_error("HTTP code not in range 100 to 599");
- return PARSE_ERROR;
+ return HTTP_PARSE_ERROR;
}
rbuf_find_bytes(buf, HTTP_LINE_TERM, strlen(HTTP_LINE_TERM), &idx);
@@ -284,7 +281,7 @@ static int parse_http_response(rbuf_t buf, http_parse_ctx *parse_ctx)
break;
}
if (parse_http_hdr(buf, parse_ctx))
- return PARSE_ERROR;
+ return HTTP_PARSE_ERROR;
rbuf_find_bytes(buf, HTTP_LINE_TERM, strlen(HTTP_LINE_TERM), &idx);
rbuf_bump_tail(buf, idx + strlen(HTTP_LINE_TERM));
break;
@@ -294,11 +291,14 @@ static int parse_http_response(rbuf_t buf, http_parse_ctx *parse_ctx)
return process_chunked_content(buf, parse_ctx);
if (parse_ctx->content_length < 0)
- return PARSE_SUCCESS;
+ return HTTP_PARSE_SUCCESS;
+
+ if (parse_ctx->flags & HTTP_PARSE_FLAG_DONT_WAIT_FOR_CONTENT)
+ return HTTP_PARSE_SUCCESS;
if (rbuf_bytes_available(buf) >= (size_t)parse_ctx->content_length)
- return PARSE_SUCCESS;
- return NEED_MORE_DATA;
+ return HTTP_PARSE_SUCCESS;
+ return HTTP_PARSE_NEED_MORE_DATA;
}
} while(1);
}
@@ -486,7 +486,7 @@ static int read_parse_response(https_req_ctx_t *ctx) {
} while (ctx->poll_fd.events == 0 && rbuf_bytes_free(ctx->buf_rx) > 0);
} while (!(ret = parse_http_response(ctx->buf_rx, &ctx->parse_ctx)));
- if (ret != PARSE_SUCCESS) {
+ if (ret != HTTP_PARSE_SUCCESS) {
netdata_log_error("Error parsing HTTP response");
return 1;
}
@@ -500,7 +500,7 @@ static int handle_http_request(https_req_ctx_t *ctx) {
BUFFER *hdr = buffer_create(TX_BUFFER_SIZE, &netdata_buffers_statistics.buffers_aclk);
int rc = 0;
- http_parse_ctx_clear(&ctx->parse_ctx);
+ http_parse_ctx_create(&ctx->parse_ctx);
// Prepare data to send
switch (ctx->request->request_type) {
@@ -526,7 +526,7 @@ static int handle_http_request(https_req_ctx_t *ctx) {
buffer_strcat(hdr, ctx->request->url);
}
- buffer_strcat(hdr, " HTTP/1.1\x0D\x0A");
+ buffer_strcat(hdr, HTTP_1_1 HTTP_ENDL);
//TODO Headers!
if (ctx->request->request_type != HTTP_REQ_CONNECT) {
@@ -661,12 +661,15 @@ int https_request(https_req_t *request, https_req_response_t *response) {
ctx->request = &req;
if (handle_http_request(ctx)) {
netdata_log_error("Failed to CONNECT with proxy");
+ http_parse_ctx_destroy(&ctx->parse_ctx);
goto exit_sock;
}
if (ctx->parse_ctx.http_code != 200) {
netdata_log_error("Proxy didn't return 200 OK (got %d)", ctx->parse_ctx.http_code);
+ http_parse_ctx_destroy(&ctx->parse_ctx);
goto exit_sock;
}
+ http_parse_ctx_destroy(&ctx->parse_ctx);
netdata_log_info("Proxy accepted CONNECT upgrade");
}
ctx->request = request;
@@ -713,8 +716,10 @@ int https_request(https_req_t *request, https_req_response_t *response) {
// The actual request here
if (handle_http_request(ctx)) {
netdata_log_error("Couldn't process request");
+ http_parse_ctx_destroy(&ctx->parse_ctx);
goto exit_SSL;
}
+ http_parse_ctx_destroy(&ctx->parse_ctx);
response->http_code = ctx->parse_ctx.http_code;
if (ctx->parse_ctx.content_length == TRANSFER_ENCODING_CHUNKED) {
response->payload_size = ctx->parse_ctx.chunked_response_size;
diff --git a/aclk/https_client.h b/aclk/https_client.h
index daf4766f87a15a..0b97fbb0253188 100644
--- a/aclk/https_client.h
+++ b/aclk/https_client.h
@@ -5,6 +5,9 @@
#include "libnetdata/libnetdata.h"
+#include "mqtt_websockets/c-rbuf/include/ringbuffer.h"
+#include "mqtt_websockets/c_rhash/include/c_rhash.h"
+
typedef enum http_req_type {
HTTP_REQ_GET = 0,
HTTP_REQ_POST,
@@ -77,4 +80,56 @@ void https_req_response_init(https_req_response_t *res);
int https_request(https_req_t *request, https_req_response_t *response);
+// We expose the previously internal parser, as it is also useful from
+// other parts of the code.
+enum http_parse_state {
+ HTTP_PARSE_INITIAL = 0,
+ HTTP_PARSE_HEADERS,
+ HTTP_PARSE_CONTENT
+};
+
+typedef uint32_t parse_ctx_flags_t;
+
+#define HTTP_PARSE_FLAG_DONT_WAIT_FOR_CONTENT ((parse_ctx_flags_t)0x01)
+
+#define HTTP_PARSE_FLAGS_DEFAULT ((parse_ctx_flags_t)0)
+
+// State carried across calls to parse_http_response() for one HTTP response.
+// Initialize with http_parse_ctx_create(), release with http_parse_ctx_destroy().
+typedef struct {
+ parse_ctx_flags_t flags; // HTTP_PARSE_FLAG_* bits; set to HTTP_PARSE_FLAGS_DEFAULT on create
+
+ enum http_parse_state state; // which part of the response is parsed next
+ int content_length; // -1 on create; a negative value is not a byte count
+ int http_code; // status code read from the response line; 0 on create
+
+ c_rhash headers; // header name -> heap copy of its value, freed on destroy
+
+ // for chunked data only
+ char *chunked_response;
+ size_t chunked_response_size;
+ size_t chunked_response_written;
+
+ enum chunked_content_state {
+ CHUNKED_CONTENT_CHUNK_SIZE = 0,
+ CHUNKED_CONTENT_CHUNK_DATA,
+ CHUNKED_CONTENT_CHUNK_END_CRLF,
+ CHUNKED_CONTENT_FINAL_CRLF
+ } chunked_content_state;
+
+ size_t chunk_size; // size of the chunk currently being read
+ size_t chunk_got; // bytes of the current chunk copied so far
+} http_parse_ctx;
+
+void http_parse_ctx_create(http_parse_ctx *ctx);
+void http_parse_ctx_destroy(http_parse_ctx *ctx);
+
+// Result of parse_http_response().
+// HTTP_PARSE_NEED_MORE_DATA has to stay 0: read_parse_response() keeps
+// reading for as long as the parser returns a zero value.
+typedef enum {
+ HTTP_PARSE_ERROR = -1,
+ HTTP_PARSE_NEED_MORE_DATA = 0,
+ HTTP_PARSE_SUCCESS = 1
+} http_parse_rc;
+
+http_parse_rc parse_http_response(rbuf_t buf, http_parse_ctx *parse_ctx);
+
+const char *get_http_header_by_name(http_parse_ctx *ctx, const char *name);
+
#endif /* NETDATA_HTTPS_CLIENT_H */
diff --git a/aclk/schema-wrappers/alarm_config.cc b/aclk/schema-wrappers/alarm_config.cc
index fe0b0517cf6ad2..64d28f3242323d 100644
--- a/aclk/schema-wrappers/alarm_config.cc
+++ b/aclk/schema-wrappers/alarm_config.cc
@@ -15,28 +15,22 @@ void destroy_aclk_alarm_configuration(struct aclk_alarm_configuration *cfg)
freez(cfg->alarm);
freez(cfg->tmpl);
freez(cfg->on_chart);
-
freez(cfg->classification);
freez(cfg->type);
freez(cfg->component);
-
freez(cfg->os);
freez(cfg->hosts);
freez(cfg->plugin);
freez(cfg->module);
freez(cfg->charts);
- freez(cfg->families);
freez(cfg->lookup);
freez(cfg->every);
freez(cfg->units);
-
freez(cfg->green);
freez(cfg->red);
-
freez(cfg->calculation_expr);
freez(cfg->warning_expr);
freez(cfg->critical_expr);
-
freez(cfg->recipient);
freez(cfg->exec);
freez(cfg->delay);
@@ -44,12 +38,11 @@ void destroy_aclk_alarm_configuration(struct aclk_alarm_configuration *cfg)
freez(cfg->info);
freez(cfg->options);
freez(cfg->host_labels);
-
freez(cfg->p_db_lookup_dimensions);
freez(cfg->p_db_lookup_method);
freez(cfg->p_db_lookup_options);
-
freez(cfg->chart_labels);
+ freez(cfg->summary);
}
char *generate_provide_alarm_configuration(size_t *len, struct provide_alarm_configuration *data)
@@ -65,14 +58,12 @@ char *generate_provide_alarm_configuration(size_t *len, struct provide_alarm_con
cfg->set_template_(data->cfg.tmpl);
if(data->cfg.on_chart)
cfg->set_on_chart(data->cfg.on_chart);
-
if (data->cfg.classification)
cfg->set_classification(data->cfg.classification);
if (data->cfg.type)
cfg->set_type(data->cfg.type);
if (data->cfg.component)
cfg->set_component(data->cfg.component);
-
if (data->cfg.os)
cfg->set_os(data->cfg.os);
if (data->cfg.hosts)
@@ -83,27 +74,22 @@ char *generate_provide_alarm_configuration(size_t *len, struct provide_alarm_con
cfg->set_module(data->cfg.module);
if(data->cfg.charts)
cfg->set_charts(data->cfg.charts);
- if(data->cfg.families)
- cfg->set_families(data->cfg.families);
if(data->cfg.lookup)
cfg->set_lookup(data->cfg.lookup);
if(data->cfg.every)
cfg->set_every(data->cfg.every);
if(data->cfg.units)
cfg->set_units(data->cfg.units);
-
if (data->cfg.green)
cfg->set_green(data->cfg.green);
if (data->cfg.red)
cfg->set_red(data->cfg.red);
-
if (data->cfg.calculation_expr)
cfg->set_calculation_expr(data->cfg.calculation_expr);
if (data->cfg.warning_expr)
cfg->set_warning_expr(data->cfg.warning_expr);
if (data->cfg.critical_expr)
cfg->set_critical_expr(data->cfg.critical_expr);
-
if (data->cfg.recipient)
cfg->set_recipient(data->cfg.recipient);
if (data->cfg.exec)
@@ -131,6 +117,8 @@ char *generate_provide_alarm_configuration(size_t *len, struct provide_alarm_con
if (data->cfg.chart_labels)
cfg->set_chart_labels(data->cfg.chart_labels);
+ if (data->cfg.summary)
+ cfg->set_summary(data->cfg.summary);
*len = PROTO_COMPAT_MSG_SIZE(msg);
char *bin = (char*)mallocz(*len);
diff --git a/aclk/schema-wrappers/alarm_config.h b/aclk/schema-wrappers/alarm_config.h
index 4eaa4fd708d989..3c9a5d9a89cb74 100644
--- a/aclk/schema-wrappers/alarm_config.h
+++ b/aclk/schema-wrappers/alarm_config.h
@@ -24,7 +24,6 @@ struct aclk_alarm_configuration {
char *plugin;
char *module;
char *charts;
- char *families;
char *lookup;
char *every;
char *units;
@@ -52,6 +51,7 @@ struct aclk_alarm_configuration {
int32_t p_update_every;
char *chart_labels;
+ char *summary;
};
void destroy_aclk_alarm_configuration(struct aclk_alarm_configuration *cfg);
diff --git a/aclk/schema-wrappers/alarm_stream.cc b/aclk/schema-wrappers/alarm_stream.cc
index 1538bc9e09420c..29d80e39eb24c2 100644
--- a/aclk/schema-wrappers/alarm_stream.cc
+++ b/aclk/schema-wrappers/alarm_stream.cc
@@ -66,60 +66,41 @@ static alarms::v1::AlarmStatus aclk_alarm_status_to_proto(enum aclk_alarm_status
void destroy_alarm_log_entry(struct alarm_log_entry *entry)
{
- //freez(entry->node_id);
- //freez(entry->claim_id);
-
freez(entry->chart);
freez(entry->name);
- freez(entry->family);
-
freez(entry->config_hash);
-
freez(entry->timezone);
-
freez(entry->exec_path);
freez(entry->conf_source);
freez(entry->command);
-
freez(entry->value_string);
freez(entry->old_value_string);
-
freez(entry->rendered_info);
freez(entry->chart_context);
freez(entry->transition_id);
freez(entry->chart_name);
+ freez(entry->summary);
}
static void fill_alarm_log_entry(struct alarm_log_entry *data, AlarmLogEntry *proto)
{
proto->set_node_id(data->node_id);
proto->set_claim_id(data->claim_id);
-
proto->set_chart(data->chart);
proto->set_name(data->name);
- if (data->family)
- proto->set_family(data->family);
-
proto->set_when(data->when);
-
proto->set_config_hash(data->config_hash);
-
proto->set_utc_offset(data->utc_offset);
proto->set_timezone(data->timezone);
-
proto->set_exec_path(data->exec_path);
proto->set_conf_source(data->conf_source);
proto->set_command(data->command);
-
proto->set_duration(data->duration);
proto->set_non_clear_duration(data->non_clear_duration);
-
-
proto->set_status(aclk_alarm_status_to_proto(data->status));
proto->set_old_status(aclk_alarm_status_to_proto(data->old_status));
proto->set_delay(data->delay);
proto->set_delay_up_to_timestamp(data->delay_up_to_timestamp);
-
proto->set_last_repeat(data->last_repeat);
proto->set_silenced(data->silenced);
@@ -136,6 +117,7 @@ static void fill_alarm_log_entry(struct alarm_log_entry *data, AlarmLogEntry *pr
proto->set_event_id(data->event_id);
proto->set_transition_id(data->transition_id);
proto->set_chart_name(data->chart_name);
+ proto->set_summary(data->summary);
}
char *generate_alarm_log_entry(size_t *len, struct alarm_log_entry *data)
diff --git a/aclk/schema-wrappers/alarm_stream.h b/aclk/schema-wrappers/alarm_stream.h
index 87893e0db6767f..3c81ff4452a433 100644
--- a/aclk/schema-wrappers/alarm_stream.h
+++ b/aclk/schema-wrappers/alarm_stream.h
@@ -76,7 +76,8 @@ struct alarm_log_entry {
char *chart_name;
uint64_t event_id;
- char *transition_id;
+ char *transition_id;
+ char *summary;
};
struct send_alarm_checkpoint {
diff --git a/aclk/schema-wrappers/node_info.h b/aclk/schema-wrappers/node_info.h
index de4ade78aaaa4d..4f57601dff5190 100644
--- a/aclk/schema-wrappers/node_info.h
+++ b/aclk/schema-wrappers/node_info.h
@@ -39,7 +39,7 @@ struct aclk_node_info {
const char *custom_info;
const char *machine_guid;
- DICTIONARY *host_labels_ptr;
+ RRDLABELS *host_labels_ptr;
struct machine_learning_info ml_info;
};
diff --git a/aclk/schema-wrappers/schema_wrapper_utils.cc b/aclk/schema-wrappers/schema_wrapper_utils.cc
index 6573e629970f09..96a4b9bf13d002 100644
--- a/aclk/schema-wrappers/schema_wrapper_utils.cc
+++ b/aclk/schema-wrappers/schema_wrapper_utils.cc
@@ -14,8 +14,8 @@ void set_timeval_from_google_timestamp(const google::protobuf::Timestamp &ts, st
tv->tv_usec = ts.nanos()/1000;
}
-int label_add_to_map_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) {
- (void)ls;
+int label_add_to_map_callback(const char *name, const char *value, RRDLABEL_SRC ls __maybe_unused, void *data)
+{
auto map = (google::protobuf::Map *)data;
map->insert({name, value});
return 1;
diff --git a/build_external/clean-install-arch-debug.Dockerfile b/build_external/clean-install-arch-debug.Dockerfile
index 5a67bfbc39c973..a3080ea69375d1 100644
--- a/build_external/clean-install-arch-debug.Dockerfile
+++ b/build_external/clean-install-arch-debug.Dockerfile
@@ -48,9 +48,10 @@ RUN find . -type f >/opt/netdata/manifest
RUN CFLAGS="-Og -g -ggdb -Wall -Wextra -Wformat-signedness -DNETDATA_INTERNAL_CHECKS=1\
-DNETDATA_VERIFY_LOCKS=1 ${EXTRA_CFLAGS}" ./netdata-installer.sh --require-cloud --disable-lto
-RUN ln -sf /dev/stdout /var/log/netdata/access.log
-RUN ln -sf /dev/stdout /var/log/netdata/debug.log
-RUN ln -sf /dev/stderr /var/log/netdata/error.log
+RUN ln -sf /dev/stdout /var/log/netdata/access.log && \
+ ln -sf /dev/stdout /var/log/netdata/debug.log && \
+ ln -sf /dev/stderr /var/log/netdata/error.log && \
+ ln -sf /dev/stdout /var/log/netdata/fluentbit.log
RUN printf >/opt/netdata/source/gdb_batch '\
set args -D \n\
diff --git a/build_external/clean-install-arch-extras.Dockerfile b/build_external/clean-install-arch-extras.Dockerfile
index 8c6f4fbaa21af6..8b18057e9934ce 100644
--- a/build_external/clean-install-arch-extras.Dockerfile
+++ b/build_external/clean-install-arch-extras.Dockerfile
@@ -48,9 +48,10 @@ RUN find . -type f >/opt/netdata/manifest
RUN CFLAGS="-Og -g -ggdb -Wall -Wextra -Wformat-signedness -DNETDATA_INTERNAL_CHECKS=1\
-DNETDATA_VERIFY_LOCKS=1 ${EXTRA_CFLAGS}" ./netdata-installer.sh --require-cloud --disable-lto
-RUN ln -sf /dev/stdout /var/log/netdata/access.log
-RUN ln -sf /dev/stdout /var/log/netdata/debug.log
-RUN ln -sf /dev/stderr /var/log/netdata/error.log
+RUN ln -sf /dev/stdout /var/log/netdata/access.log && \
+ ln -sf /dev/stdout /var/log/netdata/debug.log && \
+ ln -sf /dev/stderr /var/log/netdata/error.log && \
+ ln -sf /dev/stdout /var/log/netdata/fluentbit.log
RUN rm /var/lib/netdata/registry/netdata.public.unique.id
diff --git a/build_external/clean-install-arch.Dockerfile b/build_external/clean-install-arch.Dockerfile
index d4d0d47061b8d1..b3c61fa15a5219 100644
--- a/build_external/clean-install-arch.Dockerfile
+++ b/build_external/clean-install-arch.Dockerfile
@@ -47,8 +47,9 @@ RUN find . -type f >/opt/netdata/manifest
RUN CFLAGS="-O1 -ggdb -Wall -Wextra -Wformat-signedness -DNETDATA_INTERNAL_CHECKS=1\
-DNETDATA_VERIFY_LOCKS=1 ${EXTRA_CFLAGS}" ./netdata-installer.sh --disable-lto
-RUN ln -sf /dev/stdout /var/log/netdata/access.log
-RUN ln -sf /dev/stdout /var/log/netdata/debug.log
-RUN ln -sf /dev/stderr /var/log/netdata/error.log
+RUN ln -sf /dev/stdout /var/log/netdata/access.log && \
+ ln -sf /dev/stdout /var/log/netdata/debug.log && \
+ ln -sf /dev/stderr /var/log/netdata/error.log && \
+ ln -sf /dev/stdout /var/log/netdata/fluentbit.log
CMD ["/usr/sbin/netdata", "-D"]
diff --git a/build_external/clean-install.Dockerfile b/build_external/clean-install.Dockerfile
index bf63a5599e8444..0ee154e3007965 100644
--- a/build_external/clean-install.Dockerfile
+++ b/build_external/clean-install.Dockerfile
@@ -29,9 +29,10 @@ RUN find . -type f >/opt/netdata/manifest
RUN CFLAGS="-O1 -ggdb -Wall -Wextra -Wformat-signedness -DNETDATA_INTERNAL_CHECKS=1\
-DNETDATA_VERIFY_LOCKS=1 ${EXTRA_CFLAGS}" ./netdata-installer.sh --disable-lto
-RUN ln -sf /dev/stdout /var/log/netdata/access.log
-RUN ln -sf /dev/stdout /var/log/netdata/debug.log
-RUN ln -sf /dev/stderr /var/log/netdata/error.log
+RUN ln -sf /dev/stdout /var/log/netdata/access.log && \
+ ln -sf /dev/stdout /var/log/netdata/debug.log && \
+ ln -sf /dev/stderr /var/log/netdata/error.log && \
+ ln -sf /dev/stdout /var/log/netdata/fluentbit.log
RUN rm /var/lib/netdata/registry/netdata.public.unique.id
diff --git a/build_external/scenarios/children-to-localhost/child_netdata.conf b/build_external/scenarios/children-to-localhost/child_netdata.conf
new file mode 100644
index 00000000000000..1f8b0a1d6717e3
--- /dev/null
+++ b/build_external/scenarios/children-to-localhost/child_netdata.conf
@@ -0,0 +1,2 @@
+[db]
+ mode = dbengine
diff --git a/build_external/scenarios/children-to-localhost/child_stream.conf b/build_external/scenarios/children-to-localhost/child_stream.conf
index 72a353fe05ea87..a8ed306bb6b627 100644
--- a/build_external/scenarios/children-to-localhost/child_stream.conf
+++ b/build_external/scenarios/children-to-localhost/child_stream.conf
@@ -1,7 +1,7 @@
[stream]
enabled = yes
destination = tcp:host.docker.internal
- api key = 00000000-0000-0000-0000-000000000000
+ api key = 11111111-2222-3333-4444-555555555555
timeout seconds = 60
default port = 19999
send charts matching = *
diff --git a/build_external/scenarios/children-to-localhost/docker-compose.yml b/build_external/scenarios/children-to-localhost/docker-compose.yml
index 59739f9eba9ff9..dea56fe5268952 100644
--- a/build_external/scenarios/children-to-localhost/docker-compose.yml
+++ b/build_external/scenarios/children-to-localhost/docker-compose.yml
@@ -5,5 +5,6 @@ services:
command: /usr/sbin/netdata -D
volumes:
- ./child_stream.conf:/etc/netdata/stream.conf:ro
+ - ./child_netdata.conf:/etc/netdata/netdata.conf:ro
extra_hosts:
- "host.docker.internal:host-gateway"
diff --git a/build_external/scenarios/children-to-localhost/parent_stream.conf b/build_external/scenarios/children-to-localhost/parent_stream.conf
index bf85ae258ad832..36a416b912cf21 100644
--- a/build_external/scenarios/children-to-localhost/parent_stream.conf
+++ b/build_external/scenarios/children-to-localhost/parent_stream.conf
@@ -1,4 +1,4 @@
-[00000000-0000-0000-0000-000000000000]
+[11111111-2222-3333-4444-555555555555]
enabled = yes
allow from = *
default history = 3600
diff --git a/claim/claim.c b/claim/claim.c
index d81440d2a1b350..774b65ebb17235 100644
--- a/claim/claim.c
+++ b/claim/claim.c
@@ -47,7 +47,7 @@ char *get_agent_claimid()
extern struct registry registry;
/* rrd_init() and post_conf_load() must have been called before this function */
-CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, const char **msg)
+CLAIM_AGENT_RESPONSE claim_agent(const char *claiming_arguments, bool force, const char **msg __maybe_unused)
{
if (!force || !netdata_cloud_enabled) {
netdata_log_error("Refusing to claim agent -> cloud functionality has been disabled");
@@ -323,11 +323,11 @@ static bool check_claim_param(const char *s) {
}
void claim_reload_all(void) {
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
load_claiming_state();
registry_update_cloud_base_url();
rrdpush_send_claimed_id(localhost);
- error_log_limit_reset();
+ nd_log_limits_reset();
}
int api_v2_claim(struct web_client *w, char *url) {
diff --git a/claim/netdata-claim.sh.in b/claim/netdata-claim.sh.in
index cc6c1093220d45..a3db591cdee2ba 100755
--- a/claim/netdata-claim.sh.in
+++ b/claim/netdata-claim.sh.in
@@ -175,7 +175,7 @@ elif [ -f "${MACHINE_GUID_FILE}" ]; then
echo >&2 "netdata.public.unique.id is not readable. Please make sure you have rights to read it (Filename: ${MACHINE_GUID_FILE})."
exit 18
else
- if mkdir -p "${MACHINE_GUID_FILE%/*}" && /bin/echo -n "$(gen_id)" > "${MACHINE_GUID_FILE}"; then
+ if mkdir -p "${MACHINE_GUID_FILE%/*}" && echo -n "$(gen_id)" > "${MACHINE_GUID_FILE}"; then
ID="$(cat "${MACHINE_GUID_FILE}")"
MGUID=$ID
else
diff --git a/cli/cli.c b/cli/cli.c
index 288173b1e571d7..2efa518e63bf02 100644
--- a/cli/cli.c
+++ b/cli/cli.c
@@ -3,25 +3,18 @@
#include "cli.h"
#include "daemon/pipename.h"
-void error_int(int is_collector __maybe_unused, const char *prefix __maybe_unused, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) {
- FILE *fp = stderr;
-
+void netdata_logger(ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, const char *file, const char *function, unsigned long line, const char *fmt, ... ) {
va_list args;
- va_start( args, fmt );
- vfprintf(fp, fmt, args );
- va_end( args );
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args );
+ va_end(args);
}
#ifdef NETDATA_INTERNAL_CHECKS
uint64_t debug_flags;
-void debug_int( const char *file __maybe_unused , const char *function __maybe_unused , const unsigned long line __maybe_unused, const char *fmt __maybe_unused, ... )
-{
-
-}
-
-void fatal_int( const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt __maybe_unused, ... )
+void netdata_logger_fatal( const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt __maybe_unused, ... )
{
abort();
};
diff --git a/collectors/COLLECTORS.md b/collectors/COLLECTORS.md
index aa56ac7024b392..9a349959346b62 100644
--- a/collectors/COLLECTORS.md
+++ b/collectors/COLLECTORS.md
@@ -41,641 +41,1154 @@ If you don't see the app/service you'd like to monitor in this list:
in [Go](https://github.com/netdata/go.d.plugin/blob/master/README.md#how-to-develop-a-collector)
or [Python](https://github.com/netdata/netdata/blob/master/docs/guides/python-collector.md)
-## Available Collectors
-
-- [Monitor anything with Netdata](#monitor-anything-with-netdata)
- - [Add your application to Netdata](#add-your-application-to-netdata)
- - [Available Collectors](#available-collectors)
- - [Service and application collectors](#service-and-application-collectors)
- - [Generic](#generic)
- - [APM (application performance monitoring)](#apm-application-performance-monitoring)
- - [Containers and VMs](#containers-and-vms)
- - [Data stores](#data-stores)
- - [Distributed computing](#distributed-computing)
- - [Email](#email)
- - [Kubernetes](#kubernetes)
- - [Logs](#logs)
- - [Messaging](#messaging)
- - [Network](#network)
- - [Provisioning](#provisioning)
- - [Remote devices](#remote-devices)
- - [Search](#search)
- - [Storage](#storage)
- - [Web](#web)
- - [System collectors](#system-collectors)
- - [Applications](#applications)
- - [Disks and filesystems](#disks-and-filesystems)
- - [eBPF](#ebpf)
- - [Hardware](#hardware)
- - [Memory](#memory)
- - [Networks](#networks)
- - [Operating systems](#operating-systems)
- - [Processes](#processes)
- - [Resources](#resources)
- - [Users](#users)
- - [Netdata collectors](#netdata-collectors)
- - [Orchestrators](#orchestrators)
- - [Third-party collectors](#third-party-collectors)
- - [Etc](#etc)
-
-## Service and application collectors
-
-The Netdata Agent auto-detects and collects metrics from all of the services and applications below. You can also
-configure any of these collectors according to your setup and infrastructure.
-
-### Generic
-
-- [Prometheus endpoints](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/README.md): Gathers
- metrics from any number of Prometheus endpoints, with support to autodetect more than 600 services and applications.
-- [Pandas](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/pandas/README.md): A Python
- collector that gathers
- metrics from a [pandas](https://pandas.pydata.org/) dataframe. Pandas is a high level data processing library in
- Python that can read various formats of data from local files or web endpoints. Custom processing and transformation
- logic can also be expressed as part of the collector configuration.
-
-### APM (application performance monitoring)
-
-- [Go applications](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/go_expvar/README.md):
- Monitor any Go application that exposes its
- metrics with the `expvar` package from the Go standard library.
-- [Java Spring Boot 2 applications](https://github.com/netdata/go.d.plugin/blob/master/modules/springboot2/README.md):
- Monitor running Java Spring Boot 2 applications that expose their metrics with the use of the Spring Boot Actuator.
-- [statsd](https://github.com/netdata/netdata/blob/master/collectors/statsd.plugin/README.md): Implement a high
- performance `statsd` server for Netdata.
-- [phpDaemon](https://github.com/netdata/go.d.plugin/blob/master/modules/phpdaemon/README.md): Collect worker
- statistics (total, active, idle), and uptime for web and network applications.
-- [uWSGI](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/uwsgi/README.md): Monitor
- performance metrics exposed by the uWSGI Stats
- Server.
+## Available Data Collection Integrations
+
+### APM
+
+- [Alamos FE2 server](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/alamos_fe2_server.md)
+
+- [Apache Airflow](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/apache_airflow.md)
+
+- [Apache Flink](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/apache_flink.md)
+
+- [Audisto](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/audisto.md)
+
+- [Dependency-Track](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/dependency-track.md)
+
+- [Go applications (EXPVAR)](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md)
+
+- [Google Pagespeed](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/google_pagespeed.md)
+
+- [IBM AIX systems Njmon](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ibm_aix_systems_njmon.md)
+
+- [JMX](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/jmx.md)
+
+- [Java Spring-boot 2 applications](https://github.com/netdata/go.d.plugin/blob/master/modules/springboot2/integrations/java_spring-boot_2_applications.md)
+
+- [NRPE daemon](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/nrpe_daemon.md)
+
+- [Sentry](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/sentry.md)
+
+- [Sysload](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/sysload.md)
+
+- [VSCode](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/vscode.md)
+
+- [YOURLS URL Shortener](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/yourls_url_shortener.md)
+
+- [bpftrace variables](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/bpftrace_variables.md)
+
+- [gpsd](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/gpsd.md)
+
+- [jolokia](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/jolokia.md)
+
+- [phpDaemon](https://github.com/netdata/go.d.plugin/blob/master/modules/phpdaemon/integrations/phpdaemon.md)
+
+### Authentication and Authorization
+
+- [Fail2ban](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/fail2ban/integrations/fail2ban.md)
+
+- [FreeRADIUS](https://github.com/netdata/go.d.plugin/blob/master/modules/freeradius/integrations/freeradius.md)
+
+- [HashiCorp Vault secrets](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/hashicorp_vault_secrets.md)
+
+- [LDAP](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ldap.md)
+
+- [OpenLDAP (community)](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/openldap_community.md)
+
+- [OpenLDAP](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/openldap/integrations/openldap.md)
+
+- [RADIUS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/radius.md)
+
+- [SSH](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ssh.md)
+
+- [TACACS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/tacacs.md)
+
+### Blockchain Servers
+
+- [Chia](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/chia.md)
+
+- [Crypto exchanges](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/crypto_exchanges.md)
+
+- [Cryptowatch](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cryptowatch.md)
+
+- [Energi Core Wallet](https://github.com/netdata/go.d.plugin/blob/master/modules/energid/integrations/energi_core_wallet.md)
+
+- [Go-ethereum](https://github.com/netdata/go.d.plugin/blob/master/modules/geth/integrations/go-ethereum.md)
+
+- [Helium miner (validator)](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/helium_miner_validator.md)
+
+- [IOTA full node](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/iota_full_node.md)
+
+- [Sia](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/sia.md)
+
+### CICD Platforms
+
+- [Concourse](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/concourse.md)
+
+- [GitLab Runner](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/gitlab_runner.md)
+
+- [Jenkins](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/jenkins.md)
+
+- [Puppet](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/puppet/integrations/puppet.md)
+
+### Cloud Provider Managed
+
+- [AWS EC2 Compute instances](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/aws_ec2_compute_instances.md)
+
+- [AWS EC2 Spot Instance](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/aws_ec2_spot_instance.md)
+
+- [AWS ECS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/aws_ecs.md)
+
+- [AWS Health events](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/aws_health_events.md)
+
+- [AWS Quota](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/aws_quota.md)
+
+- [AWS S3 buckets](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/aws_s3_buckets.md)
+
+- [AWS SQS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/aws_sqs.md)
+
+- [AWS instance health](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/aws_instance_health.md)
+
+- [Akamai Global Traffic Management](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/akamai_global_traffic_management.md)
+
+- [Akamai Cloudmonitor](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/akami_cloudmonitor.md)
+
+- [Alibaba Cloud](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/alibaba_cloud.md)
+
+- [ArvanCloud CDN](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/arvancloud_cdn.md)
+
+- [Azure AD App passwords](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/azure_ad_app_passwords.md)
+
+- [Azure Elastic Pool SQL](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/azure_elastic_pool_sql.md)
+
+- [Azure Resources](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/azure_resources.md)
+
+- [Azure SQL](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/azure_sql.md)
+
+- [Azure Service Bus](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/azure_service_bus.md)
+
+- [Azure application](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/azure_application.md)
+
+- [BigQuery](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/bigquery.md)
+
+- [CloudWatch](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cloudwatch.md)
+
+- [Dell EMC ECS cluster](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/dell_emc_ecs_cluster.md)
+
+- [DigitalOcean](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/digitalocean.md)
+
+- [GCP GCE](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/gcp_gce.md)
+
+- [GCP Quota](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/gcp_quota.md)
+
+- [Google Cloud Platform](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/google_cloud_platform.md)
+
+- [Google Stackdriver](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/google_stackdriver.md)
+
+- [Linode](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/linode.md)
+
+- [Lustre metadata](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/lustre_metadata.md)
+
+- [Nextcloud servers](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/nextcloud_servers.md)
+
+- [OpenStack](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/openstack.md)
+
+- [Zerto](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/zerto.md)
### Containers and VMs
-- [Docker containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md): Monitor the
- health and performance of individual Docker containers using the cgroups collector plugin.
-- [DockerD](https://github.com/netdata/go.d.plugin/blob/master/modules/docker/README.md): Collect container health
- statistics.
-- [Docker Engine](https://github.com/netdata/go.d.plugin/blob/master/modules/docker_engine/README.md): Collect
- runtime statistics from the `docker` daemon using the `metrics-address` feature.
-- [Docker Hub](https://github.com/netdata/go.d.plugin/blob/master/modules/dockerhub/README.md): Collect statistics
- about Docker repositories, such as pulls, starts, status, time since last update, and more.
-- [Libvirt](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md): Monitor the health and
- performance of individual Libvirt containers
- using the cgroups collector plugin.
-- [LXC](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md): Monitor the health and
- performance of individual LXC containers using
- the cgroups collector plugin.
-- [LXD](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md): Monitor the health and
- performance of individual LXD containers using
- the cgroups collector plugin.
-- [systemd-nspawn](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md): Monitor the
- health and performance of individual
- systemd-nspawn containers using the cgroups collector plugin.
-- [vCenter Server Appliance](https://github.com/netdata/go.d.plugin/blob/master/modules/vcsa/README.md): Monitor
- appliance system, components, and software update health statuses via the Health API.
-- [vSphere](https://github.com/netdata/go.d.plugin/blob/master/modules/vsphere/README.md): Collect host and virtual
- machine performance metrics.
-- [Xen/XCP-ng](https://github.com/netdata/netdata/blob/master/collectors/xenstat.plugin/README.md): Collect XenServer
- and XCP-ng metrics using `libxenstat`.
-
-### Data stores
-
-- [CockroachDB](https://github.com/netdata/go.d.plugin/blob/master/modules/cockroachdb/README.md): Monitor various
- database components using `_status/vars` endpoint.
-- [Consul](https://github.com/netdata/go.d.plugin/blob/master/modules/consul/README.md): Capture service and unbound
- checks status (passing, warning, critical, maintenance).
-- [Couchbase](https://github.com/netdata/go.d.plugin/blob/master/modules/couchbase/README.md): Gather per-bucket
- metrics from any number of instances of the distributed JSON document database.
-- [CouchDB](https://github.com/netdata/go.d.plugin/blob/master/modules/couchdb/README.md): Monitor database health and
- performance metrics
- (reads/writes, HTTP traffic, replication status, etc).
-- [MongoDB](https://github.com/netdata/go.d.plugin/blob/master/modules/mongodb/README.md): Collect server, database,
- replication and sharding performance and health metrics.
-- [MySQL](https://github.com/netdata/go.d.plugin/blob/master/modules/mysql/README.md): Collect database global,
- replication and per user statistics.
-- [OracleDB](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/oracledb/README.md): Monitor
- database performance and health metrics.
-- [Pika](https://github.com/netdata/go.d.plugin/blob/master/modules/pika/README.md): Gather metric, such as clients,
- memory usage, queries, and more from the Redis interface-compatible database.
-- [Postgres](https://github.com/netdata/go.d.plugin/blob/master/modules/postgres/README.md): Collect database health
- and performance metrics.
-- [ProxySQL](https://github.com/netdata/go.d.plugin/blob/master/modules/proxysql/README.md): Monitor database backend
- and frontend performance metrics.
-- [Redis](https://github.com/netdata/go.d.plugin/blob/master/modules/redis/README.md): Monitor status from any
- number of database instances by reading the server's response to the `INFO ALL` command.
-- [RethinkDB](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/rethinkdbs/README.md): Collect
- database server and cluster statistics.
-- [Riak KV](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/riakkv/README.md): Collect
- database stats from the `/stats` endpoint.
-- [Zookeeper](https://github.com/netdata/go.d.plugin/blob/master/modules/zookeeper/README.md): Monitor application
- health metrics reading the server's response to the `mntr` command.
-- [Memcached](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/memcached/README.md): Collect
- memory-caching system performance metrics.
-
-### Distributed computing
-
-- [BOINC](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/boinc/README.md): Monitor the total
- number of tasks, open tasks, and task
- states for the distributed computing client.
-- [Gearman](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/gearman/README.md): Collect
- application summary (queued, running) and per-job
- worker statistics (queued, idle, running).
-
-### Email
-
-- [Dovecot](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/dovecot/README.md): Collect email
- server performance metrics by reading the
- server's response to the `EXPORT global` command.
-- [EXIM](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/exim/README.md): Uses the `exim` tool
- to monitor the queue length of a
- mail/message transfer agent (MTA).
-- [Postfix](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/postfix/README.md): Uses
- the `postqueue` tool to monitor the queue length of a
- mail/message transfer agent (MTA).
+- [Containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/integrations/containers.md)
-### Kubernetes
+- [Docker Engine](https://github.com/netdata/go.d.plugin/blob/master/modules/docker_engine/integrations/docker_engine.md)
+
+- [Docker Hub repository](https://github.com/netdata/go.d.plugin/blob/master/modules/dockerhub/integrations/docker_hub_repository.md)
+
+- [Docker](https://github.com/netdata/go.d.plugin/blob/master/modules/docker/integrations/docker.md)
+
+- [LXC Containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/integrations/lxc_containers.md)
+
+- [Libvirt Containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/integrations/libvirt_containers.md)
+
+- [NSX-T](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/nsx-t.md)
+
+- [Podman](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/podman.md)
+
+- [Proxmox Containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/integrations/proxmox_containers.md)
+
+- [Proxmox VE](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/proxmox_ve.md)
+
+- [VMware vCenter Server](https://github.com/netdata/go.d.plugin/blob/master/modules/vsphere/integrations/vmware_vcenter_server.md)
+
+- [Virtual Machines](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/integrations/virtual_machines.md)
+
+- [Xen XCP-ng](https://github.com/netdata/netdata/blob/master/collectors/xenstat.plugin/integrations/xen_xcp-ng.md)
+
+- [cAdvisor](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cadvisor.md)
+
+- [oVirt Containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/integrations/ovirt_containers.md)
+
+- [vCenter Server Appliance](https://github.com/netdata/go.d.plugin/blob/master/modules/vcsa/integrations/vcenter_server_appliance.md)
+
+### Databases
+
+- [4D Server](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/4d_server.md)
+
+- [AWS RDS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/aws_rds.md)
+
+- [Cassandra](https://github.com/netdata/go.d.plugin/blob/master/modules/cassandra/integrations/cassandra.md)
+
+- [ClickHouse](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/clickhouse.md)
+
+- [ClusterControl CMON](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/clustercontrol_cmon.md)
+
+- [CockroachDB](https://github.com/netdata/go.d.plugin/blob/master/modules/cockroachdb/integrations/cockroachdb.md)
+
+- [CouchDB](https://github.com/netdata/go.d.plugin/blob/master/modules/couchdb/integrations/couchdb.md)
+
+- [Couchbase](https://github.com/netdata/go.d.plugin/blob/master/modules/couchbase/integrations/couchbase.md)
+
+- [HANA](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/hana.md)
+
+- [Hasura GraphQL Server](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/hasura_graphql_server.md)
+
+- [InfluxDB](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/influxdb.md)
+
+- [Machbase](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/machbase.md)
+
+- [MariaDB](https://github.com/netdata/go.d.plugin/blob/master/modules/mysql/integrations/mariadb.md)
+
+- [Memcached (community)](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/memcached_community.md)
+
+- [Memcached](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/memcached/integrations/memcached.md)
+
+- [MongoDB](https://github.com/netdata/go.d.plugin/blob/master/modules/mongodb/integrations/mongodb.md)
+
+- [MySQL](https://github.com/netdata/go.d.plugin/blob/master/modules/mysql/integrations/mysql.md)
+
+- [ODBC](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/odbc.md)
+
+- [Oracle DB (community)](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/oracle_db_community.md)
+
+- [Oracle DB](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/oracledb/integrations/oracle_db.md)
+
+- [Patroni](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/patroni.md)
+
+- [Percona MySQL](https://github.com/netdata/go.d.plugin/blob/master/modules/mysql/integrations/percona_mysql.md)
+
+- [PgBouncer](https://github.com/netdata/go.d.plugin/blob/master/modules/pgbouncer/integrations/pgbouncer.md)
+
+- [Pgpool-II](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/pgpool-ii.md)
+
+- [Pika](https://github.com/netdata/go.d.plugin/blob/master/modules/pika/integrations/pika.md)
+
+- [PostgreSQL](https://github.com/netdata/go.d.plugin/blob/master/modules/postgres/integrations/postgresql.md)
+
+- [ProxySQL](https://github.com/netdata/go.d.plugin/blob/master/modules/proxysql/integrations/proxysql.md)
+
+- [Redis](https://github.com/netdata/go.d.plugin/blob/master/modules/redis/integrations/redis.md)
+
+- [RethinkDB](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/rethinkdbs/integrations/rethinkdb.md)
+
+- [RiakKV](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/riakkv/integrations/riakkv.md)
+
+- [SQL Database agnostic](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/sql_database_agnostic.md)
+
+- [Vertica](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/vertica.md)
+
+- [Warp10](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/warp10.md)
+
+- [pgBackRest](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/pgbackrest.md)
+
+### Distributed Computing Systems
+
+- [BOINC](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/boinc/integrations/boinc.md)
+
+- [Gearman](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/gearman/integrations/gearman.md)
-- [Kubelet](https://github.com/netdata/go.d.plugin/blob/master/modules/k8s_kubelet/README.md): Monitor one or more
- instances of the Kubelet agent and collects metrics on number of pods/containers running, volume of Docker
- operations, and more.
-- [kube-proxy](https://github.com/netdata/go.d.plugin/blob/master/modules/k8s_kubeproxy/README.md): Collect
- metrics, such as syncing proxy rules and REST client requests, from one or more instances of `kube-proxy`.
-- [Service discovery](https://github.com/netdata/agent-service-discovery/blob/master/README.md): Find what services are running on a
- cluster's pods, converts that into configuration files, and exports them so they can be monitored by Netdata.
-
-### Logs
-
-- [Fluentd](https://github.com/netdata/go.d.plugin/blob/master/modules/fluentd/README.md): Gather application
- plugins metrics from an endpoint provided by `in_monitor plugin`.
-- [Logstash](https://github.com/netdata/go.d.plugin/blob/master/modules/logstash/README.md): Monitor JVM threads,
- memory usage, garbage collection statistics, and more.
-- [OpenVPN status logs](https://github.com/netdata/go.d.plugin/blob/master/modules/openvpn_status_log/README.md): Parse
- server log files and provide summary (client, traffic) metrics.
-- [Squid web server logs](https://github.com/netdata/go.d.plugin/blob/master/modules/squidlog/README.md): Tail Squid
- access logs to return the volume of requests, types of requests, bandwidth, and much more.
-- [Web server logs (Go version for Apache, NGINX)](https://github.com/netdata/go.d.plugin/blob/master/modules/weblog/README.md): Tail access logs and provide
- very detailed web server performance statistics. This module is able to parse 200k+ rows in less than half a second.
-- [Web server logs (Apache, NGINX)](https://github.com/netdata/go.d.plugin/blob/master/modules/weblog/README.md): Tail
- access log
- file and collect web server/caching proxy metrics.
-
-### Messaging
-
-- [ActiveMQ](https://github.com/netdata/go.d.plugin/blob/master/modules/activemq/README.md): Collect message broker
- queues and topics statistics using the ActiveMQ Console API.
-- [Beanstalk](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/beanstalk/README.md): Collect
- server and tube-level statistics, such as CPU
- usage, jobs rates, commands, and more.
-- [Pulsar](https://github.com/netdata/go.d.plugin/blob/master/modules/pulsar/README.md): Collect summary,
- namespaces, and topics performance statistics.
-- [RabbitMQ](https://github.com/netdata/go.d.plugin/blob/master/modules/rabbitmq/README.md): Collect message
- broker overview, system and per virtual host metrics.
-- [VerneMQ](https://github.com/netdata/go.d.plugin/blob/master/modules/vernemq/README.md): Monitor MQTT broker
- health and performance metrics. It collects all available info for both MQTTv3 and v5 communication
-
-### Network
-
-- [Bind 9](https://github.com/netdata/go.d.plugin/blob/master/modules/bind/README.md): Collect nameserver summary
- performance statistics via a web interface (`statistics-channels` feature).
-- [Chrony](https://github.com/netdata/go.d.plugin/blob/master/modules/chrony/README.md): Monitor the precision and
- statistics of a local `chronyd` server.
-- [CoreDNS](https://github.com/netdata/go.d.plugin/blob/master/modules/coredns/README.md): Measure DNS query round
- trip time.
-- [Dnsmasq](https://github.com/netdata/go.d.plugin/blob/master/modules/dnsmasq_dhcp/README.md): Automatically
- detects all configured `Dnsmasq` DHCP ranges and Monitor their utilization.
-- [DNSdist](https://github.com/netdata/go.d.plugin/blob/master/modules/dnsdist/README.md): Collect
- load-balancer performance and health metrics.
-- [Dnsmasq DNS Forwarder](https://github.com/netdata/go.d.plugin/blob/master/modules/dnsmasq/README.md): Gather
- queries, entries, operations, and events for the lightweight DNS forwarder.
-- [DNS Query Time](https://github.com/netdata/go.d.plugin/blob/master/modules/dnsquery/README.md): Monitor the round
- trip time for DNS queries in milliseconds.
-- [Freeradius](https://github.com/netdata/go.d.plugin/blob/master/modules/freeradius/README.md): Collect
- server authentication and accounting statistics from the `status server`.
-- [Libreswan](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/libreswan/README.md): Collect
- bytes-in, bytes-out, and uptime metrics.
-- [Icecast](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/icecast/README.md): Monitor the
- number of listeners for active sources.
-- [ISC Bind (RDNC)](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/bind_rndc/README.md):
- Collect nameserver summary performance
- statistics using the `rndc` tool.
-- [ISC DHCP](https://github.com/netdata/go.d.plugin/blob/master/modules/isc_dhcpd/README.md): Reads a
- `dhcpd.leases` file and collects metrics on total active leases, pool active leases, and pool utilization.
-- [OpenLDAP](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/openldap/README.md): Provides
- statistics information from the OpenLDAP
- (`slapd`) server.
-- [NSD](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/nsd/README.md): Monitor nameserver
- performance metrics using the `nsd-control`
- tool.
-- [NTP daemon](https://github.com/netdata/go.d.plugin/blob/master/modules/ntpd/README.md): Monitor the system variables
- of the local `ntpd` daemon (optionally including variables of the polled peers) using the NTP Control Message Protocol
- via a UDP socket.
-- [OpenSIPS](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/opensips/README.md): Collect
- server health and performance metrics using the
- `opensipsctl` tool.
-- [OpenVPN](https://github.com/netdata/go.d.plugin/blob/master/modules/openvpn/README.md): Gather server summary
- (client, traffic) and per user metrics (traffic, connection time) stats using `management-interface`.
-- [Pi-hole](https://github.com/netdata/go.d.plugin/blob/master/modules/pihole/README.md): Monitor basic (DNS
- queries, clients, blocklist) and extended (top clients, top permitted, and blocked domains) statistics using the PHP
- API.
-- [PowerDNS Authoritative Server](https://github.com/netdata/go.d.plugin/blob/master/modules/powerdns/README.md):
- Monitor one or more instances of the nameserver software to collect questions, events, and latency metrics.
-- [PowerDNS Recursor](https://github.com/netdata/go.d.plugin/blob/master/modules/powerdns/README.md#recursor):
- Gather incoming/outgoing questions, drops, timeouts, and cache usage from any number of DNS recursor instances.
-- [RetroShare](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/retroshare/README.md): Monitor
- application bandwidth, peers, and DHT
- metrics.
-- [Tor](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/tor/README.md): Capture traffic usage
- statistics using the Tor control port.
-- [Unbound](https://github.com/netdata/go.d.plugin/blob/master/modules/unbound/README.md): Collect DNS resolver
- summary and extended system and per thread metrics via the `remote-control` interface.
-
-### Provisioning
-
-- [Puppet](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/puppet/README.md): Monitor the
- status of Puppet Server and Puppet DB.
-
-### Remote devices
-
-- [AM2320](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/am2320/README.md): Monitor sensor
- temperature and humidity.
-- [Access point](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/ap/README.md): Monitor
- client, traffic and signal metrics using the `aw`
- tool.
-- [APC UPS](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/apcupsd/README.md): Capture status
- information using the `apcaccess` tool.
-- [Energi Core](https://github.com/netdata/go.d.plugin/blob/master/modules/energid/README.md): Monitor
- blockchain indexes, memory usage, network usage, and transactions of wallet instances.
-- [UPS/PDU](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/nut/README.md): Read the status of
- UPS/PDU devices using the `upsc` tool.
-- [SNMP devices](https://github.com/netdata/go.d.plugin/blob/master/modules/snmp/README.md): Gather data using the SNMP
- protocol.
-- [1-Wire sensors](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/w1sensor/README.md):
- Monitor sensor temperature.
-
-### Search
-
-- [Elasticsearch](https://github.com/netdata/go.d.plugin/blob/master/modules/elasticsearch/README.md): Collect
- dozens of metrics on search engine performance from local nodes and local indices. Includes cluster health and
- statistics.
-- [Solr](https://github.com/netdata/go.d.plugin/blob/master/modules/solr/README.md): Collect application search
- requests, search errors, update requests, and update errors statistics.
-
-### Storage
-
-- [Ceph](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/ceph/README.md): Monitor the Ceph
- cluster usage and server data consumption.
-- [HDFS](https://github.com/netdata/go.d.plugin/blob/master/modules/hdfs/README.md): Monitor health and performance
- metrics for filesystem datanodes and namenodes.
-- [IPFS](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/ipfs/README.md): Collect file system
- bandwidth, peers, and repo metrics.
-- [Scaleio](https://github.com/netdata/go.d.plugin/blob/master/modules/scaleio/README.md): Monitor storage system,
- storage pools, and SDCS health and performance metrics via VxFlex OS Gateway API.
-- [Samba](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/samba/README.md): Collect file
- sharing metrics using the `smbstatus` tool.
-
-### Web
-
-- [Apache](https://github.com/netdata/go.d.plugin/blob/master/modules/apache/README.md): Collect Apache web
- server performance metrics via the `server-status?auto` endpoint.
-- [HAProxy](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/haproxy/README.md): Collect
- frontend, backend, and health metrics.
-- [HTTP endpoints](https://github.com/netdata/go.d.plugin/blob/master/modules/httpcheck/README.md): Monitor
- any HTTP endpoint's availability and response time.
-- [Lighttpd](https://github.com/netdata/go.d.plugin/blob/master/modules/lighttpd/README.md): Collect web server
- performance metrics using the `server-status?auto` endpoint.
-- [Litespeed](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/litespeed/README.md): Collect
- web server data (network, connection,
- requests, cache) by reading `.rtreport*` files.
-- [Nginx](https://github.com/netdata/go.d.plugin/blob/master/modules/nginx/README.md): Monitor web server
- status information by gathering metrics via `ngx_http_stub_status_module`.
-- [Nginx VTS](https://github.com/netdata/go.d.plugin/blob/master/modules/nginxvts/README.md): Gathers metrics from
- any Nginx deployment with the _virtual host traffic status module_ enabled, including metrics on uptime, memory
- usage, and cache, and more.
-- [PHP-FPM](https://github.com/netdata/go.d.plugin/blob/master/modules/phpfpm/README.md): Collect application
- summary and processes health metrics by scraping the status page (`/status?full`).
-- [TCP endpoints](https://github.com/netdata/go.d.plugin/blob/master/modules/portcheck/README.md): Monitor any
- TCP endpoint's availability and response time.
-- [Spigot Minecraft servers](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/spigotmc/README.md):
- Monitor average ticket rate and number
- of users.
-- [Squid](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/squid/README.md): Monitor client and
- server bandwidth/requests by gathering
- data from the Cache Manager component.
-- [Tengine](https://github.com/netdata/go.d.plugin/blob/master/modules/tengine/README.md): Monitor web server
- statistics using information provided by `ngx_http_reqstat_module`.
-- [Tomcat](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/tomcat/README.md): Collect web
- server performance metrics from the Manager App
- (`/manager/status?XML=true`).
-- [Traefik](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/traefik/README.md): Uses Traefik's
- Health API to provide statistics.
-- [Varnish](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/varnish/README.md): Provides HTTP
- accelerator global, backends (VBE), and
- disks (SMF) statistics using the `varnishstat` tool.
-- [x509 check](https://github.com/netdata/go.d.plugin/blob/master/modules/x509check/README.md): Monitor certificate
- expiration time.
-- [Whois domain expiry](https://github.com/netdata/go.d.plugin/blob/master/modules/whoisquery/README.md): Checks the
- remaining time until a given domain is expired.
-
-## System collectors
-
-The Netdata Agent can collect these system- and hardware-level metrics using a variety of collectors, some of which
-(such as `proc.plugin`) collect multiple types of metrics simultaneously.
-
-### Applications
-
-- [Fail2ban](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/fail2ban/README.md): Parses
- configuration files to detect all jails, then
- uses log files to report ban rates and volume of banned IPs.
-- [Monit](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/monit/README.md): Monitor statuses
- of targets (service-checks) using the XML
- stats interface.
-- [Windows](https://github.com/netdata/go.d.plugin/blob/master/modules/windows/README.md): Collect CPU, memory,
- network, disk, OS, system, and log-in metrics scraping [windows_exporter](https://github.com/prometheus-community/windows_exporter).
-
-### Disks and filesystems
-
-- [BCACHE](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Monitor BCACHE statistics
- with the `proc.plugin` collector.
-- [Block devices](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Gather metrics about
- the health and performance of block
- devices using the `proc.plugin` collector.
-- [Btrfs](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Monitors Btrfs filesystems
- with the `proc.plugin` collector.
-- [Device mapper](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Gather metrics about
- the Linux device mapper with the proc
- collector.
-- [Disk space](https://github.com/netdata/netdata/blob/master/collectors/diskspace.plugin/README.md): Collect disk space
- usage metrics on Linux mount points.
-- [Clock synchronization](https://github.com/netdata/netdata/blob/master/collectors/timex.plugin/README.md): Collect the
- system clock synchronization status on Linux.
-- [Files and directories](https://github.com/netdata/go.d.plugin/blob/master/modules/filecheck/README.md): Gather
- metrics about the existence, modification time, and size of files or directories.
-- [ioping.plugin](https://github.com/netdata/netdata/blob/master/collectors/ioping.plugin/README.md): Measure disk
- read/write latency.
-- [NFS file servers and clients](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md):
- Gather operations, utilization, and space usage
- using the `proc.plugin` collector.
-- [RAID arrays](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Collect health, disk
- status, operation status, and more with the `proc.plugin` collector.
-- [Veritas Volume Manager](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Gather
- metrics about the Veritas Volume Manager (VVM).
-- [ZFS](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Monitor bandwidth and
- utilization of ZFS disks/partitions using the proc
- collector.
+### DNS and DHCP Servers
+
+- [Akamai Edge DNS Traffic](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/akamai_edge_dns_traffic.md)
+
+- [CoreDNS](https://github.com/netdata/go.d.plugin/blob/master/modules/coredns/integrations/coredns.md)
+
+- [DNS query](https://github.com/netdata/go.d.plugin/blob/master/modules/dnsquery/integrations/dns_query.md)
+
+- [DNSBL](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/dnsbl.md)
+
+- [DNSdist](https://github.com/netdata/go.d.plugin/blob/master/modules/dnsdist/integrations/dnsdist.md)
+
+- [Dnsmasq DHCP](https://github.com/netdata/go.d.plugin/blob/master/modules/dnsmasq_dhcp/integrations/dnsmasq_dhcp.md)
+
+- [Dnsmasq](https://github.com/netdata/go.d.plugin/blob/master/modules/dnsmasq/integrations/dnsmasq.md)
+
+- [ISC Bind (RNDC)](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/bind_rndc/integrations/isc_bind_rndc.md)
+
+- [ISC DHCP](https://github.com/netdata/go.d.plugin/blob/master/modules/isc_dhcpd/integrations/isc_dhcp.md)
+
+- [Name Server Daemon](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/nsd/integrations/name_server_daemon.md)
+
+- [NextDNS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/nextdns.md)
+
+- [Pi-hole](https://github.com/netdata/go.d.plugin/blob/master/modules/pihole/integrations/pi-hole.md)
+
+- [PowerDNS Authoritative Server](https://github.com/netdata/go.d.plugin/blob/master/modules/powerdns/integrations/powerdns_authoritative_server.md)
+
+- [PowerDNS Recursor](https://github.com/netdata/go.d.plugin/blob/master/modules/powerdns_recursor/integrations/powerdns_recursor.md)
+
+- [Unbound](https://github.com/netdata/go.d.plugin/blob/master/modules/unbound/integrations/unbound.md)
### eBPF
-- [Files](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/README.md): Provides information about
- how often a system calls kernel
- functions related to file descriptors using the eBPF collector.
-- [Virtual file system (VFS)](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/README.md): Monitor
- IO, errors, deleted objects, and
- more for kernel virtual file systems (VFS) using the eBPF collector.
-- [Processes](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/README.md): Monitor threads, task
- exits, and errors using the eBPF collector.
-
-### Hardware
-
-- [Adaptec RAID](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/adaptec_raid/README.md):
- Monitor logical and physical devices health
- metrics using the `arcconf` tool.
-- [CUPS](https://github.com/netdata/netdata/blob/master/collectors/cups.plugin/README.md): Monitor CUPS.
-- [FreeIPMI](https://github.com/netdata/netdata/blob/master/collectors/freeipmi.plugin/README.md):
- Uses `libipmimonitoring-dev` or `libipmimonitoring-devel` to
- monitor the number of sensors, temperatures, voltages, currents, and more.
-- [Hard drive temperature](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/hddtemp/README.md):
- Monitor the temperature of storage
- devices.
-- [HP Smart Storage Arrays](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/hpssa/README.md):
- Monitor controller, cache module, logical
- and physical drive state, and temperature using the `ssacli` tool.
-- [MegaRAID controllers](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/megacli/README.md):
- Collect adapter, physical drives, and
- battery stats using the `megacli` tool.
-- [NVIDIA GPU](https://github.com/netdata/go.d.plugin/blob/master/modules/nvidia_smi/README.md): Monitor
- performance metrics (memory usage, fan
- speed, pcie bandwidth utilization, temperature, and more) using the `nvidia-smi` tool.
-- [Sensors](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/sensors/README.md): Reads system
- sensors information (temperature, voltage,
- electric current, power, and more) from `/sys/devices/`.
-- [S.M.A.R.T](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/smartd_log/README.md): Reads
- SMART Disk Monitoring daemon logs.
-
-### Memory
-
-- [Available memory](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Tracks changes in
- available RAM using the `proc.plugin` collector.
-- [Committed memory](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Monitor committed
- memory using the `proc.plugin` collector.
-- [Huge pages](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Gather metrics about
- huge pages in Linux and FreeBSD with the
- `proc.plugin` collector.
-- [KSM](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Measure the amount of merging,
- savings, and effectiveness using the
- `proc.plugin` collector.
-- [Numa](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Gather metrics on the number
- of non-uniform memory access (NUMA) events
- every second using the `proc.plugin` collector.
-- [Page faults](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Collect the number of
- memory page faults per second using the
- `proc.plugin` collector.
-- [RAM](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Collect metrics on system RAM,
- available RAM, and more using the
- `proc.plugin` collector.
-- [SLAB](https://github.com/netdata/netdata/blob/master/collectors/slabinfo.plugin/README.md): Collect kernel SLAB
- details on Linux systems.
-- [swap](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Monitor the amount of free
- and used swap at every second using the
- `proc.plugin` collector.
-- [Writeback memory](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Collect how much
- memory is actively being written to disk at
- every second using the `proc.plugin` collector.
-
-### Networks
-
-- [Access points](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/ap/README.md): Visualizes
- data related to access points.
-- [Ping](https://github.com/netdata/go.d.plugin/blob/master/modules/ping/README.md): Measure network latency, jitter and
- packet loss between the monitored node
- and any number of remote network end points.
-- [Netfilter](https://github.com/netdata/netdata/blob/master/collectors/nfacct.plugin/README.md): Collect netfilter
- firewall, connection tracker, and accounting
- metrics using `libmnl` and `libnetfilter_acct`.
-- [Network stack](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Monitor the
- networking stack for errors, TCP connection aborts,
- bandwidth, and more.
-- [Network QoS](https://github.com/netdata/netdata/blob/master/collectors/tc.plugin/README.md): Collect traffic QoS
- metrics (`tc`) of Linux network interfaces.
-- [SYNPROXY](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Monitor entries uses, SYN
- packets received, TCP cookies, and more.
-
-### Operating systems
-
-- [freebsd.plugin](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/README.md): Collect resource
- usage and performance data on FreeBSD systems.
-- [macOS](https://github.com/netdata/netdata/blob/master/collectors/macos.plugin/README.md): Collect resource usage and
- performance data on macOS systems.
-
-### Processes
-
-- [Applications](https://github.com/netdata/netdata/blob/master/collectors/apps.plugin/README.md): Gather CPU, disk,
- memory, network, eBPF, and other metrics per
- application using the `apps.plugin` collector.
-- [systemd](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md): Monitor the CPU and
- memory usage of systemd services using the
- `cgroups.plugin` collector.
-- [systemd unit states](https://github.com/netdata/go.d.plugin/blob/master/modules/systemdunits/README.md): See the
- state (active, inactive, activating, deactivating, failed) of various systemd unit types.
-- [System processes](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Collect metrics
- on system load and total processes running
- using `/proc/loadavg` and the `proc.plugin` collector.
-- [Uptime](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Monitor the uptime of a
- system using the `proc.plugin` collector.
-
-### Resources
-
-- [CPU frequency](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Monitor CPU
- frequency, as set by the `cpufreq` kernel module,
- using the `proc.plugin` collector.
-- [CPU idle](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Measure CPU idle every
- second using the `proc.plugin` collector.
-- [CPU performance](https://github.com/netdata/netdata/blob/master/collectors/perf.plugin/README.md): Collect CPU
- performance metrics using performance monitoring
- units (PMU).
-- [CPU throttling](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Gather metrics
- about thermal throttling using the `/proc/stat`
- module and the `proc.plugin` collector.
-- [CPU utilization](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Capture CPU
- utilization, both system-wide and per-core, using
- the `/proc/stat` module and the `proc.plugin` collector.
-- [Entropy](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Monitor the available
- entropy on a system using the `proc.plugin`
- collector.
-- [Interprocess Communication (IPC)](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md):
- Monitor IPC semaphores and shared memory
- using the `proc.plugin` collector.
-- [Interrupts](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Monitor interrupts per
- second using the `proc.plugin` collector.
-- [IdleJitter](https://github.com/netdata/netdata/blob/master/collectors/idlejitter.plugin/README.md): Measure CPU
- latency and jitter on all operating systems.
-- [SoftIRQs](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Collect metrics on
- SoftIRQs, both system-wide and per-core, using the
- `proc.plugin` collector.
-- [SoftNet](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md): Capture SoftNet events per
- second, both system-wide and per-core,
- using the `proc.plugin` collector.
-
-### Users
-
-- [systemd-logind](https://github.com/netdata/go.d.plugin/blob/master/modules/logind/README.md): Monitor active
- sessions, users, and seats tracked
- by `systemd-logind` or `elogind`.
-- [User/group usage](https://github.com/netdata/netdata/blob/master/collectors/apps.plugin/README.md): Gather CPU, disk,
- memory, network, and other metrics per user
- and user group using the `apps.plugin` collector.
-
-## Netdata collectors
-
-These collectors are recursive in nature, in that they monitor some function of the Netdata Agent itself. Some
-collectors are described only in code and associated charts in Netdata dashboards.
-
-- [ACLK (code only)](https://github.com/netdata/netdata/blob/master/aclk/legacy/aclk_stats.c): View whether a Netdata
- Agent is connected to Netdata Cloud via the [ACLK](https://github.com/netdata/netdata/blob/master/aclk/README.md), the
- volume of queries, process times, and more.
-- [Alarms](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/alarms/README.md): This collector
- creates an
- **Alarms** menu with one line plot showing the alarm states of a Netdata Agent over time.
-- [Anomalies](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/anomalies/README.md): This
- collector uses the
- Python PyOD library to perform unsupervised anomaly detection on your Netdata charts and/or dimensions.
-- [Exporting (code only)](https://github.com/netdata/netdata/blob/master/exporting/send_internal_metrics.c): Gather
- metrics on CPU utilization for
- the [exporting engine](https://github.com/netdata/netdata/blob/master/exporting/README.md), and specific metrics for
- each enabled
- exporting connector.
-- [Global statistics (code only)](https://github.com/netdata/netdata/blob/master/daemon/global_statistics.c): See
- metrics on the CPU utilization, network traffic, volume of web clients, API responses, database engine usage, and
- more.
-
-## Orchestrators
-
-Plugin orchestrators organize and run many of the above collectors.
-
-If you're interested in developing a new collector that you'd like to contribute to Netdata, we highly recommend using
-the `go.d.plugin`.
-
-- [go.d.plugin](https://github.com/netdata/go.d.plugin): An orchestrator for data collection modules written in `go`.
-- [python.d.plugin](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/README.md): An
- orchestrator for data collection modules written in `python` v2/v3.
-- [charts.d.plugin](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/README.md): An
- orchestrator for data collection modules written in `bash` v4+.
-
-## Third-party collectors
-
-These collectors are developed and maintained by third parties and, unlike the other collectors, are not installed by
-default. To use a third-party collector, visit their GitHub/documentation page and follow their installation procedures.
-
-
-Typical third party Python collector installation instructions
-
-In general the below steps should be sufficient to use a third party collector.
-
-1. Download collector code file
- into [folder expected by Netdata](https://github.com/netdata/netdata/blob/master/collectors/plugins.d/README.md#environment-variables).
-2. Download default collector configuration file
- into [folder expected by Netdata](https://github.com/netdata/netdata/blob/master/collectors/plugins.d/README.md#environment-variables).
-3. [Edit configuration file](https://github.com/netdata/netdata/blob/master/docs/collect/enable-configure#configure-a-collector)
- from step 2 if required.
-4. [Enable collector](https://github.com/netdata/netdata/blob/master/docs/collect/enable-configure#enable-a-collector-or-its-orchestrator).
-5. [Restart Netdata](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md)
-
-For example below are the steps to enable
-the [Python ClickHouse collector](https://github.com/netdata/community/tree/main/collectors/python.d.plugin/clickhouse).
-
-```bash
-# download python collector script to /usr/libexec/netdata/python.d/
-$ sudo wget https://raw.githubusercontent.com/netdata/community/main/collectors/python.d.plugin/clickhouse/clickhouse.chart.py -O /usr/libexec/netdata/python.d/clickhouse.chart.py
-
-# (optional) download default .conf to /etc/netdata/python.d/
-$ sudo wget https://raw.githubusercontent.com/netdata/community/main/collectors/python.d.plugin/clickhouse/clickhouse.conf -O /etc/netdata/python.d/clickhouse.conf
-
-# enable collector by adding line a new line with "clickhouse: yes" to /etc/netdata/python.d.conf file
-# this will append to the file if it already exists or create it if not
-$ sudo echo "clickhouse: yes" >> /etc/netdata/python.d.conf
-
-# (optional) edit clickhouse.conf if needed
-$ sudo vi /etc/netdata/python.d/clickhouse.conf
-
-# restart netdata
-# see docs for more information: https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md
-$ sudo systemctl restart netdata
-```
-
-
-
-- [CyberPower UPS](https://github.com/HawtDogFlvrWtr/netdata_cyberpwrups_plugin): Polls CyberPower UPS data using
- PowerPanel® Personal Linux.
-- [Logged-in users](https://github.com/veksh/netdata-numsessions): Collect the number of currently logged-on users.
-- [nextcloud](https://github.com/arnowelzel/netdata-nextcloud): Monitor Nextcloud servers.
-- [nim-netdata-plugin](https://github.com/FedericoCeratto/nim-netdata-plugin): A helper to create native Netdata
- plugins using Nim.
-- [Nvidia GPUs](https://github.com/coraxx/netdata_nv_plugin): Monitor Nvidia GPUs.
-- [Teamspeak 3](https://github.com/coraxx/netdata_ts3_plugin): Pulls active users and bandwidth from TeamSpeak 3
- servers.
-- [SSH](https://github.com/Yaser-Amiri/netdata-ssh-module): Monitor failed authentication requests of an SSH server.
-- [ClickHouse](https://github.com/netdata/community/tree/main/collectors/python.d.plugin/clickhouse):
- Monitor [ClickHouse](https://clickhouse.com/) database.
-- [Ethtool](https://github.com/ghanapunq/netdata_ethtool_plugin): Monitor network interfaces with ethtool.
-- [netdata-needrestart](https://github.com/nodiscc/netdata-needrestart) - Check/graph the number of processes/services/kernels that should be restarted after upgrading packages.
-- [netdata-debsecan](https://github.com/nodiscc/netdata-debsecan) - Check/graph the number of CVEs in currently installed packages.
-- [netdata-logcount](https://github.com/nodiscc/netdata-logcount) - Check/graph the number of syslog messages, by level over time.
-- [netdata-apt](https://github.com/nodiscc/netdata-apt) - Check/graph and alert on the number of upgradeable packages, and available distribution upgrades.
-- [diskquota](https://github.com/netdata/community/tree/main/collectors/python.d.plugin/diskquota) - Monitors the defined quotas on one or more filesystems depending on configuration.
-
-## Etc
-
-- [charts.d example](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/example/README.md): An
- example `charts.d` collector.
-- [python.d example](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/example/README.md): An
- example `python.d` collector.
-- [go.d example](https://github.com/netdata/go.d.plugin/blob/master/modules/example/README.md): An
- example `go.d` collector.
+- [eBPF Cachestat](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_cachestat.md)
+
+- [eBPF DCstat](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_dcstat.md)
+
+- [eBPF Disk](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_disk.md)
+
+- [eBPF Filedescriptor](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_filedescriptor.md)
+
+- [eBPF Filesystem](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_filesystem.md)
+
+- [eBPF Hardirq](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_hardirq.md)
+
+- [eBPF MDflush](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_mdflush.md)
+
+- [eBPF Mount](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_mount.md)
+
+- [eBPF OOMkill](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_oomkill.md)
+
+- [eBPF Process](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_process.md)
+
+- [eBPF Processes](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_processes.md)
+
+- [eBPF SHM](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_shm.md)
+
+- [eBPF SWAP](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_swap.md)
+
+- [eBPF Socket](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_socket.md)
+
+- [eBPF SoftIRQ](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_softirq.md)
+
+- [eBPF Sync](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_sync.md)
+
+- [eBPF VFS](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/integrations/ebpf_vfs.md)
+
+### FreeBSD
+
+- [FreeBSD NFS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/freebsd_nfs.md)
+
+- [FreeBSD RCTL-RACCT](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/freebsd_rctl-racct.md)
+
+- [dev.cpu.0.freq](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/dev.cpu.0.freq.md)
+
+- [dev.cpu.temperature](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/dev.cpu.temperature.md)
+
+- [devstat](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/devstat.md)
+
+- [getifaddrs](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/getifaddrs.md)
+
+- [getmntinfo](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/getmntinfo.md)
+
+- [hw.intrcnt](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/hw.intrcnt.md)
+
+- [ipfw](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/ipfw.md)
+
+- [kern.cp_time](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/kern.cp_time.md)
+
+- [kern.ipc.msq](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/kern.ipc.msq.md)
+
+- [kern.ipc.sem](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/kern.ipc.sem.md)
+
+- [kern.ipc.shm](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/kern.ipc.shm.md)
+
+- [net.inet.icmp.stats](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/net.inet.icmp.stats.md)
+
+- [net.inet.ip.stats](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/net.inet.ip.stats.md)
+
+- [net.inet.tcp.states](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/net.inet.tcp.states.md)
+
+- [net.inet.tcp.stats](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/net.inet.tcp.stats.md)
+
+- [net.inet.udp.stats](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/net.inet.udp.stats.md)
+
+- [net.inet6.icmp6.stats](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/net.inet6.icmp6.stats.md)
+
+- [net.inet6.ip6.stats](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/net.inet6.ip6.stats.md)
+
+- [net.isr](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/net.isr.md)
+
+- [system.ram](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/system.ram.md)
+
+- [uptime](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/uptime.md)
+
+- [vm.loadavg](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/vm.loadavg.md)
+
+- [vm.stats.sys.v_intr](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/vm.stats.sys.v_intr.md)
+
+- [vm.stats.sys.v_soft](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/vm.stats.sys.v_soft.md)
+
+- [vm.stats.sys.v_swtch](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/vm.stats.sys.v_swtch.md)
+
+- [vm.stats.vm.v_pgfaults](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/vm.stats.vm.v_pgfaults.md)
+
+- [vm.stats.vm.v_swappgs](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/vm.stats.vm.v_swappgs.md)
+
+- [vm.swap_info](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/vm.swap_info.md)
+
+- [vm.vmtotal](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/vm.vmtotal.md)
+
+- [zfs](https://github.com/netdata/netdata/blob/master/collectors/freebsd.plugin/integrations/zfs.md)
+
+### FTP Servers
+
+- [ProFTPD](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/proftpd.md)
+
+### Gaming
+
+- [BungeeCord](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/bungeecord.md)
+
+- [CS:GO](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cs:go.md)
+
+- [Minecraft](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/minecraft.md)
+
+- [OpenRCT2](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/openrct2.md)
+
+- [SpigotMC](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md)
+
+- [Steam](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/steam.md)
+
+### Generic Data Collection
+
+- [Custom Exporter](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/custom_exporter.md)
+
+- [Excel spreadsheet](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/excel_spreadsheet.md)
+
+- [Generic Command Line Output](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/generic_command_line_output.md)
+
+- [JetBrains Floating License Server](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/jetbrains_floating_license_server.md)
+
+- [OpenWeatherMap](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/openweathermap.md)
+
+- [Pandas](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/pandas/integrations/pandas.md)
+
+- [Prometheus endpoint](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/prometheus_endpoint.md)
+
+- [SNMP devices](https://github.com/netdata/go.d.plugin/blob/master/modules/snmp/integrations/snmp_devices.md)
+
+- [Shell command](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/shell_command.md)
+
+- [Tankerkoenig API](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/tankerkoenig_api.md)
+
+- [TwinCAT ADS Web Service](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/twincat_ads_web_service.md)
+
+### Hardware Devices and Sensors
+
+- [1-Wire Sensors](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/w1sensor/integrations/1-wire_sensors.md)
+
+- [AM2320](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/am2320/integrations/am2320.md)
+
+- [AMD CPU & GPU](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/amd_cpu_&_gpu.md)
+
+- [AMD GPU](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/amd_gpu.md)
+
+- [ARM HWCPipe](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/arm_hwcpipe.md)
+
+- [CUPS](https://github.com/netdata/netdata/blob/master/collectors/cups.plugin/integrations/cups.md)
+
+- [HDD temperature](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md)
+
+- [HP iLO](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/hp_ilo.md)
+
+- [IBM CryptoExpress (CEX) cards](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ibm_cryptoexpress_cex_cards.md)
+
+- [IBM Z Hardware Management Console](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ibm_z_hardware_management_console.md)
+
+- [IPMI (By SoundCloud)](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ipmi_by_soundcloud.md)
+
+- [Intelligent Platform Management Interface (IPMI)](https://github.com/netdata/netdata/blob/master/collectors/freeipmi.plugin/integrations/intelligent_platform_management_interface_ipmi.md)
+
+- [Linux Sensors (lm-sensors)](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md)
+
+- [Linux Sensors (sysfs)](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md)
+
+- [NVML](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/nvml.md)
+
+- [Nvidia GPU](https://github.com/netdata/go.d.plugin/blob/master/modules/nvidia_smi/integrations/nvidia_gpu.md)
+
+- [Raritan PDU](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/raritan_pdu.md)
+
+- [S.M.A.R.T.](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/smartd_log/integrations/s.m.a.r.t..md)
+
+- [ServerTech](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/servertech.md)
+
+- [Siemens S7 PLC](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/siemens_s7_plc.md)
+
+- [T-Rex NVIDIA GPU Miner](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/t-rex_nvidia_gpu_miner.md)
+
+### IoT Devices
+
+- [Airthings Waveplus air sensor](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/airthings_waveplus_air_sensor.md)
+
+- [Bobcat Miner 300](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/bobcat_miner_300.md)
+
+- [Christ Elektronik CLM5IP power panel](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/christ_elektronik_clm5ip_power_panel.md)
+
+- [CraftBeerPi](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/craftbeerpi.md)
+
+- [Dutch Electricity Smart Meter](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/dutch_electricity_smart_meter.md)
+
+- [Elgato Key Light devices](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/elgato_key_light_devices..md)
+
+- [Energomera smart power meters](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/energomera_smart_power_meters.md)
+
+- [Helium hotspot](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/helium_hotspot.md)
+
+- [Homebridge](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/homebridge.md)
+
+- [Homey](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/homey.md)
+
+- [Jarvis Standing Desk](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/jarvis_standing_desk.md)
+
+- [MP707 USB thermometer](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/mp707_usb_thermometer.md)
+
+- [Modbus protocol](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/modbus_protocol.md)
+
+- [Monnit Sensors MQTT](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/monnit_sensors_mqtt.md)
+
+- [Nature Remo E lite devices](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/nature_remo_e_lite_devices.md)
+
+- [Netatmo sensors](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/netatmo_sensors.md)
+
+- [OpenHAB](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/openhab.md)
+
+- [Personal Weather Station](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/personal_weather_station.md)
+
+- [Philips Hue](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/philips_hue.md)
+
+- [Pimoroni Enviro+](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/pimoroni_enviro+.md)
+
+- [Powerpal devices](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/powerpal_devices.md)
+
+- [Radio Thermostat](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/radio_thermostat.md)
+
+- [SMA Inverters](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/sma_inverters.md)
+
+- [Salicru EQX inverter](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/salicru_eqx_inverter.md)
+
+- [Sense Energy](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/sense_energy.md)
+
+- [Shelly humidity sensor](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/shelly_humidity_sensor.md)
+
+- [Smart meters SML](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/smart_meters_sml.md)
+
+- [Solar logging stick](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/solar_logging_stick.md)
+
+- [SolarEdge inverters](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/solaredge_inverters.md)
+
+- [Solis Ginlong 5G inverters](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/solis_ginlong_5g_inverters.md)
+
+- [Sunspec Solar Energy](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/sunspec_solar_energy.md)
+
+- [TP-Link P110](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/tp-link_p110.md)
+
+- [Tado smart heating solution](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/tado_smart_heating_solution.md)
+
+- [Tesla Powerwall](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/tesla_powerwall.md)
+
+- [Tesla Wall Connector](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/tesla_wall_connector.md)
+
+- [Tesla vehicle](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/tesla_vehicle.md)
+
+- [Xiaomi Mi Flora](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/xiaomi_mi_flora.md)
+
+- [iqAir AirVisual air quality monitors](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/iqair_airvisual_air_quality_monitors.md)
+
+### Kubernetes
+
+- [Cilium Agent](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cilium_agent.md)
+
+- [Cilium Operator](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cilium_operator.md)
+
+- [Cilium Proxy](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cilium_proxy.md)
+
+- [Kubelet](https://github.com/netdata/go.d.plugin/blob/master/modules/k8s_kubelet/integrations/kubelet.md)
+
+- [Kubeproxy](https://github.com/netdata/go.d.plugin/blob/master/modules/k8s_kubeproxy/integrations/kubeproxy.md)
+
+- [Kubernetes Cluster Cloud Cost](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/kubernetes_cluster_cloud_cost.md)
+
+- [Kubernetes Cluster State](https://github.com/netdata/go.d.plugin/blob/master/modules/k8s_state/integrations/kubernetes_cluster_state.md)
+
+- [Kubernetes Containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/integrations/kubernetes_containers.md)
+
+- [Rancher](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/rancher.md)
+
+### Linux Systems
+
+- [CPU performance](https://github.com/netdata/netdata/blob/master/collectors/perf.plugin/integrations/cpu_performance.md)
+
+- [Disk space](https://github.com/netdata/netdata/blob/master/collectors/diskspace.plugin/integrations/disk_space.md)
+
+- [Files and directories](https://github.com/netdata/go.d.plugin/blob/master/modules/filecheck/integrations/files_and_directories.md)
+
+- [OpenRC](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/openrc.md)
+
+#### CPU
+
+- [Interrupts](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/interrupts.md)
+
+- [SoftIRQ statistics](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/softirq_statistics.md)
+
+#### Disk
+
+- [Disk Statistics](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/disk_statistics.md)
+
+- [MD RAID](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/md_raid.md)
+
+##### BTRFS
+
+- [BTRFS](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/btrfs.md)
+
+##### NFS
+
+- [NFS Client](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/nfs_client.md)
+
+- [NFS Server](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/nfs_server.md)
+
+##### ZFS
+
+- [ZFS Adaptive Replacement Cache](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/zfs_adaptive_replacement_cache.md)
+
+- [ZFS Pools](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/zfs_pools.md)
+
+#### Firewall
+
+- [Conntrack](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/conntrack.md)
+
+- [Netfilter](https://github.com/netdata/netdata/blob/master/collectors/nfacct.plugin/integrations/netfilter.md)
+
+- [Synproxy](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/synproxy.md)
+
+- [nftables](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/nftables.md)
+
+#### IPC
+
+- [Inter Process Communication](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/inter_process_communication.md)
+
+#### Kernel
+
+- [Linux kernel SLAB allocator statistics](https://github.com/netdata/netdata/blob/master/collectors/slabinfo.plugin/integrations/linux_kernel_slab_allocator_statistics.md)
+
+- [Power Capping](https://github.com/netdata/netdata/blob/master/collectors/debugfs.plugin/integrations/power_capping.md)
+
+#### Memory
+
+- [Kernel Same-Page Merging](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/kernel_same-page_merging.md)
+
+- [Linux ZSwap](https://github.com/netdata/netdata/blob/master/collectors/debugfs.plugin/integrations/linux_zswap.md)
+
+- [Memory Statistics](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/memory_statistics.md)
+
+- [Memory Usage](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/memory_usage.md)
+
+- [Memory modules (DIMMs)](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/memory_modules_dimms.md)
+
+- [Non-Uniform Memory Access](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/non-uniform_memory_access.md)
+
+- [Page types](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/page_types.md)
+
+- [System Memory Fragmentation](https://github.com/netdata/netdata/blob/master/collectors/debugfs.plugin/integrations/system_memory_fragmentation.md)
+
+- [ZRAM](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/zram.md)
+
+#### Network
+
+- [Access Points](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/ap/integrations/access_points.md)
+
+- [IP Virtual Server](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/ip_virtual_server.md)
+
+- [IPv6 Socket Statistics](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/ipv6_socket_statistics.md)
+
+- [InfiniBand](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/infiniband.md)
+
+- [Network interfaces](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/network_interfaces.md)
+
+- [Network statistics](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/network_statistics.md)
+
+- [SCTP Statistics](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/sctp_statistics.md)
+
+- [Socket statistics](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/socket_statistics.md)
+
+- [Softnet Statistics](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/softnet_statistics.md)
+
+- [Wireless network interfaces](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/wireless_network_interfaces.md)
+
+- [tc QoS classes](https://github.com/netdata/netdata/blob/master/collectors/tc.plugin/integrations/tc_qos_classes.md)
+
+#### Power Supply
+
+- [Power Supply](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/power_supply.md)
+
+#### Pressure
+
+- [Pressure Stall Information](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/pressure_stall_information.md)
+
+#### System
+
+- [Entropy](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/entropy.md)
+
+- [System Load Average](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/system_load_average.md)
+
+- [System Uptime](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/system_uptime.md)
+
+- [System statistics](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/integrations/system_statistics.md)
+
+### Logs Servers
+
+- [AuthLog](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/authlog.md)
+
+- [Fluentd](https://github.com/netdata/go.d.plugin/blob/master/modules/fluentd/integrations/fluentd.md)
+
+- [Graylog Server](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/graylog_server.md)
+
+- [Logstash](https://github.com/netdata/go.d.plugin/blob/master/modules/logstash/integrations/logstash.md)
+
+- [journald](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/journald.md)
+
+- [loki](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/loki.md)
+
+- [mtail](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/mtail.md)
+
+### macOS Systems
+
+- [Apple Time Machine](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/apple_time_machine.md)
+
+- [macOS](https://github.com/netdata/netdata/blob/master/collectors/macos.plugin/integrations/macos.md)
+
+### Mail Servers
+
+- [DMARC](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/dmarc.md)
+
+- [Dovecot](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/dovecot/integrations/dovecot.md)
+
+- [Exim](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/exim/integrations/exim.md)
+
+- [Halon](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/halon.md)
+
+- [Maildir](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/maildir.md)
+
+- [Postfix](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/postfix/integrations/postfix.md)
+
+### Media Services
+
+- [Discourse](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/discourse.md)
+
+- [Icecast](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/icecast/integrations/icecast.md)
+
+- [OBS Studio](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/obs_studio.md)
+
+- [RetroShare](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/retroshare/integrations/retroshare.md)
+
+- [SABnzbd](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/sabnzbd.md)
+
+- [Stream](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/stream.md)
+
+- [Twitch](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/twitch.md)
+
+- [Zulip](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/zulip.md)
+
+### Message Brokers
+
+- [ActiveMQ](https://github.com/netdata/go.d.plugin/blob/master/modules/activemq/integrations/activemq.md)
+
+- [Apache Pulsar](https://github.com/netdata/go.d.plugin/blob/master/modules/pulsar/integrations/apache_pulsar.md)
+
+- [Beanstalk](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/beanstalk/integrations/beanstalk.md)
+
+- [IBM MQ](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ibm_mq.md)
+
+- [Kafka Connect](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/kafka_connect.md)
+
+- [Kafka ZooKeeper](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/kafka_zookeeper.md)
+
+- [Kafka](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/kafka.md)
+
+- [MQTT Blackbox](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/mqtt_blackbox.md)
+
+- [RabbitMQ](https://github.com/netdata/go.d.plugin/blob/master/modules/rabbitmq/integrations/rabbitmq.md)
+
+- [Redis Queue](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/redis_queue.md)
+
+- [VerneMQ](https://github.com/netdata/go.d.plugin/blob/master/modules/vernemq/integrations/vernemq.md)
+
+- [XMPP Server](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/xmpp_server.md)
+
+- [mosquitto](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/mosquitto.md)
+
+### Networking Stack and Network Interfaces
+
+- [8430FT modem](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/8430ft_modem.md)
+
+- [A10 ACOS network devices](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/a10_acos_network_devices.md)
+
+- [Andrews & Arnold line status](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/andrews_&_arnold_line_status.md)
+
+- [Aruba devices](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/aruba_devices.md)
+
+- [Bird Routing Daemon](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/bird_routing_daemon.md)
+
+- [Checkpoint device](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/checkpoint_device.md)
+
+- [Cisco ACI](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cisco_aci.md)
+
+- [Citrix NetScaler](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/citrix_netscaler.md)
+
+- [DDWRT Routers](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ddwrt_routers.md)
+
+- [FRRouting](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/frrouting.md)
+
+- [Fortigate firewall](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/fortigate_firewall.md)
+
+- [Freifunk network](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/freifunk_network.md)
+
+- [Fritzbox network devices](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/fritzbox_network_devices.md)
+
+- [Hitron CGN series CPE](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/hitron_cgn_series_cpe.md)
+
+- [Hitron CODA Cable Modem](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/hitron_coda_cable_modem.md)
+
+- [Huawei devices](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/huawei_devices.md)
+
+- [Keepalived](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/keepalived.md)
+
+- [Meraki dashboard](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/meraki_dashboard.md)
+
+- [MikroTik devices](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/mikrotik_devices.md)
+
+- [Mikrotik RouterOS devices](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/mikrotik_routeros_devices.md)
+
+- [NetFlow](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/netflow.md)
+
+- [NetMeter](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/netmeter.md)
+
+- [Open vSwitch](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/open_vswitch.md)
+
+- [OpenROADM devices](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/openroadm_devices.md)
+
+- [RIPE Atlas](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ripe_atlas.md)
+
+- [SONiC NOS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/sonic_nos.md)
+
+- [SmartRG 808AC Cable Modem](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/smartrg_808ac_cable_modem.md)
+
+- [Starlink (SpaceX)](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/starlink_spacex.md)
+
+- [Traceroute](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/traceroute.md)
+
+- [Ubiquiti UFiber OLT](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ubiquiti_ufiber_olt.md)
+
+- [Zyxel GS1200-8](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/zyxel_gs1200-8.md)
+
+### Incident Management
+
+- [OTRS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/otrs.md)
+
+- [StatusPage](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/statuspage.md)
+
+### Observability
+
+- [Collectd](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/collectd.md)
+
+- [Dynatrace](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/dynatrace.md)
+
+- [Grafana](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/grafana.md)
+
+- [Hubble](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/hubble.md)
+
+- [Naemon](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/naemon.md)
+
+- [Nagios](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/nagios.md)
+
+- [New Relic](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/new_relic.md)
+
+### Other
+
+- [Example collector](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/example/integrations/example_collector.md)
+
+- [GitHub API rate limit](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/github_api_rate_limit.md)
+
+- [GitHub repository](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/github_repository.md)
+
+- [Netdata Agent alarms](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/alarms/integrations/netdata_agent_alarms.md)
+
+- [python.d changefinder](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/changefinder/integrations/python.d_changefinder.md)
+
+- [python.d zscores](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md)
+
+### Processes and System Services
+
+- [Applications](https://github.com/netdata/netdata/blob/master/collectors/apps.plugin/integrations/applications.md)
+
+- [Supervisor](https://github.com/netdata/go.d.plugin/blob/master/modules/supervisord/integrations/supervisor.md)
+
+- [User Groups](https://github.com/netdata/netdata/blob/master/collectors/apps.plugin/integrations/user_groups.md)
+
+- [Users](https://github.com/netdata/netdata/blob/master/collectors/apps.plugin/integrations/users.md)
+
+### Provisioning Systems
+
+- [BOSH](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/bosh.md)
+
+- [Cloud Foundry Firehose](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cloud_foundry_firehose.md)
+
+- [Cloud Foundry](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cloud_foundry.md)
+
+- [Spacelift](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/spacelift.md)
+
+### Search Engines
+
+- [Elasticsearch](https://github.com/netdata/go.d.plugin/blob/master/modules/elasticsearch/integrations/elasticsearch.md)
+
+- [Meilisearch](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/meilisearch.md)
+
+- [OpenSearch](https://github.com/netdata/go.d.plugin/blob/master/modules/elasticsearch/integrations/opensearch.md)
+
+- [Solr](https://github.com/netdata/go.d.plugin/blob/master/modules/solr/integrations/solr.md)
+
+- [Sphinx](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/sphinx.md)
+
+### Security Systems
+
+- [Certificate Transparency](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/certificate_transparency.md)
+
+- [ClamAV daemon](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/clamav_daemon.md)
+
+- [Clamscan results](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/clamscan_results.md)
+
+- [Crowdsec](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/crowdsec.md)
+
+- [Honeypot](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/honeypot.md)
+
+- [Lynis audit reports](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/lynis_audit_reports.md)
+
+- [OpenVAS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/openvas.md)
+
+- [SSL Certificate](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ssl_certificate.md)
+
+- [Suricata](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/suricata.md)
+
+- [Vault PKI](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/vault_pki.md)
+
+### Service Discovery / Registry
+
+- [Consul](https://github.com/netdata/go.d.plugin/blob/master/modules/consul/integrations/consul.md)
+
+- [Kafka Consumer Lag](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/kafka_consumer_lag.md)
+
+- [ZooKeeper](https://github.com/netdata/go.d.plugin/blob/master/modules/zookeeper/integrations/zookeeper.md)
+
+- [etcd](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/etcd.md)
+
+### Storage, Mount Points and Filesystems
+
+- [AdaptecRAID](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/adaptec_raid/integrations/adaptecraid.md)
+
+- [Altaro Backup](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/altaro_backup.md)
+
+- [Borg backup](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/borg_backup.md)
+
+- [CVMFS clients](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cvmfs_clients.md)
+
+- [Ceph](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/ceph/integrations/ceph.md)
+
+- [Dell EMC Isilon cluster](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/dell_emc_isilon_cluster.md)
+
+- [Dell EMC ScaleIO](https://github.com/netdata/go.d.plugin/blob/master/modules/scaleio/integrations/dell_emc_scaleio.md)
+
+- [Dell EMC XtremIO cluster](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/dell_emc_xtremio_cluster.md)
+
+- [Dell PowerMax](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/dell_powermax.md)
+
+- [EOS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/eos.md)
+
+- [Generic storage enclosure tool](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/generic_storage_enclosure_tool.md)
+
+- [HDSentinel](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/hdsentinel.md)
+
+- [HP Smart Storage Arrays](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/hpssa/integrations/hp_smart_storage_arrays.md)
+
+- [Hadoop Distributed File System (HDFS)](https://github.com/netdata/go.d.plugin/blob/master/modules/hdfs/integrations/hadoop_distributed_file_system_hdfs.md)
+
+- [IBM Spectrum Virtualize](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ibm_spectrum_virtualize.md)
+
+- [IBM Spectrum](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/ibm_spectrum.md)
+
+- [IPFS](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/ipfs/integrations/ipfs.md)
+
+- [Lagerist Disk latency](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/lagerist_disk_latency.md)
+
+- [MegaCLI](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/megacli/integrations/megacli.md)
+
+- [MogileFS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/mogilefs.md)
+
+- [NVMe devices](https://github.com/netdata/go.d.plugin/blob/master/modules/nvme/integrations/nvme_devices.md)
+
+- [NetApp Solidfire](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/netapp_solidfire.md)
+
+- [NetApp ONTAP API](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/netapp_ontap_api.md)
+
+- [Samba](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/samba/integrations/samba.md)
+
+- [Starwind VSAN VSphere Edition](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/starwind_vsan_vsphere_edition.md)
+
+- [Storidge](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/storidge.md)
+
+- [Synology ActiveBackup](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/synology_activebackup.md)
+
+### Synthetic Checks
+
+- [Blackbox](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/blackbox.md)
+
+- [Domain expiration date](https://github.com/netdata/go.d.plugin/blob/master/modules/whoisquery/integrations/domain_expiration_date.md)
+
+- [HTTP Endpoints](https://github.com/netdata/go.d.plugin/blob/master/modules/httpcheck/integrations/http_endpoints.md)
+
+- [IOPing](https://github.com/netdata/netdata/blob/master/collectors/ioping.plugin/integrations/ioping.md)
+
+- [Idle OS Jitter](https://github.com/netdata/netdata/blob/master/collectors/idlejitter.plugin/integrations/idle_os_jitter.md)
+
+- [Monit](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/monit/integrations/monit.md)
+
+- [Ping](https://github.com/netdata/go.d.plugin/blob/master/modules/ping/integrations/ping.md)
+
+- [Pingdom](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/pingdom.md)
+
+- [Site 24x7](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/site_24x7.md)
+
+- [TCP Endpoints](https://github.com/netdata/go.d.plugin/blob/master/modules/portcheck/integrations/tcp_endpoints.md)
+
+- [UptimeRobot](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/uptimerobot.md)
+
+- [X.509 certificate](https://github.com/netdata/go.d.plugin/blob/master/modules/x509check/integrations/x.509_certificate.md)
+
+### System Clock and NTP
+
+- [Chrony](https://github.com/netdata/go.d.plugin/blob/master/modules/chrony/integrations/chrony.md)
+
+- [NTPd](https://github.com/netdata/go.d.plugin/blob/master/modules/ntpd/integrations/ntpd.md)
+
+- [Timex](https://github.com/netdata/netdata/blob/master/collectors/timex.plugin/integrations/timex.md)
+
+### Systemd
+
+- [Systemd Services](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/integrations/systemd_services.md)
+
+- [Systemd Units](https://github.com/netdata/go.d.plugin/blob/master/modules/systemdunits/integrations/systemd_units.md)
+
+- [systemd-logind users](https://github.com/netdata/go.d.plugin/blob/master/modules/logind/integrations/systemd-logind_users.md)
+
+### Task Queues
+
+- [Celery](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/celery.md)
+
+- [Mesos](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/mesos.md)
+
+- [Slurm](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/slurm.md)
+
+### Telephony Servers
+
+- [GTP](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/gtp.md)
+
+- [Kannel](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/kannel.md)
+
+- [OpenSIPS](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/opensips/integrations/opensips.md)
+
+### UPS
+
+- [APC UPS](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/apcupsd/integrations/apc_ups.md)
+
+- [Eaton UPS](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/eaton_ups.md)
+
+- [UPS (NUT)](https://github.com/netdata/go.d.plugin/blob/master/modules/upsd/integrations/ups_nut.md)
+
+### VPNs
+
+- [Fastd](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/fastd.md)
+
+- [Libreswan](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/libreswan/integrations/libreswan.md)
+
+- [OpenVPN status log](https://github.com/netdata/go.d.plugin/blob/master/modules/openvpn_status_log/integrations/openvpn_status_log.md)
+
+- [OpenVPN](https://github.com/netdata/go.d.plugin/blob/master/modules/openvpn/integrations/openvpn.md)
+
+- [SoftEther VPN Server](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/softether_vpn_server.md)
+
+- [Speedify CLI](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/speedify_cli.md)
+
+- [Tor](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/tor/integrations/tor.md)
+
+- [WireGuard](https://github.com/netdata/go.d.plugin/blob/master/modules/wireguard/integrations/wireguard.md)
+
+- [strongSwan](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/strongswan.md)
+
+### Web Servers and Web Proxies
+
+- [APIcast](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/apicast.md)
+
+- [Apache](https://github.com/netdata/go.d.plugin/blob/master/modules/apache/integrations/apache.md)
+
+- [Clash](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/clash.md)
+
+- [Cloudflare PCAP](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/cloudflare_pcap.md)
+
+- [Envoy](https://github.com/netdata/go.d.plugin/blob/master/modules/envoy/integrations/envoy.md)
+
+- [Gobetween](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/gobetween.md)
+
+- [HAProxy](https://github.com/netdata/go.d.plugin/blob/master/modules/haproxy/integrations/haproxy.md)
+
+- [HHVM](https://github.com/netdata/go.d.plugin/blob/master/modules/prometheus/integrations/hhvm.md)
+
+- [HTTPD](https://github.com/netdata/go.d.plugin/blob/master/modules/apache/integrations/httpd.md)
+
+- [Lighttpd](https://github.com/netdata/go.d.plugin/blob/master/modules/lighttpd/integrations/lighttpd.md)
+
+- [Litespeed](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/litespeed/integrations/litespeed.md)
+
+- [NGINX Plus](https://github.com/netdata/go.d.plugin/blob/master/modules/nginxplus/integrations/nginx_plus.md)
+
+- [NGINX VTS](https://github.com/netdata/go.d.plugin/blob/master/modules/nginxvts/integrations/nginx_vts.md)
+
+- [NGINX](https://github.com/netdata/go.d.plugin/blob/master/modules/nginx/integrations/nginx.md)
+
+- [PHP-FPM](https://github.com/netdata/go.d.plugin/blob/master/modules/phpfpm/integrations/php-fpm.md)
+
+- [Squid log files](https://github.com/netdata/go.d.plugin/blob/master/modules/squidlog/integrations/squid_log_files.md)
+
+- [Squid](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/squid/integrations/squid.md)
+
+- [Tengine](https://github.com/netdata/go.d.plugin/blob/master/modules/tengine/integrations/tengine.md)
+
+- [Tomcat](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/tomcat/integrations/tomcat.md)
+
+- [Traefik](https://github.com/netdata/go.d.plugin/blob/master/modules/traefik/integrations/traefik.md)
+
+- [Varnish](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/varnish/integrations/varnish.md)
+
+- [Web server log files](https://github.com/netdata/go.d.plugin/blob/master/modules/weblog/integrations/web_server_log_files.md)
+
+- [uWSGI](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/uwsgi/integrations/uwsgi.md)
+
+### Windows Systems
+
+- [Active Directory](https://github.com/netdata/go.d.plugin/blob/master/modules/windows/integrations/active_directory.md)
+
+- [Hyper-V](https://github.com/netdata/go.d.plugin/blob/master/modules/windows/integrations/hyperv.md)
+
+- [MS Exchange](https://github.com/netdata/go.d.plugin/blob/master/modules/windows/integrations/ms_exchange.md)
+
+- [MS SQL Server](https://github.com/netdata/go.d.plugin/blob/master/modules/windows/integrations/ms_sql_server.md)
+
+- [NET Framework](https://github.com/netdata/go.d.plugin/blob/master/modules/windows/integrations/net_framework.md)
+
+- [Windows](https://github.com/netdata/go.d.plugin/blob/master/modules/windows/integrations/windows.md)
diff --git a/collectors/Makefile.am b/collectors/Makefile.am
index d477e5b80ec6e5..1bbb2e0ef8e228 100644
--- a/collectors/Makefile.am
+++ b/collectors/Makefile.am
@@ -15,6 +15,7 @@ SUBDIRS = \
freebsd.plugin \
freeipmi.plugin \
idlejitter.plugin \
+ log2journal \
macos.plugin \
nfacct.plugin \
xenstat.plugin \
diff --git a/collectors/all.h b/collectors/all.h
index 22b75aaaa74ef7..38241dfa982a81 100644
--- a/collectors/all.h
+++ b/collectors/all.h
@@ -266,65 +266,76 @@
// IP STACK
-#define NETDATA_CHART_PRIO_IP_ERRORS 4100
-#define NETDATA_CHART_PRIO_IP_TCP_CONNABORTS 4210
-#define NETDATA_CHART_PRIO_IP_TCP_SYN_QUEUE 4215
-#define NETDATA_CHART_PRIO_IP_TCP_ACCEPT_QUEUE 4216
-#define NETDATA_CHART_PRIO_IP_TCP_REORDERS 4220
-#define NETDATA_CHART_PRIO_IP_TCP_OFO 4250
-#define NETDATA_CHART_PRIO_IP_TCP_SYNCOOKIES 4260
-#define NETDATA_CHART_PRIO_IP_TCP_MEM 4290
-#define NETDATA_CHART_PRIO_IP_BCAST 4500
-#define NETDATA_CHART_PRIO_IP_BCAST_PACKETS 4510
-#define NETDATA_CHART_PRIO_IP_MCAST 4600
-#define NETDATA_CHART_PRIO_IP_MCAST_PACKETS 4610
-#define NETDATA_CHART_PRIO_IP_ECN 4700
+#define NETDATA_CHART_PRIO_IP_TCP_PACKETS 4200
+#define NETDATA_CHART_PRIO_IP_TCP_ERRORS 4210
+#define NETDATA_CHART_PRIO_IP_TCP_ESTABLISHED_CONNS 4220
+#define NETDATA_CHART_PRIO_IP_TCP_OPENS 4220
+#define NETDATA_CHART_PRIO_IP_TCP_HANDSHAKE 4230
+#define NETDATA_CHART_PRIO_IP_TCP_CONNABORTS 4240
+#define NETDATA_CHART_PRIO_IP_TCP_SYN_QUEUE 4250
+#define NETDATA_CHART_PRIO_IP_TCP_ACCEPT_QUEUE 4260
+#define NETDATA_CHART_PRIO_IP_TCP_REORDERS 4270
+#define NETDATA_CHART_PRIO_IP_TCP_OFO 4280
+#define NETDATA_CHART_PRIO_IP_TCP_SYNCOOKIES 4290
+#define NETDATA_CHART_PRIO_IP_TCP_MEM_PRESSURE 4300
+#define NETDATA_CHART_PRIO_IP_SOCKETS 4310
// IPv4
-#define NETDATA_CHART_PRIO_IPV4_SOCKETS 5100
-#define NETDATA_CHART_PRIO_IPV4_PACKETS 5130
-#define NETDATA_CHART_PRIO_IPV4_ERRORS 5150
-#define NETDATA_CHART_PRIO_IPV4_ICMP 5170
-#define NETDATA_CHART_PRIO_IPV4_TCP 5200
-#define NETDATA_CHART_PRIO_IPV4_TCP_SOCKETS 5201
-#define NETDATA_CHART_PRIO_IPV4_TCP_MEM 5290
-#define NETDATA_CHART_PRIO_IPV4_UDP 5300
-#define NETDATA_CHART_PRIO_IPV4_UDP_MEM 5390
-#define NETDATA_CHART_PRIO_IPV4_UDPLITE 5400
+#define NETDATA_CHART_PRIO_IPV4_PACKETS 5000
+#define NETDATA_CHART_PRIO_IPV4_ERRORS 5050
+#define NETDATA_CHART_PRIO_IPV4_BCAST 5100
+#define NETDATA_CHART_PRIO_IPV4_BCAST_PACKETS 5105
+#define NETDATA_CHART_PRIO_IPV4_MCAST 5150
+#define NETDATA_CHART_PRIO_IPV4_MCAST_PACKETS 5155
+#define NETDATA_CHART_PRIO_IPV4_TCP_SOCKETS 5180
+#define NETDATA_CHART_PRIO_IPV4_TCP_SOCKETS_MEM 5185
+#define NETDATA_CHART_PRIO_IPV4_ICMP_PACKETS 5200
+#define NETDATA_CHART_PRIO_IPV4_ICMP_MESSAGES 5205
+#define NETDATA_CHART_PRIO_IPV4_ICMP_ERRORS 5210
+#define NETDATA_CHART_PRIO_IPV4_UDP_PACKETS 5250
+#define NETDATA_CHART_PRIO_IPV4_UDP_ERRORS 5255
+#define NETDATA_CHART_PRIO_IPV4_UDP_SOCKETS 5260
+#define NETDATA_CHART_PRIO_IPV4_UDP_SOCKETS_MEM 5265
+#define NETDATA_CHART_PRIO_IPV4_UDPLITE_PACKETS 5300
+#define NETDATA_CHART_PRIO_IPV4_UDPLITE_ERRORS 5305
+#define NETDATA_CHART_PRIO_IPV4_UDPLITE_SOCKETS 5310
+#define NETDATA_CHART_PRIO_IPV4_ECN 5350
+#define NETDATA_CHART_PRIO_IPV4_FRAGMENTS_IN 5400
+#define NETDATA_CHART_PRIO_IPV4_FRAGMENTS_OUT 5405
+#define NETDATA_CHART_PRIO_IPV4_FRAGMENTS_SOCKETS 5410
+#define NETDATA_CHART_PRIO_IPV4_FRAGMENTS_SOCKETS_MEM 5415
#define NETDATA_CHART_PRIO_IPV4_RAW 5450
-#define NETDATA_CHART_PRIO_IPV4_FRAGMENTS 5460
-#define NETDATA_CHART_PRIO_IPV4_FRAGMENTS_MEM 5470
// IPv6
-
-#define NETDATA_CHART_PRIO_IPV6_PACKETS 6200
-#define NETDATA_CHART_PRIO_IPV6_ECT 6210
-#define NETDATA_CHART_PRIO_IPV6_ERRORS 6300
-#define NETDATA_CHART_PRIO_IPV6_FRAGMENTS 6400
-#define NETDATA_CHART_PRIO_IPV6_FRAGSOUT 6401
-#define NETDATA_CHART_PRIO_IPV6_FRAGSIN 6402
-#define NETDATA_CHART_PRIO_IPV6_TCP 6500
-#define NETDATA_CHART_PRIO_IPV6_UDP 6600
-#define NETDATA_CHART_PRIO_IPV6_UDP_PACKETS 6601
-#define NETDATA_CHART_PRIO_IPV6_UDP_ERRORS 6610
-#define NETDATA_CHART_PRIO_IPV6_UDPLITE 6700
-#define NETDATA_CHART_PRIO_IPV6_UDPLITE_PACKETS 6701
-#define NETDATA_CHART_PRIO_IPV6_UDPLITE_ERRORS 6710
-#define NETDATA_CHART_PRIO_IPV6_RAW 6800
-#define NETDATA_CHART_PRIO_IPV6_BCAST 6840
-#define NETDATA_CHART_PRIO_IPV6_MCAST 6850
-#define NETDATA_CHART_PRIO_IPV6_MCAST_PACKETS 6851
-#define NETDATA_CHART_PRIO_IPV6_ICMP 6900
-#define NETDATA_CHART_PRIO_IPV6_ICMP_REDIR 6910
-#define NETDATA_CHART_PRIO_IPV6_ICMP_ERRORS 6920
-#define NETDATA_CHART_PRIO_IPV6_ICMP_ECHOS 6930
-#define NETDATA_CHART_PRIO_IPV6_ICMP_GROUPMEMB 6940
-#define NETDATA_CHART_PRIO_IPV6_ICMP_ROUTER 6950
-#define NETDATA_CHART_PRIO_IPV6_ICMP_NEIGHBOR 6960
-#define NETDATA_CHART_PRIO_IPV6_ICMP_LDV2 6970
-#define NETDATA_CHART_PRIO_IPV6_ICMP_TYPES 6980
-
+#define NETDATA_CHART_PRIO_IPV6_PACKETS 6000
+#define NETDATA_CHART_PRIO_IPV6_ERRORS 6005
+#define NETDATA_CHART_PRIO_IPV6_BCAST 6050
+#define NETDATA_CHART_PRIO_IPV6_MCAST 6100
+#define NETDATA_CHART_PRIO_IPV6_MCAST_PACKETS 6105
+#define NETDATA_CHART_PRIO_IPV6_TCP_SOCKETS 6140
+#define NETDATA_CHART_PRIO_IPV6_ICMP 6150
+#define NETDATA_CHART_PRIO_IPV6_ICMP_REDIR 6155
+#define NETDATA_CHART_PRIO_IPV6_ICMP_ERRORS 6160
+#define NETDATA_CHART_PRIO_IPV6_ICMP_ECHOS 6165
+#define NETDATA_CHART_PRIO_IPV6_ICMP_GROUPMEMB 6170
+#define NETDATA_CHART_PRIO_IPV6_ICMP_ROUTER 6180
+#define NETDATA_CHART_PRIO_IPV6_ICMP_NEIGHBOR 6185
+#define NETDATA_CHART_PRIO_IPV6_ICMP_LDV2 6190
+#define NETDATA_CHART_PRIO_IPV6_ICMP_TYPES 6195
+#define NETDATA_CHART_PRIO_IPV6_UDP 6200
+#define NETDATA_CHART_PRIO_IPV6_UDP_PACKETS 6205
+#define NETDATA_CHART_PRIO_IPV6_UDP_ERRORS 6210
+#define NETDATA_CHART_PRIO_IPV6_UDP_SOCKETS 6215
+#define NETDATA_CHART_PRIO_IPV6_UDPLITE 6250
+#define NETDATA_CHART_PRIO_IPV6_UDPLITE_PACKETS 6255
+#define NETDATA_CHART_PRIO_IPV6_UDPLITE_ERRORS 6260
+#define NETDATA_CHART_PRIO_IPV6_UDPLITE_SOCKETS 6265
+#define NETDATA_CHART_PRIO_IPV6_ECT 6300
+#define NETDATA_CHART_PRIO_IPV6_FRAGSIN 6350
+#define NETDATA_CHART_PRIO_IPV6_FRAGSOUT 6355
+#define NETDATA_CHART_PRIO_IPV6_FRAGMENTS_SOCKETS 6360
+#define NETDATA_CHART_PRIO_IPV6_RAW_SOCKETS 6400
// Network interfaces
@@ -390,6 +401,11 @@
#define NETDATA_CHART_PRIO_STATSD_PRIVATE 90000 // many charts
+// Logs Management
+
+#define NETDATA_CHART_PRIO_LOGS_BASE 95000 // many charts
+#define NETDATA_CHART_PRIO_LOGS_STATS_BASE 160000 // logsmanagement stats in "Netdata Monitoring"
+
// PCI
#define NETDATA_CHART_PRIO_PCI_AER 100000
@@ -403,7 +419,8 @@
// [ml] charts
#define ML_CHART_PRIO_DIMENSIONS 39181
#define ML_CHART_PRIO_ANOMALY_RATE 39182
-#define ML_CHART_PRIO_DETECTOR_EVENTS 39183
+#define ML_CHART_PRIO_TYPE_ANOMALY_RATE 39183
+#define ML_CHART_PRIO_DETECTOR_EVENTS 39184
// [netdata.ml] charts
#define NETDATA_ML_CHART_RUNNING 890001
diff --git a/collectors/apps.plugin/apps_groups.conf b/collectors/apps.plugin/apps_groups.conf
index 659bd0f0316260..195536a0abbd91 100644
--- a/collectors/apps.plugin/apps_groups.conf
+++ b/collectors/apps.plugin/apps_groups.conf
@@ -83,13 +83,15 @@ xenstat.plugin: xenstat.plugin
perf.plugin: perf.plugin
charts.d.plugin: *charts.d.plugin*
python.d.plugin: *python.d.plugin*
+systemd-journal.plugin:*systemd-journal.plugin*
tc-qos-helper: *tc-qos-helper.sh*
fping: fping
ioping: ioping
go.d.plugin: *go.d.plugin*
-slabinfo.plugin: slabinfo.plugin
+slabinfo.plugin: *slabinfo.plugin*
ebpf.plugin: *ebpf.plugin*
debugfs.plugin: *debugfs.plugin*
+logs-management.plugin: *logs-management.plugin*
# agent-service-discovery
agent_sd: agent_sd
@@ -136,7 +138,7 @@ modem: ModemManager
netmanager: NetworkManager nm* systemd-networkd networkctl netplan connmand wicked* avahi-autoipd networkd-dispatcher
firewall: firewalld ufw nft
tor: tor
-bluetooth: bluetooth bluez bluedevil obexd
+bluetooth: bluetooth bluetoothd bluez bluedevil obexd
# -----------------------------------------------------------------------------
# high availability and balancers
@@ -159,7 +161,7 @@ chat: irssi *vines* *prosody* murmurd
# -----------------------------------------------------------------------------
# monitoring
-logs: ulogd* syslog* rsyslog* logrotate systemd-journald rotatelogs sysklogd metalog
+logs: ulogd* syslog* rsyslog* logrotate *systemd-journal* rotatelogs sysklogd metalog
nms: snmpd vnstatd smokeping zabbix* munin* mon openhpid tailon nrpe
monit: monit
splunk: splunkd
@@ -209,7 +211,7 @@ proxmox-ve: pve* spiceproxy
# -----------------------------------------------------------------------------
# containers & virtual machines
-containers: lxc* docker* balena*
+containers: lxc* docker* balena* containerd
VMs: vbox* VBox* qemu* kvm*
libvirt: virtlogd virtqemud virtstoraged virtnetworkd virtlockd virtinterfaced
libvirt: virtnodedevd virtproxyd virtsecretd libvirtd
@@ -238,7 +240,7 @@ dhcp: *dhcp* dhclient
# -----------------------------------------------------------------------------
# name servers and clients
-dns: named unbound nsd pdns_server knotd gdnsd yadifad dnsmasq systemd-resolve* pihole* avahi-daemon avahi-dnsconfd
+dns: named unbound nsd pdns_server knotd gdnsd yadifad dnsmasq *systemd-resolve* pihole* avahi-daemon avahi-dnsconfd
dnsdist: dnsdist
# -----------------------------------------------------------------------------
@@ -271,7 +273,7 @@ backup: rsync lsyncd bacula* borg rclone
# -----------------------------------------------------------------------------
# cron
-cron: cron* atd anacron systemd-cron* incrond
+cron: cron* atd anacron *systemd-cron* incrond
# -----------------------------------------------------------------------------
# UPS
@@ -319,7 +321,7 @@ airflow: *airflow*
# -----------------------------------------------------------------------------
# GUI
-X: X Xorg xinit xdm Xwayland xsettingsd
+X: X Xorg xinit xdm Xwayland xsettingsd touchegg
wayland: swaylock swayidle waypipe wayvnc
kde: *kdeinit* kdm sddm plasmashell startplasma-* kwin* kwallet* krunner kactivitymanager*
gnome: gnome-* gdm gconf* mutter
@@ -353,11 +355,11 @@ kswapd: kswapd
zswap: zswap
kcompactd: kcompactd
-system: systemd-* udisks* udevd* *udevd ipv6_addrconf dbus-* rtkit*
+system: systemd* udisks* udevd* *udevd ipv6_addrconf dbus-* rtkit*
system: mdadm acpid uuidd upowerd elogind* eudev mdev lvmpolld dmeventd
system: accounts-daemon rngd haveged rasdaemon irqbalance start-stop-daemon
system: supervise-daemon openrc* init runit runsvdir runsv auditd lsmd
-system: abrt* nscd rtkit-daemon gpg-agent usbguard*
+system: abrt* nscd rtkit-daemon gpg-agent usbguard* boltd geoclue
kernel: kworker kthreadd kauditd lockd khelper kdevtmpfs khungtaskd rpciod
kernel: fsnotify_mark kthrotld deferwq scsi_* kdmflush oom_reaper kdevtempfs
@@ -380,6 +382,7 @@ rabbitmq: *rabbitmq*
sidekiq: *sidekiq*
java: java
ipfs: ipfs
+erlang: beam.smp
node: node
factorio: factorio
diff --git a/collectors/apps.plugin/apps_plugin.c b/collectors/apps.plugin/apps_plugin.c
index d25ae3f9bd2148..ecfea1f6f7095e 100644
--- a/collectors/apps.plugin/apps_plugin.c
+++ b/collectors/apps.plugin/apps_plugin.c
@@ -265,10 +265,12 @@ struct target {
uint32_t idhash;
char name[MAX_NAME + 1];
-
+ char clean_name[MAX_NAME + 1]; // sanitized name used in chart id (need to replace at least dots)
uid_t uid;
gid_t gid;
+ bool is_other;
+
kernel_uint_t minflt;
kernel_uint_t cminflt;
kernel_uint_t majflt;
@@ -782,7 +784,8 @@ static struct target *get_users_target(uid_t uid) {
snprintfz(w->name, MAX_NAME, "%s", pw->pw_name);
}
- netdata_fix_chart_name(w->name);
+ strncpyz(w->clean_name, w->name, MAX_NAME);
+ netdata_fix_chart_name(w->clean_name);
w->uid = uid;
@@ -830,7 +833,8 @@ struct target *get_groups_target(gid_t gid)
snprintfz(w->name, MAX_NAME, "%s", gr->gr_name);
}
- netdata_fix_chart_name(w->name);
+ strncpyz(w->clean_name, w->name, MAX_NAME);
+ netdata_fix_chart_name(w->clean_name);
w->gid = gid;
@@ -899,6 +903,14 @@ static struct target *get_apps_groups_target(const char *id, struct target *targ
else
// copy the id
strncpyz(w->name, nid, MAX_NAME);
+
+ // dots are used to distinguish chart type and id in streaming, so we should replace them
+ strncpyz(w->clean_name, w->name, MAX_NAME);
+ netdata_fix_chart_name(w->clean_name);
+ for (char *d = w->clean_name; *d; d++) {
+ if (*d == '.')
+ *d = '_';
+ }
strncpyz(w->compare, nid, MAX_COMPARE_NAME);
size_t len = strlen(w->compare);
@@ -997,6 +1009,7 @@ static int read_apps_groups_conf(const char *path, const char *file)
apps_groups_default_target = get_apps_groups_target("p+!o@w#e$i^r&7*5(-i)l-o_", NULL, "other"); // match nothing
if(!apps_groups_default_target)
fatal("Cannot create default target");
+ apps_groups_default_target->is_other = true;
// allow the user to override group 'other'
if(apps_groups_default_target->target)
@@ -1457,17 +1470,17 @@ static inline int read_proc_pid_limits(struct pid_stat *p, void *ptr) {
netdata_log_info(
"FDS_LIMITS: PID %d (%s) is using "
"%0.2f %% of its fds limits, "
- "open fds = %llu ("
- "files = %llu, "
- "pipes = %llu, "
- "sockets = %llu, "
- "inotifies = %llu, "
- "eventfds = %llu, "
- "timerfds = %llu, "
- "signalfds = %llu, "
- "eventpolls = %llu "
- "other = %llu "
- "), open fds limit = %llu, "
+ "open fds = %"PRIu64 "("
+ "files = %"PRIu64 ", "
+ "pipes = %"PRIu64 ", "
+ "sockets = %"PRIu64", "
+ "inotifies = %"PRIu64", "
+ "eventfds = %"PRIu64", "
+ "timerfds = %"PRIu64", "
+ "signalfds = %"PRIu64", "
+ "eventpolls = %"PRIu64" "
+ "other = %"PRIu64" "
+ "), open fds limit = %"PRIu64", "
"%s, "
"original line [%s]",
p->pid, p->comm, p->openfds_limits_percent, all_fds,
@@ -2460,7 +2473,7 @@ static inline int debug_print_process_and_parents(struct pid_stat *p, usec_t tim
for(i = 0; i < indent ;i++) buffer[i] = ' ';
buffer[i] = '\0';
- fprintf(stderr, " %s %s%s (%d %s %llu"
+ fprintf(stderr, " %s %s%s (%d %s %"PRIu64""
, buffer
, prefix
, p->comm
@@ -3431,8 +3444,8 @@ static void calculate_netdata_statistics(void) {
// ----------------------------------------------------------------------------
// update chart dimensions
-static inline void send_BEGIN(const char *type, const char *id, usec_t usec) {
- fprintf(stdout, "BEGIN %s.%s %llu\n", type, id, usec);
+static inline void send_BEGIN(const char *type, const char *name,const char *metric, usec_t usec) {
+ fprintf(stdout, "BEGIN %s.%s_%s %" PRIu64 "\n", type, name, metric, usec);
}
static inline void send_SET(const char *name, kernel_uint_t value) {
@@ -3440,7 +3453,7 @@ static inline void send_SET(const char *name, kernel_uint_t value) {
}
static inline void send_END(void) {
- fprintf(stdout, "END\n");
+ fprintf(stdout, "END\n\n");
}
void send_resource_usage_to_netdata(usec_t dt) {
@@ -3518,11 +3531,11 @@ void send_resource_usage_to_netdata(usec_t dt) {
}
fprintf(stdout,
- "BEGIN netdata.apps_cpu %llu\n"
- "SET user = %llu\n"
- "SET system = %llu\n"
+ "BEGIN netdata.apps_cpu %"PRIu64"\n"
+ "SET user = %"PRIu64"\n"
+ "SET system = %"PRIu64"\n"
"END\n"
- "BEGIN netdata.apps_sizes %llu\n"
+ "BEGIN netdata.apps_sizes %"PRIu64"\n"
"SET calls = %zu\n"
"SET files = %zu\n"
"SET filenames = %zu\n"
@@ -3549,7 +3562,7 @@ void send_resource_usage_to_netdata(usec_t dt) {
);
fprintf(stdout,
- "BEGIN netdata.apps_fix %llu\n"
+ "BEGIN netdata.apps_fix %"PRIu64"\n"
"SET utime = %u\n"
"SET stime = %u\n"
"SET gtime = %u\n"
@@ -3566,7 +3579,7 @@ void send_resource_usage_to_netdata(usec_t dt) {
if(include_exited_childs)
fprintf(stdout,
- "BEGIN netdata.apps_children_fix %llu\n"
+ "BEGIN netdata.apps_children_fix %"PRIu64"\n"
"SET cutime = %u\n"
"SET cstime = %u\n"
"SET cgtime = %u\n"
@@ -3736,249 +3749,118 @@ static void normalize_utilization(struct target *root) {
static void send_collected_data_to_netdata(struct target *root, const char *type, usec_t dt) {
struct target *w;
- send_BEGIN(type, "cpu", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, (kernel_uint_t)(w->utime * utime_fix_ratio) + (kernel_uint_t)(w->stime * stime_fix_ratio) + (kernel_uint_t)(w->gtime * gtime_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cutime * cutime_fix_ratio) + (kernel_uint_t)(w->cstime * cstime_fix_ratio) + (kernel_uint_t)(w->cgtime * cgtime_fix_ratio)):0ULL));
- }
- send_END();
-
- send_BEGIN(type, "cpu_user", dt);
for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, (kernel_uint_t)(w->utime * utime_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cutime * cutime_fix_ratio)):0ULL));
- }
- send_END();
-
- send_BEGIN(type, "cpu_system", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, (kernel_uint_t)(w->stime * stime_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cstime * cstime_fix_ratio)):0ULL));
- }
- send_END();
+ if (unlikely(!w->exposed))
+ continue;
- if(show_guest_time) {
- send_BEGIN(type, "cpu_guest", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, (kernel_uint_t)(w->gtime * gtime_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cgtime * cgtime_fix_ratio)):0ULL));
- }
+ send_BEGIN(type, w->clean_name, "processes", dt);
+ send_SET("processes", w->processes);
send_END();
- }
-#ifndef __FreeBSD__
- send_BEGIN(type, "voluntary_ctxt_switches", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, w->status_voluntary_ctxt_switches);
- }
- send_END();
-
- send_BEGIN(type, "involuntary_ctxt_switches", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, w->status_nonvoluntary_ctxt_switches);
- }
- send_END();
-#endif
-
- send_BEGIN(type, "threads", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- send_SET(w->name, w->num_threads);
- }
- send_END();
-
- send_BEGIN(type, "processes", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- send_SET(w->name, w->processes);
- }
- send_END();
-
-#ifndef __FreeBSD__
- send_BEGIN(type, "uptime", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, (global_uptime > w->starttime)?(global_uptime - w->starttime):0);
- }
- send_END();
-
- if (enable_detailed_uptime_charts) {
- send_BEGIN(type, "uptime_min", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, w->uptime_min);
- }
+ send_BEGIN(type, w->clean_name, "threads", dt);
+ send_SET("threads", w->num_threads);
send_END();
- send_BEGIN(type, "uptime_avg", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, w->uptime_sum / w->processes);
- }
- send_END();
+ if (unlikely(!w->processes && !w->is_other))
+ continue;
- send_BEGIN(type, "uptime_max", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, w->uptime_max);
- }
+ send_BEGIN(type, w->clean_name, "cpu_utilization", dt);
+ send_SET("user", (kernel_uint_t)(w->utime * utime_fix_ratio) + (include_exited_childs ? ((kernel_uint_t)(w->cutime * cutime_fix_ratio)) : 0ULL));
+ send_SET("system", (kernel_uint_t)(w->stime * stime_fix_ratio) + (include_exited_childs ? ((kernel_uint_t)(w->cstime * cstime_fix_ratio)) : 0ULL));
send_END();
- }
-#endif
-
- send_BEGIN(type, "mem", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, (w->status_vmrss > w->status_vmshared)?(w->status_vmrss - w->status_vmshared):0ULL);
- }
- send_END();
-
- send_BEGIN(type, "rss", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, w->status_vmrss);
- }
- send_END();
-
- send_BEGIN(type, "vmem", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, w->status_vmsize);
- }
- send_END();
-
-#ifndef __FreeBSD__
- send_BEGIN(type, "swap", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, w->status_vmswap);
- }
- send_END();
-#endif
-
- send_BEGIN(type, "minor_faults", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, (kernel_uint_t)(w->minflt * minflt_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cminflt * cminflt_fix_ratio)):0ULL));
- }
- send_END();
-
- send_BEGIN(type, "major_faults", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, (kernel_uint_t)(w->majflt * majflt_fix_ratio) + (include_exited_childs?((kernel_uint_t)(w->cmajflt * cmajflt_fix_ratio)):0ULL));
- }
- send_END();
#ifndef __FreeBSD__
- send_BEGIN(type, "lreads", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, w->io_logical_bytes_read);
- }
- send_END();
-
- send_BEGIN(type, "lwrites", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, w->io_logical_bytes_written);
- }
- send_END();
-#endif
-
- send_BEGIN(type, "preads", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, w->io_storage_bytes_read);
- }
- send_END();
-
- send_BEGIN(type, "pwrites", dt);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed && w->processes))
- send_SET(w->name, w->io_storage_bytes_written);
- }
- send_END();
-
- if(enable_file_charts) {
- send_BEGIN(type, "fds_open_limit", dt);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes))
- send_SET(w->name, w->max_open_files_percent * 100.0);
+ if (enable_guest_charts) {
+ send_BEGIN(type, w->clean_name, "cpu_guest_utilization", dt);
+ send_SET("guest", (kernel_uint_t)(w->gtime * gtime_fix_ratio) + (include_exited_childs ? ((kernel_uint_t)(w->cgtime * cgtime_fix_ratio)) : 0ULL));
+ send_END();
}
- send_END();
- send_BEGIN(type, "fds_open", dt);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes))
- send_SET(w->name, pid_openfds_sum(w));
- }
+ send_BEGIN(type, w->clean_name, "cpu_context_switches", dt);
+ send_SET("voluntary", w->status_voluntary_ctxt_switches);
+ send_SET("involuntary", w->status_nonvoluntary_ctxt_switches);
send_END();
- send_BEGIN(type, "fds_files", dt);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes))
- send_SET(w->name, w->openfds.files);
- }
+ send_BEGIN(type, w->clean_name, "mem_private_usage", dt);
+ send_SET("mem", (w->status_vmrss > w->status_vmshared)?(w->status_vmrss - w->status_vmshared) : 0ULL);
send_END();
+#endif
- send_BEGIN(type, "fds_sockets", dt);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes))
- send_SET(w->name, w->openfds.sockets);
- }
+ send_BEGIN(type, w->clean_name, "mem_usage", dt);
+ send_SET("rss", w->status_vmrss);
send_END();
- send_BEGIN(type, "fds_pipes", dt);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes))
- send_SET(w->name, w->openfds.pipes);
- }
+ send_BEGIN(type, w->clean_name, "vmem_usage", dt);
+ send_SET("vmem", w->status_vmsize);
send_END();
- send_BEGIN(type, "fds_inotifies", dt);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes))
- send_SET(w->name, w->openfds.inotifies);
- }
+ send_BEGIN(type, w->clean_name, "mem_page_faults", dt);
+ send_SET("minor", (kernel_uint_t)(w->minflt * minflt_fix_ratio) + (include_exited_childs ? ((kernel_uint_t)(w->cminflt * cminflt_fix_ratio)) : 0ULL));
+ send_SET("major", (kernel_uint_t)(w->majflt * majflt_fix_ratio) + (include_exited_childs ? ((kernel_uint_t)(w->cmajflt * cmajflt_fix_ratio)) : 0ULL));
send_END();
- send_BEGIN(type, "fds_eventfds", dt);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes))
- send_SET(w->name, w->openfds.eventfds);
- }
+#ifndef __FreeBSD__
+ send_BEGIN(type, w->clean_name, "swap_usage", dt);
+ send_SET("swap", w->status_vmswap);
send_END();
+#endif
- send_BEGIN(type, "fds_timerfds", dt);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes))
- send_SET(w->name, w->openfds.timerfds);
+#ifndef __FreeBSD__
+ if (w->processes == 0) {
+ send_BEGIN(type, w->clean_name, "uptime", dt);
+ send_SET("uptime", 0);
+ send_END();
+
+ if (enable_detailed_uptime_charts) {
+ send_BEGIN(type, w->clean_name, "uptime_summary", dt);
+ send_SET("min", 0);
+ send_SET("avg", 0);
+ send_SET("max", 0);
+ send_END();
+ }
+ } else {
+ send_BEGIN(type, w->clean_name, "uptime", dt);
+ send_SET("uptime", (global_uptime > w->starttime) ? (global_uptime - w->starttime) : 0);
+ send_END();
+
+ if (enable_detailed_uptime_charts) {
+ send_BEGIN(type, w->clean_name, "uptime_summary", dt);
+ send_SET("min", w->uptime_min);
+ send_SET("avg", w->processes > 0 ? w->uptime_sum / w->processes : 0);
+ send_SET("max", w->uptime_max);
+ send_END();
+ }
}
- send_END();
+#endif
- send_BEGIN(type, "fds_signalfds", dt);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes))
- send_SET(w->name, w->openfds.signalfds);
- }
+ send_BEGIN(type, w->clean_name, "disk_physical_io", dt);
+ send_SET("reads", w->io_storage_bytes_read);
+ send_SET("writes", w->io_storage_bytes_written);
send_END();
- send_BEGIN(type, "fds_eventpolls", dt);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes))
- send_SET(w->name, w->openfds.eventpolls);
- }
+#ifndef __FreeBSD__
+ send_BEGIN(type, w->clean_name, "disk_logical_io", dt);
+ send_SET("reads", w->io_logical_bytes_read);
+ send_SET("writes", w->io_logical_bytes_written);
send_END();
-
- send_BEGIN(type, "fds_other", dt);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes))
- send_SET(w->name, w->openfds.other);
+#endif
+ if (enable_file_charts) {
+ send_BEGIN(type, w->clean_name, "fds_open_limit", dt);
+ send_SET("limit", w->max_open_files_percent * 100.0);
+ send_END();
+
+ send_BEGIN(type, w->clean_name, "fds_open", dt);
+ send_SET("files", w->openfds.files);
+ send_SET("sockets", w->openfds.sockets);
+ send_SET("pipes", w->openfds.pipes);
+ send_SET("inotifies", w->openfds.inotifies);
+ send_SET("event", w->openfds.eventfds);
+ send_SET("timer", w->openfds.timerfds);
+ send_SET("signal", w->openfds.signalfds);
+ send_SET("eventpolls", w->openfds.eventpolls);
+ send_SET("other", w->openfds.other);
+ send_END();
}
- send_END();
}
}
@@ -3986,312 +3868,146 @@ static void send_collected_data_to_netdata(struct target *root, const char *type
// ----------------------------------------------------------------------------
// generate the charts
-static void send_charts_updates_to_netdata(struct target *root, const char *type, const char *title)
+static void send_charts_updates_to_netdata(struct target *root, const char *type, const char *lbl_name, const char *title)
{
struct target *w;
- int newly_added = 0;
- for(w = root ; w ; w = w->next) {
- if (w->target) continue;
-
- if(unlikely(w->processes && (debug_enabled || w->debug_enabled))) {
- struct pid_on_target *pid_on_target;
-
- fprintf(stderr, "apps.plugin: target '%s' has aggregated %u process%s:", w->name, w->processes, (w->processes == 1)?"":"es");
-
- for(pid_on_target = w->root_pid; pid_on_target; pid_on_target = pid_on_target->next) {
- fprintf(stderr, " %d", pid_on_target->pid);
+ if (debug_enabled) {
+ for (w = root; w; w = w->next) {
+ if (unlikely(!w->target && w->processes)) {
+ struct pid_on_target *pid_on_target;
+ fprintf(stderr, "apps.plugin: target '%s' has aggregated %u process(es):", w->name, w->processes);
+ for (pid_on_target = w->root_pid; pid_on_target; pid_on_target = pid_on_target->next) {
+ fprintf(stderr, " %d", pid_on_target->pid);
+ }
+ fputc('\n', stderr);
}
-
- fputc('\n', stderr);
}
-
- if (!w->exposed && w->processes) {
- newly_added++;
- w->exposed = 1;
- if (debug_enabled || w->debug_enabled)
- debug_log_int("%s just added - regenerating charts.", w->name);
- }
- }
-
- // nothing more to show
- if(!newly_added && show_guest_time == show_guest_time_old) return;
-
- // we have something new to show
- // update the charts
- fprintf(stdout, "CHART %s.cpu '' '%s CPU Time (100%% = 1 core)' 'percentage' cpu %s.cpu stacked 20001 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu %s\n", w->name, time_factor * RATES_DETAIL / 100, w->hidden ? "hidden" : "");
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.mem '' '%s Real Memory (w/o shared)' 'MiB' mem %s.mem stacked 20003 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute %ld %ld\n", w->name, 1L, 1024L);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.rss '' '%s Resident Set Size (w/shared)' 'MiB' mem %s.rss stacked 20004 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute %ld %ld\n", w->name, 1L, 1024L);
}
- APPS_PLUGIN_FUNCTIONS();
- fprintf(stdout, "CHART %s.vmem '' '%s Virtual Memory Size' 'MiB' mem %s.vmem stacked 20005 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute %ld %ld\n", w->name, 1L, 1024L);
- }
- APPS_PLUGIN_FUNCTIONS();
+ for (w = root; w; w = w->next) {
+ if (likely(w->exposed || (!w->processes && !w->is_other)))
+ continue;
- fprintf(stdout, "CHART %s.threads '' '%s Threads' 'threads' processes %s.threads stacked 20006 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
+ w->exposed = 1;
- fprintf(stdout, "CHART %s.processes '' '%s Processes' 'processes' processes %s.processes stacked 20007 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
+ fprintf(stdout, "CHART %s.%s_cpu_utilization '' '%s CPU utilization (100%% = 1 core)' 'percentage' cpu %s.cpu_utilization stacked 20001 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION user '' absolute 1 %llu\n", time_factor * RATES_DETAIL / 100LLU);
+ fprintf(stdout, "DIMENSION system '' absolute 1 %llu\n", time_factor * RATES_DETAIL / 100LLU);
#ifndef __FreeBSD__
- fprintf(stdout, "CHART %s.uptime '' '%s Carried Over Uptime' 'seconds' processes %s.uptime line 20008 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- if (enable_detailed_uptime_charts) {
- fprintf(stdout, "CHART %s.uptime_min '' '%s Minimum Uptime' 'seconds' processes %s.uptime_min line 20009 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.uptime_avg '' '%s Average Uptime' 'seconds' processes %s.uptime_avg line 20010 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.uptime_max '' '%s Maximum Uptime' 'seconds' processes %s.uptime_max line 20011 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
- }
+ if (enable_guest_charts) {
+ fprintf(stdout, "CHART %s.%s_cpu_guest_utilization '' '%s CPU guest utilization (100%% = 1 core)' 'percentage' cpu %s.cpu_guest_utilization line 20005 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION guest '' absolute 1 %llu\n", time_factor * RATES_DETAIL / 100LLU);
+ }
+
+ fprintf(stdout, "CHART %s.%s_cpu_context_switches '' '%s CPU context switches' 'switches/s' cpu %s.cpu_context_switches stacked 20010 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION voluntary '' absolute 1 %llu\n", RATES_DETAIL);
+ fprintf(stdout, "DIMENSION involuntary '' absolute 1 %llu\n", RATES_DETAIL);
+
+ fprintf(stdout, "CHART %s.%s_mem_private_usage '' '%s memory usage without shared' 'MiB' mem %s.mem_private_usage area 20050 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION mem '' absolute %ld %ld\n", 1L, 1024L);
#endif
- fprintf(stdout, "CHART %s.cpu_user '' '%s CPU User Time (100%% = 1 core)' 'percentage' cpu %s.cpu_user stacked 20020 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, time_factor * RATES_DETAIL / 100LLU);
- }
- APPS_PLUGIN_FUNCTIONS();
+ fprintf(stdout, "CHART %s.%s_mem_usage '' '%s memory RSS usage' 'MiB' mem %s.mem_usage area 20055 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION rss '' absolute %ld %ld\n", 1L, 1024L);
- fprintf(stdout, "CHART %s.cpu_system '' '%s CPU System Time (100%% = 1 core)' 'percentage' cpu %s.cpu_system stacked 20021 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, time_factor * RATES_DETAIL / 100LLU);
- }
- APPS_PLUGIN_FUNCTIONS();
+ fprintf(stdout, "CHART %s.%s_mem_page_faults '' '%s memory page faults' 'pgfaults/s' mem %s.mem_page_faults stacked 20060 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION major '' absolute 1 %llu\n", RATES_DETAIL);
+ fprintf(stdout, "DIMENSION minor '' absolute 1 %llu\n", RATES_DETAIL);
- if(show_guest_time) {
- fprintf(stdout, "CHART %s.cpu_guest '' '%s CPU Guest Time (100%% = 1 core)' 'percentage' cpu %s.cpu_guest stacked 20022 %d\n", type, title, type, update_every);
- for (w = root; w; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, time_factor * RATES_DETAIL / 100LLU);
- }
- APPS_PLUGIN_FUNCTIONS();
- }
+ fprintf(stdout, "CHART %s.%s_vmem_usage '' '%s virtual memory size' 'MiB' mem %s.vmem_usage line 20065 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION vmem '' absolute %ld %ld\n", 1L, 1024L);
#ifndef __FreeBSD__
- fprintf(stdout, "CHART %s.voluntary_ctxt_switches '' '%s Voluntary Context Switches' 'switches/s' cpu %s.voluntary_ctxt_switches stacked 20023 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.involuntary_ctxt_switches '' '%s Involuntary Context Switches' 'switches/s' cpu %s.involuntary_ctxt_switches stacked 20024 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL);
- }
- APPS_PLUGIN_FUNCTIONS();
+ fprintf(stdout, "CHART %s.%s_swap_usage '' '%s swap usage' 'MiB' mem %s.swap_usage area 20065 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION swap '' absolute %ld %ld\n", 1L, 1024L);
#endif
#ifndef __FreeBSD__
- fprintf(stdout, "CHART %s.swap '' '%s Swap Memory' 'MiB' swap %s.swap stacked 20011 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute %ld %ld\n", w->name, 1L, 1024L);
- }
- APPS_PLUGIN_FUNCTIONS();
-#endif
-
- fprintf(stdout, "CHART %s.major_faults '' '%s Major Page Faults (swap read)' 'page faults/s' swap %s.major_faults stacked 20012 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.minor_faults '' '%s Minor Page Faults' 'page faults/s' mem %s.minor_faults stacked 20011 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL);
- }
- APPS_PLUGIN_FUNCTIONS();
-
-#ifdef __FreeBSD__
- // FIXME: same metric name as in Linux but different units.
- fprintf(stdout, "CHART %s.preads '' '%s Disk Reads' 'blocks/s' disk %s.preads stacked 20002 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.pwrites '' '%s Disk Writes' 'blocks/s' disk %s.pwrites stacked 20002 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, RATES_DETAIL);
- }
- APPS_PLUGIN_FUNCTIONS();
+ fprintf(stdout, "CHART %s.%s_disk_physical_io '' '%s disk physical IO' 'KiB/s' disk %s.disk_physical_io area 20100 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION reads '' absolute 1 %llu\n", 1024LLU * RATES_DETAIL);
+ fprintf(stdout, "DIMENSION writes '' absolute -1 %llu\n", 1024LLU * RATES_DETAIL);
+
+ fprintf(stdout, "CHART %s.%s_disk_logical_io '' '%s disk logical IO' 'KiB/s' disk %s.disk_logical_io area 20105 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION reads '' absolute 1 %llu\n", 1024LLU * RATES_DETAIL);
+ fprintf(stdout, "DIMENSION writes '' absolute -1 %llu\n", 1024LLU * RATES_DETAIL);
#else
- fprintf(stdout, "CHART %s.preads '' '%s Disk Reads' 'KiB/s' disk %s.preads stacked 20002 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, 1024LLU * RATES_DETAIL);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.pwrites '' '%s Disk Writes' 'KiB/s' disk %s.pwrites stacked 20002 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, 1024LLU * RATES_DETAIL);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.lreads '' '%s Disk Logical Reads' 'KiB/s' disk %s.lreads stacked 20042 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, 1024LLU * RATES_DETAIL);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.lwrites '' '%s I/O Logical Writes' 'KiB/s' disk %s.lwrites stacked 20042 %d\n", type, title, type, update_every);
- for (w = root; w ; w = w->next) {
- if(unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 %llu\n", w->name, 1024LLU * RATES_DETAIL);
- }
- APPS_PLUGIN_FUNCTIONS();
+ fprintf(stdout, "CHART %s.%s_disk_physical_io '' '%s disk physical IO' 'blocks/s' disk %s.disk_physical_block_io area 20100 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION reads '' absolute 1 %llu\n", RATES_DETAIL);
+ fprintf(stdout, "DIMENSION writes '' absolute -1 %llu\n", RATES_DETAIL);
#endif
- if(enable_file_charts) {
- fprintf(stdout, "CHART %s.fds_open_limit '' '%s Open File Descriptors Limit' '%%' fds %s.fds_open_limit line 20050 %d\n", type,
- title, type, update_every);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 100\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.fds_open '' '%s Open File Descriptors' 'fds' fds %s.fds_open stacked 20051 %d\n", type,
- title, type, update_every);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.fds_files '' '%s Open Files' 'fds' fds %s.fds_files stacked 20052 %d\n", type,
- title, type, update_every);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.fds_sockets '' '%s Open Sockets' 'fds' fds %s.fds_sockets stacked 20053 %d\n",
- type, title, type, update_every);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.fds_pipes '' '%s Pipes' 'fds' fds %s.fds_pipes stacked 20054 %d\n", type,
- title, type, update_every);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.fds_inotifies '' '%s iNotify File Descriptors' 'fds' fds %s.fds_inotifies stacked 20055 %d\n", type,
- title, type, update_every);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
+ fprintf(stdout, "CHART %s.%s_processes '' '%s processes' 'processes' processes %s.processes line 20150 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION processes '' absolute 1 1\n");
+
+ fprintf(stdout, "CHART %s.%s_threads '' '%s threads' 'threads' processes %s.threads line 20155 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION threads '' absolute 1 1\n");
+
+ if (enable_file_charts) {
+ fprintf(stdout, "CHART %s.%s_fds_open_limit '' '%s open file descriptors limit' '%%' fds %s.fds_open_limit line 20200 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION limit '' absolute 1 100\n");
+
+ fprintf(stdout, "CHART %s.%s_fds_open '' '%s open file descriptors' 'fds' fds %s.fds_open stacked 20210 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION files '' absolute 1 1\n");
+ fprintf(stdout, "DIMENSION sockets '' absolute 1 1\n");
+ fprintf(stdout, "DIMENSION pipes '' absolute 1 1\n");
+ fprintf(stdout, "DIMENSION inotifies '' absolute 1 1\n");
+ fprintf(stdout, "DIMENSION event '' absolute 1 1\n");
+ fprintf(stdout, "DIMENSION timer '' absolute 1 1\n");
+ fprintf(stdout, "DIMENSION signal '' absolute 1 1\n");
+ fprintf(stdout, "DIMENSION eventpolls '' absolute 1 1\n");
+ fprintf(stdout, "DIMENSION other '' absolute 1 1\n");
+ }
- fprintf(stdout, "CHART %s.fds_eventfds '' '%s Event File Descriptors' 'fds' fds %s.fds_eventfds stacked 20056 %d\n", type,
- title, type, update_every);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.fds_timerfds '' '%s Timer File Descriptors' 'fds' fds %s.fds_timerfds stacked 20057 %d\n", type,
- title, type, update_every);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.fds_signalfds '' '%s Signal File Descriptors' 'fds' fds %s.fds_signalfds stacked 20058 %d\n", type,
- title, type, update_every);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.fds_eventpolls '' '%s Event Poll File Descriptors' 'fds' fds %s.fds_eventpolls stacked 20059 %d\n", type,
- title, type, update_every);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
-
- fprintf(stdout, "CHART %s.fds_other '' '%s Other File Descriptors' 'fds' fds %s.fds_other stacked 20060 %d\n", type,
- title, type, update_every);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' absolute 1 1\n", w->name);
- }
- APPS_PLUGIN_FUNCTIONS();
+#ifndef __FreeBSD__
+ fprintf(stdout, "CHART %s.%s_uptime '' '%s uptime' 'seconds' uptime %s.uptime line 20250 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION uptime '' absolute 1 1\n");
+
+ if (enable_detailed_uptime_charts) {
+ fprintf(stdout, "CHART %s.%s_uptime_summary '' '%s uptime summary' 'seconds' uptime %s.uptime_summary area 20255 %d\n", type, w->clean_name, title, type, update_every);
+ fprintf(stdout, "CLABEL '%s' '%s' 1\n", lbl_name, w->name);
+ fprintf(stdout, "CLABEL_COMMIT\n");
+ fprintf(stdout, "DIMENSION min '' absolute 1 1\n");
+ fprintf(stdout, "DIMENSION avg '' absolute 1 1\n");
+ fprintf(stdout, "DIMENSION max '' absolute 1 1\n");
+ }
+#endif
}
}
-
#ifndef __FreeBSD__
static void send_proc_states_count(usec_t dt)
{
@@ -4310,7 +4026,7 @@ static void send_proc_states_count(usec_t dt)
}
// send process state count
- send_BEGIN("system", "processes_state", dt);
+ fprintf(stdout, "BEGIN system.processes_state %" PRIu64 "\n", dt);
for (proc_state i = PROC_STATUS_RUNNING; i < PROC_STATUS_END; i++) {
send_SET(proc_states[i], proc_state_count[i]);
}
@@ -4575,7 +4291,7 @@ static int check_capabilities() {
}
#endif
-static netdata_mutex_t mutex = NETDATA_MUTEX_INITIALIZER;
+static netdata_mutex_t apps_and_stdout_mutex = NETDATA_MUTEX_INITIALIZER;
#define PROCESS_FILTER_CATEGORY "category:"
#define PROCESS_FILTER_USER "user:"
@@ -4629,8 +4345,8 @@ static void get_MemTotal(void) {
}
static void apps_plugin_function_processes_help(const char *transaction) {
- pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600);
- fprintf(stdout, "%s",
+ BUFFER *wb = buffer_create(0, NULL);
+ buffer_sprintf(wb, "%s",
"apps.plugin / processes\n"
"\n"
"Function `processes` presents all the currently running processes of the system.\n"
@@ -4660,7 +4376,9 @@ static void apps_plugin_function_processes_help(const char *transaction) {
"\n"
"Filters can be combined. Each filter can be given only one time.\n"
);
- pluginsd_function_result_end_to_stdout();
+
+ pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600, wb);
+ buffer_free(wb);
}
#define add_value_field_llu_with_max(wb, key, value) do { \
@@ -4675,7 +4393,7 @@ static void apps_plugin_function_processes_help(const char *transaction) {
buffer_json_add_array_item_double(wb, _tmp); \
} while(0)
-static void function_processes(const char *transaction, char *function __maybe_unused, char *line_buffer __maybe_unused, int line_max __maybe_unused, int timeout __maybe_unused) {
+static void function_processes(const char *transaction, char *function __maybe_unused, int timeout __maybe_unused, bool *cancelled __maybe_unused) {
struct pid_stat *p;
char *words[PLUGINSD_MAX_WORDS] = { NULL };
@@ -4696,21 +4414,24 @@ static void function_processes(const char *transaction, char *function __maybe_u
if(!category && strncmp(keyword, PROCESS_FILTER_CATEGORY, strlen(PROCESS_FILTER_CATEGORY)) == 0) {
category = find_target_by_name(apps_groups_root_target, &keyword[strlen(PROCESS_FILTER_CATEGORY)]);
if(!category) {
- pluginsd_function_json_error(transaction, HTTP_RESP_BAD_REQUEST, "No category with that name found.");
+ pluginsd_function_json_error_to_stdout(transaction, HTTP_RESP_BAD_REQUEST,
+ "No category with that name found.");
return;
}
}
else if(!user && strncmp(keyword, PROCESS_FILTER_USER, strlen(PROCESS_FILTER_USER)) == 0) {
user = find_target_by_name(users_root_target, &keyword[strlen(PROCESS_FILTER_USER)]);
if(!user) {
- pluginsd_function_json_error(transaction, HTTP_RESP_BAD_REQUEST, "No user with that name found.");
+ pluginsd_function_json_error_to_stdout(transaction, HTTP_RESP_BAD_REQUEST,
+ "No user with that name found.");
return;
}
}
else if(strncmp(keyword, PROCESS_FILTER_GROUP, strlen(PROCESS_FILTER_GROUP)) == 0) {
group = find_target_by_name(groups_root_target, &keyword[strlen(PROCESS_FILTER_GROUP)]);
if(!group) {
- pluginsd_function_json_error(transaction, HTTP_RESP_BAD_REQUEST, "No group with that name found.");
+ pluginsd_function_json_error_to_stdout(transaction, HTTP_RESP_BAD_REQUEST,
+ "No group with that name found.");
return;
}
}
@@ -4736,13 +4457,12 @@ static void function_processes(const char *transaction, char *function __maybe_u
else {
char msg[PLUGINSD_LINE_MAX];
snprintfz(msg, PLUGINSD_LINE_MAX, "Invalid parameter '%s'", keyword);
- pluginsd_function_json_error(transaction, HTTP_RESP_BAD_REQUEST, msg);
+ pluginsd_function_json_error_to_stdout(transaction, HTTP_RESP_BAD_REQUEST, msg);
return;
}
}
time_t expires = now_realtime_sec() + update_every;
- pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "application/json", expires);
unsigned int cpu_divisor = time_factor * RATES_DETAIL / 100;
unsigned int memory_divisor = 1024;
@@ -5096,13 +4816,13 @@ static void function_processes(const char *transaction, char *function __maybe_u
RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
2, "KiB/s", LReads_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM,
RRDF_FIELD_FILTER_RANGE,
- RRDF_FIELD_OPTS_VISIBLE, NULL);
+ RRDF_FIELD_OPTS_NONE, NULL);
buffer_rrdf_table_add_field(wb, field_id++, "LWrites", "Logical I/O Writes", RRDF_FIELD_TYPE_BAR_WITH_INTEGER,
RRDF_FIELD_VISUAL_BAR,
RRDF_FIELD_TRANSFORM_NUMBER,
2, "KiB/s", LWrites_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM,
RRDF_FIELD_FILTER_RANGE,
- RRDF_FIELD_OPTS_VISIBLE, NULL);
+ RRDF_FIELD_OPTS_NONE, NULL);
#endif
// I/O calls
@@ -5110,12 +4830,12 @@ static void function_processes(const char *transaction, char *function __maybe_u
RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2,
"calls/s", RCalls_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM,
RRDF_FIELD_FILTER_RANGE,
- RRDF_FIELD_OPTS_VISIBLE, NULL);
+ RRDF_FIELD_OPTS_NONE, NULL);
buffer_rrdf_table_add_field(wb, field_id++, "WCalls", "I/O Write Calls", RRDF_FIELD_TYPE_BAR_WITH_INTEGER,
RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 2,
"calls/s", WCalls_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM,
RRDF_FIELD_FILTER_RANGE,
- RRDF_FIELD_OPTS_VISIBLE, NULL);
+ RRDF_FIELD_OPTS_NONE, NULL);
// minor page faults
buffer_rrdf_table_add_field(wb, field_id++, "MinFlt", "Minor Page Faults/s", RRDF_FIELD_TYPE_BAR_WITH_INTEGER,
@@ -5153,7 +4873,7 @@ static void function_processes(const char *transaction, char *function __maybe_u
RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR,
RRDF_FIELD_TRANSFORM_NUMBER, 2, "pgflts/s", TMajFlt_max, RRDF_FIELD_SORT_DESCENDING,
NULL, RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE,
- RRDF_FIELD_OPTS_VISIBLE, NULL);
+ RRDF_FIELD_OPTS_NONE, NULL);
// open file descriptors
buffer_rrdf_table_add_field(wb, field_id++, "FDsLimitPercent", "Percentage of Open Descriptors vs Limits",
@@ -5165,24 +4885,24 @@ static void function_processes(const char *transaction, char *function __maybe_u
RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR,
RRDF_FIELD_TRANSFORM_NUMBER, 0, "fds", FDs_max, RRDF_FIELD_SORT_DESCENDING, NULL,
RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE,
- RRDF_FIELD_OPTS_VISIBLE, NULL);
+ RRDF_FIELD_OPTS_NONE, NULL);
buffer_rrdf_table_add_field(wb, field_id++, "Files", "Open Files", RRDF_FIELD_TYPE_BAR_WITH_INTEGER,
RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0,
"fds",
Files_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM,
RRDF_FIELD_FILTER_RANGE,
- RRDF_FIELD_OPTS_VISIBLE, NULL);
+ RRDF_FIELD_OPTS_NONE, NULL);
buffer_rrdf_table_add_field(wb, field_id++, "Pipes", "Open Pipes", RRDF_FIELD_TYPE_BAR_WITH_INTEGER,
RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0,
"fds",
Pipes_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM,
RRDF_FIELD_FILTER_RANGE,
- RRDF_FIELD_OPTS_VISIBLE, NULL);
+ RRDF_FIELD_OPTS_NONE, NULL);
buffer_rrdf_table_add_field(wb, field_id++, "Sockets", "Open Sockets", RRDF_FIELD_TYPE_BAR_WITH_INTEGER,
RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0,
"fds", Sockets_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM,
RRDF_FIELD_FILTER_RANGE,
- RRDF_FIELD_OPTS_VISIBLE, NULL);
+ RRDF_FIELD_OPTS_NONE, NULL);
buffer_rrdf_table_add_field(wb, field_id++, "iNotiFDs", "Open iNotify Descriptors",
RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR,
RRDF_FIELD_TRANSFORM_NUMBER, 0, "fds", iNotiFDs_max, RRDF_FIELD_SORT_DESCENDING,
@@ -5219,12 +4939,12 @@ static void function_processes(const char *transaction, char *function __maybe_u
RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0,
"processes", Processes_max, RRDF_FIELD_SORT_DESCENDING, NULL,
RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE,
- RRDF_FIELD_OPTS_VISIBLE, NULL);
+ RRDF_FIELD_OPTS_NONE, NULL);
buffer_rrdf_table_add_field(wb, field_id++, "Threads", "Threads", RRDF_FIELD_TYPE_BAR_WITH_INTEGER,
RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, 0,
"threads", Threads_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_SUM,
RRDF_FIELD_FILTER_RANGE,
- RRDF_FIELD_OPTS_VISIBLE, NULL);
+ RRDF_FIELD_OPTS_NONE, NULL);
buffer_rrdf_table_add_field(wb, field_id++, "Uptime", "Uptime in seconds", RRDF_FIELD_TYPE_DURATION,
RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_DURATION_S, 2,
"seconds", Uptime_max, RRDF_FIELD_SORT_DESCENDING, NULL, RRDF_FIELD_SUMMARY_MAX,
@@ -5520,87 +5240,19 @@ static void function_processes(const char *transaction, char *function __maybe_u
buffer_json_member_add_time_t(wb, "expires", expires);
buffer_json_finalize(wb);
- fwrite(buffer_tostring(wb), buffer_strlen(wb), 1, stdout);
- buffer_free(wb);
+ pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "application/json", expires, wb);
- pluginsd_function_result_end_to_stdout();
+ buffer_free(wb);
}
static bool apps_plugin_exit = false;
-static void *reader_main(void *arg __maybe_unused) {
- char buffer[PLUGINSD_LINE_MAX + 1];
-
- char *s = NULL;
- while(!apps_plugin_exit && (s = fgets(buffer, PLUGINSD_LINE_MAX, stdin))) {
-
- char *words[PLUGINSD_MAX_WORDS] = { NULL };
- size_t num_words = quoted_strings_splitter_pluginsd(buffer, words, PLUGINSD_MAX_WORDS);
-
- const char *keyword = get_word(words, num_words, 0);
-
- if(keyword && strcmp(keyword, PLUGINSD_KEYWORD_FUNCTION) == 0) {
- char *transaction = get_word(words, num_words, 1);
- char *timeout_s = get_word(words, num_words, 2);
- char *function = get_word(words, num_words, 3);
-
- if(!transaction || !*transaction || !timeout_s || !*timeout_s || !function || !*function) {
- netdata_log_error("Received incomplete %s (transaction = '%s', timeout = '%s', function = '%s'). Ignoring it.",
- keyword,
- transaction?transaction:"(unset)",
- timeout_s?timeout_s:"(unset)",
- function?function:"(unset)");
- }
- else {
- int timeout = str2i(timeout_s);
- if(timeout <= 0) timeout = PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT;
-
-// internal_error(true, "Received function '%s', transaction '%s', timeout %d", function, transaction, timeout);
-
- netdata_mutex_lock(&mutex);
-
- if(strncmp(function, "processes", strlen("processes")) == 0)
- function_processes(transaction, function, buffer, PLUGINSD_LINE_MAX + 1, timeout);
- else
- pluginsd_function_json_error(transaction, HTTP_RESP_NOT_FOUND, "No function with this name found in apps.plugin.");
-
- fflush(stdout);
- netdata_mutex_unlock(&mutex);
-
-// internal_error(true, "Done with function '%s', transaction '%s', timeout %d", function, transaction, timeout);
- }
- }
- else
- netdata_log_error("Received unknown command: %s", keyword?keyword:"(unset)");
- }
-
- if(!s || feof(stdin) || ferror(stdin)) {
- apps_plugin_exit = true;
- netdata_log_error("Received error on stdin.");
- }
-
- exit(1);
- return NULL;
-}
-
int main(int argc, char **argv) {
- // debug_flags = D_PROCFILE;
- stderror = stderr;
-
clocks_init();
+ nd_log_initialize_for_external_plugins("apps.plugin");
pagesize = (size_t)sysconf(_SC_PAGESIZE);
- // set the name for logging
- program_name = "apps.plugin";
-
- // disable syslog for apps.plugin
- error_log_syslog = 0;
-
- // set errors flood protection to 100 logs per hour
- error_log_errors_per_period = 100;
- error_log_throttle_period = 3600;
-
bool send_resource_usage = true;
{
const char *s = getenv("NETDATA_INTERNALS_MONITORING");
@@ -5686,10 +5338,17 @@ int main(int argc, char **argv) {
all_pids = callocz(sizeof(struct pid_stat *), (size_t) pid_max + 1);
- netdata_thread_t reader_thread;
- netdata_thread_create(&reader_thread, "APPS_READER", NETDATA_THREAD_OPTION_DONT_LOG, reader_main, NULL);
- netdata_mutex_lock(&mutex);
+ // ------------------------------------------------------------------------
+ // the event loop for functions
+
+ struct functions_evloop_globals *wg =
+ functions_evloop_init(1, "APPS", &apps_and_stdout_mutex, &apps_plugin_exit);
+
+ functions_evloop_add_function(wg, "processes", function_processes, PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT);
+
+ // ------------------------------------------------------------------------
+ netdata_mutex_lock(&apps_and_stdout_mutex);
APPS_PLUGIN_GLOBAL_FUNCTIONS();
usec_t step = update_every * USEC_PER_SEC;
@@ -5697,7 +5356,7 @@ int main(int argc, char **argv) {
heartbeat_t hb;
heartbeat_init(&hb);
for(; !apps_plugin_exit ; global_iterations_counter++) {
- netdata_mutex_unlock(&mutex);
+ netdata_mutex_unlock(&apps_and_stdout_mutex);
#ifdef NETDATA_PROFILING
#warning "compiling for profiling"
@@ -5708,17 +5367,15 @@ int main(int argc, char **argv) {
#else
usec_t dt = heartbeat_next(&hb, step);
#endif
- netdata_mutex_lock(&mutex);
+ netdata_mutex_lock(&apps_and_stdout_mutex);
struct pollfd pollfd = { .fd = fileno(stdout), .events = POLLERR };
if (unlikely(poll(&pollfd, 1, 0) < 0)) {
- netdata_mutex_unlock(&mutex);
- netdata_thread_cancel(reader_thread);
+ netdata_mutex_unlock(&apps_and_stdout_mutex);
fatal("Cannot check if a pipe is available");
}
if (unlikely(pollfd.revents & POLLERR)) {
- netdata_mutex_unlock(&mutex);
- netdata_thread_cancel(reader_thread);
+ netdata_mutex_unlock(&apps_and_stdout_mutex);
fatal("Received error on read pipe.");
}
@@ -5728,8 +5385,7 @@ int main(int argc, char **argv) {
if(!collect_data_for_all_processes()) {
netdata_log_error("Cannot collect /proc data for running processes. Disabling apps.plugin...");
printf("DISABLE\n");
- netdata_mutex_unlock(&mutex);
- netdata_thread_cancel(reader_thread);
+ netdata_mutex_unlock(&apps_and_stdout_mutex);
exit(1);
}
@@ -5743,21 +5399,18 @@ int main(int argc, char **argv) {
send_proc_states_count(dt);
#endif
- // this is smart enough to show only newly added apps, when needed
- send_charts_updates_to_netdata(apps_groups_root_target, "apps", "Apps");
- if(likely(enable_users_charts))
- send_charts_updates_to_netdata(users_root_target, "users", "Users");
+ send_charts_updates_to_netdata(apps_groups_root_target, "app", "app_group", "Apps");
+ send_collected_data_to_netdata(apps_groups_root_target, "app", dt);
- if(likely(enable_groups_charts))
- send_charts_updates_to_netdata(groups_root_target, "groups", "User Groups");
-
- send_collected_data_to_netdata(apps_groups_root_target, "apps", dt);
-
- if(likely(enable_users_charts))
- send_collected_data_to_netdata(users_root_target, "users", dt);
+ if (enable_users_charts) {
+ send_charts_updates_to_netdata(users_root_target, "user", "user", "Users");
+ send_collected_data_to_netdata(users_root_target, "user", dt);
+ }
- if(likely(enable_groups_charts))
- send_collected_data_to_netdata(groups_root_target, "groups", dt);
+ if (enable_groups_charts) {
+ send_charts_updates_to_netdata(groups_root_target, "usergroup", "user_group", "User Groups");
+ send_collected_data_to_netdata(groups_root_target, "usergroup", dt);
+ }
fflush(stdout);
@@ -5765,5 +5418,5 @@ int main(int argc, char **argv) {
debug_log("done Loop No %zu", global_iterations_counter);
}
- netdata_mutex_unlock(&mutex);
+ netdata_mutex_unlock(&apps_and_stdout_mutex);
}
diff --git a/collectors/apps.plugin/integrations/applications.md b/collectors/apps.plugin/integrations/applications.md
new file mode 100644
index 00000000000000..e5219fcc2e226e
--- /dev/null
+++ b/collectors/apps.plugin/integrations/applications.md
@@ -0,0 +1,114 @@
+
+
+# Applications
+
+
+
+
+
+Plugin: apps.plugin
+Module: apps
+
+
+
+## Overview
+
+Monitor Applications for optimal software performance and resource usage.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per applications group
+
+These metrics refer to the application group.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| app_group | The name of the group defined in the configuration. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| app.cpu_utilization | user, system | percentage |
+| app.cpu_guest_utilization | guest | percentage |
+| app.cpu_context_switches | voluntary, involuntary | switches/s |
+| app.mem_usage | rss | MiB |
+| app.mem_private_usage | mem | MiB |
+| app.vmem_usage | vmem | MiB |
+| app.mem_page_faults | minor, major | pgfaults/s |
+| app.swap_usage | swap | MiB |
+| app.disk_physical_io | reads, writes | KiB/s |
+| app.disk_logical_io | reads, writes | KiB/s |
+| app.processes | processes | processes |
+| app.threads | threads | threads |
+| app.fds_open_limit | limit | percentage |
+| app.fds_open | files, sockets, pipes, inotifies, event, timer, signal, eventpolls, other | fds |
+| app.uptime | uptime | seconds |
+| app.uptime_summary | min, avg, max | seconds |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/apps.plugin/integrations/user_groups.md b/collectors/apps.plugin/integrations/user_groups.md
new file mode 100644
index 00000000000000..4ccbfc95fb476c
--- /dev/null
+++ b/collectors/apps.plugin/integrations/user_groups.md
@@ -0,0 +1,114 @@
+
+
+# User Groups
+
+
+
+
+
+Plugin: apps.plugin
+Module: groups
+
+
+
+## Overview
+
+This integration monitors resource utilization per user group.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per user group
+
+These metrics refer to the user group.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| user_group | The name of the user group. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| usergroup.cpu_utilization | user, system | percentage |
+| usergroup.cpu_guest_utilization | guest | percentage |
+| usergroup.cpu_context_switches | voluntary, involuntary | switches/s |
+| usergroup.mem_usage | rss | MiB |
+| usergroup.mem_private_usage | mem | MiB |
+| usergroup.vmem_usage | vmem | MiB |
+| usergroup.mem_page_faults | minor, major | pgfaults/s |
+| usergroup.swap_usage | swap | MiB |
+| usergroup.disk_physical_io | reads, writes | KiB/s |
+| usergroup.disk_logical_io | reads, writes | KiB/s |
+| usergroup.processes | processes | processes |
+| usergroup.threads | threads | threads |
+| usergroup.fds_open_limit | limit | percentage |
+| usergroup.fds_open | files, sockets, pipes, inotifies, event, timer, signal, eventpolls, other | fds |
+| usergroup.uptime | uptime | seconds |
+| usergroup.uptime_summary | min, avg, max | seconds |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/apps.plugin/integrations/users.md b/collectors/apps.plugin/integrations/users.md
new file mode 100644
index 00000000000000..c151fd8a2d45f9
--- /dev/null
+++ b/collectors/apps.plugin/integrations/users.md
@@ -0,0 +1,114 @@
+
+
+# Users
+
+
+
+
+
+Plugin: apps.plugin
+Module: users
+
+
+
+## Overview
+
+This integration monitors resource utilization per user.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per user
+
+These metrics refer to the user.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| user | The name of the user. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| user.cpu_utilization | user, system | percentage |
+| user.cpu_guest_utilization | guest | percentage |
+| user.cpu_context_switches | voluntary, involuntary | switches/s |
+| user.mem_usage | rss | MiB |
+| user.mem_private_usage | mem | MiB |
+| user.vmem_usage | vmem | MiB |
+| user.mem_page_faults | minor, major | pgfaults/s |
+| user.swap_usage | swap | MiB |
+| user.disk_physical_io | reads, writes | KiB/s |
+| user.disk_logical_io | reads, writes | KiB/s |
+| user.processes | processes | processes |
+| user.threads | threads | threads |
+| user.fds_open_limit | limit | percentage |
+| user.fds_open | files, sockets, pipes, inotifies, event, timer, signal, eventpolls, other | fds |
+| user.uptime | uptime | seconds |
+| user.uptime_summary | min, avg, max | seconds |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/apps.plugin/metadata.yaml b/collectors/apps.plugin/metadata.yaml
index 9794a5ea2064e4..f5f22853a53c41 100644
--- a/collectors/apps.plugin/metadata.yaml
+++ b/collectors/apps.plugin/metadata.yaml
@@ -67,160 +67,123 @@ modules:
description: ""
availability: []
scopes:
- - name: global
- description: ""
- labels: []
+ - name: applications group
+ description: These metrics refer to the application group.
+ labels:
+ - name: app_group
+ description: The name of the group defined in the configuration.
metrics:
- - name: apps.cpu
- description: Apps CPU Time (100% = 1 core)
- unit: "percentage"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.cpu_user
- description: Apps CPU User Time (100% = 1 core)
- unit: "percentage"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.cpu_system
- description: Apps CPU System Time (100% = 1 core)
- unit: "percentage"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.cpu_guest
- description: Apps CPU Guest Time (100% = 1 core)
- unit: "percentage"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.mem
- description: Apps Real Memory (w/o shared)
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.rss
- description: Apps Resident Set Size (w/shared)
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.vmem
- description: Apps Virtual Memory Size
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.swap
- description: Apps Swap Memory
- unit: "MiB"
+ - name: app.cpu_utilization
+ description: Apps CPU utilization (100% = 1 core)
+ unit: percentage
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.major_faults
- description: Apps Major Page Faults (swap read)
- unit: "page faults/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.minor_faults
- description: Apps Minor Page Faults (swap read)
- unit: "page faults/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.preads
- description: Apps Disk Reads
- unit: "KiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.pwrites
- description: Apps Disk Writes
- unit: "KiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.lreads
- description: Apps Disk Logical Reads
- unit: "KiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.lwrites
- description: Apps I/O Logical Writes
- unit: "KiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.threads
- description: Apps Threads
- unit: "threads"
- chart_type: stacked
+ - name: user
+ - name: system
+ - name: app.cpu_guest_utilization
+ description: Apps CPU guest utilization (100% = 1 core)
+ unit: percentage
+ chart_type: line
dimensions:
- - name: a dimension per app group
- - name: apps.processes
- description: Apps Processes
- unit: "processes"
+ - name: guest
+ - name: app.cpu_context_switches
+ description: Apps CPU context switches
+ unit: switches/s
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.voluntary_ctxt_switches
- description: Apps Voluntary Context Switches
- unit: "processes"
- chart_type: stacked
+ - name: voluntary
+ - name: involuntary
+ - name: app.mem_usage
+ description: Apps memory RSS usage
+ unit: MiB
+ chart_type: line
dimensions:
- - name: a dimension per app group
- - name: apps.involuntary_ctxt_switches
- description: Apps Involuntary Context Switches
- unit: "processes"
+ - name: rss
+ - name: app.mem_private_usage
+ description: Apps memory usage without shared
+ unit: MiB
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.uptime
- description: Apps Carried Over Uptime
- unit: "seconds"
+ - name: mem
+ - name: app.vmem_usage
+ description: Apps virtual memory size
+ unit: MiB
chart_type: line
dimensions:
- - name: a dimension per app group
- - name: apps.uptime_min
- description: Apps Minimum Uptime
- unit: "seconds"
+ - name: vmem
+ - name: app.mem_page_faults
+ description: Apps memory page faults
+ unit: pgfaults/s
+ chart_type: stacked
+ dimensions:
+ - name: minor
+ - name: major
+ - name: app.swap_usage
+ description: Apps swap usage
+ unit: MiB
+ chart_type: area
+ dimensions:
+ - name: swap
+ - name: app.disk_physical_io
+ description: Apps disk physical IO
+ unit: KiB/s
+ chart_type: area
+ dimensions:
+ - name: reads
+ - name: writes
+ - name: app.disk_logical_io
+ description: Apps disk logical IO
+ unit: KiB/s
+ chart_type: area
+ dimensions:
+ - name: reads
+ - name: writes
+ - name: app.processes
+ description: Apps processes
+ unit: processes
chart_type: line
dimensions:
- - name: a dimension per app group
- - name: apps.uptime_avg
- description: Apps Average Uptime
- unit: "seconds"
+ - name: processes
+ - name: app.threads
+ description: Apps threads
+ unit: threads
chart_type: line
dimensions:
- - name: a dimension per app group
- - name: apps.uptime_max
- description: Apps Maximum Uptime
- unit: "seconds"
+ - name: threads
+ - name: app.fds_open_limit
+ description: Apps open file descriptors limit
+ unit: percentage
chart_type: line
dimensions:
- - name: a dimension per app group
- - name: apps.files
- description: Apps Open Files
- unit: "open files"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: apps.sockets
- description: Apps Open Sockets
- unit: "open sockets"
- chart_type: stacked
+ - name: limit
+ - name: app.fds_open
+ description: Apps open file descriptors
+ unit: fds
+ chart_type: stacked
+ dimensions:
+ - name: files
+ - name: sockets
+ - name: pipes
+ - name: inotifies
+ - name: event
+ - name: timer
+ - name: signal
+ - name: eventpolls
+ - name: other
+ - name: app.uptime
+ description: Apps uptime
+ unit: seconds
+ chart_type: line
dimensions:
- - name: a dimension per app group
- - name: apps.pipes
- description: Apps Open Pipes
- unit: "open pipes"
- chart_type: stacked
+ - name: uptime
+ - name: app.uptime_summary
+ description: Apps uptime summary
+ unit: seconds
+ chart_type: area
dimensions:
- - name: a dimension per app group
+ - name: min
+ - name: avg
+ - name: max
- meta:
plugin_name: apps.plugin
module_name: groups
@@ -289,160 +252,123 @@ modules:
description: ""
availability: []
scopes:
- - name: global
- description: ""
- labels: []
+ - name: user group
+ description: These metrics refer to the user group.
+ labels:
+ - name: user_group
+ description: The name of the user group.
metrics:
- - name: groups.cpu
- description: User Groups CPU Time (100% = 1 core)
- unit: "percentage"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.cpu_user
- description: User Groups CPU User Time (100% = 1 core)
- unit: "percentage"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.cpu_system
- description: User Groups CPU System Time (100% = 1 core)
- unit: "percentage"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.cpu_guest
- description: User Groups CPU Guest Time (100% = 1 core)
- unit: "percentage"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.mem
- description: User Groups Real Memory (w/o shared)
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.rss
- description: User Groups Resident Set Size (w/shared)
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.vmem
- description: User Groups Virtual Memory Size
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.swap
- description: User Groups Swap Memory
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.major_faults
- description: User Groups Major Page Faults (swap read)
- unit: "page faults/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.minor_faults
- description: User Groups Page Faults (swap read)
- unit: "page faults/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.preads
- description: User Groups Disk Reads
- unit: "KiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.pwrites
- description: User Groups Disk Writes
- unit: "KiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.lreads
- description: User Groups Disk Logical Reads
- unit: "KiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.lwrites
- description: User Groups I/O Logical Writes
- unit: "KiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.threads
- description: User Groups Threads
- unit: "threads"
+ - name: usergroup.cpu_utilization
+ description: User Groups CPU utilization (100% = 1 core)
+ unit: percentage
chart_type: stacked
dimensions:
- - name: a dimension per user group
- - name: groups.processes
- description: User Groups Processes
- unit: "processes"
- chart_type: stacked
- dimensions:
- - name: a dimension per user group
- - name: groups.voluntary_ctxt_switches
- description: User Groups Voluntary Context Switches
- unit: "processes"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: groups.involuntary_ctxt_switches
- description: User Groups Involuntary Context Switches
- unit: "processes"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: groups.uptime
- description: User Groups Carried Over Uptime
- unit: "seconds"
+ - name: user
+ - name: system
+ - name: usergroup.cpu_guest_utilization
+ description: User Groups CPU guest utilization (100% = 1 core)
+ unit: percentage
chart_type: line
dimensions:
- - name: a dimension per user group
- - name: groups.uptime_min
- description: User Groups Minimum Uptime
- unit: "seconds"
+ - name: guest
+ - name: usergroup.cpu_context_switches
+ description: User Groups CPU context switches
+ unit: switches/s
+ chart_type: stacked
+ dimensions:
+ - name: voluntary
+ - name: involuntary
+ - name: usergroup.mem_usage
+ description: User Groups memory RSS usage
+ unit: MiB
+ chart_type: area
+ dimensions:
+ - name: rss
+ - name: usergroup.mem_private_usage
+ description: User Groups memory usage without shared
+ unit: MiB
+ chart_type: area
+ dimensions:
+ - name: mem
+ - name: usergroup.vmem_usage
+ description: User Groups virtual memory size
+ unit: MiB
chart_type: line
dimensions:
- - name: a dimension per user group
- - name: groups.uptime_avg
- description: User Groups Average Uptime
- unit: "seconds"
+ - name: vmem
+ - name: usergroup.mem_page_faults
+ description: User Groups memory page faults
+ unit: pgfaults/s
+ chart_type: stacked
+ dimensions:
+ - name: minor
+ - name: major
+ - name: usergroup.swap_usage
+ description: User Groups swap usage
+ unit: MiB
+ chart_type: area
+ dimensions:
+ - name: swap
+ - name: usergroup.disk_physical_io
+ description: User Groups disk physical IO
+ unit: KiB/s
+ chart_type: area
+ dimensions:
+ - name: reads
+ - name: writes
+ - name: usergroup.disk_logical_io
+ description: User Groups disk logical IO
+ unit: KiB/s
+ chart_type: area
+ dimensions:
+ - name: reads
+ - name: writes
+ - name: usergroup.processes
+ description: User Groups processes
+ unit: processes
chart_type: line
dimensions:
- - name: a dimension per user group
- - name: groups.uptime_max
- description: User Groups Maximum Uptime
- unit: "seconds"
+ - name: processes
+ - name: usergroup.threads
+ description: User Groups threads
+ unit: threads
chart_type: line
dimensions:
- - name: a dimension per user group
- - name: groups.files
- description: User Groups Open Files
- unit: "open files"
- chart_type: stacked
+ - name: threads
+ - name: usergroup.fds_open_limit
+ description: User Groups open file descriptors limit
+ unit: percentage
+ chart_type: line
dimensions:
- - name: a dimension per user group
- - name: groups.sockets
- description: User Groups Open Sockets
- unit: "open sockets"
- chart_type: stacked
+ - name: limit
+ - name: usergroup.fds_open
+ description: User Groups open file descriptors
+ unit: fds
+ chart_type: stacked
+ dimensions:
+ - name: files
+ - name: sockets
+ - name: pipes
+ - name: inotifies
+ - name: event
+ - name: timer
+ - name: signal
+ - name: eventpolls
+ - name: other
+ - name: usergroup.uptime
+ description: User Groups uptime
+ unit: seconds
+ chart_type: line
dimensions:
- - name: a dimension per user group
- - name: groups.pipes
- description: User Groups Open Pipes
- unit: "open pipes"
- chart_type: stacked
+ - name: uptime
+ - name: usergroup.uptime_summary
+ description: User Groups uptime summary
+ unit: seconds
+ chart_type: area
dimensions:
- - name: a dimension per user group
+ - name: min
+ - name: avg
+ - name: max
- meta:
plugin_name: apps.plugin
module_name: users
@@ -509,157 +435,120 @@ modules:
description: ""
availability: []
scopes:
- - name: global
- description: ""
- labels: []
+ - name: user
+ description: These metrics refer to the user.
+ labels:
+ - name: user
+ description: The name of the user.
metrics:
- - name: users.cpu
- description: Users CPU Time (100% = 1 core)
- unit: "percentage"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.cpu_user
- description: Users CPU User Time (100% = 1 core)
- unit: "percentage"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.cpu_system
- description: Users CPU System Time (100% = 1 core)
- unit: "percentage"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.cpu_guest
- description: Users CPU Guest Time (100% = 1 core)
- unit: "percentage"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.mem
- description: Users Real Memory (w/o shared)
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.rss
- description: Users Resident Set Size (w/shared)
- unit: "MiB"
+ - name: user.cpu_utilization
+ description: User CPU utilization (100% = 1 core)
+ unit: percentage
chart_type: stacked
dimensions:
- - name: a dimension per user
- - name: users.vmem
- description: Users Virtual Memory Size
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.swap
- description: Users Swap Memory
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.major_faults
- description: Users Major Page Faults (swap read)
- unit: "page faults/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.minor_faults
- description: Users Page Faults (swap read)
- unit: "page faults/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.preads
- description: Users Disk Reads
- unit: "KiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.pwrites
- description: Users Disk Writes
- unit: "KiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.lreads
- description: Users Disk Logical Reads
- unit: "KiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.lwrites
- description: Users I/O Logical Writes
- unit: "KiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.threads
- description: Users Threads
- unit: "threads"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.processes
- description: Users Processes
- unit: "processes"
- chart_type: stacked
- dimensions:
- - name: a dimension per user
- - name: users.voluntary_ctxt_switches
- description: Users Voluntary Context Switches
- unit: "processes"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: users.involuntary_ctxt_switches
- description: Users Involuntary Context Switches
- unit: "processes"
- chart_type: stacked
- dimensions:
- - name: a dimension per app group
- - name: users.uptime
- description: Users Carried Over Uptime
- unit: "seconds"
+ - name: user
+ - name: system
+ - name: user.cpu_guest_utilization
+ description: User CPU guest utilization (100% = 1 core)
+ unit: percentage
chart_type: line
dimensions:
- - name: a dimension per user
- - name: users.uptime_min
- description: Users Minimum Uptime
- unit: "seconds"
+ - name: guest
+ - name: user.cpu_context_switches
+ description: User CPU context switches
+ unit: switches/s
+ chart_type: stacked
+ dimensions:
+ - name: voluntary
+ - name: involuntary
+ - name: user.mem_usage
+ description: User memory RSS usage
+ unit: MiB
+ chart_type: area
+ dimensions:
+ - name: rss
+ - name: user.mem_private_usage
+ description: User memory usage without shared
+ unit: MiB
+ chart_type: area
+ dimensions:
+ - name: mem
+ - name: user.vmem_usage
+ description: User virtual memory size
+ unit: MiB
chart_type: line
dimensions:
- - name: a dimension per user
- - name: users.uptime_avg
- description: Users Average Uptime
- unit: "seconds"
+ - name: vmem
+ - name: user.mem_page_faults
+ description: User memory page faults
+ unit: pgfaults/s
+ chart_type: stacked
+ dimensions:
+ - name: minor
+ - name: major
+ - name: user.swap_usage
+ description: User swap usage
+ unit: MiB
+ chart_type: area
+ dimensions:
+ - name: swap
+ - name: user.disk_physical_io
+ description: User disk physical IO
+ unit: KiB/s
+ chart_type: area
+ dimensions:
+ - name: reads
+ - name: writes
+ - name: user.disk_logical_io
+ description: User disk logical IO
+ unit: KiB/s
+ chart_type: area
+ dimensions:
+ - name: reads
+ - name: writes
+ - name: user.processes
+ description: User processes
+ unit: processes
chart_type: line
dimensions:
- - name: a dimension per user
- - name: users.uptime_max
- description: Users Maximum Uptime
- unit: "seconds"
+ - name: processes
+ - name: user.threads
+ description: User threads
+ unit: threads
chart_type: line
dimensions:
- - name: a dimension per user
- - name: users.files
- description: Users Open Files
- unit: "open files"
- chart_type: stacked
+ - name: threads
+ - name: user.fds_open_limit
+ description: User open file descriptors limit
+ unit: percentage
+ chart_type: line
dimensions:
- - name: a dimension per user
- - name: users.sockets
- description: Users Open Sockets
- unit: "open sockets"
- chart_type: stacked
+ - name: limit
+ - name: user.fds_open
+ description: User open file descriptors
+ unit: fds
+ chart_type: stacked
+ dimensions:
+ - name: files
+ - name: sockets
+ - name: pipes
+ - name: inotifies
+ - name: event
+ - name: timer
+ - name: signal
+ - name: eventpolls
+ - name: other
+ - name: user.uptime
+ description: User uptime
+ unit: seconds
+ chart_type: line
dimensions:
- - name: a dimension per user
- - name: users.pipes
- description: Users Open Pipes
- unit: "open pipes"
- chart_type: stacked
+ - name: uptime
+ - name: user.uptime_summary
+ description: User uptime summary
+ unit: seconds
+ chart_type: area
dimensions:
- - name: a dimension per user
+ - name: min
+ - name: avg
+ - name: max
diff --git a/collectors/cgroups.plugin/Makefile.am b/collectors/cgroups.plugin/Makefile.am
index 354b9fbdc10644..0f6062420b2d1d 100644
--- a/collectors/cgroups.plugin/Makefile.am
+++ b/collectors/cgroups.plugin/Makefile.am
@@ -3,11 +3,21 @@
AUTOMAKE_OPTIONS = subdir-objects
MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
+CLEANFILES = \
+ cgroup-name.sh \
+ cgroup-network-helper.sh \
+ $(NULL)
+
+include $(top_srcdir)/build/subst.inc
+SUFFIXES = .in
+
dist_plugins_SCRIPTS = \
cgroup-name.sh \
cgroup-network-helper.sh \
$(NULL)
dist_noinst_DATA = \
+ cgroup-name.sh.in \
+ cgroup-network-helper.sh.in \
README.md \
$(NULL)
diff --git a/collectors/cgroups.plugin/README.md b/collectors/cgroups.plugin/README.md
index 2e4fff2303da78..ba6a20e5e05c2a 100644
--- a/collectors/cgroups.plugin/README.md
+++ b/collectors/cgroups.plugin/README.md
@@ -139,10 +139,10 @@ chart instead of `auto` to enable it permanently. For example:
You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero
metrics for all internal Netdata plugins.
-### Alarms
+### Alerts
-CPU and memory limits are watched and used to rise alarms. Memory usage for every cgroup is checked against `ram`
-and `ram+swap` limits. CPU usage for every cgroup is checked against `cpuset.cpus` and `cpu.cfs_period_us` + `cpu.cfs_quota_us` pair assigned for the cgroup. Configuration for the alarms is available in `health.d/cgroups.conf`
+CPU and memory limits are watched and used to raise alerts. Memory usage for every cgroup is checked against `ram`
+and `ram+swap` limits. CPU usage for every cgroup is checked against `cpuset.cpus` and `cpu.cfs_period_us` + `cpu.cfs_quota_us` pair assigned for the cgroup. Configuration for the alerts is available in `health.d/cgroups.conf`
file.
## Monitoring systemd services
@@ -264,7 +264,7 @@ Network interfaces and cgroups (containers) are self-cleaned. When a network int
a few errors in error.log complaining about files it cannot find, but immediately:
1. It will detect this is a removed container or network interface
-2. It will freeze/pause all alarms for them
+2. It will freeze/pause all alerts for them
3. It will mark their charts as obsolete
4. Obsolete charts are not be offered on new dashboard sessions (so hit F5 and the charts are gone)
5. Existing dashboard sessions will continue to see them, but of course they will not refresh
diff --git a/collectors/cgroups.plugin/cgroup-charts.c b/collectors/cgroups.plugin/cgroup-charts.c
new file mode 100644
index 00000000000000..a89e8ac4545d3d
--- /dev/null
+++ b/collectors/cgroups.plugin/cgroup-charts.c
@@ -0,0 +1,1526 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "cgroup-internals.h"
+
+void update_cpu_utilization_chart(struct cgroup *cg) { // Creates cg->st_cpu on first use, then pushes the user/system CPU counters of cg to it.
+ RRDSET *chart = cg->st_cpu;
+
+ if (unlikely(!cg->st_cpu)) { // first call for this cgroup: the chart has not been created yet
+ char *title;
+ char *context;
+ int prio;
+
+ if (is_cgroup_systemd_service(cg)) {
+ title = "Systemd Services CPU utilization (100%% = 1 core)"; // NOTE(review): "%%" only collapses to "%" if the title goes through a printf-style formatter - confirm rrdset_create_localhost() does that
+ context = "systemd.service.cpu.utilization";
+ prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD;
+ } else {
+ title = k8s_is_kubepod(cg) ? "CPU Usage (100%% = 1000 mCPU)" : "CPU Usage (100%% = 1 core)"; // NOTE(review): same "%%" question as above
+ context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu" : "cgroup.cpu"; // kubepods get their own context prefix
+ prio = cgroup_containers_chart_priority;
+ }
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_cpu = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "cpu",
+ NULL,
+ "cpu",
+ context,
+ title,
+ "percentage",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+
+ if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { // non-unified hierarchy: divisor is system_hz (presumably counters are in clock ticks - confirm)
+ cg->st_cpu_rd_user = rrddim_add(chart, "user", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL);
+ cg->st_cpu_rd_system = rrddim_add(chart, "system", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL);
+ } else { // unified hierarchy: divisor is 1000000 (presumably counters are in microseconds - confirm)
+ cg->st_cpu_rd_user = rrddim_add(chart, "user", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL);
+ cg->st_cpu_rd_system = rrddim_add(chart, "system", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL);
+ }
+ }
+
+ rrddim_set_by_pointer(chart, cg->st_cpu_rd_user, (collected_number)cg->cpuacct_stat.user); // dimension pointers were cached at creation time
+ rrddim_set_by_pointer(chart, cg->st_cpu_rd_system, (collected_number)cg->cpuacct_stat.system);
+ rrdset_done(chart);
+}
+
+void update_cpu_utilization_limit_chart(struct cgroup *cg, NETDATA_DOUBLE cpu_limit) { // Charts CPU usage of cg as a percentage of cpu_limit; does nothing for systemd services.
+ if (is_cgroup_systemd_service(cg))
+ return;
+
+ RRDSET *chart = cg->st_cpu_limit;
+
+ if (unlikely(!cg->st_cpu_limit)) { // first call: create the chart and seed the previous-usage baseline
+ char *title = "CPU Usage within the limits";
+ char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_limit" : "cgroup.cpu_limit";
+ int prio = cgroup_containers_chart_priority - 1;
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_cpu_limit = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "cpu_limit",
+ NULL,
+ "cpu",
+ context,
+ title,
+ "percentage",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_LINE);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+
+ if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED))
+ rrddim_add(chart, "used", NULL, 1, system_hz, RRD_ALGORITHM_ABSOLUTE);
+ else
+ rrddim_add(chart, "used", NULL, 1, 1000000, RRD_ALGORITHM_ABSOLUTE);
+ cg->prev_cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100; // baseline, so the first delta computed below is 0
+ }
+
+ NETDATA_DOUBLE cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100;
+ NETDATA_DOUBLE cpu_budget = cpu_limit * cgroup_update_every; // denominator of the ratio; zero when either factor is zero
+ NETDATA_DOUBLE cpu_used = (cpu_budget > 0) ? 100 * (cpu_usage - cg->prev_cpu_usage) / cpu_budget : 0; // guard: dividing by 0 gives inf/NaN, which must not reach the integer cast below
+
+ rrdset_isnot_obsolete___safe_from_collector_thread(chart);
+
+ rrddim_set(chart, "used", (cpu_used > 0) ? (collected_number)cpu_used : 0); // negative deltas are reported as 0
+
+ cg->prev_cpu_usage = cpu_usage; // remember this sample for the next delta
+
+ rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, cpu_limit); // the variable is set on the CPU chart (cg->st_cpu), not on this limit chart
+ rrdset_done(chart);
+}
+
+void update_cpu_throttled_chart(struct cgroup *cg) { // Creates (on first use) and updates the "throttled" chart of cg; does nothing for systemd services.
+ if (is_cgroup_systemd_service(cg))
+ return;
+
+ RRDSET *chart = cg->st_cpu_nr_throttled;
+
+ if (unlikely(!cg->st_cpu_nr_throttled)) { // first call: the chart has not been created yet
+ char *title = "CPU Throttled Runnable Periods";
+ char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.throttled" : "cgroup.throttled";
+ int prio = cgroup_containers_chart_priority + 10;
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_cpu_nr_throttled = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "throttled",
+ NULL,
+ "cpu",
+ context,
+ title,
+ "percentage",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, // NOTE(review): systemd services returned early above, so this presumably always picks the CGROUPS module name
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_LINE);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+ rrddim_add(chart, "throttled", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set(chart, "throttled", (collected_number)cg->cpuacct_cpu_throttling.nr_throttled_perc); // value is stored as-is (multiplier 1, divisor 1)
+ rrdset_done(chart);
+}
+
+void update_cpu_throttled_duration_chart(struct cgroup *cg) { // Creates (on first use) and updates the "throttled_duration" chart of cg; does nothing for systemd services.
+ if (is_cgroup_systemd_service(cg))
+ return;
+
+ RRDSET *chart = cg->st_cpu_throttled_time;
+
+ if (unlikely(!cg->st_cpu_throttled_time)) { // first call: the chart has not been created yet
+ char *title = "CPU Throttled Time Duration";
+ char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.throttled_duration" : "cgroup.throttled_duration";
+ int prio = cgroup_containers_chart_priority + 15;
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_cpu_throttled_time = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "throttled_duration",
+ NULL,
+ "cpu",
+ context,
+ title,
+ "ms",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, // NOTE(review): systemd services returned early above, so this presumably always picks the CGROUPS module name
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_LINE);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+ rrddim_add(chart, "duration", NULL, 1, 1000000, RRD_ALGORITHM_INCREMENTAL); // divisor 1000000 with unit "ms": presumably throttled_time is in nanoseconds - confirm
+ }
+
+ rrddim_set(chart, "duration", (collected_number)cg->cpuacct_cpu_throttling.throttled_time);
+ rrdset_done(chart);
+}
+
+void update_cpu_shares_chart(struct cgroup *cg) { // Creates (on first use) and updates the "cpu_shares" chart of cg; does nothing for systemd services.
+ if (is_cgroup_systemd_service(cg))
+ return;
+
+ RRDSET *chart = cg->st_cpu_shares;
+
+ if (unlikely(!cg->st_cpu_shares)) { // first call: the chart has not been created yet
+ char *title = "CPU Time Relative Share";
+ char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_shares" : "cgroup.cpu_shares";
+ int prio = cgroup_containers_chart_priority + 20;
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_cpu_shares = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "cpu_shares",
+ NULL,
+ "cpu",
+ context,
+ title,
+ "shares",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME, // NOTE(review): systemd services returned early above, so this presumably always picks the CGROUPS module name
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_LINE);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+ rrddim_add(chart, "shares", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set(chart, "shares", (collected_number)cg->cpuacct_cpu_shares.shares); // value is stored as-is (multiplier 1, divisor 1)
+ rrdset_done(chart);
+}
+
+void update_cpu_per_core_usage_chart(struct cgroup *cg) { // Creates (on first use) and updates the per-core CPU chart of cg, one dimension per CPU; does nothing for systemd services.
+ if (is_cgroup_systemd_service(cg))
+ return;
+
+ char id[RRD_ID_LENGTH_MAX + 1]; // reused for each dimension id: "cpu0", "cpu1", ...
+ unsigned int i;
+
+ if (unlikely(!cg->st_cpu_per_core)) { // first call: the chart has not been created yet
+ char *title = k8s_is_kubepod(cg) ? "CPU Usage (100%% = 1000 mCPU) Per Core" : "CPU Usage (100%% = 1 core) Per Core"; // NOTE(review): "%%" only collapses to "%" if the title goes through a printf-style formatter - confirm
+ char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_per_core" : "cgroup.cpu_per_core";
+ int prio = cgroup_containers_chart_priority + 100;
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ cg->st_cpu_per_core = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "cpu_per_core",
+ NULL,
+ "cpu",
+ context,
+ title,
+ "percentage",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rrdset_update_rrdlabels(cg->st_cpu_per_core, cg->chart_labels);
+
+ for (i = 0; i < cg->cpuacct_usage.cpus; i++) { // dimensions are only added here, for the CPU count seen on the first call
+ snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i);
+ rrddim_add(cg->st_cpu_per_core, id, NULL, 100, 1000000000, RRD_ALGORITHM_INCREMENTAL); // divisor 1000000000: presumably counters are in nanoseconds - confirm
+ }
+ }
+
+ for (i = 0; i < cg->cpuacct_usage.cpus; i++) {
+ snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i); // must produce the same ids as the creation loop above
+ rrddim_set(cg->st_cpu_per_core, id, (collected_number)cg->cpuacct_usage.cpu_percpu[i]);
+ }
+ rrdset_done(cg->st_cpu_per_core);
+}
+
+void update_mem_usage_detailed_chart(struct cgroup *cg) { // Creates (on first use) and updates the detailed memory chart of cg; the dimension set depends on CGROUP_OPTIONS_IS_UNIFIED.
+ RRDSET *chart = cg->st_mem;
+
+ if (unlikely(!cg->st_mem)) { // first call: the chart has not been created yet
+ char *title;
+ char *context;
+ int prio;
+ if (is_cgroup_systemd_service(cg)) {
+ title = "Systemd Services Memory";
+ context = "systemd.service.memory.ram.usage";
+ prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 15;
+ } else {
+ title = "Memory Usage";
+ context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem" : "cgroup.mem";
+ prio = cgroup_containers_chart_priority + 220;
+ }
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+
+ chart = cg->st_mem = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "mem",
+ NULL,
+ "mem",
+ context,
+ title,
+ "MiB",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+
+ if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { // non-unified dimension set; all values are divided by 1024 * 1024 for the "MiB" unit
+ rrddim_add(chart, "cache", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ rrddim_add(chart, "rss", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+
+ if (cg->memory.detailed_has_swap) // "swap" exists only when the flag is set at creation time
+ rrddim_add(chart, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+
+ rrddim_add(chart, "rss_huge", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ rrddim_add(chart, "mapped_file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ } else { // unified dimension set
+ rrddim_add(chart, "anon", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ rrddim_add(chart, "kernel_stack", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ rrddim_add(chart, "slab", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ rrddim_add(chart, "sock", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ rrddim_add(chart, "anon_thp", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ rrddim_add(chart, "file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ }
+ }
+
+ if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
+ rrddim_set(chart, "cache", (collected_number)cg->memory.total_cache);
+ collected_number rss = (collected_number)(cg->memory.total_rss - cg->memory.total_rss_huge); // "rss" excludes rss_huge, which has its own dimension
+ if (rss < 0) // clamp a negative difference to 0
+ rss = 0;
+ rrddim_set(chart, "rss", rss);
+ if (cg->memory.detailed_has_swap)
+ rrddim_set(chart, "swap", (collected_number)cg->memory.total_swap);
+ rrddim_set(chart, "rss_huge", (collected_number)cg->memory.total_rss_huge);
+ rrddim_set(chart, "mapped_file", (collected_number)cg->memory.total_mapped_file);
+ } else {
+ rrddim_set(chart, "anon", (collected_number)cg->memory.anon);
+ rrddim_set(chart, "kernel_stack", (collected_number)cg->memory.kernel_stack);
+ rrddim_set(chart, "slab", (collected_number)cg->memory.slab);
+ rrddim_set(chart, "sock", (collected_number)cg->memory.sock);
+ rrddim_set(chart, "anon_thp", (collected_number)cg->memory.anon_thp);
+ rrddim_set(chart, "file", (collected_number)cg->memory.total_mapped_file); // NOTE(review): "file" is fed from total_mapped_file - confirm the reader stores the unified "file" value in that field
+ }
+ rrdset_done(chart);
+}
+
+void update_mem_writeback_chart(struct cgroup *cg) { // Creates (on first use) and updates the "writeback" chart of cg (dirty and writeback memory).
+ RRDSET *chart = cg->st_writeback;
+
+ if (unlikely(!cg->st_writeback)) { // first call: the chart has not been created yet
+ char *title;
+ char *context;
+ int prio;
+ if (is_cgroup_systemd_service(cg)) {
+ title = "Systemd Services Writeback Memory";
+ context = "systemd.service.memory.writeback";
+ prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 20;
+ } else {
+ title = "Writeback Memory";
+ context = k8s_is_kubepod(cg) ? "k8s.cgroup.writeback" : "cgroup.writeback";
+ prio = cgroup_containers_chart_priority + 300;
+ }
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_writeback = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "writeback",
+ NULL,
+ "mem",
+ context,
+ title,
+ "MiB",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_AREA);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+ if (cg->memory.detailed_has_dirty) // "dirty" exists only when the flag is set at creation time
+ rrddim_add(chart, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ rrddim_add(chart, "writeback", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ if (cg->memory.detailed_has_dirty) // same condition as at creation, so the dimension is only set when it was added
+ rrddim_set(chart, "dirty", (collected_number)cg->memory.total_dirty);
+ rrddim_set(chart, "writeback", (collected_number)cg->memory.total_writeback);
+ rrdset_done(chart);
+}
+
+void update_mem_activity_chart(struct cgroup *cg) { // Creates (on first use) and updates the "mem_activity" chart of cg (pages paged in and out).
+ RRDSET *chart = cg->st_mem_activity;
+
+ if (unlikely(!cg->st_mem_activity)) { // first call: the chart has not been created yet
+ char *title;
+ char *context;
+ int prio;
+ if (is_cgroup_systemd_service(cg)) {
+ title = "Systemd Services Memory Paging IO";
+ context = "systemd.service.memory.paging.io";
+ prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 30;
+ } else {
+ title = "Memory Activity";
+ context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_activity" : "cgroup.mem_activity";
+ prio = cgroup_containers_chart_priority + 400;
+ }
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_mem_activity = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "mem_activity",
+ NULL,
+ "mem",
+ context,
+ title,
+ "MiB/s",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_LINE);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+ // FIXME: systemd just in, out
+ rrddim_add(chart, "pgpgin", "in", system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); // page count * page size / (1024 * 1024) gives the "MiB/s" unit
+ rrddim_add(chart, "pgpgout", "out", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); // negative multiplier: "out" is plotted below zero
+ }
+
+ rrddim_set(chart, "pgpgin", (collected_number)cg->memory.total_pgpgin); // dimensions are addressed by id ("pgpgin"), not by display name ("in")
+ rrddim_set(chart, "pgpgout", (collected_number)cg->memory.total_pgpgout);
+ rrdset_done(chart);
+}
+
+void update_mem_pgfaults_chart(struct cgroup *cg) { // Creates (on first use) and updates the "pgfaults" chart of cg (page faults and major page faults).
+ RRDSET *chart = cg->st_pgfaults;
+
+ if (unlikely(!cg->st_pgfaults)) { // first call: the chart has not been created yet
+ char *title;
+ char *context;
+ int prio;
+ if (is_cgroup_systemd_service(cg)) {
+ title = "Systemd Services Memory Page Faults";
+ context = "systemd.service.memory.paging.faults";
+ prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 25;
+ } else {
+ title = "Memory Page Faults";
+ context = k8s_is_kubepod(cg) ? "k8s.cgroup.pgfaults" : "cgroup.pgfaults";
+ prio = cgroup_containers_chart_priority + 500;
+ }
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_pgfaults = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "pgfaults",
+ NULL,
+ "mem",
+ context,
+ title,
+ "MiB/s",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_LINE);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+ rrddim_add(chart, "pgfault", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); // fault count * page size / (1024 * 1024) gives the "MiB/s" unit
+ rrddim_add(chart, "pgmajfault", "swap", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); // shown as "swap"; negative multiplier plots it below zero
+ }
+
+ rrddim_set(chart, "pgfault", (collected_number)cg->memory.total_pgfault);
+ rrddim_set(chart, "pgmajfault", (collected_number)cg->memory.total_pgmajfault); // addressed by id ("pgmajfault"), not by display name ("swap")
+ rrdset_done(chart);
+}
+
+void update_mem_usage_limit_chart(struct cgroup *cg, unsigned long long memory_limit) { // Charts used vs. available memory of cg against memory_limit (bytes); does nothing for systemd services.
+ if (is_cgroup_systemd_service(cg))
+ return;
+
+ RRDSET *chart = cg->st_mem_usage_limit;
+
+ if (unlikely(!cg->st_mem_usage_limit)) { // first call: the chart has not been created yet
+ char *title = "Used RAM within the limits";
+ char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage_limit" : "cgroup.mem_usage_limit";
+ int prio = cgroup_containers_chart_priority + 200;
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_mem_usage_limit = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "mem_usage_limit",
+ NULL,
+ "mem",
+ context,
+ title,
+ "MiB",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+
+ rrddim_add(chart, "available", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); // bytes / (1024 * 1024) gives the "MiB" unit
+ rrddim_add(chart, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrdset_isnot_obsolete___safe_from_collector_thread(chart);
+
+ rrddim_set(chart, "available", (memory_limit > cg->memory.usage_in_bytes) ? (collected_number)(memory_limit - cg->memory.usage_in_bytes) : 0); // clamp to 0: the unsigned subtraction would wrap around when usage exceeds the limit
+ rrddim_set(chart, "used", (collected_number)cg->memory.usage_in_bytes);
+ rrdset_done(chart);
+}
+
+void update_mem_utilization_chart(struct cgroup *cg, unsigned long long memory_limit) { // Charts memory usage of cg as an integer percentage of memory_limit (bytes); does nothing for systemd services.
+ if (is_cgroup_systemd_service(cg))
+ return;
+
+ RRDSET *chart = cg->st_mem_utilization;
+
+ if (unlikely(!cg->st_mem_utilization)) { // first call: the chart has not been created yet
+ char *title = "Memory Utilization";
+ char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_utilization" : "cgroup.mem_utilization";
+ int prio = cgroup_containers_chart_priority + 199;
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_mem_utilization = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "mem_utilization",
+ NULL,
+ "mem",
+ context,
+ title,
+ "percentage",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_AREA);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+
+ rrddim_add(chart, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrdset_isnot_obsolete___safe_from_collector_thread(chart);
+ collected_number util = memory_limit ? (collected_number)(cg->memory.usage_in_bytes * 100 / memory_limit) : 0; // guard: integer division by a zero limit is undefined behaviour; report 0 instead
+ rrddim_set(chart, "utilization", util);
+ rrdset_done(chart);
+}
+
+void update_mem_failcnt_chart(struct cgroup *cg) { // Creates (on first use) and updates the "mem_failcnt" chart of cg (memory limit failures).
+ RRDSET *chart = cg->st_mem_failcnt;
+
+ if (unlikely(!cg->st_mem_failcnt)) { // first call: the chart has not been created yet
+ char *title;
+ char *context;
+ int prio;
+ if (is_cgroup_systemd_service(cg)) {
+ title = "Systemd Services Memory Limit Failures";
+ context = "systemd.service.memory.failcnt";
+ prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 10;
+ } else {
+ title = "Memory Limit Failures";
+ context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_failcnt" : "cgroup.mem_failcnt";
+ prio = cgroup_containers_chart_priority + 250;
+ }
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_mem_failcnt = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "mem_failcnt",
+ NULL,
+ "mem",
+ context,
+ title,
+ "count",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_LINE);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+ rrddim_add(chart, "failures", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); // registered as incremental: failcnt is fed in as a running counter
+ }
+
+ rrddim_set(chart, "failures", (collected_number)cg->memory.failcnt);
+ rrdset_done(chart);
+}
+
+void update_mem_usage_chart(struct cgroup *cg) { // Creates (on first use) and updates the "mem_usage" chart of cg (ram and swap).
+ RRDSET *chart = cg->st_mem_usage;
+
+ if (unlikely(!cg->st_mem_usage)) { // first call: the chart has not been created yet
+ char *title;
+ char *context;
+ int prio;
+ if (is_cgroup_systemd_service(cg)) {
+ title = "Systemd Services Used Memory";
+ context = "systemd.service.memory.usage";
+ prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 5;
+ } else {
+ title = "Used Memory";
+ context = k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage" : "cgroup.mem_usage";
+ prio = cgroup_containers_chart_priority + 210;
+ }
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_mem_usage = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "mem_usage",
+ NULL,
+ "mem",
+ context,
+ title,
+ "MiB",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+
+ cg->st_mem_rd_ram = rrddim_add(chart, "ram", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE); // bytes / (1024 * 1024) gives the "MiB" unit
+ cg->st_mem_rd_swap = rrddim_add(chart, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set_by_pointer(chart, cg->st_mem_rd_ram, (collected_number)cg->memory.usage_in_bytes);
+
+ if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { // non-unified: swap = msw_usage - (usage + total_inactive_file), reported as 0 when that would not be positive
+ rrddim_set_by_pointer(
+ chart,
+ cg->st_mem_rd_swap,
+ cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ?
+ (collected_number)(cg->memory.msw_usage_in_bytes -
+ (cg->memory.usage_in_bytes + cg->memory.total_inactive_file)) :
+ 0);
+ } else { // unified: msw_usage_in_bytes is used directly as the swap value
+ rrddim_set_by_pointer(chart, cg->st_mem_rd_swap, (collected_number)cg->memory.msw_usage_in_bytes);
+ }
+
+ rrdset_done(chart);
+}
+
+void update_io_serviced_bytes_chart(struct cgroup *cg) { // Creates (on first use) and updates the "io" chart of cg (bytes read and written).
+ RRDSET *chart = cg->st_io;
+
+ if (unlikely(!cg->st_io)) { // first call: the chart has not been created yet
+ char *title;
+ char *context;
+ int prio;
+ if (is_cgroup_systemd_service(cg)) {
+ title = "Systemd Services Disk Read/Write Bandwidth";
+ context = "systemd.service.disk.io";
+ prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 35;
+ } else {
+ title = "I/O Bandwidth (all disks)";
+ context = k8s_is_kubepod(cg) ? "k8s.cgroup.io" : "cgroup.io";
+ prio = cgroup_containers_chart_priority + 1200;
+ }
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_io = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "io",
+ NULL,
+ "disk",
+ context,
+ title,
+ "KiB/s",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_AREA);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+ cg->st_io_rd_read = rrddim_add(chart, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); // bytes / 1024 gives the "KiB/s" unit
+ cg->st_io_rd_written = rrddim_add(chart, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); // negative multiplier plots writes below zero; uses the local chart like every other call here
+ }
+
+ rrddim_set_by_pointer(chart, cg->st_io_rd_read, (collected_number)cg->io_service_bytes.Read); // dimension pointers were cached at creation time
+ rrddim_set_by_pointer(chart, cg->st_io_rd_written, (collected_number)cg->io_service_bytes.Write);
+ rrdset_done(chart);
+}
+
+void update_io_serviced_ops_chart(struct cgroup *cg) { // Creates (on first use) and updates the "serviced_ops" chart of cg (read and write operations).
+ RRDSET *chart = cg->st_serviced_ops;
+
+ if (unlikely(!cg->st_serviced_ops)) { // first call: the chart has not been created yet
+ char *title;
+ char *context;
+ int prio;
+ if (is_cgroup_systemd_service(cg)) {
+ title = "Systemd Services Disk Read/Write Operations";
+ context = "systemd.service.disk.iops";
+ prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 40;
+ } else {
+ title = "Serviced I/O Operations (all disks)";
+ context = k8s_is_kubepod(cg) ? "k8s.cgroup.serviced_ops" : "cgroup.serviced_ops";
+ prio = cgroup_containers_chart_priority + 1200; // NOTE(review): same priority offset (+1200) as the other disk charts in this file - confirm the ordering is intended
+ }
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_serviced_ops = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "serviced_ops",
+ NULL,
+ "disk",
+ context,
+ title,
+ "operations/s",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_LINE);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+ rrddim_add(chart, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rrddim_add(chart, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); // negative multiplier plots writes below zero
+ }
+
+ rrddim_set(chart, "read", (collected_number)cg->io_serviced.Read); // dimensions are looked up by id here; no pointers are cached for this chart
+ rrddim_set(chart, "write", (collected_number)cg->io_serviced.Write);
+ rrdset_done(chart);
+}
+
+void update_throttle_io_serviced_bytes_chart(struct cgroup *cg) { // Creates (on first use) and updates the "throttle_io" chart of cg (throttle bytes read and written).
+ RRDSET *chart = cg->st_throttle_io;
+
+ if (unlikely(!cg->st_throttle_io)) { // first call: the chart has not been created yet
+ char *title;
+ char *context;
+ int prio;
+ if (is_cgroup_systemd_service(cg)) {
+ title = "Systemd Services Throttle Disk Read/Write Bandwidth";
+ context = "systemd.service.disk.throttle.io";
+ prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 45;
+ } else {
+ title = "Throttle I/O Bandwidth (all disks)";
+ context = k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_io" : "cgroup.throttle_io";
+ prio = cgroup_containers_chart_priority + 1200; // NOTE(review): same priority offset (+1200) as the other disk charts in this file - confirm the ordering is intended
+ }
+
+ char buff[RRD_ID_LENGTH_MAX + 1]; // scratch buffer handed to cgroup_chart_type()
+ chart = cg->st_throttle_io = rrdset_create_localhost(
+ cgroup_chart_type(buff, cg),
+ "throttle_io",
+ NULL,
+ "disk",
+ context,
+ title,
+ "KiB/s",
+ PLUGIN_CGROUPS_NAME,
+ is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+ prio,
+ cgroup_update_every,
+ RRDSET_TYPE_AREA);
+
+ rrdset_update_rrdlabels(chart, cg->chart_labels);
+
+ cg->st_throttle_io_rd_read = rrddim_add(chart, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL); // bytes / 1024 gives the "KiB/s" unit
+ cg->st_throttle_io_rd_written = rrddim_add(chart, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL); // negative multiplier plots writes below zero
+ }
+
+ rrddim_set_by_pointer(chart, cg->st_throttle_io_rd_read, (collected_number)cg->throttle_io_service_bytes.Read); // dimension pointers were cached at creation time
+ rrddim_set_by_pointer(chart, cg->st_throttle_io_rd_written, (collected_number)cg->throttle_io_service_bytes.Write);
+ rrdset_done(chart);
+}
+
+void update_throttle_io_serviced_ops_chart(struct cgroup *cg) {
+    // Feeds the read/write counters of cg->throttle_io_serviced to the "throttle_serviced_ops" chart, creating it if needed.
+    RRDSET *st = cg->st_throttle_serviced_ops;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its two dimensions
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *title, *context;
+        int prio;
+        if (!is_cgroup_systemd_service(cg)) {
+            title = "Throttle Serviced I/O Operations (all disks)";
+            context = k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_serviced_ops" : "cgroup.throttle_serviced_ops";
+            prio = cgroup_containers_chart_priority + 1200;
+        } else {
+            title = "Systemd Services Throttle Disk Read/Write Operations";
+            context = "systemd.service.disk.throttle.iops";
+            prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 50;
+        }
+
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "throttle_serviced_ops",
+            NULL,
+            "disk",
+            context,
+            title,
+            "operations/s",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        cg->st_throttle_serviced_ops = st;
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        rrddim_add(st, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+        rrddim_add(st, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set(st, "read", (collected_number)cg->throttle_io_serviced.Read);
+    rrddim_set(st, "write", (collected_number)cg->throttle_io_serviced.Write);
+    rrdset_done(st);
+}
+
+void update_io_queued_ops_chart(struct cgroup *cg) {
+    // Feeds the read/write values of cg->io_queued to the "queued_ops" chart, creating the chart if needed.
+    RRDSET *st = cg->st_queued_ops;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its two dimensions
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *title, *context;
+        int prio;
+        if (!is_cgroup_systemd_service(cg)) {
+            title = "Queued I/O Operations (all disks)";
+            context = k8s_is_kubepod(cg) ? "k8s.cgroup.queued_ops" : "cgroup.queued_ops";
+            prio = cgroup_containers_chart_priority + 2000;
+        } else {
+            title = "Systemd Services Queued Disk Read/Write Operations";
+            context = "systemd.service.disk.queued_iops";
+            prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 55;
+        }
+
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "queued_ops",
+            NULL,
+            "disk",
+            context,
+            title,
+            "operations",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        cg->st_queued_ops = st;
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        rrddim_add(st, "read", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+        rrddim_add(st, "write", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    rrddim_set(st, "read", (collected_number)cg->io_queued.Read);
+    rrddim_set(st, "write", (collected_number)cg->io_queued.Write);
+    rrdset_done(st);
+}
+
+void update_io_merged_ops_chart(struct cgroup *cg) {
+    // Feeds the read/write counters of cg->io_merged to the "merged_ops" chart, creating the chart if needed.
+    RRDSET *st = cg->st_merged_ops;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its two dimensions
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *title, *context;
+        int prio;
+        if (!is_cgroup_systemd_service(cg)) {
+            title = "Merged I/O Operations (all disks)";
+            context = k8s_is_kubepod(cg) ? "k8s.cgroup.merged_ops" : "cgroup.merged_ops";
+            prio = cgroup_containers_chart_priority + 2100;
+        } else {
+            title = "Systemd Services Merged Disk Read/Write Operations";
+            context = "systemd.service.disk.merged_iops";
+            prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 60;
+        }
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "merged_ops",
+            NULL,
+            "disk",
+            context,
+            title,
+            "operations/s",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        cg->st_merged_ops = st;
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        // units are operations/s: use 1 like the other operations charts (1024 is only for the KiB/s chart)
+        rrddim_add(st, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+        rrddim_add(st, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set(st, "read", (collected_number)cg->io_merged.Read);
+    rrddim_set(st, "write", (collected_number)cg->io_merged.Write);
+    rrdset_done(st);
+}
+
+void update_cpu_some_pressure_chart(struct cgroup *cg) {
+    // Charts the 10/60/300 "some" CPU pressure percentages of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->cpu_pressure.some;
+    RRDSET *st = pc->share_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its three dimensions
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure" : "cgroup.cpu_some_pressure";
+        int prio = cgroup_containers_chart_priority + 2200;
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "cpu_some_pressure",
+            NULL,
+            "cpu",
+            context,
+            "CPU some pressure",
+            "percentage",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->share_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->share_time.rd10 = rrddim_add(st, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd60 = rrddim_add(st, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd300 = rrddim_add(st, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    // each value is multiplied by 100 before the cast to collected_number
+    rrddim_set_by_pointer(st, pc->share_time.rd10, (collected_number)(pc->share_time.value10 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd60, (collected_number)(pc->share_time.value60 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd300, (collected_number)(pc->share_time.value300 * 100));
+    rrdset_done(st);
+}
+
+void update_cpu_some_pressure_stall_time_chart(struct cgroup *cg) {
+    // Charts the total "some" CPU pressure stall time of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->cpu_pressure.some;
+    RRDSET *st = pc->total_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its single dimension
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure_stall_time" :
+                                             "cgroup.cpu_some_pressure_stall_time";
+        int prio = cgroup_containers_chart_priority + 2220;
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "cpu_some_pressure_stall_time",
+            NULL,
+            "cpu",
+            context,
+            "CPU some pressure stall time",
+            "ms",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->total_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->total_time.rdtotal = rrddim_add(st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set_by_pointer(st, pc->total_time.rdtotal, (collected_number)(pc->total_time.value_total));
+    rrdset_done(st);
+}
+
+void update_cpu_full_pressure_chart(struct cgroup *cg) {
+    // Charts the 10/60/300 "full" CPU pressure percentages of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->cpu_pressure.full;
+    RRDSET *st = pc->share_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its three dimensions
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure" : "cgroup.cpu_full_pressure";
+        int prio = cgroup_containers_chart_priority + 2240;
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "cpu_full_pressure",
+            NULL,
+            "cpu",
+            context,
+            "CPU full pressure",
+            "percentage",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->share_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->share_time.rd10 = rrddim_add(st, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd60 = rrddim_add(st, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd300 = rrddim_add(st, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    // each value is multiplied by 100 before the cast to collected_number
+    rrddim_set_by_pointer(st, pc->share_time.rd10, (collected_number)(pc->share_time.value10 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd60, (collected_number)(pc->share_time.value60 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd300, (collected_number)(pc->share_time.value300 * 100));
+    rrdset_done(st);
+}
+
+void update_cpu_full_pressure_stall_time_chart(struct cgroup *cg) {
+    // Charts the total "full" CPU pressure stall time of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->cpu_pressure.full;
+    RRDSET *st = pc->total_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its single dimension
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure_stall_time" :
+                                             "cgroup.cpu_full_pressure_stall_time";
+        int prio = cgroup_containers_chart_priority + 2260;
+
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "cpu_full_pressure_stall_time",
+            NULL,
+            "cpu",
+            context,
+            "CPU full pressure stall time",
+            "ms",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->total_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->total_time.rdtotal = rrddim_add(st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set_by_pointer(st, pc->total_time.rdtotal, (collected_number)(pc->total_time.value_total));
+    rrdset_done(st);
+}
+
+void update_mem_some_pressure_chart(struct cgroup *cg) {
+    // Charts the 10/60/300 "some" memory pressure percentages of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->memory_pressure.some;
+    RRDSET *st = pc->share_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its three dimensions
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure" : "cgroup.memory_some_pressure";
+        int prio = cgroup_containers_chart_priority + 2300;
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "mem_some_pressure",
+            NULL,
+            "mem",
+            context,
+            "Memory some pressure",
+            "percentage",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->share_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->share_time.rd10 = rrddim_add(st, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd60 = rrddim_add(st, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd300 = rrddim_add(st, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    // each value is multiplied by 100 before the cast to collected_number
+    rrddim_set_by_pointer(st, pc->share_time.rd10, (collected_number)(pc->share_time.value10 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd60, (collected_number)(pc->share_time.value60 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd300, (collected_number)(pc->share_time.value300 * 100));
+    rrdset_done(st);
+}
+
+void update_mem_some_pressure_stall_time_chart(struct cgroup *cg) {
+    // Charts the total "some" memory pressure stall time of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->memory_pressure.some;
+    RRDSET *st = pc->total_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its single dimension
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context =
+            k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure_stall_time" : "cgroup.memory_some_pressure_stall_time";
+        int prio = cgroup_containers_chart_priority + 2320;
+
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "memory_some_pressure_stall_time",
+            NULL,
+            "mem",
+            context,
+            "Memory some pressure stall time",
+            "ms",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->total_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->total_time.rdtotal = rrddim_add(st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set_by_pointer(st, pc->total_time.rdtotal, (collected_number)(pc->total_time.value_total));
+    rrdset_done(st);
+}
+
+void update_mem_full_pressure_chart(struct cgroup *cg) {
+    // Charts the 10/60/300 "full" memory pressure percentages of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->memory_pressure.full;
+    RRDSET *st = pc->share_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its three dimensions
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure" : "cgroup.memory_full_pressure";
+        int prio = cgroup_containers_chart_priority + 2340;
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "mem_full_pressure",
+            NULL,
+            "mem",
+            context,
+            "Memory full pressure",
+            "percentage",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->share_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->share_time.rd10 = rrddim_add(st, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd60 = rrddim_add(st, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd300 = rrddim_add(st, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    // each value is multiplied by 100 before the cast to collected_number
+    rrddim_set_by_pointer(st, pc->share_time.rd10, (collected_number)(pc->share_time.value10 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd60, (collected_number)(pc->share_time.value60 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd300, (collected_number)(pc->share_time.value300 * 100));
+    rrdset_done(st);
+}
+
+void update_mem_full_pressure_stall_time_chart(struct cgroup *cg) {
+    // Charts the total "full" memory pressure stall time of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->memory_pressure.full;
+    RRDSET *st = pc->total_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its single dimension
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context =
+            k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure_stall_time" : "cgroup.memory_full_pressure_stall_time";
+        int prio = cgroup_containers_chart_priority + 2360;
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "memory_full_pressure_stall_time",
+            NULL,
+            "mem",
+            context,
+            "Memory full pressure stall time",
+            "ms",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->total_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->total_time.rdtotal = rrddim_add(st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set_by_pointer(st, pc->total_time.rdtotal, (collected_number)(pc->total_time.value_total));
+    rrdset_done(st);
+}
+
+void update_irq_some_pressure_chart(struct cgroup *cg) {
+    // Charts the 10/60/300 "some" IRQ pressure percentages of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->irq_pressure.some;
+    RRDSET *st = pc->share_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its three dimensions
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure" : "cgroup.irq_some_pressure";
+        int prio = cgroup_containers_chart_priority + 2310;
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "irq_some_pressure",
+            NULL,
+            "interrupts",
+            context,
+            "IRQ some pressure",
+            "percentage",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->share_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->share_time.rd10 = rrddim_add(st, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd60 = rrddim_add(st, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd300 = rrddim_add(st, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    // each value is multiplied by 100 before the cast to collected_number
+    rrddim_set_by_pointer(st, pc->share_time.rd10, (collected_number)(pc->share_time.value10 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd60, (collected_number)(pc->share_time.value60 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd300, (collected_number)(pc->share_time.value300 * 100));
+    rrdset_done(st);
+}
+
+void update_irq_some_pressure_stall_time_chart(struct cgroup *cg) {
+    // Charts the total "some" IRQ pressure stall time of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->irq_pressure.some;
+    RRDSET *st = pc->total_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its single dimension
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure_stall_time" :
+                                             "cgroup.irq_some_pressure_stall_time";
+        int prio = cgroup_containers_chart_priority + 2330;
+
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "irq_some_pressure_stall_time",
+            NULL,
+            "interrupts",
+            context,
+            "IRQ some pressure stall time",
+            "ms",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->total_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->total_time.rdtotal = rrddim_add(st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set_by_pointer(st, pc->total_time.rdtotal, (collected_number)(pc->total_time.value_total));
+    rrdset_done(st);
+}
+
+void update_irq_full_pressure_chart(struct cgroup *cg) {
+    // Charts the 10/60/300 "full" IRQ pressure percentages of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->irq_pressure.full;
+    RRDSET *st = pc->share_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its three dimensions
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure" : "cgroup.irq_full_pressure";
+        int prio = cgroup_containers_chart_priority + 2350;
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "irq_full_pressure",
+            NULL,
+            "interrupts",
+            context,
+            "IRQ full pressure",
+            "percentage",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->share_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->share_time.rd10 = rrddim_add(st, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd60 = rrddim_add(st, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd300 = rrddim_add(st, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    // each value is multiplied by 100 before the cast to collected_number
+    rrddim_set_by_pointer(st, pc->share_time.rd10, (collected_number)(pc->share_time.value10 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd60, (collected_number)(pc->share_time.value60 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd300, (collected_number)(pc->share_time.value300 * 100));
+    rrdset_done(st);
+}
+
+void update_irq_full_pressure_stall_time_chart(struct cgroup *cg) {
+    // Charts the total "full" IRQ pressure stall time of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->irq_pressure.full;
+    RRDSET *st = pc->total_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its single dimension
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure_stall_time" :
+                                             "cgroup.irq_full_pressure_stall_time";
+        int prio = cgroup_containers_chart_priority + 2370;
+
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "irq_full_pressure_stall_time",
+            NULL,
+            "interrupts",
+            context,
+            "IRQ full pressure stall time",
+            "ms",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->total_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->total_time.rdtotal = rrddim_add(st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set_by_pointer(st, pc->total_time.rdtotal, (collected_number)(pc->total_time.value_total));
+    rrdset_done(st);
+}
+
+void update_io_some_pressure_chart(struct cgroup *cg) {
+    // Charts the 10/60/300 "some" I/O pressure percentages of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->io_pressure.some;
+    RRDSET *st = pc->share_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its three dimensions
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure" : "cgroup.io_some_pressure";
+        int prio = cgroup_containers_chart_priority + 2400;
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "io_some_pressure",
+            NULL,
+            "disk",
+            context,
+            "I/O some pressure",
+            "percentage",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->share_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->share_time.rd10 = rrddim_add(st, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd60 = rrddim_add(st, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd300 = rrddim_add(st, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    // each value is multiplied by 100 before the cast to collected_number
+    rrddim_set_by_pointer(st, pc->share_time.rd10, (collected_number)(pc->share_time.value10 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd60, (collected_number)(pc->share_time.value60 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd300, (collected_number)(pc->share_time.value300 * 100));
+    rrdset_done(st);
+}
+
+void update_io_some_pressure_stall_time_chart(struct cgroup *cg) {
+    // Charts the total "some" I/O pressure stall time of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->io_pressure.some;
+    RRDSET *st = pc->total_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its single dimension
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure_stall_time" :
+                                             "cgroup.io_some_pressure_stall_time";
+        int prio = cgroup_containers_chart_priority + 2420;
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "io_some_pressure_stall_time",
+            NULL,
+            "disk",
+            context,
+            "I/O some pressure stall time",
+            "ms",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->total_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->total_time.rdtotal = rrddim_add(st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set_by_pointer(st, pc->total_time.rdtotal, (collected_number)(pc->total_time.value_total));
+    rrdset_done(st);
+}
+
+void update_io_full_pressure_chart(struct cgroup *cg) {
+    // Charts the 10/60/300 "full" I/O pressure percentages of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->io_pressure.full;
+    RRDSET *st = pc->share_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its three dimensions
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure" : "cgroup.io_full_pressure";
+        int prio = cgroup_containers_chart_priority + 2440;
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "io_full_pressure",
+            NULL,
+            "disk",
+            context,
+            "I/O full pressure",
+            "percentage",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->share_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->share_time.rd10 = rrddim_add(st, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd60 = rrddim_add(st, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+        pc->share_time.rd300 = rrddim_add(st, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    // each value is multiplied by 100 before the cast to collected_number
+    rrddim_set_by_pointer(st, pc->share_time.rd10, (collected_number)(pc->share_time.value10 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd60, (collected_number)(pc->share_time.value60 * 100));
+    rrddim_set_by_pointer(st, pc->share_time.rd300, (collected_number)(pc->share_time.value300 * 100));
+    rrdset_done(st);
+}
+
+void update_io_full_pressure_stall_time_chart(struct cgroup *cg) {
+    // Charts the total "full" I/O pressure stall time of a cgroup; systemd services are skipped.
+    if (is_cgroup_systemd_service(cg))
+        return;
+
+    struct pressure_charts *pc = &cg->io_pressure.full;
+    RRDSET *st = pc->total_time.st;
+
+    if (unlikely(!st)) { // chart not created yet: create it with its single dimension
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *context = k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure_stall_time" :
+                                             "cgroup.io_full_pressure_stall_time";
+        int prio = cgroup_containers_chart_priority + 2460;
+
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "io_full_pressure_stall_time",
+            NULL,
+            "disk",
+            context,
+            "I/O full pressure stall time",
+            "ms",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        pc->total_time.st = st;
+
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        pc->total_time.rdtotal = rrddim_add(st, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set_by_pointer(st, pc->total_time.rdtotal, (collected_number)(pc->total_time.value_total));
+    rrdset_done(st);
+}
+
+void update_pids_current_chart(struct cgroup *cg) {
+    // Feeds cg->pids.pids_current to the "pids_current" chart, creating the chart if needed.
+    RRDSET *st = cg->st_pids;
+
+    if (unlikely(!st)) { // chart not created yet: create it and remember its single dimension
+        char buff[RRD_ID_LENGTH_MAX + 1];
+        char *title, *context;
+        int prio;
+        if (!is_cgroup_systemd_service(cg)) {
+            title = "Number of processes";
+            context = k8s_is_kubepod(cg) ? "k8s.cgroup.pids_current" : "cgroup.pids_current";
+            prio = cgroup_containers_chart_priority + 2150;
+        } else {
+            title = "Systemd Services Number of Processes";
+            context = "systemd.service.pids.current";
+            prio = NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 70;
+        }
+
+        st = rrdset_create_localhost(
+            cgroup_chart_type(buff, cg),
+            "pids_current",
+            NULL,
+            "pids",
+            context,
+            title,
+            "pids",
+            PLUGIN_CGROUPS_NAME,
+            is_cgroup_systemd_service(cg) ? PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME : PLUGIN_CGROUPS_MODULE_CGROUPS_NAME,
+            prio,
+            cgroup_update_every,
+            RRDSET_TYPE_LINE);
+        cg->st_pids = st;
+        rrdset_update_rrdlabels(st, cg->chart_labels);
+        cg->st_pids_rd_pids_current = rrddim_add(st, "pids", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    rrddim_set_by_pointer(st, cg->st_pids_rd_pids_current, (collected_number)cg->pids.pids_current);
+    rrdset_done(st);
+}
diff --git a/collectors/cgroups.plugin/cgroup-discovery.c b/collectors/cgroups.plugin/cgroup-discovery.c
new file mode 100644
index 00000000000000..28c6d96cf93c50
--- /dev/null
+++ b/collectors/cgroups.plugin/cgroup-discovery.c
@@ -0,0 +1,1243 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "cgroup-internals.h"
+
+// discovery cgroup thread worker jobs
+// (one id per kind of work; ids are consecutive, starting from zero)
+#define WORKER_DISCOVERY_INIT 0
+#define WORKER_DISCOVERY_FIND 1
+#define WORKER_DISCOVERY_PROCESS 2
+#define WORKER_DISCOVERY_PROCESS_RENAME 3
+#define WORKER_DISCOVERY_PROCESS_NETWORK 4
+#define WORKER_DISCOVERY_PROCESS_FIRST_TIME 5
+#define WORKER_DISCOVERY_UPDATE 6
+#define WORKER_DISCOVERY_CLEANUP 7
+#define WORKER_DISCOVERY_COPY 8
+#define WORKER_DISCOVERY_SHARE 9
+#define WORKER_DISCOVERY_LOCK 10
+
+// compile-time guard: the 11 job ids above (0..10) must fit in the worker job table
+#if WORKER_UTILIZATION_MAX_JOB_TYPES < 11
+#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 11
+#endif
+
+// head of the list of discovered cgroups (linked through ->discovered_next)
+struct cgroup *discovered_cgroup_root = NULL;
+
+// prefixes prepended to a cgroup's chart id when its name is shared with eBPF
+char cgroup_chart_id_prefix[] = "cgroup_";
+char services_chart_id_prefix[] = "systemd_";
+// command executed by discovery_rename_cgroup() to resolve cgroup names
+char *cgroups_rename_script = NULL;
+
+
+// ----------------------------------------------------------------------------
+
+// Release what a pressure (PSI) entry owns: every chart that was created for it is flagged
+// obsolete and the filename string is freed. The struct itself is not freed.
+static inline void free_pressure(struct pressure *res) {
+    RRDSET *charts[] = {
+        res->some.share_time.st,
+        res->some.total_time.st,
+        res->full.share_time.st,
+        res->full.total_time.st,
+    };
+
+    for (size_t i = 0; i < sizeof(charts) / sizeof(charts[0]); i++) {
+        if (charts[i])
+            rrdset_is_obsolete___safe_from_collector_thread(charts[i]);
+    }
+
+    freez(res->filename);
+}
+
+// Drop every network interface attached to a cgroup: unregister the proc_net_dev rename of
+// its host device and free the interface together with its device name strings.
+// On return cg->interfaces is NULL.
+static inline void cgroup_free_network_interfaces(struct cgroup *cg) {
+    struct cgroup_network_interface *iface, *following;
+
+    for (iface = cg->interfaces; iface; iface = following) {
+        following = iface->next;
+
+        // unlink first, so the list head never points to an interface being released
+        cg->interfaces = following;
+
+        // delete the registration of proc_net_dev rename
+        netdev_rename_device_del(iface->host_device);
+
+        freez((void *)iface->host_device);
+        freez((void *)iface->container_device);
+        freez((void *)iface);
+    }
+}
+
+// Destroy a cgroup object.
+//
+// Flags all charts of the cgroup as obsolete, releases every string, buffer and helper
+// structure it owns, frees the object itself and decrements cgroup_root_count.
+// The caller is expected to have unlinked cg from any list before calling this.
+static inline void cgroup_free(struct cgroup *cg) {
+    netdata_log_debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available");
+
+    cgroup_netdev_delete(cg);
+
+    // charts: only the ones that were actually created are flagged obsolete
+    if(cg->st_cpu)                   rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu);
+    if(cg->st_cpu_limit)             rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_limit);
+    if(cg->st_cpu_per_core)          rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_per_core);
+    if(cg->st_cpu_nr_throttled)      rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_nr_throttled);
+    if(cg->st_cpu_throttled_time)    rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_throttled_time);
+    if(cg->st_cpu_shares)            rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_shares);
+    if(cg->st_mem)                   rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem);
+    if(cg->st_writeback)             rrdset_is_obsolete___safe_from_collector_thread(cg->st_writeback);
+    if(cg->st_mem_activity)          rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_activity);
+    if(cg->st_pgfaults)              rrdset_is_obsolete___safe_from_collector_thread(cg->st_pgfaults);
+    if(cg->st_mem_usage)             rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage);
+    if(cg->st_mem_usage_limit)       rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage_limit);
+    if(cg->st_mem_utilization)       rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_utilization);
+    if(cg->st_mem_failcnt)           rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_failcnt);
+    if(cg->st_io)                    rrdset_is_obsolete___safe_from_collector_thread(cg->st_io);
+    if(cg->st_serviced_ops)          rrdset_is_obsolete___safe_from_collector_thread(cg->st_serviced_ops);
+    if(cg->st_throttle_io)           rrdset_is_obsolete___safe_from_collector_thread(cg->st_throttle_io);
+    if(cg->st_throttle_serviced_ops) rrdset_is_obsolete___safe_from_collector_thread(cg->st_throttle_serviced_ops);
+    if(cg->st_queued_ops)            rrdset_is_obsolete___safe_from_collector_thread(cg->st_queued_ops);
+    if(cg->st_merged_ops)            rrdset_is_obsolete___safe_from_collector_thread(cg->st_merged_ops);
+    if(cg->st_pids)                  rrdset_is_obsolete___safe_from_collector_thread(cg->st_pids);
+
+    // filenames of the limit files
+    freez(cg->filename_cpuset_cpus);
+    freez(cg->filename_cpu_cfs_period);
+    freez(cg->filename_cpu_cfs_quota);
+    freez(cg->filename_memory_limit);
+    freez(cg->filename_memoryswap_limit);
+
+    cgroup_free_network_interfaces(cg);
+
+    // cpu
+    freez(cg->cpuacct_usage.cpu_percpu);
+
+    freez(cg->cpuacct_stat.filename);
+    freez(cg->cpuacct_usage.filename);
+    freez(cg->cpuacct_cpu_throttling.filename);
+    freez(cg->cpuacct_cpu_shares.filename);
+
+    // memory
+    arl_free(cg->memory.arl_base);
+    freez(cg->memory.filename_detailed);
+    freez(cg->memory.filename_failcnt);
+    freez(cg->memory.filename_usage_in_bytes);
+    freez(cg->memory.filename_msw_usage_in_bytes);
+
+    // block I/O
+    freez(cg->io_service_bytes.filename);
+    freez(cg->io_serviced.filename);
+
+    freez(cg->throttle_io_service_bytes.filename);
+    freez(cg->throttle_io_serviced.filename);
+
+    freez(cg->io_merged.filename);
+    freez(cg->io_queued.filename);
+    freez(cg->pids.pids_current_filename);
+
+    // pressure (PSI): charts are flagged obsolete and filenames are freed
+    free_pressure(&cg->cpu_pressure);
+    free_pressure(&cg->io_pressure);
+    free_pressure(&cg->memory_pressure);
+    free_pressure(&cg->irq_pressure);
+
+    // identity strings (still needed by the debug message at the top, so freed last)
+    freez(cg->id);
+    freez(cg->intermediate_id);
+    freez(cg->chart_id);
+    freez(cg->name);
+
+    rrdlabels_destroy(cg->chart_labels);
+
+    freez(cg);
+
+    cgroup_root_count--;
+}
+
+// ----------------------------------------------------------------------------
+// add/remove/find cgroup objects
+
+// size (excluding the terminating NUL) of the line buffers used in this file to read the
+// output of the rename script and to hold a copy of a cgroup id
+#define CGROUP_CHARTID_LINE_MAX 1024
+
+// Build a chart id from a cgroup path.
+//
+// A NULL or empty path is treated as "/". A single leading slash is dropped, unless the
+// path is exactly "/". The result is a newly allocated string that has been passed through
+// netdata_fix_chart_id(); the caller owns it and must freez() it.
+static inline char *cgroup_chart_id_strdupz(const char *s) {
+    const char *path = (s && *s) ? s : "/";
+
+    if(path[0] == '/' && path[1] != '\0')
+        path++;
+
+    char *chart_id = strdupz(path);
+    netdata_fix_chart_id(chart_id);
+
+    return chart_id;
+}
+
+// TODO: move the code to cgroup_chart_id_strdupz() when the renaming script is fixed
+// Replace, in place, every '.' of the given string with '-'.
+static inline void substitute_dots_in_id(char *s) {
+    // dots are used to distinguish chart type and id in streaming, so we should replace them
+    char *dot = s;
+    while ((dot = strchr(dot, '.')) != NULL)
+        *dot++ = '-';
+}
+
+// ----------------------------------------------------------------------------
+// parse k8s labels
+
+// Split a line of the form "NAME key=value,key=value,..." in place.
+//
+// The first space separated word is returned as the name (it points inside data). Every
+// remaining comma separated pair is added to labels with the AUTO and K8S sources.
+char *cgroup_parse_resolved_name_and_labels(RRDLABELS *labels, char *data) {
+    char *cursor = data;
+
+    // the first word, up to the first space is the name
+    char *name = strsep_skip_consecutive_separators(&cursor, " ");
+
+    // the rest are key=value pairs separated by comma
+    for (char *pair; cursor; ) {
+        pair = strsep_skip_consecutive_separators(&cursor, ",");
+        rrdlabels_add_pair(labels, pair, RRDLABEL_SRC_AUTO | RRDLABEL_SRC_K8S);
+    }
+
+    return name;
+}
+
+// Resolve the name (and the labels) of a cgroup by running the rename script.
+//
+// Every call consumes one of the cg->pending_renames attempts. The exit code of the script
+// decides what happens next:
+//   0     - no more attempts; the first line it printed is used as "NAME label=value,..."
+//   3     - no more attempts; the cgroup is marked as processed and is not renamed
+//   other - nothing changes, the remaining attempts are left for later calls
+// If the script cannot be started, the cgroup is marked as processed with no attempts left.
+static inline void discovery_rename_cgroup(struct cgroup *cg) {
+    if (!cg->pending_renames)
+        return;
+
+    cg->pending_renames--;
+
+    netdata_log_debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id);
+    netdata_log_debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id);
+
+    pid_t script_pid;
+    FILE *script_stdin, *script_stdout;
+    (void)netdata_popen_raw_default_flags_and_environment(&script_pid, &script_stdin, &script_stdout, cgroups_rename_script, cg->id, cg->intermediate_id);
+    if (!script_stdout) {
+        collector_error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id);
+        cg->pending_renames = 0;
+        cg->processed = 1;
+        return;
+    }
+
+    // only the first line of the output is of interest
+    char line[CGROUP_CHARTID_LINE_MAX + 1];
+    char *resolved = fgets(line, CGROUP_CHARTID_LINE_MAX, script_stdout);
+    int exit_code = netdata_pclose(script_stdin, script_stdout, script_pid);
+
+    if (exit_code == 0 || exit_code == 3) {
+        cg->pending_renames = 0;
+
+        if (exit_code == 3)
+            cg->processed = 1;
+    }
+
+    // retry later, or nothing to rename
+    if (cg->pending_renames || cg->processed)
+        return;
+
+    // the script printed nothing useful
+    if (!resolved || *resolved == '\0' || *resolved == '\n')
+        return;
+
+    resolved = trim(resolved);
+    if (!resolved)
+        return;
+
+    if (!cg->chart_labels)
+        cg->chart_labels = rrdlabels_create();
+
+    // read the new labels and remove the obsolete ones
+    rrdlabels_unmark_all(cg->chart_labels);
+    char *name = cgroup_parse_resolved_name_and_labels(cg->chart_labels, resolved);
+    rrdlabels_remove_all_unmarked(cg->chart_labels);
+
+    // replace the name and derive the chart id from it
+    freez(cg->name);
+    cg->name = strdupz(name);
+
+    freez(cg->chart_id);
+    cg->chart_id = cgroup_chart_id_strdupz(name);
+    substitute_dots_in_id(cg->chart_id);
+    cg->hash_chart_id = simple_hash(cg->chart_id);
+}
+
+// Locate the cgroup.procs file of a cgroup (cgroups v1).
+//
+// The cpuset, blkio, memory and devices hierarchies are probed in this order. out->path is
+// left holding the first path that stat() accepts. When none exists, out->path is emptied
+// and out->enabled is cleared.
+static void is_cgroup_procs_exist(netdata_ebpf_cgroup_shm_body_t *out, char *id) {
+    const char *controller_bases[] = {
+        cgroup_cpuset_base,
+        cgroup_blkio_base,
+        cgroup_memory_base,
+        cgroup_devices_base,
+    };
+    struct stat st;
+
+    for (size_t i = 0; i < sizeof(controller_bases) / sizeof(controller_bases[0]); i++) {
+        snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", controller_bases[i], id);
+        if (likely(stat(out->path, &st) == 0))
+            return;
+    }
+
+    out->path[0] = '\0';
+    out->enabled = 0;
+}
+
+// Turn a cgroup into a systemd service: flag it as such and derive its name and chart id
+// from the last component of its id, without the extension (the part after the last dot).
+//
+// Example: id "/system.slice/foo.service" gives the name "foo".
+//
+// A slash or a dot found at the very first position is left in place, as before.
+// When the component has no dot, nothing is removed. The previous implementation counted
+// down with an unsigned index, which wrapped in that case and wrote a NUL one byte before
+// the string.
+static inline void convert_cgroup_to_systemd_service(struct cgroup *cg) {
+    char buffer[CGROUP_CHARTID_LINE_MAX + 1];
+    cg->options |= CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE;
+    strncpyz(buffer, cg->id, CGROUP_CHARTID_LINE_MAX);
+    char *s = buffer;
+
+    // skip to the last slash
+    char *slash = strrchr(s, '/');
+    if (slash && slash != s)
+        s = slash + 1;
+
+    // remove extension
+    char *dot = strrchr(s, '.');
+    if (dot && dot != s)
+        *dot = '\0';
+
+    freez(cg->name);
+    cg->name = strdupz(s);
+
+    freez(cg->chart_id);
+    cg->chart_id = cgroup_chart_id_strdupz(s);
+    substitute_dots_in_id(cg->chart_id);
+    cg->hash_chart_id = simple_hash(cg->chart_id);
+}
+
+// Allocate a cgroup for the given id and append it to the end of the discovered list.
+//
+// The name starts as a copy of the id; intermediate_id and chart_id are derived from the
+// id with cgroup_chart_id_strdupz() (the chart id additionally gets its dots replaced).
+// The caller is responsible for updating cgroup_root_count.
+static inline struct cgroup *discovery_cgroup_add(const char *id) {
+    netdata_log_debug(D_CGROUP, "adding to list, cgroup with id '%s'", id);
+
+    struct cgroup *cg = callocz(1, sizeof(struct cgroup));
+
+    cg->id = strdupz(id);
+    cg->hash = simple_hash(cg->id);
+    cg->name = strdupz(id);
+    cg->intermediate_id = cgroup_chart_id_strdupz(id);
+
+    cg->chart_id = cgroup_chart_id_strdupz(id);
+    substitute_dots_in_id(cg->chart_id);
+    cg->hash_chart_id = simple_hash(cg->chart_id);
+
+    if (cgroup_use_unified_cgroups)
+        cg->options |= CGROUP_OPTIONS_IS_UNIFIED;
+
+    // walk to the link that terminates the list and hook the new cgroup there
+    struct cgroup **tail = &discovered_cgroup_root;
+    while (*tail)
+        tail = &(*tail)->discovered_next;
+    *tail = cg;
+
+    return cg;
+}
+
+// Look up a discovered cgroup by its id.
+// Returns the matching cgroup, or NULL when the id is not in the discovered list.
+static inline struct cgroup *discovery_cgroup_find(const char *id) {
+    netdata_log_debug(D_CGROUP, "searching for cgroup '%s'", id);
+
+    uint32_t wanted = simple_hash(id);
+
+    // the hash is compared first, to avoid most of the string comparisons
+    struct cgroup *cg = discovered_cgroup_root;
+    while (cg && !(cg->hash == wanted && strcmp(id, cg->id) == 0))
+        cg = cg->discovered_next;
+
+    netdata_log_debug(D_CGROUP, "cgroup '%s' %s in memory", id, (cg)?"found":"not found");
+    return cg;
+}
+
+// The depth of a cgroup is the number of '/' characters in its id.
+static int calc_cgroup_depth(const char *id) {
+    int slashes = 0;
+
+    while (*id) {
+        if (unlikely(*id == '/'))
+            slashes++;
+        id++;
+    }
+
+    return slashes;
+}
+
+// Called for every cgroup directory found while scanning.
+//
+// A cgroup that is already known is just marked as available. A new one is added, unless
+// the maximum number of cgroups has been reached or the directory is deeper than
+// cgroup_max_depth (a limit that applies only when it is positive).
+static inline void discovery_find_cgroup_in_dir_callback(const char *dir) {
+    const char *id = (dir && *dir) ? dir : "/";
+
+    netdata_log_debug(D_CGROUP, "examining cgroup dir '%s'", id);
+
+    struct cgroup *known = discovery_cgroup_find(id);
+    if (known) {
+        known->available = 1;
+        return;
+    }
+
+    if (cgroup_root_count >= cgroup_root_max) {
+        collector_info("CGROUP: maximum number of cgroups reached (%d). Not adding cgroup '%s'", cgroup_root_count, id);
+        return;
+    }
+
+    if (cgroup_max_depth > 0) {
+        int depth = calc_cgroup_depth(id);
+        if (depth > cgroup_max_depth) {
+            collector_info("CGROUP: '%s' is too deep (%d, while max is %d)", id, depth, cgroup_max_depth);
+            return;
+        }
+    }
+
+    struct cgroup *fresh = discovery_cgroup_add(id);
+    fresh->available = 1;
+    fresh->first_time_seen = 1;
+    fresh->function_ready = false;
+
+    cgroup_root_count++;
+}
+
+// Recursively walk a cgroup hierarchy, calling callback() for every directory found.
+//
+// base     - the root of the hierarchy
+// this     - the directory to scan now (NULL to start from base)
+// callback - receives the path of each directory relative to base ("/" for base itself)
+//
+// Sub-directories are visited only when the relative path of the current directory passes
+// matches_search_cgroup_paths().
+//
+// Returns -1 when the directory cannot be opened, otherwise the number of directories
+// visited (this one included).
+static inline int discovery_find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) {
+    if(!this) this = base;
+    netdata_log_debug(D_CGROUP, "searching for directories in '%s' (base '%s')", this, base);
+
+    size_t dirlen = strlen(this), baselen = strlen(base);
+
+    const char *relative_path = &this[baselen];
+    if(!*relative_path) relative_path = "/";
+
+    DIR *dir = opendir(this);
+    if(!dir) {
+        // report the directory that actually failed, not the root of the hierarchy
+        collector_error("CGROUP: cannot read directory '%s'", this);
+        return -1;
+    }
+
+    int ret = 1;
+    int enabled = -1; // -1: not decided yet, 0: do not descend, anything else: descend
+
+    callback(relative_path);
+
+    struct dirent *de = NULL;
+    while((de = readdir(dir))) {
+        if(de->d_type != DT_DIR)
+            continue;
+
+        if(de->d_name[0] == '.' &&
+           (de->d_name[1] == '\0' || (de->d_name[1] == '.' && de->d_name[2] == '\0')))
+            continue;
+
+        // do not descend into directories we are not interested in
+        // (decided lazily, on the first sub-directory found)
+        if(enabled == -1)
+            enabled = matches_search_cgroup_paths(relative_path);
+
+        if(!enabled)
+            break; // the decision is the same for all the entries of this directory
+
+        // "<this>/<name>" + terminating NUL
+        char *s = mallocz(dirlen + strlen(de->d_name) + 2);
+        strcpy(s, this);
+        strcat(s, "/");
+        strcat(s, de->d_name);
+
+        int ret2 = discovery_find_dir_in_subdirs(base, s, callback);
+        if(ret2 > 0) ret += ret2;
+
+        freez(s);
+    }
+
+    closedir(dir);
+    return ret;
+}
+
+// Clear the "available" flag of every discovered cgroup.
+static inline void discovery_mark_as_unavailable_all_cgroups() {
+    struct cgroup *cg = discovered_cgroup_root;
+
+    while (cg) {
+        cg->available = 0;
+        cg = cg->discovered_next;
+    }
+}
+
+// Find the files to read for a cgroup, on cgroups v1.
+//
+// For every metric that is enabled in the configuration and has no filename yet, the
+// expected file is probed with stat() under the base directory of its controller. When the
+// file exists its path is stored and the metric takes the configured "enabled" value.
+// Metrics whose file is missing are probed again the next time this is called.
+static inline void discovery_update_filenames_cgroup_v1(struct cgroup *cg) {
+    char filename[FILENAME_MAX + 1];
+    struct stat buf;
+
+    // CPU
+    if (unlikely(cgroup_enable_cpuacct_stat && !cg->cpuacct_stat.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.stat", cgroup_cpuacct_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->cpuacct_stat.filename = strdupz(filename);
+            cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat;
+            // the cpu limit files are recorded without checking that they exist
+            snprintfz(filename, FILENAME_MAX, "%s%s/cpuset.cpus", cgroup_cpuset_base, cg->id);
+            cg->filename_cpuset_cpus = strdupz(filename);
+            snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_period_us", cgroup_cpuacct_base, cg->id);
+            cg->filename_cpu_cfs_period = strdupz(filename);
+            snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_quota_us", cgroup_cpuacct_base, cg->id);
+            cg->filename_cpu_cfs_quota = strdupz(filename);
+        }
+    }
+    // FIXME: remove usage_percpu
+    // (per-cpu usage, throttling and shares are not collected for systemd services)
+    if (unlikely(cgroup_enable_cpuacct_usage && !cg->cpuacct_usage.filename && !is_cgroup_systemd_service(cg))) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.usage_percpu", cgroup_cpuacct_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->cpuacct_usage.filename = strdupz(filename);
+            cg->cpuacct_usage.enabled = cgroup_enable_cpuacct_usage;
+        }
+    }
+    if (unlikely(
+            cgroup_enable_cpuacct_cpu_throttling && !cg->cpuacct_cpu_throttling.filename &&
+            !is_cgroup_systemd_service(cg))) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_cpuacct_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->cpuacct_cpu_throttling.filename = strdupz(filename);
+            cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling;
+        }
+    }
+    if (unlikely(
+            cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename && !is_cgroup_systemd_service(cg))) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/cpu.shares", cgroup_cpuacct_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->cpuacct_cpu_shares.filename = strdupz(filename);
+            cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares;
+        }
+    }
+
+    // Memory
+    // memory.stat is needed when detailed memory charts are enabled, or when cgroup_used_memory is set;
+    // for systemd services the detailed charts have their own switch
+    if (unlikely(
+            (cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed &&
+            (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_memory_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->memory.filename_detailed = strdupz(filename);
+            cg->memory.enabled_detailed =
+                (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_AUTO;
+        }
+    }
+    if (unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/memory.usage_in_bytes", cgroup_memory_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->memory.filename_usage_in_bytes = strdupz(filename);
+            cg->memory.enabled_usage_in_bytes = cgroup_enable_memory;
+            snprintfz(filename, FILENAME_MAX, "%s%s/memory.limit_in_bytes", cgroup_memory_base, cg->id);
+            cg->filename_memory_limit = strdupz(filename);
+        }
+    }
+    if (unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.usage_in_bytes", cgroup_memory_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->memory.filename_msw_usage_in_bytes = strdupz(filename);
+            cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap;
+            snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.limit_in_bytes", cgroup_memory_base, cg->id);
+            cg->filename_memoryswap_limit = strdupz(filename);
+        }
+    }
+    if (unlikely(cgroup_enable_memory_failcnt && !cg->memory.filename_failcnt)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/memory.failcnt", cgroup_memory_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->memory.filename_failcnt = strdupz(filename);
+            cg->memory.enabled_failcnt = cgroup_enable_memory_failcnt;
+        }
+    }
+
+    // Blkio
+    // for every blkio metric the "_recursive" file is tried first, then the plain one
+    if (unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes_recursive", cgroup_blkio_base, cg->id);
+        if (unlikely(stat(filename, &buf) != -1)) {
+            cg->io_service_bytes.filename = strdupz(filename);
+            cg->io_service_bytes.enabled = cgroup_enable_blkio_io;
+        } else {
+            snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes", cgroup_blkio_base, cg->id);
+            if (likely(stat(filename, &buf) != -1)) {
+                cg->io_service_bytes.filename = strdupz(filename);
+                cg->io_service_bytes.enabled = cgroup_enable_blkio_io;
+            }
+        }
+    }
+    if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced_recursive", cgroup_blkio_base, cg->id);
+        if (unlikely(stat(filename, &buf) != -1)) {
+            cg->io_serviced.filename = strdupz(filename);
+            cg->io_serviced.enabled = cgroup_enable_blkio_ops;
+        } else {
+            snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced", cgroup_blkio_base, cg->id);
+            if (likely(stat(filename, &buf) != -1)) {
+                cg->io_serviced.filename = strdupz(filename);
+                cg->io_serviced.enabled = cgroup_enable_blkio_ops;
+            }
+        }
+    }
+    if (unlikely(cgroup_enable_blkio_throttle_io && !cg->throttle_io_service_bytes.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes_recursive", cgroup_blkio_base, cg->id);
+        if (unlikely(stat(filename, &buf) != -1)) {
+            cg->throttle_io_service_bytes.filename = strdupz(filename);
+            cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io;
+        } else {
+            snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes", cgroup_blkio_base, cg->id);
+            if (likely(stat(filename, &buf) != -1)) {
+                cg->throttle_io_service_bytes.filename = strdupz(filename);
+                cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io;
+            }
+        }
+    }
+    if (unlikely(cgroup_enable_blkio_throttle_ops && !cg->throttle_io_serviced.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced_recursive", cgroup_blkio_base, cg->id);
+        if (unlikely(stat(filename, &buf) != -1)) {
+            cg->throttle_io_serviced.filename = strdupz(filename);
+            cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops;
+        } else {
+            snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced", cgroup_blkio_base, cg->id);
+            if (likely(stat(filename, &buf) != -1)) {
+                cg->throttle_io_serviced.filename = strdupz(filename);
+                cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops;
+            }
+        }
+    }
+    if (unlikely(cgroup_enable_blkio_merged_ops && !cg->io_merged.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged_recursive", cgroup_blkio_base, cg->id);
+        if (unlikely(stat(filename, &buf) != -1)) {
+            cg->io_merged.filename = strdupz(filename);
+            cg->io_merged.enabled = cgroup_enable_blkio_merged_ops;
+        } else {
+            snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged", cgroup_blkio_base, cg->id);
+            if (likely(stat(filename, &buf) != -1)) {
+                cg->io_merged.filename = strdupz(filename);
+                cg->io_merged.enabled = cgroup_enable_blkio_merged_ops;
+            }
+        }
+    }
+    if (unlikely(cgroup_enable_blkio_queued_ops && !cg->io_queued.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued_recursive", cgroup_blkio_base, cg->id);
+        if (unlikely(stat(filename, &buf) != -1)) {
+            cg->io_queued.filename = strdupz(filename);
+            cg->io_queued.enabled = cgroup_enable_blkio_queued_ops;
+        } else {
+            snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued", cgroup_blkio_base, cg->id);
+            if (likely(stat(filename, &buf) != -1)) {
+                cg->io_queued.filename = strdupz(filename);
+                cg->io_queued.enabled = cgroup_enable_blkio_queued_ops;
+            }
+        }
+    }
+
+    // Pids
+    // (there is no configuration switch checked here; the file is used whenever it exists)
+    if (unlikely(!cg->pids.pids_current_filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/pids.current", cgroup_pids_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->pids.pids_current_filename = strdupz(filename);
+        }
+    }
+}
+
+// Find the files to read for a cgroup, on cgroups v2 (unified hierarchy).
+//
+// All the files live under cgroup_unified_base. For every metric that is enabled in the
+// configuration and has no filename yet, the expected file is probed with stat(); when it
+// exists its path is stored and the metric takes the configured "enabled" value.
+// Metrics whose file is missing are probed again the next time this is called.
+static inline void discovery_update_filenames_cgroup_v2(struct cgroup *cg) {
+    char filename[FILENAME_MAX + 1];
+    struct stat buf;
+
+    // CPU
+    // cpu.stat serves both the cpu usage and the throttling metrics
+    if (unlikely((cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_cpu_throttling) && !cg->cpuacct_stat.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_unified_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->cpuacct_stat.filename = strdupz(filename);
+            cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat;
+            cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling;
+            cg->filename_cpuset_cpus = NULL;
+            cg->filename_cpu_cfs_period = NULL;
+            // cpu.max is recorded in the quota filename, without checking that it exists
+            snprintfz(filename, FILENAME_MAX, "%s%s/cpu.max", cgroup_unified_base, cg->id);
+            cg->filename_cpu_cfs_quota = strdupz(filename);
+        }
+    }
+    if (unlikely(cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/cpu.weight", cgroup_unified_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->cpuacct_cpu_shares.filename = strdupz(filename);
+            cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares;
+        }
+    }
+
+    // Memory
+    // FIXME: this if condition!
+    if (unlikely(
+            (cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed &&
+            (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_unified_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->memory.filename_detailed = strdupz(filename);
+            cg->memory.enabled_detailed =
+                (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_AUTO;
+        }
+    }
+
+    if (unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/memory.current", cgroup_unified_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->memory.filename_usage_in_bytes = strdupz(filename);
+            cg->memory.enabled_usage_in_bytes = cgroup_enable_memory;
+            snprintfz(filename, FILENAME_MAX, "%s%s/memory.max", cgroup_unified_base, cg->id);
+            cg->filename_memory_limit = strdupz(filename);
+        }
+    }
+
+    if (unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.current", cgroup_unified_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->memory.filename_msw_usage_in_bytes = strdupz(filename);
+            cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap;
+            snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.max", cgroup_unified_base, cg->id);
+            cg->filename_memoryswap_limit = strdupz(filename);
+        }
+    }
+
+    // Blkio
+    // both the bandwidth and the operations metrics are read from the same file (io.stat)
+    if (unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->io_service_bytes.filename = strdupz(filename);
+            cg->io_service_bytes.enabled = cgroup_enable_blkio_io;
+        }
+    }
+
+    if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->io_serviced.filename = strdupz(filename);
+            cg->io_serviced.enabled = cgroup_enable_blkio_ops;
+        }
+    }
+
+    // PSI
+    // for cpu pressure only "some" is collected; "full" is always disabled
+    if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->cpu_pressure.filename = strdupz(filename);
+            cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu;
+            cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO;
+        }
+    }
+
+    if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->io_pressure.filename = strdupz(filename);
+            cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some;
+            cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full;
+        }
+    }
+
+    if (unlikely(
+            (cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) &&
+            !cg->memory_pressure.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->memory_pressure.filename = strdupz(filename);
+            cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some;
+            cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full;
+        }
+    }
+
+    if (unlikely((cgroup_enable_pressure_irq_some || cgroup_enable_pressure_irq_full) && !cg->irq_pressure.filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/irq.pressure", cgroup_unified_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->irq_pressure.filename = strdupz(filename);
+            cg->irq_pressure.some.enabled = cgroup_enable_pressure_irq_some;
+            cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full;
+        }
+    }
+
+    // Pids
+    // (there is no configuration switch checked here; the file is used whenever it exists)
+    if (unlikely(!cg->pids.pids_current_filename)) {
+        snprintfz(filename, FILENAME_MAX, "%s%s/pids.current", cgroup_unified_base, cg->id);
+        if (likely(stat(filename, &buf) != -1)) {
+            cg->pids.pids_current_filename = strdupz(filename);
+        }
+    }
+}
+
+// Refresh the filenames of every discovered cgroup that is available, enabled and has no
+// rename pending, using the v1 or the v2 lookup depending on the hierarchy in use.
+static inline void discovery_update_filenames_all_cgroups() {
+    struct cgroup *cg;
+
+    for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) {
+        int ready = cg->available && cg->enabled && !cg->pending_renames;
+        if (unlikely(!ready))
+            continue;
+
+        if (cgroup_use_unified_cgroups) {
+            // nothing to look for when the unified hierarchy is not there
+            if (likely(cgroup_unified_exist))
+                discovery_update_filenames_cgroup_v2(cg);
+        } else {
+            discovery_update_filenames_cgroup_v1(cg);
+        }
+    }
+}
+
+// Remove from the discovered list, and free, every cgroup that is not available any more.
+//
+// Before a cgroup is removed, the first available cgroup that had been disabled as a
+// duplicate of it (same kind, same chart id) is enabled to take its place.
+static inline void discovery_cleanup_all_cgroups() {
+    // link always points to the pointer that leads to the cgroup being examined
+    struct cgroup **link = &discovered_cgroup_root;
+
+    while (*link) {
+        struct cgroup *gone = *link;
+
+        if (gone->available) {
+            link = &gone->discovered_next;
+            continue;
+        }
+
+        // enable the first duplicate cgroup
+        for (struct cgroup *dup = discovered_cgroup_root; dup; dup = dup->discovered_next) {
+            if (dup == gone || !dup->available || dup->enabled)
+                continue;
+
+            if (!(dup->options & CGROUP_OPTIONS_DISABLED_DUPLICATE))
+                continue;
+
+            if (is_cgroup_systemd_service(dup) != is_cgroup_systemd_service(gone))
+                continue;
+
+            if (dup->hash_chart_id != gone->hash_chart_id || strcmp(dup->chart_id, gone->chart_id) != 0)
+                continue;
+
+            netdata_log_debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", dup->chart_id, dup->id, gone->id);
+            dup->enabled = 1;
+            dup->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE;
+            break;
+        }
+
+        // unlink and release; link now leads to the next cgroup to examine
+        *link = gone->discovered_next;
+        cgroup_free(gone);
+    }
+}
+
+// Publish the discovered cgroups to the main list: every ->next link is made to mirror
+// ->discovered_next and cgroup_root is pointed to the head of the discovered list.
+static inline void discovery_copy_discovered_cgroups_to_reader() {
+    netdata_log_debug(D_CGROUP, "copy discovered cgroups to the main group list");
+
+    struct cgroup *cg = discovered_cgroup_root;
+    while (cg) {
+        cg->next = cg->discovered_next;
+        cg = cg->discovered_next;
+    }
+
+    cgroup_root = discovered_cgroup_root;
+}
+
+// Export the main cgroup list to the shared memory read by eBPF.
+//
+// While holding the shared memory semaphore, one entry is written per cgroup (prefixed
+// name, hash of the name, options, enabled flag and path of its cgroup.procs file) and the
+// number of entries is stored in the header. An entry whose cgroup.procs file cannot be
+// found gets an empty path and is disabled. Nothing is done when the semaphore is not
+// available.
+static inline void discovery_share_cgroups_with_ebpf() {
+    if (shm_mutex_cgroup_ebpf == SEM_FAILED)
+        return;
+
+    sem_wait(shm_mutex_cgroup_ebpf);
+
+    struct stat st;
+    int shared = 0;
+    struct cgroup *cg = cgroup_root;
+
+    while (cg) {
+        netdata_ebpf_cgroup_shm_body_t *slot = &shm_cgroup_ebpf.body[shared];
+
+        char *prefix = cgroup_chart_id_prefix;
+        if (is_cgroup_systemd_service(cg))
+            prefix = services_chart_id_prefix;
+
+        snprintfz(slot->name, CGROUP_EBPF_NAME_SHARED_LENGTH - 1, "%s%s", prefix, cg->chart_id);
+        slot->hash = simple_hash(slot->name);
+        slot->options = cg->options;
+        slot->enabled = cg->enabled;
+
+        if (!cgroup_use_unified_cgroups) {
+            // v1: look for the file in the hierarchies of several controllers
+            is_cgroup_procs_exist(slot, cg->id);
+        } else {
+            snprintfz(slot->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_unified_base, cg->id);
+            if (likely(stat(slot->path, &st) == -1)) {
+                slot->path[0] = '\0';
+                slot->enabled = 0;
+            }
+        }
+
+        netdata_log_debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", slot->name, slot->enabled);
+
+        cg = cg->next;
+        shared++;
+    }
+
+    shm_cgroup_ebpf.header->cgroup_root_count = shared;
+    sem_post(shm_mutex_cgroup_ebpf);
+}
+
+// Scan the cgroups v1 hierarchies (cpuacct, blkio, memory and, optionally, devices).
+//
+// A hierarchy is scanned only when at least one of the metrics it serves is enabled.
+// When the scan fails (-1), the metrics of that hierarchy are disabled and an error is
+// logged.
+static inline void discovery_find_all_cgroups_v1() {
+    int scan_cpu = cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage;
+    if (scan_cpu &&
+        discovery_find_dir_in_subdirs(cgroup_cpuacct_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
+        cgroup_enable_cpuacct_stat = CONFIG_BOOLEAN_NO;
+        cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO;
+        collector_error("CGROUP: disabled cpu statistics.");
+    }
+
+    int scan_blkio = cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io ||
+                     cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops ||
+                     cgroup_enable_blkio_queued_ops;
+    if (scan_blkio &&
+        discovery_find_dir_in_subdirs(cgroup_blkio_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
+        cgroup_enable_blkio_io = CONFIG_BOOLEAN_NO;
+        cgroup_enable_blkio_ops = CONFIG_BOOLEAN_NO;
+        cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_NO;
+        cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_NO;
+        cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_NO;
+        cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_NO;
+        collector_error("CGROUP: disabled blkio statistics.");
+    }
+
+    int scan_memory = cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap ||
+                      cgroup_enable_memory_failcnt;
+    if (scan_memory &&
+        discovery_find_dir_in_subdirs(cgroup_memory_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
+        cgroup_enable_memory = CONFIG_BOOLEAN_NO;
+        cgroup_enable_detailed_memory = CONFIG_BOOLEAN_NO;
+        cgroup_enable_swap = CONFIG_BOOLEAN_NO;
+        cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_NO;
+        collector_error("CGROUP: disabled memory statistics.");
+    }
+
+    if (cgroup_search_in_devices &&
+        discovery_find_dir_in_subdirs(cgroup_devices_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
+        cgroup_search_in_devices = 0;
+        collector_error("CGROUP: disabled devices statistics.");
+    }
+}
+
+// Scan the unified (cgroups v2) hierarchy. When the scan fails, the unified hierarchy is
+// flagged as not existing and an error is logged.
+static inline void discovery_find_all_cgroups_v2() {
+    int found = discovery_find_dir_in_subdirs(cgroup_unified_base, NULL, discovery_find_cgroup_in_dir_callback);
+    if (found != -1)
+        return;
+
+    cgroup_unified_exist = CONFIG_BOOLEAN_NO;
+    collector_error("CGROUP: disabled unified cgroups statistics.");
+}
+
+// Returns 1 when s is a non-empty string made only of decimal digits, 0 otherwise
+// (a NULL or an empty string gives 0).
+static int is_digits_only(const char *s) {
+    if (!s || !*s)
+        return 0;
+
+    for (; *s; s++) {
+        // isdigit() is undefined for negative values, so plain char must be converted first
+        if (!isdigit((unsigned char)*s))
+            return 0;
+    }
+
+    return 1;
+}
+
+// Tells whether a cgroup id looks like the one of a container inside a k8s pod.
+//
+// Returns 1 when the id contains "pod" at least twice and the text after the last "pod"
+// has a '/' that is followed by at least one more character; 0 otherwise.
+static int is_cgroup_k8s_container(const char *id) {
+    // examples:
+    // https://github.com/netdata/netdata/blob/0fc101679dcd12f1cb8acdd07bb4c85d8e553e53/collectors/cgroups.plugin/cgroup-name.sh#L121-L147
+    static const char marker[] = "pod";
+    const size_t marker_len = sizeof(marker) - 1;
+
+    int matches = 0;
+    const char *after_last = NULL;
+
+    for (const char *hit = strstr(id, marker); hit; hit = strstr(hit + marker_len, marker)) {
+        matches++;
+        after_last = hit + marker_len;
+    }
+
+    if (matches < 2)
+        return 0;
+
+    // something has to follow the pod component: "<...pod...>/<container>"
+    const char *slash = strchr(after_last, '/');
+    if (!slash)
+        return 0;
+
+    return slash[1] != '\0';
+}
+
+#define TASK_COMM_LEN 16
+// Copy into 'comm' the comm of the first PID listed in cgroup.procs of k8s container cgroup 'id'. Returns 0 on success, 1 on any failure.
+static int k8s_get_container_first_proc_comm(const char *id, char *comm) {
+    if (!is_cgroup_k8s_container(id)) {
+        return 1;
+    }
+
+    static procfile *ff = NULL; // function-static and reused through procfile_reopen(), so this function is not reentrant
+
+    char filename[FILENAME_MAX + 1];
+    snprintfz(filename, FILENAME_MAX, "%s/%s/cgroup.procs", cgroup_cpuacct_base, id);
+
+    ff = procfile_reopen(ff, filename, NULL, CGROUP_PROCFILE_FLAG);
+    if (unlikely(!ff)) {
+        netdata_log_debug(D_CGROUP, "CGROUP: k8s_get_container_first_proc_comm(): cannot open file '%s'.", filename);
+        return 1;
+    }
+
+    ff = procfile_readall(ff);
+    if (unlikely(!ff)) {
+        netdata_log_debug(D_CGROUP, "CGROUP: k8s_get_container_first_proc_comm(): cannot read file '%s'.", filename);
+        return 1;
+    }
+
+    unsigned long lines = procfile_lines(ff);
+    if (likely(lines < 2)) { // fewer than 2 parsed lines: no PID to look at
+        return 1;
+    }
+
+    char *pid = procfile_lineword(ff, 0, 0); // first word of the first line
+    if (!pid || !*pid) {
+        return 1;
+    }
+
+    snprintfz(filename, FILENAME_MAX, "%s/proc/%s/comm", netdata_configured_host_prefix, pid);
+
+    ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT);
+    if (unlikely(!ff)) {
+        netdata_log_debug(D_CGROUP, "CGROUP: k8s_get_container_first_proc_comm(): cannot open file '%s'.", filename);
+        return 1;
+    }
+
+    ff = procfile_readall(ff);
+    if (unlikely(!ff)) {
+        netdata_log_debug(D_CGROUP, "CGROUP: k8s_get_container_first_proc_comm(): cannot read file '%s'.", filename);
+        return 1;
+    }
+
+    lines = procfile_lines(ff);
+    if (unlikely(lines != 2)) { // NOTE(review): presumably the comm line plus a trailing empty line - confirm against procfile
+        return 1;
+    }
+
+    char *proc_comm = procfile_lineword(ff, 0, 0);
+    if (!proc_comm || !*proc_comm) {
+        return 1;
+    }
+
+    strncpyz(comm, proc_comm, TASK_COMM_LEN); // the caller in this file passes a TASK_COMM_LEN + 1 byte buffer
+    return 0;
+}
+// First-sight handling of a cgroup: tag its orchestrator, defer or skip special k8s containers, convert systemd services, and arm rename attempts.
+static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) {
+ if (!cg->first_time_seen) {
+ return;
+ }
+ cg->first_time_seen = 0; // cleared up-front; set back to 1 below when the cgroup must be re-examined on a later pass
+
+ char comm[TASK_COMM_LEN + 1];
+
+ if (cg->container_orchestrator == CGROUPS_ORCHESTRATOR_UNSET) {
+ if (strstr(cg->id, "kubepods")) { // any id containing "kubepods" is tagged as Kubernetes
+ cg->container_orchestrator = CGROUPS_ORCHESTRATOR_K8S;
+ } else {
+ cg->container_orchestrator = CGROUPS_ORCHESTRATOR_UNKNOWN;
+ }
+ }
+
+ if (is_inside_k8s && !k8s_get_container_first_proc_comm(cg->id, comm)) { // a 0 return means 'comm' was filled in
+ // container initialization may take some time when CPU % is high
+ // seen on GKE: comm is '6' before 'runc:[2:INIT]' (dunno if it could be another number)
+ if (is_digits_only(comm) || matches_entrypoint_parent_process_comm(comm)) {
+ cg->first_time_seen = 1; // still initializing: handle it as a new cgroup again next time
+ return;
+ }
+ if (!strcmp(comm, "pause")) {
+ // a container that holds the network namespace for the pod
+ // we don't need to collect its metrics
+ cg->processed = 1; // marked processed here without 'enabled' being set in this function
+ return;
+ }
+ }
+
+ if (cgroup_enable_systemd_services && matches_systemd_services_cgroups(cg->id)) {
+ netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'cgroups to match as systemd services'", cg->id, cg->chart_id);
+ convert_cgroup_to_systemd_service(cg);
+ return; // returns before the rename check below, so pending_renames is not set on this path
+ }
+
+ if (matches_enabled_cgroup_renames(cg->id)) {
+ netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'run script to rename cgroups matching', will try to rename it", cg->id, cg->chart_id);
+ if (is_inside_k8s && is_cgroup_k8s_container(cg->id)) {
+ // it may take up to a minute for the K8s API to return data for the container
+ // tested on AWS K8s cluster with 100% CPU utilization
+ cg->pending_renames = 9; // 1.5 minute
+ } else {
+ cg->pending_renames = 2;
+ }
+ }
+}
+// Return 1 (after logging an error) when another enabled cgroup of the same kind already uses cg's chart id, otherwise 0.
+static int discovery_is_cgroup_duplicate(struct cgroup *cg) {
+    // https://github.com/netdata/netdata/issues/797#issuecomment-241248884
+    for (struct cgroup *other = discovered_cgroup_root; other; other = other->discovered_next) {
+        if (other == cg || !other->enabled)
+            continue; // skip the cgroup itself and every cgroup that is not enabled
+        if (is_cgroup_systemd_service(other) != is_cgroup_systemd_service(cg))
+            continue; // only cgroups of the same kind (systemd service or not) are compared
+        if (other->hash_chart_id != cg->hash_chart_id || strcmp(other->chart_id, cg->chart_id) != 0)
+            continue; // hash is compared first, the string compare runs only on a hash match
+        collector_error(
+            "CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.",
+            cg->chart_id, other->id, cg->id);
+        return 1;
+    }
+    return 0;
+}
+
+// ----------------------------------------------------------------------------
+// cgroup network interfaces
+
+#define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048
+// Run cgroups_network_interface_script for 'cg' and record every "host_device container_device" pair it prints, registering each with netdev_rename_device_add().
+static inline void read_cgroup_network_interfaces(struct cgroup *cg) {
+ netdata_log_debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s'", cg->id, cg->chart_id);
+
+ pid_t cgroup_pid;
+ char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1];
+
+ if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) { // not unified: path is built under cgroup_cpuacct_base
+ snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_cpuacct_base, cg->id);
+ }
+ else { // unified: path is built under cgroup_unified_base
+ snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_unified_base, cg->id);
+ }
+
+ netdata_log_debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id);
+ FILE *fp_child_input, *fp_child_output;
+ (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_network_interface_script, "--cgroup", cgroup_identifier); // return value ignored; failure is detected through fp_child_output below
+ if(!fp_child_output) {
+ collector_error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", cgroups_network_interface_script, cgroup_identifier);
+ return;
+ }
+
+ char *s;
+ char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1];
+ while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) { // one script output line per iteration
+ trim(s);
+
+ if(*s && *s != '\n') { // ignore empty lines
+ char *t = s;
+ while(*t && *t != ' ') t++; // advance t to the first space, i.e. the end of the first word
+ if(*t == ' ') {
+ *t = '\0'; // split in place: s is the host device, t the container device
+ t++;
+ }
+
+ if(!*s) {
+ collector_error("CGROUP: empty host interface returned by script");
+ continue;
+ }
+
+ if(!*t) { // the line had a single word only
+ collector_error("CGROUP: empty guest interface returned by script");
+ continue;
+ }
+
+ struct cgroup_network_interface *i = callocz(1, sizeof(struct cgroup_network_interface));
+ i->host_device = strdupz(s);
+ i->container_device = strdupz(t);
+ i->next = cg->interfaces; // push at the head of the cgroup's interface list
+ cg->interfaces = i;
+
+ collector_info("CGROUP: cgroup '%s' has network interface '%s' as '%s'", cg->id, i->host_device, i->container_device);
+
+ // register a device rename to proc_net_dev.c
+ netdev_rename_device_add(i->host_device, i->container_device, cg->chart_id, cg->chart_labels,
+ k8s_is_kubepod(cg) ? "k8s." : "", cgroup_netdev_get(cg));
+ }
+ }
+
+ netdata_pclose(fp_child_input, fp_child_output, cgroup_pid);
+ // netdata_log_debug(D_CGROUP, "closed cgroup_identifier for cgroup '%s'", cg->id);
+}
+// Process one discovered cgroup once: first-sight handling, pending renames, then decide 'enabled', attach labels and read its network interfaces.
+static inline void discovery_process_cgroup(struct cgroup *cg) {
+ if (!cg->available || cg->processed) {
+ return;
+ }
+
+ if (cg->first_time_seen) {
+ worker_is_busy(WORKER_DISCOVERY_PROCESS_FIRST_TIME);
+ discovery_process_first_time_seen_cgroup(cg);
+ if (unlikely(cg->first_time_seen || cg->processed)) { // deferred to a later pass, or already marked processed by the call above
+ return;
+ }
+ }
+
+ if (cg->pending_renames) {
+ worker_is_busy(WORKER_DISCOVERY_PROCESS_RENAME);
+ discovery_rename_cgroup(cg);
+ if (unlikely(cg->pending_renames || cg->processed)) { // rename attempts remain, or the cgroup was marked processed meanwhile
+ return;
+ }
+ }
+
+ cg->processed = 1; // every path below is final: later passes return at the top of this function
+
+ if ((strlen(cg->chart_id) + strlen(cgroup_chart_id_prefix)) >= RRD_ID_LENGTH_MAX) {
+ collector_info("cgroup '%s' (chart id '%s') disabled because chart_id exceeds the limit (RRD_ID_LENGTH_MAX)", cg->id, cg->chart_id);
+ return;
+ }
+
+ if (is_cgroup_systemd_service(cg)) { // systemd services skip the name/path pattern checks used further down
+ if (discovery_is_cgroup_duplicate(cg)) {
+ cg->enabled = 0;
+ cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE;
+ return;
+ }
+ if (!cg->chart_labels)
+ cg->chart_labels = rrdlabels_create();
+ rrdlabels_add(cg->chart_labels, "service_name", cg->name, RRDLABEL_SRC_AUTO);
+ cg->enabled = 1;
+ return;
+ }
+
+ if (!(cg->enabled = matches_enabled_cgroup_names(cg->name))) { // assignment intended: 'enabled' takes the match result
+ netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups names matching'", cg->id, cg->name);
+ return;
+ }
+
+ if (!(cg->enabled = matches_enabled_cgroup_paths(cg->id))) { // assignment intended, as above
+ netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups matching'", cg->id, cg->name);
+ return;
+ }
+
+ if (discovery_is_cgroup_duplicate(cg)) {
+ cg->enabled = 0;
+ cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE;
+ return;
+ }
+
+ if (!cg->chart_labels)
+ cg->chart_labels = rrdlabels_create();
+
+ if (!k8s_is_kubepod(cg)) {
+ rrdlabels_add(cg->chart_labels, "cgroup_name", cg->name, RRDLABEL_SRC_AUTO);
+ if (!rrdlabels_exist(cg->chart_labels, "image"))
+ rrdlabels_add(cg->chart_labels, "image", "", RRDLABEL_SRC_AUTO); // add an empty "image" label when none exists yet
+ }
+
+ worker_is_busy(WORKER_DISCOVERY_PROCESS_NETWORK);
+ read_cgroup_network_interfaces(cg);
+}
+// One discovery pass: mark all cgroups unavailable, scan the v1 or v2 hierarchy, process what was found, then clean up and copy to the reader under cgroup_root_mutex.
+static inline void discovery_find_all_cgroups() {
+ netdata_log_debug(D_CGROUP, "searching for cgroups");
+
+ worker_is_busy(WORKER_DISCOVERY_INIT);
+ discovery_mark_as_unavailable_all_cgroups();
+
+ worker_is_busy(WORKER_DISCOVERY_FIND);
+ if (!cgroup_use_unified_cgroups) {
+ discovery_find_all_cgroups_v1();
+ } else {
+ discovery_find_all_cgroups_v2();
+ }
+
+ for (struct cgroup *cg = discovered_cgroup_root; cg; cg = cg->discovered_next) { // walks the discoverer's own list
+ worker_is_busy(WORKER_DISCOVERY_PROCESS);
+ discovery_process_cgroup(cg);
+ }
+
+ worker_is_busy(WORKER_DISCOVERY_UPDATE);
+ discovery_update_filenames_all_cgroups();
+
+ worker_is_busy(WORKER_DISCOVERY_LOCK); // job state covering the wait for the mutex
+ uv_mutex_lock(&cgroup_root_mutex);
+
+ worker_is_busy(WORKER_DISCOVERY_CLEANUP);
+ discovery_cleanup_all_cgroups();
+
+ worker_is_busy(WORKER_DISCOVERY_COPY);
+ discovery_copy_discovered_cgroups_to_reader();
+
+ uv_mutex_unlock(&cgroup_root_mutex); // only cleanup and copy run with the mutex held
+
+ worker_is_busy(WORKER_DISCOVERY_SHARE);
+ discovery_share_cgroups_with_ebpf();
+
+ netdata_log_debug(D_CGROUP, "done searching for cgroups");
+}
+// Discovery thread entry point: register worker jobs, then run one discovery pass per condition-variable wakeup while SERVICE_COLLECTORS is running.
+void cgroup_discovery_worker(void *ptr)
+{
+ UNUSED(ptr);
+
+ worker_register("CGROUPSDISC");
+ worker_register_job_name(WORKER_DISCOVERY_INIT, "init");
+ worker_register_job_name(WORKER_DISCOVERY_FIND, "find");
+ worker_register_job_name(WORKER_DISCOVERY_PROCESS, "process");
+ worker_register_job_name(WORKER_DISCOVERY_PROCESS_RENAME, "rename");
+ worker_register_job_name(WORKER_DISCOVERY_PROCESS_NETWORK, "network");
+ worker_register_job_name(WORKER_DISCOVERY_PROCESS_FIRST_TIME, "new");
+ worker_register_job_name(WORKER_DISCOVERY_UPDATE, "update");
+ worker_register_job_name(WORKER_DISCOVERY_CLEANUP, "cleanup");
+ worker_register_job_name(WORKER_DISCOVERY_COPY, "copy");
+ worker_register_job_name(WORKER_DISCOVERY_SHARE, "share");
+ worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock");
+
+ entrypoint_parent_process_comm = simple_pattern_create( // comm values for which first-sight processing of a k8s container is deferred
+ " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3)
+ " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961
+ NULL,
+ SIMPLE_PATTERN_EXACT, true);
+
+ service_register(SERVICE_THREAD_TYPE_LIBUV, NULL, NULL, NULL, false);
+
+ while (service_running(SERVICE_COLLECTORS)) {
+ worker_is_idle();
+
+ uv_mutex_lock(&discovery_thread.mutex);
+ uv_cond_wait(&discovery_thread.cond_var, &discovery_thread.mutex); // NOTE(review): no predicate is re-checked, so a spurious wakeup just runs one extra discovery pass
+ uv_mutex_unlock(&discovery_thread.mutex);
+
+ if (unlikely(!service_running(SERVICE_COLLECTORS))) // re-check after waking up, before starting a pass
+ break;
+
+ discovery_find_all_cgroups();
+ }
+ collector_info("discovery thread stopped");
+ worker_unregister();
+ service_exits();
+ __atomic_store_n(&discovery_thread.exited,1,__ATOMIC_RELAXED); // last action: flag that this thread has finished
+}
diff --git a/collectors/cgroups.plugin/cgroup-internals.h b/collectors/cgroups.plugin/cgroup-internals.h
new file mode 100644
index 00000000000000..a6980224066b43
--- /dev/null
+++ b/collectors/cgroups.plugin/cgroup-internals.h
@@ -0,0 +1,514 @@
+#include "sys_fs_cgroup.h"
+
+#ifndef NETDATA_CGROUP_INTERNALS_H
+#define NETDATA_CGROUP_INTERNALS_H 1
+
+#ifdef NETDATA_INTERNAL_CHECKS
+#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_DEFAULT
+#else
+#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_NO_ERROR_ON_FILE_IO
+#endif
+// One blkio statistics file of a cgroup: its file name, collection flags and Read/Write counters.
+struct blkio {
+ int updated;
+ int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
+ int delay_counter; // NOTE(review): presumably iterations left before a re-check (cf. cgroup_recheck_zero_blkio_every_iterations) - confirm
+
+ char *filename;
+
+ unsigned long long Read;
+ unsigned long long Write;
+/*
+ unsigned long long Sync;
+ unsigned long long Async;
+ unsigned long long Total;
+*/
+};
+// "pids current" metric of a cgroup: source file name, updated flag and value.
+struct pids {
+ char *pids_current_filename;
+ int pids_current_updated;
+ unsigned long long pids_current;
+};
+// Memory statistics of one cgroup: per-file names, enabled/updated flags, re-check counters and the collected values.
+// https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
+struct memory {
+ ARL_BASE *arl_base; // NOTE(review): presumably the ARL parser state for the detailed memory file - confirm in the reader
+ ARL_ENTRY *arl_dirty;
+ ARL_ENTRY *arl_swap;
+
+ int updated_detailed;
+ int updated_usage_in_bytes;
+ int updated_msw_usage_in_bytes;
+ int updated_failcnt;
+
+ int enabled_detailed; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
+ int enabled_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
+ int enabled_msw_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
+ int enabled_failcnt; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
+
+ int delay_counter_detailed; // NOTE(review): presumably tied to cgroup_recheck_zero_mem_detailed_every_iterations - confirm
+ int delay_counter_failcnt; // NOTE(review): presumably tied to cgroup_recheck_zero_mem_failcnt_every_iterations - confirm
+
+ char *filename_detailed;
+ char *filename_usage_in_bytes;
+ char *filename_msw_usage_in_bytes;
+ char *filename_failcnt;
+
+ int detailed_has_dirty;
+ int detailed_has_swap;
+
+ // detailed metrics
+/*
+ unsigned long long cache;
+ unsigned long long rss;
+ unsigned long long rss_huge;
+ unsigned long long mapped_file;
+ unsigned long long writeback;
+ unsigned long long dirty;
+ unsigned long long swap;
+ unsigned long long pgpgin;
+ unsigned long long pgpgout;
+ unsigned long long pgfault;
+ unsigned long long pgmajfault;
+ unsigned long long inactive_anon;
+ unsigned long long active_anon;
+ unsigned long long inactive_file;
+ unsigned long long active_file;
+ unsigned long long unevictable;
+ unsigned long long hierarchical_memory_limit;
+*/
+ //unified cgroups metrics
+ unsigned long long anon;
+ unsigned long long kernel_stack;
+ unsigned long long slab;
+ unsigned long long sock;
+ // unsigned long long shmem;
+ unsigned long long anon_thp;
+ //unsigned long long file_writeback;
+ //unsigned long long file_dirty;
+ //unsigned long long file;
+
+ unsigned long long total_cache;
+ unsigned long long total_rss;
+ unsigned long long total_rss_huge;
+ unsigned long long total_mapped_file;
+ unsigned long long total_writeback;
+ unsigned long long total_dirty;
+ unsigned long long total_swap;
+ unsigned long long total_pgpgin;
+ unsigned long long total_pgpgout;
+ unsigned long long total_pgfault;
+ unsigned long long total_pgmajfault;
+/*
+ unsigned long long total_inactive_anon;
+ unsigned long long total_active_anon;
+*/
+
+ unsigned long long total_inactive_file;
+
+/*
+ unsigned long long total_active_file;
+ unsigned long long total_unevictable;
+*/
+
+ // single file metrics
+ unsigned long long usage_in_bytes;
+ unsigned long long msw_usage_in_bytes;
+ unsigned long long failcnt;
+};
+// User and system CPU time of one cgroup, together with the source file name and collection flags.
+// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt
+struct cpuacct_stat {
+ int updated;
+ int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
+
+ char *filename;
+
+ unsigned long long user; // v1, v2(user_usec)
+ unsigned long long system; // v1, v2(system_usec)
+};
+// Per-CPU usage of one cgroup, together with the source file name and collection flags.
+// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt
+struct cpuacct_usage {
+ int updated;
+ int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
+
+ char *filename;
+
+ unsigned int cpus;
+ unsigned long long *cpu_percpu; // NOTE(review): presumably an array of 'cpus' entries - confirm in the reader
+};
+// CPU throttling counters of one cgroup, together with the source file name and collection flags.
+// represents cpuacct/cpu.stat, for v2 'cpuacct_stat' is used for 'user_usec', 'system_usec'
+struct cpuacct_cpu_throttling {
+ int updated;
+ int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
+
+ char *filename;
+
+ unsigned long long nr_periods;
+ unsigned long long nr_throttled;
+ unsigned long long throttled_time;
+
+ unsigned long long nr_throttled_perc; // NOTE(review): looks derived from nr_throttled and nr_periods - confirm in the reader
+};
+// CPU shares value of one cgroup, together with the source file name and collection flags.
+// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs
+// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/managing_monitoring_and_updating_the_kernel/using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications_managing-monitoring-and-updating-the-kernel#proc_controlling-distribution-of-cpu-time-for-applications-by-adjusting-cpu-weight_using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications
+struct cpuacct_cpu_shares {
+ int updated;
+ int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
+
+ char *filename;
+
+ unsigned long long shares;
+};
+// Node of a cgroup's singly linked list of network interfaces, filled in by read_cgroup_network_interfaces().
+struct cgroup_network_interface {
+ const char *host_device; // first word of a script output line (strdupz copy)
+ const char *container_device; // remainder of that line after the first space (strdupz copy)
+ struct cgroup_network_interface *next;
+};
+// Orchestrator tag stored in cgroup.container_orchestrator; set the first time the discoverer sees a cgroup.
+enum cgroups_container_orchestrator {
+ CGROUPS_ORCHESTRATOR_UNSET, // not decided yet
+ CGROUPS_ORCHESTRATOR_UNKNOWN, // the cgroup id does not contain "kubepods"
+ CGROUPS_ORCHESTRATOR_K8S // the cgroup id contains "kubepods"
+};
+
+// Everything the plugin keeps for one cgroup: identity, discovery state, collected metrics, charts, limits and list links.
+// *** WARNING *** The fields are not thread safe. Take care of safe usage.
+struct cgroup {
+ uint32_t options; // CGROUP_OPTIONS_* bit flags
+
+ int first_time_seen; // first time seen by the discoverer
+ int processed; // the discoverer is done processing a cgroup (resolved name, set 'enabled' option)
+
+ char available; // found in the filesystem
+ char enabled; // enabled in the config
+
+ bool function_ready; // true after the first iteration of chart creation/update
+
+ char pending_renames; // rename attempts still pending; the discoverer starts it at 9 or 2
+
+ char *id;
+ uint32_t hash;
+
+ char *intermediate_id; // TODO: remove it when the renaming script is fixed
+
+ char *chart_id;
+ uint32_t hash_chart_id; // compared before chart_id itself in the duplicate check
+
+ // 'cgroup_name' label value.
+ // by default this is the *id (path), later changed to the resolved name (cgroup-name.sh) or systemd service name.
+ char *name;
+
+ RRDLABELS *chart_labels; // created on demand with rrdlabels_create()
+
+ int container_orchestrator; // holds an enum cgroups_container_orchestrator value
+
+ struct cpuacct_stat cpuacct_stat;
+ struct cpuacct_usage cpuacct_usage;
+ struct cpuacct_cpu_throttling cpuacct_cpu_throttling;
+ struct cpuacct_cpu_shares cpuacct_cpu_shares;
+
+ struct memory memory;
+
+ struct blkio io_service_bytes; // bytes
+ struct blkio io_serviced; // operations
+
+ struct blkio throttle_io_service_bytes; // bytes
+ struct blkio throttle_io_serviced; // operations
+
+ struct blkio io_merged; // operations
+ struct blkio io_queued; // operations
+
+ struct pids pids;
+
+ struct cgroup_network_interface *interfaces; // singly linked list, new entries are pushed at the head
+
+ struct pressure cpu_pressure;
+ struct pressure io_pressure;
+ struct pressure memory_pressure;
+ struct pressure irq_pressure;
+
+ // Cpu
+ RRDSET *st_cpu;
+ RRDDIM *st_cpu_rd_user;
+ RRDDIM *st_cpu_rd_system;
+
+ RRDSET *st_cpu_limit;
+ RRDSET *st_cpu_per_core;
+ RRDSET *st_cpu_nr_throttled;
+ RRDSET *st_cpu_throttled_time;
+ RRDSET *st_cpu_shares;
+
+ // Memory
+ RRDSET *st_mem;
+ RRDDIM *st_mem_rd_ram;
+ RRDDIM *st_mem_rd_swap;
+
+ RRDSET *st_mem_utilization;
+ RRDSET *st_writeback;
+ RRDSET *st_mem_activity;
+ RRDSET *st_pgfaults;
+ RRDSET *st_mem_usage;
+ RRDSET *st_mem_usage_limit;
+ RRDSET *st_mem_failcnt;
+
+ // Blkio
+ RRDSET *st_io;
+ RRDDIM *st_io_rd_read;
+ RRDDIM *st_io_rd_written;
+
+ RRDSET *st_serviced_ops;
+
+ RRDSET *st_throttle_io;
+ RRDDIM *st_throttle_io_rd_read;
+ RRDDIM *st_throttle_io_rd_written;
+
+ RRDSET *st_throttle_serviced_ops;
+
+ RRDSET *st_queued_ops;
+ RRDSET *st_merged_ops;
+
+ // Pids
+ RRDSET *st_pids;
+ RRDDIM *st_pids_rd_pids_current;
+
+ // per cgroup chart variables
+ char *filename_cpuset_cpus;
+ unsigned long long cpuset_cpus;
+
+ char *filename_cpu_cfs_period;
+ unsigned long long cpu_cfs_period;
+
+ char *filename_cpu_cfs_quota;
+ unsigned long long cpu_cfs_quota;
+
+ const RRDSETVAR_ACQUIRED *chart_var_cpu_limit;
+ NETDATA_DOUBLE prev_cpu_usage;
+
+ char *filename_memory_limit;
+ unsigned long long memory_limit;
+ const RRDSETVAR_ACQUIRED *chart_var_memory_limit;
+
+ char *filename_memoryswap_limit;
+ unsigned long long memoryswap_limit;
+ const RRDSETVAR_ACQUIRED *chart_var_memoryswap_limit;
+
+ const DICTIONARY_ITEM *cgroup_netdev_link; // cached by cgroup_netdev_get()
+
+ struct cgroup *next; // NOTE(review): presumably the reader-side list headed by cgroup_root - confirm
+ struct cgroup *discovered_next; // discoverer-side list, walked from discovered_cgroup_root
+
+};
+// Handles and state of the cgroup discovery thread (see cgroup_discovery_worker()).
+struct discovery_thread {
+ uv_thread_t thread;
+ uv_mutex_t mutex; // held around uv_cond_wait() in the worker loop
+ uv_cond_t cond_var; // the worker runs one discovery pass per wakeup on this condition variable
+ int exited; // stored as 1 with __atomic_store_n() as the worker's last action
+};
+
+extern struct discovery_thread discovery_thread;
+
+extern char *cgroups_rename_script;
+extern char cgroup_chart_id_prefix[];
+extern char services_chart_id_prefix[];
+extern uv_mutex_t cgroup_root_mutex;
+
+void cgroup_discovery_worker(void *ptr);
+
+extern int is_inside_k8s;
+extern long system_page_size;
+extern int cgroup_enable_cpuacct_stat;
+extern int cgroup_enable_cpuacct_usage;
+extern int cgroup_enable_cpuacct_cpu_throttling;
+extern int cgroup_enable_cpuacct_cpu_shares;
+extern int cgroup_enable_memory;
+extern int cgroup_enable_detailed_memory;
+extern int cgroup_enable_memory_failcnt;
+extern int cgroup_enable_swap;
+extern int cgroup_enable_blkio_io;
+extern int cgroup_enable_blkio_ops;
+extern int cgroup_enable_blkio_throttle_io;
+extern int cgroup_enable_blkio_throttle_ops;
+extern int cgroup_enable_blkio_merged_ops;
+extern int cgroup_enable_blkio_queued_ops;
+extern int cgroup_enable_pressure_cpu;
+extern int cgroup_enable_pressure_io_some;
+extern int cgroup_enable_pressure_io_full;
+extern int cgroup_enable_pressure_memory_some;
+extern int cgroup_enable_pressure_memory_full;
+extern int cgroup_enable_pressure_irq_some;
+extern int cgroup_enable_pressure_irq_full;
+extern int cgroup_enable_systemd_services;
+extern int cgroup_enable_systemd_services_detailed_memory;
+extern int cgroup_used_memory;
+extern int cgroup_use_unified_cgroups;
+extern int cgroup_unified_exist;
+extern int cgroup_search_in_devices;
+extern int cgroup_check_for_new_every;
+extern int cgroup_update_every;
+extern int cgroup_containers_chart_priority;
+extern int cgroup_recheck_zero_blkio_every_iterations;
+extern int cgroup_recheck_zero_mem_failcnt_every_iterations;
+extern int cgroup_recheck_zero_mem_detailed_every_iterations;
+extern char *cgroup_cpuacct_base;
+extern char *cgroup_cpuset_base;
+extern char *cgroup_blkio_base;
+extern char *cgroup_memory_base;
+extern char *cgroup_pids_base;
+extern char *cgroup_devices_base;
+extern char *cgroup_unified_base;
+extern int cgroup_root_count;
+extern int cgroup_root_max;
+extern int cgroup_max_depth;
+extern SIMPLE_PATTERN *enabled_cgroup_paths;
+extern SIMPLE_PATTERN *enabled_cgroup_names;
+extern SIMPLE_PATTERN *search_cgroup_paths;
+extern SIMPLE_PATTERN *enabled_cgroup_renames;
+extern SIMPLE_PATTERN *systemd_services_cgroups;
+extern SIMPLE_PATTERN *entrypoint_parent_process_comm;
+extern char *cgroups_network_interface_script;
+extern int cgroups_check;
+extern uint32_t Read_hash;
+extern uint32_t Write_hash;
+extern uint32_t user_hash;
+extern uint32_t system_hash;
+extern uint32_t user_usec_hash;
+extern uint32_t system_usec_hash;
+extern uint32_t nr_periods_hash;
+extern uint32_t nr_throttled_hash;
+extern uint32_t throttled_time_hash;
+extern uint32_t throttled_usec_hash;
+extern struct cgroup *cgroup_root;
+
+extern netdata_ebpf_cgroup_shm_t shm_cgroup_ebpf;
+extern int shm_fd_cgroup_ebpf;
+extern sem_t *shm_mutex_cgroup_ebpf;
+// cgroup hierarchy type; NOTE(review): presumably the result of auto-detection, users are outside this chunk - confirm.
+enum cgroups_type { CGROUPS_AUTODETECT_FAIL, CGROUPS_V1, CGROUPS_V2 };
+// systemd cgroup hierarchy setting; paired with a name in struct cgroups_systemd_config_setting.
+enum cgroups_systemd_setting {
+ SYSTEMD_CGROUP_ERR,
+ SYSTEMD_CGROUP_LEGACY,
+ SYSTEMD_CGROUP_HYBRID,
+ SYSTEMD_CGROUP_UNIFIED
+};
+// Associates an option name with a cgroups_systemd_setting value; element type of cgroups_systemd_options[].
+struct cgroups_systemd_config_setting {
+ char *name;
+ enum cgroups_systemd_setting setting;
+};
+
+extern struct cgroups_systemd_config_setting cgroups_systemd_options[];
+// Result of matching cgroup path 'id' against the enabled_cgroup_paths pattern.
+static inline int matches_enabled_cgroup_paths(char *id) {
+ return simple_pattern_matches(enabled_cgroup_paths, id);
+}
+// Result of matching cgroup 'name' against the enabled_cgroup_names pattern.
+static inline int matches_enabled_cgroup_names(char *name) {
+ return simple_pattern_matches(enabled_cgroup_names, name);
+}
+// Result of matching cgroup path 'id' against the enabled_cgroup_renames pattern (cgroups the rename script is tried on).
+static inline int matches_enabled_cgroup_renames(char *id) {
+ return simple_pattern_matches(enabled_cgroup_renames, id);
+}
+// Result of matching cgroup path 'id' against the systemd_services_cgroups pattern.
+static inline int matches_systemd_services_cgroups(char *id) {
+ return simple_pattern_matches(systemd_services_cgroups, id);
+}
+// Result of matching directory 'dir' against the search_cgroup_paths pattern.
+static inline int matches_search_cgroup_paths(const char *dir) {
+ return simple_pattern_matches(search_cgroup_paths, dir);
+}
+// Result of matching process name 'comm' against the entrypoint_parent_process_comm pattern.
+static inline int matches_entrypoint_parent_process_comm(const char *comm) {
+ return simple_pattern_matches(entrypoint_parent_process_comm, comm);
+}
+// Non-zero when 'cg' has CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE set; returns the masked bit itself, not a normalized 0/1.
+static inline int is_cgroup_systemd_service(struct cgroup *cg) {
+ return (int)(cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE);
+}
+// 1 when the orchestrator recorded for 'cg' is CGROUPS_ORCHESTRATOR_K8S, else 0.
+static inline int k8s_is_kubepod(struct cgroup *cg) {
+ return cg->container_orchestrator == CGROUPS_ORCHESTRATOR_K8S;
+}
+// Write the chart type of 'cg' into 'buffer' (sized by the caller for snprintfz() with RRD_ID_LENGTH_MAX) and return 'buffer'.
+static inline char *cgroup_chart_type(char *buffer, struct cgroup *cg) {
+    buffer[0] = '\0';
+
+    if (cg->chart_id[0] == '\0' || (cg->chart_id[0] == '/' && cg->chart_id[1] == '\0'))
+        snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s", "cgroup_root"); // empty or "/" chart id; same bounded call as below, not a zero-padding strncpy
+    else if (is_cgroup_systemd_service(cg))
+        snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s%s", services_chart_id_prefix, cg->chart_id);
+    else
+        snprintfz(buffer, RRD_ID_LENGTH_MAX, "%s%s", cgroup_chart_id_prefix, cg->chart_id);
+
+    return buffer;
+}
+
+#define RRDFUNCTIONS_CGTOP_HELP "View running containers"
+
+int cgroup_function_cgroup_top(BUFFER *wb, int timeout, const char *function, void *collector_data,
+ rrd_function_result_callback_t result_cb, void *result_cb_data,
+ rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data,
+ rrd_function_register_canceller_cb_t register_canceller_cb, void *register_canceller_cb_data);
+int cgroup_function_systemd_top(BUFFER *wb, int timeout, const char *function, void *collector_data,
+ rrd_function_result_callback_t result_cb, void *result_cb_data,
+ rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data,
+ rrd_function_register_canceller_cb_t register_canceller_cb, void *register_canceller_cb_data);
+
+void cgroup_netdev_link_init(void);
+const DICTIONARY_ITEM *cgroup_netdev_get(struct cgroup *cg);
+void cgroup_netdev_delete(struct cgroup *cg);
+
+void update_cpu_utilization_chart(struct cgroup *cg);
+void update_cpu_utilization_limit_chart(struct cgroup *cg, NETDATA_DOUBLE cpu_limit);
+void update_cpu_throttled_chart(struct cgroup *cg);
+void update_cpu_throttled_duration_chart(struct cgroup *cg);
+void update_cpu_shares_chart(struct cgroup *cg);
+void update_cpu_per_core_usage_chart(struct cgroup *cg);
+
+void update_mem_usage_limit_chart(struct cgroup *cg, unsigned long long memory_limit);
+void update_mem_utilization_chart(struct cgroup *cg, unsigned long long memory_limit);
+void update_mem_usage_detailed_chart(struct cgroup *cg);
+void update_mem_writeback_chart(struct cgroup *cg);
+void update_mem_activity_chart(struct cgroup *cg);
+void update_mem_pgfaults_chart(struct cgroup *cg);
+void update_mem_failcnt_chart(struct cgroup *cg);
+void update_mem_usage_chart(struct cgroup *cg);
+
+void update_io_serviced_bytes_chart(struct cgroup *cg);
+void update_io_serviced_ops_chart(struct cgroup *cg);
+void update_throttle_io_serviced_bytes_chart(struct cgroup *cg);
+void update_throttle_io_serviced_ops_chart(struct cgroup *cg);
+void update_io_queued_ops_chart(struct cgroup *cg);
+void update_io_merged_ops_chart(struct cgroup *cg);
+
+void update_pids_current_chart(struct cgroup *cg);
+
+void update_cpu_some_pressure_chart(struct cgroup *cg);
+void update_cpu_some_pressure_stall_time_chart(struct cgroup *cg);
+void update_cpu_full_pressure_chart(struct cgroup *cg);
+void update_cpu_full_pressure_stall_time_chart(struct cgroup *cg);
+
+void update_mem_some_pressure_chart(struct cgroup *cg);
+void update_mem_some_pressure_stall_time_chart(struct cgroup *cg);
+void update_mem_full_pressure_chart(struct cgroup *cg);
+void update_mem_full_pressure_stall_time_chart(struct cgroup *cg);
+
+void update_irq_some_pressure_chart(struct cgroup *cg);
+void update_irq_some_pressure_stall_time_chart(struct cgroup *cg);
+void update_irq_full_pressure_chart(struct cgroup *cg);
+void update_irq_full_pressure_stall_time_chart(struct cgroup *cg);
+
+void update_io_some_pressure_chart(struct cgroup *cg);
+void update_io_some_pressure_stall_time_chart(struct cgroup *cg);
+void update_io_full_pressure_chart(struct cgroup *cg);
+void update_io_full_pressure_stall_time_chart(struct cgroup *cg);
+
+#endif // NETDATA_CGROUP_INTERNALS_H
\ No newline at end of file
diff --git a/collectors/cgroups.plugin/cgroup-name.sh b/collectors/cgroups.plugin/cgroup-name.sh.in
similarity index 90%
rename from collectors/cgroups.plugin/cgroup-name.sh
rename to collectors/cgroups.plugin/cgroup-name.sh.in
index 6edd9d9f0444de..0f8b63256b5356 100755
--- a/collectors/cgroups.plugin/cgroup-name.sh
+++ b/collectors/cgroups.plugin/cgroup-name.sh.in
@@ -3,48 +3,115 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis
+# (C) 2023 Netdata Inc.
# SPDX-License-Identifier: GPL-3.0-or-later
#
# Script to find a better name for cgroups
#
-export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin"
+export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin:@sbindir_POST@"
export LC_ALL=C
+cmd_line="'${0}' $(printf "'%s' " "${@}")"
+
# -----------------------------------------------------------------------------
+# logging
PROGRAM_NAME="$(basename "${0}")"
-logdate() {
- date "+%Y-%m-%d %H:%M:%S"
+# these should be the same with syslog() priorities
+NDLP_EMERG=0 # system is unusable
+NDLP_ALERT=1 # action must be taken immediately
+NDLP_CRIT=2 # critical conditions
+NDLP_ERR=3 # error conditions
+NDLP_WARN=4 # warning conditions
+NDLP_NOTICE=5 # normal but significant condition
+NDLP_INFO=6 # informational
+NDLP_DEBUG=7 # debug-level messages
+
+# the max (numerically) log level we will log
+LOG_LEVEL=$NDLP_INFO
+# Set LOG_LEVEL from NETDATA_LOG_LEVEL (matched case-insensitively); unrecognized or empty values leave LOG_LEVEL unchanged.
+set_log_min_priority() {
+ case "${NETDATA_LOG_LEVEL,,}" in
+ "emerg" | "emergency")
+ LOG_LEVEL=$NDLP_EMERG
+ ;;
+
+ "alert")
+ LOG_LEVEL=$NDLP_ALERT
+ ;;
+
+ "crit" | "critical")
+ LOG_LEVEL=$NDLP_CRIT
+ ;;
+
+ "err" | "error")
+ LOG_LEVEL=$NDLP_ERR
+ ;;
+
+ "warn" | "warning")
+ LOG_LEVEL=$NDLP_WARN
+ ;;
+
+ "notice")
+ LOG_LEVEL=$NDLP_NOTICE
+ ;;
+
+ "info")
+ LOG_LEVEL=$NDLP_INFO
+ ;;
+
+ "debug")
+ LOG_LEVEL=$NDLP_DEBUG
+ ;;
+ esac
}
-log() {
- local status="${1}"
- shift
+set_log_min_priority
- echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}"
+log() {
+ local level="${1}"
+ shift 1
+
+ [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return
+
+ systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}"
+log() {
+ local level="${1}"
+ shift 1
+
+ [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return
+
+ systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <&2 "BASH version 4 or later is required (this is ${BASH_VERSION})."
+ exit 1
+fi
# -----------------------------------------------------------------------------
# parse the arguments
@@ -81,7 +152,10 @@ do
case "${1}" in
--cgroup) cgroup="${2}"; shift 1;;
--pid|-p) pid="${2}"; shift 1;;
- --debug|debug) debug=1;;
+ --debug|debug)
+ debug=1
+ LOG_LEVEL=$NDLP_DEBUG
+ ;;
*) fatal "Cannot understand argument '${1}'";;
esac
diff --git a/collectors/cgroups.plugin/cgroup-network.c b/collectors/cgroups.plugin/cgroup-network.c
index a490df3945ae6d..79c78732a56fb6 100644
--- a/collectors/cgroups.plugin/cgroup-network.c
+++ b/collectors/cgroups.plugin/cgroup-network.c
@@ -10,10 +10,16 @@
#include
#endif
-char environment_variable2[FILENAME_MAX + 50] = "";
+char env_netdata_host_prefix[FILENAME_MAX + 50] = "";
+char env_netdata_log_method[FILENAME_MAX + 50] = "";
+char env_netdata_log_format[FILENAME_MAX + 50] = "";
+char env_netdata_log_level[FILENAME_MAX + 50] = "";
char *environment[] = {
"PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin",
- environment_variable2,
+ env_netdata_host_prefix,
+ env_netdata_log_method,
+ env_netdata_log_format,
+ env_netdata_log_level,
NULL
};
@@ -286,7 +292,8 @@ int switch_namespace(const char *prefix, pid_t pid) {
pid_t read_pid_from_cgroup_file(const char *filename) {
int fd = open(filename, procfile_open_flags);
if(fd == -1) {
- collector_error("Cannot open pid_from_cgroup() file '%s'.", filename);
+ if (errno != ENOENT)
+ collector_error("Cannot open pid_from_cgroup() file '%s'.", filename);
return 0;
}
@@ -646,12 +653,11 @@ void usage(void) {
}
int main(int argc, char **argv) {
- stderror = stderr;
pid_t pid = 0;
- program_name = argv[0];
program_version = VERSION;
- error_log_syslog = 0;
+ clocks_init();
+ nd_log_initialize_for_external_plugins("cgroup-network");
// since cgroup-network runs as root, prevent it from opening symbolic links
procfile_open_flags = O_RDONLY|O_NOFOLLOW;
@@ -669,7 +675,20 @@ int main(int argc, char **argv) {
// build a safe environment for our script
// the first environment variable is a fixed PATH=
- snprintfz(environment_variable2, sizeof(environment_variable2) - 1, "NETDATA_HOST_PREFIX=%s", netdata_configured_host_prefix);
+ snprintfz(env_netdata_host_prefix, sizeof(env_netdata_host_prefix) - 1, "NETDATA_HOST_PREFIX=%s", netdata_configured_host_prefix);
+
+ char *s;
+
+ s = getenv("NETDATA_LOG_METHOD");
+ snprintfz(env_netdata_log_method, sizeof(env_netdata_log_method) - 1, "NETDATA_LOG_METHOD=%s", nd_log_method_for_external_plugins(s));
+
+ s = getenv("NETDATA_LOG_FORMAT");
+ if (s)
+ snprintfz(env_netdata_log_format, sizeof(env_netdata_log_format) - 1, "NETDATA_LOG_FORMAT=%s", s);
+
+ s = getenv("NETDATA_LOG_LEVEL");
+ if (s)
+ snprintfz(env_netdata_log_level, sizeof(env_netdata_log_level) - 1, "NETDATA_LOG_LEVEL=%s", s);
// ------------------------------------------------------------------------
diff --git a/collectors/cgroups.plugin/cgroup-top.c b/collectors/cgroups.plugin/cgroup-top.c
new file mode 100644
index 00000000000000..0e64b908d83340
--- /dev/null
+++ b/collectors/cgroups.plugin/cgroup-top.c
@@ -0,0 +1,519 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "cgroup-internals.h"
+
+// Per-cgroup network bandwidth accumulator, kept as a two-slot buffer:
+// cgroup_netdev_get_bandwidth() reads one slot while cgroup_netdev_add_bandwidth()
+// accumulates into the other; cgroup_netdev_reset_all() swaps the two roles.
+struct cgroup_netdev_link {
+ size_t read_slot; // selects which of the two slots is currently exposed to readers
+ NETDATA_DOUBLE received[2]; // received bandwidth per slot; NAN until a value has been added
+ NETDATA_DOUBLE sent[2]; // sent bandwidth per slot; NAN until a value has been added
+};
+
+static DICTIONARY *cgroup_netdev_link_dict = NULL;
+
+// Create the dictionary that stores one struct cgroup_netdev_link per cgroup id.
+// Values have a fixed size and an existing value is not overwritten on re-insert.
+void cgroup_netdev_link_init(void) {
+    cgroup_netdev_link_dict = dictionary_create_advanced(
+        DICT_OPTION_FIXED_SIZE | DICT_OPTION_DONT_OVERWRITE_VALUE,
+        NULL,
+        sizeof(struct cgroup_netdev_link));
+}
+
+// Return a reference to the bandwidth link of `cg`, creating the link on first use.
+//
+// The cgroup keeps its own acquired reference in cg->cgroup_netdev_link (dropped by
+// cgroup_netdev_delete()). Every call hands the caller an additional, duplicated
+// reference that is to be dropped with cgroup_netdev_release().
+//
+// Only the very first call used to duplicate the item; later calls returned the
+// cgroup's own reference, so a caller releasing its result would drop the reference
+// the cgroup still relies on. Duplicating on every call keeps ownership uniform.
+const DICTIONARY_ITEM *cgroup_netdev_get(struct cgroup *cg) {
+    if(!cg->cgroup_netdev_link) {
+        // both slots start as NAN so "no data yet" is distinguishable from zero traffic
+        struct cgroup_netdev_link t = {
+            .read_slot = 0,
+            .received = { NAN, NAN },
+            .sent = { NAN, NAN },
+        };
+
+        cg->cgroup_netdev_link = dictionary_set_and_acquire_item(cgroup_netdev_link_dict, cg->id, &t, sizeof(struct cgroup_netdev_link));
+    }
+
+    return dictionary_acquired_item_dup(cgroup_netdev_link_dict, cg->cgroup_netdev_link);
+}
+
+// Drop the cgroup's own reference to its bandwidth link and remove the entry
+// (keyed by cg->id) from the dictionary. Does nothing when the cgroup has no link.
+void cgroup_netdev_delete(struct cgroup *cg) {
+    if(!cg->cgroup_netdev_link)
+        return;
+
+    dictionary_acquired_item_release(cgroup_netdev_link_dict, cg->cgroup_netdev_link);
+    dictionary_del(cgroup_netdev_link_dict, cg->id);
+    dictionary_garbage_collect(cgroup_netdev_link_dict);
+
+    // the reference has been released: clear the pointer so a later
+    // cgroup_netdev_get()/cgroup_netdev_get_bandwidth()/cgroup_netdev_delete()
+    // on the same cgroup cannot touch (or release again) a stale item
+    cg->cgroup_netdev_link = NULL;
+}
+
+// Release one acquired reference to a bandwidth link; a NULL link is ignored.
+void cgroup_netdev_release(const DICTIONARY_ITEM *link) {
+    if(!link)
+        return;
+
+    dictionary_acquired_item_release(cgroup_netdev_link_dict, link);
+}
+
+// Take an additional reference on an already acquired bandwidth link and return it.
+const void *cgroup_netdev_dup(const DICTIONARY_ITEM *link) {
+    const void *extra_ref = dictionary_acquired_item_dup(cgroup_netdev_link_dict, link);
+    return extra_ref;
+}
+
+// Swap the read/write roles of the two slots of every link in the dictionary:
+// the slot that was being read is cleared to NAN (it becomes the one that
+// cgroup_netdev_add_bandwidth() fills next) and the other slot becomes readable.
+void cgroup_netdev_reset_all(void) {
+    struct cgroup_netdev_link *link;
+    dfe_start_read(cgroup_netdev_link_dict, link) {
+        // any value >= 1 is treated as slot 1
+        size_t old_read = (link->read_slot >= 1) ? 1 : 0;
+
+        link->read_slot = 1 - old_read;
+        link->received[old_read] = NAN;
+        link->sent[old_read] = NAN;
+    }
+    dfe_done(link);
+}
+
+// Accumulate `received` and `sent` into the slot of `link` that is NOT currently
+// being read. A slot still holding NAN is initialised with the value instead of
+// being added to (NAN + x would stay NAN). A NULL link is ignored.
+void cgroup_netdev_add_bandwidth(const DICTIONARY_ITEM *link, NETDATA_DOUBLE received, NETDATA_DOUBLE sent) {
+    if(link) {
+        struct cgroup_netdev_link *acc = dictionary_acquired_item_value(link);
+
+        // write to the opposite slot of the one exposed to readers
+        size_t write_slot = acc->read_slot ? 0 : 1;
+
+        acc->received[write_slot] = isnan(acc->received[write_slot]) ? received : acc->received[write_slot] + received;
+        acc->sent[write_slot] = isnan(acc->sent[write_slot]) ? sent : acc->sent[write_slot] + sent;
+    }
+}
+
+// Copy the values of the currently readable slot of the cgroup's bandwidth link
+// into *received and *sent. Both are set to NAN when the cgroup has no link.
+void cgroup_netdev_get_bandwidth(struct cgroup *cg, NETDATA_DOUBLE *received, NETDATA_DOUBLE *sent) {
+    if(cg->cgroup_netdev_link) {
+        struct cgroup_netdev_link *acc = dictionary_acquired_item_value(cg->cgroup_netdev_link);
+
+        size_t readable = acc->read_slot ? 1 : 0;
+
+        *received = acc->received[readable];
+        *sent = acc->sent[readable];
+        return;
+    }
+
+    *received = NAN;
+    *sent = NAN;
+}
+
+// Build the JSON "table" response listing every enabled, function-ready cgroup
+// that is NOT a systemd service.
+//
+// Each data row holds, in this order: Name, Kind, PIDs, CPU, RAM, Reads, Writes,
+// Received, Sent. The "columns" object below declares the fields in that same order.
+// The response also describes charts, default charts and a group-by on "Kind".
+//
+// wb                  - output buffer; flushed first, then filled with the JSON
+// result_cb           - when not NULL, called with (wb, response, result_cb_data)
+// is_cancelled_cb     - when not NULL and it reports cancellation, the buffer is
+//                       flushed and HTTP_RESP_CLIENT_CLOSED_REQUEST is used instead
+// Returns the response code that was passed to result_cb (HTTP_RESP_OK normally).
+int cgroup_function_cgroup_top(BUFFER *wb, int timeout __maybe_unused, const char *function __maybe_unused,
+ void *collector_data __maybe_unused,
+ rrd_function_result_callback_t result_cb, void *result_cb_data,
+ rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data,
+ rrd_function_register_canceller_cb_t register_canceller_cb __maybe_unused,
+ void *register_canceller_cb_data __maybe_unused) {
+
+ buffer_flush(wb);
+ wb->content_type = CT_APPLICATION_JSON;
+ buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
+
+ buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost));
+ buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
+ buffer_json_member_add_string(wb, "type", "table");
+ buffer_json_member_add_time_t(wb, "update_every", 1);
+ buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_CGTOP_HELP);
+ buffer_json_member_add_array(wb, "data");
+
+ // per-column maxima, collected while emitting rows and reported in the column definitions
+ double max_pids = 0.0;
+ double max_cpu = 0.0;
+ double max_ram = 0.0;
+ double max_disk_io_read = 0.0;
+ double max_disk_io_written = 0.0;
+ double max_net_received = 0.0;
+ double max_net_sent = 0.0;
+
+ RRDDIM *rd = NULL;
+
+ // the cgroup list is walked with cgroup_root_mutex held
+ uv_mutex_lock(&cgroup_root_mutex);
+
+ for(struct cgroup *cg = cgroup_root; cg ; cg = cg->next) {
+ // skip disabled / not yet ready cgroups, and systemd services (they have their own function)
+ if(unlikely(!cg->enabled || cg->pending_renames || !cg->function_ready || is_cgroup_systemd_service(cg)))
+ continue;
+
+ buffer_json_add_array_item_array(wb);
+
+ buffer_json_add_array_item_string(wb, cg->name); // Name
+
+ if(k8s_is_kubepod(cg))
+ buffer_json_add_array_item_string(wb, "k8s"); // Kind
+ else
+ buffer_json_add_array_item_string(wb, "cgroup"); // Kind
+
+ // NOTE(review): rrddim_get_last_stored_value() appears to return the last stored value
+ // scaled by its 3rd argument while updating the running maximum - confirm against its definition
+ double pids_current = rrddim_get_last_stored_value(cg->st_pids_rd_pids_current, &max_pids, 1.0);
+
+ // CPU is user + system; stays NAN when either dimension is missing
+ double cpu = NAN;
+ if (cg->st_cpu_rd_user && cg->st_cpu_rd_system) {
+ cpu = cg->st_cpu_rd_user->collector.last_stored_value + cg->st_cpu_rd_system->collector.last_stored_value;
+ max_cpu = MAX(max_cpu, cpu);
+ }
+
+ double ram = rrddim_get_last_stored_value(cg->st_mem_rd_ram, &max_ram, 1.0);
+
+ // prefer the throttle_io dimensions, falling back to the io ones
+ rd = cg->st_throttle_io_rd_read ? cg->st_throttle_io_rd_read : cg->st_io_rd_read;
+ double disk_io_read = rrddim_get_last_stored_value(rd, &max_disk_io_read, 1024.0);
+ rd = cg->st_throttle_io_rd_written ? cg->st_throttle_io_rd_written : cg->st_io_rd_written;
+ double disk_io_written = rrddim_get_last_stored_value(rd, &max_disk_io_written, 1024.0);
+
+ // network values are scaled and counted in the maxima only when both are available
+ NETDATA_DOUBLE received, sent;
+ cgroup_netdev_get_bandwidth(cg, &received, &sent);
+ if (!isnan(received) && !isnan(sent)) {
+ received /= 1000.0;
+ sent /= 1000.0;
+ max_net_received = MAX(max_net_received, received);
+ max_net_sent = MAX(max_net_sent, sent);
+ }
+
+ buffer_json_add_array_item_double(wb, pids_current);
+ buffer_json_add_array_item_double(wb, cpu);
+ buffer_json_add_array_item_double(wb, ram);
+ buffer_json_add_array_item_double(wb, disk_io_read);
+ buffer_json_add_array_item_double(wb, disk_io_written);
+ buffer_json_add_array_item_double(wb, received);
+ buffer_json_add_array_item_double(wb, sent);
+
+ buffer_json_array_close(wb);
+ }
+
+ uv_mutex_unlock(&cgroup_root_mutex);
+
+ buffer_json_array_close(wb); // data
+ buffer_json_member_add_object(wb, "columns");
+ {
+ // field ids follow the order in which row values were appended above
+ size_t field_id = 0;
+
+ // Name
+ buffer_rrdf_table_add_field(wb, field_id++, "Name", "CGROUP Name",
+ RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+ 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+ RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH,
+ NULL);
+
+ // Kind
+ buffer_rrdf_table_add_field(wb, field_id++, "Kind", "CGROUP Kind",
+ RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+ 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+ RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
+ // PIDs
+ buffer_rrdf_table_add_field(wb, field_id++, "PIDs", "Number of Processes Currently in the CGROUP",
+ RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER,
+ 0, "pids", max_pids, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
+ // CPU
+ buffer_rrdf_table_add_field(wb, field_id++, "CPU", "CPU Usage",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "%", max_cpu, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
+ // RAM
+ buffer_rrdf_table_add_field(wb, field_id++, "RAM", "RAM Usage",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "MiB", max_ram, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
+ // Disk IO Reads
+ buffer_rrdf_table_add_field(wb, field_id++, "Reads", "Disk Read Data",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "MiB", max_disk_io_read, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
+ // Disk IO Writes
+ buffer_rrdf_table_add_field(wb, field_id++, "Writes", "Disk Written Data",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "MiB", max_disk_io_written, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
+ // Network Received
+ buffer_rrdf_table_add_field(wb, field_id++, "Received", "Network Traffic Received",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "Mbps", max_net_received, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
+ // Network Sent
+ buffer_rrdf_table_add_field(wb, field_id++, "Sent", "Network Traffic Sent ",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "Mbps", max_net_sent, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+ }
+ buffer_json_object_close(wb); // columns
+ buffer_json_member_add_string(wb, "default_sort_column", "CPU");
+
+ // chart definitions: each chart names the table columns it is built from
+ buffer_json_member_add_object(wb, "charts");
+ {
+ buffer_json_member_add_object(wb, "CPU");
+ {
+ buffer_json_member_add_string(wb, "name", "CPU");
+ buffer_json_member_add_string(wb, "type", "stacked-bar");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "CPU");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ buffer_json_member_add_object(wb, "Memory");
+ {
+ buffer_json_member_add_string(wb, "name", "Memory");
+ buffer_json_member_add_string(wb, "type", "stacked-bar");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "RAM");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ buffer_json_member_add_object(wb, "Traffic");
+ {
+ buffer_json_member_add_string(wb, "name", "Traffic");
+ buffer_json_member_add_string(wb, "type", "stacked-bar");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "Received");
+ buffer_json_add_array_item_string(wb, "Sent");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+ }
+ buffer_json_object_close(wb); // charts
+
+ // default charts: pairs of [chart name, column name]
+ buffer_json_member_add_array(wb, "default_charts");
+ {
+ buffer_json_add_array_item_array(wb);
+ buffer_json_add_array_item_string(wb, "CPU");
+ buffer_json_add_array_item_string(wb, "Name");
+ buffer_json_array_close(wb);
+
+ buffer_json_add_array_item_array(wb);
+ buffer_json_add_array_item_string(wb, "Memory");
+ buffer_json_add_array_item_string(wb, "Name");
+ buffer_json_array_close(wb);
+ }
+ buffer_json_array_close(wb);
+
+ buffer_json_member_add_object(wb, "group_by");
+ {
+ buffer_json_member_add_object(wb, "Kind");
+ {
+ buffer_json_member_add_string(wb, "name", "Kind");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "Kind");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+ }
+ buffer_json_object_close(wb); // group_by
+
+ buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1);
+ buffer_json_finalize(wb);
+
+ // cancellation is only checked once the whole response has been built;
+ // in that case the generated content is discarded
+ int response = HTTP_RESP_OK;
+ if(is_cancelled_cb && is_cancelled_cb(is_cancelled_cb_data)) {
+ buffer_flush(wb);
+ response = HTTP_RESP_CLIENT_CLOSED_REQUEST;
+ }
+
+ if(result_cb)
+ result_cb(wb, response, result_cb_data);
+
+ return response;
+}
+
+// Build the JSON "table" response listing every enabled, function-ready cgroup
+// that IS a systemd service.
+//
+// Each data row holds, in this order: Name, PIDs, CPU, RAM, Reads, Writes.
+// The "columns" object below declares the fields in that same order.
+// Unlike cgroup_function_cgroup_top() there are no Kind or network columns
+// and no "group_by" section.
+//
+// wb                  - output buffer; flushed first, then filled with the JSON
+// result_cb           - when not NULL, called with (wb, response, result_cb_data)
+// is_cancelled_cb     - when not NULL and it reports cancellation, the buffer is
+//                       flushed and HTTP_RESP_CLIENT_CLOSED_REQUEST is used instead
+// Returns the response code that was passed to result_cb (HTTP_RESP_OK normally).
+int cgroup_function_systemd_top(BUFFER *wb, int timeout __maybe_unused, const char *function __maybe_unused,
+ void *collector_data __maybe_unused,
+ rrd_function_result_callback_t result_cb, void *result_cb_data,
+ rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data,
+ rrd_function_register_canceller_cb_t register_canceller_cb __maybe_unused,
+ void *register_canceller_cb_data __maybe_unused) {
+
+ buffer_flush(wb);
+ wb->content_type = CT_APPLICATION_JSON;
+ buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
+
+ buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost));
+ buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
+ buffer_json_member_add_string(wb, "type", "table");
+ buffer_json_member_add_time_t(wb, "update_every", 1);
+ // NOTE(review): this reuses the help text of the cgroup-top function;
+ // confirm whether a systemd-specific help string is intended here
+ buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_CGTOP_HELP);
+ buffer_json_member_add_array(wb, "data");
+
+ // per-column maxima, collected while emitting rows and reported in the column definitions
+ double max_pids = 0.0;
+ double max_cpu = 0.0;
+ double max_ram = 0.0;
+ double max_disk_io_read = 0.0;
+ double max_disk_io_written = 0.0;
+
+ RRDDIM *rd = NULL;
+
+ // the cgroup list is walked with cgroup_root_mutex held
+ uv_mutex_lock(&cgroup_root_mutex);
+
+ for(struct cgroup *cg = cgroup_root; cg ; cg = cg->next) {
+ // skip disabled / not yet ready cgroups, and everything that is not a systemd service
+ if(unlikely(!cg->enabled || cg->pending_renames || !cg->function_ready || !is_cgroup_systemd_service(cg)))
+ continue;
+
+ buffer_json_add_array_item_array(wb);
+
+ buffer_json_add_array_item_string(wb, cg->name);
+
+ // NOTE(review): rrddim_get_last_stored_value() appears to return the last stored value
+ // scaled by its 3rd argument while updating the running maximum - confirm against its definition
+ double pids_current = rrddim_get_last_stored_value(cg->st_pids_rd_pids_current, &max_pids, 1.0);
+
+ // CPU is user + system; stays NAN when either dimension is missing
+ double cpu = NAN;
+ if (cg->st_cpu_rd_user && cg->st_cpu_rd_system) {
+ cpu = cg->st_cpu_rd_user->collector.last_stored_value + cg->st_cpu_rd_system->collector.last_stored_value;
+ max_cpu = MAX(max_cpu, cpu);
+ }
+
+ double ram = rrddim_get_last_stored_value(cg->st_mem_rd_ram, &max_ram, 1.0);
+
+ // prefer the throttle_io dimensions, falling back to the io ones
+ rd = cg->st_throttle_io_rd_read ? cg->st_throttle_io_rd_read : cg->st_io_rd_read;
+ double disk_io_read = rrddim_get_last_stored_value(rd, &max_disk_io_read, 1024.0);
+ rd = cg->st_throttle_io_rd_written ? cg->st_throttle_io_rd_written : cg->st_io_rd_written;
+ double disk_io_written = rrddim_get_last_stored_value(rd, &max_disk_io_written, 1024.0);
+
+ buffer_json_add_array_item_double(wb, pids_current);
+ buffer_json_add_array_item_double(wb, cpu);
+ buffer_json_add_array_item_double(wb, ram);
+ buffer_json_add_array_item_double(wb, disk_io_read);
+ buffer_json_add_array_item_double(wb, disk_io_written);
+
+ buffer_json_array_close(wb);
+ }
+
+ uv_mutex_unlock(&cgroup_root_mutex);
+
+ buffer_json_array_close(wb); // data
+ buffer_json_member_add_object(wb, "columns");
+ {
+ // field ids follow the order in which row values were appended above
+ size_t field_id = 0;
+
+ // Name
+ buffer_rrdf_table_add_field(wb, field_id++, "Name", "Systemd Service Name",
+ RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+ 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+ RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH,
+ NULL);
+
+ // PIDs
+ buffer_rrdf_table_add_field(wb, field_id++, "PIDs", "Number of Processes Currently in the CGROUP",
+ RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER,
+ 0, "pids", max_pids, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
+ // CPU
+ buffer_rrdf_table_add_field(wb, field_id++, "CPU", "CPU Usage",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "%", max_cpu, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
+ // RAM
+ buffer_rrdf_table_add_field(wb, field_id++, "RAM", "RAM Usage",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "MiB", max_ram, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
+ // Disk IO Reads
+ buffer_rrdf_table_add_field(wb, field_id++, "Reads", "Disk Read Data",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "MiB", max_disk_io_read, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
+ // Disk IO Writes
+ buffer_rrdf_table_add_field(wb, field_id++, "Writes", "Disk Written Data",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "MiB", max_disk_io_written, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+ }
+
+ buffer_json_object_close(wb); // columns
+ buffer_json_member_add_string(wb, "default_sort_column", "CPU");
+
+ // chart definitions: each chart names the table columns it is built from
+ buffer_json_member_add_object(wb, "charts");
+ {
+ buffer_json_member_add_object(wb, "CPU");
+ {
+ buffer_json_member_add_string(wb, "name", "CPU");
+ buffer_json_member_add_string(wb, "type", "stacked-bar");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "CPU");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ buffer_json_member_add_object(wb, "Memory");
+ {
+ buffer_json_member_add_string(wb, "name", "Memory");
+ buffer_json_member_add_string(wb, "type", "stacked-bar");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "RAM");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+ }
+ buffer_json_object_close(wb); // charts
+
+ // default charts: pairs of [chart name, column name]
+ buffer_json_member_add_array(wb, "default_charts");
+ {
+ buffer_json_add_array_item_array(wb);
+ buffer_json_add_array_item_string(wb, "CPU");
+ buffer_json_add_array_item_string(wb, "Name");
+ buffer_json_array_close(wb);
+
+ buffer_json_add_array_item_array(wb);
+ buffer_json_add_array_item_string(wb, "Memory");
+ buffer_json_add_array_item_string(wb, "Name");
+ buffer_json_array_close(wb);
+ }
+ buffer_json_array_close(wb);
+
+ buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1);
+ buffer_json_finalize(wb);
+
+ // cancellation is only checked once the whole response has been built;
+ // in that case the generated content is discarded
+ int response = HTTP_RESP_OK;
+ if(is_cancelled_cb && is_cancelled_cb(is_cancelled_cb_data)) {
+ buffer_flush(wb);
+ response = HTTP_RESP_CLIENT_CLOSED_REQUEST;
+ }
+
+ if(result_cb)
+ result_cb(wb, response, result_cb_data);
+
+ return response;
+}
diff --git a/collectors/cgroups.plugin/integrations/containers.md b/collectors/cgroups.plugin/integrations/containers.md
new file mode 100644
index 00000000000000..6273d1e918e560
--- /dev/null
+++ b/collectors/cgroups.plugin/integrations/containers.md
@@ -0,0 +1,169 @@
+
+
+# Containers
+
+
+
+
+
+Plugin: cgroups.plugin
+Module: /sys/fs/cgroup
+
+
+
+## Overview
+
+Monitor Containers for performance, resource usage, and health status.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per cgroup
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| container_name | The container name or group path if name resolution fails. |
+| image | Docker/Podman container image name. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.cpu_limit | used | percentage |
+| cgroup.cpu | user, system | percentage |
+| cgroup.cpu_per_core | a dimension per core | percentage |
+| cgroup.throttled | throttled | percentage |
+| cgroup.throttled_duration | duration | ms |
+| cgroup.cpu_shares | shares | shares |
+| cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB |
+| cgroup.writeback | dirty, writeback | MiB |
+| cgroup.mem_activity | in, out | MiB/s |
+| cgroup.pgfaults | pgfault, swap | MiB/s |
+| cgroup.mem_usage | ram, swap | MiB |
+| cgroup.mem_usage_limit | available, used | MiB |
+| cgroup.mem_utilization | utilization | percentage |
+| cgroup.mem_failcnt | failures | count |
+| cgroup.io | read, write | KiB/s |
+| cgroup.serviced_ops | read, write | operations/s |
+| cgroup.throttle_io | read, write | KiB/s |
+| cgroup.throttle_serviced_ops | read, write | operations/s |
+| cgroup.queued_ops | read, write | operations |
+| cgroup.merged_ops | read, write | operations/s |
+| cgroup.cpu_some_pressure | some10, some60, some300 | percentage |
+| cgroup.cpu_some_pressure_stall_time | time | ms |
+| cgroup.cpu_full_pressure | some10, some60, some300 | percentage |
+| cgroup.cpu_full_pressure_stall_time | time | ms |
+| cgroup.memory_some_pressure | some10, some60, some300 | percentage |
+| cgroup.memory_some_pressure_stall_time | time | ms |
+| cgroup.memory_full_pressure | some10, some60, some300 | percentage |
+| cgroup.memory_full_pressure_stall_time | time | ms |
+| cgroup.io_some_pressure | some10, some60, some300 | percentage |
+| cgroup.io_some_pressure_stall_time | time | ms |
+| cgroup.io_full_pressure | some10, some60, some300 | percentage |
+| cgroup.io_full_pressure_stall_time | time | ms |
+| cgroup.pids_current | pids | pids |
+
+### Per cgroup network device
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| container_name | The container name or group path if name resolution fails. |
+| image | Docker/Podman container image name. |
+| device | The name of the host network interface linked to the container's network interface. |
+| container_device | Container network interface name. |
+| interface_type | Network interface type. Always "virtual" for the containers. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.net_net | received, sent | kilobits/s |
+| cgroup.net_packets | received, sent, multicast | pps |
+| cgroup.net_errors | inbound, outbound | errors/s |
+| cgroup.net_drops | inbound, outbound | errors/s |
+| cgroup.net_fifo | receive, transmit | errors/s |
+| cgroup.net_compressed | receive, sent | pps |
+| cgroup.net_events | frames, collisions, carrier | events/s |
+| cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state |
+| cgroup.net_carrier | up, down | state |
+| cgroup.net_mtu | mtu | octets |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes |
+| [ cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.mem_usage | cgroup memory utilization |
+| [ cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute |
+| [ cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/cgroups.plugin/integrations/kubernetes_containers.md b/collectors/cgroups.plugin/integrations/kubernetes_containers.md
new file mode 100644
index 00000000000000..9be32a12a1493c
--- /dev/null
+++ b/collectors/cgroups.plugin/integrations/kubernetes_containers.md
@@ -0,0 +1,183 @@
+
+
+# Kubernetes Containers
+
+
+
+
+
+Plugin: cgroups.plugin
+Module: /sys/fs/cgroup
+
+
+
+## Overview
+
+Monitor Containers for performance, resource usage, and health status.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per k8s cgroup
+
+These metrics refer to the Pod container.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| k8s_node_name | Node name. The value of _pod.spec.nodeName_. |
+| k8s_namespace | Namespace name. The value of _pod.metadata.namespace_. |
+| k8s_controller_kind | Controller kind (ReplicaSet, DaemonSet, StatefulSet, Job, etc.). The value of _pod.OwnerReferences.Controller.Kind_. |
+| k8s_controller_name | Controller name. The value of _pod.OwnerReferences.Controller.Name_. |
+| k8s_pod_name | Pod name. The value of _pod.metadata.name_. |
+| k8s_container_name | Container name. The value of _pod.spec.containers.name_. |
+| k8s_kind | Instance kind: "pod" or "container". |
+| k8s_qos_class | QoS class (guaranteed, burstable, besteffort). |
+| k8s_cluster_id | Cluster ID. The value of kube-system namespace _namespace.metadata.uid_. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| k8s.cgroup.cpu_limit | used | percentage |
+| k8s.cgroup.cpu | user, system | percentage |
+| k8s.cgroup.cpu_per_core | a dimension per core | percentage |
+| k8s.cgroup.throttled | throttled | percentage |
+| k8s.cgroup.throttled_duration | duration | ms |
+| k8s.cgroup.cpu_shares | shares | shares |
+| k8s.cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB |
+| k8s.cgroup.writeback | dirty, writeback | MiB |
+| k8s.cgroup.mem_activity | in, out | MiB/s |
+| k8s.cgroup.pgfaults | pgfault, swap | MiB/s |
+| k8s.cgroup.mem_usage | ram, swap | MiB |
+| k8s.cgroup.mem_usage_limit | available, used | MiB |
+| k8s.cgroup.mem_utilization | utilization | percentage |
+| k8s.cgroup.mem_failcnt | failures | count |
+| k8s.cgroup.io | read, write | KiB/s |
+| k8s.cgroup.serviced_ops | read, write | operations/s |
+| k8s.cgroup.throttle_io | read, write | KiB/s |
+| k8s.cgroup.throttle_serviced_ops | read, write | operations/s |
+| k8s.cgroup.queued_ops | read, write | operations |
+| k8s.cgroup.merged_ops | read, write | operations/s |
+| k8s.cgroup.cpu_some_pressure | some10, some60, some300 | percentage |
+| k8s.cgroup.cpu_some_pressure_stall_time | time | ms |
+| k8s.cgroup.cpu_full_pressure | some10, some60, some300 | percentage |
+| k8s.cgroup.cpu_full_pressure_stall_time | time | ms |
+| k8s.cgroup.memory_some_pressure | some10, some60, some300 | percentage |
+| k8s.cgroup.memory_some_pressure_stall_time | time | ms |
+| k8s.cgroup.memory_full_pressure | some10, some60, some300 | percentage |
+| k8s.cgroup.memory_full_pressure_stall_time | time | ms |
+| k8s.cgroup.io_some_pressure | some10, some60, some300 | percentage |
+| k8s.cgroup.io_some_pressure_stall_time | time | ms |
+| k8s.cgroup.io_full_pressure | some10, some60, some300 | percentage |
+| k8s.cgroup.io_full_pressure_stall_time | time | ms |
+| k8s.cgroup.pids_current | pids | pids |
+
+### Per k8s cgroup network device
+
+These metrics refer to the Pod container network interface.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| device | The name of the host network interface linked to the container's network interface. |
+| container_device | Container network interface name. |
+| interface_type | Network interface type. Always "virtual" for the containers. |
+| k8s_node_name | Node name. The value of _pod.spec.nodeName_. |
+| k8s_namespace | Namespace name. The value of _pod.metadata.namespace_. |
+| k8s_controller_kind | Controller kind (ReplicaSet, DaemonSet, StatefulSet, Job, etc.). The value of _pod.OwnerReferences.Controller.Kind_. |
+| k8s_controller_name | Controller name. The value of _pod.OwnerReferences.Controller.Name_. |
+| k8s_pod_name | Pod name. The value of _pod.metadata.name_. |
+| k8s_container_name | Container name. The value of _pod.spec.containers.name_. |
+| k8s_kind | Instance kind: "pod" or "container". |
+| k8s_qos_class | QoS class (guaranteed, burstable, besteffort). |
+| k8s_cluster_id | Cluster ID. The value of kube-system namespace _namespace.metadata.uid_. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| k8s.cgroup.net_net | received, sent | kilobits/s |
+| k8s.cgroup.net_packets | received, sent, multicast | pps |
+| k8s.cgroup.net_errors | inbound, outbound | errors/s |
+| k8s.cgroup.net_drops | inbound, outbound | errors/s |
+| k8s.cgroup.net_fifo | receive, transmit | errors/s |
+| k8s.cgroup.net_compressed | receive, sent | pps |
+| k8s.cgroup.net_events | frames, collisions, carrier | events/s |
+| k8s.cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state |
+| k8s.cgroup.net_carrier | up, down | state |
+| k8s.cgroup.net_mtu | mtu | octets |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ k8s_cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | k8s.cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes |
+| [ k8s_cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | k8s.cgroup.mem_usage | cgroup memory utilization |
+| [ k8s_cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | k8s.cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute |
+| [ k8s_cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | k8s.cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/cgroups.plugin/integrations/libvirt_containers.md b/collectors/cgroups.plugin/integrations/libvirt_containers.md
new file mode 100644
index 00000000000000..fed4546984623c
--- /dev/null
+++ b/collectors/cgroups.plugin/integrations/libvirt_containers.md
@@ -0,0 +1,169 @@
+
+
+# Libvirt Containers
+
+
+
+
+
+Plugin: cgroups.plugin
+Module: /sys/fs/cgroup
+
+
+
+## Overview
+
+Monitor Libvirt for performance, resource usage, and health status.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per cgroup
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| container_name | The container name or group path if name resolution fails. |
+| image | Docker/Podman container image name. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.cpu_limit | used | percentage |
+| cgroup.cpu | user, system | percentage |
+| cgroup.cpu_per_core | a dimension per core | percentage |
+| cgroup.throttled | throttled | percentage |
+| cgroup.throttled_duration | duration | ms |
+| cgroup.cpu_shares | shares | shares |
+| cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB |
+| cgroup.writeback | dirty, writeback | MiB |
+| cgroup.mem_activity | in, out | MiB/s |
+| cgroup.pgfaults | pgfault, swap | MiB/s |
+| cgroup.mem_usage | ram, swap | MiB |
+| cgroup.mem_usage_limit | available, used | MiB |
+| cgroup.mem_utilization | utilization | percentage |
+| cgroup.mem_failcnt | failures | count |
+| cgroup.io | read, write | KiB/s |
+| cgroup.serviced_ops | read, write | operations/s |
+| cgroup.throttle_io | read, write | KiB/s |
+| cgroup.throttle_serviced_ops | read, write | operations/s |
+| cgroup.queued_ops | read, write | operations |
+| cgroup.merged_ops | read, write | operations/s |
+| cgroup.cpu_some_pressure | some10, some60, some300 | percentage |
+| cgroup.cpu_some_pressure_stall_time | time | ms |
+| cgroup.cpu_full_pressure | some10, some60, some300 | percentage |
+| cgroup.cpu_full_pressure_stall_time | time | ms |
+| cgroup.memory_some_pressure | some10, some60, some300 | percentage |
+| cgroup.memory_some_pressure_stall_time | time | ms |
+| cgroup.memory_full_pressure | some10, some60, some300 | percentage |
+| cgroup.memory_full_pressure_stall_time | time | ms |
+| cgroup.io_some_pressure | some10, some60, some300 | percentage |
+| cgroup.io_some_pressure_stall_time | time | ms |
+| cgroup.io_full_pressure | some10, some60, some300 | percentage |
+| cgroup.io_full_pressure_stall_time | time | ms |
+| cgroup.pids_current | pids | pids |
+
+### Per cgroup network device
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| container_name | The container name or group path if name resolution fails. |
+| image | Docker/Podman container image name. |
+| device | The name of the host network interface linked to the container's network interface. |
+| container_device | Container network interface name. |
+| interface_type | Network interface type. Always "virtual" for the containers. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.net_net | received, sent | kilobits/s |
+| cgroup.net_packets | received, sent, multicast | pps |
+| cgroup.net_errors | inbound, outbound | errors/s |
+| cgroup.net_drops | inbound, outbound | errors/s |
+| cgroup.net_fifo | receive, transmit | errors/s |
+| cgroup.net_compressed | receive, sent | pps |
+| cgroup.net_events | frames, collisions, carrier | events/s |
+| cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state |
+| cgroup.net_carrier | up, down | state |
+| cgroup.net_mtu | mtu | octets |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes |
+| [ cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.mem_usage | cgroup memory utilization |
+| [ cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute |
+| [ cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/cgroups.plugin/integrations/lxc_containers.md b/collectors/cgroups.plugin/integrations/lxc_containers.md
new file mode 100644
index 00000000000000..3f05ffd5fe204b
--- /dev/null
+++ b/collectors/cgroups.plugin/integrations/lxc_containers.md
@@ -0,0 +1,169 @@
+
+
+# LXC Containers
+
+
+
+
+
+Plugin: cgroups.plugin
+Module: /sys/fs/cgroup
+
+
+
+## Overview
+
+Monitor LXC Containers for performance, resource usage, and health status.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per cgroup
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| container_name | The container name or group path if name resolution fails. |
+| image | Docker/Podman container image name. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.cpu_limit | used | percentage |
+| cgroup.cpu | user, system | percentage |
+| cgroup.cpu_per_core | a dimension per core | percentage |
+| cgroup.throttled | throttled | percentage |
+| cgroup.throttled_duration | duration | ms |
+| cgroup.cpu_shares | shares | shares |
+| cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB |
+| cgroup.writeback | dirty, writeback | MiB |
+| cgroup.mem_activity | in, out | MiB/s |
+| cgroup.pgfaults | pgfault, swap | MiB/s |
+| cgroup.mem_usage | ram, swap | MiB |
+| cgroup.mem_usage_limit | available, used | MiB |
+| cgroup.mem_utilization | utilization | percentage |
+| cgroup.mem_failcnt | failures | count |
+| cgroup.io | read, write | KiB/s |
+| cgroup.serviced_ops | read, write | operations/s |
+| cgroup.throttle_io | read, write | KiB/s |
+| cgroup.throttle_serviced_ops | read, write | operations/s |
+| cgroup.queued_ops | read, write | operations |
+| cgroup.merged_ops | read, write | operations/s |
+| cgroup.cpu_some_pressure | some10, some60, some300 | percentage |
+| cgroup.cpu_some_pressure_stall_time | time | ms |
+| cgroup.cpu_full_pressure | some10, some60, some300 | percentage |
+| cgroup.cpu_full_pressure_stall_time | time | ms |
+| cgroup.memory_some_pressure | some10, some60, some300 | percentage |
+| cgroup.memory_some_pressure_stall_time | time | ms |
+| cgroup.memory_full_pressure | some10, some60, some300 | percentage |
+| cgroup.memory_full_pressure_stall_time | time | ms |
+| cgroup.io_some_pressure | some10, some60, some300 | percentage |
+| cgroup.io_some_pressure_stall_time | time | ms |
+| cgroup.io_full_pressure | some10, some60, some300 | percentage |
+| cgroup.io_full_pressure_stall_time | time | ms |
+| cgroup.pids_current | pids | pids |
+
+### Per cgroup network device
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| container_name | The container name or group path if name resolution fails. |
+| image | Docker/Podman container image name. |
+| device | The name of the host network interface linked to the container's network interface. |
+| container_device | Container network interface name. |
+| interface_type | Network interface type. Always "virtual" for the containers. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.net_net | received, sent | kilobits/s |
+| cgroup.net_packets | received, sent, multicast | pps |
+| cgroup.net_errors | inbound, outbound | errors/s |
+| cgroup.net_drops | inbound, outbound | errors/s |
+| cgroup.net_fifo | receive, transmit | errors/s |
+| cgroup.net_compressed | receive, sent | pps |
+| cgroup.net_events | frames, collisions, carrier | events/s |
+| cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state |
+| cgroup.net_carrier | up, down | state |
+| cgroup.net_mtu | mtu | octets |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes |
+| [ cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.mem_usage | cgroup memory utilization |
+| [ cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute |
+| [ cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/cgroups.plugin/integrations/ovirt_containers.md b/collectors/cgroups.plugin/integrations/ovirt_containers.md
new file mode 100644
index 00000000000000..5771aeea1b797d
--- /dev/null
+++ b/collectors/cgroups.plugin/integrations/ovirt_containers.md
@@ -0,0 +1,169 @@
+
+
+# oVirt Containers
+
+
+
+
+
+Plugin: cgroups.plugin
+Module: /sys/fs/cgroup
+
+
+
+## Overview
+
+Monitor oVirt for performance, resource usage, and health status.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per cgroup
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| container_name | The container name or group path if name resolution fails. |
+| image | Docker/Podman container image name. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.cpu_limit | used | percentage |
+| cgroup.cpu | user, system | percentage |
+| cgroup.cpu_per_core | a dimension per core | percentage |
+| cgroup.throttled | throttled | percentage |
+| cgroup.throttled_duration | duration | ms |
+| cgroup.cpu_shares | shares | shares |
+| cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB |
+| cgroup.writeback | dirty, writeback | MiB |
+| cgroup.mem_activity | in, out | MiB/s |
+| cgroup.pgfaults | pgfault, swap | MiB/s |
+| cgroup.mem_usage | ram, swap | MiB |
+| cgroup.mem_usage_limit | available, used | MiB |
+| cgroup.mem_utilization | utilization | percentage |
+| cgroup.mem_failcnt | failures | count |
+| cgroup.io | read, write | KiB/s |
+| cgroup.serviced_ops | read, write | operations/s |
+| cgroup.throttle_io | read, write | KiB/s |
+| cgroup.throttle_serviced_ops | read, write | operations/s |
+| cgroup.queued_ops | read, write | operations |
+| cgroup.merged_ops | read, write | operations/s |
+| cgroup.cpu_some_pressure | some10, some60, some300 | percentage |
+| cgroup.cpu_some_pressure_stall_time | time | ms |
+| cgroup.cpu_full_pressure | some10, some60, some300 | percentage |
+| cgroup.cpu_full_pressure_stall_time | time | ms |
+| cgroup.memory_some_pressure | some10, some60, some300 | percentage |
+| cgroup.memory_some_pressure_stall_time | time | ms |
+| cgroup.memory_full_pressure | some10, some60, some300 | percentage |
+| cgroup.memory_full_pressure_stall_time | time | ms |
+| cgroup.io_some_pressure | some10, some60, some300 | percentage |
+| cgroup.io_some_pressure_stall_time | time | ms |
+| cgroup.io_full_pressure | some10, some60, some300 | percentage |
+| cgroup.io_full_pressure_stall_time | time | ms |
+| cgroup.pids_current | pids | pids |
+
+### Per cgroup network device
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| container_name | The container name or group path if name resolution fails. |
+| image | Docker/Podman container image name. |
+| device | The name of the host network interface linked to the container's network interface. |
+| container_device | Container network interface name. |
+| interface_type | Network interface type. Always "virtual" for the containers. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.net_net | received, sent | kilobits/s |
+| cgroup.net_packets | received, sent, multicast | pps |
+| cgroup.net_errors | inbound, outbound | errors/s |
+| cgroup.net_drops | inbound, outbound | errors/s |
+| cgroup.net_fifo | receive, transmit | errors/s |
+| cgroup.net_compressed | receive, sent | pps |
+| cgroup.net_events | frames, collisions, carrier | events/s |
+| cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state |
+| cgroup.net_carrier | up, down | state |
+| cgroup.net_mtu | mtu | octets |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes |
+| [ cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.mem_usage | cgroup memory utilization |
+| [ cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute |
+| [ cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/cgroups.plugin/integrations/proxmox_containers.md b/collectors/cgroups.plugin/integrations/proxmox_containers.md
new file mode 100644
index 00000000000000..1804a40ca0a7af
--- /dev/null
+++ b/collectors/cgroups.plugin/integrations/proxmox_containers.md
@@ -0,0 +1,169 @@
+
+
+# Proxmox Containers
+
+
+
+
+
+Plugin: cgroups.plugin
+Module: /sys/fs/cgroup
+
+
+
+## Overview
+
+Monitor Proxmox for performance, resource usage, and health status.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per cgroup
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| container_name | The container name or group path if name resolution fails. |
+| image | Docker/Podman container image name. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.cpu_limit | used | percentage |
+| cgroup.cpu | user, system | percentage |
+| cgroup.cpu_per_core | a dimension per core | percentage |
+| cgroup.throttled | throttled | percentage |
+| cgroup.throttled_duration | duration | ms |
+| cgroup.cpu_shares | shares | shares |
+| cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB |
+| cgroup.writeback | dirty, writeback | MiB |
+| cgroup.mem_activity | in, out | MiB/s |
+| cgroup.pgfaults | pgfault, swap | MiB/s |
+| cgroup.mem_usage | ram, swap | MiB |
+| cgroup.mem_usage_limit | available, used | MiB |
+| cgroup.mem_utilization | utilization | percentage |
+| cgroup.mem_failcnt | failures | count |
+| cgroup.io | read, write | KiB/s |
+| cgroup.serviced_ops | read, write | operations/s |
+| cgroup.throttle_io | read, write | KiB/s |
+| cgroup.throttle_serviced_ops | read, write | operations/s |
+| cgroup.queued_ops | read, write | operations |
+| cgroup.merged_ops | read, write | operations/s |
+| cgroup.cpu_some_pressure | some10, some60, some300 | percentage |
+| cgroup.cpu_some_pressure_stall_time | time | ms |
+| cgroup.cpu_full_pressure | some10, some60, some300 | percentage |
+| cgroup.cpu_full_pressure_stall_time | time | ms |
+| cgroup.memory_some_pressure | some10, some60, some300 | percentage |
+| cgroup.memory_some_pressure_stall_time | time | ms |
+| cgroup.memory_full_pressure | some10, some60, some300 | percentage |
+| cgroup.memory_full_pressure_stall_time | time | ms |
+| cgroup.io_some_pressure | some10, some60, some300 | percentage |
+| cgroup.io_some_pressure_stall_time | time | ms |
+| cgroup.io_full_pressure | some10, some60, some300 | percentage |
+| cgroup.io_full_pressure_stall_time | time | ms |
+| cgroup.pids_current | pids | pids |
+
+### Per cgroup network device
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| container_name | The container name or group path if name resolution fails. |
+| image | Docker/Podman container image name. |
+| device | The name of the host network interface linked to the container's network interface. |
+| container_device | Container network interface name. |
+| interface_type | Network interface type. Always "virtual" for the containers. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.net_net | received, sent | kilobits/s |
+| cgroup.net_packets | received, sent, multicast | pps |
+| cgroup.net_errors | inbound, outbound | errors/s |
+| cgroup.net_drops | inbound, outbound | errors/s |
+| cgroup.net_fifo | receive, transmit | errors/s |
+| cgroup.net_compressed | receive, sent | pps |
+| cgroup.net_events | frames, collisions, carrier | events/s |
+| cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state |
+| cgroup.net_carrier | up, down | state |
+| cgroup.net_mtu | mtu | octets |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes |
+| [ cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.mem_usage | cgroup memory utilization |
+| [ cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute |
+| [ cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/cgroups.plugin/integrations/systemd_services.md b/collectors/cgroups.plugin/integrations/systemd_services.md
new file mode 100644
index 00000000000000..0ce9063669b998
--- /dev/null
+++ b/collectors/cgroups.plugin/integrations/systemd_services.md
@@ -0,0 +1,112 @@
+
+
+# Systemd Services
+
+
+
+
+
+Plugin: cgroups.plugin
+Module: /sys/fs/cgroup
+
+
+
+## Overview
+
+Monitor Systemd Services for performance, resource usage, and health status.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per systemd service
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| service_name | Service name |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| systemd.service.cpu.utilization | user, system | percentage |
+| systemd.service.memory.usage | ram, swap | MiB |
+| systemd.service.memory.failcnt | fail | failures/s |
+| systemd.service.memory.ram.usage | rss, cache, mapped_file, rss_huge | MiB |
+| systemd.service.memory.writeback | writeback, dirty | MiB |
+| systemd.service.memory.paging.faults | minor, major | MiB/s |
+| systemd.service.memory.paging.io | in, out | MiB/s |
+| systemd.service.disk.io | read, write | KiB/s |
+| systemd.service.disk.iops | read, write | operations/s |
+| systemd.service.disk.throttle.io | read, write | KiB/s |
+| systemd.service.disk.throttle.iops | read, write | operations/s |
+| systemd.service.disk.queued_iops | read, write | operations/s |
+| systemd.service.disk.merged_iops | read, write | operations/s |
+| systemd.service.pids.current | pids | pids |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/cgroups.plugin/integrations/virtual_machines.md b/collectors/cgroups.plugin/integrations/virtual_machines.md
new file mode 100644
index 00000000000000..6a64923c47d8b1
--- /dev/null
+++ b/collectors/cgroups.plugin/integrations/virtual_machines.md
@@ -0,0 +1,169 @@
+
+
+# Virtual Machines
+
+
+
+
+
+Plugin: cgroups.plugin
+Module: /sys/fs/cgroup
+
+
+
+## Overview
+
+Monitor Virtual Machines for performance, resource usage, and health status.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per cgroup
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| container_name | The container name or group path if name resolution fails. |
+| image | Docker/Podman container image name. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.cpu_limit | used | percentage |
+| cgroup.cpu | user, system | percentage |
+| cgroup.cpu_per_core | a dimension per core | percentage |
+| cgroup.throttled | throttled | percentage |
+| cgroup.throttled_duration | duration | ms |
+| cgroup.cpu_shares | shares | shares |
+| cgroup.mem | cache, rss, swap, rss_huge, mapped_file | MiB |
+| cgroup.writeback | dirty, writeback | MiB |
+| cgroup.mem_activity | in, out | MiB/s |
+| cgroup.pgfaults | pgfault, swap | MiB/s |
+| cgroup.mem_usage | ram, swap | MiB |
+| cgroup.mem_usage_limit | available, used | MiB |
+| cgroup.mem_utilization | utilization | percentage |
+| cgroup.mem_failcnt | failures | count |
+| cgroup.io | read, write | KiB/s |
+| cgroup.serviced_ops | read, write | operations/s |
+| cgroup.throttle_io | read, write | KiB/s |
+| cgroup.throttle_serviced_ops | read, write | operations/s |
+| cgroup.queued_ops | read, write | operations |
+| cgroup.merged_ops | read, write | operations/s |
+| cgroup.cpu_some_pressure | some10, some60, some300 | percentage |
+| cgroup.cpu_some_pressure_stall_time | time | ms |
+| cgroup.cpu_full_pressure | some10, some60, some300 | percentage |
+| cgroup.cpu_full_pressure_stall_time | time | ms |
+| cgroup.memory_some_pressure | some10, some60, some300 | percentage |
+| cgroup.memory_some_pressure_stall_time | time | ms |
+| cgroup.memory_full_pressure | some10, some60, some300 | percentage |
+| cgroup.memory_full_pressure_stall_time | time | ms |
+| cgroup.io_some_pressure | some10, some60, some300 | percentage |
+| cgroup.io_some_pressure_stall_time | time | ms |
+| cgroup.io_full_pressure | some10, some60, some300 | percentage |
+| cgroup.io_full_pressure_stall_time | time | ms |
+| cgroup.pids_current | pids | pids |
+
+### Per cgroup network device
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| container_name | The container name or group path if name resolution fails. |
+| image | Docker/Podman container image name. |
+| device | The name of the host network interface linked to the container's network interface. |
+| container_device | Container network interface name. |
+| interface_type | Network interface type. Always "virtual" for the containers. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.net_net | received, sent | kilobits/s |
+| cgroup.net_packets | received, sent, multicast | pps |
+| cgroup.net_errors | inbound, outbound | errors/s |
+| cgroup.net_drops | inbound, outbound | errors/s |
+| cgroup.net_fifo | receive, transmit | errors/s |
+| cgroup.net_compressed | receive, sent | pps |
+| cgroup.net_events | frames, collisions, carrier | events/s |
+| cgroup.net_operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state |
+| cgroup.net_carrier | up, down | state |
+| cgroup.net_mtu | mtu | octets |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ cgroup_10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.cpu_limit | average cgroup CPU utilization over the last 10 minutes |
+| [ cgroup_ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.mem_usage | cgroup memory utilization |
+| [ cgroup_1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | average number of packets received by the network interface ${label:device} over the last minute |
+| [ cgroup_10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/cgroups.conf) | cgroup.net_packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/cgroups.plugin/metadata.yaml b/collectors/cgroups.plugin/metadata.yaml
index b342d30a3bf926..a1abbb5a94fe15 100644
--- a/collectors/cgroups.plugin/metadata.yaml
+++ b/collectors/cgroups.plugin/metadata.yaml
@@ -86,9 +86,9 @@ modules:
description: ""
labels:
- name: container_name
- description: TBD
+ description: The container name or group path if name resolution fails.
- name: image
- description: TBD
+ description: Docker/Podman container image name.
metrics:
- name: cgroup.cpu_limit
description: CPU Usage within the limits
@@ -310,17 +310,25 @@ modules:
chart_type: line
dimensions:
- name: time
+ - name: cgroup.pids_current
+ description: Number of processes
+ unit: "pids"
+ chart_type: line
+ dimensions:
+ - name: pids
- name: cgroup network device
description: ""
labels:
- name: container_name
- description: TBD
+ description: The container name or group path if name resolution fails.
- name: image
- description: TBD
+ description: Docker/Podman container image name.
- name: device
- description: TBD
+ description: "The name of the host network interface linked to the container's network interface."
+ - name: container_device
+ description: Container network interface name.
- name: interface_type
- description: TBD
+ description: 'Network interface type. Always "virtual" for the containers.'
metrics:
- name: cgroup.net_net
description: Bandwidth
@@ -406,7 +414,7 @@ modules:
link: https://kubernetes.io/
icon_filename: kubernetes.svg
categories:
- - data-collection.containers-and-vms
+ #- data-collection.containers-and-vms
- data-collection.kubernetes
keywords:
- k8s
@@ -445,30 +453,26 @@ modules:
availability: []
scopes:
- name: k8s cgroup
- description: ""
+ description: These metrics refer to the Pod container.
labels:
+ - name: k8s_node_name
+ description: 'Node name. The value of _pod.spec.nodeName_.'
- name: k8s_namespace
- description: TBD
- - name: k8s_pod_name
- description: TBD
- - name: k8s_pod_uid
- description: TBD
+ description: 'Namespace name. The value of _pod.metadata.namespace_.'
- name: k8s_controller_kind
- description: TBD
+ description: 'Controller kind (ReplicaSet, DaemonSet, StatefulSet, Job, etc.). The value of _pod.OwnerReferences.Controller.Kind_.'
- name: k8s_controller_name
- description: TBD
- - name: k8s_node_name
- description: TBD
+ description: 'Controller name. The value of _pod.OwnerReferences.Controller.Name_.'
+ - name: k8s_pod_name
+ description: 'Pod name. The value of _pod.metadata.name_.'
- name: k8s_container_name
- description: TBD
- - name: k8s_container_id
- description: TBD
+ description: 'Container name. The value of _pod.spec.containers.name_.'
- name: k8s_kind
- description: TBD
+ description: 'Instance kind: "pod" or "container".'
- name: k8s_qos_class
- description: TBD
+ description: 'QoS class (guaranteed, burstable, besteffort).'
- name: k8s_cluster_id
- description: TBD
+ description: 'Cluster ID. The value of kube-system namespace _namespace.metadata.uid_.'
metrics:
- name: k8s.cgroup.cpu_limit
description: CPU Usage within the limits
@@ -690,35 +694,39 @@ modules:
chart_type: line
dimensions:
- name: time
+ - name: k8s.cgroup.pids_current
+ description: Number of processes
+ unit: "pids"
+ chart_type: line
+ dimensions:
+ - name: pids
- name: k8s cgroup network device
- description: ""
+ description: These metrics refer to the Pod container network interface.
labels:
- name: device
- description: TBD
+ description: "The name of the host network interface linked to the container's network interface."
+ - name: container_device
+ description: Container network interface name.
- name: interface_type
- description: TBD
+ description: 'Network interface type. Always "virtual" for the containers.'
+ - name: k8s_node_name
+ description: 'Node name. The value of _pod.spec.nodeName_.'
- name: k8s_namespace
- description: TBD
- - name: k8s_pod_name
- description: TBD
- - name: k8s_pod_uid
- description: TBD
+ description: 'Namespace name. The value of _pod.metadata.namespace_.'
- name: k8s_controller_kind
- description: TBD
+ description: 'Controller kind (ReplicaSet, DaemonSet, StatefulSet, Job, etc.). The value of _pod.OwnerReferences.Controller.Kind_.'
- name: k8s_controller_name
- description: TBD
- - name: k8s_node_name
- description: TBD
+ description: 'Controller name. The value of _pod.OwnerReferences.Controller.Name_.'
+ - name: k8s_pod_name
+ description: 'Pod name. The value of _pod.metadata.name_.'
- name: k8s_container_name
- description: TBD
- - name: k8s_container_id
- description: TBD
+ description: 'Container name. The value of _pod.spec.containers.name_.'
- name: k8s_kind
- description: TBD
+ description: 'Instance kind: "pod" or "container".'
- name: k8s_qos_class
- description: TBD
+ description: 'QoS class (guaranteed, burstable, besteffort).'
- name: k8s_cluster_id
- description: TBD
+ description: 'Cluster ID. The value of kube-system namespace _namespace.metadata.uid_.'
metrics:
- name: k8s.cgroup.net_net
description: Bandwidth
@@ -821,154 +829,110 @@ modules:
description: ""
availability: []
scopes:
- - name: global
+ - name: systemd service
description: ""
- labels: []
+ labels:
+ - name: service_name
+ description: Service name
metrics:
- - name: services.cpu
+ - name: systemd.service.cpu.utilization
description: Systemd Services CPU utilization (100% = 1 core)
- unit: "percentage"
+ unit: percentage
chart_type: stacked
dimensions:
- - name: a dimension per systemd service
- - name: services.mem_usage
+ - name: user
+ - name: system
+ - name: systemd.service.memory.usage
description: Systemd Services Used Memory
- unit: "MiB"
+ unit: MiB
chart_type: stacked
dimensions:
- - name: a dimension per systemd service
- - name: services.mem_rss
- description: Systemd Services RSS Memory
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per systemd service
- - name: services.mem_mapped
- description: Systemd Services Mapped Memory
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per systemd service
- - name: services.mem_cache
- description: Systemd Services Cache Memory
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per systemd service
- - name: services.mem_writeback
- description: Systemd Services Writeback Memory
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per systemd service
- - name: services.mem_pgfault
- description: Systemd Services Memory Minor Page Faults
- unit: "MiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per systemd service
- - name: services.mem_pgmajfault
- description: Systemd Services Memory Major Page Faults
- unit: "MiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per systemd service
- - name: services.mem_pgpgin
- description: Systemd Services Memory Charging Activity
- unit: "MiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per systemd service
- - name: services.mem_pgpgout
- description: Systemd Services Memory Uncharging Activity
- unit: "MiB/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per systemd service
- - name: services.mem_failcnt
+ - name: ram
+ - name: swap
+ - name: systemd.service.memory.failcnt
description: Systemd Services Memory Limit Failures
- unit: "failures"
- chart_type: stacked
- dimensions:
- - name: a dimension per systemd service
- - name: services.swap_usage
- description: Systemd Services Swap Memory Used
- unit: "MiB"
- chart_type: stacked
- dimensions:
- - name: a dimension per systemd service
- - name: services.io_read
- description: Systemd Services Disk Read Bandwidth
- unit: "KiB/s"
- chart_type: stacked
+ unit: failures/s
+ chart_type: line
dimensions:
- - name: a dimension per systemd service
- - name: services.io_write
- description: Systemd Services Disk Write Bandwidth
- unit: "KiB/s"
+ - name: fail
+ - name: systemd.service.memory.ram.usage
+ description: Systemd Services Memory
+ unit: MiB
chart_type: stacked
dimensions:
- - name: a dimension per systemd service
- - name: services.io_ops_read
- description: Systemd Services Disk Read Operations
- unit: "operations/s"
+ - name: rss
+ - name: cache
+ - name: mapped_file
+ - name: rss_huge
+ - name: systemd.service.memory.writeback
+ description: Systemd Services Writeback Memory
+ unit: MiB
chart_type: stacked
dimensions:
- - name: a dimension per systemd service
- - name: services.io_ops_write
- description: Systemd Services Disk Write Operations
- unit: "operations/s"
- chart_type: stacked
+ - name: writeback
+ - name: dirty
+ - name: systemd.service.memory.paging.faults
+ description: Systemd Services Memory Minor and Major Page Faults
+ unit: MiB/s
+ chart_type: area
dimensions:
- - name: a dimension per systemd service
- - name: services.throttle_io_read
- description: Systemd Services Throttle Disk Read Bandwidth
- unit: "KiB/s"
- chart_type: stacked
+ - name: minor
+ - name: major
+ - name: systemd.service.memory.paging.io
+ description: Systemd Services Memory Paging IO
+ unit: MiB/s
+ chart_type: area
dimensions:
- - name: a dimension per systemd service
- - name: services.services.throttle_io_write
- description: Systemd Services Throttle Disk Write Bandwidth
- unit: "KiB/s"
- chart_type: stacked
+ - name: in
+ - name: out
+ - name: systemd.service.disk.io
+ description: Systemd Services Disk Read/Write Bandwidth
+ unit: KiB/s
+ chart_type: area
dimensions:
- - name: a dimension per systemd service
- - name: services.throttle_io_ops_read
- description: Systemd Services Throttle Disk Read Operations
- unit: "operations/s"
- chart_type: stacked
+ - name: read
+ - name: write
+ - name: systemd.service.disk.iops
+ description: Systemd Services Disk Read/Write Operations
+ unit: operations/s
+ chart_type: line
dimensions:
- - name: a dimension per systemd service
- - name: throttle_io_ops_write
- description: Systemd Services Throttle Disk Write Operations
- unit: "operations/s"
- chart_type: stacked
+ - name: read
+ - name: write
+ - name: systemd.service.disk.throttle.io
+ description: Systemd Services Throttle Disk Read/Write Bandwidth
+ unit: KiB/s
+ chart_type: area
dimensions:
- - name: a dimension per systemd service
- - name: services.queued_io_ops_read
- description: Systemd Services Queued Disk Read Operations
- unit: "operations/s"
- chart_type: stacked
+ - name: read
+ - name: write
+ - name: systemd.service.disk.throttle.iops
+ description: Systemd Services Throttle Disk Read/Write Operations
+ unit: operations/s
+ chart_type: line
dimensions:
- - name: a dimension per systemd service
- - name: services.queued_io_ops_write
- description: Systemd Services Queued Disk Write Operations
- unit: "operations/s"
- chart_type: stacked
+ - name: read
+ - name: write
+ - name: systemd.service.disk.queued_iops
+ description: Systemd Services Queued Disk Read/Write Operations
+ unit: operations/s
+ chart_type: line
dimensions:
- - name: a dimension per systemd service
- - name: services.merged_io_ops_read
- description: Systemd Services Merged Disk Read Operations
- unit: "operations/s"
- chart_type: stacked
+ - name: read
+ - name: write
+ - name: systemd.service.disk.merged_iops
+ description: Systemd Services Merged Disk Read/Write Operations
+ unit: operations/s
+ chart_type: line
dimensions:
- - name: a dimension per systemd service
- - name: services.merged_io_ops_write
- description: Systemd Services Merged Disk Write Operations
- unit: "operations/s"
- chart_type: stacked
+ - name: read
+ - name: write
+ - name: systemd.service.pids.current
+ description: Systemd Services Number of Processes
+ unit: pids
+ chart_type: line
dimensions:
- - name: a dimension per systemd service
+ - name: pids
- <<: *module
meta:
<<: *meta
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.c b/collectors/cgroups.plugin/sys_fs_cgroup.c
index 9c7488c827dc6d..705edf6f748fc2 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.c
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.c
@@ -1,148 +1,98 @@
// SPDX-License-Identifier: GPL-3.0-or-later
-#include "sys_fs_cgroup.h"
-
-#define PLUGIN_CGROUPS_NAME "cgroups.plugin"
-#define PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME "systemd"
-#define PLUGIN_CGROUPS_MODULE_CGROUPS_NAME "/sys/fs/cgroup"
-
-#ifdef NETDATA_INTERNAL_CHECKS
-#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_DEFAULT
-#else
-#define CGROUP_PROCFILE_FLAG PROCFILE_FLAG_NO_ERROR_ON_FILE_IO
-#endif
+#include "cgroup-internals.h"
// main cgroups thread worker jobs
#define WORKER_CGROUPS_LOCK 0
#define WORKER_CGROUPS_READ 1
#define WORKER_CGROUPS_CHART 2
-// discovery cgroup thread worker jobs
-#define WORKER_DISCOVERY_INIT 0
-#define WORKER_DISCOVERY_FIND 1
-#define WORKER_DISCOVERY_PROCESS 2
-#define WORKER_DISCOVERY_PROCESS_RENAME 3
-#define WORKER_DISCOVERY_PROCESS_NETWORK 4
-#define WORKER_DISCOVERY_PROCESS_FIRST_TIME 5
-#define WORKER_DISCOVERY_UPDATE 6
-#define WORKER_DISCOVERY_CLEANUP 7
-#define WORKER_DISCOVERY_COPY 8
-#define WORKER_DISCOVERY_SHARE 9
-#define WORKER_DISCOVERY_LOCK 10
-
-#if WORKER_UTILIZATION_MAX_JOB_TYPES < 11
-#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 11
-#endif
-
// ----------------------------------------------------------------------------
// cgroup globals
+unsigned long long host_ram_total = 0;
+int is_inside_k8s = 0;
+long system_page_size = 4096; // system will be queried via sysconf() in configuration()
+int cgroup_enable_cpuacct_stat = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO;
+int cgroup_enable_cpuacct_cpu_throttling = CONFIG_BOOLEAN_YES;
+int cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_NO;
+int cgroup_enable_memory = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_detailed_memory = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_swap = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_blkio_io = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_blkio_ops = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_pressure_irq_some = CONFIG_BOOLEAN_NO;
+int cgroup_enable_pressure_irq_full = CONFIG_BOOLEAN_AUTO;
+int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES;
+int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO;
+int cgroup_used_memory = CONFIG_BOOLEAN_YES;
+int cgroup_use_unified_cgroups = CONFIG_BOOLEAN_NO;
+int cgroup_unified_exist = CONFIG_BOOLEAN_AUTO;
+int cgroup_search_in_devices = 1;
+int cgroup_check_for_new_every = 10;
+int cgroup_update_every = 1;
+int cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS;
+int cgroup_recheck_zero_blkio_every_iterations = 10;
+int cgroup_recheck_zero_mem_failcnt_every_iterations = 10;
+int cgroup_recheck_zero_mem_detailed_every_iterations = 10;
+char *cgroup_cpuacct_base = NULL;
+char *cgroup_cpuset_base = NULL;
+char *cgroup_blkio_base = NULL;
+char *cgroup_memory_base = NULL;
+char *cgroup_devices_base = NULL;
+char *cgroup_pids_base = NULL;
+char *cgroup_unified_base = NULL;
+int cgroup_root_count = 0;
+int cgroup_root_max = 1000;
+int cgroup_max_depth = 0;
+SIMPLE_PATTERN *enabled_cgroup_paths = NULL;
+SIMPLE_PATTERN *enabled_cgroup_names = NULL;
+SIMPLE_PATTERN *search_cgroup_paths = NULL;
+SIMPLE_PATTERN *enabled_cgroup_renames = NULL;
+SIMPLE_PATTERN *systemd_services_cgroups = NULL;
+SIMPLE_PATTERN *entrypoint_parent_process_comm = NULL;
+char *cgroups_network_interface_script = NULL;
+int cgroups_check = 0;
+uint32_t Read_hash = 0;
+uint32_t Write_hash = 0;
+uint32_t user_hash = 0;
+uint32_t system_hash = 0;
+uint32_t user_usec_hash = 0;
+uint32_t system_usec_hash = 0;
+uint32_t nr_periods_hash = 0;
+uint32_t nr_throttled_hash = 0;
+uint32_t throttled_time_hash = 0;
+uint32_t throttled_usec_hash = 0;
-static char cgroup_chart_id_prefix[] = "cgroup_";
-
-static int is_inside_k8s = 0;
-
-static long system_page_size = 4096; // system will be queried via sysconf() in configuration()
-
-static int cgroup_enable_cpuacct_stat = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_cpuacct_cpu_throttling = CONFIG_BOOLEAN_YES;
-static int cgroup_enable_cpuacct_cpu_shares = CONFIG_BOOLEAN_NO;
-static int cgroup_enable_memory = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_detailed_memory = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_memory_failcnt = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_swap = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_blkio_io = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_blkio_ops = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_blkio_throttle_io = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_blkio_throttle_ops = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_blkio_merged_ops = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_blkio_queued_ops = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_pressure_cpu = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_pressure_io_some = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_pressure_io_full = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_pressure_memory_some = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_AUTO;
-static int cgroup_enable_pressure_irq_some = CONFIG_BOOLEAN_NO;
-static int cgroup_enable_pressure_irq_full = CONFIG_BOOLEAN_AUTO;
-
-static int cgroup_enable_systemd_services = CONFIG_BOOLEAN_YES;
-static int cgroup_enable_systemd_services_detailed_memory = CONFIG_BOOLEAN_NO;
-static int cgroup_used_memory = CONFIG_BOOLEAN_YES;
-
-static int cgroup_use_unified_cgroups = CONFIG_BOOLEAN_NO;
-static int cgroup_unified_exist = CONFIG_BOOLEAN_AUTO;
-
-static int cgroup_search_in_devices = 1;
-
-static int cgroup_check_for_new_every = 10;
-static int cgroup_update_every = 1;
-static int cgroup_containers_chart_priority = NETDATA_CHART_PRIO_CGROUPS_CONTAINERS;
-
-static int cgroup_recheck_zero_blkio_every_iterations = 10;
-static int cgroup_recheck_zero_mem_failcnt_every_iterations = 10;
-static int cgroup_recheck_zero_mem_detailed_every_iterations = 10;
-
-static char *cgroup_cpuacct_base = NULL;
-static char *cgroup_cpuset_base = NULL;
-static char *cgroup_blkio_base = NULL;
-static char *cgroup_memory_base = NULL;
-static char *cgroup_devices_base = NULL;
-static char *cgroup_unified_base = NULL;
-
-static int cgroup_root_count = 0;
-static int cgroup_root_max = 1000;
-static int cgroup_max_depth = 0;
-
-static SIMPLE_PATTERN *enabled_cgroup_paths = NULL;
-static SIMPLE_PATTERN *enabled_cgroup_names = NULL;
-static SIMPLE_PATTERN *search_cgroup_paths = NULL;
-static SIMPLE_PATTERN *enabled_cgroup_renames = NULL;
-static SIMPLE_PATTERN *systemd_services_cgroups = NULL;
-
-static SIMPLE_PATTERN *entrypoint_parent_process_comm = NULL;
-
-static char *cgroups_rename_script = NULL;
-static char *cgroups_network_interface_script = NULL;
-
-static int cgroups_check = 0;
-
-static uint32_t Read_hash = 0;
-static uint32_t Write_hash = 0;
-static uint32_t user_hash = 0;
-static uint32_t system_hash = 0;
-static uint32_t user_usec_hash = 0;
-static uint32_t system_usec_hash = 0;
-static uint32_t nr_periods_hash = 0;
-static uint32_t nr_throttled_hash = 0;
-static uint32_t throttled_time_hash = 0;
-static uint32_t throttled_usec_hash = 0;
-
-enum cgroups_type { CGROUPS_AUTODETECT_FAIL, CGROUPS_V1, CGROUPS_V2 };
-
-enum cgroups_systemd_setting {
- SYSTEMD_CGROUP_ERR,
- SYSTEMD_CGROUP_LEGACY,
- SYSTEMD_CGROUP_HYBRID,
- SYSTEMD_CGROUP_UNIFIED
-};
-
-struct cgroups_systemd_config_setting {
- char *name;
- enum cgroups_systemd_setting setting;
-};
+// *** WARNING *** The fields are not thread safe. Take care of safe usage.
+struct cgroup *cgroup_root = NULL;
+uv_mutex_t cgroup_root_mutex;
-static struct cgroups_systemd_config_setting cgroups_systemd_options[] = {
- { .name = "legacy", .setting = SYSTEMD_CGROUP_LEGACY },
- { .name = "hybrid", .setting = SYSTEMD_CGROUP_HYBRID },
- { .name = "unified", .setting = SYSTEMD_CGROUP_UNIFIED },
- { .name = NULL, .setting = SYSTEMD_CGROUP_ERR },
+struct cgroups_systemd_config_setting cgroups_systemd_options[] = {
+ { .name = "legacy", .setting = SYSTEMD_CGROUP_LEGACY },
+ { .name = "hybrid", .setting = SYSTEMD_CGROUP_HYBRID },
+ { .name = "unified", .setting = SYSTEMD_CGROUP_UNIFIED },
+ { .name = NULL, .setting = SYSTEMD_CGROUP_ERR },
};
// Shared memory with information from detected cgroups
netdata_ebpf_cgroup_shm_t shm_cgroup_ebpf = {NULL, NULL};
-static int shm_fd_cgroup_ebpf = -1;
+int shm_fd_cgroup_ebpf = -1;
sem_t *shm_mutex_cgroup_ebpf = SEM_FAILED;
+struct discovery_thread discovery_thread;
+
+
/* on Fed systemd is not in PATH for some reason */
#define SYSTEMD_CMD_RHEL "/usr/lib/systemd/systemd --version"
#define SYSTEMD_HIERARCHY_STRING "default-hierarchy="
@@ -361,54 +311,70 @@ void read_cgroup_plugin_configuration() {
cgroup_enable_pressure_memory_full = CONFIG_BOOLEAN_NO;
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuacct");
- if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct");
- if(!mi) {
+ if (!mi)
+ mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuacct");
+ if (!mi) {
collector_error("CGROUP: cannot find cpuacct mountinfo. Assuming default: /sys/fs/cgroup/cpuacct");
s = "/sys/fs/cgroup/cpuacct";
- }
- else s = mi->mount_point;
+ } else
+ s = mi->mount_point;
set_cgroup_base_path(filename, s);
cgroup_cpuacct_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuacct", filename);
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "cpuset");
- if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuset");
- if(!mi) {
+ if (!mi)
+ mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "cpuset");
+ if (!mi) {
collector_error("CGROUP: cannot find cpuset mountinfo. Assuming default: /sys/fs/cgroup/cpuset");
s = "/sys/fs/cgroup/cpuset";
- }
- else s = mi->mount_point;
+ } else
+ s = mi->mount_point;
set_cgroup_base_path(filename, s);
cgroup_cpuset_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/cpuset", filename);
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "blkio");
- if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "blkio");
- if(!mi) {
+ if (!mi)
+ mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "blkio");
+ if (!mi) {
collector_error("CGROUP: cannot find blkio mountinfo. Assuming default: /sys/fs/cgroup/blkio");
s = "/sys/fs/cgroup/blkio";
- }
- else s = mi->mount_point;
+ } else
+ s = mi->mount_point;
set_cgroup_base_path(filename, s);
cgroup_blkio_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/blkio", filename);
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "memory");
- if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "memory");
- if(!mi) {
+ if (!mi)
+ mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "memory");
+ if (!mi) {
collector_error("CGROUP: cannot find memory mountinfo. Assuming default: /sys/fs/cgroup/memory");
s = "/sys/fs/cgroup/memory";
- }
- else s = mi->mount_point;
+ } else
+ s = mi->mount_point;
set_cgroup_base_path(filename, s);
cgroup_memory_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/memory", filename);
mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "devices");
- if(!mi) mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "devices");
- if(!mi) {
+ if (!mi)
+ mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "devices");
+ if (!mi) {
collector_error("CGROUP: cannot find devices mountinfo. Assuming default: /sys/fs/cgroup/devices");
s = "/sys/fs/cgroup/devices";
- }
- else s = mi->mount_point;
+ } else
+ s = mi->mount_point;
set_cgroup_base_path(filename, s);
cgroup_devices_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/devices", filename);
+
+ mi = mountinfo_find_by_filesystem_super_option(root, "cgroup", "pids");
+ if (!mi)
+ mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup", "pids");
+ if (!mi) {
+ collector_error("CGROUP: cannot find pids mountinfo. Assuming default: /sys/fs/cgroup/pids");
+ s = "/sys/fs/cgroup/pids";
+ } else
+ s = mi->mount_point;
+ set_cgroup_base_path(filename, s);
+ cgroup_pids_base = config_get("plugin:cgroups", "path to /sys/fs/cgroup/pids", filename);
}
else {
//cgroup_enable_cpuacct_stat =
@@ -428,22 +394,19 @@ void read_cgroup_plugin_configuration() {
cgroup_used_memory = CONFIG_BOOLEAN_NO; //unified cgroups use different values
//TODO: can there be more than 1 cgroup2 mount point?
- mi = mountinfo_find_by_filesystem_super_option(root, "cgroup2", "rw"); //there is no cgroup2 specific super option - for now use 'rw' option
- if(mi)
- netdata_log_debug(D_CGROUP, "found unified cgroup root using super options, with path: '%s'", mi->mount_point);
- if(!mi) {
+ //there is no cgroup2 specific super option - for now use 'rw' option
+ mi = mountinfo_find_by_filesystem_super_option(root, "cgroup2", "rw");
+ if (!mi) {
mi = mountinfo_find_by_filesystem_mount_source(root, "cgroup2", "cgroup");
- if(mi)
- netdata_log_debug(D_CGROUP, "found unified cgroup root using mountsource info, with path: '%s'", mi->mount_point);
}
- if(!mi) {
+ if (!mi) {
collector_error("CGROUP: cannot find cgroup2 mountinfo. Assuming default: /sys/fs/cgroup");
s = "/sys/fs/cgroup";
- }
- else s = mi->mount_point;
+ } else
+ s = mi->mount_point;
+
set_cgroup_base_path(filename, s);
cgroup_unified_base = config_get("plugin:cgroups", "path to unified cgroups", filename);
- netdata_log_debug(D_CGROUP, "using cgroup root: '%s'", cgroup_unified_base);
}
cgroup_root_max = (int)config_get_number("plugin:cgroups", "max cgroups to allow", cgroup_root_max);
@@ -620,422 +583,6 @@ void netdata_cgroup_ebpf_initialize_shm()
shm_unlink(NETDATA_SHARED_MEMORY_EBPF_CGROUP_NAME);
}
-// ----------------------------------------------------------------------------
-// cgroup objects
-
-struct blkio {
- int updated;
- int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
- int delay_counter;
-
- char *filename;
-
- unsigned long long Read;
- unsigned long long Write;
-/*
- unsigned long long Sync;
- unsigned long long Async;
- unsigned long long Total;
-*/
-};
-
-// https://www.kernel.org/doc/Documentation/cgroup-v1/memory.txt
-struct memory {
- ARL_BASE *arl_base;
- ARL_ENTRY *arl_dirty;
- ARL_ENTRY *arl_swap;
-
- int updated_detailed;
- int updated_usage_in_bytes;
- int updated_msw_usage_in_bytes;
- int updated_failcnt;
-
- int enabled_detailed; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
- int enabled_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
- int enabled_msw_usage_in_bytes; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
- int enabled_failcnt; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
-
- int delay_counter_detailed;
- int delay_counter_failcnt;
-
- char *filename_detailed;
- char *filename_usage_in_bytes;
- char *filename_msw_usage_in_bytes;
- char *filename_failcnt;
-
- int detailed_has_dirty;
- int detailed_has_swap;
-
- // detailed metrics
-/*
- unsigned long long cache;
- unsigned long long rss;
- unsigned long long rss_huge;
- unsigned long long mapped_file;
- unsigned long long writeback;
- unsigned long long dirty;
- unsigned long long swap;
- unsigned long long pgpgin;
- unsigned long long pgpgout;
- unsigned long long pgfault;
- unsigned long long pgmajfault;
- unsigned long long inactive_anon;
- unsigned long long active_anon;
- unsigned long long inactive_file;
- unsigned long long active_file;
- unsigned long long unevictable;
- unsigned long long hierarchical_memory_limit;
-*/
- //unified cgroups metrics
- unsigned long long anon;
- unsigned long long kernel_stack;
- unsigned long long slab;
- unsigned long long sock;
- unsigned long long shmem;
- unsigned long long anon_thp;
- //unsigned long long file_writeback;
- //unsigned long long file_dirty;
- //unsigned long long file;
-
- unsigned long long total_cache;
- unsigned long long total_rss;
- unsigned long long total_rss_huge;
- unsigned long long total_mapped_file;
- unsigned long long total_writeback;
- unsigned long long total_dirty;
- unsigned long long total_swap;
- unsigned long long total_pgpgin;
- unsigned long long total_pgpgout;
- unsigned long long total_pgfault;
- unsigned long long total_pgmajfault;
-/*
- unsigned long long total_inactive_anon;
- unsigned long long total_active_anon;
-*/
-
- unsigned long long total_inactive_file;
-
-/*
- unsigned long long total_active_file;
- unsigned long long total_unevictable;
-*/
-
- // single file metrics
- unsigned long long usage_in_bytes;
- unsigned long long msw_usage_in_bytes;
- unsigned long long failcnt;
-};
-
-// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt
-struct cpuacct_stat {
- int updated;
- int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
-
- char *filename;
-
- unsigned long long user; // v1, v2(user_usec)
- unsigned long long system; // v1, v2(system_usec)
-};
-
-// https://www.kernel.org/doc/Documentation/cgroup-v1/cpuacct.txt
-struct cpuacct_usage {
- int updated;
- int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
-
- char *filename;
-
- unsigned int cpus;
- unsigned long long *cpu_percpu;
-};
-
-// represents cpuacct/cpu.stat, for v2 'cpuacct_stat' is used for 'user_usec', 'system_usec'
-struct cpuacct_cpu_throttling {
- int updated;
- int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
-
- char *filename;
-
- unsigned long long nr_periods;
- unsigned long long nr_throttled;
- unsigned long long throttled_time;
-
- unsigned long long nr_throttled_perc;
-};
-
-// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/6/html/resource_management_guide/sec-cpu#sect-cfs
-// https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/managing_monitoring_and_updating_the_kernel/using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications_managing-monitoring-and-updating-the-kernel#proc_controlling-distribution-of-cpu-time-for-applications-by-adjusting-cpu-weight_using-cgroups-v2-to-control-distribution-of-cpu-time-for-applications
-struct cpuacct_cpu_shares {
- int updated;
- int enabled; // CONFIG_BOOLEAN_YES or CONFIG_BOOLEAN_AUTO
-
- char *filename;
-
- unsigned long long shares;
-};
-
-struct cgroup_network_interface {
- const char *host_device;
- const char *container_device;
- struct cgroup_network_interface *next;
-};
-
-enum cgroups_container_orchestrator {
- CGROUPS_ORCHESTRATOR_UNSET,
- CGROUPS_ORCHESTRATOR_UNKNOWN,
- CGROUPS_ORCHESTRATOR_K8S
-};
-
-// *** WARNING *** The fields are not thread safe. Take care of safe usage.
-struct cgroup {
- uint32_t options;
-
- int first_time_seen; // first time seen by the discoverer
- int processed; // the discoverer is done processing a cgroup (resolved name, set 'enabled' option)
-
- char available; // found in the filesystem
- char enabled; // enabled in the config
-
- char pending_renames;
- char *intermediate_id; // TODO: remove it when the renaming script is fixed
-
- char *id;
- uint32_t hash;
-
- char *chart_id;
- uint32_t hash_chart;
-
- char *chart_title;
-
- DICTIONARY *chart_labels;
-
- int container_orchestrator;
-
- struct cpuacct_stat cpuacct_stat;
- struct cpuacct_usage cpuacct_usage;
- struct cpuacct_cpu_throttling cpuacct_cpu_throttling;
- struct cpuacct_cpu_shares cpuacct_cpu_shares;
-
- struct memory memory;
-
- struct blkio io_service_bytes; // bytes
- struct blkio io_serviced; // operations
-
- struct blkio throttle_io_service_bytes; // bytes
- struct blkio throttle_io_serviced; // operations
-
- struct blkio io_merged; // operations
- struct blkio io_queued; // operations
-
- struct cgroup_network_interface *interfaces;
-
- struct pressure cpu_pressure;
- struct pressure io_pressure;
- struct pressure memory_pressure;
- struct pressure irq_pressure;
-
- // per cgroup charts
- RRDSET *st_cpu;
- RRDSET *st_cpu_limit;
- RRDSET *st_cpu_per_core;
- RRDSET *st_cpu_nr_throttled;
- RRDSET *st_cpu_throttled_time;
- RRDSET *st_cpu_shares;
-
- RRDSET *st_mem;
- RRDSET *st_mem_utilization;
- RRDSET *st_writeback;
- RRDSET *st_mem_activity;
- RRDSET *st_pgfaults;
- RRDSET *st_mem_usage;
- RRDSET *st_mem_usage_limit;
- RRDSET *st_mem_failcnt;
-
- RRDSET *st_io;
- RRDSET *st_serviced_ops;
- RRDSET *st_throttle_io;
- RRDSET *st_throttle_serviced_ops;
- RRDSET *st_queued_ops;
- RRDSET *st_merged_ops;
-
- // per cgroup chart variables
- char *filename_cpuset_cpus;
- unsigned long long cpuset_cpus;
-
- char *filename_cpu_cfs_period;
- unsigned long long cpu_cfs_period;
-
- char *filename_cpu_cfs_quota;
- unsigned long long cpu_cfs_quota;
-
- const RRDSETVAR_ACQUIRED *chart_var_cpu_limit;
- NETDATA_DOUBLE prev_cpu_usage;
-
- char *filename_memory_limit;
- unsigned long long memory_limit;
- const RRDSETVAR_ACQUIRED *chart_var_memory_limit;
-
- char *filename_memoryswap_limit;
- unsigned long long memoryswap_limit;
- const RRDSETVAR_ACQUIRED *chart_var_memoryswap_limit;
-
- // services
- RRDDIM *rd_cpu;
- RRDDIM *rd_mem_usage;
- RRDDIM *rd_mem_failcnt;
- RRDDIM *rd_swap_usage;
-
- RRDDIM *rd_mem_detailed_cache;
- RRDDIM *rd_mem_detailed_rss;
- RRDDIM *rd_mem_detailed_mapped;
- RRDDIM *rd_mem_detailed_writeback;
- RRDDIM *rd_mem_detailed_pgpgin;
- RRDDIM *rd_mem_detailed_pgpgout;
- RRDDIM *rd_mem_detailed_pgfault;
- RRDDIM *rd_mem_detailed_pgmajfault;
-
- RRDDIM *rd_io_service_bytes_read;
- RRDDIM *rd_io_serviced_read;
- RRDDIM *rd_throttle_io_read;
- RRDDIM *rd_throttle_io_serviced_read;
- RRDDIM *rd_io_queued_read;
- RRDDIM *rd_io_merged_read;
-
- RRDDIM *rd_io_service_bytes_write;
- RRDDIM *rd_io_serviced_write;
- RRDDIM *rd_throttle_io_write;
- RRDDIM *rd_throttle_io_serviced_write;
- RRDDIM *rd_io_queued_write;
- RRDDIM *rd_io_merged_write;
-
- struct cgroup *next;
- struct cgroup *discovered_next;
-
-} *cgroup_root = NULL;
-
-uv_mutex_t cgroup_root_mutex;
-
-struct cgroup *discovered_cgroup_root = NULL;
-
-struct discovery_thread {
- uv_thread_t thread;
- uv_mutex_t mutex;
- uv_cond_t cond_var;
- int start_discovery;
- int exited;
-} discovery_thread;
-
-// ---------------------------------------------------------------------------------------------
-
-static inline int matches_enabled_cgroup_paths(char *id) {
- return simple_pattern_matches(enabled_cgroup_paths, id);
-}
-
-static inline int matches_enabled_cgroup_names(char *name) {
- return simple_pattern_matches(enabled_cgroup_names, name);
-}
-
-static inline int matches_enabled_cgroup_renames(char *id) {
- return simple_pattern_matches(enabled_cgroup_renames, id);
-}
-
-static inline int matches_systemd_services_cgroups(char *id) {
- return simple_pattern_matches(systemd_services_cgroups, id);
-}
-
-static inline int matches_search_cgroup_paths(const char *dir) {
- return simple_pattern_matches(search_cgroup_paths, dir);
-}
-
-static inline int matches_entrypoint_parent_process_comm(const char *comm) {
- return simple_pattern_matches(entrypoint_parent_process_comm, comm);
-}
-
-static inline int is_cgroup_systemd_service(struct cgroup *cg) {
- return (cg->options & CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE);
-}
-
-// ---------------------------------------------------------------------------------------------
-static int k8s_is_kubepod(struct cgroup *cg) {
- return cg->container_orchestrator == CGROUPS_ORCHESTRATOR_K8S;
-}
-
-static int k8s_is_container(const char *id) {
- // examples:
- // https://github.com/netdata/netdata/blob/0fc101679dcd12f1cb8acdd07bb4c85d8e553e53/collectors/cgroups.plugin/cgroup-name.sh#L121-L147
- const char *p = id;
- const char *pp = NULL;
- int i = 0;
- size_t l = 3; // pod
- while ((p = strstr(p, "pod"))) {
- i++;
- p += l;
- pp = p;
- }
- return !(i < 2 || !pp || !(pp = strchr(pp, '/')) || !pp++ || !*pp);
-}
-
-#define TASK_COMM_LEN 16
-
-static int k8s_get_container_first_proc_comm(const char *id, char *comm) {
- if (!k8s_is_container(id)) {
- return 1;
- }
-
- static procfile *ff = NULL;
-
- char filename[FILENAME_MAX + 1];
- snprintfz(filename, FILENAME_MAX, "%s/%s/cgroup.procs", cgroup_cpuacct_base, id);
-
- ff = procfile_reopen(ff, filename, NULL, CGROUP_PROCFILE_FLAG);
- if (unlikely(!ff)) {
- netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename);
- return 1;
- }
-
- ff = procfile_readall(ff);
- if (unlikely(!ff)) {
- netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename);
- return 1;
- }
-
- unsigned long lines = procfile_lines(ff);
- if (likely(lines < 2)) {
- return 1;
- }
-
- char *pid = procfile_lineword(ff, 0, 0);
- if (!pid || !*pid) {
- return 1;
- }
-
- snprintfz(filename, FILENAME_MAX, "%s/proc/%s/comm", netdata_configured_host_prefix, pid);
-
- ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT);
- if (unlikely(!ff)) {
- netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot open file '%s'.", filename);
- return 1;
- }
-
- ff = procfile_readall(ff);
- if (unlikely(!ff)) {
- netdata_log_debug(D_CGROUP, "CGROUP: k8s_is_pause_container(): cannot read file '%s'.", filename);
- return 1;
- }
-
- lines = procfile_lines(ff);
- if (unlikely(lines != 2)) {
- return 1;
- }
-
- char *proc_comm = procfile_lineword(ff, 0, 0);
- if (!proc_comm || !*proc_comm) {
- return 1;
- }
-
- strncpyz(comm, proc_comm, TASK_COMM_LEN);
- return 0;
-}
-
// ---------------------------------------------------------------------------------------------
static unsigned long long calc_delta(unsigned long long curr, unsigned long long prev) {
@@ -1049,16 +596,7 @@ static unsigned long long calc_percentage(unsigned long long value, unsigned lon
if (total == 0) {
return 0;
}
- return (NETDATA_DOUBLE)value / (NETDATA_DOUBLE)total * 100;
-}
-
-static int calc_cgroup_depth(const char *id) {
- int depth = 0;
- const char *s;
- for (s = id; *s; s++) {
- depth += unlikely(*s == '/');
- }
- return depth;
+ return (unsigned long long)((NETDATA_DOUBLE)value / (NETDATA_DOUBLE)total * 100);
}
// ----------------------------------------------------------------------------
@@ -1622,6 +1160,15 @@ static inline void cgroup_read_memory(struct memory *mem, char parent_cg_is_unif
}
}
+static void cgroup_read_pids_current(struct pids *pids) {
+ pids->pids_current_updated = 0;
+
+ if (unlikely(!pids->pids_current_filename))
+ return;
+
+ pids->pids_current_updated = !read_single_number_file(pids->pids_current_filename, &pids->pids_current);
+}
+
static inline void read_cgroup(struct cgroup *cg) {
netdata_log_debug(D_CGROUP, "reading metrics for cgroups '%s'", cg->id);
if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
@@ -1636,6 +1183,7 @@ static inline void read_cgroup(struct cgroup *cg) {
cgroup_read_blkio(&cg->throttle_io_serviced);
cgroup_read_blkio(&cg->io_merged);
cgroup_read_blkio(&cg->io_queued);
+ cgroup_read_pids_current(&cg->pids);
}
else {
//TODO: io_service_bytes and io_serviced use same file merge into 1 function
@@ -1648,6 +1196,7 @@ static inline void read_cgroup(struct cgroup *cg) {
cgroup2_read_pressure(&cg->memory_pressure);
cgroup2_read_pressure(&cg->irq_pressure);
cgroup_read_memory(&cg->memory, 1);
+ cgroup_read_pids_current(&cg->pids);
}
}
@@ -1662,2092 +1211,182 @@ static inline void read_all_discovered_cgroups(struct cgroup *root) {
}
}
-// ----------------------------------------------------------------------------
-// cgroup network interfaces
+// update CPU and memory limits
-#define CGROUP_NETWORK_INTERFACE_MAX_LINE 2048
-static inline void read_cgroup_network_interfaces(struct cgroup *cg) {
- netdata_log_debug(D_CGROUP, "looking for the network interfaces of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title);
+static inline void update_cpu_limits(char **filename, unsigned long long *value, struct cgroup *cg) {
+ if(*filename) {
+ int ret = -1;
- pid_t cgroup_pid;
- char cgroup_identifier[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1];
+ if(value == &cg->cpuset_cpus) {
+ unsigned long ncpus = read_cpuset_cpus(*filename, get_system_cpus());
+ if(ncpus) {
+ *value = ncpus;
+ ret = 0;
+ }
+ }
+ else if(value == &cg->cpu_cfs_period || value == &cg->cpu_cfs_quota) {
+ ret = read_single_number_file(*filename, value);
+ }
+ else ret = -1;
- if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
- snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_cpuacct_base, cg->id);
- }
- else {
- snprintfz(cgroup_identifier, CGROUP_NETWORK_INTERFACE_MAX_LINE, "%s%s", cgroup_unified_base, cg->id);
+ if(ret) {
+ collector_error("Cannot refresh cgroup %s cpu limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename);
+ freez(*filename);
+ *filename = NULL;
+ }
}
+}
- netdata_log_debug(D_CGROUP, "executing cgroup_identifier %s --cgroup '%s' for cgroup '%s'", cgroups_network_interface_script, cgroup_identifier, cg->id);
- FILE *fp_child_input, *fp_child_output;
- (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_network_interface_script, "--cgroup", cgroup_identifier);
- if(!fp_child_output) {
- collector_error("CGROUP: cannot popen(%s --cgroup \"%s\", \"r\").", cgroups_network_interface_script, cgroup_identifier);
- return;
- }
+static inline void update_cpu_limits2(struct cgroup *cg) {
+ if(cg->filename_cpu_cfs_quota){
+ static procfile *ff = NULL;
- char *s;
- char buffer[CGROUP_NETWORK_INTERFACE_MAX_LINE + 1];
- while((s = fgets(buffer, CGROUP_NETWORK_INTERFACE_MAX_LINE, fp_child_output))) {
- trim(s);
-
- if(*s && *s != '\n') {
- char *t = s;
- while(*t && *t != ' ') t++;
- if(*t == ' ') {
- *t = '\0';
- t++;
- }
+ ff = procfile_reopen(ff, cg->filename_cpu_cfs_quota, NULL, CGROUP_PROCFILE_FLAG);
+ if(unlikely(!ff)) {
+ goto cpu_limits2_err;
+ }
- if(!*s) {
- collector_error("CGROUP: empty host interface returned by script");
- continue;
- }
+ ff = procfile_readall(ff);
+ if(unlikely(!ff)) {
+ goto cpu_limits2_err;
+ }
- if(!*t) {
- collector_error("CGROUP: empty guest interface returned by script");
- continue;
- }
+ unsigned long lines = procfile_lines(ff);
- struct cgroup_network_interface *i = callocz(1, sizeof(struct cgroup_network_interface));
- i->host_device = strdupz(s);
- i->container_device = strdupz(t);
- i->next = cg->interfaces;
- cg->interfaces = i;
+ if (unlikely(lines < 1)) {
+ collector_error("CGROUP: file '%s' should have 1 lines.", cg->filename_cpu_cfs_quota);
+ return;
+ }
- collector_info("CGROUP: cgroup '%s' has network interface '%s' as '%s'", cg->id, i->host_device, i->container_device);
+ cg->cpu_cfs_period = str2ull(procfile_lineword(ff, 0, 1), NULL);
+ cg->cpuset_cpus = get_system_cpus();
- // register a device rename to proc_net_dev.c
- netdev_rename_device_add(
- i->host_device, i->container_device, cg->chart_id, cg->chart_labels, k8s_is_kubepod(cg) ? "k8s." : "");
+ char *s = "max\n\0";
+ if(strcmp(s, procfile_lineword(ff, 0, 0)) == 0){
+ cg->cpu_cfs_quota = cg->cpu_cfs_period * cg->cpuset_cpus;
+ } else {
+ cg->cpu_cfs_quota = str2ull(procfile_lineword(ff, 0, 0), NULL);
}
- }
+ netdata_log_debug(D_CGROUP, "CPU limits values: %llu %llu %llu", cg->cpu_cfs_period, cg->cpuset_cpus, cg->cpu_cfs_quota);
+ return;
+
+cpu_limits2_err:
+ collector_error("Cannot refresh cgroup %s cpu limit by reading '%s'. Will not update its limit anymore.", cg->id, cg->filename_cpu_cfs_quota);
+ freez(cg->filename_cpu_cfs_quota);
+ cg->filename_cpu_cfs_quota = NULL;
- netdata_pclose(fp_child_input, fp_child_output, cgroup_pid);
- // netdata_log_debug(D_CGROUP, "closed cgroup_identifier for cgroup '%s'", cg->id);
+ }
}
-static inline void free_cgroup_network_interfaces(struct cgroup *cg) {
- while(cg->interfaces) {
- struct cgroup_network_interface *i = cg->interfaces;
- cg->interfaces = i->next;
+static inline int update_memory_limits(struct cgroup *cg) {
+ char **filename = &cg->filename_memory_limit;
+ const RRDSETVAR_ACQUIRED **chart_var = &cg->chart_var_memory_limit;
+ unsigned long long *value = &cg->memory_limit;
- // delete the registration of proc_net_dev rename
- netdev_rename_device_del(i->host_device);
+ if(*filename) {
+ if(unlikely(!*chart_var)) {
+ *chart_var = rrdsetvar_custom_chart_variable_add_and_acquire(cg->st_mem_usage, "memory_limit");
+ if(!*chart_var) {
+ collector_error("Cannot create cgroup %s chart variable '%s'. Will not update its limit anymore.", cg->id, "memory_limit");
+ freez(*filename);
+ *filename = NULL;
+ }
+ }
- freez((void *)i->host_device);
- freez((void *)i->container_device);
- freez((void *)i);
+ if(*filename && *chart_var) {
+ if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
+ if(read_single_number_file(*filename, value)) {
+ collector_error("Cannot refresh cgroup %s memory limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename);
+ freez(*filename);
+ *filename = NULL;
+ }
+ else {
+ rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0));
+ return 1;
+ }
+ } else {
+ char buffer[30 + 1];
+ int ret = read_file(*filename, buffer, 30);
+ if(ret) {
+ collector_error("Cannot refresh cgroup %s memory limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename);
+ freez(*filename);
+ *filename = NULL;
+ return 0;
+ }
+ char *s = "max\n\0";
+ if(strcmp(s, buffer) == 0){
+ *value = UINT64_MAX;
+ rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0));
+ return 1;
+ }
+ *value = str2ull(buffer, NULL);
+ rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value) / (1024.0 * 1024.0));
+ return 1;
+ }
+ }
}
+ return 0;
}
// ----------------------------------------------------------------------------
-// add/remove/find cgroup objects
+// generate charts
-#define CGROUP_CHARTID_LINE_MAX 1024
+void update_cgroup_systemd_services_charts() {
+ for (struct cgroup *cg = cgroup_root; cg; cg = cg->next) {
+ if (unlikely(!cg->enabled || cg->pending_renames || !is_cgroup_systemd_service(cg)))
+ continue;
-static inline char *cgroup_title_strdupz(const char *s) {
- if(!s || !*s) s = "/";
-
- if(*s == '/' && s[1] != '\0') s++;
-
- char *r = strdupz(s);
- netdata_fix_chart_name(r);
-
- return r;
-}
-
-static inline char *cgroup_chart_id_strdupz(const char *s) {
- if(!s || !*s) s = "/";
-
- if(*s == '/' && s[1] != '\0') s++;
-
- char *r = strdupz(s);
- netdata_fix_chart_id(r);
-
- return r;
-}
-
-// TODO: move the code to cgroup_chart_id_strdupz() when the renaming script is fixed
-static inline void substitute_dots_in_id(char *s) {
- // dots are used to distinguish chart type and id in streaming, so we should replace them
- for (char *d = s; *d; d++) {
- if (*d == '.')
- *d = '-';
- }
-}
-
-// ----------------------------------------------------------------------------
-// parse k8s labels
-
-char *cgroup_parse_resolved_name_and_labels(DICTIONARY *labels, char *data) {
- // the first word, up to the first space is the name
- char *name = strsep_skip_consecutive_separators(&data, " ");
-
- // the rest are key=value pairs separated by comma
- while(data) {
- char *pair = strsep_skip_consecutive_separators(&data, ",");
- rrdlabels_add_pair(labels, pair, RRDLABEL_SRC_AUTO| RRDLABEL_SRC_K8S);
- }
-
- return name;
-}
-
-// ----------------------------------------------------------------------------
-
-static inline void free_pressure(struct pressure *res) {
- if (res->some.share_time.st) rrdset_is_obsolete(res->some.share_time.st);
- if (res->some.total_time.st) rrdset_is_obsolete(res->some.total_time.st);
- if (res->full.share_time.st) rrdset_is_obsolete(res->full.share_time.st);
- if (res->full.total_time.st) rrdset_is_obsolete(res->full.total_time.st);
- freez(res->filename);
-}
-
-static inline void cgroup_free(struct cgroup *cg) {
- netdata_log_debug(D_CGROUP, "Removing cgroup '%s' with chart id '%s' (was %s and %s)", cg->id, cg->chart_id, (cg->enabled)?"enabled":"disabled", (cg->available)?"available":"not available");
-
- if(cg->st_cpu) rrdset_is_obsolete(cg->st_cpu);
- if(cg->st_cpu_limit) rrdset_is_obsolete(cg->st_cpu_limit);
- if(cg->st_cpu_per_core) rrdset_is_obsolete(cg->st_cpu_per_core);
- if(cg->st_cpu_nr_throttled) rrdset_is_obsolete(cg->st_cpu_nr_throttled);
- if(cg->st_cpu_throttled_time) rrdset_is_obsolete(cg->st_cpu_throttled_time);
- if(cg->st_cpu_shares) rrdset_is_obsolete(cg->st_cpu_shares);
- if(cg->st_mem) rrdset_is_obsolete(cg->st_mem);
- if(cg->st_writeback) rrdset_is_obsolete(cg->st_writeback);
- if(cg->st_mem_activity) rrdset_is_obsolete(cg->st_mem_activity);
- if(cg->st_pgfaults) rrdset_is_obsolete(cg->st_pgfaults);
- if(cg->st_mem_usage) rrdset_is_obsolete(cg->st_mem_usage);
- if(cg->st_mem_usage_limit) rrdset_is_obsolete(cg->st_mem_usage_limit);
- if(cg->st_mem_utilization) rrdset_is_obsolete(cg->st_mem_utilization);
- if(cg->st_mem_failcnt) rrdset_is_obsolete(cg->st_mem_failcnt);
- if(cg->st_io) rrdset_is_obsolete(cg->st_io);
- if(cg->st_serviced_ops) rrdset_is_obsolete(cg->st_serviced_ops);
- if(cg->st_throttle_io) rrdset_is_obsolete(cg->st_throttle_io);
- if(cg->st_throttle_serviced_ops) rrdset_is_obsolete(cg->st_throttle_serviced_ops);
- if(cg->st_queued_ops) rrdset_is_obsolete(cg->st_queued_ops);
- if(cg->st_merged_ops) rrdset_is_obsolete(cg->st_merged_ops);
-
- freez(cg->filename_cpuset_cpus);
- freez(cg->filename_cpu_cfs_period);
- freez(cg->filename_cpu_cfs_quota);
- freez(cg->filename_memory_limit);
- freez(cg->filename_memoryswap_limit);
-
- free_cgroup_network_interfaces(cg);
-
- freez(cg->cpuacct_usage.cpu_percpu);
-
- freez(cg->cpuacct_stat.filename);
- freez(cg->cpuacct_usage.filename);
- freez(cg->cpuacct_cpu_throttling.filename);
- freez(cg->cpuacct_cpu_shares.filename);
-
- arl_free(cg->memory.arl_base);
- freez(cg->memory.filename_detailed);
- freez(cg->memory.filename_failcnt);
- freez(cg->memory.filename_usage_in_bytes);
- freez(cg->memory.filename_msw_usage_in_bytes);
-
- freez(cg->io_service_bytes.filename);
- freez(cg->io_serviced.filename);
-
- freez(cg->throttle_io_service_bytes.filename);
- freez(cg->throttle_io_serviced.filename);
-
- freez(cg->io_merged.filename);
- freez(cg->io_queued.filename);
-
- free_pressure(&cg->cpu_pressure);
- free_pressure(&cg->io_pressure);
- free_pressure(&cg->memory_pressure);
- free_pressure(&cg->irq_pressure);
-
- freez(cg->id);
- freez(cg->intermediate_id);
- freez(cg->chart_id);
- freez(cg->chart_title);
-
- rrdlabels_destroy(cg->chart_labels);
-
- freez(cg);
-
- cgroup_root_count--;
-}
-
-// ----------------------------------------------------------------------------
-
-static inline void discovery_rename_cgroup(struct cgroup *cg) {
- if (!cg->pending_renames) {
- return;
- }
- cg->pending_renames--;
-
- netdata_log_debug(D_CGROUP, "looking for the name of cgroup '%s' with chart id '%s' and title '%s'", cg->id, cg->chart_id, cg->chart_title);
- netdata_log_debug(D_CGROUP, "executing command %s \"%s\" for cgroup '%s'", cgroups_rename_script, cg->intermediate_id, cg->chart_id);
- pid_t cgroup_pid;
-
- FILE *fp_child_input, *fp_child_output;
- (void)netdata_popen_raw_default_flags_and_environment(&cgroup_pid, &fp_child_input, &fp_child_output, cgroups_rename_script, cg->id, cg->intermediate_id);
- if (!fp_child_output) {
- collector_error("CGROUP: cannot popen(%s \"%s\", \"r\").", cgroups_rename_script, cg->intermediate_id);
- cg->pending_renames = 0;
- cg->processed = 1;
- return;
- }
-
- char buffer[CGROUP_CHARTID_LINE_MAX + 1];
- char *new_name = fgets(buffer, CGROUP_CHARTID_LINE_MAX, fp_child_output);
- int exit_code = netdata_pclose(fp_child_input, fp_child_output, cgroup_pid);
-
- switch (exit_code) {
- case 0:
- cg->pending_renames = 0;
- break;
-
- case 3:
- cg->pending_renames = 0;
- cg->processed = 1;
- break;
- }
-
- if (cg->pending_renames || cg->processed)
- return;
- if (!new_name || !*new_name || *new_name == '\n')
- return;
- if (!(new_name = trim(new_name)))
- return;
-
- char *name = new_name;
-
- if (!cg->chart_labels)
- cg->chart_labels = rrdlabels_create();
- // read the new labels and remove the obsolete ones
- rrdlabels_unmark_all(cg->chart_labels);
- name = cgroup_parse_resolved_name_and_labels(cg->chart_labels, new_name);
- rrdlabels_remove_all_unmarked(cg->chart_labels);
-
- freez(cg->chart_title);
- cg->chart_title = cgroup_title_strdupz(name);
-
- freez(cg->chart_id);
- cg->chart_id = cgroup_chart_id_strdupz(name);
-
- substitute_dots_in_id(cg->chart_id);
- cg->hash_chart = simple_hash(cg->chart_id);
-}
-
-static void is_cgroup_procs_exist(netdata_ebpf_cgroup_shm_body_t *out, char *id) {
- struct stat buf;
-
- snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_cpuset_base, id);
- if (likely(stat(out->path, &buf) == 0)) {
- return;
- }
-
- snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_blkio_base, id);
- if (likely(stat(out->path, &buf) == 0)) {
- return;
- }
-
- snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_memory_base, id);
- if (likely(stat(out->path, &buf) == 0)) {
- return;
- }
-
- snprintfz(out->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_devices_base, id);
- if (likely(stat(out->path, &buf) == 0)) {
- return;
- }
-
- out->path[0] = '\0';
- out->enabled = 0;
-}
-
-static inline void convert_cgroup_to_systemd_service(struct cgroup *cg) {
- char buffer[CGROUP_CHARTID_LINE_MAX + 1];
- cg->options |= CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE;
- strncpyz(buffer, cg->id, CGROUP_CHARTID_LINE_MAX);
- char *s = buffer;
-
- // skip to the last slash
- size_t len = strlen(s);
- while (len--) {
- if (unlikely(s[len] == '/')) {
- break;
- }
- }
- if (len) {
- s = &s[len + 1];
- }
-
- // remove extension
- len = strlen(s);
- while (len--) {
- if (unlikely(s[len] == '.')) {
- break;
- }
- }
- if (len) {
- s[len] = '\0';
- }
-
- freez(cg->chart_title);
- cg->chart_title = cgroup_title_strdupz(s);
-}
-
-static inline struct cgroup *discovery_cgroup_add(const char *id) {
- netdata_log_debug(D_CGROUP, "adding to list, cgroup with id '%s'", id);
-
- struct cgroup *cg = callocz(1, sizeof(struct cgroup));
- cg->id = strdupz(id);
- cg->hash = simple_hash(cg->id);
- cg->chart_title = cgroup_title_strdupz(id);
- cg->intermediate_id = cgroup_chart_id_strdupz(id);
- cg->chart_id = cgroup_chart_id_strdupz(id);
- substitute_dots_in_id(cg->chart_id);
- cg->hash_chart = simple_hash(cg->chart_id);
- if (cgroup_use_unified_cgroups) {
- cg->options |= CGROUP_OPTIONS_IS_UNIFIED;
- }
-
- if (!discovered_cgroup_root)
- discovered_cgroup_root = cg;
- else {
- struct cgroup *t;
- for (t = discovered_cgroup_root; t->discovered_next; t = t->discovered_next) {
- }
- t->discovered_next = cg;
- }
-
- return cg;
-}
-
-static inline struct cgroup *discovery_cgroup_find(const char *id) {
- netdata_log_debug(D_CGROUP, "searching for cgroup '%s'", id);
-
- uint32_t hash = simple_hash(id);
-
- struct cgroup *cg;
- for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) {
- if(hash == cg->hash && strcmp(id, cg->id) == 0)
- break;
- }
-
- netdata_log_debug(D_CGROUP, "cgroup '%s' %s in memory", id, (cg)?"found":"not found");
- return cg;
-}
-
-static inline void discovery_find_cgroup_in_dir_callback(const char *dir) {
- if (!dir || !*dir) {
- dir = "/";
- }
- netdata_log_debug(D_CGROUP, "examining cgroup dir '%s'", dir);
-
- struct cgroup *cg = discovery_cgroup_find(dir);
- if (cg) {
- cg->available = 1;
- return;
- }
-
- if (cgroup_root_count >= cgroup_root_max) {
- collector_info("CGROUP: maximum number of cgroups reached (%d). Not adding cgroup '%s'", cgroup_root_count, dir);
- return;
- }
-
- if (cgroup_max_depth > 0) {
- int depth = calc_cgroup_depth(dir);
- if (depth > cgroup_max_depth) {
- collector_info("CGROUP: '%s' is too deep (%d, while max is %d)", dir, depth, cgroup_max_depth);
- return;
- }
- }
-
- cg = discovery_cgroup_add(dir);
- cg->available = 1;
- cg->first_time_seen = 1;
- cgroup_root_count++;
-}
-
-static inline int discovery_find_dir_in_subdirs(const char *base, const char *this, void (*callback)(const char *)) {
- if(!this) this = base;
- netdata_log_debug(D_CGROUP, "searching for directories in '%s' (base '%s')", this?this:"", base);
-
- size_t dirlen = strlen(this), baselen = strlen(base);
-
- int ret = -1;
- int enabled = -1;
-
- const char *relative_path = &this[baselen];
- if(!*relative_path) relative_path = "/";
-
- DIR *dir = opendir(this);
- if(!dir) {
- collector_error("CGROUP: cannot read directory '%s'", base);
- return ret;
- }
- ret = 1;
-
- callback(relative_path);
-
- struct dirent *de = NULL;
- while((de = readdir(dir))) {
- if(de->d_type == DT_DIR
- && (
- (de->d_name[0] == '.' && de->d_name[1] == '\0')
- || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0')
- ))
- continue;
-
- if(de->d_type == DT_DIR) {
- if(enabled == -1) {
- const char *r = relative_path;
- if(*r == '\0') r = "/";
-
- // do not decent in directories we are not interested
- enabled = matches_search_cgroup_paths(r);
- }
-
- if(enabled) {
- char *s = mallocz(dirlen + strlen(de->d_name) + 2);
- strcpy(s, this);
- strcat(s, "/");
- strcat(s, de->d_name);
- int ret2 = discovery_find_dir_in_subdirs(base, s, callback);
- if(ret2 > 0) ret += ret2;
- freez(s);
- }
- }
- }
-
- closedir(dir);
- return ret;
-}
-
-static inline void discovery_mark_all_cgroups_as_unavailable() {
- netdata_log_debug(D_CGROUP, "marking all cgroups as not available");
- struct cgroup *cg;
- for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) {
- cg->available = 0;
- }
-}
-
-static inline void discovery_update_filenames() {
- struct cgroup *cg;
- struct stat buf;
- for(cg = discovered_cgroup_root; cg ; cg = cg->discovered_next) {
- if(unlikely(!cg->available || !cg->enabled || cg->pending_renames))
- continue;
-
- netdata_log_debug(D_CGROUP, "checking paths for cgroup '%s'", cg->id);
-
- // check for newly added cgroups
- // and update the filenames they read
- char filename[FILENAME_MAX + 1];
- if(!cgroup_use_unified_cgroups) {
- if(unlikely(cgroup_enable_cpuacct_stat && !cg->cpuacct_stat.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.stat", cgroup_cpuacct_base, cg->id);
- if(likely(stat(filename, &buf) != -1)) {
- cg->cpuacct_stat.filename = strdupz(filename);
- cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat;
- snprintfz(filename, FILENAME_MAX, "%s%s/cpuset.cpus", cgroup_cpuset_base, cg->id);
- cg->filename_cpuset_cpus = strdupz(filename);
- snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_period_us", cgroup_cpuacct_base, cg->id);
- cg->filename_cpu_cfs_period = strdupz(filename);
- snprintfz(filename, FILENAME_MAX, "%s%s/cpu.cfs_quota_us", cgroup_cpuacct_base, cg->id);
- cg->filename_cpu_cfs_quota = strdupz(filename);
- netdata_log_debug(D_CGROUP, "cpuacct.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_stat.filename);
- }
- else
- netdata_log_debug(D_CGROUP, "cpuacct.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
-
- if(unlikely(cgroup_enable_cpuacct_usage && !cg->cpuacct_usage.filename && !is_cgroup_systemd_service(cg))) {
- snprintfz(filename, FILENAME_MAX, "%s%s/cpuacct.usage_percpu", cgroup_cpuacct_base, cg->id);
- if(likely(stat(filename, &buf) != -1)) {
- cg->cpuacct_usage.filename = strdupz(filename);
- cg->cpuacct_usage.enabled = cgroup_enable_cpuacct_usage;
- netdata_log_debug(D_CGROUP, "cpuacct.usage_percpu filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_usage.filename);
- }
- else
- netdata_log_debug(D_CGROUP, "cpuacct.usage_percpu file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
- if(unlikely(cgroup_enable_cpuacct_cpu_throttling && !cg->cpuacct_cpu_throttling.filename && !is_cgroup_systemd_service(cg))) {
- snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_cpuacct_base, cg->id);
- if(likely(stat(filename, &buf) != -1)) {
- cg->cpuacct_cpu_throttling.filename = strdupz(filename);
- cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling;
- netdata_log_debug(D_CGROUP, "cpu.stat filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_throttling.filename);
- }
- else
- netdata_log_debug(D_CGROUP, "cpu.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
- if (unlikely(
- cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename &&
- !is_cgroup_systemd_service(cg))) {
- snprintfz(filename, FILENAME_MAX, "%s%s/cpu.shares", cgroup_cpuacct_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->cpuacct_cpu_shares.filename = strdupz(filename);
- cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares;
- netdata_log_debug(
- D_CGROUP, "cpu.shares filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename);
- } else
- netdata_log_debug(D_CGROUP, "cpu.shares file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
-
- if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) {
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_memory_base, cg->id);
- if(likely(stat(filename, &buf) != -1)) {
- cg->memory.filename_detailed = strdupz(filename);
- cg->memory.enabled_detailed = (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_AUTO;
- netdata_log_debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_detailed);
- }
- else
- netdata_log_debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
-
- if(unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.usage_in_bytes", cgroup_memory_base, cg->id);
- if(likely(stat(filename, &buf) != -1)) {
- cg->memory.filename_usage_in_bytes = strdupz(filename);
- cg->memory.enabled_usage_in_bytes = cgroup_enable_memory;
- netdata_log_debug(D_CGROUP, "memory.usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes);
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.limit_in_bytes", cgroup_memory_base, cg->id);
- cg->filename_memory_limit = strdupz(filename);
- }
- else
- netdata_log_debug(D_CGROUP, "memory.usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
-
- if(unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.usage_in_bytes", cgroup_memory_base, cg->id);
- if(likely(stat(filename, &buf) != -1)) {
- cg->memory.filename_msw_usage_in_bytes = strdupz(filename);
- cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap;
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.memsw.limit_in_bytes", cgroup_memory_base, cg->id);
- cg->filename_memoryswap_limit = strdupz(filename);
- netdata_log_debug(D_CGROUP, "memory.msw_usage_in_bytes filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes);
- }
- else
- netdata_log_debug(D_CGROUP, "memory.msw_usage_in_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
-
- if(unlikely(cgroup_enable_memory_failcnt && !cg->memory.filename_failcnt)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.failcnt", cgroup_memory_base, cg->id);
- if(likely(stat(filename, &buf) != -1)) {
- cg->memory.filename_failcnt = strdupz(filename);
- cg->memory.enabled_failcnt = cgroup_enable_memory_failcnt;
- netdata_log_debug(D_CGROUP, "memory.failcnt filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_failcnt);
- }
- else
- netdata_log_debug(D_CGROUP, "memory.failcnt file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
-
- if(unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes_recursive", cgroup_blkio_base, cg->id);
- if (unlikely(stat(filename, &buf) != -1)) {
- cg->io_service_bytes.filename = strdupz(filename);
- cg->io_service_bytes.enabled = cgroup_enable_blkio_io;
- netdata_log_debug(D_CGROUP, "blkio.io_service_bytes_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename);
- } else {
- netdata_log_debug(D_CGROUP, "blkio.io_service_bytes_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_service_bytes", cgroup_blkio_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->io_service_bytes.filename = strdupz(filename);
- cg->io_service_bytes.enabled = cgroup_enable_blkio_io;
- netdata_log_debug(D_CGROUP, "blkio.io_service_bytes filename for cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename);
- } else {
- netdata_log_debug(D_CGROUP, "blkio.io_service_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
- }
- }
-
- if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced_recursive", cgroup_blkio_base, cg->id);
- if (unlikely(stat(filename, &buf) != -1)) {
- cg->io_serviced.filename = strdupz(filename);
- cg->io_serviced.enabled = cgroup_enable_blkio_ops;
- netdata_log_debug(D_CGROUP, "blkio.io_serviced_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_serviced.filename);
- } else {
- netdata_log_debug(D_CGROUP, "blkio.io_serviced_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_serviced", cgroup_blkio_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->io_serviced.filename = strdupz(filename);
- cg->io_serviced.enabled = cgroup_enable_blkio_ops;
- netdata_log_debug(D_CGROUP, "blkio.io_serviced filename for cgroup '%s': '%s'", cg->id, cg->io_serviced.filename);
- } else {
- netdata_log_debug(D_CGROUP, "blkio.io_serviced file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
- }
- }
-
- if (unlikely(cgroup_enable_blkio_throttle_io && !cg->throttle_io_service_bytes.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes_recursive", cgroup_blkio_base, cg->id);
- if (unlikely(stat(filename, &buf) != -1)) {
- cg->throttle_io_service_bytes.filename = strdupz(filename);
- cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io;
- netdata_log_debug(D_CGROUP,"blkio.throttle.io_service_bytes_recursive filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_service_bytes.filename);
- } else {
- netdata_log_debug(D_CGROUP, "blkio.throttle.io_service_bytes_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- snprintfz(
- filename, FILENAME_MAX, "%s%s/blkio.throttle.io_service_bytes", cgroup_blkio_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->throttle_io_service_bytes.filename = strdupz(filename);
- cg->throttle_io_service_bytes.enabled = cgroup_enable_blkio_throttle_io;
- netdata_log_debug(D_CGROUP, "blkio.throttle.io_service_bytes filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_service_bytes.filename);
- } else {
- netdata_log_debug(D_CGROUP, "blkio.throttle.io_service_bytes file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
- }
- }
-
- if (unlikely(cgroup_enable_blkio_throttle_ops && !cg->throttle_io_serviced.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced_recursive", cgroup_blkio_base, cg->id);
- if (unlikely(stat(filename, &buf) != -1)) {
- cg->throttle_io_serviced.filename = strdupz(filename);
- cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops;
- netdata_log_debug(D_CGROUP, "blkio.throttle.io_serviced_recursive filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_serviced.filename);
- } else {
- netdata_log_debug(D_CGROUP, "blkio.throttle.io_serviced_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- snprintfz(filename, FILENAME_MAX, "%s%s/blkio.throttle.io_serviced", cgroup_blkio_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->throttle_io_serviced.filename = strdupz(filename);
- cg->throttle_io_serviced.enabled = cgroup_enable_blkio_throttle_ops;
- netdata_log_debug(D_CGROUP, "blkio.throttle.io_serviced filename for cgroup '%s': '%s'", cg->id, cg->throttle_io_serviced.filename);
- } else {
- netdata_log_debug(D_CGROUP, "blkio.throttle.io_serviced file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
- }
- }
-
- if (unlikely(cgroup_enable_blkio_merged_ops && !cg->io_merged.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged_recursive", cgroup_blkio_base, cg->id);
- if (unlikely(stat(filename, &buf) != -1)) {
- cg->io_merged.filename = strdupz(filename);
- cg->io_merged.enabled = cgroup_enable_blkio_merged_ops;
- netdata_log_debug(D_CGROUP, "blkio.io_merged_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_merged.filename);
- } else {
- netdata_log_debug(D_CGROUP, "blkio.io_merged_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_merged", cgroup_blkio_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->io_merged.filename = strdupz(filename);
- cg->io_merged.enabled = cgroup_enable_blkio_merged_ops;
- netdata_log_debug(D_CGROUP, "blkio.io_merged filename for cgroup '%s': '%s'", cg->id, cg->io_merged.filename);
- } else {
- netdata_log_debug(D_CGROUP, "blkio.io_merged file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
- }
- }
-
- if (unlikely(cgroup_enable_blkio_queued_ops && !cg->io_queued.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued_recursive", cgroup_blkio_base, cg->id);
- if (unlikely(stat(filename, &buf) != -1)) {
- cg->io_queued.filename = strdupz(filename);
- cg->io_queued.enabled = cgroup_enable_blkio_queued_ops;
- netdata_log_debug(D_CGROUP, "blkio.io_queued_recursive filename for cgroup '%s': '%s'", cg->id, cg->io_queued.filename);
- } else {
- netdata_log_debug(D_CGROUP, "blkio.io_queued_recursive file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- snprintfz(filename, FILENAME_MAX, "%s%s/blkio.io_queued", cgroup_blkio_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->io_queued.filename = strdupz(filename);
- cg->io_queued.enabled = cgroup_enable_blkio_queued_ops;
- netdata_log_debug(D_CGROUP, "blkio.io_queued filename for cgroup '%s': '%s'", cg->id, cg->io_queued.filename);
- } else {
- netdata_log_debug(D_CGROUP, "blkio.io_queued file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
- }
- }
- }
- else if(likely(cgroup_unified_exist)) {
- if(unlikely(cgroup_enable_blkio_io && !cg->io_service_bytes.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id);
- if(likely(stat(filename, &buf) != -1)) {
- cg->io_service_bytes.filename = strdupz(filename);
- cg->io_service_bytes.enabled = cgroup_enable_blkio_io;
- netdata_log_debug(D_CGROUP, "io.stat filename for unified cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename);
- } else
- netdata_log_debug(D_CGROUP, "io.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
- if (unlikely(cgroup_enable_blkio_ops && !cg->io_serviced.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/io.stat", cgroup_unified_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->io_serviced.filename = strdupz(filename);
- cg->io_serviced.enabled = cgroup_enable_blkio_ops;
- netdata_log_debug(D_CGROUP, "io.stat filename for unified cgroup '%s': '%s'", cg->id, cg->io_service_bytes.filename);
- } else
- netdata_log_debug(D_CGROUP, "io.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
- if (unlikely(
- (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_cpu_throttling) &&
- !cg->cpuacct_stat.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/cpu.stat", cgroup_unified_base, cg->id);
- if(likely(stat(filename, &buf) != -1)) {
- cg->cpuacct_stat.filename = strdupz(filename);
- cg->cpuacct_stat.enabled = cgroup_enable_cpuacct_stat;
- cg->cpuacct_cpu_throttling.enabled = cgroup_enable_cpuacct_cpu_throttling;
- cg->filename_cpuset_cpus = NULL;
- cg->filename_cpu_cfs_period = NULL;
- snprintfz(filename, FILENAME_MAX, "%s%s/cpu.max", cgroup_unified_base, cg->id);
- cg->filename_cpu_cfs_quota = strdupz(filename);
- netdata_log_debug(D_CGROUP, "cpu.stat filename for unified cgroup '%s': '%s'", cg->id, cg->cpuacct_stat.filename);
- }
- else
- netdata_log_debug(D_CGROUP, "cpu.stat file for unified cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
- if (unlikely(cgroup_enable_cpuacct_cpu_shares && !cg->cpuacct_cpu_shares.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/cpu.weight", cgroup_unified_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->cpuacct_cpu_shares.filename = strdupz(filename);
- cg->cpuacct_cpu_shares.enabled = cgroup_enable_cpuacct_cpu_shares;
- netdata_log_debug(D_CGROUP, "cpu.weight filename for cgroup '%s': '%s'", cg->id, cg->cpuacct_cpu_shares.filename);
- } else
- netdata_log_debug(D_CGROUP, "cpu.weight file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
-
- if(unlikely((cgroup_enable_detailed_memory || cgroup_used_memory) && !cg->memory.filename_detailed && (cgroup_used_memory || cgroup_enable_systemd_services_detailed_memory || !is_cgroup_systemd_service(cg)))) {
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.stat", cgroup_unified_base, cg->id);
- if(likely(stat(filename, &buf) != -1)) {
- cg->memory.filename_detailed = strdupz(filename);
- cg->memory.enabled_detailed = (cgroup_enable_detailed_memory == CONFIG_BOOLEAN_YES)?CONFIG_BOOLEAN_YES:CONFIG_BOOLEAN_AUTO;
- netdata_log_debug(D_CGROUP, "memory.stat filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_detailed);
- }
- else
- netdata_log_debug(D_CGROUP, "memory.stat file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
-
- if(unlikely(cgroup_enable_memory && !cg->memory.filename_usage_in_bytes)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.current", cgroup_unified_base, cg->id);
- if(likely(stat(filename, &buf) != -1)) {
- cg->memory.filename_usage_in_bytes = strdupz(filename);
- cg->memory.enabled_usage_in_bytes = cgroup_enable_memory;
- netdata_log_debug(D_CGROUP, "memory.current filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_usage_in_bytes);
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.max", cgroup_unified_base, cg->id);
- cg->filename_memory_limit = strdupz(filename);
- }
- else
- netdata_log_debug(D_CGROUP, "memory.current file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
-
- if(unlikely(cgroup_enable_swap && !cg->memory.filename_msw_usage_in_bytes)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.current", cgroup_unified_base, cg->id);
- if(likely(stat(filename, &buf) != -1)) {
- cg->memory.filename_msw_usage_in_bytes = strdupz(filename);
- cg->memory.enabled_msw_usage_in_bytes = cgroup_enable_swap;
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.swap.max", cgroup_unified_base, cg->id);
- cg->filename_memoryswap_limit = strdupz(filename);
- netdata_log_debug(D_CGROUP, "memory.swap.current filename for cgroup '%s': '%s'", cg->id, cg->memory.filename_msw_usage_in_bytes);
- }
- else
- netdata_log_debug(D_CGROUP, "memory.swap file for cgroup '%s': '%s' does not exist.", cg->id, filename);
- }
-
- if (unlikely(cgroup_enable_pressure_cpu && !cg->cpu_pressure.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/cpu.pressure", cgroup_unified_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->cpu_pressure.filename = strdupz(filename);
- cg->cpu_pressure.some.enabled = cgroup_enable_pressure_cpu;
- cg->cpu_pressure.full.enabled = CONFIG_BOOLEAN_NO;
- netdata_log_debug(D_CGROUP, "cpu.pressure filename for cgroup '%s': '%s'", cg->id, cg->cpu_pressure.filename);
- } else {
- netdata_log_debug(D_CGROUP, "cpu.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
- }
- }
-
- if (unlikely((cgroup_enable_pressure_io_some || cgroup_enable_pressure_io_full) && !cg->io_pressure.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/io.pressure", cgroup_unified_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->io_pressure.filename = strdupz(filename);
- cg->io_pressure.some.enabled = cgroup_enable_pressure_io_some;
- cg->io_pressure.full.enabled = cgroup_enable_pressure_io_full;
- netdata_log_debug(D_CGROUP, "io.pressure filename for cgroup '%s': '%s'", cg->id, cg->io_pressure.filename);
- } else {
- netdata_log_debug(D_CGROUP, "io.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
- }
- }
-
- if (unlikely((cgroup_enable_pressure_memory_some || cgroup_enable_pressure_memory_full) && !cg->memory_pressure.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/memory.pressure", cgroup_unified_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->memory_pressure.filename = strdupz(filename);
- cg->memory_pressure.some.enabled = cgroup_enable_pressure_memory_some;
- cg->memory_pressure.full.enabled = cgroup_enable_pressure_memory_full;
- netdata_log_debug(D_CGROUP, "memory.pressure filename for cgroup '%s': '%s'", cg->id, cg->memory_pressure.filename);
- } else {
- netdata_log_debug(D_CGROUP, "memory.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
- }
- }
-
- if (unlikely((cgroup_enable_pressure_irq_some || cgroup_enable_pressure_irq_full) && !cg->irq_pressure.filename)) {
- snprintfz(filename, FILENAME_MAX, "%s%s/irq.pressure", cgroup_unified_base, cg->id);
- if (likely(stat(filename, &buf) != -1)) {
- cg->irq_pressure.filename = strdupz(filename);
- cg->irq_pressure.some.enabled = cgroup_enable_pressure_irq_some;
- cg->irq_pressure.full.enabled = cgroup_enable_pressure_irq_full;
- netdata_log_debug(D_CGROUP, "irq.pressure filename for cgroup '%s': '%s'", cg->id, cg->irq_pressure.filename);
- } else {
- netdata_log_debug(D_CGROUP, "irq.pressure file for cgroup '%s': '%s' does not exist", cg->id, filename);
- }
- }
- }
- }
-}
-
-static inline void discovery_cleanup_all_cgroups() {
- struct cgroup *cg = discovered_cgroup_root, *last = NULL;
-
- for(; cg ;) {
- if(!cg->available) {
- // enable the first duplicate cgroup
- {
- struct cgroup *t;
- for(t = discovered_cgroup_root; t ; t = t->discovered_next) {
- if(t != cg && t->available && !t->enabled && t->options & CGROUP_OPTIONS_DISABLED_DUPLICATE && t->hash_chart == cg->hash_chart && !strcmp(t->chart_id, cg->chart_id)) {
- netdata_log_debug(D_CGROUP, "Enabling duplicate of cgroup '%s' with id '%s', because the original with id '%s' stopped.", t->chart_id, t->id, cg->id);
- t->enabled = 1;
- t->options &= ~CGROUP_OPTIONS_DISABLED_DUPLICATE;
- break;
- }
- }
- }
-
- if(!last)
- discovered_cgroup_root = cg->discovered_next;
- else
- last->discovered_next = cg->discovered_next;
-
- cgroup_free(cg);
-
- if(!last)
- cg = discovered_cgroup_root;
- else
- cg = last->discovered_next;
- }
- else {
- last = cg;
- cg = cg->discovered_next;
- }
- }
-}
-
-static inline void discovery_copy_discovered_cgroups_to_reader() {
- netdata_log_debug(D_CGROUP, "copy discovered cgroups to the main group list");
-
- struct cgroup *cg;
-
- for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) {
- cg->next = cg->discovered_next;
- }
-
- cgroup_root = discovered_cgroup_root;
-}
-
-static inline void discovery_share_cgroups_with_ebpf() {
- struct cgroup *cg;
- int count;
- struct stat buf;
-
- if (shm_mutex_cgroup_ebpf == SEM_FAILED) {
- return;
- }
- sem_wait(shm_mutex_cgroup_ebpf);
-
- for (cg = cgroup_root, count = 0; cg; cg = cg->next, count++) {
- netdata_ebpf_cgroup_shm_body_t *ptr = &shm_cgroup_ebpf.body[count];
- char *prefix = (is_cgroup_systemd_service(cg)) ? "" : "cgroup_";
- snprintfz(ptr->name, CGROUP_EBPF_NAME_SHARED_LENGTH - 1, "%s%s", prefix, cg->chart_title);
- ptr->hash = simple_hash(ptr->name);
- ptr->options = cg->options;
- ptr->enabled = cg->enabled;
- if (cgroup_use_unified_cgroups) {
- snprintfz(ptr->path, FILENAME_MAX, "%s%s/cgroup.procs", cgroup_unified_base, cg->id);
- if (likely(stat(ptr->path, &buf) == -1)) {
- ptr->path[0] = '\0';
- ptr->enabled = 0;
- }
- } else {
- is_cgroup_procs_exist(ptr, cg->id);
- }
-
- netdata_log_debug(D_CGROUP, "cgroup shared: NAME=%s, ENABLED=%d", ptr->name, ptr->enabled);
- }
-
- shm_cgroup_ebpf.header->cgroup_root_count = count;
- sem_post(shm_mutex_cgroup_ebpf);
-}
-
-static inline void discovery_find_all_cgroups_v1() {
- if (cgroup_enable_cpuacct_stat || cgroup_enable_cpuacct_usage) {
- if (discovery_find_dir_in_subdirs(cgroup_cpuacct_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
- cgroup_enable_cpuacct_stat = cgroup_enable_cpuacct_usage = CONFIG_BOOLEAN_NO;
- collector_error("CGROUP: disabled cpu statistics.");
- }
- }
-
- if (cgroup_enable_blkio_io || cgroup_enable_blkio_ops || cgroup_enable_blkio_throttle_io ||
- cgroup_enable_blkio_throttle_ops || cgroup_enable_blkio_merged_ops || cgroup_enable_blkio_queued_ops) {
- if (discovery_find_dir_in_subdirs(cgroup_blkio_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
- cgroup_enable_blkio_io = cgroup_enable_blkio_ops = cgroup_enable_blkio_throttle_io =
- cgroup_enable_blkio_throttle_ops = cgroup_enable_blkio_merged_ops = cgroup_enable_blkio_queued_ops =
- CONFIG_BOOLEAN_NO;
- collector_error("CGROUP: disabled blkio statistics.");
- }
- }
-
- if (cgroup_enable_memory || cgroup_enable_detailed_memory || cgroup_enable_swap || cgroup_enable_memory_failcnt) {
- if (discovery_find_dir_in_subdirs(cgroup_memory_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
- cgroup_enable_memory = cgroup_enable_detailed_memory = cgroup_enable_swap = cgroup_enable_memory_failcnt =
- CONFIG_BOOLEAN_NO;
- collector_error("CGROUP: disabled memory statistics.");
- }
- }
-
- if (cgroup_search_in_devices) {
- if (discovery_find_dir_in_subdirs(cgroup_devices_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
- cgroup_search_in_devices = 0;
- collector_error("CGROUP: disabled devices statistics.");
- }
- }
-}
-
-static inline void discovery_find_all_cgroups_v2() {
- if (discovery_find_dir_in_subdirs(cgroup_unified_base, NULL, discovery_find_cgroup_in_dir_callback) == -1) {
- cgroup_unified_exist = CONFIG_BOOLEAN_NO;
- collector_error("CGROUP: disabled unified cgroups statistics.");
- }
-}
-
-static int is_digits_only(const char *s) {
- do {
- if (!isdigit(*s++)) {
- return 0;
- }
- } while (*s);
-
- return 1;
-}
-
-static inline void discovery_process_first_time_seen_cgroup(struct cgroup *cg) {
- if (!cg->first_time_seen) {
- return;
- }
- cg->first_time_seen = 0;
-
- char comm[TASK_COMM_LEN + 1];
-
- if (cg->container_orchestrator == CGROUPS_ORCHESTRATOR_UNSET) {
- if (strstr(cg->id, "kubepods")) {
- cg->container_orchestrator = CGROUPS_ORCHESTRATOR_K8S;
- } else {
- cg->container_orchestrator = CGROUPS_ORCHESTRATOR_UNKNOWN;
- }
- }
-
- if (is_inside_k8s && !k8s_get_container_first_proc_comm(cg->id, comm)) {
- // container initialization may take some time when CPU % is high
- // seen on GKE: comm is '6' before 'runc:[2:INIT]' (dunno if it could be another number)
- if (is_digits_only(comm) || matches_entrypoint_parent_process_comm(comm)) {
- cg->first_time_seen = 1;
- return;
- }
- if (!strcmp(comm, "pause")) {
- // a container that holds the network namespace for the pod
- // we don't need to collect its metrics
- cg->processed = 1;
- return;
- }
- }
-
- if (cgroup_enable_systemd_services && matches_systemd_services_cgroups(cg->id)) {
- netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'cgroups to match as systemd services'", cg->id, cg->chart_title);
- convert_cgroup_to_systemd_service(cg);
- return;
- }
-
- if (matches_enabled_cgroup_renames(cg->id)) {
- netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') matches 'run script to rename cgroups matching', will try to rename it", cg->id, cg->chart_title);
- if (is_inside_k8s && k8s_is_container(cg->id)) {
- // it may take up to a minute for the K8s API to return data for the container
- // tested on AWS K8s cluster with 100% CPU utilization
- cg->pending_renames = 9; // 1.5 minute
- } else {
- cg->pending_renames = 2;
- }
- }
-}
-
-static int discovery_is_cgroup_duplicate(struct cgroup *cg) {
- // https://github.com/netdata/netdata/issues/797#issuecomment-241248884
- struct cgroup *c;
- for (c = discovered_cgroup_root; c; c = c->discovered_next) {
- if (c != cg && c->enabled && c->hash_chart == cg->hash_chart && !strcmp(c->chart_id, cg->chart_id)) {
- collector_error("CGROUP: chart id '%s' already exists with id '%s' and is enabled and available. Disabling cgroup with id '%s'.", cg->chart_id, c->id, cg->id);
- return 1;
- }
- }
- return 0;
-}
-
-static inline void discovery_process_cgroup(struct cgroup *cg) {
- if (!cg) {
- netdata_log_debug(D_CGROUP, "discovery_process_cgroup() received NULL");
- return;
- }
- if (!cg->available || cg->processed) {
- return;
- }
-
- if (cg->first_time_seen) {
- worker_is_busy(WORKER_DISCOVERY_PROCESS_FIRST_TIME);
- discovery_process_first_time_seen_cgroup(cg);
- if (unlikely(cg->first_time_seen || cg->processed)) {
- return;
- }
- }
-
- if (cg->pending_renames) {
- worker_is_busy(WORKER_DISCOVERY_PROCESS_RENAME);
- discovery_rename_cgroup(cg);
- if (unlikely(cg->pending_renames || cg->processed)) {
- return;
- }
- }
-
- cg->processed = 1;
-
- if ((strlen(cg->chart_id) + strlen(cgroup_chart_id_prefix)) >= RRD_ID_LENGTH_MAX) {
- collector_info("cgroup '%s' (chart id '%s') disabled because chart_id exceeds the limit (RRD_ID_LENGTH_MAX)", cg->id, cg->chart_id);
- return;
- }
-
- if (is_cgroup_systemd_service(cg)) {
- cg->enabled = 1;
- return;
- }
-
- if (!(cg->enabled = matches_enabled_cgroup_names(cg->chart_title))) {
- netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups names matching'", cg->id, cg->chart_title);
- return;
- }
-
- if (!(cg->enabled = matches_enabled_cgroup_paths(cg->id))) {
- netdata_log_debug(D_CGROUP, "cgroup '%s' (name '%s') disabled by 'enable by default cgroups matching'", cg->id, cg->chart_title);
- return;
- }
-
- if (discovery_is_cgroup_duplicate(cg)) {
- cg->enabled = 0;
- cg->options |= CGROUP_OPTIONS_DISABLED_DUPLICATE;
- return;
- }
-
- if (!cg->chart_labels)
- cg->chart_labels = rrdlabels_create();
-
- if (!k8s_is_kubepod(cg)) {
- rrdlabels_add(cg->chart_labels, "cgroup_name", cg->chart_id, RRDLABEL_SRC_AUTO);
- if (!dictionary_get(cg->chart_labels, "image")) {
- rrdlabels_add(cg->chart_labels, "image", "", RRDLABEL_SRC_AUTO);
- }
- }
-
- worker_is_busy(WORKER_DISCOVERY_PROCESS_NETWORK);
- read_cgroup_network_interfaces(cg);
-}
-
-static inline void discovery_find_all_cgroups() {
- netdata_log_debug(D_CGROUP, "searching for cgroups");
-
- worker_is_busy(WORKER_DISCOVERY_INIT);
- discovery_mark_all_cgroups_as_unavailable();
-
- worker_is_busy(WORKER_DISCOVERY_FIND);
- if (!cgroup_use_unified_cgroups) {
- discovery_find_all_cgroups_v1();
- } else {
- discovery_find_all_cgroups_v2();
- }
-
- struct cgroup *cg;
- for (cg = discovered_cgroup_root; cg; cg = cg->discovered_next) {
- worker_is_busy(WORKER_DISCOVERY_PROCESS);
- discovery_process_cgroup(cg);
- }
-
- worker_is_busy(WORKER_DISCOVERY_UPDATE);
- discovery_update_filenames();
-
- worker_is_busy(WORKER_DISCOVERY_LOCK);
- uv_mutex_lock(&cgroup_root_mutex);
-
- worker_is_busy(WORKER_DISCOVERY_CLEANUP);
- discovery_cleanup_all_cgroups();
-
- worker_is_busy(WORKER_DISCOVERY_COPY);
- discovery_copy_discovered_cgroups_to_reader();
-
- uv_mutex_unlock(&cgroup_root_mutex);
-
- worker_is_busy(WORKER_DISCOVERY_SHARE);
- discovery_share_cgroups_with_ebpf();
-
- netdata_log_debug(D_CGROUP, "done searching for cgroups");
-}
-
-static void cgroup_discovery_cleanup(void *ptr) {
- UNUSED(ptr);
-
- discovery_thread.exited = 1;
- worker_unregister();
- service_exits();
-}
-
-void cgroup_discovery_worker(void *ptr)
-{
- UNUSED(ptr);
-
- netdata_thread_cleanup_push(cgroup_discovery_cleanup, ptr);
-
- worker_register("CGROUPSDISC");
- worker_register_job_name(WORKER_DISCOVERY_INIT, "init");
- worker_register_job_name(WORKER_DISCOVERY_FIND, "find");
- worker_register_job_name(WORKER_DISCOVERY_PROCESS, "process");
- worker_register_job_name(WORKER_DISCOVERY_PROCESS_RENAME, "rename");
- worker_register_job_name(WORKER_DISCOVERY_PROCESS_NETWORK, "network");
- worker_register_job_name(WORKER_DISCOVERY_PROCESS_FIRST_TIME, "new");
- worker_register_job_name(WORKER_DISCOVERY_UPDATE, "update");
- worker_register_job_name(WORKER_DISCOVERY_CLEANUP, "cleanup");
- worker_register_job_name(WORKER_DISCOVERY_COPY, "copy");
- worker_register_job_name(WORKER_DISCOVERY_SHARE, "share");
- worker_register_job_name(WORKER_DISCOVERY_LOCK, "lock");
-
- entrypoint_parent_process_comm = simple_pattern_create(
- " runc:[* " // http://terenceli.github.io/%E6%8A%80%E6%9C%AF/2021/12/28/runc-internals-3)
- " exe ", // https://github.com/falcosecurity/falco/blob/9d41b0a151b83693929d3a9c84f7c5c85d070d3a/rules/falco_rules.yaml#L1961
- NULL,
- SIMPLE_PATTERN_EXACT, true);
-
- while (service_running(SERVICE_COLLECTORS)) {
- worker_is_idle();
-
- uv_mutex_lock(&discovery_thread.mutex);
- while (!discovery_thread.start_discovery && service_running(SERVICE_COLLECTORS))
- uv_cond_wait(&discovery_thread.cond_var, &discovery_thread.mutex);
- discovery_thread.start_discovery = 0;
- uv_mutex_unlock(&discovery_thread.mutex);
-
- if (unlikely(!service_running(SERVICE_COLLECTORS)))
- break;
-
- discovery_find_all_cgroups();
- }
-
- netdata_thread_cleanup_pop(1);
-}
-
-// ----------------------------------------------------------------------------
-// generate charts
-
-#define CHART_TITLE_MAX 300
-
-void update_systemd_services_charts(
- int update_every
- , int do_cpu
- , int do_mem_usage
- , int do_mem_detailed
- , int do_mem_failcnt
- , int do_swap_usage
- , int do_io
- , int do_io_ops
- , int do_throttle_io
- , int do_throttle_ops
- , int do_queued_ops
- , int do_merged_ops
-) {
- static RRDSET
- *st_cpu = NULL,
- *st_mem_usage = NULL,
- *st_mem_failcnt = NULL,
- *st_swap_usage = NULL,
-
- *st_mem_detailed_cache = NULL,
- *st_mem_detailed_rss = NULL,
- *st_mem_detailed_mapped = NULL,
- *st_mem_detailed_writeback = NULL,
- *st_mem_detailed_pgfault = NULL,
- *st_mem_detailed_pgmajfault = NULL,
- *st_mem_detailed_pgpgin = NULL,
- *st_mem_detailed_pgpgout = NULL,
-
- *st_io_read = NULL,
- *st_io_serviced_read = NULL,
- *st_throttle_io_read = NULL,
- *st_throttle_ops_read = NULL,
- *st_queued_ops_read = NULL,
- *st_merged_ops_read = NULL,
-
- *st_io_write = NULL,
- *st_io_serviced_write = NULL,
- *st_throttle_io_write = NULL,
- *st_throttle_ops_write = NULL,
- *st_queued_ops_write = NULL,
- *st_merged_ops_write = NULL;
-
- // create the charts
-
- if (unlikely(do_cpu && !st_cpu)) {
- char title[CHART_TITLE_MAX + 1];
- snprintfz(title, CHART_TITLE_MAX, "Systemd Services CPU utilization (100%% = 1 core)");
-
- st_cpu = rrdset_create_localhost(
- "services"
- , "cpu"
- , NULL
- , "cpu"
- , "services.cpu"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if (unlikely(do_mem_usage && !st_mem_usage)) {
- st_mem_usage = rrdset_create_localhost(
- "services"
- , "mem_usage"
- , NULL
- , "mem"
- , "services.mem_usage"
- , "Systemd Services Used Memory"
- , "MiB"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 10
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if(likely(do_mem_detailed)) {
- if(unlikely(!st_mem_detailed_rss)) {
- st_mem_detailed_rss = rrdset_create_localhost(
- "services"
- , "mem_rss"
- , NULL
- , "mem"
- , "services.mem_rss"
- , "Systemd Services RSS Memory"
- , "MiB"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 20
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if(unlikely(!st_mem_detailed_mapped)) {
- st_mem_detailed_mapped = rrdset_create_localhost(
- "services"
- , "mem_mapped"
- , NULL
- , "mem"
- , "services.mem_mapped"
- , "Systemd Services Mapped Memory"
- , "MiB"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 30
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if(unlikely(!st_mem_detailed_cache)) {
- st_mem_detailed_cache = rrdset_create_localhost(
- "services"
- , "mem_cache"
- , NULL
- , "mem"
- , "services.mem_cache"
- , "Systemd Services Cache Memory"
- , "MiB"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 40
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if(unlikely(!st_mem_detailed_writeback)) {
- st_mem_detailed_writeback = rrdset_create_localhost(
- "services"
- , "mem_writeback"
- , NULL
- , "mem"
- , "services.mem_writeback"
- , "Systemd Services Writeback Memory"
- , "MiB"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 50
- , update_every
- , RRDSET_TYPE_STACKED
- );
-
- }
-
- if(unlikely(!st_mem_detailed_pgfault)) {
- st_mem_detailed_pgfault = rrdset_create_localhost(
- "services"
- , "mem_pgfault"
- , NULL
- , "mem"
- , "services.mem_pgfault"
- , "Systemd Services Memory Minor Page Faults"
- , "MiB/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 60
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if(unlikely(!st_mem_detailed_pgmajfault)) {
- st_mem_detailed_pgmajfault = rrdset_create_localhost(
- "services"
- , "mem_pgmajfault"
- , NULL
- , "mem"
- , "services.mem_pgmajfault"
- , "Systemd Services Memory Major Page Faults"
- , "MiB/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 70
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if(unlikely(!st_mem_detailed_pgpgin)) {
- st_mem_detailed_pgpgin = rrdset_create_localhost(
- "services"
- , "mem_pgpgin"
- , NULL
- , "mem"
- , "services.mem_pgpgin"
- , "Systemd Services Memory Charging Activity"
- , "MiB/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 80
- , update_every
- , RRDSET_TYPE_STACKED
- );
-
- }
-
- if(unlikely(!st_mem_detailed_pgpgout)) {
- st_mem_detailed_pgpgout = rrdset_create_localhost(
- "services"
- , "mem_pgpgout"
- , NULL
- , "mem"
- , "services.mem_pgpgout"
- , "Systemd Services Memory Uncharging Activity"
- , "MiB/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 90
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
- }
-
- if(unlikely(do_mem_failcnt && !st_mem_failcnt)) {
- st_mem_failcnt = rrdset_create_localhost(
- "services"
- , "mem_failcnt"
- , NULL
- , "mem"
- , "services.mem_failcnt"
- , "Systemd Services Memory Limit Failures"
- , "failures"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 110
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if (do_swap_usage && !st_swap_usage) {
- st_swap_usage = rrdset_create_localhost(
- "services"
- , "swap_usage"
- , NULL
- , "swap"
- , "services.swap_usage"
- , "Systemd Services Swap Memory Used"
- , "MiB"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 100
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if(likely(do_io)) {
- if(unlikely(!st_io_read)) {
- st_io_read = rrdset_create_localhost(
- "services"
- , "io_read"
- , NULL
- , "disk"
- , "services.io_read"
- , "Systemd Services Disk Read Bandwidth"
- , "KiB/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 120
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if(unlikely(!st_io_write)) {
- st_io_write = rrdset_create_localhost(
- "services"
- , "io_write"
- , NULL
- , "disk"
- , "services.io_write"
- , "Systemd Services Disk Write Bandwidth"
- , "KiB/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 130
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
- }
-
- if(likely(do_io_ops)) {
- if(unlikely(!st_io_serviced_read)) {
- st_io_serviced_read = rrdset_create_localhost(
- "services"
- , "io_ops_read"
- , NULL
- , "disk"
- , "services.io_ops_read"
- , "Systemd Services Disk Read Operations"
- , "operations/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 140
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if(unlikely(!st_io_serviced_write)) {
- st_io_serviced_write = rrdset_create_localhost(
- "services"
- , "io_ops_write"
- , NULL
- , "disk"
- , "services.io_ops_write"
- , "Systemd Services Disk Write Operations"
- , "operations/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 150
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
- }
-
- if(likely(do_throttle_io)) {
- if(unlikely(!st_throttle_io_read)) {
-
- st_throttle_io_read = rrdset_create_localhost(
- "services"
- , "throttle_io_read"
- , NULL
- , "disk"
- , "services.throttle_io_read"
- , "Systemd Services Throttle Disk Read Bandwidth"
- , "KiB/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 160
- , update_every
- , RRDSET_TYPE_STACKED
- );
-
- }
-
- if(unlikely(!st_throttle_io_write)) {
- st_throttle_io_write = rrdset_create_localhost(
- "services"
- , "throttle_io_write"
- , NULL
- , "disk"
- , "services.throttle_io_write"
- , "Systemd Services Throttle Disk Write Bandwidth"
- , "KiB/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 170
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
- }
-
- if(likely(do_throttle_ops)) {
- if(unlikely(!st_throttle_ops_read)) {
- st_throttle_ops_read = rrdset_create_localhost(
- "services"
- , "throttle_io_ops_read"
- , NULL
- , "disk"
- , "services.throttle_io_ops_read"
- , "Systemd Services Throttle Disk Read Operations"
- , "operations/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 180
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if(unlikely(!st_throttle_ops_write)) {
- st_throttle_ops_write = rrdset_create_localhost(
- "services"
- , "throttle_io_ops_write"
- , NULL
- , "disk"
- , "services.throttle_io_ops_write"
- , "Systemd Services Throttle Disk Write Operations"
- , "operations/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 190
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
- }
-
- if(likely(do_queued_ops)) {
- if(unlikely(!st_queued_ops_read)) {
- st_queued_ops_read = rrdset_create_localhost(
- "services"
- , "queued_io_ops_read"
- , NULL
- , "disk"
- , "services.queued_io_ops_read"
- , "Systemd Services Queued Disk Read Operations"
- , "operations/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 200
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if(unlikely(!st_queued_ops_write)) {
-
- st_queued_ops_write = rrdset_create_localhost(
- "services"
- , "queued_io_ops_write"
- , NULL
- , "disk"
- , "services.queued_io_ops_write"
- , "Systemd Services Queued Disk Write Operations"
- , "operations/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 210
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
- }
-
- if(likely(do_merged_ops)) {
- if(unlikely(!st_merged_ops_read)) {
- st_merged_ops_read = rrdset_create_localhost(
- "services"
- , "merged_io_ops_read"
- , NULL
- , "disk"
- , "services.merged_io_ops_read"
- , "Systemd Services Merged Disk Read Operations"
- , "operations/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 220
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
-
- if(unlikely(!st_merged_ops_write)) {
- st_merged_ops_write = rrdset_create_localhost(
- "services"
- , "merged_io_ops_write"
- , NULL
- , "disk"
- , "services.merged_io_ops_write"
- , "Systemd Services Merged Disk Write Operations"
- , "operations/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME
- , NETDATA_CHART_PRIO_CGROUPS_SYSTEMD + 230
- , update_every
- , RRDSET_TYPE_STACKED
- );
- }
- }
-
- // update the values
- struct cgroup *cg;
- for(cg = cgroup_root; cg ; cg = cg->next) {
- if(unlikely(!cg->enabled || cg->pending_renames || !is_cgroup_systemd_service(cg)))
- continue;
-
- if(likely(do_cpu && cg->cpuacct_stat.updated)) {
- if(unlikely(!cg->rd_cpu)){
-
-
- if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
- cg->rd_cpu = rrddim_add(st_cpu, cg->chart_id, cg->chart_title, 100, system_hz, RRD_ALGORITHM_INCREMENTAL);
- } else {
- cg->rd_cpu = rrddim_add(st_cpu, cg->chart_id, cg->chart_title, 100, 1000000, RRD_ALGORITHM_INCREMENTAL);
- }
- }
-
- rrddim_set_by_pointer(st_cpu, cg->rd_cpu, cg->cpuacct_stat.user + cg->cpuacct_stat.system);
- }
-
- if(likely(do_mem_usage && cg->memory.updated_usage_in_bytes)) {
- if(unlikely(!cg->rd_mem_usage))
- cg->rd_mem_usage = rrddim_add(st_mem_usage, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_set_by_pointer(st_mem_usage, cg->rd_mem_usage, cg->memory.usage_in_bytes);
- }
-
- if(likely(do_mem_detailed && cg->memory.updated_detailed)) {
- if(unlikely(!cg->rd_mem_detailed_rss))
- cg->rd_mem_detailed_rss = rrddim_add(st_mem_detailed_rss, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_set_by_pointer(st_mem_detailed_rss, cg->rd_mem_detailed_rss, cg->memory.total_rss);
-
- if(unlikely(!cg->rd_mem_detailed_mapped))
- cg->rd_mem_detailed_mapped = rrddim_add(st_mem_detailed_mapped, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_set_by_pointer(st_mem_detailed_mapped, cg->rd_mem_detailed_mapped, cg->memory.total_mapped_file);
-
- if(unlikely(!cg->rd_mem_detailed_cache))
- cg->rd_mem_detailed_cache = rrddim_add(st_mem_detailed_cache, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_set_by_pointer(st_mem_detailed_cache, cg->rd_mem_detailed_cache, cg->memory.total_cache);
-
- if(unlikely(!cg->rd_mem_detailed_writeback))
- cg->rd_mem_detailed_writeback = rrddim_add(st_mem_detailed_writeback, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_set_by_pointer(st_mem_detailed_writeback, cg->rd_mem_detailed_writeback, cg->memory.total_writeback);
-
- if(unlikely(!cg->rd_mem_detailed_pgfault))
- cg->rd_mem_detailed_pgfault = rrddim_add(st_mem_detailed_pgfault, cg->chart_id, cg->chart_title, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_mem_detailed_pgfault, cg->rd_mem_detailed_pgfault, cg->memory.total_pgfault);
-
- if(unlikely(!cg->rd_mem_detailed_pgmajfault))
- cg->rd_mem_detailed_pgmajfault = rrddim_add(st_mem_detailed_pgmajfault, cg->chart_id, cg->chart_title, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_mem_detailed_pgmajfault, cg->rd_mem_detailed_pgmajfault, cg->memory.total_pgmajfault);
-
- if(unlikely(!cg->rd_mem_detailed_pgpgin))
- cg->rd_mem_detailed_pgpgin = rrddim_add(st_mem_detailed_pgpgin, cg->chart_id, cg->chart_title, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_mem_detailed_pgpgin, cg->rd_mem_detailed_pgpgin, cg->memory.total_pgpgin);
-
- if(unlikely(!cg->rd_mem_detailed_pgpgout))
- cg->rd_mem_detailed_pgpgout = rrddim_add(st_mem_detailed_pgpgout, cg->chart_id, cg->chart_title, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_mem_detailed_pgpgout, cg->rd_mem_detailed_pgpgout, cg->memory.total_pgpgout);
- }
-
- if(likely(do_mem_failcnt && cg->memory.updated_failcnt)) {
- if(unlikely(!cg->rd_mem_failcnt))
- cg->rd_mem_failcnt = rrddim_add(st_mem_failcnt, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_mem_failcnt, cg->rd_mem_failcnt, cg->memory.failcnt);
- }
-
- if(likely(do_swap_usage && cg->memory.updated_msw_usage_in_bytes)) {
- if(unlikely(!cg->rd_swap_usage))
- cg->rd_swap_usage = rrddim_add(st_swap_usage, cg->chart_id, cg->chart_title, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
-
- if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
- rrddim_set_by_pointer(
- st_swap_usage,
- cg->rd_swap_usage,
- cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ?
- cg->memory.msw_usage_in_bytes - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) : 0);
- } else {
- rrddim_set_by_pointer(st_swap_usage, cg->rd_swap_usage, cg->memory.msw_usage_in_bytes);
- }
- }
-
- if(likely(do_io && cg->io_service_bytes.updated)) {
- if(unlikely(!cg->rd_io_service_bytes_read))
- cg->rd_io_service_bytes_read = rrddim_add(st_io_read, cg->chart_id, cg->chart_title, 1, 1024, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_io_read, cg->rd_io_service_bytes_read, cg->io_service_bytes.Read);
-
- if(unlikely(!cg->rd_io_service_bytes_write))
- cg->rd_io_service_bytes_write = rrddim_add(st_io_write, cg->chart_id, cg->chart_title, 1, 1024, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_io_write, cg->rd_io_service_bytes_write, cg->io_service_bytes.Write);
- }
-
- if(likely(do_io_ops && cg->io_serviced.updated)) {
- if(unlikely(!cg->rd_io_serviced_read))
- cg->rd_io_serviced_read = rrddim_add(st_io_serviced_read, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_io_serviced_read, cg->rd_io_serviced_read, cg->io_serviced.Read);
-
- if(unlikely(!cg->rd_io_serviced_write))
- cg->rd_io_serviced_write = rrddim_add(st_io_serviced_write, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_io_serviced_write, cg->rd_io_serviced_write, cg->io_serviced.Write);
- }
-
- if(likely(do_throttle_io && cg->throttle_io_service_bytes.updated)) {
- if(unlikely(!cg->rd_throttle_io_read))
- cg->rd_throttle_io_read = rrddim_add(st_throttle_io_read, cg->chart_id, cg->chart_title, 1, 1024, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_throttle_io_read, cg->rd_throttle_io_read, cg->throttle_io_service_bytes.Read);
-
- if(unlikely(!cg->rd_throttle_io_write))
- cg->rd_throttle_io_write = rrddim_add(st_throttle_io_write, cg->chart_id, cg->chart_title, 1, 1024, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_throttle_io_write, cg->rd_throttle_io_write, cg->throttle_io_service_bytes.Write);
- }
-
- if(likely(do_throttle_ops && cg->throttle_io_serviced.updated)) {
- if(unlikely(!cg->rd_throttle_io_serviced_read))
- cg->rd_throttle_io_serviced_read = rrddim_add(st_throttle_ops_read, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_throttle_ops_read, cg->rd_throttle_io_serviced_read, cg->throttle_io_serviced.Read);
-
- if(unlikely(!cg->rd_throttle_io_serviced_write))
- cg->rd_throttle_io_serviced_write = rrddim_add(st_throttle_ops_write, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_throttle_ops_write, cg->rd_throttle_io_serviced_write, cg->throttle_io_serviced.Write);
- }
-
- if(likely(do_queued_ops && cg->io_queued.updated)) {
- if(unlikely(!cg->rd_io_queued_read))
- cg->rd_io_queued_read = rrddim_add(st_queued_ops_read, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_queued_ops_read, cg->rd_io_queued_read, cg->io_queued.Read);
-
- if(unlikely(!cg->rd_io_queued_write))
- cg->rd_io_queued_write = rrddim_add(st_queued_ops_write, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_queued_ops_write, cg->rd_io_queued_write, cg->io_queued.Write);
- }
-
- if(likely(do_merged_ops && cg->io_merged.updated)) {
- if(unlikely(!cg->rd_io_merged_read))
- cg->rd_io_merged_read = rrddim_add(st_merged_ops_read, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_merged_ops_read, cg->rd_io_merged_read, cg->io_merged.Read);
-
- if(unlikely(!cg->rd_io_merged_write))
- cg->rd_io_merged_write = rrddim_add(st_merged_ops_write, cg->chart_id, cg->chart_title, 1, 1, RRD_ALGORITHM_INCREMENTAL);
-
- rrddim_set_by_pointer(st_merged_ops_write, cg->rd_io_merged_write, cg->io_merged.Write);
- }
- }
-
- // complete the iteration
- if(likely(do_cpu))
- rrdset_done(st_cpu);
-
- if(likely(do_mem_usage))
- rrdset_done(st_mem_usage);
-
- if(unlikely(do_mem_detailed)) {
- rrdset_done(st_mem_detailed_cache);
- rrdset_done(st_mem_detailed_rss);
- rrdset_done(st_mem_detailed_mapped);
- rrdset_done(st_mem_detailed_writeback);
- rrdset_done(st_mem_detailed_pgfault);
- rrdset_done(st_mem_detailed_pgmajfault);
- rrdset_done(st_mem_detailed_pgpgin);
- rrdset_done(st_mem_detailed_pgpgout);
- }
-
- if(likely(do_mem_failcnt))
- rrdset_done(st_mem_failcnt);
-
- if(likely(do_swap_usage))
- rrdset_done(st_swap_usage);
-
- if(likely(do_io)) {
- rrdset_done(st_io_read);
- rrdset_done(st_io_write);
- }
-
- if(likely(do_io_ops)) {
- rrdset_done(st_io_serviced_read);
- rrdset_done(st_io_serviced_write);
- }
-
- if(likely(do_throttle_io)) {
- rrdset_done(st_throttle_io_read);
- rrdset_done(st_throttle_io_write);
- }
-
- if(likely(do_throttle_ops)) {
- rrdset_done(st_throttle_ops_read);
- rrdset_done(st_throttle_ops_write);
- }
-
- if(likely(do_queued_ops)) {
- rrdset_done(st_queued_ops_read);
- rrdset_done(st_queued_ops_write);
- }
-
- if(likely(do_merged_ops)) {
- rrdset_done(st_merged_ops_read);
- rrdset_done(st_merged_ops_write);
- }
-}
-
-static inline char *cgroup_chart_type(char *buffer, const char *id, size_t len) {
- if(buffer[0]) return buffer;
-
- if(id[0] == '\0' || (id[0] == '/' && id[1] == '\0'))
- strncpy(buffer, "cgroup_root", len);
- else
- snprintfz(buffer, len, "%s%s", cgroup_chart_id_prefix, id);
-
- netdata_fix_chart_id(buffer);
- return buffer;
-}
-
-static inline void update_cpu_limits(char **filename, unsigned long long *value, struct cgroup *cg) {
- if(*filename) {
- int ret = -1;
-
- if(value == &cg->cpuset_cpus) {
- unsigned long ncpus = read_cpuset_cpus(*filename, get_system_cpus());
- if(ncpus) {
- *value = ncpus;
- ret = 0;
- }
+ if (likely(cg->cpuacct_stat.updated)) {
+ update_cpu_utilization_chart(cg);
}
- else if(value == &cg->cpu_cfs_period) {
- ret = read_single_number_file(*filename, value);
+ if (likely(cg->memory.updated_msw_usage_in_bytes)) {
+ update_mem_usage_chart(cg);
}
- else if(value == &cg->cpu_cfs_quota) {
- ret = read_single_number_file(*filename, value);
+ if (likely(cg->memory.updated_failcnt)) {
+ update_mem_failcnt_chart(cg);
}
- else ret = -1;
-
- if(ret) {
- collector_error("Cannot refresh cgroup %s cpu limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename);
- freez(*filename);
- *filename = NULL;
+ if (likely(cg->memory.updated_detailed)) {
+ update_mem_usage_detailed_chart(cg);
+ update_mem_writeback_chart(cg);
+ update_mem_pgfaults_chart(cg);
+ if (!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
+ update_mem_activity_chart(cg);
+ }
}
- }
-}
-
-static inline void update_cpu_limits2(struct cgroup *cg) {
- if(cg->filename_cpu_cfs_quota){
- static procfile *ff = NULL;
-
- ff = procfile_reopen(ff, cg->filename_cpu_cfs_quota, NULL, CGROUP_PROCFILE_FLAG);
- if(unlikely(!ff)) {
- goto cpu_limits2_err;
+ if (likely(cg->io_service_bytes.updated)) {
+ update_io_serviced_bytes_chart(cg);
}
-
- ff = procfile_readall(ff);
- if(unlikely(!ff)) {
- goto cpu_limits2_err;
+ if (likely(cg->io_serviced.updated)) {
+ update_io_serviced_ops_chart(cg);
}
-
- unsigned long lines = procfile_lines(ff);
-
- if (unlikely(lines < 1)) {
- collector_error("CGROUP: file '%s' should have 1 lines.", cg->filename_cpu_cfs_quota);
- return;
+ if (likely(cg->throttle_io_service_bytes.updated)) {
+ update_throttle_io_serviced_bytes_chart(cg);
}
-
- cg->cpu_cfs_period = str2ull(procfile_lineword(ff, 0, 1), NULL);
- cg->cpuset_cpus = get_system_cpus();
-
- char *s = "max\n\0";
- if(strcmp(s, procfile_lineword(ff, 0, 0)) == 0){
- cg->cpu_cfs_quota = cg->cpu_cfs_period * cg->cpuset_cpus;
- } else {
- cg->cpu_cfs_quota = str2ull(procfile_lineword(ff, 0, 0), NULL);
+ if (likely(cg->throttle_io_serviced.updated)) {
+ update_throttle_io_serviced_ops_chart(cg);
}
- netdata_log_debug(D_CGROUP, "CPU limits values: %llu %llu %llu", cg->cpu_cfs_period, cg->cpuset_cpus, cg->cpu_cfs_quota);
- return;
-
-cpu_limits2_err:
- collector_error("Cannot refresh cgroup %s cpu limit by reading '%s'. Will not update its limit anymore.", cg->id, cg->filename_cpu_cfs_quota);
- freez(cg->filename_cpu_cfs_quota);
- cg->filename_cpu_cfs_quota = NULL;
-
- }
-}
-
-static inline int update_memory_limits(char **filename, const RRDSETVAR_ACQUIRED **chart_var, unsigned long long *value, const char *chart_var_name, struct cgroup *cg) {
- if(*filename) {
- if(unlikely(!*chart_var)) {
- *chart_var = rrdsetvar_custom_chart_variable_add_and_acquire(cg->st_mem_usage, chart_var_name);
- if(!*chart_var) {
- collector_error("Cannot create cgroup %s chart variable '%s'. Will not update its limit anymore.", cg->id, chart_var_name);
- freez(*filename);
- *filename = NULL;
- }
+ if (likely(cg->io_queued.updated)) {
+ update_io_queued_ops_chart(cg);
+ }
+ if (likely(cg->io_merged.updated)) {
+ update_io_merged_ops_chart(cg);
}
- if(*filename && *chart_var) {
- if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
- if(read_single_number_file(*filename, value)) {
- collector_error("Cannot refresh cgroup %s memory limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename);
- freez(*filename);
- *filename = NULL;
- }
- else {
- rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024)));
- return 1;
- }
- } else {
- char buffer[30 + 1];
- int ret = read_file(*filename, buffer, 30);
- if(ret) {
- collector_error("Cannot refresh cgroup %s memory limit by reading '%s'. Will not update its limit anymore.", cg->id, *filename);
- freez(*filename);
- *filename = NULL;
- return 0;
- }
- char *s = "max\n\0";
- if(strcmp(s, buffer) == 0){
- *value = UINT64_MAX;
- rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024)));
- return 1;
- }
- *value = str2ull(buffer, NULL);
- rrdsetvar_custom_chart_variable_set(cg->st_mem_usage, *chart_var, (NETDATA_DOUBLE)(*value / (1024 * 1024)));
- return 1;
- }
+ if (likely(cg->pids.pids_current_updated)) {
+ update_pids_current_chart(cg);
}
+
+ cg->function_ready = true;
}
- return 0;
}
-void update_cgroup_charts(int update_every) {
- netdata_log_debug(D_CGROUP, "updating cgroups charts");
-
- char type[RRD_ID_LENGTH_MAX + 1];
- char title[CHART_TITLE_MAX + 1];
-
- int services_do_cpu = 0,
- services_do_mem_usage = 0,
- services_do_mem_detailed = 0,
- services_do_mem_failcnt = 0,
- services_do_swap_usage = 0,
- services_do_io = 0,
- services_do_io_ops = 0,
- services_do_throttle_io = 0,
- services_do_throttle_ops = 0,
- services_do_queued_ops = 0,
- services_do_merged_ops = 0;
-
- struct cgroup *cg;
- for(cg = cgroup_root; cg ; cg = cg->next) {
- if(unlikely(!cg->enabled || cg->pending_renames))
- continue;
-
- if(likely(cgroup_enable_systemd_services && is_cgroup_systemd_service(cg))) {
- if(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES) services_do_cpu++;
-
- if(cgroup_enable_systemd_services_detailed_memory && cg->memory.updated_detailed && cg->memory.enabled_detailed) services_do_mem_detailed++;
- if(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES) services_do_mem_usage++;
- if(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES) services_do_mem_failcnt++;
- if(cg->memory.updated_msw_usage_in_bytes && cg->memory.enabled_msw_usage_in_bytes == CONFIG_BOOLEAN_YES) services_do_swap_usage++;
-
- if(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES) services_do_io++;
- if(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES) services_do_io_ops++;
- if(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES) services_do_throttle_io++;
- if(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES) services_do_throttle_ops++;
- if(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES) services_do_queued_ops++;
- if(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES) services_do_merged_ops++;
+void update_cgroup_charts() {
+ for (struct cgroup *cg = cgroup_root; cg; cg = cg->next) {
+ if(unlikely(!cg->enabled || cg->pending_renames || is_cgroup_systemd_service(cg)))
continue;
- }
-
- type[0] = '\0';
-
- if(likely(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES)) {
- if(unlikely(!cg->st_cpu)) {
- snprintfz(
- title,
- CHART_TITLE_MAX,
- k8s_is_kubepod(cg) ? "CPU Usage (100%% = 1000 mCPU)" : "CPU Usage (100%% = 1 core)");
-
- cg->st_cpu = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "cpu"
- , NULL
- , "cpu"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu" : "cgroup.cpu"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority
- , update_every
- , RRDSET_TYPE_STACKED
- );
-
- rrdset_update_rrdlabels(cg->st_cpu, cg->chart_labels);
-
- if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
- rrddim_add(cg->st_cpu, "user", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL);
- rrddim_add(cg->st_cpu, "system", NULL, 100, system_hz, RRD_ALGORITHM_INCREMENTAL);
- }
- else {
- rrddim_add(cg->st_cpu, "user", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL);
- rrddim_add(cg->st_cpu, "system", NULL, 100, 1000000, RRD_ALGORITHM_INCREMENTAL);
- }
- }
- rrddim_set(cg->st_cpu, "user", cg->cpuacct_stat.user);
- rrddim_set(cg->st_cpu, "system", cg->cpuacct_stat.system);
- rrdset_done(cg->st_cpu);
+ if (likely(cg->cpuacct_stat.updated && cg->cpuacct_stat.enabled == CONFIG_BOOLEAN_YES)) {
+ update_cpu_utilization_chart(cg);
if(likely(cg->filename_cpuset_cpus || cg->filename_cpu_cfs_period || cg->filename_cpu_cfs_quota)) {
if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
@@ -3769,8 +1408,7 @@ void update_cgroup_charts(int update_every) {
if(cg->filename_cpu_cfs_quota) freez(cg->filename_cpu_cfs_quota);
cg->filename_cpu_cfs_quota = NULL;
}
- }
- else {
+ } else {
NETDATA_DOUBLE value = 0, quota = 0;
if(likely( ((!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) && (cg->filename_cpuset_cpus || (cg->filename_cpu_cfs_period && cg->filename_cpu_cfs_quota)))
@@ -3784,49 +1422,10 @@ void update_cgroup_charts(int update_every) {
value = (NETDATA_DOUBLE)cg->cpuset_cpus * 100;
}
if(likely(value)) {
- if(unlikely(!cg->st_cpu_limit)) {
- snprintfz(title, CHART_TITLE_MAX, "CPU Usage within the limits");
-
- cg->st_cpu_limit = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "cpu_limit"
- , NULL
- , "cpu"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_limit" : "cgroup.cpu_limit"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority - 1
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(cg->st_cpu_limit, cg->chart_labels);
-
- if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED))
- rrddim_add(cg->st_cpu_limit, "used", NULL, 1, system_hz, RRD_ALGORITHM_ABSOLUTE);
- else
- rrddim_add(cg->st_cpu_limit, "used", NULL, 1, 1000000, RRD_ALGORITHM_ABSOLUTE);
- cg->prev_cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100;
- }
-
- NETDATA_DOUBLE cpu_usage = 0;
- cpu_usage = (NETDATA_DOUBLE)(cg->cpuacct_stat.user + cg->cpuacct_stat.system) * 100;
- NETDATA_DOUBLE cpu_used = 100 * (cpu_usage - cg->prev_cpu_usage) / (value * update_every);
-
- rrdset_isnot_obsolete(cg->st_cpu_limit);
-
- rrddim_set(cg->st_cpu_limit, "used", (cpu_used > 0)?cpu_used:0);
-
- cg->prev_cpu_usage = cpu_usage;
-
- rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, value);
- rrdset_done(cg->st_cpu_limit);
- }
- else {
- if(unlikely(cg->st_cpu_limit)) {
- rrdset_is_obsolete(cg->st_cpu_limit);
+ update_cpu_utilization_limit_chart(cg, value);
+ } else {
+ if (unlikely(cg->st_cpu_limit)) {
+ rrdset_is_obsolete___safe_from_collector_thread(cg->st_cpu_limit);
cg->st_cpu_limit = NULL;
}
rrdsetvar_custom_chart_variable_set(cg->st_cpu, cg->chart_var_cpu_limit, NAN);
@@ -3836,1056 +1435,137 @@ void update_cgroup_charts(int update_every) {
}
if (likely(cg->cpuacct_cpu_throttling.updated && cg->cpuacct_cpu_throttling.enabled == CONFIG_BOOLEAN_YES)) {
- if (unlikely(!cg->st_cpu_nr_throttled)) {
- snprintfz(title, CHART_TITLE_MAX, "CPU Throttled Runnable Periods");
-
- cg->st_cpu_nr_throttled = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "throttled"
- , NULL
- , "cpu"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.throttled" : "cgroup.throttled"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 10
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(cg->st_cpu_nr_throttled, cg->chart_labels);
- rrddim_add(cg->st_cpu_nr_throttled, "throttled", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- } else {
- rrddim_set(cg->st_cpu_nr_throttled, "throttled", cg->cpuacct_cpu_throttling.nr_throttled_perc);
- rrdset_done(cg->st_cpu_nr_throttled);
- }
-
- if (unlikely(!cg->st_cpu_throttled_time)) {
- snprintfz(title, CHART_TITLE_MAX, "CPU Throttled Time Duration");
-
- cg->st_cpu_throttled_time = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "throttled_duration"
- , NULL
- , "cpu"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.throttled_duration" : "cgroup.throttled_duration"
- , title
- , "ms"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 15
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(cg->st_cpu_throttled_time, cg->chart_labels);
- rrddim_add(cg->st_cpu_throttled_time, "duration", NULL, 1, 1000000, RRD_ALGORITHM_INCREMENTAL);
- } else {
- rrddim_set(cg->st_cpu_throttled_time, "duration", cg->cpuacct_cpu_throttling.throttled_time);
- rrdset_done(cg->st_cpu_throttled_time);
- }
+ update_cpu_throttled_chart(cg);
+ update_cpu_throttled_duration_chart(cg);
}
if (likely(cg->cpuacct_cpu_shares.updated && cg->cpuacct_cpu_shares.enabled == CONFIG_BOOLEAN_YES)) {
- if (unlikely(!cg->st_cpu_shares)) {
- snprintfz(title, CHART_TITLE_MAX, "CPU Time Relative Share");
-
- cg->st_cpu_shares = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "cpu_shares"
- , NULL
- , "cpu"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_shares" : "cgroup.cpu_shares"
- , title
- , "shares"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 20
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(cg->st_cpu_shares, cg->chart_labels);
- rrddim_add(cg->st_cpu_shares, "shares", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- } else {
- rrddim_set(cg->st_cpu_shares, "shares", cg->cpuacct_cpu_shares.shares);
- rrdset_done(cg->st_cpu_shares);
- }
+ update_cpu_shares_chart(cg);
}
- if(likely(cg->cpuacct_usage.updated && cg->cpuacct_usage.enabled == CONFIG_BOOLEAN_YES)) {
- char id[RRD_ID_LENGTH_MAX + 1];
- unsigned int i;
-
- if(unlikely(!cg->st_cpu_per_core)) {
- snprintfz(
- title,
- CHART_TITLE_MAX,
- k8s_is_kubepod(cg) ? "CPU Usage (100%% = 1000 mCPU) Per Core" :
- "CPU Usage (100%% = 1 core) Per Core");
-
- cg->st_cpu_per_core = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "cpu_per_core"
- , NULL
- , "cpu"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_per_core" : "cgroup.cpu_per_core"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 100
- , update_every
- , RRDSET_TYPE_STACKED
- );
-
- rrdset_update_rrdlabels(cg->st_cpu_per_core, cg->chart_labels);
-
- for(i = 0; i < cg->cpuacct_usage.cpus; i++) {
- snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i);
- rrddim_add(cg->st_cpu_per_core, id, NULL, 100, 1000000000, RRD_ALGORITHM_INCREMENTAL);
- }
- }
-
- for(i = 0; i < cg->cpuacct_usage.cpus ;i++) {
- snprintfz(id, RRD_ID_LENGTH_MAX, "cpu%u", i);
- rrddim_set(cg->st_cpu_per_core, id, cg->cpuacct_usage.cpu_percpu[i]);
- }
- rrdset_done(cg->st_cpu_per_core);
+ if (likely(cg->cpuacct_usage.updated && cg->cpuacct_usage.enabled == CONFIG_BOOLEAN_YES)) {
+ update_cpu_per_core_usage_chart(cg);
}
- if(likely(cg->memory.updated_detailed && cg->memory.enabled_detailed == CONFIG_BOOLEAN_YES)) {
- if(unlikely(!cg->st_mem)) {
- snprintfz(title, CHART_TITLE_MAX, "Memory Usage");
-
- cg->st_mem = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "mem"
- , NULL
- , "mem"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.mem" : "cgroup.mem"
- , title
- , "MiB"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 220
- , update_every
- , RRDSET_TYPE_STACKED
- );
-
- rrdset_update_rrdlabels(cg->st_mem, cg->chart_labels);
-
- if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
- rrddim_add(cg->st_mem, "cache", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- rrddim_add(cg->st_mem, "rss", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
-
- if(cg->memory.detailed_has_swap)
- rrddim_add(cg->st_mem, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_add(cg->st_mem, "rss_huge", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- rrddim_add(cg->st_mem, "mapped_file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- } else {
- rrddim_add(cg->st_mem, "anon", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- rrddim_add(cg->st_mem, "kernel_stack", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- rrddim_add(cg->st_mem, "slab", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- rrddim_add(cg->st_mem, "sock", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- rrddim_add(cg->st_mem, "anon_thp", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- rrddim_add(cg->st_mem, "file", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- }
- }
-
- if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
- rrddim_set(cg->st_mem, "cache", cg->memory.total_cache);
- rrddim_set(cg->st_mem, "rss", (cg->memory.total_rss > cg->memory.total_rss_huge)?(cg->memory.total_rss - cg->memory.total_rss_huge):0);
-
- if(cg->memory.detailed_has_swap)
- rrddim_set(cg->st_mem, "swap", cg->memory.total_swap);
-
- rrddim_set(cg->st_mem, "rss_huge", cg->memory.total_rss_huge);
- rrddim_set(cg->st_mem, "mapped_file", cg->memory.total_mapped_file);
- } else {
- rrddim_set(cg->st_mem, "anon", cg->memory.anon);
- rrddim_set(cg->st_mem, "kernel_stack", cg->memory.kernel_stack);
- rrddim_set(cg->st_mem, "slab", cg->memory.slab);
- rrddim_set(cg->st_mem, "sock", cg->memory.sock);
- rrddim_set(cg->st_mem, "anon_thp", cg->memory.anon_thp);
- rrddim_set(cg->st_mem, "file", cg->memory.total_mapped_file);
- }
- rrdset_done(cg->st_mem);
-
- if(unlikely(!cg->st_writeback)) {
- snprintfz(title, CHART_TITLE_MAX, "Writeback Memory");
-
- cg->st_writeback = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "writeback"
- , NULL
- , "mem"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.writeback" : "cgroup.writeback"
- , title
- , "MiB"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 300
- , update_every
- , RRDSET_TYPE_AREA
- );
-
- rrdset_update_rrdlabels(cg->st_writeback, cg->chart_labels);
-
- if(cg->memory.detailed_has_dirty)
- rrddim_add(cg->st_writeback, "dirty", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
-
- rrddim_add(cg->st_writeback, "writeback", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- }
-
- if(cg->memory.detailed_has_dirty)
- rrddim_set(cg->st_writeback, "dirty", cg->memory.total_dirty);
-
- rrddim_set(cg->st_writeback, "writeback", cg->memory.total_writeback);
- rrdset_done(cg->st_writeback);
+ if (likely(cg->memory.updated_detailed && cg->memory.enabled_detailed == CONFIG_BOOLEAN_YES)) {
+ update_mem_usage_detailed_chart(cg);
+ update_mem_writeback_chart(cg);
if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
- if(unlikely(!cg->st_mem_activity)) {
- snprintfz(title, CHART_TITLE_MAX, "Memory Activity");
-
- cg->st_mem_activity = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "mem_activity"
- , NULL
- , "mem"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_activity" : "cgroup.mem_activity"
- , title
- , "MiB/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 400
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(cg->st_mem_activity, cg->chart_labels);
-
- rrddim_add(cg->st_mem_activity, "pgpgin", "in", system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
- rrddim_add(cg->st_mem_activity, "pgpgout", "out", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
- }
-
- rrddim_set(cg->st_mem_activity, "pgpgin", cg->memory.total_pgpgin);
- rrddim_set(cg->st_mem_activity, "pgpgout", cg->memory.total_pgpgout);
- rrdset_done(cg->st_mem_activity);
+ update_mem_activity_chart(cg);
}
- if(unlikely(!cg->st_pgfaults)) {
- snprintfz(title, CHART_TITLE_MAX, "Memory Page Faults");
-
- cg->st_pgfaults = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "pgfaults"
- , NULL
- , "mem"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.pgfaults" : "cgroup.pgfaults"
- , title
- , "MiB/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 500
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(cg->st_pgfaults, cg->chart_labels);
-
- rrddim_add(cg->st_pgfaults, "pgfault", NULL, system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
- rrddim_add(cg->st_pgfaults, "pgmajfault", "swap", -system_page_size, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
- }
-
- rrddim_set(cg->st_pgfaults, "pgfault", cg->memory.total_pgfault);
- rrddim_set(cg->st_pgfaults, "pgmajfault", cg->memory.total_pgmajfault);
- rrdset_done(cg->st_pgfaults);
+ update_mem_pgfaults_chart(cg);
}
- if(likely(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES)) {
- if(unlikely(!cg->st_mem_usage)) {
- snprintfz(title, CHART_TITLE_MAX, "Used Memory");
-
- cg->st_mem_usage = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "mem_usage"
- , NULL
- , "mem"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage" : "cgroup.mem_usage"
- , title
- , "MiB"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 210
- , update_every
- , RRDSET_TYPE_STACKED
- );
-
- rrdset_update_rrdlabels(cg->st_mem_usage, cg->chart_labels);
-
- rrddim_add(cg->st_mem_usage, "ram", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- rrddim_add(cg->st_mem_usage, "swap", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- }
-
- rrddim_set(cg->st_mem_usage, "ram", cg->memory.usage_in_bytes);
- if(!(cg->options & CGROUP_OPTIONS_IS_UNIFIED)) {
- rrddim_set(
- cg->st_mem_usage,
- "swap",
- cg->memory.msw_usage_in_bytes > (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) ?
- cg->memory.msw_usage_in_bytes - (cg->memory.usage_in_bytes + cg->memory.total_inactive_file) : 0);
- } else {
- rrddim_set(cg->st_mem_usage, "swap", cg->memory.msw_usage_in_bytes);
- }
- rrdset_done(cg->st_mem_usage);
-
- if (likely(update_memory_limits(&cg->filename_memory_limit, &cg->chart_var_memory_limit, &cg->memory_limit, "memory_limit", cg))) {
- static unsigned long long ram_total = 0;
-
- if(unlikely(!ram_total)) {
- procfile *ff = NULL;
-
- char filename[FILENAME_MAX + 1];
- snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/meminfo");
- ff = procfile_open(config_get("plugin:cgroups", "meminfo filename to monitor", filename), " \t:", PROCFILE_FLAG_DEFAULT);
-
- if(likely(ff))
- ff = procfile_readall(ff);
- if(likely(ff && procfile_lines(ff) && !strncmp(procfile_word(ff, 0), "MemTotal", 8)))
- ram_total = str2ull(procfile_word(ff, 1), NULL) * 1024;
- else {
- collector_error("Cannot read file %s. Will not update cgroup %s RAM limit anymore.", filename, cg->id);
- freez(cg->filename_memory_limit);
- cg->filename_memory_limit = NULL;
- }
-
- procfile_close(ff);
- }
+ if (likely(cg->memory.updated_usage_in_bytes && cg->memory.enabled_usage_in_bytes == CONFIG_BOOLEAN_YES)) {
+ update_mem_usage_chart(cg);
- if(likely(ram_total)) {
- unsigned long long memory_limit = ram_total;
+ // FIXME: this if should be only for unlimited charts
+ if(likely(host_ram_total)) {
+ // FIXME: do we need to update mem limits on every data collection?
+ if (likely(update_memory_limits(cg))) {
- if(unlikely(cg->memory_limit < ram_total))
+ unsigned long long memory_limit = host_ram_total;
+ if (unlikely(cg->memory_limit < host_ram_total))
memory_limit = cg->memory_limit;
- if(unlikely(!cg->st_mem_usage_limit)) {
- snprintfz(title, CHART_TITLE_MAX, "Used RAM within the limits");
-
- cg->st_mem_usage_limit = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "mem_usage_limit"
- , NULL
- , "mem"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_usage_limit": "cgroup.mem_usage_limit"
- , title
- , "MiB"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 200
- , update_every
- , RRDSET_TYPE_STACKED
- );
-
- rrdset_update_rrdlabels(cg->st_mem_usage_limit, cg->chart_labels);
-
- rrddim_add(cg->st_mem_usage_limit, "available", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- rrddim_add(cg->st_mem_usage_limit, "used", NULL, 1, 1024 * 1024, RRD_ALGORITHM_ABSOLUTE);
- }
-
- rrdset_isnot_obsolete(cg->st_mem_usage_limit);
-
- rrddim_set(cg->st_mem_usage_limit, "available", memory_limit - cg->memory.usage_in_bytes);
- rrddim_set(cg->st_mem_usage_limit, "used", cg->memory.usage_in_bytes);
- rrdset_done(cg->st_mem_usage_limit);
-
- if (unlikely(!cg->st_mem_utilization)) {
- snprintfz(title, CHART_TITLE_MAX, "Memory Utilization");
-
- cg->st_mem_utilization = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "mem_utilization"
- , NULL
- , "mem"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_utilization" : "cgroup.mem_utilization"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 199
- , update_every
- , RRDSET_TYPE_AREA
- );
-
- rrdset_update_rrdlabels(cg->st_mem_utilization, cg->chart_labels);
-
- rrddim_add(cg->st_mem_utilization, "utilization", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ update_mem_usage_limit_chart(cg, memory_limit);
+ update_mem_utilization_chart(cg, memory_limit);
+ } else {
+ if (unlikely(cg->st_mem_usage_limit)) {
+ rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_usage_limit);
+ cg->st_mem_usage_limit = NULL;
}
- if (memory_limit) {
- rrdset_isnot_obsolete(cg->st_mem_utilization);
-
- rrddim_set(
- cg->st_mem_utilization, "utilization", cg->memory.usage_in_bytes * 100 / memory_limit);
- rrdset_done(cg->st_mem_utilization);
+ if (unlikely(cg->st_mem_utilization)) {
+ rrdset_is_obsolete___safe_from_collector_thread(cg->st_mem_utilization);
+ cg->st_mem_utilization = NULL;
}
}
}
- else {
- if(unlikely(cg->st_mem_usage_limit)) {
- rrdset_is_obsolete(cg->st_mem_usage_limit);
- cg->st_mem_usage_limit = NULL;
- }
-
- if(unlikely(cg->st_mem_utilization)) {
- rrdset_is_obsolete(cg->st_mem_utilization);
- cg->st_mem_utilization = NULL;
- }
- }
-
- update_memory_limits(&cg->filename_memoryswap_limit, &cg->chart_var_memoryswap_limit, &cg->memoryswap_limit, "memory_and_swap_limit", cg);
}
- if(likely(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES)) {
- if(unlikely(!cg->st_mem_failcnt)) {
- snprintfz(title, CHART_TITLE_MAX, "Memory Limit Failures");
-
- cg->st_mem_failcnt = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "mem_failcnt"
- , NULL
- , "mem"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.mem_failcnt" : "cgroup.mem_failcnt"
- , title
- , "count"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 250
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(cg->st_mem_failcnt, cg->chart_labels);
-
- rrddim_add(cg->st_mem_failcnt, "failures", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- }
-
- rrddim_set(cg->st_mem_failcnt, "failures", cg->memory.failcnt);
- rrdset_done(cg->st_mem_failcnt);
+ if (likely(cg->memory.updated_failcnt && cg->memory.enabled_failcnt == CONFIG_BOOLEAN_YES)) {
+ update_mem_failcnt_chart(cg);
}
- if(likely(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) {
- if(unlikely(!cg->st_io)) {
- snprintfz(title, CHART_TITLE_MAX, "I/O Bandwidth (all disks)");
-
- cg->st_io = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "io"
- , NULL
- , "disk"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.io" : "cgroup.io"
- , title
- , "KiB/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 1200
- , update_every
- , RRDSET_TYPE_AREA
- );
-
- rrdset_update_rrdlabels(cg->st_io, cg->chart_labels);
-
- rrddim_add(cg->st_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL);
- rrddim_add(cg->st_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL);
- }
-
- rrddim_set(cg->st_io, "read", cg->io_service_bytes.Read);
- rrddim_set(cg->st_io, "write", cg->io_service_bytes.Write);
- rrdset_done(cg->st_io);
+ if (likely(cg->io_service_bytes.updated && cg->io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) {
+ update_io_serviced_bytes_chart(cg);
}
- if(likely(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES)) {
- if(unlikely(!cg->st_serviced_ops)) {
- snprintfz(title, CHART_TITLE_MAX, "Serviced I/O Operations (all disks)");
-
- cg->st_serviced_ops = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "serviced_ops"
- , NULL
- , "disk"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.serviced_ops" : "cgroup.serviced_ops"
- , title
- , "operations/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 1200
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(cg->st_serviced_ops, cg->chart_labels);
-
- rrddim_add(cg->st_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- rrddim_add(cg->st_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
- }
-
- rrddim_set(cg->st_serviced_ops, "read", cg->io_serviced.Read);
- rrddim_set(cg->st_serviced_ops, "write", cg->io_serviced.Write);
- rrdset_done(cg->st_serviced_ops);
+ if (likely(cg->io_serviced.updated && cg->io_serviced.enabled == CONFIG_BOOLEAN_YES)) {
+ update_io_serviced_ops_chart(cg);
}
- if(likely(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) {
- if(unlikely(!cg->st_throttle_io)) {
- snprintfz(title, CHART_TITLE_MAX, "Throttle I/O Bandwidth (all disks)");
-
- cg->st_throttle_io = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "throttle_io"
- , NULL
- , "disk"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_io" : "cgroup.throttle_io"
- , title
- , "KiB/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 1200
- , update_every
- , RRDSET_TYPE_AREA
- );
-
- rrdset_update_rrdlabels(cg->st_throttle_io, cg->chart_labels);
-
- rrddim_add(cg->st_throttle_io, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL);
- rrddim_add(cg->st_throttle_io, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL);
- }
-
- rrddim_set(cg->st_throttle_io, "read", cg->throttle_io_service_bytes.Read);
- rrddim_set(cg->st_throttle_io, "write", cg->throttle_io_service_bytes.Write);
- rrdset_done(cg->st_throttle_io);
+ if (likely(cg->throttle_io_service_bytes.updated && cg->throttle_io_service_bytes.enabled == CONFIG_BOOLEAN_YES)) {
+ update_throttle_io_serviced_bytes_chart(cg);
}
- if(likely(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES)) {
- if(unlikely(!cg->st_throttle_serviced_ops)) {
- snprintfz(title, CHART_TITLE_MAX, "Throttle Serviced I/O Operations (all disks)");
-
- cg->st_throttle_serviced_ops = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "throttle_serviced_ops"
- , NULL
- , "disk"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.throttle_serviced_ops" : "cgroup.throttle_serviced_ops"
- , title
- , "operations/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 1200
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(cg->st_throttle_serviced_ops, cg->chart_labels);
-
- rrddim_add(cg->st_throttle_serviced_ops, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- rrddim_add(cg->st_throttle_serviced_ops, "write", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
- }
-
- rrddim_set(cg->st_throttle_serviced_ops, "read", cg->throttle_io_serviced.Read);
- rrddim_set(cg->st_throttle_serviced_ops, "write", cg->throttle_io_serviced.Write);
- rrdset_done(cg->st_throttle_serviced_ops);
+ if (likely(cg->throttle_io_serviced.updated && cg->throttle_io_serviced.enabled == CONFIG_BOOLEAN_YES)) {
+ update_throttle_io_serviced_ops_chart(cg);
}
- if(likely(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES)) {
- if(unlikely(!cg->st_queued_ops)) {
- snprintfz(title, CHART_TITLE_MAX, "Queued I/O Operations (all disks)");
-
- cg->st_queued_ops = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "queued_ops"
- , NULL
- , "disk"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.queued_ops" : "cgroup.queued_ops"
- , title
- , "operations"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2000
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(cg->st_queued_ops, cg->chart_labels);
-
- rrddim_add(cg->st_queued_ops, "read", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- rrddim_add(cg->st_queued_ops, "write", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE);
- }
-
- rrddim_set(cg->st_queued_ops, "read", cg->io_queued.Read);
- rrddim_set(cg->st_queued_ops, "write", cg->io_queued.Write);
- rrdset_done(cg->st_queued_ops);
+ if (likely(cg->io_queued.updated && cg->io_queued.enabled == CONFIG_BOOLEAN_YES)) {
+ update_io_queued_ops_chart(cg);
}
- if(likely(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES)) {
- if(unlikely(!cg->st_merged_ops)) {
- snprintfz(title, CHART_TITLE_MAX, "Merged I/O Operations (all disks)");
-
- cg->st_merged_ops = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "merged_ops"
- , NULL
- , "disk"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.merged_ops" : "cgroup.merged_ops"
- , title
- , "operations/s"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2100
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(cg->st_merged_ops, cg->chart_labels);
-
- rrddim_add(cg->st_merged_ops, "read", NULL, 1, 1024, RRD_ALGORITHM_INCREMENTAL);
- rrddim_add(cg->st_merged_ops, "write", NULL, -1, 1024, RRD_ALGORITHM_INCREMENTAL);
- }
+ if (likely(cg->io_merged.updated && cg->io_merged.enabled == CONFIG_BOOLEAN_YES)) {
+ update_io_merged_ops_chart(cg);
+ }
- rrddim_set(cg->st_merged_ops, "read", cg->io_merged.Read);
- rrddim_set(cg->st_merged_ops, "write", cg->io_merged.Write);
- rrdset_done(cg->st_merged_ops);
+ if (likely(cg->pids.pids_current_updated)) {
+ update_pids_current_chart(cg);
}
if (cg->options & CGROUP_OPTIONS_IS_UNIFIED) {
- struct pressure *res = &cg->cpu_pressure;
-
- if (likely(res->updated && res->some.enabled)) {
- struct pressure_charts *pcs;
- pcs = &res->some;
-
- if (unlikely(!pcs->share_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "CPU some pressure");
- chart = pcs->share_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "cpu_some_pressure"
- , NULL
- , "cpu"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure" : "cgroup.cpu_some_pressure"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2200
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
- pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- }
-
- if (unlikely(!pcs->total_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "CPU some pressure stall time");
- chart = pcs->total_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "cpu_some_pressure_stall_time"
- , NULL
- , "cpu"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_some_pressure_stall_time" : "cgroup.cpu_some_pressure_stall_time"
- , title
- , "ms"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2220
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
- pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- }
-
- update_pressure_charts(pcs);
- }
- if (likely(res->updated && res->full.enabled)) {
- struct pressure_charts *pcs;
- pcs = &res->full;
-
- if (unlikely(!pcs->share_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "CPU full pressure");
- chart = pcs->share_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "cpu_full_pressure"
- , NULL
- , "cpu"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure" : "cgroup.cpu_full_pressure"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2240
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
- pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- }
-
- if (unlikely(!pcs->total_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "CPU full pressure stall time");
- chart = pcs->total_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "cpu_full_pressure_stall_time"
- , NULL
- , "cpu"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.cpu_full_pressure_stall_time" : "cgroup.cpu_full_pressure_stall_time"
- , title
- , "ms"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2260
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
- pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- }
-
- update_pressure_charts(pcs);
- }
-
- res = &cg->memory_pressure;
-
- if (likely(res->updated && res->some.enabled)) {
- struct pressure_charts *pcs;
- pcs = &res->some;
-
- if (unlikely(!pcs->share_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "Memory some pressure");
- chart = pcs->share_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "mem_some_pressure"
- , NULL
- , "mem"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure" : "cgroup.memory_some_pressure"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2300
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
- pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- }
-
- if (unlikely(!pcs->total_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "Memory some pressure stall time");
- chart = pcs->total_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "memory_some_pressure_stall_time"
- , NULL
- , "mem"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_some_pressure_stall_time" : "cgroup.memory_some_pressure_stall_time"
- , title
- , "ms"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2320
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
- pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- }
-
- update_pressure_charts(pcs);
- }
-
- if (likely(res->updated && res->full.enabled)) {
- struct pressure_charts *pcs;
- pcs = &res->full;
-
- if (unlikely(!pcs->share_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "Memory full pressure");
-
- chart = pcs->share_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "mem_full_pressure"
- , NULL
- , "mem"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure" : "cgroup.memory_full_pressure"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2340
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
- pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- }
-
- if (unlikely(!pcs->total_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "Memory full pressure stall time");
- chart = pcs->total_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "memory_full_pressure_stall_time"
- , NULL
- , "mem"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.memory_full_pressure_stall_time" : "cgroup.memory_full_pressure_stall_time"
- , title
- , "ms"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2360
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
- pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- }
-
- update_pressure_charts(pcs);
- }
-
- res = &cg->irq_pressure;
-
- if (likely(res->updated && res->some.enabled)) {
- struct pressure_charts *pcs;
- pcs = &res->some;
-
- if (unlikely(!pcs->share_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure");
- chart = pcs->share_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "irq_some_pressure"
- , NULL
- , "interrupts"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure" : "cgroup.irq_some_pressure"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2310
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
- pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- }
-
- if (unlikely(!pcs->total_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "IRQ some pressure stall time");
- chart = pcs->total_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "irq_some_pressure_stall_time"
- , NULL
- , "interrupts"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_some_pressure_stall_time" : "cgroup.irq_some_pressure_stall_time"
- , title
- , "ms"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2330
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
- pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- }
-
- update_pressure_charts(pcs);
+ if (likely(cg->cpu_pressure.updated)) {
+ if (cg->cpu_pressure.some.enabled) {
+ update_cpu_some_pressure_chart(cg);
+ update_cpu_some_pressure_stall_time_chart(cg);
+ }
+ if (cg->cpu_pressure.full.enabled) {
+ update_cpu_full_pressure_chart(cg);
+ update_cpu_full_pressure_stall_time_chart(cg);
+ }
}
- if (likely(res->updated && res->full.enabled)) {
- struct pressure_charts *pcs;
- pcs = &res->full;
-
- if (unlikely(!pcs->share_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure");
-
- chart = pcs->share_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "irq_full_pressure"
- , NULL
- , "interrupts"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure" : "cgroup.irq_full_pressure"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2350
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
- pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ if (likely(cg->memory_pressure.updated)) {
+ if (cg->memory_pressure.some.enabled) {
+ update_mem_some_pressure_chart(cg);
+ update_mem_some_pressure_stall_time_chart(cg);
}
-
- if (unlikely(!pcs->total_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "IRQ full pressure stall time");
- chart = pcs->total_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "irq_full_pressure_stall_time"
- , NULL
- , "interrupts"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.irq_full_pressure_stall_time" : "cgroup.irq_full_pressure_stall_time"
- , title
- , "ms"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2370
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
- pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ if (cg->memory_pressure.full.enabled) {
+ update_mem_full_pressure_chart(cg);
+ update_mem_full_pressure_stall_time_chart(cg);
}
-
- update_pressure_charts(pcs);
}
- res = &cg->io_pressure;
-
- if (likely(res->updated && res->some.enabled)) {
- struct pressure_charts *pcs;
- pcs = &res->some;
-
- if (unlikely(!pcs->share_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "I/O some pressure");
- chart = pcs->share_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "io_some_pressure"
- , NULL
- , "disk"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure" : "cgroup.io_some_pressure"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2400
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
- pcs->share_time.rd10 = rrddim_add(chart, "some 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd60 = rrddim_add(chart, "some 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd300 = rrddim_add(chart, "some 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ if (likely(cg->irq_pressure.updated)) {
+ if (cg->irq_pressure.some.enabled) {
+ update_irq_some_pressure_chart(cg);
+ update_irq_some_pressure_stall_time_chart(cg);
}
-
- if (unlikely(!pcs->total_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "I/O some pressure stall time");
- chart = pcs->total_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "io_some_pressure_stall_time"
- , NULL
- , "disk"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.io_some_pressure_stall_time" : "cgroup.io_some_pressure_stall_time"
- , title
- , "ms"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2420
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
- pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ if (cg->irq_pressure.full.enabled) {
+ update_irq_full_pressure_chart(cg);
+ update_irq_full_pressure_stall_time_chart(cg);
}
-
- update_pressure_charts(pcs);
}
- if (likely(res->updated && res->full.enabled)) {
- struct pressure_charts *pcs;
- pcs = &res->full;
-
- if (unlikely(!pcs->share_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "I/O full pressure");
- chart = pcs->share_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "io_full_pressure"
- , NULL
- , "disk"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure" : "cgroup.io_full_pressure"
- , title
- , "percentage"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2440
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->share_time.st, cg->chart_labels);
- pcs->share_time.rd10 = rrddim_add(chart, "full 10", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd60 = rrddim_add(chart, "full 60", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
- pcs->share_time.rd300 = rrddim_add(chart, "full 300", NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE);
+ if (likely(cg->io_pressure.updated)) {
+ if (cg->io_pressure.some.enabled) {
+ update_io_some_pressure_chart(cg);
+ update_io_some_pressure_stall_time_chart(cg);
}
-
- if (unlikely(!pcs->total_time.st)) {
- RRDSET *chart;
- snprintfz(title, CHART_TITLE_MAX, "I/O full pressure stall time");
- chart = pcs->total_time.st = rrdset_create_localhost(
- cgroup_chart_type(type, cg->chart_id, RRD_ID_LENGTH_MAX)
- , "io_full_pressure_stall_time"
- , NULL
- , "disk"
- , k8s_is_kubepod(cg) ? "k8s.cgroup.io_full_pressure_stall_time" : "cgroup.io_full_pressure_stall_time"
- , title
- , "ms"
- , PLUGIN_CGROUPS_NAME
- , PLUGIN_CGROUPS_MODULE_CGROUPS_NAME
- , cgroup_containers_chart_priority + 2460
- , update_every
- , RRDSET_TYPE_LINE
- );
- rrdset_update_rrdlabels(chart = pcs->total_time.st, cg->chart_labels);
- pcs->total_time.rdtotal = rrddim_add(chart, "time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ if (cg->io_pressure.full.enabled) {
+ update_io_full_pressure_chart(cg);
+ update_io_full_pressure_stall_time_chart(cg);
}
-
- update_pressure_charts(pcs);
}
}
- }
-
- if(likely(cgroup_enable_systemd_services))
- update_systemd_services_charts(update_every, services_do_cpu, services_do_mem_usage, services_do_mem_detailed
- , services_do_mem_failcnt, services_do_swap_usage, services_do_io
- , services_do_io_ops, services_do_throttle_io, services_do_throttle_ops
- , services_do_queued_ops, services_do_merged_ops
- );
- netdata_log_debug(D_CGROUP, "done updating cgroups charts");
+ cg->function_ready = true;
+ }
}
// ----------------------------------------------------------------------------
@@ -4901,19 +1581,15 @@ static void cgroup_main_cleanup(void *ptr) {
usec_t max = 2 * USEC_PER_SEC, step = 50000;
- if (!discovery_thread.exited) {
- collector_info("stopping discovery thread worker");
- uv_mutex_lock(&discovery_thread.mutex);
- discovery_thread.start_discovery = 1;
- uv_cond_signal(&discovery_thread.cond_var);
- uv_mutex_unlock(&discovery_thread.mutex);
- }
-
- collector_info("waiting for discovery thread to finish...");
-
- while (!discovery_thread.exited && max > 0) {
- max -= step;
- sleep_usec(step);
+ if (!__atomic_load_n(&discovery_thread.exited, __ATOMIC_RELAXED)) {
+ collector_info("waiting for discovery thread to finish...");
+ while (!__atomic_load_n(&discovery_thread.exited, __ATOMIC_RELAXED) && max > 0) {
+ uv_mutex_lock(&discovery_thread.mutex);
+ uv_cond_signal(&discovery_thread.cond_var);
+ uv_mutex_unlock(&discovery_thread.mutex);
+ max -= step;
+ sleep_usec(step);
+ }
}
if (shm_mutex_cgroup_ebpf != SEM_FAILED) {
@@ -4932,6 +1608,22 @@ static void cgroup_main_cleanup(void *ptr) {
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
}
+void cgroup_read_host_total_ram() {
+    // Cache MemTotal from the (configurable) meminfo file in host_ram_total; on failure only an error is logged.
+    char filename[FILENAME_MAX + 1];
+    snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, "/proc/meminfo");
+    const char *path = config_get("plugin:cgroups", "meminfo filename to monitor", filename);
+    procfile *ff = procfile_open(path, " \t:", PROCFILE_FLAG_DEFAULT);
+
+    // procfile_open() can fail and return NULL, so check ff before handing it to procfile_readall().
+    if (likely(ff && (ff = procfile_readall(ff)) && procfile_lines(ff) && !strncmp(procfile_word(ff, 0), "MemTotal", 8)))
+        host_ram_total = str2ull(procfile_word(ff, 1), NULL) * 1024; // value * 1024 (presumably kB -> bytes)
+    else
+        collector_error("Cannot read file %s. Will not create RAM limit charts.", path); // report the path actually opened
+
+    procfile_close(ff);
+}
+
void *cgroups_main(void *ptr) {
worker_register("CGROUPS");
worker_register_job_name(WORKER_CGROUPS_LOCK, "lock");
@@ -4946,6 +1638,9 @@ void *cgroups_main(void *ptr) {
}
read_cgroup_plugin_configuration();
+
+ cgroup_read_host_total_ram();
+
netdata_cgroup_ebpf_initialize_shm();
if (uv_mutex_init(&cgroup_root_mutex)) {
@@ -4953,8 +1648,6 @@ void *cgroups_main(void *ptr) {
goto exit;
}
- // dispatch a discovery worker thread
- discovery_thread.start_discovery = 0;
discovery_thread.exited = 0;
if (uv_mutex_init(&discovery_thread.mutex)) {
@@ -4971,23 +1664,35 @@ void *cgroups_main(void *ptr) {
collector_error("CGROUP: cannot create thread worker. uv_thread_create(): %s", uv_strerror(error));
goto exit;
}
- uv_thread_set_name_np(discovery_thread.thread, "PLUGIN[cgroups]");
+
+ uv_thread_set_name_np(discovery_thread.thread, "P[cgroups]");
+
+ // we register this only on localhost
+ // for the other nodes, the origin server should register it
+ rrd_collector_started(); // this creates a collector that runs for as long as netdata runs
+ cgroup_netdev_link_init();
+ rrd_function_add(localhost, NULL, "containers-vms", 10, RRDFUNCTIONS_CGTOP_HELP, true, cgroup_function_cgroup_top, NULL);
+ rrd_function_add(localhost, NULL, "systemd-services", 10, RRDFUNCTIONS_CGTOP_HELP, true, cgroup_function_systemd_top, NULL);
heartbeat_t hb;
heartbeat_init(&hb);
usec_t step = cgroup_update_every * USEC_PER_SEC;
usec_t find_every = cgroup_check_for_new_every * USEC_PER_SEC, find_dt = 0;
+ netdata_thread_disable_cancelability();
+
while(service_running(SERVICE_COLLECTORS)) {
worker_is_idle();
usec_t hb_dt = heartbeat_next(&hb, step);
- if(unlikely(!service_running(SERVICE_COLLECTORS))) break;
+ if (unlikely(!service_running(SERVICE_COLLECTORS)))
+ break;
find_dt += hb_dt;
if (unlikely(find_dt >= find_every || (!is_inside_k8s && cgroups_check))) {
+ uv_mutex_lock(&discovery_thread.mutex);
uv_cond_signal(&discovery_thread.cond_var);
- discovery_thread.start_discovery = 1;
+ uv_mutex_unlock(&discovery_thread.mutex);
find_dt = 0;
cgroups_check = 0;
}
@@ -4997,18 +1702,28 @@ void *cgroups_main(void *ptr) {
worker_is_busy(WORKER_CGROUPS_READ);
read_all_discovered_cgroups(cgroup_root);
- if(unlikely(!service_running(SERVICE_COLLECTORS))) break;
+
+ if (unlikely(!service_running(SERVICE_COLLECTORS))) {
+ uv_mutex_unlock(&cgroup_root_mutex);
+ break;
+ }
worker_is_busy(WORKER_CGROUPS_CHART);
- update_cgroup_charts(cgroup_update_every);
- if(unlikely(!service_running(SERVICE_COLLECTORS))) break;
+
+ update_cgroup_charts();
+ if (cgroup_enable_systemd_services)
+ update_cgroup_systemd_services_charts();
+
+ if (unlikely(!service_running(SERVICE_COLLECTORS))) {
+ uv_mutex_unlock(&cgroup_root_mutex);
+ break;
+ }
worker_is_idle();
uv_mutex_unlock(&cgroup_root_mutex);
}
exit:
- worker_unregister();
netdata_thread_cleanup_pop(1);
return NULL;
}
diff --git a/collectors/cgroups.plugin/sys_fs_cgroup.h b/collectors/cgroups.plugin/sys_fs_cgroup.h
index dc800ba912c61e..e8cfcf5f60c7d9 100644
--- a/collectors/cgroups.plugin/sys_fs_cgroup.h
+++ b/collectors/cgroups.plugin/sys_fs_cgroup.h
@@ -5,6 +5,10 @@
#include "daemon/common.h"
+#define PLUGIN_CGROUPS_NAME "cgroups.plugin"
+#define PLUGIN_CGROUPS_MODULE_SYSTEMD_NAME "systemd"
+#define PLUGIN_CGROUPS_MODULE_CGROUPS_NAME "/sys/fs/cgroup"
+
#define CGROUP_OPTIONS_DISABLED_DUPLICATE 0x00000001
#define CGROUP_OPTIONS_SYSTEM_SLICE_SERVICE 0x00000002
#define CGROUP_OPTIONS_IS_UNIFIED 0x00000004
@@ -39,6 +43,6 @@ typedef struct netdata_ebpf_cgroup_shm {
#include "../proc.plugin/plugin_proc.h"
-char *cgroup_parse_resolved_name_and_labels(DICTIONARY *labels, char *data);
+char *cgroup_parse_resolved_name_and_labels(RRDLABELS *labels, char *data);
#endif //NETDATA_SYS_FS_CGROUP_H
diff --git a/collectors/cgroups.plugin/tests/test_cgroups_plugin.c b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c
index a0f91530905d5d..bb1fb398856a02 100644
--- a/collectors/cgroups.plugin/tests/test_cgroups_plugin.c
+++ b/collectors/cgroups.plugin/tests/test_cgroups_plugin.c
@@ -20,13 +20,12 @@ struct k8s_test_data {
int i;
};
-static int read_label_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data)
+static int read_label_callback(const char *name, const char *value, void *data)
{
struct k8s_test_data *test_data = (struct k8s_test_data *)data;
test_data->result_key[test_data->i] = name;
test_data->result_value[test_data->i] = value;
- test_data->result_ls[test_data->i] = ls;
test_data->i++;
@@ -37,7 +36,7 @@ static void test_cgroup_parse_resolved_name(void **state)
{
UNUSED(state);
- DICTIONARY *labels = rrdlabels_create();
+ RRDLABELS *labels = rrdlabels_create();
struct k8s_test_data test_data[] = {
// One label
diff --git a/collectors/cgroups.plugin/tests/test_doubles.c b/collectors/cgroups.plugin/tests/test_doubles.c
index 498f649f5d62b2..b13d4b19ceddd2 100644
--- a/collectors/cgroups.plugin/tests/test_doubles.c
+++ b/collectors/cgroups.plugin/tests/test_doubles.c
@@ -2,12 +2,12 @@
#include "test_cgroups_plugin.h"
-void rrdset_is_obsolete(RRDSET *st)
+void rrdset_is_obsolete___safe_from_collector_thread(RRDSET *st)
{
UNUSED(st);
}
-void rrdset_isnot_obsolete(RRDSET *st)
+void rrdset_isnot_obsolete___safe_from_collector_thread(RRDSET *st)
{
UNUSED(st);
}
diff --git a/collectors/charts.d.plugin/Makefile.am b/collectors/charts.d.plugin/Makefile.am
index 03c7f0a949237b..f82992fd4a5006 100644
--- a/collectors/charts.d.plugin/Makefile.am
+++ b/collectors/charts.d.plugin/Makefile.am
@@ -45,6 +45,5 @@ include ap/Makefile.inc
include apcupsd/Makefile.inc
include example/Makefile.inc
include libreswan/Makefile.inc
-include nut/Makefile.inc
include opensips/Makefile.inc
include sensors/Makefile.inc
diff --git a/collectors/charts.d.plugin/ap/README.md b/collectors/charts.d.plugin/ap/README.md
deleted file mode 100644
index 339ad13751e4af..00000000000000
--- a/collectors/charts.d.plugin/ap/README.md
+++ /dev/null
@@ -1,104 +0,0 @@
-
-
-# Access point collector
-
-The `ap` collector visualizes data related to access points.
-
-## Example Netdata charts
-
-
-
-## How it works
-
-It does the following:
-
-1. Runs `iw dev` searching for interfaces that have `type AP`.
-
- From the same output it collects the SSIDs each AP supports by looking for lines `ssid NAME`.
-
- Example:
-
-```sh
-# iw dev
-phy#0
- Interface wlan0
- ifindex 3
- wdev 0x1
- addr 7c:dd:90:77:34:2a
- ssid TSAOUSIS
- type AP
- channel 7 (2442 MHz), width: 20 MHz, center1: 2442 MHz
-```
-
-2. For each interface found, it runs `iw INTERFACE station dump`.
-
- From the output is collects:
-
- - rx/tx bytes
- - rx/tx packets
- - tx retries
- - tx failed
- - signal strength
- - rx/tx bitrate
- - expected throughput
-
- Example:
-
-```sh
-# iw wlan0 station dump
-Station 40:b8:37:5a:ed:5e (on wlan0)
- inactive time: 910 ms
- rx bytes: 15588897
- rx packets: 127772
- tx bytes: 52257763
- tx packets: 95802
- tx retries: 2162
- tx failed: 28
- signal: -43 dBm
- signal avg: -43 dBm
- tx bitrate: 65.0 MBit/s MCS 7
- rx bitrate: 1.0 MBit/s
- expected throughput: 32.125Mbps
- authorized: yes
- authenticated: yes
- preamble: long
- WMM/WME: yes
- MFP: no
- TDLS peer: no
-```
-
-3. For each interface found, it creates 6 charts:
-
- - Number of Connected clients
- - Bandwidth for all clients
- - Packets for all clients
- - Transmit Issues for all clients
- - Average Signal among all clients
- - Average Bitrate (including average expected throughput) among all clients
-
-## Configuration
-
-If using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), make sure `netdata-plugin-chartsd` is installed.
-
-Edit the `charts.d/ap.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config charts.d/ap.conf
-```
-
-You can only set `ap_update_every=NUMBER` to change the data collection frequency.
-
-## Auto-detection
-
-The plugin is able to auto-detect if you are running access points on your linux box.
-
-
diff --git a/collectors/charts.d.plugin/ap/README.md b/collectors/charts.d.plugin/ap/README.md
new file mode 120000
index 00000000000000..5b6e751301e428
--- /dev/null
+++ b/collectors/charts.d.plugin/ap/README.md
@@ -0,0 +1 @@
+integrations/access_points.md
\ No newline at end of file
diff --git a/collectors/charts.d.plugin/ap/integrations/access_points.md b/collectors/charts.d.plugin/ap/integrations/access_points.md
new file mode 100644
index 00000000000000..a0de2c4df2938d
--- /dev/null
+++ b/collectors/charts.d.plugin/ap/integrations/access_points.md
@@ -0,0 +1,174 @@
+
+
+# Access Points
+
+
+
+
+
+Plugin: charts.d.plugin
+Module: ap
+
+
+
+## Overview
+
+The ap collector visualizes data related to wireless access points.
+
+It uses the `iw` command line utility to detect access points. For each interface that is of `type AP`, it then runs `iw INTERFACE station dump` and collects statistics.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin is able to auto-detect if you are running access points on your Linux box.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per wireless device
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ap.clients | clients | clients |
+| ap.net | received, sent | kilobits/s |
+| ap.packets | received, sent | packets/s |
+| ap.issues | retries, failures | issues/s |
+| ap.signal | average signal | dBm |
+| ap.bitrate | receive, transmit, expected | Mbps |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Install charts.d plugin
+
+If [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed.
+
+
+#### `iw` utility.
+
+Make sure the `iw` utility is installed.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `charts.d/ap.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config charts.d/ap.conf
+```
+#### Options
+
+The config file is sourced by the charts.d plugin. It's a standard bash file.
+
+The following collapsed table contains all the options that can be configured for the ap collector.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| ap_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | no |
+| ap_priority | Controls the order of charts at the netdata dashboard. | 6900 | no |
+| ap_retries | The number of retries to do in case of failure before disabling the collector. | 10 | no |
+
+
+
+#### Examples
+
+##### Change the collection frequency
+
+Specify a custom collection frequency (update_every) for this collector.
+
+```yaml
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+ap_update_every=10
+
+# the charts priority on the dashboard
+#ap_priority=6900
+
+# the number of retries to do in case of failure
+# before disabling the module
+#ap_retries=10
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `ap` collector, run the `charts.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `charts.d.plugin` to debug the collector:
+
+ ```bash
+ ./charts.d.plugin debug 1 ap
+ ```
+
+
diff --git a/collectors/charts.d.plugin/ap/metadata.yaml b/collectors/charts.d.plugin/ap/metadata.yaml
index c4e96a14ab2df0..ee941e417c27dc 100644
--- a/collectors/charts.d.plugin/ap/metadata.yaml
+++ b/collectors/charts.d.plugin/ap/metadata.yaml
@@ -41,6 +41,9 @@ modules:
setup:
prerequisites:
list:
+ - title: "Install charts.d plugin"
+ description: |
+ If [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed.
- title: "`iw` utility."
description: "Make sure the `iw` utility is installed."
configuration:
diff --git a/collectors/charts.d.plugin/apcupsd/README.md b/collectors/charts.d.plugin/apcupsd/README.md
deleted file mode 100644
index 00e9697dc81c87..00000000000000
--- a/collectors/charts.d.plugin/apcupsd/README.md
+++ /dev/null
@@ -1,26 +0,0 @@
-
-
-# APC UPS collector
-
-Monitors different APC UPS models and retrieves status information using `apcaccess` tool.
-
-## Configuration
-
-If using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), make sure `netdata-plugin-chartsd` is installed.
-
-Edit the `charts.d/apcupsd.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config charts.d/apcupsd.conf
-```
-
-
diff --git a/collectors/charts.d.plugin/apcupsd/README.md b/collectors/charts.d.plugin/apcupsd/README.md
new file mode 120000
index 00000000000000..fc6681fe618065
--- /dev/null
+++ b/collectors/charts.d.plugin/apcupsd/README.md
@@ -0,0 +1 @@
+integrations/apc_ups.md
\ No newline at end of file
diff --git a/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh b/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh
index ef9a90596a4470..da9cd19c333da7 100644
--- a/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh
+++ b/collectors/charts.d.plugin/apcupsd/apcupsd.chart.sh
@@ -50,7 +50,7 @@ apcupsd_check() {
local host working=0 failed=0
for host in "${!apcupsd_sources[@]}"; do
- apcupsd_get "${apcupsd_sources[${host}]}" > /dev/null
+ apcupsd_get "${apcupsd_sources[${host}]}" >/dev/null
# shellcheck disable=2181
if [ $? -ne 0 ]; then
error "cannot get information for apcupsd server ${host} on ${apcupsd_sources[${host}]}."
@@ -77,7 +77,7 @@ apcupsd_create() {
local host
for host in "${!apcupsd_sources[@]}"; do
# create the charts
- cat << EOF
+ cat <
+
+# APC UPS
+
+
+
+
+
+Plugin: charts.d.plugin
+Module: apcupsd
+
+
+
+## Overview
+
+Monitor APC UPS performance with Netdata for optimal uninterruptible power supply operations. Enhance your power supply reliability with real-time APC UPS metrics.
+
+The collector uses the `apcaccess` tool to contact the `apcupsd` daemon and get the APC UPS statistics.
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+By default, with no configuration provided, the collector will try to contact 127.0.0.1:3551 using the `apcaccess` utility.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per ups
+
+Metrics related to UPS. Each UPS provides its own set of the following metrics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| apcupsd.charge | charge | percentage |
+| apcupsd.battery.voltage | voltage, nominal | Volts |
+| apcupsd.input.voltage | voltage, min, max | Volts |
+| apcupsd.output.voltage | absolute, nominal | Volts |
+| apcupsd.input.frequency | frequency | Hz |
+| apcupsd.load | load | percentage |
+| apcupsd.load_usage | load | Watts |
+| apcupsd.temperature | temp | Celsius |
+| apcupsd.time | time | Minutes |
+| apcupsd.online | online | boolean |
+| apcupsd.selftest | OK, NO, BT, NG | status |
+| apcupsd.status | ONLINE, ONBATT, OVERLOAD, LOWBATT, REPLACEBATT, NOBATT, SLAVE, SLAVEDOWN, COMMLOST, CAL, TRIM, BOOST, SHUTTING_DOWN | status |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ apcupsd_ups_charge ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.charge | average UPS charge over the last minute |
+| [ apcupsd_10min_ups_load ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.load | average UPS load over the last 10 minutes |
+| [ apcupsd_last_collected_secs ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.load | number of seconds since the last successful data collection |
+| [ apcupsd_selftest_warning ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.selftest | self-test failed due to insufficient battery capacity or due to overload. |
+| [ apcupsd_status_onbatt ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.status | APC UPS has switched to battery power because the input power has failed |
+| [ apcupsd_status_overload ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.status | APC UPS is overloaded and cannot supply enough power to the load |
+| [ apcupsd_status_lowbatt ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.status | APC UPS battery is low and needs to be recharged |
+| [ apcupsd_status_replacebatt ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.status | APC UPS battery has reached the end of its lifespan and needs to be replaced |
+| [ apcupsd_status_nobatt ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.status | APC UPS has no battery |
+| [ apcupsd_status_commlost ](https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf) | apcupsd.status | APC UPS communication link is lost |
+
+
+## Setup
+
+### Prerequisites
+
+#### Install charts.d plugin
+
+If [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed.
+
+
+#### Required software
+
+Make sure the `apcaccess` and `apcupsd` are installed and running.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `charts.d/apcupsd.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config charts.d/apcupsd.conf
+```
+#### Options
+
+The config file is sourced by the charts.d plugin. It's a standard bash file.
+
+The following collapsed table contains all the options that can be configured for the apcupsd collector.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| apcupsd_sources | This is an array of apcupsd sources. You can have multiple entries there. Please refer to the example below on how to set it. | 127.0.0.1:3551 | no |
+| apcupsd_timeout | How long to wait for apcupsd to respond. | 3 | no |
+| apcupsd_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | no |
+| apcupsd_priority | The charts priority on the dashboard. | 90000 | no |
+| apcupsd_retries | The number of retries to do in case of failure before disabling the collector. | 10 | no |
+
+
+
+#### Examples
+
+##### Multiple apcupsd sources
+
+Specify multiple apcupsd sources along with a custom update interval.
+
+```yaml
+# add all your APC UPSes in this array - uncomment it too
+declare -A apcupsd_sources=(
+ ["local"]="127.0.0.1:3551"
+ ["remote"]="1.2.3.4:3551"
+)
+
+# how long to wait for apcupsd to respond
+#apcupsd_timeout=3
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+apcupsd_update_every=5
+
+# the charts priority on the dashboard
+#apcupsd_priority=90000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#apcupsd_retries=10
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `apcupsd` collector, run the `charts.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `charts.d.plugin` to debug the collector:
+
+ ```bash
+ ./charts.d.plugin debug 1 apcupsd
+ ```
+
+
diff --git a/collectors/charts.d.plugin/apcupsd/metadata.yaml b/collectors/charts.d.plugin/apcupsd/metadata.yaml
index d078074b7bb325..c333dc96467232 100644
--- a/collectors/charts.d.plugin/apcupsd/metadata.yaml
+++ b/collectors/charts.d.plugin/apcupsd/metadata.yaml
@@ -42,6 +42,9 @@ modules:
setup:
prerequisites:
list:
+ - title: "Install charts.d plugin"
+ description: |
+ If [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed.
- title: "Required software"
description: "Make sure the `apcaccess` and `apcupsd` are installed and running."
configuration:
@@ -121,6 +124,34 @@ modules:
link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf
metric: apcupsd.load
info: number of seconds since the last successful data collection
+ - name: apcupsd_selftest_warning
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf
+ metric: apcupsd.selftest
+ info: self-test failed due to insufficient battery capacity or due to overload.
+ - name: apcupsd_status_onbatt
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf
+ metric: apcupsd.status
+ info: APC UPS has switched to battery power because the input power has failed
+ - name: apcupsd_status_overload
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf
+ metric: apcupsd.status
+ info: APC UPS is overloaded and cannot supply enough power to the load
+ - name: apcupsd_status_lowbatt
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf
+ metric: apcupsd.status
+ info: APC UPS battery is low and needs to be recharged
+ - name: apcupsd_status_replacebatt
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf
+ metric: apcupsd.status
+ info: APC UPS battery has reached the end of its lifespan and needs to be replaced
+ - name: apcupsd_status_nobatt
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf
+ metric: apcupsd.status
+ info: APC UPS has no battery
+ - name: apcupsd_status_commlost
+ link: https://github.com/netdata/netdata/blob/master/health/health.d/apcupsd.conf
+ metric: apcupsd.status
+ info: APC UPS communication link is lost
metrics:
folding:
title: Metrics
@@ -196,3 +227,30 @@ modules:
chart_type: line
dimensions:
- name: online
+ - name: apcupsd.selftest
+ description: UPS Self-Test status
+ unit: status
+ chart_type: line
+ dimensions:
+ - name: OK
+ - name: NO
+ - name: BT
+ - name: NG
+ - name: apcupsd.status
+ description: UPS Status
+ unit: status
+ chart_type: line
+ dimensions:
+ - name: ONLINE
+ - name: ONBATT
+ - name: OVERLOAD
+ - name: LOWBATT
+ - name: REPLACEBATT
+ - name: NOBATT
+ - name: SLAVE
+ - name: SLAVEDOWN
+ - name: COMMLOST
+ - name: CAL
+ - name: TRIM
+ - name: BOOST
+ - name: SHUTTING_DOWN
diff --git a/collectors/charts.d.plugin/charts.d.conf b/collectors/charts.d.plugin/charts.d.conf
index 2d32f73ea89ee9..4614f259efad48 100644
--- a/collectors/charts.d.plugin/charts.d.conf
+++ b/collectors/charts.d.plugin/charts.d.conf
@@ -36,7 +36,6 @@
# ap=yes
# apcupsd=yes
# libreswan=yes
-# nut=yes
# opensips=yes
# -----------------------------------------------------------------------------
diff --git a/collectors/charts.d.plugin/charts.d.plugin.in b/collectors/charts.d.plugin/charts.d.plugin.in
index 20996eb93739e5..14694809902c66 100755
--- a/collectors/charts.d.plugin/charts.d.plugin.in
+++ b/collectors/charts.d.plugin/charts.d.plugin.in
@@ -13,13 +13,116 @@
# each will have a different config file and modules configuration directory.
#
-export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"
+export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin:@sbindir_POST@"
PROGRAM_FILE="$0"
-PROGRAM_NAME="$(basename $0)"
-PROGRAM_NAME="${PROGRAM_NAME/.plugin/}"
MODULE_NAME="main"
+# -----------------------------------------------------------------------------
+# logging
+
+PROGRAM_NAME="$(basename "${0}")"
+SHORT_PROGRAM_NAME="${PROGRAM_NAME/.plugin/}"
+
+# these should be the same as the syslog() priorities
+NDLP_EMERG=0 # system is unusable
+NDLP_ALERT=1 # action must be taken immediately
+NDLP_CRIT=2 # critical conditions
+NDLP_ERR=3 # error conditions
+NDLP_WARN=4 # warning conditions
+NDLP_NOTICE=5 # normal but significant condition
+NDLP_INFO=6 # informational
+NDLP_DEBUG=7 # debug-level messages
+
+# the max (numerically) log level we will log
+LOG_LEVEL=$NDLP_INFO
+
+set_log_min_priority() { # set LOG_LEVEL from the NETDATA_LOG_LEVEL environment variable
+ case "${NETDATA_LOG_LEVEL,,}" in # ",," lowercases the value, so matching is case-insensitive
+ "emerg" | "emergency")
+ LOG_LEVEL=$NDLP_EMERG
+ ;;
+
+ "alert")
+ LOG_LEVEL=$NDLP_ALERT
+ ;;
+
+ "crit" | "critical")
+ LOG_LEVEL=$NDLP_CRIT
+ ;;
+
+ "err" | "error")
+ LOG_LEVEL=$NDLP_ERR
+ ;;
+
+ "warn" | "warning")
+ LOG_LEVEL=$NDLP_WARN
+ ;;
+
+ "notice")
+ LOG_LEVEL=$NDLP_NOTICE
+ ;;
+
+ "info")
+ LOG_LEVEL=$NDLP_INFO
+ ;;
+
+ "debug")
+ LOG_LEVEL=$NDLP_DEBUG
+ ;;
+ esac # no default branch: an empty or unrecognized value leaves LOG_LEVEL unchanged
+}
+
+set_log_min_priority
+
+log() {
+ local level="${1}"
+ shift 1
+
+ [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return
+
+ systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <&2 "BASH version 4 or later is required (this is ${BASH_VERSION})."
+ exit 1
+fi
+
# -----------------------------------------------------------------------------
# create temp dir
@@ -47,36 +150,6 @@ logdate() {
date "+%Y-%m-%d %H:%M:%S"
}
-log() {
- local status="${1}"
- shift
-
- echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${MODULE_NAME}: ${*}"
-
-}
-
-warning() {
- log WARNING "${@}"
-}
-
-error() {
- log ERROR "${@}"
-}
-
-info() {
- log INFO "${@}"
-}
-
-fatal() {
- log FATAL "${@}"
- echo "DISABLE"
- exit 1
-}
-
-debug() {
- [ $debug -eq 1 ] && log DEBUG "${@}"
-}
-
# -----------------------------------------------------------------------------
# check a few commands
@@ -176,12 +249,14 @@ while [ ! -z "$1" ]; do
if [ "$1" = "debug" -o "$1" = "all" ]; then
debug=1
+ LOG_LEVEL=$NDLP_DEBUG
shift
continue
fi
if [ -f "$chartsd/$1.chart.sh" ]; then
debug=1
+ LOG_LEVEL=$NDLP_DEBUG
chart_only="$(echo $1.chart.sh | sed "s/\.chart\.sh$//g")"
shift
continue
@@ -189,6 +264,7 @@ while [ ! -z "$1" ]; do
if [ -f "$chartsd/$1" ]; then
debug=1
+ LOG_LEVEL=$NDLP_DEBUG
chart_only="$(echo $1 | sed "s/\.chart\.sh$//g")"
shift
continue
@@ -229,7 +305,7 @@ source "$pluginsd/loopsleepms.sh.inc"
# -----------------------------------------------------------------------------
# load my configuration
-for myconfig in "${NETDATA_STOCK_CONFIG_DIR}/${PROGRAM_NAME}.conf" "${NETDATA_USER_CONFIG_DIR}/${PROGRAM_NAME}.conf"; do
+for myconfig in "${NETDATA_STOCK_CONFIG_DIR}/${SHORT_PROGRAM_NAME}.conf" "${NETDATA_USER_CONFIG_DIR}/${SHORT_PROGRAM_NAME}.conf"; do
if [ -f "$myconfig" ]; then
source "$myconfig"
if [ $? -ne 0 ]; then
diff --git a/collectors/charts.d.plugin/libreswan/README.md b/collectors/charts.d.plugin/libreswan/README.md
deleted file mode 100644
index b6eeb0180ca880..00000000000000
--- a/collectors/charts.d.plugin/libreswan/README.md
+++ /dev/null
@@ -1,61 +0,0 @@
-
-
-# Libreswan IPSec tunnel collector
-
-Collects bytes-in, bytes-out and uptime for all established libreswan IPSEC tunnels.
-
-The following charts are created, **per tunnel**:
-
-1. **Uptime**
-
-- the uptime of the tunnel
-
-2. **Traffic**
-
-- bytes in
-- bytes out
-
-## Configuration
-
-If using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), make sure `netdata-plugin-chartsd` is installed.
-
-Edit the `charts.d/libreswan.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config charts.d/libreswan.conf
-```
-
-The plugin executes 2 commands to collect all the information it needs:
-
-```sh
-ipsec whack --status
-ipsec whack --trafficstatus
-```
-
-The first command is used to extract the currently established tunnels, their IDs and their names.
-The second command is used to extract the current uptime and traffic.
-
-Most probably user `netdata` will not be able to query libreswan, so the `ipsec` commands will be denied.
-The plugin attempts to run `ipsec` as `sudo ipsec ...`, to get access to libreswan statistics.
-
-To allow user `netdata` execute `sudo ipsec ...`, create the file `/etc/sudoers.d/netdata` with this content:
-
-```
-netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --status
-netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --trafficstatus
-```
-
-Make sure the path `/sbin/ipsec` matches your setup (execute `which ipsec` to find the right path).
-
----
-
-
diff --git a/collectors/charts.d.plugin/libreswan/README.md b/collectors/charts.d.plugin/libreswan/README.md
new file mode 120000
index 00000000000000..1416d9597ed6ce
--- /dev/null
+++ b/collectors/charts.d.plugin/libreswan/README.md
@@ -0,0 +1 @@
+integrations/libreswan.md
\ No newline at end of file
diff --git a/collectors/charts.d.plugin/libreswan/integrations/libreswan.md b/collectors/charts.d.plugin/libreswan/integrations/libreswan.md
new file mode 100644
index 00000000000000..bd1eec647730ed
--- /dev/null
+++ b/collectors/charts.d.plugin/libreswan/integrations/libreswan.md
@@ -0,0 +1,194 @@
+
+
+# Libreswan
+
+
+
+
+
+Plugin: charts.d.plugin
+Module: libreswan
+
+
+
+## Overview
+
+Monitor Libreswan performance for optimal IPsec VPN operations. Improve your VPN operations with Netdata's real-time metrics and built-in alerts.
+
+The collector uses the `ipsec` command to collect the information it needs.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per IPSEC tunnel
+
+Metrics related to IPSEC tunnels. Each tunnel provides its own set of the following metrics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| libreswan.net | in, out | kilobits/s |
+| libreswan.uptime | uptime | seconds |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Install charts.d plugin
+
+If [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed.
+
+
+#### Permissions to execute `ipsec`
+
+The plugin executes 2 commands to collect all the information it needs:
+
+```sh
+ipsec whack --status
+ipsec whack --trafficstatus
+```
+
+The first command is used to extract the currently established tunnels, their IDs and their names.
+The second command is used to extract the current uptime and traffic.
+
+Most probably user `netdata` will not be able to query libreswan, so the `ipsec` commands will be denied.
+The plugin attempts to run `ipsec` as `sudo ipsec ...`, to get access to libreswan statistics.
+
+To allow user `netdata` to execute `sudo ipsec ...`, create the file `/etc/sudoers.d/netdata` with this content:
+
+```
+netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --status
+netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --trafficstatus
+```
+
+Make sure the path `/sbin/ipsec` matches your setup (execute `which ipsec` to find the right path).
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `charts.d/libreswan.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config charts.d/libreswan.conf
+```
+#### Options
+
+The config file is sourced by the charts.d plugin. It's a standard bash file.
+
+The following collapsed table contains all the options that can be configured for the libreswan collector.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| libreswan_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | no |
+| libreswan_priority | The charts priority on the dashboard | 90000 | no |
+| libreswan_retries | The number of retries to do in case of failure before disabling the collector. | 10 | no |
+| libreswan_sudo | Whether to run `ipsec` with `sudo` or not. | 1 | no |
+
+
+
+#### Examples
+
+##### Run `ipsec` without sudo
+
+Run the `ipsec` utility without sudo
+
+```bash
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#libreswan_update_every=1
+
+# the charts priority on the dashboard
+#libreswan_priority=90000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#libreswan_retries=10
+
+# set to 1, to run ipsec with sudo (the default)
+# set to 0, to run ipsec without sudo
+libreswan_sudo=0
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `libreswan` collector, run the `charts.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `charts.d.plugin` to debug the collector:
+
+ ```bash
+ ./charts.d.plugin debug 1 libreswan
+ ```
+
+
diff --git a/collectors/charts.d.plugin/libreswan/metadata.yaml b/collectors/charts.d.plugin/libreswan/metadata.yaml
index 484d79edec27fd..77cb254505ec92 100644
--- a/collectors/charts.d.plugin/libreswan/metadata.yaml
+++ b/collectors/charts.d.plugin/libreswan/metadata.yaml
@@ -40,6 +40,9 @@ modules:
setup:
prerequisites:
list:
+ - title: "Install charts.d plugin"
+ description: |
+ If [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed.
- title: "Permissions to execute `ipsec`"
description: |
The plugin executes 2 commands to collect all the information it needs:
diff --git a/collectors/charts.d.plugin/nut/Makefile.inc b/collectors/charts.d.plugin/nut/Makefile.inc
deleted file mode 100644
index 4fb47145dd821a..00000000000000
--- a/collectors/charts.d.plugin/nut/Makefile.inc
+++ /dev/null
@@ -1,13 +0,0 @@
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-# THIS IS NOT A COMPLETE Makefile
-# IT IS INCLUDED BY ITS PARENT'S Makefile.am
-# IT IS REQUIRED TO REFERENCE ALL FILES RELATIVE TO THE PARENT
-
-# install these files
-dist_charts_DATA += nut/nut.chart.sh
-dist_chartsconfig_DATA += nut/nut.conf
-
-# do not install these files, but include them in the distribution
-dist_noinst_DATA += nut/README.md nut/Makefile.inc
-
diff --git a/collectors/charts.d.plugin/nut/README.md b/collectors/charts.d.plugin/nut/README.md
deleted file mode 100644
index 4608ce3e1ac39f..00000000000000
--- a/collectors/charts.d.plugin/nut/README.md
+++ /dev/null
@@ -1,79 +0,0 @@
-
-
-# UPS/PDU collector
-
-Collects UPS data for all power devices configured in the system.
-
-The following charts will be created:
-
-1. **UPS Charge**
-
-- percentage changed
-
-2. **UPS Battery Voltage**
-
-- current voltage
-- high voltage
-- low voltage
-- nominal voltage
-
-3. **UPS Input Voltage**
-
-- current voltage
-- fault voltage
-- nominal voltage
-
-4. **UPS Input Current**
-
-- nominal current
-
-5. **UPS Input Frequency**
-
-- current frequency
-- nominal frequency
-
-6. **UPS Output Voltage**
-
-- current voltage
-
-7. **UPS Load**
-
-- current load
-
-8. **UPS Temperature**
-
-- current temperature
-
-## Configuration
-
-If using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), make sure `netdata-plugin-chartsd` is installed.
-
-Edit the `charts.d/nut.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config charts.d/nut.conf
-```
-
-This is the internal default for `charts.d/nut.conf`
-
-```sh
-# a space separated list of UPS names
-# if empty, the list returned by 'upsc -l' will be used
-nut_ups=
-
-# how frequently to collect UPS data
-nut_update_every=2
-```
-
----
-
-
diff --git a/collectors/charts.d.plugin/nut/metadata.yaml b/collectors/charts.d.plugin/nut/metadata.yaml
deleted file mode 100644
index ea2e6b2eb666d0..00000000000000
--- a/collectors/charts.d.plugin/nut/metadata.yaml
+++ /dev/null
@@ -1,219 +0,0 @@
-plugin_name: charts.d.plugin
-modules:
- - meta:
- plugin_name: charts.d.plugin
- module_name: nut
- monitored_instance:
- name: Network UPS Tools (NUT)
- link: ''
- categories:
- - data-collection.ups
- icon_filename: 'plug-circle-bolt.svg'
- related_resources:
- integrations:
- list: []
- info_provided_to_referring_integrations:
- description: ''
- keywords:
- - nut
- - network ups tools
- - ups
- - pdu
- most_popular: false
- overview:
- data_collection:
- metrics_description: 'Examine UPS/PDU metrics with Netdata for insights into power device performance. Improve your power device performance with comprehensive dashboards and anomaly detection.'
- method_description: 'This collector uses the `nut` (Network UPS Tools) to query statistics for multiple UPS devices.'
- supported_platforms:
- include: []
- exclude: []
- multi_instance: true
- additional_permissions:
- description: ''
- default_behavior:
- auto_detection:
- description: ''
- limits:
- description: ''
- performance_impact:
- description: ''
- setup:
- prerequisites:
- list:
- - title: "Required software"
- description: "Make sure the Network UPS Tools (`nut`) is installed and can detect your UPS devices."
- configuration:
- file:
- name: charts.d/nut.conf
- options:
- description: |
- The config file is sourced by the charts.d plugin. It's a standard bash file.
-
- The following collapsed table contains all the options that can be configured for the nut collector.
- folding:
- title: "Config options"
- enabled: true
- list:
- - name: nut_ups
- description: A space separated list of UPS names. If empty, the list returned by `upsc -l` will be used.
- default_value: ""
- required: false
- - name: nut_names
- description: Each line represents an alias for one UPS. If empty, the FQDN will be used.
- default_value: ""
- required: false
- - name: nut_timeout
- description: How long to wait for nut to respond.
- default_value: 2
- required: false
- - name: nut_clients_chart
- description: Set this to 1 to enable another chart showing the number of UPS clients connected to `upsd`.
- default_value: 1
- required: false
- - name: nut_update_every
- description: The data collection frequency. If unset, will inherit the netdata update frequency.
- default_value: 2
- required: false
- - name: nut_priority
- description: The charts priority on the dashboard
- default_value: 90000
- required: false
- - name: nut_retries
- description: The number of retries to do in case of failure before disabling the collector.
- default_value: 10
- required: false
- examples:
- folding:
- enabled: true
- title: "Config"
- list:
- - name: Provide names to UPS devices
- description: Map aliases to UPS devices
- config: |
- # a space separated list of UPS names
- # if empty, the list returned by 'upsc -l' will be used
- #nut_ups=
-
- # each line represents an alias for one UPS
- # if empty, the FQDN will be used
- nut_names["XXXXXX"]="UPS-office"
- nut_names["YYYYYY"]="UPS-rack"
-
- # how much time in seconds, to wait for nut to respond
- #nut_timeout=2
-
- # set this to 1, to enable another chart showing the number
- # of UPS clients connected to upsd
- #nut_clients_chart=1
-
- # the data collection frequency
- # if unset, will inherit the netdata update frequency
- #nut_update_every=2
-
- # the charts priority on the dashboard
- #nut_priority=90000
-
- # the number of retries to do in case of failure
- # before disabling the module
- #nut_retries=10
- troubleshooting:
- problems:
- list: []
- alerts:
- - name: nut_ups_charge
- link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf
- metric: nut.charge
- info: average UPS charge over the last minute
- os: "*"
- - name: nut_10min_ups_load
- link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf
- metric: nut.load
- info: average UPS load over the last 10 minutes
- os: "*"
- - name: nut_last_collected_secs
- link: https://github.com/netdata/netdata/blob/master/health/health.d/nut.conf
- metric: nut.load
- info: number of seconds since the last successful data collection
- metrics:
- folding:
- title: Metrics
- enabled: false
- description: ""
- availability: []
- scopes:
- - name: ups
- description: "Metrics related to UPS. Each UPS provides its own set of the following metrics."
- labels: []
- metrics:
- - name: nut.charge
- description: UPS Charge
- unit: "percentage"
- chart_type: area
- dimensions:
- - name: charge
- - name: nut.runtime
- description: UPS Runtime
- unit: "seconds"
- chart_type: line
- dimensions:
- - name: runtime
- - name: nut.battery.voltage
- description: UPS Battery Voltage
- unit: "Volts"
- chart_type: line
- dimensions:
- - name: voltage
- - name: high
- - name: low
- - name: nominal
- - name: nut.input.voltage
- description: UPS Input Voltage
- unit: "Volts"
- chart_type: line
- dimensions:
- - name: voltage
- - name: fault
- - name: nominal
- - name: nut.input.current
- description: UPS Input Current
- unit: "Ampere"
- chart_type: line
- dimensions:
- - name: nominal
- - name: nut.input.frequency
- description: UPS Input Frequency
- unit: "Hz"
- chart_type: line
- dimensions:
- - name: frequency
- - name: nominal
- - name: nut.output.voltage
- description: UPS Output Voltage
- unit: "Volts"
- chart_type: line
- dimensions:
- - name: voltage
- - name: nut.load
- description: UPS Load
- unit: "percentage"
- chart_type: area
- dimensions:
- - name: load
- - name: nut.load_usage
- description: UPS Load Usage
- unit: "Watts"
- chart_type: area
- dimensions:
- - name: load_usage
- - name: nut.temperature
- description: UPS Temperature
- unit: "temperature"
- chart_type: line
- dimensions:
- - name: temp
- - name: nut.clients
- description: UPS Connected Clients
- unit: "clients"
- chart_type: area
- dimensions:
- - name: clients
diff --git a/collectors/charts.d.plugin/nut/nut.chart.sh b/collectors/charts.d.plugin/nut/nut.chart.sh
deleted file mode 100644
index 7c32b6ddeda86f..00000000000000
--- a/collectors/charts.d.plugin/nut/nut.chart.sh
+++ /dev/null
@@ -1,244 +0,0 @@
-# shellcheck shell=bash
-# no need for shebang - this file is loaded from charts.d.plugin
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-# netdata
-# real-time performance and health monitoring, done right!
-# (C) 2016-2017 Costa Tsaousis
-#
-
-# a space separated list of UPS names
-# if empty, the list returned by 'upsc -l' will be used
-nut_ups=
-
-# how frequently to collect UPS data
-nut_update_every=2
-
-# how much time in seconds, to wait for nut to respond
-nut_timeout=2
-
-# set this to 1, to enable another chart showing the number
-# of UPS clients connected to upsd
-nut_clients_chart=0
-
-# the priority of nut related to other charts
-nut_priority=90000
-
-declare -A nut_ids=()
-declare -A nut_names=()
-
-nut_get_all() {
- run -t $nut_timeout upsc -l
-}
-
-nut_get() {
- run -t $nut_timeout upsc "$1"
-
- if [ "${nut_clients_chart}" -eq "1" ]; then
- printf "ups.connected_clients: "
- run -t $nut_timeout upsc -c "$1" | wc -l
- fi
-}
-
-nut_check() {
-
- # this should return:
- # - 0 to enable the chart
- # - 1 to disable the chart
-
- local x
-
- require_cmd upsc || return 1
-
- [ -z "$nut_ups" ] && nut_ups="$(nut_get_all)"
-
- for x in $nut_ups; do
- nut_get "$x" > /dev/null
- # shellcheck disable=SC2181
- if [ $? -eq 0 ]; then
- if [ -n "${nut_names[${x}]}" ]; then
- nut_ids[$x]="$(fixid "${nut_names[${x}]}")"
- else
- nut_ids[$x]="$(fixid "$x")"
- fi
- continue
- fi
- error "cannot get information for NUT UPS '$x'."
- done
-
- if [ ${#nut_ids[@]} -eq 0 ]; then
- # shellcheck disable=SC2154
- error "Cannot find UPSes - please set nut_ups='ups_name' in $confd/nut.conf"
- return 1
- fi
-
- return 0
-}
-
-nut_create() {
- # create the charts
- local x
-
- for x in "${nut_ids[@]}"; do
- cat << EOF
-CHART nut_$x.charge '' "UPS Charge" "percentage" ups nut.charge area $((nut_priority + 2)) $nut_update_every
-DIMENSION battery_charge charge absolute 1 100
-
-CHART nut_$x.runtime '' "UPS Runtime" "seconds" ups nut.runtime area $((nut_priority + 3)) $nut_update_every
-DIMENSION battery_runtime runtime absolute 1 100
-
-CHART nut_$x.battery_voltage '' "UPS Battery Voltage" "Volts" ups nut.battery.voltage line $((nut_priority + 4)) $nut_update_every
-DIMENSION battery_voltage voltage absolute 1 100
-DIMENSION battery_voltage_high high absolute 1 100
-DIMENSION battery_voltage_low low absolute 1 100
-DIMENSION battery_voltage_nominal nominal absolute 1 100
-
-CHART nut_$x.input_voltage '' "UPS Input Voltage" "Volts" input nut.input.voltage line $((nut_priority + 5)) $nut_update_every
-DIMENSION input_voltage voltage absolute 1 100
-DIMENSION input_voltage_fault fault absolute 1 100
-DIMENSION input_voltage_nominal nominal absolute 1 100
-
-CHART nut_$x.input_current '' "UPS Input Current" "Ampere" input nut.input.current line $((nut_priority + 6)) $nut_update_every
-DIMENSION input_current_nominal nominal absolute 1 100
-
-CHART nut_$x.input_frequency '' "UPS Input Frequency" "Hz" input nut.input.frequency line $((nut_priority + 7)) $nut_update_every
-DIMENSION input_frequency frequency absolute 1 100
-DIMENSION input_frequency_nominal nominal absolute 1 100
-
-CHART nut_$x.output_voltage '' "UPS Output Voltage" "Volts" output nut.output.voltage line $((nut_priority + 8)) $nut_update_every
-DIMENSION output_voltage voltage absolute 1 100
-
-CHART nut_$x.load '' "UPS Load" "percentage" ups nut.load area $((nut_priority)) $nut_update_every
-DIMENSION load load absolute 1 100
-
-CHART nut_$x.load_usage '' "UPS Load Usage" "Watts" ups nut.load_usage area $((nut_priority + 1)) $nut_update_every
-DIMENSION load_usage load_usage absolute 1 100
-
-CHART nut_$x.temp '' "UPS Temperature" "temperature" ups nut.temperature line $((nut_priority + 9)) $nut_update_every
-DIMENSION temp temp absolute 1 100
-EOF
-
- if [ "${nut_clients_chart}" = "1" ]; then
- cat << EOF2
-CHART nut_$x.clients '' "UPS Connected Clients" "clients" ups nut.clients area $((nut_priority + 10)) $nut_update_every
-DIMENSION clients '' absolute 1 1
-EOF2
- fi
-
- done
-
- return 0
-}
-
-nut_update() {
- # the first argument to this function is the microseconds since last update
- # pass this parameter to the BEGIN statement (see below).
-
- # do all the work to collect / calculate the values
- # for each dimension
- # remember: KEEP IT SIMPLE AND SHORT
-
- local i x
- for i in "${!nut_ids[@]}"; do
- x="${nut_ids[$i]}"
- nut_get "$i" | awk "
-BEGIN {
- battery_charge = 0;
- battery_runtime = 0;
- battery_voltage = 0;
- battery_voltage_high = 0;
- battery_voltage_low = 0;
- battery_voltage_nominal = 0;
- input_voltage = 0;
- input_voltage_fault = 0;
- input_voltage_nominal = 0;
- input_current_nominal = 0;
- input_frequency = 0;
- input_frequency_nominal = 0;
- output_voltage = 0;
- load = 0;
- load_usage = 0;
- nompower = 0;
- temp = 0;
- client = 0;
- do_clients = ${nut_clients_chart};
-}
-/^battery.charge: .*/ { battery_charge = \$2 * 100 };
-/^battery.runtime: .*/ { battery_runtime = \$2 * 100 };
-/^battery.voltage: .*/ { battery_voltage = \$2 * 100 };
-/^battery.voltage.high: .*/ { battery_voltage_high = \$2 * 100 };
-/^battery.voltage.low: .*/ { battery_voltage_low = \$2 * 100 };
-/^battery.voltage.nominal: .*/ { battery_voltage_nominal = \$2 * 100 };
-/^input.voltage: .*/ { input_voltage = \$2 * 100 };
-/^input.voltage.fault: .*/ { input_voltage_fault = \$2 * 100 };
-/^input.voltage.nominal: .*/ { input_voltage_nominal = \$2 * 100 };
-/^input.current.nominal: .*/ { input_current_nominal = \$2 * 100 };
-/^input.frequency: .*/ { input_frequency = \$2 * 100 };
-/^input.frequency.nominal: .*/ { input_frequency_nominal = \$2 * 100 };
-/^output.voltage: .*/ { output_voltage = \$2 * 100 };
-/^ups.load: .*/ { load = \$2 * 100 };
-/^ups.realpower.nominal: .*/ { nompower = \$2 };
-/^ups.temperature: .*/ { temp = \$2 * 100 };
-/^ups.connected_clients: .*/ { clients = \$2 };
-END {
- { load_usage = nompower * load / 100 };
-
- print \"BEGIN nut_$x.charge $1\";
- print \"SET battery_charge = \" battery_charge;
- print \"END\"
-
- print \"BEGIN nut_$x.runtime $1\";
- print \"SET battery_runtime = \" battery_runtime;
- print \"END\"
-
- print \"BEGIN nut_$x.battery_voltage $1\";
- print \"SET battery_voltage = \" battery_voltage;
- print \"SET battery_voltage_high = \" battery_voltage_high;
- print \"SET battery_voltage_low = \" battery_voltage_low;
- print \"SET battery_voltage_nominal = \" battery_voltage_nominal;
- print \"END\"
-
- print \"BEGIN nut_$x.input_voltage $1\";
- print \"SET input_voltage = \" input_voltage;
- print \"SET input_voltage_fault = \" input_voltage_fault;
- print \"SET input_voltage_nominal = \" input_voltage_nominal;
- print \"END\"
-
- print \"BEGIN nut_$x.input_current $1\";
- print \"SET input_current_nominal = \" input_current_nominal;
- print \"END\"
-
- print \"BEGIN nut_$x.input_frequency $1\";
- print \"SET input_frequency = \" input_frequency;
- print \"SET input_frequency_nominal = \" input_frequency_nominal;
- print \"END\"
-
- print \"BEGIN nut_$x.output_voltage $1\";
- print \"SET output_voltage = \" output_voltage;
- print \"END\"
-
- print \"BEGIN nut_$x.load $1\";
- print \"SET load = \" load;
- print \"END\"
-
- print \"BEGIN nut_$x.load_usage $1\";
- print \"SET load_usage = \" load_usage;
- print \"END\"
-
- print \"BEGIN nut_$x.temp $1\";
- print \"SET temp = \" temp;
- print \"END\"
-
- if(do_clients) {
- print \"BEGIN nut_$x.clients $1\";
- print \"SET clients = \" clients;
- print \"END\"
- }
-}"
- # shellcheck disable=2181
- [ $? -ne 0 ] && unset "nut_ids[$i]" && error "failed to get values for '$i', disabling it."
- done
-
- [ ${#nut_ids[@]} -eq 0 ] && error "no UPSes left active." && return 1
- return 0
-}
diff --git a/collectors/charts.d.plugin/nut/nut.conf b/collectors/charts.d.plugin/nut/nut.conf
deleted file mode 100644
index b95ad9048c05f0..00000000000000
--- a/collectors/charts.d.plugin/nut/nut.conf
+++ /dev/null
@@ -1,33 +0,0 @@
-# no need for shebang - this file is loaded from charts.d.plugin
-
-# netdata
-# real-time performance and health monitoring, done right!
-# (C) 2018 Costa Tsaousis
-# GPL v3+
-
-# a space separated list of UPS names
-# if empty, the list returned by 'upsc -l' will be used
-#nut_ups=
-
-# each line represents an alias for one UPS
-# if empty, the FQDN will be used
-#nut_names["FQDN1"]="alias"
-#nut_names["FQDN2"]="alias"
-
-# how much time in seconds, to wait for nut to respond
-#nut_timeout=2
-
-# set this to 1, to enable another chart showing the number
-# of UPS clients connected to upsd
-#nut_clients_chart=1
-
-# the data collection frequency
-# if unset, will inherit the netdata update frequency
-#nut_update_every=2
-
-# the charts priority on the dashboard
-#nut_priority=90000
-
-# the number of retries to do in case of failure
-# before disabling the module
-#nut_retries=10
diff --git a/collectors/charts.d.plugin/opensips/README.md b/collectors/charts.d.plugin/opensips/README.md
deleted file mode 100644
index 1d7322140515fd..00000000000000
--- a/collectors/charts.d.plugin/opensips/README.md
+++ /dev/null
@@ -1,24 +0,0 @@
-
-
-# OpenSIPS collector
-
-## Configuration
-
-If using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), make sure `netdata-plugin-chartsd` is installed.
-
-Edit the `charts.d/opensips.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config charts.d/opensips.conf
-```
-
-
diff --git a/collectors/charts.d.plugin/opensips/README.md b/collectors/charts.d.plugin/opensips/README.md
new file mode 120000
index 00000000000000..bb85ba6d019f85
--- /dev/null
+++ b/collectors/charts.d.plugin/opensips/README.md
@@ -0,0 +1 @@
+integrations/opensips.md
\ No newline at end of file
diff --git a/collectors/charts.d.plugin/opensips/integrations/opensips.md b/collectors/charts.d.plugin/opensips/integrations/opensips.md
new file mode 100644
index 00000000000000..8c88dba0b256db
--- /dev/null
+++ b/collectors/charts.d.plugin/opensips/integrations/opensips.md
@@ -0,0 +1,192 @@
+
+
+# OpenSIPS
+
+
+
+
+
+Plugin: charts.d.plugin
+Module: opensips
+
+
+
+## Overview
+
+Examine OpenSIPS metrics for insights into SIP server operations. Study call rates, error rates, and response times for reliable voice over IP services.
+
+The collector uses the `opensipsctl` command line utility to gather OpenSIPS metrics.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+The collector will attempt to call `opensipsctl` along with a default number of parameters, even without any configuration.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per OpenSIPS instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| opensips.dialogs_active | active, early | dialogs |
+| opensips.users | registered, location, contacts, expires | users |
+| opensips.registrar | accepted, rejected | registrations/s |
+| opensips.transactions | UAS, UAC | transactions/s |
+| opensips.core_rcv | requests, replies | queries/s |
+| opensips.core_fwd | requests, replies | queries/s |
+| opensips.core_drop | requests, replies | queries/s |
+| opensips.core_err | requests, replies | queries/s |
+| opensips.core_bad | bad_URIs_rcvd, unsupported_methods, bad_msg_hdr | queries/s |
+| opensips.tm_replies | received, relayed, local | replies/s |
+| opensips.transactions_status | 2xx, 3xx, 4xx, 5xx, 6xx | transactions/s |
+| opensips.transactions_inuse | inuse | transactions |
+| opensips.sl_replies | 1xx, 2xx, 3xx, 4xx, 5xx, 6xx, sent, error, ACKed | replies/s |
+| opensips.dialogs | processed, expire, failed | dialogs/s |
+| opensips.net_waiting | UDP, TCP | kilobytes |
+| opensips.uri_checks | positive, negative | checks / sec |
+| opensips.traces | requests, replies | traces / sec |
+| opensips.shmem | total, used, real_used, max_used, free | kilobytes |
+| opensips.shmem_fragment | fragments | fragments |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Install charts.d plugin
+
+If [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed.
+
+
+#### Required software
+
+The collector requires `opensipsctl` to be installed.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `charts.d/opensips.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config charts.d/opensips.conf
+```
+#### Options
+
+The config file is sourced by the charts.d plugin. It's a standard bash file.
+
+The following collapsed table contains all the options that can be configured for the opensips collector.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| opensips_opts | Specify parameters to the `opensipsctl` command. If the default value fails to get global status, set here whatever options are needed to connect to the opensips server. | fifo get_statistics all | no |
+| opensips_cmd | If `opensipsctl` is not in $PATH, specify its full path here. | | no |
+| opensips_timeout | How long to wait for `opensipsctl` to respond. | 2 | no |
+| opensips_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 5 | no |
+| opensips_priority | The charts priority on the dashboard. | 80000 | no |
+| opensips_retries | The number of retries to do in case of failure before disabling the collector. | 10 | no |
+
+
+
+#### Examples
+
+##### Custom `opensipsctl` command
+
+Set a custom path to the `opensipsctl` command
+
+```bash
+#opensips_opts="fifo get_statistics all"
+opensips_cmd=/opt/opensips/bin/opensipsctl
+#opensips_timeout=2
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#opensips_update_every=5
+
+# the charts priority on the dashboard
+#opensips_priority=80000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#opensips_retries=10
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `opensips` collector, run the `charts.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `charts.d.plugin` to debug the collector:
+
+ ```bash
+ ./charts.d.plugin debug 1 opensips
+ ```
+
+
diff --git a/collectors/charts.d.plugin/opensips/metadata.yaml b/collectors/charts.d.plugin/opensips/metadata.yaml
index 27f6632862265b..356de5615c2512 100644
--- a/collectors/charts.d.plugin/opensips/metadata.yaml
+++ b/collectors/charts.d.plugin/opensips/metadata.yaml
@@ -41,6 +41,9 @@ modules:
setup:
prerequisites:
list:
+ - title: "Install charts.d plugin"
+ description: |
+ If [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed.
- title: "Required software"
description: "The collector requires the `opensipsctl` to be installed."
configuration:
diff --git a/collectors/charts.d.plugin/sensors/README.md b/collectors/charts.d.plugin/sensors/README.md
deleted file mode 100644
index 0dbe96225b2806..00000000000000
--- a/collectors/charts.d.plugin/sensors/README.md
+++ /dev/null
@@ -1,81 +0,0 @@
-# Linux machine sensors collector
-
-Use this collector when `lm-sensors` doesn't work on your device (e.g. for RPi temperatures).
-For all other cases use the [Python collector](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/sensors), which supports multiple
-jobs, is more efficient and performs calculations on top of the kernel provided values.
-
-This plugin will provide charts for all configured system sensors, by reading sensors directly from the kernel.
-The values graphed are the raw hardware values of the sensors.
-
-The plugin will create Netdata charts for:
-
-1. **Temperature**
-2. **Voltage**
-3. **Current**
-4. **Power**
-5. **Fans Speed**
-6. **Energy**
-7. **Humidity**
-
-One chart for every sensor chip found and each of the above will be created.
-
-## Enable the collector
-
-If using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), make sure `netdata-plugin-chartsd` is installed.
-
-The `sensors` collector is disabled by default.
-
-To enable the collector, you need to edit the configuration file of `charts.d/sensors.conf`. You can do so by using the `edit config` script.
-
-> ### Info
->
-> To edit configuration files in a safe way, we provide the [`edit config` script](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) located in your [Netdata config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory) (typically is `/etc/netdata`) that creates the proper file and opens it in an editor automatically.
-> It is recommended to use this way for configuring Netdata.
->
-> Please also note that after most configuration changes you will need to [restart the Agent](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for the changes to take effect.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config charts.d.conf
-```
-
-You need to uncomment the regarding `sensors`, and set the value to `force`.
-
-```shell
-# example=force
-sensors=force
-```
-
-## Configuration
-
-Edit the `charts.d/sensors.conf` configuration file using `edit-config`:
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config charts.d/sensors.conf
-```
-
-This is the internal default for `charts.d/sensors.conf`
-
-```sh
-# the directory the kernel keeps sensor data
-sensors_sys_dir="${NETDATA_HOST_PREFIX}/sys/devices"
-
-# how deep in the tree to check for sensor data
-sensors_sys_depth=10
-
-# if set to 1, the script will overwrite internal
-# script functions with code generated ones
-# leave to 1, is faster
-sensors_source_update=1
-
-# how frequently to collect sensor data
-# the default is to collect it at every iteration of charts.d
-sensors_update_every=
-
-# array of sensors which are excluded
-# the default is to include all
-sensors_excluded=()
-```
-
----
diff --git a/collectors/charts.d.plugin/sensors/README.md b/collectors/charts.d.plugin/sensors/README.md
new file mode 120000
index 00000000000000..7e5a416c43abee
--- /dev/null
+++ b/collectors/charts.d.plugin/sensors/README.md
@@ -0,0 +1 @@
+integrations/linux_sensors_sysfs.md
\ No newline at end of file
diff --git a/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md b/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md
new file mode 100644
index 00000000000000..130352f61b52d9
--- /dev/null
+++ b/collectors/charts.d.plugin/sensors/integrations/linux_sensors_sysfs.md
@@ -0,0 +1,201 @@
+
+
+# Linux Sensors (sysfs)
+
+
+
+
+
+Plugin: charts.d.plugin
+Module: sensors
+
+
+
+## Overview
+
+Use this collector when `lm-sensors` doesn't work on your device (e.g. for RPi temperatures).
+For all other cases use the [Python collector](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/sensors), which supports multiple jobs, is more efficient and performs calculations on top of the kernel provided values.
+
+
+It will provide charts for all configured system sensors, by reading sensors directly from the kernel.
+The values graphed are the raw hardware values of the sensors.
+
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+By default, the collector will try to read entries under `/sys/devices`.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per sensor chip
+
+Metrics related to sensor chips. Each chip provides its own set of the following metrics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| sensors.temp | {filename} | Celsius |
+| sensors.volt | {filename} | Volts |
+| sensors.curr | {filename} | Ampere |
+| sensors.power | {filename} | Watt |
+| sensors.fans | {filename} | Rotations / Minute |
+| sensors.energy | {filename} | Joule |
+| sensors.humidity | {filename} | Percent |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Install charts.d plugin
+
+If [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed.
+
+
+#### Enable the sensors collector
+
+The `sensors` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `charts.d.conf` file.
+
+```bash
+cd /etc/netdata # Replace this path with your Netdata config directory, if different
+sudo ./edit-config charts.d.conf
+```
+
+Change the value of the `sensors` setting to `force` and uncomment the line. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `charts.d/sensors.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config charts.d/sensors.conf
+```
+#### Options
+
+The config file is sourced by the charts.d plugin. It's a standard bash file.
+
+The following collapsed table contains all the options that can be configured for the sensors collector.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| sensors_sys_dir | The directory the kernel exposes sensor data. | /sys/devices | no |
+| sensors_sys_depth | How deep in the tree to check for sensor data. | 10 | no |
+| sensors_source_update | If set to 1, the script will overwrite internal script functions with code generated ones. | 1 | no |
+| sensors_update_every | The data collection frequency. If unset, will inherit the netdata update frequency. | 1 | no |
+| sensors_priority | The charts priority on the dashboard. | 90000 | no |
+| sensors_retries | The number of retries to do in case of failure before disabling the collector. | 10 | no |
+
+
+
+#### Examples
+
+##### Set sensors path depth
+
+Set a different sensors path depth
+
+```bash
+# the directory the kernel keeps sensor data
+#sensors_sys_dir="/sys/devices"
+
+# how deep in the tree to check for sensor data
+sensors_sys_depth=5
+
+# if set to 1, the script will overwrite internal
+# script functions with code generated ones
+# leave to 1, is faster
+#sensors_source_update=1
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#sensors_update_every=
+
+# the charts priority on the dashboard
+#sensors_priority=90000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#sensors_retries=10
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `sensors` collector, run the `charts.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `charts.d.plugin` to debug the collector:
+
+ ```bash
+ ./charts.d.plugin debug 1 sensors
+ ```
+
+
diff --git a/collectors/charts.d.plugin/sensors/metadata.yaml b/collectors/charts.d.plugin/sensors/metadata.yaml
index 33beaad299ccae..47f6f4042e989c 100644
--- a/collectors/charts.d.plugin/sensors/metadata.yaml
+++ b/collectors/charts.d.plugin/sensors/metadata.yaml
@@ -44,7 +44,20 @@ modules:
description: ""
setup:
prerequisites:
- list: []
+ list:
+ - title: "Install charts.d plugin"
+ description: |
+ If [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure `netdata-plugin-chartsd` is installed.
+ - title: "Enable the sensors collector"
+ description: |
+ The `sensors` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `charts.d.conf` file.
+
+ ```bash
+ cd /etc/netdata # Replace this path with your Netdata config directory, if different
+ sudo ./edit-config charts.d.conf
+ ```
+
+ Change the value of the `sensors` setting to `force` and uncomment the line. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
configuration:
file:
name: charts.d/sensors.conf
diff --git a/collectors/cups.plugin/README.md b/collectors/cups.plugin/README.md
deleted file mode 100644
index 8652ec575116c8..00000000000000
--- a/collectors/cups.plugin/README.md
+++ /dev/null
@@ -1,68 +0,0 @@
-
-
-# Printers (cups.plugin)
-
-`cups.plugin` collects Common Unix Printing System (CUPS) metrics.
-
-## Prerequisites
-
-This plugin needs a running local CUPS daemon (`cupsd`). This plugin does not need any configuration. Supports cups since version 1.7.
-
-If you installed Netdata using our native packages, you will have to additionally install `netdata-plugin-cups` to use this plugin for data collection. It is not installed by default due to the large number of dependencies it requires.
-
-## Charts
-
-`cups.plugin` provides one common section `destinations` and one section per destination.
-
-> Destinations in CUPS represent individual printers or classes (collections or pools) of printers ()
-
-The section `server` provides these charts:
-
-1. **destinations by state**
-
- - idle
- - printing
- - stopped
-
-2. **destinations by options**
-
- - total
- - accepting jobs
- - shared
-
-3. **total job number by status**
-
- - pending
- - processing
- - held
-
-4. **total job size by status**
-
- - pending
- - processing
- - held
-
-For each destination the plugin provides these charts:
-
-1. **job number by status**
-
- - pending
- - held
- - processing
-
-2. **job size by status**
-
- - pending
- - held
- - processing
-
-At the moment only job status pending, processing, and held are reported because we do not have a method to collect stopped, canceled, aborted and completed jobs which scales.
-
-
diff --git a/collectors/cups.plugin/README.md b/collectors/cups.plugin/README.md
new file mode 120000
index 00000000000000..e325706392dfdb
--- /dev/null
+++ b/collectors/cups.plugin/README.md
@@ -0,0 +1 @@
+integrations/cups.md
\ No newline at end of file
diff --git a/collectors/cups.plugin/cups_plugin.c b/collectors/cups.plugin/cups_plugin.c
index ce7f05d4d23973..827322066a9e2e 100644
--- a/collectors/cups.plugin/cups_plugin.c
+++ b/collectors/cups.plugin/cups_plugin.c
@@ -226,20 +226,8 @@ void reset_metrics() {
}
int main(int argc, char **argv) {
- stderror = stderr;
clocks_init();
-
- // ------------------------------------------------------------------------
- // initialization of netdata plugin
-
- program_name = "cups.plugin";
-
- // disable syslog
- error_log_syslog = 0;
-
- // set errors flood protection to 100 logs per hour
- error_log_errors_per_period = 100;
- error_log_throttle_period = 3600;
+ nd_log_initialize_for_external_plugins("cups.plugin");
parse_command_line(argc, argv);
diff --git a/collectors/cups.plugin/integrations/cups.md b/collectors/cups.plugin/integrations/cups.md
new file mode 100644
index 00000000000000..a8ea5b15f6430d
--- /dev/null
+++ b/collectors/cups.plugin/integrations/cups.md
@@ -0,0 +1,141 @@
+
+
+# CUPS
+
+
+
+
+
+Plugin: cups.plugin
+Module: cups.plugin
+
+
+
+## Overview
+
+Monitor CUPS performance for achieving optimal printing system operations. Monitor job statuses, queue lengths, and error rates to ensure smooth printing tasks.
+
+The plugin uses the CUPS shared library to connect to and monitor the server.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs to access the server. Netdata sets permissions during installation time to reach the server through its library.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin detects when CUPS server is running and tries to connect to it.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per CUPS instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cups.dests_state | idle, printing, stopped | dests |
+| cups.dests_option | total, acceptingjobs, shared | dests |
+| cups.job_num | pending, held, processing | jobs |
+| cups.job_size | pending, held, processing | KB |
+
+### Per destination
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cups.destination_job_num | pending, held, processing | jobs |
+| cups.destination_job_size | pending, held, processing | KB |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Minimum setup
+
+The CUPS server must be installed and running. If you installed `netdata` using a package manager, it is also necessary to install the package `netdata-plugin-cups`.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:cups]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 1 | no |
+| command options | Additional parameters for the collector | | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/cups.plugin/metadata.yaml b/collectors/cups.plugin/metadata.yaml
index a416d392e880bf..9ec2f4118fecd0 100644
--- a/collectors/cups.plugin/metadata.yaml
+++ b/collectors/cups.plugin/metadata.yaml
@@ -37,7 +37,7 @@ modules:
prerequisites:
list:
- title: Minimum setup
- description: "The CUPS server must be installed and running."
+ description: "The CUPS server must be installed and running. If you installed `netdata` using a package manager, it is also necessary to install the package `netdata-plugin-cups`."
configuration:
file:
name: "netdata.conf"
diff --git a/collectors/debugfs.plugin/debugfs_plugin.c b/collectors/debugfs.plugin/debugfs_plugin.c
index c189f908d16a7a..f693de96c1862a 100644
--- a/collectors/debugfs.plugin/debugfs_plugin.c
+++ b/collectors/debugfs.plugin/debugfs_plugin.c
@@ -159,14 +159,8 @@ static void debugfs_parse_args(int argc, char **argv)
int main(int argc, char **argv)
{
- // debug_flags = D_PROCFILE;
- stderror = stderr;
-
- // set the name for logging
- program_name = "debugfs.plugin";
-
- // disable syslog for debugfs.plugin
- error_log_syslog = 0;
+ clocks_init();
+ nd_log_initialize_for_external_plugins("debugfs.plugin");
netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX");
if (verify_netdata_host_prefix() == -1)
@@ -241,6 +235,13 @@ int main(int argc, char **argv)
netdata_log_info("all modules are disabled, exiting...");
return 1;
}
+
+ fprintf(stdout, "\n");
+ fflush(stdout);
+ if (ferror(stdout) && errno == EPIPE) {
+ netdata_log_error("error writing to stdout: EPIPE. Exiting...");
+ return 1;
+ }
}
fprintf(stdout, "EXIT\n");
diff --git a/collectors/debugfs.plugin/integrations/linux_zswap.md b/collectors/debugfs.plugin/integrations/linux_zswap.md
new file mode 100644
index 00000000000000..44478454b0d01d
--- /dev/null
+++ b/collectors/debugfs.plugin/integrations/linux_zswap.md
@@ -0,0 +1,138 @@
+
+
+# Linux ZSwap
+
+
+
+
+
+Plugin: debugfs.plugin
+Module: /sys/kernel/debug/zswap
+
+
+
+## Overview
+
+Collects zswap performance metrics on Linux systems.
+
+
+Parses data from `debugfs` files.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+This integration requires read access to files under `/sys/kernel/debug/zswap`, which are accessible only to the root user by default. Netdata uses Linux Capabilities to give the plugin access to debugfs. `CAP_DAC_READ_SEARCH` is added automatically during installation. This capability allows bypassing file read permission checks and directory read and execute permission checks. If file capabilities are not usable, then the plugin is instead installed with the SUID bit set in permissions so that it runs as root.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+Assuming that debugfs is mounted and the required permissions are available, this integration will automatically detect whether or not the system is using zswap.
+
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+Monitor the performance statistics of zswap.
+
+### Per Linux ZSwap instance
+
+Global zswap performance metrics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.zswap_pool_compression_ratio | compression_ratio | ratio |
+| system.zswap_pool_compressed_size | compressed_size | bytes |
+| system.zswap_pool_raw_size | uncompressed_size | bytes |
+| system.zswap_rejections | compress_poor, kmemcache_fail, alloc_fail, reclaim_fail | rejections/s |
+| system.zswap_pool_limit_hit | limit | events/s |
+| system.zswap_written_back_raw_bytes | written_back | bytes/s |
+| system.zswap_same_filled_raw_size | same_filled | bytes |
+| system.zswap_duplicate_entry | duplicate | entries/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### filesystem
+
+The debugfs filesystem must be mounted on your host for the plugin to collect data. You can mount it manually by running `sudo mount -t debugfs none /sys/kernel/debug/`. It is also recommended to add an entry to your fstab(5) so that the filesystem is mounted automatically before Netdata starts.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:debugfs]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 1 | no |
+| command options | Additional parameters for the collector | | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/debugfs.plugin/integrations/power_capping.md b/collectors/debugfs.plugin/integrations/power_capping.md
new file mode 100644
index 00000000000000..d4b7eb890dbf05
--- /dev/null
+++ b/collectors/debugfs.plugin/integrations/power_capping.md
@@ -0,0 +1,132 @@
+
+
+# Power Capping
+
+
+
+
+
+Plugin: debugfs.plugin
+Module: intel_rapl
+
+
+
+## Overview
+
+Collects power capping performance metrics on Linux systems.
+
+
+Parses data from `debugfs` files.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+This integration requires read access to files under `/sys/devices/virtual/powercap`, which are accessible only to the root user by default. Netdata uses Linux Capabilities to give the plugin access to debugfs. `CAP_DAC_READ_SEARCH` is added automatically during installation. This capability allows bypassing file read permission checks and directory read and execute permission checks. If file capabilities are not usable, then the plugin is instead installed with the SUID bit set in permissions so that it runs as root.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+Assuming that debugfs is mounted and the required permissions are available, this integration will automatically detect whether or not the system exposes Intel RAPL power capping zones.
+
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+Monitor the power consumption of the Intel RAPL zones.
+
+### Per Power Capping instance
+
+Global Intel RAPL zones.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cpu.powercap_intel_rapl_zone | Power | Watts |
+| cpu.powercap_intel_rapl_subzones | dram, core, uncore | Watts |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### filesystem
+
+The debugfs filesystem must be mounted on your host for the plugin to collect data. You can mount it manually by running `sudo mount -t debugfs none /sys/kernel/debug/`. It is also recommended to add an entry to your fstab(5) so that the filesystem is mounted automatically before Netdata starts.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:debugfs]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 1 | no |
+| command options | Additional parameters for the collector | | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/debugfs.plugin/integrations/system_memory_fragmentation.md b/collectors/debugfs.plugin/integrations/system_memory_fragmentation.md
new file mode 100644
index 00000000000000..ef287bc3011b35
--- /dev/null
+++ b/collectors/debugfs.plugin/integrations/system_memory_fragmentation.md
@@ -0,0 +1,136 @@
+
+
+# System Memory Fragmentation
+
+
+
+
+
+Plugin: debugfs.plugin
+Module: /sys/kernel/debug/extfrag
+
+
+
+## Overview
+
+Collects memory fragmentation statistics from the Linux kernel.
+
+Parses data from `debugfs` files.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+This integration requires read access to files under `/sys/kernel/debug/extfrag`, which are accessible only to the root user by default. Netdata uses Linux Capabilities to give the plugin access to debugfs. `CAP_DAC_READ_SEARCH` is added automatically during installation. This capability allows bypassing file read permission checks and directory read and execute permission checks. If file capabilities are not usable, then the plugin is instead installed with the SUID bit set in permissions so that it runs as root.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+Assuming that debugfs is mounted and the required permissions are available, this integration will automatically run by default.
+
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+Monitor the overall memory fragmentation of the system.
+
+### Per node
+
+Memory fragmentation statistics for each NUMA node in the system.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| numa_node | The NUMA node the metrics are associated with. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.fragmentation_index_dma | order0, order1, order2, order3, order4, order5, order6, order7, order8, order9, order10 | index |
+| mem.fragmentation_index_dma32 | order0, order1, order2, order3, order4, order5, order6, order7, order8, order9, order10 | index |
+| mem.fragmentation_index_normal | order0, order1, order2, order3, order4, order5, order6, order7, order8, order9, order10 | index |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### filesystem
+
+The debugfs filesystem must be mounted on your host for the plugin to collect data. You can mount it manually by running `sudo mount -t debugfs none /sys/kernel/debug/`. It is also recommended to add an entry to your fstab(5) so that the filesystem is mounted automatically before Netdata starts.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:debugfs]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 1 | no |
+| command options | Additional parameters for the collector | | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/debugfs.plugin/sys_devices_virtual_powercap.c b/collectors/debugfs.plugin/sys_devices_virtual_powercap.c
index 5f22b19e227974..ee261c27f65bcb 100644
--- a/collectors/debugfs.plugin/sys_devices_virtual_powercap.c
+++ b/collectors/debugfs.plugin/sys_devices_virtual_powercap.c
@@ -151,7 +151,7 @@ int do_sys_devices_virtual_powercap(int update_every, const char *name __maybe_u
update_every);
fprintf(stdout,
- "CLABEL 'zone' '%s' 0\n"
+ "CLABEL 'zone' '%s' 1\n"
"CLABEL_COMMIT\n",
zone->name);
@@ -171,7 +171,7 @@ int do_sys_devices_virtual_powercap(int update_every, const char *name __maybe_u
update_every);
fprintf(stdout,
- "CLABEL 'zone' '%s' 0\n"
+ "CLABEL 'zone' '%s' 1\n"
"CLABEL_COMMIT\n",
zone->name);
@@ -186,7 +186,7 @@ int do_sys_devices_virtual_powercap(int update_every, const char *name __maybe_u
if(get_measurement(zone->path, &zone->energy_uj)) {
fprintf(stdout,
"BEGIN '%s'\n"
- "SET power = %lld\n"
+ "SET power = %llu\n"
"END\n"
, zone->zone_chart_id
, zone->energy_uj);
@@ -200,7 +200,7 @@ int do_sys_devices_virtual_powercap(int update_every, const char *name __maybe_u
for (struct zone_t *subzone = zone->subzones; subzone; subzone = subzone->next) {
if(get_measurement(subzone->path, &subzone->energy_uj)) {
fprintf(stdout,
- "SET '%s' = %lld\n",
+ "SET '%s' = %llu\n",
subzone->name,
subzone->energy_uj);
}
diff --git a/collectors/diskspace.plugin/README.md b/collectors/diskspace.plugin/README.md
deleted file mode 100644
index 5ca1090fdd5b1e..00000000000000
--- a/collectors/diskspace.plugin/README.md
+++ /dev/null
@@ -1,55 +0,0 @@
-# Monitor disk (diskspace.plugin)
-
-This plugin monitors the disk space usage of mounted disks, under Linux. The plugin requires Netdata to have execute/search permissions on the mount point itself, as well as each component of the absolute path to the mount point.
-
-Two charts are available for every mount:
-
-- Disk Space Usage
-- Disk Files (inodes) Usage
-
-## configuration
-
-Simple patterns can be used to exclude mounts from showed statistics based on path or filesystem. By default read-only mounts are not displayed. To display them `yes` should be set for a chart instead of `auto`.
-
-By default, Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though).
-
-Netdata will try to detect mounts that are duplicates (i.e. from the same device), or binds, and will not display charts for them, as the device is usually already monitored.
-
-To configure this plugin, you need to edit the configuration file `netdata.conf`. You can do so by using the `edit config` script.
-
-> ### Info
->
-> To edit configuration files in a safe way, we provide the [`edit config` script](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) located in your [Netdata config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory) (typically is `/etc/netdata`) that creates the proper file and opens it in an editor automatically.
-> It is recommended to use this way for configuring Netdata.
->
-> Please also note that after most configuration changes you will need to [restart the Agent](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for the changes to take effect.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config netdata.conf
-```
-
-You can enable the effect of each line by uncommenting it.
-
-You can set `yes` for a chart instead of `auto` to enable it permanently. You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics for all internal Netdata plugins.
-
-```conf
-[plugin:proc:diskspace]
- # remove charts of unmounted disks = yes
- # update every = 1
- # check for new mount points every = 15
- # exclude space metrics on paths = /proc/* /sys/* /var/run/user/* /run/user/* /snap/* /var/lib/docker/*
- # exclude space metrics on filesystems = *gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs
- # space usage for all disks = auto
- # inodes usage for all disks = auto
-```
-
-Charts can be enabled/disabled for every mount separately, just look for the name of the mount after `[plugin:proc:diskspace:`.
-
-```conf
-[plugin:proc:diskspace:/]
- # space usage = auto
- # inodes usage = auto
-```
-
-> for disks performance monitoring, see the `proc` plugin, [here](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md#monitoring-disks)
diff --git a/collectors/diskspace.plugin/README.md b/collectors/diskspace.plugin/README.md
new file mode 120000
index 00000000000000..c9f4e1c5e3e4fc
--- /dev/null
+++ b/collectors/diskspace.plugin/README.md
@@ -0,0 +1 @@
+integrations/disk_space.md
\ No newline at end of file
diff --git a/collectors/diskspace.plugin/integrations/disk_space.md b/collectors/diskspace.plugin/integrations/disk_space.md
new file mode 100644
index 00000000000000..1c937ed7fd8fd0
--- /dev/null
+++ b/collectors/diskspace.plugin/integrations/disk_space.md
@@ -0,0 +1,140 @@
+
+
+# Disk space
+
+
+
+
+
+Plugin: diskspace.plugin
+Module: diskspace.plugin
+
+
+
+## Overview
+
+Monitor Disk space metrics for proficient storage management. Keep track of usage, free space, and error rates to prevent disk space issues.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin reads data from the `/proc/self/mountinfo` and `/proc/diskstats` files.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per mount point
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| mount_point | Path used to mount a filesystem |
+| filesystem | The filesystem used to format a partition. |
+| mount_root | Root directory where mount points are present. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| disk.space | avail, used, reserved_for_root | GiB |
+| disk.inodes | avail, used, reserved_for_root | inodes |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ disk_space_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf) | disk.space | disk ${label:mount_point} space utilization |
+| [ disk_inode_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf) | disk.inodes | disk ${label:mount_point} inode utilization |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:proc:diskspace]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+You can also specify per mount point `[plugin:proc:diskspace:mountpoint]`
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 1 | no |
+| remove charts of unmounted disks | Remove chart when a device is unmounted on host. | yes | no |
+| check for new mount points every | Parse proc files frequency. | 15 | no |
+| exclude space metrics on paths | Do not show metrics (charts) for listed paths. This option accepts netdata simple pattern. | /proc/* /sys/* /var/run/user/* /run/user/* /snap/* /var/lib/docker/* | no |
+| exclude space metrics on filesystems | Do not show metrics (charts) for listed filesystems. This option accepts netdata simple pattern. | *gvfs *gluster* *s3fs *ipfs *davfs2 *httpfs *sshfs *gdfs *moosefs fusectl autofs | no |
+| exclude inode metrics on filesystems | Do not show metrics (charts) for listed filesystems. This option accepts netdata simple pattern. | msdosfs msdos vfat overlayfs aufs* *unionfs | no |
+| space usage for all disks | Define if plugin will show metrics for space usage. When value is set to `auto` plugin will try to access information to display if filesystem or path was not discarded with previous option. | auto | no |
+| inodes usage for all disks | Define if plugin will show metrics for inode usage. When value is set to `auto` plugin will try to access information to display if filesystem or path was not discarded with previous option. | auto | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/diskspace.plugin/plugin_diskspace.c b/collectors/diskspace.plugin/plugin_diskspace.c
index 73236a19e2e051..94257810c36b2d 100644
--- a/collectors/diskspace.plugin/plugin_diskspace.c
+++ b/collectors/diskspace.plugin/plugin_diskspace.c
@@ -9,6 +9,8 @@
#define DEFAULT_EXCLUDED_FILESYSTEMS_INODES "msdosfs msdos vfat overlayfs aufs* *unionfs"
#define CONFIG_SECTION_DISKSPACE "plugin:proc:diskspace"
+#define RRDFUNCTIONS_DISKSPACE_HELP "View mount point statistics"
+
#define MAX_STAT_USEC 10000LU
#define SLOW_UPDATE_EVERY 5
@@ -42,7 +44,12 @@ struct mount_point_metadata {
int updated;
int slow;
- DICTIONARY *chart_labels;
+ bool function_ready;
+
+ STRING *filesystem;
+ STRING *mountroot;
+
+ RRDLABELS *chart_labels;
size_t collected; // the number of times this has been collected
@@ -59,7 +66,7 @@ struct mount_point_metadata {
static DICTIONARY *dict_mountpoints = NULL;
-#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); (st) = NULL; } } while(st)
+#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete___safe_from_collector_thread(st); (st) = NULL; } } while(st)
int mount_point_cleanup(const char *name, void *entry, int slow) {
(void)name;
@@ -76,10 +83,17 @@ int mount_point_cleanup(const char *name, void *entry, int slow) {
}
if(likely(cleanup_mount_points && mp->collected)) {
+ mp->function_ready = false;
mp->collected = 0;
mp->updated = 0;
mp->shown_error = 0;
+ string_freez(mp->filesystem);
+ string_freez(mp->mountroot);
+
+ rrdset_obsolete_and_pointer_null(mp->st_space);
+ rrdset_obsolete_and_pointer_null(mp->st_inodes);
+
mp->rd_space_avail = NULL;
mp->rd_space_used = NULL;
mp->rd_space_reserved = NULL;
@@ -87,9 +101,6 @@ int mount_point_cleanup(const char *name, void *entry, int slow) {
mp->rd_inodes_avail = NULL;
mp->rd_inodes_used = NULL;
mp->rd_inodes_reserved = NULL;
-
- rrdset_obsolete_and_pointer_null(mp->st_space);
- rrdset_obsolete_and_pointer_null(mp->st_inodes);
}
return 0;
@@ -214,7 +225,7 @@ static void calculate_values_and_show_charts(
m->st_space = rrdset_find_active_bytype_localhost("disk_space", disk);
if(unlikely(!m->st_space || m->st_space->update_every != update_every)) {
char title[4096 + 1];
- snprintfz(title, 4096, "Disk Space Usage");
+ snprintfz(title, sizeof(title) - 1, "Disk Space Usage");
m->st_space = rrdset_create_localhost(
"disk_space"
, disk
@@ -254,7 +265,7 @@ static void calculate_values_and_show_charts(
m->st_inodes = rrdset_find_active_bytype_localhost("disk_inodes", disk);
if(unlikely(!m->st_inodes) || m->st_inodes->update_every != update_every) {
char title[4096 + 1];
- snprintfz(title, 4096, "Disk Files (inodes) Usage");
+ snprintfz(title, sizeof(title) - 1, "Disk Files (inodes) Usage");
m->st_inodes = rrdset_create_localhost(
"disk_inodes"
, disk
@@ -286,6 +297,8 @@ static void calculate_values_and_show_charts(
rendered++;
}
+ m->function_ready = rendered > 0;
+
if(likely(rendered))
m->collected++;
}
@@ -330,22 +343,9 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) {
dict_mountpoints = dictionary_create_advanced(DICT_OPTION_NONE, &dictionary_stats_category_collectors, 0);
}
-#ifdef NETDATA_SKIP_IF_NOT_COLLECT
- if(unlikely(simple_pattern_matches(excluded_mountpoints, mi->mount_point))) {
- netdata_log_debug(D_COLLECTOR, "DISKSPACE: Skipping mount point '%s' (disk '%s', filesystem '%s', root '%s') because it is excluded by configuration.",
- mi->mount_point,
- disk,
- mi->filesystem?mi->filesystem:"",
- mi->root?mi->root:"");
- return;
- }
-#endif
-
struct mount_point_metadata *m = dictionary_get(dict_mountpoints, mi->mount_point);
if(unlikely(!m)) {
int slow = 0;
- char var_name[4096 + 1];
- snprintfz(var_name, 4096, "plugin:proc:diskspace:%s", mi->mount_point);
int def_space = config_get_boolean_ondemand(CONFIG_SECTION_DISKSPACE, "space usage for all disks", CONFIG_BOOLEAN_AUTO);
int def_inodes = config_get_boolean_ondemand(CONFIG_SECTION_DISKSPACE, "inodes usage for all disks", CONFIG_BOOLEAN_AUTO);
@@ -396,8 +396,16 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) {
slow = 1;
}
- do_space = config_get_boolean_ondemand(var_name, "space usage", def_space);
- do_inodes = config_get_boolean_ondemand(var_name, "inodes usage", def_inodes);
+ char var_name[4096 + 1];
+ snprintfz(var_name, 4096, "plugin:proc:diskspace:%s", mi->mount_point);
+
+ do_space = def_space;
+ do_inodes = def_inodes;
+
+ if (config_exists(var_name, "space usage"))
+ do_space = config_get_boolean_ondemand(var_name, "space usage", def_space);
+ if (config_exists(var_name, "inodes usage"))
+ do_inodes = config_get_boolean_ondemand(var_name, "inodes usage", def_inodes);
struct mount_point_metadata mp = {
.do_space = do_space,
@@ -419,6 +427,9 @@ static inline void do_disk_space_stats(struct mountinfo *mi, int update_every) {
.rd_inodes_reserved = NULL
};
+ mp.filesystem = string_strdupz(mi->filesystem);
+ mp.mountroot = string_strdupz(mi->root);
+
mp.chart_labels = rrdlabels_create();
rrdlabels_add(mp.chart_labels, "mount_point", mi->mount_point, RRDLABEL_SRC_AUTO);
rrdlabels_add(mp.chart_labels, "filesystem", mi->filesystem, RRDLABEL_SRC_AUTO);
@@ -625,6 +636,228 @@ static void diskspace_main_cleanup(void *ptr) {
#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 3
#endif
+int diskspace_function_mount_points(BUFFER *wb, int timeout __maybe_unused, const char *function __maybe_unused,
+ void *collector_data __maybe_unused,
+ rrd_function_result_callback_t result_cb, void *result_cb_data,
+ rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data,
+ rrd_function_register_canceller_cb_t register_canceller_cb __maybe_unused,
+ void *register_canceller_cb_data __maybe_unused) { // "mount-points" function: writes a JSON table into wb with one row per ready mount point, then reports the result via result_cb; returns the HTTP response code
+
+ buffer_flush(wb);
+ wb->content_type = CT_APPLICATION_JSON;
+ buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
+
+ buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost));
+ buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
+ buffer_json_member_add_string(wb, "type", "table");
+ buffer_json_member_add_time_t(wb, "update_every", 1);
+ buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_DISKSPACE_HELP);
+ buffer_json_member_add_array(wb, "data"); // opens the "data" array; closed after the loop below
+
+ double max_space_util = 0.0; // the max_* values are later passed to buffer_rrdf_table_add_field() for the matching column
+ double max_space_avail = 0.0;
+ double max_space_used = 0.0;
+ double max_space_reserved = 0.0;
+
+ double max_inodes_util = 0.0;
+ double max_inodes_avail = 0.0;
+ double max_inodes_used = 0.0;
+ double max_inodes_reserved = 0.0;
+
+ struct mount_point_metadata *mp;
+ dfe_start_write(dict_mountpoints, mp) { // walk every entry of the mount points dictionary
+ if (!mp->function_ready) // the collector sets this only when at least one chart was rendered, and clears it on cleanup
+ continue;
+
+ buffer_json_add_array_item_array(wb); // one row (a nested array) per mount point
+
+ buffer_json_add_array_item_string(wb, mp_dfe.name); // dictionary key; entries are looked up by mi->mount_point elsewhere in this file
+ buffer_json_add_array_item_string(wb, string2str(mp->filesystem));
+ buffer_json_add_array_item_string(wb, string2str(mp->mountroot));
+
+ double space_avail = rrddim_get_last_stored_value(mp->rd_space_avail, &max_space_avail, 1.0); // presumably also raises the max passed by pointer - TODO confirm in rrddim_get_last_stored_value()
+ double space_used = rrddim_get_last_stored_value(mp->rd_space_used, &max_space_used, 1.0);
+ double space_reserved = rrddim_get_last_stored_value(mp->rd_space_reserved, &max_space_reserved, 1.0);
+ double inodes_avail = rrddim_get_last_stored_value(mp->rd_inodes_avail, &max_inodes_avail, 1.0);
+ double inodes_used = rrddim_get_last_stored_value(mp->rd_inodes_used, &max_inodes_used, 1.0);
+ double inodes_reserved = rrddim_get_last_stored_value(mp->rd_inodes_reserved, &max_inodes_reserved, 1.0);
+
+ double space_util = NAN; // stays NAN unless both avail and used are numbers
+ if (!isnan(space_avail) && !isnan(space_used)) {
+ space_util = space_avail + space_used > 0 ? space_used * 100.0 / (space_avail + space_used) : 0; // used as a percentage of (avail + used); reserved is not part of the ratio
+ max_space_util = MAX(max_space_util, space_util);
+ }
+ double inodes_util = NAN; // stays NAN unless both avail and used are numbers
+ if (!isnan(inodes_avail) && !isnan(inodes_used)) {
+ inodes_util = inodes_avail + inodes_used > 0 ? inodes_used * 100.0 / (inodes_avail + inodes_used) : 0; // same formula as the space utilization above
+ max_inodes_util = MAX(max_inodes_util, inodes_util);
+ }
+
+ buffer_json_add_array_item_double(wb, space_util);
+ buffer_json_add_array_item_double(wb, space_avail);
+ buffer_json_add_array_item_double(wb, space_used);
+ buffer_json_add_array_item_double(wb, space_reserved);
+
+ buffer_json_add_array_item_double(wb, inodes_util);
+ buffer_json_add_array_item_double(wb, inodes_avail);
+ buffer_json_add_array_item_double(wb, inodes_used);
+ buffer_json_add_array_item_double(wb, inodes_reserved);
+
+ buffer_json_array_close(wb); // end of this mount point's row
+ }
+ dfe_done(mp);
+
+ buffer_json_array_close(wb); // data
+ buffer_json_member_add_object(wb, "columns");
+ {
+ size_t field_id = 0; // incremented per column; columns are added in the same order as the row values written above
+
+ buffer_rrdf_table_add_field(wb, field_id++, "Mountpoint", "Mountpoint Name",
+ RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+ 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+ RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "Filesystem", "Mountpoint Filesystem",
+ RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+ 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+ RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "Root", "Mountpoint Root",
+ RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+ 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+ RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_UNIQUE_KEY,
+ NULL);
+
+ buffer_rrdf_table_add_field(wb, field_id++, "Used%", "Space Utilization",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "%", max_space_util, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "Avail", "Space Avail",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "GiB", max_space_avail, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "Used", "Space Used",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "GiB", max_space_used, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "Reserved", "Space Reserved for root",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "GiB", max_space_reserved, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+
+ buffer_rrdf_table_add_field(wb, field_id++, "iUsed%", "Inodes Utilization",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "%", max_inodes_util, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_NONE,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "iAvail", "Inodes Avail",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "inodes", max_inodes_avail, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_NONE,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "iUsed", "Inodes Used",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "inodes", max_inodes_used, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_NONE,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "iReserved", "Inodes Reserved for root",
+ RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, "inodes", max_inodes_reserved, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_NONE,
+ NULL);
+ }
+
+ buffer_json_object_close(wb); // columns
+ buffer_json_member_add_string(wb, "default_sort_column", "Used%"); // refers to the "Used%" column defined above
+
+ buffer_json_member_add_object(wb, "charts"); // chart definitions; each lists the column names it is built from
+ {
+ buffer_json_member_add_object(wb, "Utilization");
+ {
+ buffer_json_member_add_string(wb, "name", "Utilization");
+ buffer_json_member_add_string(wb, "type", "stacked-bar");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "Used%");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ buffer_json_member_add_object(wb, "Usage");
+ {
+ buffer_json_member_add_string(wb, "name", "Usage");
+ buffer_json_member_add_string(wb, "type", "stacked-bar");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "Avail");
+ buffer_json_add_array_item_string(wb, "Used");
+ buffer_json_add_array_item_string(wb, "Reserved");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ buffer_json_member_add_object(wb, "Inodes");
+ {
+ buffer_json_member_add_string(wb, "name", "Inodes");
+ buffer_json_member_add_string(wb, "type", "stacked-bar");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "iAvail");
+ buffer_json_add_array_item_string(wb, "iUsed");
+ buffer_json_add_array_item_string(wb, "iReserved");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+ }
+ buffer_json_object_close(wb); // charts
+
+ buffer_json_member_add_array(wb, "default_charts"); // pairs of [chart name, column name]; only "Utilization" and "Usage" are listed
+ {
+ buffer_json_add_array_item_array(wb);
+ buffer_json_add_array_item_string(wb, "Utilization");
+ buffer_json_add_array_item_string(wb, "Mountpoint");
+ buffer_json_array_close(wb);
+
+ buffer_json_add_array_item_array(wb);
+ buffer_json_add_array_item_string(wb, "Usage");
+ buffer_json_add_array_item_string(wb, "Mountpoint");
+ buffer_json_array_close(wb);
+ }
+ buffer_json_array_close(wb); // default_charts
+
+ buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1); // expiry is set one second after the current time
+ buffer_json_finalize(wb);
+
+ int response = HTTP_RESP_OK;
+ if(is_cancelled_cb && is_cancelled_cb(is_cancelled_cb_data)) { // cancellation is checked only once, after the whole response has been built
+ buffer_flush(wb);
+ response = HTTP_RESP_CLIENT_CLOSED_REQUEST;
+ }
+
+ if(result_cb) // the callback is optional; the response code is returned either way
+ result_cb(wb, response, result_cb_data);
+
+ return response;
+}
+
void *diskspace_main(void *ptr) {
worker_register("DISKSPACE");
worker_register_job_name(WORKER_JOB_MOUNTINFO, "mountinfo");
@@ -632,6 +865,7 @@ void *diskspace_main(void *ptr) {
worker_register_job_name(WORKER_JOB_CLEANUP, "cleanup");
rrd_collector_started();
+ rrd_function_add(localhost, NULL, "mount-points", 10, RRDFUNCTIONS_DISKSPACE_HELP, true, diskspace_function_mount_points, NULL);
netdata_thread_cleanup_push(diskspace_main_cleanup, ptr);
diff --git a/collectors/ebpf.plugin/README.md b/collectors/ebpf.plugin/README.md
index fb036a5aa3939f..06915ea5218ea2 100644
--- a/collectors/ebpf.plugin/README.md
+++ b/collectors/ebpf.plugin/README.md
@@ -261,7 +261,7 @@ You can also enable the following eBPF programs:
- `swap` : This eBPF program creates charts that show information about swap access.
- `mdflush`: This eBPF program creates charts that show information about
- `sync`: Monitor calls to syscalls sync(2), fsync(2), fdatasync(2), syncfs(2), msync(2), and sync_file_range(2).
-- `network viewer`: This eBPF program creates charts with information about `TCP` and `UDP` functions, including the
+- `socket`: This eBPF program creates charts with information about `TCP` and `UDP` functions, including the
bandwidth consumed by each.
multi-device software flushes.
- `vfs`: This eBPF program creates charts that show information about VFS (Virtual File System) functions.
@@ -302,12 +302,13 @@ are divided in the following sections:
#### `[network connections]`
-You can configure the information shown on `outbound` and `inbound` charts with the settings in this section.
+You can configure the information shown with function `ebpf_socket` using the settings in this section.
```conf
[network connections]
- maximum dimensions = 500
+ enabled = yes
resolve hostname ips = no
+ resolve service names = yes
ports = 1-1024 !145 !domain
hostnames = !example.com
ips = !127.0.0.1/8 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 fc00::/7
@@ -318,24 +319,23 @@ write `ports = 19999`, Netdata will collect only connections for itself. The `ho
[simple patterns](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md). The `ports`, and `ips` settings accept negation (`!`) to deny
specific values or asterisk alone to define all values.
-In the above example, Netdata will collect metrics for all ports between 1 and 443, with the exception of 53 (domain)
-and 145.
+In the above example, Netdata will collect metrics for all ports between `1` and `1024`, with the exception of `53` (domain)
+and `145`.
The following options are available:
+- `enabled`: Enable or disable network connections monitoring. Disabling it can directly affect the output of some functions.
+- `resolve hostname ips`: Enable resolving IPs to hostnames. It is disabled by default because it can be too slow.
+- `resolve service names`: Convert destination ports into service names, for example, port `53` protocol `UDP` becomes `domain`.
+ All names are read from `/etc/services`.
- `ports`: Define the destination ports for Netdata to monitor.
- `hostnames`: The list of hostnames that can be resolved to an IP address.
- `ips`: The IP or range of IPs that you want to monitor. You can use IPv4 or IPv6 addresses, use dashes to define a
- range of IPs, or use CIDR values. By default, only data for private IP addresses is collected, but this can
- be changed with the `ips` setting.
+ range of IPs, or use CIDR values.
-By default, Netdata displays up to 500 dimensions on network connection charts. If there are more possible dimensions,
-they will be bundled into the `other` dimension. You can increase the number of shown dimensions by changing
-the `maximum dimensions` setting.
-
-The dimensions for the traffic charts are created using the destination IPs of the sockets by default. This can be
-changed setting `resolve hostname ips = yes` and restarting Netdata, after this Netdata will create dimensions using
-the `hostnames` every time that is possible to resolve IPs to their hostnames.
+By default the traffic table is created using the destination IPs and ports of the sockets. This can be
+changed, so that Netdata uses service names (if possible), by specifying `resolve service names = yes` in the configuration
+section.
#### `[service name]`
@@ -990,13 +990,15 @@ shows how the lockdown module impacts `ebpf.plugin` based on the selected option
If you or your distribution compiled the kernel with the last combination, your system cannot load shared libraries
required to run `ebpf.plugin`.
-## Function
+## Functions
+
+### ebpf_thread
The eBPF plugin has a [function](https://github.com/netdata/netdata/blob/master/docs/cloud/netdata-functions.md) named
`ebpf_thread` that controls its internal threads and helps to reduce the overhead on host. Using the function you
can run the plugin with all threads disabled and enable them only when you want to take a look in specific areas.
-### List threads
+#### List threads
To list all threads status you can query directly the endpoint function:
@@ -1006,7 +1008,7 @@ It is also possible to query a specific thread adding keyword `thread` and threa
`http://localhost:19999/api/v1/function?function=ebpf_thread%20thread:mount`
-### Enable thread
+#### Enable thread
It is possible to enable a specific thread using the keyword `enable`:
@@ -1019,14 +1021,14 @@ after the thread name:
in this example thread `mount` will run during 600 seconds (10 minutes).
-### Disable thread
+#### Disable thread
It is also possible to stop any thread running using the keyword `disable`. For example, to disable `cachestat` you can
request:
`http://localhost:19999/api/v1/function?function=ebpf_thread%20disable:cachestat`
-### Debugging threads
+#### Debugging threads
You can verify the impact of threads on the host by running the
[ebpf_thread_function.sh](https://github.com/netdata/netdata/blob/master/tests/ebpf/ebpf_thread_function.sh)
@@ -1036,3 +1038,34 @@ You can check the results of having threads running on your environment in the N
dashboard
+
+### ebpf_socket
+
+The eBPF plugin has a [function](https://github.com/netdata/netdata/blob/master/docs/cloud/netdata-functions.md) named
+`ebpf_socket` that shows the current status of open sockets on host.
+
+#### Families
+
+The plugin shows by default sockets for IPV4 and IPV6, but it is possible to select a specific family by passing the
+family as an argument:
+
+`http://localhost:19999/api/v1/function?function=ebpf_socket%20family:IPV4`
+
+#### Resolve
+
+The plugin resolves ports to service names by default. You can show the port number by disabling the name resolution:
+
+`http://localhost:19999/api/v1/function?function=ebpf_socket%20resolve:NO`
+
+#### CIDR
+
+The plugin shows connections for all possible destination IPs by default. You can limit the range by specifying the CIDR:
+
+`http://localhost:19999/api/v1/function?function=ebpf_socket%20cidr:192.168.1.0/24`
+
+#### PORT
+
+The plugin shows connections for all possible ports by default. You can limit the range by specifying a port or range
+of ports:
+
+`http://localhost:19999/api/v1/function?function=ebpf_socket%20port:1-1024`
diff --git a/collectors/ebpf.plugin/ebpf.c b/collectors/ebpf.plugin/ebpf.c
index 844047305c9a5e..381bf5718cf407 100644
--- a/collectors/ebpf.plugin/ebpf.c
+++ b/collectors/ebpf.plugin/ebpf.c
@@ -49,176 +49,258 @@ struct netdata_static_thread cgroup_integration_thread = {
};
ebpf_module_t ebpf_modules[] = {
- { .thread_name = "process", .config_name = "process", .thread_description = NETDATA_EBPF_MODULE_PROCESS_DESC,
- .enabled = 0, .start_routine = ebpf_process_thread,
+ { .info = {.thread_name = "process",
+ .config_name = "process",
+ .thread_description = NETDATA_EBPF_MODULE_PROCESS_DESC},
+ .functions = {.start_routine = ebpf_process_thread,
+ .apps_routine = ebpf_process_create_apps_charts,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = ebpf_process_create_apps_charts, .maps = NULL,
- .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &process_config,
+ .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &process_config,
.config_file = NETDATA_PROCESS_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10 |
NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0 },
- { .thread_name = "socket", .config_name = "socket", .thread_description = NETDATA_EBPF_SOCKET_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_socket_thread,
+ { .info = {.thread_name = "socket",
+ .config_name = "socket",
+ .thread_description = NETDATA_EBPF_SOCKET_MODULE_DESC},
+ .functions = {.start_routine = ebpf_socket_thread,
+ .apps_routine = ebpf_socket_create_apps_charts,
+ .fnct_routine = ebpf_socket_read_open_connections,
+ .fcnt_name = EBPF_FUNCTION_SOCKET,
+ .fcnt_desc = EBPF_PLUGIN_SOCKET_FUNCTION_DESCRIPTION,
+ .fcnt_thread_chart_name = NULL,
+ .fcnt_thread_lifetime_name = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = ebpf_socket_create_apps_charts, .maps = NULL,
+ .maps = NULL,
.pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &socket_config,
.config_file = NETDATA_NETWORK_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = socket_targets, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "cachestat", .config_name = "cachestat", .thread_description = NETDATA_EBPF_CACHESTAT_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_cachestat_thread,
+ { .info = {.thread_name = "cachestat", .config_name = "cachestat", .thread_description = NETDATA_EBPF_CACHESTAT_MODULE_DESC},
+ .functions = {.start_routine = ebpf_cachestat_thread,
+ .apps_routine = ebpf_cachestat_create_apps_charts,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = ebpf_cachestat_create_apps_charts, .maps = cachestat_maps,
- .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &cachestat_config,
+ .maps = cachestat_maps, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &cachestat_config,
.config_file = NETDATA_CACHESTAT_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18|
NETDATA_V5_4 | NETDATA_V5_14 | NETDATA_V5_15 | NETDATA_V5_16,
.load = EBPF_LOAD_LEGACY, .targets = cachestat_targets, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "sync", .config_name = "sync", .thread_description = NETDATA_EBPF_SYNC_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_sync_thread,
+ { .info = {.thread_name = "sync",
+ .config_name = "sync",
+ .thread_description = NETDATA_EBPF_SYNC_MODULE_DESC},
+ .functions = {.start_routine = ebpf_sync_thread,
+ .apps_routine = NULL,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING, .maps = NULL,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &sync_config,
+ .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &sync_config,
.config_file = NETDATA_SYNC_CONFIG_FILE,
// All syscalls have the same kernels
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = sync_targets, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "dc", .config_name = "dc", .thread_description = NETDATA_EBPF_DC_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_dcstat_thread,
+ { .info = {.thread_name = "dc",
+ .config_name = "dc",
+ .thread_description = NETDATA_EBPF_DC_MODULE_DESC},
+ .functions = {.start_routine = ebpf_dcstat_thread,
+ .apps_routine = ebpf_dcstat_create_apps_charts,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = ebpf_dcstat_create_apps_charts, .maps = dcstat_maps,
+ .maps = dcstat_maps,
.pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &dcstat_config,
.config_file = NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = dc_targets, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "swap", .config_name = "swap", .thread_description = NETDATA_EBPF_SWAP_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_swap_thread,
+ { .info = {.thread_name = "swap", .config_name = "swap", .thread_description = NETDATA_EBPF_SWAP_MODULE_DESC},
+ .functions = {.start_routine = ebpf_swap_thread,
+ .apps_routine = ebpf_swap_create_apps_charts,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = ebpf_swap_create_apps_charts, .maps = NULL,
+ .maps = NULL,
.pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &swap_config,
.config_file = NETDATA_DIRECTORY_SWAP_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = swap_targets, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "vfs", .config_name = "vfs", .thread_description = NETDATA_EBPF_VFS_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_vfs_thread,
+ { .info = {.thread_name = "vfs",
+ .config_name = "vfs",
+ .thread_description = NETDATA_EBPF_VFS_MODULE_DESC},
+ .functions = {.start_routine = ebpf_vfs_thread,
+ .apps_routine = ebpf_vfs_create_apps_charts,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = ebpf_vfs_create_apps_charts, .maps = NULL,
+ .maps = NULL,
.pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &vfs_config,
.config_file = NETDATA_DIRECTORY_VFS_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = vfs_targets, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "filesystem", .config_name = "filesystem", .thread_description = NETDATA_EBPF_FS_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_filesystem_thread,
+ { .info = {.thread_name = "filesystem", .config_name = "filesystem", .thread_description = NETDATA_EBPF_FS_MODULE_DESC},
+ .functions = {.start_routine = ebpf_filesystem_thread,
+ .apps_routine = NULL,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fs_config,
+ .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fs_config,
.config_file = NETDATA_FILESYSTEM_CONFIG_FILE,
//We are setting kernels as zero, because we load eBPF programs according the kernel running.
.kernels = 0, .load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "disk", .config_name = "disk", .thread_description = NETDATA_EBPF_DISK_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_disk_thread,
+ { .info = {.thread_name = "disk",
+ .config_name = "disk",
+ .thread_description = NETDATA_EBPF_DISK_MODULE_DESC},
+ .functions = {.start_routine = ebpf_disk_thread,
+ .apps_routine = NULL,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &disk_config,
+ .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &disk_config,
.config_file = NETDATA_DISK_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "mount", .config_name = "mount", .thread_description = NETDATA_EBPF_MOUNT_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_mount_thread,
+ { .info = {.thread_name = "mount",
+ .config_name = "mount",
+ .thread_description = NETDATA_EBPF_MOUNT_MODULE_DESC},
+ .functions = {.start_routine = ebpf_mount_thread,
+ .apps_routine = NULL,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mount_config,
+ .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mount_config,
.config_file = NETDATA_MOUNT_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = mount_targets, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "fd", .config_name = "fd", .thread_description = NETDATA_EBPF_FD_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_fd_thread,
+ { .info = { .thread_name = "fd",
+ .config_name = "fd",
+ .thread_description = NETDATA_EBPF_FD_MODULE_DESC},
+ .functions = {.start_routine = ebpf_fd_thread,
+ .apps_routine = ebpf_fd_create_apps_charts,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = ebpf_fd_create_apps_charts, .maps = NULL,
+ .maps = NULL,
.pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &fd_config,
.config_file = NETDATA_FD_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_11 |
NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = fd_targets, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "hardirq", .config_name = "hardirq", .thread_description = NETDATA_EBPF_HARDIRQ_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_hardirq_thread,
+ { .info = { .thread_name = "hardirq",
+ .config_name = "hardirq",
+ .thread_description = NETDATA_EBPF_HARDIRQ_MODULE_DESC},
+ .functions = {.start_routine = ebpf_hardirq_thread,
+ .apps_routine = NULL,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &hardirq_config,
+ .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &hardirq_config,
.config_file = NETDATA_HARDIRQ_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "softirq", .config_name = "softirq", .thread_description = NETDATA_EBPF_SOFTIRQ_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_softirq_thread,
+ { .info = { .thread_name = "softirq",
+ .config_name = "softirq",
+ .thread_description = NETDATA_EBPF_SOFTIRQ_MODULE_DESC},
+ .functions = {.start_routine = ebpf_softirq_thread,
+ .apps_routine = NULL,
+ .fnct_routine = NULL },
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &softirq_config,
+ .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &softirq_config,
.config_file = NETDATA_SOFTIRQ_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "oomkill", .config_name = "oomkill", .thread_description = NETDATA_EBPF_OOMKILL_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_oomkill_thread,
+ { .info = {.thread_name = "oomkill",
+ .config_name = "oomkill",
+ .thread_description = NETDATA_EBPF_OOMKILL_MODULE_DESC},
+ .functions = {.start_routine = ebpf_oomkill_thread,
+ .apps_routine = ebpf_oomkill_create_apps_charts,
+ .fnct_routine = NULL},.enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = ebpf_oomkill_create_apps_charts, .maps = NULL,
+ .maps = NULL,
.pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &oomkill_config,
.config_file = NETDATA_OOMKILL_CONFIG_FILE,
.kernels = NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "shm", .config_name = "shm", .thread_description = NETDATA_EBPF_SHM_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_shm_thread,
+ { .info = {.thread_name = "shm",
+ .config_name = "shm",
+ .thread_description = NETDATA_EBPF_SHM_MODULE_DESC},
+ .functions = {.start_routine = ebpf_shm_thread,
+ .apps_routine = ebpf_shm_create_apps_charts,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_LEVEL_REAL_PARENT, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = ebpf_shm_create_apps_charts, .maps = NULL,
+ .maps = NULL,
.pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &shm_config,
.config_file = NETDATA_DIRECTORY_SHM_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = shm_targets, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "mdflush", .config_name = "mdflush", .thread_description = NETDATA_EBPF_MD_MODULE_DESC,
- .enabled = 0, .start_routine = ebpf_mdflush_thread,
+ { .info = { .thread_name = "mdflush",
+ .config_name = "mdflush",
+ .thread_description = NETDATA_EBPF_MD_MODULE_DESC},
+ .functions = {.start_routine = ebpf_mdflush_thread,
+ .apps_routine = NULL,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mdflush_config,
+ .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = &mdflush_config,
.config_file = NETDATA_DIRECTORY_MDFLUSH_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = mdflush_targets, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = "functions", .config_name = "functions", .thread_description = NETDATA_EBPF_FUNCTIONS_MODULE_DESC,
- .enabled = 1, .start_routine = ebpf_function_thread,
+ { .info = { .thread_name = "functions",
+ .config_name = "functions",
+ .thread_description = NETDATA_EBPF_FUNCTIONS_MODULE_DESC},
+ .functions = {.start_routine = ebpf_function_thread,
+ .apps_routine = NULL,
+ .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_RUNNING,
.update_every = EBPF_DEFAULT_UPDATE_EVERY, .global_charts = 1, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO,
.apps_level = NETDATA_APPS_NOT_SET, .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0,
- .apps_routine = NULL, .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = NULL,
+ .maps = NULL, .pid_map_size = ND_EBPF_DEFAULT_PID_SIZE, .names = NULL, .cfg = NULL,
.config_file = NETDATA_DIRECTORY_FUNCTIONS_CONFIG_FILE,
.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_14,
.load = EBPF_LOAD_LEGACY, .targets = NULL, .probe_links = NULL, .objects = NULL,
.thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES, .lifetime = EBPF_DEFAULT_LIFETIME, .running_time = 0},
- { .thread_name = NULL, .enabled = 0, .start_routine = NULL, .update_every = EBPF_DEFAULT_UPDATE_EVERY,
+ { .info = {.thread_name = NULL, .config_name = NULL},
+ .functions = {.start_routine = NULL, .apps_routine = NULL, .fnct_routine = NULL},
+ .enabled = NETDATA_THREAD_EBPF_NOT_RUNNING, .update_every = EBPF_DEFAULT_UPDATE_EVERY,
.global_charts = 0, .apps_charts = NETDATA_EBPF_APPS_FLAG_NO, .apps_level = NETDATA_APPS_NOT_SET,
- .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .apps_routine = NULL, .maps = NULL,
- .pid_map_size = 0, .names = NULL, .cfg = NULL, .config_name = NULL, .kernels = 0, .load = EBPF_LOAD_LEGACY,
+ .cgroup_charts = CONFIG_BOOLEAN_NO, .mode = MODE_ENTRY, .optional = 0, .maps = NULL,
+ .pid_map_size = 0, .names = NULL, .cfg = NULL, .kernels = 0, .load = EBPF_LOAD_LEGACY,
.targets = NULL, .probe_links = NULL, .objects = NULL, .thread = NULL, .maps_per_core = CONFIG_BOOLEAN_YES},
};
@@ -559,6 +641,8 @@ ebpf_network_viewer_options_t network_viewer_opt;
ebpf_plugin_stats_t plugin_statistics = {.core = 0, .legacy = 0, .running = 0, .threads = 0, .tracepoints = 0,
.probes = 0, .retprobes = 0, .trampolines = 0, .memlock_kern = 0,
.hash_tables = 0};
+netdata_ebpf_judy_pid_t ebpf_judy_pid = {.pid_table = NULL, .index = {.JudyLArray = NULL}};
+bool ebpf_plugin_exit = false;
#ifdef LIBBPF_MAJOR_VERSION
struct btf *default_btf = NULL;
@@ -578,6 +662,61 @@ void *default_btf = NULL;
#endif
char *btf_path = NULL;
+/*****************************************************************
+ *
+ * FUNCTIONS USED TO MANIPULATE JUDY ARRAY
+ *
+ *****************************************************************/
+
+/**
+ * Judy insert unsafe
+ *
+ * Find or create the slot associated with `key` in a JudyL array. No lock is taken here.
+ *
+ * @param arr pointer to the JudyL array.
+ * @param key index to find or insert.
+ * @return The slot for `key` (its content is NULL for a new key), or PJERR after logging the failure. Use the
+ *         slot before any other insert/delete on the same array, because those operations can invalidate it.
+ */
+void **ebpf_judy_insert_unsafe(PPvoid_t arr, Word_t key)
+{
+    JError_t judy_error;
+    Pvoid_t *slot = JudyLIns(arr, key, &judy_error);
+    // On failure the error is only logged; PJERR is still handed back to the caller.
+    if (unlikely(slot == PJERR)) {
+        netdata_log_error("Cannot add PID to JudyL, JU_ERRNO_* == %u, ID == %d",
+                          JU_ERRNO(&judy_error), JU_ERRID(&judy_error));
+    }
+    return slot;
+}
+
+/**
+ * Get PID from judy
+ *
+ * Find the entry stored for `pid` inside `judy_array`, creating and initializing it when missing.
+ *
+ * @param judy_array a judy array where PID is the primary key
+ * @param pid pid stored.
+ * @return the entry for `pid`, or NULL when the PID cannot be inserted into the array.
+ */
+netdata_ebpf_judy_pid_stats_t *ebpf_get_pid_from_judy_unsafe(PPvoid_t judy_array, uint32_t pid)
+{
+    netdata_ebpf_judy_pid_stats_t **pid_pptr =
+        (netdata_ebpf_judy_pid_stats_t **)ebpf_judy_insert_unsafe(judy_array, pid);
+    // The insert helper hands back PJERR on failure: never dereference it.
+    if (unlikely(!pid_pptr || (void *)pid_pptr == PJERR))
+        return NULL;
+    netdata_ebpf_judy_pid_stats_t *pid_ptr = *pid_pptr;
+    if (likely(!pid_ptr)) {
+        // a new PID added to the index: allocate the entry and start with empty per-PID state
+        pid_ptr = *pid_pptr = aral_mallocz(ebpf_judy_pid.pid_table);
+        pid_ptr->cmdline = NULL;
+        pid_ptr->socket_stats.JudyLArray = NULL;
+        rw_spinlock_init(&pid_ptr->socket_stats.rw_spinlock);
+    }
+    return pid_ptr;
+}
+
/*****************************************************************
*
* FUNCTIONS USED TO ALLOCATE APPS/CGROUP MEMORIES (ARAL)
@@ -626,7 +765,7 @@ static inline void ebpf_check_before2go()
i = 0;
int j;
pthread_mutex_lock(&ebpf_exit_cleanup);
- for (j = 0; ebpf_modules[j].thread_name != NULL; j++) {
+ for (j = 0; ebpf_modules[j].info.thread_name != NULL; j++) {
if (ebpf_modules[j].enabled < NETDATA_THREAD_EBPF_STOPPING)
i++;
}
@@ -704,14 +843,15 @@ void ebpf_unload_legacy_code(struct bpf_object *objects, struct bpf_link **probe
static void ebpf_unload_unique_maps()
{
int i;
- for (i = 0; ebpf_modules[i].thread_name; i++) {
+ for (i = 0; ebpf_modules[i].info.thread_name; i++) {
// These threads are cleaned with other functions
if (i != EBPF_MODULE_SOCKET_IDX)
continue;
if (ebpf_modules[i].enabled != NETDATA_THREAD_EBPF_STOPPED) {
if (ebpf_modules[i].enabled != NETDATA_THREAD_EBPF_NOT_RUNNING)
- netdata_log_error("Cannot unload maps for thread %s, because it is not stopped.", ebpf_modules[i].thread_name);
+ netdata_log_error("Cannot unload maps for thread %s, because it is not stopped.",
+ ebpf_modules[i].info.thread_name);
continue;
}
@@ -775,13 +915,12 @@ static void ebpf_unload_sync()
}
}
-int ebpf_exit_plugin = 0;
/**
* Close the collector gracefully
*
* @param sig is the signal number used to close the collector
*/
-static void ebpf_stop_threads(int sig)
+void ebpf_stop_threads(int sig)
{
UNUSED(sig);
static int only_one = 0;
@@ -794,11 +933,11 @@ static void ebpf_stop_threads(int sig)
}
only_one = 1;
int i;
- for (i = 0; ebpf_modules[i].thread_name != NULL; i++) {
+ for (i = 0; ebpf_modules[i].info.thread_name != NULL; i++) {
if (ebpf_modules[i].enabled < NETDATA_THREAD_EBPF_STOPPING) {
netdata_thread_cancel(*ebpf_modules[i].thread->thread);
#ifdef NETDATA_DEV_MODE
- netdata_log_info("Sending cancel for thread %s", ebpf_modules[i].thread_name);
+ netdata_log_info("Sending cancel for thread %s", ebpf_modules[i].info.thread_name);
#endif
}
}
@@ -811,7 +950,7 @@ static void ebpf_stop_threads(int sig)
#endif
pthread_mutex_unlock(&mutex_cgroup_shm);
- ebpf_exit_plugin = 1;
+ ebpf_plugin_exit = true;
ebpf_check_before2go();
@@ -839,8 +978,8 @@ static void ebpf_stop_threads(int sig)
* @param root a pointer for the targets.
*/
static inline void ebpf_create_apps_for_module(ebpf_module_t *em, struct ebpf_target *root) {
- if (em->enabled < NETDATA_THREAD_EBPF_STOPPING && em->apps_charts && em->apps_routine)
- em->apps_routine(em, root);
+ if (em->enabled < NETDATA_THREAD_EBPF_STOPPING && em->apps_charts && em->functions.apps_routine)
+ em->functions.apps_routine(em, root);
}
/**
@@ -920,25 +1059,6 @@ collected_number get_value_from_structure(char *basis, size_t offset)
return ret;
}
-/**
- * Write begin command on standard output
- *
- * @param family the chart family name
- * @param name the chart name
- */
-void write_begin_chart(char *family, char *name)
-{
- printf("BEGIN %s.%s\n", family, name);
-}
-
-/**
- * Write END command on stdout.
- */
-inline void write_end_chart()
-{
- printf("END\n");
-}
-
/**
* Write set command on standard output
*
@@ -962,7 +1082,7 @@ void write_chart_dimension(char *dim, long long value)
*/
void write_count_chart(char *name, char *family, netdata_publish_syscall_t *move, uint32_t end)
{
- write_begin_chart(family, name);
+ ebpf_write_begin_chart(family, name, "");
uint32_t i = 0;
while (move && i < end) {
@@ -972,7 +1092,7 @@ void write_count_chart(char *name, char *family, netdata_publish_syscall_t *move
i++;
}
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -985,7 +1105,7 @@ void write_count_chart(char *name, char *family, netdata_publish_syscall_t *move
*/
void write_err_chart(char *name, char *family, netdata_publish_syscall_t *move, int end)
{
- write_begin_chart(family, name);
+ ebpf_write_begin_chart(family, name, "");
int i = 0;
while (move && i < end) {
@@ -995,7 +1115,7 @@ void write_err_chart(char *name, char *family, netdata_publish_syscall_t *move,
i++;
}
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -1010,11 +1130,11 @@ void write_err_chart(char *name, char *family, netdata_publish_syscall_t *move,
*/
void ebpf_one_dimension_write_charts(char *family, char *chart, char *dim, long long v1)
{
- write_begin_chart(family, chart);
+ ebpf_write_begin_chart(family, chart, "");
write_chart_dimension(dim, v1);
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -1031,19 +1151,20 @@ void ebpf_one_dimension_write_charts(char *family, char *chart, char *dim, long
*/
void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite, char *dread, long long vread)
{
- write_begin_chart(family, chart);
+ ebpf_write_begin_chart(family, chart, "");
write_chart_dimension(dwrite, vwrite);
write_chart_dimension(dread, vread);
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
* Write chart cmd on standard output
*
* @param type chart type
- * @param id chart id
+ * @param id chart id (the apps group name).
+ * @param suffix suffix to differentiate charts
* @param title chart title
* @param units units label
* @param family group name used to attach the chart on dashboard
@@ -1053,12 +1174,13 @@ void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite, c
* @param update_every update interval used by plugin
* @param module chart module name, this is the eBPF thread.
*/
-void ebpf_write_chart_cmd(char *type, char *id, char *title, char *units, char *family,
+void ebpf_write_chart_cmd(char *type, char *id, char *suffix, char *title, char *units, char *family,
char *charttype, char *context, int order, int update_every, char *module)
{
- printf("CHART %s.%s '' '%s' '%s' '%s' '%s' '%s' %d %d '' 'ebpf.plugin' '%s'\n",
+ printf("CHART %s.%s%s '' '%s' '%s' '%s' '%s' '%s' %d %d '' 'ebpf.plugin' '%s'\n",
type,
id,
+ suffix,
title,
units,
(family)?family:"",
@@ -1074,6 +1196,7 @@ void ebpf_write_chart_cmd(char *type, char *id, char *title, char *units, char *
*
* @param type chart type
* @param id chart id
+ * @param suffix   suffix appended to the chart id.
* @param title chart title
* @param units units label
* @param family group name used to attach the chart on dashboard
@@ -1082,12 +1205,13 @@ void ebpf_write_chart_cmd(char *type, char *id, char *title, char *units, char *
* @param order chart order
* @param update_every value to overwrite the update frequency set by the server.
*/
-void ebpf_write_chart_obsolete(char *type, char *id, char *title, char *units, char *family,
+void ebpf_write_chart_obsolete(char *type, char *id, char *suffix, char *title, char *units, char *family,
char *charttype, char *context, int order, int update_every)
{
- printf("CHART %s.%s '' '%s' '%s' '%s' '%s' '%s' %d %d 'obsolete'\n",
+ printf("CHART %s.%s%s '' '%s' '%s' '%s' '%s' '%s' %d %d 'obsolete'\n",
type,
id,
+ suffix,
title,
units,
(family)?family:"",
@@ -1159,40 +1283,13 @@ void ebpf_create_chart(char *type,
int update_every,
char *module)
{
- ebpf_write_chart_cmd(type, id, title, units, family, charttype, context, order, update_every, module);
+ ebpf_write_chart_cmd(type, id, "", title, units, family, charttype, context, order, update_every, module);
if (ncd) {
ncd(move, end);
}
}
-/**
- * Create charts on apps submenu
- *
- * @param id the chart id
- * @param title the value displayed on vertical axis.
- * @param units the value displayed on vertical axis.
- * @param family Submenu that the chart will be attached on dashboard.
- * @param charttype chart type
- * @param order the chart order
- * @param algorithm the algorithm used by dimension
- * @param root structure used to create the dimensions.
- * @param update_every update interval used by plugin
- * @param module chart module name, this is the eBPF thread.
- */
-void ebpf_create_charts_on_apps(char *id, char *title, char *units, char *family, char *charttype, int order,
- char *algorithm, struct ebpf_target *root, int update_every, char *module)
-{
- struct ebpf_target *w;
- ebpf_write_chart_cmd(NETDATA_APPS_FAMILY, id, title, units, family, charttype, NULL, order,
- update_every, module);
-
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed))
- fprintf(stdout, "DIMENSION %s '' %s 1 1\n", w->name, algorithm);
- }
-}
-
/**
* Call the necessary functions to create a name.
*
@@ -1206,14 +1303,14 @@ void ebpf_create_charts_on_apps(char *id, char *title, char *units, char *family
*/
void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, char **dimensions, uint32_t end)
{
- write_begin_chart(family, name);
+ ebpf_write_begin_chart(family, name, "");
uint32_t i;
for (i = 0; i < end; i++) {
write_chart_dimension(dimensions[i], (long long) hist[i]);
}
- write_end_chart();
+ ebpf_write_end_chart();
fflush(stdout);
}
@@ -1238,6 +1335,7 @@ int ebpf_statistic_create_aral_chart(char *name, ebpf_module_t *em)
ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY,
em->memory_usage,
+ "",
"Bytes allocated for ARAL.",
"bytes",
NETDATA_EBPF_FAMILY,
@@ -1253,6 +1351,7 @@ int ebpf_statistic_create_aral_chart(char *name, ebpf_module_t *em)
ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY,
em->memory_allocations,
+ "",
"Calls to allocate memory.",
"calls",
NETDATA_EBPF_FAMILY,
@@ -1282,6 +1381,7 @@ void ebpf_statistic_obsolete_aral_chart(ebpf_module_t *em, int prio)
{
ebpf_write_chart_obsolete(NETDATA_MONITORING_FAMILY,
em->memory_allocations,
+ "",
"Calls to allocate memory.",
"calls",
NETDATA_EBPF_FAMILY,
@@ -1292,6 +1392,7 @@ void ebpf_statistic_obsolete_aral_chart(ebpf_module_t *em, int prio)
ebpf_write_chart_obsolete(NETDATA_MONITORING_FAMILY,
em->memory_allocations,
+ "",
"Calls to allocate memory.",
"calls",
NETDATA_EBPF_FAMILY,
@@ -1316,13 +1417,13 @@ void ebpf_send_data_aral_chart(ARAL *memory, ebpf_module_t *em)
struct aral_statistics *stats = aral_statistics(memory);
- write_begin_chart(NETDATA_MONITORING_FAMILY, em->memory_usage);
+ ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, em->memory_usage, "");
write_chart_dimension(mem, (long long)stats->structures.allocated_bytes);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_MONITORING_FAMILY, em->memory_allocations);
+ ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, em->memory_allocations, "");
write_chart_dimension(aral, (long long)stats->structures.allocations);
- write_end_chart();
+ ebpf_write_end_chart();
}
/*****************************************************************
@@ -1368,6 +1469,607 @@ void ebpf_read_global_table_stats(netdata_idx_t *stats,
}
}
+/*****************************************************************
+ *
+ * FUNCTIONS USED WITH SOCKET
+ *
+ *****************************************************************/
+
+/**
+ * Netmask
+ *
+ * Copied from iprange (https://github.com/firehol/iprange/blob/master/iprange.h)
+ *
+ * @param prefix the CIDR prefix length used to create the netmask, expected in the range 0-32.
+ *
+ * @return It returns the netmask as a number with the `prefix` most significant bits set.
+ */
+static inline in_addr_t ebpf_netmask(int prefix) {
+    // A shift of 32 bits is undefined for a 32 bit type, so the empty prefix is handled apart.
+    if (prefix == 0)
+        return (~((in_addr_t) - 1));
+    else
+        // Shift an unsigned value: with a signed int, `1 << 31` (prefix 1) overflows.
+        return (in_addr_t)(~(((in_addr_t)1 << (32 - prefix)) - 1));
+}
+
+/**
+ * Broadcast
+ *
+ * Copied from iprange (https://github.com/firehol/iprange/blob/master/iprange.h)
+ *
+ * @param addr is the ip address
+ * @param prefix is the CIDR value.
+ * @return It returns the last address of the range (every bit outside the netmask is set).
+ */
+static inline in_addr_t ebpf_broadcast(in_addr_t addr, int prefix)
+{
+    in_addr_t host_bits = ~ebpf_netmask(prefix);
+    return (addr | host_bits);
+}
+
+/**
+ * Network
+ *
+ * Copied from iprange (https://github.com/firehol/iprange/blob/master/iprange.h)
+ *
+ * @param addr is the ip address
+ * @param prefix is the CIDR value.
+ * @return It returns the first address of the range (every bit outside the netmask is cleared).
+ */
+static inline in_addr_t ebpf_ipv4_network(in_addr_t addr, int prefix)
+{
+    in_addr_t network_bits = ebpf_netmask(prefix);
+    return (addr & network_bits);
+}
+
+/**
+ * Calculate ipv6 first address
+ *
+ * @param out the address to store the first address.
+ * @param in the address used to do the math.
+ * @param prefix number of bits (0-128) that belong to the network part of the address
+ */
+static void get_ipv6_first_addr(union netdata_ip_t *out, union netdata_ip_t *in, uint64_t prefix)
+{
+    // The address is handled as two 64 bit halves that are converted with be64toh()/htobe64().
+    uint64_t halves[2];
+    memcpy(halves, in->addr32, sizeof(union netdata_ip_t));
+
+    // With all the 128 bits in the prefix, the first address is the address itself.
+    if (prefix == 128) {
+        memcpy(out->addr32, in->addr32, sizeof(union netdata_ip_t));
+        return;
+    }
+
+    if (!prefix) {
+        // No network bits: the range starts with every bit cleared.
+        halves[0] = halves[1] = 0;
+    } else if (prefix <= 64) {
+        // The prefix ends inside the upper half, the lower half is cleared completely.
+        uint64_t upper = be64toh(halves[0]);
+        upper &= 0xFFFFFFFFFFFFFFFFULL << (64 - prefix);
+        halves[0] = htobe64(upper);
+        halves[1] = 0ULL;
+    } else {
+        // The upper half is kept, only the bits after the prefix are cleared in the lower half.
+        uint64_t lower = be64toh(halves[1]);
+        lower &= 0xFFFFFFFFFFFFFFFFULL << (128 - prefix);
+        halves[1] = htobe64(lower);
+    }
+
+    memcpy(out->addr32, halves, sizeof(union netdata_ip_t));
+}
+
+/**
+ * Get IPV6 Last Address
+ *
+ * @param out the address to store the last address.
+ * @param in the address used to do the math.
+ * @param prefix number of bits (0-128) that belong to the network part of the address
+ */
+static void get_ipv6_last_addr(union netdata_ip_t *out, union netdata_ip_t *in, uint64_t prefix)
+{
+    uint64_t halves[2];
+    memcpy(halves, in->addr32, sizeof(union netdata_ip_t));
+
+    // With all the 128 bits in the prefix, the last address is the address itself.
+    if (prefix == 128) {
+        memcpy(out->addr32, in->addr32, sizeof(union netdata_ip_t));
+        return;
+    }
+
+    if (!prefix) {
+        // No network bits: the range ends with every bit set.
+        halves[0] = halves[1] = 0xFFFFFFFFFFFFFFFF;
+    } else if (prefix <= 64) {
+        // The prefix ends inside the upper half, the lower half is set completely.
+        uint64_t upper = be64toh(halves[0]);
+        upper |= ~(0xFFFFFFFFFFFFFFFFULL << (64 - prefix));
+        halves[0] = htobe64(upper);
+        halves[1] = 0xFFFFFFFFFFFFFFFFULL;
+    } else {
+        // The upper half is kept, only the bits after the prefix are set in the lower half.
+        uint64_t lower = be64toh(halves[1]);
+        lower |= ~(0xFFFFFFFFFFFFFFFFULL << (128 - prefix));
+        halves[1] = htobe64(lower);
+    }
+
+    memcpy(out->addr32, halves, sizeof(union netdata_ip_t));
+}
+
+/**
+ * IP to network long
+ *
+ * @param dst the vector to store the result of inet_pton()
+ * @param ip the source ip given by our users.
+ * @param domain the ip domain (IPV4 or IPV6)
+ * @param source the original string, it is used only in the error message
+ *
+ * @return it returns 0 on success and -1 otherwise.
+ */
+static inline int ebpf_ip2nl(uint8_t *dst, char *ip, int domain, char *source)
+{
+    // inet_pton() returns a positive value only when the text was converted.
+    if (inet_pton(domain, ip, dst) > 0)
+        return 0;
+
+    netdata_log_error("The address specified (%s) is invalid ", source);
+    return -1;
+}
+
+/**
+ * Clean port Structure
+ *
+ * Release every node of the list and the string stored in each node.
+ *
+ * @param clean the list that will be cleaned, it is set to NULL before returning
+ */
+void ebpf_clean_port_structure(ebpf_network_viewer_port_list_t **clean)
+{
+    ebpf_network_viewer_port_list_t *node, *following;
+    for (node = *clean; node; node = following) {
+        // The link is read before the node is released
+        following = node->next;
+        freez(node->value);
+        freez(node);
+    }
+
+    *clean = NULL;
+}
+
+/**
+ * Clean IP structure
+ *
+ * Release every node of the list and the string stored in each node.
+ *
+ * @param clean the list that will be cleaned, it is set to NULL before returning
+ */
+void ebpf_clean_ip_structure(ebpf_network_viewer_ip_list_t **clean)
+{
+    ebpf_network_viewer_ip_list_t *node, *following;
+    for (node = *clean; node; node = following) {
+        // The link is read before the node is released
+        following = node->next;
+        freez(node->value);
+        freez(node);
+    }
+
+    *clean = NULL;
+}
+
+/**
+ * Parse IP List
+ *
+ * Parse one value ('*', a single address, a CIDR or a 'first-last' range, IPv4 or IPv6) and link it.
+ *
+ * @param out a pointer to store the link list (it is cast to ebpf_network_viewer_ip_list_t **)
+ * @param ip the value given as parameter, it is modified in place when it has a separator
+ */
+static void ebpf_parse_ip_list_unsafe(void **out, char *ip)
+{
+ ebpf_network_viewer_ip_list_t **list = (ebpf_network_viewer_ip_list_t **)out;
+
+ char *ipdup = strdupz(ip); // untouched copy: stored in the node on success, used in the messages, released on failure
+ union netdata_ip_t first = { };
+ union netdata_ip_t last = { };
+ char *is_ipv6;
+ if (*ip == '*' && *(ip+1) == '\0') {
+ memset(first.addr8, 0, sizeof(first.addr8));
+ memset(last.addr8, 0xFF, sizeof(last.addr8));
+
+ is_ipv6 = ip; // non-NULL, so the wildcard is stored below with AF_INET6
+
+ ebpf_clean_ip_structure(list); // the wildcard discards everything already stored in this list
+ goto storethisip;
+ }
+
+ char *end = ip;
+ // Advance until a separator ('/' for CIDR, '-' for a range) or the end of the string is found
+ while (*end && *end != '/' && *end != '-') end++;
+
+ // Only the classic IPv6 notation is supported for now, the base 85 encoding could be considered in the future
+ // https://tools.ietf.org/html/rfc1924
+ is_ipv6 = strchr(ip, ':');
+
+ int select; // reused below: separator kind, ebpf_ip2nl() result and CIDR prefix
+ if (*end && !is_ipv6) { // IPV4 range
+ select = (*end == '/') ? 0 : 1;
+ *end++ = '\0';
+ if (*end == '!') {
+ netdata_log_info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup);
+ goto cleanipdup;
+ }
+
+ if (!select) { // CIDR
+ select = ebpf_ip2nl(first.addr8, ip, AF_INET, ipdup);
+ if (select)
+ goto cleanipdup;
+
+ select = (int) str2i(end);
+ if (select < NETDATA_MINIMUM_IPV4_CIDR || select > NETDATA_MAXIMUM_IPV4_CIDR) {
+ netdata_log_info("The specified CIDR %s is not valid, the IP %s will be ignored.", end, ip);
+ goto cleanipdup;
+ }
+
+ last.addr32[0] = htonl(ebpf_broadcast(ntohl(first.addr32[0]), select));
+ // UNUSED() was added to silence the static analysis warning reported at
+ // https://app.codacy.com/manual/netdata/netdata/pullRequest?prid=5810941&bid=19021977
+ UNUSED(last.addr32[0]);
+
+ uint32_t ipv4_test = htonl(ebpf_ipv4_network(ntohl(first.addr32[0]), select));
+ if (first.addr32[0] != ipv4_test) { // the given address was not the first one of its network
+ first.addr32[0] = ipv4_test;
+ struct in_addr ipv4_convert;
+ ipv4_convert.s_addr = ipv4_test;
+ char ipv4_msg[INET_ADDRSTRLEN];
+ if(inet_ntop(AF_INET, &ipv4_convert, ipv4_msg, INET_ADDRSTRLEN))
+ netdata_log_info("The network value of CIDR %s was updated for %s .", ipdup, ipv4_msg);
+ }
+ } else { // Range
+ select = ebpf_ip2nl(first.addr8, ip, AF_INET, ipdup);
+ if (select)
+ goto cleanipdup;
+
+ select = ebpf_ip2nl(last.addr8, end, AF_INET, ipdup);
+ if (select)
+ goto cleanipdup;
+ }
+
+ if (htonl(first.addr32[0]) > htonl(last.addr32[0])) { // the byte swap allows a numeric comparison of the addresses
+ netdata_log_info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.",
+ ipdup);
+ goto cleanipdup;
+ }
+ } else if (is_ipv6) { // IPV6
+ if (!*end) { // Unique
+ select = ebpf_ip2nl(first.addr8, ip, AF_INET6, ipdup);
+ if (select)
+ goto cleanipdup;
+
+ memcpy(last.addr8, first.addr8, sizeof(first.addr8));
+ } else if (*end == '-') { // Range
+ *end++ = 0x00;
+ if (*end == '!') {
+ netdata_log_info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup);
+ goto cleanipdup;
+ }
+
+ select = ebpf_ip2nl(first.addr8, ip, AF_INET6, ipdup);
+ if (select)
+ goto cleanipdup;
+
+ select = ebpf_ip2nl(last.addr8, end, AF_INET6, ipdup);
+ if (select)
+ goto cleanipdup;
+ } else { // CIDR
+ *end++ = 0x00;
+ if (*end == '!') {
+ netdata_log_info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup);
+ goto cleanipdup;
+ }
+
+ select = str2i(end);
+ if (select < 0 || select > 128) {
+ netdata_log_info("The CIDR %s is not valid, the address %s will be ignored.", end, ip);
+ goto cleanipdup;
+ }
+
+ uint64_t prefix = (uint64_t)select; // saved because select is overwritten by the next call
+ select = ebpf_ip2nl(first.addr8, ip, AF_INET6, ipdup);
+ if (select)
+ goto cleanipdup;
+
+ get_ipv6_last_addr(&last, &first, prefix);
+
+ union netdata_ip_t ipv6_test;
+ get_ipv6_first_addr(&ipv6_test, &first, prefix);
+
+ if (memcmp(first.addr8, ipv6_test.addr8, sizeof(union netdata_ip_t)) != 0) { // not the first address of its network
+ memcpy(first.addr8, ipv6_test.addr8, sizeof(union netdata_ip_t));
+
+ struct in6_addr ipv6_convert;
+ memcpy(ipv6_convert.s6_addr, ipv6_test.addr8, sizeof(union netdata_ip_t));
+
+ char ipv6_msg[INET6_ADDRSTRLEN];
+ if(inet_ntop(AF_INET6, &ipv6_convert, ipv6_msg, INET6_ADDRSTRLEN))
+ netdata_log_info("The network value of CIDR %s was updated for %s .", ipdup, ipv6_msg);
+ }
+ }
+
+ if ((be64toh(*(uint64_t *)&first.addr32[2]) > be64toh(*(uint64_t *)&last.addr32[2]) && // lower halves inverted ...
+ !memcmp(first.addr32, last.addr32, 2*sizeof(uint32_t))) || // ... while the upper halves are equal
+ (be64toh(*(uint64_t *)&first.addr32) > be64toh(*(uint64_t *)&last.addr32)) ) { // or upper halves inverted
+ netdata_log_info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.",
+ ipdup);
+ goto cleanipdup;
+ }
+ } else { // Unique ip
+ select = ebpf_ip2nl(first.addr8, ip, AF_INET, ipdup);
+ if (select)
+ goto cleanipdup;
+
+ memcpy(last.addr8, first.addr8, sizeof(first.addr8));
+ }
+
+ ebpf_network_viewer_ip_list_t *store;
+
+ storethisip: // success: ipdup is handed to the new node
+ store = callocz(1, sizeof(ebpf_network_viewer_ip_list_t));
+ store->value = ipdup;
+ store->hash = simple_hash(ipdup);
+ store->ver = (uint8_t)(!is_ipv6)?AF_INET:AF_INET6;
+ memcpy(store->first.addr8, first.addr8, sizeof(first.addr8));
+ memcpy(store->last.addr8, last.addr8, sizeof(last.addr8));
+
+ ebpf_fill_ip_list_unsafe(list, store, "socket");
+ return;
+
+ cleanipdup: // failure: nothing was stored, only the copy has to be released
+ freez(ipdup);
+}
+
+/**
+ * Parse IP Range
+ *
+ * Parse the IP ranges given and create Network Viewer IP Structure
+ *
+ * @param ptr is a pointer with the text to parse, the spaces between values are overwritten with '\0'.
+ */
+void ebpf_parse_ips_unsafe(char *ptr)
+{
+    // No value
+    if (unlikely(!ptr))
+        return;
+
+    while (likely(ptr)) {
+        // Move forward until next valid character (<ctype.h> functions need the value as unsigned char)
+        while (isspace((unsigned char)*ptr)) ptr++;
+
+        // No valid value found
+        if (unlikely(!*ptr))
+            return;
+
+        // Find space that ends the current value and split the text there
+        char *end = strchr(ptr, ' ');
+        if (end) {
+            *end++ = '\0';
+        }
+
+        // A leading '!' sends the value to the excluded list
+        int neg = 0;
+        if (*ptr == '!') {
+            neg++;
+            ptr++;
+        }
+        if (isascii((unsigned char)*ptr)) { // Parse IP
+            ebpf_parse_ip_list_unsafe(
+                (!neg) ? (void **)&network_viewer_opt.included_ips : (void **)&network_viewer_opt.excluded_ips, ptr);
+        }
+
+        ptr = end;
+    }
+}
+
+/**
+ * Fill Port list
+ *
+ * @param out a pointer to the link list.
+ * @param in the structure that will be linked; it is appended, merged into a stored node or released here.
+ */
+static inline void fill_port_list(ebpf_network_viewer_port_list_t **out, ebpf_network_viewer_port_list_t *in)
+{
+    if (likely(*out)) {
+        ebpf_network_viewer_port_list_t *move = *out, *store = *out;
+        // NOTE(review): ebpf_parse_port_list() stores host order values and ebpf_parse_service_list() stores
+        // s_port as returned by getservbyname(); confirm which order ntohs() is expected to receive here.
+        uint16_t first = ntohs(in->first);
+        uint16_t last = ntohs(in->last);
+        while (move) {
+            uint16_t cmp_first = ntohs(move->first);
+            uint16_t cmp_last = ntohs(move->last);
+            if (cmp_first <= first && first <= cmp_last && cmp_first <= last && last <= cmp_last) {
+                netdata_log_info("The range/value (%u, %u) is inside the range/value (%u, %u) already inserted, it will be ignored.",
+                                 first, last, cmp_first, cmp_last);
+                freez(in->value);
+                freez(in);
+                return;
+            } else if (first <= cmp_first && cmp_first <= last && first <= cmp_last && cmp_last <= last) {
+                netdata_log_info("The range (%u, %u) is bigger than previous range (%u, %u) already inserted, the previous will be ignored.",
+                                 first, last, cmp_first, cmp_last);
+                // The node takes every field of the new range, hash and cmp_* must not describe the released value.
+                freez(move->value);
+                move->value = in->value;
+                move->hash = in->hash;
+                move->first = in->first;
+                move->last = in->last;
+                move->cmp_first = in->cmp_first;
+                move->cmp_last = in->cmp_last;
+                freez(in);
+                return;
+            }
+            store = move;
+            move = move->next;
+        }
+        store->next = in;
+    } else {
+        *out = in;
+    }
+#ifdef NETDATA_INTERNAL_CHECKS
+    netdata_log_info("Adding values %s( %u, %u) to %s port list used on network viewer",
+                     in->value, in->first, in->last,
+                     (*out == network_viewer_opt.included_port)?"included":"excluded");
+#endif
+}
+
+/**
+ * Parse Service List
+ *
+ * @param out a pointer to store the link list
+ * @param service the service name, it is resolved with getservbyname() to create the structure that will be linked.
+ */
+static void ebpf_parse_service_list(void **out, char *service)
+{
+    // The name is searched first as a TCP service and after this as a UDP service
+    struct servent *entry = getservbyname((const char *)service, "tcp");
+    if (!entry) {
+        entry = getservbyname((const char *)service, "udp");
+        if (!entry) {
+            netdata_log_info("Cannot resolve the service '%s' with protocols TCP and UDP, it will be ignored", service);
+            return;
+        }
+    }
+
+    ebpf_network_viewer_port_list_t *node = callocz(1, sizeof(ebpf_network_viewer_port_list_t));
+    node->hash = simple_hash(service);
+    node->value = strdupz(service);
+    // A service is a single port, so both limits receive s_port exactly as getservbyname() gives it
+    node->last = (uint16_t)entry->s_port;
+    node->first = node->last;
+    fill_port_list((ebpf_network_viewer_port_list_t **)out, node);
+}
+
+/**
+ * Parse port list
+ *
+ * Parse one value ('*', a single port or a range written as 'first:last' or 'first-last') and link it
+ *
+ * @param out a pointer to store the link list (it is cast to ebpf_network_viewer_port_list_t **)
+ * @param range the range informed by the user, it is modified in place when it has a separator.
+ */
+static void ebpf_parse_port_list(void **out, char *range)
+{
+ int first, last;
+ ebpf_network_viewer_port_list_t **list = (ebpf_network_viewer_port_list_t **)out;
+
+ char *copied = strdupz(range); // untouched copy: stored in the node on success, released on every failure
+ if (*range == '*' && *(range+1) == '\0') {
+ first = 1;
+ last = 65535;
+
+ ebpf_clean_port_structure(list); // the wildcard discards everything already stored in this list
+ goto fillenvpl;
+ }
+
+ char *end = range;
+ // Advance until a separator (':' or '-') or the end of the string is found
+ while (*end && *end != ':' && *end != '-') end++;
+
+ //It has a range
+ if (likely(*end)) {
+ *end++ = '\0';
+ if (*end == '!') {
+ netdata_log_info("The exclusion cannot be in the second part of the range, the range %s will be ignored.", copied);
+ freez(copied);
+ return;
+ }
+ last = str2i((const char *)end);
+ } else {
+ last = 0; // no second value, it receives the first port below
+ }
+
+ first = str2i((const char *)range);
+ if (first < NETDATA_MINIMUM_PORT_VALUE || first > NETDATA_MAXIMUM_PORT_VALUE) {
+ netdata_log_info("The first port %d of the range \"%s\" is invalid and it will be ignored!", first, copied);
+ freez(copied);
+ return;
+ }
+
+ if (!last) // NOTE(review): a second value parsed as 0 is also replaced here, confirm this is intended
+ last = first;
+
+ if (last < NETDATA_MINIMUM_PORT_VALUE || last > NETDATA_MAXIMUM_PORT_VALUE) {
+ netdata_log_info("The second port %d of the range \"%s\" is invalid and the whole range will be ignored!", last, copied);
+ freez(copied);
+ return;
+ }
+
+ if (first > last) {
+ netdata_log_info("The specified order %s is wrong, the smallest value is always the first, it will be ignored!", copied);
+ freez(copied);
+ return;
+ }
+
+ ebpf_network_viewer_port_list_t *w;
+ fillenvpl: // shared by the wildcard and by the validated values
+ w = callocz(1, sizeof(ebpf_network_viewer_port_list_t));
+ w->value = copied;
+ w->hash = simple_hash(copied);
+ w->first = (uint16_t)first;
+ w->last = (uint16_t)last;
+ w->cmp_first = (uint16_t)first;
+ w->cmp_last = (uint16_t)last;
+
+ fill_port_list(list, w);
+}
+
+/**
+ * Parse Port Range
+ *
+ * Parse the port ranges given and create Network Viewer Port Structure
+ *
+ * @param ptr is a pointer with the text to parse, the spaces between values are overwritten with '\0'.
+ */
+void ebpf_parse_ports(char *ptr)
+{
+    // No value
+    if (unlikely(!ptr))
+        return;
+
+    while (likely(ptr)) {
+        // Move forward until next valid character
+        while (isspace((unsigned char)*ptr)) ptr++;
+
+        // No valid value found
+        if (unlikely(!*ptr))
+            return;
+
+        // Find space that ends the list
+        char *end = strchr(ptr, ' ');
+        if (end) {
+            *end++ = '\0';
+        }
+
+        int neg = 0;
+        if (*ptr == '!') {
+            neg++;
+            ptr++;
+        }
+
+        // The destination depends only on the negation, so it is selected a single time.
+        void **list = (!neg) ? (void **)&network_viewer_opt.included_port :
+                               (void **)&network_viewer_opt.excluded_port;
+        // <ctype.h> functions need the value as unsigned char, a negative plain char is undefined behavior.
+        unsigned char c = (unsigned char)*ptr;
+        if (isdigit(c) || c == '*') { // Parse port, port range or the wildcard for all ports
+            ebpf_parse_port_list(list, ptr);
+        } else if (isalpha(c)) { // Parse service
+            ebpf_parse_service_list(list, ptr);
+        }
+
+        ptr = end;
+    }
+}
+
/*****************************************************************
*
* FUNCTIONS TO DEFINE OPTIONS
@@ -1428,13 +2130,7 @@ static inline void ebpf_set_thread_mode(netdata_run_mode_t lmode)
*/
static inline void ebpf_enable_specific_chart(struct ebpf_module *em, int disable_cgroup)
{
- em->enabled = CONFIG_BOOLEAN_YES;
-
- // oomkill stores data inside apps submenu, so it always need to have apps_enabled for plugin to create
- // its chart, without this comparison eBPF.plugin will try to store invalid data when apps is disabled.
- if (!strcmp(em->thread_name, "oomkill")) {
- em->apps_charts = NETDATA_EBPF_APPS_FLAG_YES;
- }
+ em->enabled = NETDATA_THREAD_EBPF_RUNNING;
if (!disable_cgroup) {
em->cgroup_charts = CONFIG_BOOLEAN_YES;
@@ -1451,8 +2147,8 @@ static inline void ebpf_enable_specific_chart(struct ebpf_module *em, int disabl
static inline void disable_all_global_charts()
{
int i;
- for (i = 0; ebpf_modules[i].thread_name; i++) {
- ebpf_modules[i].enabled = 0;
+ for (i = 0; ebpf_modules[i].info.thread_name; i++) {
+ ebpf_modules[i].enabled = NETDATA_THREAD_EBPF_NOT_RUNNING;
ebpf_modules[i].global_charts = 0;
}
}
@@ -1465,7 +2161,7 @@ static inline void disable_all_global_charts()
static inline void ebpf_enable_chart(int idx, int disable_cgroup)
{
int i;
- for (i = 0; ebpf_modules[i].thread_name; i++) {
+ for (i = 0; ebpf_modules[i].info.thread_name; i++) {
if (i == idx) {
ebpf_enable_specific_chart(&ebpf_modules[i], disable_cgroup);
break;
@@ -1481,7 +2177,7 @@ static inline void ebpf_enable_chart(int idx, int disable_cgroup)
static inline void ebpf_disable_cgroups()
{
int i;
- for (i = 0; ebpf_modules[i].thread_name; i++) {
+ for (i = 0; ebpf_modules[i].info.thread_name; i++) {
ebpf_modules[i].cgroup_charts = 0;
}
}
@@ -1661,6 +2357,203 @@ uint32_t ebpf_enable_tracepoints(ebpf_tracepoint_t *tps)
*
*****************************************************************/
+/**
+ * Is ip inside the range
+ *
+ * Check if the interval [cmpfirst, cmplast] is completely inside the interval [rfirst, rlast]
+ *
+ * @param rfirst the first ip address of the range
+ * @param rlast the last ip address of the range
+ * @param cmpfirst the first ip to compare
+ * @param cmplast the last ip to compare
+ * @param family the IP family
+ *
+ * @return It returns 1 if the IP is inside the range and 0 otherwise
+ */
+static int ebpf_is_ip_inside_range(union netdata_ip_t *rfirst, union netdata_ip_t *rlast,
+                                   union netdata_ip_t *cmpfirst, union netdata_ip_t *cmplast, int family)
+{
+    int starts_inside, ends_inside;
+    if (family == AF_INET) {
+        // IPv4 uses only the first 32 bit word, compared as a number
+        starts_inside = (cmpfirst->addr32[0] >= rfirst->addr32[0]);
+        ends_inside = (cmplast->addr32[0] <= rlast->addr32[0]);
+    } else {
+        // Any other family compares the whole address byte by byte
+        starts_inside = (memcmp(rfirst->addr8, cmpfirst->addr8, sizeof(union netdata_ip_t)) <= 0);
+        ends_inside = (memcmp(rlast->addr8, cmplast->addr8, sizeof(union netdata_ip_t)) >= 0);
+    }
+    return (starts_inside && ends_inside) ? 1 : 0;
+}
+
+/**
+ * Fill IP list
+ *
+ * @param out a pointer to the link list.
+ * @param in the structure that will be linked, it is released here when a stored range already covers it.
+ * @param table the modified table (used only in the messages written with NETDATA_DEV_MODE).
+ */
+void ebpf_fill_ip_list_unsafe(ebpf_network_viewer_ip_list_t **out, ebpf_network_viewer_ip_list_t *in,
+                              char *table __maybe_unused)
+{
+    if (in->ver == AF_INET) { // It is simpler to compare using host order
+        in->last.addr32[0] = ntohl(in->last.addr32[0]);
+        in->first.addr32[0] = ntohl(in->first.addr32[0]);
+    }
+    // Walk the list looking for a stored range of the same family that already covers the new one
+    ebpf_network_viewer_ip_list_t *tail = NULL, *node;
+    for (node = *out; node; node = node->next) {
+        if (in->ver == node->ver &&
+            ebpf_is_ip_inside_range(&node->first, &node->last, &in->first, &in->last, in->ver)) {
+#ifdef NETDATA_DEV_MODE
+            netdata_log_info("The range/value (%s) is inside the range/value (%s) already inserted, it will be ignored.",
+                             in->value, node->value);
+#endif
+            freez(in->value);
+            freez(in);
+            return;
+        }
+        tail = node;
+    }
+
+    // Nothing covers the new range: it is appended after the last node or it becomes the head
+    if (tail)
+        tail->next = in;
+    else
+        *out = in;
+
+#ifdef NETDATA_DEV_MODE
+    char first[256], last[512];
+    if (in->ver == AF_INET) {
+        netdata_log_info("Adding values %s: (%u - %u) to %s IP list \"%s\" used on network viewer",
+                         in->value, in->first.addr32[0], in->last.addr32[0],
+                         (*out == network_viewer_opt.included_ips)?"included":"excluded",
+                         table);
+    } else {
+        if (inet_ntop(AF_INET6, in->first.addr8, first, INET6_ADDRSTRLEN) &&
+            inet_ntop(AF_INET6, in->last.addr8, last, INET6_ADDRSTRLEN))
+            netdata_log_info("Adding values %s - %s to %s IP list \"%s\" used on network viewer",
+                             first, last,
+                             (*out == network_viewer_opt.included_ips)?"included":"excluded",
+                             table);
+    }
+#endif
+}
+
+/**
+ * Link hostname
+ *
+ * @param out is the output link list
+ * @param in the hostname to add to list, it is released here when the same hostname is already stored.
+ */
+static void ebpf_link_hostname(ebpf_network_viewer_hostname_list_t **out, ebpf_network_viewer_hostname_list_t *in)
+{
+    // Walk the links instead of the nodes: every stored hostname is compared, the last one included,
+    // and at the end `link` is the place where the new node is stored (the head when the list is empty).
+    ebpf_network_viewer_hostname_list_t **link = out;
+    for (; *link; link = &(*link)->next) {
+        ebpf_network_viewer_hostname_list_t *move = *link;
+        if (move->hash == in->hash && !strcmp(move->value, in->value)) {
+            netdata_log_info("The hostname %s was already inserted, it will be ignored.", in->value);
+            freez(in->value);
+            simple_pattern_free(in->value_pattern);
+            freez(in);
+            return;
+        }
+    }
+
+    *link = in;
+
+#ifdef NETDATA_INTERNAL_CHECKS
+    netdata_log_info("Adding value %s to %s hostname list used on network viewer",
+                     in->value,
+                     (*out == network_viewer_opt.included_hostnames)?"included":"excluded");
+#endif
+}
+
+/**
+ * Link Hostnames
+ *
+ * Parse the list of hostnames to create the link list.
+ * This is not associated with the IP, because simple patterns like *example* cannot be resolved to IP.
+ * Values prefixed with '!' are linked to the excluded hostnames, the other ones to the included hostnames.
+ *
+ * @param parse is a pointer with the text to parse, the spaces between values are overwritten with '\0'.
+ */
+static void ebpf_link_hostnames(char *parse)
+{
+    // No value
+    if (unlikely(!parse))
+        return;
+
+    while (likely(parse)) {
+        // Find the first valid value (<ctype.h> functions need the value as unsigned char)
+        while (isspace((unsigned char)*parse)) parse++;
+
+        // No valid value found
+        if (unlikely(!*parse))
+            return;
+
+        // Find space that ends the list
+        char *end = strchr(parse, ' ');
+        if (end) {
+            *end++ = '\0';
+        }
+
+        int neg = 0;
+        if (*parse == '!') {
+            neg++;
+            parse++;
+        }
+
+        ebpf_network_viewer_hostname_list_t *hostname = callocz(1 , sizeof(ebpf_network_viewer_hostname_list_t));
+        hostname->value = strdupz(parse);
+        hostname->hash = simple_hash(parse);
+        hostname->value_pattern = simple_pattern_create(parse, NULL, SIMPLE_PATTERN_EXACT, true);
+
+        ebpf_link_hostname((!neg) ? &network_viewer_opt.included_hostnames :
+                                    &network_viewer_opt.excluded_hostnames,
+                           hostname);
+
+        parse = end;
+    }
+}
+
+/**
+ * Parse network viewer section
+ *
+ * @param cfg the configuration structure, the values are read from EBPF_NETWORK_VIEWER_SECTION
+ */
+void parse_network_viewer_section(struct config *cfg)
+{
+    // Hostname resolution is disabled when the option is not present
+    network_viewer_opt.hostname_resolution_enabled =
+        appconfig_get_boolean(cfg, EBPF_NETWORK_VIEWER_SECTION, EBPF_CONFIG_RESOLVE_HOSTNAME, CONFIG_BOOLEAN_NO);
+
+    // Service resolution is enabled when the option is not present
+    network_viewer_opt.service_resolution_enabled =
+        appconfig_get_boolean(cfg, EBPF_NETWORK_VIEWER_SECTION, EBPF_CONFIG_RESOLVE_SERVICE, CONFIG_BOOLEAN_YES);
+
+    // Ports: single values, ranges and service names,
+    // the parser ignores a missing (NULL) value
+    char *ports = appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION, EBPF_CONFIG_PORTS, NULL);
+    ebpf_parse_ports(ports);
+
+    // Hostnames are read only when the name resolution is enabled
+    if (!network_viewer_opt.hostname_resolution_enabled) {
+        netdata_log_info("Name resolution is disabled, collector will not parse \"hostnames\" list.");
+    } else {
+        char *hostnames = appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION, EBPF_CONFIG_HOSTNAMES, NULL);
+        ebpf_link_hostnames(hostnames);
+    }
+
+    // IPs: there is no default value, an example of a list that could be set is
+    // "!127.0.0.1/8 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 fc00::/7 !::1/128"
+    char *ips = appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION, "ips", NULL);
+
+    ebpf_parse_ips_unsafe(ips);
+}
+
/**
* Read Local Ports
*
@@ -1705,7 +2598,7 @@ static void read_local_ports(char *filename, uint8_t proto)
*
* Read the local address from the interfaces.
*/
-static void read_local_addresses()
+void ebpf_read_local_addresses_unsafe()
{
struct ifaddrs *ifaddr, *ifa;
if (getifaddrs(&ifaddr) == -1) {
@@ -1754,9 +2647,8 @@ static void read_local_addresses()
}
}
- ebpf_fill_ip_list((family == AF_INET)?&network_viewer_opt.ipv4_local_ip:&network_viewer_opt.ipv6_local_ip,
- w,
- "selector");
+ ebpf_fill_ip_list_unsafe(
+ (family == AF_INET) ? &network_viewer_opt.ipv4_local_ip : &network_viewer_opt.ipv6_local_ip, w, "selector");
}
freeifaddrs(ifaddr);
@@ -1773,6 +2665,7 @@ void ebpf_start_pthread_variables()
pthread_mutex_init(&ebpf_exit_cleanup, NULL);
pthread_mutex_init(&collect_data_mutex, NULL);
pthread_mutex_init(&mutex_cgroup_shm, NULL);
+ rw_spinlock_init(&ebpf_judy_pid.index.rw_spinlock);
}
/**
@@ -1780,6 +2673,8 @@ void ebpf_start_pthread_variables()
*/
static void ebpf_allocate_common_vectors()
{
+ ebpf_judy_pid.pid_table = ebpf_allocate_pid_aral(NETDATA_EBPF_PID_SOCKET_ARAL_TABLE_NAME,
+ sizeof(netdata_ebpf_judy_pid_stats_t));
ebpf_all_pids = callocz((size_t)pid_max, sizeof(struct ebpf_pid_stat *));
ebpf_aral_init();
}
@@ -1825,7 +2720,7 @@ static void ebpf_update_interval(int update_every)
int i;
int value = (int) appconfig_get_number(&collector_config, EBPF_GLOBAL_SECTION, EBPF_CFG_UPDATE_EVERY,
update_every);
- for (i = 0; ebpf_modules[i].thread_name; i++) {
+ for (i = 0; ebpf_modules[i].info.thread_name; i++) {
ebpf_modules[i].update_every = value;
}
}
@@ -1840,7 +2735,7 @@ static void ebpf_update_table_size()
int i;
uint32_t value = (uint32_t) appconfig_get_number(&collector_config, EBPF_GLOBAL_SECTION,
EBPF_CFG_PID_SIZE, ND_EBPF_DEFAULT_PID_SIZE);
- for (i = 0; ebpf_modules[i].thread_name; i++) {
+ for (i = 0; ebpf_modules[i].info.thread_name; i++) {
ebpf_modules[i].pid_map_size = value;
}
}
@@ -1855,7 +2750,7 @@ static void ebpf_update_lifetime()
int i;
uint32_t value = (uint32_t) appconfig_get_number(&collector_config, EBPF_GLOBAL_SECTION,
EBPF_CFG_LIFETIME, EBPF_DEFAULT_LIFETIME);
- for (i = 0; ebpf_modules[i].thread_name; i++) {
+ for (i = 0; ebpf_modules[i].info.thread_name; i++) {
ebpf_modules[i].lifetime = value;
}
}
@@ -1868,7 +2763,7 @@ static void ebpf_update_lifetime()
static inline void ebpf_set_load_mode(netdata_ebpf_load_mode_t load, netdata_ebpf_load_mode_t origin)
{
int i;
- for (i = 0; ebpf_modules[i].thread_name; i++) {
+ for (i = 0; ebpf_modules[i].info.thread_name; i++) {
ebpf_modules[i].load &= ~NETDATA_EBPF_LOAD_METHODS;
ebpf_modules[i].load |= load | origin ;
}
@@ -1897,7 +2792,7 @@ static void ebpf_update_map_per_core()
int i;
int value = appconfig_get_boolean(&collector_config, EBPF_GLOBAL_SECTION,
EBPF_CFG_MAPS_PER_CORE, CONFIG_BOOLEAN_YES);
- for (i = 0; ebpf_modules[i].thread_name; i++) {
+ for (i = 0; ebpf_modules[i].info.thread_name; i++) {
ebpf_modules[i].maps_per_core = value;
}
}
@@ -1961,7 +2856,7 @@ static void read_collector_values(int *disable_cgroups,
// Read ebpf programs section
enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION,
- ebpf_modules[EBPF_MODULE_PROCESS_IDX].config_name, CONFIG_BOOLEAN_YES);
+ ebpf_modules[EBPF_MODULE_PROCESS_IDX].info.config_name, CONFIG_BOOLEAN_YES);
if (enabled) {
ebpf_enable_chart(EBPF_MODULE_PROCESS_IDX, *disable_cgroups);
}
@@ -1971,7 +2866,7 @@ static void read_collector_values(int *disable_cgroups,
CONFIG_BOOLEAN_NO);
if (!enabled)
enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION,
- ebpf_modules[EBPF_MODULE_SOCKET_IDX].config_name,
+ ebpf_modules[EBPF_MODULE_SOCKET_IDX].info.config_name,
CONFIG_BOOLEAN_NO);
if (enabled) {
ebpf_enable_chart(EBPF_MODULE_SOCKET_IDX, *disable_cgroups);
@@ -1979,10 +2874,11 @@ static void read_collector_values(int *disable_cgroups,
// This is kept to keep compatibility
enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "network connection monitoring",
- CONFIG_BOOLEAN_NO);
+ CONFIG_BOOLEAN_YES);
if (!enabled)
enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "network connections",
- CONFIG_BOOLEAN_NO);
+ CONFIG_BOOLEAN_YES);
+
network_viewer_opt.enabled = enabled;
if (enabled) {
if (!ebpf_modules[EBPF_MODULE_SOCKET_IDX].enabled)
@@ -1991,7 +2887,7 @@ static void read_collector_values(int *disable_cgroups,
// Read network viewer section if network viewer is enabled
// This is kept here to keep backward compatibility
parse_network_viewer_section(&collector_config);
- parse_service_name_section(&collector_config);
+ ebpf_parse_service_name_section(&collector_config);
}
enabled = appconfig_get_boolean(&collector_config, EBPF_PROGRAMS_SECTION, "cachestat",
@@ -2238,7 +3134,7 @@ static void ebpf_parse_args(int argc, char **argv)
};
memset(&network_viewer_opt, 0, sizeof(network_viewer_opt));
- network_viewer_opt.max_dim = NETDATA_NV_CAP_VALUE;
+ rw_spinlock_init(&network_viewer_opt.rw_spinlock);
if (argc > 1) {
int n = (int)str2l(argv[1]);
@@ -2250,6 +3146,7 @@ static void ebpf_parse_args(int argc, char **argv)
if (!freq)
freq = EBPF_DEFAULT_UPDATE_EVERY;
+ //rw_spinlock_write_lock(&network_viewer_opt.rw_spinlock);
if (ebpf_load_collector_config(ebpf_user_config_dir, &disable_cgroups, freq)) {
netdata_log_info(
"Does not have a configuration file inside `%s/ebpf.d.conf. It will try to load stock file.",
@@ -2260,6 +3157,7 @@ static void ebpf_parse_args(int argc, char **argv)
}
ebpf_load_thread_config();
+ //rw_spinlock_write_unlock(&network_viewer_opt.rw_spinlock);
while (1) {
int c = getopt_long_only(argc, argv, "", long_options, &option_index);
@@ -2457,8 +3355,7 @@ static void ebpf_parse_args(int argc, char **argv)
}
if (disable_cgroups) {
- if (disable_cgroups)
- ebpf_disable_cgroups();
+ ebpf_disable_cgroups();
}
if (select_threads) {
@@ -2507,16 +3404,16 @@ static char *hash_table_core[NETDATA_EBPF_LOAD_STAT_END] = {"per_core", "unique"
static inline void ebpf_send_hash_table_pid_data(char *chart, uint32_t idx)
{
int i;
- write_begin_chart(NETDATA_MONITORING_FAMILY, chart);
+ ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, chart, "");
for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) {
ebpf_module_t *wem = &ebpf_modules[i];
- if (wem->apps_routine)
- write_chart_dimension((char *)wem->thread_name,
+ if (wem->functions.apps_routine)
+ write_chart_dimension((char *)wem->info.thread_name,
(wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ?
wem->hash_table_stats[idx]:
0);
}
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -2528,13 +3425,13 @@ static inline void ebpf_send_hash_table_pid_data(char *chart, uint32_t idx)
static inline void ebpf_send_global_hash_table_data()
{
int i;
- write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_GLOBAL_ELEMENTS);
+ ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_GLOBAL_ELEMENTS, "");
for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) {
ebpf_module_t *wem = &ebpf_modules[i];
- write_chart_dimension((char *)wem->thread_name,
+ write_chart_dimension((char *)wem->info.thread_name,
(wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? NETDATA_CONTROLLER_END: 0);
}
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -2547,48 +3444,71 @@ void ebpf_send_statistic_data()
if (!publish_internal_metrics)
return;
- write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_THREADS);
+ ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_THREADS, "");
int i;
for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) {
ebpf_module_t *wem = &ebpf_modules[i];
- write_chart_dimension((char *)wem->thread_name, (wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? 1 : 0);
+ if (wem->functions.fnct_routine)
+ continue;
+
+ write_chart_dimension((char *)wem->info.thread_name, (wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? 1 : 0);
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LIFE_TIME);
+ ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LIFE_TIME, "");
for (i = 0; i < EBPF_MODULE_FUNCTION_IDX ; i++) {
ebpf_module_t *wem = &ebpf_modules[i];
// Threads like VFS is slow to load and this can create an invalid number, this is the motive
// we are also testing wem->lifetime value.
- write_chart_dimension((char *)wem->thread_name,
+ if (wem->functions.fnct_routine)
+ continue;
+
+ write_chart_dimension((char *)wem->info.thread_name,
(wem->lifetime && wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ?
(long long) (wem->lifetime - wem->running_time):
0) ;
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LOAD_METHOD);
+ ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_LOAD_METHOD, "");
write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_LEGACY], (long long)plugin_statistics.legacy);
write_chart_dimension(load_event_stat[NETDATA_EBPF_LOAD_STAT_CORE], (long long)plugin_statistics.core);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_KERNEL_MEMORY);
+ ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_KERNEL_MEMORY, "");
write_chart_dimension(memlock_stat, (long long)plugin_statistics.memlock_kern);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_LOADED);
+ ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_LOADED, "");
write_chart_dimension(hash_table_stat, (long long)plugin_statistics.hash_tables);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_PER_CORE);
+ ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, NETDATA_EBPF_HASH_TABLES_PER_CORE, "");
write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_PER_CORE], (long long)plugin_statistics.hash_percpu);
write_chart_dimension(hash_table_core[NETDATA_EBPF_THREAD_UNIQUE], (long long)plugin_statistics.hash_unique);
- write_end_chart();
+ ebpf_write_end_chart();
ebpf_send_global_hash_table_data();
ebpf_send_hash_table_pid_data(NETDATA_EBPF_HASH_TABLES_INSERT_PID_ELEMENTS, NETDATA_EBPF_GLOBAL_TABLE_PID_TABLE_ADD);
ebpf_send_hash_table_pid_data(NETDATA_EBPF_HASH_TABLES_REMOVE_PID_ELEMENTS, NETDATA_EBPF_GLOBAL_TABLE_PID_TABLE_DEL);
+
+ for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) {
+ ebpf_module_t *wem = &ebpf_modules[i];
+ if (!wem->functions.fnct_routine)
+ continue;
+
+ ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, (char *)wem->functions.fcnt_thread_chart_name, "");
+ write_chart_dimension((char *)wem->info.thread_name, (wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ? 1 : 0);
+ ebpf_write_end_chart();
+
+ ebpf_write_begin_chart(NETDATA_MONITORING_FAMILY, (char *)wem->functions.fcnt_thread_lifetime_name, "");
+ write_chart_dimension((char *)wem->info.thread_name,
+ (wem->lifetime && wem->enabled < NETDATA_THREAD_EBPF_STOPPING) ?
+ (long long) (wem->lifetime - wem->running_time):
+ 0) ;
+ ebpf_write_end_chart();
+ }
}
/**
@@ -2607,57 +3527,52 @@ static void update_internal_metric_variable()
}
/**
- * Create chart for Statistic Thread
+ * Create Thread Chart
*
- * Write to standard output current values for threads.
+ * Write to standard output current values for threads charts.
*
+ * @param name is the chart name
+ * @param title chart title.
+ * @param units chart units
+ * @param order is the chart order
* @param update_every time used to update charts
+ * @param module a module to create a specific chart.
*/
-static inline void ebpf_create_statistic_thread_chart(int update_every)
+static void ebpf_create_thread_chart(char *name,
+ char *title,
+ char *units,
+ int order,
+ int update_every,
+ ebpf_module_t *module)
{
+ // common call for specific and all charts.
ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY,
- NETDATA_EBPF_THREADS,
- "Threads running.",
- "boolean",
+ name,
+ "",
+ title,
+ units,
NETDATA_EBPF_FAMILY,
NETDATA_EBPF_CHART_TYPE_LINE,
NULL,
- NETDATA_EBPF_ORDER_STAT_THREADS,
+ order,
update_every,
- NETDATA_EBPF_MODULE_NAME_PROCESS);
+ "main");
- int i;
- for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) {
- ebpf_write_global_dimension((char *)ebpf_modules[i].thread_name,
- (char *)ebpf_modules[i].thread_name,
+ if (module) {
+ ebpf_write_global_dimension((char *)module->info.thread_name,
+ (char *)module->info.thread_name,
ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+ return;
}
-}
-
-/**
- * Create lifetime Thread Chart
- *
- * Write to standard output current values for threads lifetime.
- *
- * @param update_every time used to update charts
- */
-static inline void ebpf_create_lifetime_thread_chart(int update_every)
-{
- ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY,
- NETDATA_EBPF_LIFE_TIME,
- "Threads running.",
- "seconds",
- NETDATA_EBPF_FAMILY,
- NETDATA_EBPF_CHART_TYPE_LINE,
- NULL,
- NETDATA_EBPF_ORDER_STAT_LIFE_TIME,
- update_every,
- NETDATA_EBPF_MODULE_NAME_PROCESS);
int i;
for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) {
- ebpf_write_global_dimension((char *)ebpf_modules[i].thread_name,
- (char *)ebpf_modules[i].thread_name,
+ ebpf_module_t *em = &ebpf_modules[i];
+ if (em->functions.fnct_routine)
+ continue;
+
+ ebpf_write_global_dimension((char *)em->info.thread_name,
+ (char *)em->info.thread_name,
ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
}
}
@@ -2673,6 +3588,7 @@ static inline void ebpf_create_statistic_load_chart(int update_every)
{
ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY,
NETDATA_EBPF_LOAD_METHOD,
+ "",
"Load info.",
"methods",
NETDATA_EBPF_FAMILY,
@@ -2702,6 +3618,7 @@ static inline void ebpf_create_statistic_kernel_memory(int update_every)
{
ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY,
NETDATA_EBPF_KERNEL_MEMORY,
+ "",
"Memory allocated for hash tables.",
"bytes",
NETDATA_EBPF_FAMILY,
@@ -2727,6 +3644,7 @@ static inline void ebpf_create_statistic_hash_tables(int update_every)
{
ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY,
NETDATA_EBPF_HASH_TABLES_LOADED,
+ "",
"Number of hash tables loaded.",
"hash tables",
NETDATA_EBPF_FAMILY,
@@ -2752,6 +3670,7 @@ static inline void ebpf_create_statistic_hash_per_core(int update_every)
{
ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY,
NETDATA_EBPF_HASH_TABLES_PER_CORE,
+ "",
"How threads are loading hash/array tables.",
"threads",
NETDATA_EBPF_FAMILY,
@@ -2781,6 +3700,7 @@ static void ebpf_create_statistic_hash_global_elements(int update_every)
{
ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY,
NETDATA_EBPF_HASH_TABLES_GLOBAL_ELEMENTS,
+ "",
"Controllers inside global table",
"rows",
NETDATA_EBPF_FAMILY,
@@ -2792,8 +3712,8 @@ static void ebpf_create_statistic_hash_global_elements(int update_every)
int i;
for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) {
- ebpf_write_global_dimension((char *)ebpf_modules[i].thread_name,
- (char *)ebpf_modules[i].thread_name,
+ ebpf_write_global_dimension((char *)ebpf_modules[i].info.thread_name,
+ (char *)ebpf_modules[i].info.thread_name,
ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
}
}
@@ -2812,6 +3732,7 @@ static void ebpf_create_statistic_hash_pid_table(int update_every, char *id, cha
{
ebpf_write_chart_cmd(NETDATA_MONITORING_FAMILY,
id,
+ "",
title,
"rows",
NETDATA_EBPF_FAMILY,
@@ -2824,9 +3745,9 @@ static void ebpf_create_statistic_hash_pid_table(int update_every, char *id, cha
int i;
for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) {
ebpf_module_t *wem = &ebpf_modules[i];
- if (wem->apps_routine)
- ebpf_write_global_dimension((char *)wem->thread_name,
- (char *)wem->thread_name,
+ if (wem->functions.apps_routine)
+ ebpf_write_global_dimension((char *)wem->info.thread_name,
+ (char *)wem->info.thread_name,
ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
}
}
@@ -2850,15 +3771,63 @@ static void ebpf_create_statistic_charts(int update_every)
create_charts = 0;
- ebpf_create_statistic_thread_chart(update_every);
+ ebpf_create_thread_chart(NETDATA_EBPF_THREADS,
+ "Threads running.",
+ "boolean",
+ NETDATA_EBPF_ORDER_STAT_THREADS,
+ update_every,
+ NULL);
+ /*
#ifdef NETDATA_DEV_MODE
EBPF_PLUGIN_FUNCTIONS(EBPF_FUNCTION_THREAD, EBPF_PLUGIN_THREAD_FUNCTION_DESCRIPTION);
#endif
-
- ebpf_create_lifetime_thread_chart(update_every);
+ */
+
+ ebpf_create_thread_chart(NETDATA_EBPF_LIFE_TIME,
+ "Time remaining for thread.",
+ "seconds",
+ NETDATA_EBPF_ORDER_STAT_LIFE_TIME,
+ update_every,
+ NULL);
+ /*
#ifdef NETDATA_DEV_MODE
EBPF_PLUGIN_FUNCTIONS(EBPF_FUNCTION_THREAD, EBPF_PLUGIN_THREAD_FUNCTION_DESCRIPTION);
#endif
+ */
+
+ int i,j;
+ char name[256];
+ for (i = 0, j = NETDATA_EBPF_ORDER_FUNCTION_PER_THREAD; i < EBPF_MODULE_FUNCTION_IDX; i++) {
+ ebpf_module_t *em = &ebpf_modules[i];
+ if (!em->functions.fnct_routine)
+ continue;
+
+ em->functions.order_thread_chart = j;
+ snprintfz(name, sizeof(name) - 1, "%s_%s", NETDATA_EBPF_THREADS, em->info.thread_name);
+ em->functions.fcnt_thread_chart_name = strdupz(name);
+ ebpf_create_thread_chart(name,
+ "Threads running.",
+ "boolean",
+ j++,
+ update_every,
+ em);
+#ifdef NETDATA_DEV_MODE
+ EBPF_PLUGIN_FUNCTIONS(em->functions.fcnt_name, em->functions.fcnt_desc);
+#endif
+
+ em->functions.order_thread_lifetime = j;
+ snprintfz(name, sizeof(name) - 1, "%s_%s", NETDATA_EBPF_LIFE_TIME, em->info.thread_name);
+ em->functions.fcnt_thread_lifetime_name = strdupz(name);
+ ebpf_create_thread_chart(name,
+ "Time remaining for thread.",
+ "seconds",
+ j++,
+ update_every,
+ em);
+#ifdef NETDATA_DEV_MODE
+ EBPF_PLUGIN_FUNCTIONS(em->functions.fcnt_name, em->functions.fcnt_desc);
+#endif
+ }
ebpf_create_statistic_load_chart(update_every);
@@ -3013,7 +3982,7 @@ static void ebpf_kill_previous_process(char *filename, pid_t pid)
*/
void ebpf_pid_file(char *filename, size_t length)
{
- snprintfz(filename, length, "%s%s/ebpf.d/ebpf.pid", netdata_configured_host_prefix, ebpf_plugin_dir);
+ snprintfz(filename, length, "%s/var/run/ebpf.pid", netdata_configured_host_prefix);
}
/**
@@ -3040,8 +4009,8 @@ static void ebpf_manage_pid(pid_t pid)
static void ebpf_set_static_routine()
{
int i;
- for (i = 0; ebpf_modules[i].thread_name; i++) {
- ebpf_threads[i].start_routine = ebpf_modules[i].start_routine;
+ for (i = 0; ebpf_modules[i].info.thread_name; i++) {
+ ebpf_threads[i].start_routine = ebpf_modules[i].functions.start_routine;
}
}
@@ -3055,8 +4024,9 @@ static void ebpf_manage_pid(pid_t pid)
*/
int main(int argc, char **argv)
{
- stderror = stderr;
clocks_init();
+ nd_log_initialize_for_external_plugins("ebpf.plugin");
+
main_thread_id = gettid();
set_global_variables();
@@ -3066,16 +4036,6 @@ int main(int argc, char **argv)
if (ebpf_check_conditions())
return 2;
- // set name
- program_name = "ebpf.plugin";
-
- // disable syslog
- error_log_syslog = 0;
-
- // set errors flood protection to 100 logs per hour
- error_log_errors_per_period = 100;
- error_log_throttle_period = 3600;
-
if (ebpf_adjust_memory_limit())
return 3;
@@ -3095,7 +4055,7 @@ int main(int argc, char **argv)
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
#endif
- read_local_addresses();
+ ebpf_read_local_addresses_unsafe();
read_local_ports("/proc/net/tcp", IPPROTO_TCP);
read_local_ports("/proc/net/tcp6", IPPROTO_TCP);
read_local_ports("/proc/net/udp", IPPROTO_UDP);
@@ -3116,13 +4076,13 @@ int main(int argc, char **argv)
ebpf_module_t *em = &ebpf_modules[i];
em->thread = st;
em->thread_id = i;
- if (em->enabled) {
+ if (em->enabled != NETDATA_THREAD_EBPF_NOT_RUNNING) {
st->thread = mallocz(sizeof(netdata_thread_t));
em->enabled = NETDATA_THREAD_EBPF_RUNNING;
em->lifetime = EBPF_NON_FUNCTION_LIFE_TIME;
netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, em);
} else {
- em->enabled = NETDATA_THREAD_EBPF_NOT_RUNNING;
+ em->lifetime = EBPF_DEFAULT_LIFETIME;
}
}
@@ -3133,7 +4093,7 @@ int main(int argc, char **argv)
int update_apps_list = update_apps_every - 1;
int process_maps_per_core = ebpf_modules[EBPF_MODULE_PROCESS_IDX].maps_per_core;
//Plugin will be killed when it receives a signal
- for ( ; !ebpf_exit_plugin ; global_iterations_counter++) {
+ for ( ; !ebpf_plugin_exit; global_iterations_counter++) {
(void)heartbeat_next(&hb, step);
if (global_iterations_counter % EBPF_DEFAULT_UPDATE_EVERY == 0) {
diff --git a/collectors/ebpf.plugin/ebpf.d/network.conf b/collectors/ebpf.plugin/ebpf.d/network.conf
index 00cbf2e8ba07e8..99c32edc13bee8 100644
--- a/collectors/ebpf.plugin/ebpf.d/network.conf
+++ b/collectors/ebpf.plugin/ebpf.d/network.conf
@@ -26,6 +26,11 @@
#
# The `maps per core` defines if hash tables will be per core or not. This option is ignored on kernels older than 4.6.
#
+# The `collect pid` option defines the PID stored inside hash tables and accepts the following options:
+# `real parent`: Only stores the real parent PID.
+# `parent` : Only stores parent PID.
+# `all` : Stores all PIDs used by software. This is the most expensive option.
+#
# The `lifetime` defines the time length a thread will run when it is enabled by a function.
#
# Uncomment lines to define specific options for thread.
@@ -35,12 +40,12 @@
# cgroups = no
# update every = 10
bandwidth table size = 16384
- ipv4 connection table size = 16384
- ipv6 connection table size = 16384
+ socket monitoring table size = 16384
udp connection table size = 4096
ebpf type format = auto
- ebpf co-re tracing = trampoline
+ ebpf co-re tracing = probe
maps per core = no
+ collect pid = all
lifetime = 300
#
@@ -49,11 +54,12 @@
# This is a feature with status WIP(Work in Progress)
#
[network connections]
- maximum dimensions = 50
+ enabled = yes
resolve hostnames = no
- resolve service names = no
+ resolve service names = yes
ports = *
- ips = !127.0.0.1/8 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 fc00::/7 !::1/128
+# ips = !127.0.0.1/8 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 fc00::/7 !::1/128
+ ips = *
hostnames = *
[service name]
diff --git a/collectors/ebpf.plugin/ebpf.h b/collectors/ebpf.plugin/ebpf.h
index 78e3a9252b73ad..ad7c5a94cddfdc 100644
--- a/collectors/ebpf.plugin/ebpf.h
+++ b/collectors/ebpf.plugin/ebpf.h
@@ -31,6 +31,7 @@
#include "daemon/main.h"
#include "ebpf_apps.h"
+#include "ebpf_functions.h"
#include "ebpf_cgroup.h"
#define NETDATA_EBPF_OLD_CONFIG_FILE "ebpf.conf"
@@ -98,6 +99,26 @@ typedef struct netdata_error_report {
int err;
} netdata_error_report_t;
+typedef struct netdata_ebpf_judy_pid {
+ ARAL *pid_table;
+
+ // Index for PIDs
+ struct { // support for multiple indexing engines
+ Pvoid_t JudyLArray; // the hash table
+ RW_SPINLOCK rw_spinlock; // protect the index
+ } index;
+} netdata_ebpf_judy_pid_t;
+
+typedef struct netdata_ebpf_judy_pid_stats {
+ char *cmdline;
+
+ // Index for Socket timestamp
+ struct { // support for multiple indexing engines
+ Pvoid_t JudyLArray; // the hash table
+ RW_SPINLOCK rw_spinlock; // protect the index
+ } socket_stats;
+} netdata_ebpf_judy_pid_stats_t;
+
extern ebpf_module_t ebpf_modules[];
enum ebpf_main_index {
EBPF_MODULE_PROCESS_IDX,
@@ -217,6 +238,7 @@ void ebpf_global_labels(netdata_syscall_stat_t *is,
void ebpf_write_chart_cmd(char *type,
char *id,
+ char *suffix,
char *title,
char *units,
char *family,
@@ -244,8 +266,6 @@ void ebpf_create_chart(char *type,
int update_every,
char *module);
-void write_begin_chart(char *family, char *name);
-
void write_chart_dimension(char *dim, long long value);
void write_count_chart(char *name, char *family, netdata_publish_syscall_t *move, uint32_t end);
@@ -255,18 +275,47 @@ void write_err_chart(char *name, char *family, netdata_publish_syscall_t *move,
void write_io_chart(char *chart, char *family, char *dwrite, long long vwrite,
char *dread, long long vread);
-void ebpf_create_charts_on_apps(char *name,
- char *title,
- char *units,
- char *family,
- char *charttype,
- int order,
- char *algorithm,
- struct ebpf_target *root,
- int update_every,
- char *module);
-
-void write_end_chart();
+/**
+ * Create Chart labels
+ *
+ * @param name the label name.
+ * @param value the label value.
+ * @param source the label source.
+ */
+static inline void ebpf_create_chart_labels(char *name, char *value, int source)
+{
+ fprintf(stdout, "CLABEL '%s' '%s' %d\n", name, value, source);
+}
+
+/**
+ * Commit label
+ *
+ * Write commit label to stdout
+ */
+static inline void ebpf_commit_label()
+{
+ fprintf(stdout, "CLABEL_COMMIT\n");
+}
+
+/**
+ * Write begin command on standard output
+ *
+ * @param family the chart family name
+ * @param name the chart name
+ * @param metric the chart suffix (used with apps and cgroups)
+ */
+static inline void ebpf_write_begin_chart(char *family, char *name, char *metric)
+{
+ printf("BEGIN %s.%s%s\n", family, name, metric);
+}
+
+/**
+ * Write END command on stdout.
+ */
+static inline void ebpf_write_end_chart()
+{
+ printf("END\n");
+}
int ebpf_enable_tracepoint(ebpf_tracepoint_t *tp);
int ebpf_disable_tracepoint(ebpf_tracepoint_t *tp);
@@ -276,6 +325,9 @@ void ebpf_pid_file(char *filename, size_t length);
#define EBPF_PROGRAMS_SECTION "ebpf programs"
+#define EBPF_COMMON_DIMENSION_PERCENTAGE "%"
+#define EBPF_PROGRAMS_SECTION "ebpf programs"
+
#define EBPF_COMMON_DIMENSION_PERCENTAGE "%"
#define EBPF_COMMON_DIMENSION_CALL "calls/s"
#define EBPF_COMMON_DIMENSION_CONNECTIONS "connections/s"
@@ -313,7 +365,7 @@ void ebpf_cachestat_create_apps_charts(struct ebpf_module *em, void *root);
void ebpf_one_dimension_write_charts(char *family, char *chart, char *dim, long long v1);
collected_number get_value_from_structure(char *basis, size_t offset);
void ebpf_update_pid_table(ebpf_local_maps_t *pid, ebpf_module_t *em);
-void ebpf_write_chart_obsolete(char *type, char *id, char *title, char *units, char *family,
+void ebpf_write_chart_obsolete(char *type, char *id, char *suffix, char *title, char *units, char *family,
char *charttype, char *context, int order, int update_every);
void write_histogram_chart(char *family, char *name, const netdata_idx_t *hist, char **dimensions, uint32_t end);
void ebpf_update_disabled_plugin_stats(ebpf_module_t *em);
@@ -322,10 +374,19 @@ void ebpf_unload_legacy_code(struct bpf_object *objects, struct bpf_link **probe
void ebpf_read_global_table_stats(netdata_idx_t *stats, netdata_idx_t *values, int map_fd,
int maps_per_core, uint32_t begin, uint32_t end);
+void **ebpf_judy_insert_unsafe(PPvoid_t arr, Word_t key);
+netdata_ebpf_judy_pid_stats_t *ebpf_get_pid_from_judy_unsafe(PPvoid_t judy_array, uint32_t pid);
+
+void parse_network_viewer_section(struct config *cfg);
+void ebpf_clean_ip_structure(ebpf_network_viewer_ip_list_t **clean);
+void ebpf_clean_port_structure(ebpf_network_viewer_port_list_t **clean);
+void ebpf_read_local_addresses_unsafe();
extern ebpf_filesystem_partitions_t localfs[];
extern ebpf_sync_syscalls_t local_syscalls[];
-extern int ebpf_exit_plugin;
+extern bool ebpf_plugin_exit;
+void ebpf_stop_threads(int sig);
+extern netdata_ebpf_judy_pid_t ebpf_judy_pid;
#define EBPF_MAX_SYNCHRONIZATION_TIME 300
diff --git a/collectors/ebpf.plugin/ebpf_apps.c b/collectors/ebpf.plugin/ebpf_apps.c
index c7c0cbbbb00d95..10c452267a8429 100644
--- a/collectors/ebpf.plugin/ebpf_apps.c
+++ b/collectors/ebpf.plugin/ebpf_apps.c
@@ -132,16 +132,6 @@ ebpf_socket_publish_apps_t *ebpf_socket_stat_get(void)
return target;
}
-/**
- * eBPF socket release
- *
- * @param stat Release a target after usage.
- */
-void ebpf_socket_release(ebpf_socket_publish_apps_t *stat)
-{
- aral_freez(ebpf_aral_socket_pid, stat);
-}
-
/*****************************************************************
*
* CACHESTAT ARAL FUNCTIONS
@@ -375,58 +365,6 @@ int ebpf_read_hash_table(void *ep, int fd, uint32_t pid)
return -1;
}
-/**
- * Read socket statistic
- *
- * Read information from kernel ring to user ring.
- *
- * @param ep the table with all process stats values.
- * @param fd the file descriptor mapped from kernel
- * @param ef a pointer for the functions mapped from dynamic library
- * @param pids the list of pids associated to a target.
- *
- * @return
- */
-size_t read_bandwidth_statistic_using_pid_on_target(ebpf_bandwidth_t **ep, int fd, struct ebpf_pid_on_target *pids)
-{
- size_t count = 0;
- while (pids) {
- uint32_t current_pid = pids->pid;
- if (!ebpf_read_hash_table(ep[current_pid], fd, current_pid))
- count++;
-
- pids = pids->next;
- }
-
- return count;
-}
-
-/**
- * Read bandwidth statistic using hash table
- *
- * @param out the output tensor that will receive the information.
- * @param fd the file descriptor that has the data
- * @param bpf_map_lookup_elem a pointer for the function to read the data
- * @param bpf_map_get_next_key a pointer fo the function to read the index.
- */
-size_t read_bandwidth_statistic_using_hash_table(ebpf_bandwidth_t **out, int fd)
-{
- size_t count = 0;
- uint32_t key = 0;
- uint32_t next_key = 0;
-
- while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
- ebpf_bandwidth_t *eps = out[next_key];
- if (!eps) {
- eps = callocz(1, sizeof(ebpf_process_stat_t));
- out[next_key] = eps;
- }
- ebpf_read_hash_table(eps, fd, next_key);
- }
-
- return count;
-}
-
/*****************************************************************
*
* FUNCTIONS CALLED FROM COLLECTORS
@@ -564,6 +502,13 @@ struct ebpf_target *get_apps_groups_target(struct ebpf_target **agrt, const char
// copy the id
strncpyz(w->name, nid, EBPF_MAX_NAME);
+ strncpyz(w->clean_name, w->name, EBPF_MAX_NAME);
+ netdata_fix_chart_name(w->clean_name);
+ for (char *d = w->clean_name; *d; d++) {
+ if (*d == '.')
+ *d = '_';
+ }
+
strncpyz(w->compare, nid, EBPF_MAX_COMPARE_NAME);
size_t len = strlen(w->compare);
if (w->compare[len - 1] == '*') {
@@ -887,6 +832,7 @@ static inline int read_proc_pid_cmdline(struct ebpf_pid_stat *p)
{
static char cmdline[MAX_CMDLINE + 1];
+ int ret = 0;
if (unlikely(!p->cmdline_filename)) {
char filename[FILENAME_MAX + 1];
snprintfz(filename, FILENAME_MAX, "%s/proc/%d/cmdline", netdata_configured_host_prefix, p->pid);
@@ -909,20 +855,23 @@ static inline int read_proc_pid_cmdline(struct ebpf_pid_stat *p)
cmdline[i] = ' ';
}
- if (p->cmdline)
- freez(p->cmdline);
- p->cmdline = strdupz(cmdline);
-
debug_log("Read file '%s' contents: %s", p->cmdline_filename, p->cmdline);
- return 1;
+ ret = 1;
cleanup:
// copy the command to the command line
if (p->cmdline)
freez(p->cmdline);
p->cmdline = strdupz(p->comm);
- return 0;
+
+ rw_spinlock_write_lock(&ebpf_judy_pid.index.rw_spinlock);
+ netdata_ebpf_judy_pid_stats_t *pid_ptr = ebpf_get_pid_from_judy_unsafe(&ebpf_judy_pid.index.JudyLArray, p->pid);
+ if (pid_ptr)
+ pid_ptr->cmdline = p->cmdline;
+ rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock);
+
+ return ret;
}
/**
@@ -1238,6 +1187,24 @@ static inline void del_pid_entry(pid_t pid)
freez(p->status_filename);
freez(p->io_filename);
freez(p->cmdline_filename);
+
+ rw_spinlock_write_lock(&ebpf_judy_pid.index.rw_spinlock);
+ netdata_ebpf_judy_pid_stats_t *pid_ptr = ebpf_get_pid_from_judy_unsafe(&ebpf_judy_pid.index.JudyLArray, p->pid);
+ if (pid_ptr) {
+ if (pid_ptr->socket_stats.JudyLArray) {
+ Word_t local_socket = 0;
+ Pvoid_t *socket_value;
+ bool first_socket = true;
+ while ((socket_value = JudyLFirstThenNext(pid_ptr->socket_stats.JudyLArray, &local_socket, &first_socket))) {
+ netdata_socket_plus_t *socket_clean = *socket_value;
+ aral_freez(aral_socket_table, socket_clean);
+ }
+ JudyLFreeArray(&pid_ptr->socket_stats.JudyLArray, PJE0);
+ }
+ JudyLDel(&ebpf_judy_pid.index.JudyLArray, p->pid, PJE0);
+ }
+ rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock);
+
freez(p->cmdline);
ebpf_pid_stat_release(p);
@@ -1279,12 +1246,6 @@ int get_pid_comm(pid_t pid, size_t n, char *dest)
*/
void cleanup_variables_from_other_threads(uint32_t pid)
{
- // Clean socket structures
- if (socket_bandwidth_curr) {
- ebpf_socket_release(socket_bandwidth_curr[pid]);
- socket_bandwidth_curr[pid] = NULL;
- }
-
// Clean cachestat structure
if (cachestat_pid) {
ebpf_cachestat_release(cachestat_pid[pid]);
diff --git a/collectors/ebpf.plugin/ebpf_apps.h b/collectors/ebpf.plugin/ebpf_apps.h
index fc894a55fe44d4..2580915078c3ee 100644
--- a/collectors/ebpf.plugin/ebpf_apps.h
+++ b/collectors/ebpf.plugin/ebpf_apps.h
@@ -10,11 +10,13 @@
#include "libnetdata/ebpf/ebpf.h"
#define NETDATA_APPS_FAMILY "apps"
+#define NETDATA_APP_FAMILY "app"
#define NETDATA_APPS_FILE_GROUP "file_access"
+#define NETDATA_APPS_FILE_FDS "fds"
#define NETDATA_APPS_FILE_CGROUP_GROUP "file_access (eBPF)"
#define NETDATA_APPS_PROCESS_GROUP "process (eBPF)"
#define NETDATA_APPS_NET_GROUP "net"
-#define NETDATA_APPS_IPC_SHM_GROUP "ipc shm (eBPF)"
+#define NETDATA_APPS_IPC_SHM_GROUP "ipc shm"
#include "ebpf_process.h"
#include "ebpf_dcstat.h"
@@ -47,8 +49,10 @@ struct ebpf_target {
char id[EBPF_MAX_NAME + 1];
uint32_t idhash;
+ uint32_t charts_created;
char name[EBPF_MAX_NAME + 1];
+ char clean_name[EBPF_MAX_NAME + 1]; // sanitized name used in chart id (need to replace at least dots)
// Changes made to simplify integration between apps and eBPF.
netdata_publish_cachestat_t cachestat;
@@ -150,24 +154,6 @@ typedef struct ebpf_process_stat {
uint8_t removeme;
} ebpf_process_stat_t;
-typedef struct ebpf_bandwidth {
- uint32_t pid;
-
- uint64_t first; // First timestamp
- uint64_t ct; // Last timestamp
- uint64_t bytes_sent; // Bytes sent
- uint64_t bytes_received; // Bytes received
- uint64_t call_tcp_sent; // Number of times tcp_sendmsg was called
- uint64_t call_tcp_received; // Number of times tcp_cleanup_rbuf was called
- uint64_t retransmit; // Number of times tcp_retransmit was called
- uint64_t call_udp_sent; // Number of times udp_sendmsg was called
- uint64_t call_udp_received; // Number of times udp_recvmsg was called
- uint64_t close; // Number of times tcp_close was called
- uint64_t drop; // THIS IS NOT USED FOR WHILE, we are in groom section
- uint32_t tcp_v4_connection; // Number of times tcp_v4_connection was called.
- uint32_t tcp_v6_connection; // Number of times tcp_v6_connection was called.
-} ebpf_bandwidth_t;
-
/**
* Internal function used to write debug messages.
*
@@ -208,12 +194,6 @@ int ebpf_read_hash_table(void *ep, int fd, uint32_t pid);
int get_pid_comm(pid_t pid, size_t n, char *dest);
-size_t read_processes_statistic_using_pid_on_target(ebpf_process_stat_t **ep,
- int fd,
- struct ebpf_pid_on_target *pids);
-
-size_t read_bandwidth_statistic_using_pid_on_target(ebpf_bandwidth_t **ep, int fd, struct ebpf_pid_on_target *pids);
-
void collect_data_for_all_processes(int tbl_pid_stats_fd, int maps_per_core);
void ebpf_process_apps_accumulator(ebpf_process_stat_t *out, int maps_per_core);
@@ -242,7 +222,6 @@ extern ebpf_process_stat_t *process_stat_vector;
extern ARAL *ebpf_aral_socket_pid;
void ebpf_socket_aral_init();
ebpf_socket_publish_apps_t *ebpf_socket_stat_get(void);
-void ebpf_socket_release(ebpf_socket_publish_apps_t *stat);
extern ARAL *ebpf_aral_cachestat_pid;
void ebpf_cachestat_aral_init();
diff --git a/collectors/ebpf.plugin/ebpf_cachestat.c b/collectors/ebpf.plugin/ebpf_cachestat.c
index affecdea2d4a44..d9f8f7b06b6e38 100644
--- a/collectors/ebpf.plugin/ebpf_cachestat.c
+++ b/collectors/ebpf.plugin/ebpf_cachestat.c
@@ -353,6 +353,7 @@ static void ebpf_obsolete_services(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_CACHESTAT_HIT_RATIO_CHART,
+ "",
"Hit ratio",
EBPF_COMMON_DIMENSION_PERCENTAGE,
NETDATA_CACHESTAT_SUBMENU,
@@ -363,6 +364,7 @@ static void ebpf_obsolete_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_CACHESTAT_DIRTY_CHART,
+ "",
"Number of dirty pages",
EBPF_CACHESTAT_DIMENSION_PAGE,
NETDATA_CACHESTAT_SUBMENU,
@@ -373,6 +375,7 @@ static void ebpf_obsolete_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_CACHESTAT_HIT_CHART,
+ "",
"Number of accessed files",
EBPF_CACHESTAT_DIMENSION_HITS,
NETDATA_CACHESTAT_SUBMENU,
@@ -383,6 +386,7 @@ static void ebpf_obsolete_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_CACHESTAT_MISSES_CHART,
+ "",
"Files out of page cache",
EBPF_CACHESTAT_DIMENSION_MISSES,
NETDATA_CACHESTAT_SUBMENU,
@@ -425,6 +429,7 @@ static void ebpf_obsolete_cachestat_global(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP,
NETDATA_CACHESTAT_HIT_RATIO_CHART,
+ "",
"Hit ratio",
EBPF_COMMON_DIMENSION_PERCENTAGE,
NETDATA_CACHESTAT_SUBMENU,
@@ -435,6 +440,7 @@ static void ebpf_obsolete_cachestat_global(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP,
NETDATA_CACHESTAT_DIRTY_CHART,
+ "",
"Number of dirty pages",
EBPF_CACHESTAT_DIMENSION_PAGE,
NETDATA_CACHESTAT_SUBMENU,
@@ -445,6 +451,7 @@ static void ebpf_obsolete_cachestat_global(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP,
NETDATA_CACHESTAT_HIT_CHART,
+ "",
"Number of accessed files",
EBPF_CACHESTAT_DIMENSION_HITS,
NETDATA_CACHESTAT_SUBMENU,
@@ -455,6 +462,7 @@ static void ebpf_obsolete_cachestat_global(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP,
NETDATA_CACHESTAT_MISSES_CHART,
+ "",
"Files out of page cache",
EBPF_CACHESTAT_DIMENSION_MISSES,
NETDATA_CACHESTAT_SUBMENU,
@@ -473,44 +481,57 @@ static void ebpf_obsolete_cachestat_global(ebpf_module_t *em)
*/
void ebpf_obsolete_cachestat_apps_charts(struct ebpf_module *em)
{
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_CACHESTAT_HIT_RATIO_CHART,
- "Hit ratio",
- EBPF_COMMON_DIMENSION_PERCENTAGE,
- NETDATA_CACHESTAT_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_LINE,
- NULL,
- 20090,
- em->update_every);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = apps_groups_root_target; w; w = w->next) {
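+ if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_CACHESTAT_IDX))))
+ continue;
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_CACHESTAT_DIRTY_CHART,
- "Number of dirty pages",
- EBPF_CACHESTAT_DIMENSION_PAGE,
- NETDATA_CACHESTAT_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20091,
- em->update_every);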
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_cachestat_hit_ratio",
+ "Hit ratio",
+ EBPF_COMMON_DIMENSION_PERCENTAGE,
+ NETDATA_CACHESTAT_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ "app.ebpf_cachestat_hit_ratio",
+ 20260,
+ update_every);
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_HIT_CHART,
- "Number of accessed files",
- EBPF_CACHESTAT_DIMENSION_HITS,
- NETDATA_CACHESTAT_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20092,
- em->update_every);
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_cachestat_dirty_pages",
+ "Number of dirty pages",
+ EBPF_CACHESTAT_DIMENSION_PAGE,
+ NETDATA_CACHESTAT_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_cachestat_dirty_pages",
+ 20261,
+ update_every);
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_CACHESTAT_MISSES_CHART,
- "Files out of page cache",
- EBPF_CACHESTAT_DIMENSION_MISSES,
- NETDATA_CACHESTAT_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20093,
- em->update_every);
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_cachestat_access",
+ "Number of accessed files",
+ EBPF_CACHESTAT_DIMENSION_HITS,
+ NETDATA_CACHESTAT_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_cachestat_access",
+ 20262,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_cachestat_misses",
+ "Files out of page cache",
+ EBPF_CACHESTAT_DIMENSION_MISSES,
+ NETDATA_CACHESTAT_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_cachestat_misses",
+ 20263,
+ update_every);
+ w->charts_created &= ~(1<update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT);
-
- ebpf_create_charts_on_apps(NETDATA_CACHESTAT_DIRTY_CHART,
- "Number of dirty pages",
- EBPF_CACHESTAT_DIMENSION_PAGE,
- NETDATA_CACHESTAT_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20091,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT);
-
- ebpf_create_charts_on_apps(NETDATA_CACHESTAT_HIT_CHART,
- "Number of accessed files",
- EBPF_CACHESTAT_DIMENSION_HITS,
- NETDATA_CACHESTAT_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20092,
- ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT);
-
- ebpf_create_charts_on_apps(NETDATA_CACHESTAT_MISSES_CHART,
- "Files out of page cache",
- EBPF_CACHESTAT_DIMENSION_MISSES,
- NETDATA_CACHESTAT_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20093,
- ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_CACHESTAT);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = root; w; w = w->next) {
+ if (unlikely(!w->exposed))
+ continue;
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_cachestat_hit_ratio",
+ "Hit ratio",
+ EBPF_COMMON_DIMENSION_PERCENTAGE,
+ NETDATA_CACHESTAT_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ "app.ebpf_cachestat_hit_ratio",
+ 20260,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_CACHESTAT);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION ratio '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_cachestat_dirty_pages",
+ "Number of dirty pages",
+ EBPF_CACHESTAT_DIMENSION_PAGE,
+ NETDATA_CACHESTAT_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ "app.ebpf_cachestat_dirty_pages",
+ 20261,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_CACHESTAT);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION pages '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_cachestat_access",
+ "Number of accessed files",
+ EBPF_CACHESTAT_DIMENSION_HITS,
+ NETDATA_CACHESTAT_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_cachestat_access",
+ 20262,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_CACHESTAT);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION hits '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_cachestat_misses",
+ "Files out of page cache",
+ EBPF_CACHESTAT_DIMENSION_MISSES,
+ NETDATA_CACHESTAT_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_cachestat_misses",
+ 20263,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_CACHESTAT);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION misses '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+ w->charts_created |= 1<<EBPF_MODULE_CACHESTAT_IDX;
+ }
em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED;
}
@@ -939,53 +993,42 @@ void ebpf_cache_send_apps_data(struct ebpf_target *root)
struct ebpf_target *w;
collected_number value;
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_HIT_RATIO_CHART);
for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- ebpf_cachestat_sum_pids(&w->cachestat, w->root_pid);
- netdata_cachestat_pid_t *current = &w->cachestat.current;
- netdata_cachestat_pid_t *prev = &w->cachestat.prev;
-
- uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed;
- uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty;
- w->cachestat.dirty = mbd;
- uint64_t apcl = current->add_to_page_cache_lru - prev->add_to_page_cache_lru;
- uint64_t apd = current->account_page_dirtied - prev->account_page_dirtied;
-
- cachestat_update_publish(&w->cachestat, mpa, mbd, apcl, apd);
- value = (collected_number) w->cachestat.ratio;
- // Here we are using different approach to have a chart more smooth
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
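+ if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_CACHESTAT_IDX))))
+ continue;
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_DIRTY_CHART);
- for (w = root; w; w = w->next) {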
- if (unlikely(w->exposed && w->processes)) {
- value = (collected_number) w->cachestat.dirty;
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
+ ebpf_cachestat_sum_pids(&w->cachestat, w->root_pid);
+ netdata_cachestat_pid_t *current = &w->cachestat.current;
+ netdata_cachestat_pid_t *prev = &w->cachestat.prev;
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_HIT_CHART);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = (collected_number) w->cachestat.hit;
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
+ uint64_t mpa = current->mark_page_accessed - prev->mark_page_accessed;
+ uint64_t mbd = current->mark_buffer_dirty - prev->mark_buffer_dirty;
+ w->cachestat.dirty = mbd;
+ uint64_t apcl = current->add_to_page_cache_lru - prev->add_to_page_cache_lru;
+ uint64_t apd = current->account_page_dirtied - prev->account_page_dirtied;
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_CACHESTAT_MISSES_CHART);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = (collected_number) w->cachestat.miss;
- write_chart_dimension(w->name, value);
- }
+ cachestat_update_publish(&w->cachestat, mpa, mbd, apcl, apd);
+
+ value = (collected_number) w->cachestat.ratio;
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_cachestat_hit_ratio");
+ write_chart_dimension("ratio", value);
+ ebpf_write_end_chart();
+
+ value = (collected_number) w->cachestat.dirty;
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_cachestat_dirty_pages");
+ write_chart_dimension("pages", value);
+ ebpf_write_end_chart();
+
+ value = (collected_number) w->cachestat.hit;
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_cachestat_access");
+ write_chart_dimension("hits", value);
+ ebpf_write_end_chart();
+
+ value = (collected_number) w->cachestat.miss;
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_cachestat_misses");
+ write_chart_dimension("misses", value);
+ ebpf_write_end_chart();
}
- write_end_chart();
}
/**
@@ -1087,37 +1130,37 @@ static void ebpf_send_systemd_cachestat_charts()
{
ebpf_cgroup_target_t *ect;
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_RATIO_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_RATIO_CHART, "");
for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_cachestat.ratio);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_DIRTY_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_DIRTY_CHART, "");
for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_cachestat.dirty);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_HIT_CHART, "");
for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_cachestat.hit);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_MISSES_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_CACHESTAT_MISSES_CHART, "");
for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_cachestat.miss);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -1127,21 +1170,21 @@ static void ebpf_send_systemd_cachestat_charts()
*/
static void ebpf_send_specific_cachestat_data(char *type, netdata_publish_cachestat_t *npc)
{
- write_begin_chart(type, NETDATA_CACHESTAT_HIT_RATIO_CHART);
+ ebpf_write_begin_chart(type, NETDATA_CACHESTAT_HIT_RATIO_CHART, "");
write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_RATIO].name, (long long)npc->ratio);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_CACHESTAT_DIRTY_CHART);
+ ebpf_write_begin_chart(type, NETDATA_CACHESTAT_DIRTY_CHART, "");
write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_DIRTY].name, (long long)npc->dirty);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_CACHESTAT_HIT_CHART);
+ ebpf_write_begin_chart(type, NETDATA_CACHESTAT_HIT_CHART, "");
write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_HIT].name, (long long)npc->hit);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_CACHESTAT_MISSES_CHART);
+ ebpf_write_begin_chart(type, NETDATA_CACHESTAT_MISSES_CHART, "");
write_chart_dimension(cachestat_counter_publish_aggregated[NETDATA_CACHESTAT_IDX_MISS].name, (long long)npc->miss);
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -1201,24 +1244,28 @@ static void ebpf_create_specific_cachestat_charts(char *type, int update_every)
static void ebpf_obsolete_specific_cachestat_charts(char *type, int update_every)
{
ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_HIT_RATIO_CHART,
+ "",
"Hit ratio",
EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_CACHESTAT_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_HIT_RATIO_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5200, update_every);
ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_DIRTY_CHART,
+ "",
"Number of dirty pages",
EBPF_CACHESTAT_DIMENSION_PAGE, NETDATA_CACHESTAT_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_MODIFIED_CACHE_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5201, update_every);
ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_HIT_CHART,
+ "",
"Number of accessed files",
EBPF_CACHESTAT_DIMENSION_HITS, NETDATA_CACHESTAT_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_HIT_FILES_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5202, update_every);
ebpf_write_chart_obsolete(type, NETDATA_CACHESTAT_MISSES_CHART,
+ "",
"Files out of page cache",
EBPF_CACHESTAT_DIMENSION_MISSES, NETDATA_CACHESTAT_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_CACHESTAT_MISS_FILES_CONTEXT,
@@ -1288,10 +1335,10 @@ static void cachestat_collector(ebpf_module_t *em)
uint32_t lifetime = em->lifetime;
netdata_idx_t *stats = em->hash_table_stats;
memset(stats, 0, sizeof(em->hash_table_stats));
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -1479,7 +1526,7 @@ static int ebpf_cachestat_load_bpf(ebpf_module_t *em)
#endif
if (ret)
- netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name);
+ netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->info.thread_name);
return ret;
}
diff --git a/collectors/ebpf.plugin/ebpf_cgroup.c b/collectors/ebpf.plugin/ebpf_cgroup.c
index fd4e783db1754f..1aadfbaf83c350 100644
--- a/collectors/ebpf.plugin/ebpf_cgroup.c
+++ b/collectors/ebpf.plugin/ebpf_cgroup.c
@@ -331,7 +331,7 @@ void ebpf_create_charts_on_systemd(char *id, char *title, char *units, char *fam
char *algorithm, char *context, char *module, int update_every)
{
ebpf_cgroup_target_t *w;
- ebpf_write_chart_cmd(NETDATA_SERVICE_FAMILY, id, title, units, family, charttype, context,
+ ebpf_write_chart_cmd(NETDATA_SERVICE_FAMILY, id, "", title, units, family, charttype, context,
order, update_every, module);
for (w = ebpf_cgroup_pids; w; w = w->next) {
@@ -373,7 +373,7 @@ void *ebpf_cgroup_integration(void *ptr)
heartbeat_t hb;
heartbeat_init(&hb);
//Plugin will be killed when it receives a signal
- while (!ebpf_exit_plugin) {
+ while (!ebpf_plugin_exit) {
(void)heartbeat_next(&hb, step);
// We are using a small heartbeat time to wake up thread,
diff --git a/collectors/ebpf.plugin/ebpf_cgroup.h b/collectors/ebpf.plugin/ebpf_cgroup.h
index 6620ea10a3b3b7..ba8346934fccb2 100644
--- a/collectors/ebpf.plugin/ebpf_cgroup.h
+++ b/collectors/ebpf.plugin/ebpf_cgroup.h
@@ -21,7 +21,7 @@ struct pid_on_target2 {
ebpf_process_stat_t ps;
netdata_dcstat_pid_t dc;
netdata_publish_shm_t shm;
- ebpf_bandwidth_t socket;
+ netdata_socket_t socket;
netdata_cachestat_pid_t cachestat;
struct pid_on_target2 *next;
diff --git a/collectors/ebpf.plugin/ebpf_dcstat.c b/collectors/ebpf.plugin/ebpf_dcstat.c
index feb935b93add81..4ff6c82ab434b0 100644
--- a/collectors/ebpf.plugin/ebpf_dcstat.c
+++ b/collectors/ebpf.plugin/ebpf_dcstat.c
@@ -302,6 +302,7 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_DC_HIT_CHART,
+ "",
"Percentage of files inside directory cache",
EBPF_COMMON_DIMENSION_PERCENTAGE,
NETDATA_DIRECTORY_CACHE_SUBMENU,
@@ -312,6 +313,7 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_DC_REFERENCE_CHART,
+ "",
"Count file access",
EBPF_COMMON_DIMENSION_FILES,
NETDATA_DIRECTORY_CACHE_SUBMENU,
@@ -322,6 +324,7 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_DC_REQUEST_NOT_CACHE_CHART,
+ "",
"Files not present inside directory cache",
EBPF_COMMON_DIMENSION_FILES,
NETDATA_DIRECTORY_CACHE_SUBMENU,
@@ -332,6 +335,7 @@ static void ebpf_obsolete_dc_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_DC_REQUEST_NOT_FOUND_CHART,
+ "",
"Files not found",
EBPF_COMMON_DIMENSION_FILES,
NETDATA_DIRECTORY_CACHE_SUBMENU,
@@ -372,45 +376,58 @@ static inline void ebpf_obsolete_dc_cgroup_charts(ebpf_module_t *em) {
*/
void ebpf_obsolete_dc_apps_charts(struct ebpf_module *em)
{
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_DC_HIT_CHART,
- "Percentage of files inside directory cache",
- EBPF_COMMON_DIMENSION_PERCENTAGE,
- NETDATA_DIRECTORY_CACHE_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_LINE,
- NULL,
- 20100,
- em->update_every);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = apps_groups_root_target; w; w = w->next) {
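+ if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_DCSTAT_IDX))))
+ continue;
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_DC_REFERENCE_CHART,
- "Count file access",
- EBPF_COMMON_DIMENSION_FILES,
- NETDATA_DIRECTORY_CACHE_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20101,
- em->update_every);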
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_dc_hit",
+ "Percentage of files inside directory cache.",
+ EBPF_COMMON_DIMENSION_PERCENTAGE,
+ NETDATA_DIRECTORY_CACHE_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ "app.ebpf_dc_hit",
+ 20265,
+ update_every);
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_DC_REQUEST_NOT_CACHE_CHART,
- "Files not present inside directory cache",
- EBPF_COMMON_DIMENSION_FILES,
- NETDATA_DIRECTORY_CACHE_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20102,
- em->update_every);
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_dc_reference",
+ "Count file access.",
+ EBPF_COMMON_DIMENSION_FILES,
+ NETDATA_DIRECTORY_CACHE_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_dc_reference",
+ 20266,
+ update_every);
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_DC_REQUEST_NOT_FOUND_CHART,
- "Files not found",
- EBPF_COMMON_DIMENSION_FILES,
- NETDATA_DIRECTORY_CACHE_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20103,
- em->update_every);
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_not_cache",
+ "Files not present inside directory cache.",
+ EBPF_COMMON_DIMENSION_FILES,
+ NETDATA_DIRECTORY_CACHE_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_dc_not_cache",
+ 20267,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_not_found",
+ "Files not found.",
+ EBPF_COMMON_DIMENSION_FILES,
+ NETDATA_DIRECTORY_CACHE_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_dc_not_found",
+ 20268,
+ update_every);
+
+ w->charts_created &= ~(1<update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT);
-
- ebpf_create_charts_on_apps(NETDATA_DC_REFERENCE_CHART,
- "Count file access",
- EBPF_COMMON_DIMENSION_FILES,
- NETDATA_DIRECTORY_CACHE_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20101,
- ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT);
-
- ebpf_create_charts_on_apps(NETDATA_DC_REQUEST_NOT_CACHE_CHART,
- "Files not present inside directory cache",
- EBPF_COMMON_DIMENSION_FILES,
- NETDATA_DIRECTORY_CACHE_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20102,
- ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT);
-
- ebpf_create_charts_on_apps(NETDATA_DC_REQUEST_NOT_FOUND_CHART,
- "Files not found",
- EBPF_COMMON_DIMENSION_FILES,
- NETDATA_DIRECTORY_CACHE_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20103,
- ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_DCSTAT);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = root; w; w = w->next) {
+ if (unlikely(!w->exposed))
+ continue;
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_dc_hit",
+ "Percentage of files inside directory cache.",
+ EBPF_COMMON_DIMENSION_PERCENTAGE,
+ NETDATA_DIRECTORY_CACHE_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ "app.ebpf_dc_hit",
+ 20265,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_DCSTAT);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION ratio '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_dc_reference",
+ "Count file access.",
+ EBPF_COMMON_DIMENSION_FILES,
+ NETDATA_DIRECTORY_CACHE_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_dc_reference",
+ 20266,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_DCSTAT);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION files '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_not_cache",
+ "Files not present inside directory cache.",
+ EBPF_COMMON_DIMENSION_FILES,
+ NETDATA_DIRECTORY_CACHE_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_dc_not_cache",
+ 20267,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_DCSTAT);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION files '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_not_found",
+ "Files not found.",
+ EBPF_COMMON_DIMENSION_FILES,
+ NETDATA_DIRECTORY_CACHE_SUBMENU,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_dc_not_found",
+ 20268,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_DCSTAT);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION files '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+
+ w->charts_created |= 1<<EBPF_MODULE_DCSTAT_IDX;
+ }
em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED;
}
@@ -746,64 +798,53 @@ void ebpf_dcache_send_apps_data(struct ebpf_target *root)
struct ebpf_target *w;
collected_number value;
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_HIT_CHART);
for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- ebpf_dcstat_sum_pids(&w->dcstat, w->root_pid);
+ if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_DCSTAT_IDX))))
+ continue;
- uint64_t cache = w->dcstat.curr.cache_access;
- uint64_t not_found = w->dcstat.curr.not_found;
+ ebpf_dcstat_sum_pids(&w->dcstat, w->root_pid);
- dcstat_update_publish(&w->dcstat, cache, not_found);
- value = (collected_number) w->dcstat.ratio;
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
+ uint64_t cache = w->dcstat.curr.cache_access;
+ uint64_t not_found = w->dcstat.curr.not_found;
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_REFERENCE_CHART);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- if (w->dcstat.curr.cache_access < w->dcstat.prev.cache_access) {
- w->dcstat.prev.cache_access = 0;
- }
+ dcstat_update_publish(&w->dcstat, cache, not_found);
+
+ value = (collected_number) w->dcstat.ratio;
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_dc_hit");
+ write_chart_dimension("ratio", value);
+ ebpf_write_end_chart();
- w->dcstat.cache_access = (long long)w->dcstat.curr.cache_access - (long long)w->dcstat.prev.cache_access;
- value = (collected_number) w->dcstat.cache_access;
- write_chart_dimension(w->name, value);
- w->dcstat.prev.cache_access = w->dcstat.curr.cache_access;
+ if (w->dcstat.curr.cache_access < w->dcstat.prev.cache_access) {
+ w->dcstat.prev.cache_access = 0;
}
- }
- write_end_chart();
+ w->dcstat.cache_access = (long long)w->dcstat.curr.cache_access - (long long)w->dcstat.prev.cache_access;
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_REQUEST_NOT_CACHE_CHART);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- if (w->dcstat.curr.file_system < w->dcstat.prev.file_system) {
- w->dcstat.prev.file_system = 0;
- }
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_dc_reference");
+ value = (collected_number) w->dcstat.cache_access;
+ write_chart_dimension("files", value);
+ ebpf_write_end_chart();
+ w->dcstat.prev.cache_access = w->dcstat.curr.cache_access;
- value = (collected_number) (!w->dcstat.cache_access) ? 0 :
- (long long )w->dcstat.curr.file_system - (long long)w->dcstat.prev.file_system;
- write_chart_dimension(w->name, value);
- w->dcstat.prev.file_system = w->dcstat.curr.file_system;
+ if (w->dcstat.curr.file_system < w->dcstat.prev.file_system) {
+ w->dcstat.prev.file_system = 0;
}
- }
- write_end_chart();
-
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_DC_REQUEST_NOT_FOUND_CHART);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- if (w->dcstat.curr.not_found < w->dcstat.prev.not_found) {
- w->dcstat.prev.not_found = 0;
- }
- value = (collected_number) (!w->dcstat.cache_access) ? 0 :
- (long long)w->dcstat.curr.not_found - (long long)w->dcstat.prev.not_found;
- write_chart_dimension(w->name, value);
- w->dcstat.prev.not_found = w->dcstat.curr.not_found;
+ value = (collected_number) (!w->dcstat.cache_access) ? 0 :
+ (long long )w->dcstat.curr.file_system - (long long)w->dcstat.prev.file_system;
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_not_cache");
+ write_chart_dimension("files", value);
+ ebpf_write_end_chart();
+ w->dcstat.prev.file_system = w->dcstat.curr.file_system;
+
+ if (w->dcstat.curr.not_found < w->dcstat.prev.not_found) {
+ w->dcstat.prev.not_found = 0;
}
+ value = (collected_number) (!w->dcstat.cache_access) ? 0 :
+ (long long)w->dcstat.curr.not_found - (long long)w->dcstat.prev.not_found;
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_not_found");
+ write_chart_dimension("files", value);
+ ebpf_write_end_chart();
+ w->dcstat.prev.not_found = w->dcstat.curr.not_found;
}
- write_end_chart();
}
/**
@@ -898,24 +939,28 @@ static void ebpf_create_specific_dc_charts(char *type, int update_every)
static void ebpf_obsolete_specific_dc_charts(char *type, int update_every)
{
ebpf_write_chart_obsolete(type, NETDATA_DC_HIT_CHART,
+ "",
"Percentage of files inside directory cache",
EBPF_COMMON_DIMENSION_PERCENTAGE, NETDATA_DIRECTORY_CACHE_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_HIT_RATIO_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5700, update_every);
ebpf_write_chart_obsolete(type, NETDATA_DC_REFERENCE_CHART,
+ "",
"Count file access",
EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_REFERENCE_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5701, update_every);
ebpf_write_chart_obsolete(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART,
+ "",
"Files not present inside directory cache",
EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_NOT_CACHE_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5702, update_every);
ebpf_write_chart_obsolete(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART,
+ "",
"Files not found",
EBPF_COMMON_DIMENSION_FILES, NETDATA_DIRECTORY_CACHE_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_DC_NOT_FOUND_CONTEXT,
@@ -1029,23 +1074,23 @@ static void ebpf_send_systemd_dc_charts()
{
collected_number value;
ebpf_cgroup_target_t *ect;
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_HIT_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_HIT_CHART, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long) ect->publish_dc.ratio);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REFERENCE_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REFERENCE_CHART, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long) ect->publish_dc.cache_access);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_CACHE_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_CACHE_CHART, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
value = (collected_number) (!ect->publish_dc.cache_access) ? 0 :
@@ -1055,9 +1100,9 @@ static void ebpf_send_systemd_dc_charts()
write_chart_dimension(ect->name, (long long) value);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_FOUND_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_DC_REQUEST_NOT_FOUND_CHART, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
value = (collected_number) (!ect->publish_dc.cache_access) ? 0 :
@@ -1068,7 +1113,7 @@ static void ebpf_send_systemd_dc_charts()
write_chart_dimension(ect->name, (long long) value);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -1080,31 +1125,31 @@ static void ebpf_send_systemd_dc_charts()
static void ebpf_send_specific_dc_data(char *type, netdata_publish_dcstat_t *pdc)
{
collected_number value;
- write_begin_chart(type, NETDATA_DC_HIT_CHART);
+ ebpf_write_begin_chart(type, NETDATA_DC_HIT_CHART, "");
write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_RATIO].name,
(long long) pdc->ratio);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_DC_REFERENCE_CHART);
+ ebpf_write_begin_chart(type, NETDATA_DC_REFERENCE_CHART, "");
write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_REFERENCE].name,
(long long) pdc->cache_access);
- write_end_chart();
+ ebpf_write_end_chart();
value = (collected_number) (!pdc->cache_access) ? 0 :
(long long )pdc->curr.file_system - (long long)pdc->prev.file_system;
pdc->prev.file_system = pdc->curr.file_system;
- write_begin_chart(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART);
+ ebpf_write_begin_chart(type, NETDATA_DC_REQUEST_NOT_CACHE_CHART, "");
write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_SLOW].name, (long long) value);
- write_end_chart();
+ ebpf_write_end_chart();
value = (collected_number) (!pdc->cache_access) ? 0 :
(long long)pdc->curr.not_found - (long long)pdc->prev.not_found;
pdc->prev.not_found = pdc->curr.not_found;
- write_begin_chart(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART);
+ ebpf_write_begin_chart(type, NETDATA_DC_REQUEST_NOT_FOUND_CHART, "");
write_chart_dimension(dcstat_counter_publish_aggregated[NETDATA_DCSTAT_IDX_MISS].name, (long long) value);
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -1169,10 +1214,10 @@ static void dcstat_collector(ebpf_module_t *em)
uint32_t lifetime = em->lifetime;
netdata_idx_t *stats = em->hash_table_stats;
memset(stats, 0, sizeof(em->hash_table_stats));
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -1311,7 +1356,7 @@ static int ebpf_dcstat_load_bpf(ebpf_module_t *em)
#endif
if (ret)
- netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name);
+ netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->info.thread_name);
return ret;
}
diff --git a/collectors/ebpf.plugin/ebpf_dcstat.h b/collectors/ebpf.plugin/ebpf_dcstat.h
index 845b65908c2700..4d6aff12e8fa63 100644
--- a/collectors/ebpf.plugin/ebpf_dcstat.h
+++ b/collectors/ebpf.plugin/ebpf_dcstat.h
@@ -13,7 +13,7 @@
#define NETDATA_DC_REQUEST_NOT_CACHE_CHART "dc_not_cache"
#define NETDATA_DC_REQUEST_NOT_FOUND_CHART "dc_not_found"
-#define NETDATA_DIRECTORY_CACHE_SUBMENU "directory cache (eBPF)"
+#define NETDATA_DIRECTORY_CACHE_SUBMENU "directory cache"
// configuration file
#define NETDATA_DIRECTORY_DCSTAT_CONFIG_FILE "dcstat.conf"
diff --git a/collectors/ebpf.plugin/ebpf_disk.c b/collectors/ebpf.plugin/ebpf_disk.c
index 87945627095bad..466c2e3bb47b2f 100644
--- a/collectors/ebpf.plugin/ebpf_disk.c
+++ b/collectors/ebpf.plugin/ebpf_disk.c
@@ -485,6 +485,7 @@ static void ebpf_obsolete_disk_global(ebpf_module_t *em)
if (flags & NETDATA_DISK_CHART_CREATED) {
ebpf_write_chart_obsolete(ned->histogram.name,
ned->family,
+ "",
"Disk latency",
EBPF_COMMON_DIMENSION_CALL,
ned->family,
@@ -655,7 +656,7 @@ static void read_hard_disk_tables(int table, int maps_per_core)
*/
static void ebpf_obsolete_hd_charts(netdata_ebpf_disks_t *w, int update_every)
{
- ebpf_write_chart_obsolete(w->histogram.name, w->family, w->histogram.title, EBPF_COMMON_DIMENSION_CALL,
+ ebpf_write_chart_obsolete(w->histogram.name, w->family, "", w->histogram.title, EBPF_COMMON_DIMENSION_CALL,
w->family, NETDATA_EBPF_CHART_TYPE_STACKED, "disk.latency_io",
w->histogram.order, update_every);
@@ -778,10 +779,10 @@ static void disk_collector(ebpf_module_t *em)
int maps_per_core = em->maps_per_core;
uint32_t running_time = 0;
uint32_t lifetime = em->lifetime;
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -873,7 +874,7 @@ static int ebpf_disk_load_bpf(ebpf_module_t *em)
#endif
if (ret)
- netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name);
+ netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->info.thread_name);
return ret;
}
diff --git a/collectors/ebpf.plugin/ebpf_fd.c b/collectors/ebpf.plugin/ebpf_fd.c
index f039647a1df8aa..3c8f30d3eaac75 100644
--- a/collectors/ebpf.plugin/ebpf_fd.c
+++ b/collectors/ebpf.plugin/ebpf_fd.c
@@ -386,45 +386,49 @@ static void ebpf_obsolete_fd_services(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_FILE_OPEN,
+ "",
"Number of open files",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_FILE_CGROUP_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
NETDATA_CGROUP_FD_OPEN_CONTEXT,
- 20061,
+ 20270,
em->update_every);
if (em->mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR,
+ "",
"Fails to open files",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_FILE_CGROUP_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT,
- 20062,
+ 20271,
em->update_every);
}
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_FILE_CLOSED,
+ "",
"Files closed",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_FILE_CGROUP_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
NETDATA_CGROUP_FD_CLOSE_CONTEXT,
- 20063,
+ 20272,
em->update_every);
if (em->mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR,
+ "",
"Fails to close files",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_FILE_CGROUP_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT,
- 20064,
+ 20273,
em->update_every);
}
}
@@ -460,48 +464,60 @@ static inline void ebpf_obsolete_fd_cgroup_charts(ebpf_module_t *em) {
*/
void ebpf_obsolete_fd_apps_charts(struct ebpf_module *em)
{
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_FILE_OPEN,
- "Number of open files",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_FILE_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20061,
- em->update_every);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = apps_groups_root_target; w; w = w->next) {
+ if (unlikely(!(w->charts_created & (1<mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR,
- "Fails to open files",
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_file_open",
+ "Number of open files",
EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_FILE_GROUP,
+ NETDATA_APPS_FILE_FDS,
NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20062,
- em->update_every);
- }
-
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_FILE_CLOSED,
- "Files closed",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_FILE_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20063,
- em->update_every);
+ "app.ebpf_file_open",
+ 20220,
+ update_every);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_file_open_error",
+ "Fails to open files.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_FILE_FDS,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_file_open_error",
+ 20221,
+ update_every);
+ }
- if (em->mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR,
- "Fails to close files",
+ w->clean_name,
+ "_ebpf_file_closed",
+ "Files closed.",
EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_FILE_GROUP,
+ NETDATA_APPS_FILE_FDS,
NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20064,
- em->update_every);
+ "app.ebpf_file_closed",
+ 20222,
+ update_every);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
+ w->clean_name,
+ "_ebpf_file_close_error",
+ "Fails to close files.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_FILE_FDS,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_fd_close_error",
+ 20223,
+ update_every);
+ }
+ w->charts_created &= ~(1<mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY,
NETDATA_FILE_OPEN_ERR_COUNT,
+ "",
"Open fails",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_FILE_GROUP,
@@ -802,45 +820,30 @@ void ebpf_fd_send_apps_data(ebpf_module_t *em, struct ebpf_target *root)
{
struct ebpf_target *w;
for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- ebpf_fd_sum_pids(&w->fd, w->root_pid);
- }
- }
+ if (unlikely(!(w->charts_created & (1<next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->fd.open_call);
- }
- }
- write_end_chart();
+ ebpf_fd_sum_pids(&w->fd, w->root_pid);
- if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->fd.open_err);
- }
- }
- write_end_chart();
- }
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_file_open");
+ write_chart_dimension("calls", w->fd.open_call);
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->fd.close_call);
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_file_open_error");
+ write_chart_dimension("calls", w->fd.open_err);
+ ebpf_write_end_chart();
}
- }
- write_end_chart();
- if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->fd.close_err);
- }
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_file_closed");
+ write_chart_dimension("calls", w->fd.close_call);
+ ebpf_write_end_chart();
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_file_close_error");
+ write_chart_dimension("calls", w->fd.close_err);
+ ebpf_write_end_chart();
}
- write_end_chart();
}
}
@@ -933,25 +936,25 @@ static void ebpf_create_specific_fd_charts(char *type, ebpf_module_t *em)
*/
static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em)
{
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN, "Number of open files",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN, "", "Number of open files",
EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_OPEN_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5400, em->update_every);
if (em->mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "Fails to open files",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "", "Fails to open files",
EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_OPEN_ERR_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5401, em->update_every);
}
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSED, "Files closed",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSED, "", "Files closed",
EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_CLOSE_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5402, em->update_every);
if (em->mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "Fails to close files",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "", "Fails to close files",
EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_FILE_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_FD_CLOSE_ERR_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5403, em->update_every);
@@ -968,24 +971,24 @@ static void ebpf_obsolete_specific_fd_charts(char *type, ebpf_module_t *em)
*/
static void ebpf_send_specific_fd_data(char *type, netdata_fd_stat_t *values, ebpf_module_t *em)
{
- write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN, "");
write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].name, (long long)values->open_call);
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "");
write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_OPEN].name, (long long)values->open_err);
- write_end_chart();
+ ebpf_write_end_chart();
}
- write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSED);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSED, "");
write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].name, (long long)values->close_call);
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "");
write_chart_dimension(fd_publish_aggregated[NETDATA_FD_SYSCALL_CLOSE].name, (long long)values->close_err);
- write_end_chart();
+ ebpf_write_end_chart();
}
}
@@ -1037,40 +1040,40 @@ static void ebpf_create_systemd_fd_charts(ebpf_module_t *em)
static void ebpf_send_systemd_fd_charts(ebpf_module_t *em)
{
ebpf_cgroup_target_t *ect;
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_fd.open_call);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_fd.open_err);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSED, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_fd.close_call);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_fd.close_err);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
}
@@ -1136,10 +1139,10 @@ static void fd_collector(ebpf_module_t *em)
uint32_t lifetime = em->lifetime;
netdata_idx_t *stats = em->hash_table_stats;
memset(stats, 0, sizeof(em->hash_table_stats));
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -1197,44 +1200,77 @@ static void fd_collector(ebpf_module_t *em)
void ebpf_fd_create_apps_charts(struct ebpf_module *em, void *ptr)
{
struct ebpf_target *root = ptr;
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_OPEN,
- "Number of open files",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_FILE_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20061,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_FD);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = root; w; w = w->next) {
+ if (unlikely(!w->exposed))
+ continue;
- if (em->mode < MODE_ENTRY) {
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_OPEN_ERROR,
- "Fails to open files",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_FILE_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20062,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_FD);
- }
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_file_open",
+ "Number of open files",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_FILE_FDS,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_file_open",
+ 20220,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_FD);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_file_open_error",
+ "Fails to open files.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_FILE_FDS,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_file_open_error",
+ 20221,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_FD);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+ }
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_CLOSED,
- "Files closed",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_FILE_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20063,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_FD);
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_file_closed",
+ "Files closed.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_FILE_FDS,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_file_closed",
+ 20222,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_FD);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_file_close_error",
+ "Fails to close files.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_FILE_FDS,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_file_close_error",
+ 20223,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_FD);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+ }
- if (em->mode < MODE_ENTRY) {
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_CLOSE_ERROR,
- "Fails to close files",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_FILE_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20064,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_FD);
+ w->charts_created |= 1<apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED;
@@ -1337,7 +1373,7 @@ static int ebpf_fd_load_bpf(ebpf_module_t *em)
#endif
if (ret)
- netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name);
+ netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->info.thread_name);
return ret;
}
diff --git a/collectors/ebpf.plugin/ebpf_filesystem.c b/collectors/ebpf.plugin/ebpf_filesystem.c
index 2bff738caedd76..b78e6553243946 100644
--- a/collectors/ebpf.plugin/ebpf_filesystem.c
+++ b/collectors/ebpf.plugin/ebpf_filesystem.c
@@ -351,20 +351,22 @@ static void ebpf_obsolete_fs_charts(int update_every)
flags &= ~NETDATA_FILESYSTEM_FLAG_CHART_CREATED;
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hread.name,
+ "",
efp->hread.title,
EBPF_COMMON_DIMENSION_CALL, efp->family_name,
NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hread.order, update_every);
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hwrite.name,
+ "",
efp->hwrite.title,
EBPF_COMMON_DIMENSION_CALL, efp->family_name,
NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hwrite.order, update_every);
- ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, efp->hopen.title,
+ ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hopen.name, "", efp->hopen.title,
EBPF_COMMON_DIMENSION_CALL, efp->family_name,
NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hopen.order, update_every);
- ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name, efp->hadditional.title,
+ ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY, efp->hadditional.name,"", efp->hadditional.title,
EBPF_COMMON_DIMENSION_CALL, efp->family_name,
NULL, NETDATA_EBPF_CHART_TYPE_STACKED, efp->hadditional.order,
update_every);
@@ -390,9 +392,9 @@ static void ebpf_create_fs_charts(int update_every)
ebpf_filesystem_partitions_t *efp = &localfs[i];
uint32_t flags = efp->flags;
if (flags & NETDATA_FILESYSTEM_FLAG_HAS_PARTITION && !(flags & test)) {
- snprintfz(title, 255, "%s latency for each read request.", efp->filesystem);
- snprintfz(family, 63, "%s_latency", efp->family);
- snprintfz(chart_name, 63, "%s_read_latency", efp->filesystem);
+ snprintfz(title, sizeof(title) - 1, "%s latency for each read request.", efp->filesystem);
+ snprintfz(family, sizeof(family) - 1, "%s_latency", efp->family);
+ snprintfz(chart_name, sizeof(chart_name) - 1, "%s_read_latency", efp->filesystem);
efp->hread.name = strdupz(chart_name);
efp->hread.title = strdupz(title);
efp->hread.ctx = NULL;
@@ -408,8 +410,8 @@ static void ebpf_create_fs_charts(int update_every)
update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM);
order++;
- snprintfz(title, 255, "%s latency for each write request.", efp->filesystem);
- snprintfz(chart_name, 63, "%s_write_latency", efp->filesystem);
+ snprintfz(title, sizeof(title) - 1, "%s latency for each write request.", efp->filesystem);
+ snprintfz(chart_name, sizeof(chart_name) - 1, "%s_write_latency", efp->filesystem);
efp->hwrite.name = strdupz(chart_name);
efp->hwrite.title = strdupz(title);
efp->hwrite.ctx = NULL;
@@ -423,8 +425,8 @@ static void ebpf_create_fs_charts(int update_every)
update_every, NETDATA_EBPF_MODULE_NAME_FILESYSTEM);
order++;
- snprintfz(title, 255, "%s latency for each open request.", efp->filesystem);
- snprintfz(chart_name, 63, "%s_open_latency", efp->filesystem);
+ snprintfz(title, sizeof(title) - 1, "%s latency for each open request.", efp->filesystem);
+ snprintfz(chart_name, sizeof(chart_name) - 1, "%s_open_latency", efp->filesystem);
efp->hopen.name = strdupz(chart_name);
efp->hopen.title = strdupz(title);
efp->hopen.ctx = NULL;
@@ -439,9 +441,9 @@ static void ebpf_create_fs_charts(int update_every)
order++;
char *type = (efp->flags & NETDATA_FILESYSTEM_ATTR_CHARTS) ? "attribute" : "sync";
- snprintfz(title, 255, "%s latency for each %s request.", efp->filesystem, type);
- snprintfz(chart_name, 63, "%s_%s_latency", efp->filesystem, type);
- snprintfz(ctx, 63, "filesystem.%s_latency", type);
+ snprintfz(title, sizeof(title) - 1, "%s latency for each %s request.", efp->filesystem, type);
+ snprintfz(chart_name, sizeof(chart_name) - 1, "%s_%s_latency", efp->filesystem, type);
+ snprintfz(ctx, sizeof(ctx) - 1, "filesystem.%s_latency", type);
efp->hadditional.name = strdupz(chart_name);
efp->hadditional.title = strdupz(title);
efp->hadditional.ctx = strdupz(ctx);
@@ -470,12 +472,12 @@ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em)
{
pthread_mutex_lock(&lock);
int i;
- const char *saved_name = em->thread_name;
+ const char *saved_name = em->info.thread_name;
uint64_t kernels = em->kernels;
for (i = 0; localfs[i].filesystem; i++) {
ebpf_filesystem_partitions_t *efp = &localfs[i];
if (!efp->probe_links && efp->flags & NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM) {
- em->thread_name = efp->filesystem;
+ em->info.thread_name = efp->filesystem;
em->kernels = efp->kernels;
em->maps = efp->fs_maps;
#ifdef LIBBPF_MAJOR_VERSION
@@ -484,7 +486,7 @@ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em)
if (em->load & EBPF_LOAD_LEGACY) {
efp->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &efp->objects);
if (!efp->probe_links) {
- em->thread_name = saved_name;
+ em->info.thread_name = saved_name;
em->kernels = kernels;
em->maps = NULL;
pthread_mutex_unlock(&lock);
@@ -495,7 +497,7 @@ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em)
else {
efp->fs_obj = filesystem_bpf__open();
if (!efp->fs_obj) {
- em->thread_name = saved_name;
+ em->info.thread_name = saved_name;
em->kernels = kernels;
return -1;
} else {
@@ -515,7 +517,7 @@ int ebpf_filesystem_initialize_ebpf_data(ebpf_module_t *em)
}
efp->flags &= ~NETDATA_FILESYSTEM_LOAD_EBPF_PROGRAM;
}
- em->thread_name = saved_name;
+ em->info.thread_name = saved_name;
pthread_mutex_unlock(&lock);
em->kernels = kernels;
em->maps = NULL;
@@ -671,6 +673,7 @@ static void ebpf_obsolete_filesystem_global(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY,
efp->hread.name,
+ "",
efp->hread.title,
EBPF_COMMON_DIMENSION_CALL,
efp->family_name,
@@ -681,6 +684,7 @@ static void ebpf_obsolete_filesystem_global(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY,
efp->hwrite.name,
+ "",
efp->hwrite.title,
EBPF_COMMON_DIMENSION_CALL,
efp->family_name,
@@ -691,6 +695,7 @@ static void ebpf_obsolete_filesystem_global(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY,
efp->hopen.name,
+ "",
efp->hopen.title,
EBPF_COMMON_DIMENSION_CALL,
efp->family_name,
@@ -701,6 +706,7 @@ static void ebpf_obsolete_filesystem_global(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY,
efp->hadditional.name,
+ "",
efp->hadditional.title,
EBPF_COMMON_DIMENSION_CALL,
efp->family_name,
@@ -909,10 +915,10 @@ static void filesystem_collector(ebpf_module_t *em)
int counter = update_every - 1;
uint32_t running_time = 0;
uint32_t lifetime = em->lifetime;
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
diff --git a/collectors/ebpf.plugin/ebpf_functions.c b/collectors/ebpf.plugin/ebpf_functions.c
index 7a43692bc051d7..6a481ad64f2839 100644
--- a/collectors/ebpf.plugin/ebpf_functions.c
+++ b/collectors/ebpf.plugin/ebpf_functions.c
@@ -3,6 +3,40 @@
#include "ebpf.h"
#include "ebpf_functions.h"
+/*****************************************************************
+ * EBPF FUNCTION COMMON
+ *****************************************************************/
+
+/**
+ * Function Start thread
+ *
+ * Start (or restart) the thread associated with a module after a user request.
+ *
+ * @param em The structure with thread information
+ * @param period lifetime stored in em->lifetime; values <= 0 are replaced by EBPF_DEFAULT_LIFETIME
+ * @return It returns the value given by netdata_thread_create().
+ */
+static int ebpf_function_start_thread(ebpf_module_t *em, int period)
+{
+ struct netdata_static_thread *st = em->thread;
+ // a previous request left a thread handle allocated: release it before allocating a new one
+ if (st->thread)
+ freez(st->thread);
+
+ if (period <= 0)
+ period = EBPF_DEFAULT_LIFETIME;
+
+ st->thread = mallocz(sizeof(netdata_thread_t));
+ em->enabled = NETDATA_THREAD_EBPF_FUNCTION_RUNNING;
+ em->lifetime = period;
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ netdata_log_info("Starting thread %s with lifetime = %d", em->info.thread_name, period);
+#endif
+
+ return netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, em);
+}
+
/*****************************************************************
* EBPF SELECT MODULE
*****************************************************************/
@@ -13,17 +47,17 @@
* @param thread_name name of the thread we are looking for.
*
* @return it returns a pointer for the module that has thread_name on success or NULL otherwise.
- */
ebpf_module_t *ebpf_functions_select_module(const char *thread_name) {
int i;
for (i = 0; i < EBPF_MODULE_FUNCTION_IDX; i++) {
- if (strcmp(ebpf_modules[i].thread_name, thread_name) == 0) {
+ if (strcmp(ebpf_modules[i].info.thread_name, thread_name) == 0) {
return &ebpf_modules[i];
}
}
return NULL;
}
+ */
/*****************************************************************
* EBPF HELP FUNCTIONS
@@ -35,11 +69,9 @@ ebpf_module_t *ebpf_functions_select_module(const char *thread_name) {
* Shows help with all options accepted by thread function.
*
* @param transaction the transaction id that Netdata sent for this function execution
-*/
static void ebpf_function_thread_manipulation_help(const char *transaction) {
- pthread_mutex_lock(&lock);
- pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600);
- fprintf(stdout, "%s",
+ BUFFER *wb = buffer_create(0, NULL);
+ buffer_sprintf(wb, "%s",
"ebpf.plugin / thread\n"
"\n"
"Function `thread` allows user to control eBPF threads.\n"
@@ -57,13 +89,13 @@ static void ebpf_function_thread_manipulation_help(const char *transaction) {
" Disable a sp.\n"
"\n"
"Filters can be combined. Each filter can be given only one time.\n"
- "Process thread is not controlled by functions until we finish the creation of functions per thread..\n"
);
- pluginsd_function_result_end_to_stdout();
- fflush(stdout);
- pthread_mutex_unlock(&lock);
-}
+ pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600, wb);
+
+ buffer_free(wb);
+}
+*/
/*****************************************************************
* EBPF ERROR FUNCTIONS
@@ -79,12 +111,7 @@ static void ebpf_function_thread_manipulation_help(const char *transaction) {
* @param msg the error message
*/
static void ebpf_function_error(const char *transaction, int code, const char *msg) {
- char buffer[PLUGINSD_LINE_MAX + 1];
- json_escape_string(buffer, msg, PLUGINSD_LINE_MAX);
-
- pluginsd_function_result_begin_to_stdout(transaction, code, "application/json", now_realtime_sec());
- fprintf(stdout, "{\"status\":%d,\"error_message\":\"%s\"}", code, buffer);
- pluginsd_function_result_end_to_stdout();
+ pluginsd_function_json_error_to_stdout(transaction, code, msg);
}
/*****************************************************************
@@ -92,7 +119,7 @@ static void ebpf_function_error(const char *transaction, int code, const char *m
*****************************************************************/
/**
- * Function enable
+ * Function: thread
*
* Enable a specific thread.
*
@@ -102,7 +129,6 @@ static void ebpf_function_error(const char *transaction, int code, const char *m
* @param line_max Number of arguments given
* @param timeout The function timeout
* @param em The structure with thread information
- */
static void ebpf_function_thread_manipulation(const char *transaction,
char *function __maybe_unused,
char *line_buffer __maybe_unused,
@@ -134,34 +160,22 @@ static void ebpf_function_thread_manipulation(const char *transaction,
lem = ebpf_functions_select_module(thread_name);
if (!lem) {
- snprintfz(message, 511, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name);
+ snprintfz(message, sizeof(message) - 1, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name);
ebpf_function_error(transaction, HTTP_RESP_NOT_FOUND, message);
return;
}
pthread_mutex_lock(&ebpf_exit_cleanup);
if (lem->enabled > NETDATA_THREAD_EBPF_FUNCTION_RUNNING) {
- struct netdata_static_thread *st = lem->thread;
// Load configuration again
ebpf_update_module(lem, default_btf, running_on_kernel, isrh);
- // another request for thread that already ran, cleanup and restart
- if (st->thread)
- freez(st->thread);
-
- if (period <= 0)
- period = EBPF_DEFAULT_LIFETIME;
-
- st->thread = mallocz(sizeof(netdata_thread_t));
- lem->enabled = NETDATA_THREAD_EBPF_FUNCTION_RUNNING;
- lem->lifetime = period;
-
-#ifdef NETDATA_INTERNAL_CHECKS
- netdata_log_info("Starting thread %s with lifetime = %d", thread_name, period);
-#endif
-
- netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT,
- st->start_routine, lem);
+ if (ebpf_function_start_thread(lem, period)) {
+ ebpf_function_error(transaction,
+ HTTP_RESP_INTERNAL_SERVER_ERROR,
+ "Cannot start thread.");
+ return;
+ }
} else {
lem->running_time = 0;
if (period > 0) // user is modifying period to run
@@ -175,7 +189,7 @@ static void ebpf_function_thread_manipulation(const char *transaction,
const char *name = &keyword[sizeof(EBPF_THREADS_DISABLE_CATEGORY) - 1];
lem = ebpf_functions_select_module(name);
if (!lem) {
- snprintfz(message, 511, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name);
+ snprintfz(message, sizeof(message) - 1, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name);
ebpf_function_error(transaction, HTTP_RESP_NOT_FOUND, message);
return;
}
@@ -191,7 +205,7 @@ static void ebpf_function_thread_manipulation(const char *transaction,
const char *name = &keyword[sizeof(EBPF_THREADS_SELECT_THREAD) - 1];
lem = ebpf_functions_select_module(name);
if (!lem) {
- snprintfz(message, 511, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name);
+ snprintfz(message, sizeof(message) - 1, "%s%s", EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND, name);
ebpf_function_error(transaction, HTTP_RESP_NOT_FOUND, message);
return;
}
@@ -226,10 +240,10 @@ static void ebpf_function_thread_manipulation(const char *transaction,
// THE ORDER SHOULD BE THE SAME WITH THE FIELDS!
// thread name
- buffer_json_add_array_item_string(wb, wem->thread_name);
+ buffer_json_add_array_item_string(wb, wem->info.thread_name);
// description
- buffer_json_add_array_item_string(wb, wem->thread_description);
+ buffer_json_add_array_item_string(wb, wem->info.thread_description);
// Either it is not running or received a disabled signal and it is stopping.
if (wem->enabled > NETDATA_THREAD_EBPF_FUNCTION_RUNNING ||
(!wem->lifetime && (int)wem->running_time == wem->update_every)) {
@@ -267,7 +281,7 @@ static void ebpf_function_thread_manipulation(const char *transaction,
RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN,
RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT,
RRDF_FIELD_FILTER_MULTISELECT,
- RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL);
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_UNIQUE_KEY, NULL);
buffer_rrdf_table_add_field(wb, fields_id++, "Description", "Thread Desc", RRDF_FIELD_TYPE_STRING,
RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN,
@@ -349,19 +363,697 @@ static void ebpf_function_thread_manipulation(const char *transaction,
buffer_json_finalize(wb);
// Lock necessary to avoid race condition
- pthread_mutex_lock(&lock);
+ pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "application/json", expires, wb);
+
+ buffer_free(wb);
+}
+ */
+
+/*****************************************************************
+ * EBPF SOCKET FUNCTION
+ *****************************************************************/
+
+/**
+ * Socket Help
+ *
+ * Shows help with all options accepted by socket function.
+ *
+ * The text is sent as a `text/plain` function result whose expiration is set one hour (3600 seconds) ahead.
+ *
+ * @param transaction the transaction id that Netdata sent for this function execution
+*/
+static void ebpf_function_socket_help(const char *transaction) {
+    pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600);
+    // The whole help is one literal passed through "%s", so nothing inside it is treated as a format directive.
+    fprintf(stdout, "%s",
+            "ebpf.plugin / socket\n"
+            "\n"
+            "Function `socket` display information for all open sockets during ebpf.plugin runtime.\n"
+            "During thread runtime the plugin is always collecting data, but when an option is modified, the plugin\n"
+            "resets completely the previous table and can show a clean data for the first request before to bring the\n"
+            "modified request.\n"
+            "\n"
+            "The following filters are supported:\n"
+            "\n"
+            " family:FAMILY\n"
+            " Shows information for the FAMILY specified. Option accepts IPV4, IPV6 and all, that is the default.\n"
+            "\n"
+            " period:PERIOD\n"
+            " Enable socket to run a specific PERIOD in seconds. When PERIOD is not\n"
+            " specified plugin will use the default 300 seconds\n"
+            "\n"
+            " resolve:BOOL\n"
+            " Resolve service name, default value is YES.\n"
+            "\n"
+            " range:CIDR\n"
+            " Show sockets that have only a specific destination. Default all addresses.\n"
+            "\n"
+            " port:range\n"
+            " Show sockets that have only a specific destination port. Default all ports.\n"
+            "\n"
+            " reset\n"
+            " Send a reset to collector. When a collector receives this command, it uses everything defined in configuration file.\n"
+            "\n"
+            " interfaces\n"
+            " When the collector receives this command, it read all available interfaces on host.\n"
+            "\n"
+            "Filters can be combined. Each filter can be given only one time.\n"
+    );
+    pluginsd_function_result_end_to_stdout();
+    fflush(stdout);
+}
+
+/**
+ * Fill Fake socket
+ *
+ * Fill socket with placeholder values, so a row can still be shown when there is no real socket to report.
+ *
+ * The placeholder uses 127.0.0.1 for both addresses, "none" as destination port, the PID of the plugin itself,
+ * family AF_INET and protocol AF_UNSPEC.
+ *
+ * @param fake_values is the structure where we are storing the value.
+ */
+static inline void ebpf_socket_fill_fake_socket(netdata_socket_plus_t *fake_values)
+{
+    snprintfz(fake_values->socket_string.src_ip, INET6_ADDRSTRLEN, "%s", "127.0.0.1");
+    snprintfz(fake_values->socket_string.dst_ip, INET6_ADDRSTRLEN, "%s", "127.0.0.1");
+    fake_values->pid = getpid();
+    //fake_values->socket_string.src_port = 0;
+    // The "none" label belongs to the port column: writing it to dst_ip would overwrite the address set above
+    // and leave the port empty.
+    snprintfz(fake_values->socket_string.dst_port, NI_MAXSERV, "%s", "none");
+    fake_values->data.family = AF_INET;
+    fake_values->data.protocol = AF_UNSPEC;
+}
+
+/**
+ * Fill function buffer
+ *
+ * Append one row, describing a single socket, to the JSON buffer shown on cloud.
+ *
+ * The row is a JSON array with, in this order: PID, process name, origin, source IP, destination IP,
+ * destination port, protocol, bytes received, bytes sent and connections. The source port is not exported.
+ *
+ * @param wb buffer where we store data.
+ * @param values data read from hash table. Its flags receive NETDATA_SOCKET_FLAGS_ALREADY_OPEN when no
+ *               connection was counted for the socket.
+ * @param name the process name, when NULL the row shows "not identified".
+ */
+static void ebpf_fill_function_buffer(BUFFER *wb, netdata_socket_plus_t *values, char *name)
+{
+    const char *process_name = "not identified";
+    if (name)
+        process_name = name;
+
+    const char *origin = "outgoing";
+    if (values->data.external_origin)
+        origin = "incoming";
+
+    // Select the protocol specific values first, they are written later in column order.
+    const char *protocol_name;
+    uint64_t bytes_in, bytes_out, connections;
+    switch (values->data.protocol) {
+        case IPPROTO_TCP:
+            protocol_name = "TCP";
+            bytes_in = (uint64_t) values->data.tcp.tcp_bytes_received;
+            bytes_out = (uint64_t) values->data.tcp.tcp_bytes_sent;
+            connections = values->data.tcp.ipv4_connect + values->data.tcp.ipv6_connect;
+            break;
+        case IPPROTO_UDP:
+            protocol_name = "UDP";
+            bytes_in = (uint64_t) values->data.udp.udp_bytes_received;
+            bytes_out = (uint64_t) values->data.udp.udp_bytes_sent;
+            connections = values->data.udp.call_udp_sent + values->data.udp.call_udp_received;
+            break;
+        default:
+            protocol_name = "UNSPEC";
+            bytes_in = 0;
+            bytes_out = 0;
+            connections = 1;
+            break;
+    }
+
+    // If no connections, this means that we lost when connection was opened: remember it in the flags.
+    if (!(values->flags & NETDATA_SOCKET_FLAGS_ALREADY_OPEN) && !connections)
+        values->flags |= NETDATA_SOCKET_FLAGS_ALREADY_OPEN;
+
+    // A socket flagged as already open counts one connection more.
+    if (values->flags & NETDATA_SOCKET_FLAGS_ALREADY_OPEN)
+        connections++;
+
+    // IMPORTANT!
+    // THE ORDER SHOULD BE THE SAME WITH THE FIELDS!
+    buffer_json_add_array_item_array(wb);
+    buffer_json_add_array_item_uint64(wb, (uint64_t)values->pid);           // PID
+    buffer_json_add_array_item_string(wb, process_name);                    // NAME
+    buffer_json_add_array_item_string(wb, origin);                          // Origin
+    buffer_json_add_array_item_string(wb, values->socket_string.src_ip);    // Source IP
+    buffer_json_add_array_item_string(wb, values->socket_string.dst_ip);    // Destination IP
+    buffer_json_add_array_item_string(wb, values->socket_string.dst_port);  // DST Port
+    buffer_json_add_array_item_string(wb, protocol_name);                   // Protocol
+    buffer_json_add_array_item_uint64(wb, bytes_in);                        // Bytes received
+    buffer_json_add_array_item_uint64(wb, bytes_out);                       // Bytes sent
+    buffer_json_add_array_item_uint64(wb, connections);                     // Connections
+    buffer_json_array_close(wb);
+}
+
+/**
+ * Clean Judy array unsafe
+ *
+ * Clean all Judy Array allocated to show table when a function is called: for every PID stored in
+ * `ebpf_judy_pid.index`, every socket entry is given back to `aral_socket_table` and the per PID socket array
+ * is released. The PID entries themselves are kept.
+ *
+ * Before to call this function it is necessary to lock `ebpf_judy_pid.index.rw_spinlock`.
+ **/
+static void ebpf_socket_clean_judy_array_unsafe()
+{
+    if (!ebpf_judy_pid.index.JudyLArray)
+        return;
+
+    Pvoid_t *pid_value, *socket_value;
+    Word_t local_pid = 0;
+    bool first_pid = true;
+    while ((pid_value = JudyLFirstThenNext(ebpf_judy_pid.index.JudyLArray, &local_pid, &first_pid))) {
+        netdata_ebpf_judy_pid_stats_t *pid_ptr = (netdata_ebpf_judy_pid_stats_t *)*pid_value;
+
+        // The walk over the sockets must restart from the first index for every PID. Reusing the cursor of the
+        // previous PID would skip (and never free) the entries stored at or below its last index.
+        Word_t local_socket = 0;
+        bool first_socket = true;
+
+        rw_spinlock_write_lock(&pid_ptr->socket_stats.rw_spinlock);
+        if (pid_ptr->socket_stats.JudyLArray) {
+            while ((socket_value = JudyLFirstThenNext(pid_ptr->socket_stats.JudyLArray, &local_socket, &first_socket))) {
+                netdata_socket_plus_t *socket_clean = *socket_value;
+                aral_freez(aral_socket_table, socket_clean);
+            }
+            // Only the array is released here, its values were already given back above.
+            JudyLFreeArray(&pid_ptr->socket_stats.JudyLArray, PJE0);
+            pid_ptr->socket_stats.JudyLArray = NULL;
+        }
+        rw_spinlock_write_unlock(&pid_ptr->socket_stats.rw_spinlock);
+    }
+}
+
+/**
+ * Fill function buffer unsafe
+ *
+ * Write one row per socket stored for every PID into the function buffer. When no PID has a socket array,
+ * a single placeholder row is written instead. Before to call this function it is necessary to lock
+ * ebpf_judy_pid.index.rw_spinlock
+ *
+ * @param buf buffer used to store data to be shown by function.
+ */
+static void ebpf_socket_fill_function_buffer_unsafe(BUFFER *buf)
+{
+    int pids_with_sockets = 0;
+    Word_t pid_index = 0;
+    bool pid_first = true;
+    Pvoid_t *pid_slot;
+
+    while ((pid_slot = JudyLFirstThenNext(ebpf_judy_pid.index.JudyLArray, &pid_index, &pid_first))) {
+        netdata_ebpf_judy_pid_stats_t *pid_stats = (netdata_ebpf_judy_pid_stats_t *)*pid_slot;
+
+        rw_spinlock_read_lock(&pid_stats->socket_stats.rw_spinlock);
+        if (pid_stats->socket_stats.JudyLArray) {
+            // Every PID has its own array, so its walk starts from the first index.
+            Word_t socket_index = 0;
+            bool socket_first = true;
+            Pvoid_t *socket_slot;
+
+            while ((socket_slot = JudyLFirstThenNext(pid_stats->socket_stats.JudyLArray, &socket_index, &socket_first)))
+                ebpf_fill_function_buffer(buf, (netdata_socket_plus_t *)*socket_slot, pid_stats->cmdline);
+
+            pids_with_sockets++;
+        }
+        rw_spinlock_read_unlock(&pid_stats->socket_stats.rw_spinlock);
+    }
+
+    if (pids_with_sockets)
+        return;
+
+    // Nothing was found, show the placeholder row.
+    netdata_socket_plus_t fake_values = { };
+    ebpf_socket_fill_fake_socket(&fake_values);
+    ebpf_fill_function_buffer(buf, &fake_values, NULL);
+}
+
+/**
+ * Socket read open connections
+ *
+ * Write the rows for the open sockets into the buffer. A placeholder row is written when the module has no
+ * maps, when the open socket map was not initialized, or when there is no PID array to read.
+ *
+ * The function holds the read lock of `ebpf_judy_pid.index.rw_spinlock` during the whole call and, while the
+ * real rows are written, also the read lock of `network_viewer_opt.rw_spinlock`.
+ *
+ * @param buf the buffer to store data;
+ * @param em the module main structure.
+ */
+void ebpf_socket_read_open_connections(BUFFER *buf, struct ebpf_module *em)
+{
+    rw_spinlock_read_lock(&ebpf_judy_pid.index.rw_spinlock);
+
+    // thread was initialized and Array was not reset
+    bool has_data = em->maps &&
+                    (em->maps[NETDATA_SOCKET_OPEN_SOCKET].map_fd != ND_EBPF_MAP_FD_NOT_INITIALIZED) &&
+                    ebpf_judy_pid.index.JudyLArray;
+
+    if (has_data) {
+        rw_spinlock_read_lock(&network_viewer_opt.rw_spinlock);
+        ebpf_socket_fill_function_buffer_unsafe(buf);
+        rw_spinlock_read_unlock(&network_viewer_opt.rw_spinlock);
+    } else {
+        netdata_socket_plus_t fake_values = { };
+
+        ebpf_socket_fill_fake_socket(&fake_values);
+        ebpf_fill_function_buffer(buf, &fake_values, NULL);
+    }
+
+    rw_spinlock_read_unlock(&ebpf_judy_pid.index.rw_spinlock);
+}
+
+/**
+ * Function: Socket
+ *
+ * Show information for sockets stored in hash tables.
+ *
+ * The call has three steps:
+ *  1. parse the arguments given in `function` and apply them to `network_viewer_opt` and to the socket module;
+ *  2. start the socket thread when it is not running, or keep its lifetime large enough when it is;
+ *  3. build the JSON table (data, columns, charts and group_by) and write it to stdout.
+ *
+ * When `help` is one of the arguments only the help text is sent and nothing else is done after it.
+ *
+ * @param transaction the transaction id that Netdata sent for this function execution
+ * @param function function name and arguments given to thread.
+ * @param timeout The function timeout (not used here).
+ * @param cancelled Variable used to store function status (not read here).
+ */
+static void ebpf_function_socket_manipulation(const char *transaction,
+ char *function __maybe_unused,
+ int timeout __maybe_unused,
+ bool *cancelled __maybe_unused)
+{
+ UNUSED(timeout);
+ ebpf_module_t *em = &ebpf_modules[EBPF_MODULE_SOCKET_IDX];
+
+ char *words[PLUGINSD_MAX_WORDS] = {NULL};
+ size_t num_words = quoted_strings_splitter_pluginsd(function, words, PLUGINSD_MAX_WORDS);
+ const char *name;
+ // -1 means that no `period:` argument was given
+ int period = -1;
+ // The index is write locked while arguments are parsed, because most of them clean the stored sockets.
+ rw_spinlock_write_lock(&ebpf_judy_pid.index.rw_spinlock);
+ network_viewer_opt.enabled = CONFIG_BOOLEAN_YES;
+ uint32_t previous;
+
+ // Parsing starts at the second word (presumably the first one is the function name; verify against caller).
+ for (int i = 1; i < PLUGINSD_MAX_WORDS; i++) {
+ const char *keyword = get_word(words, num_words, i);
+ if (!keyword)
+ break;
+
+ if (strncmp(keyword, EBPF_FUNCTION_SOCKET_FAMILY, sizeof(EBPF_FUNCTION_SOCKET_FAMILY) - 1) == 0) {
+ // family: anything different from IPV4 and IPV6 selects AF_UNSPEC
+ name = &keyword[sizeof(EBPF_FUNCTION_SOCKET_FAMILY) - 1];
+ previous = network_viewer_opt.family;
+ uint32_t family = AF_UNSPEC;
+ if (!strcmp(name, "IPV4"))
+ family = AF_INET;
+ else if (!strcmp(name, "IPV6"))
+ family = AF_INET6;
+
+ // The stored sockets are cleaned only when the value really changes.
+ if (family != previous) {
+ rw_spinlock_write_lock(&network_viewer_opt.rw_spinlock);
+ network_viewer_opt.family = family;
+ rw_spinlock_write_unlock(&network_viewer_opt.rw_spinlock);
+ ebpf_socket_clean_judy_array_unsafe();
+ }
+ } else if (strncmp(keyword, EBPF_FUNCTION_SOCKET_PERIOD, sizeof(EBPF_FUNCTION_SOCKET_PERIOD) - 1) == 0) {
+ // period: a value that is not positive selects EBPF_NON_FUNCTION_LIFE_TIME
+ name = &keyword[sizeof(EBPF_FUNCTION_SOCKET_PERIOD) - 1];
+ pthread_mutex_lock(&ebpf_exit_cleanup);
+ period = str2i(name);
+ if (period > 0) {
+ em->lifetime = period;
+ } else
+ em->lifetime = EBPF_NON_FUNCTION_LIFE_TIME;
+
+#ifdef NETDATA_DEV_MODE
+ collector_info("Lifetime modified for %u", em->lifetime);
+#endif
+ pthread_mutex_unlock(&ebpf_exit_cleanup);
+ } else if (strncmp(keyword, EBPF_FUNCTION_SOCKET_RESOLVE, sizeof(EBPF_FUNCTION_SOCKET_RESOLVE) - 1) == 0) {
+ // resolve: only "YES" (case insensitive) enables it, any other value disables it
+ previous = network_viewer_opt.service_resolution_enabled;
+ uint32_t resolution;
+ name = &keyword[sizeof(EBPF_FUNCTION_SOCKET_RESOLVE) - 1];
+ resolution = (!strcasecmp(name, "YES")) ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO;
+
+ if (previous != resolution) {
+ rw_spinlock_write_lock(&network_viewer_opt.rw_spinlock);
+ network_viewer_opt.service_resolution_enabled = resolution;
+ rw_spinlock_write_unlock(&network_viewer_opt.rw_spinlock);
+
+ ebpf_socket_clean_judy_array_unsafe();
+ }
+ } else if (strncmp(keyword, EBPF_FUNCTION_SOCKET_RANGE, sizeof(EBPF_FUNCTION_SOCKET_RANGE) - 1) == 0) {
+ // range: the included and excluded IP lists are rebuilt from the given value
+ name = &keyword[sizeof(EBPF_FUNCTION_SOCKET_RANGE) - 1];
+ rw_spinlock_write_lock(&network_viewer_opt.rw_spinlock);
+ ebpf_clean_ip_structure(&network_viewer_opt.included_ips);
+ ebpf_clean_ip_structure(&network_viewer_opt.excluded_ips);
+ ebpf_parse_ips_unsafe((char *)name);
+ rw_spinlock_write_unlock(&network_viewer_opt.rw_spinlock);
+
+ ebpf_socket_clean_judy_array_unsafe();
+ } else if (strncmp(keyword, EBPF_FUNCTION_SOCKET_PORT, sizeof(EBPF_FUNCTION_SOCKET_PORT) - 1) == 0) {
+ // port: the included and excluded port lists are rebuilt from the given value
+ name = &keyword[sizeof(EBPF_FUNCTION_SOCKET_PORT) - 1];
+ rw_spinlock_write_lock(&network_viewer_opt.rw_spinlock);
+ ebpf_clean_port_structure(&network_viewer_opt.included_port);
+ ebpf_clean_port_structure(&network_viewer_opt.excluded_port);
+ ebpf_parse_ports((char *)name);
+ rw_spinlock_write_unlock(&network_viewer_opt.rw_spinlock);
+
+ ebpf_socket_clean_judy_array_unsafe();
+ } else if (strncmp(keyword, EBPF_FUNCTION_SOCKET_RESET, sizeof(EBPF_FUNCTION_SOCKET_RESET) - 1) == 0) {
+ // reset: drop every port and IP list, then parse the network viewer section of socket_config again
+ rw_spinlock_write_lock(&network_viewer_opt.rw_spinlock);
+ ebpf_clean_port_structure(&network_viewer_opt.included_port);
+ ebpf_clean_port_structure(&network_viewer_opt.excluded_port);
+
+ ebpf_clean_ip_structure(&network_viewer_opt.included_ips);
+ ebpf_clean_ip_structure(&network_viewer_opt.excluded_ips);
+ ebpf_clean_ip_structure(&network_viewer_opt.ipv4_local_ip);
+ ebpf_clean_ip_structure(&network_viewer_opt.ipv6_local_ip);
+
+ parse_network_viewer_section(&socket_config);
+ ebpf_read_local_addresses_unsafe();
+ network_viewer_opt.enabled = CONFIG_BOOLEAN_YES;
+ rw_spinlock_write_unlock(&network_viewer_opt.rw_spinlock);
+ } else if (strncmp(keyword, EBPF_FUNCTION_SOCKET_INTERFACES, sizeof(EBPF_FUNCTION_SOCKET_INTERFACES) - 1) == 0) {
+ // interfaces: read the local addresses again
+ rw_spinlock_write_lock(&network_viewer_opt.rw_spinlock);
+ ebpf_read_local_addresses_unsafe();
+ rw_spinlock_write_unlock(&network_viewer_opt.rw_spinlock);
+ } else if (strncmp(keyword, "help", 4) == 0) {
+ // help: send the help text and stop, options parsed before it stay applied
+ ebpf_function_socket_help(transaction);
+ rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock);
+ return;
+ }
+ }
+ rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock);
+
+ pthread_mutex_lock(&ebpf_exit_cleanup);
+ // A value above NETDATA_THREAD_EBPF_FUNCTION_RUNNING is handled as "thread not running": start it again.
+ if (em->enabled > NETDATA_THREAD_EBPF_FUNCTION_RUNNING) {
+ // Cleanup when we already had a thread running
+ rw_spinlock_write_lock(&ebpf_judy_pid.index.rw_spinlock);
+ ebpf_socket_clean_judy_array_unsafe();
+ rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock);
+
+ if (ebpf_function_start_thread(em, period)) {
+ ebpf_function_error(transaction,
+ HTTP_RESP_INTERNAL_SERVER_ERROR,
+ "Cannot start thread.");
+ pthread_mutex_unlock(&ebpf_exit_cleanup);
+ return;
+ }
+ } else {
+ // Thread is running and no period was given: never leave it with less than the default lifetime.
+ if (period < 0 && em->lifetime < EBPF_NON_FUNCTION_LIFE_TIME) {
+ em->lifetime = EBPF_NON_FUNCTION_LIFE_TIME;
+ }
+ }
+ pthread_mutex_unlock(&ebpf_exit_cleanup);
+
+ // The result expires after one collection interval of the module.
+ time_t expires = now_realtime_sec() + em->update_every;
+
+ BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL);
+ buffer_json_initialize(wb, "\"", "\"", 0, true, false);
+ buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
+ buffer_json_member_add_string(wb, "type", "table");
+ buffer_json_member_add_time_t(wb, "update_every", em->update_every);
+ buffer_json_member_add_string(wb, "help", EBPF_PLUGIN_SOCKET_FUNCTION_DESCRIPTION);
+
+ // Collect data
+ buffer_json_member_add_array(wb, "data");
+ ebpf_socket_read_open_connections(wb, em);
+ buffer_json_array_close(wb); // data
+
+ buffer_json_member_add_object(wb, "columns");
+ {
+ int fields_id = 0;
+
+ // IMPORTANT!
+ // THE ORDER SHOULD BE THE SAME WITH THE VALUES!
+ buffer_rrdf_table_add_field(wb, fields_id++, "PID", "Process ID", RRDF_FIELD_TYPE_INTEGER,
+ RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, 0, NULL, NAN,
+ RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT,
+ RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY,
+ NULL);
+
+ buffer_rrdf_table_add_field(wb, fields_id++, "Process Name", "Process Name", RRDF_FIELD_TYPE_STRING,
+ RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN,
+ RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT,
+ RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL);
+
+ buffer_rrdf_table_add_field(wb, fields_id++, "Origin", "The connection origin.", RRDF_FIELD_TYPE_STRING,
+ RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN,
+ RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT,
+ RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL);
+
+ buffer_rrdf_table_add_field(wb, fields_id++, "Request from", "Request from IP", RRDF_FIELD_TYPE_STRING,
+ RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN,
+ RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT,
+ RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL);
+
+ // The source port column is disabled, as it is in the rows written by ebpf_fill_function_buffer().
+ /*
+ buffer_rrdf_table_add_field(wb, fields_id++, "SRC PORT", "Source Port", RRDF_FIELD_TYPE_INTEGER,
+ RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, 0, NULL, NAN,
+ RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT,
+ RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY,
+ NULL);
+ */
+
+ buffer_rrdf_table_add_field(wb, fields_id++, "Destination IP", "Destination IP", RRDF_FIELD_TYPE_STRING,
+ RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN,
+ RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT,
+ RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL);
+
+ buffer_rrdf_table_add_field(wb, fields_id++, "Destination Port", "Destination Port", RRDF_FIELD_TYPE_STRING,
+ RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN,
+ RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT,
+ RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL);
+
+ buffer_rrdf_table_add_field(wb, fields_id++, "Protocol", "Communication protocol", RRDF_FIELD_TYPE_STRING,
+ RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN,
+ RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT,
+ RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY, NULL);
+
+ buffer_rrdf_table_add_field(wb, fields_id++, "Incoming Bandwidth", "Bytes received.", RRDF_FIELD_TYPE_INTEGER,
+ RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, 0, NULL, NAN,
+ RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT,
+ RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY,
+ NULL);
+
+ buffer_rrdf_table_add_field(wb, fields_id++, "Outgoing Bandwidth", "Bytes sent.", RRDF_FIELD_TYPE_INTEGER,
+ RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, 0, NULL, NAN,
+ RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT,
+ RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY,
+ NULL);
+
+ // Last column, so the id is not incremented anymore.
+ buffer_rrdf_table_add_field(wb, fields_id, "Connections", "Number of calls to tcp_vX_connections and udp_sendmsg, where X is the protocol version.", RRDF_FIELD_TYPE_INTEGER,
+ RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, 0, NULL, NAN,
+ RRDF_FIELD_SORT_ASCENDING, NULL, RRDF_FIELD_SUMMARY_COUNT,
+ RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_STICKY,
+ NULL);
+ }
+ buffer_json_object_close(wb); // columns
+
+ buffer_json_member_add_object(wb, "charts");
+ {
+ // Inbound Connections
+ // NOTE(review): the chart is named "TCP Inbound Connection" but also lists "connected_udp" - confirm.
+ buffer_json_member_add_object(wb, "IPInboundConn");
+ {
+ buffer_json_member_add_string(wb, "name", "TCP Inbound Connection");
+ buffer_json_member_add_string(wb, "type", "line");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "connected_tcp");
+ buffer_json_add_array_item_string(wb, "connected_udp");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ // OutBound Connections
+ buffer_json_member_add_object(wb, "IPTCPOutboundConn");
+ {
+ buffer_json_member_add_string(wb, "name", "TCP Outbound Connection");
+ buffer_json_member_add_string(wb, "type", "line");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "connected_V4");
+ buffer_json_add_array_item_string(wb, "connected_V6");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ // TCP Functions
+ buffer_json_member_add_object(wb, "TCPFunctions");
+ {
+ buffer_json_member_add_string(wb, "name", "TCPFunctions");
+ buffer_json_member_add_string(wb, "type", "line");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "received");
+ buffer_json_add_array_item_string(wb, "sent");
+ buffer_json_add_array_item_string(wb, "close");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ // TCP Bandwidth
+ buffer_json_member_add_object(wb, "TCPBandwidth");
+ {
+ buffer_json_member_add_string(wb, "name", "TCPBandwidth");
+ buffer_json_member_add_string(wb, "type", "line");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "received");
+ buffer_json_add_array_item_string(wb, "sent");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ // UDP Functions
+ buffer_json_member_add_object(wb, "UDPFunctions");
+ {
+ buffer_json_member_add_string(wb, "name", "UDPFunctions");
+ buffer_json_member_add_string(wb, "type", "line");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "received");
+ buffer_json_add_array_item_string(wb, "sent");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ // UDP Bandwidth
+ buffer_json_member_add_object(wb, "UDPBandwidth");
+ {
+ buffer_json_member_add_string(wb, "name", "UDPBandwidth");
+ buffer_json_member_add_string(wb, "type", "line");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "received");
+ buffer_json_add_array_item_string(wb, "sent");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ }
+ buffer_json_object_close(wb); // charts
+
+ buffer_json_member_add_string(wb, "default_sort_column", "PID");
+
+ // Do we use only on fields that can be grouped?
+ buffer_json_member_add_object(wb, "group_by");
+ {
+ // group by PID
+ buffer_json_member_add_object(wb, "PID");
+ {
+ buffer_json_member_add_string(wb, "name", "Process ID");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "PID");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ // group by Process Name
+ buffer_json_member_add_object(wb, "Process Name");
+ {
+ buffer_json_member_add_string(wb, "name", "Process Name");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "Process Name");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ // group by Origin
+ buffer_json_member_add_object(wb, "Origin");
+ {
+ buffer_json_member_add_string(wb, "name", "Origin");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "Origin");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ // group by Request From IP
+ buffer_json_member_add_object(wb, "Request from");
+ {
+ buffer_json_member_add_string(wb, "name", "Request from IP");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "Request from");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ // group by Destination IP
+ buffer_json_member_add_object(wb, "Destination IP");
+ {
+ buffer_json_member_add_string(wb, "name", "Destination IP");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "Destination IP");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ // group by DST Port
+ buffer_json_member_add_object(wb, "Destination Port");
+ {
+ buffer_json_member_add_string(wb, "name", "Destination Port");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "Destination Port");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+
+ // group by Protocol
+ buffer_json_member_add_object(wb, "Protocol");
+ {
+ buffer_json_member_add_string(wb, "name", "Protocol");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "Protocol");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+ }
+ buffer_json_object_close(wb); // group_by
+
+ buffer_json_member_add_time_t(wb, "expires", expires);
+ buffer_json_finalize(wb);
+
+ // Write the result to stdout.
+ // NOTE(review): this function takes no lock around the writes below - confirm that the caller serializes
+ // the access to stdout.
pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "application/json", expires);
fwrite(buffer_tostring(wb), buffer_strlen(wb), 1, stdout);
pluginsd_function_result_end_to_stdout();
fflush(stdout);
- pthread_mutex_unlock(&lock);
buffer_free(wb);
}
-
/*****************************************************************
* EBPF FUNCTION THREAD
*****************************************************************/
@@ -375,45 +1067,27 @@ static void ebpf_function_thread_manipulation(const char *transaction,
*/
void *ebpf_function_thread(void *ptr)
{
- ebpf_module_t *em = (ebpf_module_t *)ptr;
- char buffer[PLUGINSD_LINE_MAX + 1];
-
- char *s = NULL;
- while(!ebpf_exit_plugin && (s = fgets(buffer, PLUGINSD_LINE_MAX, stdin))) {
- char *words[PLUGINSD_MAX_WORDS] = { NULL };
- size_t num_words = quoted_strings_splitter_pluginsd(buffer, words, PLUGINSD_MAX_WORDS);
-
- const char *keyword = get_word(words, num_words, 0);
-
- if(keyword && strcmp(keyword, PLUGINSD_KEYWORD_FUNCTION) == 0) {
- char *transaction = get_word(words, num_words, 1);
- char *timeout_s = get_word(words, num_words, 2);
- char *function = get_word(words, num_words, 3);
-
- if(!transaction || !*transaction || !timeout_s || !*timeout_s || !function || !*function) {
- netdata_log_error("Received incomplete %s (transaction = '%s', timeout = '%s', function = '%s'). Ignoring it.",
- keyword,
- transaction?transaction:"(unset)",
- timeout_s?timeout_s:"(unset)",
- function?function:"(unset)");
- }
- else {
- int timeout = str2i(timeout_s);
- if (!strncmp(function, EBPF_FUNCTION_THREAD, sizeof(EBPF_FUNCTION_THREAD) - 1))
- ebpf_function_thread_manipulation(transaction,
- function,
- buffer,
- PLUGINSD_LINE_MAX + 1,
- timeout,
- em);
- else
- ebpf_function_error(transaction,
- HTTP_RESP_NOT_FOUND,
- "No function with this name found in ebpf.plugin.");
- }
+ // The argument is not used anymore: requests are no longer read from stdin by this thread.
+ (void)ptr;
+
+ // Requests are handled by the functions event loop, that receives the plugin mutex and the exit flag.
+ // NOTE(review): the first argument is presumably the number of workers - confirm.
+ struct functions_evloop_globals *wg = functions_evloop_init(1,
+ "EBPF",
+ &lock,
+ &ebpf_plugin_exit);
+
+ // Register the socket function using the name defined in ebpf_functions.h, so the name exists in one place.
+ functions_evloop_add_function(wg,
+ EBPF_FUNCTION_SOCKET,
+ ebpf_function_socket_manipulation,
+ PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT);
+
+ // Nothing else to do here: wake up every second until the plugin is asked to exit.
+ heartbeat_t hb;
+ heartbeat_init(&hb);
+ while(!ebpf_plugin_exit) {
+ (void)heartbeat_next(&hb, USEC_PER_SEC);
+
+ if (ebpf_plugin_exit) {
+ break;
}
- else
- netdata_log_error("Received unknown command: %s", keyword ? keyword : "(unset)");
}
+
return NULL;
}
diff --git a/collectors/ebpf.plugin/ebpf_functions.h b/collectors/ebpf.plugin/ebpf_functions.h
index b20dab63421e80..795703b428748d 100644
--- a/collectors/ebpf.plugin/ebpf_functions.h
+++ b/collectors/ebpf.plugin/ebpf_functions.h
@@ -3,20 +3,25 @@
#ifndef NETDATA_EBPF_FUNCTIONS_H
#define NETDATA_EBPF_FUNCTIONS_H 1
+#ifdef NETDATA_DEV_MODE
+// Common
+/**
+ * Announce a function of the plugin on stdout (only compiled when NETDATA_DEV_MODE is defined).
+ *
+ * The line has PLUGINSD_KEYWORD_FUNCTION, the name between quotes, the value 10 and the description
+ * between quotes.
+ *
+ * @param NAME the function name.
+ * @param DESC the function description.
+ */
+static inline void EBPF_PLUGIN_FUNCTIONS(const char *NAME, const char *DESC)
+{
+    printf("%s \"%s\" 10 \"%s\"\n", PLUGINSD_KEYWORD_FUNCTION, NAME, DESC);
+}
+#endif
+
// configuration file & description
#define NETDATA_DIRECTORY_FUNCTIONS_CONFIG_FILE "functions.conf"
#define NETDATA_EBPF_FUNCTIONS_MODULE_DESC "Show information about current function status."
// function list
#define EBPF_FUNCTION_THREAD "ebpf_thread"
+#define EBPF_FUNCTION_SOCKET "ebpf_socket"
+// thread constants
#define EBPF_PLUGIN_THREAD_FUNCTION_DESCRIPTION "Detailed information about eBPF threads."
#define EBPF_PLUGIN_THREAD_FUNCTION_ERROR_THREAD_NOT_FOUND "ebpf.plugin does not have thread named "
-#define EBPF_PLUGIN_FUNCTIONS(NAME, DESC) do { \
- fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " \"" NAME "\" 10 \"%s\"\n", DESC); \
-} while(0)
-
#define EBPF_THREADS_SELECT_THREAD "thread:"
#define EBPF_THREADS_ENABLE_CATEGORY "enable:"
#define EBPF_THREADS_DISABLE_CATEGORY "disable:"
@@ -24,6 +29,16 @@
#define EBPF_THREAD_STATUS_RUNNING "running"
#define EBPF_THREAD_STATUS_STOPPED "stopped"
+// socket constants
+// Text sent in the "help" member of the ebpf_socket function result.
+#define EBPF_PLUGIN_SOCKET_FUNCTION_DESCRIPTION "Detailed information about open sockets."
+// Arguments accepted by the ebpf_socket function. Each argument is compared with the beginning of a given
+// word; for the ones ending with ':' the value is the text after the colon.
+#define EBPF_FUNCTION_SOCKET_FAMILY "family:"
+#define EBPF_FUNCTION_SOCKET_PERIOD "period:"
+#define EBPF_FUNCTION_SOCKET_RESOLVE "resolve:"
+#define EBPF_FUNCTION_SOCKET_RANGE "range:"
+#define EBPF_FUNCTION_SOCKET_PORT "port:"
+#define EBPF_FUNCTION_SOCKET_RESET "reset"
+#define EBPF_FUNCTION_SOCKET_INTERFACES "interfaces"
+
+
void *ebpf_function_thread(void *ptr);
#endif
diff --git a/collectors/ebpf.plugin/ebpf_hardirq.c b/collectors/ebpf.plugin/ebpf_hardirq.c
index 9092c7ac3032fd..465ee6434c59b5 100644
--- a/collectors/ebpf.plugin/ebpf_hardirq.c
+++ b/collectors/ebpf.plugin/ebpf_hardirq.c
@@ -226,6 +226,7 @@ static void ebpf_obsolete_hardirq_global(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP,
"hardirq_latency",
+ "",
"Hardware IRQ latency",
EBPF_COMMON_DIMENSION_MILLISECONDS,
"interrupts",
@@ -580,10 +581,10 @@ static void hardirq_collector(ebpf_module_t *em)
//This will be cancelled by its parent
uint32_t running_time = 0;
uint32_t lifetime = em->lifetime;
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -593,10 +594,10 @@ static void hardirq_collector(ebpf_module_t *em)
pthread_mutex_lock(&lock);
// write dims now for all hitherto discovered IRQs.
- write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "hardirq_latency");
+ ebpf_write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "hardirq_latency", "");
avl_traverse_lock(&hardirq_pub, hardirq_write_dims, NULL);
hardirq_write_static_dims();
- write_end_chart();
+ ebpf_write_end_chart();
pthread_mutex_unlock(&lock);
diff --git a/collectors/ebpf.plugin/ebpf_mdflush.c b/collectors/ebpf.plugin/ebpf_mdflush.c
index 3548d673bae1a3..fe33ff6a478f97 100644
--- a/collectors/ebpf.plugin/ebpf_mdflush.c
+++ b/collectors/ebpf.plugin/ebpf_mdflush.c
@@ -140,6 +140,7 @@ static void ebpf_obsolete_mdflush_global(ebpf_module_t *em)
{
ebpf_write_chart_obsolete("mdstat",
"mdstat_flush",
+ "",
"MD flushes",
"flushes",
"flush (eBPF)",
@@ -345,19 +346,19 @@ static void mdflush_collector(ebpf_module_t *em)
int maps_per_core = em->maps_per_core;
uint32_t running_time = 0;
uint32_t lifetime = em->lifetime;
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
mdflush_read_count_map(maps_per_core);
pthread_mutex_lock(&lock);
// write dims now for all hitherto discovered devices.
- write_begin_chart("mdstat", "mdstat_flush");
+ ebpf_write_begin_chart("mdstat", "mdstat_flush", "");
avl_traverse_lock(&mdflush_pub, mdflush_write_dims, NULL);
- write_end_chart();
+ ebpf_write_end_chart();
pthread_mutex_unlock(&lock);
diff --git a/collectors/ebpf.plugin/ebpf_mount.c b/collectors/ebpf.plugin/ebpf_mount.c
index 57ea5b2f453b80..05c76540a554b2 100644
--- a/collectors/ebpf.plugin/ebpf_mount.c
+++ b/collectors/ebpf.plugin/ebpf_mount.c
@@ -233,6 +233,7 @@ static void ebpf_obsolete_mount_global(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_EBPF_MOUNT_GLOBAL_FAMILY,
NETDATA_EBPF_MOUNT_CALLS,
+ "",
"Calls to mount and umount syscalls",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_EBPF_MOUNT_FAMILY,
@@ -243,6 +244,7 @@ static void ebpf_obsolete_mount_global(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_EBPF_MOUNT_GLOBAL_FAMILY,
NETDATA_EBPF_MOUNT_ERRORS,
+ "",
"Errors to mount and umount file systems",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_EBPF_MOUNT_FAMILY,
@@ -367,9 +369,9 @@ static void mount_collector(ebpf_module_t *em)
int maps_per_core = em->maps_per_core;
uint32_t running_time = 0;
uint32_t lifetime = em->lifetime;
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -466,7 +468,7 @@ static int ebpf_mount_load_bpf(ebpf_module_t *em)
#endif
if (ret)
- netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name);
+ netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->info.thread_name);
return ret;
}
diff --git a/collectors/ebpf.plugin/ebpf_oomkill.c b/collectors/ebpf.plugin/ebpf_oomkill.c
index 84830160a09142..2c34650c358560 100644
--- a/collectors/ebpf.plugin/ebpf_oomkill.c
+++ b/collectors/ebpf.plugin/ebpf_oomkill.c
@@ -57,6 +57,7 @@ static void ebpf_obsolete_oomkill_services(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_OOMKILL_CHART,
+ "",
"OOM kills. This chart is provided by eBPF plugin.",
EBPF_COMMON_DIMENSION_KILLS,
NETDATA_EBPF_MEMORY_GROUP,
@@ -98,15 +99,25 @@ static inline void ebpf_obsolete_oomkill_cgroup_charts(ebpf_module_t *em)
*/
static void ebpf_obsolete_oomkill_apps(ebpf_module_t *em)
{
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_OOMKILL_CHART,
- "OOM kills",
- EBPF_COMMON_DIMENSION_KILLS,
- "mem",
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20020,
- em->update_every);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = apps_groups_root_target; w; w = w->next) {
+ if (unlikely(!(w->charts_created & (1<clean_name,
+ "_app_oomkill",
+ "OOM kills.",
+ EBPF_COMMON_DIMENSION_KILLS,
+ NETDATA_EBPF_MEMORY_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "ebpf.app_oomkill",
+ 20020,
+ update_every);
+
+ w->charts_created &= ~(1<next) {
- if (likely(w->exposed && w->processes)) {
- bool was_oomkilled = false;
+ if (unlikely(!(w->charts_created & (1<root_pid;
while (pids) {
uint32_t j;
@@ -165,10 +179,11 @@ static void oomkill_write_data(int32_t *keys, uint32_t total)
}
pids = pids->next;
}
-
- write_dim:;
- write_chart_dimension(w->name, was_oomkilled);
}
+write_dim:
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_oomkill");
+ write_chart_dimension(EBPF_COMMON_DIMENSION_KILLS, was_oomkilled);
+ ebpf_write_end_chart();
}
// for any remaining keys for which we couldn't find a group, this could be
@@ -231,14 +246,14 @@ static void ebpf_create_systemd_oomkill_charts(int update_every)
static void ebpf_send_systemd_oomkill_charts()
{
ebpf_cgroup_target_t *ect;
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_OOMKILL_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_OOMKILL_CHART, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long) ect->oomkill);
ect->oomkill = 0;
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
/*
@@ -251,9 +266,9 @@ static void ebpf_send_systemd_oomkill_charts()
*/
static void ebpf_send_specific_oomkill_data(char *type, int value)
{
- write_begin_chart(type, NETDATA_OOMKILL_CHART);
+ ebpf_write_begin_chart(type, NETDATA_OOMKILL_CHART, "");
write_chart_dimension(oomkill_publish_aggregated.name, (long long)value);
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -266,7 +281,7 @@ static void ebpf_send_specific_oomkill_data(char *type, int value)
*/
static void ebpf_obsolete_specific_oomkill_charts(char *type, int update_every)
{
- ebpf_write_chart_obsolete(type, NETDATA_OOMKILL_CHART, "OOM kills. This chart is provided by eBPF plugin.",
+ ebpf_write_chart_obsolete(type, NETDATA_OOMKILL_CHART, "", "OOM kills. This chart is provided by eBPF plugin.",
EBPF_COMMON_DIMENSION_KILLS, NETDATA_EBPF_MEMORY_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_OOMKILLS_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5600, update_every);
@@ -420,9 +435,9 @@ static void oomkill_collector(ebpf_module_t *em)
uint32_t running_time = 0;
uint32_t lifetime = em->lifetime;
netdata_idx_t *stats = em->hash_table_stats;
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -430,7 +445,6 @@ static void oomkill_collector(ebpf_module_t *em)
uint32_t count = oomkill_read_data(keys);
if (!count) {
running_time = ebpf_update_oomkill_period(running_time, em);
- continue;
}
stats[NETDATA_CONTROLLER_PID_TABLE_ADD] += (uint64_t) count;
@@ -438,16 +452,14 @@ static void oomkill_collector(ebpf_module_t *em)
pthread_mutex_lock(&collect_data_mutex);
pthread_mutex_lock(&lock);
- if (cgroups) {
+ if (cgroups && count) {
ebpf_update_oomkill_cgroup(keys, count);
// write everything from the ebpf map.
ebpf_oomkill_send_cgroup_data(update_every);
}
if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) {
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_OOMKILL_CHART);
oomkill_write_data(keys, count);
- write_end_chart();
}
pthread_mutex_unlock(&lock);
pthread_mutex_unlock(&collect_data_mutex);
@@ -466,14 +478,29 @@ static void oomkill_collector(ebpf_module_t *em)
void ebpf_oomkill_create_apps_charts(struct ebpf_module *em, void *ptr)
{
struct ebpf_target *root = ptr;
- ebpf_create_charts_on_apps(NETDATA_OOMKILL_CHART,
- "OOM kills",
- EBPF_COMMON_DIMENSION_KILLS,
- "mem",
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20020,
- ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_OOMKILL);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = root; w; w = w->next) {
+ if (unlikely(!w->exposed))
+ continue;
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_oomkill",
+ "OOM kills.",
+ EBPF_COMMON_DIMENSION_KILLS,
+ NETDATA_EBPF_MEMORY_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_oomkill",
+ 20072,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_OOMKILL);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION kills '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+
+ w->charts_created |= 1<apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED;
}
diff --git a/collectors/ebpf.plugin/ebpf_process.c b/collectors/ebpf.plugin/ebpf_process.c
index 3537efc553450d..e3e2b884ebe26a 100644
--- a/collectors/ebpf.plugin/ebpf_process.c
+++ b/collectors/ebpf.plugin/ebpf_process.c
@@ -116,12 +116,12 @@ static void ebpf_update_global_publish(netdata_publish_syscall_t *publish, netda
*/
static void write_status_chart(char *family, netdata_publish_vfs_common_t *pvc)
{
- write_begin_chart(family, NETDATA_PROCESS_STATUS_NAME);
+ ebpf_write_begin_chart(family, NETDATA_PROCESS_STATUS_NAME, "");
write_chart_dimension(status[0], (long long)pvc->running);
write_chart_dimension(status[1], (long long)pvc->zombie);
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -201,56 +201,43 @@ void ebpf_process_remove_pids()
void ebpf_process_send_apps_data(struct ebpf_target *root, ebpf_module_t *em)
{
struct ebpf_target *w;
- collected_number value;
+ // This algorithm is improved in https://github.com/netdata/netdata/pull/16030
+ collected_number values[5];
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS);
for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, create_process));
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
-
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, create_thread));
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
+ if (unlikely(!(w->charts_created & (1<next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t,
+ values[0] = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, create_process));
+ values[1] = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t, create_thread));
+ values[2] = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t,
exit_call));
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
-
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_CLOSE);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t,
+ values[3] = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t,
release_call));
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
+ values[4] = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t,
+ task_err));
- if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_process_sum_values_for_pids(w->root_pid, offsetof(ebpf_process_stat_t,
- task_err));
- write_chart_dimension(w->name, value);
- }
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_process_start");
+ write_chart_dimension("calls", values[0]);
+ ebpf_write_end_chart();
+
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_thread_start");
+ write_chart_dimension("calls", values[1]);
+ ebpf_write_end_chart();
+
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_task_exit");
+ write_chart_dimension("calls", values[2]);
+ ebpf_write_end_chart();
+
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_task_released");
+ write_chart_dimension("calls", values[3]);
+ ebpf_write_end_chart();
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_task_error");
+ write_chart_dimension("calls", values[4]);
+ ebpf_write_end_chart();
}
- write_end_chart();
}
ebpf_process_remove_pids();
@@ -433,52 +420,89 @@ static void ebpf_create_global_charts(ebpf_module_t *em)
void ebpf_process_create_apps_charts(struct ebpf_module *em, void *ptr)
{
struct ebpf_target *root = ptr;
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_PROCESS,
- "Process started",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_PROCESS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20065,
- ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS);
-
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_THREAD,
- "Threads started",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_PROCESS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20066,
- ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS);
-
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_EXIT,
- "Tasks starts exit process.",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_PROCESS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20067,
- ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS);
-
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_CLOSE,
- "Tasks closed",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_PROCESS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20068,
- ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = root; w; w = w->next) {
+ if (unlikely(!w->exposed))
+ continue;
- if (em->mode < MODE_ENTRY) {
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_TASK_ERROR,
- "Errors to create process or threads.",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_PROCESS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20069,
- ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX],
- root,
- em->update_every, NETDATA_EBPF_MODULE_NAME_PROCESS);
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_process_start",
+ "Process started.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_PROCESS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_process_start",
+ 20161,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_PROCESS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_thread_start",
+ "Threads started.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_PROCESS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_thread_start",
+ 20162,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_PROCESS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_task_exit",
+ "Tasks starts exit process.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_PROCESS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_task_exit",
+ 20163,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_PROCESS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_task_released",
+ "Tasks released.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_PROCESS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_task_released",
+ 20164,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_PROCESS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_task_error",
+ "Errors to create process or threads.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_PROCESS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_task_error",
+ 20165,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_PROCESS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_ABSOLUTE_IDX]);
+ }
+ w->charts_created |= 1<apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED;
@@ -503,6 +527,7 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_TASK_PROCESS,
+ "",
"Process started",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_PROCESS_GROUP,
@@ -513,6 +538,7 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_TASK_THREAD,
+ "",
"Threads started",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_PROCESS_GROUP,
@@ -523,6 +549,7 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_TASK_CLOSE,
+ "",
"Tasks starts exit process.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_PROCESS_GROUP,
@@ -533,6 +560,7 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_TASK_EXIT,
+ "",
"Tasks closed",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_PROCESS_GROUP,
@@ -544,6 +572,7 @@ static void ebpf_obsolete_process_services(ebpf_module_t *em)
if (em->mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_TASK_ERROR,
+ "",
"Errors to create process or threads.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_PROCESS_GROUP,
@@ -585,56 +614,70 @@ static inline void ebpf_obsolete_process_cgroup_charts(ebpf_module_t *em) {
*/
void ebpf_obsolete_process_apps_charts(struct ebpf_module *em)
{
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_TASK_PROCESS,
- "Process started",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_PROCESS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20065,
- em->update_every);
-
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_TASK_THREAD,
- "Threads started",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_PROCESS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20066,
- em->update_every);
-
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_TASK_EXIT,
- "Tasks starts exit process.",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_PROCESS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20067,
- em->update_every);
-
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_TASK_CLOSE,
- "Tasks closed",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_PROCESS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20068,
- em->update_every);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = apps_groups_root_target; w; w = w->next) {
+ if (unlikely(!(w->charts_created & (1<mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_TASK_ERROR,
- "Errors to create process or threads.",
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_process_start",
+ "Process started.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_PROCESS_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20069,
- em->update_every);
+ "app.ebpf_process_start",
+ 20161,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_thread_start",
+ "Threads started.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_PROCESS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_thread_start",
+ 20162,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_task_exit",
+ "Tasks starts exit process.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_PROCESS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_task_exit",
+ 20163,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_task_released",
+ "Tasks released.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_PROCESS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_task_released",
+ 20164,
+ update_every);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_task_error",
+ "Errors to create process or threads.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_PROCESS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_task_error",
+ 20165,
+ update_every);
+ }
+
+ w->charts_created &= ~(1<mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP,
NETDATA_PROCESS_ERROR_NAME,
+ "",
"Fails to create process",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_PROCESS_GROUP,
@@ -819,31 +866,31 @@ static void ebpf_process_sum_cgroup_pids(ebpf_process_stat_t *ps, struct pid_on_
*/
static void ebpf_send_specific_process_data(char *type, ebpf_process_stat_t *values, ebpf_module_t *em)
{
- write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_PROCESS);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, "");
write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_FORK].name,
(long long) values->create_process);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_THREAD);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_THREAD, "");
write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_CLONE].name,
(long long) values->create_thread);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_EXIT);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_EXIT, "");
write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT].name,
(long long) values->release_call);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_CLOSE);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_CLOSE, "");
write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_RELEASE_TASK].name,
(long long) values->release_call);
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_ERROR);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_TASK_ERROR, "");
write_chart_dimension(process_publish_aggregated[NETDATA_KEY_PUBLISH_PROCESS_EXIT].name,
(long long) values->task_err);
- write_end_chart();
+ ebpf_write_end_chart();
}
}
@@ -909,28 +956,28 @@ static void ebpf_create_specific_process_charts(char *type, ebpf_module_t *em)
*/
static void ebpf_obsolete_specific_process_charts(char *type, ebpf_module_t *em)
{
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, "Process started",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_PROCESS, "", "Process started",
EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
NETDATA_CGROUP_PROCESS_CREATE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5000,
em->update_every);
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_THREAD, "Threads started",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_THREAD, "", "Threads started",
EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
NETDATA_CGROUP_THREAD_CREATE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5001,
em->update_every);
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_EXIT,"Tasks starts exit process.",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_EXIT, "","Tasks starts exit process.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
NETDATA_CGROUP_PROCESS_EXIT_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5002,
em->update_every);
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_CLOSE,"Tasks closed",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_CLOSE, "","Tasks closed",
EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
NETDATA_CGROUP_PROCESS_CLOSE_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5003,
em->update_every);
if (em->mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_ERROR,"Errors to create process or threads.",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_TASK_ERROR, "","Errors to create process or threads.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_PROCESS_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
NETDATA_CGROUP_PROCESS_ERROR_CONTEXT, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5004,
em->update_every);
@@ -989,46 +1036,46 @@ static void ebpf_create_systemd_process_charts(ebpf_module_t *em)
static void ebpf_send_systemd_process_charts(ebpf_module_t *em)
{
ebpf_cgroup_target_t *ect;
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_PROCESS, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_ps.create_process);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_THREAD, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_ps.create_thread);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_EXIT);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_EXIT, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_ps.exit_call);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_CLOSE);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_CLOSE, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_ps.release_call);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_TASK_ERROR, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_ps.task_err);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
}
@@ -1118,10 +1165,10 @@ static void process_collector(ebpf_module_t *em)
uint32_t lifetime = em->lifetime;
netdata_idx_t *stats = em->hash_table_stats;
memset(stats, 0, sizeof(em->hash_table_stats));
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
usec_t dt = heartbeat_next(&hb, USEC_PER_SEC);
(void)dt;
- if (ebpf_exit_plugin)
+ if (ebpf_plugin_exit)
break;
if (++counter == update_every) {
diff --git a/collectors/ebpf.plugin/ebpf_process.h b/collectors/ebpf.plugin/ebpf_process.h
index d49e384525e016..310b321d6356a9 100644
--- a/collectors/ebpf.plugin/ebpf_process.h
+++ b/collectors/ebpf.plugin/ebpf_process.h
@@ -52,7 +52,8 @@ enum netdata_ebpf_stats_order {
NETDATA_EBPF_ORDER_STAT_HASH_GLOBAL_TABLE_TOTAL,
NETDATA_EBPF_ORDER_STAT_HASH_PID_TABLE_ADDED,
NETDATA_EBPF_ORDER_STAT_HASH_PID_TABLE_REMOVED,
- NETATA_EBPF_ORDER_STAT_ARAL_BEGIN
+ NETATA_EBPF_ORDER_STAT_ARAL_BEGIN,
+ NETDATA_EBPF_ORDER_FUNCTION_PER_THREAD,
};
enum netdata_ebpf_load_mode_stats{
diff --git a/collectors/ebpf.plugin/ebpf_shm.c b/collectors/ebpf.plugin/ebpf_shm.c
index baeb7204e2fa2f..f14eb67d01df37 100644
--- a/collectors/ebpf.plugin/ebpf_shm.c
+++ b/collectors/ebpf.plugin/ebpf_shm.c
@@ -305,7 +305,8 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SHMGET_CHART,
- "Calls to syscall shmget(2).",
+ "",
+ "Calls to syscall shmget(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
@@ -315,7 +316,8 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SHMAT_CHART,
- "Calls to syscall shmat(2).",
+ "",
+ "Calls to syscall shmat(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
@@ -325,7 +327,8 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SHMDT_CHART,
- "Calls to syscall shmdt(2).",
+ "",
+ "Calls to syscall shmdt(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
@@ -335,7 +338,8 @@ static void ebpf_obsolete_shm_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SHMCTL_CHART,
- "Calls to syscall shmctl(2).",
+ "",
+ "Calls to syscall shmctl(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
@@ -375,45 +379,58 @@ static inline void ebpf_obsolete_shm_cgroup_charts(ebpf_module_t *em) {
*/
void ebpf_obsolete_shm_apps_charts(struct ebpf_module *em)
{
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SHMGET_CHART,
- "Calls to syscall shmget(2).",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_IPC_SHM_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20191,
- em->update_every);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = apps_groups_root_target; w; w = w->next) {
+ if (unlikely(!(w->charts_created & (1<shmat(2).",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_IPC_SHM_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20192,
- em->update_every);
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_shmget_call",
+ "Calls to syscall shmget(2).",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_IPC_SHM_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_shmget_call",
+ 20191,
+ update_every);
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SHMDT_CHART,
- "Calls to syscall shmdt(2).",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_IPC_SHM_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20193,
- em->update_every);
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_shmat_call",
+ "Calls to syscall shmat(2).",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_IPC_SHM_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_shmat_call",
+ 20192,
+ update_every);
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SHMCTL_CHART,
- "Calls to syscall shmctl(2).",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_IPC_SHM_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20194,
- em->update_every);
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_shmdt_call",
+ "Calls to syscall shmdt(2).",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_IPC_SHM_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_shmdt_call",
+ 20193,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_shmctl_call",
+ "Calls to syscall shmctl(2).",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_IPC_SHM_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_shmctl_call",
+ 20194,
+ update_every);
+
+ w->charts_created &= ~(1<next) {
- if (unlikely(w->exposed && w->processes)) {
- ebpf_shm_sum_pids(&w->shm, w->root_pid);
- }
- }
+ if (unlikely(!(w->charts_created & (1<next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, (long long) w->shm.get);
- }
- }
- write_end_chart();
+ ebpf_shm_sum_pids(&w->shm, w->root_pid);
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMAT_CHART);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, (long long) w->shm.at);
- }
- }
- write_end_chart();
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_shmget_call");
+ write_chart_dimension("calls", (long long) w->shm.get);
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMDT_CHART);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, (long long) w->shm.dt);
- }
- }
- write_end_chart();
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_shmat_call");
+ write_chart_dimension("calls", (long long) w->shm.at);
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SHMCTL_CHART);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, (long long) w->shm.ctl);
- }
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_shmdt_call");
+ write_chart_dimension("calls", (long long) w->shm.dt);
+ ebpf_write_end_chart();
+
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_shmctl_call");
+ write_chart_dimension("calls", (long long) w->shm.ctl);
+ ebpf_write_end_chart();
}
- write_end_chart();
}
/**
@@ -768,7 +771,7 @@ static void ebpf_shm_sum_cgroup_pids(netdata_publish_shm_t *shm, struct pid_on_t
static void ebpf_create_specific_shm_charts(char *type, int update_every)
{
ebpf_create_chart(type, NETDATA_SHMGET_CHART,
- "Calls to syscall shmget(2).",
+ "Calls to syscall shmget(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_CGROUP_SHM_GET_CONTEXT,
@@ -781,7 +784,7 @@ static void ebpf_create_specific_shm_charts(char *type, int update_every)
NETDATA_EBPF_MODULE_NAME_SHM);
ebpf_create_chart(type, NETDATA_SHMAT_CHART,
- "Calls to syscall shmat(2).",
+ "Calls to syscall shmat(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_CGROUP_SHM_AT_CONTEXT,
@@ -794,7 +797,7 @@ static void ebpf_create_specific_shm_charts(char *type, int update_every)
NETDATA_EBPF_MODULE_NAME_SHM);
ebpf_create_chart(type, NETDATA_SHMDT_CHART,
- "Calls to syscall shmdt(2).",
+ "Calls to syscall shmdt(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_CGROUP_SHM_DT_CONTEXT,
@@ -807,7 +810,7 @@ static void ebpf_create_specific_shm_charts(char *type, int update_every)
NETDATA_EBPF_MODULE_NAME_SHM);
ebpf_create_chart(type, NETDATA_SHMCTL_CHART,
- "Calls to syscall shmctl(2).",
+ "Calls to syscall shmctl(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_CGROUP_SHM_CTL_CONTEXT,
@@ -831,28 +834,32 @@ static void ebpf_create_specific_shm_charts(char *type, int update_every)
static void ebpf_obsolete_specific_shm_charts(char *type, int update_every)
{
ebpf_write_chart_obsolete(type, NETDATA_SHMGET_CHART,
- "Calls to syscall shmget(2).",
+ "",
+ "Calls to syscall shmget(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_GET_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5800, update_every);
ebpf_write_chart_obsolete(type, NETDATA_SHMAT_CHART,
- "Calls to syscall shmat(2).",
+ "",
+ "Calls to syscall shmat(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_AT_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5801, update_every);
ebpf_write_chart_obsolete(type, NETDATA_SHMDT_CHART,
- "Calls to syscall shmdt(2).",
+ "",
+ "Calls to syscall shmdt(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_DT_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5802, update_every);
ebpf_write_chart_obsolete(type, NETDATA_SHMCTL_CHART,
- "Calls to syscall shmctl(2).",
+ "",
+ "Calls to syscall shmctl(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SHM_CTL_CONTEXT,
@@ -869,7 +876,7 @@ static void ebpf_obsolete_specific_shm_charts(char *type, int update_every)
static void ebpf_create_systemd_shm_charts(int update_every)
{
ebpf_create_charts_on_systemd(NETDATA_SHMGET_CHART,
- "Calls to syscall shmget(2).",
+ "Calls to syscall shmget(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
@@ -878,7 +885,7 @@ static void ebpf_create_systemd_shm_charts(int update_every)
NETDATA_SYSTEMD_SHM_GET_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every);
ebpf_create_charts_on_systemd(NETDATA_SHMAT_CHART,
- "Calls to syscall shmat(2).",
+ "Calls to syscall shmat(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
@@ -887,7 +894,7 @@ static void ebpf_create_systemd_shm_charts(int update_every)
NETDATA_SYSTEMD_SHM_AT_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every);
ebpf_create_charts_on_systemd(NETDATA_SHMDT_CHART,
- "Calls to syscall shmdt(2).",
+ "Calls to syscall shmdt(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
@@ -896,7 +903,7 @@ static void ebpf_create_systemd_shm_charts(int update_every)
NETDATA_SYSTEMD_SHM_DT_CONTEXT, NETDATA_EBPF_MODULE_NAME_SHM, update_every);
ebpf_create_charts_on_systemd(NETDATA_SHMCTL_CHART,
- "Calls to syscall shmctl(2).",
+ "Calls to syscall shmctl(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_APPS_IPC_SHM_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
@@ -913,37 +920,37 @@ static void ebpf_create_systemd_shm_charts(int update_every)
static void ebpf_send_systemd_shm_charts()
{
ebpf_cgroup_target_t *ect;
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMGET_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMGET_CHART, "");
for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_shm.get);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMAT_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMAT_CHART, "");
for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_shm.at);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMDT_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMDT_CHART, "");
for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_shm.dt);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMCTL_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SHMCTL_CHART, "");
for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_shm.ctl);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
/*
@@ -956,21 +963,21 @@ static void ebpf_send_systemd_shm_charts()
*/
static void ebpf_send_specific_shm_data(char *type, netdata_publish_shm_t *values)
{
- write_begin_chart(type, NETDATA_SHMGET_CHART);
+ ebpf_write_begin_chart(type, NETDATA_SHMGET_CHART, "");
write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMGET_CALL].name, (long long)values->get);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_SHMAT_CHART);
+ ebpf_write_begin_chart(type, NETDATA_SHMAT_CHART, "");
write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMAT_CALL].name, (long long)values->at);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_SHMDT_CHART);
+ ebpf_write_begin_chart(type, NETDATA_SHMDT_CHART, "");
write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMDT_CALL].name, (long long)values->dt);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_SHMCTL_CHART);
+ ebpf_write_begin_chart(type, NETDATA_SHMCTL_CHART, "");
write_chart_dimension(shm_publish_aggregated[NETDATA_KEY_SHMCTL_CALL].name, (long long)values->ctl);
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -1035,9 +1042,9 @@ static void shm_collector(ebpf_module_t *em)
uint32_t lifetime = em->lifetime;
netdata_idx_t *stats = em->hash_table_stats;
memset(stats, 0, sizeof(em->hash_table_stats));
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -1097,41 +1104,74 @@ static void shm_collector(ebpf_module_t *em)
void ebpf_shm_create_apps_charts(struct ebpf_module *em, void *ptr)
{
struct ebpf_target *root = ptr;
- ebpf_create_charts_on_apps(NETDATA_SHMGET_CHART,
- "Calls to syscall shmget(2).",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_IPC_SHM_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20191,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM);
-
- ebpf_create_charts_on_apps(NETDATA_SHMAT_CHART,
- "Calls to syscall shmat(2).",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_IPC_SHM_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20192,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM);
-
- ebpf_create_charts_on_apps(NETDATA_SHMDT_CHART,
- "Calls to syscall shmdt(2).",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_IPC_SHM_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20193,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM);
-
- ebpf_create_charts_on_apps(NETDATA_SHMCTL_CHART,
- "Calls to syscall shmctl(2).",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_IPC_SHM_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20194,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SHM);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = root; w; w = w->next) {
+ if (unlikely(!w->exposed))
+ continue;
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_shmget_call",
+ "Calls to syscall shmget(2).",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_IPC_SHM_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_shmget_call",
+ 20191,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SHM);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_shmat_call",
+ "Calls to syscall shmat(2).",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_IPC_SHM_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_shmat_call",
+ 20192,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SHM);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_shmdt_call",
+ "Calls to syscall shmdt(2).",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_IPC_SHM_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_shmdt_call",
+ 20193,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SHM);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_shmctl_call",
+ "Calls to syscall shmctl(2).",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_IPC_SHM_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_shmctl_call",
+ 20194,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SHM);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ w->charts_created |= 1<<EBPF_MODULE_SHM_IDX;
+ }

em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED;
}
@@ -1222,7 +1262,7 @@ static int ebpf_shm_load_bpf(ebpf_module_t *em)
if (ret)
- netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name);
+ netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->info.thread_name);
return ret;
}
diff --git a/collectors/ebpf.plugin/ebpf_socket.c b/collectors/ebpf.plugin/ebpf_socket.c
index e4798b30c8a49c..bbb5dca1b486d1 100644
--- a/collectors/ebpf.plugin/ebpf_socket.c
+++ b/collectors/ebpf.plugin/ebpf_socket.c
@@ -5,9 +5,6 @@
#include "ebpf.h"
#include "ebpf_socket.h"
-// ----------------------------------------------------------------------------
-// ARAL vectors used to speed up processing
-
/*****************************************************************
*
* GLOBAL VARIABLES
@@ -23,16 +20,7 @@ static char *socket_id_names[NETDATA_MAX_SOCKET_VECTOR] = { "tcp_cleanup_rbuf",
"tcp_connect_v4", "tcp_connect_v6", "inet_csk_accept_tcp",
"inet_csk_accept_udp" };
-static ebpf_local_maps_t socket_maps[] = {{.name = "tbl_bandwidth",
- .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED,
- .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED,
- .type = NETDATA_EBPF_MAP_RESIZABLE | NETDATA_EBPF_MAP_PID,
- .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED,
-#ifdef LIBBPF_MAJOR_VERSION
- .map_type = BPF_MAP_TYPE_PERCPU_HASH
-#endif
- },
- {.name = "tbl_global_sock",
+static ebpf_local_maps_t socket_maps[] = {{.name = "tbl_global_sock",
.internal_input = NETDATA_SOCKET_COUNTER,
.user_input = 0, .type = NETDATA_EBPF_MAP_STATIC,
.map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED,
@@ -48,16 +36,7 @@ static ebpf_local_maps_t socket_maps[] = {{.name = "tbl_bandwidth",
.map_type = BPF_MAP_TYPE_PERCPU_HASH
#endif
},
- {.name = "tbl_conn_ipv4",
- .internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED,
- .user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED,
- .type = NETDATA_EBPF_MAP_STATIC,
- .map_fd = ND_EBPF_MAP_FD_NOT_INITIALIZED,
-#ifdef LIBBPF_MAJOR_VERSION
- .map_type = BPF_MAP_TYPE_PERCPU_HASH
-#endif
- },
- {.name = "tbl_conn_ipv6",
+ {.name = "tbl_nd_socket",
.internal_input = NETDATA_COMPILED_CONNECTIONS_ALLOWED,
.user_input = NETDATA_MAXIMUM_CONNECTIONS_ALLOWED,
.type = NETDATA_EBPF_MAP_STATIC,
@@ -93,14 +72,10 @@ static netdata_idx_t *socket_hash_values = NULL;
static netdata_syscall_stat_t socket_aggregated_data[NETDATA_MAX_SOCKET_VECTOR];
static netdata_publish_syscall_t socket_publish_aggregated[NETDATA_MAX_SOCKET_VECTOR];
-static ebpf_bandwidth_t *bandwidth_vector = NULL;
-
-pthread_mutex_t nv_mutex;
-netdata_vector_plot_t inbound_vectors = { .plot = NULL, .next = 0, .last = 0 };
-netdata_vector_plot_t outbound_vectors = { .plot = NULL, .next = 0, .last = 0 };
netdata_socket_t *socket_values;
ebpf_network_viewer_port_list_t *listen_ports = NULL;
+ebpf_addresses_t tcp_v6_connect_address = {.function = "tcp_v6_connect", .hash = 0, .addr = 0, .type = 0};
struct config socket_config = { .first_section = NULL,
.last_section = NULL,
@@ -108,28 +83,30 @@ struct config socket_config = { .first_section = NULL,
.index = { .avl_tree = { .root = NULL, .compar = appconfig_section_compare },
.rwlock = AVL_LOCK_INITIALIZER } };
-netdata_ebpf_targets_t socket_targets[] = { {.name = "inet_csk_accept", .mode = EBPF_LOAD_TRAMPOLINE},
- {.name = "tcp_retransmit_skb", .mode = EBPF_LOAD_TRAMPOLINE},
- {.name = "tcp_cleanup_rbuf", .mode = EBPF_LOAD_TRAMPOLINE},
- {.name = "tcp_close", .mode = EBPF_LOAD_TRAMPOLINE},
- {.name = "udp_recvmsg", .mode = EBPF_LOAD_TRAMPOLINE},
- {.name = "tcp_sendmsg", .mode = EBPF_LOAD_TRAMPOLINE},
- {.name = "udp_sendmsg", .mode = EBPF_LOAD_TRAMPOLINE},
- {.name = "tcp_v4_connect", .mode = EBPF_LOAD_TRAMPOLINE},
- {.name = "tcp_v6_connect", .mode = EBPF_LOAD_TRAMPOLINE},
+netdata_ebpf_targets_t socket_targets[] = { {.name = "inet_csk_accept", .mode = EBPF_LOAD_PROBE},
+ {.name = "tcp_retransmit_skb", .mode = EBPF_LOAD_PROBE},
+ {.name = "tcp_cleanup_rbuf", .mode = EBPF_LOAD_PROBE},
+ {.name = "tcp_close", .mode = EBPF_LOAD_PROBE},
+ {.name = "udp_recvmsg", .mode = EBPF_LOAD_PROBE},
+ {.name = "tcp_sendmsg", .mode = EBPF_LOAD_PROBE},
+ {.name = "udp_sendmsg", .mode = EBPF_LOAD_PROBE},
+ {.name = "tcp_v4_connect", .mode = EBPF_LOAD_PROBE},
+ {.name = "tcp_v6_connect", .mode = EBPF_LOAD_PROBE},
{.name = NULL, .mode = EBPF_LOAD_TRAMPOLINE}};
-struct netdata_static_thread socket_threads = {
- .name = "EBPF SOCKET READ",
- .config_section = NULL,
- .config_name = NULL,
- .env_name = NULL,
- .enabled = 1,
- .thread = NULL,
- .init_routine = NULL,
- .start_routine = NULL
+struct netdata_static_thread ebpf_read_socket = {
+ .name = "EBPF_READ_SOCKET",
+ .config_section = NULL,
+ .config_name = NULL,
+ .env_name = NULL,
+ .enabled = 1,
+ .thread = NULL,
+ .init_routine = NULL,
+ .start_routine = NULL
};
+ARAL *aral_socket_table = NULL;
+
#ifdef NETDATA_DEV_MODE
int socket_disable_priority;
#endif
@@ -145,7 +122,9 @@ int socket_disable_priority;
static void ebpf_socket_disable_probes(struct socket_bpf *obj)
{
bpf_program__set_autoload(obj->progs.netdata_inet_csk_accept_kretprobe, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v4_connect_kprobe, false);
bpf_program__set_autoload(obj->progs.netdata_tcp_v4_connect_kretprobe, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_kprobe, false);
bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_kretprobe, false);
bpf_program__set_autoload(obj->progs.netdata_tcp_retransmit_skb_kprobe, false);
bpf_program__set_autoload(obj->progs.netdata_tcp_cleanup_rbuf_kprobe, false);
@@ -156,7 +135,6 @@ static void ebpf_socket_disable_probes(struct socket_bpf *obj)
bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_kprobe, false);
bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_kretprobe, false);
bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_kprobe, false);
- bpf_program__set_autoload(obj->progs.netdata_socket_release_task_kprobe, false);
}
/**
@@ -168,8 +146,10 @@ static void ebpf_socket_disable_probes(struct socket_bpf *obj)
*/
static void ebpf_socket_disable_trampoline(struct socket_bpf *obj)
{
- bpf_program__set_autoload(obj->progs.netdata_inet_csk_accept_fentry, false);
+ bpf_program__set_autoload(obj->progs.netdata_inet_csk_accept_fexit, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v4_connect_fentry, false);
bpf_program__set_autoload(obj->progs.netdata_tcp_v4_connect_fexit, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_fentry, false);
bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_fexit, false);
bpf_program__set_autoload(obj->progs.netdata_tcp_retransmit_skb_fentry, false);
bpf_program__set_autoload(obj->progs.netdata_tcp_cleanup_rbuf_fentry, false);
@@ -180,7 +160,6 @@ static void ebpf_socket_disable_trampoline(struct socket_bpf *obj)
bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_fexit, false);
bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_fentry, false);
bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_fexit, false);
- bpf_program__set_autoload(obj->progs.netdata_socket_release_task_fentry, false);
}
/**
@@ -190,14 +169,22 @@ static void ebpf_socket_disable_trampoline(struct socket_bpf *obj)
*/
static void ebpf_set_trampoline_target(struct socket_bpf *obj)
{
- bpf_program__set_attach_target(obj->progs.netdata_inet_csk_accept_fentry, 0,
+ bpf_program__set_attach_target(obj->progs.netdata_inet_csk_accept_fexit, 0,
socket_targets[NETDATA_FCNT_INET_CSK_ACCEPT].name);
+ bpf_program__set_attach_target(obj->progs.netdata_tcp_v4_connect_fentry, 0,
+ socket_targets[NETDATA_FCNT_TCP_V4_CONNECT].name);
+
bpf_program__set_attach_target(obj->progs.netdata_tcp_v4_connect_fexit, 0,
socket_targets[NETDATA_FCNT_TCP_V4_CONNECT].name);
- bpf_program__set_attach_target(obj->progs.netdata_tcp_v6_connect_fexit, 0,
+ if (tcp_v6_connect_address.type == 'T') {
+ bpf_program__set_attach_target(
+ obj->progs.netdata_tcp_v6_connect_fentry, 0, socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name);
+
+ bpf_program__set_attach_target(obj->progs.netdata_tcp_v6_connect_fexit, 0,
socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name);
+ }
bpf_program__set_attach_target(obj->progs.netdata_tcp_retransmit_skb_fentry, 0,
socket_targets[NETDATA_FCNT_TCP_RETRANSMIT].name);
@@ -205,7 +192,8 @@ static void ebpf_set_trampoline_target(struct socket_bpf *obj)
bpf_program__set_attach_target(obj->progs.netdata_tcp_cleanup_rbuf_fentry, 0,
socket_targets[NETDATA_FCNT_CLEANUP_RBUF].name);
- bpf_program__set_attach_target(obj->progs.netdata_tcp_close_fentry, 0, socket_targets[NETDATA_FCNT_TCP_CLOSE].name);
+ bpf_program__set_attach_target(obj->progs.netdata_tcp_close_fentry, 0,
+ socket_targets[NETDATA_FCNT_TCP_CLOSE].name);
bpf_program__set_attach_target(obj->progs.netdata_udp_recvmsg_fentry, 0,
socket_targets[NETDATA_FCNT_UDP_RECEVMSG].name);
@@ -224,8 +212,6 @@ static void ebpf_set_trampoline_target(struct socket_bpf *obj)
bpf_program__set_attach_target(obj->progs.netdata_udp_sendmsg_fexit, 0,
socket_targets[NETDATA_FCNT_UDP_SENDMSG].name);
-
- bpf_program__set_attach_target(obj->progs.netdata_socket_release_task_fentry, 0, EBPF_COMMON_FNCT_CLEAN_UP);
}
@@ -241,9 +227,13 @@ static inline void ebpf_socket_disable_specific_trampoline(struct socket_bpf *ob
{
if (sel == MODE_RETURN) {
bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_fentry, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v4_connect_fentry, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_fentry, false);
bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_fentry, false);
} else {
bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_fexit, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v4_connect_fexit, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_fexit, false);
bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_fexit, false);
}
}
@@ -260,9 +250,13 @@ static inline void ebpf_socket_disable_specific_probe(struct socket_bpf *obj, ne
{
if (sel == MODE_RETURN) {
bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_kprobe, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v4_connect_kprobe, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_kprobe, false);
bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_kprobe, false);
} else {
bpf_program__set_autoload(obj->progs.netdata_tcp_sendmsg_kretprobe, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v4_connect_kretprobe, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_kretprobe, false);
bpf_program__set_autoload(obj->progs.netdata_udp_sendmsg_kretprobe, false);
}
}
@@ -275,26 +269,12 @@ static inline void ebpf_socket_disable_specific_probe(struct socket_bpf *obj, ne
* @param obj is the main structure for bpf objects.
* @param sel option selected by user.
*/
-static int ebpf_socket_attach_probes(struct socket_bpf *obj, netdata_run_mode_t sel)
+static long ebpf_socket_attach_probes(struct socket_bpf *obj, netdata_run_mode_t sel)
{
obj->links.netdata_inet_csk_accept_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_inet_csk_accept_kretprobe,
true,
socket_targets[NETDATA_FCNT_INET_CSK_ACCEPT].name);
- int ret = libbpf_get_error(obj->links.netdata_inet_csk_accept_kretprobe);
- if (ret)
- return -1;
-
- obj->links.netdata_tcp_v4_connect_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_v4_connect_kretprobe,
- true,
- socket_targets[NETDATA_FCNT_TCP_V4_CONNECT].name);
- ret = libbpf_get_error(obj->links.netdata_tcp_v4_connect_kretprobe);
- if (ret)
- return -1;
-
- obj->links.netdata_tcp_v6_connect_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_v6_connect_kretprobe,
- true,
- socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name);
- ret = libbpf_get_error(obj->links.netdata_tcp_v6_connect_kretprobe);
+ long ret = libbpf_get_error(obj->links.netdata_inet_csk_accept_kretprobe);
if (ret)
return -1;
@@ -347,6 +327,21 @@ static int ebpf_socket_attach_probes(struct socket_bpf *obj, netdata_run_mode_t
ret = libbpf_get_error(obj->links.netdata_udp_sendmsg_kretprobe);
if (ret)
return -1;
+
+ obj->links.netdata_tcp_v4_connect_kretprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_v4_connect_kretprobe,
+ true,
+ socket_targets[NETDATA_FCNT_TCP_V4_CONNECT].name);
+ ret = libbpf_get_error(obj->links.netdata_tcp_v4_connect_kretprobe);
+ if (ret)
+ return -1;
+
+ if (tcp_v6_connect_address.type == 'T') {
+ obj->links.netdata_tcp_v6_connect_kretprobe = bpf_program__attach_kprobe(
+ obj->progs.netdata_tcp_v6_connect_kretprobe, true, socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name);
+ ret = libbpf_get_error(obj->links.netdata_tcp_v6_connect_kretprobe);
+ if (ret)
+ return -1;
+ }
} else {
obj->links.netdata_tcp_sendmsg_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_sendmsg_kprobe,
false,
@@ -361,13 +356,23 @@ static int ebpf_socket_attach_probes(struct socket_bpf *obj, netdata_run_mode_t
ret = libbpf_get_error(obj->links.netdata_udp_sendmsg_kprobe);
if (ret)
return -1;
- }
- obj->links.netdata_socket_release_task_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_socket_release_task_kprobe,
- false, EBPF_COMMON_FNCT_CLEAN_UP);
- ret = libbpf_get_error(obj->links.netdata_socket_release_task_kprobe);
- if (ret)
- return -1;
+ obj->links.netdata_tcp_v4_connect_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_v4_connect_kprobe,
+ false,
+ socket_targets[NETDATA_FCNT_TCP_V4_CONNECT].name);
+ ret = libbpf_get_error(obj->links.netdata_tcp_v4_connect_kprobe);
+ if (ret)
+ return -1;
+
+ if (tcp_v6_connect_address.type == 'T') {
+ obj->links.netdata_tcp_v6_connect_kprobe = bpf_program__attach_kprobe(obj->progs.netdata_tcp_v6_connect_kprobe,
+ false,
+ socket_targets[NETDATA_FCNT_TCP_V6_CONNECT].name);
+ ret = libbpf_get_error(obj->links.netdata_tcp_v6_connect_kprobe);
+ if (ret)
+ return -1;
+ }
+ }
return 0;
}
@@ -381,11 +386,9 @@ static int ebpf_socket_attach_probes(struct socket_bpf *obj, netdata_run_mode_t
*/
static void ebpf_socket_set_hash_tables(struct socket_bpf *obj)
{
- socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].map_fd = bpf_map__fd(obj->maps.tbl_bandwidth);
socket_maps[NETDATA_SOCKET_GLOBAL].map_fd = bpf_map__fd(obj->maps.tbl_global_sock);
socket_maps[NETDATA_SOCKET_LPORTS].map_fd = bpf_map__fd(obj->maps.tbl_lports);
- socket_maps[NETDATA_SOCKET_TABLE_IPV4].map_fd = bpf_map__fd(obj->maps.tbl_conn_ipv4);
- socket_maps[NETDATA_SOCKET_TABLE_IPV6].map_fd = bpf_map__fd(obj->maps.tbl_conn_ipv6);
+ socket_maps[NETDATA_SOCKET_OPEN_SOCKET].map_fd = bpf_map__fd(obj->maps.tbl_nd_socket);
socket_maps[NETDATA_SOCKET_TABLE_UDP].map_fd = bpf_map__fd(obj->maps.tbl_nv_udp);
socket_maps[NETDATA_SOCKET_TABLE_CTRL].map_fd = bpf_map__fd(obj->maps.socket_ctrl);
}
@@ -400,28 +403,30 @@ static void ebpf_socket_set_hash_tables(struct socket_bpf *obj)
*/
static void ebpf_socket_adjust_map(struct socket_bpf *obj, ebpf_module_t *em)
{
- ebpf_update_map_size(obj->maps.tbl_bandwidth, &socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH],
- em, bpf_map__name(obj->maps.tbl_bandwidth));
-
- ebpf_update_map_size(obj->maps.tbl_conn_ipv4, &socket_maps[NETDATA_SOCKET_TABLE_IPV4],
- em, bpf_map__name(obj->maps.tbl_conn_ipv4));
-
- ebpf_update_map_size(obj->maps.tbl_conn_ipv6, &socket_maps[NETDATA_SOCKET_TABLE_IPV6],
- em, bpf_map__name(obj->maps.tbl_conn_ipv6));
+ ebpf_update_map_size(obj->maps.tbl_nd_socket, &socket_maps[NETDATA_SOCKET_OPEN_SOCKET],
+ em, bpf_map__name(obj->maps.tbl_nd_socket));
ebpf_update_map_size(obj->maps.tbl_nv_udp, &socket_maps[NETDATA_SOCKET_TABLE_UDP],
em, bpf_map__name(obj->maps.tbl_nv_udp));
-
- ebpf_update_map_type(obj->maps.tbl_bandwidth, &socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH]);
- ebpf_update_map_type(obj->maps.tbl_conn_ipv4, &socket_maps[NETDATA_SOCKET_TABLE_IPV4]);
- ebpf_update_map_type(obj->maps.tbl_conn_ipv6, &socket_maps[NETDATA_SOCKET_TABLE_IPV6]);
+ ebpf_update_map_type(obj->maps.tbl_nd_socket, &socket_maps[NETDATA_SOCKET_OPEN_SOCKET]);
ebpf_update_map_type(obj->maps.tbl_nv_udp, &socket_maps[NETDATA_SOCKET_TABLE_UDP]);
ebpf_update_map_type(obj->maps.socket_ctrl, &socket_maps[NETDATA_SOCKET_TABLE_CTRL]);
ebpf_update_map_type(obj->maps.tbl_global_sock, &socket_maps[NETDATA_SOCKET_GLOBAL]);
ebpf_update_map_type(obj->maps.tbl_lports, &socket_maps[NETDATA_SOCKET_LPORTS]);
}
+/**
+ * Disable TCP V6 connect
+ */
+static void ebpf_disable_tcp_v6_connect(struct socket_bpf *obj)
+{
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_kretprobe, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_kprobe, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_fexit, false);
+ bpf_program__set_autoload(obj->progs.netdata_tcp_v6_connect_fentry, false);
+}
+
/**
* Load and attach
*
@@ -450,6 +455,10 @@ static inline int ebpf_socket_load_and_attach(struct socket_bpf *obj, ebpf_modul
ebpf_socket_adjust_map(obj, em);
+ if (tcp_v6_connect_address.type != 'T') {
+ ebpf_disable_tcp_v6_connect(obj);
+ }
+
int ret = socket_bpf__load(obj);
if (ret) {
fprintf(stderr, "failed to load BPF object: %d\n", ret);
@@ -459,7 +468,7 @@ static inline int ebpf_socket_load_and_attach(struct socket_bpf *obj, ebpf_modul
if (test == EBPF_LOAD_TRAMPOLINE) {
ret = socket_bpf__attach(obj);
} else {
- ret = ebpf_socket_attach_probes(obj, em->mode);
+ ret = (int)ebpf_socket_attach_probes(obj, em->mode);
}
if (!ret) {
@@ -479,182 +488,393 @@ static inline int ebpf_socket_load_and_attach(struct socket_bpf *obj, ebpf_modul
*****************************************************************/
/**
- * Clean internal socket plot
+ * Socket Free
*
- * Clean all structures allocated with strdupz.
+ * Clean up variables after the child threads stop.
*
- * @param ptr the pointer with addresses to clean.
+ * @param em a pointer to the thread's `ebpf_module_t` data.
*/
-static inline void clean_internal_socket_plot(netdata_socket_plot_t *ptr)
+static void ebpf_socket_free(ebpf_module_t *em )
{
- freez(ptr->dimension_recv);
- freez(ptr->dimension_sent);
- freez(ptr->resolved_name);
- freez(ptr->dimension_retransmit);
+ pthread_mutex_lock(&ebpf_exit_cleanup);
+ em->enabled = NETDATA_THREAD_EBPF_STOPPED;
+ ebpf_update_stats(&plugin_statistics, em);
+ ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE);
+ pthread_mutex_unlock(&ebpf_exit_cleanup);
}
/**
- * Clean socket plot
+ * Obsolete Systemd Socket Charts
*
- * Clean the allocated data for inbound and outbound vectors.
-static void clean_allocated_socket_plot()
-{
- if (!network_viewer_opt.enabled)
- return;
-
- uint32_t i;
- uint32_t end = inbound_vectors.last;
- netdata_socket_plot_t *plot = inbound_vectors.plot;
- for (i = 0; i < end; i++) {
- clean_internal_socket_plot(&plot[i]);
- }
-
- clean_internal_socket_plot(&plot[inbound_vectors.last]);
-
- end = outbound_vectors.last;
- plot = outbound_vectors.plot;
- for (i = 0; i < end; i++) {
- clean_internal_socket_plot(&plot[i]);
- }
- clean_internal_socket_plot(&plot[outbound_vectors.last]);
-}
- */
-
-/**
- * Clean network ports allocated during initialization.
+ * Obsolete charts when systemd is enabled
*
- * @param ptr a pointer to the link list.
-static void clean_network_ports(ebpf_network_viewer_port_list_t *ptr)
+ * @param update_every value to overwrite the update frequency set by the server.
+ **/
+static void ebpf_obsolete_systemd_socket_charts(int update_every)
{
- if (unlikely(!ptr))
- return;
-
- while (ptr) {
- ebpf_network_viewer_port_list_t *next = ptr->next;
- freez(ptr->value);
- freez(ptr);
- ptr = next;
+ int order = 20080;
+ ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
+ NETDATA_NET_APPS_CONNECTION_TCP_V4,
+ "",
+ "Calls to tcp_v4_connection",
+ EBPF_COMMON_DIMENSION_CONNECTIONS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT,
+ order++,
+ update_every);
+
+ if (tcp_v6_connect_address.type == 'T') {
+ ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
+ NETDATA_NET_APPS_CONNECTION_TCP_V6,
+ "",
+ "Calls to tcp_v6_connection",
+ EBPF_COMMON_DIMENSION_CONNECTIONS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT,
+ order++,
+ update_every);
}
+
+ ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
+ NETDATA_NET_APPS_BANDWIDTH_RECV,
+ "",
+ "Bytes received",
+ EBPF_COMMON_DIMENSION_BITS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT,
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
+ NETDATA_NET_APPS_BANDWIDTH_SENT,
+ "",
+ "Bytes sent",
+ EBPF_COMMON_DIMENSION_BITS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT,
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
+ NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS,
+ "",
+ "Calls to tcp_cleanup_rbuf.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT,
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
+ NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS,
+ "",
+ "Calls to tcp_sendmsg.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT,
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
+ NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT,
+ "",
+ "Calls to tcp_retransmit",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT,
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
+ NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS,
+ "",
+ "Calls to udp_sendmsg",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT,
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
+ NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS,
+ "",
+ "Calls to udp_recvmsg",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT,
+ order++,
+ update_every);
}
- */
+static void ebpf_obsolete_specific_socket_charts(char *type, int update_every);
/**
- * Clean service names
+ * Obsolete cgroup chart
*
- * Clean the allocated link list that stores names.
+ * Mark every previously created chart as obsolete before the thread closes.
*
- * @param names the link list.
-static void clean_service_names(ebpf_network_viewer_dim_name_t *names)
-{
- if (unlikely(!names))
- return;
-
- while (names) {
- ebpf_network_viewer_dim_name_t *next = names->next;
- freez(names->name);
- freez(names);
- names = next;
- }
-}
+ * @param em a pointer to `struct ebpf_module`
*/
+static inline void ebpf_obsolete_socket_cgroup_charts(ebpf_module_t *em) {
+ pthread_mutex_lock(&mutex_cgroup_shm);
-/**
- * Clean hostnames
- *
- * @param hostnames the hostnames to clean
-static void clean_hostnames(ebpf_network_viewer_hostname_list_t *hostnames)
-{
- if (unlikely(!hostnames))
- return;
+ ebpf_obsolete_systemd_socket_charts(em->update_every);
+
+ ebpf_cgroup_target_t *ect;
+ for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
+ if (ect->systemd)
+ continue;
- while (hostnames) {
- ebpf_network_viewer_hostname_list_t *next = hostnames->next;
- freez(hostnames->value);
- simple_pattern_free(hostnames->value_pattern);
- freez(hostnames);
- hostnames = next;
+ ebpf_obsolete_specific_socket_charts(ect->name, em->update_every);
}
+ pthread_mutex_unlock(&mutex_cgroup_shm);
}
- */
/**
- * Clean port Structure
+ * Obsolete apps charts
*
- * Clean the allocated list.
+ * Call ebpf_write_chart_obsolete to mark the charts on the apps submenu as obsolete.
*
- * @param clean the list that will be cleaned
+ * @param em a pointer to the structure with the default values.
*/
-void clean_port_structure(ebpf_network_viewer_port_list_t **clean)
+void ebpf_socket_obsolete_apps_charts(struct ebpf_module *em)
{
- ebpf_network_viewer_port_list_t *move = *clean;
- while (move) {
- ebpf_network_viewer_port_list_t *next = move->next;
- freez(move->value);
- freez(move);
+ int order = 20130;
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = apps_groups_root_target; w; w = w->next) {
+ if (unlikely(!(w->charts_created & (1<clean_name,
+ "_ebpf_call_tcp_v4_connection",
+ "Calls to tcp_v4_connection.",
+ EBPF_COMMON_DIMENSION_CONNECTIONS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_tcp_v4_connection",
+ order++,
+ update_every);
-/**
- * Clean IP structure
- *
- * Clean the allocated list.
- *
- * @param clean the list that will be cleaned
- */
-static void clean_ip_structure(ebpf_network_viewer_ip_list_t **clean)
-{
- ebpf_network_viewer_ip_list_t *move = *clean;
- while (move) {
- ebpf_network_viewer_ip_list_t *next = move->next;
- freez(move->value);
- freez(move);
+ if (tcp_v6_connect_address.type == 'T') {
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_tcp_v6_connection",
+ "Calls to tcp_v6_connection.",
+ EBPF_COMMON_DIMENSION_CONNECTIONS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_tcp_v6_connection",
+ order++,
+ update_every);
+ }
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_sock_bytes_sent",
+ "Bytes sent.",
+ EBPF_COMMON_DIMENSION_BITS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_sock_bytes_sent",
+ order++,
+ update_every);
- move = next;
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_sock_bytes_received",
+ "Bytes received.",
+ EBPF_COMMON_DIMENSION_BITS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_sock_bytes_received",
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_tcp_sendmsg",
+ "Calls to tcp_sendmsg.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_tcp_sendmsg",
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_tcp_cleanup_rbuf",
+ "Calls to tcp_cleanup_rbuf.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_tcp_cleanup_rbuf",
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_tcp_retransmit",
+ "Calls to tcp_retransmit.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_tcp_retransmit",
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_udp_sendmsg",
+ "Calls to udp_sendmsg.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_udp_sendmsg",
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_udp_recvmsg",
+ "Calls to udp_recvmsg.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_udp_recvmsg",
+ order++,
+ update_every);
+
+ w->charts_created &= ~(1<update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY,
+ NETDATA_TCP_OUTBOUND_CONNECTIONS,
+ "",
+ "TCP outbound connections.",
+ EBPF_COMMON_DIMENSION_CONNECTIONS,
+ NETDATA_SOCKET_KERNEL_FUNCTIONS,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ NULL,
+ order++,
+ em->update_every);
+
+
+ ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY,
+ NETDATA_TCP_FUNCTION_COUNT,
+ "",
+ "Calls to internal functions",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_SOCKET_KERNEL_FUNCTIONS,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ NULL,
+ order++,
+ em->update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY,
+ NETDATA_TCP_FUNCTION_BITS,
+ "",
+ "TCP bandwidth",
+ EBPF_COMMON_DIMENSION_BITS,
+ NETDATA_SOCKET_KERNEL_FUNCTIONS,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ NULL,
+ order++,
+ em->update_every);
- clean_network_ports(network_viewer_opt.included_port);
- clean_network_ports(network_viewer_opt.excluded_port);
- clean_service_names(network_viewer_opt.names);
- clean_hostnames(network_viewer_opt.included_hostnames);
- clean_hostnames(network_viewer_opt.excluded_hostnames);
- */
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY,
+ NETDATA_TCP_FUNCTION_ERROR,
+ "",
+ "TCP errors",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_SOCKET_KERNEL_FUNCTIONS,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ NULL,
+ order++,
+ em->update_every);
+ }
+
+ ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY,
+ NETDATA_TCP_RETRANSMIT,
+ "",
+ "Packages retransmitted",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_SOCKET_KERNEL_FUNCTIONS,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ NULL,
+ order++,
+ em->update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY,
+ NETDATA_UDP_FUNCTION_COUNT,
+ "",
+ "UDP calls",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_SOCKET_KERNEL_FUNCTIONS,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ NULL,
+ order++,
+ em->update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY,
+ NETDATA_UDP_FUNCTION_BITS,
+ "",
+ "UDP bandwidth",
+ EBPF_COMMON_DIMENSION_BITS,
+ NETDATA_SOCKET_KERNEL_FUNCTIONS,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ NULL,
+ order++,
+ em->update_every);
- pthread_mutex_destroy(&nv_mutex);
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_obsolete(NETDATA_EBPF_IP_FAMILY,
+ NETDATA_UDP_FUNCTION_ERROR,
+ "",
+ "UDP errors",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_SOCKET_KERNEL_FUNCTIONS,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ NULL,
+ order++,
+ em->update_every);
+ }
- pthread_mutex_lock(&ebpf_exit_cleanup);
- em->enabled = NETDATA_THREAD_EBPF_STOPPED;
- ebpf_update_stats(&plugin_statistics, em);
- ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_REMOVE);
- pthread_mutex_unlock(&ebpf_exit_cleanup);
+ fflush(stdout);
}
-
/**
* Socket exit
*
@@ -665,23 +885,33 @@ static void ebpf_socket_free(ebpf_module_t *em )
static void ebpf_socket_exit(void *ptr)
{
ebpf_module_t *em = (ebpf_module_t *)ptr;
- pthread_mutex_lock(&nv_mutex);
- if (socket_threads.thread)
- netdata_thread_cancel(*socket_threads.thread);
- pthread_mutex_unlock(&nv_mutex);
- ebpf_socket_free(em);
-}
-/**
- * Socket cleanup
- *
- * Clean up allocated addresses.
- *
- * @param ptr thread data.
- */
-void ebpf_socket_cleanup(void *ptr)
-{
- UNUSED(ptr);
+ if (ebpf_read_socket.thread)
+ netdata_thread_cancel(*ebpf_read_socket.thread);
+
+ if (em->enabled == NETDATA_THREAD_EBPF_FUNCTION_RUNNING) {
+ pthread_mutex_lock(&lock);
+
+ if (em->cgroup_charts) {
+ ebpf_obsolete_socket_cgroup_charts(em);
+ fflush(stdout);
+ }
+
+ if (em->apps_charts & NETDATA_EBPF_APPS_FLAG_CHART_CREATED) {
+ ebpf_socket_obsolete_apps_charts(em);
+ fflush(stdout);
+ }
+
+ ebpf_socket_obsolete_global_charts(em);
+
+#ifdef NETDATA_DEV_MODE
+ if (ebpf_aral_socket_pid)
+ ebpf_statistic_obsolete_aral_chart(em, socket_disable_priority);
+#endif
+ pthread_mutex_unlock(&lock);
+ }
+
+ ebpf_socket_free(em);
}
/*****************************************************************
@@ -737,212 +967,44 @@ static void ebpf_update_global_publish(
}
/**
- * Update Network Viewer plot data
- *
- * @param plot the structure where the data will be stored
- * @param sock the last update from the socket
- */
-static inline void update_nv_plot_data(netdata_plot_values_t *plot, netdata_socket_t *sock)
-{
- if (sock->ct != plot->last_time) {
- plot->last_time = sock->ct;
- plot->plot_recv_packets = sock->recv_packets;
- plot->plot_sent_packets = sock->sent_packets;
- plot->plot_recv_bytes = sock->recv_bytes;
- plot->plot_sent_bytes = sock->sent_bytes;
- plot->plot_retransmit = sock->retransmit;
- }
-
- sock->recv_packets = 0;
- sock->sent_packets = 0;
- sock->recv_bytes = 0;
- sock->sent_bytes = 0;
- sock->retransmit = 0;
-}
-
-/**
- * Calculate Network Viewer Plot
+ * Send Global Inbound connection
*
- * Do math with collected values before to plot data.
+ * Send number of connections read per protocol.
*/
-static inline void calculate_nv_plot()
+static void ebpf_socket_send_global_inbound_conn()
{
- pthread_mutex_lock(&nv_mutex);
- uint32_t i;
- uint32_t end = inbound_vectors.next;
- for (i = 0; i < end; i++) {
- update_nv_plot_data(&inbound_vectors.plot[i].plot, &inbound_vectors.plot[i].sock);
- }
- inbound_vectors.max_plot = end;
-
- // The 'Other' dimension is always calculated for the chart to have at least one dimension
- update_nv_plot_data(&inbound_vectors.plot[inbound_vectors.last].plot,
- &inbound_vectors.plot[inbound_vectors.last].sock);
-
- end = outbound_vectors.next;
- for (i = 0; i < end; i++) {
- update_nv_plot_data(&outbound_vectors.plot[i].plot, &outbound_vectors.plot[i].sock);
- }
- outbound_vectors.max_plot = end;
-
- /*
- // The 'Other' dimension is always calculated for the chart to have at least one dimension
- update_nv_plot_data(&outbound_vectors.plot[outbound_vectors.last].plot,
- &outbound_vectors.plot[outbound_vectors.last].sock);
- */
- pthread_mutex_unlock(&nv_mutex);
-}
+ uint64_t udp_conn = 0;
+ uint64_t tcp_conn = 0;
+ ebpf_network_viewer_port_list_t *move = listen_ports;
+ while (move) {
+ if (move->protocol == IPPROTO_TCP)
+ tcp_conn += move->connections;
+ else
+ udp_conn += move->connections;
-/**
- * Network viewer send bytes
- *
- * @param ptr the structure with values to plot
- * @param chart the chart name.
- */
-static inline void ebpf_socket_nv_send_bytes(netdata_vector_plot_t *ptr, char *chart)
-{
- uint32_t i;
- uint32_t end = ptr->last_plot;
- netdata_socket_plot_t *w = ptr->plot;
- collected_number value;
-
- write_begin_chart(NETDATA_EBPF_FAMILY, chart);
- for (i = 0; i < end; i++) {
- value = ((collected_number) w[i].plot.plot_sent_bytes);
- write_chart_dimension(w[i].dimension_sent, value);
- value = (collected_number) w[i].plot.plot_recv_bytes;
- write_chart_dimension(w[i].dimension_recv, value);
- }
-
- i = ptr->last;
- value = ((collected_number) w[i].plot.plot_sent_bytes);
- write_chart_dimension(w[i].dimension_sent, value);
- value = (collected_number) w[i].plot.plot_recv_bytes;
- write_chart_dimension(w[i].dimension_recv, value);
- write_end_chart();
-}
+ move = move->next;
+ }
-/**
- * Network Viewer Send packets
- *
- * @param ptr the structure with values to plot
- * @param chart the chart name.
- */
-static inline void ebpf_socket_nv_send_packets(netdata_vector_plot_t *ptr, char *chart)
-{
- uint32_t i;
- uint32_t end = ptr->last_plot;
- netdata_socket_plot_t *w = ptr->plot;
- collected_number value;
-
- write_begin_chart(NETDATA_EBPF_FAMILY, chart);
- for (i = 0; i < end; i++) {
- value = ((collected_number)w[i].plot.plot_sent_packets);
- write_chart_dimension(w[i].dimension_sent, value);
- value = (collected_number) w[i].plot.plot_recv_packets;
- write_chart_dimension(w[i].dimension_recv, value);
- }
-
- i = ptr->last;
- value = ((collected_number)w[i].plot.plot_sent_packets);
- write_chart_dimension(w[i].dimension_sent, value);
- value = (collected_number)w[i].plot.plot_recv_packets;
- write_chart_dimension(w[i].dimension_recv, value);
- write_end_chart();
+ ebpf_write_begin_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_INBOUND_CONNECTIONS, "");
+ write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_INCOMING_CONNECTION_TCP].name, (long long) tcp_conn);
+ write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_INCOMING_CONNECTION_UDP].name, (long long) udp_conn);
+ ebpf_write_end_chart();
}
/**
- * Network Viewer Send Retransmit
+ * Send data to Netdata calling auxiliary functions.
*
- * @param ptr the structure with values to plot
- * @param chart the chart name.
+ * @param em the structure with thread information
*/
-static inline void ebpf_socket_nv_send_retransmit(netdata_vector_plot_t *ptr, char *chart)
+static void ebpf_socket_send_data(ebpf_module_t *em)
{
- uint32_t i;
- uint32_t end = ptr->last_plot;
- netdata_socket_plot_t *w = ptr->plot;
- collected_number value;
-
- write_begin_chart(NETDATA_EBPF_FAMILY, chart);
- for (i = 0; i < end; i++) {
- value = (collected_number) w[i].plot.plot_retransmit;
- write_chart_dimension(w[i].dimension_retransmit, value);
- }
+ netdata_publish_vfs_common_t common_tcp;
+ netdata_publish_vfs_common_t common_udp;
+ ebpf_update_global_publish(socket_publish_aggregated, &common_tcp, &common_udp, socket_aggregated_data);
- i = ptr->last;
- value = (collected_number)w[i].plot.plot_retransmit;
- write_chart_dimension(w[i].dimension_retransmit, value);
- write_end_chart();
-}
-
-/**
- * Send network viewer data
- *
- * @param ptr the pointer to plot data
- */
-static void ebpf_socket_send_nv_data(netdata_vector_plot_t *ptr)
-{
- if (!ptr->flags)
- return;
-
- if (ptr == (netdata_vector_plot_t *)&outbound_vectors) {
- ebpf_socket_nv_send_bytes(ptr, NETDATA_NV_OUTBOUND_BYTES);
- fflush(stdout);
-
- ebpf_socket_nv_send_packets(ptr, NETDATA_NV_OUTBOUND_PACKETS);
- fflush(stdout);
-
- ebpf_socket_nv_send_retransmit(ptr, NETDATA_NV_OUTBOUND_RETRANSMIT);
- fflush(stdout);
- } else {
- ebpf_socket_nv_send_bytes(ptr, NETDATA_NV_INBOUND_BYTES);
- fflush(stdout);
-
- ebpf_socket_nv_send_packets(ptr, NETDATA_NV_INBOUND_PACKETS);
- fflush(stdout);
- }
-}
-
-/**
- * Send Global Inbound connection
- *
- * Send number of connections read per protocol.
- */
-static void ebpf_socket_send_global_inbound_conn()
-{
- uint64_t udp_conn = 0;
- uint64_t tcp_conn = 0;
- ebpf_network_viewer_port_list_t *move = listen_ports;
- while (move) {
- if (move->protocol == IPPROTO_TCP)
- tcp_conn += move->connections;
- else
- udp_conn += move->connections;
-
- move = move->next;
- }
-
- write_begin_chart(NETDATA_EBPF_IP_FAMILY, NETDATA_INBOUND_CONNECTIONS);
- write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_INCOMING_CONNECTION_TCP].name, (long long) tcp_conn);
- write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_INCOMING_CONNECTION_UDP].name, (long long) udp_conn);
- write_end_chart();
-}
-
-/**
- * Send data to Netdata calling auxiliary functions.
- *
- * @param em the structure with thread information
- */
-static void ebpf_socket_send_data(ebpf_module_t *em)
-{
- netdata_publish_vfs_common_t common_tcp;
- netdata_publish_vfs_common_t common_udp;
- ebpf_update_global_publish(socket_publish_aggregated, &common_tcp, &common_udp, socket_aggregated_data);
-
- ebpf_socket_send_global_inbound_conn();
- write_count_chart(NETDATA_TCP_OUTBOUND_CONNECTIONS, NETDATA_EBPF_IP_FAMILY,
- &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4], 2);
+ ebpf_socket_send_global_inbound_conn();
+ write_count_chart(NETDATA_TCP_OUTBOUND_CONNECTIONS, NETDATA_EBPF_IP_FAMILY,
+ &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4], 2);
// We read bytes from function arguments, but bandwidth is given in bits,
// so we need to multiply by 8 to convert for the final value.
@@ -967,31 +1029,6 @@ static void ebpf_socket_send_data(ebpf_module_t *em)
}
}
-/**
- * Sum values for pid
- *
- * @param root the structure with all available PIDs
- *
- * @param offset the address that we are reading
- *
- * @return it returns the sum of all PIDs
- */
-long long ebpf_socket_sum_values_for_pids(struct ebpf_pid_on_target *root, size_t offset)
-{
- long long ret = 0;
- while (root) {
- int32_t pid = root->pid;
- ebpf_socket_publish_apps_t *w = socket_bandwidth_curr[pid];
- if (w) {
- ret += get_value_from_structure((char *)w, offset);
- }
-
- root = root->next;
- }
-
- return ret;
-}
-
/**
* Send data to Netdata calling auxiliary functions.
*
@@ -1003,100 +1040,74 @@ void ebpf_socket_send_apps_data(ebpf_module_t *em, struct ebpf_target *root)
UNUSED(em);
struct ebpf_target *w;
- collected_number value;
+ // This algorithm is improved in https://github.com/netdata/netdata/pull/16030
+ collected_number values[9];
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V4);
for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t,
- call_tcp_v4_connection));
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
+ if (unlikely(!(w->charts_created & (1<next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t,
- call_tcp_v6_connection));
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
+ struct ebpf_pid_on_target *move = w->root_pid;
+ // Simplify algorithm, but others will appear only in https://github.com/netdata/netdata/pull/16030
+ memset(values, 0, sizeof(values));
+ while (move) {
+ int32_t pid = move->pid;
+ ebpf_socket_publish_apps_t *ws = socket_bandwidth_curr[pid];
+ if (ws) {
+ values[0] += (collected_number) ws->call_tcp_v4_connection;
+ values[1] += (collected_number) ws->call_tcp_v6_connection;
+ values[2] += (collected_number) ws->bytes_sent;
+ values[3] += (collected_number) ws->bytes_received;
+ values[4] += (collected_number) ws->call_tcp_sent;
+ values[5] += (collected_number) ws->call_tcp_received;
+ values[6] += (collected_number) ws->retransmit;
+ values[7] += (collected_number) ws->call_udp_sent;
+ values[8] += (collected_number) ws->call_udp_received;
+ }
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_SENT);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t,
- bytes_sent));
- // We multiply by 0.008, because we read bytes, but we display bits
- write_chart_dimension(w->name, ((value)*8)/1000);
+ move = move->next;
}
- }
- write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_RECV);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t,
- bytes_received));
- // We multiply by 0.008, because we read bytes, but we display bits
- write_chart_dimension(w->name, ((value)*8)/1000);
- }
- }
- write_end_chart();
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_tcp_v4_connection");
+ write_chart_dimension("connections", values[0]);
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t,
- call_tcp_sent));
- write_chart_dimension(w->name, value);
+ if (tcp_v6_connect_address.type == 'T') { // chart id must match the "_ebpf_call_tcp_v6_connection" id obsoleted on exit
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_tcp_v6_connection");
+ write_chart_dimension("calls", values[1]);
+ ebpf_write_end_chart();
 }
- }
- write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t,
- call_tcp_received));
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_sock_bytes_sent");
+ // We multiply by 8 and divide by 1000 (i.e. by 0.008), because we read bytes, but we display kilobits
+ write_chart_dimension("bandwidth", ((values[2])*8)/1000);
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t,
- retransmit));
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_sock_bytes_received");
+ // We multiply by 8 and divide by 1000 (i.e. by 0.008), because we read bytes, but we display kilobits
+ write_chart_dimension("bandwidth", ((values[3])*8)/1000);
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t,
- call_udp_sent));
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_tcp_sendmsg");
+ write_chart_dimension("calls", values[4]);
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- value = ebpf_socket_sum_values_for_pids(w->root_pid, offsetof(ebpf_socket_publish_apps_t,
- call_udp_received));
- write_chart_dimension(w->name, value);
- }
- }
- write_end_chart();
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_tcp_cleanup_rbuf");
+ write_chart_dimension("calls", values[5]);
+ ebpf_write_end_chart();
+
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_tcp_retransmit");
+ write_chart_dimension("calls", values[6]);
+ ebpf_write_end_chart();
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_udp_sendmsg");
+ write_chart_dimension("calls", values[7]);
+ ebpf_write_end_chart();
+
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_udp_recvmsg");
+ write_chart_dimension("calls", values[8]);
+ ebpf_write_end_chart();
+ }
}
/*****************************************************************
@@ -1112,7 +1123,7 @@ void ebpf_socket_send_apps_data(ebpf_module_t *em, struct ebpf_target *root)
*
* @param em a pointer to the structure with the default values.
*/
-static void ebpf_create_global_charts(ebpf_module_t *em)
+static void ebpf_socket_create_global_charts(ebpf_module_t *em)
{
int order = 21070;
ebpf_create_chart(NETDATA_EBPF_IP_FAMILY,
@@ -1238,217 +1249,153 @@ static void ebpf_create_global_charts(ebpf_module_t *em)
void ebpf_socket_create_apps_charts(struct ebpf_module *em, void *ptr)
{
struct ebpf_target *root = ptr;
- int order = 20080;
- ebpf_create_charts_on_apps(NETDATA_NET_APPS_CONNECTION_TCP_V4,
- "Calls to tcp_v4_connection", EBPF_COMMON_DIMENSION_CONNECTIONS,
- NETDATA_APPS_NET_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- order++,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET);
-
- ebpf_create_charts_on_apps(NETDATA_NET_APPS_CONNECTION_TCP_V6,
- "Calls to tcp_v6_connection", EBPF_COMMON_DIMENSION_CONNECTIONS,
- NETDATA_APPS_NET_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- order++,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET);
-
- ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_SENT,
- "Bytes sent", EBPF_COMMON_DIMENSION_BITS,
- NETDATA_APPS_NET_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- order++,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET);
-
- ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_RECV,
- "bytes received", EBPF_COMMON_DIMENSION_BITS,
- NETDATA_APPS_NET_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- order++,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET);
-
- ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS,
- "Calls for tcp_sendmsg",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_NET_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- order++,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET);
-
- ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS,
- "Calls for tcp_cleanup_rbuf",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_NET_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- order++,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET);
-
- ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT,
- "Calls for tcp_retransmit",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_NET_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- order++,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET);
-
- ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS,
- "Calls for udp_sendmsg",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_NET_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- order++,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET);
-
- ebpf_create_charts_on_apps(NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS,
- "Calls for udp_recvmsg",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_APPS_NET_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- order++,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SOCKET);
-
- em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED;
-}
-
-/**
- * Create network viewer chart
- *
- * Create common charts.
- *
- * @param id chart id
- * @param title chart title
- * @param units units label
- * @param family group name used to attach the chart on dashboard
- * @param order chart order
- * @param update_every value to overwrite the update frequency set by the server.
- * @param ptr plot structure with values.
- */
-static void ebpf_socket_create_nv_chart(char *id, char *title, char *units,
- char *family, int order, int update_every, netdata_vector_plot_t *ptr)
-{
- ebpf_write_chart_cmd(NETDATA_EBPF_FAMILY,
- id,
- title,
- units,
- family,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- order,
- update_every,
- NETDATA_EBPF_MODULE_NAME_SOCKET);
-
- uint32_t i;
- uint32_t end = ptr->last_plot;
- netdata_socket_plot_t *w = ptr->plot;
- for (i = 0; i < end; i++) {
- fprintf(stdout, "DIMENSION %s '' incremental -1 1\n", w[i].dimension_sent);
- fprintf(stdout, "DIMENSION %s '' incremental 1 1\n", w[i].dimension_recv);
- }
-
- end = ptr->last;
- fprintf(stdout, "DIMENSION %s '' incremental -1 1\n", w[end].dimension_sent);
- fprintf(stdout, "DIMENSION %s '' incremental 1 1\n", w[end].dimension_recv);
-}
-
-/**
- * Create network viewer retransmit
- *
- * Create a specific chart.
- *
- * @param id the chart id
- * @param title the chart title
- * @param units the units label
- * @param family the group name used to attach the chart on dashboard
- * @param order the chart order
- * @param update_every value to overwrite the update frequency set by the server.
- * @param ptr the plot structure with values.
- */
-static void ebpf_socket_create_nv_retransmit(char *id, char *title, char *units,
- char *family, int order, int update_every, netdata_vector_plot_t *ptr)
-{
- ebpf_write_chart_cmd(NETDATA_EBPF_FAMILY,
- id,
- title,
- units,
- family,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- order,
- update_every,
- NETDATA_EBPF_MODULE_NAME_SOCKET);
-
- uint32_t i;
- uint32_t end = ptr->last_plot;
- netdata_socket_plot_t *w = ptr->plot;
- for (i = 0; i < end; i++) {
- fprintf(stdout, "DIMENSION %s '' incremental 1 1\n", w[i].dimension_retransmit);
- }
-
- end = ptr->last;
- fprintf(stdout, "DIMENSION %s '' incremental 1 1\n", w[end].dimension_retransmit);
-}
-
-/**
- * Create Network Viewer charts
- *
- * Recreate the charts when new sockets are created.
- *
- * @param ptr a pointer for inbound or outbound vectors.
- * @param update_every value to overwrite the update frequency set by the server.
- */
-static void ebpf_socket_create_nv_charts(netdata_vector_plot_t *ptr, int update_every)
-{
- // We do not have new sockets, so we do not need move forward
- if (ptr->max_plot == ptr->last_plot)
- return;
-
- ptr->last_plot = ptr->max_plot;
-
- if (ptr == (netdata_vector_plot_t *)&outbound_vectors) {
- ebpf_socket_create_nv_chart(NETDATA_NV_OUTBOUND_BYTES,
- "Outbound connections (bytes).", EBPF_COMMON_DIMENSION_BYTES,
- NETDATA_NETWORK_CONNECTIONS_GROUP,
- 21080,
- update_every, ptr);
-
- ebpf_socket_create_nv_chart(NETDATA_NV_OUTBOUND_PACKETS,
- "Outbound connections (packets)",
- EBPF_COMMON_DIMENSION_PACKETS,
- NETDATA_NETWORK_CONNECTIONS_GROUP,
- 21082,
- update_every, ptr);
-
- ebpf_socket_create_nv_retransmit(NETDATA_NV_OUTBOUND_RETRANSMIT,
- "Retransmitted packets",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_NETWORK_CONNECTIONS_GROUP,
- 21083,
- update_every, ptr);
- } else {
- ebpf_socket_create_nv_chart(NETDATA_NV_INBOUND_BYTES,
- "Inbound connections (bytes)", EBPF_COMMON_DIMENSION_BYTES,
- NETDATA_NETWORK_CONNECTIONS_GROUP,
- 21084,
- update_every, ptr);
+ struct ebpf_target *w;
+ int order = 20130;
+ int update_every = em->update_every;
+ for (w = root; w; w = w->next) {
+ if (unlikely(!w->exposed))
+ continue;
- ebpf_socket_create_nv_chart(NETDATA_NV_INBOUND_PACKETS,
- "Inbound connections (packets)",
- EBPF_COMMON_DIMENSION_PACKETS,
- NETDATA_NETWORK_CONNECTIONS_GROUP,
- 21085,
- update_every, ptr);
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_tcp_v4_connection",
+ "Calls to tcp_v4_connection.",
+ EBPF_COMMON_DIMENSION_CONNECTIONS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_tcp_v4_connection",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SOCKET);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION connections '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ if (tcp_v6_connect_address.type == 'T') {
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_tcp_v6_connection",
+ "Calls to tcp_v6_connection.",
+ EBPF_COMMON_DIMENSION_CONNECTIONS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_tcp_v6_connection",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SOCKET);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION connections '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+ }
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_sock_bytes_sent",
+ "Bytes sent.",
+ EBPF_COMMON_DIMENSION_BITS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_sock_bytes_sent",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SOCKET);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION bandwidth '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_sock_bytes_received",
+ "Bytes received.",
+ EBPF_COMMON_DIMENSION_BITS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_sock_bytes_received",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SOCKET);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION bandwidth '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_tcp_sendmsg",
+ "Calls to tcp_sendmsg.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_tcp_sendmsg",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SOCKET);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_tcp_cleanup_rbuf",
+ "Calls to tcp_cleanup_rbuf.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_tcp_cleanup_rbuf",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SOCKET);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_tcp_retransmit",
+ "Calls to tcp_retransmit.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_tcp_retransmit",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SOCKET);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_udp_sendmsg",
+ "Calls to udp_sendmsg.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_udp_sendmsg",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SOCKET);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_udp_recvmsg",
+ "Calls to udp_recvmsg.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_udp_recvmsg",
+ order,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SOCKET);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ w->charts_created |= 1<flags |= NETWORK_VIEWER_CHARTS_CREATED;
+ em->apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED;
}
/*****************************************************************
@@ -1517,7 +1464,7 @@ static int ebpf_is_specific_ip_inside_range(union netdata_ip_t *cmp, int family)
*
* @return It returns 1 when cmp is inside and 0 otherwise.
*/
-static int is_port_inside_range(uint16_t cmp)
+static int ebpf_is_port_inside_range(uint16_t cmp)
{
// We do not have restrictions for ports.
if (!network_viewer_opt.excluded_port && !network_viewer_opt.included_port)
@@ -1525,7 +1472,6 @@ static int is_port_inside_range(uint16_t cmp)
// Test if port is excluded
ebpf_network_viewer_port_list_t *move = network_viewer_opt.excluded_port;
- cmp = htons(cmp);
while (move) {
if (move->cmp_first <= cmp && cmp <= move->cmp_last)
return 0;
@@ -1583,493 +1529,322 @@ int hostname_matches_pattern(char *cmp)
* Compare destination addresses and destination ports to define next steps
*
* @param key the socket read from kernel ring
- * @param family the family used to compare IPs (AF_INET and AF_INET6)
+ * @param data the socket data, which is also used to refuse some sockets.
*
* @return It returns 1 if this socket is inside the ranges and 0 otherwise.
*/
-int is_socket_allowed(netdata_socket_idx_t *key, int family)
-{
- if (!is_port_inside_range(key->dport))
- return 0;
-
- return ebpf_is_specific_ip_inside_range(&key->daddr, family);
-}
-
-/**
- * Compare sockets
- *
- * Compare destination address and destination port.
- * We do not compare source port, because it is random.
- * We also do not compare source address, because inbound and outbound connections are stored in separated AVL trees.
- *
- * @param a pointer to netdata_socket_plot
- * @param b pointer to netdata_socket_plot
- *
- * @return It returns 0 case the values are equal, 1 case a is bigger than b and -1 case a is smaller than b.
- */
-static int ebpf_compare_sockets(void *a, void *b)
+int ebpf_is_socket_allowed(netdata_socket_idx_t *key, netdata_socket_t *data)
{
- struct netdata_socket_plot *val1 = a;
- struct netdata_socket_plot *val2 = b;
- int cmp = 0;
-
- // We do not need to compare val2 family, because data inside hash table is always from the same family
- if (val1->family == AF_INET) { //IPV4
- if (network_viewer_opt.included_port || network_viewer_opt.excluded_port)
- cmp = memcmp(&val1->index.dport, &val2->index.dport, sizeof(uint16_t));
+ int ret = 0;
+ // If family is not AF_UNSPEC and it is different of specified
+ if (network_viewer_opt.family && network_viewer_opt.family != data->family)
+ goto endsocketallowed;
- if (!cmp) {
- cmp = memcmp(&val1->index.daddr.addr32[0], &val2->index.daddr.addr32[0], sizeof(uint32_t));
- }
- } else {
- if (network_viewer_opt.included_port || network_viewer_opt.excluded_port)
- cmp = memcmp(&val1->index.dport, &val2->index.dport, sizeof(uint16_t));
+ if (!ebpf_is_port_inside_range(key->dport))
+ goto endsocketallowed;
- if (!cmp) {
- cmp = memcmp(&val1->index.daddr.addr32, &val2->index.daddr.addr32, 4*sizeof(uint32_t));
- }
- }
+ ret = ebpf_is_specific_ip_inside_range(&key->daddr, data->family);
- return cmp;
+endsocketallowed:
+ return ret;
}
/**
- * Build dimension name
- *
- * Fill dimension name vector with values given
- *
- * @param dimname the output vector
- * @param hostname the hostname for the socket.
- * @param service_name the service used to connect.
- * @param proto the protocol used in this connection
- * @param family is this IPV4(AF_INET) or IPV6(AF_INET6)
+ * Hash accumulator
*
- * @return it returns the size of the data copied on success and -1 otherwise.
+ * @param values the vector with the values to accumulate; the totals are
+ * stored in values[0].
+ * @param end the values size.
*/
-static inline int ebpf_build_outbound_dimension_name(char *dimname, char *hostname, char *service_name,
- char *proto, int family)
+static void ebpf_hash_socket_accumulator(netdata_socket_t *values, int end)
{
- if (network_viewer_opt.included_port || network_viewer_opt.excluded_port)
- return snprintf(dimname, CONFIG_MAX_NAME - 7, (family == AF_INET)?"%s:%s:%s_":"%s:%s:[%s]_",
- service_name, proto, hostname);
-
- return snprintf(dimname, CONFIG_MAX_NAME - 7, (family == AF_INET)?"%s:%s_":"%s:[%s]_",
- proto, hostname);
-}
+ int i;
+ uint8_t protocol = values[0].protocol;
+ uint64_t ct = values[0].current_timestamp;
+ uint64_t ft = values[0].first_timestamp;
+ uint16_t family = AF_UNSPEC;
+ uint32_t external_origin = values[0].external_origin;
+ for (i = 1; i < end; i++) {
+ netdata_socket_t *w = &values[i];
-/**
- * Fill inbound dimension name
- *
- * Mount the dimension name with the input given
- *
- * @param dimname the output vector
- * @param service_name the service used to connect.
- * @param proto the protocol used in this connection
- *
- * @return it returns the size of the data copied on success and -1 otherwise.
- */
-static inline int build_inbound_dimension_name(char *dimname, char *service_name, char *proto)
-{
- return snprintf(dimname, CONFIG_MAX_NAME - 7, "%s:%s_", service_name,
- proto);
-}
+ values[0].tcp.call_tcp_sent += w->tcp.call_tcp_sent;
+ values[0].tcp.call_tcp_received += w->tcp.call_tcp_received;
+ values[0].tcp.tcp_bytes_received += w->tcp.tcp_bytes_received;
+ values[0].tcp.tcp_bytes_sent += w->tcp.tcp_bytes_sent;
+ values[0].tcp.close += w->tcp.close;
+ values[0].tcp.retransmit += w->tcp.retransmit;
+ values[0].tcp.ipv4_connect += w->tcp.ipv4_connect;
+ values[0].tcp.ipv6_connect += w->tcp.ipv6_connect;
-/**
- * Fill Resolved Name
- *
- * Fill the resolved name structure with the value given.
- * The hostname is the largest value possible, if it is necessary to cut some value, it must be cut.
- *
- * @param ptr the output vector
- * @param hostname the hostname resolved or IP.
- * @param length the length for the hostname.
- * @param service_name the service name associated to the connection
- * @param is_outbound the is this an outbound connection
- */
-static inline void fill_resolved_name(netdata_socket_plot_t *ptr, char *hostname, size_t length,
- char *service_name, int is_outbound)
-{
- if (length < NETDATA_MAX_NETWORK_COMBINED_LENGTH)
- ptr->resolved_name = strdupz(hostname);
- else {
- length = NETDATA_MAX_NETWORK_COMBINED_LENGTH;
- ptr->resolved_name = mallocz( NETDATA_MAX_NETWORK_COMBINED_LENGTH + 1);
- memcpy(ptr->resolved_name, hostname, length);
- ptr->resolved_name[length] = '\0';
- }
-
- char dimname[CONFIG_MAX_NAME];
- int size;
- char *protocol;
- if (ptr->sock.protocol == IPPROTO_UDP) {
- protocol = "UDP";
- } else if (ptr->sock.protocol == IPPROTO_TCP) {
- protocol = "TCP";
- } else {
- protocol = "ALL";
- }
+ if (!protocol)
+ protocol = w->protocol;
- if (is_outbound)
- size = ebpf_build_outbound_dimension_name(dimname, hostname, service_name, protocol, ptr->family);
- else
- size = build_inbound_dimension_name(dimname,service_name, protocol);
+ if (family == AF_UNSPEC)
+ family = w->family;
- if (size > 0) {
- strcpy(&dimname[size], "sent");
- dimname[size + 4] = '\0';
- ptr->dimension_sent = strdupz(dimname);
+ if (w->current_timestamp > ct)
+ ct = w->current_timestamp;
- strcpy(&dimname[size], "recv");
- ptr->dimension_recv = strdupz(dimname);
+ if (!ft)
+ ft = w->first_timestamp;
- dimname[size - 1] = '\0';
- ptr->dimension_retransmit = strdupz(dimname);
+ if (w->external_origin)
+ external_origin = NETDATA_EBPF_SRC_IP_ORIGIN_EXTERNAL;
}
+
+ values[0].protocol = (!protocol)?IPPROTO_TCP:protocol;
+ values[0].current_timestamp = ct;
+ values[0].first_timestamp = ft;
+ values[0].external_origin = external_origin;
}
/**
- * Mount dimension names
+ * Translate socket
*
- * Fill the vector names after to resolve the addresses
+ * Convert socket address to string
*
- * @param ptr a pointer to the structure where the values are stored.
- * @param is_outbound is a outbound ptr value?
- *
- * @return It returns 1 if the name is valid and 0 otherwise.
+ * @param dst structure where we will store the address and port strings
+ * @param key the socket address
*/
-int fill_names(netdata_socket_plot_t *ptr, int is_outbound)
+static void ebpf_socket_translate(netdata_socket_plus_t *dst, netdata_socket_idx_t *key)
{
- char hostname[NI_MAXHOST], service_name[NI_MAXSERV];
- if (ptr->resolved)
- return 1;
-
+ uint32_t resolve = network_viewer_opt.service_resolution_enabled;
+ char service[NI_MAXSERV];
int ret;
- static int resolve_name = -1;
- static int resolve_service = -1;
- if (resolve_name == -1)
- resolve_name = network_viewer_opt.hostname_resolution_enabled;
-
- if (resolve_service == -1)
- resolve_service = network_viewer_opt.service_resolution_enabled;
-
- netdata_socket_idx_t *idx = &ptr->index;
-
- char *errname = { "Not resolved" };
- // Resolve Name
- if (ptr->family == AF_INET) { //IPV4
- struct sockaddr_in myaddr;
- memset(&myaddr, 0 , sizeof(myaddr));
-
- myaddr.sin_family = ptr->family;
- if (is_outbound) {
- myaddr.sin_port = idx->dport;
- myaddr.sin_addr.s_addr = idx->daddr.addr32[0];
- } else {
- myaddr.sin_port = idx->sport;
- myaddr.sin_addr.s_addr = idx->saddr.addr32[0];
- }
-
- ret = (!resolve_name)?-1:getnameinfo((struct sockaddr *)&myaddr, sizeof(myaddr), hostname,
- sizeof(hostname), service_name, sizeof(service_name), NI_NAMEREQD);
-
- if (!ret && !resolve_service) {
- snprintf(service_name, sizeof(service_name), "%u", ntohs(myaddr.sin_port));
+ if (dst->data.family == AF_INET) {
+ struct sockaddr_in ipv4_addr = { };
+ ipv4_addr.sin_port = 0;
+ ipv4_addr.sin_addr.s_addr = key->saddr.addr32[0];
+ ipv4_addr.sin_family = AF_INET;
+ if (resolve) {
+ // NI_NAMEREQD : It is too slow
+ ret = getnameinfo((struct sockaddr *) &ipv4_addr, sizeof(ipv4_addr), dst->socket_string.src_ip,
+ INET6_ADDRSTRLEN, service, NI_MAXSERV, NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret) {
+ collector_error("Cannot resolve name: %s", gai_strerror(ret));
+ resolve = 0;
+ } else {
+ ipv4_addr.sin_addr.s_addr = key->daddr.addr32[0];
+
+ ipv4_addr.sin_port = key->dport;
+ ret = getnameinfo((struct sockaddr *) &ipv4_addr, sizeof(ipv4_addr), dst->socket_string.dst_ip,
+ INET6_ADDRSTRLEN, dst->socket_string.dst_port, NI_MAXSERV,
+ NI_NUMERICHOST);
+ if (ret) {
+ collector_error("Cannot resolve name: %s", gai_strerror(ret));
+ resolve = 0;
+ }
+ }
}
- if (ret) {
- // I cannot resolve the name, I will use the IP
- if (!inet_ntop(AF_INET, &myaddr.sin_addr.s_addr, hostname, NI_MAXHOST)) {
- strncpy(hostname, errname, 13);
- }
+ // When resolution fails, we should use addresses
+ if (!resolve) {
+ ipv4_addr.sin_addr.s_addr = key->saddr.addr32[0];
- snprintf(service_name, sizeof(service_name), "%u", ntohs(myaddr.sin_port));
- ret = 1;
- }
- } else { // IPV6
- struct sockaddr_in6 myaddr6;
- memset(&myaddr6, 0 , sizeof(myaddr6));
-
- myaddr6.sin6_family = AF_INET6;
- if (is_outbound) {
- myaddr6.sin6_port = idx->dport;
- memcpy(myaddr6.sin6_addr.s6_addr, idx->daddr.addr8, sizeof(union netdata_ip_t));
- } else {
- myaddr6.sin6_port = idx->sport;
- memcpy(myaddr6.sin6_addr.s6_addr, idx->saddr.addr8, sizeof(union netdata_ip_t));
- }
+ if(!inet_ntop(AF_INET, &ipv4_addr.sin_addr, dst->socket_string.src_ip, INET6_ADDRSTRLEN))
+ netdata_log_info("Cannot convert IP %u .", ipv4_addr.sin_addr.s_addr);
- ret = (!resolve_name)?-1:getnameinfo((struct sockaddr *)&myaddr6, sizeof(myaddr6), hostname,
- sizeof(hostname), service_name, sizeof(service_name), NI_NAMEREQD);
+ ipv4_addr.sin_addr.s_addr = key->daddr.addr32[0];
- if (!ret && !resolve_service) {
- snprintf(service_name, sizeof(service_name), "%u", ntohs(myaddr6.sin6_port));
+ if(!inet_ntop(AF_INET, &ipv4_addr.sin_addr, dst->socket_string.dst_ip, INET6_ADDRSTRLEN))
+ netdata_log_info("Cannot convert IP %u .", ipv4_addr.sin_addr.s_addr);
+ snprintfz(dst->socket_string.dst_port, NI_MAXSERV, "%u", ntohs(key->dport));
}
-
- if (ret) {
- // I cannot resolve the name, I will use the IP
- if (!inet_ntop(AF_INET6, myaddr6.sin6_addr.s6_addr, hostname, NI_MAXHOST)) {
- strncpy(hostname, errname, 13);
+ } else {
+ struct sockaddr_in6 ipv6_addr = { };
+ memcpy(&ipv6_addr.sin6_addr, key->saddr.addr8, sizeof(key->saddr.addr8));
+ ipv6_addr.sin6_family = AF_INET6;
+ if (resolve) {
+ ret = getnameinfo((struct sockaddr *) &ipv6_addr, sizeof(ipv6_addr), dst->socket_string.src_ip,
+ INET6_ADDRSTRLEN, service, NI_MAXSERV, NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret) {
+ collector_error("Cannot resolve name: %s", gai_strerror(ret));
+ resolve = 0;
+ } else {
+ memcpy(&ipv6_addr.sin6_addr, key->daddr.addr8, sizeof(key->daddr.addr8));
+ ret = getnameinfo((struct sockaddr *) &ipv6_addr, sizeof(ipv6_addr), dst->socket_string.dst_ip,
+ INET6_ADDRSTRLEN, dst->socket_string.dst_port, NI_MAXSERV,
+ NI_NUMERICHOST);
+ if (ret) {
+ collector_error("Cannot resolve name: %s", gai_strerror(ret));
+ resolve = 0;
+ }
}
+ }
- snprintf(service_name, sizeof(service_name), "%u", ntohs(myaddr6.sin6_port));
+ if (!resolve) {
+ memcpy(&ipv6_addr.sin6_addr, key->saddr.addr8, sizeof(key->saddr.addr8));
+ if(!inet_ntop(AF_INET6, &ipv6_addr.sin6_addr, dst->socket_string.src_ip, INET6_ADDRSTRLEN))
+ netdata_log_info("Cannot convert IPv6 Address.");
- ret = 1;
+ memcpy(&ipv6_addr.sin6_addr, key->daddr.addr8, sizeof(key->daddr.addr8));
+ if(!inet_ntop(AF_INET6, &ipv6_addr.sin6_addr, dst->socket_string.dst_ip, INET6_ADDRSTRLEN))
+ netdata_log_info("Cannot convert IPv6 Address.");
+ snprintfz(dst->socket_string.dst_port, NI_MAXSERV, "%u", ntohs(key->dport));
}
}
+ dst->pid = key->pid;
- fill_resolved_name(ptr, hostname,
- strlen(hostname) + strlen(service_name)+ NETDATA_DOTS_PROTOCOL_COMBINED_LENGTH,
- service_name, is_outbound);
-
- if (resolve_name && !ret)
- ret = hostname_matches_pattern(hostname);
-
- ptr->resolved++;
-
- return ret;
+ if (!strcmp(dst->socket_string.dst_port, "0"))
+ snprintfz(dst->socket_string.dst_port, NI_MAXSERV, "%u", ntohs(key->dport));
+#ifdef NETDATA_DEV_MODE
+ collector_info("New socket: { ORIGIN IP: %s, ORIGIN : %u, DST IP:%s, DST PORT: %s, PID: %u, PROTO: %d, FAMILY: %d}",
+ dst->socket_string.src_ip,
+ dst->data.external_origin,
+ dst->socket_string.dst_ip,
+ dst->socket_string.dst_port,
+ dst->pid,
+ dst->data.protocol,
+ dst->data.family
+ );
+#endif
}
/**
- * Fill last Network Viewer Dimension
+ * Update array vectors
*
- * Fill the unique dimension that is always plotted.
+ * Read data from hash table and update vectors.
*
- * @param ptr the pointer for the last dimension
- * @param is_outbound is this an inbound structure?
+ * @param em the structure with configuration
*/
-static void fill_last_nv_dimension(netdata_socket_plot_t *ptr, int is_outbound)
+static void ebpf_update_array_vectors(ebpf_module_t *em)
{
- char hostname[NI_MAXHOST], service_name[NI_MAXSERV];
- char *other = { "other" };
- // We are also copying the NULL bytes to avoid warnings in new compilers
- strncpy(hostname, other, 6);
- strncpy(service_name, other, 6);
-
- ptr->family = AF_INET;
- ptr->sock.protocol = 255;
- ptr->flags = (!is_outbound)?NETDATA_INBOUND_DIRECTION:NETDATA_OUTBOUND_DIRECTION;
+ netdata_thread_disable_cancelability();
+ netdata_socket_idx_t key = {};
+ netdata_socket_idx_t next_key = {};
- fill_resolved_name(ptr, hostname, 10 + NETDATA_DOTS_PROTOCOL_COMBINED_LENGTH, service_name, is_outbound);
+ int maps_per_core = em->maps_per_core;
+ int fd = em->maps[NETDATA_SOCKET_OPEN_SOCKET].map_fd;
-#ifdef NETDATA_INTERNAL_CHECKS
- netdata_log_info("Last %s dimension added: ID = %u, IP = OTHER, NAME = %s, DIM1 = %s, DIM2 = %s, DIM3 = %s",
- (is_outbound)?"outbound":"inbound", network_viewer_opt.max_dim - 1, ptr->resolved_name,
- ptr->dimension_recv, ptr->dimension_sent, ptr->dimension_retransmit);
-#endif
-}
+ netdata_socket_t *values = socket_values;
+ size_t length = sizeof(netdata_socket_t);
+ int test, end;
+ if (maps_per_core) {
+ length *= ebpf_nprocs;
+ end = ebpf_nprocs;
+ } else
+ end = 1;
-/**
- * Update Socket Data
- *
- * Update the socket information with last collected data
- *
- * @param sock
- * @param lvalues
- */
-static inline void update_socket_data(netdata_socket_t *sock, netdata_socket_t *lvalues)
-{
- sock->recv_packets = lvalues->recv_packets;
- sock->sent_packets = lvalues->sent_packets;
- sock->recv_bytes = lvalues->recv_bytes;
- sock->sent_bytes = lvalues->sent_bytes;
- sock->retransmit = lvalues->retransmit;
- sock->ct = lvalues->ct;
-}
+ // We need to reset the values when we are working on kernel 4.15 or newer, because the kernel does not create
+ // values for a specific processor unless it is used to store data. As a result of this behavior, the next socket
+ // can have values from the previous one.
+ memset(values, 0, length);
+ time_t update_time = time(NULL);
+ while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
+ test = bpf_map_lookup_elem(fd, &key, values);
+ if (test < 0) {
+ goto end_socket_loop;
+ }
-/**
- * Store socket inside avl
- *
- * Store the socket values inside the avl tree.
- *
- * @param out the structure with information used to plot charts.
- * @param lvalues Values read from socket ring.
- * @param lindex the index information, the real socket.
- * @param family the family associated to the socket
- * @param flags the connection flags
- */
-static void store_socket_inside_avl(netdata_vector_plot_t *out, netdata_socket_t *lvalues,
- netdata_socket_idx_t *lindex, int family, uint32_t flags)
-{
- netdata_socket_plot_t test, *ret ;
+ if (key.pid > (uint32_t)pid_max) {
+ goto end_socket_loop;
+ }
- memcpy(&test.index, lindex, sizeof(netdata_socket_idx_t));
- test.flags = flags;
+ ebpf_hash_socket_accumulator(values, end);
+ ebpf_socket_fill_publish_apps(key.pid, values);
- ret = (netdata_socket_plot_t *) avl_search_lock(&out->tree, (avl_t *)&test);
- if (ret) {
- if (lvalues->ct != ret->plot.last_time) {
- update_socket_data(&ret->sock, lvalues);
+ // We update UDP to show info with charts, but we do not show them with functions
+ /*
+ if (key.dport == NETDATA_EBPF_UDP_PORT && values[0].protocol == IPPROTO_UDP) {
+ bpf_map_delete_elem(fd, &key);
+ goto end_socket_loop;
}
- } else {
- uint32_t curr = out->next;
- uint32_t last = out->last;
+ */
- netdata_socket_plot_t *w = &out->plot[curr];
+ // Discard non-bind sockets
+ if (!key.daddr.addr64[0] && !key.daddr.addr64[1] && !key.saddr.addr64[0] && !key.saddr.addr64[1]) {
+ bpf_map_delete_elem(fd, &key);
+ goto end_socket_loop;
+ }
- int resolved;
- if (curr == last) {
- if (lvalues->ct != w->plot.last_time) {
- update_socket_data(&w->sock, lvalues);
- }
- return;
- } else {
- memcpy(&w->sock, lvalues, sizeof(netdata_socket_t));
- memcpy(&w->index, lindex, sizeof(netdata_socket_idx_t));
- w->family = family;
+ // When a socket is not allowed, we do not append it to the table, but we still keep it to accumulate data.
+ if (!ebpf_is_socket_allowed(&key, values)) {
+ goto end_socket_loop;
+ }
- resolved = fill_names(w, out != (netdata_vector_plot_t *)&inbound_vectors);
+ // Get PID structure
+ rw_spinlock_write_lock(&ebpf_judy_pid.index.rw_spinlock);
+ PPvoid_t judy_array = &ebpf_judy_pid.index.JudyLArray;
+ netdata_ebpf_judy_pid_stats_t *pid_ptr = ebpf_get_pid_from_judy_unsafe(judy_array, key.pid);
+ if (!pid_ptr) {
+ goto end_socket_loop;
}
- if (!resolved) {
- freez(w->resolved_name);
- freez(w->dimension_sent);
- freez(w->dimension_recv);
- freez(w->dimension_retransmit);
+ // Get Socket structure
+ rw_spinlock_write_lock(&pid_ptr->socket_stats.rw_spinlock);
+ netdata_socket_plus_t **socket_pptr = (netdata_socket_plus_t **)ebpf_judy_insert_unsafe(
+ &pid_ptr->socket_stats.JudyLArray, values[0].first_timestamp);
+ netdata_socket_plus_t *socket_ptr = *socket_pptr;
+ bool translate = false;
+ if (likely(*socket_pptr == NULL)) {
+ *socket_pptr = aral_mallocz(aral_socket_table);
- memset(w, 0, sizeof(netdata_socket_plot_t));
+ socket_ptr = *socket_pptr;
- return;
+ translate = true;
+ }
+ uint64_t prev_period = socket_ptr->data.current_timestamp;
+ memcpy(&socket_ptr->data, &values[0], sizeof(netdata_socket_t));
+ if (translate)
+ ebpf_socket_translate(socket_ptr, &key);
+ else { // Check socket was updated
+ if (prev_period) {
+ if (values[0].current_timestamp > prev_period) // Socket updated
+ socket_ptr->last_update = update_time;
+ else if ((update_time - socket_ptr->last_update) > em->update_every) {
+ // Socket was not updated since last read
+ JudyLDel(&pid_ptr->socket_stats.JudyLArray, values[0].first_timestamp, PJE0);
+ aral_freez(aral_socket_table, socket_ptr);
+ }
+ } else // First time
+ socket_ptr->last_update = update_time;
}
- w->flags = flags;
- netdata_socket_plot_t *check ;
- check = (netdata_socket_plot_t *) avl_insert_lock(&out->tree, (avl_t *)w);
- if (check != w)
- netdata_log_error("Internal error, cannot insert the AVL tree.");
+ rw_spinlock_write_unlock(&pid_ptr->socket_stats.rw_spinlock);
+ rw_spinlock_write_unlock(&ebpf_judy_pid.index.rw_spinlock);
-#ifdef NETDATA_INTERNAL_CHECKS
- char iptext[INET6_ADDRSTRLEN];
- if (inet_ntop(family, &w->index.daddr.addr8, iptext, sizeof(iptext)))
- netdata_log_info("New %s dimension added: ID = %u, IP = %s, NAME = %s, DIM1 = %s, DIM2 = %s, DIM3 = %s",
- (out == &inbound_vectors)?"inbound":"outbound", curr, iptext, w->resolved_name,
- w->dimension_recv, w->dimension_sent, w->dimension_retransmit);
-#endif
- curr++;
- if (curr > last)
- curr = last;
- out->next = curr;
+end_socket_loop:
+ memset(values, 0, length);
+ memcpy(&key, &next_key, sizeof(key));
}
+ netdata_thread_enable_cancelability();
}
/**
- * Compare Vector to store
+ * Socket thread
*
- * Compare input values with local address to select table to store.
+ * Thread used to generate socket charts.
*
- * @param direction store inbound and outbound direction.
- * @param cmp index read from hash table.
- * @param proto the protocol read.
+ * @param ptr a pointer to `struct ebpf_module`
*
- * @return It returns the structure with address to compare.
+ * @return It always returns NULL
*/
-netdata_vector_plot_t * select_vector_to_store(uint32_t *direction, netdata_socket_idx_t *cmp, uint8_t proto)
+void *ebpf_read_socket_thread(void *ptr)
{
- if (!listen_ports) {
- *direction = NETDATA_OUTBOUND_DIRECTION;
- return &outbound_vectors;
- }
-
- ebpf_network_viewer_port_list_t *move_ports = listen_ports;
- while (move_ports) {
- if (move_ports->protocol == proto && move_ports->first == cmp->sport) {
- *direction = NETDATA_INBOUND_DIRECTION;
- return &inbound_vectors;
- }
-
- move_ports = move_ports->next;
- }
-
- *direction = NETDATA_OUTBOUND_DIRECTION;
- return &outbound_vectors;
-}
-
-/**
- * Hash accumulator
- *
- * @param values the values used to calculate the data.
- * @param key the key to store data.
- * @param family the connection family
- * @param end the values size.
- */
-static void hash_accumulator(netdata_socket_t *values, netdata_socket_idx_t *key, int family, int end)
-{
- if (!network_viewer_opt.enabled || !is_socket_allowed(key, family))
- return;
-
- uint64_t bsent = 0, brecv = 0, psent = 0, precv = 0;
- uint16_t retransmit = 0;
- int i;
- uint8_t protocol = values[0].protocol;
- uint64_t ct = values[0].ct;
- for (i = 1; i < end; i++) {
- netdata_socket_t *w = &values[i];
-
- precv += w->recv_packets;
- psent += w->sent_packets;
- brecv += w->recv_bytes;
- bsent += w->sent_bytes;
- retransmit += w->retransmit;
-
- if (!protocol)
- protocol = w->protocol;
-
- if (w->ct != ct)
- ct = w->ct;
- }
-
- values[0].recv_packets += precv;
- values[0].sent_packets += psent;
- values[0].recv_bytes += brecv;
- values[0].sent_bytes += bsent;
- values[0].retransmit += retransmit;
- values[0].protocol = (!protocol)?IPPROTO_TCP:protocol;
- values[0].ct = ct;
+ heartbeat_t hb;
+ heartbeat_init(&hb);
- uint32_t dir;
- netdata_vector_plot_t *table = select_vector_to_store(&dir, key, protocol);
- store_socket_inside_avl(table, &values[0], key, family, dir);
-}
+ ebpf_module_t *em = (ebpf_module_t *)ptr;
-/**
- * Read socket hash table
- *
- * Read data from hash tables created on kernel ring.
- *
- * @param fd the hash table with data.
- * @param family the family associated to the hash table
- * @param maps_per_core do I need to read all cores?
- *
- * @return it returns 0 on success and -1 otherwise.
- */
-static void ebpf_read_socket_hash_table(int fd, int family, int maps_per_core)
-{
- netdata_socket_idx_t key = {};
- netdata_socket_idx_t next_key = {};
+ ebpf_update_array_vectors(em);
- netdata_socket_t *values = socket_values;
- size_t length = sizeof(netdata_socket_t);
- int test, end;
- if (maps_per_core) {
- length *= ebpf_nprocs;
- end = ebpf_nprocs;
- } else
- end = 1;
+ int update_every = em->update_every;
+ int counter = update_every - 1;
- while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
- // We need to reset the values when we are working on kernel 4.15 or newer, because kernel does not create
- // values for specific processor unless it is used to store data. As result of this behavior one the next socket
- // can have values from the previous one.
- memset(values, 0, length);
- test = bpf_map_lookup_elem(fd, &key, values);
- if (test < 0) {
- key = next_key;
+ uint32_t running_time = 0;
+ uint32_t lifetime = em->lifetime;
+ usec_t period = update_every * USEC_PER_SEC;
+ while (!ebpf_plugin_exit && running_time < lifetime) {
+ (void)heartbeat_next(&hb, period);
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
- }
- hash_accumulator(values, &key, family, end);
+ ebpf_update_array_vectors(em);
- key = next_key;
+ counter = 0;
}
+
+ return NULL;
}
/**
@@ -2164,44 +1939,6 @@ static void read_listen_table()
}
}
-/**
- * Socket read hash
- *
- * This is the thread callback.
- * This thread is necessary, because we cannot freeze the whole plugin to read the data on very busy socket.
- *
- * @param ptr It is a NULL value for this thread.
- *
- * @return It always returns NULL.
- */
-void *ebpf_socket_read_hash(void *ptr)
-{
- netdata_thread_cleanup_push(ebpf_socket_cleanup, ptr);
- ebpf_module_t *em = (ebpf_module_t *)ptr;
-
- heartbeat_t hb;
- heartbeat_init(&hb);
- int fd_ipv4 = socket_maps[NETDATA_SOCKET_TABLE_IPV4].map_fd;
- int fd_ipv6 = socket_maps[NETDATA_SOCKET_TABLE_IPV6].map_fd;
- int maps_per_core = em->maps_per_core;
- // This thread is cancelled from another thread
- uint32_t running_time;
- uint32_t lifetime = em->lifetime;
- for (running_time = 0;!ebpf_exit_plugin && running_time < lifetime; running_time++) {
- (void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin)
- break;
-
- pthread_mutex_lock(&nv_mutex);
- ebpf_read_socket_hash_table(fd_ipv4, AF_INET, maps_per_core);
- ebpf_read_socket_hash_table(fd_ipv6, AF_INET6, maps_per_core);
- pthread_mutex_unlock(&nv_mutex);
- }
-
- netdata_thread_cleanup_pop(1);
- return NULL;
-}
-
/**
* Read the hash table and store data to allocated vectors.
*
@@ -2251,9 +1988,9 @@ static void ebpf_socket_read_hash_global_tables(netdata_idx_t *stats, int maps_p
* Fill publish apps when necessary.
*
* @param current_pid the PID that I am updating
- * @param eb the structure with data read from memory.
+ * @param ns the structure with data read from memory.
*/
-void ebpf_socket_fill_publish_apps(uint32_t current_pid, ebpf_bandwidth_t *eb)
+void ebpf_socket_fill_publish_apps(uint32_t current_pid, netdata_socket_t *ns)
{
ebpf_socket_publish_apps_t *curr = socket_bandwidth_curr[current_pid];
if (!curr) {
@@ -2261,98 +1998,33 @@ void ebpf_socket_fill_publish_apps(uint32_t current_pid, ebpf_bandwidth_t *eb)
socket_bandwidth_curr[current_pid] = curr;
}
- curr->bytes_sent = eb->bytes_sent;
- curr->bytes_received = eb->bytes_received;
- curr->call_tcp_sent = eb->call_tcp_sent;
- curr->call_tcp_received = eb->call_tcp_received;
- curr->retransmit = eb->retransmit;
- curr->call_udp_sent = eb->call_udp_sent;
- curr->call_udp_received = eb->call_udp_received;
- curr->call_close = eb->close;
- curr->call_tcp_v4_connection = eb->tcp_v4_connection;
- curr->call_tcp_v6_connection = eb->tcp_v6_connection;
-}
+ curr->bytes_sent += ns->tcp.tcp_bytes_sent;
+ curr->bytes_received += ns->tcp.tcp_bytes_received;
+ curr->call_tcp_sent += ns->tcp.call_tcp_sent;
+ curr->call_tcp_received += ns->tcp.call_tcp_received;
+ curr->retransmit += ns->tcp.retransmit;
+ curr->call_close += ns->tcp.close;
+ curr->call_tcp_v4_connection += ns->tcp.ipv4_connect;
+ curr->call_tcp_v6_connection += ns->tcp.ipv6_connect;
-/**
- * Bandwidth accumulator.
- *
- * @param out the vector with the values to sum
- */
-void ebpf_socket_bandwidth_accumulator(ebpf_bandwidth_t *out, int maps_per_core)
-{
- int i, end = (maps_per_core) ? ebpf_nprocs : 1;
- ebpf_bandwidth_t *total = &out[0];
- for (i = 1; i < end; i++) {
- ebpf_bandwidth_t *move = &out[i];
- total->bytes_sent += move->bytes_sent;
- total->bytes_received += move->bytes_received;
- total->call_tcp_sent += move->call_tcp_sent;
- total->call_tcp_received += move->call_tcp_received;
- total->retransmit += move->retransmit;
- total->call_udp_sent += move->call_udp_sent;
- total->call_udp_received += move->call_udp_received;
- total->close += move->close;
- total->tcp_v4_connection += move->tcp_v4_connection;
- total->tcp_v6_connection += move->tcp_v6_connection;
- }
-}
-
-/**
- * Update the apps data reading information from the hash table
- *
- * @param maps_per_core do I need to read all cores?
- */
-static void ebpf_socket_update_apps_data(int maps_per_core)
-{
- int fd = socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].map_fd;
- ebpf_bandwidth_t *eb = bandwidth_vector;
- uint32_t key;
- struct ebpf_pid_stat *pids = ebpf_root_of_pids;
- size_t length = sizeof(ebpf_bandwidth_t);
- if (maps_per_core)
- length *= ebpf_nprocs;
- while (pids) {
- key = pids->pid;
-
- if (bpf_map_lookup_elem(fd, &key, eb)) {
- pids = pids->next;
- continue;
- }
-
- ebpf_socket_bandwidth_accumulator(eb, maps_per_core);
-
- ebpf_socket_fill_publish_apps(key, eb);
-
- memset(eb, 0, length);
-
- pids = pids->next;
- }
+ curr->call_udp_sent += ns->udp.call_udp_sent;
+ curr->call_udp_received += ns->udp.call_udp_received;
}
/**
* Update cgroup
*
* Update cgroup data based in PIDs.
- *
- * @param maps_per_core do I need to read all cores?
*/
-static void ebpf_update_socket_cgroup(int maps_per_core)
+static void ebpf_update_socket_cgroup()
{
ebpf_cgroup_target_t *ect ;
- ebpf_bandwidth_t *eb = bandwidth_vector;
- int fd = socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].map_fd;
-
- size_t length = sizeof(ebpf_bandwidth_t);
- if (maps_per_core)
- length *= ebpf_nprocs;
-
pthread_mutex_lock(&mutex_cgroup_shm);
for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
struct pid_on_target2 *pids;
for (pids = ect->pids; pids; pids = pids->next) {
int pid = pids->pid;
- ebpf_bandwidth_t *out = &pids->socket;
ebpf_socket_publish_apps_t *publish = &ect->publish_socket;
if (likely(socket_bandwidth_curr) && socket_bandwidth_curr[pid]) {
ebpf_socket_publish_apps_t *in = socket_bandwidth_curr[pid];
@@ -2367,25 +2039,6 @@ static void ebpf_update_socket_cgroup(int maps_per_core)
publish->call_close = in->call_close;
publish->call_tcp_v4_connection = in->call_tcp_v4_connection;
publish->call_tcp_v6_connection = in->call_tcp_v6_connection;
- } else {
- if (!bpf_map_lookup_elem(fd, &pid, eb)) {
- ebpf_socket_bandwidth_accumulator(eb, maps_per_core);
-
- memcpy(out, eb, sizeof(ebpf_bandwidth_t));
-
- publish->bytes_sent = out->bytes_sent;
- publish->bytes_received = out->bytes_received;
- publish->call_tcp_sent = out->call_tcp_sent;
- publish->call_tcp_received = out->call_tcp_received;
- publish->retransmit = out->retransmit;
- publish->call_udp_sent = out->call_udp_sent;
- publish->call_udp_received = out->call_udp_received;
- publish->call_close = out->close;
- publish->call_tcp_v4_connection = out->tcp_v4_connection;
- publish->call_tcp_v6_connection = out->tcp_v6_connection;
-
- memset(eb, 0, length);
- }
}
}
}
@@ -2406,18 +2059,18 @@ static void ebpf_socket_sum_cgroup_pids(ebpf_socket_publish_apps_t *socket, stru
memset(&accumulator, 0, sizeof(accumulator));
while (pids) {
- ebpf_bandwidth_t *w = &pids->socket;
-
- accumulator.bytes_received += w->bytes_received;
- accumulator.bytes_sent += w->bytes_sent;
- accumulator.call_tcp_received += w->call_tcp_received;
- accumulator.call_tcp_sent += w->call_tcp_sent;
- accumulator.retransmit += w->retransmit;
- accumulator.call_udp_received += w->call_udp_received;
- accumulator.call_udp_sent += w->call_udp_sent;
- accumulator.call_close += w->close;
- accumulator.call_tcp_v4_connection += w->tcp_v4_connection;
- accumulator.call_tcp_v6_connection += w->tcp_v6_connection;
+ netdata_socket_t *w = &pids->socket;
+
+ accumulator.bytes_received += w->tcp.tcp_bytes_received;
+ accumulator.bytes_sent += w->tcp.tcp_bytes_sent;
+ accumulator.call_tcp_received += w->tcp.call_tcp_received;
+ accumulator.call_tcp_sent += w->tcp.call_tcp_sent;
+ accumulator.retransmit += w->tcp.retransmit;
+ accumulator.call_close += w->tcp.close;
+ accumulator.call_tcp_v4_connection += w->tcp.ipv4_connect;
+ accumulator.call_tcp_v6_connection += w->tcp.ipv6_connect;
+ accumulator.call_udp_received += w->udp.call_udp_received;
+ accumulator.call_udp_sent += w->udp.call_udp_sent;
pids = pids->next;
}
@@ -2457,15 +2110,21 @@ static void ebpf_create_specific_socket_charts(char *type, int update_every)
&socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4], 1,
update_every, NETDATA_EBPF_MODULE_NAME_SOCKET);
- ebpf_create_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V6,
- "Calls to tcp_v6_connection",
- EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_CGROUP_NET_GROUP,
- NETDATA_CGROUP_TCP_V6_CONN_CONTEXT,
- NETDATA_EBPF_CHART_TYPE_LINE,
- NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++,
- ebpf_create_global_dimension,
- &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6], 1,
- update_every, NETDATA_EBPF_MODULE_NAME_SOCKET);
+ if (tcp_v6_connect_address.type == 'T') {
+ ebpf_create_chart(type,
+ NETDATA_NET_APPS_CONNECTION_TCP_V6,
+ "Calls to tcp_v6_connection",
+ EBPF_COMMON_DIMENSION_CONNECTIONS,
+ NETDATA_CGROUP_NET_GROUP,
+ NETDATA_CGROUP_TCP_V6_CONN_CONTEXT,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++,
+ ebpf_create_global_dimension,
+ &socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6],
+ 1,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SOCKET);
+ }
ebpf_create_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV,
"Bytes received",
@@ -2549,47 +2208,55 @@ static void ebpf_create_specific_socket_charts(char *type, int update_every)
static void ebpf_obsolete_specific_socket_charts(char *type, int update_every)
{
int order_basis = 5300;
- ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, "Calls to tcp_v4_connection",
+ ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, "", "Calls to tcp_v4_connection",
EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_APPS_NET_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every);
- ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_CONNECTION_TCP_V6,"Calls to tcp_v6_connection",
- EBPF_COMMON_DIMENSION_CONNECTIONS, NETDATA_APPS_NET_GROUP,
- NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT,
- NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every);
+ if (tcp_v6_connect_address.type == 'T') {
+ ebpf_write_chart_obsolete(type,
+ NETDATA_NET_APPS_CONNECTION_TCP_V6,
+ "",
+ "Calls to tcp_v6_connection",
+ EBPF_COMMON_DIMENSION_CONNECTIONS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_LINE,
+ NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT,
+ NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++,
+ update_every);
+ }
- ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_RECV, "Bytes received",
+ ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_RECV, "", "Bytes received",
EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_BYTES_RECV_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every);
- ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_SENT,"Bytes sent",
+ ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_SENT, "","Bytes sent",
EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_BYTES_SEND_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every);
- ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, "Calls to tcp_cleanup_rbuf.",
+ ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, "", "Calls to tcp_cleanup_rbuf.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_RECV_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every);
- ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, "Calls to tcp_sendmsg.",
+ ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, "", "Calls to tcp_sendmsg.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_SEND_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every);
- ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, "Calls to tcp_retransmit.",
+ ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, "", "Calls to tcp_retransmit.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_TCP_RETRANSMIT_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every);
- ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, "Calls to udp_sendmsg",
+ ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, "", "Calls to udp_sendmsg",
EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_SERVICES_SOCKET_UDP_SEND_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every);
- ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, "Calls to udp_recvmsg",
+ ebpf_write_chart_obsolete(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, "", "Calls to udp_recvmsg",
EBPF_COMMON_DIMENSION_CALL, NETDATA_APPS_NET_GROUP, NETDATA_EBPF_CHART_TYPE_LINE,
NETDATA_SERVICES_SOCKET_UDP_RECV_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + order_basis++, update_every);
@@ -2605,50 +2272,52 @@ static void ebpf_obsolete_specific_socket_charts(char *type, int update_every)
*/
static void ebpf_send_specific_socket_data(char *type, ebpf_socket_publish_apps_t *values)
{
- write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V4);
+ ebpf_write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V4, "");
write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V4].name,
(long long) values->call_tcp_v4_connection);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V6);
- write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6].name,
- (long long) values->call_tcp_v6_connection);
- write_end_chart();
+ if (tcp_v6_connect_address.type == 'T') {
+ ebpf_write_begin_chart(type, NETDATA_NET_APPS_CONNECTION_TCP_V6, "");
+ write_chart_dimension(
+ socket_publish_aggregated[NETDATA_IDX_TCP_CONNECTION_V6].name, (long long)values->call_tcp_v6_connection);
+ ebpf_write_end_chart();
+ }
- write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_SENT);
+ ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_SENT, "");
write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_SENDMSG].name,
(long long) values->bytes_sent);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV);
+ ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_RECV, "");
write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF].name,
(long long) values->bytes_received);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS);
+ ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, "");
write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_SENDMSG].name,
(long long) values->call_tcp_sent);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS);
+ ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, "");
write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_CLEANUP_RBUF].name,
(long long) values->call_tcp_received);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT);
+ ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, "");
write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_TCP_RETRANSMIT].name,
(long long) values->retransmit);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS);
+ ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, "");
write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_UDP_SENDMSG].name,
(long long) values->call_udp_sent);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS);
+ ebpf_write_begin_chart(type, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, "");
write_chart_dimension(socket_publish_aggregated[NETDATA_IDX_UDP_RECVBUF].name,
(long long) values->call_udp_received);
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -2670,14 +2339,18 @@ static void ebpf_create_systemd_socket_charts(int update_every)
NETDATA_SERVICES_SOCKET_TCP_V4_CONN_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET,
update_every);
- ebpf_create_charts_on_systemd(NETDATA_NET_APPS_CONNECTION_TCP_V6,
- "Calls to tcp_v6_connection", EBPF_COMMON_DIMENSION_CONNECTIONS,
- NETDATA_APPS_NET_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- order++,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT, NETDATA_EBPF_MODULE_NAME_SOCKET,
- update_every);
+ if (tcp_v6_connect_address.type == 'T') {
+ ebpf_create_charts_on_systemd(NETDATA_NET_APPS_CONNECTION_TCP_V6,
+ "Calls to tcp_v6_connection",
+ EBPF_COMMON_DIMENSION_CONNECTIONS,
+ NETDATA_APPS_NET_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ order++,
+ ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
+ NETDATA_SERVICES_SOCKET_TCP_V6_CONN_CONTEXT,
+ NETDATA_EBPF_MODULE_NAME_SOCKET,
+ update_every);
+ }
ebpf_create_charts_on_systemd(NETDATA_NET_APPS_BANDWIDTH_RECV,
"Bytes received", EBPF_COMMON_DIMENSION_BITS,
@@ -2756,77 +2429,79 @@ static void ebpf_create_systemd_socket_charts(int update_every)
static void ebpf_send_systemd_socket_charts()
{
ebpf_cgroup_target_t *ect;
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V4);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V4, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_v4_connection);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V6);
- for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
- if (unlikely(ect->systemd) && unlikely(ect->updated)) {
- write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_v6_connection);
+ if (tcp_v6_connect_address.type == 'T') {
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_CONNECTION_TCP_V6, "");
+ for (ect = ebpf_cgroup_pids; ect; ect = ect->next) {
+ if (unlikely(ect->systemd) && unlikely(ect->updated)) {
+ write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_v6_connection);
+ }
}
+ ebpf_write_end_chart();
}
- write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_SENT);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_SENT, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_socket.bytes_sent);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_RECV);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_RECV, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_socket.bytes_received);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_SEND_CALLS, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_sent);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RECV_CALLS, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_socket.call_tcp_received);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_TCP_RETRANSMIT, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_socket.retransmit);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_socket.call_udp_sent);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long)ect->publish_socket.call_udp_received);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -2902,15 +2577,6 @@ static void socket_collector(ebpf_module_t *em)
{
heartbeat_t hb;
heartbeat_init(&hb);
- uint32_t network_connection = network_viewer_opt.enabled;
-
- if (network_connection) {
- socket_threads.thread = mallocz(sizeof(netdata_thread_t));
- socket_threads.start_routine = ebpf_socket_read_hash;
-
- netdata_thread_create(socket_threads.thread, socket_threads.name,
- NETDATA_THREAD_OPTION_DEFAULT, ebpf_socket_read_hash, em);
- }
int cgroups = em->cgroup_charts;
if (cgroups)
@@ -2924,9 +2590,9 @@ static void socket_collector(ebpf_module_t *em)
uint32_t lifetime = em->lifetime;
netdata_idx_t *stats = em->hash_table_stats;
memset(stats, 0, sizeof(em->hash_table_stats));
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -2937,14 +2603,8 @@ static void socket_collector(ebpf_module_t *em)
}
pthread_mutex_lock(&collect_data_mutex);
- if (socket_apps_enabled)
- ebpf_socket_update_apps_data(maps_per_core);
-
if (cgroups)
- ebpf_update_socket_cgroup(maps_per_core);
-
- if (network_connection)
- calculate_nv_plot();
+ ebpf_update_socket_cgroup();
pthread_mutex_lock(&lock);
if (socket_global_enabled)
@@ -2963,20 +2623,6 @@ static void socket_collector(ebpf_module_t *em)
fflush(stdout);
- if (network_connection) {
- // We are calling fflush many times, because when we have a lot of dimensions
- // we began to have not expected outputs and Netdata closed the plugin.
- pthread_mutex_lock(&nv_mutex);
- ebpf_socket_create_nv_charts(&inbound_vectors, update_every);
- fflush(stdout);
- ebpf_socket_send_nv_data(&inbound_vectors);
-
- ebpf_socket_create_nv_charts(&outbound_vectors, update_every);
- fflush(stdout);
- ebpf_socket_send_nv_data(&outbound_vectors);
- pthread_mutex_unlock(&nv_mutex);
-
- }
pthread_mutex_unlock(&lock);
pthread_mutex_unlock(&collect_data_mutex);
@@ -2998,42 +2644,26 @@ static void socket_collector(ebpf_module_t *em)
*****************************************************************/
/**
- * Allocate vectors used with this thread.
+ * Initialize vectors used with this thread.
+ *
* We are not testing the return, because callocz does this and shutdown the software
* case it was not possible to allocate.
- *
- * @param apps is apps enabled?
*/
-static void ebpf_socket_allocate_global_vectors(int apps)
+static void ebpf_socket_initialize_global_vectors()
{
memset(socket_aggregated_data, 0 ,NETDATA_MAX_SOCKET_VECTOR * sizeof(netdata_syscall_stat_t));
memset(socket_publish_aggregated, 0 ,NETDATA_MAX_SOCKET_VECTOR * sizeof(netdata_publish_syscall_t));
socket_hash_values = callocz(ebpf_nprocs, sizeof(netdata_idx_t));
- if (apps) {
- ebpf_socket_aral_init();
- socket_bandwidth_curr = callocz((size_t)pid_max, sizeof(ebpf_socket_publish_apps_t *));
- bandwidth_vector = callocz((size_t)ebpf_nprocs, sizeof(ebpf_bandwidth_t));
- }
+ ebpf_socket_aral_init();
+ socket_bandwidth_curr = callocz((size_t)pid_max, sizeof(ebpf_socket_publish_apps_t *));
+
+ aral_socket_table = ebpf_allocate_pid_aral(NETDATA_EBPF_SOCKET_ARAL_TABLE_NAME,
+ sizeof(netdata_socket_plus_t));
socket_values = callocz((size_t)ebpf_nprocs, sizeof(netdata_socket_t));
- if (network_viewer_opt.enabled) {
- inbound_vectors.plot = callocz(network_viewer_opt.max_dim, sizeof(netdata_socket_plot_t));
- outbound_vectors.plot = callocz(network_viewer_opt.max_dim, sizeof(netdata_socket_plot_t));
- }
-}
-/**
- * Initialize Inbound and Outbound
- *
- * Initialize the common outbound and inbound sockets.
- */
-static void initialize_inbound_outbound()
-{
- inbound_vectors.last = network_viewer_opt.max_dim - 1;
- outbound_vectors.last = inbound_vectors.last;
- fill_last_nv_dimension(&inbound_vectors.plot[inbound_vectors.last], 0);
- fill_last_nv_dimension(&outbound_vectors.plot[outbound_vectors.last], 1);
+ ebpf_load_addresses(&tcp_v6_connect_address, -1);
}
/*****************************************************************
@@ -3042,793 +2672,6 @@ static void initialize_inbound_outbound()
*
*****************************************************************/
-/**
- * Fill Port list
- *
- * @param out a pointer to the link list.
- * @param in the structure that will be linked.
- */
-static inline void fill_port_list(ebpf_network_viewer_port_list_t **out, ebpf_network_viewer_port_list_t *in)
-{
- if (likely(*out)) {
- ebpf_network_viewer_port_list_t *move = *out, *store = *out;
- uint16_t first = ntohs(in->first);
- uint16_t last = ntohs(in->last);
- while (move) {
- uint16_t cmp_first = ntohs(move->first);
- uint16_t cmp_last = ntohs(move->last);
- if (cmp_first <= first && first <= cmp_last &&
- cmp_first <= last && last <= cmp_last ) {
- netdata_log_info("The range/value (%u, %u) is inside the range/value (%u, %u) already inserted, it will be ignored.",
- first, last, cmp_first, cmp_last);
- freez(in->value);
- freez(in);
- return;
- } else if (first <= cmp_first && cmp_first <= last &&
- first <= cmp_last && cmp_last <= last) {
- netdata_log_info("The range (%u, %u) is bigger than previous range (%u, %u) already inserted, the previous will be ignored.",
- first, last, cmp_first, cmp_last);
- freez(move->value);
- move->value = in->value;
- move->first = in->first;
- move->last = in->last;
- freez(in);
- return;
- }
-
- store = move;
- move = move->next;
- }
-
- store->next = in;
- } else {
- *out = in;
- }
-
-#ifdef NETDATA_INTERNAL_CHECKS
- netdata_log_info("Adding values %s( %u, %u) to %s port list used on network viewer",
- in->value, ntohs(in->first), ntohs(in->last),
- (*out == network_viewer_opt.included_port)?"included":"excluded");
-#endif
-}
-
-/**
- * Parse Service List
- *
- * @param out a pointer to store the link list
- * @param service the service used to create the structure that will be linked.
- */
-static void parse_service_list(void **out, char *service)
-{
- ebpf_network_viewer_port_list_t **list = (ebpf_network_viewer_port_list_t **)out;
- struct servent *serv = getservbyname((const char *)service, "tcp");
- if (!serv)
- serv = getservbyname((const char *)service, "udp");
-
- if (!serv) {
- netdata_log_info("Cannot resolv the service '%s' with protocols TCP and UDP, it will be ignored", service);
- return;
- }
-
- ebpf_network_viewer_port_list_t *w = callocz(1, sizeof(ebpf_network_viewer_port_list_t));
- w->value = strdupz(service);
- w->hash = simple_hash(service);
-
- w->first = w->last = (uint16_t)serv->s_port;
-
- fill_port_list(list, w);
-}
-
-/**
- * Netmask
- *
- * Copied from iprange (https://github.com/firehol/iprange/blob/master/iprange.h)
- *
- * @param prefix create the netmask based in the CIDR value.
- *
- * @return
- */
-static inline in_addr_t netmask(int prefix) {
-
- if (prefix == 0)
- return (~((in_addr_t) - 1));
- else
- return (in_addr_t)(~((1 << (32 - prefix)) - 1));
-
-}
-
-/**
- * Broadcast
- *
- * Copied from iprange (https://github.com/firehol/iprange/blob/master/iprange.h)
- *
- * @param addr is the ip address
- * @param prefix is the CIDR value.
- *
- * @return It returns the last address of the range
- */
-static inline in_addr_t broadcast(in_addr_t addr, int prefix)
-{
- return (addr | ~netmask(prefix));
-}
-
-/**
- * Network
- *
- * Copied from iprange (https://github.com/firehol/iprange/blob/master/iprange.h)
- *
- * @param addr is the ip address
- * @param prefix is the CIDR value.
- *
- * @return It returns the first address of the range.
- */
-static inline in_addr_t ipv4_network(in_addr_t addr, int prefix)
-{
- return (addr & netmask(prefix));
-}
-
-/**
- * IP to network long
- *
- * @param dst the vector to store the result
- * @param ip the source ip given by our users.
- * @param domain the ip domain (IPV4 or IPV6)
- * @param source the original string
- *
- * @return it returns 0 on success and -1 otherwise.
- */
-static inline int ip2nl(uint8_t *dst, char *ip, int domain, char *source)
-{
- if (inet_pton(domain, ip, dst) <= 0) {
- netdata_log_error("The address specified (%s) is invalid ", source);
- return -1;
- }
-
- return 0;
-}
-
-/**
- * Get IPV6 Last Address
- *
- * @param out the address to store the last address.
- * @param in the address used to do the math.
- * @param prefix number of bits used to calculate the address
- */
-static void get_ipv6_last_addr(union netdata_ip_t *out, union netdata_ip_t *in, uint64_t prefix)
-{
- uint64_t mask,tmp;
- uint64_t ret[2];
- memcpy(ret, in->addr32, sizeof(union netdata_ip_t));
-
- if (prefix == 128) {
- memcpy(out->addr32, in->addr32, sizeof(union netdata_ip_t));
- return;
- } else if (!prefix) {
- ret[0] = ret[1] = 0xFFFFFFFFFFFFFFFF;
- memcpy(out->addr32, ret, sizeof(union netdata_ip_t));
- return;
- } else if (prefix <= 64) {
- ret[1] = 0xFFFFFFFFFFFFFFFFULL;
-
- tmp = be64toh(ret[0]);
- if (prefix > 0) {
- mask = 0xFFFFFFFFFFFFFFFFULL << (64 - prefix);
- tmp |= ~mask;
- }
- ret[0] = htobe64(tmp);
- } else {
- mask = 0xFFFFFFFFFFFFFFFFULL << (128 - prefix);
- tmp = be64toh(ret[1]);
- tmp |= ~mask;
- ret[1] = htobe64(tmp);
- }
-
- memcpy(out->addr32, ret, sizeof(union netdata_ip_t));
-}
-
-/**
- * Calculate ipv6 first address
- *
- * @param out the address to store the first address.
- * @param in the address used to do the math.
- * @param prefix number of bits used to calculate the address
- */
-static void get_ipv6_first_addr(union netdata_ip_t *out, union netdata_ip_t *in, uint64_t prefix)
-{
- uint64_t mask,tmp;
- uint64_t ret[2];
-
- memcpy(ret, in->addr32, sizeof(union netdata_ip_t));
-
- if (prefix == 128) {
- memcpy(out->addr32, in->addr32, sizeof(union netdata_ip_t));
- return;
- } else if (!prefix) {
- ret[0] = ret[1] = 0;
- memcpy(out->addr32, ret, sizeof(union netdata_ip_t));
- return;
- } else if (prefix <= 64) {
- ret[1] = 0ULL;
-
- tmp = be64toh(ret[0]);
- if (prefix > 0) {
- mask = 0xFFFFFFFFFFFFFFFFULL << (64 - prefix);
- tmp &= mask;
- }
- ret[0] = htobe64(tmp);
- } else {
- mask = 0xFFFFFFFFFFFFFFFFULL << (128 - prefix);
- tmp = be64toh(ret[1]);
- tmp &= mask;
- ret[1] = htobe64(tmp);
- }
-
- memcpy(out->addr32, ret, sizeof(union netdata_ip_t));
-}
-
-/**
- * Is ip inside the range
- *
- * Check if the ip is inside a IP range
- *
- * @param rfirst the first ip address of the range
- * @param rlast the last ip address of the range
- * @param cmpfirst the first ip to compare
- * @param cmplast the last ip to compare
- * @param family the IP family
- *
- * @return It returns 1 if the IP is inside the range and 0 otherwise
- */
-static int ebpf_is_ip_inside_range(union netdata_ip_t *rfirst, union netdata_ip_t *rlast,
- union netdata_ip_t *cmpfirst, union netdata_ip_t *cmplast, int family)
-{
- if (family == AF_INET) {
- if ((rfirst->addr32[0] <= cmpfirst->addr32[0]) && (rlast->addr32[0] >= cmplast->addr32[0]))
- return 1;
- } else {
- if (memcmp(rfirst->addr8, cmpfirst->addr8, sizeof(union netdata_ip_t)) <= 0 &&
- memcmp(rlast->addr8, cmplast->addr8, sizeof(union netdata_ip_t)) >= 0) {
- return 1;
- }
-
- }
- return 0;
-}
-
-/**
- * Fill IP list
- *
- * @param out a pointer to the link list.
- * @param in the structure that will be linked.
- * @param table the modified table.
- */
-void ebpf_fill_ip_list(ebpf_network_viewer_ip_list_t **out, ebpf_network_viewer_ip_list_t *in, char *table)
-{
-#ifndef NETDATA_INTERNAL_CHECKS
- UNUSED(table);
-#endif
- if (in->ver == AF_INET) { // It is simpler to compare using host order
- in->first.addr32[0] = ntohl(in->first.addr32[0]);
- in->last.addr32[0] = ntohl(in->last.addr32[0]);
- }
- if (likely(*out)) {
- ebpf_network_viewer_ip_list_t *move = *out, *store = *out;
- while (move) {
- if (in->ver == move->ver &&
- ebpf_is_ip_inside_range(&move->first, &move->last, &in->first, &in->last, in->ver)) {
- netdata_log_info("The range/value (%s) is inside the range/value (%s) already inserted, it will be ignored.",
- in->value, move->value);
- freez(in->value);
- freez(in);
- return;
- }
- store = move;
- move = move->next;
- }
-
- store->next = in;
- } else {
- *out = in;
- }
-
-#ifdef NETDATA_INTERNAL_CHECKS
- char first[256], last[512];
- if (in->ver == AF_INET) {
- netdata_log_info("Adding values %s: (%u - %u) to %s IP list \"%s\" used on network viewer",
- in->value, in->first.addr32[0], in->last.addr32[0],
- (*out == network_viewer_opt.included_ips)?"included":"excluded",
- table);
- } else {
- if (inet_ntop(AF_INET6, in->first.addr8, first, INET6_ADDRSTRLEN) &&
- inet_ntop(AF_INET6, in->last.addr8, last, INET6_ADDRSTRLEN))
- netdata_log_info("Adding values %s - %s to %s IP list \"%s\" used on network viewer",
- first, last,
- (*out == network_viewer_opt.included_ips)?"included":"excluded",
- table);
- }
-#endif
-}
-
-/**
- * Parse IP List
- *
- * Parse IP list and link it.
- *
- * @param out a pointer to store the link list
- * @param ip the value given as parameter
- */
-static void ebpf_parse_ip_list(void **out, char *ip)
-{
- ebpf_network_viewer_ip_list_t **list = (ebpf_network_viewer_ip_list_t **)out;
-
- char *ipdup = strdupz(ip);
- union netdata_ip_t first = { };
- union netdata_ip_t last = { };
- char *is_ipv6;
- if (*ip == '*' && *(ip+1) == '\0') {
- memset(first.addr8, 0, sizeof(first.addr8));
- memset(last.addr8, 0xFF, sizeof(last.addr8));
-
- is_ipv6 = ip;
-
- clean_ip_structure(list);
- goto storethisip;
- }
-
- char *end = ip;
- // Move while I cannot find a separator
- while (*end && *end != '/' && *end != '-') end++;
-
- // We will use only the classic IPV6 for while, but we could consider the base 85 in a near future
- // https://tools.ietf.org/html/rfc1924
- is_ipv6 = strchr(ip, ':');
-
- int select;
- if (*end && !is_ipv6) { // IPV4 range
- select = (*end == '/') ? 0 : 1;
- *end++ = '\0';
- if (*end == '!') {
- netdata_log_info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup);
- goto cleanipdup;
- }
-
- if (!select) { // CIDR
- select = ip2nl(first.addr8, ip, AF_INET, ipdup);
- if (select)
- goto cleanipdup;
-
- select = (int) str2i(end);
- if (select < NETDATA_MINIMUM_IPV4_CIDR || select > NETDATA_MAXIMUM_IPV4_CIDR) {
- netdata_log_info("The specified CIDR %s is not valid, the IP %s will be ignored.", end, ip);
- goto cleanipdup;
- }
-
- last.addr32[0] = htonl(broadcast(ntohl(first.addr32[0]), select));
- // This was added to remove
- // https://app.codacy.com/manual/netdata/netdata/pullRequest?prid=5810941&bid=19021977
- UNUSED(last.addr32[0]);
-
- uint32_t ipv4_test = htonl(ipv4_network(ntohl(first.addr32[0]), select));
- if (first.addr32[0] != ipv4_test) {
- first.addr32[0] = ipv4_test;
- struct in_addr ipv4_convert;
- ipv4_convert.s_addr = ipv4_test;
- char ipv4_msg[INET_ADDRSTRLEN];
- if(inet_ntop(AF_INET, &ipv4_convert, ipv4_msg, INET_ADDRSTRLEN))
- netdata_log_info("The network value of CIDR %s was updated for %s .", ipdup, ipv4_msg);
- }
- } else { // Range
- select = ip2nl(first.addr8, ip, AF_INET, ipdup);
- if (select)
- goto cleanipdup;
-
- select = ip2nl(last.addr8, end, AF_INET, ipdup);
- if (select)
- goto cleanipdup;
- }
-
- if (htonl(first.addr32[0]) > htonl(last.addr32[0])) {
- netdata_log_info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.",
- ipdup);
- goto cleanipdup;
- }
- } else if (is_ipv6) { // IPV6
- if (!*end) { // Unique
- select = ip2nl(first.addr8, ip, AF_INET6, ipdup);
- if (select)
- goto cleanipdup;
-
- memcpy(last.addr8, first.addr8, sizeof(first.addr8));
- } else if (*end == '-') {
- *end++ = 0x00;
- if (*end == '!') {
- netdata_log_info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup);
- goto cleanipdup;
- }
-
- select = ip2nl(first.addr8, ip, AF_INET6, ipdup);
- if (select)
- goto cleanipdup;
-
- select = ip2nl(last.addr8, end, AF_INET6, ipdup);
- if (select)
- goto cleanipdup;
- } else { // CIDR
- *end++ = 0x00;
- if (*end == '!') {
- netdata_log_info("The exclusion cannot be in the second part of the range %s, it will be ignored.", ipdup);
- goto cleanipdup;
- }
-
- select = str2i(end);
- if (select < 0 || select > 128) {
- netdata_log_info("The CIDR %s is not valid, the address %s will be ignored.", end, ip);
- goto cleanipdup;
- }
-
- uint64_t prefix = (uint64_t)select;
- select = ip2nl(first.addr8, ip, AF_INET6, ipdup);
- if (select)
- goto cleanipdup;
-
- get_ipv6_last_addr(&last, &first, prefix);
-
- union netdata_ip_t ipv6_test;
- get_ipv6_first_addr(&ipv6_test, &first, prefix);
-
- if (memcmp(first.addr8, ipv6_test.addr8, sizeof(union netdata_ip_t)) != 0) {
- memcpy(first.addr8, ipv6_test.addr8, sizeof(union netdata_ip_t));
-
- struct in6_addr ipv6_convert;
- memcpy(ipv6_convert.s6_addr, ipv6_test.addr8, sizeof(union netdata_ip_t));
-
- char ipv6_msg[INET6_ADDRSTRLEN];
- if(inet_ntop(AF_INET6, &ipv6_convert, ipv6_msg, INET6_ADDRSTRLEN))
- netdata_log_info("The network value of CIDR %s was updated for %s .", ipdup, ipv6_msg);
- }
- }
-
- if ((be64toh(*(uint64_t *)&first.addr32[2]) > be64toh(*(uint64_t *)&last.addr32[2]) &&
- !memcmp(first.addr32, last.addr32, 2*sizeof(uint32_t))) ||
- (be64toh(*(uint64_t *)&first.addr32) > be64toh(*(uint64_t *)&last.addr32)) ) {
- netdata_log_info("The specified range %s is invalid, the second address is smallest than the first, it will be ignored.",
- ipdup);
- goto cleanipdup;
- }
- } else { // Unique ip
- select = ip2nl(first.addr8, ip, AF_INET, ipdup);
- if (select)
- goto cleanipdup;
-
- memcpy(last.addr8, first.addr8, sizeof(first.addr8));
- }
-
- ebpf_network_viewer_ip_list_t *store;
-
-storethisip:
- store = callocz(1, sizeof(ebpf_network_viewer_ip_list_t));
- store->value = ipdup;
- store->hash = simple_hash(ipdup);
- store->ver = (uint8_t)(!is_ipv6)?AF_INET:AF_INET6;
- memcpy(store->first.addr8, first.addr8, sizeof(first.addr8));
- memcpy(store->last.addr8, last.addr8, sizeof(last.addr8));
-
- ebpf_fill_ip_list(list, store, "socket");
- return;
-
-cleanipdup:
- freez(ipdup);
-}
-
-/**
- * Parse IP Range
- *
- * Parse the IP ranges given and create Network Viewer IP Structure
- *
- * @param ptr is a pointer with the text to parse.
- */
-static void ebpf_parse_ips(char *ptr)
-{
- // No value
- if (unlikely(!ptr))
- return;
-
- while (likely(ptr)) {
- // Move forward until next valid character
- while (isspace(*ptr)) ptr++;
-
- // No valid value found
- if (unlikely(!*ptr))
- return;
-
- // Find space that ends the list
- char *end = strchr(ptr, ' ');
- if (end) {
- *end++ = '\0';
- }
-
- int neg = 0;
- if (*ptr == '!') {
- neg++;
- ptr++;
- }
-
- if (isascii(*ptr)) { // Parse port
- ebpf_parse_ip_list((!neg)?(void **)&network_viewer_opt.included_ips:
- (void **)&network_viewer_opt.excluded_ips,
- ptr);
- }
-
- ptr = end;
- }
-}
-
-
-
-/**
- * Parse port list
- *
- * Parse an allocated port list with the range given
- *
- * @param out a pointer to store the link list
- * @param range the informed range for the user.
- */
-static void parse_port_list(void **out, char *range)
-{
- int first, last;
- ebpf_network_viewer_port_list_t **list = (ebpf_network_viewer_port_list_t **)out;
-
- char *copied = strdupz(range);
- if (*range == '*' && *(range+1) == '\0') {
- first = 1;
- last = 65535;
-
- clean_port_structure(list);
- goto fillenvpl;
- }
-
- char *end = range;
- //Move while I cannot find a separator
- while (*end && *end != ':' && *end != '-') end++;
-
- //It has a range
- if (likely(*end)) {
- *end++ = '\0';
- if (*end == '!') {
- netdata_log_info("The exclusion cannot be in the second part of the range, the range %s will be ignored.", copied);
- freez(copied);
- return;
- }
- last = str2i((const char *)end);
- } else {
- last = 0;
- }
-
- first = str2i((const char *)range);
- if (first < NETDATA_MINIMUM_PORT_VALUE || first > NETDATA_MAXIMUM_PORT_VALUE) {
- netdata_log_info("The first port %d of the range \"%s\" is invalid and it will be ignored!", first, copied);
- freez(copied);
- return;
- }
-
- if (!last)
- last = first;
-
- if (last < NETDATA_MINIMUM_PORT_VALUE || last > NETDATA_MAXIMUM_PORT_VALUE) {
- netdata_log_info("The second port %d of the range \"%s\" is invalid and the whole range will be ignored!", last, copied);
- freez(copied);
- return;
- }
-
- if (first > last) {
- netdata_log_info("The specified order %s is wrong, the smallest value is always the first, it will be ignored!", copied);
- freez(copied);
- return;
- }
-
- ebpf_network_viewer_port_list_t *w;
-fillenvpl:
- w = callocz(1, sizeof(ebpf_network_viewer_port_list_t));
- w->value = copied;
- w->hash = simple_hash(copied);
- w->first = (uint16_t)htons((uint16_t)first);
- w->last = (uint16_t)htons((uint16_t)last);
- w->cmp_first = (uint16_t)first;
- w->cmp_last = (uint16_t)last;
-
- fill_port_list(list, w);
-}
-
-/**
- * Read max dimension.
- *
- * Netdata plot two dimensions per connection, so it is necessary to adjust the values.
- *
- * @param cfg the configuration structure
- */
-static void read_max_dimension(struct config *cfg)
-{
- int maxdim ;
- maxdim = (int) appconfig_get_number(cfg,
- EBPF_NETWORK_VIEWER_SECTION,
- EBPF_MAXIMUM_DIMENSIONS,
- NETDATA_NV_CAP_VALUE);
- if (maxdim < 0) {
- netdata_log_error("'maximum dimensions = %d' must be a positive number, Netdata will change for default value %ld.",
- maxdim, NETDATA_NV_CAP_VALUE);
- maxdim = NETDATA_NV_CAP_VALUE;
- }
-
- maxdim /= 2;
- if (!maxdim) {
- netdata_log_info("The number of dimensions is too small (%u), we are setting it to minimum 2", network_viewer_opt.max_dim);
- network_viewer_opt.max_dim = 1;
- return;
- }
-
- network_viewer_opt.max_dim = (uint32_t)maxdim;
-}
-
-/**
- * Parse Port Range
- *
- * Parse the port ranges given and create Network Viewer Port Structure
- *
- * @param ptr is a pointer with the text to parse.
- */
-static void parse_ports(char *ptr)
-{
- // No value
- if (unlikely(!ptr))
- return;
-
- while (likely(ptr)) {
- // Move forward until next valid character
- while (isspace(*ptr)) ptr++;
-
- // No valid value found
- if (unlikely(!*ptr))
- return;
-
- // Find space that ends the list
- char *end = strchr(ptr, ' ');
- if (end) {
- *end++ = '\0';
- }
-
- int neg = 0;
- if (*ptr == '!') {
- neg++;
- ptr++;
- }
-
- if (isdigit(*ptr)) { // Parse port
- parse_port_list((!neg)?(void **)&network_viewer_opt.included_port:(void **)&network_viewer_opt.excluded_port,
- ptr);
- } else if (isalpha(*ptr)) { // Parse service
- parse_service_list((!neg)?(void **)&network_viewer_opt.included_port:(void **)&network_viewer_opt.excluded_port,
- ptr);
- } else if (*ptr == '*') { // All
- parse_port_list((!neg)?(void **)&network_viewer_opt.included_port:(void **)&network_viewer_opt.excluded_port,
- ptr);
- }
-
- ptr = end;
- }
-}
-
-/**
- * Link hostname
- *
- * @param out is the output link list
- * @param in the hostname to add to list.
- */
-static void link_hostname(ebpf_network_viewer_hostname_list_t **out, ebpf_network_viewer_hostname_list_t *in)
-{
- if (likely(*out)) {
- ebpf_network_viewer_hostname_list_t *move = *out;
- for (; move->next ; move = move->next ) {
- if (move->hash == in->hash && !strcmp(move->value, in->value)) {
- netdata_log_info("The hostname %s was already inserted, it will be ignored.", in->value);
- freez(in->value);
- simple_pattern_free(in->value_pattern);
- freez(in);
- return;
- }
- }
-
- move->next = in;
- } else {
- *out = in;
- }
-#ifdef NETDATA_INTERNAL_CHECKS
- netdata_log_info("Adding value %s to %s hostname list used on network viewer",
- in->value,
- (*out == network_viewer_opt.included_hostnames)?"included":"excluded");
-#endif
-}
-
-/**
- * Link Hostnames
- *
- * Parse the list of hostnames to create the link list.
- * This is not associated with the IP, because simple patterns like *example* cannot be resolved to IP.
- *
- * @param out is the output link list
- * @param parse is a pointer with the text to parser.
- */
-static void link_hostnames(char *parse)
-{
- // No value
- if (unlikely(!parse))
- return;
-
- while (likely(parse)) {
- // Find the first valid value
- while (isspace(*parse)) parse++;
-
- // No valid value found
- if (unlikely(!*parse))
- return;
-
- // Find space that ends the list
- char *end = strchr(parse, ' ');
- if (end) {
- *end++ = '\0';
- }
-
- int neg = 0;
- if (*parse == '!') {
- neg++;
- parse++;
- }
-
- ebpf_network_viewer_hostname_list_t *hostname = callocz(1 , sizeof(ebpf_network_viewer_hostname_list_t));
- hostname->value = strdupz(parse);
- hostname->hash = simple_hash(parse);
- hostname->value_pattern = simple_pattern_create(parse, NULL, SIMPLE_PATTERN_EXACT, true);
-
- link_hostname((!neg)?&network_viewer_opt.included_hostnames:&network_viewer_opt.excluded_hostnames,
- hostname);
-
- parse = end;
- }
-}
-
-/**
- * Parse network viewer section
- *
- * @param cfg the configuration structure
- */
-void parse_network_viewer_section(struct config *cfg)
-{
- read_max_dimension(cfg);
-
- network_viewer_opt.hostname_resolution_enabled = appconfig_get_boolean(cfg,
- EBPF_NETWORK_VIEWER_SECTION,
- EBPF_CONFIG_RESOLVE_HOSTNAME,
- CONFIG_BOOLEAN_NO);
-
- network_viewer_opt.service_resolution_enabled = appconfig_get_boolean(cfg,
- EBPF_NETWORK_VIEWER_SECTION,
- EBPF_CONFIG_RESOLVE_SERVICE,
- CONFIG_BOOLEAN_NO);
-
- char *value = appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION, EBPF_CONFIG_PORTS, NULL);
- parse_ports(value);
-
- if (network_viewer_opt.hostname_resolution_enabled) {
- value = appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION, EBPF_CONFIG_HOSTNAMES, NULL);
- link_hostnames(value);
- } else {
- netdata_log_info("Name resolution is disabled, collector will not parser \"hostnames\" list.");
- }
-
- value = appconfig_get(cfg, EBPF_NETWORK_VIEWER_SECTION,
- "ips", "!127.0.0.1/8 10.0.0.0/8 172.16.0.0/12 192.168.0.0/16 fc00::/7 !::1/128");
- ebpf_parse_ips(value);
-}
-
/**
* Link dimension name
*
@@ -3838,7 +2681,7 @@ void parse_network_viewer_section(struct config *cfg)
* @param hash the calculated hash for the dimension name.
* @param name the dimension name.
*/
-static void link_dimension_name(char *port, uint32_t hash, char *value)
+static void ebpf_link_dimension_name(char *port, uint32_t hash, char *value)
{
int test = str2i(port);
if (test < NETDATA_MINIMUM_PORT_VALUE || test > NETDATA_MAXIMUM_PORT_VALUE){
@@ -3883,13 +2726,13 @@ static void link_dimension_name(char *port, uint32_t hash, char *value)
*
* @param cfg the configuration structure
*/
-void parse_service_name_section(struct config *cfg)
+void ebpf_parse_service_name_section(struct config *cfg)
{
struct section *co = appconfig_get_section(cfg, EBPF_SERVICE_NAME_SECTION);
if (co) {
struct config_option *cv;
for (cv = co->values; cv ; cv = cv->next) {
- link_dimension_name(cv->name, cv->hash, cv->value);
+ ebpf_link_dimension_name(cv->name, cv->hash, cv->value);
}
}
@@ -3910,23 +2753,21 @@ void parse_service_name_section(struct config *cfg)
// if variable has an invalid value, we assume netdata is using 19999
int default_port = str2i(port_string);
if (default_port > 0 && default_port < 65536)
- link_dimension_name(port_string, simple_hash(port_string), "Netdata");
+ ebpf_link_dimension_name(port_string, simple_hash(port_string), "Netdata");
}
}
+/**
+ * Parse table size options
+ *
+ * @param cfg configuration options read from user file.
+ */
void parse_table_size_options(struct config *cfg)
{
- socket_maps[NETDATA_SOCKET_TABLE_BANDWIDTH].user_input = (uint32_t) appconfig_get_number(cfg,
- EBPF_GLOBAL_SECTION,
- EBPF_CONFIG_BANDWIDTH_SIZE, NETDATA_MAXIMUM_CONNECTIONS_ALLOWED);
-
- socket_maps[NETDATA_SOCKET_TABLE_IPV4].user_input = (uint32_t) appconfig_get_number(cfg,
- EBPF_GLOBAL_SECTION,
- EBPF_CONFIG_IPV4_SIZE, NETDATA_MAXIMUM_CONNECTIONS_ALLOWED);
-
- socket_maps[NETDATA_SOCKET_TABLE_IPV6].user_input = (uint32_t) appconfig_get_number(cfg,
- EBPF_GLOBAL_SECTION,
- EBPF_CONFIG_IPV6_SIZE, NETDATA_MAXIMUM_CONNECTIONS_ALLOWED);
+ socket_maps[NETDATA_SOCKET_OPEN_SOCKET].user_input = (uint32_t) appconfig_get_number(cfg,
+ EBPF_GLOBAL_SECTION,
+ EBPF_CONFIG_SOCKET_MONITORING_SIZE,
+ NETDATA_MAXIMUM_CONNECTIONS_ALLOWED);
socket_maps[NETDATA_SOCKET_TABLE_UDP].user_input = (uint32_t) appconfig_get_number(cfg,
EBPF_GLOBAL_SECTION,
@@ -3965,7 +2806,7 @@ static int ebpf_socket_load_bpf(ebpf_module_t *em)
#endif
if (ret) {
- netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name);
+ netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->info.thread_name);
}
return ret;
@@ -3985,25 +2826,23 @@ void *ebpf_socket_thread(void *ptr)
netdata_thread_cleanup_push(ebpf_socket_exit, ptr);
ebpf_module_t *em = (ebpf_module_t *)ptr;
- em->maps = socket_maps;
-
- parse_table_size_options(&socket_config);
-
- if (pthread_mutex_init(&nv_mutex, NULL)) {
- netdata_log_error("Cannot initialize local mutex");
- goto endsocket;
+ if (em->enabled > NETDATA_THREAD_EBPF_FUNCTION_RUNNING) {
+ collector_error("There is already a thread %s running", em->info.thread_name);
+ return NULL;
}
- ebpf_socket_allocate_global_vectors(em->apps_charts);
+ em->maps = socket_maps;
- if (network_viewer_opt.enabled) {
- memset(&inbound_vectors.tree, 0, sizeof(avl_tree_lock));
- memset(&outbound_vectors.tree, 0, sizeof(avl_tree_lock));
- avl_init_lock(&inbound_vectors.tree, ebpf_compare_sockets);
- avl_init_lock(&outbound_vectors.tree, ebpf_compare_sockets);
+ rw_spinlock_write_lock(&network_viewer_opt.rw_spinlock);
+ // It was not enabled from main config file (ebpf.d.conf)
+ if (!network_viewer_opt.enabled)
+ network_viewer_opt.enabled = appconfig_get_boolean(&socket_config, EBPF_NETWORK_VIEWER_SECTION, "enabled",
+ CONFIG_BOOLEAN_YES);
+ rw_spinlock_write_unlock(&network_viewer_opt.rw_spinlock);
- initialize_inbound_outbound();
- }
+ parse_table_size_options(&socket_config);
+
+ ebpf_socket_initialize_global_vectors();
if (running_on_kernel < NETDATA_EBPF_KERNEL_5_0)
em->mode = MODE_ENTRY;
@@ -4026,8 +2865,15 @@ void *ebpf_socket_thread(void *ptr)
socket_aggregated_data, socket_publish_aggregated, socket_dimension_names, socket_id_names,
algorithms, NETDATA_MAX_SOCKET_VECTOR);
+ ebpf_read_socket.thread = mallocz(sizeof(netdata_thread_t));
+ netdata_thread_create(ebpf_read_socket.thread,
+ ebpf_read_socket.name,
+ NETDATA_THREAD_OPTION_DEFAULT,
+ ebpf_read_socket_thread,
+ em);
+
pthread_mutex_lock(&lock);
- ebpf_create_global_charts(em);
+ ebpf_socket_create_global_charts(em);
ebpf_update_stats(&plugin_statistics, em);
ebpf_update_kernel_memory_with_vector(&plugin_statistics, em->maps, EBPF_ACTION_STAT_ADD);
diff --git a/collectors/ebpf.plugin/ebpf_socket.h b/collectors/ebpf.plugin/ebpf_socket.h
index ae2ee28abcd68f..a6d3e03b6dabeb 100644
--- a/collectors/ebpf.plugin/ebpf_socket.h
+++ b/collectors/ebpf.plugin/ebpf_socket.h
@@ -4,6 +4,11 @@
#include <stdint.h>
#include "libnetdata/avl/avl.h"
+#include <sys/socket.h>
+#ifdef HAVE_NETDB_H
+#include <netdb.h>
+#endif
+
// Module name & description
#define NETDATA_EBPF_MODULE_NAME_SOCKET "socket"
#define NETDATA_EBPF_SOCKET_MODULE_DESC "Monitors TCP and UDP bandwidth. This thread is integrated with apps and cgroup."
@@ -11,8 +16,6 @@
// Vector indexes
#define NETDATA_UDP_START 3
-#define NETDATA_SOCKET_READ_SLEEP_MS 800000ULL
-
// config file
#define NETDATA_NETWORK_CONFIG_FILE "network.conf"
#define EBPF_NETWORK_VIEWER_SECTION "network connections"
@@ -21,18 +24,13 @@
#define EBPF_CONFIG_RESOLVE_SERVICE "resolve service names"
#define EBPF_CONFIG_PORTS "ports"
#define EBPF_CONFIG_HOSTNAMES "hostnames"
-#define EBPF_CONFIG_BANDWIDTH_SIZE "bandwidth table size"
-#define EBPF_CONFIG_IPV4_SIZE "ipv4 connection table size"
-#define EBPF_CONFIG_IPV6_SIZE "ipv6 connection table size"
+#define EBPF_CONFIG_SOCKET_MONITORING_SIZE "socket monitoring table size"
#define EBPF_CONFIG_UDP_SIZE "udp connection table size"
-#define EBPF_MAXIMUM_DIMENSIONS "maximum dimensions"
enum ebpf_socket_table_list {
- NETDATA_SOCKET_TABLE_BANDWIDTH,
NETDATA_SOCKET_GLOBAL,
NETDATA_SOCKET_LPORTS,
- NETDATA_SOCKET_TABLE_IPV4,
- NETDATA_SOCKET_TABLE_IPV6,
+ NETDATA_SOCKET_OPEN_SOCKET,
NETDATA_SOCKET_TABLE_UDP,
NETDATA_SOCKET_TABLE_CTRL
};
@@ -122,13 +120,6 @@ typedef enum ebpf_socket_idx {
#define NETDATA_NET_APPS_BANDWIDTH_UDP_SEND_CALLS "bandwidth_udp_send"
#define NETDATA_NET_APPS_BANDWIDTH_UDP_RECV_CALLS "bandwidth_udp_recv"
-// Network viewer charts
-#define NETDATA_NV_OUTBOUND_BYTES "outbound_bytes"
-#define NETDATA_NV_OUTBOUND_PACKETS "outbound_packets"
-#define NETDATA_NV_OUTBOUND_RETRANSMIT "outbound_retransmit"
-#define NETDATA_NV_INBOUND_BYTES "inbound_bytes"
-#define NETDATA_NV_INBOUND_PACKETS "inbound_packets"
-
// Port range
#define NETDATA_MINIMUM_PORT_VALUE 1
#define NETDATA_MAXIMUM_PORT_VALUE 65535
@@ -163,6 +154,8 @@ typedef enum ebpf_socket_idx {
// ARAL name
#define NETDATA_EBPF_SOCKET_ARAL_NAME "ebpf_socket"
+#define NETDATA_EBPF_PID_SOCKET_ARAL_TABLE_NAME "ebpf_pid_socket"
+#define NETDATA_EBPF_SOCKET_ARAL_TABLE_NAME "ebpf_socket_tbl"
typedef struct ebpf_socket_publish_apps {
// Data read
@@ -246,10 +239,11 @@ typedef struct ebpf_network_viewer_hostname_list {
struct ebpf_network_viewer_hostname_list *next;
} ebpf_network_viewer_hostname_list_t;
-#define NETDATA_NV_CAP_VALUE 50L
typedef struct ebpf_network_viewer_options {
+ RW_SPINLOCK rw_spinlock;
+
uint32_t enabled;
- uint32_t max_dim; // Store value read from 'maximum dimensions'
+ uint32_t family; // AF_INET, AF_INET6 or AF_UNSPEC (both)
uint32_t hostname_resolution_enabled;
uint32_t service_resolution_enabled;
@@ -275,98 +269,78 @@ extern ebpf_network_viewer_options_t network_viewer_opt;
* Structure to store socket information
*/
typedef struct netdata_socket {
- uint64_t recv_packets;
- uint64_t sent_packets;
- uint64_t recv_bytes;
- uint64_t sent_bytes;
- uint64_t first; // First timestamp
- uint64_t ct; // Current timestamp
- uint32_t retransmit; // It is never used with UDP
+ // Timestamp
+ uint64_t first_timestamp;
+ uint64_t current_timestamp;
+ // Socket additional info
uint16_t protocol;
- uint16_t reserved;
+ uint16_t family;
+ uint32_t external_origin;
+ struct {
+ uint32_t call_tcp_sent;
+ uint32_t call_tcp_received;
+ uint64_t tcp_bytes_sent;
+ uint64_t tcp_bytes_received;
+ uint32_t close; //It is never used with UDP
+ uint32_t retransmit; //It is never used with UDP
+ uint32_t ipv4_connect;
+ uint32_t ipv6_connect;
+ } tcp;
+
+ struct {
+ uint32_t call_udp_sent;
+ uint32_t call_udp_received;
+ uint64_t udp_bytes_sent;
+ uint64_t udp_bytes_received;
+ } udp;
} netdata_socket_t;
-typedef struct netdata_plot_values {
- // Values used in the previous iteration
- uint64_t recv_packets;
- uint64_t sent_packets;
- uint64_t recv_bytes;
- uint64_t sent_bytes;
- uint32_t retransmit;
+typedef enum netdata_socket_flags {
+ NETDATA_SOCKET_FLAGS_ALREADY_OPEN = (1<<0)
+} netdata_socket_flags_t;
+
+typedef enum netdata_socket_src_ip_origin {
+ NETDATA_EBPF_SRC_IP_ORIGIN_LOCAL,
+ NETDATA_EBPF_SRC_IP_ORIGIN_EXTERNAL
+} netdata_socket_src_ip_origin_t;
- uint64_t last_time;
+typedef struct netata_socket_plus {
+ netdata_socket_t data; // Data read from database
+ uint32_t pid;
+ time_t last_update;
+ netdata_socket_flags_t flags;
+
+ struct {
+ char src_ip[INET6_ADDRSTRLEN + 1];
+ // uint16_t src_port;
+ char dst_ip[INET6_ADDRSTRLEN+ 1];
+ char dst_port[NI_MAXSERV + 1];
+ } socket_string;
+} netdata_socket_plus_t;
- // Values used to plot
- uint64_t plot_recv_packets;
- uint64_t plot_sent_packets;
- uint64_t plot_recv_bytes;
- uint64_t plot_sent_bytes;
- uint16_t plot_retransmit;
-} netdata_plot_values_t;
+extern ARAL *aral_socket_table;
/**
* Index used together previous structure
*/
typedef struct netdata_socket_idx {
union netdata_ip_t saddr;
- uint16_t sport;
+ //uint16_t sport;
union netdata_ip_t daddr;
uint16_t dport;
+ uint32_t pid;
} netdata_socket_idx_t;
-// Next values were defined according getnameinfo(3)
-#define NETDATA_MAX_NETWORK_COMBINED_LENGTH 1018
-#define NETDATA_DOTS_PROTOCOL_COMBINED_LENGTH 5 // :TCP:
-#define NETDATA_DIM_LENGTH_WITHOUT_SERVICE_PROTOCOL 979
-
-#define NETDATA_INBOUND_DIRECTION (uint32_t)1
-#define NETDATA_OUTBOUND_DIRECTION (uint32_t)2
-/**
- * Allocate the maximum number of structures in the beginning, this can force the collector to use more memory
- * in the long term, on the other had it is faster.
- */
-typedef struct netdata_socket_plot {
- // Search
- avl_t avl;
- netdata_socket_idx_t index;
-
- // Current data
- netdata_socket_t sock;
-
- // Previous values and values used to write on chart.
- netdata_plot_values_t plot;
-
- int family; // AF_INET or AF_INET6
- char *resolved_name; // Resolve only in the first call
- unsigned char resolved;
-
- char *dimension_sent;
- char *dimension_recv;
- char *dimension_retransmit;
-
- uint32_t flags;
-} netdata_socket_plot_t;
-
-#define NETWORK_VIEWER_CHARTS_CREATED (uint32_t)1
-typedef struct netdata_vector_plot {
- netdata_socket_plot_t *plot; // Vector used to plot charts
-
- avl_tree_lock tree; // AVL tree to speed up search
- uint32_t last; // The 'other' dimension, the last chart accepted.
- uint32_t next; // The next position to store in the vector.
- uint32_t max_plot; // Max number of elements to plot.
- uint32_t last_plot; // Last element plot
-
- uint32_t flags; // Flags
-
-} netdata_vector_plot_t;
-
-void clean_port_structure(ebpf_network_viewer_port_list_t **clean);
+void ebpf_clean_port_structure(ebpf_network_viewer_port_list_t **clean);
extern ebpf_network_viewer_port_list_t *listen_ports;
void update_listen_table(uint16_t value, uint16_t proto, netdata_passive_connection_t *values);
-void parse_network_viewer_section(struct config *cfg);
-void ebpf_fill_ip_list(ebpf_network_viewer_ip_list_t **out, ebpf_network_viewer_ip_list_t *in, char *table);
-void parse_service_name_section(struct config *cfg);
+void ebpf_fill_ip_list_unsafe(ebpf_network_viewer_ip_list_t **out, ebpf_network_viewer_ip_list_t *in, char *table);
+void ebpf_parse_service_name_section(struct config *cfg);
+void ebpf_parse_ips_unsafe(char *ptr);
+void ebpf_parse_ports(char *ptr);
+void ebpf_socket_read_open_connections(BUFFER *buf, struct ebpf_module *em);
+void ebpf_socket_fill_publish_apps(uint32_t current_pid, netdata_socket_t *ns);
+
extern struct config socket_config;
extern netdata_ebpf_targets_t socket_targets[];
diff --git a/collectors/ebpf.plugin/ebpf_softirq.c b/collectors/ebpf.plugin/ebpf_softirq.c
index 8d8930a109dae9..106ff4f291e1f3 100644
--- a/collectors/ebpf.plugin/ebpf_softirq.c
+++ b/collectors/ebpf.plugin/ebpf_softirq.c
@@ -71,6 +71,7 @@ static void ebpf_obsolete_softirq_global(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_EBPF_SYSTEM_GROUP,
"softirq_latency",
+ "",
"Software IRQ latency",
EBPF_COMMON_DIMENSION_MILLISECONDS,
"softirqs",
@@ -218,9 +219,9 @@ static void softirq_collector(ebpf_module_t *em)
//This will be cancelled by its parent
uint32_t running_time = 0;
uint32_t lifetime = em->lifetime;
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -228,9 +229,9 @@ static void softirq_collector(ebpf_module_t *em)
pthread_mutex_lock(&lock);
// write dims now for all hitherto discovered IRQs.
- write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "softirq_latency");
+ ebpf_write_begin_chart(NETDATA_EBPF_SYSTEM_GROUP, "softirq_latency", "");
softirq_write_dims();
- write_end_chart();
+ ebpf_write_end_chart();
pthread_mutex_unlock(&lock);
diff --git a/collectors/ebpf.plugin/ebpf_swap.c b/collectors/ebpf.plugin/ebpf_swap.c
index 359fe230825ba5..fb007f928f3297 100644
--- a/collectors/ebpf.plugin/ebpf_swap.c
+++ b/collectors/ebpf.plugin/ebpf_swap.c
@@ -124,13 +124,6 @@ static int ebpf_swap_attach_kprobe(struct swap_bpf *obj)
if (ret)
return -1;
- obj->links.netdata_release_task_probe = bpf_program__attach_kprobe(obj->progs.netdata_release_task_probe,
- false,
- EBPF_COMMON_FNCT_CLEAN_UP);
- ret = libbpf_get_error(obj->links.netdata_swap_writepage_probe);
- if (ret)
- return -1;
-
return 0;
}
@@ -176,7 +169,6 @@ static void ebpf_swap_adjust_map(struct swap_bpf *obj, ebpf_module_t *em)
static void ebpf_swap_disable_release_task(struct swap_bpf *obj)
{
bpf_program__set_autoload(obj->progs.netdata_release_task_fentry, false);
- bpf_program__set_autoload(obj->progs.netdata_release_task_probe, false);
}
/**
@@ -242,7 +234,8 @@ static void ebpf_obsolete_swap_services(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_MEM_SWAP_READ_CHART,
- "Calls to function swap_readpage.",
+ "",
+ "Calls to function swap_readpage.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE,
@@ -252,7 +245,8 @@ static void ebpf_obsolete_swap_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_MEM_SWAP_WRITE_CHART,
- "Calls to function swap_writepage.",
+ "",
+ "Calls to function swap_writepage.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE,
@@ -292,25 +286,35 @@ static inline void ebpf_obsolete_swap_cgroup_charts(ebpf_module_t *em) {
*/
void ebpf_obsolete_swap_apps_charts(struct ebpf_module *em)
{
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_MEM_SWAP_READ_CHART,
- "Calls to function swap_readpage.",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_SWAP_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20191,
- em->update_every);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = apps_groups_root_target; w; w = w->next) {
+ if (unlikely(!(w->charts_created & (1<swap_writepage.",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_SWAP_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20192,
- em->update_every);
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_swap_readpage",
+ "Calls to function swap_readpage.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_EBPF_MEMORY_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_swap_readpage",
+ 20070,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_swap_writepage",
+ "Calls to function swap_writepage.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_EBPF_MEMORY_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_swap_writepage",
+ 20071,
+ update_every);
+ w->charts_created &= ~(1<next) {
- if (unlikely(w->exposed && w->processes)) {
- ebpf_swap_sum_pids(&w->swap, w->root_pid);
- }
- }
+ if (unlikely(!(w->charts_created & (1<next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, (long long) w->swap.read);
- }
- }
- write_end_chart();
+ ebpf_swap_sum_pids(&w->swap, w->root_pid);
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_MEM_SWAP_WRITE_CHART);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, (long long) w->swap.write);
- }
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_swap_readpage");
+ write_chart_dimension("calls", (long long) w->swap.read);
+ ebpf_write_end_chart();
+
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_swap_writepage");
+ write_chart_dimension("calls", (long long) w->swap.write);
+ ebpf_write_end_chart();
}
- write_end_chart();
}
/**
@@ -632,21 +630,21 @@ static void ebpf_swap_sum_cgroup_pids(netdata_publish_swap_t *swap, struct pid_o
static void ebpf_send_systemd_swap_charts()
{
ebpf_cgroup_target_t *ect;
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_READ_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_READ_CHART, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long) ect->publish_systemd_swap.read);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_WRITE_CHART);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_MEM_SWAP_WRITE_CHART, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, (long long) ect->publish_systemd_swap.write);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -660,7 +658,7 @@ static void ebpf_send_systemd_swap_charts()
static void ebpf_create_specific_swap_charts(char *type, int update_every)
{
ebpf_create_chart(type, NETDATA_MEM_SWAP_READ_CHART,
- "Calls to function swap_readpage.",
+ "Calls to function swap_readpage.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU,
NETDATA_CGROUP_SWAP_READ_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5100,
@@ -668,7 +666,7 @@ static void ebpf_create_specific_swap_charts(char *type, int update_every)
swap_publish_aggregated, 1, update_every, NETDATA_EBPF_MODULE_NAME_SWAP);
ebpf_create_chart(type, NETDATA_MEM_SWAP_WRITE_CHART,
- "Calls to function swap_writepage.",
+ "Calls to function swap_writepage.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU,
NETDATA_CGROUP_SWAP_WRITE_CONTEXT, NETDATA_EBPF_CHART_TYPE_LINE,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5101,
@@ -687,12 +685,12 @@ static void ebpf_create_specific_swap_charts(char *type, int update_every)
*/
static void ebpf_obsolete_specific_swap_charts(char *type, int update_every)
{
- ebpf_write_chart_obsolete(type, NETDATA_MEM_SWAP_READ_CHART,"Calls to function swap_readpage.",
+ ebpf_write_chart_obsolete(type, NETDATA_MEM_SWAP_READ_CHART, "", "Calls to function swap_readpage.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SWAP_READ_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5100, update_every);
- ebpf_write_chart_obsolete(type, NETDATA_MEM_SWAP_WRITE_CHART, "Calls to function swap_writepage.",
+ ebpf_write_chart_obsolete(type, NETDATA_MEM_SWAP_WRITE_CHART, "", "Calls to function swap_writepage.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_SWAP_WRITE_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5101, update_every);
@@ -708,13 +706,13 @@ static void ebpf_obsolete_specific_swap_charts(char *type, int update_every)
*/
static void ebpf_send_specific_swap_data(char *type, netdata_publish_swap_t *values)
{
- write_begin_chart(type, NETDATA_MEM_SWAP_READ_CHART);
+ ebpf_write_begin_chart(type, NETDATA_MEM_SWAP_READ_CHART, "");
write_chart_dimension(swap_publish_aggregated[NETDATA_KEY_SWAP_READPAGE_CALL].name, (long long) values->read);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_MEM_SWAP_WRITE_CHART);
+ ebpf_write_begin_chart(type, NETDATA_MEM_SWAP_WRITE_CHART, "");
write_chart_dimension(swap_publish_aggregated[NETDATA_KEY_SWAP_WRITEPAGE_CALL].name, (long long) values->write);
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -727,14 +725,14 @@ static void ebpf_send_specific_swap_data(char *type, netdata_publish_swap_t *val
static void ebpf_create_systemd_swap_charts(int update_every)
{
ebpf_create_charts_on_systemd(NETDATA_MEM_SWAP_READ_CHART,
- "Calls to swap_readpage.",
+ "Calls to swap_readpage.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU,
NETDATA_EBPF_CHART_TYPE_STACKED, 20191,
ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_SWAP_READ_CONTEXT,
NETDATA_EBPF_MODULE_NAME_SWAP, update_every);
ebpf_create_charts_on_systemd(NETDATA_MEM_SWAP_WRITE_CHART,
- "Calls to function swap_writepage.",
+ "Calls to function swap_writepage.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_SYSTEM_CGROUP_SWAP_SUBMENU,
NETDATA_EBPF_CHART_TYPE_STACKED, 20192,
ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_SWAP_WRITE_CONTEXT,
@@ -804,9 +802,9 @@ static void swap_collector(ebpf_module_t *em)
uint32_t lifetime = em->lifetime;
netdata_idx_t *stats = em->hash_table_stats;
memset(stats, 0, sizeof(em->hash_table_stats));
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -859,23 +857,44 @@ static void swap_collector(ebpf_module_t *em)
void ebpf_swap_create_apps_charts(struct ebpf_module *em, void *ptr)
{
struct ebpf_target *root = ptr;
- ebpf_create_charts_on_apps(NETDATA_MEM_SWAP_READ_CHART,
- "Calls to function swap_readpage.",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_SWAP_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20191,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP);
-
- ebpf_create_charts_on_apps(NETDATA_MEM_SWAP_WRITE_CHART,
- "Calls to function swap_writepage.",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_SWAP_SUBMENU,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20192,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_SWAP);
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = root; w; w = w->next) {
+ if (unlikely(!w->exposed))
+ continue;
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_swap_readpage",
+ "Calls to function swap_readpage.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_EBPF_MEMORY_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_swap_readpage",
+ 20070,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SWAP);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_swap_writepage",
+ "Calls to function swap_writepage.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_EBPF_MEMORY_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_swap_writepage",
+ 20071,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_SWAP);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ w->charts_created |= 1<apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED;
}
@@ -959,7 +978,7 @@ static int ebpf_swap_load_bpf(ebpf_module_t *em)
#endif
if (ret)
- netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->thread_name);
+ netdata_log_error("%s %s", EBPF_DEFAULT_ERROR_MSG, em->info.thread_name);
return ret;
}
diff --git a/collectors/ebpf.plugin/ebpf_sync.c b/collectors/ebpf.plugin/ebpf_sync.c
index 521d39f31d5232..a16318107cee52 100644
--- a/collectors/ebpf.plugin/ebpf_sync.c
+++ b/collectors/ebpf.plugin/ebpf_sync.c
@@ -298,7 +298,8 @@ static void ebpf_obsolete_sync_global(ebpf_module_t *em)
if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled && local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled)
ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP,
NETDATA_EBPF_FILE_SYNC_CHART,
- "Monitor calls for fsync(2) and fdatasync(2).",
+ "",
+ "Monitor calls to fsync(2) and fdatasync(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_EBPF_SYNC_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE,
@@ -309,7 +310,8 @@ static void ebpf_obsolete_sync_global(ebpf_module_t *em)
if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled)
ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP,
NETDATA_EBPF_MSYNC_CHART,
- "Monitor calls for msync(2).",
+ "",
+ "Monitor calls to msync(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_EBPF_SYNC_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE,
@@ -320,7 +322,8 @@ static void ebpf_obsolete_sync_global(ebpf_module_t *em)
if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled && local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled)
ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP,
NETDATA_EBPF_SYNC_CHART,
- "Monitor calls for sync(2) and syncfs(2).",
+ "",
+ "Monitor calls to sync(2) and syncfs(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_EBPF_SYNC_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE,
@@ -331,7 +334,8 @@ static void ebpf_obsolete_sync_global(ebpf_module_t *em)
if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled)
ebpf_write_chart_obsolete(NETDATA_EBPF_MEMORY_GROUP,
NETDATA_EBPF_FILE_SEGMENT_CHART,
- "Monitor calls for sync_file_range(2).",
+ "",
+ "Monitor calls to sync_file_range(2).",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_EBPF_SYNC_SUBMENU,
NETDATA_EBPF_CHART_TYPE_LINE,
@@ -383,7 +387,7 @@ static void ebpf_sync_exit(void *ptr)
*/
static int ebpf_sync_load_legacy(ebpf_sync_syscalls_t *w, ebpf_module_t *em)
{
- em->thread_name = w->syscall;
+ em->info.thread_name = w->syscall;
if (!w->probe_links) {
w->probe_links = ebpf_load_program(ebpf_plugin_dir, em, running_on_kernel, isrh, &w->objects);
if (!w->probe_links) {
@@ -413,7 +417,7 @@ static int ebpf_sync_initialize_syscall(ebpf_module_t *em)
#endif
int i;
- const char *saved_name = em->thread_name;
+ const char *saved_name = em->info.thread_name;
int errors = 0;
for (i = 0; local_syscalls[i].syscall; i++) {
ebpf_sync_syscalls_t *w = &local_syscalls[i];
@@ -424,7 +428,7 @@ static int ebpf_sync_initialize_syscall(ebpf_module_t *em)
if (ebpf_sync_load_legacy(w, em))
errors++;
- em->thread_name = saved_name;
+ em->info.thread_name = saved_name;
}
#ifdef LIBBPF_MAJOR_VERSION
else {
@@ -446,12 +450,12 @@ static int ebpf_sync_initialize_syscall(ebpf_module_t *em)
w->enabled = false;
}
- em->thread_name = saved_name;
+ em->info.thread_name = saved_name;
}
#endif
}
}
- em->thread_name = saved_name;
+ em->info.thread_name = saved_name;
memset(sync_counter_aggregated_data, 0 , NETDATA_SYNC_IDX_END * sizeof(netdata_syscall_stat_t));
memset(sync_counter_publish_aggregated, 0 , NETDATA_SYNC_IDX_END * sizeof(netdata_publish_syscall_t));
@@ -507,7 +511,7 @@ static void ebpf_send_sync_chart(char *id,
int idx,
int end)
{
- write_begin_chart(NETDATA_EBPF_MEMORY_GROUP, id);
+ ebpf_write_begin_chart(NETDATA_EBPF_MEMORY_GROUP, id, "");
netdata_publish_syscall_t *move = &sync_counter_publish_aggregated[idx];
@@ -519,7 +523,7 @@ static void ebpf_send_sync_chart(char *id,
idx++;
}
- write_end_chart();
+ ebpf_write_end_chart();
}
/**
@@ -560,9 +564,9 @@ static void sync_collector(ebpf_module_t *em)
int maps_per_core = em->maps_per_core;
uint32_t running_time = 0;
uint32_t lifetime = em->lifetime;
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -610,7 +614,7 @@ static void ebpf_create_sync_chart(char *id,
int end,
int update_every)
{
- ebpf_write_chart_cmd(NETDATA_EBPF_MEMORY_GROUP, id, title, EBPF_COMMON_DIMENSION_CALL,
+ ebpf_write_chart_cmd(NETDATA_EBPF_MEMORY_GROUP, id, "", title, EBPF_COMMON_DIMENSION_CALL,
NETDATA_EBPF_SYNC_SUBMENU, NETDATA_EBPF_CHART_TYPE_LINE, NULL, order,
update_every,
NETDATA_EBPF_MODULE_NAME_SYNC);
@@ -637,22 +641,22 @@ static void ebpf_create_sync_charts(int update_every)
{
if (local_syscalls[NETDATA_SYNC_FSYNC_IDX].enabled && local_syscalls[NETDATA_SYNC_FDATASYNC_IDX].enabled)
ebpf_create_sync_chart(NETDATA_EBPF_FILE_SYNC_CHART,
- "Monitor calls for fsync(2) and fdatasync(2).", 21300,
+ "Monitor calls to fsync(2) and fdatasync(2).", 21300,
NETDATA_SYNC_FSYNC_IDX, NETDATA_SYNC_FDATASYNC_IDX, update_every);
if (local_syscalls[NETDATA_SYNC_MSYNC_IDX].enabled)
ebpf_create_sync_chart(NETDATA_EBPF_MSYNC_CHART,
- "Monitor calls for msync(2).", 21301,
+ "Monitor calls to msync(2).", 21301,
NETDATA_SYNC_MSYNC_IDX, NETDATA_SYNC_MSYNC_IDX, update_every);
if (local_syscalls[NETDATA_SYNC_SYNC_IDX].enabled && local_syscalls[NETDATA_SYNC_SYNCFS_IDX].enabled)
ebpf_create_sync_chart(NETDATA_EBPF_SYNC_CHART,
- "Monitor calls for sync(2) and syncfs(2).", 21302,
+ "Monitor calls to sync(2) and syncfs(2).", 21302,
NETDATA_SYNC_SYNC_IDX, NETDATA_SYNC_SYNCFS_IDX, update_every);
if (local_syscalls[NETDATA_SYNC_SYNC_FILE_RANGE_IDX].enabled)
ebpf_create_sync_chart(NETDATA_EBPF_FILE_SEGMENT_CHART,
- "Monitor calls for sync_file_range(2).", 21303,
+ "Monitor calls to sync_file_range(2).", 21303,
NETDATA_SYNC_SYNC_FILE_RANGE_IDX, NETDATA_SYNC_SYNC_FILE_RANGE_IDX, update_every);
fflush(stdout);
diff --git a/collectors/ebpf.plugin/ebpf_unittest.c b/collectors/ebpf.plugin/ebpf_unittest.c
index 3e1443ad3771ed..11b449e03bc24b 100644
--- a/collectors/ebpf.plugin/ebpf_unittest.c
+++ b/collectors/ebpf.plugin/ebpf_unittest.c
@@ -12,8 +12,8 @@ ebpf_module_t test_em;
void ebpf_ut_initialize_structure(netdata_run_mode_t mode)
{
memset(&test_em, 0, sizeof(ebpf_module_t));
- test_em.thread_name = strdupz("process");
- test_em.config_name = test_em.thread_name;
+ test_em.info.thread_name = strdupz("process");
+ test_em.info.config_name = test_em.info.thread_name;
test_em.kernels = NETDATA_V3_10 | NETDATA_V4_14 | NETDATA_V4_16 | NETDATA_V4_18 | NETDATA_V5_4 | NETDATA_V5_10 |
NETDATA_V5_14;
test_em.pid_map_size = ND_EBPF_DEFAULT_PID_SIZE;
@@ -28,7 +28,7 @@ void ebpf_ut_initialize_structure(netdata_run_mode_t mode)
*/
void ebpf_ut_cleanup_memory()
{
- freez((void *)test_em.thread_name);
+ freez((void *)test_em.info.thread_name);
}
/**
@@ -70,14 +70,14 @@ int ebpf_ut_load_real_binary()
*/
int ebpf_ut_load_fake_binary()
{
- const char *original = test_em.thread_name;
+ const char *original = test_em.info.thread_name;
- test_em.thread_name = strdupz("I_am_not_here");
+ test_em.info.thread_name = strdupz("I_am_not_here");
int ret = ebpf_ut_load_binary();
ebpf_ut_cleanup_memory();
- test_em.thread_name = original;
+ test_em.info.thread_name = original;
return !ret;
}
diff --git a/collectors/ebpf.plugin/ebpf_vfs.c b/collectors/ebpf.plugin/ebpf_vfs.c
index e566e169da7b06..354901c9ccc484 100644
--- a/collectors/ebpf.plugin/ebpf_vfs.c
+++ b/collectors/ebpf.plugin/ebpf_vfs.c
@@ -420,6 +420,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
{
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_FILE_DELETED,
+ "",
"Files deleted",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_CGROUP_GROUP,
@@ -430,6 +431,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS,
+ "",
"Write to disk",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_CGROUP_GROUP,
@@ -441,6 +443,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
if (em->mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR,
+ "",
"Fails to write",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_CGROUP_GROUP,
@@ -452,6 +455,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_VFS_READ_CALLS,
+ "",
"Read from disk",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_CGROUP_GROUP,
@@ -463,6 +467,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
if (em->mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR,
+ "",
"Fails to read",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_CGROUP_GROUP,
@@ -474,6 +479,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES,
+ "",
"Bytes written on disk",
EBPF_COMMON_DIMENSION_BYTES,
NETDATA_VFS_CGROUP_GROUP,
@@ -484,6 +490,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_VFS_READ_BYTES,
+ "",
"Bytes read from disk",
EBPF_COMMON_DIMENSION_BYTES,
NETDATA_VFS_CGROUP_GROUP,
@@ -494,7 +501,8 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_VFS_FSYNC,
- "Calls to vfs_fsync",
+ "",
+ "Calls to vfs_fsync.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_CGROUP_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
@@ -505,6 +513,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
if (em->mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR,
+ "",
"Sync error",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_CGROUP_GROUP,
@@ -515,7 +524,8 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
}
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_VFS_OPEN,
- "Calls to vfs_open",
+ "",
+ "Calls to vfs_open.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_CGROUP_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
@@ -526,6 +536,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
if (em->mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR,
+ "",
"Open error",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_CGROUP_GROUP,
@@ -537,7 +548,8 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_VFS_CREATE,
- "Calls to vfs_create",
+ "",
+ "Calls to vfs_create.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_CGROUP_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
@@ -548,6 +560,7 @@ static void ebpf_obsolete_vfs_services(ebpf_module_t *em)
if (em->mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_SERVICE_FAMILY,
NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR,
+ "",
"Create error",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_CGROUP_GROUP,
@@ -589,144 +602,166 @@ static inline void ebpf_obsolete_vfs_cgroup_charts(ebpf_module_t *em) {
*/
void ebpf_obsolete_vfs_apps_charts(struct ebpf_module *em)
{
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_FILE_DELETED,
- "Files deleted",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20065,
- em->update_every);
-
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS,
- "Write to disk",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20066,
- em->update_every);
+ int order = 20275;
+ struct ebpf_target *w;
+ int update_every = em->update_every;
+ for (w = apps_groups_root_target; w; w = w->next) {
+ if (unlikely(!(w->charts_created & (1<mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR,
- "Fails to write",
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_unlink",
+ "Files deleted.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20067,
- em->update_every);
- }
-
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_VFS_READ_CALLS,
- "Read from disk",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20068,
- em->update_every);
-
- if (em->mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR,
- "Fails to read",
+ "app.ebpf_call_vfs_unlink",
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_write",
+ "Write to disk.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20069,
- em->update_every);
- }
-
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES,
- "Bytes written on disk",
- EBPF_COMMON_DIMENSION_BYTES,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20070,
- em->update_every);
-
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_VFS_READ_BYTES,
- "Bytes read from disk",
- EBPF_COMMON_DIMENSION_BYTES,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20071,
- em->update_every);
-
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_VFS_FSYNC,
- "Calls for vfs_fsync",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20072,
- em->update_every);
+ "app.ebpf_call_vfs_write",
+ order++,
+ update_every);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_write_error",
+ "Fails to write.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_write_error",
+ order++,
+ update_every);
+ }
- if (em->mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR,
- "Sync error",
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_read",
+ "Read from disk.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20073,
- em->update_every);
- }
-
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_VFS_OPEN,
- "Calls for vfs_open",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20074,
- em->update_every);
+ "app.ebpf_call_vfs_read",
+ order++,
+ update_every);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_read_error",
+ "Fails to read.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_read_error",
+ order++,
+ update_every);
+ }
- if (em->mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR,
- "Open error",
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_write_bytes",
+ "Bytes written on disk.",
+ EBPF_COMMON_DIMENSION_BYTES,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_write_bytes",
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_read_bytes",
+ "Bytes read from disk.",
+ EBPF_COMMON_DIMENSION_BYTES,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_read_bytes",
+ order++,
+ update_every);
+
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_fsync",
+ "Calls to vfs_fsync.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20075,
- em->update_every);
- }
+ "app.ebpf_call_vfs_fsync",
+ order++,
+ update_every);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_fsync_error",
+ "Fails to sync.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_fsync_error",
+ order++,
+ update_every);
+ }
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_VFS_CREATE,
- "Calls for vfs_create",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20076,
- em->update_every);
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_open",
+ "Calls to vfs_open.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_open",
+ order++,
+ update_every);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_open_error",
+ "Fails to open.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_open_error",
+ order++,
+ update_every);
+ }
- if (em->mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(NETDATA_APPS_FAMILY,
- NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR,
- "Create error",
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_create",
+ "Calls to vfs_create.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED,
- NULL,
- 20077,
- em->update_every);
+ "app.ebpf_call_vfs_create",
+ order++,
+ update_every);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_obsolete(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_create_error",
+ "Fails to create.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_create_error",
+ order++,
+ update_every);
+ }
+ w->charts_created &= ~(1<mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY,
NETDATA_VFS_FILE_ERR_COUNT,
+ "",
"Fails to write or read",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
@@ -783,7 +822,8 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY,
NETDATA_VFS_FSYNC,
- "Calls for vfs_fsync",
+ "",
+ "Calls to vfs_fsync.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE,
@@ -794,6 +834,7 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em)
if (em->mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY,
NETDATA_VFS_FSYNC_ERR,
+ "",
"Fails to synchronize",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
@@ -805,7 +846,8 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY,
NETDATA_VFS_OPEN,
- "Calls for vfs_open",
+ "",
+ "Calls to vfs_open.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE,
@@ -816,6 +858,7 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em)
if (em->mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY,
NETDATA_VFS_OPEN_ERR,
+ "",
"Fails to open a file",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
@@ -827,7 +870,8 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em)
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY,
NETDATA_VFS_CREATE,
- "Calls for vfs_create",
+ "",
+ "Calls to vfs_create.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE,
@@ -838,6 +882,7 @@ static void ebpf_obsolete_vfs_global(ebpf_module_t *em)
if (em->mode < MODE_ENTRY) {
ebpf_write_chart_obsolete(NETDATA_FILESYSTEM_FAMILY,
NETDATA_VFS_CREATE_ERR,
+ "",
"Fails to create a file.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
@@ -1086,123 +1131,72 @@ void ebpf_vfs_send_apps_data(ebpf_module_t *em, struct ebpf_target *root)
{
struct ebpf_target *w;
for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- ebpf_vfs_sum_pids(&w->vfs, w->root_pid);
- }
- }
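+ if (unlikely(!(w->charts_created & (1<<EBPF_MODULE_VFS_IDX))))
+ continue;
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_FILE_DELETED);
- for (w = root; w; w = w->next) {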
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.unlink_call);
- }
- }
- write_end_chart();
+ ebpf_vfs_sum_pids(&w->vfs, w->root_pid);
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.write_call + w->vfs.writev_call);
- }
- }
- write_end_chart();
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_unlink");
+ write_chart_dimension("calls", w->vfs.unlink_call);
+ ebpf_write_end_chart();
- if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.write_err + w->vfs.writev_err);
- }
- }
- write_end_chart();
- }
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_write");
+ write_chart_dimension("calls", w->vfs.write_call + w->vfs.writev_call);
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.read_call + w->vfs.readv_call);
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_write_error");
+ write_chart_dimension("calls", w->vfs.write_err + w->vfs.writev_err);
+ ebpf_write_end_chart();
}
- }
- write_end_chart();
- if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.read_err + w->vfs.readv_err);
- }
- }
- write_end_chart();
- }
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_read");
+ write_chart_dimension("calls", w->vfs.read_call + w->vfs.readv_call);
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.write_bytes + w->vfs.writev_bytes);
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_read_error");
+ write_chart_dimension("calls", w->vfs.read_err + w->vfs.readv_err);
+ ebpf_write_end_chart();
}
- }
- write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_BYTES);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.read_bytes + w->vfs.readv_bytes);
- }
- }
- write_end_chart();
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_write_bytes");
+ write_chart_dimension("writes", w->vfs.write_bytes + w->vfs.writev_bytes);
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.fsync_call);
- }
- }
- write_end_chart();
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_read_bytes");
+ write_chart_dimension("reads", w->vfs.read_bytes + w->vfs.readv_bytes);
+ ebpf_write_end_chart();
- if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.fsync_err);
- }
- }
- write_end_chart();
- }
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_fsync");
+ write_chart_dimension("calls", w->vfs.fsync_call);
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.open_call);
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_fsync_error");
+ write_chart_dimension("calls", w->vfs.fsync_err);
+ ebpf_write_end_chart();
}
- }
- write_end_chart();
- if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.open_err);
- }
- }
- write_end_chart();
- }
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_open");
+ write_chart_dimension("calls", w->vfs.open_call);
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.create_call);
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_open_error");
+ write_chart_dimension("calls", w->vfs.open_err);
+ ebpf_write_end_chart();
}
- }
- write_end_chart();
- if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_APPS_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR);
- for (w = root; w; w = w->next) {
- if (unlikely(w->exposed && w->processes)) {
- write_chart_dimension(w->name, w->vfs.create_err);
- }
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_create");
+ write_chart_dimension("calls", w->vfs.create_call);
+ ebpf_write_end_chart();
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_begin_chart(NETDATA_APP_FAMILY, w->clean_name, "_ebpf_call_vfs_create_error");
+ write_chart_dimension("calls", w->vfs.create_err);
+ ebpf_write_end_chart();
}
- write_end_chart();
}
}
@@ -1451,7 +1445,7 @@ static void ebpf_create_specific_vfs_charts(char *type, ebpf_module_t *em)
ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ],
1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
- ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls for vfs_fsync",
+ ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls to vfs_fsync.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_FSYNC_CONTEXT,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5507,
ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC],
@@ -1465,7 +1459,7 @@ static void ebpf_create_specific_vfs_charts(char *type, ebpf_module_t *em)
1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
}
- ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls for vfs_open",
+ ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls to vfs_open.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_OPEN_CONTEXT,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5509,
ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN],
@@ -1479,7 +1473,7 @@ static void ebpf_create_specific_vfs_charts(char *type, ebpf_module_t *em)
1, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
}
- ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls for vfs_create",
+ ebpf_create_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls to vfs_create.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP, NETDATA_CGROUP_VFS_CREATE_CONTEXT,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5511,
ebpf_create_global_dimension, &vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE],
@@ -1504,76 +1498,76 @@ static void ebpf_create_specific_vfs_charts(char *type, ebpf_module_t *em)
*/
static void ebpf_obsolete_specific_vfs_charts(char *type, ebpf_module_t *em)
{
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_DELETED, "Files deleted",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_FILE_DELETED, "", "Files deleted",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_UNLINK_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5500, em->update_every);
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, "Write to disk",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, "", "Write to disk",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_WRITE_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5501, em->update_every);
if (em->mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, "Fails to write",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, "", "Fails to write",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_WRITE_ERROR_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5502, em->update_every);
}
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, "Read from disk",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, "", "Read from disk",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_READ_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5503, em->update_every);
if (em->mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, "Fails to read",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, "", "Fails to read",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_READ_ERROR_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5504, em->update_every);
}
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, "Bytes written on disk",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, "", "Bytes written on disk",
EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_WRITE_BYTES_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5505, em->update_every);
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, "Bytes read from disk",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, "", "Bytes read from disk",
EBPF_COMMON_DIMENSION_BYTES, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_READ_BYTES_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5506, em->update_every);
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls for vfs_fsync",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "", "Calls to vfs_fsync.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_FSYNC_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5507, em->update_every);
if (em->mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, "Sync error",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, "", "Sync error",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_FSYNC_ERROR_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5508, em->update_every);
}
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls for vfs_open",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "", "Calls to vfs_open.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_OPEN_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5509, em->update_every);
if (em->mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, "Open error",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, "", "Open error",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_OPEN_ERROR_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5510, em->update_every);
}
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls for vfs_create",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "", "Calls to vfs_create.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_CREATE_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5511, em->update_every);
if (em->mode < MODE_ENTRY) {
- ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, "Create error",
+ ebpf_write_chart_obsolete(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, "", "Create error",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_GROUP,
NETDATA_EBPF_CHART_TYPE_LINE, NETDATA_CGROUP_VFS_CREATE_ERROR_CONTEXT,
NETDATA_CHART_PRIO_CGROUPS_CONTAINERS + 5512, em->update_every);
@@ -1590,78 +1584,78 @@ static void ebpf_obsolete_specific_vfs_charts(char *type, ebpf_module_t *em)
*/
static void ebpf_send_specific_vfs_data(char *type, netdata_publish_vfs_t *values, ebpf_module_t *em)
{
- write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_DELETED);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_FILE_DELETED, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_UNLINK].name, (long long)values->unlink_call);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].name,
(long long)values->write_call + (long long)values->writev_call);
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].name,
(long long)values->write_err + (long long)values->writev_err);
- write_end_chart();
+ ebpf_write_end_chart();
}
- write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].name,
(long long)values->read_call + (long long)values->readv_call);
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].name,
(long long)values->read_err + (long long)values->readv_err);
- write_end_chart();
+ ebpf_write_end_chart();
}
- write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_WRITE].name,
(long long)values->write_bytes + (long long)values->writev_bytes);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_READ].name,
(long long)values->read_bytes + (long long)values->readv_bytes);
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC].name,
(long long)values->fsync_call);
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_FSYNC].name,
(long long)values->fsync_err);
- write_end_chart();
+ ebpf_write_end_chart();
}
- write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN].name,
(long long)values->open_call);
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_OPEN].name,
(long long)values->open_err);
- write_end_chart();
+ ebpf_write_end_chart();
}
- write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE].name,
(long long)values->create_call);
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR);
+ ebpf_write_begin_chart(type, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, "");
write_chart_dimension(vfs_publish_aggregated[NETDATA_KEY_PUBLISH_VFS_CREATE].name,
(long long)values->create_err);
- write_end_chart();
+ ebpf_write_end_chart();
}
}
@@ -1722,7 +1716,7 @@ static void ebpf_create_systemd_vfs_charts(ebpf_module_t *em)
ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_READ_BYTES_CONTEXT,
NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
- ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls to vfs_fsync",
+ ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_FSYNC, "Calls to vfs_fsync.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED, 20072,
ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_FSYNC_CONTEXT,
@@ -1735,7 +1729,7 @@ static void ebpf_create_systemd_vfs_charts(ebpf_module_t *em)
ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_FSYNC_ERROR_CONTEXT,
NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
}
- ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls to vfs_open",
+ ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_OPEN, "Calls to vfs_open.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED, 20074,
ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_OPEN_CONTEXT,
@@ -1749,7 +1743,7 @@ static void ebpf_create_systemd_vfs_charts(ebpf_module_t *em)
NETDATA_EBPF_MODULE_NAME_VFS, em->update_every);
}
- ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls to vfs_create",
+ ebpf_create_charts_on_systemd(NETDATA_SYSCALL_APPS_VFS_CREATE, "Calls to vfs_create.",
EBPF_COMMON_DIMENSION_CALL, NETDATA_VFS_CGROUP_GROUP,
NETDATA_EBPF_CHART_TYPE_STACKED, 20076,
ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX], NETDATA_SYSTEMD_VFS_CREATE_CONTEXT,
@@ -1774,125 +1768,124 @@ static void ebpf_create_systemd_vfs_charts(ebpf_module_t *em)
static void ebpf_send_systemd_vfs_charts(ebpf_module_t *em)
{
ebpf_cgroup_target_t *ect;
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_DELETED);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_FILE_DELETED, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.unlink_call);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.write_call +
ect->publish_systemd_vfs.writev_call);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.write_err +
ect->publish_systemd_vfs.writev_err);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.read_call +
ect->publish_systemd_vfs.readv_call);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.read_err +
ect->publish_systemd_vfs.readv_err);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.write_bytes +
ect->publish_systemd_vfs.writev_bytes);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
-
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_BYTES);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_READ_BYTES, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.read_bytes +
ect->publish_systemd_vfs.readv_bytes);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.fsync_call);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.fsync_err);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.open_call);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.open_err);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.create_call);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
if (em->mode < MODE_ENTRY) {
- write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR);
+ ebpf_write_begin_chart(NETDATA_SERVICE_FAMILY, NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR, "");
for (ect = ebpf_cgroup_pids; ect ; ect = ect->next) {
if (unlikely(ect->systemd) && unlikely(ect->updated)) {
write_chart_dimension(ect->name, ect->publish_systemd_vfs.create_err);
}
}
- write_end_chart();
+ ebpf_write_end_chart();
}
}
@@ -1960,9 +1953,9 @@ static void vfs_collector(ebpf_module_t *em)
uint32_t lifetime = em->lifetime;
netdata_idx_t *stats = em->hash_table_stats;
memset(stats, 0, sizeof(em->hash_table_stats));
- while (!ebpf_exit_plugin && running_time < lifetime) {
+ while (!ebpf_plugin_exit && running_time < lifetime) {
(void)heartbeat_next(&hb, USEC_PER_SEC);
- if (ebpf_exit_plugin || ++counter != update_every)
+ if (ebpf_plugin_exit || ++counter != update_every)
continue;
counter = 0;
@@ -2098,7 +2091,7 @@ static void ebpf_create_global_charts(ebpf_module_t *em)
ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY,
NETDATA_VFS_FSYNC,
- "Calls for vfs_fsync",
+ "Calls to vfs_fsync.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
NULL,
@@ -2124,7 +2117,7 @@ static void ebpf_create_global_charts(ebpf_module_t *em)
ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY,
NETDATA_VFS_OPEN,
- "Calls for vfs_open",
+ "Calls to vfs_open.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
NULL,
@@ -2150,7 +2143,7 @@ static void ebpf_create_global_charts(ebpf_module_t *em)
ebpf_create_chart(NETDATA_FILESYSTEM_FAMILY,
NETDATA_VFS_CREATE,
- "Calls for vfs_create",
+ "Calls to vfs_create.",
EBPF_COMMON_DIMENSION_CALL,
NETDATA_VFS_GROUP,
NULL,
@@ -2188,127 +2181,219 @@ static void ebpf_create_global_charts(ebpf_module_t *em)
void ebpf_vfs_create_apps_charts(struct ebpf_module *em, void *ptr)
{
struct ebpf_target *root = ptr;
+ struct ebpf_target *w;
+ int order = 20275;
+ int update_every = em->update_every;
+ for (w = root; w; w = w->next) {
+ if (unlikely(!w->exposed))
+ continue;
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_FILE_DELETED,
- "Files deleted",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20065,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
-
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS,
- "Write to disk",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20066,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_unlink",
+ "Files deleted.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_unlink",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_write",
+ "Write to disk.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_write",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_write_error",
+ "Fails to write.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_write_error",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+ }
- if (em->mode < MODE_ENTRY) {
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_CALLS_ERROR,
- "Fails to write",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20067,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
- }
-
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_CALLS,
- "Read from disk",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20068,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_read",
+ "Read from disk.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_read",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_read_error",
+ "Fails to read.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_read_error",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+ }
- if (em->mode < MODE_ENTRY) {
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_CALLS_ERROR,
- "Fails to read",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20069,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
- }
-
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_WRITE_BYTES,
- "Bytes written on disk", EBPF_COMMON_DIMENSION_BYTES,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20070,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
-
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_READ_BYTES,
- "Bytes read from disk", EBPF_COMMON_DIMENSION_BYTES,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20071,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
-
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_FSYNC,
- "Calls for vfs_fsync", EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20072,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_write_bytes",
+ "Bytes written on disk.",
+ EBPF_COMMON_DIMENSION_BYTES,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_write_bytes",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION writes '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_read_bytes",
+ "Bytes read from disk.",
+ EBPF_COMMON_DIMENSION_BYTES,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_read_bytes",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION reads '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_fsync",
+ "Calls to vfs_fsync.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_fsync",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_fsync_error",
+ "Fails to sync.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_fsync_error",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+ }
- if (em->mode < MODE_ENTRY) {
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_FSYNC_CALLS_ERROR,
- "Sync error",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20073,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
- }
-
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_OPEN,
- "Calls for vfs_open", EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20074,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_open",
+ "Calls to vfs_open.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_open",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_open_error",
+ "Fails to open.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_open_error",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+ }
- if (em->mode < MODE_ENTRY) {
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_OPEN_CALLS_ERROR,
- "Open error",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20075,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
- }
-
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_CREATE,
- "Calls for vfs_create", EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20076,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_create",
+ "Calls to vfs_create.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_create",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+
+ if (em->mode < MODE_ENTRY) {
+ ebpf_write_chart_cmd(NETDATA_APP_FAMILY,
+ w->clean_name,
+ "_ebpf_call_vfs_create_error",
+ "Fails to create a file.",
+ EBPF_COMMON_DIMENSION_CALL,
+ NETDATA_VFS_GROUP,
+ NETDATA_EBPF_CHART_TYPE_STACKED,
+ "app.ebpf_call_vfs_create_error",
+ order++,
+ update_every,
+ NETDATA_EBPF_MODULE_NAME_VFS);
+ ebpf_create_chart_labels("app_group", w->name, 1);
+ ebpf_commit_label();
+ fprintf(stdout, "DIMENSION calls '' %s 1 1\n", ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX]);
+ }
- if (em->mode < MODE_ENTRY) {
- ebpf_create_charts_on_apps(NETDATA_SYSCALL_APPS_VFS_CREATE_CALLS_ERROR,
- "Create error",
- EBPF_COMMON_DIMENSION_CALL,
- NETDATA_VFS_GROUP,
- NETDATA_EBPF_CHART_TYPE_STACKED,
- 20077,
- ebpf_algorithms[NETDATA_EBPF_INCREMENTAL_IDX],
- root, em->update_every, NETDATA_EBPF_MODULE_NAME_VFS);
+ w->charts_created |= 1<apps_charts |= NETDATA_EBPF_APPS_FLAG_CHART_CREATED;
diff --git a/collectors/ebpf.plugin/integrations/ebpf_cachestat.md b/collectors/ebpf.plugin/integrations/ebpf_cachestat.md
new file mode 100644
index 00000000000000..5bf0a37749eaf6
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_cachestat.md
@@ -0,0 +1,179 @@
+
+
+# eBPF Cachestat
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: cachestat
+
+
+
+## Overview
+
+Monitor Linux page cache events, giving users a general view of how the kernel is manipulating files.
+
+Attach tracing (kprobe, trampoline) to internal kernel functions according to the options used to compile the kernel.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions during installation.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per eBPF Cachestat instance
+
+These metrics show total number of calls to functions inside kernel.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.cachestat_ratio | ratio | % |
+| mem.cachestat_dirties | dirty | page/s |
+| mem.cachestat_hits | hit | hits/s |
+| mem.cachestat_misses | miss | misses/s |
+
+### Per apps
+
+These Metrics show grouped information per apps group.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| app_group | The name of the group defined in the configuration. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| app.ebpf_cachestat_hit_ratio | ratio | % |
+| app.ebpf_cachestat_dirty_pages | pages | page/s |
+| app.ebpf_cachestat_access | hits | hits/s |
+| app.ebpf_cachestat_misses | misses | misses/s |
+
+### Per cgroup
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.cachestat_ratio | ratio | % |
+| cgroup.cachestat_dirties | dirty | page/s |
+| cgroup.cachestat_hits | hit | hits/s |
+| cgroup.cachestat_misses | miss | misses/s |
+| services.cachestat_ratio | a dimension per systemd service | % |
+| services.cachestat_dirties | a dimension per systemd service | page/s |
+| services.cachestat_hits | a dimension per systemd service | hits/s |
+| services.cachestat_misses | a dimension per systemd service | misses/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some of the cited names can differ according to the preferences of Linux distributions.
+When these options are not set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image to the boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/cachestat.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/cachestat.conf
+```
+#### Options
+
+All options are defined inside section `[global]`.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| apps | Enable or disable integration with apps.plugin | no | no |
+| cgroups | Enable or disable integration with cgroup.plugin | no | no |
+| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no |
+| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin checks OS configuration before loading). | auto | no |
+| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same as legacy code). | trampoline | no |
+| maps per core | Define how the plugin will load its hash maps. When enabled (`yes`), the plugin will load one hash table per core instead of keeping centralized information. | yes | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_dcstat.md b/collectors/ebpf.plugin/integrations/ebpf_dcstat.md
new file mode 100644
index 00000000000000..4c5719026ed301
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_dcstat.md
@@ -0,0 +1,177 @@
+
+
+# eBPF DCstat
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: dcstat
+
+
+
+## Overview
+
+Monitor directory cache events per application, giving an overall view of files in memory or on a storage device.
+
+Attach tracing (kprobe, trampoline) to internal kernel functions according to the options used to compile the kernel.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions during installation.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per apps
+
+These Metrics show grouped information per apps group.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| app_group | The name of the group defined in the configuration. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| app.ebpf_dc_ratio | ratio | % |
+| app.ebpf_dc_reference | files | files |
+| app.ebpf_dc_not_cache | files | files |
+| app.ebpf_dc_not_found | files | files |
+
+### Per filesystem
+
+These metrics show total number of calls to functions inside kernel.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| filesystem.dc_reference | reference, slow, miss | files |
+| filesystem.dc_hit_ratio | ratio | % |
+
+### Per cgroup
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.dc_ratio | ratio | % |
+| cgroup.dc_reference | reference | files |
+| cgroup.dc_not_cache | slow | files |
+| cgroup.dc_not_found | miss | files |
+| services.dc_ratio | a dimension per systemd service | % |
+| services.dc_reference | a dimension per systemd service | files |
+| services.dc_not_cache | a dimension per systemd service | files |
+| services.dc_not_found | a dimension per systemd service | files |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some of the cited names can differ according to the preferences of Linux distributions.
+When these options are not set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image to the boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/dcstat.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/dcstat.conf
+```
+#### Options
+
+All options are defined inside section `[global]`.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| apps | Enable or disable integration with apps.plugin | no | no |
+| cgroups | Enable or disable integration with cgroup.plugin | no | no |
+| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no |
+| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin checks OS configuration before loading). | auto | no |
+| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same as legacy code). | trampoline | no |
+| maps per core | Define how the plugin will load its hash maps. When enabled (`yes`), the plugin will load one hash table per core instead of keeping centralized information. | yes | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_disk.md b/collectors/ebpf.plugin/integrations/ebpf_disk.md
new file mode 100644
index 00000000000000..557da125d8d529
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_disk.md
@@ -0,0 +1,137 @@
+
+
+# eBPF Disk
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: disk
+
+
+
+## Overview
+
+Measure latency for I/O events on disk.
+
+Attach tracepoints to internal kernel functions.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions during installation.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per disk
+
+These metrics measure latency for I/O events on every hard disk present on host.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| disk.latency_io | latency | calls/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some of the cited names can differ according to the preferences of Linux distributions.
+When these options are not set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image to the boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+#### Debug Filesystem
+
+This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`).
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/disk.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/disk.conf
+```
+#### Options
+
+All options are defined inside section `[global]`.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_filedescriptor.md b/collectors/ebpf.plugin/integrations/ebpf_filedescriptor.md
new file mode 100644
index 00000000000000..23f5bd26e35773
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_filedescriptor.md
@@ -0,0 +1,177 @@
+
+
+# eBPF Filedescriptor
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: filedescriptor
+
+
+
+## Overview
+
+Monitor calls to the functions responsible for opening or closing a file descriptor, and possible errors.
+
+Attach tracing (kprobe and trampoline) to internal kernel functions according to the options used to compile the kernel.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside kernel. Netdata sets necessary permissions during installation time.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+Depending on the kernel version and how frequently files are opened and closed, this thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per cgroup
+
+These Metrics show grouped information per cgroup/service.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.fd_open | open | calls/s |
+| cgroup.fd_open_error | open | calls/s |
+| cgroup.fd_closed | close | calls/s |
+| cgroup.fd_close_error | close | calls/s |
+| services.file_open | a dimension per systemd service | calls/s |
+| services.file_open_error | a dimension per systemd service | calls/s |
+| services.file_closed | a dimension per systemd service | calls/s |
+| services.file_close_error | a dimension per systemd service | calls/s |
+
+### Per eBPF Filedescriptor instance
+
+These metrics show total number of calls to functions inside kernel.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| filesystem.file_descriptor | open, close | calls/s |
+| filesystem.file_error | open, close | calls/s |
+
+### Per apps
+
+These Metrics show grouped information per apps group.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| app_group | The name of the group defined in the configuration. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| app.ebpf_file_open | calls | calls/s |
+| app.ebpf_file_open_error | calls | calls/s |
+| app.ebpf_file_closed | calls | calls/s |
+| app.ebpf_file_close_error | calls | calls/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some of the cited names can differ according to the preferences of Linux distributions.
+When these options are not set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image to the boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/fd.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/fd.conf
+```
+#### Options
+
+All options are defined inside section `[global]`.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| apps | Enable or disable integration with apps.plugin | no | no |
+| cgroups | Enable or disable integration with cgroup.plugin | no | no |
+| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no |
+| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin checks OS configuration before loading). | auto | no |
+| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same as legacy code). | trampoline | no |
+| maps per core | Define how the plugin will load its hash maps. When enabled (`yes`), the plugin will load one hash table per core instead of keeping centralized information. | yes | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_filesystem.md b/collectors/ebpf.plugin/integrations/ebpf_filesystem.md
new file mode 100644
index 00000000000000..7a1bb832b6195e
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_filesystem.md
@@ -0,0 +1,163 @@
+
+
+# eBPF Filesystem
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: filesystem
+
+
+
+## Overview
+
+Monitor latency for main actions on filesystem like I/O events.
+
+Attach tracing (kprobe, trampoline) to internal kernel functions according to the options used to compile the kernel.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions at installation time.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per filesystem
+
+Latency charts associated with filesystem actions.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| filesystem.read_latency | latency period | calls/s |
+| filesystem.open_latency | latency period | calls/s |
+| filesystem.sync_latency | latency period | calls/s |
+
+### Per filesystem
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| filesystem.write_latency | latency period | calls/s |
+
+### Per eBPF Filesystem instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| filesystem.attributte_latency | latency period | calls/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some of the cited names can differ according to the preferences of Linux distributions.
+When you do not have the options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image for boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/filesystem.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/filesystem.conf
+```
+#### Options
+
+This configuration file has two different sections. The `[global]` section overwrites default options, while `[filesystem]` allows the user to select the filesystems to monitor.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+| btrfsdist | Enable or disable latency monitoring for functions associated with btrfs filesystem. | yes | no |
+| ext4dist | Enable or disable latency monitoring for functions associated with ext4 filesystem. | yes | no |
+| nfsdist | Enable or disable latency monitoring for functions associated with nfs filesystem. | yes | no |
+| xfsdist | Enable or disable latency monitoring for functions associated with xfs filesystem. | yes | no |
+| zfsdist | Enable or disable latency monitoring for functions associated with zfs filesystem. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_hardirq.md b/collectors/ebpf.plugin/integrations/ebpf_hardirq.md
new file mode 100644
index 00000000000000..f9b52962450e31
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_hardirq.md
@@ -0,0 +1,137 @@
+
+
+# eBPF Hardirq
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: hardirq
+
+
+
+## Overview
+
+Monitor latency for each HardIRQ available.
+
+Attach tracepoints to internal kernel functions.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions at installation time.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per eBPF Hardirq instance
+
+These metrics show latest timestamp for each hardIRQ available on host.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.hardirq_latency | hardirq names | milliseconds |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some of the cited names can differ according to the preferences of Linux distributions.
+When you do not have the options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image for boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+#### Debug Filesystem
+
+This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`).
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/hardirq.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/hardirq.conf
+```
+#### Options
+
+All options are defined inside section `[global]`.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_mdflush.md b/collectors/ebpf.plugin/integrations/ebpf_mdflush.md
new file mode 100644
index 00000000000000..0081b7d834a750
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_mdflush.md
@@ -0,0 +1,132 @@
+
+
+# eBPF MDflush
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: mdflush
+
+
+
+## Overview
+
+Monitor when flush events happen between disks.
+
+Attach tracing (kprobe, trampoline) to internal kernel functions according to the options used to compile the kernel.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions at installation time.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that `md_flush_request` is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per eBPF MDflush instance
+
+Number of times md_flush_request was called since last time.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mdstat.mdstat_flush | disk | flushes |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some of the cited names can differ according to the preferences of Linux distributions.
+When you do not have the options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image for boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/mdflush.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/mdflush.conf
+```
+#### Options
+
+All options are defined inside section `[global]`.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_mount.md b/collectors/ebpf.plugin/integrations/ebpf_mount.md
new file mode 100644
index 00000000000000..d19e57809f2316
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_mount.md
@@ -0,0 +1,140 @@
+
+
+# eBPF Mount
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: mount
+
+
+
+## Overview
+
+Monitor calls for mount and umount syscall.
+
+Attach tracing (kprobe, trampoline) to internal kernel functions according to the options used to compile the kernel.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions at installation time.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT, CONFIG_HAVE_SYSCALL_TRACEPOINTS), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per eBPF Mount instance
+
+Calls for syscalls mount and umount.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mount_points.call | mount, umount | calls/s |
+| mount_points.error | mount, umount | calls/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some of the cited names can differ according to the preferences of Linux distributions.
+When you do not have the options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image for boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+#### Debug Filesystem
+
+This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`).
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/mount.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/mount.conf
+```
+#### Options
+
+All options are defined inside section `[global]`.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (the plugin checks the OS configuration before loading). | auto | no |
+| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). | trampoline | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_oomkill.md b/collectors/ebpf.plugin/integrations/ebpf_oomkill.md
new file mode 100644
index 00000000000000..897cddfacb3514
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_oomkill.md
@@ -0,0 +1,160 @@
+
+
+# eBPF OOMkill
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: oomkill
+
+
+
+## Overview
+
+Monitor applications that reach out of memory.
+
+Attach tracepoint to internal kernel functions.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions at installation time.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per cgroup
+
+These metrics show cgroup/service that reached OOM.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.oomkills | cgroup name | kills |
+| services.oomkills | a dimension per systemd service | kills |
+
+### Per apps
+
+These metrics show cgroup/service that reached OOM.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| app_group | The name of the group defined in the configuration. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| app.oomkill | kills | kills |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some of the cited names can differ according to the preferences of Linux distributions.
+When you do not have the options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image for boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+#### Debug Filesystem
+
+This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`).
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/oomkill.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/oomkill.conf
+```
+#### Options
+
+Overwrite default configuration reducing number of I/O events
+
+
+#### Examples
+There are no configuration examples.
+
+
+
+## Troubleshooting
+
+### update every
+
+
+
+### ebpf load mode
+
+
+
+### lifetime
+
+
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_process.md b/collectors/ebpf.plugin/integrations/ebpf_process.md
new file mode 100644
index 00000000000000..109890139d5a41
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_process.md
@@ -0,0 +1,111 @@
+
+
+# eBPF Process
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: process
+
+
+
+## Overview
+
+Monitor internal memory usage.
+
+Uses netdata internal statistic to monitor memory management by plugin.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per eBPF Process instance
+
+How plugin is allocating memory.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| netdata.ebpf_aral_stat_size | memory | bytes |
+| netdata.ebpf_aral_stat_alloc | aral | calls |
+| netdata.ebpf_threads | total, running | threads |
+| netdata.ebpf_load_methods | legacy, co-re | methods |
+| netdata.ebpf_kernel_memory | memory_locked | bytes |
+| netdata.ebpf_hash_tables_count | hash_table | hash tables |
+| netdata.ebpf_aral_stat_size | memory | bytes |
+| netdata.ebpf_aral_stat_alloc | aral | calls |
+| netdata.ebpf_aral_stat_size | memory | bytes |
+| netdata.ebpf_aral_stat_alloc | aral | calls |
+| netdata.ebpf_hash_tables_insert_pid_elements | thread | rows |
+| netdata.ebpf_hash_tables_remove_pid_elements | thread | rows |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Netdata flags.
+
+To have these charts you need to compile netdata with flag `NETDATA_DEV_MODE`.
+
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_processes.md b/collectors/ebpf.plugin/integrations/ebpf_processes.md
new file mode 100644
index 00000000000000..62542359a8d2b2
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_processes.md
@@ -0,0 +1,187 @@
+
+
+# eBPF Processes
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: processes
+
+
+
+## Overview
+
+Monitor calls for function creating tasks (threads and processes) inside Linux kernel.
+
+Attach tracing (kprobe or tracepoint, and trampoline) to internal kernel functions.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions at installation time.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per eBPF Processes instance
+
+These metrics show total number of calls to functions inside kernel.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.process_thread | process | calls/s |
+| system.process_status | process, zombie | difference |
+| system.exit | process | calls/s |
+| system.task_error | task | calls/s |
+
+### Per apps
+
+These Metrics show grouped information per apps group.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| app_group | The name of the group defined in the configuration. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| app.process_create | calls | calls/s |
+| app.thread_create | call | calls/s |
+| app.task_exit | call | calls/s |
+| app.task_close | call | calls/s |
+| app.task_error | app | calls/s |
+
+### Per cgroup
+
+These Metrics show grouped information per cgroup/service.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.process_create | process | calls/s |
+| cgroup.thread_create | thread | calls/s |
+| cgroup.task_exit | exit | calls/s |
+| cgroup.task_close | process | calls/s |
+| cgroup.task_error | process | calls/s |
+| services.process_create | a dimension per systemd service | calls/s |
+| services.thread_create | a dimension per systemd service | calls/s |
+| services.task_close | a dimension per systemd service | calls/s |
+| services.task_exit | a dimension per systemd service | calls/s |
+| services.task_error | a dimension per systemd service | calls/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some of the cited names can differ according to the preferences of Linux distributions.
+When you do not have the options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image for boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+#### Debug Filesystem
+
+This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`).
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/process.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/process.conf
+```
+#### Options
+
+All options are defined inside section `[global]`.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| apps | Enable or disable integration with apps.plugin | no | no |
+| cgroups | Enable or disable integration with cgroup.plugin | no | no |
+| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no |
+| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (the plugin checks the OS configuration before loading). | auto | no |
+| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same of legacy code). This plugin will always try to attach a tracepoint, so option here will impact only function used to monitor task (thread and process) creation. | trampoline | no |
+| maps per core | Define how the plugin will load its hash maps. When enabled (`yes`), the plugin will load one hash table per core instead of keeping centralized information. | yes | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_shm.md b/collectors/ebpf.plugin/integrations/ebpf_shm.md
new file mode 100644
index 00000000000000..ffa05c77030944
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_shm.md
@@ -0,0 +1,185 @@
+
+
+# eBPF SHM
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: shm
+
+
+
+## Overview
+
+Monitor syscalls responsible for manipulating shared memory.
+
+Attach tracing (kprobe, trampoline) to internal kernel functions according to the options used to compile the kernel.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions during installation.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per cgroup
+
+These Metrics show grouped information per cgroup/service.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.shmget | get | calls/s |
+| cgroup.shmat | at | calls/s |
+| cgroup.shmdt | dt | calls/s |
+| cgroup.shmctl | ctl | calls/s |
+| services.shmget | a dimension per systemd service | calls/s |
+| services.shmat | a dimension per systemd service | calls/s |
+| services.shmdt | a dimension per systemd service | calls/s |
+| services.shmctl | a dimension per systemd service | calls/s |
+
+### Per apps
+
+These Metrics show grouped information per apps group.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| app_group | The name of the group defined in the configuration. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| app.ebpf_shmget_call | calls | calls/s |
+| app.ebpf_shmat_call | calls | calls/s |
+| app.ebpf_shmdt_call | calls | calls/s |
+| app.ebpf_shmctl_call | calls | calls/s |
+
+### Per eBPF SHM instance
+
+These Metrics show number of calls for specified syscall.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.shared_memory_calls | get, at, dt, ctl | calls/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some cited names can be different according to the preferences of Linux distributions.
+When you do not have the options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image for boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+#### Debug Filesystem
+
+This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`).
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/shm.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/shm.conf
+```
+#### Options
+
+This configuration file has two different sections. The `[global]` section overwrites all default options, while `[syscalls]` allows the user to select the syscalls to monitor.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| apps | Enable or disable integration with apps.plugin | no | no |
+| cgroups | Enable or disable integration with cgroup.plugin | no | no |
+| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no |
+| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin checks OS configuration before loading). | auto | no |
+| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same as legacy code). | trampoline | no |
+| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead of having centralized information. | yes | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+| shmget | Enable or disable monitoring for syscall `shmget` | yes | no |
+| shmat | Enable or disable monitoring for syscall `shmat` | yes | no |
+| shmdt | Enable or disable monitoring for syscall `shmdt` | yes | no |
+| shmctl | Enable or disable monitoring for syscall `shmctl` | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_socket.md b/collectors/ebpf.plugin/integrations/ebpf_socket.md
new file mode 100644
index 00000000000000..dc7a7d07b5232b
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_socket.md
@@ -0,0 +1,201 @@
+
+
+# eBPF Socket
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: socket
+
+
+
+## Overview
+
+Monitor bandwidth consumption per application for protocols TCP and UDP.
+
+Attach tracing (kprobe, trampoline) to internal kernel functions according to the options used to compile the kernel.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions during installation.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per eBPF Socket instance
+
+These metrics show total number of calls to functions inside kernel.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ip.inbound_conn | connection_tcp | connections/s |
+| ip.tcp_outbound_conn | received | connections/s |
+| ip.tcp_functions | received, send, closed | calls/s |
+| ip.total_tcp_bandwidth | received, send | kilobits/s |
+| ip.tcp_error | received, send | calls/s |
+| ip.tcp_retransmit | retransmited | calls/s |
+| ip.udp_functions | received, send | calls/s |
+| ip.total_udp_bandwidth | received, send | kilobits/s |
+| ip.udp_error | received, send | calls/s |
+
+### Per apps
+
+These metrics show grouped information per apps group.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| app_group | The name of the group defined in the configuration. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| app.ebpf_call_tcp_v4_connection | connections | connections/s |
+| app.ebpf_call_tcp_v6_connection | connections | connections/s |
+| app.ebpf_sock_bytes_sent | bandwidth | kilobits/s |
+| app.ebpf_sock_bytes_received | bandwidth | kilobits/s |
+| app.ebpf_call_tcp_sendmsg | calls | calls/s |
+| app.ebpf_call_tcp_cleanup_rbuf | calls | calls/s |
+| app.ebpf_call_tcp_retransmit | calls | calls/s |
+| app.ebpf_call_udp_sendmsg | calls | calls/s |
+| app.ebpf_call_udp_recvmsg | calls | calls/s |
+
+### Per cgroup
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.net_conn_ipv4 | connected_v4 | connections/s |
+| cgroup.net_conn_ipv6 | connected_v6 | connections/s |
+| cgroup.net_bytes_recv | received | calls/s |
+| cgroup.net_bytes_sent | sent | calls/s |
+| cgroup.net_tcp_recv | received | calls/s |
+| cgroup.net_tcp_send | sent | calls/s |
+| cgroup.net_retransmit | retransmitted | calls/s |
+| cgroup.net_udp_send | sent | calls/s |
+| cgroup.net_udp_recv | received | calls/s |
+| services.net_conn_ipv6 | a dimension per systemd service | connections/s |
+| services.net_bytes_recv | a dimension per systemd service | kilobits/s |
+| services.net_bytes_sent | a dimension per systemd service | kilobits/s |
+| services.net_tcp_recv | a dimension per systemd service | calls/s |
+| services.net_tcp_send | a dimension per systemd service | calls/s |
+| services.net_tcp_retransmit | a dimension per systemd service | calls/s |
+| services.net_udp_send | a dimension per systemd service | calls/s |
+| services.net_udp_recv | a dimension per systemd service | calls/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some cited names can be different according to the preferences of Linux distributions.
+When you do not have the options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image for boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/network.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/network.conf
+```
+#### Options
+
+All options are defined inside section `[global]`. Options inside `network connections` are ignored for now.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| apps | Enable or disable integration with apps.plugin | no | no |
+| cgroups | Enable or disable integration with cgroup.plugin | no | no |
+| bandwidth table size | Number of elements stored inside hash tables used to monitor calls per PID. | 16384 | no |
+| ipv4 connection table size | Number of elements stored inside hash tables used to monitor calls per IPV4 connections. | 16384 | no |
+| ipv6 connection table size | Number of elements stored inside hash tables used to monitor calls per IPV6 connections. | 16384 | no |
+| udp connection table size | Number of temporary elements stored inside hash tables used to monitor UDP connections. | 4096 | no |
+| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin checks OS configuration before loading). | auto | no |
+| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same as legacy code). | trampoline | no |
+| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead of having centralized information. | yes | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_softirq.md b/collectors/ebpf.plugin/integrations/ebpf_softirq.md
new file mode 100644
index 00000000000000..6a4312c6ef6fc7
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_softirq.md
@@ -0,0 +1,137 @@
+
+
+# eBPF SoftIRQ
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: softirq
+
+
+
+## Overview
+
+Monitor latency for each SoftIRQ available.
+
+Attach kprobe to internal kernel functions.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions during installation.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per eBPF SoftIRQ instance
+
+These metrics show latest timestamp for each softIRQ available on host.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.softirq_latency | soft IRQs | milliseconds |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some cited names can be different according to the preferences of Linux distributions.
+When you do not have the options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image for boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+#### Debug Filesystem
+
+This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug/`).
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/softirq.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/softirq.conf
+```
+#### Options
+
+All options are defined inside section `[global]`.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_swap.md b/collectors/ebpf.plugin/integrations/ebpf_swap.md
new file mode 100644
index 00000000000000..ce2423f8ded240
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_swap.md
@@ -0,0 +1,170 @@
+
+
+# eBPF SWAP
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: swap
+
+
+
+## Overview
+
+Monitors when swap has I/O events and applications executing events.
+
+Attach tracing (kprobe, trampoline) to internal kernel functions according to the options used to compile the kernel.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions during installation.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per cgroup
+
+These Metrics show grouped information per cgroup/service.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.swap_read | read | calls/s |
+| cgroup.swap_write | write | calls/s |
+| services.swap_read | a dimension per systemd service | calls/s |
+| services.swap_write | a dimension per systemd service | calls/s |
+
+### Per apps
+
+These Metrics show grouped information per apps group.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| app_group | The name of the group defined in the configuration. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| app.ebpf_call_swap_readpage | a dimension per app group | calls/s |
+| app.ebpf_call_swap_writepage | a dimension per app group | calls/s |
+
+### Per eBPF SWAP instance
+
+These metrics show total number of calls to functions inside kernel.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.swapcalls | write, read | calls/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some cited names can be different according to the preferences of Linux distributions.
+When you do not have the options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image for boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/swap.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/swap.conf
+```
+#### Options
+
+All options are defined inside section `[global]`.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| apps | Enable or disable integration with apps.plugin | no | no |
+| cgroups | Enable or disable integration with cgroup.plugin | no | no |
+| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no |
+| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin checks OS configuration before loading). | auto | no |
+| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same as legacy code). | trampoline | no |
+| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead of having centralized information. | yes | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_sync.md b/collectors/ebpf.plugin/integrations/ebpf_sync.md
new file mode 100644
index 00000000000000..6f6c246a7df4fc
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_sync.md
@@ -0,0 +1,157 @@
+
+
+# eBPF Sync
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: sync
+
+
+
+## Overview
+
+Monitor syscalls responsible for moving data from memory to a storage device.
+
+Attach tracing (kprobe, trampoline) to internal kernel functions according to the options used to compile the kernel.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions during installation.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT, CONFIG_HAVE_SYSCALL_TRACEPOINTS), files inside debugfs, and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per eBPF Sync instance
+
+These metrics show total number of calls to functions inside kernel.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.file_sync | fsync, fdatasync | calls/s |
+| mem.memory_map | msync | calls/s |
+| mem.sync | sync, syncfs | calls/s |
+| mem.file_segment | sync_file_range | calls/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ sync_freq ](https://github.com/netdata/netdata/blob/master/health/health.d/synchronization.conf) | mem.sync | number of sync() system calls. Every call causes all pending modifications to filesystem metadata and cached file data to be written to the underlying filesystems. |
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some cited names can be different according to the preferences of Linux distributions.
+When you do not have the options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image for boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+#### Debug Filesystem
+
+This thread needs to attach a tracepoint to monitor when a process schedules an exit event. To allow this specific feature, it is necessary to mount `debugfs` (`mount -t debugfs none /sys/kernel/debug`).
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/sync.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/sync.conf
+```
+#### Options
+
+This configuration file has two different sections. The `[global]` section overwrites all default options, while `[syscalls]` allows the user to select the syscalls to monitor.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| apps | Enable or disable integration with apps.plugin | no | no |
+| cgroups | Enable or disable integration with cgroup.plugin | no | no |
+| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no |
+| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin checks OS configuration before loading). | auto | no |
+| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same as legacy code). | trampoline | no |
+| maps per core | Define how plugin will load their hash maps. When enabled (`yes`) plugin will load one hash table per core, instead of having centralized information. | yes | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+| sync | Enable or disable monitoring for syscall `sync` | yes | no |
+| msync | Enable or disable monitoring for syscall `msync` | yes | no |
+| fsync | Enable or disable monitoring for syscall `fsync` | yes | no |
+| fdatasync | Enable or disable monitoring for syscall `fdatasync` | yes | no |
+| syncfs | Enable or disable monitoring for syscall `syncfs` | yes | no |
+| sync_file_range | Enable or disable monitoring for syscall `sync_file_range` | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/integrations/ebpf_vfs.md b/collectors/ebpf.plugin/integrations/ebpf_vfs.md
new file mode 100644
index 00000000000000..4b824e975b96a4
--- /dev/null
+++ b/collectors/ebpf.plugin/integrations/ebpf_vfs.md
@@ -0,0 +1,212 @@
+
+
+# eBPF VFS
+
+
+
+
+
+Plugin: ebpf.plugin
+Module: vfs
+
+
+
+## Overview
+
+Monitor I/O events on Linux Virtual Filesystem.
+
+Attach tracing (kprobe, trampoline) to internal kernel functions according to the options used to compile the kernel.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid because it loads data inside the kernel. Netdata sets the necessary permissions at installation time.
+
+### Default Behavior
+
+#### Auto-Detection
+
+The plugin checks kernel compilation flags (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) and presence of BTF files to decide which eBPF program will be attached.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+This thread will add overhead every time that an internal kernel function monitored by this thread is called. The estimated additional period of time is between 90-200ms per call on kernels that do not have BTF technology.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per cgroup
+
+These Metrics show grouped information per cgroup/service.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cgroup.vfs_unlink | delete | calls/s |
+| cgroup.vfs_write | write | calls/s |
+| cgroup.vfs_write_error | write | calls/s |
+| cgroup.vfs_read | read | calls/s |
+| cgroup.vfs_read_error | read | calls/s |
+| cgroup.vfs_write_bytes | write | bytes/s |
+| cgroup.vfs_read_bytes | read | bytes/s |
+| cgroup.vfs_fsync | fsync | calls/s |
+| cgroup.vfs_fsync_error | fsync | calls/s |
+| cgroup.vfs_open | open | calls/s |
+| cgroup.vfs_open_error | open | calls/s |
+| cgroup.vfs_create | create | calls/s |
+| cgroup.vfs_create_error | create | calls/s |
+| services.vfs_unlink | a dimension per systemd service | calls/s |
+| services.vfs_write | a dimension per systemd service | calls/s |
+| services.vfs_write_error | a dimension per systemd service | calls/s |
+| services.vfs_read | a dimension per systemd service | calls/s |
+| services.vfs_read_error | a dimension per systemd service | calls/s |
+| services.vfs_write_bytes | a dimension per systemd service | bytes/s |
+| services.vfs_read_bytes | a dimension per systemd service | bytes/s |
+| services.vfs_fsync | a dimension per systemd service | calls/s |
+| services.vfs_fsync_error | a dimension per systemd service | calls/s |
+| services.vfs_open | a dimension per systemd service | calls/s |
+| services.vfs_open_error | a dimension per systemd service | calls/s |
+| services.vfs_create | a dimension per systemd service | calls/s |
+| services.vfs_create_error | a dimension per systemd service | calls/s |
+
+### Per eBPF VFS instance
+
+These Metrics show grouped information per cgroup/service.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| filesystem.vfs_deleted_objects | delete | calls/s |
+| filesystem.vfs_io | read, write | calls/s |
+| filesystem.vfs_io_bytes | read, write | bytes/s |
+| filesystem.vfs_io_error | read, write | calls/s |
+| filesystem.vfs_fsync | fsync | calls/s |
+| filesystem.vfs_fsync_error | fsync | calls/s |
+| filesystem.vfs_open | open | calls/s |
+| filesystem.vfs_open_error | open | calls/s |
+| filesystem.vfs_create | create | calls/s |
+| filesystem.vfs_create_error | create | calls/s |
+
+### Per apps
+
+These Metrics show grouped information per apps group.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| app_group | The name of the group defined in the configuration. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| app.ebpf_call_vfs_unlink | calls | calls/s |
+| app.ebpf_call_vfs_write | calls | calls/s |
+| app.ebpf_call_vfs_write_error | calls | calls/s |
+| app.ebpf_call_vfs_read | calls | calls/s |
+| app.ebpf_call_vfs_read_error | calls | calls/s |
+| app.ebpf_call_vfs_write_bytes | writes | bytes/s |
+| app.ebpf_call_vfs_read_bytes | reads | bytes/s |
+| app.ebpf_call_vfs_fsync | calls | calls/s |
+| app.ebpf_call_vfs_fsync_error | calls | calls/s |
+| app.ebpf_call_vfs_open | calls | calls/s |
+| app.ebpf_call_vfs_open_error | calls | calls/s |
+| app.ebpf_call_vfs_create | calls | calls/s |
+| app.ebpf_call_vfs_create_error | calls | calls/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Compile kernel
+
+Check if your kernel was compiled with the necessary options (CONFIG_KPROBES, CONFIG_BPF, CONFIG_BPF_SYSCALL, CONFIG_BPF_JIT) in `/proc/config.gz` or inside the /boot/config file. Some cited names can be different according to the preferences of Linux distributions.
+When you do not have these options set, it is necessary to get the kernel source code from https://kernel.org or a kernel package from your distribution; the latter is preferred. The kernel compilation has a well-defined pattern, but distributions can deliver their configuration files
+with different names.
+
+Now follow these steps:
+1. Copy the configuration file to /usr/src/linux/.config.
+2. Select the necessary options: make oldconfig
+3. Compile your kernel image: make bzImage
+4. Compile your modules: make modules
+5. Copy your new kernel image to the boot loader directory
+6. Install the new modules: make modules_install
+7. Generate an initial ramdisk image (`initrd`) if it is necessary.
+8. Update your boot loader
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ebpf.d/vfs.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ebpf.d/vfs.conf
+```
+#### Options
+
+All options are defined inside section `[global]`.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 5 | no |
+| ebpf load mode | Define whether plugin will monitor the call (`entry`) for the functions or it will also monitor the return (`return`). | entry | no |
+| apps | Enable or disable integration with apps.plugin | no | no |
+| cgroups | Enable or disable integration with cgroup.plugin | no | no |
+| pid table size | Number of elements stored inside hash tables used to monitor calls per PID. | 32768 | no |
+| ebpf type format | Define the file type to load an eBPF program. Three options are available: `legacy` (Attach only `kprobe`), `co-re` (Plugin tries to use `trampoline` when available), and `auto` (plugin checks the OS configuration before loading). | auto | no |
+| ebpf co-re tracing | Select the attach method used by plugin when `co-re` is defined in the previous option. Two options are available: `trampoline` (Option with lowest overhead), and `probe` (the same as legacy code). | trampoline | no |
+| maps per core | Define how the plugin will load its hash maps. When enabled (`yes`), the plugin will load one hash table per core instead of having centralized information. | yes | no |
+| lifetime | Set default lifetime for thread when enabled by cloud. | 300 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ebpf.plugin/metadata.yaml b/collectors/ebpf.plugin/metadata.yaml
index 23232677836017..97b5df38940d43 100644
--- a/collectors/ebpf.plugin/metadata.yaml
+++ b/collectors/ebpf.plugin/metadata.yaml
@@ -196,32 +196,34 @@ modules:
- name: close
- name: apps
description: "These Metrics show grouped information per apps group."
- labels: []
+ labels:
+ - name: app_group
+ description: The name of the group defined in the configuration.
metrics:
- - name: apps.file_open
+ - name: app.ebpf_file_open
description: Number of open files
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.file_open_error
+ - name: calls
+ - name: app.ebpf_file_open_error
description: Fails to open files
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.file_closed
+ - name: calls
+ - name: app.ebpf_file_closed
description: Files closed
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.file_close_error
+ - name: calls
+ - name: app.ebpf_file_close_error
description: Fails to close files
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
+ - name: calls
- meta:
plugin_name: ebpf.plugin
module_name: processes
@@ -379,38 +381,40 @@ modules:
- name: task
- name: apps
description: "These Metrics show grouped information per apps group."
- labels: []
+ labels:
+ - name: app_group
+ description: The name of the group defined in the configuration.
metrics:
- - name: apps.process_create
+ - name: app.process_create
description: Process started
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.thread_create
+ - name: calls
+ - name: app.thread_create
description: Threads started
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.task_exit
+ - name: call
+ - name: app.task_exit
description: Tasks starts exit process
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.task_close
+ - name: call
+ - name: app.task_close
description: Tasks closed
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.task_error
+ - name: call
+ - name: app.task_error
description: Errors to create process or threads
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
+ - name: app
- name: cgroup
description: "These Metrics show grouped information per cgroup/service."
labels: []
@@ -841,32 +845,34 @@ modules:
- name: miss
- name: apps
description: "These Metrics show grouped information per apps group."
- labels: []
+ labels:
+ - name: app_group
+ description: The name of the group defined in the configuration.
metrics:
- - name: apps.cachestat_ratio
+ - name: app.ebpf_cachestat_hit_ratio
description: Hit ratio
unit: "%"
chart_type: line
dimensions:
- - name: a dimension per app group
- - name: apps.cachestat_dirties
+ - name: ratio
+ - name: app.ebpf_cachestat_dirty_pages
description: Number of dirty pages
unit: "page/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.cachestat_hits
+ - name: pages
+ - name: app.ebpf_cachestat_access
description: Number of accessed files
unit: "hits/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.cachestat_misses
+ - name: hits
+ - name: app.ebpf_cachestat_misses
description: Files out of page cache
unit: "misses/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
+ - name: misses
- name: cgroup
description: ""
labels: []
@@ -1076,27 +1082,27 @@ modules:
labels: []
metrics:
- name: mem.file_sync
- description: Monitor calls for fsync(2) and fdatasync(2).
+ description: Monitor calls to fsync(2) and fdatasync(2).
unit: "calls/s"
chart_type: stacked
dimensions:
- name: fsync
- name: fdatasync
- name: mem.meory_map
- description: Monitor calls for msync(2).
+ description: Monitor calls to msync(2).
unit: "calls/s"
chart_type: line
dimensions:
- name: msync
- name: mem.sync
- description: Monitor calls for sync(2) and syncfs(2).
+ description: Monitor calls to sync(2) and syncfs(2).
unit: "calls/s"
chart_type: line
dimensions:
- name: sync
- name: syncfs
- name: mem.file_segment
- description: Monitor calls for sync_file_range(2).
+ description: Monitor calls to sync_file_range(2).
unit: "calls/s"
chart_type: line
dimensions:
@@ -1333,41 +1339,43 @@ modules:
labels: []
metrics:
- name: cgroup.swap_read
- description: Calls to function swap_readpage.
+ description: Calls to function swap_readpage.
unit: "calls/s"
chart_type: line
dimensions:
- name: read
- name: cgroup.swap_write
- description: Calls to function swap_writepage.
+ description: Calls to function swap_writepage.
unit: "calls/s"
chart_type: line
dimensions:
- name: write
- name: services.swap_read
- description: Calls to swap_readpage.
+ description: Calls to swap_readpage.
unit: "calls/s"
chart_type: stacked
dimensions:
- name: a dimension per systemd service
- name: services.swap_write
- description: Calls to function swap_writepage.
+ description: Calls to function swap_writepage.
unit: "calls/s"
chart_type: stacked
dimensions:
- name: a dimension per systemd service
- name: apps
description: "These Metrics show grouped information per apps group."
- labels: []
+ labels:
+ - name: app_group
+ description: The name of the group defined in the configuration.
metrics:
- - name: apps.swap_read_call
- description: Calls to function swap_readpage.
+ - name: app.ebpf_call_swap_readpage
+ description: Calls to function swap_readpage.
unit: "calls/s"
chart_type: stacked
dimensions:
- name: a dimension per app group
- - name: apps.swap_write_call
- description: Calls to function swap_writepage.
+ - name: app.ebpf_call_swap_writepage
+ description: Calls to function swap_writepage.
unit: "calls/s"
chart_type: stacked
dimensions:
@@ -1501,14 +1509,16 @@ modules:
- name: a dimension per systemd service
- name: apps
description: "These metrics show cgroup/service that reached OOM."
- labels: []
+ labels:
+ - name: app_group
+ description: The name of the group defined in the configuration.
metrics:
- - name: apps.oomkills
+ - name: app.oomkill
description: OOM kills
unit: "kills"
chart_type: stacked
dimensions:
- - name: a dimension per app group
+ - name: kills
- meta:
plugin_name: ebpf.plugin
module_name: socket
@@ -1713,68 +1723,64 @@ modules:
- name: send
- name: apps
description: "These metrics show grouped information per apps group."
- labels: []
+ labels:
+ - name: app_group
+ description: The name of the group defined in the configuration.
metrics:
- - name: apps.outbound_conn_v4
+ - name: app.ebpf_call_tcp_v4_connection
description: Calls to tcp_v4_connection
unit: "connections/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.outbound_conn_v6
+ - name: connections
+ - name: app.ebpf_call_tcp_v6_connection
description: Calls to tcp_v6_connection
unit: "connections/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.total_bandwidth_sent
+ - name: connections
+ - name: app.ebpf_sock_bytes_sent
description: Bytes sent
unit: "kilobits/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.total_bandwidth_recv
+ - name: bandwidth
+ - name: app.ebpf_sock_bytes_received
description: bytes received
unit: "kilobits/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.bandwidth_tcp_send
+ - name: bandwidth
+ - name: app.ebpf_call_tcp_sendmsg
description: Calls for tcp_sendmsg
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.bandwidth_tcp_recv
+ - name: calls
+ - name: app.ebpf_call_tcp_cleanup_rbuf
description: Calls for tcp_cleanup_rbuf
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.bandwidth_tcp_retransmit
+ - name: calls
+ - name: app.ebpf_call_tcp_retransmit
description: Calls for tcp_retransmit
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.bandwidth_udp_send
+ - name: calls
+ - name: app.ebpf_call_udp_sendmsg
description: Calls for udp_sendmsg
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.bandwidth_udp_recv
+ - name: calls
+ - name: app.ebpf_call_udp_recvmsg
description: Calls for udp_recvmsg
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: services.net_conn_ipv4
- description: Calls to tcp_v4_connection
- unit: "connections/s"
- chart_type: stacked
- dimensions:
- - name: a dimension per systemd service
+ - name: calls
- name: cgroup
description: ""
labels: []
@@ -2005,32 +2011,34 @@ modules:
scopes:
- name: apps
description: "These Metrics show grouped information per apps group."
- labels: []
+ labels:
+ - name: app_group
+ description: The name of the group defined in the configuration.
metrics:
- - name: apps.dc_ratio
+ - name: app.ebpf_dc_ratio
description: Percentage of files inside directory cache
unit: "%"
chart_type: line
dimensions:
- - name: a dimension per app group
- - name: apps.dc_reference
+ - name: ratio
+ - name: app.ebpf_dc_reference
description: Count file access
unit: "files"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.dc_not_cache
+ - name: files
+ - name: app.ebpf_dc_not_cache
description: Files not present inside directory cache
unit: "files"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.dc_not_found
+ - name: files
+ - name: app.ebpf_dc_not_found
description: Files not found
unit: "files"
chart_type: stacked
dimensions:
- - name: a dimension per app group
+ - name: files
- name: filesystem
description: "These metrics show total number of calls to functions inside kernel."
labels: []
@@ -2409,81 +2417,83 @@ modules:
labels: []
metrics:
- name: cgroup.shmget
- description: Calls to syscall shmget(2).
+ description: Calls to syscall shmget(2).
unit: "calls/s"
chart_type: line
dimensions:
- name: get
- name: cgroup.shmat
- description: Calls to syscall shmat(2).
+ description: Calls to syscall shmat(2).
unit: "calls/s"
chart_type: line
dimensions:
- name: at
- name: cgroup.shmdt
- description: Calls to syscall shmdt(2).
+ description: Calls to syscall shmdt(2).
unit: "calls/s"
chart_type: line
dimensions:
- name: dt
- name: cgroup.shmctl
- description: Calls to syscall shmctl(2).
+ description: Calls to syscall shmctl(2).
unit: "calls/s"
chart_type: line
dimensions:
- name: ctl
- name: services.shmget
- description: Calls to syscall shmget(2).
+ description: Calls to syscall shmget(2).
unit: "calls/s"
chart_type: stacked
dimensions:
- name: a dimension per systemd service
- name: services.shmat
- description: Calls to syscall shmat(2).
+ description: Calls to syscall shmat(2).
unit: "calls/s"
chart_type: stacked
dimensions:
- name: a dimension per systemd service
- name: services.shmdt
- description: Calls to syscall shmdt(2).
+ description: Calls to syscall shmdt(2).
unit: "calls/s"
chart_type: stacked
dimensions:
- name: a dimension per systemd service
- name: services.shmctl
- description: Calls to syscall shmctl(2).
+ description: Calls to syscall shmctl(2).
unit: "calls/s"
chart_type: stacked
dimensions:
- name: a dimension per systemd service
- name: apps
description: "These Metrics show grouped information per apps group."
- labels: []
+ labels:
+ - name: app_group
+ description: The name of the group defined in the configuration.
metrics:
- - name: apps.shmget_call
- description: Calls to syscall shmget(2).
+ - name: app.ebpf_shmget_call
+ description: Calls to syscall shmget(2).
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.shmat_call
- description: Calls to syscall shmat(2).
+ - name: calls
+ - name: app.ebpf_shmat_call
+ description: Calls to syscall shmat(2).
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.shmdt_call
- description: Calls to syscall shmdt(2).
+ - name: calls
+ - name: app.ebpf_shmdt_call
+ description: Calls to syscall shmdt(2).
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.shmctl_call
- description: Calls to syscall shmctl(2).
+ - name: calls
+ - name: app.ebpf_shmctl_call
+ description: Calls to syscall shmctl(2).
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
+ - name: calls
- name: global
description: "These Metrics show number of calls for specified syscall."
labels: []
@@ -2898,7 +2908,7 @@ modules:
dimensions:
- name: read
- name: cgroup.vfs_fsync
- description: Calls for vfs_fsync
+ description: Calls to vfs_fsync.
unit: "calls/s"
chart_type: line
dimensions:
@@ -2910,7 +2920,7 @@ modules:
dimensions:
- name: fsync
- name: cgroup.vfs_open
- description: Calls for vfs_open
+ description: Calls to vfs_open.
unit: "calls/s"
chart_type: line
dimensions:
@@ -2922,7 +2932,7 @@ modules:
dimensions:
- name: open
- name: cgroup.vfs_create
- description: Calls for vfs_create
+ description: Calls to vfs_create.
unit: "calls/s"
chart_type: line
dimensions:
@@ -2976,7 +2986,7 @@ modules:
dimensions:
- name: a dimension per systemd service
- name: services.vfs_fsync
- description: Calls to vfs_fsync
+ description: Calls to vfs_fsync.
unit: "calls/s"
chart_type: stacked
dimensions:
@@ -2988,7 +2998,7 @@ modules:
dimensions:
- name: a dimension per systemd service
- name: services.vfs_open
- description: Calls to vfs_open
+ description: Calls to vfs_open.
unit: "calls/s"
chart_type: stacked
dimensions:
@@ -3000,7 +3010,7 @@ modules:
dimensions:
- name: a dimension per systemd service
- name: services.vfs_create
- description: Calls to vfs_create
+ description: Calls to vfs_create.
unit: "calls/s"
chart_type: stacked
dimensions:
@@ -3043,7 +3053,7 @@ modules:
- name: read
- name: write
- name: filesystem.vfs_fsync
- description: Calls for vfs_fsync
+ description: Calls to vfs_fsync.
unit: "calls/s"
chart_type: line
dimensions:
@@ -3055,7 +3065,7 @@ modules:
dimensions:
- name: fsync
- name: filesystem.vfs_open
- description: Calls for vfs_open
+ description: Calls to vfs_open.
unit: "calls/s"
chart_type: line
dimensions:
@@ -3067,7 +3077,7 @@ modules:
dimensions:
- name: open
- name: filesystem.vfs_create
- description: Calls for vfs_create
+ description: Calls to vfs_create.
unit: "calls/s"
chart_type: line
dimensions:
@@ -3080,86 +3090,88 @@ modules:
- name: create
- name: apps
description: "These Metrics show grouped information per apps group."
- labels: []
+ labels:
+ - name: app_group
+ description: The name of the group defined in the configuration.
metrics:
- - name: apps.file_deleted
+ - name: app.ebpf_call_vfs_unlink
description: Files deleted
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.vfs_write_call
+ - name: calls
+ - name: app.ebpf_call_vfs_write
description: Write to disk
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.vfs_write_error
+ - name: calls
+ - name: app.ebpf_call_vfs_write_error
description: Fails to write
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.vfs_read_call
+ - name: calls
+ - name: app.ebpf_call_vfs_read
description: Read from disk
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.vfs_read_error
+ - name: calls
+ - name: app.ebpf_call_vfs_read_error
description: Fails to read
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.vfs_write_bytes
+ - name: calls
+ - name: app.ebpf_call_vfs_write_bytes
description: Bytes written on disk
unit: "bytes/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.vfs_read_bytes
+ - name: writes
+ - name: app.ebpf_call_vfs_read_bytes
description: Bytes read on disk
unit: "bytes/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.vfs_fsync
- description: Calls for vfs_fsync
+ - name: reads
+ - name: app.ebpf_call_vfs_fsync
+ description: Calls to vfs_fsync.
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.vfs_fsync_error
+ - name: calls
+ - name: app.ebpf_call_vfs_fsync_error
description: Sync error
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.vfs_open
- description: Calls for vfs_open
+ - name: calls
+ - name: app.ebpf_call_vfs_open
+ description: Calls to vfs_open.
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.vfs_open_error
+ - name: calls
+ - name: app.ebpf_call_vfs_open_error
description: Open error
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.vfs_create
- description: Calls for vfs_create
+ - name: calls
+ - name: app.ebpf_call_vfs_create
+ description: Calls to vfs_create.
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
- - name: apps.vfs_create_error
+ - name: calls
+ - name: app.ebpf_call_vfs_create_error
description: Create error
unit: "calls/s"
chart_type: stacked
dimensions:
- - name: a dimension per app group
+ - name: calls
- meta:
plugin_name: ebpf.plugin
module_name: process
diff --git a/collectors/freebsd.plugin/freebsd_devstat.c b/collectors/freebsd.plugin/freebsd_devstat.c
index 65b8a2d5a23314..ca6048a1687fae 100644
--- a/collectors/freebsd.plugin/freebsd_devstat.c
+++ b/collectors/freebsd.plugin/freebsd_devstat.c
@@ -89,21 +89,21 @@ static size_t disks_added = 0, disks_found = 0;
static void disk_free(struct disk *dm) {
if (likely(dm->st_io))
- rrdset_is_obsolete(dm->st_io);
+ rrdset_is_obsolete___safe_from_collector_thread(dm->st_io);
if (likely(dm->st_ops))
- rrdset_is_obsolete(dm->st_ops);
+ rrdset_is_obsolete___safe_from_collector_thread(dm->st_ops);
if (likely(dm->st_qops))
- rrdset_is_obsolete(dm->st_qops);
+ rrdset_is_obsolete___safe_from_collector_thread(dm->st_qops);
if (likely(dm->st_util))
- rrdset_is_obsolete(dm->st_util);
+ rrdset_is_obsolete___safe_from_collector_thread(dm->st_util);
if (likely(dm->st_iotime))
- rrdset_is_obsolete(dm->st_iotime);
+ rrdset_is_obsolete___safe_from_collector_thread(dm->st_iotime);
if (likely(dm->st_await))
- rrdset_is_obsolete(dm->st_await);
+ rrdset_is_obsolete___safe_from_collector_thread(dm->st_await);
if (likely(dm->st_avagsz))
- rrdset_is_obsolete(dm->st_avagsz);
+ rrdset_is_obsolete___safe_from_collector_thread(dm->st_avagsz);
if (likely(dm->st_svctm))
- rrdset_is_obsolete(dm->st_svctm);
+ rrdset_is_obsolete___safe_from_collector_thread(dm->st_svctm);
disks_added--;
freez(dm->name);
diff --git a/collectors/freebsd.plugin/freebsd_getifaddrs.c b/collectors/freebsd.plugin/freebsd_getifaddrs.c
index 80a209105fbcd6..36be684226fb4a 100644
--- a/collectors/freebsd.plugin/freebsd_getifaddrs.c
+++ b/collectors/freebsd.plugin/freebsd_getifaddrs.c
@@ -52,15 +52,15 @@ static size_t network_interfaces_added = 0, network_interfaces_found = 0;
static void network_interface_free(struct cgroup_network_interface *ifm) {
if (likely(ifm->st_bandwidth))
- rrdset_is_obsolete(ifm->st_bandwidth);
+ rrdset_is_obsolete___safe_from_collector_thread(ifm->st_bandwidth);
if (likely(ifm->st_packets))
- rrdset_is_obsolete(ifm->st_packets);
+ rrdset_is_obsolete___safe_from_collector_thread(ifm->st_packets);
if (likely(ifm->st_errors))
- rrdset_is_obsolete(ifm->st_errors);
+ rrdset_is_obsolete___safe_from_collector_thread(ifm->st_errors);
if (likely(ifm->st_drops))
- rrdset_is_obsolete(ifm->st_drops);
+ rrdset_is_obsolete___safe_from_collector_thread(ifm->st_drops);
if (likely(ifm->st_events))
- rrdset_is_obsolete(ifm->st_events);
+ rrdset_is_obsolete___safe_from_collector_thread(ifm->st_events);
network_interfaces_added--;
freez(ifm->name);
diff --git a/collectors/freebsd.plugin/freebsd_getmntinfo.c b/collectors/freebsd.plugin/freebsd_getmntinfo.c
index cc0abd9060ac86..d55eb3d4a49ce9 100644
--- a/collectors/freebsd.plugin/freebsd_getmntinfo.c
+++ b/collectors/freebsd.plugin/freebsd_getmntinfo.c
@@ -39,9 +39,9 @@ static size_t mount_points_added = 0, mount_points_found = 0;
static void mount_point_free(struct mount_point *m) {
if (likely(m->st_space))
- rrdset_is_obsolete(m->st_space);
+ rrdset_is_obsolete___safe_from_collector_thread(m->st_space);
if (likely(m->st_inodes))
- rrdset_is_obsolete(m->st_inodes);
+ rrdset_is_obsolete___safe_from_collector_thread(m->st_inodes);
mount_points_added--;
freez(m->name);
@@ -216,7 +216,7 @@ int do_getmntinfo(int update_every, usec_t dt) {
(mntbuf[i].f_blocks > 2 ||
netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
if (unlikely(!m->st_space)) {
- snprintfz(title, 4096, "Disk Space Usage for %s [%s]",
+ snprintfz(title, sizeof(title) - 1, "Disk Space Usage for %s [%s]",
mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname);
m->st_space = rrdset_create_localhost("disk_space",
mntbuf[i].f_mntonname,
@@ -254,7 +254,7 @@ int do_getmntinfo(int update_every, usec_t dt) {
(mntbuf[i].f_files > 1 ||
netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
if (unlikely(!m->st_inodes)) {
- snprintfz(title, 4096, "Disk Files (inodes) Usage for %s [%s]",
+ snprintfz(title, sizeof(title) - 1, "Disk Files (inodes) Usage for %s [%s]",
mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname);
m->st_inodes = rrdset_create_localhost("disk_inodes",
mntbuf[i].f_mntonname,
diff --git a/collectors/freebsd.plugin/integrations/dev.cpu.0.freq.md b/collectors/freebsd.plugin/integrations/dev.cpu.0.freq.md
new file mode 100644
index 00000000000000..5f18661d0188c9
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/dev.cpu.0.freq.md
@@ -0,0 +1,111 @@
+
+
+# dev.cpu.0.freq
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: dev.cpu.0.freq
+
+
+
+## Overview
+
+Read current CPU Scaling frequency.
+
+Current CPU Scaling Frequency
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per dev.cpu.0.freq instance
+
+The metric shows the status of the CPU frequency; it is directly affected by system load.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cpu.scaling_cur_freq | frequency | MHz |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| dev.cpu.0.freq | Enable or disable CPU Scaling frequency metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/dev.cpu.temperature.md b/collectors/freebsd.plugin/integrations/dev.cpu.temperature.md
new file mode 100644
index 00000000000000..a3736f771a5d93
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/dev.cpu.temperature.md
@@ -0,0 +1,120 @@
+
+
+# dev.cpu.temperature
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: dev.cpu.temperature
+
+
+
+## Overview
+
+Get current CPU temperature
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per dev.cpu.temperature instance
+
+This metric shows the latest CPU temperature.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cpu.temperature | a dimension per core | Celsius |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| dev.cpu.temperature | Enable or disable CPU temperature metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/devstat.md b/collectors/freebsd.plugin/integrations/devstat.md
new file mode 100644
index 00000000000000..9d9c6400b0c5c3
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/devstat.md
@@ -0,0 +1,155 @@
+
+
+# devstat
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: devstat
+
+
+
+## Overview
+
+Collect information per hard disk available on host.
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per devstat instance
+
+These metrics give a general overview of I/O events on disks.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.io | in, out | KiB/s |
+
+### Per disk
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| disk.io | reads, writes, frees | KiB/s |
+| disk.ops | reads, writes, other, frees | operations/s |
+| disk.qops | operations | operations |
+| disk.util | utilization | % of time working |
+| disk.iotime | reads, writes, other, frees | milliseconds/s |
+| disk.await | reads, writes, other, frees | milliseconds/operation |
+| disk.avgsz | reads, writes, frees | KiB/operation |
+| disk.svctm | svctm | milliseconds/operation |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ 10min_disk_utilization ](https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf) | disk.util | average percentage of time ${label:device} disk was busy over the last 10 minutes |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:kern.devstat]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| enable new disks detected at runtime | Enable or disable detection of new disks at runtime. | auto | no |
+| performance metrics for pass devices | Enable or disable metrics for disks with type `PASS`. | auto | no |
+| total bandwidth for all disks | Enable or disable total bandwidth metric for all disks. | yes | no |
+| bandwidth for all disks | Enable or disable bandwidth for all disks metric. | auto | no |
+| operations for all disks | Enable or disable operations for all disks metric. | auto | no |
+| queued operations for all disks | Enable or disable queued operations for all disks metric. | auto | no |
+| utilization percentage for all disks | Enable or disable utilization percentage for all disks metric. | auto | no |
+| i/o time for all disks | Enable or disable I/O time for all disks metric. | auto | no |
+| average completed i/o time for all disks | Enable or disable average completed I/O time for all disks metric. | auto | no |
+| average completed i/o bandwidth for all disks | Enable or disable average completed I/O bandwidth for all disks metric. | auto | no |
+| average service time for all disks | Enable or disable average service time for all disks metric. | auto | no |
+| disable by default disks matching | Do not create charts for disks listed. | | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/getifaddrs.md b/collectors/freebsd.plugin/integrations/getifaddrs.md
new file mode 100644
index 00000000000000..63c4ce136c45c4
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/getifaddrs.md
@@ -0,0 +1,161 @@
+
+
+# getifaddrs
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: getifaddrs
+
+
+
+## Overview
+
+Collect traffic per network interface.
+
+The plugin calls `getifaddrs` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per getifaddrs instance
+
+General overview about network traffic.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.net | received, sent | kilobits/s |
+| system.packets | received, sent, multicast_received, multicast_sent | packets/s |
+| system.ipv4 | received, sent | kilobits/s |
+| system.ipv6 | received, sent | kilobits/s |
+
+### Per network device
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| net.net | received, sent | kilobits/s |
+| net.packets | received, sent, multicast_received, multicast_sent | packets/s |
+| net.errors | inbound, outbound | errors/s |
+| net.drops | inbound, outbound | drops/s |
+| net.events | collisions | events/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ interface_speed ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.net | network interface ${label:device} current speed |
+| [ inbound_packets_dropped_ratio ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.drops | ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes |
+| [ outbound_packets_dropped_ratio ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.drops | ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes |
+| [ 1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.packets | average number of packets received by the network interface ${label:device} over the last minute |
+| [ 10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute |
+| [ interface_inbound_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.errors | number of inbound errors for the network interface ${label:device} in the last 10 minutes |
+| [ interface_outbound_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.errors | number of outbound errors for the network interface ${label:device} in the last 10 minutes |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:getifaddrs]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| enable new interfaces detected at runtime | Enable or disable discovery of new interfaces after the plugin starts. | auto | no |
+| total bandwidth for physical interfaces | Enable or disable total bandwidth for physical interfaces metric. | auto | no |
+| total packets for physical interfaces | Enable or disable total packets for physical interfaces metric. | auto | no |
+| total bandwidth for ipv4 interface | Enable or disable total bandwidth for IPv4 interface metric. | auto | no |
+| total bandwidth for ipv6 interfaces | Enable or disable total bandwidth for ipv6 interfaces metric. | auto | no |
+| bandwidth for all interfaces | Enable or disable bandwidth for all interfaces metric. | auto | no |
+| packets for all interfaces | Enable or disable packets for all interfaces metric. | auto | no |
+| errors for all interfaces | Enable or disable errors for all interfaces metric. | auto | no |
+| drops for all interfaces | Enable or disable drops for all interfaces metric. | auto | no |
+| collisions for all interface | Enable or disable collisions for all interface metric. | auto | no |
+| disable by default interfaces matching | Do not display data for the interfaces listed. | lo* | no |
+| set physical interfaces for system.net | Do not show network traffic for listed interfaces. | igb* ix* cxl* em* ixl* ixlv* bge* ixgbe* vtnet* vmx* re* igc* dwc* | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/getmntinfo.md b/collectors/freebsd.plugin/integrations/getmntinfo.md
new file mode 100644
index 00000000000000..d26ad1c0334815
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/getmntinfo.md
@@ -0,0 +1,131 @@
+
+
+# getmntinfo
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: getmntinfo
+
+
+
+## Overview
+
+Collect information per mount point.
+
+The plugin calls `getmntinfo` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per mount point
+
+These metrics show details about mount point usage.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| disk.space | avail, used, reserved_for_root | GiB |
+| disk.inodes | avail, used, reserved_for_root | inodes |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ disk_space_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf) | disk.space | disk ${label:mount_point} space utilization |
+| [ disk_inode_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf) | disk.inodes | disk ${label:mount_point} inode utilization |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:getmntinfo]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| enable new mount points detected at runtime | Check for new mount points during runtime. | auto | no |
+| space usage for all disks | Enable or disable space usage for all disks metric. | auto | no |
+| inodes usage for all disks | Enable or disable inodes usage for all disks metric. | auto | no |
+| exclude space metrics on paths | Do not show metrics for listed paths. | /proc/* | no |
+| exclude space metrics on filesystems | Do not monitor listed filesystems. | autofs procfs subfs devfs none | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/hw.intrcnt.md b/collectors/freebsd.plugin/integrations/hw.intrcnt.md
new file mode 100644
index 00000000000000..49164c369d1bdf
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/hw.intrcnt.md
@@ -0,0 +1,121 @@
+
+
+# hw.intrcnt
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: hw.intrcnt
+
+
+
+## Overview
+
+Get total number of interrupts
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per hw.intrcnt instance
+
+These metrics show system interrupts frequency.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.intr | interrupts | interrupts/s |
+| system.interrupts | a dimension per interrupt | interrupts/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config option
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| hw.intrcnt | Enable or disable Interrupts metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/ipfw.md b/collectors/freebsd.plugin/integrations/ipfw.md
new file mode 100644
index 00000000000000..84e023bdfebd29
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/ipfw.md
@@ -0,0 +1,126 @@
+
+
+# ipfw
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: ipfw
+
+
+
+## Overview
+
+Collect information about FreeBSD firewall.
+
+The plugin uses RAW socket to communicate with kernel and collect data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per ipfw instance
+
+These metrics show FreeBSD firewall statistics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipfw.mem | dynamic, static | bytes |
+| ipfw.packets | a dimension per static rule | packets/s |
+| ipfw.bytes | a dimension per static rule | bytes/s |
+| ipfw.active | a dimension per dynamic rule | rules |
+| ipfw.expired | a dimension per dynamic rule | rules |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:ipfw]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| counters for static rules | Enable or disable counters for static rules metric. | yes | no |
+| number of dynamic rules | Enable or disable number of dynamic rules metric. | yes | no |
+| allocated memory | Enable or disable allocated memory metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/kern.cp_time.md b/collectors/freebsd.plugin/integrations/kern.cp_time.md
new file mode 100644
index 00000000000000..95bdb8d90b497e
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/kern.cp_time.md
@@ -0,0 +1,139 @@
+
+
+# kern.cp_time
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: kern.cp_time
+
+
+
+## Overview
+
+Total CPU utilization
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per kern.cp_time instance
+
+These metrics show CPU usage statistics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.cpu | nice, system, user, interrupt, idle | percentage |
+
+### Per core
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cpu.cpu | nice, system, user, interrupt, idle | percentage |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ 10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf) | system.cpu | average CPU utilization over the last 10 minutes (excluding iowait, nice and steal) |
+| [ 10min_cpu_iowait ](https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf) | system.cpu | average CPU iowait time over the last 10 minutes |
+| [ 20min_steal_cpu ](https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf) | system.cpu | average CPU steal time over the last 20 minutes |
+| [ 10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf) | system.cpu | average CPU utilization over the last 10 minutes (excluding nice) |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+The netdata main configuration file.
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| kern.cp_time | Enable or disable Total CPU usage. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/kern.ipc.msq.md b/collectors/freebsd.plugin/integrations/kern.ipc.msq.md
new file mode 100644
index 00000000000000..e7457e0c1e23be
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/kern.ipc.msq.md
@@ -0,0 +1,122 @@
+
+
+# kern.ipc.msq
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: kern.ipc.msq
+
+
+
+## Overview
+
+Collect the number of IPC message queues.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per kern.ipc.msq instance
+
+These metrics show IPC message queue statistics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.ipc_msq_queues | queues | queues |
+| system.ipc_msq_messages | messages | messages |
+| system.ipc_msq_size | allocated, used | bytes |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| kern.ipc.msq | Enable or disable IPC message queue metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/kern.ipc.sem.md b/collectors/freebsd.plugin/integrations/kern.ipc.sem.md
new file mode 100644
index 00000000000000..7bf7235e68ed90
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/kern.ipc.sem.md
@@ -0,0 +1,127 @@
+
+
+# kern.ipc.sem
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: kern.ipc.sem
+
+
+
+## Overview
+
+Collect information about semaphores.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per kern.ipc.sem instance
+
+These metrics show counters for semaphores on the host.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.ipc_semaphores | semaphores | semaphores |
+| system.ipc_semaphore_arrays | arrays | arrays |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ semaphores_used ](https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf) | system.ipc_semaphores | IPC semaphore utilization |
+| [ semaphore_arrays_used ](https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf) | system.ipc_semaphore_arrays | IPC semaphore arrays utilization |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| kern.ipc.sem | Enable or disable semaphore metrics. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/kern.ipc.shm.md b/collectors/freebsd.plugin/integrations/kern.ipc.shm.md
new file mode 100644
index 00000000000000..1f10c1e6e5dfa1
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/kern.ipc.shm.md
@@ -0,0 +1,121 @@
+
+
+# kern.ipc.shm
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: kern.ipc.shm
+
+
+
+## Overview
+
+Collect shared memory information.
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per kern.ipc.shm instance
+
+These metrics give status about current shared memory segments.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.ipc_shared_mem_segs | segments | segments |
+| system.ipc_shared_mem_size | allocated | KiB |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| kern.ipc.shm | Enable or disable shared memory metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/net.inet.icmp.stats.md b/collectors/freebsd.plugin/integrations/net.inet.icmp.stats.md
new file mode 100644
index 00000000000000..29562bc9a3c374
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/net.inet.icmp.stats.md
@@ -0,0 +1,124 @@
+
+
+# net.inet.icmp.stats
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: net.inet.icmp.stats
+
+
+
+## Overview
+
+Collect information about ICMP traffic.
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per net.inet.icmp.stats instance
+
+These metrics show ICMP connections statistics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipv4.icmp | received, sent | packets/s |
+| ipv4.icmp_errors | InErrors, OutErrors, InCsumErrors | packets/s |
+| ipv4.icmpmsg | InEchoReps, OutEchoReps, InEchos, OutEchos | packets/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:net.inet.icmp.stats]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| IPv4 ICMP packets | Enable or disable IPv4 ICMP packets metric. | yes | no |
+| IPv4 ICMP error | Enable or disable IPv4 ICMP error metric. | yes | no |
+| IPv4 ICMP messages | Enable or disable IPv4 ICMP messages metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/net.inet.ip.stats.md b/collectors/freebsd.plugin/integrations/net.inet.ip.stats.md
new file mode 100644
index 00000000000000..785767e8916d4c
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/net.inet.ip.stats.md
@@ -0,0 +1,126 @@
+
+
+# net.inet.ip.stats
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: net.inet.ip.stats
+
+
+
+## Overview
+
+Collect IP stats.
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per net.inet.ip.stats instance
+
+These metrics show IPv4 connections statistics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipv4.packets | received, sent, forwarded, delivered | packets/s |
+| ipv4.fragsout | ok, failed, created | packets/s |
+| ipv4.fragsin | ok, failed, all | packets/s |
+| ipv4.errors | InDiscards, OutDiscards, InHdrErrors, OutNoRoutes, InAddrErrors, InUnknownProtos | packets/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:net.inet.ip.stats]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| ipv4 packets | Enable or disable IPv4 packets metric. | yes | no |
+| ipv4 fragments sent | Enable or disable IPv4 fragments sent metric. | yes | no |
+| ipv4 fragments assembly | Enable or disable IPv4 fragments assembly metric. | yes | no |
+| ipv4 errors | Enable or disable IPv4 errors metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/net.inet.tcp.states.md b/collectors/freebsd.plugin/integrations/net.inet.tcp.states.md
new file mode 100644
index 00000000000000..5b414458025921
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/net.inet.tcp.states.md
@@ -0,0 +1,125 @@
+
+
+# net.inet.tcp.states
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: net.inet.tcp.states
+
+
+
+## Overview
+
+
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per net.inet.tcp.states instance
+
+A counter for TCP connections.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipv4.tcpsock | connections | active connections |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ tcp_connections ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_conn.conf) | ipv4.tcpsock | IPv4 TCP connections utilization |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| net.inet.tcp.states | Enable or disable TCP state metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/net.inet.tcp.stats.md b/collectors/freebsd.plugin/integrations/net.inet.tcp.stats.md
new file mode 100644
index 00000000000000..be779740db477d
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/net.inet.tcp.stats.md
@@ -0,0 +1,142 @@
+
+
+# net.inet.tcp.stats
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: net.inet.tcp.stats
+
+
+
+## Overview
+
+Collect overall information about TCP connections.
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per net.inet.tcp.stats instance
+
+These metrics show TCP connections statistics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipv4.tcppackets | received, sent | packets/s |
+| ipv4.tcperrors | InErrs, InCsumErrors, RetransSegs | packets/s |
+| ipv4.tcphandshake | EstabResets, ActiveOpens, PassiveOpens, AttemptFails | events/s |
+| ipv4.tcpconnaborts | baddata, userclosed, nomemory, timeout, linger | connections/s |
+| ipv4.tcpofo | inqueue | packets/s |
+| ipv4.tcpsyncookies | received, sent, failed | packets/s |
+| ipv4.tcplistenissues | overflows | packets/s |
+| ipv4.ecnpkts | InCEPkts, InECT0Pkts, InECT1Pkts, OutECT0Pkts, OutECT1Pkts | packets/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ 1m_ipv4_tcp_resets_sent ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf) | ipv4.tcphandshake | average number of sent TCP RESETS over the last minute |
+| [ 10s_ipv4_tcp_resets_sent ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf) | ipv4.tcphandshake | average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has crashed. Netdata will not send a clear notification for this alarm. |
+| [ 1m_ipv4_tcp_resets_received ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf) | ipv4.tcphandshake | average number of received TCP RESETS over the last minute |
+| [ 10s_ipv4_tcp_resets_received ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf) | ipv4.tcphandshake | average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed. Netdata will not send a clear notification for this alarm. |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:net.inet.tcp.stats]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| ipv4 TCP packets | Enable or disable ipv4 TCP packets metric. | yes | no |
+| ipv4 TCP errors | Enable or disable ipv4 TCP errors metric. | yes | no |
+| ipv4 TCP handshake issues | Enable or disable ipv4 TCP handshake issue metric. | yes | no |
+| TCP connection aborts | Enable or disable TCP connection aborts metric. | auto | no |
+| TCP out-of-order queue | Enable or disable TCP out-of-order queue metric. | auto | no |
+| TCP SYN cookies | Enable or disable TCP SYN cookies metric. | auto | no |
+| TCP listen issues | Enable or disable TCP listen issues metric. | auto | no |
+| ECN packets | Enable or disable ECN packets metric. | auto | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/net.inet.udp.stats.md b/collectors/freebsd.plugin/integrations/net.inet.udp.stats.md
new file mode 100644
index 00000000000000..d3da4045562098
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/net.inet.udp.stats.md
@@ -0,0 +1,128 @@
+
+
+# net.inet.udp.stats
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: net.inet.udp.stats
+
+
+
+## Overview
+
+Collect information about UDP connections.
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per net.inet.udp.stats instance
+
+These metrics show UDP connections statistics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipv4.udppackets | received, sent | packets/s |
+| ipv4.udperrors | InErrors, NoPorts, RcvbufErrors, InCsumErrors, IgnoredMulti | events/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ 1m_ipv4_udp_receive_buffer_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf) | ipv4.udperrors | average number of UDP receive buffer errors over the last minute |
+| [ 1m_ipv4_udp_send_buffer_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf) | ipv4.udperrors | average number of UDP send buffer errors over the last minute |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:net.inet.udp.stats]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| ipv4 UDP packets | Enable or disable ipv4 UDP packets metric. | yes | no |
+| ipv4 UDP errors | Enable or disable ipv4 UDP errors metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/net.inet6.icmp6.stats.md b/collectors/freebsd.plugin/integrations/net.inet6.icmp6.stats.md
new file mode 100644
index 00000000000000..7344b79b3b3ee1
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/net.inet6.icmp6.stats.md
@@ -0,0 +1,132 @@
+
+
+# net.inet6.icmp6.stats
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: net.inet6.icmp6.stats
+
+
+
+## Overview
+
+Collect information about IPv6 ICMP.
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per net.inet6.icmp6.stats instance
+
+Collect IPv6 ICMP traffic statistics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipv6.icmp | received, sent | messages/s |
+| ipv6.icmpredir | received, sent | redirects/s |
+| ipv6.icmperrors | InErrors, OutErrors, InCsumErrors, InDestUnreachs, InPktTooBigs, InTimeExcds, InParmProblems, OutDestUnreachs, OutTimeExcds, OutParmProblems | errors/s |
+| ipv6.icmpechos | InEchos, OutEchos, InEchoReplies, OutEchoReplies | messages/s |
+| ipv6.icmprouter | InSolicits, OutSolicits, InAdvertisements, OutAdvertisements | messages/s |
+| ipv6.icmpneighbor | InSolicits, OutSolicits, InAdvertisements, OutAdvertisements | messages/s |
+| ipv6.icmptypes | InType1, InType128, InType129, InType136, OutType1, OutType128, OutType129, OutType133, OutType135, OutType143 | messages/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:net.inet6.icmp6.stats]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| icmp | Enable or disable ICMP metric. | auto | no |
+| icmp redirects | Enable or disable ICMP redirects metric. | auto | no |
+| icmp errors | Enable or disable ICMP errors metric. | auto | no |
+| icmp echos | Enable or disable ICMP echos metric. | auto | no |
+| icmp router | Enable or disable ICMP router metric. | auto | no |
+| icmp neighbor | Enable or disable ICMP neighbor metric. | auto | no |
+| icmp types | Enable or disable ICMP types metric. | auto | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/net.inet6.ip6.stats.md b/collectors/freebsd.plugin/integrations/net.inet6.ip6.stats.md
new file mode 100644
index 00000000000000..d9128b529729a5
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/net.inet6.ip6.stats.md
@@ -0,0 +1,126 @@
+
+
+# net.inet6.ip6.stats
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: net.inet6.ip6.stats
+
+
+
+## Overview
+
+Collect information about IPv6 stats.
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per net.inet6.ip6.stats instance
+
+These metrics show general information about IPv6 connections.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipv6.packets | received, sent, forwarded, delivers | packets/s |
+| ipv6.fragsout | ok, failed, all | packets/s |
+| ipv6.fragsin | ok, failed, timeout, all | packets/s |
+| ipv6.errors | InDiscards, OutDiscards, InHdrErrors, InAddrErrors, InTruncatedPkts, InNoRoutes, OutNoRoutes | packets/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:net.inet6.ip6.stats]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| ipv6 packets | Enable or disable ipv6 packet metric. | auto | no |
+| ipv6 fragments sent | Enable or disable ipv6 fragments sent metric. | auto | no |
+| ipv6 fragments assembly | Enable or disable ipv6 fragments assembly metric. | auto | no |
+| ipv6 errors | Enable or disable ipv6 errors metric. | auto | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/net.isr.md b/collectors/freebsd.plugin/integrations/net.isr.md
new file mode 100644
index 00000000000000..2d75b825a2fb52
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/net.isr.md
@@ -0,0 +1,140 @@
+
+
+# net.isr
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: net.isr
+
+
+
+## Overview
+
+Collect information about system softnet stat.
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per net.isr instance
+
+These metrics show statistics about softnet stats.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.softnet_stat | dispatched, hybrid_dispatched, qdrops, queued | events/s |
+
+### Per core
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cpu.softnet_stat | dispatched, hybrid_dispatched, qdrops, queued | events/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ 1min_netdev_backlog_exceeded ](https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf) | system.softnet_stat | average number of dropped packets in the last minute due to exceeded net.core.netdev_max_backlog |
+| [ 1min_netdev_budget_ran_outs ](https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf) | system.softnet_stat | average number of times ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs with work remaining over the last minute (this can be a cause for dropped packets) |
+| [ 10min_netisr_backlog_exceeded ](https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf) | system.softnet_stat | average number of drops in the last minute due to exceeded sysctl net.route.netisr_maxqlen (this can be a cause for dropped packets) |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:net.isr]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| netisr | Enable or disable the overall softnet stat metrics. | yes | no |
+| netisr per core | Enable or disable softnet stat metric per core. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/system.ram.md b/collectors/freebsd.plugin/integrations/system.ram.md
new file mode 100644
index 00000000000000..7d49749224bcf0
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/system.ram.md
@@ -0,0 +1,129 @@
+
+
+# system.ram
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: system.ram
+
+
+
+## Overview
+
+Show information about system memory usage.
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per system.ram instance
+
+This metric shows RAM usage statistics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.ram | free, active, inactive, wired, cache, laundry, buffers | MiB |
+| mem.available | avail | MiB |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf) | system.ram | system memory utilization |
+| [ ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf) | system.ram | system memory utilization |
+| [ ram_available ](https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf) | mem.available | percentage of estimated amount of RAM available for userspace processes, without causing swapping |
+| [ ram_available ](https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf) | mem.available | percentage of estimated amount of RAM available for userspace processes, without causing swapping |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| system.ram | Enable or disable system RAM metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/uptime.md b/collectors/freebsd.plugin/integrations/uptime.md
new file mode 100644
index 00000000000000..e3f1db3f1d9dbf
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/uptime.md
@@ -0,0 +1,120 @@
+
+
+# uptime
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: uptime
+
+
+
+## Overview
+
+Shows how long the server has been up.
+
+The plugin calls `clock_gettime` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per uptime instance
+
+How long the system is running.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.uptime | uptime | seconds |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| uptime | Enable or disable uptime metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/vm.loadavg.md b/collectors/freebsd.plugin/integrations/vm.loadavg.md
new file mode 100644
index 00000000000000..88c47b7a4b893c
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/vm.loadavg.md
@@ -0,0 +1,128 @@
+
+
+# vm.loadavg
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: vm.loadavg
+
+
+
+## Overview
+
+System Load Average
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per vm.loadavg instance
+
+Monitoring for number of threads running or waiting.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.load | load1, load5, load15 | load |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ load_cpu_number ](https://github.com/netdata/netdata/blob/master/health/health.d/load.conf) | system.load | number of active CPU cores in the system |
+| [ load_average_15 ](https://github.com/netdata/netdata/blob/master/health/health.d/load.conf) | system.load | system fifteen-minute load average |
+| [ load_average_5 ](https://github.com/netdata/netdata/blob/master/health/health.d/load.conf) | system.load | system five-minute load average |
+| [ load_average_1 ](https://github.com/netdata/netdata/blob/master/health/health.d/load.conf) | system.load | system one-minute load average |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| vm.loadavg | Enable or disable load average metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/vm.stats.sys.v_intr.md b/collectors/freebsd.plugin/integrations/vm.stats.sys.v_intr.md
new file mode 100644
index 00000000000000..c3e7466e91b2c3
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/vm.stats.sys.v_intr.md
@@ -0,0 +1,120 @@
+
+
+# vm.stats.sys.v_intr
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: vm.stats.sys.v_intr
+
+
+
+## Overview
+
+Device interrupts
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per vm.stats.sys.v_intr instance
+
+This metric shows device interrupt frequency.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.dev_intr | interrupts | interrupts/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config option
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| vm.stats.sys.v_intr | Enable or disable device interrupts metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/vm.stats.sys.v_soft.md b/collectors/freebsd.plugin/integrations/vm.stats.sys.v_soft.md
new file mode 100644
index 00000000000000..ce914bb50c2b7b
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/vm.stats.sys.v_soft.md
@@ -0,0 +1,120 @@
+
+
+# vm.stats.sys.v_soft
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: vm.stats.sys.v_soft
+
+
+
+## Overview
+
+Software Interrupt
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per vm.stats.sys.v_soft instance
+
+This metric shows software interrupt frequency.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.soft_intr | interrupts | interrupts/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config option
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| vm.stats.sys.v_soft | Enable or disable software interrupts metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/vm.stats.sys.v_swtch.md b/collectors/freebsd.plugin/integrations/vm.stats.sys.v_swtch.md
new file mode 100644
index 00000000000000..cbcee311ff8540
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/vm.stats.sys.v_swtch.md
@@ -0,0 +1,121 @@
+
+
+# vm.stats.sys.v_swtch
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: vm.stats.sys.v_swtch
+
+
+
+## Overview
+
+CPU context switch
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per vm.stats.sys.v_swtch instance
+
+This metric counts the number of context switches happening on the host.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.ctxt | switches | context switches/s |
+| system.forks | started | processes/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| vm.stats.sys.v_swtch | Enable or disable CPU context switch metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/vm.stats.vm.v_pgfaults.md b/collectors/freebsd.plugin/integrations/vm.stats.vm.v_pgfaults.md
new file mode 100644
index 00000000000000..19230dd56cc8d0
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/vm.stats.vm.v_pgfaults.md
@@ -0,0 +1,120 @@
+
+
+# vm.stats.vm.v_pgfaults
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: vm.stats.vm.v_pgfaults
+
+
+
+## Overview
+
+Collect memory page faults events.
+
+The plugin calls `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per vm.stats.vm.v_pgfaults instance
+
+The number of page faults that happened on the host.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.pgfaults | memory, io_requiring, cow, cow_optimized, in_transit | page faults/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| vm.stats.vm.v_pgfaults | Enable or disable Memory page fault metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/vm.stats.vm.v_swappgs.md b/collectors/freebsd.plugin/integrations/vm.stats.vm.v_swappgs.md
new file mode 100644
index 00000000000000..c6caaa68205add
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/vm.stats.vm.v_swappgs.md
@@ -0,0 +1,125 @@
+
+
+# vm.stats.vm.v_swappgs
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: vm.stats.vm.v_swappgs
+
+
+
+## Overview
+
+This metric shows the amount of data read from and written to SWAP.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per vm.stats.vm.v_swappgs instance
+
+This metric shows events happening on SWAP.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.swapio | in, out | KiB/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ 30min_ram_swapped_out ](https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf) | mem.swapio | percentage of the system RAM swapped in the last 30 minutes |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| vm.stats.vm.v_swappgs | Enable or disable information about SWAP I/O metric. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/vm.swap_info.md b/collectors/freebsd.plugin/integrations/vm.swap_info.md
new file mode 100644
index 00000000000000..caa22b3dcd3a91
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/vm.swap_info.md
@@ -0,0 +1,125 @@
+
+
+# vm.swap_info
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: vm.swap_info
+
+
+
+## Overview
+
+Collect information about SWAP memory.
+
+The plugin calls `sysctlnametomib` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per vm.swap_info instance
+
+This metric shows the SWAP usage.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.swap | free, used | MiB |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ used_swap ](https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf) | mem.swap | swap memory utilization |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| vm.swap_info | Enable or disable SWAP metrics. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/vm.vmtotal.md b/collectors/freebsd.plugin/integrations/vm.vmtotal.md
new file mode 100644
index 00000000000000..f3f631af69ba6a
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/vm.vmtotal.md
@@ -0,0 +1,129 @@
+
+
+# vm.vmtotal
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: vm.vmtotal
+
+
+
+## Overview
+
+Collect Virtual Memory information from host.
+
+The plugin calls function `sysctl` to collect data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per vm.vmtotal instance
+
+These metrics show an overall view of the processes running on the host.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.active_processes | active | processes |
+| system.processes | running, blocked | processes |
+| mem.real | used | MiB |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ active_processes ](https://github.com/netdata/netdata/blob/master/health/health.d/processes.conf) | system.active_processes | system process IDs (PID) space utilization |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:vm.vmtotal]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config Options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| enable total processes | Number of active processes. | yes | no |
+| processes running | Show number of processes running or blocked. | yes | no |
+| real memory | Memory used on host. | yes | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/integrations/zfs.md b/collectors/freebsd.plugin/integrations/zfs.md
new file mode 100644
index 00000000000000..99f10026d302f2
--- /dev/null
+++ b/collectors/freebsd.plugin/integrations/zfs.md
@@ -0,0 +1,152 @@
+
+
+# zfs
+
+
+
+
+
+Plugin: freebsd.plugin
+Module: zfs
+
+
+
+## Overview
+
+Collect metrics for the ZFS filesystem.
+
+The plugin uses `sysctl` function to collect necessary data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per zfs instance
+
+These metrics show detailed information about ZFS filesystem.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| zfs.arc_size | arcsz, target, min, max | MiB |
+| zfs.l2_size | actual, size | MiB |
+| zfs.reads | arc, demand, prefetch, metadata, l2 | reads/s |
+| zfs.bytes | read, write | KiB/s |
+| zfs.hits | hits, misses | percentage |
+| zfs.hits_rate | hits, misses | events/s |
+| zfs.dhits | hits, misses | percentage |
+| zfs.dhits_rate | hits, misses | events/s |
+| zfs.phits | hits, misses | percentage |
+| zfs.phits_rate | hits, misses | events/s |
+| zfs.mhits | hits, misses | percentage |
+| zfs.mhits_rate | hits, misses | events/s |
+| zfs.l2hits | hits, misses | percentage |
+| zfs.l2hits_rate | hits, misses | events/s |
+| zfs.list_hits | mfu, mfu_ghost, mru, mru_ghost | hits/s |
+| zfs.arc_size_breakdown | recent, frequent | percentage |
+| zfs.memory_ops | throttled | operations/s |
+| zfs.important_ops | evict_skip, deleted, mutex_miss, hash_collisions | operations/s |
+| zfs.actual_hits | hits, misses | percentage |
+| zfs.actual_hits_rate | hits, misses | events/s |
+| zfs.demand_data_hits | hits, misses | percentage |
+| zfs.demand_data_hits_rate | hits, misses | events/s |
+| zfs.prefetch_data_hits | hits, misses | percentage |
+| zfs.prefetch_data_hits_rate | hits, misses | events/s |
+| zfs.hash_elements | current, max | elements |
+| zfs.hash_chains | current, max | chains |
+| zfs.trim_bytes | TRIMmed | bytes |
+| zfs.trim_requests | successful, failed, unsupported | requests |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ zfs_memory_throttle ](https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf) | zfs.memory_ops | number of times ZFS had to limit the ARC growth in the last 10 minutes |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freebsd:zfs_arcstats]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| show zero charts | Do not show charts with zero metrics. | no | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/freebsd.plugin/metadata.yaml b/collectors/freebsd.plugin/metadata.yaml
index fca8982f7bbfef..36fba2430b3396 100644
--- a/collectors/freebsd.plugin/metadata.yaml
+++ b/collectors/freebsd.plugin/metadata.yaml
@@ -323,7 +323,7 @@ modules:
link: "https://www.freebsd.org/"
categories:
- data-collection.freebsd
- icon_filename: "freebsd.org"
+ icon_filename: "freebsd.svg"
related_resources:
integrations:
list: []
@@ -2893,36 +2893,16 @@ modules:
metric: net.net
info: network interface ${label:device} current speed
os: "*"
- - name: 1m_received_traffic_overflow
- link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.net
- info: average inbound utilization for the network interface ${label:device} over the last minute
- os: "linux"
- - name: 1m_sent_traffic_overflow
- link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.net
- info: average outbound utilization for the network interface ${label:device} over the last minute
- os: "linux"
- name: inbound_packets_dropped_ratio
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.packets
+ metric: net.drops
info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
- os: "linux"
+ os: "*"
- name: outbound_packets_dropped_ratio
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.packets
- info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
- os: "linux"
- - name: wifi_inbound_packets_dropped_ratio
- link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.packets
- info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
- os: "linux"
- - name: wifi_outbound_packets_dropped_ratio
- link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.packets
+ metric: net.drops
info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
- os: "linux"
+ os: "*"
- name: 1m_received_packets_rate
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
metric: net.packets
@@ -2931,9 +2911,7 @@ modules:
- name: 10s_received_packets_storm
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
metric: net.packets
- info:
- ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over
- the last minute
+ info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute
os: "linux freebsd"
- name: interface_inbound_errors
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
@@ -2945,16 +2923,6 @@ modules:
metric: net.errors
info: number of outbound errors for the network interface ${label:device} in the last 10 minutes
os: "freebsd"
- - name: inbound_packets_dropped
- link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.drops
- info: number of inbound dropped packets for the network interface ${label:device} in the last 10 minutes
- os: "linux"
- - name: outbound_packets_dropped
- link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.drops
- info: number of outbound dropped packets for the network interface ${label:device} in the last 10 minutes
- os: "linux"
metrics:
folding:
title: Metrics
diff --git a/collectors/freeipmi.plugin/README.md b/collectors/freeipmi.plugin/README.md
deleted file mode 100644
index 5a9fd93c030d01..00000000000000
--- a/collectors/freeipmi.plugin/README.md
+++ /dev/null
@@ -1,287 +0,0 @@
-
-
-# freeipmi.plugin
-
-Netdata has a [freeipmi](https://www.gnu.org/software/freeipmi/) plugin.
-
-> FreeIPMI provides in-band and out-of-band IPMI software based on the IPMI v1.5/2.0 specification. The IPMI
-> specification defines a set of interfaces for platform management and is implemented by a number vendors for system
-> management. The features of IPMI that most users will be interested in are sensor monitoring, system event monitoring,
-> power control, and serial-over-LAN (SOL).
-
-## Installing the FreeIPMI plugin
-
-When using our official DEB/RPM packages, the FreeIPMI plugin is included in a separate package named
-`netdata-plugin-freeipmi` which needs to be manually installed using your system package manager. It is not
-installed automatically due to the large number of dependencies it requires.
-
-When using a static build of Netdata, the FreeIPMI plugin will be included and installed automatically, though
-you will still need to have FreeIPMI installed on your system to be able to use the plugin.
-
-When using a local build of Netdata, you need to ensure that the FreeIPMI development packages (typically
-called `libipmimonitoring-dev`, `libipmimonitoring-devel`, or `freeipmi-devel`) are installed when building Netdata.
-
-### Special Considerations
-
-Accessing IPMI requires root access, so the FreeIPMI plugin is automatically installed setuid root.
-
-FreeIPMI does not work correctly on IBM POWER systems, thus Netdata’s FreeIPMI plugin is not usable on such systems.
-
-If you have not previously used IPMI on your system, you will probably need to run the `ipmimonitoring` command as root
-to initiailze IPMI settings so that the Netdata plugin works correctly. It should return information about available
-seensors on the system.
-
-In some distributions `libipmimonitoring.pc` is located in a non-standard directory, which
-can cause building the plugin to fail when building Netdata from source. In that case you
-should find the file and link it to the standard pkg-config directory. Usually, running `sudo ln -s
-/usr/lib/$(uname -m)-linux-gnu/pkgconfig/libipmimonitoring.pc/libipmimonitoring.pc /usr/lib/pkgconfig/libipmimonitoring.pc`
-resolves this issue.
-
-## Metrics
-
-The plugin does a speed test when it starts, to find out the duration needed by the IPMI processor to respond. Depending
-on the speed of your IPMI processor, charts may need several seconds to show up on the dashboard.
-
-Metrics grouped by *scope*.
-
-The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
-
-### global
-
-These metrics refer to the monitored host.
-
-This scope has no labels.
-
-Metrics:
-
-| Metric | Dimensions | Unit |
-|----------|:----------:|:------:|
-| ipmi.sel | events | events |
-
-### sensor
-
-These metrics refer to the sensor.
-
-Labels:
-
-| Label | Description |
-|-----------|-----------------------------------------------------------------------------------------------------------------|
-| sensor | Sensor name. Same value as the "Name" column in the `ipmi-sensors` output. |
-| type | Sensor type. Same value as the "Type" column in the `ipmi-sensors` output. |
-| component | General sensor component. Identified by Netdata based on sensor name and type (e.g. System, Processor, Memory). |
-
-Metrics:
-
-| Metric | Dimensions | Unit |
-|-----------------------------|:-----------------------------------:|:----------:|
-| ipmi.sensor_state | nominal, critical, warning, unknown | state |
-| ipmi.sensor_temperature_c | temperature | Celsius |
-| ipmi.sensor_temperature_f | temperature | Fahrenheit |
-| ipmi.sensor_voltage | voltage | Volts |
-| ipmi.sensor_ampere | ampere | Amps |
-| ipmi.sensor_fan_speed | rotations | RPM |
-| ipmi.sensor_power | power | Watts |
-| ipmi.sensor_reading_percent | percentage | % |
-
-## Alarms
-
-There are 2 alarms:
-
-- The sensor is in a warning or critical state.
-- System Event Log (SEL) is non-empty.
-
-## Configuration
-
-The plugin supports a few options. To see them, run:
-
-```text
-# ./freeipmi.plugin --help
-
- netdata freeipmi.plugin v1.40.0-137-gf162c25bd
- Copyright (C) 2023 Netdata Inc.
- Released under GNU General Public License v3 or later.
- All rights reserved.
-
- This program is a data collector plugin for netdata.
-
- Available command line options:
-
- SECONDS data collection frequency
- minimum: 5
-
- debug enable verbose output
- default: disabled
-
- sel
- no-sel enable/disable SEL collection
- default: enabled
-
- reread-sdr-cache re-read SDR cache on every iteration
- default: disabled
-
- interpret-oem-data attempt to parse OEM data
- default: disabled
-
- assume-system-event-record
- tread illegal SEL events records as normal
- default: disabled
-
- ignore-non-interpretable-sensors
- do not read sensors that cannot be interpreted
- default: disabled
-
- bridge-sensors bridge sensors not owned by the BMC
- default: disabled
-
- shared-sensors enable shared sensors, if found
- default: disabled
-
- no-discrete-reading do not read sensors that their event/reading type code is invalid
- default: enabled
-
- ignore-scanning-disabled
- Ignore the scanning bit and read sensors no matter what
- default: disabled
-
- assume-bmc-owner assume the BMC is the sensor owner no matter what
- (usually bridging is required too)
- default: disabled
-
- hostname HOST
- username USER
- password PASS connect to remote IPMI host
- default: local IPMI processor
-
- no-auth-code-check
- noauthcodecheck don't check the authentication codes returned
-
- driver-type IPMIDRIVER
- Specify the driver type to use instead of doing an auto selection.
- The currently available outofband drivers are LAN and LAN_2_0,
- which perform IPMI 1.5 and IPMI 2.0 respectively.
- The currently available inband drivers are KCS, SSIF, OPENIPMI and SUNBMC.
-
- sdr-cache-dir PATH directory for SDR cache files
- default: /tmp
-
- sensor-config-file FILE filename to read sensor configuration
- default: system default
-
- sel-config-file FILE filename to read sel configuration
- default: system default
-
- ignore N1,N2,N3,... sensor IDs to ignore
- default: none
-
- ignore-status N1,N2,N3,... sensor IDs to ignore status (nominal/warning/critical)
- default: none
-
- -v
- -V
- version print version and exit
-
- Linux kernel module for IPMI is CPU hungry.
- On Linux run this to lower kipmiN CPU utilization:
- # echo 10 > /sys/module/ipmi_si/parameters/kipmid_max_busy_us
-
- or create: /etc/modprobe.d/ipmi.conf with these contents:
- options ipmi_si kipmid_max_busy_us=10
-
- For more information:
- https://github.com/netdata/netdata/tree/master/collectors/freeipmi.plugin
-```
-
-You can set these options in `/etc/netdata/netdata.conf` at this section:
-
-```
-[plugin:freeipmi]
- update every = 5
- command options =
-```
-
-Append to `command options =` the settings you need. The minimum `update every` is 5 (enforced internally by the
-plugin). IPMI is slow and CPU hungry. So, once every 5 seconds is pretty acceptable.
-
-## Ignoring specific sensors
-
-Specific sensor IDs can be excluded from freeipmi tools by editing `/etc/freeipmi/freeipmi.conf` and setting the IDs to
-be ignored at `ipmi-sensors-exclude-record-ids`. **However this file is not used by `libipmimonitoring`** (the library
-used by Netdata's `freeipmi.plugin`).
-
-So, `freeipmi.plugin` supports the option `ignore` that accepts a comma separated list of sensor IDs to ignore. To
-configure it, edit `/etc/netdata/netdata.conf` and set:
-
-```
-[plugin:freeipmi]
- command options = ignore 1,2,3,4,...
-```
-
-To find the IDs to ignore, run the command `ipmimonitoring`. The first column is the wanted ID:
-
-```
-ID | Name | Type | State | Reading | Units | Event
-1 | Ambient Temp | Temperature | Nominal | 26.00 | C | 'OK'
-2 | Altitude | Other Units Based Sensor | Nominal | 480.00 | ft | 'OK'
-3 | Avg Power | Current | Nominal | 100.00 | W | 'OK'
-4 | Planar 3.3V | Voltage | Nominal | 3.29 | V | 'OK'
-5 | Planar 5V | Voltage | Nominal | 4.90 | V | 'OK'
-6 | Planar 12V | Voltage | Nominal | 11.99 | V | 'OK'
-7 | Planar VBAT | Voltage | Nominal | 2.95 | V | 'OK'
-8 | Fan 1A Tach | Fan | Nominal | 3132.00 | RPM | 'OK'
-9 | Fan 1B Tach | Fan | Nominal | 2150.00 | RPM | 'OK'
-10 | Fan 2A Tach | Fan | Nominal | 2494.00 | RPM | 'OK'
-11 | Fan 2B Tach | Fan | Nominal | 1825.00 | RPM | 'OK'
-12 | Fan 3A Tach | Fan | Nominal | 3538.00 | RPM | 'OK'
-13 | Fan 3B Tach | Fan | Nominal | 2625.00 | RPM | 'OK'
-14 | Fan 1 | Entity Presence | Nominal | N/A | N/A | 'Entity Present'
-15 | Fan 2 | Entity Presence | Nominal | N/A | N/A | 'Entity Present'
-...
-```
-
-## Debugging
-
-You can run the plugin by hand:
-
-```sh
-# become user netdata
-sudo su -s /bin/sh netdata
-
-# run the plugin in debug mode
-/usr/libexec/netdata/plugins.d/freeipmi.plugin 5 debug
-```
-
-You will get verbose output on what the plugin does.
-
-## kipmi0 CPU usage
-
-There have been reports that kipmi is showing increased CPU when the IPMI is queried. To lower the CPU consumption of
-the system you can issue this command:
-
-```sh
-echo 10 > /sys/module/ipmi_si/parameters/kipmid_max_busy_us
-```
-
-You can also permanently set the above setting by creating the file `/etc/modprobe.d/ipmi.conf` with this content:
-
-```sh
-# prevent kipmi from consuming 100% CPU
-options ipmi_si kipmid_max_busy_us=10
-```
-
-This instructs the kernel IPMI module to pause for a tick between checking IPMI. Querying IPMI will be a lot slower
-now (e.g. several seconds for IPMI to respond), but `kipmi` will not use any noticeable CPU. You can also use a higher
-number (this is the number of microseconds to poll IPMI for a response, before waiting for a tick).
-
-If you need to disable IPMI for Netdata, edit `/etc/netdata/netdata.conf` and set:
-
-```
-[plugins]
- freeipmi = no
-```
diff --git a/collectors/freeipmi.plugin/README.md b/collectors/freeipmi.plugin/README.md
new file mode 120000
index 00000000000000..f55ebf73d5cd6a
--- /dev/null
+++ b/collectors/freeipmi.plugin/README.md
@@ -0,0 +1 @@
+integrations/intelligent_platform_management_interface_ipmi.md
\ No newline at end of file
diff --git a/collectors/freeipmi.plugin/freeipmi_plugin.c b/collectors/freeipmi.plugin/freeipmi_plugin.c
index 56a1c499892eb2..6ec9b698bf924a 100644
--- a/collectors/freeipmi.plugin/freeipmi_plugin.c
+++ b/collectors/freeipmi.plugin/freeipmi_plugin.c
@@ -22,6 +22,10 @@
#include "libnetdata/libnetdata.h"
#include "libnetdata/required_dummies.h"
+#define FREEIPMI_GLOBAL_FUNCTION_SENSORS() do { \
+ fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " GLOBAL \"ipmi-sensors\" %d \"%s\"\n", 5, "Displays current sensor state and readings"); \
+ } while(0)
+
// component names, based on our patterns
#define NETDATA_SENSOR_COMPONENT_MEMORY_MODULE "Memory Module"
#define NETDATA_SENSOR_COMPONENT_MEMORY "Memory"
@@ -83,6 +87,12 @@ static void netdata_update_ipmi_sel_events_count(struct netdata_ipmi_state *stat
/* Communication Configuration - Initialize accordingly */
+static netdata_mutex_t stdout_mutex = NETDATA_MUTEX_INITIALIZER;
+static bool function_plugin_should_exit = false;
+
+int update_every = IPMI_SENSORS_MIN_UPDATE_EVERY; // this is the minimum update frequency
+int update_every_sel = IPMI_SEL_MIN_UPDATE_EVERY; // this is the minimum update frequency for SEL events
+
/* Hostname, NULL for In-band communication, non-null for a hostname */
char *hostname = NULL;
@@ -707,6 +717,8 @@ struct netdata_ipmi_state {
} updates;
};
+struct netdata_ipmi_state state = {0};
+
// ----------------------------------------------------------------------------
// excluded record ids maintenance (both for sensor data and state)
@@ -1146,7 +1158,7 @@ int netdata_ipmi_detect_speed_secs(struct ipmi_monitoring_ipmi_config *ipmi_conf
successful++;
if(unlikely(state->debug))
- fprintf(stderr, "%s: %s data collection speed was %llu usec\n",
+ fprintf(stderr, "%s: %s data collection speed was %"PRIu64" usec\n",
program_name, netdata_collect_type_to_string(type), end - start);
// add it to our total
@@ -1297,6 +1309,7 @@ static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *sta
int update_every = (int)(state->sensors.freq_ut / USEC_PER_SEC);
struct sensor *sn;
+ netdata_mutex_lock(&stdout_mutex);
// generate the CHART/DIMENSION lines, if we have to
dfe_start_reentrant(state->sensors.dict, sn) {
if(unlikely(!sn->do_metric && !sn->do_state))
@@ -1307,7 +1320,7 @@ static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *sta
if(likely(sn->do_metric)) {
if(unlikely(!is_sensor_updated(sn->last_collected_metric_ut, state->updates.now_ut, state->sensors.freq_ut))) {
if(unlikely(state->debug))
- fprintf(stderr, "%s: %s() sensor '%s' metric is not UPDATED (last updated %llu, now %llu, freq %llu\n",
+ fprintf(stderr, "%s: %s() sensor '%s' metric is not UPDATED (last updated %"PRIu64", now %"PRIu64", freq %"PRIu64"\n",
program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_metric_ut, state->updates.now_ut, state->sensors.freq_ut);
}
else {
@@ -1360,7 +1373,7 @@ static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *sta
if(likely(sn->do_state)) {
if(unlikely(!is_sensor_updated(sn->last_collected_state_ut, state->updates.now_ut, state->sensors.freq_ut))) {
if (unlikely(state->debug))
- fprintf(stderr, "%s: %s() sensor '%s' state is not UPDATED (last updated %llu, now %llu, freq %llu\n",
+ fprintf(stderr, "%s: %s() sensor '%s' state is not UPDATED (last updated %"PRIu64", now %"PRIu64", freq %"PRIu64"\n",
program_name, __FUNCTION__, sn->sensor_name, sn->last_collected_state_ut, state->updates.now_ut, state->sensors.freq_ut);
}
else {
@@ -1396,12 +1409,16 @@ static size_t send_ipmi_sensor_metrics_to_netdata(struct netdata_ipmi_state *sta
}
dfe_done(sn);
+ netdata_mutex_unlock(&stdout_mutex);
+
return total_sensors_sent;
}
static size_t send_ipmi_sel_metrics_to_netdata(struct netdata_ipmi_state *state) {
static bool sel_chart_generated = false;
+ netdata_mutex_lock(&stdout_mutex);
+
if(likely(state->sel.status == ICS_RUNNING)) {
if(unlikely(!sel_chart_generated)) {
sel_chart_generated = true;
@@ -1422,37 +1439,197 @@ static size_t send_ipmi_sel_metrics_to_netdata(struct netdata_ipmi_state *state)
);
}
+ netdata_mutex_unlock(&stdout_mutex);
+
return state->sel.events;
}
// ----------------------------------------------------------------------------
-// main, command line arguments parsing
-int main (int argc, char **argv) {
- bool netdata_do_sel = IPMI_ENABLE_SEL_BY_DEFAULT;
+static const char *get_sensor_state_string(struct sensor *sn) { // returns a lowercase label for sn->sensor_state
+ switch (sn->sensor_state) {
+ case IPMI_MONITORING_STATE_NOMINAL:
+ return "nominal";
+ case IPMI_MONITORING_STATE_WARNING:
+ return "warning";
+ case IPMI_MONITORING_STATE_CRITICAL:
+ return "critical";
+ default: // any state value not listed above is reported as "unknown"
+ return "unknown";
+ }
+}
- stderror = stderr;
- clocks_init();
+static const char *get_sensor_function_priority(struct sensor *sn) { // returns the "severity" string for a sensor's table row
+ switch (sn->sensor_state) {
+ case IPMI_MONITORING_STATE_WARNING:
+ return "warning";
+ case IPMI_MONITORING_STATE_CRITICAL:
+ return "critical";
+ default: // nominal and any other state value both map to "normal"
+ return "normal";
+ }
+}
- int update_every = IPMI_SENSORS_MIN_UPDATE_EVERY; // this is the minimum update frequency
- int update_every_sel = IPMI_SEL_MIN_UPDATE_EVERY; // this is the minimum update frequency for SEL events
- bool debug = false;
+static void freeimi_function_sensors(const char *transaction, char *function __maybe_unused, int timeout __maybe_unused, bool *cancelled __maybe_unused) {
+ time_t expires = now_realtime_sec() + update_every;
- // ------------------------------------------------------------------------
- // initialization of netdata plugin
+ BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL);
+ buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_NEWLINE_ON_ARRAY_ITEMS);
+ buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
+ buffer_json_member_add_string(wb, "type", "table");
+ buffer_json_member_add_time_t(wb, "update_every", update_every);
+ buffer_json_member_add_string(wb, "help", "View IPMI sensor readings and its state");
+ buffer_json_member_add_array(wb, "data");
+
+ struct sensor *sn;
+ dfe_start_reentrant(state.sensors.dict, sn) {
+ if (unlikely(!sn->do_metric && !sn->do_state))
+ continue;
+
+ double reading = NAN;
+ switch (sn->sensor_reading_type) {
+ case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER32:
+ reading = (double)sn->sensor_reading.uint32_value;
+ break;
+ case IPMI_MONITORING_SENSOR_READING_TYPE_DOUBLE:
+ reading = (double)(sn->sensor_reading.double_value);
+ break;
+ case IPMI_MONITORING_SENSOR_READING_TYPE_UNSIGNED_INTEGER8_BOOL:
+ reading = (double)sn->sensor_reading.bool_value;
+ break;
+ }
+
+ buffer_json_add_array_item_array(wb);
+
+ buffer_json_add_array_item_string(wb, sn->sensor_name);
+ buffer_json_add_array_item_string(wb, sn->type);
+ buffer_json_add_array_item_string(wb, sn->component);
+ buffer_json_add_array_item_double(wb, reading);
+ buffer_json_add_array_item_string(wb, sn->units);
+ buffer_json_add_array_item_string(wb, get_sensor_state_string(sn));
+
+ buffer_json_add_array_item_object(wb);
+ buffer_json_member_add_string(wb, "severity", get_sensor_function_priority(sn));
+ buffer_json_object_close(wb);
+
+ buffer_json_array_close(wb);
+ }
+ dfe_done(sn);
- program_name = "freeipmi.plugin";
+ buffer_json_array_close(wb); // data
+ buffer_json_member_add_object(wb, "columns");
+ {
+ size_t field_id = 0;
+
+ buffer_rrdf_table_add_field(wb, field_id++, "Sensor", "Sensor Name",
+ RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+ 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+ RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY | RRDF_FIELD_OPTS_FULL_WIDTH,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "Type", "Sensor Type",
+ RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+ 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+ RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "Component", "Sensor Component",
+ RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+ 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+ RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "Reading", "Sensor Current Reading",
+ RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER,
+ 2, NULL, 0, RRDF_FIELD_SORT_DESCENDING, NULL,
+ RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_VISIBLE,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "Units", "Sensor Reading Units",
+ RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+ 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+ RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY,
+ NULL);
+ buffer_rrdf_table_add_field(wb, field_id++, "State", "Sensor State",
+ RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+ 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+ RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+ RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY,
+ NULL);
+ buffer_rrdf_table_add_field(
+ wb, field_id++,
+ "rowOptions", "rowOptions",
+ RRDF_FIELD_TYPE_NONE,
+ RRDR_FIELD_VISUAL_ROW_OPTIONS,
+ RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN,
+ RRDF_FIELD_SORT_FIXED,
+ NULL,
+ RRDF_FIELD_SUMMARY_COUNT,
+ RRDF_FIELD_FILTER_NONE,
+ RRDF_FIELD_OPTS_DUMMY,
+ NULL);
+ }
+
+ buffer_json_object_close(wb); // columns
+ buffer_json_member_add_string(wb, "default_sort_column", "Type");
- // disable syslog
- error_log_syslog = 0;
+ buffer_json_member_add_object(wb, "charts");
+ {
+ buffer_json_member_add_object(wb, "Sensors");
+ {
+ buffer_json_member_add_string(wb, "name", "Sensors");
+ buffer_json_member_add_string(wb, "type", "stacked-bar");
+ buffer_json_member_add_array(wb, "columns");
+ {
+ buffer_json_add_array_item_string(wb, "Sensor");
+ }
+ buffer_json_array_close(wb);
+ }
+ buffer_json_object_close(wb);
+ }
+ buffer_json_object_close(wb); // charts
+
+ buffer_json_member_add_array(wb, "default_charts");
+ {
+ buffer_json_add_array_item_array(wb);
+ buffer_json_add_array_item_string(wb, "Sensors");
+ buffer_json_add_array_item_string(wb, "Component");
+ buffer_json_array_close(wb);
+
+ buffer_json_add_array_item_array(wb);
+ buffer_json_add_array_item_string(wb, "Sensors");
+ buffer_json_add_array_item_string(wb, "State");
+ buffer_json_array_close(wb);
+ }
+ buffer_json_array_close(wb);
+
+ buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1);
+ buffer_json_finalize(wb);
+
+ pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "application/json", expires, wb);
+
+ buffer_free(wb);
+}
+
+// ----------------------------------------------------------------------------
+// main, command line arguments parsing
- // set errors flood protection to 100 logs per hour
- error_log_errors_per_period = 100;
- error_log_throttle_period = 3600;
+static void plugin_exit(int code) { // flushes stdout, raises the exit flag, then terminates with 'code'
+ fflush(stdout); // push out any pending output (callers print EXIT/DISABLE just before calling this)
+ function_plugin_should_exit = true; // same flag whose address is handed to functions_evloop_init()
+ exit(code);
+}
- // initialize the threads
+int main (int argc, char **argv) {
+ clocks_init();
+ nd_log_initialize_for_external_plugins("freeipmi.plugin");
netdata_threads_init_for_external_plugins(0); // set the default threads stack size here
+ bool netdata_do_sel = IPMI_ENABLE_SEL_BY_DEFAULT;
+
+ bool debug = false;
+
// ------------------------------------------------------------------------
// parse command line parameters
@@ -1726,7 +1903,7 @@ int main (int argc, char **argv) {
errno = 0;
if(freq_s && freq_s < update_every)
- collector_error("%s(): update frequency %d seconds is too small for IPMI. Using %d.",
+ collector_info("%s(): update frequency %d seconds is too small for IPMI. Using %d.",
__FUNCTION__, freq_s, update_every);
update_every = freq_s = MAX(freq_s, update_every);
@@ -1799,16 +1976,17 @@ int main (int argc, char **argv) {
heartbeat_t hb;
heartbeat_init(&hb);
+
for(iteration = 0; 1 ; iteration++) {
usec_t dt = heartbeat_next(&hb, step);
if (!tty) {
+ netdata_mutex_lock(&stdout_mutex);
fprintf(stdout, "\n"); // keepalive to avoid parser read timeout (2 minutes) during ipmi_detect_speed_secs()
fflush(stdout);
+ netdata_mutex_unlock(&stdout_mutex);
}
- struct netdata_ipmi_state state = {0 };
-
spinlock_lock(&sensors_data.spinlock);
state.sensors = sensors_data.state.sensors;
spinlock_unlock(&sensors_data.spinlock);
@@ -1825,8 +2003,7 @@ int main (int argc, char **argv) {
__FUNCTION__, (size_t)((now_monotonic_usec() - state.sensors.last_iteration_ut) / USEC_PER_SEC));
fprintf(stdout, "EXIT\n");
- fflush(stdout);
- exit(0);
+ plugin_exit(0);
}
break;
@@ -1836,14 +2013,12 @@ int main (int argc, char **argv) {
case ICS_INIT_FAILED:
collector_error("%s(): sensors failed to initialize. Calling DISABLE.", __FUNCTION__);
fprintf(stdout, "DISABLE\n");
- fflush(stdout);
- exit(0);
+ plugin_exit(0);
case ICS_FAILED:
collector_error("%s(): sensors fails repeatedly to collect metrics. Exiting to restart.", __FUNCTION__);
fprintf(stdout, "EXIT\n");
- fflush(stdout);
- exit(0);
+ plugin_exit(0);
}
if(netdata_do_sel) {
@@ -1863,6 +2038,16 @@ int main (int argc, char **argv) {
if(unlikely(debug))
fprintf(stderr, "%s: calling send_ipmi_sensor_metrics_to_netdata()\n", program_name);
+ static bool add_func_sensors = true;
+ if (add_func_sensors) {
+ add_func_sensors = false;
+ struct functions_evloop_globals *wg =
+ functions_evloop_init(1, "FREEIPMI", &stdout_mutex, &function_plugin_should_exit);
+ functions_evloop_add_function(
+ wg, "ipmi-sensors", freeimi_function_sensors, PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT);
+ FREEIPMI_GLOBAL_FUNCTION_SENSORS();
+ }
+
state.updates.now_ut = now_monotonic_usec();
send_ipmi_sensor_metrics_to_netdata(&state);
@@ -1870,7 +2055,7 @@ int main (int argc, char **argv) {
send_ipmi_sel_metrics_to_netdata(&state);
if(unlikely(debug))
- fprintf(stderr, "%s: iteration %zu, dt %llu usec, sensors ever collected %zu, sensors last collected %zu \n"
+ fprintf(stderr, "%s: iteration %zu, dt %"PRIu64" usec, sensors ever collected %zu, sensors last collected %zu \n"
, program_name
, iteration
, dt
@@ -1878,6 +2063,8 @@ int main (int argc, char **argv) {
, state.sensors.collected
);
+ netdata_mutex_lock(&stdout_mutex);
+
if (!global_chart_created) {
global_chart_created = true;
@@ -1897,10 +2084,11 @@ int main (int argc, char **argv) {
if (now_monotonic_sec() - started_t > IPMI_RESTART_EVERY_SECONDS) {
collector_info("%s(): reached my lifetime expectancy. Exiting to restart.", __FUNCTION__);
fprintf(stdout, "EXIT\n");
- fflush(stdout);
- exit(0);
+ plugin_exit(0);
}
fflush(stdout);
+
+ netdata_mutex_unlock(&stdout_mutex);
}
}
diff --git a/collectors/freeipmi.plugin/integrations/intelligent_platform_management_interface_ipmi.md b/collectors/freeipmi.plugin/integrations/intelligent_platform_management_interface_ipmi.md
new file mode 100644
index 00000000000000..c0293fc37d3239
--- /dev/null
+++ b/collectors/freeipmi.plugin/integrations/intelligent_platform_management_interface_ipmi.md
@@ -0,0 +1,275 @@
+
+
+# Intelligent Platform Management Interface (IPMI)
+
+
+
+
+
+Plugin: freeipmi.plugin
+Module: freeipmi
+
+
+
+## Overview
+
+"Monitor enterprise server sensor readings, event log entries, and hardware statuses to ensure reliable server operations."
+
+
+The plugin uses the open source library IPMImonitoring to communicate with sensors.
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid.
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The Linux kernel module for IPMI can create significant CPU overhead.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+The plugin does a speed test when it starts, to find out the duration needed by the IPMI processor to respond. Depending on the speed of your IPMI processor, charts may need several seconds to show up on the dashboard.
+
+
+### Per Intelligent Platform Management Interface (IPMI) instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipmi.sel | events | events |
+
+### Per sensor
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| sensor | The sensor name |
+| type | One of 45 recognized sensor types (Battery, Voltage...) |
+| component | One of 25 recognized components (Processor, Peripheral). |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipmi.sensor_state | nominal, critical, warning, unknown | state |
+| ipmi.sensor_temperature_c | temperature | Celsius |
+| ipmi.sensor_temperature_f | temperature | Fahrenheit |
+| ipmi.sensor_voltage | voltage | Volts |
+| ipmi.sensor_ampere | ampere | Amps |
+| ipmi.sensor_fan_speed | rotations | RPM |
+| ipmi.sensor_power | power | Watts |
+| ipmi.sensor_reading_percent | percentage | % |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ ipmi_sensor_state ](https://github.com/netdata/netdata/blob/master/health/health.d/ipmi.conf) | ipmi.sensor_state | IPMI sensor ${label:sensor} (${label:component}) state |
+
+
+## Setup
+
+### Prerequisites
+
+#### Install freeipmi.plugin
+
+When using our official DEB/RPM packages, the FreeIPMI plugin is included in a separate package named `netdata-plugin-freeipmi` which needs to be manually installed using your system package manager. It is not installed automatically due to the large number of dependencies it requires.
+
+When using a static build of Netdata, the FreeIPMI plugin will be included and installed automatically, though you will still need to have FreeIPMI installed on your system to be able to use the plugin.
+
+When using a local build of Netdata, you need to ensure that the FreeIPMI development packages (typically called `libipmimonitoring-dev`, `libipmimonitoring-devel`, or `freeipmi-devel`) are installed when building Netdata.
+
+
+#### Preliminary actions
+
+If you have not previously used IPMI on your system, you will probably need to run the `ipmimonitoring` command as root
+to initialize IPMI settings so that the Netdata plugin works correctly. It should return information about available sensors on the system.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:freeipmi]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+The configuration is set using command line options:
+
+```
+# netdata.conf
+[plugin:freeipmi]
+ command options = opt1 opt2 ... optN
+```
+
+To display a help message listing the available command line options:
+
+```bash
+/usr/libexec/netdata/plugins.d/freeipmi.plugin --help
+```
+
+
+Command options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| SECONDS | Data collection frequency. | | no |
+| debug | Enable verbose output. | disabled | no |
+| no-sel | Disable System Event Log (SEL) collection. | disabled | no |
+| reread-sdr-cache | Re-read SDR cache on every iteration. | disabled | no |
+| interpret-oem-data | Attempt to parse OEM data. | disabled | no |
+| assume-system-event-record | Treat illegal SEL event records as normal. | disabled | no |
+| ignore-non-interpretable-sensors | Do not read sensors that cannot be interpreted. | disabled | no |
+| bridge-sensors | Bridge sensors not owned by the BMC. | disabled | no |
+| shared-sensors | Enable shared sensors if found. | disabled | no |
+| no-discrete-reading | Do not read sensors if their event/reading type code is invalid. | enabled | no |
+| ignore-scanning-disabled | Ignore the scanning bit and read sensors no matter what. | disabled | no |
+| assume-bmc-owner | Assume the BMC is the sensor owner no matter what (usually bridging is required too). | disabled | no |
+| hostname HOST | Remote IPMI hostname or IP address. | local | no |
+| username USER | Username that will be used when connecting to the remote host. | | no |
+| password PASS | Password that will be used when connecting to the remote host. | | no |
+| noauthcodecheck / no-auth-code-check | Don't check the authentication codes returned. | | no |
+| driver-type IPMIDRIVER | Specify the driver type to use instead of doing an auto selection. The currently available outofband drivers are LAN and LAN_2_0, which perform IPMI 1.5 and IPMI 2.0 respectively. The currently available inband drivers are KCS, SSIF, OPENIPMI and SUNBMC. | | no |
+| sdr-cache-dir PATH | SDR cache files directory. | /tmp | no |
+| sensor-config-file FILE | Sensors configuration filename. | system default | no |
+| sel-config-file FILE | SEL configuration filename. | system default | no |
+| ignore N1,N2,N3,... | Sensor IDs to ignore. | | no |
+| ignore-status N1,N2,N3,... | Sensor IDs to ignore status (nominal/warning/critical). | | no |
+| -v | Print version and exit. | | no |
+| --help | Print usage message and exit. | | no |
+
+
+
+#### Examples
+
+##### Decrease data collection frequency
+
+Basic example decreasing data collection frequency. The minimum `update every` is 5 (enforced internally by the plugin). IPMI is slow and CPU hungry. So, once every 5 seconds is pretty acceptable.
+
+```yaml
+[plugin:freeipmi]
+ update every = 10
+
+```
+##### Disable SEL collection
+
+Append to `command options =` the options you need.
+
+Config
+
+```yaml
+[plugin:freeipmi]
+ command options = no-sel
+
+```
+
+
+##### Ignore specific sensors
+
+Specific sensor IDs can be excluded from freeipmi tools by editing `/etc/freeipmi/freeipmi.conf` and setting the IDs to be ignored at `ipmi-sensors-exclude-record-ids`.
+
+**However this file is not used by `libipmimonitoring`** (the library used by Netdata's `freeipmi.plugin`).
+
+To find the IDs to ignore, run the command `ipmimonitoring`. The first column is the wanted ID:
+
+ID | Name | Type | State | Reading | Units | Event
+1 | Ambient Temp | Temperature | Nominal | 26.00 | C | 'OK'
+2 | Altitude | Other Units Based Sensor | Nominal | 480.00 | ft | 'OK'
+3 | Avg Power | Current | Nominal | 100.00 | W | 'OK'
+4 | Planar 3.3V | Voltage | Nominal | 3.29 | V | 'OK'
+5 | Planar 5V | Voltage | Nominal | 4.90 | V | 'OK'
+6 | Planar 12V | Voltage | Nominal | 11.99 | V | 'OK'
+7 | Planar VBAT | Voltage | Nominal | 2.95 | V | 'OK'
+8 | Fan 1A Tach | Fan | Nominal | 3132.00 | RPM | 'OK'
+9 | Fan 1B Tach | Fan | Nominal | 2150.00 | RPM | 'OK'
+10 | Fan 2A Tach | Fan | Nominal | 2494.00 | RPM | 'OK'
+11 | Fan 2B Tach | Fan | Nominal | 1825.00 | RPM | 'OK'
+12 | Fan 3A Tach | Fan | Nominal | 3538.00 | RPM | 'OK'
+13 | Fan 3B Tach | Fan | Nominal | 2625.00 | RPM | 'OK'
+14 | Fan 1 | Entity Presence | Nominal | N/A | N/A | 'Entity Present'
+15 | Fan 2 | Entity Presence | Nominal | N/A | N/A | 'Entity Present'
+...
+
+`freeipmi.plugin` supports the option `ignore`, which accepts a comma-separated list of sensor IDs to ignore. To configure it, set the following in `netdata.conf`:
+
+
+Config
+
+```yaml
+[plugin:freeipmi]
+ command options = ignore 1,2,3,4,...
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+
+
+### kipmi0 CPU usage
+
+
+
+
diff --git a/collectors/freeipmi.plugin/metadata.yaml b/collectors/freeipmi.plugin/metadata.yaml
index 9540410bf46c44..f8c75c2cb144f3 100644
--- a/collectors/freeipmi.plugin/metadata.yaml
+++ b/collectors/freeipmi.plugin/metadata.yaml
@@ -2,7 +2,7 @@ plugin_name: freeipmi.plugin
modules:
- meta:
plugin_name: freeipmi.plugin
- module_name: sensors
+ module_name: freeipmi
monitored_instance:
name: Intelligent Platform Management Interface (IPMI)
link: "https://en.wikipedia.org/wiki/Intelligent_Platform_Management_Interface"
@@ -42,34 +42,225 @@ modules:
setup:
prerequisites:
list:
- - title: Preliminary actions
+ - title: Install freeipmi.plugin
description: |
- If you have not previously used IPMI on your system, you will probably need to run the ipmimonitoring command as root to initialize IPMI settings so that the Netdata plugin works correctly. It should return information about available sensors on the system.
+ When using our official DEB/RPM packages, the FreeIPMI plugin is included in a separate package named `netdata-plugin-freeipmi` which needs to be manually installed using your system package manager. It is not installed automatically due to the large number of dependencies it requires.
+
+ When using a static build of Netdata, the FreeIPMI plugin will be included and installed automatically, though you will still need to have FreeIPMI installed on your system to be able to use the plugin.
- In some distributions libipmimonitoring.pc is located in a non-standard directory, which can cause building the plugin to fail when building Netdata from source. In that case you should find the file and link it to the standard pkg-config directory. Usually, running sudo ln -s /usr/lib/$(uname -m)-linux-gnu/pkgconfig/libipmimonitoring.pc/libipmimonitoring.pc /usr/lib/pkgconfig/libipmimonitoring.pc resolves this issue.
+ When using a local build of Netdata, you need to ensure that the FreeIPMI development packages (typically called `libipmimonitoring-dev`, `libipmimonitoring-devel`, or `freeipmi-devel`) are installed when building Netdata.
+ - title: Preliminary actions
+ description: |
+ If you have not previously used IPMI on your system, you will probably need to run the `ipmimonitoring` command as root
+ to initialize IPMI settings so that the Netdata plugin works correctly. It should return information about available sensors on the system.
configuration:
file:
name: "netdata.conf"
- section_name: '[plugin:freeipmi]'
- description: "This is netdata main configuration file"
+ section_name: "[plugin:freeipmi]"
options:
- description: "This tool receives command line options that are visible when user run: `./usr/libexec/netdata/plugins.d/freeipmi.plugin --help`"
+ description: |
+ The configuration is set using command line options:
+
+ ```
+ # netdata.conf
+ [plugin:freeipmi]
+ command options = opt1 opt2 ... optN
+ ```
+
+ To display a help message listing the available command line options:
+
+ ```bash
+ /usr/libexec/netdata/plugins.d/freeipmi.plugin --help
+ ```
folding:
- title: "Config options"
+ title: "Command options"
enabled: true
list:
- - name: command options
- description: Variable used to pass arguments for the plugin.
- default_value: 1
+ - name: SECONDS
+ description: Data collection frequency.
+ default_value: ""
+ required: false
+ - name: debug
+ description: Enable verbose output.
+ default_value: disabled
+ required: false
+ - name: no-sel
+ description: Disable System Event Log (SEL) collection.
+ default_value: disabled
+ required: false
+ - name: reread-sdr-cache
+ description: Re-read SDR cache on every iteration.
+ default_value: disabled
+ required: false
+ - name: interpret-oem-data
+ description: Attempt to parse OEM data.
+ default_value: disabled
+ required: false
+ - name: assume-system-event-record
+ description: Treat illegal SEL event records as normal.
+ default_value: disabled
+ required: false
+ - name: ignore-non-interpretable-sensors
+ description: Do not read sensors that cannot be interpreted.
+ default_value: disabled
+ required: false
+ - name: bridge-sensors
+ description: Bridge sensors not owned by the BMC.
+ default_value: disabled
+ required: false
+ - name: shared-sensors
+ description: Enable shared sensors if found.
+ default_value: disabled
+ required: false
+ - name: no-discrete-reading
+ description: Do not read sensors if their event/reading type code is invalid.
+ default_value: enabled
+ required: false
+ - name: ignore-scanning-disabled
+ description: Ignore the scanning bit and read sensors no matter what.
+ default_value: disabled
+ required: false
+ - name: assume-bmc-owner
+ description: Assume the BMC is the sensor owner no matter what (usually bridging is required too).
+ default_value: disabled
+ required: false
+ - name: hostname HOST
+ description: Remote IPMI hostname or IP address.
+ default_value: local
+ required: false
+ - name: username USER
+ description: Username that will be used when connecting to the remote host.
+ default_value: ""
+ required: false
+ - name: password PASS
+ description: Password that will be used when connecting to the remote host.
+ default_value: ""
+ required: false
+ - name: noauthcodecheck / no-auth-code-check
+ description: Don't check the authentication codes returned.
+ default_value: ""
+ required: false
+ - name: driver-type IPMIDRIVER
+ description: Specify the driver type to use instead of doing an auto selection. The currently available outofband drivers are LAN and LAN_2_0, which perform IPMI 1.5 and IPMI 2.0 respectively. The currently available inband drivers are KCS, SSIF, OPENIPMI and SUNBMC.
+ default_value: ""
+ required: false
+ - name: sdr-cache-dir PATH
+ description: SDR cache files directory.
+ default_value: /tmp
+ required: false
+ - name: sensor-config-file FILE
+ description: Sensors configuration filename.
+ default_value: system default
+ required: false
+ - name: sel-config-file FILE
+ description: SEL configuration filename.
+ default_value: system default
+ required: false
+ - name: ignore N1,N2,N3,...
+ description: Sensor IDs to ignore.
+ default_value: ""
+ required: false
+ - name: ignore-status N1,N2,N3,...
+ description: Sensor IDs to ignore status (nominal/warning/critical).
+ default_value: ""
+ required: false
+ - name: -v
+ description: Print version and exit.
+ default_value: ""
+ required: false
+ - name: --help
+ description: Print usage message and exit.
+ default_value: ""
required: false
examples:
folding:
enabled: true
- title: ""
- list: []
+ title: "Config"
+ list:
+ - name: Decrease data collection frequency
+ description: Basic example decreasing data collection frequency. The minimum `update every` is 5 (enforced internally by the plugin). IPMI is slow and CPU hungry. So, once every 5 seconds is pretty acceptable.
+ config: |
+ [plugin:freeipmi]
+ update every = 10
+ folding:
+ enabled: false
+ - name: Disable SEL collection
+ description: Append to `command options =` the options you need.
+ config: |
+ [plugin:freeipmi]
+ command options = no-sel
+ - name: Ignore specific sensors
+ description: |
+ Specific sensor IDs can be excluded from freeipmi tools by editing `/etc/freeipmi/freeipmi.conf` and setting the IDs to be ignored at `ipmi-sensors-exclude-record-ids`.
+
+ **However this file is not used by `libipmimonitoring`** (the library used by Netdata's `freeipmi.plugin`).
+
+ To find the IDs to ignore, run the command `ipmimonitoring`. The first column is the wanted ID:
+
+ ID | Name | Type | State | Reading | Units | Event
+ 1 | Ambient Temp | Temperature | Nominal | 26.00 | C | 'OK'
+ 2 | Altitude | Other Units Based Sensor | Nominal | 480.00 | ft | 'OK'
+ 3 | Avg Power | Current | Nominal | 100.00 | W | 'OK'
+ 4 | Planar 3.3V | Voltage | Nominal | 3.29 | V | 'OK'
+ 5 | Planar 5V | Voltage | Nominal | 4.90 | V | 'OK'
+ 6 | Planar 12V | Voltage | Nominal | 11.99 | V | 'OK'
+ 7 | Planar VBAT | Voltage | Nominal | 2.95 | V | 'OK'
+ 8 | Fan 1A Tach | Fan | Nominal | 3132.00 | RPM | 'OK'
+ 9 | Fan 1B Tach | Fan | Nominal | 2150.00 | RPM | 'OK'
+ 10 | Fan 2A Tach | Fan | Nominal | 2494.00 | RPM | 'OK'
+ 11 | Fan 2B Tach | Fan | Nominal | 1825.00 | RPM | 'OK'
+ 12 | Fan 3A Tach | Fan | Nominal | 3538.00 | RPM | 'OK'
+ 13 | Fan 3B Tach | Fan | Nominal | 2625.00 | RPM | 'OK'
+ 14 | Fan 1 | Entity Presence | Nominal | N/A | N/A | 'Entity Present'
+ 15 | Fan 2 | Entity Presence | Nominal | N/A | N/A | 'Entity Present'
+ ...
+
+ `freeipmi.plugin` supports the option `ignore` that accepts a comma separated list of sensor IDs to ignore. To configure it set on `netdata.conf`:
+ config: |
+ [plugin:freeipmi]
+ command options = ignore 1,2,3,4,...
troubleshooting:
problems:
- list: []
+ list:
+ - name: Debug Mode
+ description: |
+ You can run `freeipmi.plugin` with the debug option enabled, to troubleshoot issues with it. The output should give you clues as to why the collector isn't working.
+
+ - Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+ - Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+ - Run the `freeipmi.plugin` in debug mode:
+
+ ```bash
+ ./freeipmi.plugin 5 debug
+ ```
+ - name: kipmi0 CPU usage
+ description: |
+ There have been reports that kipmi is showing increased CPU when the IPMI is queried. To lower the CPU consumption of the system you can issue this command:
+
+ ```sh
+ echo 10 > /sys/module/ipmi_si/parameters/kipmid_max_busy_us
+ ```
+
+ You can also permanently set the above setting by creating the file `/etc/modprobe.d/ipmi.conf` with this content:
+
+ ```sh
+ # prevent kipmi from consuming 100% CPU
+ options ipmi_si kipmid_max_busy_us=10
+ ```
+
+ This instructs the kernel IPMI module to pause for a tick between checking IPMI. Querying IPMI will be a lot slower now (e.g. several seconds for IPMI to respond), but `kipmi` will not use any noticeable CPU.
+
+ You can also use a higher number (this is the number of microseconds to poll IPMI for a response, before waiting for a tick).
alerts:
- name: ipmi_sensor_state
link: https://github.com/netdata/netdata/blob/master/health/health.d/ipmi.conf
@@ -79,9 +270,20 @@ modules:
folding:
title: Metrics
enabled: false
- description: ""
+ description: |
+ The plugin does a speed test when it starts, to find out the duration needed by the IPMI processor to respond. Depending on the speed of your IPMI processor, charts may need several seconds to show up on the dashboard.
availability: []
scopes:
+ - name: global
+ description: These metrics refer to the entire monitored application.
+ labels: []
+ metrics:
+ - name: ipmi.sel
+ description: IPMI Events
+ unit: "events"
+ chart_type: area
+ dimensions:
+ - name: events
- name: sensor
description: ""
labels:
@@ -92,12 +294,6 @@ modules:
- name: component
description: One of 25 recognized components (Processor, Peripheral).
metrics:
- - name: ipmi.sel
- description: IPMI Events
- unit: "events"
- chart_type: area
- dimensions:
- - name: events
- name: ipmi.sensor_state
description: IPMI Sensors State
unit: "state"
diff --git a/collectors/idlejitter.plugin/README.md b/collectors/idlejitter.plugin/README.md
deleted file mode 100644
index 9474a2b97f3843..00000000000000
--- a/collectors/idlejitter.plugin/README.md
+++ /dev/null
@@ -1,36 +0,0 @@
-
-
-# idlejitter.plugin
-
-Idle jitter is a measure of delays in timing for user processes caused by scheduling limitations.
-
-## How Netdata measures idle jitter
-
-A thread is spawned that requests to sleep for 20000 microseconds (20ms).
-When the system wakes it up, it measures how many microseconds have passed.
-The difference between the requested and the actual duration of the sleep, is the idle jitter.
-This is done at most 50 times per second, to ensure we have a good average.
-
-This number is useful:
-
-- In multimedia-streaming environments such as VoIP gateways, where the CPU jitter can affect the quality of the service.
-- On time servers and other systems that require very precise timing, where CPU jitter can actively interfere with timing precision.
-- On gaming systems, where CPU jitter can cause frame drops and stuttering.
-- In cloud infrastructure that can pause the VM or container for a small duration to perform operations at the host.
-
-## Charts
-
-idlejitter.plugin generates the idlejitter chart which measures CPU idle jitter in milliseconds lost per second.
-
-## Configuration
-
-This chart is available without any configuration.
-
-
diff --git a/collectors/idlejitter.plugin/README.md b/collectors/idlejitter.plugin/README.md
new file mode 120000
index 00000000000000..1ce460b6262442
--- /dev/null
+++ b/collectors/idlejitter.plugin/README.md
@@ -0,0 +1 @@
+integrations/idle_os_jitter.md
\ No newline at end of file
diff --git a/collectors/idlejitter.plugin/integrations/idle_os_jitter.md b/collectors/idlejitter.plugin/integrations/idle_os_jitter.md
new file mode 100644
index 00000000000000..44463f6f574225
--- /dev/null
+++ b/collectors/idlejitter.plugin/integrations/idle_os_jitter.md
@@ -0,0 +1,118 @@
+
+
+# Idle OS Jitter
+
+
+
+
+
+Plugin: idlejitter.plugin
+Module: idlejitter.plugin
+
+
+
+## Overview
+
+Monitor delays in timing for user processes caused by scheduling limitations to optimize the system to run latency sensitive applications with minimal jitter, improving consistency and quality of service.
+
+
+A thread is spawned that requests to sleep for a fixed amount of time. When the system wakes it up, it measures how many microseconds have passed. The difference between the requested and the actual duration of the sleep is the idle jitter. This is done dozens of times per second to ensure we have a representative sample.
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration will run by default on all supported systems.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Idle OS Jitter instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.idlejitter | min, max, average | microseconds lost/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+This integration only supports a single configuration option, and most users will not need to change it.
+
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| loop time in ms | Specifies the target time for the data collection thread to sleep, measured in milliseconds. | 20 | no |
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/ioping.plugin/README.md b/collectors/ioping.plugin/README.md
deleted file mode 100644
index 73fc35fb00b8d4..00000000000000
--- a/collectors/ioping.plugin/README.md
+++ /dev/null
@@ -1,80 +0,0 @@
-# Monitor I/O latency using ioping.plugin
-
-The ioping plugin supports monitoring I/O latency for any number of directories/files/devices, by pinging them with `ioping`.
-
-A recent version of `ioping` is required (one that supports option `-N`).
-The supplied plugin can install it, by running:
-
-```sh
-/usr/libexec/netdata/plugins.d/ioping.plugin install
-```
-
-The `-e` option can be supplied to indicate where the Netdata environment file is installed. The default path is `/etc/netdata/.environment`.
-
-The above will download, build and install the right version as `/usr/libexec/netdata/plugins.d/ioping`.
-
-Then you need to edit `/etc/netdata/ioping.conf` (to edit it on your system run
-`/etc/netdata/edit-config ioping.conf`) like this:
-
-```sh
-# uncomment the following line - it should already be there
-ioping="/usr/libexec/netdata/plugins.d/ioping"
-
-# set here the directory/file/device, you need to ping
-destination="destination"
-
-# override the chart update frequency - the default is inherited from Netdata
-update_every="1s"
-
-# the request size in bytes to ping the destination
-request_size="4k"
-
-# other iping options - these are the defaults
-ioping_opts="-T 1000000 -R"
-```
-
-## alarms
-
-Netdata will automatically attach a few alarms for each host.
-Check the [latest versions of the ioping alarms](https://raw.githubusercontent.com/netdata/netdata/master/health/health.d/ioping.conf)
-
-## Multiple ioping Plugins With Different Settings
-
-You may need to run multiple ioping plugins with different settings or different end points.
-For example, you may need to ping one destination once per 10 seconds, and another once per second.
-
-Netdata allows you to add as many `ioping` plugins as you like.
-
-Follow this procedure:
-
-**1. Create New ioping Configuration File**
-
-```sh
-# Step Into Configuration Directory
-cd /etc/netdata
-
-# Copy Original ioping Configuration File To New Configuration File
-cp ioping.conf ioping2.conf
-```
-
-Edit `ioping2.conf` and set the settings and the destination you need for the seconds instance.
-
-**2. Soft Link Original ioping Plugin to New Plugin File**
-
-```sh
-# Become root (If The Step Step Is Performed As Non-Root User)
-sudo su
-
-# Step Into The Plugins Directory
-cd /usr/libexec/netdata/plugins.d
-
-# Link ioping.plugin to ioping2.plugin
-ln -s ioping.plugin ioping2.plugin
-```
-
-That's it. Netdata will detect the new plugin and start it.
-
-You can name the new plugin any name you like.
-Just make sure the plugin and the configuration file have the same name.
-
-
diff --git a/collectors/ioping.plugin/README.md b/collectors/ioping.plugin/README.md
new file mode 120000
index 00000000000000..cb660f13b39f05
--- /dev/null
+++ b/collectors/ioping.plugin/README.md
@@ -0,0 +1 @@
+integrations/ioping.md
\ No newline at end of file
diff --git a/collectors/ioping.plugin/integrations/ioping.md b/collectors/ioping.plugin/integrations/ioping.md
new file mode 100644
index 00000000000000..39a07ed62e10a9
--- /dev/null
+++ b/collectors/ioping.plugin/integrations/ioping.md
@@ -0,0 +1,133 @@
+
+
+# IOPing
+
+
+
+
+
+Plugin: ioping.plugin
+Module: ioping.plugin
+
+
+
+## Overview
+
+Monitor IOPing metrics for efficient disk I/O latency tracking. Keep track of read/write speeds, latency, and error rates for optimized disk operations.
+
+Plugin uses `ioping` command.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per disk
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ioping.latency | latency | microseconds |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ ioping_disk_latency ](https://github.com/netdata/netdata/blob/master/health/health.d/ioping.conf) | ioping.latency | average I/O latency over the last 10 seconds |
+
+
+## Setup
+
+### Prerequisites
+
+#### Install ioping
+
+You can install the command by passing the argument `install` to the plugin (`/usr/libexec/netdata/plugins.d/ioping.plugin install`).
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `ioping.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config ioping.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Data collection frequency. | 1s | no |
+| destination | The directory/file/device to ioping. | | yes |
+| request_size | The request size in bytes to ioping the destination (symbolic modifiers are supported) | 4k | no |
+| ioping_opts | Options passed to `ioping` commands. | -T 1000000 | no |
+
+
+
+#### Examples
+
+##### Basic Configuration
+
+This example has the minimum configuration necessary to have the plugin running.
+
+Config
+
+```yaml
+destination="/dev/sda"
+
+```
+
+
+
diff --git a/collectors/ioping.plugin/ioping.plugin.in b/collectors/ioping.plugin/ioping.plugin.in
index 1d79eb70646b66..171e384dbf3b37 100755
--- a/collectors/ioping.plugin/ioping.plugin.in
+++ b/collectors/ioping.plugin/ioping.plugin.in
@@ -9,7 +9,7 @@
# This plugin requires a latest version of ioping.
# You can compile it from source, by running me with option: install
-export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin"
+export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin:@sbindir_POST@"
export LC_ALL=C
usage="$(basename "$0") [install] [-h] [-e]
@@ -93,42 +93,103 @@ if [ "$INSTALL" == "1" ]
fi
# -----------------------------------------------------------------------------
+# logging
PROGRAM_NAME="$(basename "${0}")"
-logdate() {
- date "+%Y-%m-%d %H:%M:%S"
+# these should be the same as the syslog() priorities
+NDLP_EMERG=0 # system is unusable
+NDLP_ALERT=1 # action must be taken immediately
+NDLP_CRIT=2 # critical conditions
+NDLP_ERR=3 # error conditions
+NDLP_WARN=4 # warning conditions
+NDLP_NOTICE=5 # normal but significant condition
+NDLP_INFO=6 # informational
+NDLP_DEBUG=7 # debug-level messages
+
+# the max (numerically) log level we will log
+LOG_LEVEL=$NDLP_INFO
+
+set_log_min_priority() {
+ case "${NETDATA_LOG_LEVEL,,}" in
+ "emerg" | "emergency")
+ LOG_LEVEL=$NDLP_EMERG
+ ;;
+
+ "alert")
+ LOG_LEVEL=$NDLP_ALERT
+ ;;
+
+ "crit" | "critical")
+ LOG_LEVEL=$NDLP_CRIT
+ ;;
+
+ "err" | "error")
+ LOG_LEVEL=$NDLP_ERR
+ ;;
+
+ "warn" | "warning")
+ LOG_LEVEL=$NDLP_WARN
+ ;;
+
+ "notice")
+ LOG_LEVEL=$NDLP_NOTICE
+ ;;
+
+ "info")
+ LOG_LEVEL=$NDLP_INFO
+ ;;
+
+ "debug")
+ LOG_LEVEL=$NDLP_DEBUG
+ ;;
+ esac
}
+set_log_min_priority
+
log() {
- local status="${1}"
- shift
+ local level="${1}"
+ shift 1
+
+ [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return
- echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}"
+ systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <this command will tail all `*.log` files in `/var/log/nginx/`. We use `-F` instead of `-f` to ensure that files will still be tailed after log rotation.
+2. `log2journal` is a Netdata program. It reads log entries and extracts fields, according to the PCRE2 pattern it accepts. It can also apply some basic operations on the fields, like injecting new fields, duplicating existing ones, or rewriting their values. The output of `log2journal` is in Systemd Journal Export Format, and it looks like this:
+ ```bash
+ KEY1=VALUE1 # << start of the first log line
+ KEY2=VALUE2
+ # << log lines separator
+ KEY1=VALUE1 # << start of the second log line
+ KEY2=VALUE2
+ ```
+3. `systemd-cat-native` is a Netdata program. It can send the logs to a local `systemd-journald` (journal namespaces supported), or to a remote `systemd-journal-remote`.
+
+
+## Processing pipeline
+
+The sequence of processing in Netdata's `log2journal` is designed to methodically transform and prepare log data for export in the systemd Journal Export Format. This transformation occurs through a pipeline of stages, each with a specific role in processing the log entries. Here's a description of each stage in the sequence:
+
+1. **Input**
+ The tool reads one log line at a time from the input source. It supports different input formats such as JSON, logfmt, and free-form logs defined by PCRE2 patterns.
+
+2. **Extract Fields and Values**
+ Based on the input format (JSON, logfmt, or custom pattern), it extracts fields and their values from each log line. In the case of JSON and logfmt, it automatically extracts all fields. For custom patterns, it uses PCRE2 regular expressions, and fields are extracted based on sub-expressions defined in the pattern.
+
+3. **Transliteration**
+ Extracted fields are transliterated to the limited character set accepted by systemd-journal: capitals A-Z, digits 0-9, underscores.
+
+4. **Apply Optional Prefix**
+ If a prefix is specified, it is added to all keys. This happens before any other processing so that all subsequent matches and manipulations take the prefix into account.
+
+5. **Rename Fields**
+ Renames fields as specified in the configuration. This is used to change the names of the fields to match desired or required naming conventions.
+
+6. **Inject New Fields**
+ New fields are injected into the log data. This can include constants or values derived from other fields, using variable substitution.
+
+7. **Rewrite Field Values**
+ Applies rewriting rules to alter the values of the fields. This can involve complex transformations, including regular expressions and variable substitutions. The rewrite rules can also inject new fields into the data.
+
+8. **Filter Fields**
+ Fields are filtered based on include and exclude patterns. This stage selects which fields are to be sent to the journal, allowing for selective logging.
+
+9. **Output**
+ Finally, the processed log data is output in the Journal Export Format. This format is compatible with systemd's journaling system and can be sent to local or remote systemd journal systems, by piping the output of `log2journal` to `systemd-cat-native`.
+
+This pipeline ensures a flexible and comprehensive approach to log processing, allowing for a wide range of modifications and customizations to fit various logging requirements. Each stage builds upon the previous one, enabling complex log transformations and enrichments before the data is exported to the systemd journal.
+
+## Real-life example
+
+We have an nginx server logging in this standard combined log format:
+
+```bash
+ log_format combined '$remote_addr - $remote_user [$time_local] '
+ '"$request" $status $body_bytes_sent '
+ '"$http_referer" "$http_user_agent"';
+```
+
+### Extracting fields with a pattern
+
+First, let's find the right pattern for `log2journal`. We ask ChatGPT:
+
+```
+My nginx log uses this log format:
+
+log_format access '$remote_addr - $remote_user [$time_local] '
+ '"$request" $status $body_bytes_sent '
+ '"$http_referer" "$http_user_agent"';
+
+I want to use `log2journal` to convert this log for systemd-journal.
+`log2journal` accepts a PCRE2 regular expression, using the named groups
+in the pattern as the journal fields to extract from the logs.
+
+Please give me the PCRE2 pattern to extract all the fields from my nginx
+log files.
+```
+
+ChatGPT replies with this:
+
+```regexp
+ (?x) # Enable PCRE2 extended mode
+ ^
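+ (?<remote_addr>[^ ]+) \s - \s
+ (?<remote_user>[^ ]+) \s
+ \[
+ (?<time_local>[^\]]+)
+ \]
+ \s+ "
+ (?<request>
+ (?<request_method>[A-Z]+) \s+
+ (?<request_uri>[^ ]+) \s+
+ (?<server_protocol>[^"]+)
+ )
+ " \s+
+ (?<status>\d+) \s+
+ (?<body_bytes_sent>\d+) \s+
+ "(?<http_referer>[^"]*)" \s+
+ "(?<http_user_agent>[^"]*)"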
+```
+
+Let's see what the above says:
+
+1. `(?x)`: enable PCRE2 extended mode. In this mode spaces and newlines in the pattern are ignored. To match a space you have to use `\s`. This mode allows us to split the pattern into multiple lines and add comments to it.
+1. `^`: match the beginning of the line
+2. `(?[^ ]+) \s - \s
+ (?[^ ]+) \s
+ \[
+ (?[^\]]+)
+ \]
+ \s+ "
+ (?
+ (?[A-Z]+) \s+
+ (?[^ ]+) \s+
+ (?[^"]+)
+ )
+ " \s+
+ (?\d+) \s+
+ (?\d+) \s+
+ "(?[^"]*)" \s+
+ "(?[^"]*)"
+```
+
+Let's test it with a sample line (instead of `tail`):
+
+```bash
+# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 "-" "Go-http-client/1.1"' | log2journal -f nginx.yaml
+BODY_BYTES_SENT=4172
+HTTP_REFERER=-
+HTTP_USER_AGENT=Go-http-client/1.1
+REMOTE_ADDR=1.2.3.4
+REMOTE_USER=-
+REQUEST=GET /index.html HTTP/1.1
+REQUEST_METHOD=GET
+REQUEST_URI=/index.html
+SERVER_PROTOCOL=HTTP/1.1
+STATUS=200
+TIME_LOCAL=19/Nov/2023:00:24:43 +0000
+
+```
+
+As you can see, it extracted all the fields and made them capitals, as systemd-journal expects them.
+
+### Prefixing field names
+
+To make sure the fields are unique for nginx and do not interfere with other applications, we should prefix them with `NGINX_`:
+
+```yaml
+pattern: |
+ (?x) # Enable PCRE2 extended mode
+ ^
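+ (?<remote_addr>[^ ]+) \s - \s
+ (?<remote_user>[^ ]+) \s
+ \[
+ (?<time_local>[^\]]+)
+ \]
+ \s+ "
+ (?<request>
+ (?<request_method>[A-Z]+) \s+
+ (?<request_uri>[^ ]+) \s+
+ (?<server_protocol>[^"]+)
+ )
+ " \s+
+ (?<status>\d+) \s+
+ (?<body_bytes_sent>\d+) \s+
+ "(?<http_referer>[^"]*)" \s+
+ "(?<http_user_agent>[^"]*)"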
+
+prefix: 'NGINX_' # <<< we added this
+```
+
+And let's try it:
+
+```bash
+# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 "-" "Go-http-client/1.1"' | log2journal -f nginx.yaml
+NGINX_BODY_BYTES_SENT=4172
+NGINX_HTTP_REFERER=-
+NGINX_HTTP_USER_AGENT=Go-http-client/1.1
+NGINX_REMOTE_ADDR=1.2.3.4
+NGINX_REMOTE_USER=-
+NGINX_REQUEST=GET /index.html HTTP/1.1
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_URI=/index.html
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_TIME_LOCAL=19/Nov/2023:00:24:43 +0000
+
+```
+
+### Renaming fields
+
+Now, all fields start with `NGINX_` but we want `NGINX_REQUEST` to be the `MESSAGE` of the log line, as we will see it by default in `journalctl` and the Netdata dashboard. Let's rename it:
+
+```yaml
+pattern: |
+ (?x) # Enable PCRE2 extended mode
+ ^
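+ (?<remote_addr>[^ ]+) \s - \s
+ (?<remote_user>[^ ]+) \s
+ \[
+ (?<time_local>[^\]]+)
+ \]
+ \s+ "
+ (?<request>
+ (?<request_method>[A-Z]+) \s+
+ (?<request_uri>[^ ]+) \s+
+ (?<server_protocol>[^"]+)
+ )
+ " \s+
+ (?<status>\d+) \s+
+ (?<body_bytes_sent>\d+) \s+
+ "(?<http_referer>[^"]*)" \s+
+ "(?<http_user_agent>[^"]*)"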
+
+prefix: 'NGINX_'
+
+rename: # <<< we added this
+ - new_key: MESSAGE # <<< we added this
+ old_key: NGINX_REQUEST # <<< we added this
+```
+
+Let's test it:
+
+```bash
+# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 "-" "Go-http-client/1.1"' | log2journal -f nginx.yaml
+MESSAGE=GET /index.html HTTP/1.1 # <<< renamed !
+NGINX_BODY_BYTES_SENT=4172
+NGINX_HTTP_REFERER=-
+NGINX_HTTP_USER_AGENT=Go-http-client/1.1
+NGINX_REMOTE_ADDR=1.2.3.4
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_URI=/index.html
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_TIME_LOCAL=19/Nov/2023:00:24:43 +0000
+
+```
+
+### Injecting new fields
+
+To have a complete message in journals we need 3 fields: `MESSAGE`, `PRIORITY` and `SYSLOG_IDENTIFIER`. We have already added `MESSAGE` by renaming `NGINX_REQUEST`. We can also inject a `SYSLOG_IDENTIFIER` and `PRIORITY`.
+
+Ideally, we would want the 5xx errors to be red in our `journalctl` output and the dashboard. To achieve that we need to set the `PRIORITY` field to the right log level. Log priorities are numeric and follow the `syslog` priorities. Checking `/usr/include/sys/syslog.h` we can see these:
+
+```c
+#define LOG_EMERG 0 /* system is unusable */
+#define LOG_ALERT 1 /* action must be taken immediately */
+#define LOG_CRIT 2 /* critical conditions */
+#define LOG_ERR 3 /* error conditions */
+#define LOG_WARNING 4 /* warning conditions */
+#define LOG_NOTICE 5 /* normal but significant condition */
+#define LOG_INFO 6 /* informational */
+#define LOG_DEBUG 7 /* debug-level messages */
+```
+
+Avoid setting priority to 0 (`LOG_EMERG`), because these will be on your terminal (the journal uses `wall` to let you know of such events). A good priority for errors is 3 (red), or 4 (yellow).
+
+To set the `PRIORITY` field in the output, we can use `NGINX_STATUS`. We will do this in 2 steps: a) inject the priority field as a copy of `NGINX_STATUS` and then b) use a pattern on its value to rewrite it to the priority level we want.
+
+First, let's inject `SYSLOG_IDENTIFIER` and `PRIORITY`:
+
+```yaml
+pattern: |
+ (?x) # Enable PCRE2 extended mode
+ ^
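+ (?<remote_addr>[^ ]+) \s - \s
+ (?<remote_user>[^ ]+) \s
+ \[
+ (?<time_local>[^\]]+)
+ \]
+ \s+ "
+ (?<request>
+ (?<request_method>[A-Z]+) \s+
+ (?<request_uri>[^ ]+) \s+
+ (?<server_protocol>[^"]+)
+ )
+ " \s+
+ (?<status>\d+) \s+
+ (?<body_bytes_sent>\d+) \s+
+ "(?<http_referer>[^"]*)" \s+
+ "(?<http_user_agent>[^"]*)"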
+
+prefix: 'NGINX_'
+
+rename:
+ - new_key: MESSAGE
+ old_key: NGINX_REQUEST
+
+inject: # <<< we added this
+ - key: PRIORITY # <<< we added this
+ value: '${NGINX_STATUS}' # <<< we added this
+
+ - key: SYSLOG_IDENTIFIER # <<< we added this
+ value: 'nginx-log' # <<< we added this
+```
+
+Let's see what this does:
+
+```bash
+# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 "-" "Go-http-client/1.1"' | log2journal -f nginx.yaml
+MESSAGE=GET /index.html HTTP/1.1
+NGINX_BODY_BYTES_SENT=4172
+NGINX_HTTP_REFERER=-
+NGINX_HTTP_USER_AGENT=Go-http-client/1.1
+NGINX_REMOTE_ADDR=1.2.3.4
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_URI=/index.html
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_TIME_LOCAL=19/Nov/2023:00:24:43 +0000
+PRIORITY=200 # <<< PRIORITY added
+SYSLOG_IDENTIFIER=nginx-log # <<< SYSLOG_IDENTIFIER added
+
+```
+
+### Rewriting field values
+
+Now we need to rewrite `PRIORITY` to the right syslog level based on its value (`NGINX_STATUS`). We will assign the priority 6 (info) when the status is 1xx, 2xx, 3xx, priority 5 (notice) when status is 4xx, priority 3 (error) when status is 5xx and anything else will go to priority 4 (warning). Let's do it:
+
+```yaml
+pattern: |
+ (?x) # Enable PCRE2 extended mode
+ ^
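+ (?<remote_addr>[^ ]+) \s - \s
+ (?<remote_user>[^ ]+) \s
+ \[
+ (?<time_local>[^\]]+)
+ \]
+ \s+ "
+ (?<request>
+ (?<request_method>[A-Z]+) \s+
+ (?<request_uri>[^ ]+) \s+
+ (?<server_protocol>[^"]+)
+ )
+ " \s+
+ (?<status>\d+) \s+
+ (?<body_bytes_sent>\d+) \s+
+ "(?<http_referer>[^"]*)" \s+
+ "(?<http_user_agent>[^"]*)"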
+
+prefix: 'NGINX_'
+
+rename:
+ - new_key: MESSAGE
+ old_key: NGINX_REQUEST
+
+inject:
+ - key: PRIORITY
+ value: '${NGINX_STATUS}'
+
+rewrite: # <<< we added this
+ - key: PRIORITY # <<< we added this
+ match: '^[123]' # <<< we added this
+ value: 6 # <<< we added this
+
+ - key: PRIORITY # <<< we added this
+ match: '^4' # <<< we added this
+ value: 5 # <<< we added this
+
+ - key: PRIORITY # <<< we added this
+ match: '^5' # <<< we added this
+ value: 3 # <<< we added this
+
+ - key: PRIORITY # <<< we added this
+ match: '.*' # <<< we added this
+ value: 4 # <<< we added this
+```
+
+Rewrite rules are processed in order and, by default, the first rule that matches a field stops further rewrite processing for that field. This is why the last rule, which matches everything, does not always change the priority to 4.
+
+Let's test it:
+
+```bash
+# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 "-" "Go-http-client/1.1"' | log2journal -f nginx.yaml
+MESSAGE=GET /index.html HTTP/1.1
+NGINX_BODY_BYTES_SENT=4172
+NGINX_HTTP_REFERER=-
+NGINX_HTTP_USER_AGENT=Go-http-client/1.1
+NGINX_REMOTE_ADDR=1.2.3.4
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_URI=/index.html
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_TIME_LOCAL=19/Nov/2023:00:24:43 +0000
+PRIORITY=6 # <<< PRIORITY rewritten here
+SYSLOG_IDENTIFIER=nginx-log
+
+```
+
+Rewrite rules are powerful. You can have named groups in them, like in the main pattern, to extract sub-fields from them, which you can then use in variable substitution. You can use rewrite rules to anonymize the URLs, e.g. to remove customer IDs or transaction details from them.
+
+### Sending logs to systemd-journal
+
+Now the message is ready to be sent to a systemd-journal. For this we use `systemd-cat-native`. This command can send such messages to a journal running on the localhost, a local journal namespace, or a `systemd-journal-remote` running on another server. By just appending `| systemd-cat-native` to the command, the message will be sent to the local journal.
+
+
+```bash
+# echo '1.2.3.4 - - [19/Nov/2023:00:24:43 +0000] "GET /index.html HTTP/1.1" 200 4172 "-" "Go-http-client/1.1"' | log2journal -f nginx.yaml | systemd-cat-native
+# no output
+
+# let's find the message
+# journalctl -r -o verbose SYSLOG_IDENTIFIER=nginx-log
+Wed 2023-12-06 13:23:07.083299 EET [s=5290f0133f25407aaa1e2c451c0e4756;i=57194;b=0dfa96ecc2094cecaa8ec0efcb93b865;m=b133308867;t=60bd59346a289;x=5c1bdacf2b9c4bbd]
+ PRIORITY=6
+ _UID=0
+ _GID=0
+ _CAP_EFFECTIVE=1ffffffffff
+ _SELINUX_CONTEXT=unconfined
+ _BOOT_ID=0dfa96ecc2094cecaa8ec0efcb93b865
+ _MACHINE_ID=355c8eca894d462bbe4c9422caf7a8bb
+ _HOSTNAME=lab-logtest-src
+ _RUNTIME_SCOPE=system
+ _TRANSPORT=journal
+ MESSAGE=GET /index.html HTTP/1.1
+ NGINX_BODY_BYTES_SENT=4172
+ NGINX_HTTP_REFERER=-
+ NGINX_HTTP_USER_AGENT=Go-http-client/1.1
+ NGINX_REMOTE_ADDR=1.2.3.4
+ NGINX_REMOTE_USER=-
+ NGINX_REQUEST_METHOD=GET
+ NGINX_REQUEST_URI=/index.html
+ NGINX_SERVER_PROTOCOL=HTTP/1.1
+ NGINX_STATUS=200
+ NGINX_TIME_LOCAL=19/Nov/2023:00:24:43 +0000
+ SYSLOG_IDENTIFIER=nginx-log
+ _PID=114343
+ _COMM=systemd-cat-nat
+ _AUDIT_SESSION=253
+ _AUDIT_LOGINUID=1000
+ _SYSTEMD_CGROUP=/user.slice/user-1000.slice/session-253.scope
+ _SYSTEMD_SESSION=253
+ _SYSTEMD_OWNER_UID=1000
+ _SYSTEMD_UNIT=session-253.scope
+ _SYSTEMD_SLICE=user-1000.slice
+ _SYSTEMD_USER_SLICE=-.slice
+ _SYSTEMD_INVOCATION_ID=c59e33ead8c24880b027e317b89f9f76
+ _SOURCE_REALTIME_TIMESTAMP=1701861787083299
+
+```
+
+So, the log line, with all its fields parsed, ended up in systemd-journal. Now we can send all the nginx logs to systemd-journal like this:
+
+```bash
+tail -F /var/log/nginx/access.log |\
+ log2journal -f nginx.yaml |\
+ systemd-cat-native
+```
+
+## Best practices
+
+**Create a systemd service unit**: Add the above commands to a systemd unit file. When you run it in a systemd unit file you will be able to start/stop it and also see its status. Furthermore you can use the `LogNamespace=` directive of systemd service units to isolate your nginx logs from the logs of the rest of the system. Here is how to do it:
+
+Create the file `/etc/systemd/system/nginx-logs.service` (change `/path/to/nginx.yaml` to the right path):
+
+```
+[Unit]
+Description=NGINX Log to Systemd Journal
+After=network.target
+
+[Service]
+ExecStart=/bin/sh -c 'tail -F /var/log/nginx/access.log | log2journal -f /path/to/nginx.yaml | systemd-cat-native'
+LogNamespace=nginx-logs
+Restart=always
+RestartSec=3
+
+[Install]
+WantedBy=multi-user.target
+```
+
+Reload systemd to grab this file:
+
+```bash
+sudo systemctl daemon-reload
+```
+
+Enable and start the service:
+
+```bash
+sudo systemctl enable nginx-logs.service
+sudo systemctl start nginx-logs.service
+```
+
+To see the logs of the namespace, use:
+
+```bash
+journalctl -f --namespace=nginx-logs
+```
+
+Netdata will automatically pick up the new namespace and present it in the list of sources on the dashboard.
+
+You can also instruct `systemd-cat-native` to log to a remote system, sending the logs to a `systemd-journal-remote` instance running on another server. Check [the manual of systemd-cat-native](https://github.com/netdata/netdata/blob/master/libnetdata/log/systemd-cat-native.md).
+
+
+## Performance
+
+`log2journal` and `systemd-cat-native` have been designed to process hundreds of thousands of log lines per second. They both utilize high performance indexing hashtables to speed up lookups, and queues that dynamically adapt to the number of log lines offered, offering a smooth and fast experience under all conditions.
+
+In our tests, the combined CPU utilization of `log2journal` and `systemd-cat-native` versus `promtail` with similar configuration is 1 to 5. So, `log2journal` and `systemd-cat-native` combined, are 5 times faster than `promtail`.
+
+### PCRE2 patterns
+
+The key characteristic that can influence the performance of a logs processing pipeline using these tools, is the quality of the PCRE2 patterns used. Poorly created PCRE2 patterns can make processing significantly slower, or CPU consuming.
+
+Especially the pattern `.*` seems to have the biggest impact on CPU consumption, especially when multiple `.*` are on the same pattern.
+
+Usually we use `.*` to indicate that we need to match everything up to a character, e.g. `.* ` to match up to a space. By replacing it with `[^ ]+` (meaning: match at least a character up to a space), the regular expression engine can be a lot more efficient, reducing the overall CPU utilization significantly.
+
+### Performance of systemd journals
+
+The ingestion pipeline of logs, from `tail` to `systemd-journald` or `systemd-journal-remote` is very efficient in all aspects. CPU utilization is better than any other system we tested and RAM usage is independent of the number of fields indexed, making systemd-journal one of the most efficient log management engines for ingesting high volumes of structured logs.
+
+High field cardinality does not have a noticeable impact on systemd-journal. The number of fields indexed and the number of unique values per field have a linear and predictable effect on the resource utilization of `systemd-journald` and `systemd-journal-remote`. This is unlike other log management solutions, like Loki, whose RAM requirements grow exponentially as the cardinality increases, making it impractical for them to index the amount of information systemd journals can index.
+
+However, the number of fields added to journals influences the overall disk footprint. Fewer fields means more log entries per journal file, a smaller overall disk footprint and faster queries.
+
+systemd-journal files are primarily designed for security and reliability. This comes at the cost of disk footprint. The internal structure of journal files is such that, in case of corruption, minimal data loss will occur. To achieve such a unique characteristic, certain data within the files needs to be aligned at predefined boundaries, so that in case there is a corruption, non-corrupted parts of the journal file can be recovered.
+
+Despite the fact that systemd-journald employs several techniques to optimize disk footprint, like deduplication of log entries, shared indexes for fields and their values, compression of long log entries, etc., the disk footprint of journal files is generally 10x more compared to other monitoring solutions, like Loki.
+
+This can be improved by storing journal files in a compressed filesystem. In our tests, a compressed filesystem can save up to 75% of the space required by journal files. The journal files will still be bigger than the overall disk footprint of other solutions, but the flexibility (index any number of fields), reliability (minimal potential data loss) and security (tampering protection and sealing) features of systemd-journal justify the difference.
+
+When using versions of systemd prior to 254 and you are centralizing logs to a remote system, `systemd-journal-remote` creates very small files (32MB). This results in increased duplication of information across the files, increasing the overall disk footprint. systemd versions 254+, added options to `systemd-journal-remote` to control the max size per file. This can significantly reduce the duplication of information.
+
+Another limitation of the `systemd-journald` ecosystem is the uncompressed transmission of logs across systems. `systemd-journal-remote`, up to version 254 that we tested, accepts encrypted but uncompressed data. This means that when centralizing logs to a logs server, the bandwidth required will be increased compared to other log management solutions.
+
+## Security Considerations
+
+`log2journal` and `systemd-cat-native` are used to convert log files to structured logs in the systemd-journald ecosystem.
+
+Systemd-journal is a logs management solution designed primarily for security and reliability. When configured properly, it can reliably and securely store your logs, ensuring they will be available and unchanged for as long as you need them.
+
+When sending logs to a remote system, `systemd-cat-native` can be configured the same way `systemd-journal-upload` is configured, using HTTPS and private keys to encrypt and secure their transmission over the network.
+
+When dealing with sensitive logs, organizations usually follow 2 strategies:
+
+1. Anonymize the logs before storing them, so that the stored logs do not have any sensitive information.
+2. Store the logs in full, including sensitive information, and carefully control who and how has access to them.
+
+Netdata can help in both cases.
+
+If you want to anonymize the logs before storing them, use rewriting rules at the `log2journal` phase to remove sensitive information from them. This process usually means matching the sensitive part and replacing with `XXX` or `CUSTOMER_ID`, or `CREDIT_CARD_NUMBER`, so that the resulting log entries stored in journal files will not include any such sensitive information.
+
+If, on the other hand, your organization prefers to maintain the full logs and control who has access to them and how, use Netdata Cloud to assign roles to your team members and control which roles can access the journal logs in your environment.
+
+## `log2journal` options
+
+```
+
+Netdata log2journal v1.43.0-341-gdac4df856
+
+Convert logs to systemd Journal Export Format.
+
+ - JSON logs: extracts all JSON fields.
+ - logfmt logs: extracts all logfmt fields.
+ - free-form logs: uses PCRE2 patterns to extracts fields.
+
+Usage: ./log2journal [OPTIONS] PATTERN|json
+
+Options:
+
+ --file /path/to/file.yaml or -f /path/to/file.yaml
+ Read yaml configuration file for instructions.
+
+ --config CONFIG_NAME or -c CONFIG_NAME
+ Run with the internal YAML configuration named CONFIG_NAME.
+ Available internal YAML configs:
+
+ nginx-combined nginx-json default
+
+--------------------------------------------------------------------------------
+ INPUT PROCESSING
+
+ PATTERN
+ PATTERN should be a valid PCRE2 regular expression.
+ RE2 regular expressions (like the ones usually used in Go applications),
+ are usually valid PCRE2 patterns too.
+ Sub-expressions without named groups are evaluated, but their matches are
+ not added to the output.
+
+ - JSON mode
+ JSON mode is enabled when the pattern is set to: json
+ Field names are extracted from the JSON logs and are converted to the
+ format expected by Journal Export Format (all caps, only _ is allowed).
+
+ - logfmt mode
+ logfmt mode is enabled when the pattern is set to: logfmt
+ Field names are extracted from the logfmt logs and are converted to the
+ format expected by Journal Export Format (all caps, only _ is allowed).
+
+ All keys extracted from the input, are transliterated to match Journal
+ semantics (capital A-Z, digits 0-9, underscore).
+
+ In a YAML file:
+ ```yaml
+ pattern: 'PCRE2 pattern | json | logfmt'
+ ```
+
+--------------------------------------------------------------------------------
+ GLOBALS
+
+ --prefix PREFIX
+ Prefix all fields with PREFIX. The PREFIX is added before any other
+ processing, so that the extracted keys have to be matched with the PREFIX in
+ them. PREFIX is NOT transliterated and it is assumed to be systemd-journal
+ friendly.
+
+ In a YAML file:
+ ```yaml
+ prefix: 'PREFIX_' # prepend all keys with this prefix.
+ ```
+
+ --filename-key KEY
+ Add a field with KEY as the key and the current filename as value.
+ Automatically detects filenames when piped after 'tail -F',
+ and tail matches multiple filenames.
+ To inject the filename when tailing a single file, use --inject.
+
+ In a YAML file:
+ ```yaml
+ filename:
+ key: KEY
+ ```
+
+--------------------------------------------------------------------------------
+ RENAMING OF KEYS
+
+ --rename NEW=OLD
+ Rename fields. OLD has been transliterated and PREFIX has been added.
+ NEW is assumed to be systemd journal friendly.
+
+ Up to 512 renaming rules are allowed.
+
+ In a YAML file:
+ ```yaml
+ rename:
+ - new_key: KEY1
+ old_key: KEY2 # transliterated with PREFIX added
+ - new_key: KEY3
+ old_key: KEY4 # transliterated with PREFIX added
+ # add as many as required
+ ```
+
+--------------------------------------------------------------------------------
+ INJECTING NEW KEYS
+
+ --inject KEY=VALUE
+ Inject constant fields to the output (both matched and unmatched logs).
+ --inject entries are added to unmatched lines too, when their key is
+ not used in --inject-unmatched (--inject-unmatched override --inject).
+ VALUE can use variable like ${OTHER_KEY} to be replaced with the values
+ of other keys available.
+
+ Up to 512 fields can be injected.
+
+ In a YAML file:
+ ```yaml
+ inject:
+ - key: KEY1
+ value: 'VALUE1'
+ - key: KEY2
+ value: '${KEY3}${KEY4}' # gets the values of KEY3 and KEY4
+ # add as many as required
+ ```
+
+--------------------------------------------------------------------------------
+ REWRITING KEY VALUES
+
+ --rewrite KEY=/MATCH/REPLACE[/OPTIONS]
+ Apply a rewrite rule to the values of a specific key.
+ The first character after KEY= is the separator, which should also
+ be used between the MATCH, REPLACE and OPTIONS.
+
+ OPTIONS can be a comma separated list of `non-empty`, `dont-stop` and
+ `inject`.
+
+ When `non-empty` is given, MATCH is expected to be a variable
+ substitution using `${KEY1}${KEY2}`. Once the substitution is completed
+ the rule is matching the KEY only if the result is not empty.
+ When `non-empty` is not set, the MATCH string is expected to be a PCRE2
+ regular expression to be checked against the KEY value. This PCRE2
+ pattern may include named groups to extract parts of the KEY's value.
+
+ REPLACE supports variable substitution like `${variable}` against MATCH
+ named groups (when MATCH is a PCRE2 pattern) and `${KEY}` against the
+ keys defined so far.
+
+ Example:
+ --rewrite DATE=/^(?<year>\d{4})-(?<month>\d{2})-(?<day>\d{2})$/
+ ${day}/${month}/${year}
+ The above will rewrite dates in the format YYYY-MM-DD to DD/MM/YYYY.
+
+ Only one rewrite rule is applied per key; the sequence of rewrites for a
+ given key, stops once a rule matches it. This allows providing a sequence
+ of independent rewriting rules for the same key, matching the different
+ values the key may get, and also provide a catch-all rewrite rule at the
+ end, for setting the key value if no other rule matched it. The rewrite
+ rule can allow processing more rewrite rules when OPTIONS includes
+ the keyword 'dont-stop'.
+
+ Up to 512 rewriting rules are allowed.
+
+ In a YAML file:
+ ```yaml
+ rewrite:
+ # the order if these rules in important - processed top to bottom
+ - key: KEY1
+ match: 'PCRE2 PATTERN WITH NAMED GROUPS'
+ value: 'all match fields and input keys as ${VARIABLE}'
+ inject: BOOLEAN # yes = inject the field, don't just rewrite it
+ stop: BOOLEAN # no = continue processing, don't stop if matched
+ - key: KEY2
+ non_empty: '${KEY3}${KEY4}' # match only if this evaluates to non empty
+ value: 'all input keys as ${VARIABLE}'
+ inject: BOOLEAN # yes = inject the field, don't just rewrite it
+ stop: BOOLEAN # no = continue processing, don't stop if matched
+ # add as many rewrites as required
+ ```
+
+ By default rewrite rules are applied only on fields already defined.
+ This allows shipping YAML files that include more rewrites than are
+ required for a specific input file.
+ Rewrite rules however allow injecting new fields when OPTIONS include
+ the keyword `inject` or in YAML `inject: yes` is given.
+
+ MATCH on the command line can be empty to define an unconditional rule.
+ Similarly, `match` and `non_empty` can be omitted in the YAML file.
+--------------------------------------------------------------------------------
+ UNMATCHED LINES
+
+ --unmatched-key KEY
+ Include unmatched log entries in the output with KEY as the field name.
+ Use this to include unmatched entries to the output stream.
+ Usually it should be set to --unmatched-key=MESSAGE so that the
+ unmatched entry will appear as the log message in the journals.
+ Use --inject-unmatched to inject additional fields to unmatched lines.
+
+ In a YAML file:
+ ```yaml
+ unmatched:
+ key: MESSAGE # inject the error log as MESSAGE
+ ```
+
+ --inject-unmatched LINE
+ Inject lines into the output for each unmatched log entry.
+ Usually, --inject-unmatched=PRIORITY=3 is needed to mark the unmatched
+ lines as errors, so that they can easily be spotted in the journals.
+
+ Up to 512 such lines can be injected.
+
+ In a YAML file:
+ ```yaml
+ unmatched:
+ key: MESSAGE # inject the error log as MESSAGE
+ inject::
+ - key: KEY1
+ value: 'VALUE1'
+ # add as many constants as required
+ ```
+
+--------------------------------------------------------------------------------
+ FILTERING
+
+ --include PATTERN
+ Include only keys matching the PCRE2 PATTERN.
+ Useful when parsing JSON of logfmt logs, to include only the keys given.
+ The keys are matched after the PREFIX has been added to them.
+
+ --exclude PATTERN
+ Exclude the keys matching the PCRE2 PATTERN.
+ Useful when parsing JSON of logfmt logs, to exclude some of the keys given.
+ The keys are matched after the PREFIX has been added to them.
+
+ When both include and exclude patterns are set and both match a key,
+ exclude wins and the key will not be added, like a pipeline, we first
+ include it and then exclude it.
+
+ In a YAML file:
+ ```yaml
+ filter:
+ include: 'PCRE2 PATTERN MATCHING KEY NAMES TO INCLUDE'
+ exclude: 'PCRE2 PATTERN MATCHING KEY NAMES TO EXCLUDE'
+ ```
+
+--------------------------------------------------------------------------------
+ OTHER
+
+ -h, or --help
+ Display this help and exit.
+
+ --show-config
+ Show the configuration in YAML format before starting the job.
+ This is also an easy way to convert command line parameters to yaml.
+
+The program accepts all parameters as both --option=value and --option value.
+
+The maximum log line length accepted is 1048576 characters.
+
+PIPELINE AND SEQUENCE OF PROCESSING
+
+This is a simple diagram of the pipeline taking place:
+
+ +---------------------------------------------------+
+ | INPUT |
+ | read one log line at a time |
+ +---------------------------------------------------+
+ v v v v v v
+ +---------------------------------------------------+
+ | EXTRACT FIELDS AND VALUES |
+ | JSON, logfmt, or pattern based |
+ | (apply optional PREFIX - all keys use capitals) |
+ +---------------------------------------------------+
+ v v v v v v
+ +---------------------------------------------------+
+ | RENAME FIELDS |
+ | change the names of the fields |
+ +---------------------------------------------------+
+ v v v v v v
+ +---------------------------------------------------+
+ | INJECT NEW FIELDS |
+ | constants, or other field values as variables |
+ +---------------------------------------------------+
+ v v v v v v
+ +---------------------------------------------------+
+ | REWRITE FIELD VALUES |
+ | pipeline multiple rewriting rules to alter |
+ | the values of the fields |
+ +---------------------------------------------------+
+ v v v v v v
+ +---------------------------------------------------+
+ | FILTER FIELDS |
+ | use include and exclude patterns on the field |
+ | names, to select which fields are sent to journal |
+ +---------------------------------------------------+
+ v v v v v v
+ +---------------------------------------------------+
+ | OUTPUT |
+ | generate Journal Export Format |
+ +---------------------------------------------------+
+
+--------------------------------------------------------------------------------
+JOURNAL FIELDS RULES (enforced by systemd-journald)
+
+ - field names can be up to 64 characters
+ - the only allowed field characters are A-Z, 0-9 and underscore
+ - the first character of fields cannot be a digit
+ - protected journal fields start with underscore:
+ * they are accepted by systemd-journal-remote
+ * they are NOT accepted by a local systemd-journald
+
+ For best results, always include these fields:
+
+ MESSAGE=TEXT
+ The MESSAGE is the body of the log entry.
+ This field is what we usually see in our logs.
+
+ PRIORITY=NUMBER
+ PRIORITY sets the severity of the log entry.
+ 0=emerg, 1=alert, 2=crit, 3=err, 4=warn, 5=notice, 6=info, 7=debug
+ - Emergency events (0) are usually broadcast to all terminals.
+ - Emergency, alert, critical, and error (0-3) are usually colored red.
+ - Warning (4) entries are usually colored yellow.
+ - Notice (5) entries are usually bold or have a brighter white color.
+ - Info (6) entries are the default.
+ - Debug (7) entries are usually grayed or dimmed.
+
+ SYSLOG_IDENTIFIER=NAME
+ SYSLOG_IDENTIFIER sets the name of application.
+ Use something descriptive, like: SYSLOG_IDENTIFIER=nginx-logs
+
+You can find the most common fields at 'man systemd.journal-fields'.
+
+```
+
+`log2journal` supports YAML configuration files, like the ones found [in this directory](https://github.com/netdata/netdata/tree/master/collectors/log2journal/log2journal.d).
+
+## `systemd-cat-native` options
+
+Read [the manual of systemd-cat-native](https://github.com/netdata/netdata/blob/master/libnetdata/log/systemd-cat-native.md).
diff --git a/collectors/log2journal/log2journal-help.c b/collectors/log2journal/log2journal-help.c
new file mode 100644
index 00000000000000..21be948e8a974b
--- /dev/null
+++ b/collectors/log2journal/log2journal-help.c
@@ -0,0 +1,377 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+// Print the names of the YAML configurations found in LOG2JOURNAL_CONFIG_PATH, without
+// their ".yaml" suffix, separated by spaces and wrapped at 80 columns. Every output line
+// is indented by 7 spaces. When the directory cannot be opened, a message is logged with
+// log2stderr() and nothing is printed.
+static void config_dir_print_available(void) {
+    const char *path = LOG2JOURNAL_CONFIG_PATH;
+    const size_t column_width = 80;
+    const size_t indent = 7;    // printed indentation; must match the column accounting
+
+    DIR *dir = opendir(path);
+    if (dir == NULL) {
+        log2stderr(" >>> Cannot open directory:\n %s", path);
+        return;
+    }
+
+    // current_columns == indent means nothing has been printed on the current line yet
+    size_t current_columns = indent;
+    struct dirent *entry;
+
+    while ((entry = readdir(dir))) {
+        if (entry->d_type != DT_REG)    // only regular files are listed
+            continue;
+
+        const char *file_name = entry->d_name;
+        size_t len = strlen(file_name);
+
+        // Require at least one character in front of the suffix, so that a file called
+        // just ".yaml" is not listed as an empty configuration name.
+        if (len <= 5 || strcmp(file_name + len - 5, ".yaml") != 0)
+            continue;
+
+        len -= 5;                       // drop the ".yaml" extension
+
+        if (current_columns == indent) {
+            // first name of the output: indent, and never wrap before it even if it is
+            // wider than the line (wrapping would only emit an empty line)
+            printf("%*s", (int)indent, "");
+        }
+        else if (current_columns + len + 1 > column_width) {
+            // the current line is full: start a new, indented one
+            printf("\n%*s", (int)indent, "");
+            current_columns = indent;
+        }
+
+        printf("%.*s ", (int)len, file_name);
+        current_columns += len + 1;     // the name plus its trailing space
+    }
+
+    closedir(dir);
+    printf("\n");
+}
+
+// Print the command line help of log2journal to stdout.
+//
+// `name` is the program name shown in the "Usage:" line. The text is static, except for
+// the limits taken from MAX_RENAMES, MAX_INJECTIONS, MAX_REWRITES and MAX_LINE_LENGTH and,
+// when built with HAVE_LIBYAML, the list of the available internal YAML configurations.
+void log_job_command_line_help(const char *name) {
+    printf("\n");
+    printf("Netdata log2journal " PACKAGE_VERSION "\n");
+    printf("\n");
+    printf("Convert logs to systemd Journal Export Format.\n");
+    printf("\n");
+    printf(" - JSON logs: extracts all JSON fields.\n");
+    printf(" - logfmt logs: extracts all logfmt fields.\n");
+    printf(" - free-form logs: uses PCRE2 patterns to extract fields.\n");
+    printf("\n");
+    printf("Usage: %s [OPTIONS] PATTERN|json|logfmt\n", name);
+    printf("\n");
+    printf("Options:\n");
+    printf("\n");
+#ifdef HAVE_LIBYAML
+    printf(" --file /path/to/file.yaml or -f /path/to/file.yaml\n");
+    printf(" Read yaml configuration file for instructions.\n");
+    printf("\n");
+    printf(" --config CONFIG_NAME or -c CONFIG_NAME\n");
+    printf(" Run with the internal YAML configuration named CONFIG_NAME.\n");
+    printf(" Available internal YAML configs:\n");
+    printf("\n");
+    config_dir_print_available();
+    printf("\n");
+#else
+    printf(" IMPORTANT:\n");
+    printf(" YAML configuration parsing is not compiled in this binary.\n");
+    printf("\n");
+#endif
+    printf("--------------------------------------------------------------------------------\n");
+    printf(" INPUT PROCESSING\n");
+    printf("\n");
+    printf(" PATTERN\n");
+    printf(" PATTERN should be a valid PCRE2 regular expression.\n");
+    printf(" RE2 regular expressions (like the ones usually used in Go applications),\n");
+    printf(" are usually valid PCRE2 patterns too.\n");
+    printf(" Sub-expressions without named groups are evaluated, but their matches are\n");
+    printf(" not added to the output.\n");
+    printf("\n");
+    printf(" - JSON mode\n");
+    printf(" JSON mode is enabled when the pattern is set to: json\n");
+    printf(" Field names are extracted from the JSON logs and are converted to the\n");
+    printf(" format expected by Journal Export Format (all caps, only _ is allowed).\n");
+    printf("\n");
+    printf(" - logfmt mode\n");
+    printf(" logfmt mode is enabled when the pattern is set to: logfmt\n");
+    printf(" Field names are extracted from the logfmt logs and are converted to the\n");
+    printf(" format expected by Journal Export Format (all caps, only _ is allowed).\n");
+    printf("\n");
+    printf(" All keys extracted from the input, are transliterated to match Journal\n");
+    printf(" semantics (capital A-Z, digits 0-9, underscore).\n");
+    printf("\n");
+    printf(" In a YAML file:\n");
+    printf(" ```yaml\n");
+    printf(" pattern: 'PCRE2 pattern | json | logfmt'\n");
+    printf(" ```\n");
+    printf("\n");
+    printf("--------------------------------------------------------------------------------\n");
+    printf(" GLOBALS\n");
+    printf("\n");
+    printf(" --prefix PREFIX\n");
+    printf(" Prefix all fields with PREFIX. The PREFIX is added before any other\n");
+    printf(" processing, so that the extracted keys have to be matched with the PREFIX in\n");
+    printf(" them. PREFIX is NOT transliterated and it is assumed to be systemd-journal\n");
+    printf(" friendly.\n");
+    printf("\n");
+    printf(" In a YAML file:\n");
+    printf(" ```yaml\n");
+    printf(" prefix: 'PREFIX_' # prepend all keys with this prefix.\n");
+    printf(" ```\n");
+    printf("\n");
+    printf(" --filename-key KEY\n");
+    printf(" Add a field with KEY as the key and the current filename as value.\n");
+    printf(" Automatically detects filenames when piped after 'tail -F',\n");
+    printf(" and tail matches multiple filenames.\n");
+    printf(" To inject the filename when tailing a single file, use --inject.\n");
+    printf("\n");
+    printf(" In a YAML file:\n");
+    printf(" ```yaml\n");
+    printf(" filename:\n");
+    printf(" key: KEY\n");
+    printf(" ```\n");
+    printf("\n");
+    printf("--------------------------------------------------------------------------------\n");
+    printf(" RENAMING OF KEYS\n");
+    printf("\n");
+    printf(" --rename NEW=OLD\n");
+    printf(" Rename fields. OLD has been transliterated and PREFIX has been added.\n");
+    printf(" NEW is assumed to be systemd journal friendly.\n");
+    printf("\n");
+    printf(" Up to %d renaming rules are allowed.\n", MAX_RENAMES);
+    printf("\n");
+    printf(" In a YAML file:\n");
+    printf(" ```yaml\n");
+    printf(" rename:\n");
+    printf(" - new_key: KEY1\n");
+    printf(" old_key: KEY2 # transliterated with PREFIX added\n");
+    printf(" - new_key: KEY3\n");
+    printf(" old_key: KEY4 # transliterated with PREFIX added\n");
+    printf(" # add as many as required\n");
+    printf(" ```\n");
+    printf("\n");
+    printf("--------------------------------------------------------------------------------\n");
+    printf(" INJECTING NEW KEYS\n");
+    printf("\n");
+    printf(" --inject KEY=VALUE\n");
+    printf(" Inject constant fields to the output (both matched and unmatched logs).\n");
+    printf(" --inject entries are added to unmatched lines too, when their key is\n");
+    printf(" not used in --inject-unmatched (--inject-unmatched overrides --inject).\n");
+    printf(" VALUE can use variables like ${OTHER_KEY} to be replaced with the values\n");
+    printf(" of other keys available.\n");
+    printf("\n");
+    printf(" Up to %d fields can be injected.\n", MAX_INJECTIONS);
+    printf("\n");
+    printf(" In a YAML file:\n");
+    printf(" ```yaml\n");
+    printf(" inject:\n");
+    printf(" - key: KEY1\n");
+    printf(" value: 'VALUE1'\n");
+    printf(" - key: KEY2\n");
+    printf(" value: '${KEY3}${KEY4}' # gets the values of KEY3 and KEY4\n");
+    printf(" # add as many as required\n");
+    printf(" ```\n");
+    printf("\n");
+    printf("--------------------------------------------------------------------------------\n");
+    printf(" REWRITING KEY VALUES\n");
+    printf("\n");
+    printf(" --rewrite KEY=/MATCH/REPLACE[/OPTIONS]\n");
+    printf(" Apply a rewrite rule to the values of a specific key.\n");
+    printf(" The first character after KEY= is the separator, which should also\n");
+    printf(" be used between the MATCH, REPLACE and OPTIONS.\n");
+    printf("\n");
+    printf(" OPTIONS can be a comma separated list of `non-empty`, `dont-stop` and\n");
+    printf(" `inject`.\n");
+    printf("\n");
+    printf(" When `non-empty` is given, MATCH is expected to be a variable\n");
+    printf(" substitution using `${KEY1}${KEY2}`. Once the substitution is completed\n");
+    printf(" the rule is matching the KEY only if the result is not empty.\n");
+    printf(" When `non-empty` is not set, the MATCH string is expected to be a PCRE2\n");
+    printf(" regular expression to be checked against the KEY value. This PCRE2\n");
+    printf(" pattern may include named groups to extract parts of the KEY's value.\n");
+    printf("\n");
+    printf(" REPLACE supports variable substitution like `${variable}` against MATCH\n");
+    printf(" named groups (when MATCH is a PCRE2 pattern) and `${KEY}` against the\n");
+    printf(" keys defined so far.\n");
+    printf("\n");
+    printf(" Example:\n");
+    // the named groups must be spelled out: they are what ${day}, ${month} and ${year}
+    // on the next line refer to
+    printf(" --rewrite DATE=/^(?<year>\\d{4})-(?<month>\\d{2})-(?<day>\\d{2})$/\n");
+    printf(" ${day}/${month}/${year}\n");
+    printf(" The above will rewrite dates in the format YYYY-MM-DD to DD/MM/YYYY.\n");
+    printf("\n");
+    printf(" Only one rewrite rule is applied per key; the sequence of rewrites for a\n");
+    printf(" given key, stops once a rule matches it. This allows providing a sequence\n");
+    printf(" of independent rewriting rules for the same key, matching the different\n");
+    printf(" values the key may get, and also provide a catch-all rewrite rule at the\n");
+    printf(" end, for setting the key value if no other rule matched it. The rewrite\n");
+    printf(" rule can allow processing more rewrite rules when OPTIONS includes\n");
+    printf(" the keyword 'dont-stop'.\n");
+    printf("\n");
+    printf(" Up to %d rewriting rules are allowed.\n", MAX_REWRITES);
+    printf("\n");
+    printf(" In a YAML file:\n");
+    printf(" ```yaml\n");
+    printf(" rewrite:\n");
+    printf(" # the order of these rules is important - processed top to bottom\n");
+    printf(" - key: KEY1\n");
+    printf(" match: 'PCRE2 PATTERN WITH NAMED GROUPS'\n");
+    printf(" value: 'all match fields and input keys as ${VARIABLE}'\n");
+    printf(" inject: BOOLEAN # yes = inject the field, don't just rewrite it\n");
+    printf(" stop: BOOLEAN # no = continue processing, don't stop if matched\n");
+    printf(" - key: KEY2\n");
+    printf(" non_empty: '${KEY3}${KEY4}' # match only if this evaluates to non empty\n");
+    printf(" value: 'all input keys as ${VARIABLE}'\n");
+    printf(" inject: BOOLEAN # yes = inject the field, don't just rewrite it\n");
+    printf(" stop: BOOLEAN # no = continue processing, don't stop if matched\n");
+    printf(" # add as many rewrites as required\n");
+    printf(" ```\n");
+    printf("\n");
+    printf(" By default rewrite rules are applied only on fields already defined.\n");
+    printf(" This allows shipping YAML files that include more rewrites than are\n");
+    printf(" required for a specific input file.\n");
+    printf(" Rewrite rules however allow injecting new fields when OPTIONS include\n");
+    printf(" the keyword `inject` or in YAML `inject: yes` is given.\n");
+    printf("\n");
+    printf(" MATCH on the command line can be empty to define an unconditional rule.\n");
+    printf(" Similarly, `match` and `non_empty` can be omitted in the YAML file.\n");
+    printf("\n");
+    printf("--------------------------------------------------------------------------------\n");
+    printf(" UNMATCHED LINES\n");
+    printf("\n");
+    printf(" --unmatched-key KEY\n");
+    printf(" Include unmatched log entries in the output with KEY as the field name.\n");
+    printf(" Use this to include unmatched entries to the output stream.\n");
+    printf(" Usually it should be set to --unmatched-key=MESSAGE so that the\n");
+    printf(" unmatched entry will appear as the log message in the journals.\n");
+    printf(" Use --inject-unmatched to inject additional fields to unmatched lines.\n");
+    printf("\n");
+    printf(" In a YAML file:\n");
+    printf(" ```yaml\n");
+    printf(" unmatched:\n");
+    printf(" key: MESSAGE # inject the error log as MESSAGE\n");
+    printf(" ```\n");
+    printf("\n");
+    printf(" --inject-unmatched LINE\n");
+    printf(" Inject lines into the output for each unmatched log entry.\n");
+    printf(" Usually, --inject-unmatched=PRIORITY=3 is needed to mark the unmatched\n");
+    printf(" lines as errors, so that they can easily be spotted in the journals.\n");
+    printf("\n");
+    printf(" Up to %d such lines can be injected.\n", MAX_INJECTIONS);
+    printf("\n");
+    printf(" In a YAML file:\n");
+    printf(" ```yaml\n");
+    printf(" unmatched:\n");
+    printf(" key: MESSAGE # inject the error log as MESSAGE\n");
+    printf(" inject:\n");
+    printf(" - key: KEY1\n");
+    printf(" value: 'VALUE1'\n");
+    printf(" # add as many constants as required\n");
+    printf(" ```\n");
+    printf("\n");
+    printf("--------------------------------------------------------------------------------\n");
+    printf(" FILTERING\n");
+    printf("\n");
+    printf(" --include PATTERN\n");
+    printf(" Include only keys matching the PCRE2 PATTERN.\n");
+    printf(" Useful when parsing JSON or logfmt logs, to include only the keys given.\n");
+    printf(" The keys are matched after the PREFIX has been added to them.\n");
+    printf("\n");
+    printf(" --exclude PATTERN\n");
+    printf(" Exclude the keys matching the PCRE2 PATTERN.\n");
+    printf(" Useful when parsing JSON or logfmt logs, to exclude some of the keys given.\n");
+    printf(" The keys are matched after the PREFIX has been added to them.\n");
+    printf("\n");
+    printf(" When both include and exclude patterns are set and both match a key,\n");
+    printf(" exclude wins and the key will not be added, like a pipeline, we first\n");
+    printf(" include it and then exclude it.\n");
+    printf("\n");
+    printf(" In a YAML file:\n");
+    printf(" ```yaml\n");
+    printf(" filter:\n");
+    printf(" include: 'PCRE2 PATTERN MATCHING KEY NAMES TO INCLUDE'\n");
+    printf(" exclude: 'PCRE2 PATTERN MATCHING KEY NAMES TO EXCLUDE'\n");
+    printf(" ```\n");
+    printf("\n");
+    printf("--------------------------------------------------------------------------------\n");
+    printf(" OTHER\n");
+    printf("\n");
+    printf(" -h, or --help\n");
+    printf(" Display this help and exit.\n");
+    printf("\n");
+    printf(" --show-config\n");
+    printf(" Show the configuration in YAML format before starting the job.\n");
+    printf(" This is also an easy way to convert command line parameters to yaml.\n");
+    printf("\n");
+    printf("The program accepts all parameters as both --option=value and --option value.\n");
+    printf("\n");
+    printf("The maximum log line length accepted is %d characters.\n", MAX_LINE_LENGTH);
+    printf("\n");
+    printf("PIPELINE AND SEQUENCE OF PROCESSING\n");
+    printf("\n");
+    printf("This is a simple diagram of the pipeline taking place:\n");
+    printf(" \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" | INPUT | \n");
+    printf(" | read one log line at a time | \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" v v v v v v \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" | EXTRACT FIELDS AND VALUES | \n");
+    printf(" | JSON, logfmt, or pattern based | \n");
+    printf(" | (apply optional PREFIX - all keys use capitals) | \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" v v v v v v \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" | RENAME FIELDS | \n");
+    printf(" | change the names of the fields | \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" v v v v v v \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" | INJECT NEW FIELDS | \n");
+    printf(" | constants, or other field values as variables | \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" v v v v v v \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" | REWRITE FIELD VALUES | \n");
+    printf(" | pipeline multiple rewriting rules to alter | \n");
+    printf(" | the values of the fields | \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" v v v v v v \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" | FILTER FIELDS | \n");
+    printf(" | use include and exclude patterns on the field | \n");
+    printf(" | names, to select which fields are sent to journal | \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" v v v v v v \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" | OUTPUT | \n");
+    printf(" | generate Journal Export Format | \n");
+    printf(" +---------------------------------------------------+ \n");
+    printf(" \n");
+    printf("--------------------------------------------------------------------------------\n");
+    printf("JOURNAL FIELDS RULES (enforced by systemd-journald)\n");
+    printf("\n");
+    printf(" - field names can be up to 64 characters\n");
+    printf(" - the only allowed field characters are A-Z, 0-9 and underscore\n");
+    printf(" - the first character of fields cannot be a digit\n");
+    printf(" - protected journal fields start with underscore:\n");
+    printf(" * they are accepted by systemd-journal-remote\n");
+    printf(" * they are NOT accepted by a local systemd-journald\n");
+    printf("\n");
+    printf(" For best results, always include these fields:\n");
+    printf("\n");
+    printf(" MESSAGE=TEXT\n");
+    printf(" The MESSAGE is the body of the log entry.\n");
+    printf(" This field is what we usually see in our logs.\n");
+    printf("\n");
+    printf(" PRIORITY=NUMBER\n");
+    printf(" PRIORITY sets the severity of the log entry.\n");
+    printf(" 0=emerg, 1=alert, 2=crit, 3=err, 4=warn, 5=notice, 6=info, 7=debug\n");
+    printf(" - Emergency events (0) are usually broadcast to all terminals.\n");
+    printf(" - Emergency, alert, critical, and error (0-3) are usually colored red.\n");
+    printf(" - Warning (4) entries are usually colored yellow.\n");
+    printf(" - Notice (5) entries are usually bold or have a brighter white color.\n");
+    printf(" - Info (6) entries are the default.\n");
+    printf(" - Debug (7) entries are usually grayed or dimmed.\n");
+    printf("\n");
+    printf(" SYSLOG_IDENTIFIER=NAME\n");
+    printf(" SYSLOG_IDENTIFIER sets the name of the application.\n");
+    printf(" Use something descriptive, like: SYSLOG_IDENTIFIER=nginx-logs\n");
+    printf("\n");
+    printf("You can find the most common fields at 'man systemd.journal-fields'.\n");
+    printf("\n");
+}
diff --git a/collectors/log2journal/log2journal-inject.c b/collectors/log2journal/log2journal-inject.c
new file mode 100644
index 00000000000000..45158066bf23e0
--- /dev/null
+++ b/collectors/log2journal/log2journal-inject.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+// Run the cleanup helpers on the two members of an injection: its key and its value.
+// `inj` itself is not freed here.
+void injection_cleanup(INJECTION *inj) {
+ hashed_key_cleanup(&inj->key);
+ replace_pattern_cleanup(&inj->value);
+}
+
+// Store a key and a value into the injection slot `inj`.
+//
+// A key longer than JOURNAL_MAX_KEY_LEN or a value longer than JOURNAL_MAX_VALUE_LEN only
+// produces a warning here; both are still handed over with their full length.
+// Returns whatever replace_pattern_set() returns for the value.
+static inline bool log_job_injection_replace(INJECTION *inj, const char *key, size_t key_len, const char *value, size_t value_len) {
+    const bool key_too_long = key_len > JOURNAL_MAX_KEY_LEN;
+    const bool value_too_long = value_len > JOURNAL_MAX_VALUE_LEN;
+
+    if(key_too_long)
+        log2stderr("WARNING: injection key '%.*s' is too long for journal. Will be truncated.", (int)key_len, key);
+
+    if(value_too_long)
+        log2stderr("WARNING: injection value of key '%.*s' is too long for journal. Will be truncated.", (int)key_len, key);
+
+    hashed_key_len_set(&inj->key, key, key_len);
+
+    // replace_pattern_set() is given a temporary copy limited to value_len bytes,
+    // which is released as soon as the pattern has been set
+    char *value_copy = strndupz(value, value_len);
+    const bool ok = replace_pattern_set(&inj->value, value_copy);
+    freez(value_copy);
+
+    return ok;
+}
+
+// Append a key=value injection to the job.
+//
+// When `unmatched` is true the entry goes to the list applied to unmatched lines
+// (jb->unmatched.injections), otherwise to the regular list (jb->injections).
+// Returns false when the selected list already holds MAX_INJECTIONS entries, or when
+// log_job_injection_replace() fails. The slot counter is incremented before the outcome
+// of log_job_injection_replace() is known.
+bool log_job_injection_add(LOG_JOB *jb, const char *key, size_t key_len, const char *value, size_t value_len, bool unmatched) {
+    if (!unmatched) {
+        // regular injections
+        if (jb->injections.used >= MAX_INJECTIONS) {
+            log2stderr("Error: too many injections. You can inject up to %d lines.", MAX_INJECTIONS);
+            return false;
+        }
+
+        return log_job_injection_replace(&jb->injections.keys[jb->injections.used++],
+                                         key, key_len, value, value_len);
+    }
+
+    // injections for unmatched lines
+    if (jb->unmatched.injections.used >= MAX_INJECTIONS) {
+        log2stderr("Error: too many unmatched injections. You can inject up to %d lines.", MAX_INJECTIONS);
+        return false;
+    }
+
+    return log_job_injection_replace(&jb->unmatched.injections.keys[jb->unmatched.injections.used++],
+                                     key, key_len, value, value_len);
+}
diff --git a/collectors/log2journal/log2journal-json.c b/collectors/log2journal/log2journal-json.c
new file mode 100644
index 00000000000000..2ca294e4db3280
--- /dev/null
+++ b/collectors/log2journal/log2journal-json.c
@@ -0,0 +1,630 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+// size of the error message buffer (log_json_state.msg)
+#define JSON_ERROR_LINE_MAX 1024
+// size of the buffer holding the full key being built (log_json_state.key)
+#define JSON_KEY_MAX 1024
+// number of entries in the key stack (log_json_state.stack)
+#define JSON_DEPTH_MAX 100
+
+// State of the JSON parser. It is created by json_parser_create() and reused for every
+// document given to json_parse_document().
+struct log_json_state {
+ LOG_JOB *jb; // the job that receives every extracted key/value
+
+ const char *line; // the text being parsed, set by json_parse_document()
+ uint32_t pos; // offset of the current character in line (32-bit: cast when printed with %zu)
+ uint32_t depth; // index of the top entry in stack[]
+ char *stack[JSON_DEPTH_MAX]; // pointers into key[]: stack[depth] is where the next key part gets appended
+
+ char key[JSON_KEY_MAX]; // the prefix, followed by the key parts of all nesting levels joined with '_'
+ char msg[JSON_ERROR_LINE_MAX]; // the last error message, returned by json_parser_error()
+};
+
+// objects and arrays can contain each other, so both parsers are declared up front
+static inline bool json_parse_object(LOG_JSON_STATE *js);
+static inline bool json_parse_array(LOG_JSON_STATE *js);
+
+// pointer to the current character of the line being parsed
+#define json_current_pos(js) &(js)->line[(js)->pos]
+// advance the current position by one character
+#define json_consume_char(js) ++(js)->pos
+
+// Hand a parsed value of `len` bytes to the job, under the key currently held in js->key.
+static inline void json_process_key_value(LOG_JSON_STATE *js, const char *value, size_t len) {
+ log_job_send_extracted_key_value(js->jb, js->key, value, len);
+}
+
+// Advance js->pos over any whitespace found at the current position.
+static inline void json_skip_spaces(LOG_JSON_STATE *js) {
+    const char *start = json_current_pos(js);
+    const char *s = start;
+
+    // isspace() is only defined for values representable as unsigned char (and EOF).
+    // Log lines may contain bytes >= 0x80 (UTF-8), which are negative when char is signed.
+    while(isspace((unsigned char)*s))
+        s++;
+
+    js->pos += s - start;
+}
+
+// Skip whitespace and check that the current character is one of the characters listed
+// in `expected`. The character itself is not consumed.
+// Returns true on a match; otherwise stores an error in js->msg and returns false.
+static inline bool json_expect_char_after_white_space(LOG_JSON_STATE *js, const char *expected) {
+    json_skip_spaces(js);
+
+    const char *s = json_current_pos(js);
+
+    // the end of the line (NUL) never matches; strchr() alone would find the terminator
+    if(*s && strchr(expected, *s))
+        return true;
+
+    // js->pos is a uint32_t: it has to be converted to match the %zu conversion
+    snprintf(js->msg, sizeof(js->msg),
+             "JSON PARSER: character '%c' is not one of the expected characters (%s), at pos %zu",
+             *s ? *s : '?', expected, (size_t)js->pos);
+
+    return false;
+}
+
+// Parse the literal `null` at the current position and emit the text "null" as the value
+// of the current key. Returns false, with js->msg set, when the literal is not there.
+static inline bool json_parse_null(LOG_JSON_STATE *js) {
+    const char *s = json_current_pos(js);
+
+    if (strncmp(s, "null", 4) != 0) {
+        // js->pos is a uint32_t: it has to be converted to match the %zu conversion
+        snprintf(js->msg, sizeof(js->msg),
+                 "JSON PARSER: expected 'null', found '%.4s' at position %zu", s, (size_t)js->pos);
+        return false;
+    }
+
+    json_process_key_value(js, "null", 4);
+    js->pos += 4;
+    return true;
+}
+
+// Parse the literal `true` at the current position and emit the text "true" as the value
+// of the current key. Returns false, with js->msg set, when the literal is not there.
+static inline bool json_parse_true(LOG_JSON_STATE *js) {
+    const char *s = json_current_pos(js);
+
+    if (strncmp(s, "true", 4) != 0) {
+        // js->pos is a uint32_t: it has to be converted to match the %zu conversion
+        snprintf(js->msg, sizeof(js->msg),
+                 "JSON PARSER: expected 'true', found '%.4s' at position %zu", s, (size_t)js->pos);
+        return false;
+    }
+
+    json_process_key_value(js, "true", 4);
+    js->pos += 4;
+    return true;
+}
+
+// Parse the literal `false` at the current position and emit the text "false" as the
+// value of the current key. Returns false, with js->msg set, when the literal is not there.
+static inline bool json_parse_false(LOG_JSON_STATE *js) {
+    const char *s = json_current_pos(js);
+
+    if (strncmp(s, "false", 5) != 0) {
+        // show as many characters as the literal has (5, not 4);
+        // js->pos is a uint32_t: it has to be converted to match the %zu conversion
+        snprintf(js->msg, sizeof(js->msg),
+                 "JSON PARSER: expected 'false', found '%.5s' at position %zu", s, (size_t)js->pos);
+        return false;
+    }
+
+    json_process_key_value(js, "false", 5);
+    js->pos += 5;
+    return true;
+}
+
+// Parse a JSON number at the current position: an optional '-', digits, an optional
+// fraction and an optional exponent. The text of the number is emitted unchanged as the
+// value of the current key.
+// Returns false, with js->msg set, when no character could be consumed, or when the
+// number does not fit in the local buffer.
+static inline bool json_parse_number(LOG_JSON_STATE *js) {
+    static __thread char value[8192];
+
+    const char *start = json_current_pos(js);
+    const char *s = start;
+
+    // Measure the number first and copy it once afterwards. Copying byte by byte needed a
+    // space check on every write; the sign, the decimal point and the exponent marker
+    // were written without one, so a very long number could wrap the space counter and
+    // overflow value[]. The line is NUL terminated, which bounds every loop below.
+
+    // optional minus sign
+    if (*s == '-')
+        s++;
+
+    // digits before the decimal point
+    while (*s >= '0' && *s <= '9')
+        s++;
+
+    // decimal point and fractional part
+    if (*s == '.') {
+        s++;
+        while (*s >= '0' && *s <= '9')
+            s++;
+    }
+
+    // exponent, with its optional sign
+    if (*s == 'e' || *s == 'E') {
+        s++;
+        if (*s == '+' || *s == '-')
+            s++;
+        while (*s >= '0' && *s <= '9')
+            s++;
+    }
+
+    const size_t len = (size_t)(s - start);
+
+    // js->pos is a uint32_t: it has to be converted to match the %zu conversion
+    if (len == 0) {
+        snprintf(js->msg, sizeof(js->msg), "JSON PARSER: invalid number format at pos %zu", (size_t)js->pos);
+        return false;
+    }
+
+    if (len >= sizeof(value)) {     // one byte is needed for the terminator
+        snprintf(js->msg, sizeof(js->msg), "JSON PARSER: truncated number value at pos %zu", (size_t)js->pos);
+        return false;
+    }
+
+    memcpy(value, start, len);
+    value[len] = '\0';
+    js->pos += len;
+
+    json_process_key_value(js, value, len);
+    return true;
+}
+
+// Append the UTF-8 encoding of `codepoint` at *d.
+//
+// *d is advanced and *remaining is reduced by the number of bytes written (1 to 4).
+// One extra byte of space is always required, so that the caller can still terminate
+// the string. No terminator is written here.
+// Returns false, writing nothing, when the space is not enough, or when `codepoint` is
+// above 0x10FFFF or in the surrogate range 0xD800-0xDFFF (surrogates are not characters
+// and their encoding would not be valid UTF-8).
+static inline bool encode_utf8(unsigned codepoint, char **d, size_t *remaining) {
+    if (codepoint >= 0xD800 && codepoint <= 0xDFFF)
+        return false;
+
+    size_t len;
+    if (codepoint <= 0x7F)
+        len = 1;
+    else if (codepoint <= 0x7FF)
+        len = 2;
+    else if (codepoint <= 0xFFFF)
+        len = 3;
+    else if (codepoint <= 0x10FFFF)
+        len = 4;
+    else
+        return false;               // beyond the Unicode range
+
+    if (*remaining < len + 1)       // +1 for the null
+        return false;
+
+    char *out = *d;
+    switch (len) {
+        case 1:
+            out[0] = (char)codepoint;
+            break;
+
+        case 2:
+            out[0] = (char)(0xC0 | ((codepoint >> 6) & 0x1F));
+            out[1] = (char)(0x80 | (codepoint & 0x3F));
+            break;
+
+        case 3:
+            out[0] = (char)(0xE0 | ((codepoint >> 12) & 0x0F));
+            out[1] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
+            out[2] = (char)(0x80 | (codepoint & 0x3F));
+            break;
+
+        default:
+            out[0] = (char)(0xF0 | ((codepoint >> 18) & 0x07));
+            out[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
+            out[2] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
+            out[3] = (char)(0x80 | (codepoint & 0x3F));
+            break;
+    }
+
+    *d = out + len;
+    *remaining -= len;
+    return true;
+}
+
+// Decode one unicode escape sequence and append its UTF-8 encoding at `d`.
+//
+// `s` must point at the backslash. Three forms are accepted:
+//   \uXXXX          a single code point
+//   \uXXXX\uXXXX    a high surrogate followed by a low surrogate
+//   \UXXXXXXXX      a code point given with 8 hex digits
+//
+// Returns the number of input characters consumed (6, 12 or 10), or 0 when the sequence
+// is not valid or its encoding does not fit. `d` is passed by value: the caller finds
+// out how many bytes were written from the change of *remaining.
+size_t parse_surrogate(const char *s, char *d, size_t *remaining) {
+    if (s[0] != '\\' || (s[1] != 'u' && s[1] != 'U'))
+        return 0; // Not a valid Unicode escape sequence
+
+    // The hex digits are always copied into a local, NUL terminated buffer before they
+    // are converted, so that strtoul() cannot read past them into the text that follows.
+    // The checks stop at the first non-hex character, so the end of the line is never
+    // passed. isxdigit() needs an unsigned char value.
+    char hex[9] = {0};
+    unsigned codepoint;
+
+    if (s[1] == 'U') {
+        // Handle \UXXXXXXXX
+        for (int i = 0; i < 8; i++) {
+            if (!isxdigit((unsigned char)s[2 + i]))
+                return 0; // Not a valid \UXXXXXXXX sequence
+            hex[i] = s[2 + i];
+        }
+
+        codepoint = (unsigned)strtoul(hex, NULL, 16);
+        return encode_utf8(codepoint, &d, remaining) ? 10 : 0;
+    }
+
+    // Handle \uXXXX
+    for (int i = 0; i < 4; i++) {
+        if (!isxdigit((unsigned char)s[2 + i]))
+            return 0; // Not a valid \uXXXX sequence
+        hex[i] = s[2 + i];
+    }
+    codepoint = (unsigned)strtoul(hex, NULL, 16);
+
+    if (codepoint >= 0xD800 && codepoint <= 0xDBFF && s[6] == '\\' && s[7] == 'u') {
+        // Possible start of a surrogate pair: exactly 4 hex digits must follow
+        char low_hex[5] = {0};
+        int digits = 0;
+        while (digits < 4 && isxdigit((unsigned char)s[8 + digits])) {
+            low_hex[digits] = s[8 + digits];
+            digits++;
+        }
+
+        if (digits == 4) {
+            unsigned low_surrogate = (unsigned)strtoul(low_hex, NULL, 16);
+            if (low_surrogate < 0xDC00 || low_surrogate > 0xDFFF)
+                return 0; // Invalid low surrogate
+
+            codepoint = 0x10000 + ((codepoint - 0xD800) << 10) + (low_surrogate - 0xDC00);
+            return encode_utf8(codepoint, &d, remaining) ? 12 : 0; // \uXXXX\uXXXX
+        }
+    }
+
+    // Single \uXXXX
+    return encode_utf8(codepoint, &d, remaining) ? 6 : 0;
+}
+
+// Append the two characters backslash and 'n' at *d, advancing *d and reducing
+// *remaining by 2. Nothing is written unless more than 3 bytes remain.
+static inline void copy_newline(LOG_JSON_STATE *js __maybe_unused, char **d, size_t *remaining) {
+    if(*remaining <= 3)
+        return;
+
+    char *out = *d;
+    out[0] = '\\';
+    out[1] = 'n';
+
+    *d = out + 2;
+    *remaining -= 2;
+}
+
+// Append the two characters backslash and 't' at *d, advancing *d and reducing
+// *remaining by 2. Nothing is written unless more than 3 bytes remain.
+static inline void copy_tab(LOG_JSON_STATE *js __maybe_unused, char **d, size_t *remaining) {
+    if(*remaining <= 3)
+        return;
+
+    char *out = *d;
+    out[0] = '\\';
+    out[1] = 't';
+
+    *d = out + 2;
+    *remaining -= 2;
+}
+
+// Parse a JSON string value at the current position and emit it as the value of the
+// current key. Empty strings are not emitted.
+//
+// Escape sequences are handled as follows:
+//   \n and \t       are kept as the two characters backslash + letter
+//   \f, \b and \r   are replaced by a space
+//   \uXXXX          is decoded to UTF-8 by parse_surrogate(); when it cannot be decoded,
+//                   it is copied as it is
+//   anything else   the escaped character is copied without the backslash
+//
+// Returns false, with js->msg set, when the string is not opened or closed by a double
+// quote, when the line ends in the middle of an escape sequence, or when the value does
+// not fit in the buffer.
+static inline bool json_parse_string(LOG_JSON_STATE *js) {
+    static __thread char value[JOURNAL_MAX_VALUE_LEN];
+
+    if(!json_expect_char_after_white_space(js, "\""))
+        return false;
+
+    json_consume_char(js);
+
+    value[0] = '\0';
+    char *d = value;
+    const char *s = json_current_pos(js);
+    size_t remaining = sizeof(value);
+
+    while (*s && *s != '"') {
+        char c;
+
+        if (*s == '\\') {
+            s++;
+
+            switch (*s) {
+                case '\0':
+                    // The line ends right after the backslash. Copying the terminator
+                    // and moving on would make the loop read past the end of the line.
+                    snprintf(js->msg, sizeof(js->msg),
+                             "JSON PARSER: unterminated escape sequence in string value at pos %zu",
+                             (size_t)js->pos);
+                    return false;
+
+                case 'n':
+                    copy_newline(js, &d, &remaining);
+                    s++;
+                    continue;
+
+                case 't':
+                    copy_tab(js, &d, &remaining);
+                    s++;
+                    continue;
+
+                case 'f':
+                case 'b':
+                case 'r':
+                    c = ' ';
+                    s++;
+                    break;
+
+                case 'u': {
+                    size_t old_remaining = remaining;
+                    size_t consumed = parse_surrogate(s - 1, d, &remaining);
+                    if (consumed > 0) {
+                        s += consumed - 1; // -1 because we already incremented s after '\\'
+                        d += old_remaining - remaining;
+                        continue;
+                    }
+                    else {
+                        // not decodable: keep the backslash and the 'u'. There is always
+                        // at least one byte left at this point, and the check after the
+                        // switch rejects the value when it was the last one.
+                        *d++ = '\\';
+                        remaining--;
+                        c = *s++;
+                    }
+                }
+                break;
+
+                default:
+                    c = *s++;
+                    break;
+            }
+        }
+        else
+            c = *s++;
+
+        if(remaining < 2) {
+            // js->pos is a uint32_t: it has to be converted to match the %zu conversion
+            snprintf(js->msg, sizeof(js->msg),
+                     "JSON PARSER: truncated string value at pos %zu", (size_t)js->pos);
+            return false;
+        }
+        else {
+            *d++ = c;
+            remaining--;
+        }
+    }
+    *d = '\0';
+    js->pos += s - json_current_pos(js);
+
+    if(!json_expect_char_after_white_space(js, "\""))
+        return false;
+
+    json_consume_char(js);
+
+    if(d > value)
+        json_process_key_value(js, value, d - value);
+
+    return true;
+}
+
+// Parse an object key at the current position, append it to js->key and push it on the
+// key stack.
+//
+// At depth > 0 the new part is separated from its parent with '_'. Every character of
+// the key is converted with journal_key_characters_map, a \uXXXX escape becomes a single
+// '_', and consecutive underscores coming from the key are collapsed into one.
+//
+// Returns false, with js->msg set, when the key is not enclosed in double quotes, when
+// the nesting is too deep, or when the full key does not fit in js->key.
+static inline bool json_parse_key_and_push(LOG_JSON_STATE *js) {
+    if (!json_expect_char_after_white_space(js, "\""))
+        return false;
+
+    // js->pos is a uint32_t: it has to be converted to match the %zu conversions below
+    if(js->depth >= JSON_DEPTH_MAX - 1) {
+        snprintf(js->msg, sizeof(js->msg),
+                 "JSON PARSER: object too deep, at pos %zu", (size_t)js->pos);
+        return false;
+    }
+
+    json_consume_char(js);
+
+    char *d = js->stack[js->depth];
+    size_t remaining = sizeof(js->key) - (d - js->key);
+
+    if(js->depth) {
+        // the separator needs room too: without this check an empty key arriving when
+        // the buffer is full would put the terminator one byte past the end of js->key
+        if(remaining < 2) {
+            snprintf(js->msg, sizeof(js->msg),
+                     "JSON PARSER: key buffer full - keys are too long, at pos %zu", (size_t)js->pos);
+            return false;
+        }
+        *d++ = '_';
+        remaining--;
+    }
+
+    const char *s = json_current_pos(js);
+    char last_c = '\0';
+    while(*s && *s != '\"') {
+        char c;
+
+        if (*s == '\\') {
+            s++;
+
+            if (*s == 'u') {
+                // \uXXXX: skip the 'u' and up to 4 hex digits. Skipping a fixed number of
+                // characters could step over the closing quote or the end of the line.
+                c = '_';
+                s++;
+                for(int i = 0; i < 4 && isxdigit((unsigned char)*s); i++)
+                    s++;
+            }
+            else if (*s)
+                c = (char)journal_key_characters_map[(unsigned char)*s++];
+            else
+                break; // the line ends after the backslash; the missing quote is reported below
+        }
+        else
+            c = (char)journal_key_characters_map[(unsigned char)*s++];
+
+        if(c == '_' && last_c == '_')
+            continue;
+
+        if(remaining < 2) {
+            snprintf(js->msg, sizeof(js->msg),
+                     "JSON PARSER: key buffer full - keys are too long, at pos %zu", (size_t)js->pos);
+            return false;
+        }
+
+        *d++ = c;
+        remaining--;
+        last_c = c;
+    }
+    *d = '\0';
+    js->pos += s - json_current_pos(js);
+
+    if (!json_expect_char_after_white_space(js, "\""))
+        return false;
+
+    json_consume_char(js);
+
+    // the end of this key is where the key of a nested level gets appended
+    js->stack[++js->depth] = d;
+
+    return true;
+}
+
+// Pop the top entry of the key stack.
+//
+// A terminator is written at the position the popped entry points to, and the depth is
+// reduced by one. Returns false, with js->msg set, when the stack is already empty.
+static inline bool json_key_pop(LOG_JSON_STATE *js) {
+    if(js->depth == 0) {
+        // depth and pos are uint32_t: they have to be converted to match %zu
+        snprintf(js->msg, sizeof(js->msg),
+                 "JSON PARSER: cannot pop a key at depth %zu, at pos %zu",
+                 (size_t)js->depth, (size_t)js->pos);
+        return false;
+    }
+
+    char *k = js->stack[js->depth--];
+    *k = '\0';
+    return true;
+}
+
+// Parse the value found at the current position, after skipping whitespace, by
+// dispatching on its first character to the parser of the matching type.
+// Returns what that parser returns, or false, with js->msg set, when the character
+// cannot start a value.
+static inline bool json_parse_value(LOG_JSON_STATE *js) {
+    if(!json_expect_char_after_white_space(js, "-.0123456789tfn\"{["))
+        return false;
+
+    switch(*json_current_pos(js)) {
+        case '-':
+        case '0': case '1': case '2': case '3': case '4':
+        case '5': case '6': case '7': case '8': case '9':
+            return json_parse_number(js);
+
+        case 't':
+            return json_parse_true(js);
+
+        case 'f':
+            return json_parse_false(js);
+
+        case 'n':
+            return json_parse_null(js);
+
+        case '"':
+            return json_parse_string(js);
+
+        case '{':
+            return json_parse_object(js);
+
+        case '[':
+            return json_parse_array(js);
+
+        default:
+            // a leading '.' passes the check above, but is not a valid start of a number
+            break;
+    }
+
+    // js->pos is a uint32_t: it has to be converted to match the %zu conversion
+    snprintf(js->msg, sizeof(js->msg),
+             "JSON PARSER: unexpected character at pos %zu", (size_t)js->pos);
+    return false;
+}
+
+// Append the decimal form of an array index to js->key and push it on the key stack, so
+// that the elements of an array get keys like PARENT_0, PARENT_1, ...
+//
+// Returns false, with js->msg set, when the nesting is too deep or when the full key
+// does not fit in js->key.
+static inline bool json_key_index_and_push(LOG_JSON_STATE *js, size_t index) {
+    // Arrays use the same stack as object keys, so they need the same depth limit:
+    // without it, deeply nested arrays write past the end of js->stack[].
+    // js->pos is a uint32_t: it has to be converted to match the %zu conversions.
+    if(js->depth >= JSON_DEPTH_MAX - 1) {
+        snprintf(js->msg, sizeof(js->msg),
+                 "JSON PARSER: object too deep, at pos %zu", (size_t)js->pos);
+        return false;
+    }
+
+    char *d = js->stack[js->depth];
+    size_t remaining = sizeof(js->key) - (d - js->key);
+
+    if(js->depth > 0) {
+        // the separator needs room too
+        if(remaining < 2) {
+            snprintf(js->msg, sizeof(js->msg),
+                     "JSON PARSER: key buffer full - keys are too long, at pos %zu", (size_t)js->pos);
+            return false;
+        }
+        *d++ = '_';
+        remaining--;
+    }
+
+    // Convert index to string manually, filling the buffer from its end
+    char temp[32];
+    char *t = temp + sizeof(temp) - 1;
+    *t = '\0';
+
+    do {
+        *--t = (char)((index % 10) + '0');
+        index /= 10;
+    } while (index > 0);
+
+    // Append the index to the key
+    while (*t) {
+        if(remaining < 2) {
+            snprintf(js->msg, sizeof(js->msg),
+                     "JSON PARSER: key buffer full - keys are too long, at pos %zu", (size_t)js->pos);
+            return false;
+        }
+
+        *d++ = *t++;
+        remaining--;
+    }
+
+    *d = '\0'; // Null-terminate the key
+    js->stack[++js->depth] = d;
+
+    return true;
+}
+
+// Parse a JSON array at the current position.
+//
+// Every element is parsed as a value, under a key made of the current key and the index
+// of the element (see json_key_index_and_push()). An empty array is accepted and emits
+// nothing. Returns false, with js->msg set by the failing step, on any error.
+static inline bool json_parse_array(LOG_JSON_STATE *js) {
+    if(!json_expect_char_after_white_space(js, "["))
+        return false;
+
+    json_consume_char(js);
+
+    // "[]" is valid JSON: without this check the first element would be parsed as a
+    // value and fail on the closing bracket
+    json_skip_spaces(js);
+    if(*json_current_pos(js) == ']') {
+        json_consume_char(js);
+        return true;
+    }
+
+    for(size_t index = 0; ; index++) {
+        if(!json_key_index_and_push(js, index))
+            return false;
+
+        if(!json_parse_value(js))
+            return false;
+
+        json_key_pop(js);
+
+        if(!json_expect_char_after_white_space(js, ",]"))
+            return false;
+
+        const char separator = *json_current_pos(js);
+        json_consume_char(js);
+
+        if(separator != ',')    // it was the closing ]
+            break;
+    }
+
+    return true;
+}
+
+// Parse a JSON object at the current position.
+//
+// For every member, the key is pushed on the key stack, the value is parsed under it,
+// and the key is popped again. An empty object is accepted and emits nothing.
+// Returns false, with js->msg set by the failing step, on any error.
+static inline bool json_parse_object(LOG_JSON_STATE *js) {
+    if(!json_expect_char_after_white_space(js, "{"))
+        return false;
+
+    json_consume_char(js);
+
+    // "{}" is valid JSON: without this check a key would be required right away and the
+    // closing brace would be rejected
+    json_skip_spaces(js);
+    if(*json_current_pos(js) == '}') {
+        json_consume_char(js);
+        return true;
+    }
+
+    for(;;) {
+        // json_parse_key_and_push() checks for the opening quote itself
+        if(!json_parse_key_and_push(js))
+            return false;
+
+        if(!json_expect_char_after_white_space(js, ":"))
+            return false;
+
+        json_consume_char(js);
+
+        if(!json_parse_value(js))
+            return false;
+
+        json_key_pop(js);
+
+        if(!json_expect_char_after_white_space(js, ",}"))
+            return false;
+
+        const char separator = *json_current_pos(js);
+        json_consume_char(js);
+
+        if(separator != ',')    // it was the closing }
+            break;
+    }
+
+    return true;
+}
+
+// Allocate a zeroed parser state bound to the job `jb`.
+//
+// When the job has a prefix, it is copied at the start of the key buffer. The bottom of
+// the key stack points right after it, so that every key built later starts with the
+// prefix. The state is released with json_parser_destroy().
+LOG_JSON_STATE *json_parser_create(LOG_JOB *jb) {
+    LOG_JSON_STATE *state = mallocz(sizeof(*state));
+    memset(state, 0, sizeof(*state));
+
+    state->jb = jb;
+
+    const char *prefix = jb->prefix;
+    if(prefix)
+        copy_to_buffer(state->key, sizeof(state->key), prefix, strlen(prefix));
+
+    // the key buffer is either empty (zeroed above) or holds the prefix
+    state->stack[0] = state->key + strlen(state->key);
+
+    return state;
+}
+
+// Free a parser state. Nothing is done when `js` is NULL.
+void json_parser_destroy(LOG_JSON_STATE *js) {
+ if(js)
+ freez(js);
+}
+
+// Return the error message buffer of the parser (js->msg). json_parse_document() empties
+// it before parsing, and the parsing functions write into it when they fail.
+// The pointer refers to memory inside `js`.
+const char *json_parser_error(LOG_JSON_STATE *js) {
+ return js->msg;
+}
+
+// Parse one JSON document from the NUL terminated text `txt`.
+//
+// The parser state is reset first: position, depth, error message and the key, which is
+// cut back to what the bottom of the key stack points to. The document must be a single
+// JSON object, optionally followed by whitespace.
+// Returns false on failure; the reason is available through json_parser_error().
+bool json_parse_document(LOG_JSON_STATE *js, const char *txt) {
+    js->line = txt;
+    js->pos = 0;
+    js->msg[0] = '\0';
+    js->stack[0][0] = '\0';
+    js->depth = 0;
+
+    if(!json_parse_object(js))
+        return false;
+
+    json_skip_spaces(js);
+
+    if(*json_current_pos(js)) {
+        // js->pos is a uint32_t: it has to be converted to match the %zu conversion
+        snprintf(js->msg, sizeof(js->msg),
+                 "JSON PARSER: excess characters found after document is finished, at pos %zu",
+                 (size_t)js->pos);
+        return false;
+    }
+
+    return true;
+}
+
+// Manual smoke test of the JSON parser: parses a document whose value has an incomplete
+// \u escape followed by valid \uXXXX escapes, using a job that only has a prefix set.
+// A parsing failure is reported with log2stderr() instead of being silently ignored.
+void json_test(void) {
+    LOG_JOB jb = { .prefix = "NGINX_" };
+    LOG_JSON_STATE *json = json_parser_create(&jb);
+
+    if(!json_parse_document(json, "{\"value\":\"\\u\\u039A\\u03B1\\u03BB\\u03B7\\u03BC\\u03AD\\u03C1\\u03B1\"}"))
+        log2stderr("JSON TEST: %s", json_parser_error(json));
+
+    json_parser_destroy(json);
+}
diff --git a/collectors/log2journal/log2journal-logfmt.c b/collectors/log2journal/log2journal-logfmt.c
new file mode 100644
index 00000000000000..5966cce901b6a5
--- /dev/null
+++ b/collectors/log2journal/log2journal-logfmt.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+#define LOGFMT_ERROR_LINE_MAX 1024
+#define LOGFMT_KEY_MAX 1024
+
+// State of the logfmt (key=value) parser.
+struct logfmt_state {
+ // the job that receives the extracted key/value pairs
+ LOG_JOB *jb;
+
+ // the document being parsed (set by logfmt_parse_document())
+ const char *line;
+ // offset of the next unread character in line
+ uint32_t pos;
+ // offset in key[] where parsed key names are written; set from the return
+ // value of copy_to_buffer() when the job has a prefix, otherwise 0
+ uint32_t key_start;
+
+ // optional prefix followed by the name of the key currently being parsed
+ char key[LOGFMT_KEY_MAX];
+ // last error message; emptied at the start of every document
+ char msg[LOGFMT_ERROR_LINE_MAX];
+};
+
+// pointer to the next unread character of the line
+#define logfmt_current_pos(lfs) &(lfs)->line[(lfs)->pos]
+// advance the read position by one character
+#define logfmt_consume_char(lfs) ++(lfs)->pos
+
+// Hand the current key (lfs->key) together with the given value to the job.
+static inline void logfmt_process_key_value(LOGFMT_STATE *lfs, const char *value, size_t len) {
+    LOG_JOB *job = lfs->jb;
+    const char *key = lfs->key;
+
+    log_job_send_extracted_key_value(job, key, value, len);
+}
+
+// Advance the read position past any whitespace at the current position.
+static inline void logfmt_skip_spaces(LOGFMT_STATE *lfs) {
+    const char *start = logfmt_current_pos(lfs);
+    const char *s = start;
+
+    // isspace() is only defined for unsigned char values and EOF, so bytes above
+    // 0x7f must not reach it as negative numbers
+    while(isspace((unsigned char)*s)) s++;
+
+    lfs->pos += s - start;
+}
+
+// Append the two characters '\' and 'n' at *d and advance it, provided more
+// than 3 bytes remain; otherwise nothing is written.
+static inline void copy_newline(LOGFMT_STATE *lfs __maybe_unused, char **d, size_t *remaining) {
+    if(*remaining <= 3)
+        return;
+
+    char *out = *d;
+    out[0] = '\\';
+    out[1] = 'n';
+
+    *d = out + 2;
+    *remaining -= 2;
+}
+
+// Append the two characters '\' and 't' at *d and advance it, provided more
+// than 3 bytes remain; otherwise nothing is written.
+static inline void copy_tab(LOGFMT_STATE *lfs __maybe_unused, char **d, size_t *remaining) {
+    if(*remaining <= 3)
+        return;
+
+    char *out = *d;
+    out[0] = '\\';
+    out[1] = 't';
+
+    *d = out + 2;
+    *remaining -= 2;
+}
+
+// Parse the value that follows "key=" and send it to the job.
+//
+// A value may be enclosed in single or double quotes; an unquoted value ends at
+// the first space. Backslash escapes are processed: \n and \t are emitted as
+// the two-character sequences "\n" and "\t", \f \b \r become a space, and any
+// other escaped character is copied as is. Empty values are not sent.
+//
+// Returns false, with lfs->msg set, when the value does not fit in the buffer
+// or when the closing quote is missing.
+static inline bool logftm_parse_value(LOGFMT_STATE *lfs) {
+    static __thread char value[JOURNAL_MAX_VALUE_LEN];
+
+    char quote = '\0';
+    const char *s = logfmt_current_pos(lfs);
+    if(*s == '\"' || *s == '\'') {
+        quote = *s;
+        logfmt_consume_char(lfs);
+    }
+
+    value[0] = '\0';
+    char *d = value;
+    s = logfmt_current_pos(lfs);
+    size_t remaining = sizeof(value);
+
+    char end_char = (char)(quote == '\0' ? ' ' : quote);
+    while (*s && *s != end_char) {
+        char c;
+
+        if (*s == '\\') {
+            s++;
+
+            // a backslash at the very end of the line escapes nothing; stop here
+            // so that s is never advanced past the terminating NUL
+            if(!*s)
+                break;
+
+            switch (*s) {
+                case 'n':
+                    copy_newline(lfs, &d, &remaining);
+                    s++;
+                    continue;
+
+                case 't':
+                    copy_tab(lfs, &d, &remaining);
+                    s++;
+                    continue;
+
+                case 'f':
+                case 'b':
+                case 'r':
+                    c = ' ';
+                    s++;
+                    break;
+
+                default:
+                    c = *s++;
+                    break;
+            }
+        }
+        else
+            c = *s++;
+
+        if(remaining < 2) {
+            // pos is a uint32_t, so it is converted to match %zu
+            snprintf(lfs->msg, sizeof(lfs->msg),
+                     "LOGFMT PARSER: truncated string value at pos %zu", (size_t)lfs->pos);
+            return false;
+        }
+        else {
+            *d++ = c;
+            remaining--;
+        }
+    }
+    *d = '\0';
+    lfs->pos += s - logfmt_current_pos(lfs);
+
+    s = logfmt_current_pos(lfs);
+
+    if(quote != '\0') {
+        if (*s != quote) {
+            snprintf(lfs->msg, sizeof(lfs->msg),
+                     "LOGFMT PARSER: missing quote at pos %zu: '%s'",
+                     (size_t)lfs->pos, s);
+            return false;
+        }
+        else
+            logfmt_consume_char(lfs);
+    }
+
+    if(d > value)
+        logfmt_process_key_value(lfs, value, d - value);
+
+    return true;
+}
+
+// Parse a key name up to the '=' sign into lfs->key, after the optional prefix.
+//
+// Every character is translated through journal_key_characters_map and
+// consecutive underscores are collapsed into one. A backslash makes the next
+// character be taken as is (it is still translated through the map).
+//
+// Returns false, with lfs->msg set, when the key does not fit in the buffer or
+// when no '=' follows the key.
+static inline bool logfmt_parse_key(LOGFMT_STATE *lfs) {
+    logfmt_skip_spaces(lfs);
+
+    char *d = &lfs->key[lfs->key_start];
+
+    size_t remaining = sizeof(lfs->key) - (d - lfs->key);
+
+    const char *s = logfmt_current_pos(lfs);
+    char last_c = '\0';
+    while(*s && *s != '=') {
+        char c;
+
+        if (*s == '\\') {
+            s++;
+
+            // a backslash at the very end of the line escapes nothing; stop here
+            // so that s is never advanced past the terminating NUL
+            if(!*s)
+                break;
+        }
+
+        c = journal_key_characters_map[(unsigned char)*s++];
+
+        if(c == '_' && last_c == '_')
+            continue;
+        else {
+            if(remaining < 2) {
+                // pos is a uint32_t, so it is converted to match %zu
+                snprintf(lfs->msg, sizeof(lfs->msg),
+                         "LOGFMT PARSER: key buffer full - keys are too long, at pos %zu", (size_t)lfs->pos);
+                return false;
+            }
+            *d++ = c;
+            remaining--;
+        }
+
+        last_c = c;
+    }
+    *d = '\0';
+    lfs->pos += s - logfmt_current_pos(lfs);
+
+    s = logfmt_current_pos(lfs);
+    if(*s != '=') {
+        snprintf(lfs->msg, sizeof(lfs->msg),
+                 "LOGFMT PARSER: key is missing the equal sign, at pos %zu", (size_t)lfs->pos);
+        return false;
+    }
+
+    logfmt_consume_char(lfs);
+
+    return true;
+}
+
+// Allocate a zero-filled logfmt parser state bound to the given job.
+// When the job has a key prefix, it is copied into the key buffer and the
+// value returned by copy_to_buffer() becomes the offset where key names start.
+LOGFMT_STATE *logfmt_parser_create(LOG_JOB *jb) {
+    LOGFMT_STATE *state = mallocz(sizeof(*state));
+    memset(state, 0, sizeof(*state));
+    state->jb = jb;
+
+    const char *prefix = jb->prefix;
+    if(prefix)
+        state->key_start = copy_to_buffer(state->key, sizeof(state->key), prefix, strlen(prefix));
+
+    return state;
+}
+
+// Release a parser state obtained from logfmt_parser_create(); NULL is ignored.
+void logfmt_parser_destroy(LOGFMT_STATE *lfs) {
+    if(!lfs)
+        return;
+
+    freez(lfs);
+}
+
+// Return the parser's message buffer (emptied at the start of every
+// logfmt_parse_document() call).
+const char *logfmt_parser_error(LOGFMT_STATE *lfs) {
+    const char *message = lfs->msg;
+    return message;
+}
+
+// Parse a whole logfmt line: repeated key=value pairs separated by whitespace.
+// At least one pair is always attempted. Returns false as soon as a key or a
+// value fails to parse (the parser message describes why), true once the end
+// of the line is reached.
+bool logfmt_parse_document(LOGFMT_STATE *lfs, const char *txt) {
+    // reset the per-document state
+    lfs->line = txt;
+    lfs->pos = 0;
+    lfs->msg[0] = '\0';
+
+    for(;;) {
+        if(!logfmt_parse_key(lfs))
+            return false;
+
+        if(!logftm_parse_value(lfs))
+            return false;
+
+        logfmt_skip_spaces(lfs);
+
+        const char *next = logfmt_current_pos(lfs);
+        if(*next == '\0')
+            return true;
+    }
+}
+
+
+// Developer smoke test: creates a parser with a key prefix and feeds it one
+// logfmt line that ends with a token that has no '=' sign. The parse result is
+// not checked.
+void logfmt_test(void) {
+    LOG_JOB jb = { .prefix = "NGINX_" }; // was misspelled "NIGNX_"
+    LOGFMT_STATE *logfmt = logfmt_parser_create(&jb);
+
+    logfmt_parse_document(logfmt, "x=1 y=2 z=\"3 \\ 4\" 5 ");
+
+    logfmt_parser_destroy(logfmt);
+}
diff --git a/collectors/log2journal/log2journal-params.c b/collectors/log2journal/log2journal-params.c
new file mode 100644
index 00000000000000..a7bb3e263c6040
--- /dev/null
+++ b/collectors/log2journal/log2journal-params.c
@@ -0,0 +1,404 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+// ----------------------------------------------------------------------------
+
+// Bring a job to its initial state: all fields zeroed, the key hashtable
+// initialized (with 32 passed as its size argument) and the line key named "LINE".
+void log_job_init(LOG_JOB *jb) {
+    memset(jb, 0, sizeof(LOG_JOB));
+
+    simple_hashtable_init_KEY(&jb->hashtable, 32);
+
+    hashed_key_set(&jb->line.key, "LINE");
+}
+
+// Walk all the slots of the hashtable and release every key that is flagged
+// HK_HASHTABLE_ALLOCATED. Keys without this flag are left untouched.
+static void simple_hashtable_cleanup_allocated_keys(SIMPLE_HASHTABLE_KEY *ht) {
+ SIMPLE_HASHTABLE_FOREACH_READ_ONLY(ht, sl, _KEY) {
+ HASHED_KEY *k = SIMPLE_HASHTABLE_FOREACH_READ_ONLY_VALUE(sl);
+ if(k && k->flags & HK_HASHTABLE_ALLOCATED) {
+ // the order of these statements is important!
+ simple_hashtable_del_slot_KEY(ht, sl); // remove any references to k
+ hashed_key_cleanup(k); // cleanup the internals of k
+ freez(k); // free k
+ }
+ }
+}
+
+// Release everything the job owns and leave the structure zeroed.
+void log_job_cleanup(LOG_JOB *jb) {
+    hashed_key_cleanup(&jb->line.key);
+
+    // these keys are set by --filename-key and --unmatched-key
+    hashed_key_cleanup(&jb->filename.key);
+    hashed_key_cleanup(&jb->unmatched.key);
+
+    if(jb->prefix) {
+        freez((void *) jb->prefix);
+        jb->prefix = NULL;
+    }
+
+    if(jb->pattern) {
+        freez((void *) jb->pattern);
+        jb->pattern = NULL;
+    }
+
+    // the include/exclude filters own a pattern copy, a compiled regex and
+    // match data (see search_pattern_set()); nothing else releases them
+    search_pattern_cleanup(&jb->filter.include);
+    search_pattern_cleanup(&jb->filter.exclude);
+
+    for(size_t i = 0; i < jb->injections.used ;i++)
+        injection_cleanup(&jb->injections.keys[i]);
+
+    for(size_t i = 0; i < jb->unmatched.injections.used ;i++)
+        injection_cleanup(&jb->unmatched.injections.keys[i]);
+
+    for(size_t i = 0; i < jb->renames.used ;i++)
+        rename_cleanup(&jb->renames.array[i]);
+
+    for(size_t i = 0; i < jb->rewrites.used; i++)
+        rewrite_cleanup(&jb->rewrites.array[i]);
+
+    txt_cleanup(&jb->rewrites.tmp);
+    txt_cleanup(&jb->filename.current);
+
+    simple_hashtable_cleanup_allocated_keys(&jb->hashtable);
+    simple_hashtable_destroy_KEY(&jb->hashtable);
+
+    // remove references to everything else, to reveal them in valgrind
+    memset(jb, 0, sizeof(*jb));
+}
+
+// ----------------------------------------------------------------------------
+
+// Set the key under which the filename is reported.
+// Returns false (after logging) when key is NULL or an empty string.
+bool log_job_filename_key_set(LOG_JOB *jb, const char *key, size_t key_len) {
+    bool usable = key && *key;
+
+    if(usable)
+        hashed_key_len_set(&jb->filename.key, key, key_len);
+    else
+        log2stderr("filename key cannot be empty.");
+
+    return usable;
+}
+
+// Set the prefix that is prepended to every extracted key, replacing any
+// previously set prefix. The first prefix_len bytes of prefix are copied.
+// Returns false (after logging) when prefix is NULL or an empty string.
+bool log_job_key_prefix_set(LOG_JOB *jb, const char *prefix, size_t prefix_len) {
+    if(!prefix || !*prefix) {
+        // this message used to say "filename key", copied from the function above
+        log2stderr("prefix cannot be empty.");
+        return false;
+    }
+
+    if(jb->prefix)
+        freez((char*)jb->prefix);
+
+    jb->prefix = strndupz(prefix, prefix_len);
+
+    return true;
+}
+
+// Set the pattern of the job, replacing any previously set pattern.
+// The first pattern_len bytes of pattern are copied.
+// Returns false (after logging) when pattern is NULL or an empty string.
+bool log_job_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len) {
+    if(!pattern || !*pattern) {
+        // this message used to say "filename key", copied from another setter
+        log2stderr("pattern cannot be empty.");
+        return false;
+    }
+
+    if(jb->pattern)
+        freez((char*)jb->pattern);
+
+    jb->pattern = strndupz(pattern, pattern_len);
+
+    return true;
+}
+
+// Install the include filter of the job. Only one include filter may be set;
+// returns false (after logging) when one exists already or when
+// search_pattern_set() rejects the pattern.
+bool log_job_include_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len) {
+    if(jb->filter.include.re != NULL) {
+        log2stderr("FILTER INCLUDE: there is already an include filter set");
+        return false;
+    }
+
+    bool compiled = search_pattern_set(&jb->filter.include, pattern, pattern_len);
+    if(!compiled)
+        log2stderr("FILTER INCLUDE: failed: %s", jb->filter.include.error.txt);
+
+    return compiled;
+}
+
+// Install the exclude filter of the job. Only one exclude filter may be set;
+// returns false (after logging) when one exists already or when
+// search_pattern_set() rejects the pattern.
+bool log_job_exclude_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len) {
+    if(jb->filter.exclude.re) {
+        // this message used to be prefixed "FILTER INCLUDE"
+        log2stderr("FILTER EXCLUDE: there is already an exclude filter set");
+        return false;
+    }
+
+    if(!search_pattern_set(&jb->filter.exclude, pattern, pattern_len)) {
+        log2stderr("FILTER EXCLUDE: failed: %s", jb->filter.exclude.error.txt);
+        return false;
+    }
+
+    return true;
+}
+
+// ----------------------------------------------------------------------------
+
+// Parse a rename parameter of the form NEW_KEY=OLD_KEY and register it.
+// Returns false (after logging) when there is no '=' or when nothing precedes
+// it; otherwise returns what log_job_rename_add() returns.
+static bool parse_rename(LOG_JOB *jb, const char *param) {
+    const char *sep = strchr(param, '=');
+
+    if (sep == NULL || sep == param) {
+        log2stderr("Error: Invalid rename format, '=' not found in %s", param);
+        return false;
+    }
+
+    // everything before '=' is the new name, everything after it the old one
+    size_t new_name_len = sep - param;
+    const char *old_name = sep + 1;
+
+    return log_job_rename_add(jb, param, new_name_len, old_name, strlen(old_name));
+}
+
+// True for any character that is not a letter, a digit or a control character
+// (note that this includes the space character).
+static bool is_symbol(char c) {
+    // the <ctype.h> classifiers are only defined for unsigned char values and
+    // EOF, so a plain char above 0x7f must not be passed as a negative number
+    unsigned char uc = (unsigned char)c;
+
+    return !isalpha(uc) && !isdigit(uc) && !iscntrl(uc);
+}
+
+// Keywords accepted in the options part of a rewrite rule.
+// parse_rewrite_flags() applies every entry whose keyword matches a token:
+// action 1 sets the flag, action 0 clears it. A keyword listed twice therefore
+// sets one flag and clears another (e.g. "match" sets RW_MATCH_PCRE2 and
+// clears RW_MATCH_NON_EMPTY).
+struct {
+ const char *keyword;
+ int action;
+ RW_FLAGS flag;
+} rewrite_flags[] = {
+ {"match", 1, RW_MATCH_PCRE2},
+ {"match", 0, RW_MATCH_NON_EMPTY},
+
+ {"regex", 1, RW_MATCH_PCRE2},
+ {"regex", 0, RW_MATCH_NON_EMPTY},
+
+ {"pcre2", 1, RW_MATCH_PCRE2},
+ {"pcre2", 0, RW_MATCH_NON_EMPTY},
+
+ {"non_empty", 1, RW_MATCH_NON_EMPTY},
+ {"non_empty", 0, RW_MATCH_PCRE2},
+
+ {"non-empty", 1, RW_MATCH_NON_EMPTY},
+ {"non-empty", 0, RW_MATCH_PCRE2},
+
+ {"not_empty", 1, RW_MATCH_NON_EMPTY},
+ {"not_empty", 0, RW_MATCH_PCRE2},
+
+ {"not-empty", 1, RW_MATCH_NON_EMPTY},
+ {"not-empty", 0, RW_MATCH_PCRE2},
+
+ // "stop" clears RW_DONT_STOP; the remaining spellings set it
+ {"stop", 0, RW_DONT_STOP},
+ {"no-stop", 1, RW_DONT_STOP},
+ {"no_stop", 1, RW_DONT_STOP},
+ {"dont-stop", 1, RW_DONT_STOP},
+ {"dont_stop", 1, RW_DONT_STOP},
+ {"continue", 1, RW_DONT_STOP},
+ // "inject" sets RW_INJECT, "existing" clears it
+ {"inject", 1, RW_INJECT},
+ {"existing", 0, RW_INJECT},
+};
+
+// Convert a comma separated list of rewrite option keywords into RW_FLAGS.
+// Starts from RW_MATCH_PCRE2 and applies, in order, every rewrite_flags[] entry
+// whose keyword equals a token. Unknown tokens are reported and ignored.
+RW_FLAGS parse_rewrite_flags(const char *options) {
+    RW_FLAGS flags = RW_MATCH_PCRE2; // Default option
+
+    // Tokenizing modifies the string, so work on a private copy
+    char *optionsCopy = strdup(options);
+    if(!optionsCopy) {
+        log2stderr("Warning: cannot allocate memory to parse rewrite options '%s'.", options);
+        return flags;
+    }
+
+    // strtok_r() keeps its position in saveptr instead of hidden global state
+    char *saveptr = NULL;
+    for(char *token = strtok_r(optionsCopy, ",", &saveptr);
+        token != NULL;
+        token = strtok_r(NULL, ",", &saveptr)) {
+
+        bool found = false;
+
+        // no break: a keyword may appear in several entries and all of them apply
+        for (size_t i = 0; i < sizeof(rewrite_flags) / sizeof(rewrite_flags[0]); i++) {
+            if (strcmp(token, rewrite_flags[i].keyword) != 0)
+                continue;
+
+            if (rewrite_flags[i].action == 1)
+                flags |= rewrite_flags[i].flag;  // Set the flag
+            else
+                flags &= ~rewrite_flags[i].flag; // Unset the flag
+
+            found = true;
+        }
+
+        if(!found)
+            log2stderr("Warning: rewrite options '%s' is not understood.", token);
+    }
+
+    free(optionsCopy);
+
+    return flags;
+}
+
+
+// Parse a rewrite parameter of the form
+//
+//     KEY=<sep>SEARCH<sep>REPLACEMENT[<sep>OPTIONS]
+//
+// where <sep> is the character that follows '=' and must not be a letter, a
+// digit or a control character. OPTIONS is handed to parse_rewrite_flags().
+// Returns false (after logging) on a malformed parameter; otherwise returns
+// what log_job_rewrite_add() returns.
+static bool parse_rewrite(LOG_JOB *jb, const char *param) {
+    // Search for '=' in param
+    const char *equal_sign = strchr(param, '=');
+    if (!equal_sign || equal_sign == param) {
+        log2stderr("Error: Invalid rewrite format, '=' not found in %s", param);
+        return false;
+    }
+
+    // Get the next character as the separator
+    char separator = *(equal_sign + 1);
+    if (!separator || !is_symbol(separator)) {
+        log2stderr("Error: rewrite separator not found after '=', or is not one of /\\|-# in: %s", param);
+        return false;
+    }
+
+    // Find the next occurrence of the separator
+    const char *second_separator = strchr(equal_sign + 2, separator);
+    if (!second_separator) {
+        log2stderr("Error: rewrite second separator not found in: %s", param);
+        return false;
+    }
+
+    // Check if the search pattern is empty
+    // NOTE(review): second_separator is searched starting at equal_sign + 2, so
+    // this comparison cannot be true; an empty search pattern reaches the code
+    // below, where RW_MATCH_PCRE2 is cleared for it. Left unchanged on purpose.
+    if (equal_sign + 1 == second_separator) {
+        log2stderr("Error: rewrite search pattern is empty in: %s", param);
+        return false;
+    }
+
+    // Check if the replacement pattern is empty
+    if (*(second_separator + 1) == '\0') {
+        log2stderr("Error: rewrite replacement pattern is empty in: %s", param);
+        return false;
+    }
+
+    RW_FLAGS flags = RW_MATCH_PCRE2;
+    const char *third_separator = strchr(second_separator + 1, separator);
+    if(third_separator)
+        flags = parse_rewrite_flags(third_separator + 1);
+
+    // Extract key, search pattern, and replacement pattern.
+    // All three come from the same allocator family, since they are all
+    // released with freez() below (the replacement used to come from strndup()).
+    char *key = strndupz(param, equal_sign - param);
+    char *search_pattern = strndupz(equal_sign + 2, second_separator - (equal_sign + 2));
+    char *replace_pattern = third_separator
+                            ? strndupz(second_separator + 1, third_separator - (second_separator + 1))
+                            : strdupz(second_separator + 1);
+
+    if(!*search_pattern)
+        flags &= ~RW_MATCH_PCRE2;
+
+    bool ret = log_job_rewrite_add(jb, key, flags, search_pattern, replace_pattern);
+
+    freez(key);
+    freez(search_pattern);
+    freez(replace_pattern);
+
+    return ret;
+}
+
+// Parse an injection parameter of the form KEY=VALUE and add it to the job,
+// either to the regular injections or to the ones for unmatched lines.
+// Returns false (after logging) only when there is no '=' in the parameter.
+static bool parse_inject(LOG_JOB *jb, const char *value, bool unmatched) {
+    const char *sep = strchr(value, '=');
+
+    if (sep == NULL) {
+        log2stderr("Error: injection '%s' does not have an equal sign.", value);
+        return false;
+    }
+
+    size_t key_len = sep - value;
+    const char *injected = sep + 1;
+
+    log_job_injection_add(jb, value, key_len, injected, strlen(injected), unmatched);
+
+    return true;
+}
+
+// Parse the command line into the job.
+//
+// Options take their value either as "--option=value" or as "--option value".
+// An argument that is not a recognized option is taken as the pattern; a bare
+// argument in last position is always taken as the pattern. Only one pattern
+// is accepted.
+//
+// Returns false when an option is rejected, when more than one pattern is
+// given, or when no pattern has been set by the end of the command line.
+// --help / -h prints the help and exits.
+bool log_job_command_line_parse_parameters(LOG_JOB *jb, int argc, char **argv) {
+    for (int i = 1; i < argc; i++) {
+        char *arg = argv[i];
+        if (strcmp(arg, "--help") == 0 || strcmp(arg, "-h") == 0) {
+            log_job_command_line_help(argv[0]);
+            exit(0);
+        }
+#if defined(NETDATA_DEV_MODE) || defined(NETDATA_INTERNAL_CHECKS)
+        else if(strcmp(arg, "--test") == 0) {
+            // logfmt_test();
+            json_test();
+            exit(1);
+        }
+#endif
+        else if (strcmp(arg, "--show-config") == 0) {
+            jb->show_config = true;
+        }
+        else {
+            char buffer[1024];
+            char *param = NULL;
+            char *value = NULL;
+
+            // true only when the value was taken from the next argv entry
+            bool value_is_next_arg = false;
+
+            char *equal_sign = strchr(arg, '=');
+            if (equal_sign) {
+                copy_to_buffer(buffer, sizeof(buffer), arg, equal_sign - arg);
+                param = buffer;
+                value = equal_sign + 1;
+            }
+            else {
+                param = arg;
+                if (i + 1 < argc) {
+                    value = argv[++i];
+                    value_is_next_arg = true;
+                }
+                else {
+                    if (!jb->pattern) {
+                        log_job_pattern_set(jb, arg, strlen(arg));
+                        continue;
+                    } else {
+                        log2stderr("Error: Multiple patterns detected. Specify only one pattern. The first is '%s', the second is '%s'", jb->pattern, arg);
+                        return false;
+                    }
+                }
+            }
+
+            if (strcmp(param, "--filename-key") == 0) {
+                if(!log_job_filename_key_set(jb, value, value ? strlen(value) : 0))
+                    return false;
+            }
+            else if (strcmp(param, "--prefix") == 0) {
+                if(!log_job_key_prefix_set(jb, value, value ? strlen(value) : 0))
+                    return false;
+            }
+#ifdef HAVE_LIBYAML
+            else if (strcmp(param, "-f") == 0 || strcmp(param, "--file") == 0) {
+                if (!yaml_parse_file(value, jb))
+                    return false;
+            }
+            else if (strcmp(param, "-c") == 0 || strcmp(param, "--config") == 0) {
+                if (!yaml_parse_config(value, jb))
+                    return false;
+            }
+#endif
+            else if (strcmp(param, "--unmatched-key") == 0)
+                hashed_key_set(&jb->unmatched.key, value);
+            else if (strcmp(param, "--inject") == 0) {
+                if (!parse_inject(jb, value, false))
+                    return false;
+            }
+            else if (strcmp(param, "--inject-unmatched") == 0) {
+                if (!parse_inject(jb, value, true))
+                    return false;
+            }
+            else if (strcmp(param, "--rewrite") == 0) {
+                if (!parse_rewrite(jb, value))
+                    return false;
+            }
+            else if (strcmp(param, "--rename") == 0) {
+                if (!parse_rename(jb, value))
+                    return false;
+            }
+            else if (strcmp(param, "--include") == 0) {
+                if (!log_job_include_pattern_set(jb, value, strlen(value)))
+                    return false;
+            }
+            else if (strcmp(param, "--exclude") == 0) {
+                if (!log_job_exclude_pattern_set(jb, value, strlen(value)))
+                    return false;
+            }
+            else {
+                // Not an option: the argument is the pattern. Give back the
+                // next argument only if it was consumed as a value above.
+                // Stepping back unconditionally made a pattern that contains
+                // '=' be processed twice and rejected as a second pattern.
+                if(value_is_next_arg)
+                    i--;
+
+                if (!jb->pattern) {
+                    log_job_pattern_set(jb, arg, strlen(arg));
+                    continue;
+                } else {
+                    log2stderr("Error: Multiple patterns detected. Specify only one pattern. The first is '%s', the second is '%s'", jb->pattern, arg);
+                    return false;
+                }
+            }
+        }
+    }
+
+    // Check if a pattern is set and exactly one pattern is specified
+    if (!jb->pattern) {
+        log2stderr("Warning: pattern not specified. Try the default config with: -c default");
+        log_job_command_line_help(argv[0]);
+        return false;
+    }
+
+    return true;
+}
diff --git a/collectors/log2journal/log2journal-pattern.c b/collectors/log2journal/log2journal-pattern.c
new file mode 100644
index 00000000000000..4b7e9026b3a4c7
--- /dev/null
+++ b/collectors/log2journal/log2journal-pattern.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+void search_pattern_cleanup(SEARCH_PATTERN *sp) {
+ if(sp->pattern) {
+ freez((void *)sp->pattern);
+ sp->pattern = NULL;
+ }
+
+ if(sp->re) {
+ pcre2_code_free(sp->re);
+ sp->re = NULL;
+ }
+
+ if(sp->match_data) {
+ pcre2_match_data_free(sp->match_data);
+ sp->match_data = NULL;
+ }
+
+ txt_cleanup(&sp->error);
+}
+
+static void pcre2_error_message(SEARCH_PATTERN *sp, int rc, int pos) {
+ char msg[1024];
+ pcre2_get_error_in_buffer(msg, sizeof(msg), rc, pos);
+ txt_replace(&sp->error, msg, strlen(msg));
+}
+
+static inline bool compile_pcre2(SEARCH_PATTERN *sp) {
+ int error_number;
+ PCRE2_SIZE error_offset;
+ PCRE2_SPTR pattern_ptr = (PCRE2_SPTR)sp->pattern;
+
+ sp->re = pcre2_compile(pattern_ptr, PCRE2_ZERO_TERMINATED, 0, &error_number, &error_offset, NULL);
+ if (!sp->re) {
+ pcre2_error_message(sp, error_number, (int) error_offset);
+ return false;
+ }
+
+ return true;
+}
+
+// Replace the contents of a search pattern: whatever it held is released, the
+// first search_pattern_len bytes of search_pattern are copied and compiled,
+// and match data is created for the compiled regex.
+// Returns false when the pattern does not compile.
+bool search_pattern_set(SEARCH_PATTERN *sp, const char *search_pattern, size_t search_pattern_len) {
+    search_pattern_cleanup(sp);
+
+    sp->pattern = strndupz(search_pattern, search_pattern_len);
+
+    bool compiled = compile_pcre2(sp);
+    if (compiled)
+        sp->match_data = pcre2_match_data_create_from_pattern(sp->re, NULL);
+
+    return compiled;
+}
diff --git a/collectors/log2journal/log2journal-pcre2.c b/collectors/log2journal/log2journal-pcre2.c
new file mode 100644
index 00000000000000..185e6910864aa4
--- /dev/null
+++ b/collectors/log2journal/log2journal-pcre2.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+#define PCRE2_ERROR_LINE_MAX 1024
+#define PCRE2_KEY_MAX 1024
+
+// State of the PCRE2 (regular expression) parser.
+struct pcre2_state {
+ // the job that supplies the pattern and receives the extracted key/value pairs
+ LOG_JOB *jb;
+
+ // the document being matched (set by pcre2_parse_document())
+ const char *line;
+ // reset to 0 at the start of every document
+ uint32_t pos;
+ // offset in key[] where group names are written; set from the return value
+ // of copy_to_buffer() when the job has a prefix, otherwise 0
+ uint32_t key_start;
+
+ // the compiled job pattern; NULL when compilation failed
+ pcre2_code *re;
+ // match data created from the compiled pattern
+ pcre2_match_data *match_data;
+
+ // optional prefix followed by the converted name of the current group
+ char key[PCRE2_KEY_MAX];
+ // last error message; emptied at the start of every document
+ char msg[PCRE2_ERROR_LINE_MAX];
+};
+
+// Write the given group name into the key buffer, after the optional prefix,
+// translating every character through journal_key_characters_map.
+// The name is truncated when it does not fit; the result is always terminated.
+static inline void copy_and_convert_key(PCRE2_STATE *pcre2, const char *key) {
+    char *d = &pcre2->key[pcre2->key_start];
+    size_t remaining = sizeof(pcre2->key) - pcre2->key_start;
+
+    while(remaining >= 2 && *key) {
+        // index with unsigned char: a plain (unsigned) cast turns a negative
+        // char into a huge index that is far outside the map
+        *d = journal_key_characters_map[(unsigned char) (*key)];
+        remaining--;
+        key++;
+        d++;
+    }
+
+    *d = '\0';
+}
+
+// After a successful match, walk the name table of the compiled pattern and
+// send every named group to the job: the converted group name is the key and
+// the matched part of line is the value.
+static inline void jb_traverse_pcre2_named_groups_and_send_keys(PCRE2_STATE *pcre2, pcre2_code *re, pcre2_match_data *match_data, char *line) {
+    PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);
+    uint32_t names_count;
+    pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &names_count);
+
+    if (names_count > 0) {
+        PCRE2_SPTR name_table;
+        pcre2_pattern_info(re, PCRE2_INFO_NAMETABLE, &name_table);
+        uint32_t name_entry_size;
+        pcre2_pattern_info(re, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size);
+
+        const unsigned char *table_ptr = name_table;
+        for (uint32_t i = 0; i < names_count; i++) {
+            // each entry holds the group number in its first two bytes (most
+            // significant first), followed by the zero terminated group name
+            int n = (table_ptr[0] << 8) | table_ptr[1];
+            const char *group_name = (const char *)(table_ptr + 2);
+
+            PCRE2_SIZE start_offset = ovector[2 * n];
+            PCRE2_SIZE end_offset = ovector[2 * n + 1];
+
+            // A group that did not take part in the match has both offsets set
+            // to PCRE2_UNSET. Report it (and any inverted range) as an empty
+            // value instead of forming a pointer far outside the line.
+            if(start_offset == PCRE2_UNSET || end_offset == PCRE2_UNSET || end_offset < start_offset)
+                start_offset = end_offset = 0;
+
+            PCRE2_SIZE group_length = end_offset - start_offset;
+
+            copy_and_convert_key(pcre2, group_name);
+            log_job_send_extracted_key_value(pcre2->jb, pcre2->key, line + start_offset, group_length);
+
+            table_ptr += name_entry_size;
+        }
+    }
+}
+
+void pcre2_get_error_in_buffer(char *msg, size_t msg_len, int rc, int pos) {
+ int l;
+
+ if(pos >= 0)
+ l = snprintf(msg, msg_len, "PCRE2 error %d at pos %d on: ", rc, pos);
+ else
+ l = snprintf(msg, msg_len, "PCRE2 error %d on: ", rc);
+
+ pcre2_get_error_message(rc, (PCRE2_UCHAR *)&msg[l], msg_len - l);
+}
+
+static void pcre2_error_message(PCRE2_STATE *pcre2, int rc, int pos) {
+ pcre2_get_error_in_buffer(pcre2->msg, sizeof(pcre2->msg), rc, pos);
+}
+
+bool pcre2_has_error(PCRE2_STATE *pcre2) {
+ return !pcre2->re || pcre2->msg[0];
+}
+
+// Allocate a zero-filled PCRE2 parser state for the job and compile the
+// pattern of the job. A state is returned even when compilation fails: in that
+// case its message buffer describes the error and no match data is created
+// (see pcre2_has_error()).
+PCRE2_STATE *pcre2_parser_create(LOG_JOB *jb) {
+    PCRE2_STATE *state = mallocz(sizeof(*state));
+    memset(state, 0, sizeof(*state));
+    state->jb = jb;
+
+    const char *prefix = jb->prefix;
+    if(prefix)
+        state->key_start = copy_to_buffer(state->key, sizeof(state->key), prefix, strlen(prefix));
+
+    int errcode;
+    PCRE2_SIZE erroffset;
+    state->re = pcre2_compile((PCRE2_SPTR)jb->pattern, PCRE2_ZERO_TERMINATED, 0, &errcode, &erroffset, NULL);
+
+    if (state->re)
+        state->match_data = pcre2_match_data_create_from_pattern(state->re, NULL);
+    else
+        pcre2_error_message(state, errcode, erroffset);
+
+    return state;
+}
+
+// Release a parser state obtained from pcre2_parser_create(), together with
+// the compiled pattern and the match data it owns. NULL is ignored.
+void pcre2_parser_destroy(PCRE2_STATE *pcre2) {
+    if(!pcre2)
+        return;
+
+    // both are created in pcre2_parser_create() and are not released elsewhere
+    if(pcre2->match_data)
+        pcre2_match_data_free(pcre2->match_data);
+
+    if(pcre2->re)
+        pcre2_code_free(pcre2->re);
+
+    freez(pcre2);
+}
+
+// Return the parser's message buffer (emptied at the start of every
+// pcre2_parse_document() call).
+const char *pcre2_parser_error(PCRE2_STATE *pcre2) {
+    const char *message = pcre2->msg;
+    return message;
+}
+
+// Match txt against the compiled pattern and send all named groups to the job.
+// When len is 0 the length of txt is measured with strlen().
+// Returns false, with the message buffer filled in, when pcre2_match() reports
+// a negative result.
+bool pcre2_parse_document(PCRE2_STATE *pcre2, const char *txt, size_t len) {
+    // reset the per-document state
+    pcre2->line = txt;
+    pcre2->pos = 0;
+    pcre2->msg[0] = '\0';
+
+    size_t subject_len = len ? len : strlen(txt);
+
+    int rc = pcre2_match(pcre2->re, (PCRE2_SPTR)pcre2->line, subject_len, 0, 0, pcre2->match_data, NULL);
+    if(rc >= 0) {
+        jb_traverse_pcre2_named_groups_and_send_keys(pcre2, pcre2->re, pcre2->match_data, (char *)pcre2->line);
+        return true;
+    }
+
+    pcre2_error_message(pcre2, rc, -1);
+    return false;
+}
+
+// Developer smoke test: creates a parser for a job that only has a key prefix
+// and feeds it one line. The parse result is not checked.
+// NOTE(review): the job has no pattern set, so the parser is created without a
+// usable regular expression - confirm this is what the test intends.
+void pcre2_test(void) {
+    LOG_JOB jb = { .prefix = "NGINX_" }; // was misspelled "NIGNX_"
+    PCRE2_STATE *pcre2 = pcre2_parser_create(&jb);
+
+    pcre2_parse_document(pcre2, "{\"value\":\"\\u\\u039A\\u03B1\\u03BB\\u03B7\\u03BC\\u03AD\\u03C1\\u03B1\"}", 0);
+
+    pcre2_parser_destroy(pcre2);
+}
diff --git a/collectors/log2journal/log2journal-rename.c b/collectors/log2journal/log2journal-rename.c
new file mode 100644
index 00000000000000..c6975779f6885d
--- /dev/null
+++ b/collectors/log2journal/log2journal-rename.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+// Release both keys of a rename rule.
+void rename_cleanup(RENAME *rn) {
+    HASHED_KEY *keys[] = { &rn->new_key, &rn->old_key };
+
+    for(size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); i++)
+        hashed_key_cleanup(keys[i]);
+}
+
+// Append a rename rule (old key -> new key) to the job.
+// Returns false (after logging) when MAX_RENAMES rules exist already.
+bool log_job_rename_add(LOG_JOB *jb, const char *new_key, size_t new_key_len, const char *old_key, size_t old_key_len) {
+    if(jb->renames.used < MAX_RENAMES) {
+        RENAME *slot = &jb->renames.array[jb->renames.used];
+        jb->renames.used++;
+
+        hashed_key_len_set(&slot->new_key, new_key, new_key_len);
+        hashed_key_len_set(&slot->old_key, old_key, old_key_len);
+
+        return true;
+    }
+
+    log2stderr("Error: too many renames. You can rename up to %d fields.", MAX_RENAMES);
+    return false;
+}
diff --git a/collectors/log2journal/log2journal-replace.c b/collectors/log2journal/log2journal-replace.c
new file mode 100644
index 00000000000000..429d615da5919f
--- /dev/null
+++ b/collectors/log2journal/log2journal-replace.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+// Release a single replacement node together with its name.
+// The nodes linked after it are not touched.
+void replace_node_free(REPLACE_NODE *rpn) {
+    // detach first, then drop the name and the node itself
+    rpn->next = NULL;
+    hashed_key_cleanup(&rpn->name);
+
+    freez(rpn);
+}
+
+// Release the pattern copy and every node of a replacement pattern, leaving
+// both pointers NULL.
+void replace_pattern_cleanup(REPLACE_PATTERN *rp) {
+    if(rp->pattern != NULL) {
+        freez((void *)rp->pattern);
+        rp->pattern = NULL;
+    }
+
+    // unlink the head on every step, so that the list is valid at all times
+    for(REPLACE_NODE *head = rp->nodes; head != NULL; head = rp->nodes) {
+        rp->nodes = head->next;
+        replace_node_free(head);
+    }
+}
+
+// Create a node named after text and append it at the end of the list at *head.
+// is_variable marks the node as a variable reference rather than literal text.
+// Returns the new node, or NULL when it could not be allocated.
+static REPLACE_NODE *replace_pattern_add_node(REPLACE_NODE **head, bool is_variable, const char *text) {
+    REPLACE_NODE *node = callocz(1, sizeof(REPLACE_NODE));
+    if (node == NULL)
+        return NULL;
+
+    hashed_key_set(&node->name, text);
+    node->is_variable = is_variable;
+    node->next = NULL;
+
+    // find the link that ends the list (the head itself when the list is empty)
+    REPLACE_NODE **tail = head;
+    while (*tail != NULL)
+        tail = &(*tail)->next;
+
+    *tail = node;
+
+    return node;
+}
+
+// Replace the contents of a replacement pattern with the given pattern,
+// splitting it into a list of nodes: ${name} becomes a variable node and
+// everything else becomes literal text nodes. has_variables is set when at
+// least one variable node exists.
+// Returns false (after logging) when a "${" has no closing brace or when a
+// node cannot be created.
+bool replace_pattern_set(REPLACE_PATTERN *rp, const char *pattern) {
+    replace_pattern_cleanup(rp);
+
+    rp->pattern = strdupz(pattern);
+    const char *current = rp->pattern;
+
+    while (*current != '\0') {
+        if (*current == '$' && *(current + 1) == '{') {
+            // Start of a variable
+            const char *end = strchr(current, '}');
+            if (!end) {
+                log2stderr("Error: Missing closing brace in replacement pattern: %s", rp->pattern);
+                return false;
+            }
+
+            size_t name_length = end - current - 2; // Length of the variable name
+            char *variable_name = strndupz(current + 2, name_length);
+            if (!variable_name) {
+                log2stderr("Error: Memory allocation failed for variable name.");
+                return false;
+            }
+
+            REPLACE_NODE *node = replace_pattern_add_node(&(rp->nodes), true, variable_name);
+
+            // The temporary name used to be released only on failure, leaking it
+            // for every node added. hashed_key_set() is handed string literals
+            // and caller-freed buffers elsewhere in this tool, so the node is
+            // expected to keep its own copy - NOTE(review): confirm.
+            freez(variable_name);
+
+            if (!node) {
+                log2stderr("Error: Failed to add replacement node for variable.");
+                return false;
+            }
+
+            current = end + 1; // Move past the variable
+        }
+        else {
+            // Start of literal text
+            const char *start = current;
+            while (*current != '\0' && !(*current == '$' && *(current + 1) == '{')) {
+                current++;
+            }
+
+            size_t text_length = current - start;
+            char *text = strndupz(start, text_length);
+            if (!text) {
+                log2stderr("Error: Memory allocation failed for literal text.");
+                return false;
+            }
+
+            REPLACE_NODE *node = replace_pattern_add_node(&(rp->nodes), false, text);
+
+            // same as above: the node does not take ownership of the temporary
+            freez(text);
+
+            if (!node) {
+                log2stderr("Error: Failed to add replacement node for text.");
+                return false;
+            }
+        }
+    }
+
+    for(REPLACE_NODE *node = rp->nodes; node; node = node->next) {
+        if(node->is_variable) {
+            rp->has_variables = true;
+            break;
+        }
+    }
+
+    return true;
+}
diff --git a/collectors/log2journal/log2journal-rewrite.c b/collectors/log2journal/log2journal-rewrite.c
new file mode 100644
index 00000000000000..112391bf035191
--- /dev/null
+++ b/collectors/log2journal/log2journal-rewrite.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+// Release everything a rewrite rule owns and reset its flags to RW_NONE.
+// Which matcher is released depends on the flags; RW_MATCH_PCRE2 takes
+// precedence over RW_MATCH_NON_EMPTY.
+void rewrite_cleanup(REWRITE *rw) {
+    hashed_key_cleanup(&rw->key);
+
+    bool uses_pcre2 = (rw->flags & RW_MATCH_PCRE2) != 0;
+    bool uses_non_empty = (rw->flags & RW_MATCH_NON_EMPTY) != 0;
+
+    if(uses_pcre2)
+        search_pattern_cleanup(&rw->match_pcre2);
+    else if(uses_non_empty)
+        replace_pattern_cleanup(&rw->match_non_empty);
+
+    replace_pattern_cleanup(&rw->value);
+    rw->flags = RW_NONE;
+}
+
+// Append a rewrite rule for key to the job.
+// flags select how search_pattern is used (RW_MATCH_PCRE2: compiled as a regex,
+// RW_MATCH_NON_EMPTY: parsed as a replacement pattern); replace_pattern, when
+// not empty, becomes the new value.
+// Returns false (after logging or after the failing setter reported it) when
+// MAX_REWRITES rules exist already, when a matcher is requested without a
+// search pattern, or when a pattern is rejected; in the last case the
+// half-built rule is released and its slot is given back.
+bool log_job_rewrite_add(LOG_JOB *jb, const char *key, RW_FLAGS flags, const char *search_pattern, const char *replace_pattern) {
+    if(jb->rewrites.used >= MAX_REWRITES) {
+        log2stderr("Error: too many rewrites. You can add up to %d rewrite rules.", MAX_REWRITES);
+        return false;
+    }
+
+    bool wants_matcher = (flags & (RW_MATCH_PCRE2|RW_MATCH_NON_EMPTY)) != 0;
+    bool has_search = search_pattern && *search_pattern;
+    if(wants_matcher && !has_search) {
+        log2stderr("Error: rewrite for key '%s' does not specify a search pattern.", key);
+        return false;
+    }
+
+    REWRITE *rule = &jb->rewrites.array[jb->rewrites.used++];
+    rule->flags = flags;
+
+    hashed_key_set(&rule->key, key);
+
+    // each step runs only when all the previous ones succeeded
+    bool failed = (flags & RW_MATCH_PCRE2) && !search_pattern_set(&rule->match_pcre2, search_pattern, strlen(search_pattern));
+
+    if(!failed)
+        failed = (flags & RW_MATCH_NON_EMPTY) && !replace_pattern_set(&rule->match_non_empty, search_pattern);
+
+    if(!failed)
+        failed = replace_pattern && *replace_pattern && !replace_pattern_set(&rule->value, replace_pattern);
+
+    if(failed) {
+        rewrite_cleanup(rule);
+        jb->rewrites.used--;
+        return false;
+    }
+
+    return true;
+}
diff --git a/collectors/log2journal/log2journal-yaml.c b/collectors/log2journal/log2journal-yaml.c
new file mode 100644
index 00000000000000..862e7bf4b77a70
--- /dev/null
+++ b/collectors/log2journal/log2journal-yaml.c
@@ -0,0 +1,964 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+// ----------------------------------------------------------------------------
+// yaml configuration file
+
+#ifdef HAVE_LIBYAML
+
+// Translate a libyaml event type into its symbolic name, for use in diagnostics.
+// Any type without an explicit entry below is reported as "UNKNOWN".
+// The returned pointer refers to a string literal and must not be freed.
+static const char *yaml_event_name(yaml_event_type_t type) {
+    const char *name = "UNKNOWN";
+
+    switch (type) {
+        case YAML_STREAM_START_EVENT:
+            name = "YAML_STREAM_START_EVENT"; break;
+        case YAML_STREAM_END_EVENT:
+            name = "YAML_STREAM_END_EVENT"; break;
+        case YAML_DOCUMENT_START_EVENT:
+            name = "YAML_DOCUMENT_START_EVENT"; break;
+        case YAML_DOCUMENT_END_EVENT:
+            name = "YAML_DOCUMENT_END_EVENT"; break;
+
+        case YAML_MAPPING_START_EVENT:
+            name = "YAML_MAPPING_START_EVENT"; break;
+        case YAML_MAPPING_END_EVENT:
+            name = "YAML_MAPPING_END_EVENT"; break;
+        case YAML_SEQUENCE_START_EVENT:
+            name = "YAML_SEQUENCE_START_EVENT"; break;
+        case YAML_SEQUENCE_END_EVENT:
+            name = "YAML_SEQUENCE_END_EVENT"; break;
+
+        case YAML_SCALAR_EVENT:
+            name = "YAML_SCALAR_EVENT"; break;
+        case YAML_ALIAS_EVENT:
+            name = "YAML_ALIAS_EVENT"; break;
+
+        case YAML_NO_EVENT:
+            name = "YAML_NO_EVENT"; break;
+
+        default:
+            break;
+    }
+
+    return name;
+}
+
+// Print a configuration error to stderr. The message is prefixed with the reporting
+// source location, the current parser position, the event type and - for scalar and
+// alias events - the text of the event. `event` may be NULL.
+#define yaml_error(parser, event, fmt, args...) yaml_error_with_trace(parser, event, __LINE__, __FUNCTION__, __FILE__, fmt, ##args)
+static void yaml_error_with_trace(yaml_parser_t *parser, yaml_event_t *event, size_t line, const char *function, const char *file, const char *format, ...) __attribute__ ((format(__printf__, 6, 7)));
+static void yaml_error_with_trace(yaml_parser_t *parser, yaml_event_t *event, size_t line, const char *function, const char *file, const char *format, ...) {
+    char text[1024] = ""; // stays empty unless the event carries text
+    const char *event_name = "";
+
+    if(event) {
+        event_name = yaml_event_name(event->type);
+
+        if(event->type == YAML_SCALAR_EVENT)
+            copy_to_buffer(text, sizeof(text), (char *)event->data.scalar.value, event->data.scalar.length);
+        else if(event->type == YAML_ALIAS_EVENT)
+            snprintf(text, sizeof(text), "%s", event->data.alias.anchor);
+    }
+
+    // positions are printed as mark + 1
+    const int at_line = (int)(parser->mark.line + 1);
+    const int at_column = (int)(parser->mark.column + 1);
+    const char *separator = text[0] ? ", near " : "";
+
+    fprintf(stderr, "YAML %zu@%s, %s(): (line %d, column %d, %s%s%s): ",
+            line, file, function, at_line, at_column, event_name, separator, text);
+
+    // the caller's message follows the prefix and is terminated by a newline
+    va_list ap;
+    va_start(ap, format);
+    vfprintf(stderr, format, ap);
+    va_end(ap);
+
+    fputc('\n', stderr);
+}
+
+// Fetch the next event from the parser into `event`.
+// On failure the libyaml error code and problem text are reported against the
+// caller's source location (captured by the yaml_parse() macro) and false is returned.
+// On success the caller owns the event and must release it with yaml_event_delete().
+#define yaml_parse(parser, event) yaml_parse_with_trace(parser, event, __LINE__, __FUNCTION__, __FILE__)
+static bool yaml_parse_with_trace(yaml_parser_t *parser, yaml_event_t *event, size_t line, const char *function, const char *file) {
+    if (yaml_parser_parse(parser, event))
+        return true;
+
+    yaml_error_with_trace(parser, NULL, line, function, file,
+                          "YAML parser error %d: %s",
+                          parser->error, parser->problem ? parser->problem : "unknown problem");
+    return false;
+}
+
+// Consume the next event and verify that it has the expected type.
+// A mismatch is reported against the caller's source location; the event is always released.
+#define yaml_parse_expect_event(parser, type) yaml_parse_expect_event_with_trace(parser, type, __LINE__, __FUNCTION__, __FILE__)
+static bool yaml_parse_expect_event_with_trace(yaml_parser_t *parser, yaml_event_type_t type, size_t line, const char *function, const char *file) {
+    yaml_event_t next;
+
+    if (!yaml_parse(parser, &next))
+        return false;
+
+    const bool matches = (next.type == type);
+    if(!matches)
+        yaml_error_with_trace(parser, &next, line, function, file,
+                              "unexpected event - expecting: %s", yaml_event_name(type));
+
+    yaml_event_delete(&next);
+    return matches;
+}
+
+// True when `event` is a scalar whose text equals `s`; callers pass strlen(s) as `len`.
+// The length comparison runs first, so strcmp() is only reached for same-sized candidates.
+#define yaml_scalar_matches(event, s, len) yaml_scalar_matches_with_trace(event, s, len, __LINE__, __FUNCTION__, __FILE__)
+static bool yaml_scalar_matches_with_trace(yaml_event_t *event, const char *s, size_t len, size_t line __maybe_unused, const char *function __maybe_unused, const char *file __maybe_unused) {
+    const bool same_size_scalar =
+            event->type == YAML_SCALAR_EVENT && event->data.scalar.length == len;
+
+    if(!same_size_scalar)
+        return false;
+
+    return !strcmp((const char *)event->data.scalar.value, s);
+}
+
+// ----------------------------------------------------------------------------
+
+// Parse the `filename` section: a mapping holding a `key` entry whose scalar value is
+// passed to log_job_filename_key_set().
+// Returns the number of errors found (0 on success).
+static size_t yaml_parse_filename_injection(yaml_parser_t *parser, LOG_JOB *jb) {
+    if(!yaml_parse_expect_event(parser, YAML_MAPPING_START_EVENT))
+        return 1;
+
+    yaml_event_t name_event;
+    if (!yaml_parse(parser, &name_event))
+        return 1;
+
+    size_t errors = 0;
+
+    // a first entry other than `key` is skipped here without reading its value
+    if (yaml_scalar_matches(&name_event, "key", strlen("key"))) {
+        yaml_event_t value_event;
+
+        if (!yaml_parse(parser, &value_event))
+            errors++;
+        else if (value_event.type != YAML_SCALAR_EVENT) {
+            yaml_error(parser, &value_event, "expected the filename as %s", yaml_event_name(YAML_SCALAR_EVENT));
+            errors++;
+            yaml_event_delete(&value_event);
+        }
+        else {
+            if(!log_job_filename_key_set(jb, (char *) value_event.data.scalar.value, value_event.data.scalar.length))
+                errors++;
+            yaml_event_delete(&value_event);
+        }
+    }
+
+    if(!yaml_parse_expect_event(parser, YAML_MAPPING_END_EVENT))
+        errors++;
+
+    yaml_event_delete(&name_event);
+    return errors;
+}
+
+// Parse the `filter` section: a mapping whose `include` and `exclude` entries carry
+// scalar patterns, handed to log_job_include_pattern_set() / log_job_exclude_pattern_set().
+// Scalar entries with any other name are skipped without being reported.
+// Returns the number of errors found (0 on success); parsing stops at the first one.
+static size_t yaml_parse_filters(yaml_parser_t *parser, LOG_JOB *jb) {
+    size_t errors = 0;
+
+    if(!yaml_parse_expect_event(parser, YAML_MAPPING_START_EVENT))
+        return 1;
+
+    for(bool done = false; !errors && !done; ) {
+        yaml_event_t entry;
+
+        if(!yaml_parse(parser, &entry))
+            return 1;
+
+        switch(entry.type) {
+            case YAML_MAPPING_END_EVENT:
+                done = true;
+                break;
+
+            case YAML_SCALAR_EVENT: {
+                const bool is_include = yaml_scalar_matches(&entry, "include", strlen("include"));
+                const bool is_exclude = !is_include && yaml_scalar_matches(&entry, "exclude", strlen("exclude"));
+
+                // unknown entry names are left untouched
+                if(!is_include && !is_exclude)
+                    break;
+
+                yaml_event_t pattern;
+                if(!yaml_parse(parser, &pattern)) {
+                    errors++;
+                    break;
+                }
+
+                if(pattern.type != YAML_SCALAR_EVENT) {
+                    if(is_include)
+                        yaml_error(parser, &pattern, "expected the include as %s",
+                                   yaml_event_name(YAML_SCALAR_EVENT));
+                    else
+                        yaml_error(parser, &pattern, "expected the exclude as %s",
+                                   yaml_event_name(YAML_SCALAR_EVENT));
+                    errors++;
+                }
+                else {
+                    char *text = (char *) pattern.data.scalar.value;
+                    size_t text_len = pattern.data.scalar.length;
+
+                    bool stored = is_include ? log_job_include_pattern_set(jb, text, text_len)
+                                             : log_job_exclude_pattern_set(jb, text, text_len);
+                    if(!stored)
+                        errors++;
+                }
+
+                yaml_event_delete(&pattern);
+                break;
+            }
+
+            default:
+                yaml_error(parser, &entry, "expected %s or %s",
+                           yaml_event_name(YAML_SCALAR_EVENT),
+                           yaml_event_name(YAML_MAPPING_END_EVENT));
+                errors++;
+                break;
+        }
+
+        yaml_event_delete(&entry);
+    }
+
+    return errors;
+}
+
+// Parse the `prefix` value: a scalar is stored with log_job_key_prefix_set(), anything else is an error.
+static size_t yaml_parse_prefix(yaml_parser_t *parser, LOG_JOB *jb) {
+    yaml_event_t event;
+    size_t errors = 0;
+
+    if (!yaml_parse(parser, &event)) return 1;
+    if (event.type != YAML_SCALAR_EVENT) {
+        yaml_error(parser, &event, "expected the prefix as %s", yaml_event_name(YAML_SCALAR_EVENT));
+        errors++;
+    }
+    else if(!log_job_key_prefix_set(jb, (char *) event.data.scalar.value, event.data.scalar.length))
+        errors++;
+    yaml_event_delete(&event);
+    return errors;
+}
+
+// Parse the remainder of one `inject` item after its `key` name has been consumed by
+// the caller: the key scalar, the `value` name and the value scalar. The resulting
+// pair is registered with log_job_injection_add().
+//
+// NOTE: despite the bool return type, the caller adds the result to its error counter,
+// so this returns true (1) on failure and false (0) on success.
+static bool yaml_parse_constant_field_injection(yaml_parser_t *parser, LOG_JOB *jb, bool unmatched) {
+    yaml_event_t event;
+    if (!yaml_parse(parser, &event) || event.type != YAML_SCALAR_EVENT) {
+        yaml_error(parser, &event, "Expected scalar for constant field injection key");
+        yaml_event_delete(&event);
+        return true; // counted as one error by the caller
+    }
+
+    char *key = strndupz((char *)event.data.scalar.value, event.data.scalar.length);
+    char *value = NULL;
+    bool failed = true;
+
+    yaml_event_delete(&event);
+
+    // yaml_scalar_matches() also rejects non-scalar events
+    if (!yaml_parse(parser, &event) || !yaml_scalar_matches(&event, "value", strlen("value"))) {
+        yaml_error(parser, &event, "Expected scalar 'value'");
+        goto cleanup;
+    }
+
+    // release the `value` name before the event is reused for its content
+    yaml_event_delete(&event);
+
+    if (!yaml_parse(parser, &event) || event.type != YAML_SCALAR_EVENT) {
+        yaml_error(parser, &event, "Expected scalar for constant field injection value");
+        goto cleanup;
+    }
+
+    value = strndupz((char *)event.data.scalar.value, event.data.scalar.length);
+    // the outcome of the registration is the outcome of this function
+    failed = !log_job_injection_add(jb, key, strlen(key), value, strlen(value), unmatched);
+
+cleanup:
+    yaml_event_delete(&event);
+    freez(key);
+    freez(value);
+    return failed;
+}
+
+// Parse the body of one `inject` sequence item (its mapping-start event has already
+// been consumed): every `key` scalar starts a key/value pair that is delegated to
+// yaml_parse_constant_field_injection(). Returns true when no error was found.
+static bool yaml_parse_injection_mapping(yaml_parser_t *parser, LOG_JOB *jb, bool unmatched) {
+    size_t failures = 0;
+
+    for(bool done = false; !failures && !done; ) {
+        yaml_event_t item;
+
+        if (!yaml_parse(parser, &item)) {
+            failures++;
+            continue;
+        }
+
+        if (item.type == YAML_MAPPING_END_EVENT)
+            done = true;
+
+        else if (item.type != YAML_SCALAR_EVENT) {
+            yaml_error(parser, &item, "Unexpected event in injection mapping");
+            failures++;
+        }
+
+        else if (!yaml_scalar_matches(&item, "key", strlen("key"))) {
+            yaml_error(parser, &item, "Unexpected scalar in injection mapping");
+            failures++;
+        }
+
+        else
+            // the helper reports failure as 1 and success as 0
+            failures += yaml_parse_constant_field_injection(parser, jb, unmatched);
+
+        yaml_event_delete(&item);
+    }
+
+    return failures == 0;
+}
+
+// Parse an `inject` section: a sequence of mappings, each handled by
+// yaml_parse_injection_mapping(). `unmatched` is forwarded untouched.
+// Returns the number of errors found (0 on success); parsing stops at the first one.
+static size_t yaml_parse_injections(yaml_parser_t *parser, LOG_JOB *jb, bool unmatched) {
+    if (!yaml_parse_expect_event(parser, YAML_SEQUENCE_START_EVENT))
+        return 1;
+
+    size_t failures = 0;
+    bool done = false;
+
+    do {
+        yaml_event_t item;
+
+        if (!yaml_parse(parser, &item)) {
+            failures++;
+            break;
+        }
+
+        if (item.type == YAML_SEQUENCE_END_EVENT)
+            done = true;
+
+        else if (item.type == YAML_MAPPING_START_EVENT) {
+            if (!yaml_parse_injection_mapping(parser, jb, unmatched))
+                failures++;
+        }
+        else {
+            yaml_error(parser, &item, "Unexpected event in injections sequence");
+            failures++;
+        }
+
+        yaml_event_delete(&item);
+    } while (!failures && !done);
+
+    return failures;
+}
+
+// Parse the `unmatched` section: a mapping with an optional `key` scalar (stored in
+// jb->unmatched.key) and an optional `inject` sequence (parsed with unmatched = true).
+// Returns the number of errors found (0 on success); parsing stops at the first one.
+static size_t yaml_parse_unmatched(yaml_parser_t *parser, LOG_JOB *jb) {
+    if (!yaml_parse_expect_event(parser, YAML_MAPPING_START_EVENT))
+        return 1;
+
+    size_t failures = 0;
+
+    for (bool done = false; !failures && !done; ) {
+        yaml_event_t entry;
+
+        if (!yaml_parse(parser, &entry)) {
+            failures++;
+            continue;
+        }
+
+        if (entry.type == YAML_MAPPING_END_EVENT)
+            done = true;
+        else if (entry.type != YAML_SCALAR_EVENT) {
+            yaml_error(parser, &entry, "Unexpected event in unmatched section");
+            failures++;
+        }
+        else if (yaml_scalar_matches(&entry, "inject", strlen("inject")))
+            failures += yaml_parse_injections(parser, jb, true);
+        else if (!yaml_scalar_matches(&entry, "key", strlen("key"))) {
+            yaml_error(parser, &entry, "Unexpected scalar in unmatched section");
+            failures++;
+        }
+        else {
+            // `key`: the next event must be the scalar holding the key name
+            yaml_event_t name;
+
+            if (!yaml_parse(parser, &name))
+                failures++;
+            else {
+                if (name.type == YAML_SCALAR_EVENT)
+                    hashed_key_len_set(&jb->unmatched.key, (char *)name.data.scalar.value, name.data.scalar.length);
+                else {
+                    yaml_error(parser, &name, "expected a scalar value for 'key'");
+                    failures++;
+                }
+
+                yaml_event_delete(&name);
+            }
+        }
+
+        yaml_event_delete(&entry);
+    }
+
+    return failures;
+}
+
+// Parse the `rewrite` section: a sequence of mappings, each describing one rule.
+//
+// Recognized entries per mapping: `key`, `match` (PCRE2 pattern), `not_empty`, `value`,
+// `stop` and `inject`. `match` and `not_empty` share the search pattern; the last one wins.
+// At the end of each mapping the rule is registered with log_job_rewrite_add(), provided
+// a `key` was given.
+//
+// Every event is released before it is reused and the temporary strings are freed on
+// all exits of a mapping. Returns the number of errors found (0 on success).
+static size_t yaml_parse_rewrites(yaml_parser_t *parser, LOG_JOB *jb) {
+    size_t errors = 0;
+
+    if (!yaml_parse_expect_event(parser, YAML_SEQUENCE_START_EVENT))
+        return 1;
+
+    bool finished = false;
+    while (!errors && !finished) {
+        yaml_event_t event;
+        if (!yaml_parse(parser, &event)) {
+            errors++;
+            continue;
+        }
+
+        switch (event.type) {
+            case YAML_MAPPING_START_EVENT:
+            {
+                RW_FLAGS flags = RW_NONE;
+                char *key = NULL;
+                char *search_pattern = NULL;
+                char *replace_pattern = NULL;
+
+                bool mapping_finished = false;
+                while (!errors && !mapping_finished) {
+                    yaml_event_t sub_event;
+                    if (!yaml_parse(parser, &sub_event)) {
+                        errors++;
+                        continue;
+                    }
+
+                    switch (sub_event.type) {
+                        case YAML_SCALAR_EVENT:
+                            if (yaml_scalar_matches(&sub_event, "key", strlen("key"))) {
+                                yaml_event_delete(&sub_event); // release the entry name before reusing the event
+                                if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) {
+                                    yaml_error(parser, &sub_event, "Expected scalar for rewrite key");
+                                    errors++;
+                                } else {
+                                    freez(key); // a repeated `key` replaces the earlier one
+                                    key = strndupz((char *)sub_event.data.scalar.value, sub_event.data.scalar.length);
+                                }
+                            } else if (yaml_scalar_matches(&sub_event, "match", strlen("match"))) {
+                                yaml_event_delete(&sub_event);
+                                if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) {
+                                    yaml_error(parser, &sub_event, "Expected scalar for rewrite match PCRE2 pattern");
+                                    errors++;
+                                }
+                                else {
+                                    freez(search_pattern);
+                                    flags |= RW_MATCH_PCRE2;
+                                    flags &= ~RW_MATCH_NON_EMPTY;
+                                    search_pattern = strndupz((char *)sub_event.data.scalar.value, sub_event.data.scalar.length);
+                                }
+                            } else if (yaml_scalar_matches(&sub_event, "not_empty", strlen("not_empty"))) {
+                                yaml_event_delete(&sub_event);
+                                if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) {
+                                    yaml_error(parser, &sub_event, "Expected scalar for rewrite not empty condition");
+                                    errors++;
+                                }
+                                else {
+                                    freez(search_pattern);
+                                    flags |= RW_MATCH_NON_EMPTY;
+                                    flags &= ~RW_MATCH_PCRE2;
+                                    search_pattern = strndupz((char *)sub_event.data.scalar.value, sub_event.data.scalar.length);
+                                }
+                            } else if (yaml_scalar_matches(&sub_event, "value", strlen("value"))) {
+                                yaml_event_delete(&sub_event);
+                                if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) {
+                                    yaml_error(parser, &sub_event, "Expected scalar for rewrite value");
+                                    errors++;
+                                } else {
+                                    freez(replace_pattern); // a repeated `value` replaces the earlier one
+                                    replace_pattern = strndupz((char *)sub_event.data.scalar.value, sub_event.data.scalar.length);
+                                }
+                            } else if (yaml_scalar_matches(&sub_event, "stop", strlen("stop"))) {
+                                yaml_event_delete(&sub_event);
+                                if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) {
+                                    yaml_error(parser, &sub_event, "Expected scalar for rewrite stop boolean");
+                                    errors++;
+                                } else {
+                                    if(strncmp((char*)sub_event.data.scalar.value, "no", 2) == 0 ||
+                                       strncmp((char*)sub_event.data.scalar.value, "false", 5) == 0)
+                                        flags |= RW_DONT_STOP;
+                                    else
+                                        flags &= ~RW_DONT_STOP;
+                                }
+                            } else if (yaml_scalar_matches(&sub_event, "inject", strlen("inject"))) {
+                                yaml_event_delete(&sub_event);
+                                if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) {
+                                    yaml_error(parser, &sub_event, "Expected scalar for rewrite inject boolean");
+                                    errors++;
+                                } else {
+                                    if(strncmp((char*)sub_event.data.scalar.value, "yes", 3) == 0 ||
+                                       strncmp((char*)sub_event.data.scalar.value, "true", 4) == 0)
+                                        flags |= RW_INJECT;
+                                    else
+                                        flags &= ~RW_INJECT;
+                                }
+                            } else {
+                                yaml_error(parser, &sub_event, "Unexpected scalar in rewrite mapping");
+                                errors++;
+                            }
+                            break;
+
+                        case YAML_MAPPING_END_EVENT:
+                            // a mapping without `key` is dropped without being registered
+                            if(key && !log_job_rewrite_add(jb, key, flags, search_pattern, replace_pattern))
+                                errors++;
+
+                            mapping_finished = true;
+                            break;
+
+                        default:
+                            yaml_error(parser, &sub_event, "Unexpected event in rewrite mapping");
+                            errors++;
+                            break;
+                    }
+
+                    yaml_event_delete(&sub_event);
+                }
+
+                // released here so that error exits from the loop above do not leak them
+                freez(key);
+                freez(search_pattern);
+                freez(replace_pattern);
+            }
+                break;
+
+            case YAML_SEQUENCE_END_EVENT:
+                finished = true;
+                break;
+
+            default:
+                yaml_error(parser, &event, "Unexpected event in rewrites sequence");
+                errors++;
+                break;
+        }
+
+        yaml_event_delete(&event);
+    }
+
+    return errors;
+}
+
+// Parse the `rename` section: a sequence of mappings with `new_key` and `old_key` scalars.
+// A mapping providing both is registered with log_job_rename_add(); events are released
+// before reuse and the temporary keys are cleaned up on every exit. Returns the error count.
+static size_t yaml_parse_renames(yaml_parser_t *parser, LOG_JOB *jb) {
+    size_t errors = 0;
+
+    if (!yaml_parse_expect_event(parser, YAML_SEQUENCE_START_EVENT))
+        return 1;
+
+    bool finished = false;
+    while (!errors && !finished) {
+        yaml_event_t event;
+        if (!yaml_parse(parser, &event)) {
+            errors++;
+            continue;
+        }
+
+        switch (event.type) {
+            case YAML_MAPPING_START_EVENT:
+            {
+                struct key_rename rn = { 0 };
+                bool mapping_finished = false;
+                while (!errors && !mapping_finished) {
+                    yaml_event_t sub_event;
+                    if (!yaml_parse(parser, &sub_event)) {
+                        errors++;
+                        continue;
+                    }
+
+                    switch (sub_event.type) {
+                        case YAML_SCALAR_EVENT:
+                            if (yaml_scalar_matches(&sub_event, "new_key", strlen("new_key"))) {
+                                yaml_event_delete(&sub_event); // release the entry name before reusing the event
+                                if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) {
+                                    yaml_error(parser, &sub_event, "Expected scalar for rename new_key");
+                                    errors++;
+                                } else {
+                                    hashed_key_len_set(&rn.new_key, (char *)sub_event.data.scalar.value, sub_event.data.scalar.length);
+                                }
+                            } else if (yaml_scalar_matches(&sub_event, "old_key", strlen("old_key"))) {
+                                yaml_event_delete(&sub_event);
+                                if (!yaml_parse(parser, &sub_event) || sub_event.type != YAML_SCALAR_EVENT) {
+                                    yaml_error(parser, &sub_event, "Expected scalar for rename old_key");
+                                    errors++;
+                                } else {
+                                    hashed_key_len_set(&rn.old_key, (char *)sub_event.data.scalar.value, sub_event.data.scalar.length);
+                                }
+                            } else {
+                                yaml_error(parser, &sub_event, "Unexpected scalar in rename mapping");
+                                errors++;
+                            }
+                            break;
+
+                        case YAML_MAPPING_END_EVENT:
+                            if(rn.old_key.key && rn.new_key.key &&
+                               !log_job_rename_add(jb, rn.new_key.key, rn.new_key.len, rn.old_key.key, rn.old_key.len))
+                                errors++;
+
+                            mapping_finished = true;
+                            break;
+
+                        default:
+                            yaml_error(parser, &sub_event, "Unexpected event in rename mapping");
+                            errors++;
+                            break;
+                    }
+
+                    yaml_event_delete(&sub_event);
+                }
+                rename_cleanup(&rn); // also reached when the loop stops on an error
+            }
+                break;
+
+            case YAML_SEQUENCE_END_EVENT:
+                finished = true;
+                break;
+
+            default:
+                yaml_error(parser, &event, "Unexpected event in renames sequence");
+                errors++;
+                break;
+        }
+
+        yaml_event_delete(&event);
+    }
+
+    return errors;
+}
+
+// Parse the value of the `pattern` entry; it must be a scalar, which is handed to
+// log_job_pattern_set(). Returns the number of errors found (0 or 1).
+static size_t yaml_parse_pattern(yaml_parser_t *parser, LOG_JOB *jb) {
+    yaml_event_t value;
+
+    if (!yaml_parse(parser, &value))
+        return 1;
+
+    const bool is_scalar = (value.type == YAML_SCALAR_EVENT);
+    if(is_scalar)
+        log_job_pattern_set(jb, (char *) value.data.scalar.value, value.data.scalar.length);
+    else
+        yaml_error(parser, &value, "unexpected event type");
+
+    yaml_event_delete(&value);
+    return is_scalar ? 0 : 1;
+}
+
+// Parse a whole configuration document: stream start, document start, one top-level
+// mapping of sections, document end and stream end.
+//
+// Sections: pattern, prefix, filename, filter, inject, unmatched, rewrite, rename.
+// Each one is dispatched to its own parser; any other name is an error.
+//
+// Returns the number of errors found (0 on success); parsing stops at the first one.
+static size_t yaml_parse_initialized(yaml_parser_t *parser, LOG_JOB *jb) {
+    // fixed preamble
+    if(!yaml_parse_expect_event(parser, YAML_STREAM_START_EVENT))
+        return 1;
+
+    if(!yaml_parse_expect_event(parser, YAML_DOCUMENT_START_EVENT))
+        return 1;
+
+    if(!yaml_parse_expect_event(parser, YAML_MAPPING_START_EVENT))
+        return 1;
+
+    size_t errors = 0;
+
+    // one iteration per entry of the top-level mapping
+    for(bool done = false; !errors && !done; ) {
+        yaml_event_t section;
+
+        if(!yaml_parse(parser, &section)) {
+            errors++;
+            continue;
+        }
+
+        // end of the top-level mapping
+        if(section.type == YAML_MAPPING_END_EVENT)
+            done = true;
+
+        // only scalars can name a section
+        else if(section.type != YAML_SCALAR_EVENT) {
+            yaml_error(parser, &section, "unexpected type");
+            errors++;
+        }
+
+        // dispatch on the section name
+        else if (yaml_scalar_matches(&section, "pattern", strlen("pattern")))
+            errors += yaml_parse_pattern(parser, jb);
+
+        else if (yaml_scalar_matches(&section, "prefix", strlen("prefix")))
+            errors += yaml_parse_prefix(parser, jb);
+
+        else if (yaml_scalar_matches(&section, "filename", strlen("filename")))
+            errors += yaml_parse_filename_injection(parser, jb);
+
+        else if (yaml_scalar_matches(&section, "filter", strlen("filter")))
+            errors += yaml_parse_filters(parser, jb);
+
+        else if (yaml_scalar_matches(&section, "inject", strlen("inject")))
+            errors += yaml_parse_injections(parser, jb, false);
+
+        else if (yaml_scalar_matches(&section, "unmatched", strlen("unmatched")))
+            errors += yaml_parse_unmatched(parser, jb);
+
+        else if (yaml_scalar_matches(&section, "rewrite", strlen("rewrite")))
+            errors += yaml_parse_rewrites(parser, jb);
+
+        else if (yaml_scalar_matches(&section, "rename", strlen("rename")))
+            errors += yaml_parse_renames(parser, jb);
+
+        else {
+            yaml_error(parser, &section, "unexpected scalar");
+            errors++;
+        }
+
+        yaml_event_delete(&section);
+    }
+
+    if(errors)
+        return errors;
+
+    // fixed epilogue, only checked after a clean mapping
+    if(!yaml_parse_expect_event(parser, YAML_DOCUMENT_END_EVENT))
+        return 1;
+
+    if(!yaml_parse_expect_event(parser, YAML_STREAM_END_EVENT))
+        return 1;
+
+    return 0;
+}
+
+// Parse the YAML file at config_file_path into jb; true only when it was read without errors.
+bool yaml_parse_file(const char *config_file_path, LOG_JOB *jb) {
+    if(!config_file_path || !*config_file_path) {
+        log2stderr("yaml configuration filename cannot be empty.");
+        return false;
+    }
+    FILE *fp = fopen(config_file_path, "r");
+    if (!fp) {
+        log2stderr("Error opening config file: %s", config_file_path);
+        return false;
+    }
+    yaml_parser_t parser;
+    size_t errors = 1; // stays 1 when the parser cannot be initialized
+    if (yaml_parser_initialize(&parser)) {
+        yaml_parser_set_input_file(&parser, fp);
+        errors = yaml_parse_initialized(&parser, jb);
+        yaml_parser_delete(&parser);
+    } else
+        log2stderr("Error: cannot initialize the yaml parser for %s", config_file_path);
+    fclose(fp);
+    return errors == 0;
+}
+
+// Resolve config_name to LOG2JOURNAL_CONFIG_PATH/<config_name>.yaml and parse that file.
+bool yaml_parse_config(const char *config_name, LOG_JOB *jb) {
+    char path[FILENAME_MAX + 1];
+    snprintf(path, sizeof(path), "%s/%s.yaml", LOG2JOURNAL_CONFIG_PATH, config_name);
+    return yaml_parse_file(path, jb);
+}
+
+#endif // HAVE_LIBYAML
+
+// ----------------------------------------------------------------------------
+// printing yaml
+
+// Print a multi-line value to stderr, one output line per input line, each indented
+// by two spaces per depth level. A final line lacking a newline gets one appended.
+// A NULL value is printed as a single empty line.
+static void yaml_print_multiline_value(const char *s, size_t depth) {
+    if (!s)
+        s = "";
+
+    do {
+        // `next` points just past the newline, so `len` includes it
+        const char *next = strchr(s, '\n');
+        if(next) next++;
+
+        size_t len = next ? (size_t)(next - s) : strlen(s);
+        // %*s pads to the indentation width; %.*s prints the line in place (no copy)
+        fprintf(stderr, "%*s%.*s%s", (int)(depth * 2), "", (int)len, s, next ? "" : "\n");
+
+        s = next;
+    } while(s && *s);
+}
+
+// Report whether a value has to be quoted when it is printed as YAML, i.e. whether it
+// contains at least one of these characters:  : { } [ ] , & * ! | > ' " % @ ` ^
+// An empty string never needs quotes.
+static bool needs_quotes_in_yaml(const char *str) {
+    // constant lookup table indexed by character code; entries not listed are false
+    static const bool is_special[256] = {
+        [':'] = true, ['{'] = true, ['}'] = true, ['['] = true,
+        [']'] = true, [','] = true, ['&'] = true, ['*'] = true,
+        ['!'] = true, ['|'] = true, ['>'] = true, ['\''] = true,
+        ['"'] = true, ['%'] = true, ['@'] = true, ['`'] = true,
+        ['^'] = true,
+    };
+
+    const unsigned char *p = (const unsigned char *)str;
+
+    for ( ; *p; p++) {
+        if (is_special[*p])
+            return true;
+    }
+
+    return false;
+}
+
+// Print one YAML node to stderr: indentation (two spaces per depth, capped at 10 levels),
+// an optional "- " sequence marker, an optional "key: " and the value.
+// Values with a newline are printed as a `|` block; values with YAML special characters
+// are single-quoted, with embedded single quotes doubled as YAML requires.
+static void yaml_print_node(const char *key, const char *value, size_t depth, bool dash) {
+    if(depth > 10) depth = 10;
+    const bool multiline = value && strchr(value, '\n');
+    const bool quoted = value && !multiline && needs_quotes_in_yaml(value);
+
+    // %*s pads with spaces up to the requested width, whatever the depth
+    fprintf(stderr, "%*s%s%s%s%s", (int)(depth * 2), "", dash ? "- ": "",
+            key ? key : "", key ? ": " : "", quoted ? "'" : "");
+
+    for(const char *c = multiline ? "|" : (value ? value : ""); *c; c++) {
+        if(quoted && *c == '\'') fputc('\'', stderr);
+        fputc(*c, stderr);
+    }
+    fprintf(stderr, "%s\n", quoted ? "'" : "");
+
+    if(multiline)
+        yaml_print_multiline_value(value, depth + 1);
+}
+
+// Dump the job configuration to stderr as YAML, section by section, in this order:
+// pattern, prefix, filename, filter, rename, inject, rewrite, unmatched.
+// Sections without configured content are skipped; each section except `pattern`
+// is preceded by an empty line.
+void log_job_configuration_to_yaml(LOG_JOB *jb) {
+    const bool has_filter = jb->filter.include.pattern || jb->filter.exclude.pattern;
+    const bool has_unmatched = jb->unmatched.key.key || jb->unmatched.injections.used;
+
+    if(jb->pattern)
+        yaml_print_node("pattern", jb->pattern, 0, false);
+
+    if(jb->prefix) {
+        fputc('\n', stderr);
+        yaml_print_node("prefix", jb->prefix, 0, false);
+    }
+
+    if(jb->filename.key.key) {
+        fputc('\n', stderr);
+        yaml_print_node("filename", NULL, 0, false);
+        yaml_print_node("key", jb->filename.key.key, 1, false);
+    }
+
+    if(has_filter) {
+        fputc('\n', stderr);
+        yaml_print_node("filter", NULL, 0, false);
+        if(jb->filter.include.pattern)
+            yaml_print_node("include", jb->filter.include.pattern, 1, false);
+
+        if(jb->filter.exclude.pattern)
+            yaml_print_node("exclude", jb->filter.exclude.pattern, 1, false);
+    }
+
+    if(jb->renames.used) {
+        fputc('\n', stderr);
+        yaml_print_node("rename", NULL, 0, false);
+        for(size_t r = 0; r < jb->renames.used; r++) {
+            yaml_print_node("new_key", jb->renames.array[r].new_key.key, 1, true);
+            yaml_print_node("old_key", jb->renames.array[r].old_key.key, 2, false);
+        }
+    }
+
+    if(jb->injections.used) {
+        fputc('\n', stderr);
+        yaml_print_node("inject", NULL, 0, false);
+        for(size_t n = 0; n < jb->injections.used; n++) {
+            yaml_print_node("key", jb->injections.keys[n].key.key, 1, true);
+            yaml_print_node("value", jb->injections.keys[n].value.pattern, 2, false);
+        }
+    }
+
+    if(jb->rewrites.used) {
+        fputc('\n', stderr);
+        yaml_print_node("rewrite", NULL, 0, false);
+        REWRITE *rw = jb->rewrites.array;
+        for(REWRITE *end = rw + jb->rewrites.used; rw < end; rw++) {
+            yaml_print_node("key", rw->key.key, 1, true);
+
+            // the matcher is optional: `match` (PCRE2) or `not_empty`, never both
+            if(rw->flags & RW_MATCH_PCRE2)
+                yaml_print_node("match", rw->match_pcre2.pattern, 2, false);
+            else if(rw->flags & RW_MATCH_NON_EMPTY)
+                yaml_print_node("not_empty", rw->match_non_empty.pattern, 2, false);
+
+            yaml_print_node("value", rw->value.pattern, 2, false);
+
+            if(rw->flags & RW_INJECT)
+                yaml_print_node("inject", "yes", 2, false);
+
+            if(rw->flags & RW_DONT_STOP)
+                yaml_print_node("stop", "no", 2, false);
+        }
+    }
+
+    if(has_unmatched) {
+        fputc('\n', stderr);
+        yaml_print_node("unmatched", NULL, 0, false);
+        if(jb->unmatched.key.key)
+            yaml_print_node("key", jb->unmatched.key.key, 1, false);
+
+        if(jb->unmatched.injections.used) {
+            fputc('\n', stderr);
+            yaml_print_node("inject", NULL, 1, false);
+            for(size_t n = 0; n < jb->unmatched.injections.used; n++) {
+                yaml_print_node("key", jb->unmatched.injections.keys[n].key.key, 2, true);
+                yaml_print_node("value", jb->unmatched.injections.keys[n].value.pattern, 3, false);
+            }
+        }
+    }
+}
diff --git a/collectors/log2journal/log2journal.c b/collectors/log2journal/log2journal.c
new file mode 100644
index 00000000000000..c3204939cda9c5
--- /dev/null
+++ b/collectors/log2journal/log2journal.c
@@ -0,0 +1,569 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "log2journal.h"
+
+// ----------------------------------------------------------------------------
+
+// Lookup table indexed by byte value (0-255).
+// Each entry is the character the byte is mapped to:
+//  - NUL (0) maps to NUL
+//  - digits map to themselves
+//  - ASCII letters map to their uppercase form
+//  - every other byte (control characters, symbols, DEL, 128-255) maps to '_'
+// NOTE(review): presumably used to transliterate arbitrary field names into
+// systemd-journal compatible keys - the users of this table are not visible here.
+const char journal_key_characters_map[256] = {
+ // control characters
+ [0] = '\0', [1] = '_', [2] = '_', [3] = '_', [4] = '_', [5] = '_', [6] = '_', [7] = '_',
+ [8] = '_', [9] = '_', [10] = '_', [11] = '_', [12] = '_', [13] = '_', [14] = '_', [15] = '_',
+ [16] = '_', [17] = '_', [18] = '_', [19] = '_', [20] = '_', [21] = '_', [22] = '_', [23] = '_',
+ [24] = '_', [25] = '_', [26] = '_', [27] = '_', [28] = '_', [29] = '_', [30] = '_', [31] = '_',
+
+ // symbols
+ [' '] = '_', ['!'] = '_', ['"'] = '_', ['#'] = '_', ['$'] = '_', ['%'] = '_', ['&'] = '_', ['\''] = '_',
+ ['('] = '_', [')'] = '_', ['*'] = '_', ['+'] = '_', [','] = '_', ['-'] = '_', ['.'] = '_', ['/'] = '_',
+
+ // numbers
+ ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7',
+ ['8'] = '8', ['9'] = '9',
+
+ // symbols
+ [':'] = '_', [';'] = '_', ['<'] = '_', ['='] = '_', ['>'] = '_', ['?'] = '_', ['@'] = '_',
+
+ // capitals
+ ['A'] = 'A', ['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H',
+ ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P',
+ ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X',
+ ['Y'] = 'Y', ['Z'] = 'Z',
+
+ // symbols
+ ['['] = '_', ['\\'] = '_', [']'] = '_', ['^'] = '_', ['_'] = '_', ['`'] = '_',
+
+ // lower to upper
+ ['a'] = 'A', ['b'] = 'B', ['c'] = 'C', ['d'] = 'D', ['e'] = 'E', ['f'] = 'F', ['g'] = 'G', ['h'] = 'H',
+ ['i'] = 'I', ['j'] = 'J', ['k'] = 'K', ['l'] = 'L', ['m'] = 'M', ['n'] = 'N', ['o'] = 'O', ['p'] = 'P',
+ ['q'] = 'Q', ['r'] = 'R', ['s'] = 'S', ['t'] = 'T', ['u'] = 'U', ['v'] = 'V', ['w'] = 'W', ['x'] = 'X',
+ ['y'] = 'Y', ['z'] = 'Z',
+
+ // symbols
+ ['{'] = '_', ['|'] = '_', ['}'] = '_', ['~'] = '_', [127] = '_', // Delete (DEL)
+
+ // Extended ASCII characters (128-255) set to underscore
+ [128] = '_', [129] = '_', [130] = '_', [131] = '_', [132] = '_', [133] = '_', [134] = '_', [135] = '_',
+ [136] = '_', [137] = '_', [138] = '_', [139] = '_', [140] = '_', [141] = '_', [142] = '_', [143] = '_',
+ [144] = '_', [145] = '_', [146] = '_', [147] = '_', [148] = '_', [149] = '_', [150] = '_', [151] = '_',
+ [152] = '_', [153] = '_', [154] = '_', [155] = '_', [156] = '_', [157] = '_', [158] = '_', [159] = '_',
+ [160] = '_', [161] = '_', [162] = '_', [163] = '_', [164] = '_', [165] = '_', [166] = '_', [167] = '_',
+ [168] = '_', [169] = '_', [170] = '_', [171] = '_', [172] = '_', [173] = '_', [174] = '_', [175] = '_',
+ [176] = '_', [177] = '_', [178] = '_', [179] = '_', [180] = '_', [181] = '_', [182] = '_', [183] = '_',
+ [184] = '_', [185] = '_', [186] = '_', [187] = '_', [188] = '_', [189] = '_', [190] = '_', [191] = '_',
+ [192] = '_', [193] = '_', [194] = '_', [195] = '_', [196] = '_', [197] = '_', [198] = '_', [199] = '_',
+ [200] = '_', [201] = '_', [202] = '_', [203] = '_', [204] = '_', [205] = '_', [206] = '_', [207] = '_',
+ [208] = '_', [209] = '_', [210] = '_', [211] = '_', [212] = '_', [213] = '_', [214] = '_', [215] = '_',
+ [216] = '_', [217] = '_', [218] = '_', [219] = '_', [220] = '_', [221] = '_', [222] = '_', [223] = '_',
+ [224] = '_', [225] = '_', [226] = '_', [227] = '_', [228] = '_', [229] = '_', [230] = '_', [231] = '_',
+ [232] = '_', [233] = '_', [234] = '_', [235] = '_', [236] = '_', [237] = '_', [238] = '_', [239] = '_',
+ [240] = '_', [241] = '_', [242] = '_', [243] = '_', [244] = '_', [245] = '_', [246] = '_', [247] = '_',
+ [248] = '_', [249] = '_', [250] = '_', [251] = '_', [252] = '_', [253] = '_', [254] = '_', [255] = '_',
+};
+
+// ----------------------------------------------------------------------------
+
+// Resolve a HASHED_KEY to the instance owned by the job's hashtable.
+//
+// - Keys flagged HK_HASHTABLE_ALLOCATED are already hashtable entries and are returned as-is.
+// - Otherwise the hashtable entry is looked up by hash (and created on first use, copying
+//   the key name, length and hash) and cached in k->hashtable_ptr for subsequent calls.
+//
+// The first time an existing entry is hit, its name is compared with the requested one and
+// a diagnostic is logged if they differ (hash collision); the existing entry is still returned.
+static inline HASHED_KEY *get_key_from_hashtable(LOG_JOB *jb, HASHED_KEY *k) {
+    if(k->flags & HK_HASHTABLE_ALLOCATED)
+        return k;
+
+    // a previous call has already resolved this key
+    if(k->hashtable_ptr)
+        return k->hashtable_ptr;
+
+    SIMPLE_HASHTABLE_SLOT_KEY *slot = simple_hashtable_get_slot_KEY(&jb->hashtable, k->hash, true);
+    HASHED_KEY *entry = SIMPLE_HASHTABLE_SLOT_DATA(slot);
+
+    if(!entry) {
+        // first time we see this hash: create the hashtable-owned copy
+        entry = callocz(1, sizeof(HASHED_KEY));
+        entry->key = strdupz(k->key);
+        entry->len = k->len;
+        entry->hash = k->hash;
+        entry->flags = HK_HASHTABLE_ALLOCATED;
+
+        simple_hashtable_set_slot_KEY(&jb->hashtable, slot, entry->hash, entry);
+    }
+    else if(!(entry->flags & HK_COLLISION_CHECKED)) {
+        // verify only once per entry that the stored name is the one requested
+        entry->flags |= HK_COLLISION_CHECKED;
+
+        if(strcmp(entry->key, k->key) != 0)
+            log2stderr("Hashtable collision detected on key '%s' (hash %lx) and '%s' (hash %lx). "
+                       "Please file a bug report.",
+                       entry->key, (unsigned long) entry->hash,
+                       k->key, (unsigned long) k->hash);
+    }
+
+    k->hashtable_ptr = entry;
+    return entry;
+}
+
+// Look up (creating it if needed) the hashtable entry for a key given as a C string.
+// A temporary HASHED_KEY is built on the stack with the name, its length and its
+// XXH3 64-bit hash, and resolved through get_key_from_hashtable().
+static inline HASHED_KEY *get_key_from_hashtable_with_char_ptr(LOG_JOB *jb, const char *key) {
+    size_t key_len = strlen(key);
+
+    HASHED_KEY lookup = {
+        .key = key,
+        .len = key_len,
+    };
+    lookup.hash = XXH3_64bits(key, lookup.len);
+
+    return get_key_from_hashtable(jb, &lookup);
+}
+
+// ----------------------------------------------------------------------------
+
+// Log warnings (to stderr) about a key name that systemd-journal may reject:
+//  - longer than JOURNAL_MAX_KEY_LEN
+//  - containing anything other than 'A'-'Z', digits and '_'
+//  - starting with a digit
+//  - starting with an underscore (trusted field)
+// The key itself is never modified.
+static inline void validate_key(LOG_JOB *jb __maybe_unused, HASHED_KEY *k) {
+    if(k->len > JOURNAL_MAX_KEY_LEN)
+        log2stderr("WARNING: key '%s' has length %zu, which is more than %zu, the max systemd-journal allows",
+                   k->key, (size_t)k->len, (size_t)JOURNAL_MAX_KEY_LEN);
+
+    for(size_t i = 0; i < k->len ;i++) {
+        // <ctype.h> functions require a value representable as unsigned char (or EOF);
+        // passing a plain (possibly negative) char is undefined behaviour
+        unsigned char c = (unsigned char)k->key[i];
+
+        if((c < 'A' || c > 'Z') && !isdigit(c) && c != '_') {
+            log2stderr("WARNING: key '%s' contains characters that are not allowed by systemd-journal.", k->key);
+            break;
+        }
+    }
+
+    if(isdigit((unsigned char)k->key[0]))
+        log2stderr("WARNING: key '%s' starts with a digit and may not be accepted by systemd-journal.", k->key);
+
+    if(k->key[0] == '_')
+        log2stderr("WARNING: key '%s' starts with an underscore, which makes it a systemd-journal trusted field. "
+                   "Such fields are accepted by systemd-journal-remote, but not by systemd-journald.", k->key);
+}
+
+// ----------------------------------------------------------------------------
+
+// Expand a replacement pattern into a caller supplied buffer.
+//
+// Each node of rp is either literal text or a variable. A variable that matches the
+// job's line key expands to the current trimmed line; any other variable expands to
+// the current value of the hashtable key with that name (nothing, when it is empty).
+// Expansion stops when the nodes are exhausted or at most 1 byte of space remains.
+//
+// Returns the number of bytes written to dst, as reported by copy_to_buffer().
+static inline size_t replace_evaluate_to_buffer(LOG_JOB *jb, HASHED_KEY *k __maybe_unused, REPLACE_PATTERN *rp, char *dst, size_t dst_size) {
+    char *out = dst;
+    size_t space_left = dst_size;
+    REPLACE_NODE *node = rp->nodes;
+
+    while(node != NULL && space_left > 1) {
+        const char *src;
+        size_t src_len;
+        bool emit = true;
+
+        if(!node->is_variable) {
+            // literal text
+            src = node->name.key;
+            src_len = node->name.len;
+        }
+        else if(hashed_keys_match(&node->name, &jb->line.key)) {
+            // the special variable that resolves to the whole line
+            src = jb->line.trimmed;
+            src_len = jb->line.trimmed_len;
+        }
+        else {
+            // any other variable: the value of the key with that name, if it has one
+            HASHED_KEY *ref = get_key_from_hashtable_with_char_ptr(jb, node->name.key);
+            src = ref->value.txt;
+            src_len = ref->value.len;
+            emit = (src_len != 0);
+        }
+
+        if(emit) {
+            size_t written = copy_to_buffer(out, space_left, src, src_len);
+            out += written;
+            space_left -= written;
+        }
+
+        node = node->next;
+    }
+
+    return out - dst;
+}
+
+// Evaluate a replacement pattern and store the result as the value of key k.
+//
+// The value is built on the hashtable-owned instance of k. Each node of rp is either
+// literal text or a variable; a variable matching the job's line key expands to the
+// current trimmed line, any other variable expands to the current value of the
+// hashtable key with that name (nothing, when it is empty).
+static inline void replace_evaluate(LOG_JOB *jb, HASHED_KEY *k, REPLACE_PATTERN *rp) {
+    HASHED_KEY *ht_key = get_key_from_hashtable(jb, k);
+
+    // Start from an empty value. The reset must be applied to the hashtable instance,
+    // which is the one we append to: k may be a configuration key (e.g. an injection)
+    // that is distinct from ht_key, and resetting only k would make the new value be
+    // appended to whatever ht_key already holds.
+    ht_key->value.len = 0;
+
+    for(REPLACE_NODE *node = rp->nodes; node != NULL; node = node->next) {
+        if(node->is_variable) {
+            if(hashed_keys_match(&node->name, &jb->line.key))
+                txt_expand_and_append(&ht_key->value, jb->line.trimmed, jb->line.trimmed_len);
+
+            else {
+                HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key);
+                if(ktmp->value.len)
+                    txt_expand_and_append(&ht_key->value, ktmp->value.txt, ktmp->value.len);
+            }
+        }
+        else
+            txt_expand_and_append(&ht_key->value, node->name.key, node->name.len);
+    }
+}
+
+// Rewrite the value of key k using the capture groups of the last PCRE2 match of sp.
+//
+// k must be a hashtable-owned key and sp->match_data must hold a match against k's
+// current value. The new value is assembled in jb->rewrites.tmp and then swapped with
+// k->value, so the old buffer is kept for reuse by the next rewrite.
+//
+// Variables are resolved in this order: named capture group of sp, the job's line key
+// (the whole trimmed line), the current value of the hashtable key with that name.
+static inline void replace_evaluate_from_pcre2(LOG_JOB *jb, HASHED_KEY *k, REPLACE_PATTERN *rp, SEARCH_PATTERN *sp) {
+    assert(k->flags & HK_HASHTABLE_ALLOCATED);
+
+    // set the temporary TEXT to zero length
+    jb->rewrites.tmp.len = 0;
+
+    PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(sp->match_data);
+
+    // Iterate through the linked list of replacement nodes
+    for(REPLACE_NODE *node = rp->nodes; node != NULL; node = node->next) {
+        if(node->is_variable) {
+            int group_number = pcre2_substring_number_from_name(
+                sp->re, (PCRE2_SPTR) node->name.key);
+
+            if(group_number >= 0) {
+                PCRE2_SIZE start_offset = ovector[2 * group_number];
+                PCRE2_SIZE end_offset = ovector[2 * group_number + 1];
+
+                // A group that did not participate in the match has both offsets set to
+                // PCRE2_UNSET; using them in pointer arithmetic would be out of bounds.
+                // Such groups (and inverted ranges) expand to an empty string.
+                const char *src = "";
+                PCRE2_SIZE length = 0;
+
+                if(start_offset != PCRE2_UNSET && end_offset != PCRE2_UNSET && end_offset >= start_offset) {
+                    src = k->value.txt + start_offset;
+                    length = end_offset - start_offset;
+                }
+
+                txt_expand_and_append(&jb->rewrites.tmp, src, length);
+            }
+            else {
+                if(hashed_keys_match(&node->name, &jb->line.key))
+                    txt_expand_and_append(&jb->rewrites.tmp, jb->line.trimmed, jb->line.trimmed_len);
+
+                else {
+                    HASHED_KEY *ktmp = get_key_from_hashtable_with_char_ptr(jb, node->name.key);
+                    if(ktmp->value.len)
+                        txt_expand_and_append(&jb->rewrites.tmp, ktmp->value.txt, ktmp->value.len);
+                }
+            }
+        }
+        else {
+            txt_expand_and_append(&jb->rewrites.tmp, node->name.key, node->name.len);
+        }
+    }
+
+    // swap the values of the temporary TEXT and the key value
+    TEXT tmp = k->value;
+    k->value = jb->rewrites.tmp;
+    jb->rewrites.tmp = tmp;
+}
+
+// Decide whether rewrite rule rw applies to the current value of key k.
+//  - RW_MATCH_PCRE2:     the value must match the rule's PCRE2 pattern
+//  - RW_MATCH_NON_EMPTY: the rule's "not empty" pattern must expand to at least one byte
+//  - neither:            the rule is unconditional
+// k must be a hashtable-owned key.
+static inline bool rewrite_conditions_satisfied(LOG_JOB *jb, HASHED_KEY *k, REWRITE *rw) {
+    assert(k->flags & HK_HASHTABLE_ALLOCATED);
+
+    if(rw->flags & RW_MATCH_PCRE2)
+        return search_pattern_matches(&rw->match_pcre2, k->value.txt, k->value.len);
+
+    if(rw->flags & RW_MATCH_NON_EMPTY) {
+        // a tiny buffer is enough - we only need to know if anything gets written
+        char probe[2];
+        size_t written = replace_evaluate_to_buffer(jb, k, &rw->match_non_empty, probe, sizeof(probe));
+        return written ? true : false;
+    }
+
+    // no conditions
+    return true;
+}
+
+// ----------------------------------------------------------------------------
+
+// Apply the configured renames to key k.
+// Returns the hashtable entry of the new key when a rename rule has k as its old key,
+// otherwise k itself. The outcome is remembered in k->flags (HK_RENAMES_CHECKED,
+// HK_HAS_RENAMES) so that keys known to have no rename skip the scan on later calls.
+static inline HASHED_KEY *rename_key(LOG_JOB *jb, HASHED_KEY *k) {
+    bool already_checked = (k->flags & HK_RENAMES_CHECKED) != 0;
+    bool has_renames = (k->flags & HK_HAS_RENAMES) != 0;
+
+    // checked before and found nothing: no need to scan again
+    if(already_checked && !has_renames)
+        return k;
+
+    k->flags |= HK_RENAMES_CHECKED;
+
+    RENAME *rule = jb->renames.array;
+    for(size_t left = jb->renames.used; left; left--, rule++) {
+        if(!hashed_keys_match(&rule->old_key, k))
+            continue;
+
+        k->flags |= HK_HAS_RENAMES;
+        return get_key_from_hashtable(jb, &rule->new_key);
+    }
+
+    return k;
+}
+
+// ----------------------------------------------------------------------------
+
+// Set the value of a key to the given text (value, len) for the current log entry.
+// The value is stored on the hashtable-owned instance of key (no rename is applied)
+// and the entry is flagged HK_VALUE_FROM_LOG. Nothing is printed here.
+static inline void send_key_value_constant(LOG_JOB *jb __maybe_unused, HASHED_KEY *key, const char *value, size_t len) {
+ HASHED_KEY *ht_key = get_key_from_hashtable(jb, key);
+
+ txt_replace(&ht_key->value, value, len);
+ ht_key->flags |= HK_VALUE_FROM_LOG;
+
+ // fprintf(stderr, "SET %s=%.*s\n", ht_key->key, (int)ht_key->value.len, ht_key->value.txt);
+}
+
+// Print "KEY=<formatted message>\n" directly to stdout, using the name of the
+// hashtable-owned instance of key and a printf-style format with its arguments.
+// Unlike the other senders, the value is not stored in the hashtable.
+// The prototype carries the printf format attribute so the compiler can check
+// the format string against the arguments.
+static inline void send_key_value_error(LOG_JOB *jb, HASHED_KEY *key, const char *format, ...) __attribute__ ((format(__printf__, 3, 4)));
+static inline void send_key_value_error(LOG_JOB *jb, HASHED_KEY *key, const char *format, ...) {
+ HASHED_KEY *ht_key = get_key_from_hashtable(jb, key);
+
+ printf("%s=", ht_key->key);
+ va_list args;
+ va_start(args, format);
+ vprintf(format, args);
+ va_end(args);
+ printf("\n");
+}
+
+// Store a key/value pair extracted from the current log line.
+//
+// key is resolved to its hashtable entry, the configured renames are applied, and
+// the value (value, len) replaces the value of the resulting key.
+inline void log_job_send_extracted_key_value(LOG_JOB *jb, const char *key, const char *value, size_t len) {
+    HASHED_KEY *ht_key = get_key_from_hashtable_with_char_ptr(jb, key);
+    HASHED_KEY *nk = rename_key(jb, ht_key);
+    txt_replace(&nk->value, value, len);
+
+    // Flag the key that actually holds the value. When the key has been renamed,
+    // flagging the original one would make log_job_process_rewrites() skip rewrite
+    // rules on the new key whenever the extracted value is empty, and would let
+    // rules on the old (now valueless) key run instead.
+    nk->flags |= HK_VALUE_FROM_LOG;
+}
+
+// Run all configured rewrite rules, in order, against the values of the current entry.
+//
+// A rule is skipped when its key has no value, did not come from the log and the rule
+// is not an injecting one (RW_INJECT). It is also skipped when the key has already been
+// rewritten by a rule that stops further processing, or when its conditions do not hold.
+// Unless the rule has RW_DONT_STOP, a successful rewrite marks the key HK_VALUE_REWRITTEN.
+static inline void log_job_process_rewrites(LOG_JOB *jb) {
+    for(size_t idx = 0; idx < jb->rewrites.used; idx++) {
+        REWRITE *rule = &jb->rewrites.array[idx];
+        HASHED_KEY *target = get_key_from_hashtable(jb, &rule->key);
+
+        bool injects = (rule->flags & RW_INJECT) != 0;
+        bool from_log = (target->flags & HK_VALUE_FROM_LOG) != 0;
+
+        // nothing to rewrite for this key in this entry
+        if(!injects && !from_log && !target->value.len)
+            continue;
+
+        // a previous rule has already produced the final value
+        if(target->flags & HK_VALUE_REWRITTEN)
+            continue;
+
+        if(!rewrite_conditions_satisfied(jb, target, rule))
+            continue;
+
+        if(rule->flags & RW_MATCH_PCRE2)
+            replace_evaluate_from_pcre2(jb, target, &rule->value, &rule->match_pcre2);
+        else
+            replace_evaluate(jb, target, &rule->value);
+
+        if(!(rule->flags & RW_DONT_STOP))
+            target->flags |= HK_VALUE_REWRITTEN;
+    }
+}
+
+// Print all the fields of the current entry to stdout as "KEY=VALUE" lines and
+// reset the per-entry state of every key in the hashtable.
+//
+// For each key with a non-empty value:
+//  - the first time it is seen, the include/exclude filters are evaluated against the
+//    key name, the result is cached in HK_FILTERED_INCLUDED (HK_FILTERED marks it as
+//    evaluated) and the key name is validated once
+//  - it is printed if it passed the filters
+//  - its value is emptied for the next entry
+// HK_VALUE_REWRITTEN and HK_VALUE_FROM_LOG are cleared on all keys.
+static inline void send_all_fields(LOG_JOB *jb) {
+ SIMPLE_HASHTABLE_SORTED_FOREACH_READ_ONLY(&jb->hashtable, kptr, HASHED_KEY, _KEY) {
+ HASHED_KEY *k = SIMPLE_HASHTABLE_SORTED_FOREACH_READ_ONLY_VALUE(kptr);
+
+ if(k->value.len) {
+ // the key exists and has some value
+
+ if(!(k->flags & HK_FILTERED)) {
+ k->flags |= HK_FILTERED;
+
+ // a filter that is not configured (no compiled regex) does not restrict anything
+ bool included = jb->filter.include.re ? search_pattern_matches(&jb->filter.include, k->key, k->len) : true;
+ bool excluded = jb->filter.exclude.re ? search_pattern_matches(&jb->filter.exclude, k->key, k->len) : false;
+
+ if(included && !excluded)
+ k->flags |= HK_FILTERED_INCLUDED;
+ else
+ k->flags &= ~HK_FILTERED_INCLUDED;
+
+ // log some error if the key does not comply to journal standards
+ validate_key(jb, k);
+ }
+
+ if(k->flags & HK_FILTERED_INCLUDED)
+ printf("%s=%.*s\n", k->key, (int)k->value.len, k->value.txt);
+
+ // reset it for the next round
+ k->value.txt[0] = '\0';
+ k->value.len = 0;
+ }
+
+ // per-entry flags are cleared even for keys that had no value
+ k->flags &= ~(HK_VALUE_REWRITTEN | HK_VALUE_FROM_LOG);
+ }
+}
+
+// ----------------------------------------------------------------------------
+// injection of constant fields
+
+// Decide, once per job, which of the regular injections are also applied to unmatched lines.
+// Every injection is enabled for unmatched lines, except the ones whose key is also
+// configured as an injection on unmatched - these are disabled, so that the output
+// does not have the same key twice.
+static void select_which_injections_should_be_injected_on_unmatched(LOG_JOB *jb) {
+    for(size_t i = 0; i < jb->injections.used; i++) {
+        INJECTION *regular = &jb->injections.keys[i];
+
+        // enabled by default
+        regular->on_unmatched = true;
+
+        // disabled when the unmatched section injects the same key
+        for(size_t u = 0; u < jb->unmatched.injections.used; u++) {
+            const char *unmatched_name = jb->unmatched.injections.keys[u].key.key;
+
+            if(strcmp(regular->key.key, unmatched_name) == 0)
+                regular->on_unmatched = false;
+        }
+    }
+}
+
+
+// Evaluate the configured injections for the current entry.
+// For unmatched lines, only the injections that have on_unmatched set are evaluated.
+static inline void jb_finalize_injections(LOG_JOB *jb, bool line_is_matched) {
+    size_t total = jb->injections.used;
+
+    for(size_t idx = 0; idx < total; idx++) {
+        INJECTION *inj = &jb->injections.keys[idx];
+
+        if(line_is_matched || inj->on_unmatched)
+            replace_evaluate(jb, &inj->key, &inj->value);
+    }
+}
+
+// ----------------------------------------------------------------------------
+// filename injection
+
+// Add the current filename to the entry, when a filename key is configured
+// and a filename has been detected.
+static inline void jb_inject_filename(LOG_JOB *jb) {
+    if(!jb->filename.key.key)
+        return;
+
+    if(!jb->filename.current.len)
+        return;
+
+    send_key_value_constant(jb, &jb->filename.key, jb->filename.current.txt, jb->filename.current.len);
+}
+
+// Detect the "==> filename <==" header lines (preceded by an empty line) and
+// remember the filename in jb->filename.current.
+//
+// line must be NUL terminated; len is its length.
+//
+// Returns true when the caller should skip this line (because it is ours).
+// Unfortunately, empty lines have to be consumed too.
+static inline bool jb_switched_filename(LOG_JOB *jb, const char *line, size_t len) {
+    if (!len) {
+        jb->filename.last_line_was_empty = true;
+        return true;
+    }
+
+    // Check if it's a log file change line
+    if (jb->filename.last_line_was_empty && strncmp(line, "==> ", 4) == 0) {
+        const char *start = line + 4;
+        while (*start == ' ') start++;
+
+        // Search for the terminator AFTER the start of the filename. Searching from the
+        // beginning of the line can find " <==" before start (e.g. on "==> <=="), making
+        // (end - start) negative, i.e. a huge length once converted to size_t.
+        // Since *start is not a space, a match can never be at start itself,
+        // so the filename is always at least 1 byte long.
+        const char *end = strstr(start, " <==");
+
+        if (*start != '\n' && *start != '\0' && end) {
+            txt_replace(&jb->filename.current, start, (size_t)(end - start));
+            return true;
+        }
+    }
+
+    jb->filename.last_line_was_empty = false;
+    return false;
+}
+
+// Emit a line that the parser could not match.
+// When no key is configured for unmatched lines, nothing is sent and false is returned.
+// Otherwise the line is printed as a parsing error under that key, the injections
+// configured for unmatched lines are evaluated, and true is returned.
+static inline bool jb_send_unmatched_line(LOG_JOB *jb, const char *line) {
+    bool enabled = jb->unmatched.key.key != NULL;
+
+    if (enabled) {
+        // we are sending errors to systemd-journal
+        send_key_value_error(jb, &jb->unmatched.key, "Parsing error on: %s", line);
+
+        INJECTION *inj = jb->unmatched.injections.keys;
+        for (size_t left = jb->unmatched.injections.used; left; left--, inj++)
+            replace_evaluate(jb, &inj->key, &inj->value);
+    }
+
+    return enabled;
+}
+
+// ----------------------------------------------------------------------------
+// running a job
+
+// Read the next line from stdin into buffer (of the given size) and trim it.
+//
+// Trailing newlines/whitespace are removed in place and leading whitespace is skipped.
+// Returns a pointer inside buffer to the first non-whitespace character and sets
+// *line_length to the trimmed length (0 for blank lines).
+// Returns NULL (with *line_length set to 0) on EOF or read error.
+static char *get_next_line(LOG_JOB *jb __maybe_unused, char *buffer, size_t size, size_t *line_length) {
+    if(!fgets(buffer, (int)size, stdin)) {
+        *line_length = 0;
+        return NULL;
+    }
+
+    char *line = buffer;
+    size_t len = strlen(line);
+
+    // <ctype.h> functions require a value representable as unsigned char (or EOF);
+    // log lines may contain bytes >= 0x80, which are negative as plain char on
+    // platforms where char is signed.
+
+    // remove trailing newlines and spaces
+    while(len > 1 && (line[len - 1] == '\n' || isspace((unsigned char)line[len - 1])))
+        line[--len] = '\0';
+
+    // skip leading spaces
+    // (a whitespace character is never NUL, so len cannot underflow here)
+    while(isspace((unsigned char)*line)) {
+        line++;
+        len--;
+    }
+
+    *line_length = len;
+    return line;
+}
+
+// Run a configured job: read lines from stdin, parse them and print journal entries to stdout.
+//
+// The parser is selected by jb->pattern: "json", "logfmt", "none" (no parsing),
+// or anything else, which is compiled as a PCRE2 pattern.
+//
+// Returns 1 when the PCRE2 parser reports an error at creation, 0 when stdin is exhausted.
+int log_job_run(LOG_JOB *jb) {
+ select_which_injections_should_be_injected_on_unmatched(jb);
+
+ // at most one of these is created, depending on jb->pattern
+ PCRE2_STATE *pcre2 = NULL;
+ LOG_JSON_STATE *json = NULL;
+ LOGFMT_STATE *logfmt = NULL;
+
+ if(strcmp(jb->pattern, "json") == 0) {
+ json = json_parser_create(jb);
+ // never fails
+ }
+ else if(strcmp(jb->pattern, "logfmt") == 0) {
+ logfmt = logfmt_parser_create(jb);
+ // never fails
+ }
+ else if(strcmp(jb->pattern, "none") != 0) {
+ pcre2 = pcre2_parser_create(jb);
+ if(pcre2_has_error(pcre2)) {
+ log2stderr("%s", pcre2_parser_error(pcre2));
+ pcre2_parser_destroy(pcre2);
+ return 1;
+ }
+ }
+
+ // the line buffer is owned by this function for the duration of the run
+ jb->line.buffer = mallocz(MAX_LINE_LENGTH + 1);
+ jb->line.size = MAX_LINE_LENGTH + 1;
+ jb->line.trimmed_len = 0;
+ jb->line.trimmed = jb->line.buffer;
+
+ while ((jb->line.trimmed = get_next_line(jb, (char *)jb->line.buffer, jb->line.size, &jb->line.trimmed_len))) {
+ const char *line = jb->line.trimmed;
+ size_t len = jb->line.trimmed_len;
+
+ // empty lines and "==> filename <==" headers are consumed here
+ if(jb_switched_filename(jb, line, len))
+ continue;
+
+ // with pattern "none" there is no parser, so every line counts as matched
+ bool line_is_matched = true;
+
+ if(json)
+ line_is_matched = json_parse_document(json, line);
+ else if(logfmt)
+ line_is_matched = logfmt_parse_document(logfmt, line);
+ else if(pcre2)
+ line_is_matched = pcre2_parse_document(pcre2, line, len);
+
+ if(!line_is_matched) {
+ if(json)
+ log2stderr("%s", json_parser_error(json));
+ else if(logfmt)
+ log2stderr("%s", logfmt_parser_error(logfmt));
+ else if(pcre2)
+ log2stderr("%s", pcre2_parser_error(pcre2));
+
+ if(!jb_send_unmatched_line(jb, line))
+ // just logging to stderr, not sending unmatched lines
+ continue;
+ }
+
+ // order matters: injections may reference the filename, and rewrites
+ // operate on the values set by the parser, the filename and the injections
+ jb_inject_filename(jb);
+ jb_finalize_injections(jb, line_is_matched);
+
+ log_job_process_rewrites(jb);
+ send_all_fields(jb);
+ // an empty line terminates the entry; flush so each entry is delivered immediately
+ printf("\n");
+ fflush(stdout);
+ }
+
+ if(json)
+ json_parser_destroy(json);
+
+ else if(logfmt)
+ logfmt_parser_destroy(logfmt);
+
+ else if(pcre2)
+ pcre2_parser_destroy(pcre2);
+
+ // NOTE(review): jb->line.buffer and jb->line.trimmed are left pointing to freed/NULL
+ // memory after this - confirm nothing (e.g. log_job_cleanup) uses them afterwards
+ freez((void *)jb->line.buffer);
+
+ return 0;
+}
+
+// ----------------------------------------------------------------------------
+
+// Program entry point: initialize the job, parse the command line into it,
+// optionally print the resulting configuration as YAML, run the job and clean up.
+// Exits with 1 when the command line cannot be parsed, otherwise returns
+// the result of log_job_run().
+int main(int argc, char *argv[]) {
+ LOG_JOB log_job;
+
+ log_job_init(&log_job);
+
+ // exits immediately, without calling log_job_cleanup()
+ if(!log_job_command_line_parse_parameters(&log_job, argc, argv))
+ exit(1);
+
+ // showing the configuration does not prevent the job from running
+ if(log_job.show_config)
+ log_job_configuration_to_yaml(&log_job);
+
+ int ret = log_job_run(&log_job);
+
+ log_job_cleanup(&log_job);
+ return ret;
+}
diff --git a/collectors/log2journal/log2journal.d/default.yaml b/collectors/log2journal/log2journal.d/default.yaml
new file mode 100644
index 00000000000000..d41efc4abb251d
--- /dev/null
+++ b/collectors/log2journal/log2journal.d/default.yaml
@@ -0,0 +1,15 @@
+pattern: none
+
+filename:
+ key: LOG_FILENAME
+
+inject:
+ - key: MESSAGE
+ value: '${LINE}' # a special variable that resolves to the whole line read from the log
+
+ - key: PRIORITY
+ value: 6 # Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug
+
+ - key: SYSLOG_IDENTIFIER
+ value: log2journal # the name of the application sending the logs
+
diff --git a/collectors/log2journal/log2journal.d/nginx-combined.yaml b/collectors/log2journal/log2journal.d/nginx-combined.yaml
new file mode 100644
index 00000000000000..003c774d7bd263
--- /dev/null
+++ b/collectors/log2journal/log2journal.d/nginx-combined.yaml
@@ -0,0 +1,91 @@
+# Netdata log2journal Configuration
+# The following parses nginx log files using the combined format.
+
+# The PCRE2 pattern to match log entries and give names to the fields.
+# The journal will have these names, so follow their rules. You can
+# initiate an extended PCRE2 pattern by starting the pattern with (?x)
+pattern: |
+ (?x) # Enable PCRE2 extended mode
+ ^
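+ (?<NGINX_REMOTE_ADDR>[^ ]+) \s - \s # NGINX_REMOTE_ADDR
+ (?<NGINX_REMOTE_USER>[^ ]+) \s # NGINX_REMOTE_USER
+ \[
+ (?<NGINX_TIME_LOCAL>[^\]]+) # NGINX_TIME_LOCAL
+ \]
+ \s+ "
+ (?<NGINX_REQUEST>
+ (?<NGINX_REQUEST_METHOD>[A-Z]+) \s+ # NGINX_METHOD
+ (?<NGINX_REQUEST_URI>[^ ]+) \s+
+ (?<NGINX_SERVER_PROTOCOL>[^"]+)
+ )
+ " \s+
+ (?<NGINX_STATUS>\d+) \s+ # NGINX_STATUS
+ (?<NGINX_BODY_BYTES_SENT>\d+) \s+ # NGINX_BODY_BYTES_SENT
+ "(?<NGINX_HTTP_REFERER>[^"]*)" \s+ # NGINX_HTTP_REFERER
+ "(?<NGINX_HTTP_USER_AGENT>[^"]*)" # NGINX_HTTP_USER_AGENT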
+
+# When log2journal can detect the filename of each log entry (tail gives it
+# only when it tails multiple files), this key will be used to send the
+# filename to the journals.
+filename:
+ key: NGINX_LOG_FILENAME
+
+rename:
+ - new_key: MESSAGE
+ old_key: NGINX_REQUEST
+
+# Inject constant fields into the journal logs.
+inject:
+ - key: SYSLOG_IDENTIFIER
+ value: nginx-log
+
+ # inject PRIORITY is a duplicate of NGINX_STATUS
+ - key: PRIORITY
+ value: '${NGINX_STATUS}'
+
+ # Inject NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS
+ - key: NGINX_STATUS_FAMILY
+ value: '${NGINX_STATUS}'
+
+# Rewrite the value of fields (including the duplicated ones).
+# The search pattern can have named groups, and the replace pattern can use
+# them as ${name}.
+rewrite:
+ # PRIORITY is a duplicate of NGINX_STATUS
+ # Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug
+ - key: PRIORITY
+ match: '^[123]'
+ value: 6
+
+ - key: PRIORITY
+ match: '^4'
+ value: 5
+
+ - key: PRIORITY
+ match: '^5'
+ value: 3
+
+ - key: PRIORITY
+ match: '.*'
+ value: 4
+
+ # NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS
+ - key: NGINX_STATUS_FAMILY
+ match: '^(?<first_digit>[1-5])'
+ value: '${first_digit}xx'
+
+ - key: NGINX_STATUS_FAMILY
+ match: '.*'
+ value: 'UNKNOWN'
+
+# Control what to do when input logs do not match the main PCRE2 pattern.
+unmatched:
+ # The journal key to log the PCRE2 error message to.
+ # Set this to MESSAGE, so that you can see the error in the log.
+ key: MESSAGE
+
+ # Inject static fields to the unmatched entries.
+ # Set PRIORITY=1 (alert) to help you spot unmatched entries in the logs.
+ inject:
+ - key: PRIORITY
+ value: 1
diff --git a/collectors/log2journal/log2journal.d/nginx-json.yaml b/collectors/log2journal/log2journal.d/nginx-json.yaml
new file mode 100644
index 00000000000000..7fdc4be58459c4
--- /dev/null
+++ b/collectors/log2journal/log2journal.d/nginx-json.yaml
@@ -0,0 +1,164 @@
+# For all nginx variables, check this:
+# https://nginx.org/en/docs/http/ngx_http_core_module.html#var_connection_requests
+
+pattern: json
+
+prefix: NGINX_
+
+# When log2journal can detect the filename of each log entry (tail gives it
+# only when it tails multiple files), this key will be used to send the
+# filename to the journals.
+filename:
+ key: NGINX_LOG_FILENAME
+
+filter:
+ exclude: '^(NGINX_BINARY_REMOTE_ADDR)$'
+
+rename:
+ - new_key: MESSAGE
+ old_key: NGINX_REQUEST
+
+ # args is an alias for query_string
+ - new_key: NGINX_QUERY_STRING
+ old_key: NGINX_ARGS
+
+ # document_uri is an alias for uri
+ - new_key: NGINX_URI
+ old_key: NGINX_DOCUMENT_URI
+
+ # is_args states if the request had a query string or not
+ - new_key: NGINX_HAS_QUERY_STRING
+ old_key: NGINX_IS_ARGS
+
+ # msec is the timestamp in seconds, with fractional digits for milliseconds
+ - new_key: NGINX_TIMESTAMP_SEC
+ old_key: NGINX_MSEC
+
+ # nginx_version is already prefixed with nginx, let's remove one of them
+ - new_key: NGINX_VERSION
+ old_key: NGINX_NGINX_VERSION
+
+ # pipe states if the request was pipelined or not
+ - new_key: NGINX_PIPELINED
+ old_key: NGINX_PIPE
+
+ # rename numeric TLVs to their names
+ - new_key: NGINX_PROXY_PROTOCOL_TLV_ALPN
+ old_key: NGINX_PROXY_PROTOCOL_TLV_0X01
+ - new_key: NGINX_PROXY_PROTOCOL_TLV_AUTHORITY
+ old_key: NGINX_PROXY_PROTOCOL_TLV_0X02
+ - new_key: NGINX_PROXY_PROTOCOL_TLV_UNIQUE_ID
+ old_key: NGINX_PROXY_PROTOCOL_TLV_0X05
+ - new_key: NGINX_PROXY_PROTOCOL_TLV_SSL
+ old_key: NGINX_PROXY_PROTOCOL_TLV_0X20
+ - new_key: NGINX_PROXY_PROTOCOL_TLV_NETNS
+ old_key: NGINX_PROXY_PROTOCOL_TLV_0X30
+
+ # rename numeric SSL TLVs to their names
+ - new_key: NGINX_PROXY_PROTOCOL_TLV_SSL_VERSION
+ old_key: NGINX_PROXY_PROTOCOL_TLV_SSL_0X21
+ - new_key: NGINX_PROXY_PROTOCOL_TLV_SSL_CN
+ old_key: NGINX_PROXY_PROTOCOL_TLV_SSL_0X22
+ - new_key: NGINX_PROXY_PROTOCOL_TLV_SSL_CIPHER
+ old_key: NGINX_PROXY_PROTOCOL_TLV_SSL_0X23
+ - new_key: NGINX_PROXY_PROTOCOL_TLV_SSL_SIG_ALG
+ old_key: NGINX_PROXY_PROTOCOL_TLV_SSL_0X24
+ - new_key: NGINX_PROXY_PROTOCOL_TLV_SSL_KEY_ALG
+ old_key: NGINX_PROXY_PROTOCOL_TLV_SSL_0X25
+
+# Inject constant fields into the journal logs.
+inject:
+ - key: SYSLOG_IDENTIFIER
+ value: nginx-log
+
+ # inject PRIORITY is a duplicate of NGINX_STATUS
+ - key: PRIORITY
+ value: '${NGINX_STATUS}'
+
+ # Inject NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS
+ - key: NGINX_STATUS_FAMILY
+ value: '${NGINX_STATUS}'
+
+
+# Rewrite the value of fields (including the duplicated ones).
+# The search pattern can have named groups, and the replace pattern can use
+# them as ${name}.
+rewrite:
+ # a ? means it has query string, everything else means it does not
+ - key: NGINX_HAS_QUERY_STRING
+ match: '^\?$'
+ value: yes
+ - key: NGINX_HAS_QUERY_STRING
+ match: '.*'
+ value: no
+
+ # 'on' means it was HTTPS, everything else means it was not
+ - key: NGINX_HTTPS
+ match: '^on$'
+ value: yes
+ - key: NGINX_HTTPS
+ match: '.*'
+ value: no
+
+ # 'p' means it was pipelined, everything else means it was not
+ - key: NGINX_PIPELINED
+ match: '^p$'
+ value: yes
+ - key: NGINX_PIPELINED
+ match: '.*'
+ value: no
+
+ # zero means client sent a certificate and it was verified, non-zero means otherwise
+ - key: NGINX_PROXY_PROTOCOL_TLV_SSL_VERIFY
+ match: '^0$'
+ value: yes
+ - key: NGINX_PROXY_PROTOCOL_TLV_SSL_VERIFY
+ match: '.*'
+ value: no
+
+ # 'OK' means request completed, everything else means it didn't
+ - key: NGINX_REQUEST_COMPLETION
+ match: '^OK$'
+ value: 'completed'
+ - key: NGINX_REQUEST_COMPLETION
+ match: '.*'
+ value: 'not completed'
+
+ # PRIORITY is a duplicate of NGINX_STATUS
+ # Valid PRIORITIES: 0=emerg, 1=alert, 2=crit, 3=error, 4=warn, 5=notice, 6=info, 7=debug
+ - key: PRIORITY
+ match: '^[123]'
+ value: 6
+
+ - key: PRIORITY
+ match: '^4'
+ value: 5
+
+ - key: PRIORITY
+ match: '^5'
+ value: 3
+
+ - key: PRIORITY
+ match: '.*'
+ value: 4
+
+ # NGINX_STATUS_FAMILY is a duplicate of NGINX_STATUS
+ - key: NGINX_STATUS_FAMILY
+ match: '^(?<first_digit>[1-5])'
+ value: '${first_digit}xx'
+
+ - key: NGINX_STATUS_FAMILY
+ match: '.*'
+ value: 'UNKNOWN'
+
+# Control what to do when input logs do not match the main PCRE2 pattern.
+unmatched:
+ # The journal key to log the PCRE2 error message to.
+ # Set this to MESSAGE, so that you can see the error in the log.
+ key: MESSAGE
+
+ # Inject static fields to the unmatched entries.
+ # Set PRIORITY=1 (alert) to help you spot unmatched entries in the logs.
+ inject:
+ - key: PRIORITY
+ value: 1
diff --git a/collectors/log2journal/log2journal.h b/collectors/log2journal/log2journal.h
new file mode 100644
index 00000000000000..834a5b135d8a3c
--- /dev/null
+++ b/collectors/log2journal/log2journal.h
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_LOG2JOURNAL_H
+#define NETDATA_LOG2JOURNAL_H
+
+// only for PACKAGE_VERSION
+#include "config.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+// ----------------------------------------------------------------------------
+// logging
+
+// Print a printf-style message to stderr, followed by a newline.
+// The format attribute lets the compiler check the arguments against the format string.
+static inline void log2stderr(const char *format, ...) __attribute__ ((format(__printf__, 1, 2)));
+static inline void log2stderr(const char *format, ...) {
+    va_list ap;
+
+    va_start(ap, format);
+    vfprintf(stderr, format, ap);
+    va_end(ap);
+
+    fputc('\n', stderr);
+}
+
+// ----------------------------------------------------------------------------
+// allocation functions abstraction
+
+// malloc() wrapper that never returns NULL: on failure it logs the requested
+// size to stderr and terminates the process with EXIT_FAILURE.
+static inline void *mallocz(size_t size) {
+    void *mem = malloc(size);
+    if (mem != NULL)
+        return mem;
+
+    log2stderr("Fatal Error: Memory allocation failed. Requested size: %zu bytes.", size);
+    exit(EXIT_FAILURE);
+}
+
+// calloc() wrapper that never returns NULL: on failure it logs the requested
+// size (elements * size) to stderr and terminates the process with EXIT_FAILURE.
+static inline void *callocz(size_t elements, size_t size) {
+    void *mem = calloc(elements, size);
+    if (mem != NULL)
+        return mem;
+
+    log2stderr("Fatal Error: Memory allocation failed. Requested size: %zu bytes.", elements * size);
+    exit(EXIT_FAILURE);
+}
+
+// realloc() wrapper that never returns NULL: when realloc() returns NULL it logs
+// the requested size to stderr and terminates the process with EXIT_FAILURE.
+static inline void *reallocz(void *ptr, size_t size) {
+    void *resized = realloc(ptr, size);
+    if (resized != NULL)
+        return resized;
+
+    log2stderr("Fatal Error: Memory reallocation failed. Requested size: %zu bytes.", size);
+    exit(EXIT_FAILURE);
+}
+
+static inline char *strdupz(const char *s) {
+ char *ptr = strdup(s);
+ if (!ptr) {
+ log2stderr("Fatal Error: Memory allocation failed in strdup.");
+ exit(EXIT_FAILURE);
+ }
+ return ptr;
+}
+
+// strndup() wrapper that never returns NULL: on failure it logs the requested
+// size to stderr and terminates the process with EXIT_FAILURE.
+static inline char *strndupz(const char *s, size_t n) {
+    char *copy = strndup(s, n);
+    if (copy != NULL)
+        return copy;
+
+    log2stderr("Fatal Error: Memory allocation failed in strndup. Requested size: %zu bytes.", n);
+    exit(EXIT_FAILURE);
+}
+
+// free() wrapper that explicitly skips NULL pointers.
+static inline void freez(void *ptr) {
+    if (ptr == NULL)
+        return;
+
+    free(ptr);
+}
+
+// ----------------------------------------------------------------------------
+
+#define XXH_INLINE_ALL
+#include "../../libnetdata/xxhash.h"
+
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include
+
+#ifdef HAVE_LIBYAML
+#include
+#endif
+
+// ----------------------------------------------------------------------------
+// hashtable for HASHED_KEY
+
+// cleanup hashtable defines
+#undef SIMPLE_HASHTABLE_SORT_FUNCTION
+#undef SIMPLE_HASHTABLE_VALUE_TYPE
+#undef SIMPLE_HASHTABLE_NAME
+#undef NETDATA_SIMPLE_HASHTABLE_H
+
+struct hashed_key;
+static inline int compare_keys(struct hashed_key *k1, struct hashed_key *k2);
+#define SIMPLE_HASHTABLE_SORT_FUNCTION compare_keys
+#define SIMPLE_HASHTABLE_VALUE_TYPE struct hashed_key
+#define SIMPLE_HASHTABLE_NAME _KEY
+#include "../../libnetdata/simple_hashtable.h"
+
+// ----------------------------------------------------------------------------
+
+#define MAX_OUTPUT_KEYS 1024
+#define MAX_LINE_LENGTH (1024 * 1024)
+#define MAX_INJECTIONS (MAX_OUTPUT_KEYS / 2)
+#define MAX_REWRITES (MAX_OUTPUT_KEYS / 2)
+#define MAX_RENAMES (MAX_OUTPUT_KEYS / 2)
+
+#define JOURNAL_MAX_KEY_LEN 64 // according to systemd-journald
+#define JOURNAL_MAX_VALUE_LEN (48 * 1024) // according to systemd-journald
+
+#define LOG2JOURNAL_CONFIG_PATH LIBCONFIG_DIR "/log2journal.d"
+
+// ----------------------------------------------------------------------------
+// character conversion for journal keys
+
+extern const char journal_key_characters_map[256];
+
+// ----------------------------------------------------------------------------
+// copy to buffer, while ensuring there is no buffer overflow
+
+// Copy up to src_len bytes from src into dst, truncating the copy so that it
+// fits, and null terminate dst whenever dst_size is at least 1.
+// Returns the number of bytes copied, not counting the null terminator.
+static inline size_t copy_to_buffer(char *dst, size_t dst_size, const char *src, size_t src_len) {
+    // no room for payload: a 1-byte buffer gets only the terminator,
+    // a 0-byte buffer is left untouched
+    if(dst_size < 2) {
+        if(dst_size == 1)
+            dst[0] = '\0';
+
+        return 0;
+    }
+
+    // one byte is always reserved for the null terminator
+    size_t capacity = dst_size - 1;
+    size_t copied = (src_len <= capacity) ? src_len : capacity;
+
+    memcpy(dst, src, copied);
+    dst[copied] = '\0';
+
+    return copied;
+}
+
+// ----------------------------------------------------------------------------
+// A dynamically sized, reusable text buffer,
+// allowing us to be fast (no allocations during iterations) while having the
+// smallest possible allocations.
+
+typedef struct txt {
+    char *txt; // heap buffer holding the null terminated text; NULL after txt_cleanup()
+    uint32_t size; // allocation size recorded for txt, in bytes; 0 when there is no buffer
+    uint32_t len; // length of the stored text, in bytes, excluding the null terminator
+} TEXT;
+
+// Free the buffer owned by t (if any) and reset t to the empty state.
+// A NULL t is accepted and ignored.
+static inline void txt_cleanup(TEXT *t) {
+    if(t != NULL) {
+        if(t->txt != NULL)
+            freez(t->txt);
+
+        t->len = 0;
+        t->size = 0;
+        t->txt = NULL;
+    }
+}
+
+// Replace the contents of t with the first len bytes of s.
+//
+// A NULL or empty s, or a zero len, stores the empty string. The existing
+// buffer is reused when it is large enough for len bytes plus the null
+// terminator; otherwise it is released and a new one of exactly len + 1 bytes
+// is allocated.
+static inline void txt_replace(TEXT *t, const char *s, size_t len) {
+    if(!s || !*s || len == 0) {
+        s = "";
+        len = 0;
+    }
+
+    if(len + 1 > t->size) {
+        // no existing value allocation, or too small for our value
+        // cleanup and increase the buffer
+        //
+        // the buffer is allocated with mallocz(len + 1), not strndup():
+        // strndup() stops at the first null byte found in s, which could
+        // leave the buffer smaller than the size and len recorded below
+        txt_cleanup(t);
+
+        t->txt = mallocz(len + 1);
+        t->size = len + 1;
+    }
+
+    // the same copy is used for reused and newly allocated buffers
+    memcpy(t->txt, s, len);
+    t->txt[len] = '\0';
+    t->len = len;
+}
+
+// Append len bytes of s to the text stored in t, keeping it null terminated.
+// When the free space is not enough, the buffer is reallocated to the larger
+// of the required size and twice its current size.
+static inline void txt_expand_and_append(TEXT *t, const char *s, size_t len) {
+    size_t available = t->size - t->len;
+
+    if(available < len + 1) {
+        size_t required = t->len + len + 1;
+        size_t doubled = t->size * 2;
+        size_t new_size = (required < doubled) ? doubled : required;
+
+        t->txt = reallocz(t->txt, new_size);
+        t->size = new_size;
+    }
+
+    char *tail = t->txt + t->len;
+    memcpy(tail, s, len);
+    tail[len] = '\0';
+    t->len += len;
+}
+
+// ----------------------------------------------------------------------------
+
+typedef enum __attribute__((__packed__)) {
+    HK_NONE = 0,
+
+    // permanent flags - they are set once to optimize various decisions and lookups
+
+    HK_HASHTABLE_ALLOCATED = (1 << 0), // this is a key object allocated in the hashtable
+    // objects that do not have this, have a pointer to a key in the hashtable
+    // objects that have this, have a value allocated
+
+    HK_FILTERED = (1 << 1), // we checked once if this key is filtered
+    HK_FILTERED_INCLUDED = (1 << 2), // the result of the filtering was to include it in the output
+
+    HK_COLLISION_CHECKED = (1 << 3), // we checked this key once for collisions
+
+    HK_RENAMES_CHECKED = (1 << 4), // we checked once if there are renames on this key
+    HK_HAS_RENAMES = (1 << 5), // and we found there is a rename rule related to it
+
+    // ephemeral flags - they are unset at the end of each log line
+
+    HK_VALUE_FROM_LOG = (1 << 14), // the value of this key has been read from the log (or from injection, duplication)
+    HK_VALUE_REWRITTEN = (1 << 15), // the value of this key has been rewritten due to one of our rewrite rules
+
+} HASHED_KEY_FLAGS;
+
+// A key name with its precomputed length and hash. Depending on
+// HK_HASHTABLE_ALLOCATED it carries either a value or a pointer to another key.
+typedef struct hashed_key {
+    const char *key; // heap allocated copy of the key name; freed by hashed_key_cleanup()
+    uint32_t len; // length of key, in bytes
+    HASHED_KEY_FLAGS flags;
+    XXH64_hash_t hash; // XXH3_64bits() of the key name
+    union {
+        struct hashed_key *hashtable_ptr; // HK_HASHTABLE_ALLOCATED is not set
+        TEXT value; // HK_HASHTABLE_ALLOCATED is set
+    };
+} HASHED_KEY;
+
+// Release the key name of k, then either free its value buffer (when
+// HK_HASHTABLE_ALLOCATED is set) or clear its hashtable pointer.
+// The flags of k are left unchanged.
+static inline void hashed_key_cleanup(HASHED_KEY *k) {
+    if(k->key != NULL) {
+        freez((void *)k->key);
+        k->key = NULL;
+    }
+
+    if(!(k->flags & HK_HASHTABLE_ALLOCATED)) {
+        k->hashtable_ptr = NULL;
+        return;
+    }
+
+    txt_cleanup(&k->value);
+}
+
+// Clean up k and make it hold a private copy of the null terminated name,
+// together with its length and XXH3 64-bit hash. Flags are reset to HK_NONE.
+static inline void hashed_key_set(HASHED_KEY *k, const char *name) {
+    hashed_key_cleanup(k);
+
+    char *copy = strdupz(name);
+    uint32_t copy_len = strlen(copy);
+
+    k->flags = HK_NONE;
+    k->key = copy;
+    k->len = copy_len;
+    k->hash = XXH3_64bits(copy, copy_len);
+}
+
+// Clean up k and make it hold a private copy of at most len bytes of name.
+// len is recorded as the key length and used to compute the XXH3 64-bit hash.
+// Flags are reset to HK_NONE.
+static inline void hashed_key_len_set(HASHED_KEY *k, const char *name, size_t len) {
+    hashed_key_cleanup(k);
+
+    char *copy = strndupz(name, len);
+    uint32_t stored_len = len;
+
+    k->flags = HK_NONE;
+    k->key = copy;
+    k->len = stored_len;
+    k->hash = XXH3_64bits(copy, stored_len);
+}
+
+// Two keys match when they are the same object, or when their hashes are
+// equal and their names compare equal.
+static inline bool hashed_keys_match(HASHED_KEY *k1, HASHED_KEY *k2) {
+    if(k1 == k2)
+        return true;
+
+    if(k1->hash != k2->hash)
+        return false;
+
+    return strcmp(k1->key, k2->key) == 0;
+}
+
+// Sort function of the _KEY hashtable: orders keys by name, with strcmp() semantics.
+static inline int compare_keys(struct hashed_key *k1, struct hashed_key *k2) {
+    const char *left = k1->key;
+    const char *right = k2->key;
+
+    return strcmp(left, right);
+}
+
+// ----------------------------------------------------------------------------
+
+// A PCRE2 search pattern, together with the data needed to run matches with it.
+typedef struct search_pattern {
+    const char *pattern; // NOTE(review): presumably the pattern text given to search_pattern_set() - confirm
+    pcre2_code *re; // passed to pcre2_match() by search_pattern_matches()
+    pcre2_match_data *match_data; // match data block passed to pcre2_match()
+    TEXT error; // NOTE(review): presumably the last error message of this pattern - confirm
+} SEARCH_PATTERN;
+
+void search_pattern_cleanup(SEARCH_PATTERN *sp);
+bool search_pattern_set(SEARCH_PATTERN *sp, const char *search_pattern, size_t search_pattern_len);
+
+// Run the pattern of sp against the first value_len bytes of value.
+// Returns true when pcre2_match() reports a non-negative result.
+static inline bool search_pattern_matches(SEARCH_PATTERN *sp, const char *value, size_t value_len) {
+    int rc = pcre2_match(sp->re, (PCRE2_SPTR)value, value_len, 0, 0, sp->match_data, NULL);
+
+    return rc >= 0;
+}
+
+// ----------------------------------------------------------------------------
+
+// A node of the linked list referenced by REPLACE_PATTERN.nodes.
+typedef struct replacement_node {
+    HASHED_KEY name;
+    bool is_variable; // NOTE(review): presumably true for ${variable} references, false for literal text - confirm
+    bool logged_error;
+
+    struct replacement_node *next; // next node in the list
+} REPLACE_NODE;
+
+void replace_node_free(REPLACE_NODE *rpn);
+
+// A replacement pattern; set with replace_pattern_set() and released with replace_pattern_cleanup().
+typedef struct replace_pattern {
+    const char *pattern;
+    REPLACE_NODE *nodes; // head of the list of REPLACE_NODE
+    bool has_variables; // NOTE(review): presumably true when at least one node is a variable - confirm
+} REPLACE_PATTERN;
+
+void replace_pattern_cleanup(REPLACE_PATTERN *rp);
+bool replace_pattern_set(REPLACE_PATTERN *rp, const char *pattern);
+
+// ----------------------------------------------------------------------------
+
+// A key to inject, with the replacement pattern of its value.
+typedef struct injection {
+    bool on_unmatched; // NOTE(review): presumably mirrors the 'unmatched' argument of log_job_injection_add() - confirm
+    HASHED_KEY key;
+    REPLACE_PATTERN value;
+} INJECTION;
+
+void injection_cleanup(INJECTION *inj);
+
+// ----------------------------------------------------------------------------
+
+// A rename rule, pairing a new key with an old key.
+typedef struct key_rename {
+    HASHED_KEY new_key;
+    HASHED_KEY old_key;
+} RENAME;
+
+void rename_cleanup(RENAME *rn);
+
+// ----------------------------------------------------------------------------
+
+// Flags of a REWRITE rule (the type of REWRITE.flags). Bit 0 is not used.
+typedef enum __attribute__((__packed__)) {
+    RW_NONE = 0,
+    RW_MATCH_PCRE2 = (1 << 1), // a rewrite rule
+    RW_MATCH_NON_EMPTY = (1 << 2), // a rewrite rule
+    RW_DONT_STOP = (1 << 3), // NOTE(review): presumably set by 'stop: no' in the YAML rewrite rules - confirm
+    RW_INJECT = (1 << 4), // NOTE(review): presumably set by 'inject: yes' in the YAML rewrite rules - confirm
+} RW_FLAGS;
+
+// A rewrite rule: a key, a matching condition and a replacement pattern for the value.
+typedef struct key_rewrite {
+    RW_FLAGS flags;
+    HASHED_KEY key;
+    union {
+        // NOTE(review): presumably flags (RW_MATCH_PCRE2 / RW_MATCH_NON_EMPTY) tell which member is in use - confirm
+        SEARCH_PATTERN match_pcre2;
+        REPLACE_PATTERN match_non_empty;
+    };
+    REPLACE_PATTERN value;
+} REWRITE;
+
+void rewrite_cleanup(REWRITE *rw);
+
+// ----------------------------------------------------------------------------
+// A job configuration and runtime structures
+
+typedef struct log_job {
+    bool show_config;
+
+    const char *pattern; // see log_job_pattern_set()
+    const char *prefix; // see log_job_key_prefix_set()
+
+    SIMPLE_HASHTABLE_KEY hashtable; // hashtable of struct hashed_key, sorted with compare_keys()
+
+    // NOTE(review): presumably the log line currently being processed - confirm
+    struct {
+        const char *buffer;
+        const char *trimmed;
+        size_t trimmed_len;
+        size_t size;
+        HASHED_KEY key;
+    } line;
+
+    // see log_job_include_pattern_set() and log_job_exclude_pattern_set()
+    struct {
+        SEARCH_PATTERN include;
+        SEARCH_PATTERN exclude;
+    } filter;
+
+    // see log_job_filename_key_set()
+    struct {
+        bool last_line_was_empty;
+        HASHED_KEY key;
+        TEXT current;
+    } filename;
+
+    // see log_job_injection_add()
+    struct {
+        uint32_t used; // NOTE(review): presumably the number of entries of keys[] in use - confirm
+        INJECTION keys[MAX_INJECTIONS];
+    } injections;
+
+    // NOTE(review): presumably the handling of log lines that do not match the pattern - confirm
+    struct {
+        HASHED_KEY key;
+        struct {
+            uint32_t used; // NOTE(review): presumably the number of entries of keys[] in use - confirm
+            INJECTION keys[MAX_INJECTIONS];
+        } injections;
+    } unmatched;
+
+    // see log_job_rewrite_add()
+    struct {
+        uint32_t used; // NOTE(review): presumably the number of entries of array[] in use - confirm
+        REWRITE array[MAX_REWRITES];
+        TEXT tmp;
+    } rewrites;
+
+    // see log_job_rename_add()
+    struct {
+        uint32_t used; // NOTE(review): presumably the number of entries of array[] in use - confirm
+        RENAME array[MAX_RENAMES];
+    } renames;
+} LOG_JOB;
+
+// initialize a log job
+void log_job_init(LOG_JOB *jb);
+
+// free all resources consumed by the log job
+void log_job_cleanup(LOG_JOB *jb);
+
+// ----------------------------------------------------------------------------
+
+// the entry point to send key value pairs to the output
+// this implements the pipeline of processing renames, rewrites and duplications
+void log_job_send_extracted_key_value(LOG_JOB *jb, const char *key, const char *value, size_t len);
+
+// ----------------------------------------------------------------------------
+// configuration related
+
+// management of configuration to set settings
+bool log_job_filename_key_set(LOG_JOB *jb, const char *key, size_t key_len);
+bool log_job_key_prefix_set(LOG_JOB *jb, const char *prefix, size_t prefix_len);
+bool log_job_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len);
+bool log_job_injection_add(LOG_JOB *jb, const char *key, size_t key_len, const char *value, size_t value_len, bool unmatched);
+bool log_job_rewrite_add(LOG_JOB *jb, const char *key, RW_FLAGS flags, const char *search_pattern, const char *replace_pattern);
+bool log_job_rename_add(LOG_JOB *jb, const char *new_key, size_t new_key_len, const char *old_key, size_t old_key_len);
+bool log_job_include_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len);
+bool log_job_exclude_pattern_set(LOG_JOB *jb, const char *pattern, size_t pattern_len);
+
+// entry point to parse command line parameters
+bool log_job_command_line_parse_parameters(LOG_JOB *jb, int argc, char **argv);
+void log_job_command_line_help(const char *name);
+
+// ----------------------------------------------------------------------------
+// YAML configuration related
+
+#ifdef HAVE_LIBYAML
+bool yaml_parse_file(const char *config_file_path, LOG_JOB *jb);
+bool yaml_parse_config(const char *config_name, LOG_JOB *jb);
+#endif
+
+void log_job_configuration_to_yaml(LOG_JOB *jb);
+
+// ----------------------------------------------------------------------------
+// JSON parser
+
+typedef struct log_json_state LOG_JSON_STATE;
+LOG_JSON_STATE *json_parser_create(LOG_JOB *jb);
+void json_parser_destroy(LOG_JSON_STATE *js);
+const char *json_parser_error(LOG_JSON_STATE *js);
+bool json_parse_document(LOG_JSON_STATE *js, const char *txt);
+void json_test(void);
+
+size_t parse_surrogate(const char *s, char *d, size_t *remaining);
+
+// ----------------------------------------------------------------------------
+// logfmt parser
+
+typedef struct logfmt_state LOGFMT_STATE;
+LOGFMT_STATE *logfmt_parser_create(LOG_JOB *jb);
+void logfmt_parser_destroy(LOGFMT_STATE *lfs);
+const char *logfmt_parser_error(LOGFMT_STATE *lfs);
+bool logfmt_parse_document(LOGFMT_STATE *js, const char *txt);
+void logfmt_test(void);
+
+// ----------------------------------------------------------------------------
+// pcre2 parser
+
+typedef struct pcre2_state PCRE2_STATE;
+PCRE2_STATE *pcre2_parser_create(LOG_JOB *jb);
+void pcre2_parser_destroy(PCRE2_STATE *pcre2);
+const char *pcre2_parser_error(PCRE2_STATE *pcre2);
+bool pcre2_parse_document(PCRE2_STATE *pcre2, const char *txt, size_t len);
+bool pcre2_has_error(PCRE2_STATE *pcre2);
+void pcre2_test(void);
+
+void pcre2_get_error_in_buffer(char *msg, size_t msg_len, int rc, int pos);
+
+#endif //NETDATA_LOG2JOURNAL_H
diff --git a/collectors/log2journal/tests.d/default.output b/collectors/log2journal/tests.d/default.output
new file mode 100644
index 00000000000000..ef17cb2c7c78f6
--- /dev/null
+++ b/collectors/log2journal/tests.d/default.output
@@ -0,0 +1,20 @@
+MESSAGE=key1=value01 key2=value02 key3=value03 key4=value04
+PRIORITY=6
+SYSLOG_IDENTIFIER=log2journal
+
+MESSAGE=key1=value11 key2=value12 key3=value13 key4=
+PRIORITY=6
+SYSLOG_IDENTIFIER=log2journal
+
+MESSAGE=key1=value21 key2=value22 key3=value23 key4=value24
+PRIORITY=6
+SYSLOG_IDENTIFIER=log2journal
+
+MESSAGE=key1=value31 key2=value32 key3=value33 key4=
+PRIORITY=6
+SYSLOG_IDENTIFIER=log2journal
+
+MESSAGE=key1=value41 key2=value42 key3=value43 key4=value44
+PRIORITY=6
+SYSLOG_IDENTIFIER=log2journal
+
diff --git a/collectors/log2journal/tests.d/full.output b/collectors/log2journal/tests.d/full.output
new file mode 100644
index 00000000000000..074092d4ed1f74
--- /dev/null
+++ b/collectors/log2journal/tests.d/full.output
@@ -0,0 +1,77 @@
+pattern: |
+ (?x) # Enable PCRE2 extended mode
+ ^
+ (?[^ ]+) \s - \s # NGINX_REMOTE_ADDR
+ (?[^ ]+) \s # NGINX_REMOTE_USER
+ \[
+ (?[^\]]+) # NGINX_TIME_LOCAL
+ \]
+ \s+ "
+ (?
+ (?[A-Z]+) \s+ # NGINX_METHOD
+ (?[^ ]+) \s+
+ HTTP/(?[^"]+)
+ )
+ " \s+
+ (?\d+) \s+ # NGINX_STATUS
+ (?\d+) \s+ # NGINX_BODY_BYTES_SENT
+ "(?[^"]*)" \s+ # NGINX_HTTP_REFERER
+ "(?[^"]*)" # NGINX_HTTP_USER_AGENT
+
+prefix: NGINX_
+
+filename:
+ key: NGINX_LOG_FILENAME
+
+filter:
+ include: '.*'
+ exclude: '.*HELLO.*WORLD.*'
+
+rename:
+ - new_key: TEST1
+ old_key: TEST2
+ - new_key: TEST3
+ old_key: TEST4
+
+inject:
+ - key: SYSLOG_IDENTIFIER
+ value: nginx-log
+ - key: SYSLOG_IDENTIFIER2
+ value: nginx-log2
+ - key: PRIORITY
+ value: '${NGINX_STATUS}'
+ - key: NGINX_STATUS_FAMILY
+ value: '${NGINX_STATUS}${NGINX_METHOD}'
+
+rewrite:
+ - key: PRIORITY
+ value: '${NGINX_STATUS}'
+ inject: yes
+ stop: no
+ - key: PRIORITY
+ match: '^[123]'
+ value: 6
+ - key: PRIORITY
+ match: '^4'
+ value: 5
+ - key: PRIORITY
+ match: '^5'
+ value: 3
+ - key: PRIORITY
+ match: '.*'
+ value: 4
+ - key: NGINX_STATUS_FAMILY
+ match: '^(?[1-5])'
+ value: '${first_digit}xx'
+ - key: NGINX_STATUS_FAMILY
+ match: '.*'
+ value: UNKNOWN
+
+unmatched:
+ key: MESSAGE
+
+ inject:
+ - key: PRIORITY
+ value: 1
+ - key: PRIORITY2
+ value: 2
diff --git a/collectors/log2journal/tests.d/full.yaml b/collectors/log2journal/tests.d/full.yaml
new file mode 100644
index 00000000000000..86cafb5a2a1189
--- /dev/null
+++ b/collectors/log2journal/tests.d/full.yaml
@@ -0,0 +1,76 @@
+pattern: |
+ (?x) # Enable PCRE2 extended mode
+ ^
+ (?[^ ]+) \s - \s # NGINX_REMOTE_ADDR
+ (?[^ ]+) \s # NGINX_REMOTE_USER
+ \[
+ (?[^\]]+) # NGINX_TIME_LOCAL
+ \]
+ \s+ "
+ (?
+ (?[A-Z]+) \s+ # NGINX_METHOD
+ (?[^ ]+) \s+
+ HTTP/(?[^"]+)
+ )
+ " \s+
+ (?\d+) \s+ # NGINX_STATUS
+ (?\d+) \s+ # NGINX_BODY_BYTES_SENT
+ "(?[^"]*)" \s+ # NGINX_HTTP_REFERER
+ "(?[^"]*)" # NGINX_HTTP_USER_AGENT
+
+prefix: NGINX_
+
+filename:
+ key: NGINX_LOG_FILENAME
+
+filter:
+ include: '.*'
+ exclude: '.*HELLO.*WORLD.*'
+
+rename:
+ - new_key: TEST1
+ old_key: TEST2
+ - new_key: TEST3
+ old_key: TEST4
+
+inject:
+ - key: SYSLOG_IDENTIFIER
+ value: 'nginx-log'
+ - key: SYSLOG_IDENTIFIER2
+ value: 'nginx-log2'
+ - key: PRIORITY
+ value: '${NGINX_STATUS}'
+ - key: NGINX_STATUS_FAMILY
+ value: '${NGINX_STATUS}${NGINX_METHOD}'
+
+rewrite:
+ - key: "PRIORITY"
+ value: "${NGINX_STATUS}"
+ inject: yes
+ stop: no
+ - key: "PRIORITY"
+ match: "^[123]"
+ value: 6
+ - key: "PRIORITY"
+ match: "^4"
+ value: 5
+ - key: "PRIORITY"
+ match: "^5"
+ value: 3
+ - key: "PRIORITY"
+ match: ".*"
+ value: 4
+ - key: "NGINX_STATUS_FAMILY"
+ match: "^(?[1-5])"
+ value: "${first_digit}xx"
+ - key: "NGINX_STATUS_FAMILY"
+ match: ".*"
+ value: "UNKNOWN"
+
+unmatched:
+ key: MESSAGE
+ inject:
+ - key: PRIORITY
+ value: 1
+ - key: PRIORITY2
+ value: 2
diff --git a/collectors/log2journal/tests.d/json-exclude.output b/collectors/log2journal/tests.d/json-exclude.output
new file mode 100644
index 00000000000000..a8f6f83e6ce2d5
--- /dev/null
+++ b/collectors/log2journal/tests.d/json-exclude.output
@@ -0,0 +1,153 @@
+ARRAY2_0=1
+ARRAY2_1=-2.345
+ARRAY2_2=Array Element
+ARRAY2_3=true
+ARRAY2_4=false
+ARRAY2_5=null
+ARRAY2_6_BOOLEANFALSE=false
+ARRAY2_6_BOOLEANTRUE=true
+ARRAY2_6_FLOATNEGATIVE=-0.123
+ARRAY2_6_FLOATPOSITIVE=0.987
+ARRAY2_6_NULLVALUE=null
+ARRAY2_6_NUMERICNEGATIVE=-456
+ARRAY2_6_NUMERICPOSITIVE=123
+ARRAY2_6_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+ARRAY2_6_SCIENTIFICINTPOSITIVE=6e4
+ARRAY2_6_SCIENTIFICSMALLPOSITIVE=5e-5
+ARRAY2_6_STRING=Nested Object in Array2
+ARRAY2_7_BOOLEANFALSE=false
+ARRAY2_7_BOOLEANTRUE=true
+ARRAY2_7_FLOATNEGATIVE=-2.71828
+ARRAY2_7_FLOATPOSITIVE=3.14159
+ARRAY2_7_NULLVALUE=null
+ARRAY2_7_NUMERICNEGATIVE=-123
+ARRAY2_7_NUMERICPOSITIVE=42
+ARRAY2_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3
+ARRAY2_7_SCIENTIFICINTPOSITIVE=1e5
+ARRAY2_7_SCIENTIFICSMALLPOSITIVE=1e-4
+ARRAY2_7_STRING=Array Element with Object in Array2
+BOOLEANFALSE=false
+BOOLEANTRUE=true
+FLOATNEGATIVE=-2.71828
+FLOATPOSITIVE=3.14159
+NULLVALUE=null
+NUMERICNEGATIVE=-123
+NUMERICPOSITIVE=42
+OBJECT_BOOLEANFALSE=false
+OBJECT_BOOLEANTRUE=true
+OBJECT_FLOATNEGATIVE=-0.123
+OBJECT_FLOATPOSITIVE=0.987
+OBJECT_NULLVALUE=null
+OBJECT_NUMERICNEGATIVE=-456
+OBJECT_NUMERICPOSITIVE=123
+OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+OBJECT_SCIENTIFICINTPOSITIVE=6e4
+OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5
+OBJECT_STRING=Nested Object
+SCIENTIFICFLOATNEGATIVE=-2.5e-3
+SCIENTIFICINTPOSITIVE=1e5
+SCIENTIFICSMALLPOSITIVE=1e-4
+STRING=Hello, World!
+
+ARRAY2_0=1
+ARRAY2_1=-2.345
+ARRAY2_2=Array Element
+ARRAY2_3=true
+ARRAY2_4=false
+ARRAY2_5=null
+ARRAY2_6_BOOLEANFALSE=false
+ARRAY2_6_BOOLEANTRUE=true
+ARRAY2_6_FLOATNEGATIVE=-0.123
+ARRAY2_6_FLOATPOSITIVE=0.987
+ARRAY2_6_NULLVALUE=null
+ARRAY2_6_NUMERICNEGATIVE=-456
+ARRAY2_6_NUMERICPOSITIVE=123
+ARRAY2_6_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+ARRAY2_6_SCIENTIFICINTPOSITIVE=6e4
+ARRAY2_6_SCIENTIFICSMALLPOSITIVE=5e-5
+ARRAY2_6_STRING=Nested Object in Array2
+ARRAY2_7_BOOLEANFALSE=false
+ARRAY2_7_BOOLEANTRUE=true
+ARRAY2_7_FLOATNEGATIVE=-2.71828
+ARRAY2_7_FLOATPOSITIVE=3.14159
+ARRAY2_7_NULLVALUE=null
+ARRAY2_7_NUMERICNEGATIVE=-123
+ARRAY2_7_NUMERICPOSITIVE=42
+ARRAY2_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3
+ARRAY2_7_SCIENTIFICINTPOSITIVE=1e5
+ARRAY2_7_SCIENTIFICSMALLPOSITIVE=1e-4
+ARRAY2_7_STRING=Array Element with Object in Array2
+BOOLEANFALSE=false
+BOOLEANTRUE=true
+FLOATNEGATIVE=-2.71828
+FLOATPOSITIVE=3.14159
+NULLVALUE=null
+NUMERICNEGATIVE=-123
+NUMERICPOSITIVE=42
+OBJECT_BOOLEANFALSE=false
+OBJECT_BOOLEANTRUE=true
+OBJECT_FLOATNEGATIVE=-0.123
+OBJECT_FLOATPOSITIVE=0.987
+OBJECT_NULLVALUE=null
+OBJECT_NUMERICNEGATIVE=-456
+OBJECT_NUMERICPOSITIVE=123
+OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+OBJECT_SCIENTIFICINTPOSITIVE=6e4
+OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5
+OBJECT_STRING=Nested Object
+SCIENTIFICFLOATNEGATIVE=-2.5e-3
+SCIENTIFICINTPOSITIVE=1e5
+SCIENTIFICSMALLPOSITIVE=1e-4
+STRING=Hello, World!
+
+ARRAY2_0=1
+ARRAY2_1=-2.345
+ARRAY2_2=Array Element
+ARRAY2_3=true
+ARRAY2_4=false
+ARRAY2_5=null
+ARRAY2_6_BOOLEANFALSE=false
+ARRAY2_6_BOOLEANTRUE=true
+ARRAY2_6_FLOATNEGATIVE=-0.123
+ARRAY2_6_FLOATPOSITIVE=0.987
+ARRAY2_6_NULLVALUE=null
+ARRAY2_6_NUMERICNEGATIVE=-456
+ARRAY2_6_NUMERICPOSITIVE=123
+ARRAY2_6_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+ARRAY2_6_SCIENTIFICINTPOSITIVE=6e4
+ARRAY2_6_SCIENTIFICSMALLPOSITIVE=5e-5
+ARRAY2_6_STRING=Nested Object in Array2
+ARRAY2_7_BOOLEANFALSE=false
+ARRAY2_7_BOOLEANTRUE=true
+ARRAY2_7_FLOATNEGATIVE=-2.71828
+ARRAY2_7_FLOATPOSITIVE=3.14159
+ARRAY2_7_NULLVALUE=null
+ARRAY2_7_NUMERICNEGATIVE=-123
+ARRAY2_7_NUMERICPOSITIVE=42
+ARRAY2_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3
+ARRAY2_7_SCIENTIFICINTPOSITIVE=1e5
+ARRAY2_7_SCIENTIFICSMALLPOSITIVE=1e-4
+ARRAY2_7_STRING=Array Element with Object in Array2
+BOOLEANFALSE=false
+BOOLEANTRUE=true
+FLOATNEGATIVE=-2.71828
+FLOATPOSITIVE=3.14159
+NULLVALUE=null
+NUMERICNEGATIVE=-123
+NUMERICPOSITIVE=42
+OBJECT_BOOLEANFALSE=false
+OBJECT_BOOLEANTRUE=true
+OBJECT_FLOATNEGATIVE=-0.123
+OBJECT_FLOATPOSITIVE=0.987
+OBJECT_NULLVALUE=null
+OBJECT_NUMERICNEGATIVE=-456
+OBJECT_NUMERICPOSITIVE=123
+OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+OBJECT_SCIENTIFICINTPOSITIVE=6e4
+OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5
+OBJECT_STRING=Nested Object
+SCIENTIFICFLOATNEGATIVE=-2.5e-3
+SCIENTIFICINTPOSITIVE=1e5
+SCIENTIFICSMALLPOSITIVE=1e-4
+STRING=Hello, World!
+
diff --git a/collectors/log2journal/tests.d/json-include.output b/collectors/log2journal/tests.d/json-include.output
new file mode 100644
index 00000000000000..326c58da22edb5
--- /dev/null
+++ b/collectors/log2journal/tests.d/json-include.output
@@ -0,0 +1,54 @@
+OBJECT_ARRAY_0=1
+OBJECT_ARRAY_1=-2
+OBJECT_ARRAY_2=3
+OBJECT_ARRAY_3=Nested Array
+OBJECT_ARRAY_4=true
+OBJECT_ARRAY_5=null
+OBJECT_BOOLEANFALSE=false
+OBJECT_BOOLEANTRUE=true
+OBJECT_FLOATNEGATIVE=-0.123
+OBJECT_FLOATPOSITIVE=0.987
+OBJECT_NULLVALUE=null
+OBJECT_NUMERICNEGATIVE=-456
+OBJECT_NUMERICPOSITIVE=123
+OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+OBJECT_SCIENTIFICINTPOSITIVE=6e4
+OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5
+OBJECT_STRING=Nested Object
+
+OBJECT_ARRAY_0=1
+OBJECT_ARRAY_1=-2
+OBJECT_ARRAY_2=3
+OBJECT_ARRAY_3=Nested Array
+OBJECT_ARRAY_4=true
+OBJECT_ARRAY_5=null
+OBJECT_BOOLEANFALSE=false
+OBJECT_BOOLEANTRUE=true
+OBJECT_FLOATNEGATIVE=-0.123
+OBJECT_FLOATPOSITIVE=0.987
+OBJECT_NULLVALUE=null
+OBJECT_NUMERICNEGATIVE=-456
+OBJECT_NUMERICPOSITIVE=123
+OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+OBJECT_SCIENTIFICINTPOSITIVE=6e4
+OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5
+OBJECT_STRING=Nested Object
+
+OBJECT_ARRAY_0=1
+OBJECT_ARRAY_1=-2
+OBJECT_ARRAY_2=3
+OBJECT_ARRAY_3=Nested Array
+OBJECT_ARRAY_4=true
+OBJECT_ARRAY_5=null
+OBJECT_BOOLEANFALSE=false
+OBJECT_BOOLEANTRUE=true
+OBJECT_FLOATNEGATIVE=-0.123
+OBJECT_FLOATPOSITIVE=0.987
+OBJECT_NULLVALUE=null
+OBJECT_NUMERICNEGATIVE=-456
+OBJECT_NUMERICPOSITIVE=123
+OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+OBJECT_SCIENTIFICINTPOSITIVE=6e4
+OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5
+OBJECT_STRING=Nested Object
+
diff --git a/collectors/log2journal/tests.d/json.log b/collectors/log2journal/tests.d/json.log
new file mode 100644
index 00000000000000..3f133496050948
--- /dev/null
+++ b/collectors/log2journal/tests.d/json.log
@@ -0,0 +1,3 @@
+{ "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Hello, World!", "nullValue": null, "object": { "numericPositive": 123, "numericNegative": -456, "floatPositive": 0.987, "floatNegative": -0.123, "scientificIntPositive": 6e4, "scientificFloatNegative": -1.5e-2, "scientificSmallPositive": 5e-5, "booleanTrue": true, "booleanFalse": false, "string": "Nested Object", "nullValue": null, "array": [1, -2, 3, "Nested Array", true, null] }, "array": [ 1, -2.345, "Array Element", true, false, null, { "numericPositive": 987, "numericNegative": -654, "string": "Nested Object in Array", "array": [null, false, true] }, { "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Array Element with Object", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object", true, null] } ], "array2": [ 1, -2.345, "Array Element", true, false, null, { "numericPositive": 123, "numericNegative": -456, "floatPositive": 0.987, "floatNegative": -0.123, "scientificIntPositive": 6e4, "scientificFloatNegative": -1.5e-2, "scientificSmallPositive": 5e-5, "booleanTrue": true, "booleanFalse": false, "string": "Nested Object in Array2", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object2", true, null] }, { "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Array Element with Object in Array2", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object2", true, null]}]}
+{ "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Hello, World!", "nullValue": null, "object": { "numericPositive": 123, "numericNegative": -456, "floatPositive": 0.987, "floatNegative": -0.123, "scientificIntPositive": 6e4, "scientificFloatNegative": -1.5e-2, "scientificSmallPositive": 5e-5, "booleanTrue": true, "booleanFalse": false, "string": "Nested Object", "nullValue": null, "array": [1, -2, 3, "Nested Array", true, null] }, "array": [ 1, -2.345, "Array Element", true, false, null, { "numericPositive": 987, "numericNegative": -654, "string": "Nested Object in Array", "array": [null, false, true] }, { "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Array Element with Object", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object", true, null] } ], "array2": [ 1, -2.345, "Array Element", true, false, null, { "numericPositive": 123, "numericNegative": -456, "floatPositive": 0.987, "floatNegative": -0.123, "scientificIntPositive": 6e4, "scientificFloatNegative": -1.5e-2, "scientificSmallPositive": 5e-5, "booleanTrue": true, "booleanFalse": false, "string": "Nested Object in Array2", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object2", true, null] }, { "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Array Element with Object in Array2", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object2", true, null]}]}
+{ "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Hello, World!", "nullValue": null, "object": { "numericPositive": 123, "numericNegative": -456, "floatPositive": 0.987, "floatNegative": -0.123, "scientificIntPositive": 6e4, "scientificFloatNegative": -1.5e-2, "scientificSmallPositive": 5e-5, "booleanTrue": true, "booleanFalse": false, "string": "Nested Object", "nullValue": null, "array": [1, -2, 3, "Nested Array", true, null] }, "array": [ 1, -2.345, "Array Element", true, false, null, { "numericPositive": 987, "numericNegative": -654, "string": "Nested Object in Array", "array": [null, false, true] }, { "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Array Element with Object", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object", true, null] } ], "array2": [ 1, -2.345, "Array Element", true, false, null, { "numericPositive": 123, "numericNegative": -456, "floatPositive": 0.987, "floatNegative": -0.123, "scientificIntPositive": 6e4, "scientificFloatNegative": -1.5e-2, "scientificSmallPositive": 5e-5, "booleanTrue": true, "booleanFalse": false, "string": "Nested Object in Array2", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object2", true, null] }, { "numericPositive": 42, "numericNegative": -123, "floatPositive": 3.14159, "floatNegative": -2.71828, "scientificIntPositive": 1e5, "scientificFloatNegative": -2.5e-3, "scientificSmallPositive": 1e-4, "booleanTrue": true, "booleanFalse": false, "string": "Array Element with Object in Array2", "nullValue": null, "array": [1, -2, 3, "Nested Array in Object2", true, null]}]}
diff --git a/collectors/log2journal/tests.d/json.output b/collectors/log2journal/tests.d/json.output
new file mode 100644
index 00000000000000..83499cc55f428f
--- /dev/null
+++ b/collectors/log2journal/tests.d/json.output
@@ -0,0 +1,294 @@
+ARRAY2_0=1
+ARRAY2_1=-2.345
+ARRAY2_2=Array Element
+ARRAY2_3=true
+ARRAY2_4=false
+ARRAY2_5=null
+ARRAY2_6_ARRAY_0=1
+ARRAY2_6_ARRAY_1=-2
+ARRAY2_6_ARRAY_2=3
+ARRAY2_6_ARRAY_3=Nested Array in Object2
+ARRAY2_6_ARRAY_4=true
+ARRAY2_6_ARRAY_5=null
+ARRAY2_6_BOOLEANFALSE=false
+ARRAY2_6_BOOLEANTRUE=true
+ARRAY2_6_FLOATNEGATIVE=-0.123
+ARRAY2_6_FLOATPOSITIVE=0.987
+ARRAY2_6_NULLVALUE=null
+ARRAY2_6_NUMERICNEGATIVE=-456
+ARRAY2_6_NUMERICPOSITIVE=123
+ARRAY2_6_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+ARRAY2_6_SCIENTIFICINTPOSITIVE=6e4
+ARRAY2_6_SCIENTIFICSMALLPOSITIVE=5e-5
+ARRAY2_6_STRING=Nested Object in Array2
+ARRAY2_7_ARRAY_0=1
+ARRAY2_7_ARRAY_1=-2
+ARRAY2_7_ARRAY_2=3
+ARRAY2_7_ARRAY_3=Nested Array in Object2
+ARRAY2_7_ARRAY_4=true
+ARRAY2_7_ARRAY_5=null
+ARRAY2_7_BOOLEANFALSE=false
+ARRAY2_7_BOOLEANTRUE=true
+ARRAY2_7_FLOATNEGATIVE=-2.71828
+ARRAY2_7_FLOATPOSITIVE=3.14159
+ARRAY2_7_NULLVALUE=null
+ARRAY2_7_NUMERICNEGATIVE=-123
+ARRAY2_7_NUMERICPOSITIVE=42
+ARRAY2_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3
+ARRAY2_7_SCIENTIFICINTPOSITIVE=1e5
+ARRAY2_7_SCIENTIFICSMALLPOSITIVE=1e-4
+ARRAY2_7_STRING=Array Element with Object in Array2
+ARRAY_0=1
+ARRAY_1=-2.345
+ARRAY_2=Array Element
+ARRAY_3=true
+ARRAY_4=false
+ARRAY_5=null
+ARRAY_6_ARRAY_0=null
+ARRAY_6_ARRAY_1=false
+ARRAY_6_ARRAY_2=true
+ARRAY_6_NUMERICNEGATIVE=-654
+ARRAY_6_NUMERICPOSITIVE=987
+ARRAY_6_STRING=Nested Object in Array
+ARRAY_7_ARRAY_0=1
+ARRAY_7_ARRAY_1=-2
+ARRAY_7_ARRAY_2=3
+ARRAY_7_ARRAY_3=Nested Array in Object
+ARRAY_7_ARRAY_4=true
+ARRAY_7_ARRAY_5=null
+ARRAY_7_BOOLEANFALSE=false
+ARRAY_7_BOOLEANTRUE=true
+ARRAY_7_FLOATNEGATIVE=-2.71828
+ARRAY_7_FLOATPOSITIVE=3.14159
+ARRAY_7_NULLVALUE=null
+ARRAY_7_NUMERICNEGATIVE=-123
+ARRAY_7_NUMERICPOSITIVE=42
+ARRAY_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3
+ARRAY_7_SCIENTIFICINTPOSITIVE=1e5
+ARRAY_7_SCIENTIFICSMALLPOSITIVE=1e-4
+ARRAY_7_STRING=Array Element with Object
+BOOLEANFALSE=false
+BOOLEANTRUE=true
+FLOATNEGATIVE=-2.71828
+FLOATPOSITIVE=3.14159
+NULLVALUE=null
+NUMERICNEGATIVE=-123
+NUMERICPOSITIVE=42
+OBJECT_ARRAY_0=1
+OBJECT_ARRAY_1=-2
+OBJECT_ARRAY_2=3
+OBJECT_ARRAY_3=Nested Array
+OBJECT_ARRAY_4=true
+OBJECT_ARRAY_5=null
+OBJECT_BOOLEANFALSE=false
+OBJECT_BOOLEANTRUE=true
+OBJECT_FLOATNEGATIVE=-0.123
+OBJECT_FLOATPOSITIVE=0.987
+OBJECT_NULLVALUE=null
+OBJECT_NUMERICNEGATIVE=-456
+OBJECT_NUMERICPOSITIVE=123
+OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+OBJECT_SCIENTIFICINTPOSITIVE=6e4
+OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5
+OBJECT_STRING=Nested Object
+SCIENTIFICFLOATNEGATIVE=-2.5e-3
+SCIENTIFICINTPOSITIVE=1e5
+SCIENTIFICSMALLPOSITIVE=1e-4
+STRING=Hello, World!
+
+ARRAY2_0=1
+ARRAY2_1=-2.345
+ARRAY2_2=Array Element
+ARRAY2_3=true
+ARRAY2_4=false
+ARRAY2_5=null
+ARRAY2_6_ARRAY_0=1
+ARRAY2_6_ARRAY_1=-2
+ARRAY2_6_ARRAY_2=3
+ARRAY2_6_ARRAY_3=Nested Array in Object2
+ARRAY2_6_ARRAY_4=true
+ARRAY2_6_ARRAY_5=null
+ARRAY2_6_BOOLEANFALSE=false
+ARRAY2_6_BOOLEANTRUE=true
+ARRAY2_6_FLOATNEGATIVE=-0.123
+ARRAY2_6_FLOATPOSITIVE=0.987
+ARRAY2_6_NULLVALUE=null
+ARRAY2_6_NUMERICNEGATIVE=-456
+ARRAY2_6_NUMERICPOSITIVE=123
+ARRAY2_6_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+ARRAY2_6_SCIENTIFICINTPOSITIVE=6e4
+ARRAY2_6_SCIENTIFICSMALLPOSITIVE=5e-5
+ARRAY2_6_STRING=Nested Object in Array2
+ARRAY2_7_ARRAY_0=1
+ARRAY2_7_ARRAY_1=-2
+ARRAY2_7_ARRAY_2=3
+ARRAY2_7_ARRAY_3=Nested Array in Object2
+ARRAY2_7_ARRAY_4=true
+ARRAY2_7_ARRAY_5=null
+ARRAY2_7_BOOLEANFALSE=false
+ARRAY2_7_BOOLEANTRUE=true
+ARRAY2_7_FLOATNEGATIVE=-2.71828
+ARRAY2_7_FLOATPOSITIVE=3.14159
+ARRAY2_7_NULLVALUE=null
+ARRAY2_7_NUMERICNEGATIVE=-123
+ARRAY2_7_NUMERICPOSITIVE=42
+ARRAY2_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3
+ARRAY2_7_SCIENTIFICINTPOSITIVE=1e5
+ARRAY2_7_SCIENTIFICSMALLPOSITIVE=1e-4
+ARRAY2_7_STRING=Array Element with Object in Array2
+ARRAY_0=1
+ARRAY_1=-2.345
+ARRAY_2=Array Element
+ARRAY_3=true
+ARRAY_4=false
+ARRAY_5=null
+ARRAY_6_ARRAY_0=null
+ARRAY_6_ARRAY_1=false
+ARRAY_6_ARRAY_2=true
+ARRAY_6_NUMERICNEGATIVE=-654
+ARRAY_6_NUMERICPOSITIVE=987
+ARRAY_6_STRING=Nested Object in Array
+ARRAY_7_ARRAY_0=1
+ARRAY_7_ARRAY_1=-2
+ARRAY_7_ARRAY_2=3
+ARRAY_7_ARRAY_3=Nested Array in Object
+ARRAY_7_ARRAY_4=true
+ARRAY_7_ARRAY_5=null
+ARRAY_7_BOOLEANFALSE=false
+ARRAY_7_BOOLEANTRUE=true
+ARRAY_7_FLOATNEGATIVE=-2.71828
+ARRAY_7_FLOATPOSITIVE=3.14159
+ARRAY_7_NULLVALUE=null
+ARRAY_7_NUMERICNEGATIVE=-123
+ARRAY_7_NUMERICPOSITIVE=42
+ARRAY_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3
+ARRAY_7_SCIENTIFICINTPOSITIVE=1e5
+ARRAY_7_SCIENTIFICSMALLPOSITIVE=1e-4
+ARRAY_7_STRING=Array Element with Object
+BOOLEANFALSE=false
+BOOLEANTRUE=true
+FLOATNEGATIVE=-2.71828
+FLOATPOSITIVE=3.14159
+NULLVALUE=null
+NUMERICNEGATIVE=-123
+NUMERICPOSITIVE=42
+OBJECT_ARRAY_0=1
+OBJECT_ARRAY_1=-2
+OBJECT_ARRAY_2=3
+OBJECT_ARRAY_3=Nested Array
+OBJECT_ARRAY_4=true
+OBJECT_ARRAY_5=null
+OBJECT_BOOLEANFALSE=false
+OBJECT_BOOLEANTRUE=true
+OBJECT_FLOATNEGATIVE=-0.123
+OBJECT_FLOATPOSITIVE=0.987
+OBJECT_NULLVALUE=null
+OBJECT_NUMERICNEGATIVE=-456
+OBJECT_NUMERICPOSITIVE=123
+OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+OBJECT_SCIENTIFICINTPOSITIVE=6e4
+OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5
+OBJECT_STRING=Nested Object
+SCIENTIFICFLOATNEGATIVE=-2.5e-3
+SCIENTIFICINTPOSITIVE=1e5
+SCIENTIFICSMALLPOSITIVE=1e-4
+STRING=Hello, World!
+
+ARRAY2_0=1
+ARRAY2_1=-2.345
+ARRAY2_2=Array Element
+ARRAY2_3=true
+ARRAY2_4=false
+ARRAY2_5=null
+ARRAY2_6_ARRAY_0=1
+ARRAY2_6_ARRAY_1=-2
+ARRAY2_6_ARRAY_2=3
+ARRAY2_6_ARRAY_3=Nested Array in Object2
+ARRAY2_6_ARRAY_4=true
+ARRAY2_6_ARRAY_5=null
+ARRAY2_6_BOOLEANFALSE=false
+ARRAY2_6_BOOLEANTRUE=true
+ARRAY2_6_FLOATNEGATIVE=-0.123
+ARRAY2_6_FLOATPOSITIVE=0.987
+ARRAY2_6_NULLVALUE=null
+ARRAY2_6_NUMERICNEGATIVE=-456
+ARRAY2_6_NUMERICPOSITIVE=123
+ARRAY2_6_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+ARRAY2_6_SCIENTIFICINTPOSITIVE=6e4
+ARRAY2_6_SCIENTIFICSMALLPOSITIVE=5e-5
+ARRAY2_6_STRING=Nested Object in Array2
+ARRAY2_7_ARRAY_0=1
+ARRAY2_7_ARRAY_1=-2
+ARRAY2_7_ARRAY_2=3
+ARRAY2_7_ARRAY_3=Nested Array in Object2
+ARRAY2_7_ARRAY_4=true
+ARRAY2_7_ARRAY_5=null
+ARRAY2_7_BOOLEANFALSE=false
+ARRAY2_7_BOOLEANTRUE=true
+ARRAY2_7_FLOATNEGATIVE=-2.71828
+ARRAY2_7_FLOATPOSITIVE=3.14159
+ARRAY2_7_NULLVALUE=null
+ARRAY2_7_NUMERICNEGATIVE=-123
+ARRAY2_7_NUMERICPOSITIVE=42
+ARRAY2_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3
+ARRAY2_7_SCIENTIFICINTPOSITIVE=1e5
+ARRAY2_7_SCIENTIFICSMALLPOSITIVE=1e-4
+ARRAY2_7_STRING=Array Element with Object in Array2
+ARRAY_0=1
+ARRAY_1=-2.345
+ARRAY_2=Array Element
+ARRAY_3=true
+ARRAY_4=false
+ARRAY_5=null
+ARRAY_6_ARRAY_0=null
+ARRAY_6_ARRAY_1=false
+ARRAY_6_ARRAY_2=true
+ARRAY_6_NUMERICNEGATIVE=-654
+ARRAY_6_NUMERICPOSITIVE=987
+ARRAY_6_STRING=Nested Object in Array
+ARRAY_7_ARRAY_0=1
+ARRAY_7_ARRAY_1=-2
+ARRAY_7_ARRAY_2=3
+ARRAY_7_ARRAY_3=Nested Array in Object
+ARRAY_7_ARRAY_4=true
+ARRAY_7_ARRAY_5=null
+ARRAY_7_BOOLEANFALSE=false
+ARRAY_7_BOOLEANTRUE=true
+ARRAY_7_FLOATNEGATIVE=-2.71828
+ARRAY_7_FLOATPOSITIVE=3.14159
+ARRAY_7_NULLVALUE=null
+ARRAY_7_NUMERICNEGATIVE=-123
+ARRAY_7_NUMERICPOSITIVE=42
+ARRAY_7_SCIENTIFICFLOATNEGATIVE=-2.5e-3
+ARRAY_7_SCIENTIFICINTPOSITIVE=1e5
+ARRAY_7_SCIENTIFICSMALLPOSITIVE=1e-4
+ARRAY_7_STRING=Array Element with Object
+BOOLEANFALSE=false
+BOOLEANTRUE=true
+FLOATNEGATIVE=-2.71828
+FLOATPOSITIVE=3.14159
+NULLVALUE=null
+NUMERICNEGATIVE=-123
+NUMERICPOSITIVE=42
+OBJECT_ARRAY_0=1
+OBJECT_ARRAY_1=-2
+OBJECT_ARRAY_2=3
+OBJECT_ARRAY_3=Nested Array
+OBJECT_ARRAY_4=true
+OBJECT_ARRAY_5=null
+OBJECT_BOOLEANFALSE=false
+OBJECT_BOOLEANTRUE=true
+OBJECT_FLOATNEGATIVE=-0.123
+OBJECT_FLOATPOSITIVE=0.987
+OBJECT_NULLVALUE=null
+OBJECT_NUMERICNEGATIVE=-456
+OBJECT_NUMERICPOSITIVE=123
+OBJECT_SCIENTIFICFLOATNEGATIVE=-1.5e-2
+OBJECT_SCIENTIFICINTPOSITIVE=6e4
+OBJECT_SCIENTIFICSMALLPOSITIVE=5e-5
+OBJECT_STRING=Nested Object
+SCIENTIFICFLOATNEGATIVE=-2.5e-3
+SCIENTIFICINTPOSITIVE=1e5
+SCIENTIFICSMALLPOSITIVE=1e-4
+STRING=Hello, World!
+
diff --git a/collectors/log2journal/tests.d/logfmt.log b/collectors/log2journal/tests.d/logfmt.log
new file mode 100644
index 00000000000000..e55a83bbbbb37c
--- /dev/null
+++ b/collectors/log2journal/tests.d/logfmt.log
@@ -0,0 +1,5 @@
+key1=value01 key2=value02 key3=value03 key4=value04
+key1=value11 key2=value12 key3=value13 key4=
+key1=value21 key2=value22 key3=value23 key4=value24
+key1=value31 key2=value32 key3=value33 key4=
+key1=value41 key2=value42 key3=value43 key4=value44
diff --git a/collectors/log2journal/tests.d/logfmt.output b/collectors/log2journal/tests.d/logfmt.output
new file mode 100644
index 00000000000000..4291c966507eea
--- /dev/null
+++ b/collectors/log2journal/tests.d/logfmt.output
@@ -0,0 +1,37 @@
+INJECTED=Key INJECTED had value 'value01 - value02' and now has this, but only on the first row of the log.
+KEY1=value01
+KEY2=value02
+KEY3=value03
+KEY4=value04
+SIMPLE_INJECTION=An unset variable looks like '', while the value of KEY2 is 'value02'
+YET_ANOTHER_INJECTION=value01 - value02 - Key INJECTED had value 'value01 - value02' and now has this, but only on the first row of the log. - this should work because inject is yes
+
+INJECTED=value11 - value12
+KEY1=value11
+KEY2=value12
+KEY3=value13
+SIMPLE_INJECTION=An unset variable looks like '', while the value of KEY2 is 'value12'
+YET_ANOTHER_INJECTION=value11 - value12 - value11 - value12 - this should work because inject is yes
+
+INJECTED=KEY4 has the value 'value24'; it is not empty, so INJECTED has been rewritten.
+KEY1=value21
+KEY2=value22
+KEY3=value23
+KEY4=value24
+SIMPLE_INJECTION=An unset variable looks like '', while the value of KEY2 is 'value22'
+YET_ANOTHER_INJECTION=value21 - value22 - KEY4 has the value 'value24'; it is not empty, so INJECTED has been rewritten. - this should work because inject is yes
+
+INJECTED=value31 - value32
+KEY1=value31
+KEY2=value32
+KEY3=value33
+YET_ANOTHER_INJECTION=value31 - value32 - value31 - value32 - this should work because inject is yes
+
+INJECTED=KEY4 has the value 'value44'; it is not empty, so INJECTED has been rewritten.
+KEY1=value41
+KEY2=value42
+KEY3=value43
+KEY4=value44
+SIMPLE_INJECTION=An unset variable looks like '', while the value of KEY2 is 'value42'
+YET_ANOTHER_INJECTION=value41 - value42 - KEY4 has the value 'value44'; it is not empty, so INJECTED has been rewritten. - this should work because inject is yes
+
diff --git a/collectors/log2journal/tests.d/logfmt.yaml b/collectors/log2journal/tests.d/logfmt.yaml
new file mode 100644
index 00000000000000..91e93a71ecf9f4
--- /dev/null
+++ b/collectors/log2journal/tests.d/logfmt.yaml
@@ -0,0 +1,34 @@
+pattern: logfmt
+
+inject:
+ - key: SIMPLE_INJECTION
+ value: "An unset variable looks like '${this}', while the value of KEY2 is '${KEY2}'"
+
+rewrite:
+ - key: INJECTED
+ value: "${KEY1} - ${KEY2}"
+ inject: yes
+ stop: no
+
+ - key: INJECTED
+ match: '^value01'
+ value: "Key INJECTED had value '${INJECTED}' and now has this, but only on the first row of the log."
+
+ - key: INJECTED
+ not_empty: "${KEY4}"
+ value: "KEY4 has the value '${KEY4}'; it is not empty, so INJECTED has been rewritten."
+
+ - key: INJECTED
+ match: '^KEY4 has the value'
+ value: "This value should not appear in the logs, because the previous one matched and stopped the pipeline."
+
+ - key: ANOTHER_INJECTION
+ value: "${KEY1} - ${KEY2} - ${INJECTED} - should not work because inject is not true and ANOTHER_INJECTION is not in the log file."
+
+ - key: YET_ANOTHER_INJECTION
+ value: "${KEY1} - ${KEY2} - ${INJECTED} - this should work because inject is yes"
+ inject: yes
+
+ - key: SIMPLE_INJECTION
+ match: "KEY2 is 'value32'"
+ value: "" # empty, so SIMPLE_INJECTION should not be emitted for the 4th log row (the one where KEY2 is 'value32')
diff --git a/collectors/log2journal/tests.d/nginx-combined.log b/collectors/log2journal/tests.d/nginx-combined.log
new file mode 100644
index 00000000000000..b0faa81e906bc2
--- /dev/null
+++ b/collectors/log2journal/tests.d/nginx-combined.log
@@ -0,0 +1,14 @@
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:27 +0000] "GET /api/v1/data?chart=system.net&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775349 HTTP/1.1" 200 4844 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:27 +0000] "OPTIONS /api/v1/data?chart=netdata.clients&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775358 HTTP/1.1" 200 29 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:27 +0000] "OPTIONS /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=out&_=1701372775359 HTTP/1.1" 200 29 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:27 +0000] "OPTIONS /api/v1/data?chart=netdata.requests&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775357 HTTP/1.1" 200 29 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
+127.0.0.1 - - [30/Nov/2023:19:35:28 +0000] "GET /stub_status HTTP/1.1" 200 120 "-" "Go-http-client/1.1"
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "GET /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=out&_=1701372775359 HTTP/1.1" 200 1918 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "GET /api/v1/data?chart=netdata.requests&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775357 HTTP/1.1" 200 1632 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "GET /api/v1/data?chart=netdata.clients&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775358 HTTP/1.1" 200 588 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "OPTIONS /api/v1/data?chart=system.cpu&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775360 HTTP/1.1" 200 29 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "OPTIONS /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=in&_=1701372775361 HTTP/1.1" 200 29 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "GET /api/v1/data?chart=system.cpu&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775360 HTTP/1.1" 200 6085 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "GET /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=in&_=1701372775361 HTTP/1.1" 200 1918 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "OPTIONS /api/v1/data?chart=system.io&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775362 HTTP/1.1" 200 29 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
+2a02:169:1210::2000 - - [30/Nov/2023:19:35:28 +0000] "GET /api/v1/data?chart=system.io&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775362 HTTP/1.1" 200 3503 "http://192.168.69.5:19999/" "Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36"
diff --git a/collectors/log2journal/tests.d/nginx-combined.output b/collectors/log2journal/tests.d/nginx-combined.output
new file mode 100644
index 00000000000000..07fd110144df0c
--- /dev/null
+++ b/collectors/log2journal/tests.d/nginx-combined.output
@@ -0,0 +1,210 @@
+MESSAGE=GET /api/v1/data?chart=system.net&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775349 HTTP/1.1
+NGINX_BODY_BYTES_SENT=4844
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_URI=/api/v1/data?chart=system.net&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775349
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:27 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=OPTIONS /api/v1/data?chart=netdata.clients&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775358 HTTP/1.1
+NGINX_BODY_BYTES_SENT=29
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=OPTIONS
+NGINX_REQUEST_URI=/api/v1/data?chart=netdata.clients&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775358
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:27 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=OPTIONS /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=out&_=1701372775359 HTTP/1.1
+NGINX_BODY_BYTES_SENT=29
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=OPTIONS
+NGINX_REQUEST_URI=/api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=out&_=1701372775359
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:27 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=OPTIONS /api/v1/data?chart=netdata.requests&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775357 HTTP/1.1
+NGINX_BODY_BYTES_SENT=29
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=OPTIONS
+NGINX_REQUEST_URI=/api/v1/data?chart=netdata.requests&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775357
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:27 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=GET /stub_status HTTP/1.1
+NGINX_BODY_BYTES_SENT=120
+NGINX_HTTP_REFERER=-
+NGINX_HTTP_USER_AGENT=Go-http-client/1.1
+NGINX_REMOTE_ADDR=127.0.0.1
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_URI=/stub_status
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=GET /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=out&_=1701372775359 HTTP/1.1
+NGINX_BODY_BYTES_SENT=1918
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_URI=/api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=out&_=1701372775359
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=GET /api/v1/data?chart=netdata.requests&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775357 HTTP/1.1
+NGINX_BODY_BYTES_SENT=1632
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_URI=/api/v1/data?chart=netdata.requests&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775357
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=GET /api/v1/data?chart=netdata.clients&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775358 HTTP/1.1
+NGINX_BODY_BYTES_SENT=588
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_URI=/api/v1/data?chart=netdata.clients&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775358
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=OPTIONS /api/v1/data?chart=system.cpu&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775360 HTTP/1.1
+NGINX_BODY_BYTES_SENT=29
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=OPTIONS
+NGINX_REQUEST_URI=/api/v1/data?chart=system.cpu&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775360
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=OPTIONS /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=in&_=1701372775361 HTTP/1.1
+NGINX_BODY_BYTES_SENT=29
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=OPTIONS
+NGINX_REQUEST_URI=/api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=in&_=1701372775361
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=GET /api/v1/data?chart=system.cpu&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775360 HTTP/1.1
+NGINX_BODY_BYTES_SENT=6085
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_URI=/api/v1/data?chart=system.cpu&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775360
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=GET /api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=in&_=1701372775361 HTTP/1.1
+NGINX_BODY_BYTES_SENT=1918
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_URI=/api/v1/data?chart=netdata.net&format=array&points=300&group=average>ime=0&options=absolute%7Cjsonwrap%7Cnonzero&after=-300&dimensions=in&_=1701372775361
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=OPTIONS /api/v1/data?chart=system.io&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775362 HTTP/1.1
+NGINX_BODY_BYTES_SENT=29
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=OPTIONS
+NGINX_REQUEST_URI=/api/v1/data?chart=system.io&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775362
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=GET /api/v1/data?chart=system.io&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775362 HTTP/1.1
+NGINX_BODY_BYTES_SENT=3503
+NGINX_HTTP_REFERER=http://192.168.69.5:19999/
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (X11; CrOS armv7l 13597.84.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.98 Safari/537.36
+NGINX_REMOTE_ADDR=2a02:169:1210::2000
+NGINX_REMOTE_USER=-
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_URI=/api/v1/data?chart=system.io&format=json&points=267&group=average>ime=0&options=ms%7Cflip%7Cjsonwrap%7Cnonzero&after=-300&_=1701372775362
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIME_LOCAL=30/Nov/2023:19:35:28 +0000
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
diff --git a/collectors/log2journal/tests.d/nginx-json.log b/collectors/log2journal/tests.d/nginx-json.log
new file mode 100644
index 00000000000000..7e2b5d5f59ea02
--- /dev/null
+++ b/collectors/log2journal/tests.d/nginx-json.log
@@ -0,0 +1,9 @@
+{"msec":"1644997905.123","connection":12345,"connection_requests":5,"pid":9876,"request_id":"8f3ebc1e38fbb92f","request_length":345,"remote_addr":"192.168.1.100","remote_user":"john_doe","remote_port":54321,"time_local":"19/Feb/2023:14:15:05 +0000","request":"GET /index.html HTTP/1.1","request_uri":"/index.html?param=value","args":"param=value","status":200,"body_bytes_sent":5432,"bytes_sent":6543,"http_referer":"https://example.com","http_user_agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)","http_x_forwarded_for":"192.168.1.50, 10.0.0.1","host":"example.com","request_time":0.123,"upstream":"10.0.0.2:8080","upstream_connect_time":0.045,"upstream_header_time":0.020,"upstream_response_time":0.058,"upstream_response_length":7890,"upstream_cache_status":"MISS","ssl_protocol":"TLSv1.2","ssl_cipher":"AES256-SHA256","scheme":"https","request_method":"GET","server_protocol":"HTTP/1.1","pipe":".","gzip_ratio":"2.1","http_cf_ray":"abc123def456","geoip_country_code":"US"}
+{"msec":"1644997910.789","connection":54321,"connection_requests":10,"pid":5432,"request_id":"4a7bca5e19d3f8e7","request_length":432,"remote_addr":"10.0.0.3","remote_user":"","remote_port":12345,"time_local":"19/Feb/2023:14:15:10 +0000","request":"POST /api/update HTTP/1.1","request_uri":"/api/update","args":"","status":204,"body_bytes_sent":0,"bytes_sent":123,"http_referer":"","http_user_agent":"curl/7.68.0","http_x_forwarded_for":"","host":"api.example.com","request_time":0.032,"upstream":"backend-server-1:8080","upstream_connect_time":0.012,"upstream_header_time":0.020,"upstream_response_time":0.010,"upstream_response_length":0,"upstream_cache_status":"","ssl_protocol":"","ssl_cipher":"","scheme":"http","request_method":"POST","server_protocol":"HTTP/1.1","pipe":"p","gzip_ratio":"","http_cf_ray":"","geoip_country_code":""}
+{"msec":"1644997920.456","connection":98765,"connection_requests":15,"pid":1234,"request_id":"63f8ad2c3e1b4090","request_length":567,"remote_addr":"2001:0db8:85a3:0000:0000:8a2e:0370:7334","remote_user":"alice","remote_port":6789,"time_local":"19/Feb/2023:14:15:20 +0000","request":"GET /page?param1=value1¶m2=value2 HTTP/2.0","request_uri":"/page?param1=value1¶m2=value2","args":"param1=value1¶m2=value2","status":404,"body_bytes_sent":0,"bytes_sent":0,"http_referer":"","http_user_agent":"Mozilla/5.0 (Linux; Android 10; Pixel 3)","http_x_forwarded_for":"","host":"example.org","request_time":0.045,"upstream":"","upstream_connect_time":0.0,"upstream_header_time":0.0,"upstream_response_time":0.0,"upstream_response_length":0,"upstream_cache_status":"","ssl_protocol":"","ssl_cipher":"","scheme":"https","request_method":"GET","server_protocol":"HTTP/2.0","pipe":".","gzip_ratio":"","http_cf_ray":"","geoip_country_code":"GB"}
+{"msec":"1644997930.987","connection":123,"connection_requests":3,"pid":5678,"request_id":"9e632a5b24c18f76","request_length":234,"remote_addr":"192.168.0.1","remote_user":"jane_doe","remote_port":9876,"time_local":"19/Feb/2023:14:15:30 +0000","request":"PUT /api/update HTTP/1.1","request_uri":"/api/update","args":"","status":500,"body_bytes_sent":543,"bytes_sent":876,"http_referer":"https://example.com/page","http_user_agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)","http_x_forwarded_for":"","host":"api.example.com","request_time":0.123,"upstream":"backend-server-2:8080","upstream_connect_time":0.045,"upstream_header_time":0.020,"upstream_response_time":0.058,"upstream_response_length":7890,"upstream_cache_status":"HIT","ssl_protocol":"TLSv1.2","ssl_cipher":"AES256-SHA256","scheme":"https","request_method":"PUT","server_protocol":"HTTP/1.1","pipe":"p","gzip_ratio":"1.8","http_cf_ray":"xyz789abc123","geoip_country_code":"CA"}
+{"msec":"1644997940.234","connection":9876,"connection_requests":8,"pid":4321,"request_id":"1b6c59c8aef7d24a","request_length":456,"remote_addr":"203.0.113.1","remote_user":"","remote_port":5432,"time_local":"19/Feb/2023:14:15:40 +0000","request":"DELETE /api/resource HTTP/2.0","request_uri":"/api/resource","args":"","status":204,"body_bytes_sent":0,"bytes_sent":123,"http_referer":"","http_user_agent":"curl/7.68.0","http_x_forwarded_for":"","host":"api.example.com","request_time":0.032,"upstream":"backend-server-1:8080","upstream_connect_time":0.012,"upstream_header_time":0.020,"upstream_response_time":0.010,"upstream_response_length":0,"upstream_cache_status":"","ssl_protocol":"","ssl_cipher":"","scheme":"http","request_method":"DELETE","server_protocol":"HTTP/2.0","pipe":".","gzip_ratio":"","http_cf_ray":"","geoip_country_code":""}
+{"msec":"1644997950.789","connection":5432,"connection_requests":12,"pid":6543,"request_id":"72692d781d0b8a4f","request_length":789,"remote_addr":"198.51.100.2","remote_user":"bob","remote_port":8765,"time_local":"19/Feb/2023:14:15:50 +0000","request":"GET /profile?user=bob HTTP/1.1","request_uri":"/profile?user=bob","args":"user=bob","status":200,"body_bytes_sent":1234,"bytes_sent":2345,"http_referer":"","http_user_agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)","http_x_forwarded_for":"","host":"example.com","request_time":0.065,"upstream":"10.0.0.2:8080","upstream_connect_time":0.045,"upstream_header_time":0.020,"upstream_response_time":0.058,"upstream_response_length":7890,"upstream_cache_status":"MISS","ssl_protocol":"TLSv1.3","ssl_cipher":"AES128-GCM-SHA256","scheme":"https","request_method":"GET","server_protocol":"HTTP/1.1","pipe":"p","gzip_ratio":"","http_cf_ray":"","geoip_country_code":"US"}
+{"msec":"1644997960.321","connection":65432,"connection_requests":7,"pid":7890,"request_id":"c3e158d41e75a9d7","request_length":321,"remote_addr":"203.0.113.2","remote_user":"","remote_port":9876,"time_local":"19/Feb/2023:14:15:60 +0000","request":"GET /dashboard HTTP/2.0","request_uri":"/dashboard","args":"","status":301,"body_bytes_sent":0,"bytes_sent":123,"http_referer":"","http_user_agent":"Mozilla/5.0 (Linux; Android 10; Pixel 3)","http_x_forwarded_for":"","host":"dashboard.example.org","request_time":0.032,"upstream":"","upstream_connect_time":0.0,"upstream_header_time":0.0,"upstream_response_time":0.0,"upstream_response_length":0,"upstream_cache_status":"","ssl_protocol":"","ssl_cipher":"","scheme":"https","request_method":"GET","server_protocol":"HTTP/2.0","pipe":".","gzip_ratio":"","http_cf_ray":"","geoip_country_code":""}
+{"msec":"1644997970.555","connection":8765,"connection_requests":9,"pid":8765,"request_id":"f9f6e8235de54af4","request_length":654,"remote_addr":"10.0.0.4","remote_user":"","remote_port":12345,"time_local":"19/Feb/2023:14:15:70 +0000","request":"POST /submit-form HTTP/1.1","request_uri":"/submit-form","args":"","status":201,"body_bytes_sent":876,"bytes_sent":987,"http_referer":"","http_user_agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64)","http_x_forwarded_for":"","host":"example.com","request_time":0.045,"upstream":"backend-server-3:8080","upstream_connect_time":0.012,"upstream_header_time":0.020,"upstream_response_time":0.010,"upstream_response_length":0,"upstream_cache_status":"","ssl_protocol":"","ssl_cipher":"","scheme":"http","request_method":"POST","server_protocol":"HTTP/1.1","pipe":"p","gzip_ratio":"","http_cf_ray":"","geoip_country_code":""}
+{"msec":"1644997980.987","connection":23456,"connection_requests":6,"pid":3456,"request_id":"2ec3e8859e7a406c","request_length":432,"remote_addr":"198.51.100.3","remote_user":"mary","remote_port":5678,"time_local":"19/Feb/2023:14:15:80 +0000","request":"GET /contact HTTP/1.1","request_uri":"/contact","args":"","status":404,"body_bytes_sent":0,"bytes_sent":0,"http_referer":"","http_user_agent":"Mozilla/5.0 (Linux; Android 10; Pixel 3)","http_x_forwarded_for":"","host":"example.org","request_time":0.032,"upstream":"","upstream_connect_time":0.0,"upstream_header_time":0.0,"upstream_response_time":0.0,"upstream_response_length":0,"upstream_cache_status":"","ssl_protocol":"","ssl_cipher":"","scheme":"https","request_method":"GET","server_protocol":"HTTP/1.1","pipe":".","gzip_ratio":"","http_cf_ray":"","geoip_country_code":"FR"}
diff --git a/collectors/log2journal/tests.d/nginx-json.output b/collectors/log2journal/tests.d/nginx-json.output
new file mode 100644
index 00000000000000..e7db9dcbde8cfc
--- /dev/null
+++ b/collectors/log2journal/tests.d/nginx-json.output
@@ -0,0 +1,296 @@
+MESSAGE=GET /index.html HTTP/1.1
+NGINX_BODY_BYTES_SENT=5432
+NGINX_BYTES_SENT=6543
+NGINX_CONNECTION=12345
+NGINX_CONNECTION_REQUESTS=5
+NGINX_GEOIP_COUNTRY_CODE=US
+NGINX_GZIP_RATIO=2.1
+NGINX_HOST=example.com
+NGINX_HTTP_CF_RAY=abc123def456
+NGINX_HTTP_REFERER=https://example.com
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64)
+NGINX_HTTP_X_FORWARDED_FOR=192.168.1.50, 10.0.0.1
+NGINX_PID=9876
+NGINX_PIPELINED=no
+NGINX_QUERY_STRING=param=value
+NGINX_REMOTE_ADDR=192.168.1.100
+NGINX_REMOTE_PORT=54321
+NGINX_REMOTE_USER=john_doe
+NGINX_REQUEST_ID=8f3ebc1e38fbb92f
+NGINX_REQUEST_LENGTH=345
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_TIME=0.123
+NGINX_REQUEST_URI=/index.html?param=value
+NGINX_SCHEME=https
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_SSL_CIPHER=AES256-SHA256
+NGINX_SSL_PROTOCOL=TLSv1.2
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIMESTAMP_SEC=1644997905.123
+NGINX_TIME_LOCAL=19/Feb/2023:14:15:05 +0000
+NGINX_UPSTREAM=10.0.0.2:8080
+NGINX_UPSTREAM_CACHE_STATUS=MISS
+NGINX_UPSTREAM_CONNECT_TIME=0.045
+NGINX_UPSTREAM_HEADER_TIME=0.020
+NGINX_UPSTREAM_RESPONSE_LENGTH=7890
+NGINX_UPSTREAM_RESPONSE_TIME=0.058
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=POST /api/update HTTP/1.1
+NGINX_BODY_BYTES_SENT=0
+NGINX_BYTES_SENT=123
+NGINX_CONNECTION=54321
+NGINX_CONNECTION_REQUESTS=10
+NGINX_HOST=api.example.com
+NGINX_HTTP_USER_AGENT=curl/7.68.0
+NGINX_PID=5432
+NGINX_PIPELINED=yes
+NGINX_REMOTE_ADDR=10.0.0.3
+NGINX_REMOTE_PORT=12345
+NGINX_REQUEST_ID=4a7bca5e19d3f8e7
+NGINX_REQUEST_LENGTH=432
+NGINX_REQUEST_METHOD=POST
+NGINX_REQUEST_TIME=0.032
+NGINX_REQUEST_URI=/api/update
+NGINX_SCHEME=http
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=204
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIMESTAMP_SEC=1644997910.789
+NGINX_TIME_LOCAL=19/Feb/2023:14:15:10 +0000
+NGINX_UPSTREAM=backend-server-1:8080
+NGINX_UPSTREAM_CONNECT_TIME=0.012
+NGINX_UPSTREAM_HEADER_TIME=0.020
+NGINX_UPSTREAM_RESPONSE_LENGTH=0
+NGINX_UPSTREAM_RESPONSE_TIME=0.010
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=GET /page?param1=value1¶m2=value2 HTTP/2.0
+NGINX_BODY_BYTES_SENT=0
+NGINX_BYTES_SENT=0
+NGINX_CONNECTION=98765
+NGINX_CONNECTION_REQUESTS=15
+NGINX_GEOIP_COUNTRY_CODE=GB
+NGINX_HOST=example.org
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Linux; Android 10; Pixel 3)
+NGINX_PID=1234
+NGINX_PIPELINED=no
+NGINX_QUERY_STRING=param1=value1¶m2=value2
+NGINX_REMOTE_ADDR=2001:0db8:85a3:0000:0000:8a2e:0370:7334
+NGINX_REMOTE_PORT=6789
+NGINX_REMOTE_USER=alice
+NGINX_REQUEST_ID=63f8ad2c3e1b4090
+NGINX_REQUEST_LENGTH=567
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_TIME=0.045
+NGINX_REQUEST_URI=/page?param1=value1¶m2=value2
+NGINX_SCHEME=https
+NGINX_SERVER_PROTOCOL=HTTP/2.0
+NGINX_STATUS=404
+NGINX_STATUS_FAMILY=4xx
+NGINX_TIMESTAMP_SEC=1644997920.456
+NGINX_TIME_LOCAL=19/Feb/2023:14:15:20 +0000
+NGINX_UPSTREAM_CONNECT_TIME=0.0
+NGINX_UPSTREAM_HEADER_TIME=0.0
+NGINX_UPSTREAM_RESPONSE_LENGTH=0
+NGINX_UPSTREAM_RESPONSE_TIME=0.0
+PRIORITY=5
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=PUT /api/update HTTP/1.1
+NGINX_BODY_BYTES_SENT=543
+NGINX_BYTES_SENT=876
+NGINX_CONNECTION=123
+NGINX_CONNECTION_REQUESTS=3
+NGINX_GEOIP_COUNTRY_CODE=CA
+NGINX_GZIP_RATIO=1.8
+NGINX_HOST=api.example.com
+NGINX_HTTP_CF_RAY=xyz789abc123
+NGINX_HTTP_REFERER=https://example.com/page
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64)
+NGINX_PID=5678
+NGINX_PIPELINED=yes
+NGINX_REMOTE_ADDR=192.168.0.1
+NGINX_REMOTE_PORT=9876
+NGINX_REMOTE_USER=jane_doe
+NGINX_REQUEST_ID=9e632a5b24c18f76
+NGINX_REQUEST_LENGTH=234
+NGINX_REQUEST_METHOD=PUT
+NGINX_REQUEST_TIME=0.123
+NGINX_REQUEST_URI=/api/update
+NGINX_SCHEME=https
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_SSL_CIPHER=AES256-SHA256
+NGINX_SSL_PROTOCOL=TLSv1.2
+NGINX_STATUS=500
+NGINX_STATUS_FAMILY=5xx
+NGINX_TIMESTAMP_SEC=1644997930.987
+NGINX_TIME_LOCAL=19/Feb/2023:14:15:30 +0000
+NGINX_UPSTREAM=backend-server-2:8080
+NGINX_UPSTREAM_CACHE_STATUS=HIT
+NGINX_UPSTREAM_CONNECT_TIME=0.045
+NGINX_UPSTREAM_HEADER_TIME=0.020
+NGINX_UPSTREAM_RESPONSE_LENGTH=7890
+NGINX_UPSTREAM_RESPONSE_TIME=0.058
+PRIORITY=3
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=DELETE /api/resource HTTP/2.0
+NGINX_BODY_BYTES_SENT=0
+NGINX_BYTES_SENT=123
+NGINX_CONNECTION=9876
+NGINX_CONNECTION_REQUESTS=8
+NGINX_HOST=api.example.com
+NGINX_HTTP_USER_AGENT=curl/7.68.0
+NGINX_PID=4321
+NGINX_PIPELINED=no
+NGINX_REMOTE_ADDR=203.0.113.1
+NGINX_REMOTE_PORT=5432
+NGINX_REQUEST_ID=1b6c59c8aef7d24a
+NGINX_REQUEST_LENGTH=456
+NGINX_REQUEST_METHOD=DELETE
+NGINX_REQUEST_TIME=0.032
+NGINX_REQUEST_URI=/api/resource
+NGINX_SCHEME=http
+NGINX_SERVER_PROTOCOL=HTTP/2.0
+NGINX_STATUS=204
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIMESTAMP_SEC=1644997940.234
+NGINX_TIME_LOCAL=19/Feb/2023:14:15:40 +0000
+NGINX_UPSTREAM=backend-server-1:8080
+NGINX_UPSTREAM_CONNECT_TIME=0.012
+NGINX_UPSTREAM_HEADER_TIME=0.020
+NGINX_UPSTREAM_RESPONSE_LENGTH=0
+NGINX_UPSTREAM_RESPONSE_TIME=0.010
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=GET /profile?user=bob HTTP/1.1
+NGINX_BODY_BYTES_SENT=1234
+NGINX_BYTES_SENT=2345
+NGINX_CONNECTION=5432
+NGINX_CONNECTION_REQUESTS=12
+NGINX_GEOIP_COUNTRY_CODE=US
+NGINX_HOST=example.com
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64)
+NGINX_PID=6543
+NGINX_PIPELINED=yes
+NGINX_QUERY_STRING=user=bob
+NGINX_REMOTE_ADDR=198.51.100.2
+NGINX_REMOTE_PORT=8765
+NGINX_REMOTE_USER=bob
+NGINX_REQUEST_ID=72692d781d0b8a4f
+NGINX_REQUEST_LENGTH=789
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_TIME=0.065
+NGINX_REQUEST_URI=/profile?user=bob
+NGINX_SCHEME=https
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_SSL_CIPHER=AES128-GCM-SHA256
+NGINX_SSL_PROTOCOL=TLSv1.3
+NGINX_STATUS=200
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIMESTAMP_SEC=1644997950.789
+NGINX_TIME_LOCAL=19/Feb/2023:14:15:50 +0000
+NGINX_UPSTREAM=10.0.0.2:8080
+NGINX_UPSTREAM_CACHE_STATUS=MISS
+NGINX_UPSTREAM_CONNECT_TIME=0.045
+NGINX_UPSTREAM_HEADER_TIME=0.020
+NGINX_UPSTREAM_RESPONSE_LENGTH=7890
+NGINX_UPSTREAM_RESPONSE_TIME=0.058
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=GET /dashboard HTTP/2.0
+NGINX_BODY_BYTES_SENT=0
+NGINX_BYTES_SENT=123
+NGINX_CONNECTION=65432
+NGINX_CONNECTION_REQUESTS=7
+NGINX_HOST=dashboard.example.org
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Linux; Android 10; Pixel 3)
+NGINX_PID=7890
+NGINX_PIPELINED=no
+NGINX_REMOTE_ADDR=203.0.113.2
+NGINX_REMOTE_PORT=9876
+NGINX_REQUEST_ID=c3e158d41e75a9d7
+NGINX_REQUEST_LENGTH=321
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_TIME=0.032
+NGINX_REQUEST_URI=/dashboard
+NGINX_SCHEME=https
+NGINX_SERVER_PROTOCOL=HTTP/2.0
+NGINX_STATUS=301
+NGINX_STATUS_FAMILY=3xx
+NGINX_TIMESTAMP_SEC=1644997960.321
+NGINX_TIME_LOCAL=19/Feb/2023:14:15:60 +0000
+NGINX_UPSTREAM_CONNECT_TIME=0.0
+NGINX_UPSTREAM_HEADER_TIME=0.0
+NGINX_UPSTREAM_RESPONSE_LENGTH=0
+NGINX_UPSTREAM_RESPONSE_TIME=0.0
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=POST /submit-form HTTP/1.1
+NGINX_BODY_BYTES_SENT=876
+NGINX_BYTES_SENT=987
+NGINX_CONNECTION=8765
+NGINX_CONNECTION_REQUESTS=9
+NGINX_HOST=example.com
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64)
+NGINX_PID=8765
+NGINX_PIPELINED=yes
+NGINX_REMOTE_ADDR=10.0.0.4
+NGINX_REMOTE_PORT=12345
+NGINX_REQUEST_ID=f9f6e8235de54af4
+NGINX_REQUEST_LENGTH=654
+NGINX_REQUEST_METHOD=POST
+NGINX_REQUEST_TIME=0.045
+NGINX_REQUEST_URI=/submit-form
+NGINX_SCHEME=http
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=201
+NGINX_STATUS_FAMILY=2xx
+NGINX_TIMESTAMP_SEC=1644997970.555
+NGINX_TIME_LOCAL=19/Feb/2023:14:15:70 +0000
+NGINX_UPSTREAM=backend-server-3:8080
+NGINX_UPSTREAM_CONNECT_TIME=0.012
+NGINX_UPSTREAM_HEADER_TIME=0.020
+NGINX_UPSTREAM_RESPONSE_LENGTH=0
+NGINX_UPSTREAM_RESPONSE_TIME=0.010
+PRIORITY=6
+SYSLOG_IDENTIFIER=nginx-log
+
+MESSAGE=GET /contact HTTP/1.1
+NGINX_BODY_BYTES_SENT=0
+NGINX_BYTES_SENT=0
+NGINX_CONNECTION=23456
+NGINX_CONNECTION_REQUESTS=6
+NGINX_GEOIP_COUNTRY_CODE=FR
+NGINX_HOST=example.org
+NGINX_HTTP_USER_AGENT=Mozilla/5.0 (Linux; Android 10; Pixel 3)
+NGINX_PID=3456
+NGINX_PIPELINED=no
+NGINX_REMOTE_ADDR=198.51.100.3
+NGINX_REMOTE_PORT=5678
+NGINX_REMOTE_USER=mary
+NGINX_REQUEST_ID=2ec3e8859e7a406c
+NGINX_REQUEST_LENGTH=432
+NGINX_REQUEST_METHOD=GET
+NGINX_REQUEST_TIME=0.032
+NGINX_REQUEST_URI=/contact
+NGINX_SCHEME=https
+NGINX_SERVER_PROTOCOL=HTTP/1.1
+NGINX_STATUS=404
+NGINX_STATUS_FAMILY=4xx
+NGINX_TIMESTAMP_SEC=1644997980.987
+NGINX_TIME_LOCAL=19/Feb/2023:14:15:80 +0000
+NGINX_UPSTREAM_CONNECT_TIME=0.0
+NGINX_UPSTREAM_HEADER_TIME=0.0
+NGINX_UPSTREAM_RESPONSE_LENGTH=0
+NGINX_UPSTREAM_RESPONSE_TIME=0.0
+PRIORITY=5
+SYSLOG_IDENTIFIER=nginx-log
+
diff --git a/collectors/log2journal/tests.sh b/collectors/log2journal/tests.sh
new file mode 100755
index 00000000000000..40243886691769
--- /dev/null
+++ b/collectors/log2journal/tests.sh
@@ -0,0 +1,148 @@
+#!/usr/bin/env bash
+# Test runner for log2journal: prefer ./log2journal, else the first one found in PATH.
+if [ -f "${PWD}/log2journal" ]; then
+    log2journal_bin="${PWD}/log2journal"
+else
+    log2journal_bin="$(command -v log2journal)"
+fi
+
+[ -z "${log2journal_bin}" ] && echo >&2 "Cannot find log2journal binary" && exit 1
+echo >&2 "Using: ${log2journal_bin}"
+
+script_dir=$(dirname "$(readlink -f "$0")")
+tests="${script_dir}/tests.d"
+
+if [ ! -d "${tests}" ]; then
+    echo >&2 "tests directory '${tests}' is not found."
+    exit 1
+fi
+
+# Private scratch directory: the test functions write their 'output' file into the cwd
+tmp=$(mktemp -d /tmp/script_temp.XXXXXXXXXX)
+
+# Remove the scratch directory, whatever the exit path
+cleanup() {
+    echo "Cleaning up..."
+    rm -rf "$tmp"
+}
+
+# Run cleanup on every script exit (success or failure)
+trap cleanup EXIT
+
+# All tests run from inside the scratch directory
+cd "$tmp" || exit 1
+
+# -----------------------------------------------------------------------------
+
+test_log2journal_config() {
+ local in="${1}"
+ local out="${2}"
+ shift 2
+
+ [ -f output ] && rm output
+
+ printf >&2 "running: "
+ printf >&2 "%q " "${log2journal_bin}" "${@}"
+ printf >&2 "\n"
+
+ "${log2journal_bin}" <"${in}" "${@}" >output 2>&1
+ ret=$?
+
+ [ $ret -ne 0 ] && echo >&2 "${log2journal_bin} exited with code: $ret" && cat output && exit 1
+
+ diff --ignore-all-space "${out}" output
+ [ $? -ne -0 ] && echo >&2 "${log2journal_bin} output does not match!" && exit 1
+
+ echo >&2 "OK"
+ echo >&2
+
+ return 0
+}
+
+# test yaml parsing
+echo >&2
+echo >&2 "Testing full yaml config parsing..."
+test_log2journal_config /dev/null "${tests}/full.output" -f "${tests}/full.yaml" --show-config || exit 1
+
+echo >&2 "Testing command line parsing..."
+test_log2journal_config /dev/null "${tests}/full.output" --show-config \
+ --prefix=NGINX_ \
+ --filename-key NGINX_LOG_FILENAME \
+ --inject SYSLOG_IDENTIFIER=nginx-log \
+ --inject=SYSLOG_IDENTIFIER2=nginx-log2 \
+ --inject 'PRIORITY=${NGINX_STATUS}' \
+ --inject='NGINX_STATUS_FAMILY=${NGINX_STATUS}${NGINX_METHOD}' \
+ --rewrite 'PRIORITY=//${NGINX_STATUS}/inject,dont-stop' \
+ --rewrite "PRIORITY=/^[123]/6" \
+ --rewrite='PRIORITY=|^4|5' \
+ '--rewrite=PRIORITY=-^5-3' \
+ --rewrite "PRIORITY=;.*;4" \
+ --rewrite 'NGINX_STATUS_FAMILY=|^(?[1-5])|${first_digit}xx' \
+ --rewrite 'NGINX_STATUS_FAMILY=|.*|UNKNOWN' \
+ --rename TEST1=TEST2 \
+ --rename=TEST3=TEST4 \
+ --unmatched-key MESSAGE \
+ --inject-unmatched PRIORITY=1 \
+ --inject-unmatched=PRIORITY2=2 \
+ --include=".*" \
+ --exclude ".*HELLO.*WORLD.*" \
+ '(?x) # Enable PCRE2 extended mode
+ ^
+ (?[^ ]+) \s - \s # NGINX_REMOTE_ADDR
+ (?[^ ]+) \s # NGINX_REMOTE_USER
+ \[
+ (?[^\]]+) # NGINX_TIME_LOCAL
+ \]
+ \s+ "
+ (?
+ (?[A-Z]+) \s+ # NGINX_METHOD
+ (?[^ ]+) \s+
+ HTTP/(?[^"]+)
+ )
+ " \s+
+ (?\d+) \s+ # NGINX_STATUS
+ (?\d+) \s+ # NGINX_BODY_BYTES_SENT
+ "(?[^"]*)" \s+ # NGINX_HTTP_REFERER
+ "(?[^"]*)" # NGINX_HTTP_USER_AGENT' \
+ || exit 1
+
+# -----------------------------------------------------------------------------
+
+test_log2journal() {
+ local n="${1}"
+ local in="${2}"
+ local out="${3}"
+ shift 3
+
+ printf >&2 "running test No ${n}: "
+ printf >&2 "%q " "${log2journal_bin}" "${@}"
+ printf >&2 "\n"
+ echo >&2 "using as input : ${in}"
+ echo >&2 "expecting output: ${out}"
+
+ [ -f output ] && rm output
+
+ "${log2journal_bin}" <"${in}" "${@}" >output 2>&1
+ ret=$?
+
+ [ $ret -ne 0 ] && echo >&2 "${log2journal_bin} exited with code: $ret" && cat output && exit 1
+
+ diff "${out}" output
+ [ $? -ne -0 ] && echo >&2 "${log2journal_bin} output does not match! - here is what we got:" && cat output && exit 1
+
+ echo >&2 "OK"
+ echo >&2
+
+ return 0
+}
+
+echo >&2
+echo >&2 "Testing parsing and output..."
+# Arguments: test number, input log, expected output file, then the log2journal options
+test_log2journal 1 "${tests}/json.log" "${tests}/json.output" json
+test_log2journal 2 "${tests}/json.log" "${tests}/json-include.output" json --include "OBJECT"
+test_log2journal 3 "${tests}/json.log" "${tests}/json-exclude.output" json --exclude "ARRAY[^2]"
+test_log2journal 4 "${tests}/nginx-json.log" "${tests}/nginx-json.output" -f "${script_dir}/log2journal.d/nginx-json.yaml"
+test_log2journal 5 "${tests}/nginx-combined.log" "${tests}/nginx-combined.output" -f "${script_dir}/log2journal.d/nginx-combined.yaml"
+test_log2journal 6 "${tests}/logfmt.log" "${tests}/logfmt.output" -f "${tests}/logfmt.yaml"
+test_log2journal 7 "${tests}/logfmt.log" "${tests}/default.output" -f "${script_dir}/log2journal.d/default.yaml"
diff --git a/collectors/macos.plugin/README.md b/collectors/macos.plugin/README.md
deleted file mode 100644
index 509e22edce4753..00000000000000
--- a/collectors/macos.plugin/README.md
+++ /dev/null
@@ -1,16 +0,0 @@
-
-
-# macos.plugin
-
-Collects resource usage and performance data on macOS systems
-
-By default, Netdata will enable monitoring metrics for disks, memory, and network only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Use `yes` instead of `auto` in plugin configuration sections to enable these charts permanently. You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics for all internal Netdata plugins.
-
-
diff --git a/collectors/macos.plugin/README.md b/collectors/macos.plugin/README.md
new file mode 120000
index 00000000000000..2ea6842e42c77c
--- /dev/null
+++ b/collectors/macos.plugin/README.md
@@ -0,0 +1 @@
+integrations/macos.md
\ No newline at end of file
diff --git a/collectors/macos.plugin/integrations/macos.md b/collectors/macos.plugin/integrations/macos.md
new file mode 100644
index 00000000000000..5128a5a77c2f76
--- /dev/null
+++ b/collectors/macos.plugin/integrations/macos.md
@@ -0,0 +1,286 @@
+
+
+# macOS
+
+
+
+
+
+Plugin: macos.plugin
+Module: mach_smi
+
+
+
+## Overview
+
+Monitor macOS metrics for efficient operating system performance.
+
+The plugin uses three different methods to collect data:
+ - The function `sysctlbyname` is called to collect network, swap, loadavg, and boot time.
+ - The function `host_statistic` is called to collect CPU and Virtual memory data.
+ - The function `IOServiceGetMatchingServices` is called to collect storage information.
+
+
+This collector is only supported on the following platforms:
+
+- macOS
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per macOS instance
+
+These metrics refer to hardware and network monitoring.
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.cpu | user, nice, system, idle | percentage |
+| system.ram | active, wired, throttled, compressor, inactive, purgeable, speculative, free | MiB |
+| mem.swapio | io, out | KiB/s |
+| mem.pgfaults | memory, cow, pagein, pageout, compress, decompress, zero_fill, reactivate, purge | faults/s |
+| system.load | load1, load5, load15 | load |
+| mem.swap | free, used | MiB |
+| system.ipv4 | received, sent | kilobits/s |
+| ipv4.tcppackets | received, sent | packets/s |
+| ipv4.tcperrors | InErrs, InCsumErrors, RetransSegs | packets/s |
+| ipv4.tcphandshake | EstabResets, ActiveOpens, PassiveOpens, AttemptFails | events/s |
+| ipv4.tcpconnaborts | baddata, userclosed, nomemory, timeout | connections/s |
+| ipv4.tcpofo | inqueue | packets/s |
+| ipv4.tcpsyncookies | received, sent, failed | packets/s |
+| ipv4.ecnpkts | CEP, NoECTP | packets/s |
+| ipv4.udppackets | received, sent | packets/s |
+| ipv4.udperrors | RcvbufErrors, InErrors, NoPorts, InCsumErrors, IgnoredMulti | events/s |
+| ipv4.icmp | received, sent | packets/s |
+| ipv4.icmp_errors | InErrors, OutErrors, InCsumErrors | packets/s |
+| ipv4.icmpmsg | InEchoReps, OutEchoReps, InEchos, OutEchos | packets/s |
+| ipv4.packets | received, sent, forwarded, delivered | packets/s |
+| ipv4.fragsout | ok, failed, created | packets/s |
+| ipv4.fragsin | ok, failed, all | packets/s |
+| ipv4.errors | InDiscards, OutDiscards, InHdrErrors, OutNoRoutes, InAddrErrors, InUnknownProtos | packets/s |
+| ipv6.packets | received, sent, forwarded, delivers | packets/s |
+| ipv6.fragsout | ok, failed, all | packets/s |
+| ipv6.fragsin | ok, failed, timeout, all | packets/s |
+| ipv6.errors | InDiscards, OutDiscards, InHdrErrors, InAddrErrors, InTruncatedPkts, InNoRoutes, OutNoRoutes | packets/s |
+| ipv6.icmp | received, sent | messages/s |
+| ipv6.icmpredir | received, sent | redirects/s |
+| ipv6.icmperrors | InErrors, OutErrors, InCsumErrors, InDestUnreachs, InPktTooBigs, InTimeExcds, InParmProblems, OutDestUnreachs, OutTimeExcds, OutParmProblems | errors/s |
+| ipv6.icmpechos | InEchos, OutEchos, InEchoReplies, OutEchoReplies | messages/s |
+| ipv6.icmprouter | InSolicits, OutSolicits, InAdvertisements, OutAdvertisements | messages/s |
+| ipv6.icmpneighbor | InSolicits, OutSolicits, InAdvertisements, OutAdvertisements | messages/s |
+| ipv6.icmptypes | InType1, InType128, InType129, InType136, OutType1, OutType128, OutType129, OutType133, OutType135, OutType143 | messages/s |
+| system.uptime | uptime | seconds |
+| system.io | in, out | KiB/s |
+
+### Per disk
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| disk.io | read, writes | KiB/s |
+| disk.ops | read, writes | operations/s |
+| disk.util | utilization | % of time working |
+| disk.iotime | reads, writes | milliseconds/s |
+| disk.await | reads, writes | milliseconds/operation |
+| disk.avgsz | reads, writes | KiB/operation |
+| disk.svctm | svctm | milliseconds/operation |
+
+### Per mount point
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| disk.space | avail, used, reserved_for_root | GiB |
+| disk.inodes | avail, used, reserved_for_root | inodes |
+
+### Per network device
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| net.net | received, sent | kilobits/s |
+| net.packets | received, sent, multicast_received, multicast_sent | packets/s |
+| net.errors | inbound, outbound | errors/s |
+| net.drops | inbound | drops/s |
+| net.events | frames, collisions, carrier | events/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ interface_speed ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.net | network interface ${label:device} current speed |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+There are three sections in the file which you can configure:
+
+- `[plugin:macos:sysctl]` - Enable or disable monitoring for network, swap, loadavg, and boot time.
+- `[plugin:macos:mach_smi]` - Enable or disable monitoring for CPU and Virtual memory.
+- `[plugin:macos:iokit]` - Enable or disable monitoring for storage device.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| enable load average | Enable or disable monitoring of load average metrics (load1, load5, load15). | yes | no |
+| system swap | Enable or disable monitoring of system swap metrics (free, used). | yes | no |
+| bandwidth | Enable or disable monitoring of network bandwidth metrics (received, sent). | yes | no |
+| ipv4 TCP packets | Enable or disable monitoring of IPv4 TCP total packets metrics (received, sent). | yes | no |
+| ipv4 TCP errors | Enable or disable monitoring of IPv4 TCP packets metrics (Input Errors, Checksum, Retransmission segments). | yes | no |
+| ipv4 TCP handshake issues | Enable or disable monitoring of IPv4 TCP handshake metrics (Established Resets, Active Opens, Passive Opens, Attempt Fails). | yes | no |
+| ECN packets | Enable or disable monitoring of ECN statistics metrics (InCEPkts, InNoECTPkts). | auto | no |
+| TCP SYN cookies | Enable or disable monitoring of TCP SYN cookies metrics (received, sent, failed). | auto | no |
+| TCP out-of-order queue | Enable or disable monitoring of TCP out-of-order queue metrics (inqueue). | auto | no |
+| TCP connection aborts | Enable or disable monitoring of TCP connection aborts metrics (Bad Data, User closed, No memory, Timeout). | auto | no |
+| ipv4 UDP packets | Enable or disable monitoring of ipv4 UDP packets metrics (sent, received). | yes | no |
+| ipv4 UDP errors | Enable or disable monitoring of ipv4 UDP errors metrics (Received Buffer error, Input Errors, No Ports, IN Checksum Errors, Ignore Multi). | yes | no |
+| ipv4 icmp packets | Enable or disable monitoring of IPv4 ICMP packets metrics (sent, received, in error, OUT error, IN Checksum error). | yes | no |
+| ipv4 icmp messages | Enable or disable monitoring of ipv4 ICMP messages metrics (I/O messages, I/O Errors, In Checksum). | yes | no |
+| ipv4 packets | Enable or disable monitoring of ipv4 packets metrics (received, sent, forwarded, delivered). | yes | no |
+| ipv4 fragments sent | Enable or disable monitoring of IPv4 fragments sent metrics (ok, fails, creates). | yes | no |
+| ipv4 fragments assembly | Enable or disable monitoring of IPv4 fragments assembly metrics (ok, failed, all). | yes | no |
+| ipv4 errors | Enable or disable monitoring of IPv4 errors metrics (I/O discard, I/O HDR errors, In Addr errors, In Unknown protos, OUT No Routes). | yes | no |
+| ipv6 packets | Enable or disable monitoring of IPv6 packets metrics (received, sent, forwarded, delivered). | auto | no |
+| ipv6 fragments sent | Enable or disable monitoring of IPv6 fragments sent metrics (ok, failed, all). | auto | no |
+| ipv6 fragments assembly | Enable or disable monitoring of IPv6 fragments assembly metrics (ok, failed, timeout, all). | auto | no |
+| ipv6 errors | Enable or disable monitoring of IPv6 errors metrics (I/O Discards, In Hdr Errors, In Addr Errors, In Truncated Packets, I/O No Routes). | auto | no |
+| icmp | Enable or disable monitoring of ICMP metrics (sent, received). | auto | no |
+| icmp redirects | Enable or disable monitoring of ICMP redirects metrics (received, sent). | auto | no |
+| icmp errors | Enable or disable monitoring of ICMP metrics (I/O Errors, In Checksums, In Destination Unreachable, In Packet too big, In Time Exceeds, In Parm Problem, Out Dest Unreachable, Out Time Exceeds, Out Parm Problems). | auto | no |
+| icmp echos | Enable or disable monitoring of ICMP echos metrics (I/O Echos, I/O Echo Reply). | auto | no |
+| icmp router | Enable or disable monitoring of ICMP router metrics (I/O Solicits, I/O Advertisements). | auto | no |
+| icmp neighbor | Enable or disable monitoring of ICMP neighbor metrics (I/O Solicits, I/O Advertisements). | auto | no |
+| icmp types | Enable or disable monitoring of ICMP types metrics (I/O Type1, I/O Type128, I/O Type129, Out Type133, Out Type135, In Type136, Out Type145). | auto | no |
+| space usage for all disks | Enable or disable monitoring of space usage for all disks metrics (available, used, reserved for root). | yes | no |
+| inodes usage for all disks | Enable or disable monitoring of inodes usage for all disks metrics (available, used, reserved for root). | yes | no |
+| bandwidth | Enable or disable monitoring of bandwidth metrics (received, sent). | yes | no |
+| system uptime | Enable or disable monitoring of system uptime metrics (uptime). | yes | no |
+| cpu utilization | Enable or disable monitoring of CPU utilization metrics (user, nice, system, idle). | yes | no |
+| system ram | Enable or disable monitoring of system RAM metrics (Active, Wired, throttled, compressor, inactive, purgeable, speculative, free). | yes | no |
+| swap i/o | Enable or disable monitoring of SWAP I/O metrics (I/O Swap). | yes | no |
+| memory page faults | Enable or disable monitoring of memory page faults metrics (memory, cow, I/O page, compress, decompress, zero fill, reactivate, purge). | yes | no |
+| disk i/o | Enable or disable monitoring of disk I/O metrics (In, Out). | yes | no |
+
+
+
+#### Examples
+
+##### Disable swap monitoring.
+
+A basic example that discards swap monitoring
+
+Config
+
+```yaml
+[plugin:macos:sysctl]
+ system swap = no
+[plugin:macos:mach_smi]
+ swap i/o = no
+
+```
+
+
+##### Disable complete Machine SMI section.
+
+A basic example that disables all monitoring in the Machine SMI section
+
+Config
+
+```yaml
+[plugin:macos:mach_smi]
+ cpu utilization = no
+ system ram = no
+ swap i/o = no
+ memory page faults = no
+ disk i/o = no
+
+```
+
+
+
diff --git a/collectors/macos.plugin/macos_fw.c b/collectors/macos.plugin/macos_fw.c
index ca06f428e1cf49..75ef386b9a2d6d 100644
--- a/collectors/macos.plugin/macos_fw.c
+++ b/collectors/macos.plugin/macos_fw.c
@@ -435,7 +435,7 @@ int do_macos_iokit(int update_every, usec_t dt) {
if (likely(do_space)) {
st = rrdset_find_active_bytype_localhost("disk_space", mntbuf[i].f_mntonname);
if (unlikely(!st)) {
- snprintfz(title, 4096, "Disk Space Usage for %s [%s]", mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname);
+ snprintfz(title, sizeof(title) - 1, "Disk Space Usage for %s [%s]", mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname);
st = rrdset_create_localhost(
"disk_space"
, mntbuf[i].f_mntonname
@@ -467,7 +467,7 @@ int do_macos_iokit(int update_every, usec_t dt) {
if (likely(do_inodes)) {
st = rrdset_find_active_bytype_localhost("disk_inodes", mntbuf[i].f_mntonname);
if (unlikely(!st)) {
- snprintfz(title, 4096, "Disk Files (inodes) Usage for %s [%s]", mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname);
+ snprintfz(title, sizeof(title) - 1, "Disk Files (inodes) Usage for %s [%s]", mntbuf[i].f_mntonname, mntbuf[i].f_mntfromname);
st = rrdset_create_localhost(
"disk_inodes"
, mntbuf[i].f_mntonname
diff --git a/collectors/nfacct.plugin/README.md b/collectors/nfacct.plugin/README.md
deleted file mode 100644
index ae6597a409f6dc..00000000000000
--- a/collectors/nfacct.plugin/README.md
+++ /dev/null
@@ -1,63 +0,0 @@
-
-
-# Monitor Netfilter statistics (nfacct.plugin)
-
-`nfacct.plugin` collects Netfilter statistics.
-
-## Prerequisites
-
-If you are using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), install the
-`netdata-plugin-nfacct` package using your system package manager.
-
-If you built Netdata locally:
-
-1. install `libmnl-dev` and `libnetfilter-acct-dev` using the package manager of your system.
-
-2. re-install Netdata from source. The installer will detect that the required libraries are now available and will also build `netdata.plugin`.
-
-Keep in mind that NFACCT requires root access, so the plugin is setuid to root.
-
-## Charts
-
-The plugin provides Netfilter connection tracker statistics and nfacct packet and bandwidth accounting:
-
-Connection tracker:
-
-1. Connections.
-2. Changes.
-3. Expectations.
-4. Errors.
-5. Searches.
-
-Netfilter accounting:
-
-1. Packets.
-2. Bandwidth.
-
-## Configuration
-
-If you need to disable NFACCT for Netdata, edit /etc/netdata/netdata.conf and set:
-
-```
-[plugins]
- nfacct = no
-```
-
-## Debugging
-
-You can run the plugin by hand:
-
-```
-sudo /usr/libexec/netdata/plugins.d/nfacct.plugin 1 debug
-```
-
-You will get verbose output on what the plugin does.
-
-
diff --git a/collectors/nfacct.plugin/README.md b/collectors/nfacct.plugin/README.md
new file mode 120000
index 00000000000000..ea320d1399cb30
--- /dev/null
+++ b/collectors/nfacct.plugin/README.md
@@ -0,0 +1 @@
+integrations/netfilter.md
\ No newline at end of file
diff --git a/collectors/nfacct.plugin/integrations/netfilter.md b/collectors/nfacct.plugin/integrations/netfilter.md
new file mode 100644
index 00000000000000..831b6fb5b920e7
--- /dev/null
+++ b/collectors/nfacct.plugin/integrations/netfilter.md
@@ -0,0 +1,132 @@
+
+
+# Netfilter
+
+
+
+
+
+Plugin: nfacct.plugin
+Module: nfacct.plugin
+
+
+
+## Overview
+
+Monitor Netfilter metrics for optimal packet filtering and manipulation. Keep tabs on packet counts, dropped packets, and error rates to secure network operations.
+
+Netdata uses libmnl (https://www.netfilter.org/projects/libmnl/index.html) to collect information.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+This plugin needs setuid.
+
+### Default Behavior
+
+#### Auto-Detection
+
+This plugin uses a socket to connect to netfilter and collect data.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Netfilter instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| netfilter.netlink_new | new, ignore, invalid | connections/s |
+| netfilter.netlink_changes | insert, delete, delete_list | changes/s |
+| netfilter.netlink_search | searched, search_restart, found | searches/s |
+| netfilter.netlink_errors | icmp_error, insert_failed, drop, early_drop | events/s |
+| netfilter.netlink_expect | created, deleted, new | expectations/s |
+| netfilter.nfacct_packets | a dimension per nfacct object | packets/s |
+| netfilter.nfacct_bytes | a dimension per nfacct object | kilobytes/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Install required packages
+
+Install `libmnl-dev` and `libnetfilter-acct-dev` using the package manager of your system.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:nfacct]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 1 | no |
+| command options | Additional parameters for collector | | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/nfacct.plugin/plugin_nfacct.c b/collectors/nfacct.plugin/plugin_nfacct.c
index 430ceab52f069f..2863cd7eb08b02 100644
--- a/collectors/nfacct.plugin/plugin_nfacct.c
+++ b/collectors/nfacct.plugin/plugin_nfacct.c
@@ -18,6 +18,8 @@
#define NETDATA_CHART_PRIO_NETFILTER_PACKETS 8906
#define NETDATA_CHART_PRIO_NETFILTER_BYTES 8907
+#define NFACCT_RESTART_EVERY_SECONDS 86400 // restart the plugin every this many seconds
+
static inline size_t mnl_buffer_size() {
long s = MNL_SOCKET_BUFFER_SIZE;
if(s <= 0) return 8192;
@@ -745,20 +747,8 @@ void nfacct_signals()
}
int main(int argc, char **argv) {
- stderror = stderr;
clocks_init();
-
- // ------------------------------------------------------------------------
- // initialization of netdata plugin
-
- program_name = "nfacct.plugin";
-
- // disable syslog
- error_log_syslog = 0;
-
- // set errors flood protection to 100 logs per hour
- error_log_errors_per_period = 100;
- error_log_throttle_period = 3600;
+ nd_log_initialize_for_external_plugins("nfacct.plugin");
// ------------------------------------------------------------------------
// parse command line parameters
@@ -852,7 +842,7 @@ int main(int argc, char **argv) {
if(unlikely(netdata_exit)) break;
if(debug && iteration)
- fprintf(stderr, "nfacct.plugin: iteration %zu, dt %llu usec\n"
+ fprintf(stderr, "nfacct.plugin: iteration %zu, dt %"PRIu64" usec\n"
, iteration
, dt
);
@@ -879,9 +869,11 @@ int main(int argc, char **argv) {
fflush(stdout);
- // restart check (14400 seconds)
- if(now_monotonic_sec() - started_t > 14400) break;
+ if (now_monotonic_sec() - started_t > NFACCT_RESTART_EVERY_SECONDS) {
+ collector_info("NFACCT reached my lifetime expectancy. Exiting to restart.");
+ fprintf(stdout, "EXIT\n");
+ fflush(stdout);
+ exit(0);
+ }
}
-
- collector_info("NFACCT process exiting");
}
diff --git a/collectors/perf.plugin/README.md b/collectors/perf.plugin/README.md
deleted file mode 100644
index a8bd4b0e5ee1fd..00000000000000
--- a/collectors/perf.plugin/README.md
+++ /dev/null
@@ -1,87 +0,0 @@
-
-
-# Monitor CPU performance statistics (perf.plugin)
-
-`perf.plugin` collects system-wide CPU performance statistics from Performance Monitoring Units (PMU) using
-the `perf_event_open()` system call.
-
-## Important Notes
-
-If you are using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), you will need to install
-the `netdata-plugin-perf` package using your system package manager.
-
-Accessing hardware PMUs requires root permissions, so the plugin is setuid to root.
-
-Keep in mind that the number of PMUs in a system is usually quite limited and every hardware monitoring
-event for every CPU core needs a separate file descriptor to be opened.
-
-## Charts
-
-The plugin provides statistics for general hardware and software performance monitoring events:
-
-Hardware events:
-
-1. CPU cycles
-2. Instructions
-3. Branch instructions
-4. Cache operations
-5. BUS cycles
-6. Stalled frontend and backend cycles
-
-Software events:
-
-1. CPU migrations
-2. Alignment faults
-3. Emulation faults
-
-Hardware cache events:
-
-1. L1D cache operations
-2. L1D prefetch cache operations
-3. L1I cache operations
-4. LL cache operations
-5. DTLB cache operations
-6. ITLB cache operations
-7. PBU cache operations
-
-## Configuration
-
-The plugin is disabled by default because the number of PMUs is usually quite limited and it is not desired to
-allow Netdata to struggle silently for PMUs, interfering with other performance monitoring software. If you need to
-enable the perf plugin, edit /etc/netdata/netdata.conf and set:
-
-```raw
-[plugins]
- perf = yes
-```
-
-```raw
-[plugin:perf]
- update every = 1
- command options = all
-```
-
-You can use the `command options` parameter to pick what data should be collected and which charts should be
-displayed. If `all` is used, all general performance monitoring counters are probed and corresponding charts
-are enabled for the available counters. You can also define a particular set of enabled charts using the
-following keywords: `cycles`, `instructions`, `branch`, `cache`, `bus`, `stalled`, `migrations`, `alignment`,
-`emulation`, `L1D`, `L1D-prefetch`, `L1I`, `LL`, `DTLB`, `ITLB`, `PBU`.
-
-## Debugging
-
-You can run the plugin by hand:
-
-```raw
-sudo /usr/libexec/netdata/plugins.d/perf.plugin 1 all debug
-```
-
-You will get verbose output on what the plugin does.
-
-
diff --git a/collectors/perf.plugin/README.md b/collectors/perf.plugin/README.md
new file mode 120000
index 00000000000000..fb8a0cd69644fa
--- /dev/null
+++ b/collectors/perf.plugin/README.md
@@ -0,0 +1 @@
+integrations/cpu_performance.md
\ No newline at end of file
diff --git a/collectors/perf.plugin/integrations/cpu_performance.md b/collectors/perf.plugin/integrations/cpu_performance.md
new file mode 100644
index 00000000000000..d3c316d2e942d0
--- /dev/null
+++ b/collectors/perf.plugin/integrations/cpu_performance.md
@@ -0,0 +1,192 @@
+
+
+# CPU performance
+
+
+
+
+
+Plugin: perf.plugin
+Module: perf.plugin
+
+
+
+## Overview
+
+This collector monitors CPU performance metrics about cycles, instructions, migrations, cache operations and more.
+
+It uses syscall (2) to open a file descriptor to monitor the perf events.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+It needs setuid to use the necessary syscall to collect perf events. Netdata sets the permission during installation time.
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per CPU performance instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| perf.cpu_cycles | cpu, ref_cpu | cycles/s |
+| perf.instructions | instructions | instructions/s |
+| perf.instructions_per_cycle | ipc | instructions/cycle |
+| perf.branch_instructions | instructions, misses | instructions/s |
+| perf.cache | references, misses | operations/s |
+| perf.bus_cycles | bus | cycles/s |
+| perf.stalled_cycles | frontend, backend | cycles/s |
+| perf.migrations | migrations | migrations |
+| perf.alignment_faults | faults | faults |
+| perf.emulation_faults | faults | faults |
+| perf.l1d_cache | read_access, read_misses, write_access, write_misses | events/s |
+| perf.l1d_cache_prefetch | prefetches | prefetches/s |
+| perf.l1i_cache | read_access, read_misses | events/s |
+| perf.ll_cache | read_access, read_misses, write_access, write_misses | events/s |
+| perf.dtlb_cache | read_access, read_misses, write_access, write_misses | events/s |
+| perf.itlb_cache | read_access, read_misses | events/s |
+| perf.pbu_cache | read_access | events/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Install perf plugin
+
+If you are [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure the `netdata-plugin-perf` package is installed.
+
+
+#### Enable the perf plugin
+
+The plugin is disabled by default because the number of PMUs is usually quite limited and it is not desired to allow Netdata to struggle silently for PMUs, interfering with other performance monitoring software.
+
+To enable it, use `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `netdata.conf` file.
+
+```bash
+cd /etc/netdata # Replace this path with your Netdata config directory, if different
+sudo ./edit-config netdata.conf
+```
+
+Change the value of the `perf` setting to `yes` in the `[plugins]` section. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:perf]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+You can get the available options running:
+
+```bash
+/usr/libexec/netdata/plugins.d/perf.plugin --help
+````
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 1 | no |
+| command options | Command options that specify charts shown by plugin. `cycles`, `instructions`, `branch`, `cache`, `bus`, `stalled`, `migrations`, `alignment`, `emulation`, `L1D`, `L1D-prefetch`, `L1I`, `LL`, `DTLB`, `ITLB`, `PBU`. | 1 | yes |
+
+
+
+#### Examples
+
+##### All metrics
+
+Monitor all metrics available.
+
+```yaml
+[plugin:perf]
+ command options = all
+
+```
+##### CPU cycles
+
+Monitor CPU cycles.
+
+Config
+
+```yaml
+[plugin:perf]
+ command options = cycles
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+
+
+
diff --git a/collectors/perf.plugin/metadata.yaml b/collectors/perf.plugin/metadata.yaml
index d7539b502539c6..eada3351dcd5e1 100644
--- a/collectors/perf.plugin/metadata.yaml
+++ b/collectors/perf.plugin/metadata.yaml
@@ -40,7 +40,22 @@ modules:
description: ""
setup:
prerequisites:
- list: []
+ list:
+ - title: Install perf plugin
+ description: |
+ If you are [using our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/UPDATE.md#determine-which-installation-method-you-used), make sure the `netdata-plugin-perf` package is installed.
+ - title: Enable the perf plugin
+ description: |
+ The plugin is disabled by default because the number of PMUs is usually quite limited and it is not desired to allow Netdata to struggle silently for PMUs, interfering with other performance monitoring software.
+
+ To enable it, use `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `netdata.conf` file.
+
+ ```bash
+ cd /etc/netdata # Replace this path with your Netdata config directory, if different
+ sudo ./edit-config netdata.conf
+ ```
+
+ Change the value of the `perf` setting to `yes` in the `[plugins]` section. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
configuration:
file:
name: "netdata.conf"
@@ -49,7 +64,7 @@ modules:
options:
description: |
You can get the available options running:
-
+
```bash
/usr/libexec/netdata/plugins.d/perf.plugin --help
````
@@ -62,7 +77,7 @@ modules:
default_value: 1
required: false
- name: command options
- description: Command options that specify charts shown by plugin.
+ description: Command options that specify charts shown by plugin. `cycles`, `instructions`, `branch`, `cache`, `bus`, `stalled`, `migrations`, `alignment`, `emulation`, `L1D`, `L1D-prefetch`, `L1I`, `LL`, `DTLB`, `ITLB`, `PBU`.
default_value: 1
required: true
examples:
@@ -84,7 +99,28 @@ modules:
command options = cycles
troubleshooting:
problems:
- list: []
+ list:
+ - name: Debug Mode
+ description: |
+ You can run `perf.plugin` with the debug option enabled, to troubleshoot issues with it. The output should give you clues as to why the collector isn't working.
+
+ - Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+ - Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+ - Run the `perf.plugin` in debug mode:
+
+ ```bash
+ ./perf.plugin 1 all debug
+ ```
alerts: []
metrics:
folding:
diff --git a/collectors/perf.plugin/perf_plugin.c b/collectors/perf.plugin/perf_plugin.c
index 68c0f917d3682b..fe3b04daa67cbb 100644
--- a/collectors/perf.plugin/perf_plugin.c
+++ b/collectors/perf.plugin/perf_plugin.c
@@ -1283,20 +1283,8 @@ void parse_command_line(int argc, char **argv) {
}
int main(int argc, char **argv) {
- stderror = stderr;
clocks_init();
-
- // ------------------------------------------------------------------------
- // initialization of netdata plugin
-
- program_name = "perf.plugin";
-
- // disable syslog
- error_log_syslog = 0;
-
- // set errors flood protection to 100 logs per hour
- error_log_errors_per_period = 100;
- error_log_throttle_period = 3600;
+ nd_log_initialize_for_external_plugins("perf.plugin");
parse_command_line(argc, argv);
@@ -1328,7 +1316,7 @@ int main(int argc, char **argv) {
if(unlikely(netdata_exit)) break;
if(unlikely(debug && iteration))
- fprintf(stderr, "perf.plugin: iteration %zu, dt %llu usec\n"
+ fprintf(stderr, "perf.plugin: iteration %zu, dt %"PRIu64" usec\n"
, iteration
, dt
);
diff --git a/collectors/plugins.d/README.md b/collectors/plugins.d/README.md
index 1c3b50cb717ec1..0752d389bd3b38 100644
--- a/collectors/plugins.d/README.md
+++ b/collectors/plugins.d/README.md
@@ -14,20 +14,20 @@ from external processes, thus allowing Netdata to use **external plugins**.
## Provided External Plugins
-|plugin|language|O/S|description|
-|:----:|:------:|:-:|:----------|
-|[apps.plugin](https://github.com/netdata/netdata/blob/master/collectors/apps.plugin/README.md)|`C`|linux, freebsd|monitors the whole process tree on Linux and FreeBSD and breaks down system resource usage by **process**, **user** and **user group**.|
-|[charts.d.plugin](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/README.md)|`BASH`|all|a **plugin orchestrator** for data collection modules written in `BASH` v4+.|
-|[cups.plugin](https://github.com/netdata/netdata/blob/master/collectors/cups.plugin/README.md)|`C`|all|monitors **CUPS**|
-|[ebpf.plugin](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/README.md)|`C`|linux|monitors different metrics on environments using kernel internal functions.|
-|[go.d.plugin](https://github.com/netdata/go.d.plugin/blob/master/README.md)|`GO`|all|collects metrics from the system, applications, or third-party APIs.|
-|[ioping.plugin](https://github.com/netdata/netdata/blob/master/collectors/ioping.plugin/README.md)|`C`|all|measures disk latency.|
-|[freeipmi.plugin](https://github.com/netdata/netdata/blob/master/collectors/freeipmi.plugin/README.md)|`C`|linux|collects metrics from enterprise hardware sensors, on Linux servers.|
-|[nfacct.plugin](https://github.com/netdata/netdata/blob/master/collectors/nfacct.plugin/README.md)|`C`|linux|collects netfilter firewall, connection tracker and accounting metrics using `libmnl` and `libnetfilter_acct`.|
-|[xenstat.plugin](https://github.com/netdata/netdata/blob/master/collectors/xenstat.plugin/README.md)|`C`|linux|collects XenServer and XCP-ng metrics using `lxenstat`.|
-|[perf.plugin](https://github.com/netdata/netdata/blob/master/collectors/perf.plugin/README.md)|`C`|linux|collects CPU performance metrics using performance monitoring units (PMU).|
-|[python.d.plugin](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/README.md)|`python`|all|a **plugin orchestrator** for data collection modules written in `python` v2 or v3 (both are supported).|
-|[slabinfo.plugin](https://github.com/netdata/netdata/blob/master/collectors/slabinfo.plugin/README.md)|`C`|linux|collects kernel internal cache objects (SLAB) metrics.|
+| plugin | language | O/S | description |
+|:------------------------------------------------------------------------------------------------------:|:--------:|:--------------:|:----------------------------------------------------------------------------------------------------------------------------------------|
+| [apps.plugin](https://github.com/netdata/netdata/blob/master/collectors/apps.plugin/README.md) | `C` | linux, freebsd | monitors the whole process tree on Linux and FreeBSD and breaks down system resource usage by **process**, **user** and **user group**. |
+| [charts.d.plugin](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/README.md) | `BASH` | all | a **plugin orchestrator** for data collection modules written in `BASH` v4+. |
+| [cups.plugin](https://github.com/netdata/netdata/blob/master/collectors/cups.plugin/README.md) | `C` | all | monitors **CUPS** |
+| [ebpf.plugin](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/README.md) | `C` | linux | monitors different metrics on environments using kernel internal functions. |
+| [go.d.plugin](https://github.com/netdata/go.d.plugin/blob/master/README.md) | `GO` | all | collects metrics from the system, applications, or third-party APIs. |
+| [ioping.plugin](https://github.com/netdata/netdata/blob/master/collectors/ioping.plugin/README.md) | `C` | all | measures disk latency. |
+| [freeipmi.plugin](https://github.com/netdata/netdata/blob/master/collectors/freeipmi.plugin/README.md) | `C` | linux | collects metrics from enterprise hardware sensors, on Linux servers. |
+| [nfacct.plugin](https://github.com/netdata/netdata/blob/master/collectors/nfacct.plugin/README.md) | `C` | linux | collects netfilter firewall, connection tracker and accounting metrics using `libmnl` and `libnetfilter_acct`. |
+| [xenstat.plugin](https://github.com/netdata/netdata/blob/master/collectors/xenstat.plugin/README.md) | `C` | linux | collects XenServer and XCP-ng metrics using `lxenstat`. |
+| [perf.plugin](https://github.com/netdata/netdata/blob/master/collectors/perf.plugin/README.md) | `C` | linux | collects CPU performance metrics using performance monitoring units (PMU). |
+| [python.d.plugin](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/README.md) | `python` | all | a **plugin orchestrator** for data collection modules written in `python` v2 or v3 (both are supported). |
+| [slabinfo.plugin](https://github.com/netdata/netdata/blob/master/collectors/slabinfo.plugin/README.md) | `C` | linux | collects kernel internal cache objects (SLAB) metrics. |
Plugin orchestrators may also be described as **modular plugins**. They are modular since they accept custom made modules to be included. Writing modules for these plugins is easier than accessing the native Netdata API directly. You will find modules already available for each orchestrator under the directory of the particular modular plugin (e.g. under python.d.plugin for the python orchestrator).
Each of these modular plugins has each own methods for defining modules. Please check the examples and their documentation.
@@ -154,18 +154,18 @@ every 5 seconds.
There are a few environment variables that are set by `netdata` and are
available for the plugin to use.
-|variable|description|
-|:------:|:----------|
-|`NETDATA_USER_CONFIG_DIR`|The directory where all Netdata-related user configuration should be stored. If the plugin requires custom user configuration, this is the place the user has saved it (normally under `/etc/netdata`).|
-|`NETDATA_STOCK_CONFIG_DIR`|The directory where all Netdata -related stock configuration should be stored. If the plugin is shipped with configuration files, this is the place they can be found (normally under `/usr/lib/netdata/conf.d`).|
-|`NETDATA_PLUGINS_DIR`|The directory where all Netdata plugins are stored.|
-|`NETDATA_USER_PLUGINS_DIRS`|The list of directories where custom plugins are stored.|
-|`NETDATA_WEB_DIR`|The directory where the web files of Netdata are saved.|
-|`NETDATA_CACHE_DIR`|The directory where the cache files of Netdata are stored. Use this directory if the plugin requires a place to store data. A new directory should be created for the plugin for this purpose, inside this directory.|
-|`NETDATA_LOG_DIR`|The directory where the log files are stored. By default the `stderr` output of the plugin will be saved in the `error.log` file of Netdata.|
-|`NETDATA_HOST_PREFIX`|This is used in environments where system directories like `/sys` and `/proc` have to be accessed at a different path.|
-|`NETDATA_DEBUG_FLAGS`|This is a number (probably in hex starting with `0x`), that enables certain Netdata debugging features. Check **\[[Tracing Options]]** for more information.|
-|`NETDATA_UPDATE_EVERY`|The minimum number of seconds between chart refreshes. This is like the **internal clock** of Netdata (it is user configurable, defaulting to `1`). There is no meaning for a plugin to update its values more frequently than this number of seconds.|
+| variable | description |
+|:---------------------------:|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| `NETDATA_USER_CONFIG_DIR` | The directory where all Netdata-related user configuration should be stored. If the plugin requires custom user configuration, this is the place the user has saved it (normally under `/etc/netdata`). |
+| `NETDATA_STOCK_CONFIG_DIR` | The directory where all Netdata-related stock configuration should be stored. If the plugin is shipped with configuration files, this is the place they can be found (normally under `/usr/lib/netdata/conf.d`). |
+| `NETDATA_PLUGINS_DIR` | The directory where all Netdata plugins are stored. |
+| `NETDATA_USER_PLUGINS_DIRS` | The list of directories where custom plugins are stored. |
+| `NETDATA_WEB_DIR` | The directory where the web files of Netdata are saved. |
+| `NETDATA_CACHE_DIR` | The directory where the cache files of Netdata are stored. Use this directory if the plugin requires a place to store data. A new directory should be created for the plugin for this purpose, inside this directory. |
+| `NETDATA_LOG_DIR` | The directory where the log files are stored. By default the `stderr` output of the plugin will be saved in the `error.log` file of Netdata. |
+| `NETDATA_HOST_PREFIX` | This is used in environments where system directories like `/sys` and `/proc` have to be accessed at a different path. |
+| `NETDATA_DEBUG_FLAGS` | This is a number (probably in hex starting with `0x`), that enables certain Netdata debugging features. Check **\[[Tracing Options]]** for more information. |
+| `NETDATA_UPDATE_EVERY` | The minimum number of seconds between chart refreshes. This is like the **internal clock** of Netdata (it is user configurable, defaulting to `1`). There is no meaning for a plugin to update its values more frequently than this number of seconds. |
### The output of the plugin
@@ -298,7 +298,7 @@ the template is:
the context is giving the template of the chart. For example, if multiple charts present the same information for a different family, they should have the same `context`
- this is used for looking up rendering information for the chart (colors, sizes, informational texts) and also apply alarms to it
+ this is used for looking up rendering information for the chart (colors, sizes, informational texts) and also apply alerts to it
- `charttype`
@@ -388,12 +388,12 @@ the template is:
> VARIABLE [SCOPE] name = value
-`VARIABLE` defines a variable that can be used in alarms. This is to used for setting constants (like the max connections a server may accept).
+`VARIABLE` defines a variable that can be used in alerts. This is to be used for setting constants (like the max connections a server may accept).
Variables support 2 scopes:
- `GLOBAL` or `HOST` to define the variable at the host level.
+- `LOCAL` or `CHART` to define the variable at the chart level. Use chart-local variables when the same variable may exist for different charts (i.e. Netdata monitors 2 mysql servers, and you need to set the `max_connections` each server accepts). Using chart-local variables is the ideal way to build alert templates.
+- `LOCAL` or `CHART` to define the variable at the chart level. Use chart-local variables when the same variable may exist for different charts (i.e. Netdata monitors 2 mysql servers, and you need to set the `max_connections` each server accepts). Using chart-local variables is the ideal to build alert templates.
The position of the `VARIABLE` line, sets its default scope (in case you do not specify a scope). So, defining a `VARIABLE` before any `CHART`, or between `END` and `BEGIN` (outside any chart), sets `GLOBAL` scope, while defining a `VARIABLE` just after a `CHART` or a `DIMENSION`, or within the `BEGIN` - `END` block of a chart, sets `LOCAL` scope.
diff --git a/collectors/plugins.d/gperf-config.txt b/collectors/plugins.d/gperf-config.txt
index b8140e66c5b482..bad51367ce1abb 100644
--- a/collectors/plugins.d/gperf-config.txt
+++ b/collectors/plugins.d/gperf-config.txt
@@ -12,44 +12,47 @@ PARSER_KEYWORD;
#
# Plugins Only Keywords
#
-FLUSH, 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1
-DISABLE, 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2
-EXIT, 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3
-HOST, 71, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 4
-HOST_DEFINE, 72, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 5
-HOST_DEFINE_END, 73, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 6
-HOST_LABEL, 74, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 7
+FLUSH, 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1
+DISABLE, 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2
+EXIT, 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3
+HOST, 71, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 4
+HOST_DEFINE, 72, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 5
+HOST_DEFINE_END, 73, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 6
+HOST_LABEL, 74, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 7
#
# Common keywords
#
-BEGIN, 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8
-CHART, 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 9
-CLABEL, 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 10
-CLABEL_COMMIT, 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 11
-DIMENSION, 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 12
-END, 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13
-FUNCTION, 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 14
-FUNCTION_RESULT_BEGIN, 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15
-LABEL, 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 16
-OVERWRITE, 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 17
-SET, 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18
-VARIABLE, 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 19
-DYNCFG_ENABLE, 101, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20
-DYNCFG_REGISTER_MODULE, 102, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21
-REPORT_JOB_STATUS, 110, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22
+BEGIN, 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8
+CHART, 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 9
+CLABEL, 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 10
+CLABEL_COMMIT, 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 11
+DIMENSION, 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 12
+END, 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13
+FUNCTION, 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 14
+FUNCTION_RESULT_BEGIN, 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15
+LABEL, 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 16
+OVERWRITE, 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 17
+SET, 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18
+VARIABLE, 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 19
+DYNCFG_ENABLE, 101, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20
+DYNCFG_REGISTER_MODULE, 102, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21
+DYNCFG_REGISTER_JOB, 103, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22
+DYNCFG_RESET, 104, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23
+REPORT_JOB_STATUS, 110, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24
+DELETE_JOB, 111, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25
#
# Streaming only keywords
#
-CLAIMED_ID, 61, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23
-BEGIN2, 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24
-SET2, 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25
-END2, 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 26
+CLAIMED_ID, 61, PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 26
+BEGIN2, 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27
+SET2, 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28
+END2, 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29
#
# Streaming Replication keywords
#
-CHART_DEFINITION_END, 33, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27
-RBEGIN, 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28
-RDSTATE, 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29
-REND, 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 30
-RSET, 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 31
-RSSTATE, 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 32
+CHART_DEFINITION_END, 33, PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 30
+RBEGIN, 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 31
+RDSTATE, 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 32
+REND, 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 33
+RSET, 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 34
+RSSTATE, 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 35
diff --git a/collectors/plugins.d/gperf-hashtable.h b/collectors/plugins.d/gperf-hashtable.h
index e7d20126f6388a..b327d8d6d3404a 100644
--- a/collectors/plugins.d/gperf-hashtable.h
+++ b/collectors/plugins.d/gperf-hashtable.h
@@ -30,12 +30,12 @@
#endif
-#define GPERF_PARSER_TOTAL_KEYWORDS 32
+#define GPERF_PARSER_TOTAL_KEYWORDS 35
#define GPERF_PARSER_MIN_WORD_LENGTH 3
#define GPERF_PARSER_MAX_WORD_LENGTH 22
#define GPERF_PARSER_MIN_HASH_VALUE 3
-#define GPERF_PARSER_MAX_HASH_VALUE 41
-/* maximum key range = 39, duplicates = 0 */
+#define GPERF_PARSER_MAX_HASH_VALUE 47
+/* maximum key range = 45, duplicates = 0 */
#ifdef __GNUC__
__inline
@@ -49,32 +49,32 @@ gperf_keyword_hash_function (register const char *str, register size_t len)
{
static unsigned char asso_values[] =
{
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 16, 7, 2, 11, 0,
- 8, 42, 3, 9, 42, 42, 9, 42, 0, 2,
- 42, 42, 1, 3, 42, 7, 17, 42, 27, 2,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
- 42, 42, 42, 42, 42, 42
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 11, 18, 0, 0, 0,
+ 6, 48, 9, 0, 48, 48, 20, 48, 0, 8,
+ 48, 48, 1, 12, 48, 20, 18, 48, 2, 0,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48, 48, 48, 48, 48,
+ 48, 48, 48, 48, 48, 48
};
return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]];
}
@@ -83,71 +83,79 @@ static PARSER_KEYWORD gperf_keywords[] =
{
{(char*)0}, {(char*)0}, {(char*)0},
#line 30 "gperf-config.txt"
- {"END", 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13},
-#line 46 "gperf-config.txt"
- {"END2", 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 26},
-#line 53 "gperf-config.txt"
- {"REND", 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 30},
+ {"END", 13, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 13},
+#line 49 "gperf-config.txt"
+ {"END2", 3, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29},
+#line 56 "gperf-config.txt"
+ {"REND", 25, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 33},
+#line 17 "gperf-config.txt"
+ {"EXIT", 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3},
+#line 16 "gperf-config.txt"
+ {"DISABLE", 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2},
+#line 55 "gperf-config.txt"
+ {"RDSTATE", 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 32},
+#line 29 "gperf-config.txt"
+ {"DIMENSION", 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 12},
+#line 42 "gperf-config.txt"
+ {"DELETE_JOB", 111, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25},
+ {(char*)0},
+#line 40 "gperf-config.txt"
+ {"DYNCFG_RESET", 104, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23},
+#line 37 "gperf-config.txt"
+ {"DYNCFG_ENABLE", 101, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20},
+#line 26 "gperf-config.txt"
+ {"CHART", 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 9},
#line 35 "gperf-config.txt"
- {"SET", 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18},
-#line 45 "gperf-config.txt"
- {"SET2", 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 25},
-#line 54 "gperf-config.txt"
- {"RSET", 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 31},
+ {"SET", 11, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 18},
+#line 48 "gperf-config.txt"
+ {"SET2", 1, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28},
+#line 57 "gperf-config.txt"
+ {"RSET", 21, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 34},
+#line 41 "gperf-config.txt"
+ {"REPORT_JOB_STATUS", 110, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24},
+#line 39 "gperf-config.txt"
+ {"DYNCFG_REGISTER_JOB", 103, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22},
+#line 58 "gperf-config.txt"
+ {"RSSTATE", 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 35},
#line 18 "gperf-config.txt"
- {"HOST", 71, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 4},
-#line 26 "gperf-config.txt"
- {"CHART", 32, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 9},
-#line 55 "gperf-config.txt"
- {"RSSTATE", 24, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 32},
+ {"HOST", 71, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 4},
+#line 38 "gperf-config.txt"
+ {"DYNCFG_REGISTER_MODULE", 102, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21},
#line 25 "gperf-config.txt"
- {"BEGIN", 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8},
-#line 44 "gperf-config.txt"
- {"BEGIN2", 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 24},
-#line 51 "gperf-config.txt"
- {"RBEGIN", 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 28},
+ {"BEGIN", 12, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 8},
+#line 47 "gperf-config.txt"
+ {"BEGIN2", 2, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27},
+#line 54 "gperf-config.txt"
+ {"RBEGIN", 22, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 31},
+#line 27 "gperf-config.txt"
+ {"CLABEL", 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 10},
#line 21 "gperf-config.txt"
- {"HOST_LABEL", 74, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 7},
+ {"HOST_LABEL", 74, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 7},
#line 19 "gperf-config.txt"
- {"HOST_DEFINE", 72, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 5},
-#line 27 "gperf-config.txt"
- {"CLABEL", 34, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 10},
-#line 39 "gperf-config.txt"
- {"REPORT_JOB_STATUS", 110, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 22},
-#line 52 "gperf-config.txt"
- {"RDSTATE", 23, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 29},
-#line 20 "gperf-config.txt"
- {"HOST_DEFINE_END", 73, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 6},
-#line 43 "gperf-config.txt"
- {"CLAIMED_ID", 61, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 23},
+ {"HOST_DEFINE", 72, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 5},
+#line 53 "gperf-config.txt"
+ {"CHART_DEFINITION_END", 33, PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 30},
+#line 46 "gperf-config.txt"
+ {"CLAIMED_ID", 61, PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 26},
#line 15 "gperf-config.txt"
- {"FLUSH", 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1},
-#line 31 "gperf-config.txt"
- {"FUNCTION", 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 14},
+ {"FLUSH", 97, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 1},
+#line 20 "gperf-config.txt"
+ {"HOST_DEFINE_END", 73, PARSER_INIT_PLUGINSD|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 6},
#line 28 "gperf-config.txt"
- {"CLABEL_COMMIT", 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 11},
-#line 50 "gperf-config.txt"
- {"CHART_DEFINITION_END", 33, PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 27},
-#line 37 "gperf-config.txt"
- {"DYNCFG_ENABLE", 101, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 20},
-#line 16 "gperf-config.txt"
- {"DISABLE", 98, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 2},
+ {"CLABEL_COMMIT", 35, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 11},
+#line 31 "gperf-config.txt"
+ {"FUNCTION", 41, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 14},
#line 34 "gperf-config.txt"
- {"OVERWRITE", 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 17},
-#line 29 "gperf-config.txt"
- {"DIMENSION", 31, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 12},
+ {"OVERWRITE", 52, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 17},
#line 33 "gperf-config.txt"
- {"LABEL", 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 16},
-#line 17 "gperf-config.txt"
- {"EXIT", 99, PARSER_INIT_PLUGINSD, WORKER_PARSER_FIRST_JOB + 3},
- {(char*)0}, {(char*)0}, {(char*)0},
-#line 38 "gperf-config.txt"
- {"DYNCFG_REGISTER_MODULE", 102, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 21},
-#line 32 "gperf-config.txt"
- {"FUNCTION_RESULT_BEGIN", 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15},
- {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
+ {"LABEL", 51, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 16},
#line 36 "gperf-config.txt"
- {"VARIABLE", 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 19}
+ {"VARIABLE", 53, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING|PARSER_REP_METADATA, WORKER_PARSER_FIRST_JOB + 19},
+ {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
+ {(char*)0}, {(char*)0}, {(char*)0}, {(char*)0},
+ {(char*)0},
+#line 32 "gperf-config.txt"
+ {"FUNCTION_RESULT_BEGIN", 42, PARSER_INIT_PLUGINSD|PARSER_INIT_STREAMING, WORKER_PARSER_FIRST_JOB + 15}
};
PARSER_KEYWORD *
diff --git a/collectors/plugins.d/local_listeners.c b/collectors/plugins.d/local_listeners.c
index a39de79748db6c..f2c5e688bbc6e4 100644
--- a/collectors/plugins.d/local_listeners.c
+++ b/collectors/plugins.d/local_listeners.c
@@ -338,25 +338,59 @@ bool read_proc_net_x(const char *filename, PROC_NET_PROTOCOLS protocol) {
}
// ----------------------------------------------------------------------------
-
-int main(int argc __maybe_unused, char **argv __maybe_unused) {
+typedef struct {
+ bool read_tcp;
+ bool read_tcp6;
+ bool read_udp;
+ bool read_udp6;
+} CommandLineArguments;
+
+int main(int argc, char **argv) {
char path[FILENAME_MAX + 1];
hashTable_key_inode_port_value = createHashTable();
netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX");
if(!netdata_configured_host_prefix) netdata_configured_host_prefix = "";
- snprintfz(path, FILENAME_MAX, "%s/proc/net/tcp", netdata_configured_host_prefix);
- read_proc_net_x(path, PROC_NET_PROTOCOL_TCP);
+ CommandLineArguments args = {.read_tcp = false, .read_tcp6 = false, .read_udp = false, .read_udp6 = false};
+
+ for (int i = 1; i < argc; i++) {
+ if (strcmp("tcp", argv[i]) == 0) {
+ args.read_tcp = true;
+ continue;
+ } else if (strcmp("tcp6", argv[i]) == 0) {
+ args.read_tcp6 = true;
+ continue;
+ } else if (strcmp("udp", argv[i]) == 0) {
+ args.read_udp = true;
+ continue;
+ } else if (strcmp("udp6", argv[i]) == 0) {
+ args.read_udp6 = true;
+ continue;
+ }
+ }
+
+ bool read_all_files = (!args.read_tcp && !args.read_tcp6 && !args.read_udp && !args.read_udp6);
- snprintfz(path, FILENAME_MAX, "%s/proc/net/udp", netdata_configured_host_prefix);
- read_proc_net_x(path, PROC_NET_PROTOCOL_UDP);
+ if (read_all_files || args.read_tcp) {
+ snprintfz(path, FILENAME_MAX, "%s/proc/net/tcp", netdata_configured_host_prefix);
+ read_proc_net_x(path, PROC_NET_PROTOCOL_TCP);
+ }
- snprintfz(path, FILENAME_MAX, "%s/proc/net/tcp6", netdata_configured_host_prefix);
- read_proc_net_x(path, PROC_NET_PROTOCOL_TCP6);
+ if (read_all_files || args.read_udp) {
+ snprintfz(path, FILENAME_MAX, "%s/proc/net/udp", netdata_configured_host_prefix);
+ read_proc_net_x(path, PROC_NET_PROTOCOL_UDP);
+ }
- snprintfz(path, FILENAME_MAX, "%s/proc/net/udp6", netdata_configured_host_prefix);
- read_proc_net_x(path, PROC_NET_PROTOCOL_UDP6);
+ if (read_all_files || args.read_tcp6) {
+ snprintfz(path, FILENAME_MAX, "%s/proc/net/tcp6", netdata_configured_host_prefix);
+ read_proc_net_x(path, PROC_NET_PROTOCOL_TCP6);
+ }
+
+ if (read_all_files || args.read_udp6) {
+ snprintfz(path, FILENAME_MAX, "%s/proc/net/udp6", netdata_configured_host_prefix);
+ read_proc_net_x(path, PROC_NET_PROTOCOL_UDP6);
+ }
snprintfz(path, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix);
find_all_sockets_in_proc(path);
diff --git a/collectors/plugins.d/plugins_d.c b/collectors/plugins.d/plugins_d.c
index 08c26a198ba30c..20061ad29c49f3 100644
--- a/collectors/plugins.d/plugins_d.c
+++ b/collectors/plugins.d/plugins_d.c
@@ -47,8 +47,7 @@ static inline bool plugin_is_running(struct plugind *cd) {
return ret;
}
-static void pluginsd_worker_thread_cleanup(void *arg)
-{
+static void pluginsd_worker_thread_cleanup(void *arg) {
struct plugind *cd = (struct plugind *)arg;
worker_unregister();
@@ -143,41 +142,64 @@ static void *pluginsd_worker_thread(void *arg) {
netdata_thread_cleanup_push(pluginsd_worker_thread_cleanup, arg);
- struct plugind *cd = (struct plugind *)arg;
- plugin_set_running(cd);
+ {
+ struct plugind *cd = (struct plugind *) arg;
+ plugin_set_running(cd);
- size_t count = 0;
+ size_t count = 0;
- while (service_running(SERVICE_COLLECTORS)) {
- FILE *fp_child_input = NULL;
- FILE *fp_child_output = netdata_popen(cd->cmd, &cd->unsafe.pid, &fp_child_input);
+ while(service_running(SERVICE_COLLECTORS)) {
+ FILE *fp_child_input = NULL;
+ FILE *fp_child_output = netdata_popen(cd->cmd, &cd->unsafe.pid, &fp_child_input);
- if (unlikely(!fp_child_input || !fp_child_output)) {
- netdata_log_error("PLUGINSD: 'host:%s', cannot popen(\"%s\", \"r\").", rrdhost_hostname(cd->host), cd->cmd);
- break;
- }
+ if(unlikely(!fp_child_input || !fp_child_output)) {
+ netdata_log_error("PLUGINSD: 'host:%s', cannot popen(\"%s\", \"r\").",
+ rrdhost_hostname(cd->host), cd->cmd);
+ break;
+ }
- netdata_log_info("PLUGINSD: 'host:%s' connected to '%s' running on pid %d",
- rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid);
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "PLUGINSD: 'host:%s' connected to '%s' running on pid %d",
+ rrdhost_hostname(cd->host),
+ cd->fullfilename, cd->unsafe.pid);
- count = pluginsd_process(cd->host, cd, fp_child_input, fp_child_output, 0);
+ const char *plugin = strrchr(cd->fullfilename, '/');
+ if(plugin)
+ plugin++;
+ else
+ plugin = cd->fullfilename;
- netdata_log_info("PLUGINSD: 'host:%s', '%s' (pid %d) disconnected after %zu successful data collections (ENDs).",
- rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, count);
+ char module[100];
+ snprintfz(module, sizeof(module), "plugins.d[%s]", plugin);
+ ND_LOG_STACK lgs[] = {
+ ND_LOG_FIELD_TXT(NDF_MODULE, module),
+ ND_LOG_FIELD_TXT(NDF_NIDL_NODE, rrdhost_hostname(cd->host)),
+ ND_LOG_FIELD_TXT(NDF_SRC_TRANSPORT, "pluginsd"),
+ ND_LOG_FIELD_END(),
+ };
+ ND_LOG_STACK_PUSH(lgs);
- killpid(cd->unsafe.pid);
+ count = pluginsd_process(cd->host, cd, fp_child_input, fp_child_output, 0);
- int worker_ret_code = netdata_pclose(fp_child_input, fp_child_output, cd->unsafe.pid);
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "PLUGINSD: 'host:%s', '%s' (pid %d) disconnected after %zu successful data collections (ENDs).",
+ rrdhost_hostname(cd->host), cd->fullfilename, cd->unsafe.pid, count);
- if (likely(worker_ret_code == 0))
- pluginsd_worker_thread_handle_success(cd);
- else
- pluginsd_worker_thread_handle_error(cd, worker_ret_code);
+ killpid(cd->unsafe.pid);
- cd->unsafe.pid = 0;
- if (unlikely(!plugin_is_enabled(cd)))
- break;
- }
+ int worker_ret_code = netdata_pclose(fp_child_input, fp_child_output, cd->unsafe.pid);
+
+ if(likely(worker_ret_code == 0))
+ pluginsd_worker_thread_handle_success(cd);
+ else
+ pluginsd_worker_thread_handle_error(cd, worker_ret_code);
+
+ cd->unsafe.pid = 0;
+
+ if(unlikely(!plugin_is_enabled(cd)))
+ break;
+ }
+ }
netdata_thread_cleanup_pop(1);
return NULL;
@@ -217,6 +239,13 @@ void *pluginsd_main(void *ptr)
// disable some plugins by default
config_get_boolean(CONFIG_SECTION_PLUGINS, "slabinfo", CONFIG_BOOLEAN_NO);
+ config_get_boolean(CONFIG_SECTION_PLUGINS, "logs-management",
+#if defined(LOGS_MANAGEMENT_DEV_MODE)
+ CONFIG_BOOLEAN_YES
+#else
+ CONFIG_BOOLEAN_NO
+#endif
+ );
// it crashes (both threads) on Alpine after we made it multi-threaded
// works with "--device /dev/ipmi0", but this is not default
// see https://github.com/netdata/netdata/pull/15564 for details
diff --git a/collectors/plugins.d/plugins_d.h b/collectors/plugins.d/plugins_d.h
index 4988b50719e07d..37c70f7e39c0c4 100644
--- a/collectors/plugins.d/plugins_d.h
+++ b/collectors/plugins.d/plugins_d.h
@@ -10,51 +10,16 @@
#define PLUGINSD_CMD_MAX (FILENAME_MAX*2)
#define PLUGINSD_STOCK_PLUGINS_DIRECTORY_PATH 0
-#define PLUGINSD_KEYWORD_CHART "CHART"
-#define PLUGINSD_KEYWORD_CHART_DEFINITION_END "CHART_DEFINITION_END"
-#define PLUGINSD_KEYWORD_DIMENSION "DIMENSION"
-#define PLUGINSD_KEYWORD_BEGIN "BEGIN"
-#define PLUGINSD_KEYWORD_SET "SET"
-#define PLUGINSD_KEYWORD_END "END"
-#define PLUGINSD_KEYWORD_FLUSH "FLUSH"
-#define PLUGINSD_KEYWORD_DISABLE "DISABLE"
-#define PLUGINSD_KEYWORD_VARIABLE "VARIABLE"
-#define PLUGINSD_KEYWORD_LABEL "LABEL"
-#define PLUGINSD_KEYWORD_OVERWRITE "OVERWRITE"
-#define PLUGINSD_KEYWORD_CLABEL "CLABEL"
-#define PLUGINSD_KEYWORD_CLABEL_COMMIT "CLABEL_COMMIT"
-#define PLUGINSD_KEYWORD_FUNCTION "FUNCTION"
-#define PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN "FUNCTION_RESULT_BEGIN"
-#define PLUGINSD_KEYWORD_FUNCTION_RESULT_END "FUNCTION_RESULT_END"
-
-#define PLUGINSD_KEYWORD_REPLAY_CHART "REPLAY_CHART"
-#define PLUGINSD_KEYWORD_REPLAY_BEGIN "RBEGIN"
-#define PLUGINSD_KEYWORD_REPLAY_SET "RSET"
-#define PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE "RDSTATE"
-#define PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE "RSSTATE"
-#define PLUGINSD_KEYWORD_REPLAY_END "REND"
-
-#define PLUGINSD_KEYWORD_BEGIN_V2 "BEGIN2"
-#define PLUGINSD_KEYWORD_SET_V2 "SET2"
-#define PLUGINSD_KEYWORD_END_V2 "END2"
-
-#define PLUGINSD_KEYWORD_HOST_DEFINE "HOST_DEFINE"
-#define PLUGINSD_KEYWORD_HOST_DEFINE_END "HOST_DEFINE_END"
-#define PLUGINSD_KEYWORD_HOST_LABEL "HOST_LABEL"
-#define PLUGINSD_KEYWORD_HOST "HOST"
+#define PLUGINSD_KEYWORD_FUNCTION_PAYLOAD "FUNCTION_PAYLOAD"
+#define PLUGINSD_KEYWORD_FUNCTION_PAYLOAD_END "FUNCTION_PAYLOAD_END"
#define PLUGINSD_KEYWORD_DYNCFG_ENABLE "DYNCFG_ENABLE"
#define PLUGINSD_KEYWORD_DYNCFG_REGISTER_MODULE "DYNCFG_REGISTER_MODULE"
+#define PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB "DYNCFG_REGISTER_JOB"
+#define PLUGINSD_KEYWORD_DYNCFG_RESET "DYNCFG_RESET"
#define PLUGINSD_KEYWORD_REPORT_JOB_STATUS "REPORT_JOB_STATUS"
-
-#define PLUGINSD_KEYWORD_EXIT "EXIT"
-
-#define PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT 10 // seconds
-
-#define PLUGINSD_LINE_MAX_SSL_READ 512
-
-#define PLUGINSD_MAX_WORDS 20
+#define PLUGINSD_KEYWORD_DELETE_JOB "DELETE_JOB"
#define PLUGINSD_MAX_DIRECTORIES 20
extern char *plugin_directories[PLUGINSD_MAX_DIRECTORIES];
@@ -99,37 +64,4 @@ void pluginsd_process_thread_cleanup(void *ptr);
size_t pluginsd_initialize_plugin_directories();
-#define pluginsd_function_result_begin_to_buffer(wb, transaction, code, content_type, expires) \
- buffer_sprintf(wb \
- , PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " \"%s\" %d \"%s\" %ld\n" \
- , (transaction) ? (transaction) : "" \
- , (int)(code) \
- , (content_type) ? (content_type) : "" \
- , (long int)(expires) \
- )
-
-#define pluginsd_function_result_end_to_buffer(wb) \
- buffer_strcat(wb, "\n" PLUGINSD_KEYWORD_FUNCTION_RESULT_END "\n")
-
-#define pluginsd_function_result_begin_to_stdout(transaction, code, content_type, expires) \
- fprintf(stdout \
- , PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " \"%s\" %d \"%s\" %ld\n" \
- , (transaction) ? (transaction) : "" \
- , (int)(code) \
- , (content_type) ? (content_type) : "" \
- , (long int)(expires) \
- )
-
-#define pluginsd_function_result_end_to_stdout() \
- fprintf(stdout, "\n" PLUGINSD_KEYWORD_FUNCTION_RESULT_END "\n")
-
-static inline void pluginsd_function_json_error(const char *transaction, int code, const char *msg) {
- char buffer[PLUGINSD_LINE_MAX + 1];
- json_escape_string(buffer, msg, PLUGINSD_LINE_MAX);
-
- pluginsd_function_result_begin_to_stdout(transaction, code, "application/json", now_realtime_sec());
- fprintf(stdout, "{\"status\":%d,\"error_message\":\"%s\"}", code, buffer);
- pluginsd_function_result_end_to_stdout();
-}
-
#endif /* NETDATA_PLUGINS_D_H */
diff --git a/collectors/plugins.d/pluginsd_parser.c b/collectors/plugins.d/pluginsd_parser.c
index bc265a3afd37ec..3b47c6c0fe2ef9 100644
--- a/collectors/plugins.d/pluginsd_parser.c
+++ b/collectors/plugins.d/pluginsd_parser.c
@@ -4,12 +4,20 @@
#define LOG_FUNCTIONS false
+#define SERVING_STREAMING(parser) ((parser)->repertoire == PARSER_INIT_STREAMING)
+#define SERVING_PLUGINSD(parser) ((parser)->repertoire == PARSER_INIT_PLUGINSD)
+
static ssize_t send_to_plugin(const char *txt, void *data) {
PARSER *parser = data;
if(!txt || !*txt)
return 0;
+#ifdef ENABLE_H2O
+ if(parser->h2o_ctx)
+ return h2o_stream_write(parser->h2o_ctx, txt, strlen(txt));
+#endif
+
errno = 0;
spinlock_lock(&parser->writer.spinlock);
ssize_t bytes = -1;
@@ -107,23 +115,6 @@ static inline bool pluginsd_unlock_rrdset_data_collection(PARSER *parser) {
return false;
}
-void pluginsd_rrdset_cleanup(RRDSET *st) {
- spinlock_lock(&st->pluginsd.spinlock);
-
- for(size_t i = 0; i < st->pluginsd.size ; i++) {
- rrddim_acquired_release(st->pluginsd.rda[i]); // can be NULL
- st->pluginsd.rda[i] = NULL;
- }
-
- freez(st->pluginsd.rda);
- st->pluginsd.collector_tid = 0;
- st->pluginsd.rda = NULL;
- st->pluginsd.size = 0;
- st->pluginsd.pos = 0;
-
- spinlock_unlock(&st->pluginsd.spinlock);
-}
-
static inline void pluginsd_unlock_previous_scope_chart(PARSER *parser, const char *keyword, bool stale) {
if(unlikely(pluginsd_unlock_rrdset_data_collection(parser))) {
if(stale)
@@ -147,7 +138,12 @@ static inline void pluginsd_unlock_previous_scope_chart(PARSER *parser, const ch
static inline void pluginsd_clear_scope_chart(PARSER *parser, const char *keyword) {
pluginsd_unlock_previous_scope_chart(parser, keyword, true);
+
+ if(parser->user.cleanup_slots && parser->user.st)
+ rrdset_pluginsd_receive_unslot(parser->user.st);
+
parser->user.st = NULL;
+ parser->user.cleanup_slots = false;
}
static inline bool pluginsd_set_scope_chart(PARSER *parser, RRDSET *st, const char *keyword) {
@@ -157,11 +153,12 @@ static inline bool pluginsd_set_scope_chart(PARSER *parser, RRDSET *st, const ch
if(unlikely(old_collector_tid)) {
if(old_collector_tid != my_collector_tid) {
- error_limit_static_global_var(erl, 1, 0);
- error_limit(&erl, "PLUGINSD: keyword %s: 'host:%s/chart:%s' is collected twice (my tid %d, other collector tid %d)",
- keyword ? keyword : "UNKNOWN",
- rrdhost_hostname(st->rrdhost), rrdset_id(st),
- my_collector_tid, old_collector_tid);
+ nd_log_limit_static_global_var(erl, 1, 0);
+ nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_WARNING,
+ "PLUGINSD: keyword %s: 'host:%s/chart:%s' is collected twice (my tid %d, other collector tid %d)",
+ keyword ? keyword : "UNKNOWN",
+ rrdhost_hostname(st->rrdhost), rrdset_id(st),
+ my_collector_tid, old_collector_tid);
return false;
}
@@ -173,61 +170,141 @@ static inline bool pluginsd_set_scope_chart(PARSER *parser, RRDSET *st, const ch
pluginsd_clear_scope_chart(parser, keyword);
- size_t dims = dictionary_entries(st->rrddim_root_index);
- if(unlikely(st->pluginsd.size < dims)) {
- st->pluginsd.rda = reallocz(st->pluginsd.rda, dims * sizeof(RRDDIM_ACQUIRED *));
+ st->pluginsd.pos = 0;
+ parser->user.st = st;
+ parser->user.cleanup_slots = false;
+
+ return true;
+}
+
+static inline void pluginsd_rrddim_put_to_slot(PARSER *parser, RRDSET *st, RRDDIM *rd, ssize_t slot, bool obsolete) {
+ size_t wanted_size = st->pluginsd.size;
+
+ if(slot >= 1) {
+ st->pluginsd.dims_with_slots = true;
+ wanted_size = slot;
+ }
+ else {
+ st->pluginsd.dims_with_slots = false;
+ wanted_size = dictionary_entries(st->rrddim_root_index);
+ }
+
+ if(wanted_size > st->pluginsd.size) {
+ st->pluginsd.prd_array = reallocz(st->pluginsd.prd_array, wanted_size * sizeof(struct pluginsd_rrddim));
// initialize the empty slots
- for(ssize_t i = (ssize_t)dims - 1; i >= (ssize_t)st->pluginsd.size ;i--)
- st->pluginsd.rda[i] = NULL;
+ for(ssize_t i = (ssize_t) wanted_size - 1; i >= (ssize_t) st->pluginsd.size; i--) {
+ st->pluginsd.prd_array[i].rda = NULL;
+ st->pluginsd.prd_array[i].rd = NULL;
+ st->pluginsd.prd_array[i].id = NULL;
+ }
- st->pluginsd.size = dims;
+ st->pluginsd.size = wanted_size;
}
- st->pluginsd.pos = 0;
- parser->user.st = st;
+ if(st->pluginsd.dims_with_slots) {
+ struct pluginsd_rrddim *prd = &st->pluginsd.prd_array[slot - 1];
- return true;
+ if(prd->rd != rd) {
+ prd->rda = rrddim_find_and_acquire(st, string2str(rd->id));
+ prd->rd = rrddim_acquired_to_rrddim(prd->rda);
+ prd->id = string2str(prd->rd->id);
+ }
+
+ if(obsolete)
+ parser->user.cleanup_slots = true;
+ }
}
-static inline RRDDIM *pluginsd_acquire_dimension(RRDHOST *host, RRDSET *st, const char *dimension, const char *cmd) {
+static inline RRDDIM *pluginsd_acquire_dimension(RRDHOST *host, RRDSET *st, const char *dimension, ssize_t slot, const char *cmd) {
if (unlikely(!dimension || !*dimension)) {
netdata_log_error("PLUGINSD: 'host:%s/chart:%s' got a %s, without a dimension.",
rrdhost_hostname(host), rrdset_id(st), cmd);
return NULL;
}
- if(unlikely(st->pluginsd.pos >= st->pluginsd.size))
- st->pluginsd.pos = 0;
+ if (unlikely(!st->pluginsd.size)) {
+ netdata_log_error("PLUGINSD: 'host:%s/chart:%s' got a %s, but the chart has no dimensions.",
+ rrdhost_hostname(host), rrdset_id(st), cmd);
+ return NULL;
+ }
+
+ struct pluginsd_rrddim *prd;
+ RRDDIM *rd;
+
+ if(likely(st->pluginsd.dims_with_slots)) {
+ // caching with slots
- RRDDIM_ACQUIRED *rda = st->pluginsd.rda[st->pluginsd.pos];
+ if(unlikely(slot < 1 || slot > st->pluginsd.size)) {
+ netdata_log_error("PLUGINSD: 'host:%s/chart:%s' got a %s with slot %zd, but slots in the range [1 - %u] are expected.",
+ rrdhost_hostname(host), rrdset_id(st), cmd, slot, st->pluginsd.size);
+ return NULL;
+ }
+
+ prd = &st->pluginsd.prd_array[slot - 1];
- if(likely(rda)) {
- RRDDIM *rd = rrddim_acquired_to_rrddim(rda);
- if (likely(rd && string_strcmp(rd->id, dimension) == 0)) {
- // we found a cached RDA
- st->pluginsd.pos++;
+ rd = prd->rd;
+ if(likely(rd)) {
+#ifdef NETDATA_INTERNAL_CHECKS
+ if(strcmp(prd->id, dimension) != 0) {
+ ssize_t t;
+ for(t = 0; t < st->pluginsd.size ;t++) {
+ if (strcmp(st->pluginsd.prd_array[t].id, dimension) == 0)
+ break;
+ }
+ if(t >= st->pluginsd.size)
+ t = -1;
+
+ internal_fatal(true,
+ "PLUGINSD: expected to find dimension '%s' on slot %zd, but found '%s', "
+ "the right slot is %zd",
+ dimension, slot, prd->id, t);
+ }
+#endif
return rd;
}
- else {
- // the collector is sending dimensions in a different order
- // release the previous one, to reuse this slot
- rrddim_acquired_release(rda);
- st->pluginsd.rda[st->pluginsd.pos] = NULL;
+ }
+ else {
+ // caching without slots
+
+ if(unlikely(st->pluginsd.pos >= st->pluginsd.size))
+ st->pluginsd.pos = 0;
+
+ prd = &st->pluginsd.prd_array[st->pluginsd.pos++];
+
+ rd = prd->rd;
+ if(likely(rd)) {
+ const char *id = prd->id;
+
+ if(strcmp(id, dimension) == 0) {
+ // we found it cached
+ return rd;
+ }
+ else {
+ // the cached one is not good for us
+ rrddim_acquired_release(prd->rda);
+ prd->rda = NULL;
+ prd->rd = NULL;
+ prd->id = NULL;
+ }
}
}
- rda = rrddim_find_and_acquire(st, dimension);
+ // we need to find the dimension and set it to prd
+
+ RRDDIM_ACQUIRED *rda = rrddim_find_and_acquire(st, dimension);
if (unlikely(!rda)) {
netdata_log_error("PLUGINSD: 'host:%s/chart:%s/dim:%s' got a %s but dimension does not exist.",
- rrdhost_hostname(host), rrdset_id(st), dimension, cmd);
+ rrdhost_hostname(host), rrdset_id(st), dimension, cmd);
return NULL;
}
- st->pluginsd.rda[st->pluginsd.pos++] = rda;
+ prd->rda = rda;
+ prd->rd = rd = rrddim_acquired_to_rrddim(rda);
+ prd->id = string2str(rd->id);
- return rrddim_acquired_to_rrddim(rda);
+ return rd;
}
static inline RRDSET *pluginsd_find_chart(RRDHOST *host, const char *chart, const char *cmd) {
@@ -245,20 +322,89 @@ static inline RRDSET *pluginsd_find_chart(RRDHOST *host, const char *chart, cons
return st;
}
+static inline ssize_t pluginsd_parse_rrd_slot(char **words, size_t num_words) { // returns the slot number prefixed to word 1, or -1 when word 1 carries no slot prefix
+ ssize_t slot = -1; // -1: word 1 is not a slot, so callers must not skip it
+ char *id = get_word(words, num_words, 1);
+ if(id && id[0] == PLUGINSD_KEYWORD_SLOT[0] && id[1] == PLUGINSD_KEYWORD_SLOT[1] && // match the first 4 chars of the keyword one by one; && stops at the first mismatch
+ id[2] == PLUGINSD_KEYWORD_SLOT[2] && id[3] == PLUGINSD_KEYWORD_SLOT[3] && id[4] == ':') { // ...followed by a ':' separator
+ slot = (ssize_t) str2ull_encoded(&id[5]); // the number starts right after the ':'
+ if(slot < 0) slot = 0; // to make the caller increment its idx of the words
+ }
+
+ return slot;
+}
+
+static inline void pluginsd_rrdset_cache_put_to_slot(PARSER *parser, RRDSET *st, ssize_t slot, bool obsolete) { // stores st at the given 1-based slot of its host's chart slots array
+ // clean possible old cached data
+ rrdset_pluginsd_receive_unslot(st); // done unconditionally, even when the slot below turns out to be unusable
+
+ if(unlikely(slot < 1 || slot >= INT32_MAX)) // slots are 1-based and are remembered below as int32_t
+ return;
+
+ RRDHOST *host = st->rrdhost;
+
+ if(unlikely((size_t)slot > host->rrdpush.receive.pluginsd_chart_slots.size)) { // the array is too small for this slot: grow it
+ spinlock_lock(&host->rrdpush.receive.pluginsd_chart_slots.spinlock); // only the resize runs under the spinlock
+ size_t old_slots = host->rrdpush.receive.pluginsd_chart_slots.size;
+ size_t new_slots = (old_slots < PLUGINSD_MIN_RRDSET_POINTERS_CACHE) ? PLUGINSD_MIN_RRDSET_POINTERS_CACHE : old_slots * 2; // start at the minimum, then double
+
+ if(new_slots < (size_t)slot) // ...but never less than what this slot needs
+ new_slots = slot;
+
+ host->rrdpush.receive.pluginsd_chart_slots.array =
+ reallocz(host->rrdpush.receive.pluginsd_chart_slots.array, new_slots * sizeof(RRDSET *));
+
+ for(size_t i = old_slots; i < new_slots ;i++) // the newly added entries start empty
+ host->rrdpush.receive.pluginsd_chart_slots.array[i] = NULL;
+
+ host->rrdpush.receive.pluginsd_chart_slots.size = new_slots;
+ spinlock_unlock(&host->rrdpush.receive.pluginsd_chart_slots.spinlock);
+ }
+
+ host->rrdpush.receive.pluginsd_chart_slots.array[slot - 1] = st; // slot N lives at index N - 1
+ st->pluginsd.last_slot = (int32_t)slot - 1; // the chart remembers its 0-based index; presumably read back by rrdset_pluginsd_receive_unslot() - TODO confirm
+ parser->user.cleanup_slots = obsolete; // NOTE(review): appears to make the parser unslot an obsolete chart when its scope is cleared - confirm
+}
+
+static inline RRDSET *pluginsd_rrdset_cache_get_from_slot(PARSER *parser, RRDHOST *host, const char *id, ssize_t slot, const char *keyword) { // returns the chart cached at the 1-based slot, falling back to a lookup by id
+ if(unlikely(slot < 1 || (size_t)slot > host->rrdpush.receive.pluginsd_chart_slots.size)) // no slot given, or a slot beyond the array: plain lookup, nothing gets cached
+ return pluginsd_find_chart(host, id, keyword);
+
+ RRDSET *st = host->rrdpush.receive.pluginsd_chart_slots.array[slot - 1]; // slot N lives at index N - 1
+
+ if(!st) { // empty slot: look the chart up by id
+ st = pluginsd_find_chart(host, id, keyword);
+ if(st) // found: cache it in this slot, carrying over the chart's current obsolete flag
+ pluginsd_rrdset_cache_put_to_slot(parser, st, slot, rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE));
+ }
+ else {
+ internal_fatal(string_strcmp(st->id, id) != 0, // the cached chart must be the one the sender named; presumably checked only in internal-checks builds - TODO confirm
+ "PLUGINSD: wrong chart in slot %zd, expected '%s', found '%s'",
+ slot - 1, id, string2str(st->id));
+ }
+
+ return st; // may be NULL when the slot was empty and the lookup by id failed
+}
+
static inline PARSER_RC PLUGINSD_DISABLE_PLUGIN(PARSER *parser, const char *keyword, const char *msg) {
parser->user.enabled = 0;
if(keyword && msg) {
- error_limit_static_global_var(erl, 1, 0);
- error_limit(&erl, "PLUGINSD: keyword %s: %s", keyword, msg);
+ nd_log_limit_static_global_var(erl, 1, 0);
+ nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_INFO,
+ "PLUGINSD: keyword %s: %s", keyword, msg);
}
return PARSER_RC_ERROR;
}
static inline PARSER_RC pluginsd_set(char **words, size_t num_words, PARSER *parser) {
- char *dimension = get_word(words, num_words, 1);
- char *value = get_word(words, num_words, 2);
+ int idx = 1;
+ ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
+ if(slot >= 0) idx++;
+
+ char *dimension = get_word(words, num_words, idx++);
+ char *value = get_word(words, num_words, idx++);
RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_SET);
if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
@@ -266,7 +412,7 @@ static inline PARSER_RC pluginsd_set(char **words, size_t num_words, PARSER *par
RRDSET *st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_SET, PLUGINSD_KEYWORD_CHART);
if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
- RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_SET);
+ RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, slot, PLUGINSD_KEYWORD_SET);
if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
st->pluginsd.set = true;
@@ -282,13 +428,17 @@ static inline PARSER_RC pluginsd_set(char **words, size_t num_words, PARSER *par
}
static inline PARSER_RC pluginsd_begin(char **words, size_t num_words, PARSER *parser) {
- char *id = get_word(words, num_words, 1);
- char *microseconds_txt = get_word(words, num_words, 2);
+ int idx = 1;
+ ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
+ if(slot >= 0) idx++;
+
+ char *id = get_word(words, num_words, idx++);
+ char *microseconds_txt = get_word(words, num_words, idx++);
RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_BEGIN);
if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
- RRDSET *st = pluginsd_find_chart(host, id, PLUGINSD_KEYWORD_BEGIN);
+ RRDSET *st = pluginsd_rrdset_cache_get_from_slot(parser, host, id, slot, PLUGINSD_KEYWORD_BEGIN);
if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
if(!pluginsd_set_scope_chart(parser, st, PLUGINSD_KEYWORD_BEGIN))
@@ -329,8 +479,9 @@ static inline PARSER_RC pluginsd_begin(char **words, size_t num_words, PARSER *p
}
static inline PARSER_RC pluginsd_end(char **words, size_t num_words, PARSER *parser) {
- UNUSED(words);
- UNUSED(num_words);
+ char *tv_sec = get_word(words, num_words, 1);
+ char *tv_usec = get_word(words, num_words, 2);
+ char *pending_rrdset_next = get_word(words, num_words, 3);
RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_END);
if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
@@ -344,16 +495,22 @@ static inline PARSER_RC pluginsd_end(char **words, size_t num_words, PARSER *par
pluginsd_clear_scope_chart(parser, PLUGINSD_KEYWORD_END);
parser->user.data_collections_count++;
- struct timeval now;
- now_realtime_timeval(&now);
- rrdset_timed_done(st, now, /* pending_rrdset_next = */ false);
+ struct timeval tv = {
+ .tv_sec = (tv_sec && *tv_sec) ? str2ll(tv_sec, NULL) : 0,
+ .tv_usec = (tv_usec && *tv_usec) ? str2ll(tv_usec, NULL) : 0
+ };
+
+ if(!tv.tv_sec)
+ now_realtime_timeval(&tv);
+
+ rrdset_timed_done(st, tv, pending_rrdset_next && *pending_rrdset_next ? true : false);
return PARSER_RC_OK;
}
static void pluginsd_host_define_cleanup(PARSER *parser) {
string_freez(parser->user.host_define.hostname);
- dictionary_destroy(parser->user.host_define.rrdlabels);
+ rrdlabels_destroy(parser->user.host_define.rrdlabels);
parser->user.host_define.hostname = NULL;
parser->user.host_define.rrdlabels = NULL;
@@ -390,17 +547,17 @@ static inline PARSER_RC pluginsd_host_define(char **words, size_t num_words, PAR
return PARSER_RC_OK;
}
-static inline PARSER_RC pluginsd_host_dictionary(char **words, size_t num_words, PARSER *parser, DICTIONARY *dict, const char *keyword) {
+static inline PARSER_RC pluginsd_host_dictionary(char **words, size_t num_words, PARSER *parser, RRDLABELS *labels, const char *keyword) {
char *name = get_word(words, num_words, 1);
char *value = get_word(words, num_words, 2);
if(!name || !*name || !value)
return PLUGINSD_DISABLE_PLUGIN(parser, keyword, "missing parameters");
- if(!parser->user.host_define.parsing_host || !dict)
+ if(!parser->user.host_define.parsing_host || !labels)
return PLUGINSD_DISABLE_PLUGIN(parser, keyword, "host is not defined, send " PLUGINSD_KEYWORD_HOST_DEFINE " before this");
- rrdlabels_add(dict, name, value, RRDLABEL_SRC_CONFIG);
+ rrdlabels_add(labels, name, value, RRDLABEL_SRC_CONFIG);
return PARSER_RC_OK;
}
@@ -416,30 +573,29 @@ static inline PARSER_RC pluginsd_host_define_end(char **words __maybe_unused, si
return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_HOST_DEFINE_END, "missing initialization, send " PLUGINSD_KEYWORD_HOST_DEFINE " before this");
RRDHOST *host = rrdhost_find_or_create(
- string2str(parser->user.host_define.hostname),
- string2str(parser->user.host_define.hostname),
- parser->user.host_define.machine_guid_str,
- "Netdata Virtual Host 1.0",
- netdata_configured_timezone,
- netdata_configured_abbrev_timezone,
- netdata_configured_utc_offset,
- NULL,
- program_name,
- program_version,
- default_rrd_update_every,
- default_rrd_history_entries,
- default_rrd_memory_mode,
- default_health_enabled,
- default_rrdpush_enabled,
- default_rrdpush_destination,
- default_rrdpush_api_key,
- default_rrdpush_send_charts_matching,
- default_rrdpush_enable_replication,
- default_rrdpush_seconds_to_replicate,
- default_rrdpush_replication_step,
- rrdhost_labels_to_system_info(parser->user.host_define.rrdlabels),
- false
- );
+ string2str(parser->user.host_define.hostname),
+ string2str(parser->user.host_define.hostname),
+ parser->user.host_define.machine_guid_str,
+ "Netdata Virtual Host 1.0",
+ netdata_configured_timezone,
+ netdata_configured_abbrev_timezone,
+ netdata_configured_utc_offset,
+ NULL,
+ program_name,
+ program_version,
+ default_rrd_update_every,
+ default_rrd_history_entries,
+ default_rrd_memory_mode,
+ default_health_enabled,
+ default_rrdpush_enabled,
+ default_rrdpush_destination,
+ default_rrdpush_api_key,
+ default_rrdpush_send_charts_matching,
+ default_rrdpush_enable_replication,
+ default_rrdpush_seconds_to_replicate,
+ default_rrdpush_replication_step,
+ rrdhost_labels_to_system_info(parser->user.host_define.rrdlabels),
+ false);
rrdhost_option_set(host, RRDHOST_OPTION_VIRTUAL_HOST);
@@ -489,18 +645,22 @@ static inline PARSER_RC pluginsd_chart(char **words, size_t num_words, PARSER *p
RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_CHART);
if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
- char *type = get_word(words, num_words, 1);
- char *name = get_word(words, num_words, 2);
- char *title = get_word(words, num_words, 3);
- char *units = get_word(words, num_words, 4);
- char *family = get_word(words, num_words, 5);
- char *context = get_word(words, num_words, 6);
- char *chart = get_word(words, num_words, 7);
- char *priority_s = get_word(words, num_words, 8);
- char *update_every_s = get_word(words, num_words, 9);
- char *options = get_word(words, num_words, 10);
- char *plugin = get_word(words, num_words, 11);
- char *module = get_word(words, num_words, 12);
+ int idx = 1;
+ ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
+ if(slot >= 0) idx++;
+
+ char *type = get_word(words, num_words, idx++);
+ char *name = get_word(words, num_words, idx++);
+ char *title = get_word(words, num_words, idx++);
+ char *units = get_word(words, num_words, idx++);
+ char *family = get_word(words, num_words, idx++);
+ char *context = get_word(words, num_words, idx++);
+ char *chart = get_word(words, num_words, idx++);
+ char *priority_s = get_word(words, num_words, idx++);
+ char *update_every_s = get_word(words, num_words, idx++);
+ char *options = get_word(words, num_words, idx++);
+ char *plugin = get_word(words, num_words, idx++);
+ char *module = get_word(words, num_words, idx++);
// parse the id from type
char *id = NULL;
@@ -567,14 +727,15 @@ static inline PARSER_RC pluginsd_chart(char **words, size_t num_words, PARSER *p
module, priority, update_every,
chart_type);
+ bool obsolete = false;
if (likely(st)) {
if (options && *options) {
if (strstr(options, "obsolete")) {
- pluginsd_rrdset_cleanup(st);
- rrdset_is_obsolete(st);
+ rrdset_is_obsolete___safe_from_collector_thread(st);
+ obsolete = true;
}
else
- rrdset_isnot_obsolete(st);
+ rrdset_isnot_obsolete___safe_from_collector_thread(st);
if (strstr(options, "detail"))
rrdset_flag_set(st, RRDSET_FLAG_DETAIL);
@@ -592,13 +753,15 @@ static inline PARSER_RC pluginsd_chart(char **words, size_t num_words, PARSER *p
rrdset_flag_clear(st, RRDSET_FLAG_STORE_FIRST);
}
else {
- rrdset_isnot_obsolete(st);
+ rrdset_isnot_obsolete___safe_from_collector_thread(st);
rrdset_flag_clear(st, RRDSET_FLAG_DETAIL);
rrdset_flag_clear(st, RRDSET_FLAG_STORE_FIRST);
}
if(!pluginsd_set_scope_chart(parser, st, PLUGINSD_KEYWORD_CHART))
return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
+
+ pluginsd_rrdset_cache_put_to_slot(parser, st, slot, obsolete);
}
else
pluginsd_clear_scope_chart(parser, PLUGINSD_KEYWORD_CHART);
@@ -649,12 +812,16 @@ static inline PARSER_RC pluginsd_chart_definition_end(char **words, size_t num_w
}
static inline PARSER_RC pluginsd_dimension(char **words, size_t num_words, PARSER *parser) {
- char *id = get_word(words, num_words, 1);
- char *name = get_word(words, num_words, 2);
- char *algorithm = get_word(words, num_words, 3);
- char *multiplier_s = get_word(words, num_words, 4);
- char *divisor_s = get_word(words, num_words, 5);
- char *options = get_word(words, num_words, 6);
+ int idx = 1;
+ ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
+ if(slot >= 0) idx++;
+
+ char *id = get_word(words, num_words, idx++);
+ char *name = get_word(words, num_words, idx++);
+ char *algorithm = get_word(words, num_words, idx++);
+ char *multiplier_s = get_word(words, num_words, idx++);
+ char *divisor_s = get_word(words, num_words, idx++);
+ char *options = get_word(words, num_words, idx++);
RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_DIMENSION);
if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
@@ -693,11 +860,14 @@ static inline PARSER_RC pluginsd_dimension(char **words, size_t num_words, PARSE
int unhide_dimension = 1;
rrddim_option_clear(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS);
+ bool obsolete = false;
if (options && *options) {
- if (strstr(options, "obsolete") != NULL)
- rrddim_is_obsolete(st, rd);
+ if (strstr(options, "obsolete") != NULL) {
+ obsolete = true;
+ rrddim_is_obsolete___safe_from_collector_thread(st, rd);
+ }
else
- rrddim_isnot_obsolete(st, rd);
+ rrddim_isnot_obsolete___safe_from_collector_thread(st, rd);
unhide_dimension = !strstr(options, "hidden");
@@ -705,8 +875,9 @@ static inline PARSER_RC pluginsd_dimension(char **words, size_t num_words, PARSE
rrddim_option_set(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS);
if (strstr(options, "nooverflow") != NULL)
rrddim_option_set(rd, RRDDIM_OPTION_DONT_DETECT_RESETS_OR_OVERFLOWS);
- } else
- rrddim_isnot_obsolete(st, rd);
+ }
+ else
+ rrddim_isnot_obsolete___safe_from_collector_thread(st, rd);
bool should_update_dimension = false;
@@ -724,6 +895,8 @@ static inline PARSER_RC pluginsd_dimension(char **words, size_t num_words, PARSE
rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_METADATA_UPDATE);
}
+ pluginsd_rrddim_put_to_slot(parser, st, rd, slot, obsolete);
+
return PARSER_RC_OK;
}
@@ -733,14 +906,16 @@ static inline PARSER_RC pluginsd_dimension(char **words, size_t num_words, PARSE
struct inflight_function {
int code;
int timeout;
- BUFFER *destination_wb;
STRING *function;
- void (*callback)(BUFFER *wb, int code, void *callback_data);
- void *callback_data;
+ BUFFER *result_body_wb;
+ rrd_function_result_callback_t result_cb;
+ void *result_cb_data;
usec_t timeout_ut;
usec_t started_ut;
usec_t sent_ut;
const char *payload;
+ PARSER *parser;
+ bool virtual;
};
static void inflight_functions_insert_callback(const DICTIONARY_ITEM *item, void *func, void *parser_ptr) {
@@ -751,42 +926,44 @@ static void inflight_functions_insert_callback(const DICTIONARY_ITEM *item, void
// leave this code as default, so that when the dictionary is destroyed this will be sent back to the caller
pf->code = HTTP_RESP_GATEWAY_TIMEOUT;
+ const char *transaction = dictionary_acquired_item_name(item);
+
char buffer[2048 + 1];
- snprintfz(buffer, 2048, "%s %s %d \"%s\"\n",
+ snprintfz(buffer, sizeof(buffer) - 1, "%s %s %d \"%s\"\n",
pf->payload ? "FUNCTION_PAYLOAD" : "FUNCTION",
- dictionary_acquired_item_name(item),
+ transaction,
pf->timeout,
string2str(pf->function));
// send the command to the plugin
- int ret = send_to_plugin(buffer, parser);
+ ssize_t ret = send_to_plugin(buffer, parser);
pf->sent_ut = now_realtime_usec();
if(ret < 0) {
- netdata_log_error("FUNCTION: failed to send function to plugin, error %d", ret);
- rrd_call_function_error(pf->destination_wb, "Failed to communicate with collector", HTTP_RESP_BACKEND_FETCH_FAILED);
+ netdata_log_error("FUNCTION '%s': failed to send it to the plugin, error %zd", string2str(pf->function), ret);
+ rrd_call_function_error(pf->result_body_wb, "Failed to communicate with collector", HTTP_RESP_SERVICE_UNAVAILABLE);
}
else {
internal_error(LOG_FUNCTIONS,
- "FUNCTION '%s' with transaction '%s' sent to collector (%d bytes, in %llu usec)",
+ "FUNCTION '%s' with transaction '%s' sent to collector (%zd bytes, in %"PRIu64" usec)",
string2str(pf->function), dictionary_acquired_item_name(item), ret,
pf->sent_ut - pf->started_ut);
}
if (!pf->payload)
return;
-
+
// send the payload to the plugin
ret = send_to_plugin(pf->payload, parser);
if(ret < 0) {
- netdata_log_error("FUNCTION_PAYLOAD: failed to send function to plugin, error %d", ret);
- rrd_call_function_error(pf->destination_wb, "Failed to communicate with collector", HTTP_RESP_BACKEND_FETCH_FAILED);
+ netdata_log_error("FUNCTION_PAYLOAD '%s': failed to send function to plugin, error %zd", string2str(pf->function), ret);
+ rrd_call_function_error(pf->result_body_wb, "Failed to communicate with collector", HTTP_RESP_SERVICE_UNAVAILABLE);
}
else {
internal_error(LOG_FUNCTIONS,
- "FUNCTION_PAYLOAD '%s' with transaction '%s' sent to collector (%d bytes, in %llu usec)",
+ "FUNCTION_PAYLOAD '%s' with transaction '%s' sent to collector (%zd bytes, in %"PRIu64" usec)",
string2str(pf->function), dictionary_acquired_item_name(item), ret,
pf->sent_ut - pf->started_ut);
}
@@ -798,23 +975,90 @@ static bool inflight_functions_conflict_callback(const DICTIONARY_ITEM *item __m
struct inflight_function *pf = new_func;
netdata_log_error("PLUGINSD_PARSER: duplicate UUID on pending function '%s' detected. Ignoring the second one.", string2str(pf->function));
- pf->code = rrd_call_function_error(pf->destination_wb, "This request is already in progress", HTTP_RESP_BAD_REQUEST);
- pf->callback(pf->destination_wb, pf->code, pf->callback_data);
+ pf->code = rrd_call_function_error(pf->result_body_wb, "This request is already in progress", HTTP_RESP_BAD_REQUEST);
+ pf->result_cb(pf->result_body_wb, pf->code, pf->result_cb_data);
string_freez(pf->function);
return false;
}
-static void inflight_functions_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func, void *parser_ptr __maybe_unused) {
+void delete_job_finalize(struct parser *parser __maybe_unused, struct configurable_plugin *plug, const char *fnc_sig, int code) { // after an accepted delete-job call: deletes the job named in fnc_sig and announces the deletion
+ if (code != DYNCFG_VFNC_RET_CFG_ACCEPTED) // nothing to do unless the call was accepted
+ return;
+
+ char *params_local = strdupz(fnc_sig); // private copy for the splitter to work on; freed on every return path below
+ char *words[DYNCFG_MAX_WORDS];
+ size_t words_c = quoted_strings_splitter(params_local, words, DYNCFG_MAX_WORDS, isspace_map_pluginsd);
+
+ if (words_c != 3) { // exactly 3 words are expected; words 1 and 2 are used below as module and job
+ netdata_log_error("PLUGINSD_PARSER: invalid number of parameters for delete_job");
+ freez(params_local);
+ return;
+ }
+
+ const char *module = words[1];
+ const char *job = words[2];
+
+ delete_job(plug, module, job);
+
+ unlink_job(plug->name, module, job);
+
+ rrdpush_send_job_deleted(localhost, plug->name, module, job); // always sent on behalf of localhost
+
+ freez(params_local); // module and job point into this copy, so it is released last
+}
+
+void set_job_finalize(struct parser *parser __maybe_unused, struct configurable_plugin *plug __maybe_unused, const char *fnc_sig, int code) { // after an accepted set-job-config call: registers the job named in fnc_sig. NOTE(review): parser is tagged __maybe_unused but is dereferenced below
+ if (code != DYNCFG_VFNC_RET_CFG_ACCEPTED) // nothing to do unless the call was accepted
+ return;
+
+ char *params_local = strdupz(fnc_sig); // private copy for the splitter to work on; freed on every return path below
+ char *words[DYNCFG_MAX_WORDS];
+ size_t words_c = quoted_strings_splitter(params_local, words, DYNCFG_MAX_WORDS, isspace_map_pluginsd);
+
+ if (words_c != 3) { // exactly 3 words are expected; words 1 and 2 are used below as module and job
+ netdata_log_error("PLUGINSD_PARSER: invalid number of parameters for set_job_config");
+ freez(params_local);
+ return;
+ }
+
+ const char *module_name = get_word(words, words_c, 1);
+ const char *job_name = get_word(words, words_c, 2);
+
+ if (register_job(parser->user.host->configurable_plugins, parser->user.cd->configuration->name, module_name, job_name, JOB_TYPE_USER, JOB_FLG_USER_CREATED, 1)) { // a non-zero return stops here, without announcing the job
+ freez(params_local);
+ return;
+ }
+
+ // only send this if it is not existing already (register_job cares for that)
+ rrdpush_send_dyncfg_reg_job(localhost, parser->user.cd->configuration->name, module_name, job_name, JOB_TYPE_USER, JOB_FLG_USER_CREATED); // always sent on behalf of localhost
+
+ freez(params_local); // module_name and job_name point into this copy, so it is released last
+}
+
+static void inflight_functions_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func, void *parser_ptr) {
struct inflight_function *pf = func;
+ struct parser *parser = (struct parser *)parser_ptr;
internal_error(LOG_FUNCTIONS,
- "FUNCTION '%s' result of transaction '%s' received from collector (%zu bytes, request %llu usec, response %llu usec)",
+ "FUNCTION '%s' result of transaction '%s' received from collector (%zu bytes, request %"PRIu64" usec, response %"PRIu64" usec)",
string2str(pf->function), dictionary_acquired_item_name(item),
- buffer_strlen(pf->destination_wb), pf->sent_ut - pf->started_ut, now_realtime_usec() - pf->sent_ut);
+ buffer_strlen(pf->result_body_wb), pf->sent_ut - pf->started_ut, now_realtime_usec() - pf->sent_ut);
+
+ if (pf->virtual && SERVING_PLUGINSD(parser)) {
+ if (pf->payload) {
+ if (strncmp(string2str(pf->function), FUNCTION_NAME_SET_JOB_CONFIG, strlen(FUNCTION_NAME_SET_JOB_CONFIG)) == 0)
+ set_job_finalize(parser, parser->user.cd->configuration, string2str(pf->function), pf->code);
+ dyn_conf_store_config(string2str(pf->function), pf->payload, parser->user.cd->configuration);
+ } else if (strncmp(string2str(pf->function), FUNCTION_NAME_DELETE_JOB, strlen(FUNCTION_NAME_DELETE_JOB)) == 0) {
+ delete_job_finalize(parser, parser->user.cd->configuration, string2str(pf->function), pf->code);
+ }
+ }
+
+ pf->result_cb(pf->result_body_wb, pf->code, pf->result_cb_data);
- pf->callback(pf->destination_wb, pf->code, pf->callback_data);
string_freez(pf->function);
+ freez((void *)pf->payload);
}
void inflight_functions_init(PARSER *parser) {
@@ -830,11 +1074,11 @@ static void inflight_functions_garbage_collect(PARSER *parser, usec_t now) {
dfe_start_write(parser->inflight.functions, pf) {
if (pf->timeout_ut < now) {
internal_error(true,
- "FUNCTION '%s' removing expired transaction '%s', after %llu usec.",
+ "FUNCTION '%s' removing expired transaction '%s', after %"PRIu64" usec.",
string2str(pf->function), pf_dfe.name, now - pf->started_ut);
- if(!buffer_strlen(pf->destination_wb) || pf->code == HTTP_RESP_OK)
- pf->code = rrd_call_function_error(pf->destination_wb,
+ if(!buffer_strlen(pf->result_body_wb) || pf->code == HTTP_RESP_OK)
+ pf->code = rrd_call_function_error(pf->result_body_wb,
"Timeout waiting for collector response.",
HTTP_RESP_GATEWAY_TIMEOUT);
@@ -847,35 +1091,74 @@ static void inflight_functions_garbage_collect(PARSER *parser, usec_t now) {
dfe_done(pf);
}
+// Canceller callback: data is the struct inflight_function of a pending function call.
+// If it is still pending, PLUGINSD_KEYWORD_FUNCTION_CANCEL and its transaction are sent to the plugin.
+void pluginsd_function_cancel(void *data) {
+ struct inflight_function *look_for = data, *t;
+
+ bool sent = false;
+ dfe_start_read(look_for->parser->inflight.functions, t) {
+ if(look_for == t) { // entries are matched by pointer identity
+ const char *transaction = t_dfe.name;
+
+ internal_error(true, "PLUGINSD: sending function cancellation to plugin for transaction '%s'", transaction);
+
+ char buffer[2048 + 1];
+ snprintfz(buffer, sizeof(buffer) - 1, "%s %s\n", PLUGINSD_KEYWORD_FUNCTION_CANCEL, transaction);
+
+ // send the command to the plugin; a negative return means it could not be sent
+ ssize_t ret = send_to_plugin(buffer, t->parser);
+ if(ret >= 0)
+ sent = true;
+
+ break;
+ }
+ }
+ dfe_done(t);
+ // reached with sent == false when the request is no longer pending, or when sending the cancellation failed
+ if(!sent)
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "PLUGINSD: FUNCTION_CANCEL request didn't match any pending function requests in pluginsd.d.");
+}
+
// this is the function that is called from
// rrd_call_function_and_wait() and rrd_call_function_async()
-static int pluginsd_execute_function_callback(BUFFER *destination_wb, int timeout, const char *function, void *collector_data, void (*callback)(BUFFER *wb, int code, void *callback_data), void *callback_data) {
- PARSER *parser = collector_data;
+static int pluginsd_function_execute_cb(BUFFER *result_body_wb, int timeout, const char *function,
+ void *execute_cb_data,
+ rrd_function_result_callback_t result_cb, void *result_cb_data,
+ rrd_function_is_cancelled_cb_t is_cancelled_cb __maybe_unused,
+ void *is_cancelled_cb_data __maybe_unused,
+ rrd_function_register_canceller_cb_t register_canceller_cb,
+ void *register_canceller_db_data) {
+ PARSER *parser = execute_cb_data;
usec_t now = now_realtime_usec();
struct inflight_function tmp = {
.started_ut = now,
- .timeout_ut = now + timeout * USEC_PER_SEC,
- .destination_wb = destination_wb,
+ .timeout_ut = now + timeout * USEC_PER_SEC + RRDFUNCTIONS_TIMEOUT_EXTENSION_UT,
+ .result_body_wb = result_body_wb,
.timeout = timeout,
.function = string_strdupz(function),
- .callback = callback,
- .callback_data = callback_data,
- .payload = NULL
+ .result_cb = result_cb,
+ .result_cb_data = result_cb_data,
+ .payload = NULL,
+ .parser = parser,
};
uuid_t uuid;
- uuid_generate_time(uuid);
+ uuid_generate_random(uuid);
- char key[UUID_STR_LEN];
- uuid_unparse_lower(uuid, key);
+ char transaction[UUID_STR_LEN];
+ uuid_unparse_lower(uuid, transaction);
dictionary_write_lock(parser->inflight.functions);
// if there is any error, our dictionary callbacks will call the caller callback to notify
// the caller about the error - no need for error handling here.
- dictionary_set(parser->inflight.functions, key, &tmp, sizeof(struct inflight_function));
+ void *t = dictionary_set(parser->inflight.functions, transaction, &tmp, sizeof(struct inflight_function));
+ if(register_canceller_cb)
+ register_canceller_cb(register_canceller_db_data, pluginsd_function_cancel, t);
if(!parser->inflight.smaller_timeout || tmp.timeout_ut < parser->inflight.smaller_timeout)
parser->inflight.smaller_timeout = tmp.timeout_ut;
@@ -890,6 +1173,8 @@ static int pluginsd_execute_function_callback(BUFFER *destination_wb, int timeou
}
static inline PARSER_RC pluginsd_function(char **words, size_t num_words, PARSER *parser) {
+ // a plugin or a child is registering a function
+
bool global = false;
size_t i = 1;
if(num_words >= 2 && strcmp(get_word(words, num_words, 1), "GLOBAL") == 0) {
@@ -926,7 +1211,7 @@ static inline PARSER_RC pluginsd_function(char **words, size_t num_words, PARSER
timeout = PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT;
}
- rrd_collector_add_function(host, st, name, timeout, help, false, pluginsd_execute_function_callback, parser);
+ rrd_function_add(host, st, name, timeout, help, false, pluginsd_function_execute_cb, parser);
parser->user.data_collections_count++;
@@ -973,18 +1258,18 @@ static inline PARSER_RC pluginsd_function_result_begin(char **words, size_t num_
}
else {
if(format && *format)
- pf->destination_wb->content_type = functions_format_to_content_type(format);
+ pf->result_body_wb->content_type = functions_format_to_content_type(format);
pf->code = code;
- pf->destination_wb->expires = expiration;
+ pf->result_body_wb->expires = expiration;
if(expiration <= now_realtime_sec())
- buffer_no_cacheable(pf->destination_wb);
+ buffer_no_cacheable(pf->result_body_wb);
else
- buffer_cacheable(pf->destination_wb);
+ buffer_cacheable(pf->result_body_wb);
}
- parser->defer.response = (pf) ? pf->destination_wb : NULL;
+ parser->defer.response = (pf) ? pf->result_body_wb : NULL;
parser->defer.end_keyword = PLUGINSD_KEYWORD_FUNCTION_RESULT_END;
parser->defer.action = pluginsd_function_result_end;
parser->defer.action_data = string_strdupz(key); // it is ok is key is NULL
@@ -1133,6 +1418,15 @@ static inline PARSER_RC pluginsd_label(char **words, size_t num_words, PARSER *p
if(unlikely(!(parser->user.new_host_labels)))
parser->user.new_host_labels = rrdlabels_create();
+ if (strcmp(name,HOST_LABEL_IS_EPHEMERAL) == 0) {
+ int is_ephemeral = appconfig_test_boolean_value((char *) value);
+ if (is_ephemeral) {
+ RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_LABEL);
+ if (likely(host))
+ rrdhost_option_set(host, RRDHOST_OPTION_EPHEMERAL_HOST);
+ }
+ }
+
rrdlabels_add(parser->user.new_host_labels, name, store, str2l(label_source));
if (allocated_store)
@@ -1151,6 +1445,8 @@ static inline PARSER_RC pluginsd_overwrite(char **words __maybe_unused, size_t n
host->rrdlabels = rrdlabels_create();
rrdlabels_migrate_to_these(host->rrdlabels, parser->user.new_host_labels);
+ if (rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST))
+ rrdlabels_add(host->rrdlabels, HOST_LABEL_IS_EPHEMERAL, "true", RRDLABEL_SRC_CONFIG);
rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_LABELS | RRDHOST_FLAG_METADATA_UPDATE);
rrdlabels_destroy(parser->user.new_host_labels);
@@ -1163,7 +1459,7 @@ static inline PARSER_RC pluginsd_clabel(char **words, size_t num_words, PARSER *
const char *value = get_word(words, num_words, 2);
const char *label_source = get_word(words, num_words, 3);
- if (!name || !value || !*label_source) {
+ if (!name || !value || !label_source) {
netdata_log_error("Ignoring malformed or empty CHART LABEL command.");
return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
}
@@ -1197,16 +1493,21 @@ static inline PARSER_RC pluginsd_clabel_commit(char **words __maybe_unused, size
rrdset_flag_set(st, RRDSET_FLAG_METADATA_UPDATE);
rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_METADATA_UPDATE);
+ rrdset_metadata_updated(st);
parser->user.chart_rrdlabels_linked_temporarily = NULL;
return PARSER_RC_OK;
}
static inline PARSER_RC pluginsd_replay_begin(char **words, size_t num_words, PARSER *parser) {
- char *id = get_word(words, num_words, 1);
- char *start_time_str = get_word(words, num_words, 2);
- char *end_time_str = get_word(words, num_words, 3);
- char *child_now_str = get_word(words, num_words, 4);
+ int idx = 1;
+ ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
+ if(slot >= 0) idx++;
+
+ char *id = get_word(words, num_words, idx++);
+ char *start_time_str = get_word(words, num_words, idx++);
+ char *end_time_str = get_word(words, num_words, idx++);
+ char *child_now_str = get_word(words, num_words, idx++);
RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_BEGIN);
if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
@@ -1215,7 +1516,7 @@ static inline PARSER_RC pluginsd_replay_begin(char **words, size_t num_words, PA
if (likely(!id || !*id))
st = pluginsd_require_scope_chart(parser, PLUGINSD_KEYWORD_REPLAY_BEGIN, PLUGINSD_KEYWORD_REPLAY_BEGIN);
else
- st = pluginsd_find_chart(host, id, PLUGINSD_KEYWORD_REPLAY_BEGIN);
+ st = pluginsd_rrdset_cache_get_from_slot(parser, host, id, slot, PLUGINSD_KEYWORD_REPLAY_BEGIN);
if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
@@ -1330,9 +1631,13 @@ static inline SN_FLAGS pluginsd_parse_storage_number_flags(const char *flags_str
}
static inline PARSER_RC pluginsd_replay_set(char **words, size_t num_words, PARSER *parser) {
- char *dimension = get_word(words, num_words, 1);
- char *value_str = get_word(words, num_words, 2);
- char *flags_str = get_word(words, num_words, 3);
+ int idx = 1;
+ ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
+ if(slot >= 0) idx++;
+
+ char *dimension = get_word(words, num_words, idx++);
+ char *value_str = get_word(words, num_words, idx++);
+ char *flags_str = get_word(words, num_words, idx++);
RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_SET);
if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
@@ -1341,15 +1646,16 @@ static inline PARSER_RC pluginsd_replay_set(char **words, size_t num_words, PARS
if(!st) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
if(!parser->user.replay.rset_enabled) {
- error_limit_static_thread_var(erl, 1, 0);
- error_limit(&erl, "PLUGINSD: 'host:%s/chart:%s' got a %s but it is disabled by %s errors",
- rrdhost_hostname(host), rrdset_id(st), PLUGINSD_KEYWORD_REPLAY_SET, PLUGINSD_KEYWORD_REPLAY_BEGIN);
+ nd_log_limit_static_thread_var(erl, 1, 0);
+ nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_ERR,
+ "PLUGINSD: 'host:%s/chart:%s' got a %s but it is disabled by %s errors",
+ rrdhost_hostname(host), rrdset_id(st), PLUGINSD_KEYWORD_REPLAY_SET, PLUGINSD_KEYWORD_REPLAY_BEGIN);
// we have to return OK here
return PARSER_RC_OK;
}
- RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_REPLAY_SET);
+ RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, slot, PLUGINSD_KEYWORD_REPLAY_SET);
if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
st->pluginsd.set = true;
@@ -1390,8 +1696,10 @@ static inline PARSER_RC pluginsd_replay_set(char **words, size_t num_words, PARS
rd->collector.counter++;
}
else {
- error_limit_static_global_var(erl, 1, 0);
- error_limit(&erl, "PLUGINSD: 'host:%s/chart:%s/dim:%s' has the ARCHIVED flag set, but it is replicated. Ignoring data.",
+ nd_log_limit_static_global_var(erl, 1, 0);
+ nd_log_limit(&erl, NDLS_COLLECTORS, NDLP_WARNING,
+ "PLUGINSD: 'host:%s/chart:%s/dim:%s' has the ARCHIVED flag set, but it is replicated. "
+ "Ignoring data.",
rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_name(rd));
}
}
@@ -1403,11 +1711,15 @@ static inline PARSER_RC pluginsd_replay_rrddim_collection_state(char **words, si
if(parser->user.replay.rset_enabled == false)
return PARSER_RC_OK;
- char *dimension = get_word(words, num_words, 1);
- char *last_collected_ut_str = get_word(words, num_words, 2);
- char *last_collected_value_str = get_word(words, num_words, 3);
- char *last_calculated_value_str = get_word(words, num_words, 4);
- char *last_stored_value_str = get_word(words, num_words, 5);
+ int idx = 1;
+ ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
+ if(slot >= 0) idx++;
+
+ char *dimension = get_word(words, num_words, idx++);
+ char *last_collected_ut_str = get_word(words, num_words, idx++);
+ char *last_collected_value_str = get_word(words, num_words, idx++);
+ char *last_calculated_value_str = get_word(words, num_words, idx++);
+ char *last_stored_value_str = get_word(words, num_words, idx++);
RRDHOST *host = pluginsd_require_scope_host(parser, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE);
if(!host) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
@@ -1421,7 +1733,7 @@ static inline PARSER_RC pluginsd_replay_rrddim_collection_state(char **words, si
st->pluginsd.set = false;
}
- RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE);
+ RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, slot, PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE);
if(!rd) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
usec_t dim_last_collected_ut = (usec_t)rd->collector.last_collected_time.tv_sec * USEC_PER_SEC + (usec_t)rd->collector.last_collected_time.tv_usec;
@@ -1585,10 +1897,14 @@ static inline PARSER_RC pluginsd_replay_end(char **words, size_t num_words, PARS
static inline PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, PARSER *parser) {
timing_init();
- char *id = get_word(words, num_words, 1);
- char *update_every_str = get_word(words, num_words, 2);
- char *end_time_str = get_word(words, num_words, 3);
- char *wall_clock_time_str = get_word(words, num_words, 4);
+ int idx = 1;
+ ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
+ if(slot >= 0) idx++;
+
+ char *id = get_word(words, num_words, idx++);
+ char *update_every_str = get_word(words, num_words, idx++);
+ char *end_time_str = get_word(words, num_words, idx++);
+ char *wall_clock_time_str = get_word(words, num_words, idx++);
if(unlikely(!id || !update_every_str || !end_time_str || !wall_clock_time_str))
return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_BEGIN_V2, "missing parameters");
@@ -1598,14 +1914,15 @@ static inline PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, PARSER
timing_step(TIMING_STEP_BEGIN2_PREPARE);
- RRDSET *st = pluginsd_find_chart(host, id, PLUGINSD_KEYWORD_BEGIN_V2);
+ RRDSET *st = pluginsd_rrdset_cache_get_from_slot(parser, host, id, slot, PLUGINSD_KEYWORD_BEGIN_V2);
+
if(unlikely(!st)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
if(!pluginsd_set_scope_chart(parser, st, PLUGINSD_KEYWORD_BEGIN_V2))
return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
- if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE | RRDSET_FLAG_ARCHIVED)))
- rrdset_isnot_obsolete(st);
+ if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)))
+ rrdset_isnot_obsolete___safe_from_collector_thread(st);
timing_step(TIMING_STEP_BEGIN2_FIND_CHART);
@@ -1645,9 +1962,12 @@ static inline PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, PARSER
parser->user.v2.stream_buffer = rrdset_push_metric_initialize(parser->user.st, wall_clock_time);
if(parser->user.v2.stream_buffer.v2 && parser->user.v2.stream_buffer.wb) {
- // check if receiver and sender have the same number parsing capabilities
+ // check receiver capabilities
bool can_copy = stream_has_capability(&parser->user, STREAM_CAP_IEEE754) == stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754);
- NUMBER_ENCODING encoding = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX;
+
+ // check sender capabilities
+ bool with_slots = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_SLOTS) ? true : false;
+ NUMBER_ENCODING integer_encoding = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX;
BUFFER *wb = parser->user.v2.stream_buffer.wb;
@@ -1656,28 +1976,35 @@ static inline PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, PARSER
if(unlikely(parser->user.v2.stream_buffer.begin_v2_added))
buffer_fast_strcat(wb, PLUGINSD_KEYWORD_END_V2 "\n", sizeof(PLUGINSD_KEYWORD_END_V2) - 1 + 1);
- buffer_fast_strcat(wb, PLUGINSD_KEYWORD_BEGIN_V2 " '", sizeof(PLUGINSD_KEYWORD_BEGIN_V2) - 1 + 2);
+ buffer_fast_strcat(wb, PLUGINSD_KEYWORD_BEGIN_V2, sizeof(PLUGINSD_KEYWORD_BEGIN_V2) - 1);
+
+ if(with_slots) {
+ buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2);
+ buffer_print_uint64_encoded(wb, integer_encoding, st->rrdpush.sender.chart_slot);
+ }
+
+ buffer_fast_strcat(wb, " '", 2);
buffer_fast_strcat(wb, rrdset_id(st), string_strlen(st->id));
buffer_fast_strcat(wb, "' ", 2);
if(can_copy)
buffer_strcat(wb, update_every_str);
else
- buffer_print_uint64_encoded(wb, encoding, update_every);
+ buffer_print_uint64_encoded(wb, integer_encoding, update_every);
buffer_fast_strcat(wb, " ", 1);
if(can_copy)
buffer_strcat(wb, end_time_str);
else
- buffer_print_uint64_encoded(wb, encoding, end_time);
+ buffer_print_uint64_encoded(wb, integer_encoding, end_time);
buffer_fast_strcat(wb, " ", 1);
if(can_copy)
buffer_strcat(wb, wall_clock_time_str);
else
- buffer_print_uint64_encoded(wb, encoding, wall_clock_time);
+ buffer_print_uint64_encoded(wb, integer_encoding, wall_clock_time);
buffer_fast_strcat(wb, "\n", 1);
@@ -1710,10 +2037,14 @@ static inline PARSER_RC pluginsd_begin_v2(char **words, size_t num_words, PARSER
static inline PARSER_RC pluginsd_set_v2(char **words, size_t num_words, PARSER *parser) {
timing_init();
- char *dimension = get_word(words, num_words, 1);
- char *collected_str = get_word(words, num_words, 2);
- char *value_str = get_word(words, num_words, 3);
- char *flags_str = get_word(words, num_words, 4);
+ int idx = 1;
+ ssize_t slot = pluginsd_parse_rrd_slot(words, num_words);
+ if(slot >= 0) idx++;
+
+ char *dimension = get_word(words, num_words, idx++);
+ char *collected_str = get_word(words, num_words, idx++);
+ char *value_str = get_word(words, num_words, idx++);
+ char *flags_str = get_word(words, num_words, idx++);
if(unlikely(!dimension || !collected_str || !value_str || !flags_str))
return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_SET_V2, "missing parameters");
@@ -1726,13 +2057,13 @@ static inline PARSER_RC pluginsd_set_v2(char **words, size_t num_words, PARSER *
timing_step(TIMING_STEP_SET2_PREPARE);
- RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, PLUGINSD_KEYWORD_SET_V2);
+ RRDDIM *rd = pluginsd_acquire_dimension(host, st, dimension, slot, PLUGINSD_KEYWORD_SET_V2);
if(unlikely(!rd)) return PLUGINSD_DISABLE_PLUGIN(parser, NULL, NULL);
st->pluginsd.set = true;
if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE | RRDDIM_FLAG_ARCHIVED)))
- rrddim_isnot_obsolete(st, rd);
+ rrddim_isnot_obsolete___safe_from_collector_thread(st, rd);
timing_step(TIMING_STEP_SET2_LOOKUP_DIMENSION);
@@ -1778,12 +2109,22 @@ static inline PARSER_RC pluginsd_set_v2(char **words, size_t num_words, PARSER *
if(parser->user.v2.stream_buffer.v2 && parser->user.v2.stream_buffer.begin_v2_added && parser->user.v2.stream_buffer.wb) {
// check if receiver and sender have the same number parsing capabilities
bool can_copy = stream_has_capability(&parser->user, STREAM_CAP_IEEE754) == stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754);
+
+ // check the sender capabilities
+ bool with_slots = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_SLOTS) ? true : false;
NUMBER_ENCODING integer_encoding = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_HEX;
NUMBER_ENCODING doubles_encoding = stream_has_capability(&parser->user.v2.stream_buffer, STREAM_CAP_IEEE754) ? NUMBER_ENCODING_BASE64 : NUMBER_ENCODING_DECIMAL;
BUFFER *wb = parser->user.v2.stream_buffer.wb;
buffer_need_bytes(wb, 1024);
- buffer_fast_strcat(wb, PLUGINSD_KEYWORD_SET_V2 " '", sizeof(PLUGINSD_KEYWORD_SET_V2) - 1 + 2);
+ buffer_fast_strcat(wb, PLUGINSD_KEYWORD_SET_V2, sizeof(PLUGINSD_KEYWORD_SET_V2) - 1);
+
+ if(with_slots) {
+ buffer_fast_strcat(wb, " "PLUGINSD_KEYWORD_SLOT":", sizeof(PLUGINSD_KEYWORD_SLOT) - 1 + 2);
+ buffer_print_uint64_encoded(wb, integer_encoding, rd->rrdpush.sender.dim_slot);
+ }
+
+ buffer_fast_strcat(wb, " '", 2);
buffer_fast_strcat(wb, rrddim_id(rd), string_strlen(rd->id));
buffer_fast_strcat(wb, "' ", 2);
if(can_copy)
@@ -1864,13 +2205,27 @@ static inline PARSER_RC pluginsd_end_v2(char **words __maybe_unused, size_t num_
// ------------------------------------------------------------------------
// cleanup RRDSET / RRDDIM
- RRDDIM *rd;
- rrddim_foreach_read(rd, st) {
- rd->collector.calculated_value = 0;
- rd->collector.collected_value = 0;
- rrddim_clear_updated(rd);
+ if(likely(st->pluginsd.dims_with_slots)) {
+ for(size_t i = 0; i < st->pluginsd.size ;i++) {
+ RRDDIM *rd = st->pluginsd.prd_array[i].rd;
+
+ if(!rd)
+ continue;
+
+ rd->collector.calculated_value = 0;
+ rd->collector.collected_value = 0;
+ rrddim_clear_updated(rd);
+ }
+ }
+ else {
+ RRDDIM *rd;
+ rrddim_foreach_read(rd, st){
+ rd->collector.calculated_value = 0;
+ rd->collector.collected_value = 0;
+ rrddim_clear_updated(rd);
+ }
+ rrddim_foreach_done(rd);
}
- rrddim_foreach_done(rd);
// ------------------------------------------------------------------------
// reset state
@@ -1894,7 +2249,7 @@ struct mutex_cond {
int rc;
};
-static void virt_fnc_got_data_cb(BUFFER *wb, int code, void *callback_data)
+static void virt_fnc_got_data_cb(BUFFER *wb __maybe_unused, int code, void *callback_data)
{
struct mutex_cond *ctx = callback_data;
pthread_mutex_lock(&ctx->lock);
@@ -1904,9 +2259,81 @@ static void virt_fnc_got_data_cb(BUFFER *wb, int code, void *callback_data)
}
#define VIRT_FNC_TIMEOUT 1
+#define VIRT_FNC_BUF_SIZE (4096)
+// Queue a "virtual" function call for the plugin named in the request, without waiting for the result.
+//
+// wb            - buffer stored as the result body of the inflight function
+// host          - host whose configurable_plugins dictionary is searched for the target plugin
+// name          - function name followed by its parameters; the 2nd word must be the plugin name
+// payload       - optional payload; it is duplicated here, the caller keeps its own copy
+// callback      - stored, together with callback_data, as the result callback of the inflight function
+//
+// NOTE(review): when the name has fewer than 2 words or the plugin is unknown, this only logs and returns;
+// the callback is not invoked on these paths - confirm that callers do not wait for it.
+void call_virtual_function_async(BUFFER *wb, RRDHOST *host, const char *name, const char *payload, rrd_function_result_callback_t callback, void *callback_data) {
+    PARSER *parser = NULL;
+
+    //TODO simplify (as we really need only first parameter to get plugin name maybe we can avoid parsing all)
+    char *words[PLUGINSD_MAX_WORDS];
+    char *function_with_params = strdupz(name);
+    size_t num_words = quoted_strings_splitter(function_with_params, words, PLUGINSD_MAX_WORDS, isspace_map_pluginsd);
+
+    if (num_words < 2) {
+        netdata_log_error("PLUGINSD: virtual function name is empty.");
+        freez(function_with_params);
+        return;
+    }
+
+    const DICTIONARY_ITEM *cpi = dictionary_get_and_acquire_item(host->configurable_plugins, get_word(words, num_words, 1));
+    if (unlikely(cpi == NULL)) {
+        netdata_log_error("PLUGINSD: virtual function plugin '%s' not found.", name);
+        freez(function_with_params);
+        return;
+    }
+    struct configurable_plugin *cp = dictionary_acquired_item_value(cpi);
+    parser = (PARSER *)cp->cb_usr_ctx;
+
+    BUFFER *function_out = buffer_create(VIRT_FNC_BUF_SIZE, NULL);
+    // if we are forwarding this to a plugin (as opposed to streaming/child) we have to remove the first parameter (plugin_name)
+    buffer_strcat(function_out, get_word(words, num_words, 0));
+    for (size_t i = 1; i < num_words; i++) {
+        if (i == 1 && SERVING_PLUGINSD(parser))
+            continue;
+        buffer_sprintf(function_out, " %s", get_word(words, num_words, i));
+    }
+    freez(function_with_params);
+
+    usec_t now = now_realtime_usec();
+
+    struct inflight_function tmp = {
+        .started_ut = now,
+        // the deadline is in microseconds: scale the timeout instead of adding the two constants
+        // NOTE(review): .timeout below is 10x VIRT_FNC_TIMEOUT while the deadline uses 1x - confirm this is intended
+        .timeout_ut = now + VIRT_FNC_TIMEOUT * USEC_PER_SEC,
+        .result_body_wb = wb,
+        .timeout = VIRT_FNC_TIMEOUT * 10,
+        .function = string_strdupz(buffer_tostring(function_out)),
+        .result_cb = callback,
+        .result_cb_data = callback_data,
+        .payload = payload != NULL ? strdupz(payload) : NULL,
+        .virtual = true,
+    };
+    buffer_free(function_out);
+
+    uuid_t uuid;
+    uuid_generate_time(uuid);
+
+    char key[UUID_STR_LEN];
+    uuid_unparse_lower(uuid, key);
+
+    dictionary_write_lock(parser->inflight.functions);
+
+    // if there is any error, our dictionary callbacks will call the caller callback to notify
+    // the caller about the error - no need for error handling here.
+    dictionary_set(parser->inflight.functions, key, &tmp, sizeof(struct inflight_function));
+
+    if(!parser->inflight.smaller_timeout || tmp.timeout_ut < parser->inflight.smaller_timeout)
+        parser->inflight.smaller_timeout = tmp.timeout_ut;
+
+    // garbage collect stale inflight functions
+    if(parser->inflight.smaller_timeout < now)
+        inflight_functions_garbage_collect(parser, now);
+
+    dictionary_write_unlock(parser->inflight.functions);
+
+    // drop the reference taken by dictionary_get_and_acquire_item(); it is not stored anywhere,
+    // so nobody else could release it later
+    dictionary_acquired_item_release(host->configurable_plugins, cpi);
+}
+
+
dyncfg_config_t call_virtual_function_blocking(PARSER *parser, const char *name, int *rc, const char *payload) {
usec_t now = now_realtime_usec();
- BUFFER *wb = buffer_create(4096, NULL);
+ BUFFER *wb = buffer_create(VIRT_FNC_BUF_SIZE, NULL);
struct mutex_cond cond = {
.lock = PTHREAD_MUTEX_INITIALIZER,
@@ -1916,12 +2343,13 @@ dyncfg_config_t call_virtual_function_blocking(PARSER *parser, const char *name,
struct inflight_function tmp = {
.started_ut = now,
.timeout_ut = now + VIRT_FNC_TIMEOUT + USEC_PER_SEC,
- .destination_wb = wb,
+ .result_body_wb = wb,
.timeout = VIRT_FNC_TIMEOUT,
.function = string_strdupz(name),
- .callback = virt_fnc_got_data_cb,
- .callback_data = &cond,
- .payload = payload,
+ .result_cb = virt_fnc_got_data_cb,
+ .result_cb_data = &cond,
+ .payload = payload != NULL ? strdupz(payload) : NULL,
+ .virtual = true,
};
uuid_t uuid;
@@ -1968,98 +2396,188 @@ dyncfg_config_t call_virtual_function_blocking(PARSER *parser, const char *name,
return cfg;
}
-static dyncfg_config_t get_plugin_config_cb(void *usr_ctx)
+#define CVF_MAX_LEN (1024)
+// Fetch the plugin configuration by running the FUNCTION_NAME_GET_PLUGIN_CONFIG virtual function.
+// usr_ctx is the PARSER to send the call through; plugin_name is appended to the command only
+// when that parser serves a streaming connection. Returns whatever the blocking call returns.
+static dyncfg_config_t get_plugin_config_cb(void *usr_ctx, const char *plugin_name)
{
PARSER *parser = usr_ctx;
- return call_virtual_function_blocking(parser, "get_plugin_config", NULL, NULL);
+
+ if (SERVING_STREAMING(parser)) {
+ char buf[CVF_MAX_LEN + 1];
+ snprintfz(buf, CVF_MAX_LEN, FUNCTION_NAME_GET_PLUGIN_CONFIG " %s", plugin_name);
+ return call_virtual_function_blocking(parser, buf, NULL, NULL);
+ }
+
+ return call_virtual_function_blocking(parser, FUNCTION_NAME_GET_PLUGIN_CONFIG, NULL, NULL);
}
-static dyncfg_config_t get_plugin_config_schema_cb(void *usr_ctx)
+// Fetch the plugin configuration schema through a blocking virtual function call.
+// usr_ctx is the PARSER to send the call through; when it serves a streaming connection the
+// command is FUNCTION_NAME_GET_PLUGIN_CONFIG_SCHEMA followed by plugin_name, otherwise the
+// bare "get_plugin_config_schema" command is sent.
+static dyncfg_config_t get_plugin_config_schema_cb(void *usr_ctx, const char *plugin_name)
{
PARSER *parser = usr_ctx;
+
+ if (SERVING_STREAMING(parser)) {
+ char buf[CVF_MAX_LEN + 1];
+ snprintfz(buf, CVF_MAX_LEN, FUNCTION_NAME_GET_PLUGIN_CONFIG_SCHEMA " %s", plugin_name);
+ return call_virtual_function_blocking(parser, buf, NULL, NULL);
+ }
+
return call_virtual_function_blocking(parser, "get_plugin_config_schema", NULL, NULL);
}
-static dyncfg_config_t get_module_config_cb(void *usr_ctx, const char *module_name)
+// Fetch a module configuration through a blocking virtual function call.
+// The command is FUNCTION_NAME_GET_MODULE_CONFIG [plugin_name] module_name, where plugin_name
+// is included only when the parser in usr_ctx serves a streaming connection.
+static dyncfg_config_t get_module_config_cb(void *usr_ctx, const char *plugin_name, const char *module_name)
{
PARSER *parser = usr_ctx;
- char buf[1024];
- snprintfz(buf, sizeof(buf), "get_module_config %s", module_name);
- return call_virtual_function_blocking(parser, buf, NULL, NULL);
+ BUFFER *wb = buffer_create(CVF_MAX_LEN, NULL);
+
+ buffer_strcat(wb, FUNCTION_NAME_GET_MODULE_CONFIG);
+ if (SERVING_STREAMING(parser))
+ buffer_sprintf(wb, " %s", plugin_name);
+
+ buffer_sprintf(wb, " %s", module_name);
+
+ dyncfg_config_t ret = call_virtual_function_blocking(parser, buffer_tostring(wb), NULL, NULL);
+
+ // the command buffer is only needed for the duration of the call
+ buffer_free(wb);
+
+ return ret;
}
-static dyncfg_config_t get_module_config_schema_cb(void *usr_ctx, const char *module_name)
+// Fetch a module configuration schema through a blocking virtual function call.
+// The command is FUNCTION_NAME_GET_MODULE_CONFIG_SCHEMA [plugin_name] module_name, where
+// plugin_name is included only when the parser in usr_ctx serves a streaming connection.
+static dyncfg_config_t get_module_config_schema_cb(void *usr_ctx, const char *plugin_name, const char *module_name)
{
PARSER *parser = usr_ctx;
- char buf[1024];
- snprintfz(buf, sizeof(buf), "get_module_config_schema %s", module_name);
- return call_virtual_function_blocking(parser, buf, NULL, NULL);
+ BUFFER *wb = buffer_create(CVF_MAX_LEN, NULL);
+
+ buffer_strcat(wb, FUNCTION_NAME_GET_MODULE_CONFIG_SCHEMA);
+ if (SERVING_STREAMING(parser))
+ buffer_sprintf(wb, " %s", plugin_name);
+
+ buffer_sprintf(wb, " %s", module_name);
+
+ dyncfg_config_t ret = call_virtual_function_blocking(parser, buffer_tostring(wb), NULL, NULL);
+
+ // the command buffer is only needed for the duration of the call
+ buffer_free(wb);
+
+ return ret;
}
-static dyncfg_config_t get_job_config_schema_cb(void *usr_ctx, const char *module_name)
+// Fetch the job configuration schema of a module through a blocking virtual function call.
+// The command is FUNCTION_NAME_GET_JOB_CONFIG_SCHEMA [plugin_name] module_name, where
+// plugin_name is included only when the parser in usr_ctx serves a streaming connection.
+static dyncfg_config_t get_job_config_schema_cb(void *usr_ctx, const char *plugin_name, const char *module_name)
{
PARSER *parser = usr_ctx;
- char buf[1024];
- snprintfz(buf, sizeof(buf), "get_job_config_schema %s", module_name);
- return call_virtual_function_blocking(parser, buf, NULL, NULL);
+ BUFFER *wb = buffer_create(CVF_MAX_LEN, NULL);
+
+ buffer_strcat(wb, FUNCTION_NAME_GET_JOB_CONFIG_SCHEMA);
+
+ if (SERVING_STREAMING(parser))
+ buffer_sprintf(wb, " %s", plugin_name);
+
+ buffer_sprintf(wb, " %s", module_name);
+
+ dyncfg_config_t ret = call_virtual_function_blocking(parser, buffer_tostring(wb), NULL, NULL);
+
+ // the command buffer is only needed for the duration of the call
+ buffer_free(wb);
+
+ return ret;
}
-static dyncfg_config_t get_job_config_cb(void *usr_ctx, const char *module_name, const char* job_name)
+// Fetch the configuration of one job through a blocking virtual function call.
+// The command is FUNCTION_NAME_GET_JOB_CONFIG [plugin_name] module_name job_name, where
+// plugin_name is included only when the parser in usr_ctx serves a streaming connection.
+static dyncfg_config_t get_job_config_cb(void *usr_ctx, const char *plugin_name, const char *module_name, const char* job_name)
{
PARSER *parser = usr_ctx;
- char buf[1024];
- snprintfz(buf, sizeof(buf), "get_job_config %s %s", module_name, job_name);
- return call_virtual_function_blocking(parser, buf, NULL, NULL);
+ BUFFER *wb = buffer_create(CVF_MAX_LEN, NULL);
+
+ buffer_strcat(wb, FUNCTION_NAME_GET_JOB_CONFIG);
+
+ if (SERVING_STREAMING(parser))
+ buffer_sprintf(wb, " %s", plugin_name);
+
+ buffer_sprintf(wb, " %s %s", module_name, job_name);
+
+ dyncfg_config_t ret = call_virtual_function_blocking(parser, buffer_tostring(wb), NULL, NULL);
+
+ // the command buffer is only needed for the duration of the call
+ buffer_free(wb);
+
+ return ret;
}
-enum set_config_result set_plugin_config_cb(void *usr_ctx, dyncfg_config_t *cfg)
+// Send a new plugin configuration through a blocking virtual function call, with cfg->data as payload.
+// The command is FUNCTION_NAME_SET_PLUGIN_CONFIG [plugin_name], where plugin_name is included
+// only when the parser in usr_ctx serves a streaming connection.
+// Returns SET_CONFIG_ACCEPTED only when the call reports DYNCFG_VFNC_RET_CFG_ACCEPTED.
+enum set_config_result set_plugin_config_cb(void *usr_ctx, const char *plugin_name, dyncfg_config_t *cfg)
{
PARSER *parser = usr_ctx;
+ BUFFER *wb = buffer_create(CVF_MAX_LEN, NULL);
+
+ buffer_strcat(wb, FUNCTION_NAME_SET_PLUGIN_CONFIG);
+
+ if (SERVING_STREAMING(parser))
+ buffer_sprintf(wb, " %s", plugin_name);
+
+ // NOTE(review): rc is not initialized here - confirm call_virtual_function_blocking() always writes it
int rc;
- call_virtual_function_blocking(parser, "set_plugin_config", &rc, cfg->data);
- if(rc != 1)
+ call_virtual_function_blocking(parser, buffer_tostring(wb), &rc, cfg->data);
+
+ buffer_free(wb);
+ if(rc != DYNCFG_VFNC_RET_CFG_ACCEPTED)
return SET_CONFIG_REJECTED;
return SET_CONFIG_ACCEPTED;
}
-enum set_config_result set_module_config_cb(void *usr_ctx, const char *module_name, dyncfg_config_t *cfg)
+// Send a new module configuration through a blocking virtual function call, with cfg->data as payload.
+// The command is FUNCTION_NAME_SET_MODULE_CONFIG [plugin_name] module_name, where plugin_name is
+// included only when the parser in usr_ctx serves a streaming connection.
+// Returns SET_CONFIG_ACCEPTED only when the call reports DYNCFG_VFNC_RET_CFG_ACCEPTED.
+enum set_config_result set_module_config_cb(void *usr_ctx, const char *plugin_name, const char *module_name, dyncfg_config_t *cfg)
{
PARSER *parser = usr_ctx;
+ BUFFER *wb = buffer_create(CVF_MAX_LEN, NULL);
+
+ buffer_strcat(wb, FUNCTION_NAME_SET_MODULE_CONFIG);
+
+ if (SERVING_STREAMING(parser))
+ buffer_sprintf(wb, " %s", plugin_name);
+
+ buffer_sprintf(wb, " %s", module_name);
+
+ // NOTE(review): rc is not initialized here - confirm call_virtual_function_blocking() always writes it
int rc;
+ call_virtual_function_blocking(parser, buffer_tostring(wb), &rc, cfg->data);
- char buf[1024];
- snprintfz(buf, sizeof(buf), "set_module_config %s", module_name);
- call_virtual_function_blocking(parser, buf, &rc, cfg->data);
+ buffer_free(wb);
- if(rc != 1)
+ if(rc != DYNCFG_VFNC_RET_CFG_ACCEPTED)
return SET_CONFIG_REJECTED;
return SET_CONFIG_ACCEPTED;
}
-enum set_config_result set_job_config_cb(void *usr_ctx, const char *module_name, const char *job_name, dyncfg_config_t *cfg)
+// Send a new job configuration through a blocking virtual function call, with cfg->data as payload.
+// The command is FUNCTION_NAME_SET_JOB_CONFIG [plugin_name] module_name job_name, where plugin_name
+// is included only when the parser in usr_ctx serves a streaming connection.
+// Returns SET_CONFIG_ACCEPTED only when the call reports DYNCFG_VFNC_RET_CFG_ACCEPTED.
+enum set_config_result set_job_config_cb(void *usr_ctx, const char *plugin_name, const char *module_name, const char *job_name, dyncfg_config_t *cfg)
{
PARSER *parser = usr_ctx;
+ BUFFER *wb = buffer_create(CVF_MAX_LEN, NULL);
+
+ buffer_strcat(wb, FUNCTION_NAME_SET_JOB_CONFIG);
+
+ if (SERVING_STREAMING(parser))
+ buffer_sprintf(wb, " %s", plugin_name);
+
+ buffer_sprintf(wb, " %s %s", module_name, job_name);
+
+ // NOTE(review): rc is not initialized here - confirm call_virtual_function_blocking() always writes it
int rc;
+ call_virtual_function_blocking(parser, buffer_tostring(wb), &rc, cfg->data);
- char buf[1024];
- snprintfz(buf, sizeof(buf), "set_job_config %s %s", module_name, job_name);
- call_virtual_function_blocking(parser, buf, &rc, cfg->data);
+ buffer_free(wb);
- if(rc != 1)
+ if(rc != DYNCFG_VFNC_RET_CFG_ACCEPTED)
return SET_CONFIG_REJECTED;
return SET_CONFIG_ACCEPTED;
}
-enum set_config_result delete_job_cb(void *usr_ctx, const char *module_name, const char *job_name)
+// Ask for a job to be deleted through a blocking virtual function call (no payload).
+// The command is FUNCTION_NAME_DELETE_JOB [plugin_name] module_name job_name, where plugin_name
+// is included only when the parser in usr_ctx serves a streaming connection.
+// Returns SET_CONFIG_ACCEPTED only when the call reports DYNCFG_VFNC_RET_CFG_ACCEPTED.
+enum set_config_result delete_job_cb(void *usr_ctx, const char *plugin_name ,const char *module_name, const char *job_name)
{
PARSER *parser = usr_ctx;
+ BUFFER *wb = buffer_create(CVF_MAX_LEN, NULL);
+
+ buffer_strcat(wb, FUNCTION_NAME_DELETE_JOB);
+
+ if (SERVING_STREAMING(parser))
+ buffer_sprintf(wb, " %s", plugin_name);
+
+ buffer_sprintf(wb, " %s %s", module_name, job_name);
+
+ // NOTE(review): rc is not initialized here - confirm call_virtual_function_blocking() always writes it
int rc;
+ call_virtual_function_blocking(parser, buffer_tostring(wb), &rc, NULL);
- char buf[1024];
- snprintfz(buf, sizeof(buf), "delete_job %s %s", module_name, job_name);
- call_virtual_function_blocking(parser, buf, &rc, NULL);
+ buffer_free(wb);
- if(rc != 1)
+ if(rc != DYNCFG_VFNC_RET_CFG_ACCEPTED)
return SET_CONFIG_REJECTED;
return SET_CONFIG_ACCEPTED;
}
@@ -2079,37 +2597,65 @@ static inline PARSER_RC pluginsd_register_plugin(char **words __maybe_unused, si
cfg->get_config_schema_cb = get_plugin_config_schema_cb;
cfg->cb_usr_ctx = parser;
- parser->user.cd->cfg_dict_item = register_plugin(cfg);
-
- if (unlikely(parser->user.cd->cfg_dict_item == NULL)) {
+ const DICTIONARY_ITEM *di = register_plugin(parser->user.host->configurable_plugins, cfg, SERVING_PLUGINSD(parser));
+ if (unlikely(di == NULL)) {
freez(cfg->name);
freez(cfg);
return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_ENABLE, "error registering plugin");
}
- parser->user.cd->configuration = cfg;
+ if (SERVING_PLUGINSD(parser)) {
+ // this is an optimization for pluginsd to avoid an extra dictionary lookup,
+ // as we know which plugin is communicating with us
+ parser->user.cd->cfg_dict_item = di;
+ parser->user.cd->configuration = cfg;
+ } else {
+ // register_plugin keeps the item acquired, so we need to release it
+ dictionary_acquired_item_release(parser->user.host->configurable_plugins, di);
+ }
+
+ rrdpush_send_dyncfg_enable(parser->user.host, cfg->name);
+
return PARSER_RC_OK;
}
+#define LOG_MSG_SIZE (1024)
+#define MODULE_NAME_IDX (SERVING_PLUGINSD(parser) ? 1 : 2)
+#define MODULE_TYPE_IDX (SERVING_PLUGINSD(parser) ? 2 : 3)
static inline PARSER_RC pluginsd_register_module(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser __maybe_unused) {
netdata_log_info("PLUGINSD: DYNCFG_REG_MODULE");
- struct configurable_plugin *plug_cfg = parser->user.cd->configuration;
- if (unlikely(plug_cfg == NULL))
- return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_MODULE, "you have to enable dynamic configuration first using " PLUGINSD_KEYWORD_DYNCFG_ENABLE);
-
- if (unlikely(num_words != 3))
- return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_MODULE, "expected 2 parameters module_name followed by module_type");
+ size_t expected_num_words = SERVING_PLUGINSD(parser) ? 3 : 4;
+
+ if (unlikely(num_words != expected_num_words)) {
+ char log[LOG_MSG_SIZE + 1];
+ snprintfz(log, LOG_MSG_SIZE, "expected %zu (got %zu) parameters: %smodule_name module_type", expected_num_words - 1, num_words - 1, SERVING_PLUGINSD(parser) ? "" : "plugin_name ");
+ return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_MODULE, log);
+ }
+
+ struct configurable_plugin *plug_cfg;
+ const DICTIONARY_ITEM *di = NULL;
+ if (SERVING_PLUGINSD(parser)) {
+ plug_cfg = parser->user.cd->configuration;
+ if (unlikely(plug_cfg == NULL))
+ return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_MODULE, "you have to enable dynamic configuration first using " PLUGINSD_KEYWORD_DYNCFG_ENABLE);
+ } else {
+ di = dictionary_get_and_acquire_item(parser->user.host->configurable_plugins, words[1]);
+ if (unlikely(di == NULL))
+ return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_MODULE, "plugin not found");
+
+ plug_cfg = (struct configurable_plugin *)dictionary_acquired_item_value(di);
+ }
struct module *mod = callocz(1, sizeof(struct module));
- mod->type = str2_module_type(words[2]);
+ mod->type = str2_module_type(words[MODULE_TYPE_IDX]);
if (unlikely(mod->type == MOD_TYPE_UNKNOWN)) {
freez(mod);
return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_MODULE, "unknown module type (allowed: job_array, single)");
}
- mod->name = strdupz(words[1]);
+ mod->name = strdupz(words[MODULE_NAME_IDX]);
mod->set_config_cb = set_module_config_cb;
mod->get_config_cb = get_module_config_cb;
@@ -2122,27 +2668,141 @@ static inline PARSER_RC pluginsd_register_module(char **words __maybe_unused, si
mod->delete_job_cb = delete_job_cb;
mod->job_config_cb_usr_ctx = parser;
- register_module(plug_cfg, mod);
+ register_module(parser->user.host->configurable_plugins, plug_cfg, mod, SERVING_PLUGINSD(parser));
+
+ if (di != NULL)
+ dictionary_acquired_item_release(parser->user.host->configurable_plugins, di);
+
+ rrdpush_send_dyncfg_reg_module(parser->user.host, plug_cfg->name, mod->name, mod->type);
+
return PARSER_RC_OK;
}
-// job_status
-static inline PARSER_RC pluginsd_job_status(char **words, size_t num_words, PARSER *parser)
-{
- if (unlikely(num_words != 6 && num_words != 5))
- return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_REPORT_JOB_STATUS, "expected 4 or 5 parameters: module_name, job_name, status_code, state, [optional: message]");
+// Register a dyncfg job for plugin_name and forward the registration with rrdpush_send_dyncfg_reg_job().
+// The caller passes words[] already shifted, so that it holds: module_name, job_name, job_type, flags.
+// Returns PARSER_RC_OK on success; any validation or registration failure disables the plugin.
+static inline PARSER_RC pluginsd_register_job_common(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser __maybe_unused, const char *plugin_name) {
+    const bool from_plugin = SERVING_PLUGINSD(parser);
+
+    // the flags are parsed as a signed number, negative values are rejected
+    long raw_flags = str2l(words[3]);
+    if (raw_flags < 0)
+        return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB, "invalid flags received");
+
+    // record which side pushed this job to us
+    dyncfg_job_flg_t job_flags = raw_flags;
+    job_flags |= from_plugin ? JOB_FLG_PLUGIN_PUSHED : JOB_FLG_STREAMING_PUSHED;
+
+    enum job_type type = dyncfg_str2job_type(words[2]);
+    if (type == JOB_TYPE_UNKNOWN)
+        return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB, "unknown job type");
+
+    if (from_plugin && type == JOB_TYPE_USER)
+        return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB, "plugins cannot push jobs of type \"user\" (this is allowed only in streaming)");
+
+    // the last argument is 0 ("ignore existing" is off), because this is an explicit register job request
+    if (register_job(parser->user.host->configurable_plugins, plugin_name, words[0], words[1], type, job_flags, 0))
+        return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB, "error registering job");
+
+    rrdpush_send_dyncfg_reg_job(parser->user.host, plugin_name, words[0], words[1], type, job_flags);
+
+    return PARSER_RC_OK;
+}
+
+// DYNCFG_REGISTER_JOB handler.
+// A plugin sends:          module_name job_name job_type flags
+// A streaming child sends: plugin_name module_name job_name job_type flags
+// The parameters after the optional plugin_name are handed to pluginsd_register_job_common().
+static inline PARSER_RC pluginsd_register_job(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser __maybe_unused) {
+    size_t expected_num_words = SERVING_PLUGINSD(parser) ? 5 : 6;
+
+    if (unlikely(num_words != expected_num_words)) {
+        char log[LOG_MSG_SIZE + 1];
+        snprintfz(log, LOG_MSG_SIZE, "expected %zu (got %zu) parameters: %smodule_name job_name job_type flags", expected_num_words - 1, num_words - 1, SERVING_PLUGINSD(parser) ? "" : "plugin_name ");
+        return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB, log);
+    }
+
+    if (!SERVING_PLUGINSD(parser))
+        return pluginsd_register_job_common(&words[2], num_words - 2, parser, words[1]);
+
+    // for plugins the name comes from the configuration set by DYNCFG_ENABLE (and cleared by
+    // DYNCFG_RESET), so it may be missing - do not dereference it blindly
+    struct configurable_plugin *plug_cfg = parser->user.cd->configuration;
+    if (unlikely(plug_cfg == NULL))
+        return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_REGISTER_JOB, "you have to enable dynamic configuration first using " PLUGINSD_KEYWORD_DYNCFG_ENABLE);
+
+    return pluginsd_register_job_common(&words[1], num_words - 1, parser, plug_cfg->name);
+}
+
+// DYNCFG_RESET handler: unregister a configurable plugin and forward the reset with rrdpush_send_dyncfg_reset().
+// A plugin sends no parameters (it resets itself); a streaming child sends: plugin_name
+static inline PARSER_RC pluginsd_dyncfg_reset(char **words __maybe_unused, size_t num_words __maybe_unused, PARSER *parser __maybe_unused) {
+    if (unlikely(num_words != (SERVING_PLUGINSD(parser) ? 1 : 2)))
+        return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_RESET, SERVING_PLUGINSD(parser) ? "expected 0 parameters" : "expected 1 parameter: plugin_name");
+
+    if (SERVING_PLUGINSD(parser)) {
+        // configuration is set by DYNCFG_ENABLE and cleared below, so a reset received before
+        // enabling (or twice in a row) would otherwise dereference NULL
+        if (unlikely(parser->user.cd->configuration == NULL))
+            return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_RESET, "you have to enable dynamic configuration first using " PLUGINSD_KEYWORD_DYNCFG_ENABLE);
+
+        unregister_plugin(parser->user.host->configurable_plugins, parser->user.cd->cfg_dict_item);
+        // NOTE(review): configuration->name is read after unregister_plugin() - confirm the configuration outlives it
+        rrdpush_send_dyncfg_reset(parser->user.host, parser->user.cd->configuration->name);
+        parser->user.cd->configuration = NULL;
+    } else {
+        const DICTIONARY_ITEM *di = dictionary_get_and_acquire_item(parser->user.host->configurable_plugins, words[1]);
+        if (unlikely(di == NULL))
+            return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DYNCFG_RESET, "plugin not found");
+        // NOTE(review): di is not released here - presumably unregister_plugin() consumes the acquired item; verify
+        unregister_plugin(parser->user.host->configurable_plugins, di);
+        rrdpush_send_dyncfg_reset(parser->user.host, words[1]);
+    }
- int state = atoi(words[4]);
+    return PARSER_RC_OK;
+}
+
+// Record a job status report for plugin_name and forward it with rrdpush_send_job_status_update().
+// The caller passes words[] already shifted, so that it holds:
+// words[0] module_name, words[1] job_name, words[2] status, words[3] state, words[4] optional message.
+// An unknown status disables the plugin only when serving a plugin; it is not rejected otherwise.
+static inline PARSER_RC pluginsd_job_status_common(char **words, size_t num_words, PARSER *parser, const char *plugin_name) {
+ int state = str2i(words[3]);
- enum job_status job_status = str2job_state(words[3]);
- if (unlikely(job_status == JOB_STATUS_UNKNOWN))
- return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_REPORT_JOB_STATUS, "unknown job state");
+ enum job_status status = str2job_state(words[2]);
+ if (unlikely(SERVING_PLUGINSD(parser) && status == JOB_STATUS_UNKNOWN))
+ return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_REPORT_JOB_STATUS, "unknown job status");
char *message = NULL;
- if (num_words == 6)
- message = strdupz(words[5]);
+ // the message is passed on without copying it, and an empty message is treated as absent
+ if (num_words == 5 && strlen(words[4]) > 0)
+ message = words[4];
+
+ const DICTIONARY_ITEM *plugin_item;
+ DICTIONARY *job_dict;
+ const DICTIONARY_ITEM *job_item = report_job_status_acq_lock(parser->user.host->configurable_plugins, &plugin_item, &job_dict, plugin_name, words[0], words[1], status, state, message);
+
+ // NOTE(review): presumably a non-NULL job_item comes back with job->lock held and both items
+ // acquired (hence the unlock and the two releases below) - confirm against report_job_status_acq_lock()
+ if (job_item != NULL) {
+ struct job *job = dictionary_acquired_item_value(job_item);
+ rrdpush_send_job_status_update(parser->user.host, plugin_name, words[0], job);
+
+ pthread_mutex_unlock(&job->lock);
+ dictionary_acquired_item_release(job_dict, job_item);
+ dictionary_acquired_item_release(parser->user.host->configurable_plugins, plugin_item);
+ }
+
+ return PARSER_RC_OK;
+}
+
+// job_status [plugin_name if streaming] module_name job_name status_code state [message]
+static PARSER_RC pluginsd_job_status(char **words, size_t num_words, PARSER *parser) {
+ if (SERVING_PLUGINSD(parser)) {
+ if (unlikely(num_words != 5 && num_words != 6)) // num_words counts the keyword itself: 5 or 6 words == 4 or 5 parameters
+ return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_REPORT_JOB_STATUS, "expected 4 or 5 parameters: module_name, job_name, status_code, state, [optional: message]");
+ } else {
+ if (unlikely(num_words != 6 && num_words != 7)) // one extra word here: plugin_name comes right after the keyword
+ return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_REPORT_JOB_STATUS, "expected 5 or 6 parameters: plugin_name, module_name, job_name, status_code, state, [optional: message]");
+ }
+
+ if (SERVING_PLUGINSD(parser)) { // skip the keyword; the plugin name comes from the registered configuration
+ return pluginsd_job_status_common(&words[1], num_words - 1, parser, parser->user.cd->configuration->name); // NOTE(review): configuration is dereferenced without a NULL check -- confirm it is always set before this keyword can arrive
+ }
+ return pluginsd_job_status_common(&words[2], num_words - 2, parser, words[1]); // skip the keyword and plugin_name; words[1] is the plugin name
+}
+
+static PARSER_RC pluginsd_delete_job(char **words, size_t num_words, PARSER *parser) {
+ // this can be a bit confusing, but there is a difference between KEYWORD_DELETE_JOB and the actual delete_job function:
+ // they work in opposite directions
+ if (num_words != 4) // the keyword itself plus the three parameters named in the message below
+ return PLUGINSD_DISABLE_PLUGIN(parser, PLUGINSD_KEYWORD_DELETE_JOB, "expected 3 parameters: plugin_name, module_name, job_name");
+
+ const char *plugin_name = get_word(words, num_words, 1);
+ const char *module_name = get_word(words, num_words, 2);
+ const char *job_name = get_word(words, num_words, 3);
- report_job_status(parser->user.cd->configuration, words[1], words[2], job_status, state, message);
+ if (SERVING_STREAMING(parser)) // the job is deleted from this host's configurable_plugins only in the SERVING_STREAMING case
+ delete_job_pname(parser->user.host->configurable_plugins, plugin_name, module_name, job_name);
+
+ // forward to parent if any
+ rrdpush_send_job_deleted(parser->user.host, plugin_name, module_name, job_name);
return PARSER_RC_OK;
}
@@ -2195,70 +2855,49 @@ static inline PARSER_RC streaming_claimed_id(char **words, size_t num_words, PAR
// ----------------------------------------------------------------------------
-static inline bool buffered_reader_read(struct buffered_reader *reader, int fd) {
-#ifdef NETDATA_INTERNAL_CHECKS
- if(reader->read_buffer[reader->read_len] != '\0')
- fatal("%s(): read_buffer does not start with zero", __FUNCTION__ );
-#endif
-
- ssize_t bytes_read = read(fd, reader->read_buffer + reader->read_len, sizeof(reader->read_buffer) - reader->read_len - 1);
- if(unlikely(bytes_read <= 0))
- return false;
-
- reader->read_len += bytes_read;
- reader->read_buffer[reader->read_len] = '\0';
-
- return true;
-}
-
-static inline bool buffered_reader_read_timeout(struct buffered_reader *reader, int fd, int timeout_ms) {
- errno = 0;
- struct pollfd fds[1];
+void pluginsd_process_thread_cleanup(void *ptr) {
+ PARSER *parser = (PARSER *)ptr;
- fds[0].fd = fd;
- fds[0].events = POLLIN;
+ pluginsd_cleanup_v2(parser);
+ pluginsd_host_define_cleanup(parser);
- int ret = poll(fds, 1, timeout_ms);
+ rrd_collector_finished();
- if (ret > 0) {
- /* There is data to read */
- if (fds[0].revents & POLLIN)
- return buffered_reader_read(reader, fd);
+#ifdef NETDATA_LOG_STREAM_RECEIVE
+ if(parser->user.stream_log_fp) {
+ fclose(parser->user.stream_log_fp);
+ parser->user.stream_log_fp = NULL;
+ }
+#endif
- else if(fds[0].revents & POLLERR) {
- netdata_log_error("PARSER: read failed: POLLERR.");
- return false;
- }
- else if(fds[0].revents & POLLHUP) {
- netdata_log_error("PARSER: read failed: POLLHUP.");
- return false;
- }
- else if(fds[0].revents & POLLNVAL) {
- netdata_log_error("PARSER: read failed: POLLNVAL.");
- return false;
- }
+ parser_destroy(parser);
+}
- netdata_log_error("PARSER: poll() returned positive number, but POLLIN|POLLERR|POLLHUP|POLLNVAL are not set.");
- return false;
- }
- else if (ret == 0) {
- netdata_log_error("PARSER: timeout while waiting for data.");
+bool parser_reconstruct_node(BUFFER *wb, void *ptr) {
+ PARSER *parser = ptr;
+ if(!parser || !parser->user.host)
return false;
- }
- netdata_log_error("PARSER: poll() failed with code %d.", ret);
- return false;
+ buffer_strcat(wb, rrdhost_hostname(parser->user.host));
+ return true;
}
-void pluginsd_process_thread_cleanup(void *ptr) {
- PARSER *parser = (PARSER *)ptr;
+bool parser_reconstruct_instance(BUFFER *wb, void *ptr) {
+ PARSER *parser = ptr;
+ if(!parser || !parser->user.st)
+ return false;
- pluginsd_cleanup_v2(parser);
- pluginsd_host_define_cleanup(parser);
+ buffer_strcat(wb, rrdset_name(parser->user.st));
+ return true;
+}
- rrd_collector_finished();
+bool parser_reconstruct_context(BUFFER *wb, void *ptr) {
+ PARSER *parser = ptr;
+ if(!parser || !parser->user.st)
+ return false;
- parser_destroy(parser);
+ buffer_strcat(wb, string2str(parser->user.st->context));
+ return true;
}
inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp_plugin_input, FILE *fp_plugin_output, int trust_durations)
@@ -2308,26 +2947,51 @@ inline size_t pluginsd_process(RRDHOST *host, struct plugind *cd, FILE *fp_plugi
// so, parser needs to be allocated before pushing it
netdata_thread_cleanup_push(pluginsd_process_thread_cleanup, parser);
- buffered_reader_init(&parser->reader);
- char buffer[PLUGINSD_LINE_MAX + 2];
- while(likely(service_running(SERVICE_COLLECTORS))) {
- if (unlikely(!buffered_reader_next_line(&parser->reader, buffer, PLUGINSD_LINE_MAX + 2))) {
- if(unlikely(!buffered_reader_read_timeout(&parser->reader, fileno((FILE *)parser->fp_input), 2 * 60 * MSEC_PER_SEC)))
- break;
- }
- else if(unlikely(parser_action(parser, buffer)))
- break;
- }
-
- cd->unsafe.enabled = parser->user.enabled;
- count = parser->user.data_collections_count;
-
- if (likely(count)) {
- cd->successful_collections += count;
- cd->serial_failures = 0;
- }
- else
- cd->serial_failures++;
+ {
+ ND_LOG_STACK lgs[] = {
+ ND_LOG_FIELD_CB(NDF_REQUEST, line_splitter_reconstruct_line, &parser->line),
+ ND_LOG_FIELD_CB(NDF_NIDL_NODE, parser_reconstruct_node, parser),
+ ND_LOG_FIELD_CB(NDF_NIDL_INSTANCE, parser_reconstruct_instance, parser),
+ ND_LOG_FIELD_CB(NDF_NIDL_CONTEXT, parser_reconstruct_context, parser),
+ ND_LOG_FIELD_END(),
+ };
+ ND_LOG_STACK_PUSH(lgs);
+
+ buffered_reader_init(&parser->reader);
+ BUFFER *buffer = buffer_create(sizeof(parser->reader.read_buffer) + 2, NULL);
+ while(likely(service_running(SERVICE_COLLECTORS))) {
+
+ if(unlikely(!buffered_reader_next_line(&parser->reader, buffer))) {
+ buffered_reader_ret_t ret = buffered_reader_read_timeout(
+ &parser->reader,
+ fileno((FILE *) parser->fp_input),
+ 2 * 60 * MSEC_PER_SEC, true
+ );
+
+ if(unlikely(ret != BUFFERED_READER_READ_OK))
+ break;
+
+ continue;
+ }
+
+ if(unlikely(parser_action(parser, buffer->buffer)))
+ break;
+
+ buffer->len = 0;
+ buffer->buffer[0] = '\0';
+ }
+ buffer_free(buffer);
+
+ cd->unsafe.enabled = parser->user.enabled;
+ count = parser->user.data_collections_count;
+
+ if(likely(count)) {
+ cd->successful_collections += count;
+ cd->serial_failures = 0;
+ }
+ else
+ cd->serial_failures++;
+ }
// free parser with the pop function
netdata_thread_cleanup_pop(1);
@@ -2452,10 +3116,22 @@ PARSER_RC parser_execute(PARSER *parser, PARSER_KEYWORD *keyword, char **words,
case 101:
return pluginsd_register_plugin(words, num_words, parser);
-
+
case 102:
return pluginsd_register_module(words, num_words, parser);
+ case 103:
+ return pluginsd_register_job(words, num_words, parser);
+
+ case 104:
+ return pluginsd_dyncfg_reset(words, num_words, parser);
+
+ case 110:
+ return pluginsd_job_status(words, num_words, parser);
+
+ case 111:
+ return pluginsd_delete_job(words, num_words, parser);
+
default:
fatal("Unknown keyword '%s' with id %zu", keyword->keyword, keyword->id);
}
@@ -2472,14 +3148,20 @@ void parser_init_repertoire(PARSER *parser, PARSER_REPERTOIRE repertoire) {
}
}
+static void parser_destroy_dyncfg(PARSER *parser) { // dynamic-configuration teardown for a parser; called from parser_destroy()
+ if (parser->user.cd != NULL && parser->user.cd->configuration != NULL) { // NOTE(review): parser->user.host is dereferenced on the next line without the NULL check the else-branch performs -- confirm it is always set when cd is
+ unregister_plugin(parser->user.host->configurable_plugins, parser->user.cd->cfg_dict_item);
+ parser->user.cd->configuration = NULL;
+ } else if (parser->user.host != NULL && SERVING_STREAMING(parser) && parser->user.host != localhost){ // streaming parser for a host other than localhost: flush that host's whole configurable_plugins dictionary
+ dictionary_flush(parser->user.host->configurable_plugins);
+ }
+}
+
void parser_destroy(PARSER *parser) {
if (unlikely(!parser))
return;
- if (parser->user.cd != NULL && parser->user.cd->configuration != NULL) {
- unregister_plugin(parser->user.cd->cfg_dict_item);
- parser->user.cd->configuration = NULL;
- }
+ parser_destroy_dyncfg(parser);
dictionary_destroy(parser->inflight.functions);
freez(parser);
diff --git a/collectors/plugins.d/pluginsd_parser.h b/collectors/plugins.d/pluginsd_parser.h
index 5e1ea124241dd3..35474642935748 100644
--- a/collectors/plugins.d/pluginsd_parser.h
+++ b/collectors/plugins.d/pluginsd_parser.h
@@ -10,6 +10,12 @@
// this has to be in-sync with the same at receiver.c
#define WORKER_RECEIVER_JOB_REPLICATION_COMPLETION (WORKER_PARSER_FIRST_JOB - 3)
+// this controls the max response size of a function
+#define PLUGINSD_MAX_DEFERRED_SIZE (20 * 1024 * 1024)
+
+#define PLUGINSD_MIN_RRDSET_POINTERS_CACHE 1024
+
+#define HOST_LABEL_IS_EPHEMERAL "_is_ephemeral"
// PARSER return codes
typedef enum __attribute__ ((__packed__)) parser_rc {
PARSER_RC_OK, // Callback was successful, go on
@@ -25,6 +31,7 @@ typedef enum __attribute__ ((__packed__)) parser_input_type {
typedef enum __attribute__ ((__packed__)) {
PARSER_INIT_PLUGINSD = (1 << 1),
PARSER_INIT_STREAMING = (1 << 2),
+ PARSER_REP_METADATA = (1 << 3),
} PARSER_REPERTOIRE;
struct parser;
@@ -38,16 +45,22 @@ typedef struct parser_keyword {
} PARSER_KEYWORD;
typedef struct parser_user_object {
+ bool cleanup_slots;
RRDSET *st;
RRDHOST *host;
void *opaque;
struct plugind *cd;
int trust_durations;
- DICTIONARY *new_host_labels;
- DICTIONARY *chart_rrdlabels_linked_temporarily;
+ RRDLABELS *new_host_labels;
+ RRDLABELS *chart_rrdlabels_linked_temporarily;
size_t data_collections_count;
int enabled;
+#ifdef NETDATA_LOG_STREAM_RECEIVE
+ FILE *stream_log_fp;
+ PARSER_REPERTOIRE stream_log_repertoire;
+#endif
+
STREAM_CAPABILITIES capabilities; // receiver capabilities
struct {
@@ -55,7 +68,7 @@ typedef struct parser_user_object {
uuid_t machine_guid;
char machine_guid_str[UUID_STR_LEN];
STRING *hostname;
- DICTIONARY *rrdlabels;
+ RRDLABELS *rrdlabels;
} host_define;
struct parser_user_object_replay {
@@ -85,17 +98,21 @@ typedef struct parser {
PARSER_REPERTOIRE repertoire;
uint32_t flags;
int fd; // Socket
- size_t line;
FILE *fp_input; // Input source e.g. stream
FILE *fp_output; // Stream to send commands to plugin
#ifdef ENABLE_HTTPS
NETDATA_SSL *ssl_output;
#endif
+#ifdef ENABLE_H2O
+ void *h2o_ctx; // if set we use h2o_stream functions to send data
+#endif
PARSER_USER_OBJECT user; // User defined structure to hold extra state between calls
struct buffered_reader reader;
+ struct line_splitter line;
+ PARSER_KEYWORD *keyword;
struct {
const char *end_keyword;
@@ -147,19 +164,28 @@ static inline PARSER_KEYWORD *parser_find_keyword(PARSER *parser, const char *co
return NULL;
}
+bool parser_reconstruct_node(BUFFER *wb, void *ptr);
+bool parser_reconstruct_instance(BUFFER *wb, void *ptr);
+bool parser_reconstruct_context(BUFFER *wb, void *ptr);
+
static inline int parser_action(PARSER *parser, char *input) {
- parser->line++;
+#ifdef NETDATA_LOG_STREAM_RECEIVE
+ static __thread char line[PLUGINSD_LINE_MAX + 1];
+ strncpyz(line, input, sizeof(line) - 1);
+#endif
+
+ parser->line.count++;
if(unlikely(parser->flags & PARSER_DEFER_UNTIL_KEYWORD)) {
- char command[PLUGINSD_LINE_MAX + 1];
- bool has_keyword = find_first_keyword(input, command, PLUGINSD_LINE_MAX, isspace_map_pluginsd);
+ char command[100 + 1];
+ bool has_keyword = find_first_keyword(input, command, 100, isspace_map_pluginsd);
if(!has_keyword || strcmp(command, parser->defer.end_keyword) != 0) {
if(parser->defer.response) {
buffer_strcat(parser->defer.response, input);
- if(buffer_strlen(parser->defer.response) > 10 * 1024 * 1024) {
- // more than 10MB of data
- // a bad plugin that did not send the end_keyword
+ if(buffer_strlen(parser->defer.response) > PLUGINSD_MAX_DEFERRED_SIZE) {
+ // more than PLUGINSD_MAX_DEFERRED_SIZE of data,
+ // or a bad plugin that did not send the end_keyword
internal_error(true, "PLUGINSD: deferred response is too big (%zu bytes). Stopping this plugin.", buffer_strlen(parser->defer.response));
return 1;
}
@@ -180,18 +206,25 @@ static inline int parser_action(PARSER *parser, char *input) {
return 0;
}
- char *words[PLUGINSD_MAX_WORDS];
- size_t num_words = quoted_strings_splitter_pluginsd(input, words, PLUGINSD_MAX_WORDS);
- const char *command = get_word(words, num_words, 0);
+ parser->line.num_words = quoted_strings_splitter_pluginsd(input, parser->line.words, PLUGINSD_MAX_WORDS);
+ const char *command = get_word(parser->line.words, parser->line.num_words, 0);
- if(unlikely(!command))
+ if(unlikely(!command)) {
+ line_splitter_reset(&parser->line);
return 0;
+ }
PARSER_RC rc;
- PARSER_KEYWORD *t = parser_find_keyword(parser, command);
- if(likely(t)) {
- worker_is_busy(t->worker_job_id);
- rc = parser_execute(parser, t, words, num_words);
+ parser->keyword = parser_find_keyword(parser, command);
+ if(likely(parser->keyword)) {
+ worker_is_busy(parser->keyword->worker_job_id);
+
+#ifdef NETDATA_LOG_STREAM_RECEIVE
+ if(parser->user.stream_log_fp && parser->keyword->repertoire & parser->user.stream_log_repertoire)
+ fprintf(parser->user.stream_log_fp, "%s", line);
+#endif
+
+ rc = parser_execute(parser, parser->keyword, parser->line.words, parser->line.num_words);
// rc = (*t->func)(words, num_words, parser);
worker_is_idle();
}
@@ -199,22 +232,13 @@ static inline int parser_action(PARSER *parser, char *input) {
rc = PARSER_RC_ERROR;
if(rc == PARSER_RC_ERROR) {
- BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL);
- for(size_t i = 0; i < num_words ;i++) {
- if(i) buffer_fast_strcat(wb, " ", 1);
-
- buffer_fast_strcat(wb, "\"", 1);
- const char *s = get_word(words, num_words, i);
- buffer_strcat(wb, s?s:"");
- buffer_fast_strcat(wb, "\"", 1);
- }
-
+ CLEAN_BUFFER *wb = buffer_create(PLUGINSD_LINE_MAX, NULL);
+ line_splitter_reconstruct_line(wb, &parser->line);
netdata_log_error("PLUGINSD: parser_action('%s') failed on line %zu: { %s } (quotes added to show parsing)",
- command, parser->line, buffer_tostring(wb));
-
- buffer_free(wb);
+ command, parser->line.count, buffer_tostring(wb));
}
+ line_splitter_reset(&parser->line);
return (rc == PARSER_RC_ERROR || rc == PARSER_RC_STOP);
}
diff --git a/collectors/proc.plugin/README.md b/collectors/proc.plugin/README.md
index 16ae6f412842ae..62e46569f4837e 100644
--- a/collectors/proc.plugin/README.md
+++ b/collectors/proc.plugin/README.md
@@ -398,11 +398,11 @@ You can set the following values for each configuration option:
#### Wireless configuration
-#### alarms
+#### alerts
-There are several alarms defined in `health.d/net.conf`.
+There are several alerts defined in `health.d/net.conf`.
-The tricky ones are `inbound packets dropped` and `inbound packets dropped ratio`. They have quite a strict policy so that they warn users about possible issues. These alarms can be annoying for some network configurations. It is especially true for some bonding configurations if an interface is a child or a bonding interface itself. If it is expected to have a certain number of drops on an interface for a certain network configuration, a separate alarm with different triggering thresholds can be created or the existing one can be disabled for this specific interface. It can be done with the help of the [families](https://github.com/netdata/netdata/blob/master/health/REFERENCE.md#alarm-line-families) line in the alarm configuration. For example, if you want to disable the `inbound packets dropped` alarm for `eth0`, set `families: !eth0 *` in the alarm definition for `template: inbound_packets_dropped`.
+The tricky ones are `inbound packets dropped` and `inbound packets dropped ratio`. They have quite a strict policy so that they warn users about possible issues. These alerts can be annoying for some network configurations. It is especially true for some bonding configurations if an interface is a child or a bonding interface itself. If it is expected to have a certain number of drops on an interface for a certain network configuration, a separate alert with different triggering thresholds can be created or the existing one can be disabled for this specific interface. It can be done with the help of the [families](https://github.com/netdata/netdata/blob/master/health/REFERENCE.md#alert-line-families) line in the alert configuration. For example, if you want to disable the `inbound packets dropped` alert for `eth0`, set `families: !eth0 *` in the alert definition for `template: inbound_packets_dropped`.
#### configuration
diff --git a/collectors/proc.plugin/integrations/amd_gpu.md b/collectors/proc.plugin/integrations/amd_gpu.md
new file mode 100644
index 00000000000000..e85cce221b84f9
--- /dev/null
+++ b/collectors/proc.plugin/integrations/amd_gpu.md
@@ -0,0 +1,110 @@
+
+
+# AMD GPU
+
+
+
+
+
+Plugin: proc.plugin
+Module: /sys/class/drm
+
+
+
+## Overview
+
+This integration monitors AMD GPU metrics, such as utilization, clock frequency and memory usage.
+
+It reads `/sys/class/drm` to collect metrics for every AMD GPU card instance it encounters.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per gpu
+
+These metrics refer to the GPU.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| product_name | GPU product name (e.g. AMD RX 6600) |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| amdgpu.gpu_utilization | utilization | percentage |
+| amdgpu.gpu_mem_utilization | utilization | percentage |
+| amdgpu.gpu_clk_frequency | frequency | MHz |
+| amdgpu.gpu_mem_clk_frequency | frequency | MHz |
+| amdgpu.gpu_mem_vram_usage_perc | usage | percentage |
+| amdgpu.gpu_mem_vram_usage | free, used | bytes |
+| amdgpu.gpu_mem_vis_vram_usage_perc | usage | percentage |
+| amdgpu.gpu_mem_vis_vram_usage | free, used | bytes |
+| amdgpu.gpu_mem_gtt_usage_perc | usage | percentage |
+| amdgpu.gpu_mem_gtt_usage | free, used | bytes |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/btrfs.md b/collectors/proc.plugin/integrations/btrfs.md
new file mode 100644
index 00000000000000..5f994c8419058a
--- /dev/null
+++ b/collectors/proc.plugin/integrations/btrfs.md
@@ -0,0 +1,137 @@
+
+
+# BTRFS
+
+
+
+
+
+Plugin: proc.plugin
+Module: /sys/fs/btrfs
+
+
+
+## Overview
+
+This integration provides usage and error statistics from the BTRFS filesystem.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per btrfs filesystem
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| filesystem_uuid | TBD |
+| filesystem_label | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| btrfs.disk | unallocated, data_free, data_used, meta_free, meta_used, sys_free, sys_used | MiB |
+| btrfs.data | free, used | MiB |
+| btrfs.metadata | free, used, reserved | MiB |
+| btrfs.system | free, used | MiB |
+| btrfs.commits | commits | commits |
+| btrfs.commits_perc_time | commits | percentage |
+| btrfs.commit_timings | last, max | ms |
+
+### Per btrfs device
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| device_id | TBD |
+| filesystem_uuid | TBD |
+| filesystem_label | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| btrfs.device_errors | write_errs, read_errs, flush_errs, corruption_errs, generation_errs | errors |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ btrfs_allocated ](https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf) | btrfs.disk | percentage of allocated BTRFS physical disk space |
+| [ btrfs_data ](https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf) | btrfs.data | utilization of BTRFS data space |
+| [ btrfs_metadata ](https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf) | btrfs.metadata | utilization of BTRFS metadata space |
+| [ btrfs_system ](https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf) | btrfs.system | utilization of BTRFS system space |
+| [ btrfs_device_read_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf) | btrfs.device_errors | number of encountered BTRFS read errors |
+| [ btrfs_device_write_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf) | btrfs.device_errors | number of encountered BTRFS write errors |
+| [ btrfs_device_flush_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf) | btrfs.device_errors | number of encountered BTRFS flush errors |
+| [ btrfs_device_corruption_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf) | btrfs.device_errors | number of encountered BTRFS corruption errors |
+| [ btrfs_device_generation_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/btrfs.conf) | btrfs.device_errors | number of encountered BTRFS generation errors |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/conntrack.md b/collectors/proc.plugin/integrations/conntrack.md
new file mode 100644
index 00000000000000..b38f6b5080f20b
--- /dev/null
+++ b/collectors/proc.plugin/integrations/conntrack.md
@@ -0,0 +1,105 @@
+
+
+# Conntrack
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/net/stat/nf_conntrack
+
+
+
+## Overview
+
+This integration monitors the connection tracking mechanism of Netfilter in the Linux Kernel.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Conntrack instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| netfilter.conntrack_sockets | connections | active connections |
+| netfilter.conntrack_new | new, ignore, invalid | connections/s |
+| netfilter.conntrack_changes | inserted, deleted, delete_list | changes/s |
+| netfilter.conntrack_expect | created, deleted, new | expectations/s |
+| netfilter.conntrack_search | searched, restarted, found | searches/s |
+| netfilter.conntrack_errors | icmp_error, error_failed, drop, early_drop | events/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ netfilter_conntrack_full ](https://github.com/netdata/netdata/blob/master/health/health.d/netfilter.conf) | netfilter.conntrack_sockets | netfilter connection tracker table size utilization |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/disk_statistics.md b/collectors/proc.plugin/integrations/disk_statistics.md
new file mode 100644
index 00000000000000..8f7448c399a834
--- /dev/null
+++ b/collectors/proc.plugin/integrations/disk_statistics.md
@@ -0,0 +1,149 @@
+
+
+# Disk Statistics
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/diskstats
+
+
+
+## Overview
+
+Detailed statistics for each of your system's disk devices and partitions.
+The data is reported by the kernel and can be used to monitor disk activity on a Linux system.
+
+Get valuable insight into how your disks are performing and where potential bottlenecks might be.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Disk Statistics instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.io | in, out | KiB/s |
+
+### Per disk
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| device | TBD |
+| mount_point | TBD |
+| device_type | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| disk.io | reads, writes | KiB/s |
+| disk_ext.io | discards | KiB/s |
+| disk.ops | reads, writes | operations/s |
+| disk_ext.ops | discards, flushes | operations/s |
+| disk.qops | operations | operations |
+| disk.backlog | backlog | milliseconds |
+| disk.busy | busy | milliseconds |
+| disk.util | utilization | % of time working |
+| disk.mops | reads, writes | merged operations/s |
+| disk_ext.mops | discards | merged operations/s |
+| disk.iotime | reads, writes | milliseconds/s |
+| disk_ext.iotime | discards, flushes | milliseconds/s |
+| disk.await | reads, writes | milliseconds/operation |
+| disk_ext.await | discards, flushes | milliseconds/operation |
+| disk.avgsz | reads, writes | KiB/operation |
+| disk_ext.avgsz | discards | KiB/operation |
+| disk.svctm | svctm | milliseconds/operation |
+| disk.bcache_cache_alloc | unused, dirty, clean, metadata, undefined | percentage |
+| disk.bcache_hit_ratio | 5min, 1hour, 1day, ever | percentage |
+| disk.bcache_rates | congested, writeback | KiB/s |
+| disk.bcache_size | dirty | MiB |
+| disk.bcache_usage | avail | percentage |
+| disk.bcache_cache_read_races | races, errors | operations/s |
+| disk.bcache | hits, misses, collisions, readaheads | operations/s |
+| disk.bcache_bypass | hits, misses | operations/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ 10min_disk_backlog ](https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf) | disk.backlog | average backlog size of the ${label:device} disk over the last 10 minutes |
+| [ 10min_disk_utilization ](https://github.com/netdata/netdata/blob/master/health/health.d/disks.conf) | disk.util | average percentage of time ${label:device} disk was busy over the last 10 minutes |
+| [ bcache_cache_dirty ](https://github.com/netdata/netdata/blob/master/health/health.d/bcache.conf) | disk.bcache_cache_alloc | percentage of cache space used for dirty data and metadata (this usually means your SSD cache is too small) |
+| [ bcache_cache_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/bcache.conf) | disk.bcache_cache_read_races | number of times data was read from the cache, the bucket was reused and invalidated in the last 10 minutes (when this occurs the data is reread from the backing device) |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/entropy.md b/collectors/proc.plugin/integrations/entropy.md
new file mode 100644
index 00000000000000..8432a1f960943a
--- /dev/null
+++ b/collectors/proc.plugin/integrations/entropy.md
@@ -0,0 +1,133 @@
+
+
+# Entropy
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/sys/kernel/random/entropy_avail
+
+
+
+## Overview
+
+Entropy is a measure of the randomness or unpredictability of data.
+
+In the context of cryptography, entropy is used to generate random numbers or keys that are essential for
+secure communication and encryption. Without a good source of entropy, cryptographic protocols can become
+vulnerable to attacks that exploit the predictability of the generated keys.
+
+In most operating systems, entropy is generated by collecting random events from various sources, such as
+hardware interrupts, mouse movements, keyboard presses, and disk activity. These events are fed into a pool
+of entropy, which is then used to generate random numbers when needed.
+
+The `/dev/random` device in Linux is one such source of entropy, and it provides an interface for programs
+to access the pool of entropy. When a program requests random numbers, it reads from the `/dev/random` device,
+which blocks until enough entropy is available to generate the requested numbers. This ensures that the
+generated numbers are truly random and not predictable.
+
+However, if the pool of entropy gets depleted, the `/dev/random` device may block indefinitely, causing
+programs that rely on random numbers to slow down or even freeze. This is especially problematic for
+cryptographic protocols that require a continuous stream of random numbers, such as SSL/TLS and SSH.
+
+To avoid this issue, some systems use a hardware random number generator (RNG) to generate high-quality
+entropy. A hardware RNG generates random numbers by measuring physical phenomena, such as thermal noise or
+radioactive decay. These sources of randomness are considered to be more reliable and unpredictable than
+software-based sources.
+
+One such hardware RNG is the Trusted Platform Module (TPM), which is a dedicated hardware chip that is used
+for cryptographic operations and secure boot. The TPM contains a built-in hardware RNG that generates
+high-quality entropy, which can be used to seed the pool of entropy in the operating system.
+
+Alternatively, software-based solutions such as `Haveged` can be used to generate additional entropy by
+exploiting sources of randomness in the system, such as CPU utilization and network traffic. These solutions
+can help to mitigate the risk of entropy depletion, but they may not be as reliable as hardware-based solutions.
+
+
+
+
+This collector is only supported on the following platforms:
+
+- linux
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Entropy instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.entropy | entropy | entropy |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ lowest_entropy ](https://github.com/netdata/netdata/blob/master/health/health.d/entropy.conf) | system.entropy | minimum number of bits of entropy available for the kernel’s random number generator |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/infiniband.md b/collectors/proc.plugin/integrations/infiniband.md
new file mode 100644
index 00000000000000..6cb5fdc5352efd
--- /dev/null
+++ b/collectors/proc.plugin/integrations/infiniband.md
@@ -0,0 +1,99 @@
+
+
+# InfiniBand
+
+
+
+
+
+Plugin: proc.plugin
+Module: /sys/class/infiniband
+
+
+
+## Overview
+
+This integration monitors InfiniBand network interface statistics.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per infiniband port
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ib.bytes | Received, Sent | kilobits/s |
+| ib.packets | Received, Sent, Mcast_rcvd, Mcast_sent, Ucast_rcvd, Ucast_sent | packets/s |
+| ib.errors | Pkts_malformated, Pkts_rcvd_discarded, Pkts_sent_discarded, Tick_Wait_to_send, Pkts_missed_resource, Buffer_overrun, Link_Downed, Link_recovered, Link_integrity_err, Link_minor_errors, Pkts_rcvd_with_EBP, Pkts_rcvd_discarded_by_switch, Pkts_sent_discarded_by_switch | errors/s |
+| ib.hwerrors | Duplicated_packets, Pkt_Seq_Num_gap, Ack_timer_expired, Drop_missing_buffer, Drop_out_of_sequence, NAK_sequence_rcvd, CQE_err_Req, CQE_err_Resp, CQE_Flushed_err_Req, CQE_Flushed_err_Resp, Remote_access_err_Req, Remote_access_err_Resp, Remote_invalid_req, Local_length_err_Resp, RNR_NAK_Packets, CNP_Pkts_ignored, RoCE_ICRC_Errors | errors/s |
+| ib.hwpackets | RoCEv2_Congestion_sent, RoCEv2_Congestion_rcvd, IB_Congestion_handled, ATOMIC_req_rcvd, Connection_req_rcvd, Read_req_rcvd, Write_req_rcvd, RoCE_retrans_adaptive, RoCE_retrans_timeout, RoCE_slow_restart, RoCE_slow_restart_congestion, RoCE_slow_restart_count | packets/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/inter_process_communication.md b/collectors/proc.plugin/integrations/inter_process_communication.md
new file mode 100644
index 00000000000000..55708a4b0ff82c
--- /dev/null
+++ b/collectors/proc.plugin/integrations/inter_process_communication.md
@@ -0,0 +1,120 @@
+
+
+# Inter Process Communication
+
+
+
+
+
+Plugin: proc.plugin
+Module: ipc
+
+
+
+## Overview
+
+IPC stands for Inter-Process Communication. It is a mechanism which allows processes to communicate with each
+other and synchronize their actions.
+
+This collector exposes information about:
+
+- Message Queues: This allows messages to be exchanged between processes. It's a more flexible method that
+ allows messages to be placed onto a queue and read at a later time.
+
+- Shared Memory: This method allows for the fastest form of IPC because processes can exchange data by
+ reading/writing into shared memory segments.
+
+- Semaphores: They are used to synchronize the operations performed by independent processes. So, if multiple
+ processes are trying to access a single shared resource, semaphores can ensure that only one process
+ accesses the resource at a given time.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Inter Process Communication instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.ipc_semaphores | semaphores | semaphores |
+| system.ipc_semaphore_arrays | arrays | arrays |
+| system.message_queue_message | a dimension per queue | messages |
+| system.message_queue_bytes | a dimension per queue | bytes |
+| system.shared_memory_segments | segments | segments |
+| system.shared_memory_bytes | bytes | bytes |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ semaphores_used ](https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf) | system.ipc_semaphores | IPC semaphore utilization |
+| [ semaphore_arrays_used ](https://github.com/netdata/netdata/blob/master/health/health.d/ipc.conf) | system.ipc_semaphore_arrays | IPC semaphore arrays utilization |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/interrupts.md b/collectors/proc.plugin/integrations/interrupts.md
new file mode 100644
index 00000000000000..1b85fb767386cc
--- /dev/null
+++ b/collectors/proc.plugin/integrations/interrupts.md
@@ -0,0 +1,141 @@
+
+
+# Interrupts
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/interrupts
+
+
+
+## Overview
+
+Monitors `/proc/interrupts`, a file organized by CPU and then by the type of interrupt.
+The numbers reported are the counts of the interrupts that have occurred of each type.
+
+An interrupt is a signal to the processor emitted by hardware or software indicating an event that needs
+immediate attention. The processor then interrupts its current activities and executes the interrupt handler
+to deal with the event. This is part of the way a computer multitasks and handles concurrent processing.
+
+The types of interrupts include:
+
+- **I/O interrupts**: These are caused by I/O devices like the keyboard, mouse, printer, etc. For example, when
+ you type something on the keyboard, an interrupt is triggered so the processor can handle the new input.
+
+- **Timer interrupts**: These are generated at regular intervals by the system's timer circuit. It's primarily
+ used to switch the CPU among different tasks.
+
+- **Software interrupts**: These are generated by a program requiring disk I/O operations, or other system resources.
+
+- **Hardware interrupts**: These are caused by hardware conditions such as power failure, overheating, etc.
+
+Monitoring `/proc/interrupts` can be used for:
+
+- **Performance tuning**: If an interrupt is happening very frequently, it could be a sign that a device is not
+ configured correctly, or there is a software bug causing unnecessary interrupts. This could lead to system
+ performance degradation.
+
+- **System troubleshooting**: If you're seeing a lot of unexpected interrupts, it could be a sign of a hardware problem.
+
+- **Understanding system behavior**: More generally, keeping an eye on what interrupts are occurring can help you
+ understand what your system is doing. It can provide insights into the system's interaction with hardware,
+ drivers, and other parts of the kernel.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Interrupts instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.interrupts | a dimension per device | interrupts/s |
+
+### Per cpu core
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| cpu | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cpu.interrupts | a dimension per device | interrupts/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/ip_virtual_server.md b/collectors/proc.plugin/integrations/ip_virtual_server.md
new file mode 100644
index 00000000000000..5c7afd2ebde747
--- /dev/null
+++ b/collectors/proc.plugin/integrations/ip_virtual_server.md
@@ -0,0 +1,97 @@
+
+
+# IP Virtual Server
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/net/ip_vs_stats
+
+
+
+## Overview
+
+This integration monitors IP Virtual Server statistics.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per IP Virtual Server instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipvs.sockets | connections | connections/s |
+| ipvs.packets | received, sent | packets/s |
+| ipvs.net | received, sent | kilobits/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/ipv6_socket_statistics.md b/collectors/proc.plugin/integrations/ipv6_socket_statistics.md
new file mode 100644
index 00000000000000..2c1ee2721a29f0
--- /dev/null
+++ b/collectors/proc.plugin/integrations/ipv6_socket_statistics.md
@@ -0,0 +1,99 @@
+
+
+# IPv6 Socket Statistics
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/net/sockstat6
+
+
+
+## Overview
+
+This integration provides IPv6 socket statistics.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per IPv6 Socket Statistics instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipv6.sockstat6_tcp_sockets | inuse | sockets |
+| ipv6.sockstat6_udp_sockets | inuse | sockets |
+| ipv6.sockstat6_udplite_sockets | inuse | sockets |
+| ipv6.sockstat6_raw_sockets | inuse | sockets |
+| ipv6.sockstat6_frag_sockets | inuse | fragments |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/kernel_same-page_merging.md b/collectors/proc.plugin/integrations/kernel_same-page_merging.md
new file mode 100644
index 00000000000000..336f0feaf62c4e
--- /dev/null
+++ b/collectors/proc.plugin/integrations/kernel_same-page_merging.md
@@ -0,0 +1,103 @@
+
+
+# Kernel Same-Page Merging
+
+
+
+
+
+Plugin: proc.plugin
+Module: /sys/kernel/mm/ksm
+
+
+
+## Overview
+
+Kernel Samepage Merging (KSM) is a memory-saving feature in Linux that enables the kernel to examine the
+memory of different processes and identify identical pages. It then merges these identical pages into a
+single page that the processes share. This is particularly useful for virtualization, where multiple virtual
+machines might be running the same operating system or applications and have many identical pages.
+
+The collector provides information about the operation and effectiveness of KSM on your system.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Kernel Same-Page Merging instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.ksm | shared, unshared, sharing, volatile | MiB |
+| mem.ksm_savings | savings, offered | MiB |
+| mem.ksm_ratios | savings | percentage |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/md_raid.md b/collectors/proc.plugin/integrations/md_raid.md
new file mode 100644
index 00000000000000..34a4840bb0d0a3
--- /dev/null
+++ b/collectors/proc.plugin/integrations/md_raid.md
@@ -0,0 +1,125 @@
+
+
+# MD RAID
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/mdstat
+
+
+
+## Overview
+
+This integration monitors the status of MD RAID devices.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per MD RAID instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| md.health | a dimension per md array | failed disks |
+
+### Per md array
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| device | TBD |
+| raid_level | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| md.disks | inuse, down | disks |
+| md.mismatch_cnt | count | unsynchronized blocks |
+| md.status | check, resync, recovery, reshape | percent |
+| md.expected_time_until_operation_finish | finish_in | seconds |
+| md.operation_speed | speed | KiB/s |
+| md.nonredundant | available | boolean |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ mdstat_last_collected ](https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf) | md.disks | number of seconds since the last successful data collection |
+| [ mdstat_disks ](https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf) | md.disks | number of devices in the down state for the ${label:device} ${label:raid_level} array. Any number > 0 indicates that the array is degraded. |
+| [ mdstat_mismatch_cnt ](https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf) | md.mismatch_cnt | number of unsynchronized blocks for the ${label:device} ${label:raid_level} array |
+| [ mdstat_nonredundant_last_collected ](https://github.com/netdata/netdata/blob/master/health/health.d/mdstat.conf) | md.nonredundant | number of seconds since the last successful data collection |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/memory_modules_dimms.md b/collectors/proc.plugin/integrations/memory_modules_dimms.md
new file mode 100644
index 00000000000000..351c6fcd72234c
--- /dev/null
+++ b/collectors/proc.plugin/integrations/memory_modules_dimms.md
@@ -0,0 +1,146 @@
+
+
+# Memory modules (DIMMs)
+
+
+
+
+
+Plugin: proc.plugin
+Module: /sys/devices/system/edac/mc
+
+
+
+## Overview
+
+The Error Detection and Correction (EDAC) subsystem detects and reports errors in the system's memory,
+primarily ECC (Error-Correcting Code) memory errors.
+
+The collector provides data for:
+
+- Per memory controller (MC): correctable and uncorrectable errors. These can be of 2 kinds:
+ - errors related to a DIMM
+ - errors that cannot be associated with a DIMM
+
+- Per memory DIMM: correctable and uncorrectable errors. There are 2 kinds:
+ - memory controllers that can identify the physical DIMMs and report errors directly for them,
+ - memory controllers that report errors for memory address ranges that can be linked to DIMMs.
+ In this case the number of DIMMs reported may exceed the number of physical DIMMs installed.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per memory controller
+
+These metrics refer to the memory controller.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| controller | [mcX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#mcx-directories) directory name of this memory controller. |
+| mc_name | Memory controller type. |
+| size_mb | The amount of memory in megabytes that this memory controller manages. |
+| max_location | Last available memory slot in this memory controller. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.edac_mc | correctable, uncorrectable, correctable_noinfo, uncorrectable_noinfo | errors/s |
+
+### Per memory module
+
+These metrics refer to the memory module (or rank, [depends on the memory controller](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#f5)).
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| controller | [mcX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#mcx-directories) directory name of this memory controller. |
+| dimm | [dimmX or rankX](https://www.kernel.org/doc/html/v5.0/admin-guide/ras.html#dimmx-or-rankx-directories) directory name of this memory module. |
+| dimm_dev_type | Type of DRAM device used in this memory module. For example, x1, x2, x4, x8. |
+| dimm_edac_mode | Used type of error detection and correction. For example, S4ECD4ED would mean a Chipkill with x4 DRAM. |
+| dimm_label | Label assigned to this memory module. |
+| dimm_location | Location of the memory module. |
+| dimm_mem_type | Type of the memory module. |
+| size | The amount of memory in megabytes that this memory module manages. |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.edac_mc_dimm | correctable, uncorrectable | errors/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ ecc_memory_mc_noinfo_correctable ](https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf) | mem.edac_mc | memory controller ${label:controller} ECC correctable errors (unknown DIMM slot) in the last 10 minutes |
+| [ ecc_memory_mc_noinfo_uncorrectable ](https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf) | mem.edac_mc | memory controller ${label:controller} ECC uncorrectable errors (unknown DIMM slot) in the last 10 minutes |
+| [ ecc_memory_dimm_correctable ](https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf) | mem.edac_mc_dimm | DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC correctable errors in the last 10 minutes |
+| [ ecc_memory_dimm_uncorrectable ](https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf) | mem.edac_mc_dimm | DIMM ${label:dimm} controller ${label:controller} (location ${label:dimm_location}) ECC uncorrectable errors in the last 10 minutes |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/memory_statistics.md b/collectors/proc.plugin/integrations/memory_statistics.md
new file mode 100644
index 00000000000000..52f1bf530c5a78
--- /dev/null
+++ b/collectors/proc.plugin/integrations/memory_statistics.md
@@ -0,0 +1,138 @@
+
+
+# Memory Statistics
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/vmstat
+
+
+
+## Overview
+
+This integration monitors the Linux virtual memory subsystem.
+
+The collector provides information about memory management, indicating how effectively the kernel
+allocates and frees memory resources in response to system demands.
+
+Monitors page faults, which occur when a process requests a portion of its memory that isn't
+immediately available. Monitoring these events can help diagnose inefficiencies in memory management and
+provide insights into application behavior.
+
+Tracks swapping activity — a vital aspect of memory management where the kernel moves data from RAM to
+swap space, and vice versa, based on memory demand and usage. It also monitors the utilization of zswap,
+a compressed cache for swap pages, and provides insights into its usage and performance implications.
+
+In the context of virtualized environments, it tracks the ballooning mechanism which is used to balance
+memory resources between host and guest systems.
+
+For systems using NUMA architecture, it provides insights into the local and remote memory accesses, which
+can impact the performance based on the memory access times.
+
+The collector also watches for 'Out of Memory' kills, a drastic measure taken by the system when it runs out
+of memory resources.
+
+
+
+
+This collector is only supported on the following platforms:
+
+- linux
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Memory Statistics instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.swapio | in, out | KiB/s |
+| system.pgpgio | in, out | KiB/s |
+| system.pgfaults | minor, major | faults/s |
+| mem.balloon | inflate, deflate, migrate | KiB/s |
+| mem.zswapio | in, out | KiB/s |
+| mem.ksm_cow | swapin, write | KiB/s |
+| mem.thp_faults | alloc, fallback, fallback_charge | events/s |
+| mem.thp_file | alloc, fallback, mapped, fallback_charge | events/s |
+| mem.thp_zero | alloc, failed | events/s |
+| mem.thp_collapse | alloc, failed | events/s |
+| mem.thp_split | split, failed, split_pmd, split_deferred | events/s |
+| mem.thp_swapout | swapout, fallback | events/s |
+| mem.thp_compact | success, fail, stall | events/s |
+| mem.oom_kill | kills | kills/s |
+| mem.numa | local, foreign, interleave, other, pte_updates, huge_pte_updates, hint_faults, hint_faults_local, pages_migrated | events/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ 30min_ram_swapped_out ](https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf) | mem.swapio | percentage of the system RAM swapped in the last 30 minutes |
+| [ oom_kill ](https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf) | mem.oom_kill | number of out of memory kills in the last 30 minutes |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/memory_usage.md b/collectors/proc.plugin/integrations/memory_usage.md
new file mode 100644
index 00000000000000..141bd29ad637da
--- /dev/null
+++ b/collectors/proc.plugin/integrations/memory_usage.md
@@ -0,0 +1,135 @@
+
+
+# Memory Usage
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/meminfo
+
+
+
+## Overview
+
+`/proc/meminfo` provides detailed information about the system's current memory usage. It includes information
+about different types of memory, RAM, Swap, ZSwap, HugePages, Transparent HugePages (THP), Kernel memory,
+SLAB memory, memory mappings, and more.
+
+Monitoring /proc/meminfo can be useful for:
+
+- **Performance Tuning**: Understanding your system's memory usage can help you make decisions about system
+ tuning and optimization. For example, if your system is frequently low on free memory, it might benefit
+ from more RAM.
+
+- **Troubleshooting**: If your system is experiencing problems, `/proc/meminfo` can provide clues about
+ whether memory usage is a factor. For example, if your system is slow and cached swap is high, it could
+ mean that your system is swapping out a lot of memory to disk, which can degrade performance.
+
+- **Capacity Planning**: By monitoring memory usage over time, you can understand trends and make informed
+ decisions about future capacity needs.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Memory Usage instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.ram | free, used, cached, buffers | MiB |
+| mem.available | avail | MiB |
+| mem.swap | free, used | MiB |
+| mem.swap_cached | cached | MiB |
+| mem.zswap | in-ram, on-disk | MiB |
+| mem.hwcorrupt | HardwareCorrupted | MiB |
+| mem.committed | Committed_AS | MiB |
+| mem.writeback | Dirty, Writeback, FuseWriteback, NfsWriteback, Bounce | MiB |
+| mem.kernel | Slab, KernelStack, PageTables, VmallocUsed, Percpu | MiB |
+| mem.slab | reclaimable, unreclaimable | MiB |
+| mem.hugepages | free, used, surplus, reserved | MiB |
+| mem.thp | anonymous, shmem | MiB |
+| mem.thp_details | ShmemPmdMapped, FileHugePages, FilePmdMapped | MiB |
+| mem.reclaiming | Active, Inactive, Active(anon), Inactive(anon), Active(file), Inactive(file), Unevictable, Mlocked | MiB |
+| mem.high_low | high_used, low_used, high_free, low_free | MiB |
+| mem.cma | used, free | MiB |
+| mem.directmaps | 4k, 2m, 4m, 1g | MiB |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ ram_in_use ](https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf) | system.ram | system memory utilization |
+| [ ram_available ](https://github.com/netdata/netdata/blob/master/health/health.d/ram.conf) | mem.available | percentage of estimated amount of RAM available for userspace processes, without causing swapping |
+| [ used_swap ](https://github.com/netdata/netdata/blob/master/health/health.d/swap.conf) | mem.swap | swap memory utilization |
+| [ 1hour_memory_hw_corrupted ](https://github.com/netdata/netdata/blob/master/health/health.d/memory.conf) | mem.hwcorrupt | amount of memory corrupted due to a hardware failure |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/network_interfaces.md b/collectors/proc.plugin/integrations/network_interfaces.md
new file mode 100644
index 00000000000000..0cfd56faeeb2f7
--- /dev/null
+++ b/collectors/proc.plugin/integrations/network_interfaces.md
@@ -0,0 +1,137 @@
+
+
+# Network interfaces
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/net/dev
+
+
+
+## Overview
+
+Monitor network interface metrics about bandwidth, state, errors and more.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Network interfaces instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.net | received, sent | kilobits/s |
+
+### Per network device
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| interface_type | TBD |
+| device | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| net.net | received, sent | kilobits/s |
+| net.speed | speed | kilobits/s |
+| net.duplex | full, half, unknown | state |
+| net.operstate | up, down, notpresent, lowerlayerdown, testing, dormant, unknown | state |
+| net.carrier | up, down | state |
+| net.mtu | mtu | octets |
+| net.packets | received, sent, multicast | packets/s |
+| net.errors | inbound, outbound | errors/s |
+| net.drops | inbound, outbound | drops/s |
+| net.fifo | receive, transmit | errors |
+| net.compressed | received, sent | packets/s |
+| net.events | frames, collisions, carrier | events/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ interface_speed ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.net | network interface ${label:device} current speed |
+| [ 1m_received_traffic_overflow ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.net | average inbound utilization for the network interface ${label:device} over the last minute |
+| [ 1m_sent_traffic_overflow ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.net | average outbound utilization for the network interface ${label:device} over the last minute |
+| [ inbound_packets_dropped_ratio ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.drops | ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes |
+| [ outbound_packets_dropped_ratio ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.drops | ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes |
+| [ wifi_inbound_packets_dropped_ratio ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.drops | ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes |
+| [ wifi_outbound_packets_dropped_ratio ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.drops | ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes |
+| [ 1m_received_packets_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.packets | average number of packets received by the network interface ${label:device} over the last minute |
+| [ 10s_received_packets_storm ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.packets | ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute |
+| [ 10min_fifo_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/net.conf) | net.fifo | number of FIFO errors for the network interface ${label:device} in the last 10 minutes |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/network_statistics.md b/collectors/proc.plugin/integrations/network_statistics.md
new file mode 100644
index 00000000000000..726fd9d6145c54
--- /dev/null
+++ b/collectors/proc.plugin/integrations/network_statistics.md
@@ -0,0 +1,161 @@
+
+
+# Network statistics
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/net/netstat
+
+
+
+## Overview
+
+This integration provides metrics from the `netstat`, `snmp` and `snmp6` modules.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Network statistics instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.ip | received, sent | kilobits/s |
+| ip.tcpmemorypressures | pressures | events/s |
+| ip.tcpconnaborts | baddata, userclosed, nomemory, timeout, linger, failed | connections/s |
+| ip.tcpreorders | timestamp, sack, fack, reno | packets/s |
+| ip.tcpofo | inqueue, dropped, merged, pruned | packets/s |
+| ip.tcpsyncookies | received, sent, failed | packets/s |
+| ip.tcp_syn_queue | drops, cookies | packets/s |
+| ip.tcp_accept_queue | overflows, drops | packets/s |
+| ip.tcpsock | connections | active connections |
+| ip.tcppackets | received, sent | packets/s |
+| ip.tcperrors | InErrs, InCsumErrors, RetransSegs | packets/s |
+| ip.tcpopens | active, passive | connections/s |
+| ip.tcphandshake | EstabResets, OutRsts, AttemptFails, SynRetrans | events/s |
+| ipv4.packets | received, sent, forwarded, delivered | packets/s |
+| ipv4.errors | InDiscards, OutDiscards, InNoRoutes, OutNoRoutes, InHdrErrors, InAddrErrors, InTruncatedPkts, InCsumErrors | packets/s |
+| ipv4.bcast | received, sent | kilobits/s |
+| ipv4.bcastpkts | received, sent | packets/s |
+| ipv4.mcast | received, sent | kilobits/s |
+| ipv4.mcastpkts | received, sent | packets/s |
+| ipv4.icmp | received, sent | packets/s |
+| ipv4.icmpmsg | InEchoReps, OutEchoReps, InDestUnreachs, OutDestUnreachs, InRedirects, OutRedirects, InEchos, OutEchos, InRouterAdvert, OutRouterAdvert, InRouterSelect, OutRouterSelect, InTimeExcds, OutTimeExcds, InParmProbs, OutParmProbs, InTimestamps, OutTimestamps, InTimestampReps, OutTimestampReps | packets/s |
+| ipv4.icmp_errors | InErrors, OutErrors, InCsumErrors | packets/s |
+| ipv4.udppackets | received, sent | packets/s |
+| ipv4.udperrors | RcvbufErrors, SndbufErrors, InErrors, NoPorts, InCsumErrors, IgnoredMulti | events/s |
+| ipv4.udplite | received, sent | packets/s |
+| ipv4.udplite_errors | RcvbufErrors, SndbufErrors, InErrors, NoPorts, InCsumErrors, IgnoredMulti | packets/s |
+| ipv4.ecnpkts | CEP, NoECTP, ECTP0, ECTP1 | packets/s |
+| ipv4.fragsin | ok, failed, all | packets/s |
+| ipv4.fragsout | ok, failed, created | packets/s |
+| system.ipv6 | received, sent | kilobits/s |
+| ipv6.packets | received, sent, forwarded, delivers | packets/s |
+| ipv6.errors | InDiscards, OutDiscards, InHdrErrors, InAddrErrors, InUnknownProtos, InTooBigErrors, InTruncatedPkts, InNoRoutes, OutNoRoutes | packets/s |
+| ipv6.bcast | received, sent | kilobits/s |
+| ipv6.mcast | received, sent | kilobits/s |
+| ipv6.mcastpkts | received, sent | packets/s |
+| ipv6.udppackets | received, sent | packets/s |
+| ipv6.udperrors | RcvbufErrors, SndbufErrors, InErrors, NoPorts, InCsumErrors, IgnoredMulti | events/s |
+| ipv6.udplitepackets | received, sent | packets/s |
+| ipv6.udpliteerrors | RcvbufErrors, SndbufErrors, InErrors, NoPorts, InCsumErrors | events/s |
+| ipv6.icmp | received, sent | messages/s |
+| ipv6.icmpredir | received, sent | redirects/s |
+| ipv6.icmperrors | InErrors, OutErrors, InCsumErrors, InDestUnreachs, InPktTooBigs, InTimeExcds, InParmProblems, OutDestUnreachs, OutPktTooBigs, OutTimeExcds, OutParmProblems | errors/s |
+| ipv6.icmpechos | InEchos, OutEchos, InEchoReplies, OutEchoReplies | messages/s |
+| ipv6.groupmemb | InQueries, OutQueries, InResponses, OutResponses, InReductions, OutReductions | messages/s |
+| ipv6.icmprouter | InSolicits, OutSolicits, InAdvertisements, OutAdvertisements | messages/s |
+| ipv6.icmpneighbor | InSolicits, OutSolicits, InAdvertisements, OutAdvertisements | messages/s |
+| ipv6.icmpmldv2 | received, sent | reports/s |
+| ipv6.icmptypes | InType1, InType128, InType129, InType136, OutType1, OutType128, OutType129, OutType133, OutType135, OutType143 | messages/s |
+| ipv6.ect | InNoECTPkts, InECT1Pkts, InECT0Pkts, InCEPkts | packets/s |
+| ipv6.ect | InNoECTPkts, InECT1Pkts, InECT0Pkts, InCEPkts | packets/s |
+| ipv6.fragsin | ok, failed, timeout, all | packets/s |
+| ipv6.fragsout | ok, failed, all | packets/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ 1m_tcp_syn_queue_drops ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf) | ip.tcp_syn_queue | average number of SYN requests dropped due to the full TCP SYN queue over the last minute (SYN cookies were not enabled) |
+| [ 1m_tcp_syn_queue_cookies ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf) | ip.tcp_syn_queue | average number of sent SYN cookies due to the full TCP SYN queue over the last minute |
+| [ 1m_tcp_accept_queue_overflows ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf) | ip.tcp_accept_queue | average number of overflows in the TCP accept queue over the last minute |
+| [ 1m_tcp_accept_queue_drops ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_listen.conf) | ip.tcp_accept_queue | average number of dropped packets in the TCP accept queue over the last minute |
+| [ tcp_connections ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_conn.conf) | ip.tcpsock | TCP connections utilization |
+| [ 1m_ip_tcp_resets_sent ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf) | ip.tcphandshake | average number of sent TCP RESETS over the last minute |
+| [ 10s_ip_tcp_resets_sent ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf) | ip.tcphandshake | average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has crashed. Netdata will not send a clear notification for this alarm. |
+| [ 1m_ip_tcp_resets_received ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf) | ip.tcphandshake | average number of received TCP RESETS over the last minute |
+| [ 10s_ip_tcp_resets_received ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf) | ip.tcphandshake | average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed. Netdata will not send a clear notification for this alarm. |
+| [ 1m_ipv4_udp_receive_buffer_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf) | ipv4.udperrors | average number of UDP receive buffer errors over the last minute |
+| [ 1m_ipv4_udp_send_buffer_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/udp_errors.conf) | ipv4.udperrors | average number of UDP send buffer errors over the last minute |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/nfs_client.md b/collectors/proc.plugin/integrations/nfs_client.md
new file mode 100644
index 00000000000000..db584771483bd8
--- /dev/null
+++ b/collectors/proc.plugin/integrations/nfs_client.md
@@ -0,0 +1,99 @@
+
+
+# NFS Client
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/net/rpc/nfs
+
+
+
+## Overview
+
+This integration provides statistics from the Linux kernel's NFS Client.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per NFS Client instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| nfs.net | udp, tcp | operations/s |
+| nfs.rpc | calls, retransmits, auth_refresh | calls/s |
+| nfs.proc2 | a dimension per proc2 call | calls/s |
+| nfs.proc3 | a dimension per proc3 call | calls/s |
+| nfs.proc4 | a dimension per proc4 call | calls/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/nfs_server.md b/collectors/proc.plugin/integrations/nfs_server.md
new file mode 100644
index 00000000000000..0c906b4d8610de
--- /dev/null
+++ b/collectors/proc.plugin/integrations/nfs_server.md
@@ -0,0 +1,104 @@
+
+
+# NFS Server
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/net/rpc/nfsd
+
+
+
+## Overview
+
+This integration provides statistics from the Linux kernel's NFS Server.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per NFS Server instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| nfsd.readcache | hits, misses, nocache | reads/s |
+| nfsd.filehandles | stale | handles/s |
+| nfsd.io | read, write | kilobytes/s |
+| nfsd.threads | threads | threads |
+| nfsd.net | udp, tcp | packets/s |
+| nfsd.rpc | calls, bad_format, bad_auth | calls/s |
+| nfsd.proc2 | a dimension per proc2 call | calls/s |
+| nfsd.proc3 | a dimension per proc3 call | calls/s |
+| nfsd.proc4 | a dimension per proc4 call | calls/s |
+| nfsd.proc4ops | a dimension per proc4 operation | operations/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/non-uniform_memory_access.md b/collectors/proc.plugin/integrations/non-uniform_memory_access.md
new file mode 100644
index 00000000000000..6f495fb7905392
--- /dev/null
+++ b/collectors/proc.plugin/integrations/non-uniform_memory_access.md
@@ -0,0 +1,111 @@
+
+
+# Non-Uniform Memory Access
+
+
+
+
+
+Plugin: proc.plugin
+Module: /sys/devices/system/node
+
+
+
+## Overview
+
+Information about NUMA (Non-Uniform Memory Access) nodes on the system.
+
+NUMA is a method of configuring a cluster of microprocessors in a multiprocessing system so that they can
+share memory locally, improving performance and the ability of the system to be expanded. NUMA is used in a
+symmetric multiprocessing (SMP) system.
+
+In a NUMA system, processors, memory, and I/O devices are grouped together into cells, also known as nodes.
+Each node has its own memory and set of I/O devices, and one or more processors. While a processor can access
+memory in any of the nodes, it does so faster when accessing memory within its own node.
+
+The collector provides statistics on memory allocations for processes running on the NUMA nodes, revealing the
+efficiency of memory allocations in multi-node systems.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per numa node
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| numa_node | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.numa_nodes | hit, miss, local, foreign, interleave, other | events/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/page_types.md b/collectors/proc.plugin/integrations/page_types.md
new file mode 100644
index 00000000000000..b228629b6e3e7c
--- /dev/null
+++ b/collectors/proc.plugin/integrations/page_types.md
@@ -0,0 +1,113 @@
+
+
+# Page types
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/pagetypeinfo
+
+
+
+## Overview
+
+This integration provides metrics about the system's memory page types.
+
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Page types instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.pagetype_global | a dimension per pagesize | B |
+
+### Per node, zone, type
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| node_id | TBD |
+| node_zone | TBD |
+| node_type | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.pagetype | a dimension per pagesize | B |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/power_supply.md b/collectors/proc.plugin/integrations/power_supply.md
new file mode 100644
index 00000000000000..9a474e82a39354
--- /dev/null
+++ b/collectors/proc.plugin/integrations/power_supply.md
@@ -0,0 +1,107 @@
+
+
+# Power Supply
+
+
+
+
+
+Plugin: proc.plugin
+Module: /sys/class/power_supply
+
+
+
+## Overview
+
+This integration monitors Power supply metrics, such as battery status, AC power status and more.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per power device
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| device | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| powersupply.capacity | capacity | percentage |
+| powersupply.charge | empty_design, empty, now, full, full_design | Ah |
+| powersupply.energy | empty_design, empty, now, full, full_design | Wh |
+| powersupply.voltage | min_design, min, now, max, max_design | V |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ linux_power_supply_capacity ](https://github.com/netdata/netdata/blob/master/health/health.d/linux_power_supply.conf) | powersupply.capacity | percentage of remaining power supply capacity |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/pressure_stall_information.md b/collectors/proc.plugin/integrations/pressure_stall_information.md
new file mode 100644
index 00000000000000..53f4aa0508e491
--- /dev/null
+++ b/collectors/proc.plugin/integrations/pressure_stall_information.md
@@ -0,0 +1,129 @@
+
+
+# Pressure Stall Information
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/pressure
+
+
+
+## Overview
+
+Introduced in Linux kernel 4.20, `/proc/pressure` provides information about system pressure stall information
+(PSI). PSI is a feature that allows the system to track the amount of time the system is stalled due to
+resource contention, such as CPU, memory, or I/O.
+
+The collector monitors 4 separate files for CPU, memory, I/O, and IRQ:
+
+- **cpu**: Tracks the amount of time tasks are stalled due to CPU contention.
+- **memory**: Tracks the amount of time tasks are stalled due to memory contention.
+- **io**: Tracks the amount of time tasks are stalled due to I/O contention.
+- **irq**: Tracks the amount of time tasks are stalled due to IRQ contention.
+
+Each of them provides metrics for stall time over the last 10 seconds, 1 minute, and 5 minutes.
+
+Monitoring the /proc/pressure files can provide important insights into system performance and capacity planning:
+
+- **Identifying resource contention**: If these metrics are consistently high, it indicates that tasks are
+ frequently being stalled due to lack of resources, which can significantly degrade system performance.
+
+- **Troubleshooting performance issues**: If a system is experiencing performance issues, these metrics can
+ help identify whether resource contention is the cause.
+
+- **Capacity planning**: By monitoring these metrics over time, you can understand trends in resource
+ utilization and make informed decisions about when to add more resources to your system.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Pressure Stall Information instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.cpu_some_pressure | some10, some60, some300 | percentage |
+| system.cpu_some_pressure_stall_time | time | ms |
+| system.cpu_full_pressure | some10, some60, some300 | percentage |
+| system.cpu_full_pressure_stall_time | time | ms |
+| system.memory_some_pressure | some10, some60, some300 | percentage |
+| system.memory_some_pressure_stall_time | time | ms |
+| system.memory_full_pressure | some10, some60, some300 | percentage |
+| system.memory_full_pressure_stall_time | time | ms |
+| system.io_some_pressure | some10, some60, some300 | percentage |
+| system.io_some_pressure_stall_time | time | ms |
+| system.io_full_pressure | some10, some60, some300 | percentage |
+| system.io_full_pressure_stall_time | time | ms |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/sctp_statistics.md b/collectors/proc.plugin/integrations/sctp_statistics.md
new file mode 100644
index 00000000000000..15c0d424d0b249
--- /dev/null
+++ b/collectors/proc.plugin/integrations/sctp_statistics.md
@@ -0,0 +1,99 @@
+
+
+# SCTP Statistics
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/net/sctp/snmp
+
+
+
+## Overview
+
+This integration provides statistics about the Stream Control Transmission Protocol (SCTP).
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per SCTP Statistics instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| sctp.established | established | associations |
+| sctp.transitions | active, passive, aborted, shutdown | transitions/s |
+| sctp.packets | received, sent | packets/s |
+| sctp.packet_errors | invalid, checksum | packets/s |
+| sctp.fragmentation | reassembled, fragmented | packets/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/socket_statistics.md b/collectors/proc.plugin/integrations/socket_statistics.md
new file mode 100644
index 00000000000000..d8ef26647c7aac
--- /dev/null
+++ b/collectors/proc.plugin/integrations/socket_statistics.md
@@ -0,0 +1,109 @@
+
+
+# Socket statistics
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/net/sockstat
+
+
+
+## Overview
+
+This integration provides socket statistics.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Socket statistics instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ip.sockstat_sockets | used | sockets |
+| ipv4.sockstat_tcp_sockets | alloc, orphan, inuse, timewait | sockets |
+| ipv4.sockstat_tcp_mem | mem | KiB |
+| ipv4.sockstat_udp_sockets | inuse | sockets |
+| ipv4.sockstat_udp_mem | mem | KiB |
+| ipv4.sockstat_udplite_sockets | inuse | sockets |
+| ipv4.sockstat_raw_sockets | inuse | sockets |
+| ipv4.sockstat_frag_sockets | inuse | fragments |
+| ipv4.sockstat_frag_mem | mem | KiB |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ tcp_orphans ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_orphans.conf) | ipv4.sockstat_tcp_sockets | orphan IPv4 TCP sockets utilization |
+| [ tcp_memory ](https://github.com/netdata/netdata/blob/master/health/health.d/tcp_mem.conf) | ipv4.sockstat_tcp_mem | TCP memory utilization |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/softirq_statistics.md b/collectors/proc.plugin/integrations/softirq_statistics.md
new file mode 100644
index 00000000000000..f966cf9714d993
--- /dev/null
+++ b/collectors/proc.plugin/integrations/softirq_statistics.md
@@ -0,0 +1,133 @@
+
+
+# SoftIRQ statistics
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/softirqs
+
+
+
+## Overview
+
+In the Linux kernel, handling of hardware interrupts is split into two halves: the top half and the bottom half.
+The top half is the routine that responds immediately to an interrupt, while the bottom half is deferred to be processed later.
+
+Softirqs are a mechanism in the Linux kernel used to handle the bottom halves of interrupts, which can be
+deferred and processed later in a context where it's safe to enable interrupts.
+
+The actual work of handling the interrupt is offloaded to a softirq and executed later when the system
+decides it's a good time to process them. This helps to keep the system responsive by not blocking the top
+half for too long, which could lead to missed interrupts.
+
+Monitoring `/proc/softirqs` is useful for:
+
+- **Performance tuning**: A high rate of softirqs could indicate a performance issue. For instance, a high
+ rate of network softirqs (`NET_RX` and `NET_TX`) could indicate a network performance issue.
+
+- **Troubleshooting**: If a system is behaving unexpectedly, checking the softirqs could provide clues about
+ what is going on. For example, a sudden increase in block device softirqs (`BLOCK`) might indicate a problem
+ with a disk.
+
+- **Understanding system behavior**: Knowing what types of softirqs are happening can help you understand what
+ your system is doing, particularly in terms of how it's interacting with hardware and how it's handling
+ interrupts.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per SoftIRQ statistics instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.softirqs | a dimension per softirq | softirqs/s |
+
+### Per cpu core
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| cpu | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cpu.softirqs | a dimension per softirq | softirqs/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/softnet_statistics.md b/collectors/proc.plugin/integrations/softnet_statistics.md
new file mode 100644
index 00000000000000..58e6cf6e558568
--- /dev/null
+++ b/collectors/proc.plugin/integrations/softnet_statistics.md
@@ -0,0 +1,135 @@
+
+
+# Softnet Statistics
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/net/softnet_stat
+
+
+
+## Overview
+
+`/proc/net/softnet_stat` provides statistics that relate to the handling of network packets by softirq.
+
+It provides information about:
+
+- Total number of processed packets (`processed`).
+- Times ksoftirq ran out of quota (`squeezed`).
+- Times net_rx_action was rescheduled.
+- Number of times all lists were processed before the quota was exhausted.
+- Number of times not all lists were processed because the quota was exhausted.
+- Number of times net_rx_action was rescheduled for GRO (Generic Receive Offload) cells.
+- Number of times GRO cells were processed.
+
+Monitoring the `/proc/net/softnet_stat` file can be useful for:
+
+- **Network performance monitoring**: By tracking the total number of processed packets and how many packets
+ were dropped, you can gain insights into your system's network performance.
+
+- **Troubleshooting**: If you're experiencing network-related issues, this collector can provide valuable clues.
+ For instance, a high number of dropped packets may indicate a network problem.
+
+- **Capacity planning**: If your system is consistently processing near its maximum capacity of network
+ packets, it might be time to consider upgrading your network infrastructure.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Softnet Statistics instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.softnet_stat | processed, dropped, squeezed, received_rps, flow_limit_count | events/s |
+
+### Per cpu core
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cpu.softnet_stat | processed, dropped, squeezed, received_rps, flow_limit_count | events/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ 1min_netdev_backlog_exceeded ](https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf) | system.softnet_stat | average number of dropped packets in the last minute due to exceeded net.core.netdev_max_backlog |
+| [ 1min_netdev_budget_ran_outs ](https://github.com/netdata/netdata/blob/master/health/health.d/softnet.conf) | system.softnet_stat | average number of times ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs with work remaining over the last minute (this can be a cause for dropped packets) |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/synproxy.md b/collectors/proc.plugin/integrations/synproxy.md
new file mode 100644
index 00000000000000..2db17ef6fec1a8
--- /dev/null
+++ b/collectors/proc.plugin/integrations/synproxy.md
@@ -0,0 +1,97 @@
+
+
+# Synproxy
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/net/stat/synproxy
+
+
+
+## Overview
+
+This integration provides statistics about the Synproxy netfilter module.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Synproxy instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| netfilter.synproxy_syn_received | received | packets/s |
+| netfilter.synproxy_conn_reopened | reopened | connections/s |
+| netfilter.synproxy_cookies | valid, invalid, retransmits | cookies/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/system_load_average.md b/collectors/proc.plugin/integrations/system_load_average.md
new file mode 100644
index 00000000000000..6e986d90c4a2da
--- /dev/null
+++ b/collectors/proc.plugin/integrations/system_load_average.md
@@ -0,0 +1,128 @@
+
+
+# System Load Average
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/loadavg
+
+
+
+## Overview
+
+The `/proc/loadavg` file provides information about the system load average.
+
+The load average is a measure of the amount of computational work that a system performs. It is a
+representation of the average system load over a period of time.
+
+This file contains three numbers representing the system load averages for the last 1, 5, and 15 minutes,
+respectively. It also includes the number of currently running processes and the total number of processes.
+
+Monitoring the load average can be used for:
+
+- **System performance**: If the load average is too high, it may indicate that your system is overloaded.
+ On a system with a single CPU, if the load average is 1, it means the single CPU is fully utilized. If the
+ load averages are consistently higher than the number of CPUs/cores, it may indicate that your system is
+ overloaded and tasks are waiting for CPU time.
+
+- **Troubleshooting**: If the load average is unexpectedly high, it can be a sign of a problem. This could be
+ due to a runaway process, a software bug, or a hardware issue.
+
+- **Capacity planning**: By monitoring the load average over time, you can understand the trends in your
+ system's workload. This can help with capacity planning and scaling decisions.
+
+Remember that load average not only considers CPU usage, but also includes processes waiting for disk I/O.
+Therefore, high load averages could be due to I/O contention as well as CPU contention.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per System Load Average instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.load | load1, load5, load15 | load |
+| system.active_processes | active | processes |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ load_cpu_number ](https://github.com/netdata/netdata/blob/master/health/health.d/load.conf) | system.load | number of active CPU cores in the system |
+| [ load_average_15 ](https://github.com/netdata/netdata/blob/master/health/health.d/load.conf) | system.load | system fifteen-minute load average |
+| [ load_average_5 ](https://github.com/netdata/netdata/blob/master/health/health.d/load.conf) | system.load | system five-minute load average |
+| [ load_average_1 ](https://github.com/netdata/netdata/blob/master/health/health.d/load.conf) | system.load | system one-minute load average |
+| [ active_processes ](https://github.com/netdata/netdata/blob/master/health/health.d/processes.conf) | system.active_processes | system process IDs (PID) space utilization |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/system_statistics.md b/collectors/proc.plugin/integrations/system_statistics.md
new file mode 100644
index 00000000000000..f3df1a19aa9c76
--- /dev/null
+++ b/collectors/proc.plugin/integrations/system_statistics.md
@@ -0,0 +1,169 @@
+
+
+# System statistics
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/stat
+
+
+
+## Overview
+
+CPU utilization, states and frequencies and key Linux system performance metrics.
+
+The `/proc/stat` file provides various types of system statistics:
+
+- The overall system CPU usage statistics
+- Per CPU core statistics
+- The total context switching of the system
+- The total number of processes running
+- The total CPU interrupts
+- The total CPU softirqs
+
+The collector also reads:
+
+- `/proc/schedstat` for statistics about the process scheduler in the Linux kernel.
+- `/sys/devices/system/cpu/[X]/thermal_throttle/core_throttle_count` to get the count of thermal throttling events for a specific CPU core on Linux systems.
+- `/sys/devices/system/cpu/[X]/thermal_throttle/package_throttle_count` to get the count of thermal throttling events for a specific CPU package on a Linux system.
+- `/sys/devices/system/cpu/[X]/cpufreq/scaling_cur_freq` to get the current operating frequency of a specific CPU core.
+- `/sys/devices/system/cpu/[X]/cpufreq/stats/time_in_state` to get the amount of time the CPU has spent in each of its available frequency states.
+- `/sys/devices/system/cpu/[X]/cpuidle/state[X]/name` to get the names of the idle states for each CPU core in a Linux system.
+- `/sys/devices/system/cpu/[X]/cpuidle/state[X]/time` to get the total time each specific CPU core has spent in each idle state since the system was started.
+
+
+
+
+This collector is only supported on the following platforms:
+
+- linux
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+The collector auto-detects all metrics. No configuration is needed.
+
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The collector disables CPU frequency and idle state monitoring when there are more than 128 CPU cores available.
+
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per System statistics instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.cpu | guest_nice, guest, steal, softirq, irq, user, system, nice, iowait, idle | percentage |
+| system.intr | interrupts | interrupts/s |
+| system.ctxt | switches | context switches/s |
+| system.forks | started | processes/s |
+| system.processes | running, blocked | processes |
+| cpu.core_throttling | a dimension per cpu core | events/s |
+| cpu.package_throttling | a dimension per package | events/s |
+| cpu.cpufreq | a dimension per cpu core | MHz |
+
+### Per cpu core
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| cpu | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| cpu.cpu | guest_nice, guest, steal, softirq, irq, user, system, nice, iowait, idle | percentage |
+| cpuidle.cpu_cstate_residency_time | a dimension per c-state | percentage |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ 10min_cpu_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf) | system.cpu | average CPU utilization over the last 10 minutes (excluding iowait, nice and steal) |
+| [ 10min_cpu_iowait ](https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf) | system.cpu | average CPU iowait time over the last 10 minutes |
+| [ 20min_steal_cpu ](https://github.com/netdata/netdata/blob/master/health/health.d/cpu.conf) | system.cpu | average CPU steal time over the last 20 minutes |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `plugin:proc:/proc/stat` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/system_uptime.md b/collectors/proc.plugin/integrations/system_uptime.md
new file mode 100644
index 00000000000000..0954c0642ed658
--- /dev/null
+++ b/collectors/proc.plugin/integrations/system_uptime.md
@@ -0,0 +1,108 @@
+
+
+# System Uptime
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/uptime
+
+
+
+## Overview
+
+The amount of time the system has been up (running).
+
+Uptime is a critical aspect of overall system performance:
+
+- **Availability**: Uptime monitoring can show whether a server is consistently available or experiences frequent downtimes.
+- **Performance Monitoring**: While server uptime alone doesn't provide detailed performance data, analyzing the duration and frequency of downtimes can help identify patterns or trends.
+- **Proactive problem detection**: If server uptime monitoring reveals unexpected downtimes or a decreasing uptime trend, it can serve as an early warning sign of potential problems.
+- **Root cause analysis**: When investigating server downtime, the uptime metric alone may not provide enough information to pinpoint the exact cause.
+- **Load balancing**: Uptime data can indirectly indicate load balancing issues if certain servers have significantly lower uptimes than others.
+- **Optimize maintenance efforts**: Servers with consistently low uptimes or frequent downtimes may require more attention.
+- **Compliance requirements**: Server uptime data can be used to demonstrate compliance with regulatory requirements or SLAs that mandate a minimum level of server availability.
+
+
+
+
+This collector is only supported on the following platforms:
+
+- linux
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per System Uptime instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.uptime | uptime | seconds |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/wireless_network_interfaces.md b/collectors/proc.plugin/integrations/wireless_network_interfaces.md
new file mode 100644
index 00000000000000..a8d2406ee7b5a7
--- /dev/null
+++ b/collectors/proc.plugin/integrations/wireless_network_interfaces.md
@@ -0,0 +1,100 @@
+
+
+# Wireless network interfaces
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/net/wireless
+
+
+
+## Overview
+
+Monitor wireless devices with metrics about status, link quality, signal level, noise level and more.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per wireless device
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| wireless.status | status | status |
+| wireless.link_quality | link_quality | value |
+| wireless.signal_level | signal_level | dBm |
+| wireless.noise_level | noise_level | dBm |
+| wireless.discarded_packets | nwid, crypt, frag, retry, misc | packets/s |
+| wireless.missed_beacons | missed_beacons | frames/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/zfs_adaptive_replacement_cache.md b/collectors/proc.plugin/integrations/zfs_adaptive_replacement_cache.md
new file mode 100644
index 00000000000000..c200ba673bb0df
--- /dev/null
+++ b/collectors/proc.plugin/integrations/zfs_adaptive_replacement_cache.md
@@ -0,0 +1,125 @@
+
+
+# ZFS Adaptive Replacement Cache
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/spl/kstat/zfs/arcstats
+
+
+
+## Overview
+
+This integration monitors ZFS Adaptive Replacement Cache (ARC) statistics.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per ZFS Adaptive Replacement Cache instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| zfs.arc_size | arcsz, target, min, max | MiB |
+| zfs.l2_size | actual, size | MiB |
+| zfs.reads | arc, demand, prefetch, metadata, l2 | reads/s |
+| zfs.bytes | read, write | KiB/s |
+| zfs.hits | hits, misses | percentage |
+| zfs.hits_rate | hits, misses | events/s |
+| zfs.dhits | hits, misses | percentage |
+| zfs.dhits_rate | hits, misses | events/s |
+| zfs.phits | hits, misses | percentage |
+| zfs.phits_rate | hits, misses | events/s |
+| zfs.mhits | hits, misses | percentage |
+| zfs.mhits_rate | hits, misses | events/s |
+| zfs.l2hits | hits, misses | percentage |
+| zfs.l2hits_rate | hits, misses | events/s |
+| zfs.list_hits | mfu, mfu_ghost, mru, mru_ghost | hits/s |
+| zfs.arc_size_breakdown | recent, frequent | percentage |
+| zfs.memory_ops | direct, throttled, indirect | operations/s |
+| zfs.important_ops | evict_skip, deleted, mutex_miss, hash_collisions | operations/s |
+| zfs.actual_hits | hits, misses | percentage |
+| zfs.actual_hits_rate | hits, misses | events/s |
+| zfs.demand_data_hits | hits, misses | percentage |
+| zfs.demand_data_hits_rate | hits, misses | events/s |
+| zfs.prefetch_data_hits | hits, misses | percentage |
+| zfs.prefetch_data_hits_rate | hits, misses | events/s |
+| zfs.hash_elements | current, max | elements |
+| zfs.hash_chains | current, max | chains |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ zfs_memory_throttle ](https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf) | zfs.memory_ops | number of times ZFS had to limit the ARC growth in the last 10 minutes |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/zfs_pools.md b/collectors/proc.plugin/integrations/zfs_pools.md
new file mode 100644
index 00000000000000..2985d39b066f02
--- /dev/null
+++ b/collectors/proc.plugin/integrations/zfs_pools.md
@@ -0,0 +1,105 @@
+
+
+# ZFS Pools
+
+
+
+
+
+Plugin: proc.plugin
+Module: /proc/spl/kstat/zfs
+
+
+
+## Overview
+
+This integration provides metrics about the state of ZFS pools.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per zfs pool
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| pool | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| zfspool.state | online, degraded, faulted, offline, removed, unavail, suspended | boolean |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ zfs_pool_state_warn ](https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf) | zfspool.state | ZFS pool ${label:pool} state is degraded |
+| [ zfs_pool_state_crit ](https://github.com/netdata/netdata/blob/master/health/health.d/zfs.conf) | zfspool.state | ZFS pool ${label:pool} state is faulted or unavail |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/integrations/zram.md b/collectors/proc.plugin/integrations/zram.md
new file mode 100644
index 00000000000000..111b17c6283c4f
--- /dev/null
+++ b/collectors/proc.plugin/integrations/zram.md
@@ -0,0 +1,106 @@
+
+
+# ZRAM
+
+
+
+
+
+Plugin: proc.plugin
+Module: /sys/block/zram
+
+
+
+## Overview
+
+zRAM, or compressed RAM, is a block device that is backed by a portion of your system's RAM.
+The data written to this block device is compressed and stored in memory.
+
+The collector provides information about the operation and the effectiveness of zRAM on your system.
+
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per zram device
+
+
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| device | TBD |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.zram_usage | compressed, metadata | MiB |
+| mem.zram_savings | savings, original | MiB |
+| mem.zram_ratio | ratio | ratio |
+| mem.zram_efficiency | percent | percentage |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+
+
+There are no configuration options.
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/proc.plugin/ipc.c b/collectors/proc.plugin/ipc.c
index b166deba68515a..204977bdf42c70 100644
--- a/collectors/proc.plugin/ipc.c
+++ b/collectors/proc.plugin/ipc.c
@@ -451,8 +451,8 @@ int do_ipc(int update_every, usec_t dt) {
msq->found = 0;
}
else {
- rrddim_is_obsolete(st_msq_messages, msq->rd_messages);
- rrddim_is_obsolete(st_msq_bytes, msq->rd_bytes);
+ rrddim_is_obsolete___safe_from_collector_thread(st_msq_messages, msq->rd_messages);
+ rrddim_is_obsolete___safe_from_collector_thread(st_msq_bytes, msq->rd_bytes);
// remove message queue from the linked list
if(!msq_prev)
@@ -480,19 +480,19 @@ int do_ipc(int update_every, usec_t dt) {
if(unlikely(dimensions_num > dimensions_limit)) {
collector_info("Message queue statistics has been disabled");
collector_info("There are %lld dimensions in memory but limit was set to %lld", dimensions_num, dimensions_limit);
- rrdset_is_obsolete(st_msq_messages);
- rrdset_is_obsolete(st_msq_bytes);
+ rrdset_is_obsolete___safe_from_collector_thread(st_msq_messages);
+ rrdset_is_obsolete___safe_from_collector_thread(st_msq_bytes);
st_msq_messages = NULL;
st_msq_bytes = NULL;
do_msg = CONFIG_BOOLEAN_NO;
}
else if(unlikely(!message_queue_root)) {
collector_info("Making chart %s (%s) obsolete since it does not have any dimensions", rrdset_name(st_msq_messages), rrdset_id(st_msq_messages));
- rrdset_is_obsolete(st_msq_messages);
+ rrdset_is_obsolete___safe_from_collector_thread(st_msq_messages);
st_msq_messages = NULL;
collector_info("Making chart %s (%s) obsolete since it does not have any dimensions", rrdset_name(st_msq_bytes), rrdset_id(st_msq_bytes));
- rrdset_is_obsolete(st_msq_bytes);
+ rrdset_is_obsolete___safe_from_collector_thread(st_msq_bytes);
st_msq_bytes = NULL;
}
}
diff --git a/collectors/proc.plugin/metadata.yaml b/collectors/proc.plugin/metadata.yaml
index 81d83f50e5a8b9..45351b36f458ba 100644
--- a/collectors/proc.plugin/metadata.yaml
+++ b/collectors/proc.plugin/metadata.yaml
@@ -2643,22 +2643,22 @@ modules:
os: "linux"
- name: inbound_packets_dropped_ratio
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.packets
+ metric: net.drops
info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
os: "linux"
- name: outbound_packets_dropped_ratio
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.packets
+ metric: net.drops
info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
os: "linux"
- name: wifi_inbound_packets_dropped_ratio
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.packets
+ metric: net.drops
info: ratio of inbound dropped packets for the network interface ${label:device} over the last 10 minutes
os: "linux"
- name: wifi_outbound_packets_dropped_ratio
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.packets
+ metric: net.drops
info: ratio of outbound dropped packets for the network interface ${label:device} over the last 10 minutes
os: "linux"
- name: 1m_received_packets_rate
@@ -2669,20 +2669,8 @@ modules:
- name: 10s_received_packets_storm
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
metric: net.packets
- info:
- ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over
- the last minute
+ info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, compared to the rate over the last minute
os: "linux freebsd"
- - name: inbound_packets_dropped
- link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.drops
- info: number of inbound dropped packets for the network interface ${label:device} in the last 10 minutes
- os: "linux"
- - name: outbound_packets_dropped
- link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
- metric: net.drops
- info: number of outbound dropped packets for the network interface ${label:device} in the last 10 minutes
- os: "linux"
- name: 10min_fifo_errors
link: https://github.com/netdata/netdata/blob/master/health/health.d/net.conf
metric: net.fifo
@@ -3140,29 +3128,29 @@ modules:
os: "linux"
- name: tcp_connections
link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_conn.conf
- metric: ipv4.tcpsock
- info: IPv4 TCP connections utilization
+ metric: ip.tcpsock
+ info: TCP connections utilization
os: "linux"
- - name: 1m_ipv4_tcp_resets_sent
+ - name: 1m_ip_tcp_resets_sent
link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
- metric: ipv4.tcphandshake
+ metric: ip.tcphandshake
info: average number of sent TCP RESETS over the last minute
os: "linux"
- - name: 10s_ipv4_tcp_resets_sent
+ - name: 10s_ip_tcp_resets_sent
link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
- metric: ipv4.tcphandshake
+ metric: ip.tcphandshake
info:
average number of sent TCP RESETS over the last 10 seconds. This can indicate a port scan, or that a service running on this host has
crashed. Netdata will not send a clear notification for this alarm.
os: "linux"
- - name: 1m_ipv4_tcp_resets_received
+ - name: 1m_ip_tcp_resets_received
link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
- metric: ipv4.tcphandshake
+ metric: ip.tcphandshake
info: average number of received TCP RESETS over the last minute
os: "linux freebsd"
- - name: 10s_ipv4_tcp_resets_received
+ - name: 10s_ip_tcp_resets_received
link: https://github.com/netdata/netdata/blob/master/health/health.d/tcp_resets.conf
- metric: ipv4.tcphandshake
+ metric: ip.tcphandshake
info:
average number of received TCP RESETS over the last 10 seconds. This can be an indication that a service this host needs has crashed.
Netdata will not send a clear notification for this alarm.
@@ -3189,57 +3177,12 @@ modules:
labels: []
metrics:
- name: system.ip
- description: IP Bandwidth
+ description: IPv4 Bandwidth
unit: "kilobits/s"
chart_type: area
dimensions:
- name: received
- name: sent
- - name: ip.inerrors
- description: IP Input Errors
- unit: "packets/s"
- chart_type: line
- dimensions:
- - name: noroutes
- - name: truncated
- - name: checksum
- - name: ip.mcast
- description: IP Multicast Bandwidth
- unit: "kilobits/s"
- chart_type: area
- dimensions:
- - name: received
- - name: sent
- - name: ip.bcast
- description: IP Broadcast Bandwidth
- unit: "kilobits/s"
- chart_type: area
- dimensions:
- - name: received
- - name: sent
- - name: ip.mcastpkts
- description: IP Multicast Packets
- unit: "packets/s"
- chart_type: line
- dimensions:
- - name: received
- - name: sent
- - name: ip.bcastpkts
- description: IP Broadcast Packets
- unit: "packets/s"
- chart_type: line
- dimensions:
- - name: received
- - name: sent
- - name: ip.ecnpkts
- description: IP ECN Statistics
- unit: "packets/s"
- chart_type: line
- dimensions:
- - name: CEP
- - name: NoECTP
- - name: ECTP0
- - name: ECTP1
- name: ip.tcpmemorypressures
description: TCP Memory Pressures
unit: "events/s"
@@ -3297,31 +3240,52 @@ modules:
dimensions:
- name: overflows
- name: drops
- - name: ipv4.packets
- description: IPv4 Packets
+ - name: ip.tcpsock
+ description: IPv4 TCP Connections
+ unit: "active connections"
+ chart_type: line
+ dimensions:
+ - name: connections
+ - name: ip.tcppackets
+ description: IPv4 TCP Packets
unit: "packets/s"
chart_type: line
dimensions:
- name: received
- name: sent
- - name: forwarded
- - name: delivered
- - name: ipv4.fragsout
- description: IPv4 Fragments Sent
+ - name: ip.tcperrors
+ description: IPv4 TCP Errors
unit: "packets/s"
chart_type: line
dimensions:
- - name: ok
- - name: failed
- - name: created
- - name: ipv4.fragsin
- description: IPv4 Fragments Reassembly
+ - name: InErrs
+ - name: InCsumErrors
+ - name: RetransSegs
+ - name: ip.tcpopens
+ description: IPv4 TCP Opens
+ unit: "connections/s"
+ chart_type: line
+ dimensions:
+ - name: active
+ - name: passive
+ - name: ip.tcphandshake
+ description: IPv4 TCP Handshake Issues
+ unit: "events/s"
+ chart_type: line
+ dimensions:
+ - name: EstabResets
+ - name: OutRsts
+ - name: AttemptFails
+ - name: SynRetrans
+ - name: ipv4.packets
+ description: IPv4 Packets
unit: "packets/s"
chart_type: line
dimensions:
- - name: ok
- - name: failed
- - name: all
+ - name: received
+ - name: sent
+ - name: forwarded
+ - name: delivered
- name: ipv4.errors
description: IPv4 Errors
unit: "packets/s"
@@ -3329,25 +3293,47 @@ modules:
dimensions:
- name: InDiscards
- name: OutDiscards
- - name: InHdrErrors
+ - name: InNoRoutes
- name: OutNoRoutes
+ - name: InHdrErrors
- name: InAddrErrors
- - name: InUnknownProtos
- - name: ipv4.icmp
- description: IPv4 ICMP Packets
+ - name: InTruncatedPkts
+ - name: InCsumErrors
+ - name: ipv4.bcast
+ description: IP Broadcast Bandwidth
+ unit: "kilobits/s"
+ chart_type: area
+ dimensions:
+ - name: received
+ - name: sent
+ - name: ipv4.bcastpkts
+ description: IP Broadcast Packets
unit: "packets/s"
chart_type: line
dimensions:
- name: received
- name: sent
- - name: ipv4.icmp_errors
- description: IPv4 ICMP Errors
+ - name: ipv4.mcast
+ description: IPv4 Multicast Bandwidth
+ unit: "kilobits/s"
+ chart_type: area
+ dimensions:
+ - name: received
+ - name: sent
+ - name: ipv4.mcastpkts
+ description: IP Multicast Packets
unit: "packets/s"
chart_type: line
dimensions:
- - name: InErrors
- - name: OutErrors
- - name: InCsumErrors
+ - name: received
+ - name: sent
+ - name: ipv4.icmp
+ description: IPv4 ICMP Packets
+ unit: "packets/s"
+ chart_type: line
+ dimensions:
+ - name: received
+ - name: sent
- name: ipv4.icmpmsg
description: IPv4 ICMP Messages
unit: "packets/s"
@@ -3373,43 +3359,14 @@ modules:
- name: OutTimestamps
- name: InTimestampReps
- name: OutTimestampReps
- - name: ipv4.tcpsock
- description: IPv4 TCP Connections
- unit: "active connections"
- chart_type: line
- dimensions:
- - name: connections
- - name: ipv4.tcppackets
- description: IPv4 TCP Packets
- unit: "packets/s"
- chart_type: line
- dimensions:
- - name: received
- - name: sent
- - name: ipv4.tcperrors
- description: IPv4 TCP Errors
+ - name: ipv4.icmp_errors
+ description: IPv4 ICMP Errors
unit: "packets/s"
chart_type: line
dimensions:
- - name: InErrs
+ - name: InErrors
+ - name: OutErrors
- name: InCsumErrors
- - name: RetransSegs
- - name: ipv4.tcpopens
- description: IPv4 TCP Opens
- unit: "connections/s"
- chart_type: line
- dimensions:
- - name: active
- - name: passive
- - name: ipv4.tcphandshake
- description: IPv4 TCP Handshake Issues
- unit: "events/s"
- chart_type: line
- dimensions:
- - name: EstabResets
- - name: OutRsts
- - name: AttemptFails
- - name: SynRetrans
- name: ipv4.udppackets
description: IPv4 UDP Packets
unit: "packets/s"
@@ -3446,6 +3403,31 @@ modules:
- name: NoPorts
- name: InCsumErrors
- name: IgnoredMulti
+ - name: ipv4.ecnpkts
+ description: IP ECN Statistics
+ unit: "packets/s"
+ chart_type: line
+ dimensions:
+ - name: CEP
+ - name: NoECTP
+ - name: ECTP0
+ - name: ECTP1
+ - name: ipv4.fragsin
+ description: IPv4 Fragments Reassembly
+ unit: "packets/s"
+ chart_type: line
+ dimensions:
+ - name: ok
+ - name: failed
+ - name: all
+ - name: ipv4.fragsout
+ description: IPv4 Fragments Sent
+ unit: "packets/s"
+ chart_type: line
+ dimensions:
+ - name: ok
+ - name: failed
+ - name: created
- name: system.ipv6
description: IPv6 Bandwidth
unit: "kilobits/s"
@@ -3453,7 +3435,7 @@ modules:
dimensions:
- name: received
- name: sent
- - name: system.ipv6
+ - name: ipv6.packets
description: IPv6 Packets
unit: "packets/s"
chart_type: line
@@ -3462,23 +3444,6 @@ modules:
- name: sent
- name: forwarded
- name: delivers
- - name: ipv6.fragsout
- description: IPv6 Fragments Sent
- unit: "packets/s"
- chart_type: line
- dimensions:
- - name: ok
- - name: failed
- - name: all
- - name: ipv6.fragsin
- description: IPv6 Fragments Reassembly
- unit: "packets/s"
- chart_type: line
- dimensions:
- - name: ok
- - name: failed
- - name: timeout
- - name: all
- name: ipv6.errors
description: IPv6 Errors
unit: "packets/s"
@@ -3493,6 +3458,27 @@ modules:
- name: InTruncatedPkts
- name: InNoRoutes
- name: OutNoRoutes
+ - name: ipv6.bcast
+ description: IPv6 Broadcast Bandwidth
+ unit: "kilobits/s"
+ chart_type: area
+ dimensions:
+ - name: received
+ - name: sent
+ - name: ipv6.mcast
+ description: IPv6 Multicast Bandwidth
+ unit: "kilobits/s"
+ chart_type: area
+ dimensions:
+ - name: received
+ - name: sent
+ - name: ipv6.mcastpkts
+ description: IPv6 Multicast Packets
+ unit: "packets/s"
+ chart_type: line
+ dimensions:
+ - name: received
+ - name: sent
- name: ipv6.udppackets
description: IPv6 UDP Packets
unit: "packets/s"
@@ -3528,27 +3514,6 @@ modules:
- name: InErrors
- name: NoPorts
- name: InCsumErrors
- - name: ipv6.mcast
- description: IPv6 Multicast Bandwidth
- unit: "kilobits/s"
- chart_type: area
- dimensions:
- - name: received
- - name: sent
- - name: ipv6.bcast
- description: IPv6 Broadcast Bandwidth
- unit: "kilobits/s"
- chart_type: area
- dimensions:
- - name: received
- - name: sent
- - name: ipv6.mcastpkts
- description: IPv6 Multicast Packets
- unit: "packets/s"
- chart_type: line
- dimensions:
- - name: received
- - name: sent
- name: ipv6.icmp
description: IPv6 ICMP Messages
unit: "messages/s"
@@ -3657,6 +3622,23 @@ modules:
- name: InECT1Pkts
- name: InECT0Pkts
- name: InCEPkts
+ - name: ipv6.fragsin
+ description: IPv6 Fragments Reassembly
+ unit: "packets/s"
+ chart_type: line
+ dimensions:
+ - name: ok
+ - name: failed
+ - name: timeout
+ - name: all
+ - name: ipv6.fragsout
+ description: IPv6 Fragments Sent
+ unit: "packets/s"
+ chart_type: line
+ dimensions:
+ - name: ok
+ - name: failed
+ - name: all
- meta:
plugin_name: proc.plugin
module_name: /proc/net/sockstat
@@ -3734,8 +3716,8 @@ modules:
description: ""
labels: []
metrics:
- - name: ipv4.sockstat_sockets
- description: IPv4 Sockets Used
+ - name: ip.sockstat_sockets
+ description: Sockets used for all address families
unit: "sockets"
chart_type: line
dimensions:
diff --git a/collectors/proc.plugin/plugin_proc.c b/collectors/proc.plugin/plugin_proc.c
index fbcaa614a51cd1..3f11aaf6c956ca 100644
--- a/collectors/proc.plugin/plugin_proc.c
+++ b/collectors/proc.plugin/plugin_proc.c
@@ -138,10 +138,18 @@ static bool is_lxcfs_proc_mounted() {
return false;
}
+static bool log_proc_module(BUFFER *wb, void *data) { // log-field callback: appends "proc.plugin[<module name>]" to wb
+ struct proc_module *pm = data; // data is the struct proc_module handed over via ND_LOG_FIELD_CB() in proc_main()
+ buffer_sprintf(wb, "proc.plugin[%s]", pm->name);
+ return true; // always returns true; NOTE(review): presumably signals that the field was produced - confirm against the logger API
+}
+
void *proc_main(void *ptr)
{
worker_register("PROC");
+ rrd_collector_started();
+
if (config_get_boolean("plugin:proc", "/proc/net/dev", CONFIG_BOOLEAN_YES)) {
netdev_thread = mallocz(sizeof(netdata_thread_t));
netdata_log_debug(D_SYSTEM, "Starting thread %s.", THREAD_NETDEV_NAME);
@@ -151,46 +159,56 @@ void *proc_main(void *ptr)
netdata_thread_cleanup_push(proc_main_cleanup, ptr);
- config_get_boolean("plugin:proc", "/proc/pagetypeinfo", CONFIG_BOOLEAN_NO);
+ {
+ config_get_boolean("plugin:proc", "/proc/pagetypeinfo", CONFIG_BOOLEAN_NO);
- // check the enabled status for each module
- int i;
- for (i = 0; proc_modules[i].name; i++) {
- struct proc_module *pm = &proc_modules[i];
+ // check the enabled status for each module
+ int i;
+ for(i = 0; proc_modules[i].name; i++) {
+ struct proc_module *pm = &proc_modules[i];
- pm->enabled = config_get_boolean("plugin:proc", pm->name, CONFIG_BOOLEAN_YES);
- pm->rd = NULL;
+ pm->enabled = config_get_boolean("plugin:proc", pm->name, CONFIG_BOOLEAN_YES);
+ pm->rd = NULL;
- worker_register_job_name(i, proc_modules[i].dim);
- }
+ worker_register_job_name(i, proc_modules[i].dim);
+ }
- usec_t step = localhost->rrd_update_every * USEC_PER_SEC;
- heartbeat_t hb;
- heartbeat_init(&hb);
+ usec_t step = localhost->rrd_update_every * USEC_PER_SEC;
+ heartbeat_t hb;
+ heartbeat_init(&hb);
- inside_lxc_container = is_lxcfs_proc_mounted();
+ inside_lxc_container = is_lxcfs_proc_mounted();
- while (service_running(SERVICE_COLLECTORS)) {
- worker_is_idle();
- usec_t hb_dt = heartbeat_next(&hb, step);
+#define LGS_MODULE_ID 0
- if (unlikely(!service_running(SERVICE_COLLECTORS)))
- break;
+ ND_LOG_STACK lgs[] = {
+ [LGS_MODULE_ID] = ND_LOG_FIELD_TXT(NDF_MODULE, "proc.plugin"),
+ ND_LOG_FIELD_END(),
+ };
+ ND_LOG_STACK_PUSH(lgs);
- for (i = 0; proc_modules[i].name; i++) {
- if (unlikely(!service_running(SERVICE_COLLECTORS)))
- break;
+ while(service_running(SERVICE_COLLECTORS)) {
+ worker_is_idle();
+ usec_t hb_dt = heartbeat_next(&hb, step);
- struct proc_module *pm = &proc_modules[i];
- if (unlikely(!pm->enabled))
- continue;
+ if(unlikely(!service_running(SERVICE_COLLECTORS)))
+ break;
- netdata_log_debug(D_PROCNETDEV_LOOP, "PROC calling %s.", pm->name);
+ for(i = 0; proc_modules[i].name; i++) {
+ if(unlikely(!service_running(SERVICE_COLLECTORS)))
+ break;
- worker_is_busy(i);
- pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt);
- }
- }
+ struct proc_module *pm = &proc_modules[i];
+ if(unlikely(!pm->enabled))
+ continue;
+
+ worker_is_busy(i);
+ lgs[LGS_MODULE_ID] = ND_LOG_FIELD_CB(NDF_MODULE, log_proc_module, pm);
+ pm->enabled = !pm->func(localhost->rrd_update_every, hb_dt);
+ lgs[LGS_MODULE_ID] = ND_LOG_FIELD_TXT(NDF_MODULE, "proc.plugin");
+ }
+ }
+ }
netdata_thread_cleanup_pop(1);
return NULL;
diff --git a/collectors/proc.plugin/plugin_proc.h b/collectors/proc.plugin/plugin_proc.h
index a90f4838e93c06..e4fc105bac4949 100644
--- a/collectors/proc.plugin/plugin_proc.h
+++ b/collectors/proc.plugin/plugin_proc.h
@@ -58,8 +58,9 @@ void netdev_rename_device_add(
const char *host_device,
const char *container_device,
const char *container_name,
- DICTIONARY *labels,
- const char *ctx_prefix);
+ RRDLABELS *labels,
+ const char *ctx_prefix,
+ const DICTIONARY_ITEM *cgroup_netdev_link);
void netdev_rename_device_del(const char *host_device);
diff --git a/collectors/proc.plugin/proc_diskstats.c b/collectors/proc.plugin/proc_diskstats.c
index 359fa9a810eb42..475d90835f5a02 100644
--- a/collectors/proc.plugin/proc_diskstats.c
+++ b/collectors/proc.plugin/proc_diskstats.c
@@ -6,6 +6,8 @@
#define PLUGIN_PROC_MODULE_DISKSTATS_NAME "/proc/diskstats"
#define CONFIG_SECTION_PLUGIN_PROC_DISKSTATS "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_DISKSTATS_NAME
+#define RRDFUNCTIONS_DISKSTATS_HELP "View block device statistics"
+
#define DISK_TYPE_UNKNOWN 0
#define DISK_TYPE_PHYSICAL 1
#define DISK_TYPE_PARTITION 2
@@ -14,15 +16,25 @@
#define DEFAULT_PREFERRED_IDS "*"
#define DEFAULT_EXCLUDED_DISKS "loop* ram*"
+static netdata_mutex_t diskstats_dev_mutex = NETDATA_MUTEX_INITIALIZER;
+
static struct disk {
char *disk; // the name of the disk (sda, sdb, etc, after being looked up)
char *device; // the device of the disk (before being looked up)
+ char *disk_by_id;
+ char *model;
+ char *serial;
+// bool rotational;
+// bool removable;
uint32_t hash;
unsigned long major;
unsigned long minor;
int sector_size;
int type;
+ bool excluded;
+ bool function_ready;
+
char *mount_point;
char *chart_id;
@@ -163,7 +175,7 @@ static struct disk {
struct disk *next;
} *disk_root = NULL;
-#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); (st) = NULL; } } while(st)
+#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete___safe_from_collector_thread(st); (st) = NULL; } } while(st)
// static char *path_to_get_hw_sector_size = NULL;
// static char *path_to_get_hw_sector_size_partitions = NULL;
@@ -172,6 +184,8 @@ static char *path_to_sys_block_device = NULL;
static char *path_to_sys_block_device_bcache = NULL;
static char *path_to_sys_devices_virtual_block_device = NULL;
static char *path_to_device_mapper = NULL;
+static char *path_to_dev_disk = NULL;
+static char *path_to_sys_block = NULL;
static char *path_to_device_label = NULL;
static char *path_to_device_id = NULL;
static char *path_to_veritas_volume_groups = NULL;
@@ -352,7 +366,10 @@ static inline int get_disk_name_from_path(const char *path, char *result, size_t
DIR *dir = opendir(path);
if (!dir) {
- collector_error("DEVICE-MAPPER ('%s', %lu:%lu): Cannot open directory '%s'.", disk, major, minor, path);
+ if (errno == ENOENT)
+ nd_log_collector(NDLP_DEBUG, "DEVICE-MAPPER ('%s', %lu:%lu): Cannot open directory '%s': no such file or directory.", disk, major, minor, path);
+ else
+ collector_error("DEVICE-MAPPER ('%s', %lu:%lu): Cannot open directory '%s'.", disk, major, minor, path);
goto failed;
}
@@ -469,22 +486,127 @@ static inline char *get_disk_name(unsigned long major, unsigned long minor, char
return strdup(result);
}
+static inline bool ends_with(const char *str, const char *suffix) { // returns true when str ends with suffix (byte-wise comparison)
+ if (!str || !suffix) // a NULL argument never matches
+ return false;
+ // an empty suffix matches any non-NULL str (strncmp() over 0 bytes compares equal)
+ size_t len_str = strlen(str);
+ size_t len_suffix = strlen(suffix);
+ if (len_suffix > len_str) // suffix longer than str: cannot match, and avoids pointing before str below
+ return false;
+
+ return strncmp(str + len_str - len_suffix, suffix, len_suffix) == 0; // compare only the last len_suffix bytes of str
+}
+
+static inline char *get_disk_by_id(char *device) { // returns a strdupz() copy of the first "<path_to_dev_disk>/by-id" entry whose link target ends with device, or NULL
+ char pathname[256 + 1];
+ snprintfz(pathname, sizeof(pathname) - 1, "%s/by-id", path_to_dev_disk);
+
+ struct dirent *entry;
+ DIR *dp = opendir(pathname);
+ if (dp == NULL) { // directory missing or unreadable: report via internal_error() and return no id
+ internal_error(true, "Cannot open '%s'", pathname);
+ return NULL;
+ }
+
+ while ((entry = readdir(dp))) {
+ // We ignore the '.' and '..' entries
+ if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+ continue;
+ // entries with the prefixes below are never returned as the id; NOTE(review): presumably to prefer model/serial based names - confirm
+ if(strncmp(entry->d_name, "md-uuid-", 8) == 0 ||
+ strncmp(entry->d_name, "dm-uuid-", 8) == 0 ||
+ strncmp(entry->d_name, "nvme-eui.", 9) == 0 ||
+ strncmp(entry->d_name, "wwn-", 4) == 0 ||
+ strncmp(entry->d_name, "lvm-pv-uuid-", 12) == 0)
+ continue;
+
+ char link_target[256 + 1];
+ char full_path[256 + 1];
+ snprintfz(full_path, 256, "%s/%s", pathname, entry->d_name);
+
+ ssize_t len = readlink(full_path, link_target, 256); // reads at most 256 bytes, leaving room for the terminator added below
+ if (len == -1) // readlink() failed (e.g. the entry is not a symlink): skip this entry
+ continue;
+
+ link_target[len] = '\0'; // readlink() does not NUL-terminate the buffer
+ // NOTE(review): textual suffix match - a device such as "ram1" would also match a link ending in "zram1"; confirm this is acceptable
+ if (ends_with(link_target, device)) {
+ char *s = strdupz(entry->d_name); // copy the name before closedir(), since entry belongs to the directory stream
+ closedir(dp);
+ return s; // allocated copy; NOTE(review): presumably owned by the caller - confirm where it is released
+ }
+ }
+ // no entry links to this device
+ closedir(dp);
+ return NULL;
+}
+
+static inline char *get_disk_model(char *device) { // returns a strdupz() copy of the device model read from under path_to_sys_block, or NULL
+ char path[256 + 1];
+ char buffer[256 + 1];
+ // first choice: "<path_to_sys_block>/<device>/device/model"
+ snprintfz(path, sizeof(path) - 1, "%s/%s/device/model", path_to_sys_block, device);
+ if(read_file(path, buffer, 256) != 0) { // a non-zero result from read_file() is treated as failure
+ snprintfz(path, sizeof(path) - 1, "%s/%s/device/name", path_to_sys_block, device); // fallback: ".../device/name"
+ if(read_file(path, buffer, 256) != 0)
+ return NULL; // neither file could be read
+ }
+
+ char *clean = trim(buffer); // NOTE(review): trim() presumably strips surrounding whitespace and may return NULL for an empty result - confirm
+ if (!clean)
+ return NULL;
+
+ return strdupz(clean); // copy out of the stack buffer before returning
+}
+
+static inline char *get_disk_serial(char *device) { // returns a strdupz() copy of "<path_to_sys_block>/<device>/device/serial", or NULL
+ char path[256 + 1];
+ char buffer[256 + 1];
+
+ snprintfz(path, sizeof(path) - 1, "%s/%s/device/serial", path_to_sys_block, device);
+ if(read_file(path, buffer, 256) != 0) // a non-zero result from read_file() is treated as failure
+ return NULL;
+ // NOTE(review): unlike get_disk_model(), the buffer is not passed through trim(), so any trailing newline read from the file is kept - confirm intended
+ return strdupz(buffer);
+}
+
+//static inline bool get_disk_rotational(char *device) {
+// char path[256 + 1];
+// char buffer[256 + 1];
+//
+// snprintfz(path, 256, "%s/%s/queue/rotational", path_to_sys_block, device);
+// if(read_file(path, buffer, 256) != 0)
+// return false;
+//
+// return buffer[0] == '1';
+//}
+//
+//static inline bool get_disk_removable(char *device) {
+// char path[256 + 1];
+// char buffer[256 + 1];
+//
+// snprintfz(path, 256, "%s/%s/removable", path_to_sys_block, device);
+// if(read_file(path, buffer, 256) != 0)
+// return false;
+//
+// return buffer[0] == '1';
+//}
+
static void get_disk_config(struct disk *d) {
int def_enable = global_enable_new_disks_detected_at_runtime;
- if(def_enable != CONFIG_BOOLEAN_NO && (simple_pattern_matches(excluded_disks, d->device) || simple_pattern_matches(excluded_disks, d->disk)))
+ if(def_enable != CONFIG_BOOLEAN_NO && (simple_pattern_matches(excluded_disks, d->device) || simple_pattern_matches(excluded_disks, d->disk))) {
+ d->excluded = true;
def_enable = CONFIG_BOOLEAN_NO;
-#ifdef NETDATA_SKIP_IF_NOT_COLLECT
- if(!def_enable) {
- netdata_log_debug(D_COLLECTOR, "DISKSTAT: Skipping device: %s, disk: %s because it is excluded by configuration.", d->device, d->disk);
- return;
}
-#endif
char var_name[4096 + 1];
snprintfz(var_name, 4096, CONFIG_SECTION_PLUGIN_PROC_DISKSTATS ":%s", d->disk);
- def_enable = config_get_boolean_ondemand(var_name, "enable", def_enable);
+ if (config_exists(var_name, "enable"))
+ def_enable = config_get_boolean_ondemand(var_name, "enable", def_enable);
+
if(unlikely(def_enable == CONFIG_BOOLEAN_NO)) {
// the user does not want any metrics for this disk
d->do_io = CONFIG_BOOLEAN_NO;
@@ -536,7 +658,8 @@ static void get_disk_config(struct disk *d) {
// def_performance
// check the user configuration (this will also show our 'on demand' decision)
- def_performance = config_get_boolean_ondemand(var_name, "enable performance metrics", def_performance);
+ if (config_exists(var_name, "enable performance metrics"))
+ def_performance = config_get_boolean_ondemand(var_name, "enable performance metrics", def_performance);
int ddo_io = CONFIG_BOOLEAN_NO,
ddo_ops = CONFIG_BOOLEAN_NO,
@@ -559,21 +682,44 @@ static void get_disk_config(struct disk *d) {
ddo_ext = global_do_ext,
ddo_backlog = global_do_backlog,
ddo_bcache = global_do_bcache;
+ } else {
+ d->excluded = true;
}
- d->do_io = config_get_boolean_ondemand(var_name, "bandwidth", ddo_io);
- d->do_ops = config_get_boolean_ondemand(var_name, "operations", ddo_ops);
- d->do_mops = config_get_boolean_ondemand(var_name, "merged operations", ddo_mops);
- d->do_iotime = config_get_boolean_ondemand(var_name, "i/o time", ddo_iotime);
- d->do_qops = config_get_boolean_ondemand(var_name, "queued operations", ddo_qops);
- d->do_util = config_get_boolean_ondemand(var_name, "utilization percentage", ddo_util);
- d->do_ext = config_get_boolean_ondemand(var_name, "extended operations", ddo_ext);
- d->do_backlog = config_get_boolean_ondemand(var_name, "backlog", ddo_backlog);
-
- if(d->device_is_bcache)
- d->do_bcache = config_get_boolean_ondemand(var_name, "bcache", ddo_bcache);
- else
+ d->do_io = ddo_io;
+ d->do_ops = ddo_ops;
+ d->do_mops = ddo_mops;
+ d->do_iotime = ddo_iotime;
+ d->do_qops = ddo_qops;
+ d->do_util = ddo_util;
+ d->do_ext = ddo_ext;
+ d->do_backlog = ddo_backlog;
+
+ if (config_exists(var_name, "bandwidth"))
+ d->do_io = config_get_boolean_ondemand(var_name, "bandwidth", ddo_io);
+ if (config_exists(var_name, "operations"))
+ d->do_ops = config_get_boolean_ondemand(var_name, "operations", ddo_ops);
+ if (config_exists(var_name, "merged operations"))
+ d->do_mops = config_get_boolean_ondemand(var_name, "merged operations", ddo_mops);
+ if (config_exists(var_name, "i/o time"))
+ d->do_iotime = config_get_boolean_ondemand(var_name, "i/o time", ddo_iotime);
+ if (config_exists(var_name, "queued operations"))
+ d->do_qops = config_get_boolean_ondemand(var_name, "queued operations", ddo_qops);
+ if (config_exists(var_name, "utilization percentage"))
+ d->do_util = config_get_boolean_ondemand(var_name, "utilization percentage", ddo_util);
+ if (config_exists(var_name, "extended operations"))
+ d->do_ext = config_get_boolean_ondemand(var_name, "extended operations", ddo_ext);
+ if (config_exists(var_name, "backlog"))
+ d->do_backlog = config_get_boolean_ondemand(var_name, "backlog", ddo_backlog);
+
+ d->do_bcache = ddo_bcache;
+
+ if (d->device_is_bcache) {
+ if (config_exists(var_name, "bcache"))
+ d->do_bcache = config_get_boolean_ondemand(var_name, "bcache", ddo_bcache);
+ } else {
d->do_bcache = 0;
+ }
}
}
@@ -598,8 +744,15 @@ static struct disk *get_disk(unsigned long major, unsigned long minor, char *dis
// create a new disk structure
d = (struct disk *)callocz(1, sizeof(struct disk));
+ d->excluded = false;
+ d->function_ready = false;
d->disk = get_disk_name(major, minor, disk);
d->device = strdupz(disk);
+ d->disk_by_id = get_disk_by_id(disk);
+ d->model = get_disk_model(disk);
+ d->serial = get_disk_serial(disk);
+// d->rotational = get_disk_rotational(disk);
+// d->removable = get_disk_removable(disk);
d->hash = simple_hash(d->device);
d->major = major;
d->minor = minor;
@@ -854,30 +1007,399 @@ static struct disk *get_disk(unsigned long major, unsigned long minor, char *dis
}
get_disk_config(d);
+
return d;
}
+static const char *get_disk_type_string(int disk_type) { // maps a DISK_TYPE_* constant to the string used for the "device_type" label
+ switch (disk_type) {
+ case DISK_TYPE_PHYSICAL:
+ return "physical";
+ case DISK_TYPE_PARTITION:
+ return "partition";
+ case DISK_TYPE_VIRTUAL:
+ return "virtual";
+ default: // DISK_TYPE_UNKNOWN and any other value
+ return "unknown";
+ }
+}
+
+// Attach the disk identification labels to chart `st`: "device",
+// "mount_point", "id" (d->disk_by_id), "model", "serial" and "device_type"
+// (the string form of d->type). All of them are added with
+// RRDLABEL_SRC_AUTO as their source.
static void add_labels_to_disk(struct disk *d, RRDSET *st) {
rrdlabels_add(st->rrdlabels, "device", d->disk, RRDLABEL_SRC_AUTO);
rrdlabels_add(st->rrdlabels, "mount_point", d->mount_point, RRDLABEL_SRC_AUTO);
+ rrdlabels_add(st->rrdlabels, "id", d->disk_by_id, RRDLABEL_SRC_AUTO);
+ rrdlabels_add(st->rrdlabels, "model", d->model, RRDLABEL_SRC_AUTO);
+ rrdlabels_add(st->rrdlabels, "serial", d->serial, RRDLABEL_SRC_AUTO);
+ rrdlabels_add(st->rrdlabels, "device_type", get_disk_type_string(d->type), RRDLABEL_SRC_AUTO);
+}
- switch (d->type) {
- default:
- case DISK_TYPE_UNKNOWN:
- rrdlabels_add(st->rrdlabels, "device_type", "unknown", RRDLABEL_SRC_AUTO);
- break;
+// Handler of the "block-devices" function.
+//
+// Rebuilds `wb` from scratch as a JSON "table" response:
+//   - "data":           one row per disk that has function_ready set
+//   - "columns":        the description of every field of a row, in row order
+//   - "charts", "default_charts", "group_by": presentation hints
+//   - "expires":        now + 1 second
+//
+// The disk list is walked while diskstats_dev_mutex is held.
+//
+// If is_cancelled_cb is given and reports a cancellation, the buffer is
+// flushed and HTTP_RESP_CLIENT_CLOSED_REQUEST is used as the response code,
+// otherwise HTTP_RESP_OK. The response code is passed to result_cb (when it
+// is not NULL) together with `wb`, and is also the return value.
+static int diskstats_function_block_devices(BUFFER *wb, int timeout __maybe_unused, const char *function __maybe_unused,
+                                            void *collector_data __maybe_unused,
+                                            rrd_function_result_callback_t result_cb, void *result_cb_data,
+                                            rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data,
+                                            rrd_function_register_canceller_cb_t register_canceller_cb __maybe_unused,
+                                            void *register_canceller_cb_data __maybe_unused) {
+
+    buffer_flush(wb);
+    wb->content_type = CT_APPLICATION_JSON;
+    buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
+
+    buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost));
+    buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
+    buffer_json_member_add_string(wb, "type", "table");
+    buffer_json_member_add_time_t(wb, "update_every", 1);
+    buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_DISKSTATS_HELP);
+    buffer_json_member_add_array(wb, "data");
+
+    // per-column maxima, handed to the column definitions further down
+    double max_io_reads = 0.0;
+    double max_io_writes = 0.0;
+    double max_io = 0.0;
+    double max_backlog_time = 0.0;
+    double max_busy_time = 0.0;
+    double max_busy_perc = 0.0;
+    double max_iops_reads = 0.0;
+    double max_iops_writes = 0.0;
+    double max_iops_time_reads = 0.0;
+    double max_iops_time_writes = 0.0;
+    double max_iops_avg_time_read = 0.0;
+    double max_iops_avg_time_write = 0.0;
+    double max_iops_avg_size_read = 0.0;
+    double max_iops_avg_size_write = 0.0;
+
+    netdata_mutex_lock(&diskstats_dev_mutex);
+
+    for (struct disk *d = disk_root; d; d = d->next) {
+        if (unlikely(!d->function_ready))
+            continue;
- case DISK_TYPE_PHYSICAL:
- rrdlabels_add(st->rrdlabels, "device_type", "physical", RRDLABEL_SRC_AUTO);
- break;
+        buffer_json_add_array_item_array(wb);
+
+        // the order of the items below must match the order of the
+        // buffer_rrdf_table_add_field() calls in the "columns" section
+        buffer_json_add_array_item_string(wb, d->device);
+        buffer_json_add_array_item_string(wb, get_disk_type_string(d->type));
+        buffer_json_add_array_item_string(wb, d->disk_by_id);
+        buffer_json_add_array_item_string(wb, d->model);
+        buffer_json_add_array_item_string(wb, d->serial);
+
+        // NOTE(review): rrddim_get_last_stored_value() is not visible here;
+        // presumably it returns the last stored value of the dimension divided
+        // by its 3rd argument and raises the maximum passed as 2nd - confirm.
+
+        // IO
+        double io_reads = rrddim_get_last_stored_value(d->rd_io_reads, &max_io_reads, 1024.0);
+        double io_writes = rrddim_get_last_stored_value(d->rd_io_writes, &max_io_writes, 1024.0);
+        double io_total = NAN;
+        // the total is reported only when both directions have a value
+        if (!isnan(io_reads) && !isnan(io_writes)) {
+            io_total = io_reads + io_writes;
+            max_io = MAX(max_io, io_total);
+        }
+        // Backlog and Busy Time
+        double busy_perc = rrddim_get_last_stored_value(d->rd_util_utilization, &max_busy_perc, 1);
+        double busy_time = rrddim_get_last_stored_value(d->rd_busy_busy, &max_busy_time, 1);
+        double backlog_time = rrddim_get_last_stored_value(d->rd_backlog_backlog, &max_backlog_time, 1);
+        // IOPS
+        double iops_reads = rrddim_get_last_stored_value(d->rd_ops_reads, &max_iops_reads, 1);
+        double iops_writes = rrddim_get_last_stored_value(d->rd_ops_writes, &max_iops_writes, 1);
+        // IO Time
+        double iops_time_reads = rrddim_get_last_stored_value(d->rd_iotime_reads, &max_iops_time_reads, 1);
+        double iops_time_writes = rrddim_get_last_stored_value(d->rd_iotime_writes, &max_iops_time_writes, 1);
+        // Avg IO Time
+        double iops_avg_time_read = rrddim_get_last_stored_value(d->rd_await_reads, &max_iops_avg_time_read, 1);
+        double iops_avg_time_write = rrddim_get_last_stored_value(d->rd_await_writes, &max_iops_avg_time_write, 1);
+        // Avg IO Size
+        double iops_avg_size_read = rrddim_get_last_stored_value(d->rd_avgsz_reads, &max_iops_avg_size_read, 1);
+        double iops_avg_size_write = rrddim_get_last_stored_value(d->rd_avgsz_writes, &max_iops_avg_size_write, 1);
+
+
+        buffer_json_add_array_item_double(wb, io_reads);
+        buffer_json_add_array_item_double(wb, io_writes);
+        buffer_json_add_array_item_double(wb, io_total);
+        buffer_json_add_array_item_double(wb, busy_perc);
+        buffer_json_add_array_item_double(wb, busy_time);
+        buffer_json_add_array_item_double(wb, backlog_time);
+        buffer_json_add_array_item_double(wb, iops_reads);
+        buffer_json_add_array_item_double(wb, iops_writes);
+        buffer_json_add_array_item_double(wb, iops_time_reads);
+        buffer_json_add_array_item_double(wb, iops_time_writes);
+        buffer_json_add_array_item_double(wb, iops_avg_time_read);
+        buffer_json_add_array_item_double(wb, iops_avg_time_write);
+        buffer_json_add_array_item_double(wb, iops_avg_size_read);
+        buffer_json_add_array_item_double(wb, iops_avg_size_write);
+
+        // End
+        buffer_json_array_close(wb);
+    }
- case DISK_TYPE_PARTITION:
- rrdlabels_add(st->rrdlabels, "device_type", "partition", RRDLABEL_SRC_AUTO);
- break;
+    netdata_mutex_unlock(&diskstats_dev_mutex);
- case DISK_TYPE_VIRTUAL:
- rrdlabels_add(st->rrdlabels, "device_type", "virtual", RRDLABEL_SRC_AUTO);
- break;
+    buffer_json_array_close(wb); // data
+    buffer_json_member_add_object(wb, "columns");
+    {
+        size_t field_id = 0;
+
+        buffer_rrdf_table_add_field(wb, field_id++, "Device", "Device Name",
+                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY,
+                                    NULL);
+        buffer_rrdf_table_add_field(wb, field_id++, "Type", "Device Type",
+                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_UNIQUE_KEY,
+                                    NULL);
+        buffer_rrdf_table_add_field(wb, field_id++, "ID", "Device ID",
+                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_UNIQUE_KEY,
+                                    NULL);
+        buffer_rrdf_table_add_field(wb, field_id++, "Model", "Device Model",
+                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_UNIQUE_KEY,
+                                    NULL);
+        buffer_rrdf_table_add_field(wb, field_id++, "Serial", "Device Serial Number",
+                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_UNIQUE_KEY,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "Read", "Data Read from Device",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "MiB", max_io_reads, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+        buffer_rrdf_table_add_field(wb, field_id++, "Written", "Data Written to Device",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "MiB", max_io_writes, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+        buffer_rrdf_table_add_field(wb, field_id++, "Total", "Data Transferred to and from Device",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "MiB", max_io, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_NONE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "Busy%", "Disk Busy Percentage",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "%", max_busy_perc, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+        buffer_rrdf_table_add_field(wb, field_id++, "Busy", "Disk Busy Time",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "milliseconds", max_busy_time, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+        buffer_rrdf_table_add_field(wb, field_id++, "Backlog", "Disk Backlog",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "milliseconds", max_backlog_time, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "Reads", "Completed Read Operations",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "ops", max_iops_reads, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+        buffer_rrdf_table_add_field(wb, field_id++, "Writes", "Completed Write Operations",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "ops", max_iops_writes, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "ReadsTime", "Read Operations Time",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "milliseconds", max_iops_time_reads, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+        buffer_rrdf_table_add_field(wb, field_id++, "WritesTime", "Write Operations Time",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "milliseconds", max_iops_time_writes, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "ReadAvgTime", "Average Read Operation Service Time",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "milliseconds", max_iops_avg_time_read, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+        buffer_rrdf_table_add_field(wb, field_id++, "WriteAvgTime", "Average Write Operation Service Time",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "milliseconds", max_iops_avg_time_write, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "ReadAvgSz", "Average Read Operation Size",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "KiB", max_iops_avg_size_read, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+        buffer_rrdf_table_add_field(wb, field_id++, "WriteAvgSz", "Average Write Operation Size",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "KiB", max_iops_avg_size_write, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+    }
+
+    buffer_json_object_close(wb); // columns
+    buffer_json_member_add_string(wb, "default_sort_column", "Total");
+
+    buffer_json_member_add_object(wb, "charts");
+    {
+        buffer_json_member_add_object(wb, "IO");
+        {
+            buffer_json_member_add_string(wb, "name", "IO");
+            buffer_json_member_add_string(wb, "type", "stacked-bar");
+            buffer_json_member_add_array(wb, "columns");
+            {
+                buffer_json_add_array_item_string(wb, "Read");
+                buffer_json_add_array_item_string(wb, "Written");
+            }
+            buffer_json_array_close(wb);
+        }
+        buffer_json_object_close(wb);
+
+        buffer_json_member_add_object(wb, "Busy");
+        {
+            buffer_json_member_add_string(wb, "name", "Busy");
+            buffer_json_member_add_string(wb, "type", "stacked-bar");
+            buffer_json_member_add_array(wb, "columns");
+            {
+                buffer_json_add_array_item_string(wb, "Busy");
+            }
+            buffer_json_array_close(wb);
+        }
+        buffer_json_object_close(wb);
+    }
+    buffer_json_object_close(wb); // charts
+
+    // each default chart is a [chart name, column name] pair
+    buffer_json_member_add_array(wb, "default_charts");
+    {
+        buffer_json_add_array_item_array(wb);
+        buffer_json_add_array_item_string(wb, "IO");
+        buffer_json_add_array_item_string(wb, "Device");
+        buffer_json_array_close(wb);
+
+        buffer_json_add_array_item_array(wb);
+        buffer_json_add_array_item_string(wb, "Busy");
+        buffer_json_add_array_item_string(wb, "Device");
+        buffer_json_array_close(wb);
+    }
+    buffer_json_array_close(wb);
+
+    buffer_json_member_add_object(wb, "group_by");
+    {
+        buffer_json_member_add_object(wb, "Type");
+        {
+            buffer_json_member_add_string(wb, "name", "Type");
+            buffer_json_member_add_array(wb, "columns");
+            {
+                buffer_json_add_array_item_string(wb, "Type");
+            }
+            buffer_json_array_close(wb);
+        }
+        buffer_json_object_close(wb);
+    }
+    buffer_json_object_close(wb); // group_by
+
+    buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1);
+    buffer_json_finalize(wb);
+
+    // a cancelled request gets an empty buffer and a "client closed" status
+    int response = HTTP_RESP_OK;
+    if(is_cancelled_cb && is_cancelled_cb(is_cancelled_cb_data)) {
+        buffer_flush(wb);
+        response = HTTP_RESP_CLIENT_CLOSED_REQUEST;
+    }
+
+    if(result_cb)
+        result_cb(wb, response, result_cb_data);
+
+    return response;
+}
+
+// Walk the disk list once. A disk that was not marked as updated is
+// removed (only when global_cleanup_removed_disks is set): its charts are
+// marked obsolete, it is unlinked from the list and all its memory is
+// released. Every disk that stays has its `updated` flag cleared.
+//
+// do_proc_diskstats() calls this while holding diskstats_dev_mutex.
+static void diskstats_cleanup_disks() {
+    // `link` always points to the pointer that references the current disk
+    // (disk_root or the `next` member of the previous kept disk)
+    struct disk **link = &disk_root;
+
+    while (*link) {
+        struct disk *gone = *link;
+
+        if (likely(!global_cleanup_removed_disks || gone->updated)) {
+            // the disk stays; re-arm the flag for the next iteration
+            gone->updated = 0;
+            link = &gone->next;
+            continue;
+        }
+
+        rrdset_obsolete_and_pointer_null(gone->st_avgsz);
+        rrdset_obsolete_and_pointer_null(gone->st_ext_avgsz);
+        rrdset_obsolete_and_pointer_null(gone->st_await);
+        rrdset_obsolete_and_pointer_null(gone->st_ext_await);
+        rrdset_obsolete_and_pointer_null(gone->st_backlog);
+        rrdset_obsolete_and_pointer_null(gone->st_busy);
+        rrdset_obsolete_and_pointer_null(gone->st_io);
+        rrdset_obsolete_and_pointer_null(gone->st_ext_io);
+        rrdset_obsolete_and_pointer_null(gone->st_iotime);
+        rrdset_obsolete_and_pointer_null(gone->st_ext_iotime);
+        rrdset_obsolete_and_pointer_null(gone->st_mops);
+        rrdset_obsolete_and_pointer_null(gone->st_ext_mops);
+        rrdset_obsolete_and_pointer_null(gone->st_ops);
+        rrdset_obsolete_and_pointer_null(gone->st_ext_ops);
+        rrdset_obsolete_and_pointer_null(gone->st_qops);
+        rrdset_obsolete_and_pointer_null(gone->st_svctm);
+        rrdset_obsolete_and_pointer_null(gone->st_util);
+        rrdset_obsolete_and_pointer_null(gone->st_bcache);
+        rrdset_obsolete_and_pointer_null(gone->st_bcache_bypass);
+        rrdset_obsolete_and_pointer_null(gone->st_bcache_rates);
+        rrdset_obsolete_and_pointer_null(gone->st_bcache_size);
+        rrdset_obsolete_and_pointer_null(gone->st_bcache_usage);
+        rrdset_obsolete_and_pointer_null(gone->st_bcache_hit_ratio);
+        rrdset_obsolete_and_pointer_null(gone->st_bcache_cache_allocations);
+        rrdset_obsolete_and_pointer_null(gone->st_bcache_cache_read_races);
+
+        // unlink: the next loop iteration examines the successor
+        *link = gone->next;
+
+        freez(gone->bcache_filename_dirty_data);
+        freez(gone->bcache_filename_writeback_rate);
+        freez(gone->bcache_filename_cache_congested);
+        freez(gone->bcache_filename_cache_available_percent);
+        freez(gone->bcache_filename_stats_five_minute_cache_hit_ratio);
+        freez(gone->bcache_filename_stats_hour_cache_hit_ratio);
+        freez(gone->bcache_filename_stats_day_cache_hit_ratio);
+        freez(gone->bcache_filename_stats_total_cache_hit_ratio);
+        freez(gone->bcache_filename_stats_total_cache_hits);
+        freez(gone->bcache_filename_stats_total_cache_misses);
+        freez(gone->bcache_filename_stats_total_cache_miss_collisions);
+        freez(gone->bcache_filename_stats_total_cache_bypass_hits);
+        freez(gone->bcache_filename_stats_total_cache_bypass_misses);
+        freez(gone->bcache_filename_stats_total_cache_readaheads);
+        freez(gone->bcache_filename_cache_read_races);
+        freez(gone->bcache_filename_cache_io_errors);
+        freez(gone->bcache_filename_priority_stats);
+
+        freez(gone->disk);
+        freez(gone->device);
+        freez(gone->disk_by_id);
+        freez(gone->model);
+        freez(gone->serial);
+        freez(gone->mount_point);
+        freez(gone->chart_id);
+        freez(gone);
}
}
@@ -928,6 +1450,12 @@ int do_proc_diskstats(int update_every, usec_t dt) {
snprintfz(buffer, FILENAME_MAX, "%s/dev/mapper", netdata_configured_host_prefix);
path_to_device_mapper = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to device mapper", buffer);
+ snprintfz(buffer, FILENAME_MAX, "%s/dev/disk", netdata_configured_host_prefix);
+ path_to_dev_disk = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to /dev/disk", buffer);
+
+ snprintfz(buffer, FILENAME_MAX, "%s/sys/block", netdata_configured_host_prefix);
+ path_to_sys_block = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to /sys/block", buffer);
+
snprintfz(buffer, FILENAME_MAX, "%s/dev/disk/by-label", netdata_configured_host_prefix);
path_to_device_label = config_get(CONFIG_SECTION_PLUGIN_PROC_DISKSTATS, "path to /dev/disk/by-label", buffer);
@@ -960,12 +1488,20 @@ int do_proc_diskstats(int update_every, usec_t dt) {
ff = procfile_readall(ff);
if(unlikely(!ff)) return 0; // we return 0, so that we will retry to open it next time
+ static bool add_func = true;
+ if (add_func) {
+ rrd_function_add(localhost, NULL, "block-devices", 10, RRDFUNCTIONS_DISKSTATS_HELP, true, diskstats_function_block_devices, NULL);
+ add_func = false;
+ }
+
size_t lines = procfile_lines(ff), l;
collected_number system_read_kb = 0, system_write_kb = 0;
int do_dc_stats = 0, do_fl_stats = 0;
+ netdata_mutex_lock(&diskstats_dev_mutex);
+
for(l = 0; l < lines ;l++) {
// --------------------------------------------------------------------------
// Read parameters
@@ -1090,7 +1626,6 @@ int do_proc_diskstats(int update_every, usec_t dt) {
// --------------------------------------------------------------------------
// Do performance metrics
-
if(d->do_io == CONFIG_BOOLEAN_YES || (d->do_io == CONFIG_BOOLEAN_AUTO &&
(readsectors || writesectors || discardsectors ||
netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
@@ -1936,8 +2471,13 @@ int do_proc_diskstats(int update_every, usec_t dt) {
rrdset_done(d->st_bcache_bypass);
}
}
+
+ d->function_ready = !d->excluded;
}
+ diskstats_cleanup_disks();
+
+ netdata_mutex_unlock(&diskstats_dev_mutex);
// update the system total I/O
if(global_do_io == CONFIG_BOOLEAN_YES || (global_do_io == CONFIG_BOOLEAN_AUTO &&
@@ -1971,77 +2511,5 @@ int do_proc_diskstats(int update_every, usec_t dt) {
rrdset_done(st_io);
}
- // cleanup removed disks
-
- struct disk *d = disk_root, *last = NULL;
- while(d) {
- if(unlikely(global_cleanup_removed_disks && !d->updated)) {
- struct disk *t = d;
-
- rrdset_obsolete_and_pointer_null(d->st_avgsz);
- rrdset_obsolete_and_pointer_null(d->st_ext_avgsz);
- rrdset_obsolete_and_pointer_null(d->st_await);
- rrdset_obsolete_and_pointer_null(d->st_ext_await);
- rrdset_obsolete_and_pointer_null(d->st_backlog);
- rrdset_obsolete_and_pointer_null(d->st_busy);
- rrdset_obsolete_and_pointer_null(d->st_io);
- rrdset_obsolete_and_pointer_null(d->st_ext_io);
- rrdset_obsolete_and_pointer_null(d->st_iotime);
- rrdset_obsolete_and_pointer_null(d->st_ext_iotime);
- rrdset_obsolete_and_pointer_null(d->st_mops);
- rrdset_obsolete_and_pointer_null(d->st_ext_mops);
- rrdset_obsolete_and_pointer_null(d->st_ops);
- rrdset_obsolete_and_pointer_null(d->st_ext_ops);
- rrdset_obsolete_and_pointer_null(d->st_qops);
- rrdset_obsolete_and_pointer_null(d->st_svctm);
- rrdset_obsolete_and_pointer_null(d->st_util);
- rrdset_obsolete_and_pointer_null(d->st_bcache);
- rrdset_obsolete_and_pointer_null(d->st_bcache_bypass);
- rrdset_obsolete_and_pointer_null(d->st_bcache_rates);
- rrdset_obsolete_and_pointer_null(d->st_bcache_size);
- rrdset_obsolete_and_pointer_null(d->st_bcache_usage);
- rrdset_obsolete_and_pointer_null(d->st_bcache_hit_ratio);
- rrdset_obsolete_and_pointer_null(d->st_bcache_cache_allocations);
- rrdset_obsolete_and_pointer_null(d->st_bcache_cache_read_races);
-
- if(d == disk_root) {
- disk_root = d = d->next;
- last = NULL;
- }
- else if(last) {
- last->next = d = d->next;
- }
-
- freez(t->bcache_filename_dirty_data);
- freez(t->bcache_filename_writeback_rate);
- freez(t->bcache_filename_cache_congested);
- freez(t->bcache_filename_cache_available_percent);
- freez(t->bcache_filename_stats_five_minute_cache_hit_ratio);
- freez(t->bcache_filename_stats_hour_cache_hit_ratio);
- freez(t->bcache_filename_stats_day_cache_hit_ratio);
- freez(t->bcache_filename_stats_total_cache_hit_ratio);
- freez(t->bcache_filename_stats_total_cache_hits);
- freez(t->bcache_filename_stats_total_cache_misses);
- freez(t->bcache_filename_stats_total_cache_miss_collisions);
- freez(t->bcache_filename_stats_total_cache_bypass_hits);
- freez(t->bcache_filename_stats_total_cache_bypass_misses);
- freez(t->bcache_filename_stats_total_cache_readaheads);
- freez(t->bcache_filename_cache_read_races);
- freez(t->bcache_filename_cache_io_errors);
- freez(t->bcache_filename_priority_stats);
-
- freez(t->disk);
- freez(t->device);
- freez(t->mount_point);
- freez(t->chart_id);
- freez(t);
- }
- else {
- d->updated = 0;
- last = d;
- d = d->next;
- }
- }
-
return 0;
}
diff --git a/collectors/proc.plugin/proc_interrupts.c b/collectors/proc.plugin/proc_interrupts.c
index 9a20700a3db611..37071b22f842fd 100644
--- a/collectors/proc.plugin/proc_interrupts.c
+++ b/collectors/proc.plugin/proc_interrupts.c
@@ -201,10 +201,10 @@ int do_proc_interrupts(int update_every, usec_t dt) {
for(c = 0; c < cpus ;c++) {
if(unlikely(!core_st[c])) {
char id[50+1];
- snprintfz(id, 50, "cpu%d_interrupts", c);
+ snprintfz(id, sizeof(id) - 1, "cpu%d_interrupts", c);
char title[100+1];
- snprintfz(title, 100, "CPU Interrupts");
+ snprintfz(title, sizeof(title) - 1, "CPU Interrupts");
core_st[c] = rrdset_create_localhost(
"cpu"
, id
@@ -221,7 +221,7 @@ int do_proc_interrupts(int update_every, usec_t dt) {
);
char core[50+1];
- snprintfz(core, 50, "cpu%d", c);
+ snprintfz(core, sizeof(core) - 1, "cpu%d", c);
rrdlabels_add(core_st[c]->rrdlabels, "cpu", core, RRDLABEL_SRC_AUTO);
}
diff --git a/collectors/proc.plugin/proc_mdstat.c b/collectors/proc.plugin/proc_mdstat.c
index c3d1793cbeeb1a..3857d9ec4f5102 100644
--- a/collectors/proc.plugin/proc_mdstat.c
+++ b/collectors/proc.plugin/proc_mdstat.c
@@ -70,10 +70,10 @@ static inline void make_chart_obsolete(char *name, const char *id_modifier)
RRDSET *st = NULL;
if (likely(name && id_modifier)) {
- snprintfz(id, 50, "mdstat.%s_%s", name, id_modifier);
+ snprintfz(id, sizeof(id) - 1, "mdstat.%s_%s", name, id_modifier);
st = rrdset_find_active_byname_localhost(id);
if (likely(st))
- rrdset_is_obsolete(st);
+ rrdset_is_obsolete___safe_from_collector_thread(st);
}
}
@@ -409,7 +409,7 @@ int do_proc_mdstat(int update_every, usec_t dt)
update_every,
RRDSET_TYPE_LINE);
- rrdset_isnot_obsolete(st_mdstat_health);
+ rrdset_isnot_obsolete___safe_from_collector_thread(st_mdstat_health);
}
if (!redundant_num) {
@@ -438,10 +438,10 @@ int do_proc_mdstat(int update_every, usec_t dt)
if (likely(raid->redundant)) {
if (likely(do_disks)) {
- snprintfz(id, 50, "%s_disks", raid->name);
+ snprintfz(id, sizeof(id) - 1, "%s_disks", raid->name);
if (unlikely(!raid->st_disks && !(raid->st_disks = rrdset_find_active_byname_localhost(id)))) {
- snprintfz(family, 50, "%s (%s)", raid->name, raid->level);
+ snprintfz(family, sizeof(family) - 1, "%s (%s)", raid->name, raid->level);
raid->st_disks = rrdset_create_localhost(
"mdstat",
@@ -457,7 +457,7 @@ int do_proc_mdstat(int update_every, usec_t dt)
update_every,
RRDSET_TYPE_STACKED);
- rrdset_isnot_obsolete(raid->st_disks);
+ rrdset_isnot_obsolete___safe_from_collector_thread(raid->st_disks);
add_labels_to_mdstat(raid, raid->st_disks);
}
@@ -473,10 +473,10 @@ int do_proc_mdstat(int update_every, usec_t dt)
}
if (likely(do_mismatch)) {
- snprintfz(id, 50, "%s_mismatch", raid->name);
+ snprintfz(id, sizeof(id) - 1, "%s_mismatch", raid->name);
if (unlikely(!raid->st_mismatch_cnt && !(raid->st_mismatch_cnt = rrdset_find_active_byname_localhost(id)))) {
- snprintfz(family, 50, "%s (%s)", raid->name, raid->level);
+ snprintfz(family, sizeof(family) - 1, "%s (%s)", raid->name, raid->level);
raid->st_mismatch_cnt = rrdset_create_localhost(
"mdstat",
@@ -492,7 +492,7 @@ int do_proc_mdstat(int update_every, usec_t dt)
update_every,
RRDSET_TYPE_LINE);
- rrdset_isnot_obsolete(raid->st_mismatch_cnt);
+ rrdset_isnot_obsolete___safe_from_collector_thread(raid->st_mismatch_cnt);
add_labels_to_mdstat(raid, raid->st_mismatch_cnt);
}
@@ -505,10 +505,10 @@ int do_proc_mdstat(int update_every, usec_t dt)
}
if (likely(do_operations)) {
- snprintfz(id, 50, "%s_operation", raid->name);
+ snprintfz(id, sizeof(id) - 1, "%s_operation", raid->name);
if (unlikely(!raid->st_operation && !(raid->st_operation = rrdset_find_active_byname_localhost(id)))) {
- snprintfz(family, 50, "%s (%s)", raid->name, raid->level);
+ snprintfz(family, sizeof(family) - 1, "%s (%s)", raid->name, raid->level);
raid->st_operation = rrdset_create_localhost(
"mdstat",
@@ -524,7 +524,7 @@ int do_proc_mdstat(int update_every, usec_t dt)
update_every,
RRDSET_TYPE_LINE);
- rrdset_isnot_obsolete(raid->st_operation);
+ rrdset_isnot_obsolete___safe_from_collector_thread(raid->st_operation);
add_labels_to_mdstat(raid, raid->st_operation);
}
@@ -544,9 +544,9 @@ int do_proc_mdstat(int update_every, usec_t dt)
rrddim_set_by_pointer(raid->st_operation, raid->rd_reshape, raid->reshape);
rrdset_done(raid->st_operation);
- snprintfz(id, 50, "%s_finish", raid->name);
+ snprintfz(id, sizeof(id) - 1, "%s_finish", raid->name);
if (unlikely(!raid->st_finish && !(raid->st_finish = rrdset_find_active_byname_localhost(id)))) {
- snprintfz(family, 50, "%s (%s)", raid->name, raid->level);
+ snprintfz(family, sizeof(family) - 1, "%s (%s)", raid->name, raid->level);
raid->st_finish = rrdset_create_localhost(
"mdstat",
@@ -561,7 +561,7 @@ int do_proc_mdstat(int update_every, usec_t dt)
NETDATA_CHART_PRIO_MDSTAT_FINISH + raid_idx * 10,
update_every, RRDSET_TYPE_LINE);
- rrdset_isnot_obsolete(raid->st_finish);
+ rrdset_isnot_obsolete___safe_from_collector_thread(raid->st_finish);
add_labels_to_mdstat(raid, raid->st_finish);
}
@@ -572,9 +572,9 @@ int do_proc_mdstat(int update_every, usec_t dt)
rrddim_set_by_pointer(raid->st_finish, raid->rd_finish_in, raid->finish_in);
rrdset_done(raid->st_finish);
- snprintfz(id, 50, "%s_speed", raid->name);
+ snprintfz(id, sizeof(id) - 1, "%s_speed", raid->name);
if (unlikely(!raid->st_speed && !(raid->st_speed = rrdset_find_active_byname_localhost(id)))) {
- snprintfz(family, 50, "%s (%s)", raid->name, raid->level);
+ snprintfz(family, sizeof(family) - 1, "%s (%s)", raid->name, raid->level);
raid->st_speed = rrdset_create_localhost(
"mdstat",
@@ -590,7 +590,7 @@ int do_proc_mdstat(int update_every, usec_t dt)
update_every,
RRDSET_TYPE_LINE);
- rrdset_isnot_obsolete(raid->st_speed);
+ rrdset_isnot_obsolete___safe_from_collector_thread(raid->st_speed);
add_labels_to_mdstat(raid, raid->st_speed);
}
@@ -603,10 +603,10 @@ int do_proc_mdstat(int update_every, usec_t dt)
}
} else {
if (likely(do_nonredundant)) {
- snprintfz(id, 50, "%s_availability", raid->name);
+ snprintfz(id, sizeof(id) - 1, "%s_availability", raid->name);
if (unlikely(!raid->st_nonredundant && !(raid->st_nonredundant = rrdset_find_active_localhost(id)))) {
- snprintfz(family, 50, "%s (%s)", raid->name, raid->level);
+ snprintfz(family, sizeof(family) - 1, "%s (%s)", raid->name, raid->level);
raid->st_nonredundant = rrdset_create_localhost(
"mdstat",
@@ -622,7 +622,7 @@ int do_proc_mdstat(int update_every, usec_t dt)
update_every,
RRDSET_TYPE_LINE);
- rrdset_isnot_obsolete(raid->st_nonredundant);
+ rrdset_isnot_obsolete___safe_from_collector_thread(raid->st_nonredundant);
add_labels_to_mdstat(raid, raid->st_nonredundant);
}
diff --git a/collectors/proc.plugin/proc_net_dev.c b/collectors/proc.plugin/proc_net_dev.c
index 88863f48ee48ef..b39f396833dcff 100644
--- a/collectors/proc.plugin/proc_net_dev.c
+++ b/collectors/proc.plugin/proc_net_dev.c
@@ -5,16 +5,35 @@
#define PLUGIN_PROC_MODULE_NETDEV_NAME "/proc/net/dev"
#define CONFIG_SECTION_PLUGIN_PROC_NETDEV "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_NETDEV_NAME
+#define RRDFUNCTIONS_NETDEV_HELP "View network interface statistics"
+
#define STATE_LENGTH_MAX 32
#define READ_RETRY_PERIOD 60 // seconds
+void cgroup_netdev_reset_all(void);
+void cgroup_netdev_release(const DICTIONARY_ITEM *link);
+const void *cgroup_netdev_dup(const DICTIONARY_ITEM *link);
+void cgroup_netdev_add_bandwidth(const DICTIONARY_ITEM *link, NETDATA_DOUBLE received, NETDATA_DOUBLE sent);
+
enum {
NETDEV_DUPLEX_UNKNOWN,
NETDEV_DUPLEX_HALF,
NETDEV_DUPLEX_FULL
};
+// Translate a NETDEV_DUPLEX_* constant into its name.
+// Anything other than full or half duplex is reported as "unknown".
+static const char *get_duplex_string(int duplex)
+{
+    if (duplex == NETDEV_DUPLEX_FULL)
+        return "full";
+
+    if (duplex == NETDEV_DUPLEX_HALF)
+        return "half";
+
+    return "unknown";
+}
+
enum {
NETDEV_OPERSTATE_UNKNOWN,
NETDEV_OPERSTATE_NOTPRESENT,
@@ -44,6 +63,26 @@ static inline int get_operstate(char *operstate)
return NETDEV_OPERSTATE_UNKNOWN;
}
+// Translate a NETDEV_OPERSTATE_* constant into its name.
+// Values without a dedicated name are reported as "unknown".
+static const char *get_operstate_string(int operstate)
+{
+    if (operstate == NETDEV_OPERSTATE_UP)
+        return "up";
+
+    if (operstate == NETDEV_OPERSTATE_DOWN)
+        return "down";
+
+    if (operstate == NETDEV_OPERSTATE_NOTPRESENT)
+        return "notpresent";
+
+    if (operstate == NETDEV_OPERSTATE_LOWERLAYERDOWN)
+        return "lowerlayerdown";
+
+    if (operstate == NETDEV_OPERSTATE_TESTING)
+        return "testing";
+
+    if (operstate == NETDEV_OPERSTATE_DORMANT)
+        return "dormant";
+
+    return "unknown";
+}
+
// ----------------------------------------------------------------------------
// netdev list
@@ -58,6 +97,8 @@ static struct netdev {
int enabled;
int updated;
+ bool function_ready;
+
time_t discover_time;
int carrier_file_exists;
@@ -123,7 +164,7 @@ static struct netdev {
const char *chart_family;
- DICTIONARY *chart_labels;
+ RRDLABELS *chart_labels;
int flipped;
unsigned long priority;
@@ -208,6 +249,8 @@ static struct netdev {
char *filename_carrier;
char *filename_mtu;
+ const DICTIONARY_ITEM *cgroup_netdev_link;
+
struct netdev *next;
} *netdev_root = NULL, *netdev_last_used = NULL;
@@ -216,18 +259,18 @@ static size_t netdev_added = 0, netdev_found = 0;
// ----------------------------------------------------------------------------
static void netdev_charts_release(struct netdev *d) {
- if(d->st_bandwidth) rrdset_is_obsolete(d->st_bandwidth);
- if(d->st_packets) rrdset_is_obsolete(d->st_packets);
- if(d->st_errors) rrdset_is_obsolete(d->st_errors);
- if(d->st_drops) rrdset_is_obsolete(d->st_drops);
- if(d->st_fifo) rrdset_is_obsolete(d->st_fifo);
- if(d->st_compressed) rrdset_is_obsolete(d->st_compressed);
- if(d->st_events) rrdset_is_obsolete(d->st_events);
- if(d->st_speed) rrdset_is_obsolete(d->st_speed);
- if(d->st_duplex) rrdset_is_obsolete(d->st_duplex);
- if(d->st_operstate) rrdset_is_obsolete(d->st_operstate);
- if(d->st_carrier) rrdset_is_obsolete(d->st_carrier);
- if(d->st_mtu) rrdset_is_obsolete(d->st_mtu);
+ if(d->st_bandwidth) rrdset_is_obsolete___safe_from_collector_thread(d->st_bandwidth);
+ if(d->st_packets) rrdset_is_obsolete___safe_from_collector_thread(d->st_packets);
+ if(d->st_errors) rrdset_is_obsolete___safe_from_collector_thread(d->st_errors);
+ if(d->st_drops) rrdset_is_obsolete___safe_from_collector_thread(d->st_drops);
+ if(d->st_fifo) rrdset_is_obsolete___safe_from_collector_thread(d->st_fifo);
+ if(d->st_compressed) rrdset_is_obsolete___safe_from_collector_thread(d->st_compressed);
+ if(d->st_events) rrdset_is_obsolete___safe_from_collector_thread(d->st_events);
+ if(d->st_speed) rrdset_is_obsolete___safe_from_collector_thread(d->st_speed);
+ if(d->st_duplex) rrdset_is_obsolete___safe_from_collector_thread(d->st_duplex);
+ if(d->st_operstate) rrdset_is_obsolete___safe_from_collector_thread(d->st_operstate);
+ if(d->st_carrier) rrdset_is_obsolete___safe_from_collector_thread(d->st_carrier);
+ if(d->st_mtu) rrdset_is_obsolete___safe_from_collector_thread(d->st_mtu);
d->st_bandwidth = NULL;
d->st_compressed = NULL;
@@ -326,6 +369,7 @@ static void netdev_free(struct netdev *d) {
netdev_charts_release(d);
netdev_free_chart_strings(d);
rrdlabels_destroy(d->chart_labels);
+ cgroup_netdev_release(d->cgroup_netdev_link);
freez((void *)d->name);
freez((void *)d->filename_speed);
@@ -348,15 +392,18 @@ static struct netdev_rename {
const char *container_name;
const char *ctx_prefix;
- DICTIONARY *chart_labels;
+ RRDLABELS *chart_labels;
int processed;
+ const DICTIONARY_ITEM *cgroup_netdev_link;
+
struct netdev_rename *next;
} *netdev_rename_root = NULL;
static int netdev_pending_renames = 0;
static netdata_mutex_t netdev_rename_mutex = NETDATA_MUTEX_INITIALIZER;
+static netdata_mutex_t netdev_dev_mutex = NETDATA_MUTEX_INITIALIZER;
static struct netdev_rename *netdev_rename_find(const char *host_device, uint32_t hash) {
struct netdev_rename *r;
@@ -373,8 +420,9 @@ void netdev_rename_device_add(
const char *host_device,
const char *container_device,
const char *container_name,
- DICTIONARY *labels,
- const char *ctx_prefix)
+ RRDLABELS *labels,
+ const char *ctx_prefix,
+ const DICTIONARY_ITEM *cgroup_netdev_link)
{
netdata_mutex_lock(&netdev_rename_mutex);
@@ -391,6 +439,8 @@ void netdev_rename_device_add(
r->hash = hash;
r->next = netdev_rename_root;
r->processed = 0;
+ r->cgroup_netdev_link = cgroup_netdev_link;
+
netdev_rename_root = r;
netdev_pending_renames++;
collector_info("CGROUP: registered network interface rename for '%s' as '%s' under '%s'", r->host_device, r->container_device, r->container_name);
@@ -406,6 +456,8 @@ void netdev_rename_device_add(
rrdlabels_migrate_to_these(r->chart_labels, labels);
r->processed = 0;
+ r->cgroup_netdev_link = cgroup_netdev_link;
+
netdev_pending_renames++;
collector_info("CGROUP: altered network interface rename for '%s' as '%s' under '%s'", r->host_device, r->container_device, r->container_name);
}
@@ -438,6 +490,7 @@ void netdev_rename_device_del(const char *host_device) {
freez((void *) r->container_device);
freez((void *) r->ctx_prefix);
rrdlabels_destroy(r->chart_labels);
+ cgroup_netdev_release(r->cgroup_netdev_link);
freez((void *) r);
break;
}
@@ -451,6 +504,7 @@ static inline void netdev_rename_cgroup(struct netdev *d, struct netdev_rename *
netdev_charts_release(d);
netdev_free_chart_strings(d);
+ d->cgroup_netdev_link = cgroup_netdev_dup(r->cgroup_netdev_link);
char buffer[RRD_ID_LENGTH_MAX + 1];
@@ -521,6 +575,7 @@ static inline void netdev_rename_cgroup(struct netdev *d, struct netdev_rename *
d->chart_family = strdupz("net");
rrdlabels_copy(d->chart_labels, r->chart_labels);
+ rrdlabels_add(d->chart_labels, "container_device", r->container_device, RRDLABEL_SRC_AUTO);
d->priority = NETDATA_CHART_PRIO_CGROUP_NET_IFACE;
d->flipped = 1;
@@ -554,6 +609,335 @@ static inline void netdev_rename_all_lock(void) {
}
// ----------------------------------------------------------------------------
+
+// Handler for the "network-interfaces" function: fills wb with a JSON table
+// holding one row per interface whose function_ready flag is set.
+//
+// Besides the "data" rows, the response carries hostname, status, type,
+// update_every, help, the "columns" descriptors, default_sort_column, charts,
+// default_charts, group_by and expires.
+//
+// The interface list is read while holding netdev_dev_mutex. Cancellation is
+// checked once, after the response has been built: a cancelled request gets a
+// flushed buffer and HTTP_RESP_CLIENT_CLOSED_REQUEST. result_cb, when set, is
+// called with the buffer and the response code, which is also returned.
+int netdev_function_net_interfaces(BUFFER *wb, int timeout __maybe_unused, const char *function __maybe_unused,
+                                   void *collector_data __maybe_unused,
+                                   rrd_function_result_callback_t result_cb, void *result_cb_data,
+                                   rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data,
+                                   rrd_function_register_canceller_cb_t register_canceller_cb __maybe_unused,
+                                   void *register_canceller_cb_data __maybe_unused) {
+
+    buffer_flush(wb);
+    wb->content_type = CT_APPLICATION_JSON;
+    buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
+
+    buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost));
+    buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
+    buffer_json_member_add_string(wb, "type", "table");
+    buffer_json_member_add_time_t(wb, "update_every", 1);
+    buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_NETDEV_HELP);
+    buffer_json_member_add_array(wb, "data");
+
+    double max_traffic_rx = 0.0;
+    double max_traffic_tx = 0.0;
+    double max_traffic = 0.0;
+    double max_packets_rx = 0.0;
+    double max_packets_tx = 0.0;
+    double max_mcast_rx = 0.0;
+    double max_drops_rx = 0.0;
+    double max_drops_tx = 0.0;
+
+    netdata_mutex_lock(&netdev_dev_mutex);
+
+    RRDDIM *rd = NULL;
+
+    // Walk the list to its end instead of stopping at netdev_last_used.
+    // NOTE(review): netdev_last_used looks like a lookup cursor, not a tail
+    // marker, so stopping at it could drop rows from the table - confirm.
+    for (struct netdev *d = netdev_root; d; d = d->next) {
+        if (unlikely(!d->function_ready))
+            continue;
+
+        buffer_json_add_array_item_array(wb);
+
+        buffer_json_add_array_item_string(wb, d->name);
+
+        buffer_json_add_array_item_string(wb, d->virtual ? "virtual" : "physical");
+        buffer_json_add_array_item_string(wb, d->flipped ? "cgroup" : "host");
+        buffer_json_add_array_item_string(wb, d->carrier == 1 ? "up" : "down");
+        buffer_json_add_array_item_string(wb, get_operstate_string(d->operstate));
+        buffer_json_add_array_item_string(wb, get_duplex_string(d->duplex));
+        buffer_json_add_array_item_double(wb, d->speed > 0 ? d->speed : NAN);
+        buffer_json_add_array_item_double(wb, d->mtu > 0 ? d->mtu : NAN);
+
+        // for flipped interfaces the received/sent dimensions are swapped
+        rd = d->flipped ? d->rd_tbytes : d->rd_rbytes;
+        double traffic_rx = rrddim_get_last_stored_value(rd, &max_traffic_rx, 1000.0);
+        rd = d->flipped ? d->rd_rbytes : d->rd_tbytes;
+        double traffic_tx = rrddim_get_last_stored_value(rd, &max_traffic_tx, 1000.0);
+
+        rd = d->flipped ? d->rd_tpackets : d->rd_rpackets;
+        double packets_rx = rrddim_get_last_stored_value(rd, &max_packets_rx, 1000.0);
+        rd = d->flipped ? d->rd_rpackets : d->rd_tpackets;
+        double packets_tx = rrddim_get_last_stored_value(rd, &max_packets_tx, 1000.0);
+
+        double mcast_rx = rrddim_get_last_stored_value(d->rd_rmulticast, &max_mcast_rx, 1000.0);
+
+        rd = d->flipped ? d->rd_tdrops : d->rd_rdrops;
+        double drops_rx = rrddim_get_last_stored_value(rd, &max_drops_rx, 1.0);
+        rd = d->flipped ? d->rd_rdrops : d->rd_tdrops;
+        double drops_tx = rrddim_get_last_stored_value(rd, &max_drops_tx, 1.0);
+
+        // FIXME: "traffic" (total) is needed only for default_sorting
+        // can be removed when default_sorting will accept multiple columns (sum)
+        double traffic = NAN;
+        if (!isnan(traffic_rx) && !isnan(traffic_tx)) {
+            traffic = traffic_rx + traffic_tx;
+            max_traffic = MAX(max_traffic, traffic);
+        }
+
+
+        buffer_json_add_array_item_double(wb, traffic_rx);
+        buffer_json_add_array_item_double(wb, traffic_tx);
+        buffer_json_add_array_item_double(wb, traffic);
+        buffer_json_add_array_item_double(wb, packets_rx);
+        buffer_json_add_array_item_double(wb, packets_tx);
+        buffer_json_add_array_item_double(wb, mcast_rx);
+        buffer_json_add_array_item_double(wb, drops_rx);
+        buffer_json_add_array_item_double(wb, drops_tx);
+
+        buffer_json_add_array_item_object(wb);
+        {
+            // the row is marked "warning" when the summed drops are above zero
+            buffer_json_member_add_string(wb, "severity", drops_rx + drops_tx > 0 ? "warning" : "normal");
+        }
+        buffer_json_object_close(wb);
+
+        buffer_json_array_close(wb);
+    }
+
+    netdata_mutex_unlock(&netdev_dev_mutex);
+
+    buffer_json_array_close(wb); // data
+    buffer_json_member_add_object(wb, "columns");
+    {
+        size_t field_id = 0;
+
+        buffer_rrdf_table_add_field(wb, field_id++, "Interface", "Network Interface Name",
+                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "Type", "Network Interface Type",
+                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "UsedBy", "Indicates whether the network interface is used by a cgroup or by the host system",
+                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "PhState", "Current Physical State",
+                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "OpState", "Current Operational State",
+                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_UNIQUE_KEY,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "Duplex", "Current Duplex Mode",
+                                    RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                                    0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_UNIQUE_KEY,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "Speed", "Current Link Speed",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    0, "Mbit", NAN, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_UNIQUE_KEY,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "MTU", "Maximum Transmission Unit",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    0, "Octets", NAN, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                                    RRDF_FIELD_OPTS_UNIQUE_KEY,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "In", "Traffic Received",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "Mbit", max_traffic_rx, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "Out", "Traffic Sent",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "Mbit", max_traffic_tx, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "Total", "Traffic Received and Sent",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "Mbit", max_traffic, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_NONE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "PktsIn", "Received Packets",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "Kpps", max_packets_rx, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "PktsOut", "Sent Packets",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "Kpps", max_packets_tx, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "McastIn", "Multicast Received Packets",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "Kpps", max_mcast_rx, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_NONE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "DropsIn", "Dropped Inbound Packets",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "Drops", max_drops_rx, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "DropsOut", "Dropped Outbound Packets",
+                                    RRDF_FIELD_TYPE_BAR_WITH_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER,
+                                    2, "Drops", max_drops_tx, RRDF_FIELD_SORT_DESCENDING, NULL,
+                                    RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_NONE,
+                                    RRDF_FIELD_OPTS_VISIBLE,
+                                    NULL);
+
+        buffer_rrdf_table_add_field(
+                wb, field_id++,
+                "rowOptions", "rowOptions",
+                RRDF_FIELD_TYPE_NONE,
+                RRDR_FIELD_VISUAL_ROW_OPTIONS,
+                RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN,
+                RRDF_FIELD_SORT_FIXED,
+                NULL,
+                RRDF_FIELD_SUMMARY_COUNT,
+                RRDF_FIELD_FILTER_NONE,
+                RRDF_FIELD_OPTS_DUMMY,
+                NULL);
+    }
+
+    buffer_json_object_close(wb); // columns
+    buffer_json_member_add_string(wb, "default_sort_column", "Total");
+
+    buffer_json_member_add_object(wb, "charts");
+    {
+        buffer_json_member_add_object(wb, "Traffic");
+        {
+            buffer_json_member_add_string(wb, "name", "Traffic");
+            buffer_json_member_add_string(wb, "type", "stacked-bar");
+            buffer_json_member_add_array(wb, "columns");
+            {
+                buffer_json_add_array_item_string(wb, "In");
+                buffer_json_add_array_item_string(wb, "Out");
+            }
+            buffer_json_array_close(wb);
+        }
+        buffer_json_object_close(wb);
+
+        buffer_json_member_add_object(wb, "Packets");
+        {
+            buffer_json_member_add_string(wb, "name", "Packets");
+            buffer_json_member_add_string(wb, "type", "stacked-bar");
+            buffer_json_member_add_array(wb, "columns");
+            {
+                buffer_json_add_array_item_string(wb, "PktsIn");
+                buffer_json_add_array_item_string(wb, "PktsOut");
+            }
+            buffer_json_array_close(wb);
+        }
+        buffer_json_object_close(wb);
+    }
+    buffer_json_object_close(wb); // charts
+
+    buffer_json_member_add_array(wb, "default_charts");
+    {
+        buffer_json_add_array_item_array(wb);
+        buffer_json_add_array_item_string(wb, "Traffic");
+        buffer_json_add_array_item_string(wb, "Interface");
+        buffer_json_array_close(wb);
+
+        buffer_json_add_array_item_array(wb);
+        buffer_json_add_array_item_string(wb, "Traffic");
+        buffer_json_add_array_item_string(wb, "Type");
+        buffer_json_array_close(wb);
+    }
+    buffer_json_array_close(wb);
+
+    buffer_json_member_add_object(wb, "group_by");
+    {
+        buffer_json_member_add_object(wb, "Type");
+        {
+            buffer_json_member_add_string(wb, "name", "Type");
+            buffer_json_member_add_array(wb, "columns");
+            {
+                buffer_json_add_array_item_string(wb, "Type");
+            }
+            buffer_json_array_close(wb);
+        }
+        buffer_json_object_close(wb);
+
+        buffer_json_member_add_object(wb, "UsedBy");
+        {
+            buffer_json_member_add_string(wb, "name", "UsedBy");
+            buffer_json_member_add_array(wb, "columns");
+            {
+                buffer_json_add_array_item_string(wb, "UsedBy");
+            }
+            buffer_json_array_close(wb);
+        }
+        buffer_json_object_close(wb);
+    }
+    buffer_json_object_close(wb); // group_by
+
+    buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1);
+    buffer_json_finalize(wb);
+
+    int response = HTTP_RESP_OK;
+    if(is_cancelled_cb && is_cancelled_cb(is_cancelled_cb_data)) {
+        buffer_flush(wb);
+        response = HTTP_RESP_CLIENT_CLOSED_REQUEST;
+    }
+
+    if(result_cb)
+        result_cb(wb, response, result_cb_data);
+
+    return response;
+}
+
// netdev data collection
static void netdev_cleanup() {
@@ -615,6 +983,7 @@ static struct netdev *get_netdev(const char *name) {
d->hash = simple_hash(d->name);
d->len = strlen(d->name);
d->chart_labels = rrdlabels_create();
+ d->function_ready = false;
d->chart_type_net_bytes = strdupz("net");
d->chart_type_net_compressed = strdupz("net_compressed");
@@ -778,63 +1147,89 @@ int do_proc_net_dev(int update_every, usec_t dt) {
if(d->enabled)
d->enabled = !simple_pattern_matches(disabled_list, d->name);
-#ifdef NETDATA_SKIP_IF_NOT_COLLECT
- if(unlikely(!d->enabled)) {
- netdata_log_debug(D_COLLECTOR, "PLUGIN: proc_net_dev: Skipping interface '%s' disabled by configuration", d->name);
- continue;
- }
-#endif
- char buffer[FILENAME_MAX + 1];
+ char buf[FILENAME_MAX + 1];
+ snprintfz(buf, FILENAME_MAX, path_to_sys_devices_virtual_net, d->name);
- snprintfz(buffer, FILENAME_MAX, path_to_sys_devices_virtual_net, d->name);
- if (likely(access(buffer, R_OK) == 0)) {
- d->virtual = 1;
- rrdlabels_add(d->chart_labels, "interface_type", "virtual", RRDLABEL_SRC_AUTO|RRDLABEL_FLAG_PERMANENT);
- }
- else {
+ d->virtual = likely(access(buf, R_OK) == 0) ? 1 : 0;
+
+ // At least on Proxmox inside LXC: eth0 is virtual.
+ // Virtual interfaces are not taken into account in system.net calculations
+ if (inside_lxc_container && d->virtual && strncmp(d->name, "eth", 3) == 0)
d->virtual = 0;
- rrdlabels_add(d->chart_labels, "interface_type", "real", RRDLABEL_SRC_AUTO|RRDLABEL_FLAG_PERMANENT);
- }
- rrdlabels_add(d->chart_labels, "device", name, RRDLABEL_SRC_AUTO|RRDLABEL_FLAG_PERMANENT);
+
+ if (d->virtual)
+ rrdlabels_add(d->chart_labels, "interface_type", "virtual", RRDLABEL_SRC_AUTO);
+ else
+ rrdlabels_add(d->chart_labels, "interface_type", "real", RRDLABEL_SRC_AUTO);
+
+ rrdlabels_add(d->chart_labels, "device", name, RRDLABEL_SRC_AUTO);
if(likely(!d->virtual)) {
// set the filename to get the interface speed
- snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_speed, d->name);
- d->filename_speed = strdupz(buffer);
+ snprintfz(buf, FILENAME_MAX, path_to_sys_class_net_speed, d->name);
+ d->filename_speed = strdupz(buf);
- snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_duplex, d->name);
- d->filename_duplex = strdupz(buffer);
+ snprintfz(buf, FILENAME_MAX, path_to_sys_class_net_duplex, d->name);
+ d->filename_duplex = strdupz(buf);
}
- snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_operstate, d->name);
- d->filename_operstate = strdupz(buffer);
+ snprintfz(buf, FILENAME_MAX, path_to_sys_class_net_operstate, d->name);
+ d->filename_operstate = strdupz(buf);
- snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_carrier, d->name);
- d->filename_carrier = strdupz(buffer);
+ snprintfz(buf, FILENAME_MAX, path_to_sys_class_net_carrier, d->name);
+ d->filename_carrier = strdupz(buf);
- snprintfz(buffer, FILENAME_MAX, path_to_sys_class_net_mtu, d->name);
- d->filename_mtu = strdupz(buffer);
+ snprintfz(buf, FILENAME_MAX, path_to_sys_class_net_mtu, d->name);
+ d->filename_mtu = strdupz(buf);
- snprintfz(buffer, FILENAME_MAX, "plugin:proc:/proc/net/dev:%s", d->name);
- d->enabled = config_get_boolean_ondemand(buffer, "enabled", d->enabled);
- d->virtual = config_get_boolean(buffer, "virtual", d->virtual);
+ snprintfz(buf, FILENAME_MAX, "plugin:proc:/proc/net/dev:%s", d->name);
+
+ if (config_exists(buf, "enabled"))
+ d->enabled = config_get_boolean_ondemand(buf, "enabled", d->enabled);
+ if (config_exists(buf, "virtual"))
+ d->virtual = config_get_boolean(buf, "virtual", d->virtual);
if(d->enabled == CONFIG_BOOLEAN_NO)
continue;
- d->do_bandwidth = config_get_boolean_ondemand(buffer, "bandwidth", do_bandwidth);
- d->do_packets = config_get_boolean_ondemand(buffer, "packets", do_packets);
- d->do_errors = config_get_boolean_ondemand(buffer, "errors", do_errors);
- d->do_drops = config_get_boolean_ondemand(buffer, "drops", do_drops);
- d->do_fifo = config_get_boolean_ondemand(buffer, "fifo", do_fifo);
- d->do_compressed = config_get_boolean_ondemand(buffer, "compressed", do_compressed);
- d->do_events = config_get_boolean_ondemand(buffer, "events", do_events);
- d->do_speed = config_get_boolean_ondemand(buffer, "speed", do_speed);
- d->do_duplex = config_get_boolean_ondemand(buffer, "duplex", do_duplex);
- d->do_operstate = config_get_boolean_ondemand(buffer, "operstate", do_operstate);
- d->do_carrier = config_get_boolean_ondemand(buffer, "carrier", do_carrier);
- d->do_mtu = config_get_boolean_ondemand(buffer, "mtu", do_mtu);
+ d->do_bandwidth = do_bandwidth;
+ d->do_packets = do_packets;
+ d->do_errors = do_errors;
+ d->do_drops = do_drops;
+ d->do_fifo = do_fifo;
+ d->do_compressed = do_compressed;
+ d->do_events = do_events;
+ d->do_speed = do_speed;
+ d->do_duplex = do_duplex;
+ d->do_operstate = do_operstate;
+ d->do_carrier = do_carrier;
+ d->do_mtu = do_mtu;
+
+ if (config_exists(buf, "bandwidth"))
+ d->do_bandwidth = config_get_boolean_ondemand(buf, "bandwidth", do_bandwidth);
+ if (config_exists(buf, "packets"))
+ d->do_packets = config_get_boolean_ondemand(buf, "packets", do_packets);
+ if (config_exists(buf, "errors"))
+ d->do_errors = config_get_boolean_ondemand(buf, "errors", do_errors);
+ if (config_exists(buf, "drops"))
+ d->do_drops = config_get_boolean_ondemand(buf, "drops", do_drops);
+ if (config_exists(buf, "fifo"))
+ d->do_fifo = config_get_boolean_ondemand(buf, "fifo", do_fifo);
+ if (config_exists(buf, "compressed"))
+ d->do_compressed = config_get_boolean_ondemand(buf, "compressed", do_compressed);
+ if (config_exists(buf, "events"))
+ d->do_events = config_get_boolean_ondemand(buf, "events", do_events);
+ if (config_exists(buf, "speed"))
+ d->do_speed = config_get_boolean_ondemand(buf, "speed", do_speed);
+ if (config_exists(buf, "duplex"))
+ d->do_duplex = config_get_boolean_ondemand(buf, "duplex", do_duplex);
+ if (config_exists(buf, "operstate"))
+ d->do_operstate = config_get_boolean_ondemand(buf, "operstate", do_operstate);
+ if (config_exists(buf, "carrier"))
+ d->do_carrier = config_get_boolean_ondemand(buf, "carrier", do_carrier);
+ if (config_exists(buf, "mtu"))
+ d->do_mtu = config_get_boolean_ondemand(buf, "mtu", do_mtu);
}
if(unlikely(!d->enabled))
@@ -1014,6 +1409,11 @@ int do_proc_net_dev(int update_every, usec_t dt) {
rrddim_set_by_pointer(d->st_bandwidth, d->rd_tbytes, (collected_number)d->tbytes);
rrdset_done(d->st_bandwidth);
+ if(d->cgroup_netdev_link)
+ cgroup_netdev_add_bandwidth(d->cgroup_netdev_link,
+ d->flipped ? d->rd_tbytes->collector.last_stored_value : -d->rd_rbytes->collector.last_stored_value,
+ d->flipped ? -d->rd_rbytes->collector.last_stored_value : d->rd_tbytes->collector.last_stored_value);
+
// update the interface speed
if(d->filename_speed) {
if(unlikely(!d->chart_var_speed)) {
@@ -1468,6 +1868,8 @@ int do_proc_net_dev(int update_every, usec_t dt) {
rrddim_set_by_pointer(d->st_events, d->rd_tcarrier, (collected_number)d->tcarrier);
rrdset_done(d->st_events);
}
+
+ d->function_ready = true;
}
if(do_bandwidth == CONFIG_BOOLEAN_YES || (do_bandwidth == CONFIG_BOOLEAN_AUTO &&
@@ -1524,6 +1926,9 @@ void *netdev_main(void *ptr)
netdata_thread_cleanup_push(netdev_main_cleanup, ptr);
+ rrd_collector_started();
+ rrd_function_add(localhost, NULL, "network-interfaces", 10, RRDFUNCTIONS_NETDEV_HELP, true, netdev_function_net_interfaces, NULL);
+
usec_t step = localhost->rrd_update_every * USEC_PER_SEC;
heartbeat_t hb;
heartbeat_init(&hb);
@@ -1535,11 +1940,22 @@ void *netdev_main(void *ptr)
if (unlikely(!service_running(SERVICE_COLLECTORS)))
break;
+        cgroup_netdev_reset_all();
+
worker_is_busy(0);
-        if(do_proc_net_dev(localhost->rrd_update_every, hb_dt))
+
+        // Collect under netdev_dev_mutex, but release it before acting on the
+        // result: leaving the loop with the mutex still held would block
+        // netdev_function_net_interfaces(), which takes the same mutex.
+        netdata_mutex_lock(&netdev_dev_mutex);
+        int stop = do_proc_net_dev(localhost->rrd_update_every, hb_dt);
+        netdata_mutex_unlock(&netdev_dev_mutex);
+
+        if(stop)
break;
}
netdata_thread_cleanup_pop(1);
+
return NULL;
}
diff --git a/collectors/proc.plugin/proc_net_netstat.c b/collectors/proc.plugin/proc_net_netstat.c
index ce3068c0e5bc51..170daad5d980cb 100644
--- a/collectors/proc.plugin/proc_net_netstat.c
+++ b/collectors/proc.plugin/proc_net_netstat.c
@@ -2,9 +2,9 @@
#include "plugin_proc.h"
-#define RRD_TYPE_NET_NETSTAT "ip"
-#define RRD_TYPE_NET_SNMP "ipv4"
-#define RRD_TYPE_NET_SNMP6 "ipv6"
+#define RRD_TYPE_NET_IP "ip"
+#define RRD_TYPE_NET_IP4 "ipv4"
+#define RRD_TYPE_NET_IP6 "ipv6"
#define PLUGIN_PROC_MODULE_NETSTAT_NAME "/proc/net/netstat"
#define CONFIG_SECTION_PLUGIN_PROC_NETSTAT "plugin:" PLUGIN_PROC_CONFIG_NAME ":" PLUGIN_PROC_MODULE_NETSTAT_NAME
@@ -424,7 +424,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "packets"
, NULL
, "packets"
@@ -464,7 +464,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "fragsout"
, NULL
, "fragments6"
@@ -506,7 +506,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "fragsin"
, NULL
, "fragments6"
@@ -557,7 +557,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "errors"
, NULL
, "errors"
@@ -605,7 +605,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "udppackets"
, NULL
, "udp6"
@@ -647,7 +647,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "udperrors"
, NULL
, "udp6"
@@ -689,7 +689,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "udplitepackets"
, NULL
, "udplite6"
@@ -730,7 +730,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "udpliteerrors"
, NULL
, "udplite6"
@@ -771,7 +771,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "mcast"
, NULL
, "multicast6"
@@ -806,7 +806,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "bcast"
, NULL
, "broadcast6"
@@ -841,7 +841,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "mcastpkts"
, NULL
, "multicast6"
@@ -876,7 +876,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "icmp"
, NULL
, "icmp6"
@@ -910,7 +910,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "icmpredir"
, NULL
, "icmp6"
@@ -962,7 +962,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "icmperrors"
, NULL
, "icmp6"
@@ -1018,7 +1018,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "icmpechos"
, NULL
, "icmp6"
@@ -1064,7 +1064,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "groupmemb"
, NULL
, "icmp6"
@@ -1109,7 +1109,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "icmprouter"
, NULL
, "icmp6"
@@ -1151,7 +1151,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "icmpneighbor"
, NULL
, "icmp6"
@@ -1189,7 +1189,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "icmpmldv2"
, NULL
, "icmp6"
@@ -1239,7 +1239,7 @@ static void do_proc_net_snmp6(int update_every) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6
+ RRD_TYPE_NET_IP6
, "icmptypes"
, NULL
, "icmp6"
@@ -1287,7 +1287,7 @@ static void do_proc_net_snmp6(int update_every) {
if (unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP6,
+ RRD_TYPE_NET_IP6,
"ect",
NULL,
"packets",
@@ -1852,11 +1852,11 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_system_ip)) {
st_system_ip = rrdset_create_localhost(
"system"
- , RRD_TYPE_NET_NETSTAT
+ , "ip" // FIXME: this is ipv4. Not changing it because it will require to do changes in cloud-frontend too
, NULL
, "network"
, NULL
- , "IP Bandwidth"
+ , "IPv4 Bandwidth"
, "kilobits/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
@@ -1874,43 +1874,6 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
rrdset_done(st_system_ip);
}
- if(do_inerrors == CONFIG_BOOLEAN_YES || (do_inerrors == CONFIG_BOOLEAN_AUTO &&
- (ipext_InNoRoutes ||
- ipext_InTruncatedPkts ||
- netdata_zero_metrics_enabled == CONFIG_BOOLEAN_YES))) {
- do_inerrors = CONFIG_BOOLEAN_YES;
- static RRDSET *st_ip_inerrors = NULL;
- static RRDDIM *rd_noroutes = NULL, *rd_truncated = NULL, *rd_checksum = NULL;
-
- if(unlikely(!st_ip_inerrors)) {
- st_ip_inerrors = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
- , "inerrors"
- , NULL
- , "errors"
- , NULL
- , "IP Input Errors"
- , "packets/s"
- , PLUGIN_PROC_NAME
- , PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IP_ERRORS
- , update_every
- , RRDSET_TYPE_LINE
- );
-
- rrdset_flag_set(st_ip_inerrors, RRDSET_FLAG_DETAIL);
-
- rd_noroutes = rrddim_add(st_ip_inerrors, "InNoRoutes", "noroutes", 1, 1, RRD_ALGORITHM_INCREMENTAL);
- rd_truncated = rrddim_add(st_ip_inerrors, "InTruncatedPkts", "truncated", 1, 1, RRD_ALGORITHM_INCREMENTAL);
- rd_checksum = rrddim_add(st_ip_inerrors, "InCsumErrors", "checksum", 1, 1, RRD_ALGORITHM_INCREMENTAL);
- }
-
- rrddim_set_by_pointer(st_ip_inerrors, rd_noroutes, ipext_InNoRoutes);
- rrddim_set_by_pointer(st_ip_inerrors, rd_truncated, ipext_InTruncatedPkts);
- rrddim_set_by_pointer(st_ip_inerrors, rd_checksum, ipext_InCsumErrors);
- rrdset_done(st_ip_inerrors);
- }
-
if(do_mcast == CONFIG_BOOLEAN_YES || (do_mcast == CONFIG_BOOLEAN_AUTO &&
(ipext_InMcastOctets ||
ipext_OutMcastOctets ||
@@ -1921,7 +1884,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_ip_mcast)) {
st_ip_mcast = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
+ RRD_TYPE_NET_IP4
, "mcast"
, NULL
, "multicast"
@@ -1930,7 +1893,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "kilobits/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IP_MCAST
+ , NETDATA_CHART_PRIO_IPV4_MCAST
, update_every
, RRDSET_TYPE_AREA
);
@@ -1960,16 +1923,16 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_ip_bcast)) {
st_ip_bcast = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
+ RRD_TYPE_NET_IP4
, "bcast"
, NULL
, "broadcast"
, NULL
- , "IP Broadcast Bandwidth"
+ , "IPv4 Broadcast Bandwidth"
, "kilobits/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IP_BCAST
+ , NETDATA_CHART_PRIO_IPV4_BCAST
, update_every
, RRDSET_TYPE_AREA
);
@@ -1999,16 +1962,16 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_ip_mcastpkts)) {
st_ip_mcastpkts = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
+ RRD_TYPE_NET_IP4
, "mcastpkts"
, NULL
, "multicast"
, NULL
- , "IP Multicast Packets"
+ , "IPv4 Multicast Packets"
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IP_MCAST_PACKETS
+ , NETDATA_CHART_PRIO_IPV4_MCAST_PACKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -2035,16 +1998,16 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_ip_bcastpkts)) {
st_ip_bcastpkts = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
+ RRD_TYPE_NET_IP4
, "bcastpkts"
, NULL
, "broadcast"
, NULL
- , "IP Broadcast Packets"
+ , "IPv4 Broadcast Packets"
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IP_BCAST_PACKETS
+ , NETDATA_CHART_PRIO_IPV4_BCAST_PACKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -2073,16 +2036,16 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_ecnpkts)) {
st_ecnpkts = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
+ RRD_TYPE_NET_IP4
, "ecnpkts"
, NULL
, "ecn"
, NULL
- , "IP ECN Statistics"
+ , "IPv4 ECN Statistics"
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IP_ECN
+ , NETDATA_CHART_PRIO_IPV4_ECN
, update_every
, RRDSET_TYPE_LINE
);
@@ -2114,7 +2077,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_tcpmemorypressures)) {
st_tcpmemorypressures = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
+ RRD_TYPE_NET_IP
, "tcpmemorypressures"
, NULL
, "tcp"
@@ -2123,7 +2086,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "events/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IP_TCP_MEM
+ , NETDATA_CHART_PRIO_IP_TCP_MEM_PRESSURE
, update_every
, RRDSET_TYPE_LINE
);
@@ -2150,7 +2113,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_tcpconnaborts)) {
st_tcpconnaborts = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
+ RRD_TYPE_NET_IP
, "tcpconnaborts"
, NULL
, "tcp"
@@ -2194,7 +2157,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_tcpreorders)) {
st_tcpreorders = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
+ RRD_TYPE_NET_IP
, "tcpreorders"
, NULL
, "tcp"
@@ -2236,7 +2199,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_ip_tcpofo)) {
st_ip_tcpofo = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
+ RRD_TYPE_NET_IP
, "tcpofo"
, NULL
, "tcp"
@@ -2276,7 +2239,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_syncookies)) {
st_syncookies = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
+ RRD_TYPE_NET_IP
, "tcpsyncookies"
, NULL
, "tcp"
@@ -2315,7 +2278,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_syn_queue)) {
st_syn_queue = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
+ RRD_TYPE_NET_IP
, "tcp_syn_queue"
, NULL
, "tcp"
@@ -2351,7 +2314,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_accept_queue)) {
st_accept_queue = rrdset_create_localhost(
- RRD_TYPE_NET_NETSTAT
+ RRD_TYPE_NET_IP
, "tcp_accept_queue"
, NULL
, "tcp"
@@ -2392,7 +2355,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP4
, "packets"
, NULL
, "packets"
@@ -2433,7 +2396,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP4
, "fragsout"
, NULL
, "fragments"
@@ -2442,7 +2405,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_FRAGMENTS
+ , NETDATA_CHART_PRIO_IPV4_FRAGMENTS_OUT
, update_every
, RRDSET_TYPE_LINE
);
@@ -2473,7 +2436,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP4
, "fragsin"
, NULL
, "fragments"
@@ -2482,7 +2445,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_FRAGMENTS + 1
+ , NETDATA_CHART_PRIO_IPV4_FRAGMENTS_IN
, update_every
, RRDSET_TYPE_LINE
);
@@ -2513,13 +2476,16 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
static RRDDIM *rd_InDiscards = NULL,
*rd_OutDiscards = NULL,
*rd_InHdrErrors = NULL,
+ *rd_InNoRoutes = NULL,
*rd_OutNoRoutes = NULL,
*rd_InAddrErrors = NULL,
+ *rd_InTruncatedPkts = NULL,
+ *rd_InCsumErrors = NULL,
*rd_InUnknownProtos = NULL;
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP4
, "errors"
, NULL
, "errors"
@@ -2537,11 +2503,14 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
rd_InDiscards = rrddim_add(st, "InDiscards", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rd_OutDiscards = rrddim_add(st, "OutDiscards", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
- rd_InHdrErrors = rrddim_add(st, "InHdrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_InNoRoutes = rrddim_add(st, "InNoRoutes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rd_OutNoRoutes = rrddim_add(st, "OutNoRoutes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_InHdrErrors = rrddim_add(st, "InHdrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rd_InAddrErrors = rrddim_add(st, "InAddrErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rd_InUnknownProtos = rrddim_add(st, "InUnknownProtos", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_InTruncatedPkts = rrddim_add(st, "InTruncatedPkts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_InCsumErrors = rrddim_add(st, "InCsumErrors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
rrddim_set_by_pointer(st, rd_InDiscards, (collected_number)snmp_root.ip_InDiscards);
@@ -2549,7 +2518,10 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
rrddim_set_by_pointer(st, rd_InHdrErrors, (collected_number)snmp_root.ip_InHdrErrors);
rrddim_set_by_pointer(st, rd_InAddrErrors, (collected_number)snmp_root.ip_InAddrErrors);
rrddim_set_by_pointer(st, rd_InUnknownProtos, (collected_number)snmp_root.ip_InUnknownProtos);
+ rrddim_set_by_pointer(st, rd_InNoRoutes, (collected_number)ipext_InNoRoutes);
rrddim_set_by_pointer(st, rd_OutNoRoutes, (collected_number)snmp_root.ip_OutNoRoutes);
+ rrddim_set_by_pointer(st, rd_InTruncatedPkts, (collected_number)ipext_InTruncatedPkts);
+ rrddim_set_by_pointer(st, rd_InCsumErrors, (collected_number)ipext_InCsumErrors);
rrdset_done(st);
}
@@ -2571,7 +2543,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_packets)) {
st_packets = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP4
, "icmp"
, NULL
, "icmp"
@@ -2580,7 +2552,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_ICMP
+ , NETDATA_CHART_PRIO_IPV4_ICMP_PACKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -2602,7 +2574,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st_errors)) {
st_errors = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP4
, "icmp_errors"
, NULL
, "icmp"
@@ -2611,7 +2583,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_ICMP + 1
+ , NETDATA_CHART_PRIO_IPV4_ICMP_ERRORS
, update_every
, RRDSET_TYPE_LINE
);
@@ -2678,7 +2650,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP4
, "icmpmsg"
, NULL
, "icmp"
@@ -2687,7 +2659,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_ICMP + 2
+ , NETDATA_CHART_PRIO_IPV4_ICMP_MESSAGES
, update_every
, RRDSET_TYPE_LINE
);
@@ -2754,16 +2726,16 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP
, "tcpsock"
, NULL
, "tcp"
, NULL
- , "IPv4 TCP Connections"
+ , "TCP Connections"
, "active connections"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_TCP
+ , NETDATA_CHART_PRIO_IP_TCP_ESTABLISHED_CONNS
, update_every
, RRDSET_TYPE_LINE
);
@@ -2787,7 +2759,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP
, "tcppackets"
, NULL
, "tcp"
@@ -2796,7 +2768,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_TCP + 4
+ , NETDATA_CHART_PRIO_IP_TCP_PACKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -2826,7 +2798,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP
, "tcperrors"
, NULL
, "tcp"
@@ -2835,7 +2807,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_TCP + 20
+ , NETDATA_CHART_PRIO_IP_TCP_ERRORS
, update_every
, RRDSET_TYPE_LINE
);
@@ -2864,7 +2836,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP
, "tcpopens"
, NULL
, "tcp"
@@ -2873,7 +2845,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "connections/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_TCP + 5
+ , NETDATA_CHART_PRIO_IP_TCP_OPENS
, update_every
, RRDSET_TYPE_LINE
);
@@ -2903,7 +2875,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP
, "tcphandshake"
, NULL
, "tcp"
@@ -2912,7 +2884,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "events/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_TCP + 30
+ , NETDATA_CHART_PRIO_IP_TCP_HANDSHAKE
, update_every
, RRDSET_TYPE_LINE
);
@@ -2946,7 +2918,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP4
, "udppackets"
, NULL
, "udp"
@@ -2955,7 +2927,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_UDP
+ , NETDATA_CHART_PRIO_IPV4_UDP_PACKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -2991,7 +2963,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP4
, "udperrors"
, NULL
, "udp"
@@ -3000,7 +2972,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "events/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_UDP + 10
+ , NETDATA_CHART_PRIO_IPV4_UDP_ERRORS
, update_every
, RRDSET_TYPE_LINE
);
@@ -3044,7 +3016,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP4
, "udplite"
, NULL
, "udplite"
@@ -3053,7 +3025,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_UDPLITE
+ , NETDATA_CHART_PRIO_IPV4_UDPLITE_PACKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -3078,7 +3050,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- RRD_TYPE_NET_SNMP
+ RRD_TYPE_NET_IP4
, "udplite_errors"
, NULL
, "udplite"
@@ -3087,7 +3059,7 @@ int do_proc_net_netstat(int update_every, usec_t dt) {
, "packets/s"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NETSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_UDPLITE + 10
+ , NETDATA_CHART_PRIO_IPV4_UDPLITE_ERRORS
, update_every
, RRDSET_TYPE_LINE);
diff --git a/collectors/proc.plugin/proc_net_sockstat.c b/collectors/proc.plugin/proc_net_sockstat.c
index e94b891ca87e26..b0feab5fa4d377 100644
--- a/collectors/proc.plugin/proc_net_sockstat.c
+++ b/collectors/proc.plugin/proc_net_sockstat.c
@@ -228,16 +228,16 @@ int do_proc_net_sockstat(int update_every, usec_t dt) {
if(unlikely(!st)) {
st = rrdset_create_localhost(
- "ipv4"
+ "ip"
, "sockstat_sockets"
, NULL
, "sockets"
, NULL
- , "IPv4 Sockets Used"
+ , "Sockets used for all address families"
, "sockets"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_SOCKETS
+ , NETDATA_CHART_PRIO_IP_SOCKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -272,7 +272,7 @@ int do_proc_net_sockstat(int update_every, usec_t dt) {
, NULL
, "tcp"
, NULL
- , "IPv4 TCP Sockets"
+ , "TCP Sockets"
, "sockets"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME
@@ -310,11 +310,11 @@ int do_proc_net_sockstat(int update_every, usec_t dt) {
, NULL
, "tcp"
, NULL
- , "IPv4 TCP Sockets Memory"
+ , "TCP Sockets Memory"
, "KiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_TCP_MEM
+ , NETDATA_CHART_PRIO_IPV4_TCP_SOCKETS_MEM
, update_every
, RRDSET_TYPE_AREA
);
@@ -347,7 +347,7 @@ int do_proc_net_sockstat(int update_every, usec_t dt) {
, "sockets"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_UDP
+ , NETDATA_CHART_PRIO_IPV4_UDP_SOCKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -380,7 +380,7 @@ int do_proc_net_sockstat(int update_every, usec_t dt) {
, "KiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_UDP_MEM
+ , NETDATA_CHART_PRIO_IPV4_UDP_SOCKETS_MEM
, update_every
, RRDSET_TYPE_AREA
);
@@ -413,7 +413,7 @@ int do_proc_net_sockstat(int update_every, usec_t dt) {
, "sockets"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_UDPLITE
+ , NETDATA_CHART_PRIO_IPV4_UDPLITE_SOCKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -479,7 +479,7 @@ int do_proc_net_sockstat(int update_every, usec_t dt) {
, "fragments"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_FRAGMENTS
+ , NETDATA_CHART_PRIO_IPV4_FRAGMENTS_SOCKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -512,7 +512,7 @@ int do_proc_net_sockstat(int update_every, usec_t dt) {
, "KiB"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT_NAME
- , NETDATA_CHART_PRIO_IPV4_FRAGMENTS_MEM
+ , NETDATA_CHART_PRIO_IPV4_FRAGMENTS_SOCKETS_MEM
, update_every
, RRDSET_TYPE_AREA
);
diff --git a/collectors/proc.plugin/proc_net_sockstat6.c b/collectors/proc.plugin/proc_net_sockstat6.c
index 065cf605592e3b..16e0248af895b0 100644
--- a/collectors/proc.plugin/proc_net_sockstat6.c
+++ b/collectors/proc.plugin/proc_net_sockstat6.c
@@ -130,7 +130,7 @@ int do_proc_net_sockstat6(int update_every, usec_t dt) {
, "sockets"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT6_NAME
- , NETDATA_CHART_PRIO_IPV6_TCP
+ , NETDATA_CHART_PRIO_IPV6_TCP_SOCKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -163,7 +163,7 @@ int do_proc_net_sockstat6(int update_every, usec_t dt) {
, "sockets"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT6_NAME
- , NETDATA_CHART_PRIO_IPV6_UDP
+ , NETDATA_CHART_PRIO_IPV6_UDP_SOCKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -196,7 +196,7 @@ int do_proc_net_sockstat6(int update_every, usec_t dt) {
, "sockets"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT6_NAME
- , NETDATA_CHART_PRIO_IPV6_UDPLITE
+ , NETDATA_CHART_PRIO_IPV6_UDPLITE_SOCKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -229,7 +229,7 @@ int do_proc_net_sockstat6(int update_every, usec_t dt) {
, "sockets"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT6_NAME
- , NETDATA_CHART_PRIO_IPV6_RAW
+ , NETDATA_CHART_PRIO_IPV6_RAW_SOCKETS
, update_every
, RRDSET_TYPE_LINE
);
@@ -262,7 +262,7 @@ int do_proc_net_sockstat6(int update_every, usec_t dt) {
, "fragments"
, PLUGIN_PROC_NAME
, PLUGIN_PROC_MODULE_NET_SOCKSTAT6_NAME
- , NETDATA_CHART_PRIO_IPV6_FRAGMENTS
+ , NETDATA_CHART_PRIO_IPV6_FRAGMENTS_SOCKETS
, update_every
, RRDSET_TYPE_LINE
);
diff --git a/collectors/proc.plugin/proc_net_softnet_stat.c b/collectors/proc.plugin/proc_net_softnet_stat.c
index dfd372b2a9553e..2f01b8859815ac 100644
--- a/collectors/proc.plugin/proc_net_softnet_stat.c
+++ b/collectors/proc.plugin/proc_net_softnet_stat.c
@@ -111,12 +111,12 @@ int do_proc_net_softnet_stat(int update_every, usec_t dt) {
if(do_per_core) {
for(l = 0; l < lines ;l++) {
char id[50+1];
- snprintfz(id, 50, "cpu%zu_softnet_stat", l);
+ snprintfz(id, sizeof(id) - 1,"cpu%zu_softnet_stat", l);
st = rrdset_find_active_bytype_localhost("cpu", id);
if(unlikely(!st)) {
char title[100+1];
- snprintfz(title, 100, "CPU softnet_stat");
+ snprintfz(title, sizeof(title) - 1, "CPU softnet_stat");
st = rrdset_create_localhost(
"cpu"
diff --git a/collectors/proc.plugin/proc_net_wireless.c b/collectors/proc.plugin/proc_net_wireless.c
index 08ab2eada186ef..c7efa33350a4cb 100644
--- a/collectors/proc.plugin/proc_net_wireless.c
+++ b/collectors/proc.plugin/proc_net_wireless.c
@@ -85,12 +85,13 @@ static struct netwireless {
static void netwireless_free_st(struct netwireless *wireless_dev)
{
- if (wireless_dev->st_status) rrdset_is_obsolete(wireless_dev->st_status);
- if (wireless_dev->st_link) rrdset_is_obsolete(wireless_dev->st_link);
- if (wireless_dev->st_level) rrdset_is_obsolete(wireless_dev->st_level);
- if (wireless_dev->st_noise) rrdset_is_obsolete(wireless_dev->st_noise);
- if (wireless_dev->st_discarded_packets) rrdset_is_obsolete(wireless_dev->st_discarded_packets);
- if (wireless_dev->st_missed_beacon) rrdset_is_obsolete(wireless_dev->st_missed_beacon);
+ if (wireless_dev->st_status) rrdset_is_obsolete___safe_from_collector_thread(wireless_dev->st_status);
+ if (wireless_dev->st_link) rrdset_is_obsolete___safe_from_collector_thread(wireless_dev->st_link);
+ if (wireless_dev->st_level) rrdset_is_obsolete___safe_from_collector_thread(wireless_dev->st_level);
+ if (wireless_dev->st_noise) rrdset_is_obsolete___safe_from_collector_thread(wireless_dev->st_noise);
+ if (wireless_dev->st_discarded_packets)
+ rrdset_is_obsolete___safe_from_collector_thread(wireless_dev->st_discarded_packets);
+ if (wireless_dev->st_missed_beacon) rrdset_is_obsolete___safe_from_collector_thread(wireless_dev->st_missed_beacon);
wireless_dev->st_status = NULL;
wireless_dev->st_link = NULL;
diff --git a/collectors/proc.plugin/proc_pagetypeinfo.c b/collectors/proc.plugin/proc_pagetypeinfo.c
index e5318ce8fb95b4..fc5496c630a73e 100644
--- a/collectors/proc.plugin/proc_pagetypeinfo.c
+++ b/collectors/proc.plugin/proc_pagetypeinfo.c
@@ -211,7 +211,7 @@ int do_proc_pagetypeinfo(int update_every, usec_t dt) {
);
for (o = 0; o < pageorders_cnt; o++) {
char id[3+1];
- snprintfz(id, 3, "%lu", o);
+ snprintfz(id, sizeof(id) - 1, "%lu", o);
char name[20+1];
dim_name(name, o, pagesize);
@@ -234,7 +234,7 @@ int do_proc_pagetypeinfo(int update_every, usec_t dt) {
// "pagetype Node" + NUMA-NodeId + ZoneName + TypeName
char setid[13+1+2+1+MAX_ZONETYPE_NAME+1+MAX_PAGETYPE_NAME+1];
- snprintfz(setid, 13+1+2+1+MAX_ZONETYPE_NAME+1+MAX_PAGETYPE_NAME, "pagetype_Node%d_%s_%s", pgl->node, pgl->zone, pgl->type);
+ snprintfz(setid, sizeof(setid) - 1, "pagetype_Node%d_%s_%s", pgl->node, pgl->zone, pgl->type);
// Skip explicitly refused charts
if (simple_pattern_matches(filter_types, setid))
@@ -260,14 +260,14 @@ int do_proc_pagetypeinfo(int update_every, usec_t dt) {
);
char node[50+1];
- snprintfz(node, 50, "node%d", pgl->node);
+ snprintfz(node, sizeof(node) - 1, "node%d", pgl->node);
rrdlabels_add(st_nodezonetype[p]->rrdlabels, "node_id", node, RRDLABEL_SRC_AUTO);
rrdlabels_add(st_nodezonetype[p]->rrdlabels, "node_zone", pgl->zone, RRDLABEL_SRC_AUTO);
rrdlabels_add(st_nodezonetype[p]->rrdlabels, "node_type", pgl->type, RRDLABEL_SRC_AUTO);
for (o = 0; o < pageorders_cnt; o++) {
char dimid[3+1];
- snprintfz(dimid, 3, "%lu", o);
+ snprintfz(dimid, sizeof(dimid) - 1, "%lu", o);
char dimname[20+1];
dim_name(dimname, o, pagesize);
diff --git a/collectors/proc.plugin/proc_softirqs.c b/collectors/proc.plugin/proc_softirqs.c
index ccf46cb8aa6471..5f0502f66598d9 100644
--- a/collectors/proc.plugin/proc_softirqs.c
+++ b/collectors/proc.plugin/proc_softirqs.c
@@ -197,10 +197,10 @@ int do_proc_softirqs(int update_every, usec_t dt) {
if (unlikely(core_sum == 0)) continue; // try next core
char id[50 + 1];
- snprintfz(id, 50, "cpu%d_softirqs", c);
+ snprintfz(id, sizeof(id) - 1, "cpu%d_softirqs", c);
char title[100 + 1];
- snprintfz(title, 100, "CPU softirqs");
+ snprintfz(title, sizeof(title) - 1, "CPU softirqs");
core_st[c] = rrdset_create_localhost(
"cpu"
@@ -218,7 +218,7 @@ int do_proc_softirqs(int update_every, usec_t dt) {
);
char core[50+1];
- snprintfz(core, 50, "cpu%d", c);
+ snprintfz(core, sizeof(core) - 1, "cpu%d", c);
rrdlabels_add(core_st[c]->rrdlabels, "cpu", core, RRDLABEL_SRC_AUTO);
}
diff --git a/collectors/proc.plugin/proc_spl_kstat_zfs.c b/collectors/proc.plugin/proc_spl_kstat_zfs.c
index 428ef0d3299c81..27178b60fe0f48 100644
--- a/collectors/proc.plugin/proc_spl_kstat_zfs.c
+++ b/collectors/proc.plugin/proc_spl_kstat_zfs.c
@@ -240,7 +240,7 @@ DICTIONARY *zfs_pools = NULL;
void disable_zfs_pool_state(struct zfs_pool *pool)
{
if (pool->st)
- rrdset_is_obsolete(pool->st);
+ rrdset_is_obsolete___safe_from_collector_thread(pool->st);
pool->st = NULL;
@@ -335,7 +335,10 @@ int do_proc_spl_kstat_zfs_pool_state(int update_every, usec_t dt)
if (likely(do_zfs_pool_state)) {
DIR *dir = opendir(dirname);
if (unlikely(!dir)) {
- collector_error("Cannot read directory '%s'", dirname);
+ if (errno == ENOENT)
+ collector_info("Cannot read directory '%s'", dirname);
+ else
+ collector_error("Cannot read directory '%s'", dirname);
return 1;
}
diff --git a/collectors/proc.plugin/proc_stat.c b/collectors/proc.plugin/proc_stat.c
index a4f76796b60412..84160f22f78c6c 100644
--- a/collectors/proc.plugin/proc_stat.c
+++ b/collectors/proc.plugin/proc_stat.c
@@ -1038,7 +1038,7 @@ int do_proc_stat(int update_every, usec_t dt) {
);
char corebuf[50+1];
- snprintfz(corebuf, 50, "cpu%zu", core);
+ snprintfz(corebuf, sizeof(corebuf) - 1, "cpu%zu", core);
rrdlabels_add(cpuidle_charts[core].st->rrdlabels, "cpu", corebuf, RRDLABEL_SRC_AUTO);
char cpuidle_dim_id[RRD_ID_LENGTH_MAX + 1];
diff --git a/collectors/proc.plugin/sys_block_zram.c b/collectors/proc.plugin/sys_block_zram.c
index f9166ace00b3c8..dac7cac0f4e6e2 100644
--- a/collectors/proc.plugin/sys_block_zram.c
+++ b/collectors/proc.plugin/sys_block_zram.c
@@ -3,7 +3,7 @@
#include "plugin_proc.h"
#define PLUGIN_PROC_MODULE_ZRAM_NAME "/sys/block/zram"
-#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete(st); (st) = NULL; } } while(st)
+#define rrdset_obsolete_and_pointer_null(st) do { if(st) { rrdset_is_obsolete___safe_from_collector_thread(st); (st) = NULL; } } while(st)
typedef struct mm_stat {
unsigned long long orig_data_size;
diff --git a/collectors/proc.plugin/sys_class_drm.c b/collectors/proc.plugin/sys_class_drm.c
index 284662cf65e593..3ed1fb8757b56e 100644
--- a/collectors/proc.plugin/sys_class_drm.c
+++ b/collectors/proc.plugin/sys_class_drm.c
@@ -648,13 +648,17 @@ static int read_clk_freq_file(procfile **p_ff, const char *const pathname, colle
*p_ff = procfile_open(pathname, NULL, PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
if(unlikely(!*p_ff)) return -2;
}
-
+
if(unlikely(NULL == (*p_ff = procfile_readall(*p_ff)))) return -3;
for(size_t l = 0; l < procfile_lines(*p_ff) ; l++) {
+ char *str_with_units = NULL;
+ if((*p_ff)->lines->lines[l].words >= 3 && !strcmp(procfile_lineword((*p_ff), l, 2), "*")) //format: X: collected_number *
+ str_with_units = procfile_lineword((*p_ff), l, 1);
+ else if ((*p_ff)->lines->lines[l].words == 2 && !strcmp(procfile_lineword((*p_ff), l, 1), "*")) //format: collected_number *
+ str_with_units = procfile_lineword((*p_ff), l, 0);
- if((*p_ff)->lines->lines[l].words >= 3 && !strcmp(procfile_lineword((*p_ff), l, 2), "*")){
- char *str_with_units = procfile_lineword((*p_ff), l, 1);
+ if (str_with_units) {
char *delim = strchr(str_with_units, 'M');
char str_without_units[10];
memcpy(str_without_units, str_with_units, delim - str_with_units);
@@ -707,7 +711,7 @@ static int do_rrd_util_gpu(struct card *const c){
else {
collector_error("Cannot read util_gpu for %s: [%s]", c->pathname, c->id.marketing_name);
freez((void *) c->pathname_util_gpu);
- rrdset_is_obsolete(c->st_util_gpu);
+ rrdset_is_obsolete___safe_from_collector_thread(c->st_util_gpu);
return 1;
}
}
@@ -721,7 +725,7 @@ static int do_rrd_util_mem(struct card *const c){
else {
collector_error("Cannot read util_mem for %s: [%s]", c->pathname, c->id.marketing_name);
freez((void *) c->pathname_util_mem);
- rrdset_is_obsolete(c->st_util_mem);
+ rrdset_is_obsolete___safe_from_collector_thread(c->st_util_mem);
return 1;
}
}
@@ -735,7 +739,7 @@ static int do_rrd_clk_gpu(struct card *const c){
else {
collector_error("Cannot read clk_gpu for %s: [%s]", c->pathname, c->id.marketing_name);
freez((void *) c->pathname_clk_gpu);
- rrdset_is_obsolete(c->st_clk_gpu);
+ rrdset_is_obsolete___safe_from_collector_thread(c->st_clk_gpu);
return 1;
}
}
@@ -749,7 +753,7 @@ static int do_rrd_clk_mem(struct card *const c){
else {
collector_error("Cannot read clk_mem for %s: [%s]", c->pathname, c->id.marketing_name);
freez((void *) c->pathname_clk_mem);
- rrdset_is_obsolete(c->st_clk_mem);
+ rrdset_is_obsolete___safe_from_collector_thread(c->st_clk_mem);
return 1;
}
}
@@ -771,8 +775,8 @@ static int do_rrd_vram(struct card *const c){
collector_error("Cannot read used_vram for %s: [%s]", c->pathname, c->id.marketing_name);
freez((void *) c->pathname_mem_used_vram);
freez((void *) c->pathname_mem_total_vram);
- rrdset_is_obsolete(c->st_mem_usage_perc_vram);
- rrdset_is_obsolete(c->st_mem_usage_vram);
+ rrdset_is_obsolete___safe_from_collector_thread(c->st_mem_usage_perc_vram);
+ rrdset_is_obsolete___safe_from_collector_thread(c->st_mem_usage_vram);
return 1;
}
}
@@ -794,8 +798,8 @@ static int do_rrd_vis_vram(struct card *const c){
collector_error("Cannot read used_vis_vram for %s: [%s]", c->pathname, c->id.marketing_name);
freez((void *) c->pathname_mem_used_vis_vram);
freez((void *) c->pathname_mem_total_vis_vram);
- rrdset_is_obsolete(c->st_mem_usage_perc_vis_vram);
- rrdset_is_obsolete(c->st_mem_usage_vis_vram);
+ rrdset_is_obsolete___safe_from_collector_thread(c->st_mem_usage_perc_vis_vram);
+ rrdset_is_obsolete___safe_from_collector_thread(c->st_mem_usage_vis_vram);
return 1;
}
}
@@ -817,8 +821,8 @@ static int do_rrd_gtt(struct card *const c){
collector_error("Cannot read used_gtt for %s: [%s]", c->pathname, c->id.marketing_name);
freez((void *) c->pathname_mem_used_gtt);
freez((void *) c->pathname_mem_total_gtt);
- rrdset_is_obsolete(c->st_mem_usage_perc_gtt);
- rrdset_is_obsolete(c->st_mem_usage_gtt);
+ rrdset_is_obsolete___safe_from_collector_thread(c->st_mem_usage_perc_gtt);
+ rrdset_is_obsolete___safe_from_collector_thread(c->st_mem_usage_gtt);
return 1;
}
}
diff --git a/collectors/proc.plugin/sys_class_power_supply.c b/collectors/proc.plugin/sys_class_power_supply.c
index 8687ecb552c37e..3f793b9c658f88 100644
--- a/collectors/proc.plugin/sys_class_power_supply.c
+++ b/collectors/proc.plugin/sys_class_power_supply.c
@@ -65,7 +65,7 @@ void power_supply_free(struct power_supply *ps) {
// free capacity structure
if(likely(ps->capacity)) {
- if(likely(ps->capacity->st)) rrdset_is_obsolete(ps->capacity->st);
+ if(likely(ps->capacity->st)) rrdset_is_obsolete___safe_from_collector_thread(ps->capacity->st);
freez(ps->capacity->filename);
if(likely(ps->capacity->fd != -1)) close(ps->capacity->fd);
files_num--;
@@ -89,7 +89,7 @@ void power_supply_free(struct power_supply *ps) {
}
// free properties
- if(likely(pr->st)) rrdset_is_obsolete(pr->st);
+ if(likely(pr->st)) rrdset_is_obsolete___safe_from_collector_thread(pr->st);
freez(pr->name);
freez(pr->title);
freez(pr->units);
diff --git a/collectors/proc.plugin/sys_devices_pci_aer.c b/collectors/proc.plugin/sys_devices_pci_aer.c
index 134426238a6879..563ebf0515bed5 100644
--- a/collectors/proc.plugin/sys_devices_pci_aer.c
+++ b/collectors/proc.plugin/sys_devices_pci_aer.c
@@ -240,8 +240,8 @@ int do_proc_sys_devices_pci_aer(int update_every, usec_t dt __maybe_unused) {
continue;
if(!a->st) {
- const char *title;
- const char *context;
+ const char *title = "";
+ const char *context = "";
switch(a->type) {
case AER_DEV_NONFATAL:
@@ -268,6 +268,11 @@ int do_proc_sys_devices_pci_aer(int update_every, usec_t dt __maybe_unused) {
title = "PCI Root-Port Advanced Error Reporting (AER) Fatal Errors";
context = "pci.rootport_aer_fatal";
break;
+
+ default:
+ title = "Unknown PCI Advanced Error Reporting";
+ context = "pci.unknown_aer";
+ break;
}
char id[RRD_ID_LENGTH_MAX + 1];
diff --git a/collectors/proc.plugin/sys_devices_system_edac_mc.c b/collectors/proc.plugin/sys_devices_system_edac_mc.c
index 0947f61f090730..fdaa22cb700f40 100644
--- a/collectors/proc.plugin/sys_devices_system_edac_mc.c
+++ b/collectors/proc.plugin/sys_devices_system_edac_mc.c
@@ -265,22 +265,22 @@ int do_proc_sys_devices_system_edac_mc(int update_every, usec_t dt __maybe_unuse
char buffer[1024 + 1];
- if(read_edac_mc_rank_file(m->name, d->name, "dimm_dev_type", buffer, 1024))
+ if (read_edac_mc_rank_file(m->name, d->name, "dimm_dev_type", buffer, 1024))
rrdlabels_add(d->st->rrdlabels, "dimm_dev_type", buffer, RRDLABEL_SRC_AUTO);
- if(read_edac_mc_rank_file(m->name, d->name, "dimm_edac_mode", buffer, 1024))
+ if (read_edac_mc_rank_file(m->name, d->name, "dimm_edac_mode", buffer, 1024))
rrdlabels_add(d->st->rrdlabels, "dimm_edac_mode", buffer, RRDLABEL_SRC_AUTO);
- if(read_edac_mc_rank_file(m->name, d->name, "dimm_label", buffer, 1024))
+ if (read_edac_mc_rank_file(m->name, d->name, "dimm_label", buffer, 1024))
rrdlabels_add(d->st->rrdlabels, "dimm_label", buffer, RRDLABEL_SRC_AUTO);
- if(read_edac_mc_rank_file(m->name, d->name, "dimm_location", buffer, 1024))
+ if (read_edac_mc_rank_file(m->name, d->name, "dimm_location", buffer, 1024))
rrdlabels_add(d->st->rrdlabels, "dimm_location", buffer, RRDLABEL_SRC_AUTO);
- if(read_edac_mc_rank_file(m->name, d->name, "dimm_mem_type", buffer, 1024))
+ if (read_edac_mc_rank_file(m->name, d->name, "dimm_mem_type", buffer, 1024))
rrdlabels_add(d->st->rrdlabels, "dimm_mem_type", buffer, RRDLABEL_SRC_AUTO);
- if(read_edac_mc_rank_file(m->name, d->name, "size", buffer, 1024))
+ if (read_edac_mc_rank_file(m->name, d->name, "size", buffer, 1024))
rrdlabels_add(d->st->rrdlabels, "size", buffer, RRDLABEL_SRC_AUTO);
d->ce.rd = rrddim_add(d->st, "correctable", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
diff --git a/collectors/proc.plugin/sys_fs_btrfs.c b/collectors/proc.plugin/sys_fs_btrfs.c
index da89411bd78852..f1d6fe720f80a1 100644
--- a/collectors/proc.plugin/sys_fs_btrfs.c
+++ b/collectors/proc.plugin/sys_fs_btrfs.c
@@ -196,8 +196,8 @@ static inline int collect_btrfs_commits_stats(BTRFS_NODE *node, int update_every
static inline void btrfs_free_commits_stats(BTRFS_NODE *node){
if(node->st_commits){
- rrdset_is_obsolete(node->st_commits);
- rrdset_is_obsolete(node->st_commit_timings);
+ rrdset_is_obsolete___safe_from_collector_thread(node->st_commits);
+ rrdset_is_obsolete___safe_from_collector_thread(node->st_commit_timings);
}
freez(node->commit_stats_filename);
node->commit_stats_filename = NULL;
@@ -211,7 +211,7 @@ static inline void btrfs_free_disk(BTRFS_DISK *d) {
static inline void btrfs_free_device(BTRFS_DEVICE *d) {
if(d->st_error_stats)
- rrdset_is_obsolete(d->st_error_stats);
+ rrdset_is_obsolete___safe_from_collector_thread(d->st_error_stats);
freez(d->error_stats_filename);
freez(d);
}
@@ -220,16 +220,16 @@ static inline void btrfs_free_node(BTRFS_NODE *node) {
// collector_info("BTRFS: destroying '%s'", node->id);
if(node->st_allocation_disks)
- rrdset_is_obsolete(node->st_allocation_disks);
+ rrdset_is_obsolete___safe_from_collector_thread(node->st_allocation_disks);
if(node->st_allocation_data)
- rrdset_is_obsolete(node->st_allocation_data);
+ rrdset_is_obsolete___safe_from_collector_thread(node->st_allocation_data);
if(node->st_allocation_metadata)
- rrdset_is_obsolete(node->st_allocation_metadata);
+ rrdset_is_obsolete___safe_from_collector_thread(node->st_allocation_metadata);
if(node->st_allocation_system)
- rrdset_is_obsolete(node->st_allocation_system);
+ rrdset_is_obsolete___safe_from_collector_thread(node->st_allocation_system);
freez(node->allocation_data_bytes_used_filename);
freez(node->allocation_data_total_bytes_filename);
@@ -392,14 +392,14 @@ static inline int find_btrfs_devices(BTRFS_NODE *node, const char *path) {
continue;
}
- collector_info("BTRFS: device found '%s'", de->d_name);
+ // internal_error("BTRFS: device found '%s'", de->d_name);
// --------------------------------------------------------------------
// search for it
for(d = node->devices ; d ; d = d->next) {
if(str2ll(de->d_name, NULL) == d->id){
- collector_info("BTRFS: existing device id '%d'", d->id);
+ // collector_info("BTRFS: existing device id '%d'", d->id);
break;
}
}
@@ -411,11 +411,11 @@ static inline int find_btrfs_devices(BTRFS_NODE *node, const char *path) {
d = callocz(sizeof(BTRFS_DEVICE), 1);
d->id = str2ll(de->d_name, NULL);
- collector_info("BTRFS: new device with id '%d'", d->id);
+ // collector_info("BTRFS: new device with id '%d'", d->id);
snprintfz(filename, FILENAME_MAX, "%s/%d/error_stats", path, d->id);
d->error_stats_filename = strdupz(filename);
- collector_info("BTRFS: error_stats_filename '%s'", filename);
+ // collector_info("BTRFS: error_stats_filename '%s'", filename);
// link it
d->next = node->devices;
@@ -795,7 +795,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) {
snprintfz(id, RRD_ID_LENGTH_MAX, "disk_%s", node->id);
snprintfz(name, RRD_ID_LENGTH_MAX, "disk_%s", node->label);
- snprintfz(title, 200, "BTRFS Physical Disk Allocation");
+ snprintfz(title, sizeof(title) - 1, "BTRFS Physical Disk Allocation");
netdata_fix_chart_id(id);
netdata_fix_chart_name(name);
@@ -854,7 +854,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) {
snprintfz(id, RRD_ID_LENGTH_MAX, "data_%s", node->id);
snprintfz(name, RRD_ID_LENGTH_MAX, "data_%s", node->label);
- snprintfz(title, 200, "BTRFS Data Allocation");
+ snprintfz(title, sizeof(title) - 1, "BTRFS Data Allocation");
netdata_fix_chart_id(id);
netdata_fix_chart_name(name);
@@ -898,7 +898,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) {
snprintfz(id, RRD_ID_LENGTH_MAX, "metadata_%s", node->id);
snprintfz(name, RRD_ID_LENGTH_MAX, "metadata_%s", node->label);
- snprintfz(title, 200, "BTRFS Metadata Allocation");
+ snprintfz(title, sizeof(title) - 1, "BTRFS Metadata Allocation");
netdata_fix_chart_id(id);
netdata_fix_chart_name(name);
@@ -944,7 +944,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) {
snprintfz(id, RRD_ID_LENGTH_MAX, "system_%s", node->id);
snprintfz(name, RRD_ID_LENGTH_MAX, "system_%s", node->label);
- snprintfz(title, 200, "BTRFS System Allocation");
+ snprintfz(title, sizeof(title) - 1, "BTRFS System Allocation");
netdata_fix_chart_id(id);
netdata_fix_chart_name(name);
@@ -988,7 +988,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) {
snprintfz(id, RRD_ID_LENGTH_MAX, "commits_%s", node->id);
snprintfz(name, RRD_ID_LENGTH_MAX, "commits_%s", node->label);
- snprintfz(title, 200, "BTRFS Commits");
+ snprintfz(title, sizeof(title) - 1, "BTRFS Commits");
netdata_fix_chart_id(id);
netdata_fix_chart_name(name);
@@ -1021,7 +1021,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) {
snprintfz(id, RRD_ID_LENGTH_MAX, "commits_perc_time_%s", node->id);
snprintfz(name, RRD_ID_LENGTH_MAX, "commits_perc_time_%s", node->label);
- snprintfz(title, 200, "BTRFS Commits Time Share");
+ snprintfz(title, sizeof(title) - 1, "BTRFS Commits Time Share");
netdata_fix_chart_id(id);
netdata_fix_chart_name(name);
@@ -1055,7 +1055,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) {
snprintfz(id, RRD_ID_LENGTH_MAX, "commit_timings_%s", node->id);
snprintfz(name, RRD_ID_LENGTH_MAX, "commit_timings_%s", node->label);
- snprintfz(title, 200, "BTRFS Commit Timings");
+ snprintfz(title, sizeof(title) - 1, "BTRFS Commit Timings");
netdata_fix_chart_id(id);
netdata_fix_chart_name(name);
@@ -1101,7 +1101,7 @@ int do_sys_fs_btrfs(int update_every, usec_t dt) {
snprintfz(id, RRD_ID_LENGTH_MAX, "device_errors_dev%d_%s", d->id, node->id);
snprintfz(name, RRD_ID_LENGTH_MAX, "device_errors_dev%d_%s", d->id, node->label);
- snprintfz(title, 200, "BTRFS Device Errors");
+ snprintfz(title, sizeof(title) - 1, "BTRFS Device Errors");
netdata_fix_chart_id(id);
netdata_fix_chart_name(name);
diff --git a/collectors/python.d.plugin/adaptec_raid/README.md b/collectors/python.d.plugin/adaptec_raid/README.md
deleted file mode 100644
index 41d5b62e09abf6..00000000000000
--- a/collectors/python.d.plugin/adaptec_raid/README.md
+++ /dev/null
@@ -1,103 +0,0 @@
-
-
-# Adaptec RAID controller collector
-
-Collects logical and physical devices metrics using `arcconf` command-line utility.
-
-Executed commands:
-
-- `sudo -n arcconf GETCONFIG 1 LD`
-- `sudo -n arcconf GETCONFIG 1 PD`
-
-## Requirements
-
-The module uses `arcconf`, which can only be executed by `root`. It uses
-`sudo` and assumes that it is configured such that the `netdata` user can execute `arcconf` as root without a password.
-
-- Add to your `/etc/sudoers` file:
-
-`which arcconf` shows the full path to the binary.
-
-```bash
-netdata ALL=(root) NOPASSWD: /path/to/arcconf
-```
-
-- Reset Netdata's systemd
- unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux
- distributions with systemd)
-
-The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `arcconf` using `sudo`.
-
-
-As the `root` user, do the following:
-
-```cmd
-mkdir /etc/systemd/system/netdata.service.d
-echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf
-systemctl daemon-reload
-systemctl restart netdata.service
-```
-
-## Charts
-
-- Logical Device Status
-- Physical Device State
-- Physical Device S.M.A.R.T warnings
-- Physical Device Temperature
-
-## Enable the collector
-
-The `adaptec_raid` collector is disabled by default. To enable it, use `edit-config` from the
-Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf`
-file.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d.conf
-```
-
-Change the value of the `adaptec_raid` setting to `yes`. Save the file and restart the Netdata Agent with `sudo
-systemctl restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
-
-## Configuration
-
-Edit the `python.d/adaptec_raid.conf` configuration file using `edit-config` from the
-Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/adaptec_raid.conf
-```
-
-
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `adaptec_raid` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `adaptec_raid` module in debug mode:
-
-```bash
-./python.d.plugin adaptec_raid debug trace
-```
-
diff --git a/collectors/python.d.plugin/adaptec_raid/README.md b/collectors/python.d.plugin/adaptec_raid/README.md
new file mode 120000
index 00000000000000..97a103eb9f1cc2
--- /dev/null
+++ b/collectors/python.d.plugin/adaptec_raid/README.md
@@ -0,0 +1 @@
+integrations/adaptecraid.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py
index bb59d88e1d3c46..1995ad681083db 100644
--- a/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py
+++ b/collectors/python.d.plugin/adaptec_raid/adaptec_raid.chart.py
@@ -87,7 +87,7 @@ def find_pds(d):
elif row.startswith('Temperature'):
v = row.split(':')[-1].split()[0]
pd.temperature = v
- elif row.startswith('NCQ status'):
+ elif row.startswith(('NCQ status', 'Device Phy')) or not row:
if pd.id and pd.state and pd.smart_warnings:
pds.append(pd)
pd = PD()
diff --git a/collectors/python.d.plugin/adaptec_raid/integrations/adaptecraid.md b/collectors/python.d.plugin/adaptec_raid/integrations/adaptecraid.md
new file mode 100644
index 00000000000000..13d22ba54f1c31
--- /dev/null
+++ b/collectors/python.d.plugin/adaptec_raid/integrations/adaptecraid.md
@@ -0,0 +1,204 @@
+
+
+# AdaptecRAID
+
+
+
+
+
+Plugin: python.d.plugin
+Module: adaptec_raid
+
+
+
+## Overview
+
+This collector monitors Adaptec RAID hardware storage controller metrics about both physical and logical drives.
+
+
+It uses the arcconf command line utility (from adaptec) to monitor your raid controller.
+
+Executed commands:
+ - `sudo -n arcconf GETCONFIG 1 LD`
+ - `sudo -n arcconf GETCONFIG 1 PD`
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+The module uses arcconf, which can only be executed by root. It uses sudo and assumes that it is configured such that the netdata user can execute arcconf as root without a password.
+
+### Default Behavior
+
+#### Auto-Detection
+
+After all the permissions are satisfied, netdata should be able to execute commands via the arcconf command line utility.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per AdaptecRAID instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| adaptec_raid.ld_status | a dimension per logical device | bool |
+| adaptec_raid.pd_state | a dimension per physical device | bool |
+| adaptec_raid.smart_warnings | a dimension per physical device | count |
+| adaptec_raid.temperature | a dimension per physical device | celsius |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ adaptec_raid_ld_status ](https://github.com/netdata/netdata/blob/master/health/health.d/adaptec_raid.conf) | adaptec_raid.ld_status | logical device status is failed or degraded |
+| [ adaptec_raid_pd_state ](https://github.com/netdata/netdata/blob/master/health/health.d/adaptec_raid.conf) | adaptec_raid.pd_state | physical device state is not online |
+
+
+## Setup
+
+### Prerequisites
+
+#### Grant permissions for netdata, to run arcconf as sudoer
+
+The module uses arcconf, which can only be executed by root. It uses sudo and assumes that it is configured such that the netdata user can execute arcconf as root without a password.
+
+Add the following line to your `/etc/sudoers` file.
+Running `which arcconf` shows the full path to the binary.
+
+```bash
+netdata ALL=(root) NOPASSWD: /path/to/arcconf
+```
+
+
+#### Reset Netdata's systemd unit CapabilityBoundingSet (Linux distributions with systemd)
+
+The default CapabilityBoundingSet doesn't allow using sudo, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute arcconf using sudo.
+
+As root user, do the following:
+
+```bash
+mkdir /etc/systemd/system/netdata.service.d
+echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf
+systemctl daemon-reload
+systemctl restart netdata.service
+```
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/adaptec_raid.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/adaptec_raid.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic example configuration per job
+
+```yaml
+job_name:
+ name: my_job_name
+ update_every: 1 # the JOB's data collection frequency
+ priority: 60000 # the JOB's order on the dashboard
+ penalty: yes # the JOB's penalty
+ autodetection_retry: 0 # the JOB's re-check interval in seconds
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `adaptec_raid` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin adaptec_raid debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/adaptec_raid/metadata.yaml b/collectors/python.d.plugin/adaptec_raid/metadata.yaml
index 7ee4ce7c256225..c69baff4a83a64 100644
--- a/collectors/python.d.plugin/adaptec_raid/metadata.yaml
+++ b/collectors/python.d.plugin/adaptec_raid/metadata.yaml
@@ -27,8 +27,8 @@ modules:
It uses the arcconf command line utility (from adaptec) to monitor your raid controller.
Executed commands:
- - sudo -n arcconf GETCONFIG 1 LD
- - sudo -n arcconf GETCONFIG 1 PD
+ - `sudo -n arcconf GETCONFIG 1 LD`
+ - `sudo -n arcconf GETCONFIG 1 PD`
supported_platforms:
include: []
exclude: []
diff --git a/collectors/python.d.plugin/alarms/README.md b/collectors/python.d.plugin/alarms/README.md
deleted file mode 100644
index 0f956b291c4431..00000000000000
--- a/collectors/python.d.plugin/alarms/README.md
+++ /dev/null
@@ -1,89 +0,0 @@
-
-
-# Alarms
-
-This collector creates an 'Alarms' menu with one line plot showing alarm states over time. Alarm states are mapped to integer values according to the below default mapping. Any alarm status types not in this mapping will be ignored (Note: This mapping can be changed by editing the `status_map` in the `alarms.conf` file). If you would like to learn more about the different alarm statuses check out the docs [here](https://github.com/netdata/netdata/blob/master/health/REFERENCE.md#alarm-statuses).
-
-```
-{
- 'CLEAR': 0,
- 'WARNING': 1,
- 'CRITICAL': 2
-}
-```
-
-## Charts
-
-Below is an example of the chart produced when running `stress-ng --all 2` for a few minutes. You can see the various warning and critical alarms raised.
-
-
-
-## Configuration
-
-Enable the collector and [restart Netdata](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md).
-
-```bash
-cd /etc/netdata/
-sudo ./edit-config python.d.conf
-# Set `alarms: no` to `alarms: yes`
-sudo systemctl restart netdata
-```
-
-If needed, edit the `python.d/alarms.conf` configuration file using `edit-config` from the your agent's [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is usually at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/alarms.conf
-```
-
-The `alarms` specific part of the `alarms.conf` file should look like this:
-
-```yaml
-# what url to pull data from
-local:
- url: 'http://127.0.0.1:19999/api/v1/alarms?all'
- # define how to map alarm status to numbers for the chart
- status_map:
- CLEAR: 0
- WARNING: 1
- CRITICAL: 2
- # set to true to include a chart with calculated alarm values over time
- collect_alarm_values: false
- # define the type of chart for plotting status over time e.g. 'line' or 'stacked'
- alarm_status_chart_type: 'line'
- # a "," separated list of words you want to filter alarm names for. For example 'cpu,load' would filter for only
- # alarms with "cpu" or "load" in alarm name. Default includes all.
- alarm_contains_words: ''
- # a "," separated list of words you want to exclude based on alarm name. For example 'cpu,load' would exclude
- # all alarms with "cpu" or "load" in alarm name. Default excludes None.
- alarm_excludes_words: ''
-```
-
-It will default to pulling all alarms at each time step from the Netdata rest api at `http://127.0.0.1:19999/api/v1/alarms?all`
-### Troubleshooting
-
-To troubleshoot issues with the `alarms` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `alarms` module in debug mode:
-
-```bash
-./python.d.plugin alarms debug trace
-```
-
diff --git a/collectors/python.d.plugin/alarms/README.md b/collectors/python.d.plugin/alarms/README.md
new file mode 120000
index 00000000000000..85759ae6c648b7
--- /dev/null
+++ b/collectors/python.d.plugin/alarms/README.md
@@ -0,0 +1 @@
+integrations/netdata_agent_alarms.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/alarms/integrations/netdata_agent_alarms.md b/collectors/python.d.plugin/alarms/integrations/netdata_agent_alarms.md
new file mode 100644
index 00000000000000..9fb69878a70441
--- /dev/null
+++ b/collectors/python.d.plugin/alarms/integrations/netdata_agent_alarms.md
@@ -0,0 +1,201 @@
+
+
+# Netdata Agent alarms
+
+Plugin: python.d.plugin
+Module: alarms
+
+
+
+## Overview
+
+This collector creates an 'Alarms' menu with one line plot of `alarms.status`.
+
+
+Alarm status is read from the Netdata agent rest api [`/api/v1/alarms?all`](https://learn.netdata.cloud/api#/alerts/alerts1).
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+It discovers instances of Netdata running on localhost, and gathers metrics from `http://127.0.0.1:19999/api/v1/alarms?all`. `CLEAR` status is mapped to `0`, `WARNING` to `1` and `CRITICAL` to `2`. Also, by default all alarms produced will be monitored.
+
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Netdata Agent alarms instance
+
+These metrics refer to the entire monitored application.
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| alarms.status | a dimension per alarm representing the latest status of the alarm. | status |
+| alarms.values | a dimension per alarm representing the latest collected value of the alarm. | value |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/alarms.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/alarms.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| url | Netdata agent alarms endpoint to collect from. Can be local or remote so long as reachable by agent. | http://127.0.0.1:19999/api/v1/alarms?all | yes |
+| status_map | Mapping of alarm status to integer number that will be the metric value collected. | {"CLEAR": 0, "WARNING": 1, "CRITICAL": 2} | yes |
+| collect_alarm_values | Set to true to include a chart with calculated alarm values over time. | no | yes |
+| alarm_status_chart_type | Define the type of chart for plotting status over time e.g. 'line' or 'stacked'. | line | yes |
+| alarm_contains_words | A "," separated list of words you want to filter alarm names for. For example 'cpu,load' would filter for only alarms with "cpu" or "load" in alarm name. Default includes all. | | yes |
+| alarm_excludes_words | A "," separated list of words you want to exclude based on alarm name. For example 'cpu,load' would exclude all alarms with "cpu" or "load" in alarm name. Default excludes None. | | yes |
+| update_every | Sets the default data collection frequency. | 10 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic example configuration.
+
+```yaml
+jobs:
+ url: 'http://127.0.0.1:19999/api/v1/alarms?all'
+
+```
+##### Advanced
+
+An advanced example configuration with multiple jobs collecting different subsets of alarms for plotting on different charts.
+"ML" job will collect status and values for all alarms with "ml_" in the name. Default job will collect status for all other alarms.
+
+
+Config
+
+```yaml
+ML:
+ update_every: 5
+ url: 'http://127.0.0.1:19999/api/v1/alarms?all'
+ status_map:
+ CLEAR: 0
+ WARNING: 1
+ CRITICAL: 2
+ collect_alarm_values: true
+ alarm_status_chart_type: 'stacked'
+ alarm_contains_words: 'ml_'
+
+Default:
+ update_every: 5
+ url: 'http://127.0.0.1:19999/api/v1/alarms?all'
+ status_map:
+ CLEAR: 0
+ WARNING: 1
+ CRITICAL: 2
+ collect_alarm_values: false
+ alarm_status_chart_type: 'stacked'
+ alarm_excludes_words: 'ml_'
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `alarms` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin alarms debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/am2320/README.md b/collectors/python.d.plugin/am2320/README.md
deleted file mode 100644
index b8a6acb0b3aaf4..00000000000000
--- a/collectors/python.d.plugin/am2320/README.md
+++ /dev/null
@@ -1,76 +0,0 @@
-
-
-# AM2320 sensor monitoring with netdata
-
-Displays a graph of the temperature and humidity from a AM2320 sensor.
-
-## Requirements
- - Adafruit Circuit Python AM2320 library
- - Adafruit AM2320 I2C sensor
- - Python 3 (Adafruit libraries are not Python 2.x compatible)
-
-
-It produces the following charts:
-1. **Temperature**
-2. **Humidity**
-
-## Configuration
-
-Edit the `python.d/am2320.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/am2320.conf
-```
-
-Raspberry Pi Instructions:
-
-Hardware install:
-Connect the am2320 to the Raspberry Pi I2C pins
-
-Raspberry Pi 3B/4 Pins:
-
-- Board 3.3V (pin 1) to sensor VIN (pin 1)
-- Board SDA (pin 3) to sensor SDA (pin 2)
-- Board GND (pin 6) to sensor GND (pin 3)
-- Board SCL (pin 5) to sensor SCL (pin 4)
-
-You may also need to add two I2C pullup resistors if your board does not already have them. The Raspberry Pi does have internal pullup resistors but it doesn't hurt to add them anyway. You can use 2.2K - 10K but we will just use 10K. The resistors go from VDD to SCL and SDA each.
-
-Software install:
-- `sudo pip3 install adafruit-circuitpython-am2320`
-- edit `/etc/netdata/netdata.conf`
-- find `[plugin:python.d]`
-- add `command options = -ppython3`
-- save the file.
-- restart the netdata service.
-- check the dashboard.
-
-### Troubleshooting
-
-To troubleshoot issues with the `am2320` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `am2320` module in debug mode:
-
-```bash
-./python.d.plugin am2320 debug trace
-```
-
diff --git a/collectors/python.d.plugin/am2320/README.md b/collectors/python.d.plugin/am2320/README.md
new file mode 120000
index 00000000000000..0bc5ea90e839d6
--- /dev/null
+++ b/collectors/python.d.plugin/am2320/README.md
@@ -0,0 +1 @@
+integrations/am2320.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/am2320/integrations/am2320.md b/collectors/python.d.plugin/am2320/integrations/am2320.md
new file mode 100644
index 00000000000000..72b351eb555013
--- /dev/null
+++ b/collectors/python.d.plugin/am2320/integrations/am2320.md
@@ -0,0 +1,181 @@
+
+
+# AM2320
+
+
+
+
+
+Plugin: python.d.plugin
+Module: am2320
+
+
+
+## Overview
+
+This collector monitors AM2320 sensor metrics about temperature and humidity.
+
+It retrieves temperature and humidity values by contacting an AM2320 sensor over i2c.
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+Assuming prerequisites are met, the collector will try to connect to the sensor via i2c
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per AM2320 instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| am2320.temperature | temperature | celsius |
+| am2320.humidity | humidity | percentage |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Sensor connection to a Raspberry Pi
+
+Connect the am2320 to the Raspberry Pi I2C pins
+
+Raspberry Pi 3B/4 Pins:
+
+- Board 3.3V (pin 1) to sensor VIN (pin 1)
+- Board SDA (pin 3) to sensor SDA (pin 2)
+- Board GND (pin 6) to sensor GND (pin 3)
+- Board SCL (pin 5) to sensor SCL (pin 4)
+
+You may also need to add two I2C pullup resistors if your board does not already have them. The Raspberry Pi does have internal pullup resistors but it doesn't hurt to add them anyway. You can use 2.2K - 10K but we will just use 10K. The resistors go from VDD to SCL and SDA each.
+
+
+#### Software requirements
+
+Install the Adafruit Circuit Python AM2320 library:
+
+`sudo pip3 install adafruit-circuitpython-am2320`
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/am2320.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/am2320.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+
+
+
+#### Examples
+
+##### Local sensor
+
+A basic JOB configuration
+
+```yaml
+local_sensor:
+ name: 'Local AM2320'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `am2320` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin am2320 debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/beanstalk/README.md b/collectors/python.d.plugin/beanstalk/README.md
deleted file mode 100644
index c86ca354ab3cac..00000000000000
--- a/collectors/python.d.plugin/beanstalk/README.md
+++ /dev/null
@@ -1,156 +0,0 @@
-
-
-# Beanstalk collector
-
-Provides server and tube-level statistics.
-
-## Requirements
-
-- `python-beanstalkc`
-
-**Server statistics:**
-
-1. **Cpu usage** in cpu time
-
- - user
- - system
-
-2. **Jobs rate** in jobs/s
-
- - total
- - timeouts
-
-3. **Connections rate** in connections/s
-
- - connections
-
-4. **Commands rate** in commands/s
-
- - put
- - peek
- - peek-ready
- - peek-delayed
- - peek-buried
- - reserve
- - use
- - watch
- - ignore
- - delete
- - release
- - bury
- - kick
- - stats
- - stats-job
- - stats-tube
- - list-tubes
- - list-tube-used
- - list-tubes-watched
- - pause-tube
-
-5. **Current tubes** in tubes
-
- - tubes
-
-6. **Current jobs** in jobs
-
- - urgent
- - ready
- - reserved
- - delayed
- - buried
-
-7. **Current connections** in connections
-
- - written
- - producers
- - workers
- - waiting
-
-8. **Binlog** in records/s
-
- - written
- - migrated
-
-9. **Uptime** in seconds
-
- - uptime
-
-**Per tube statistics:**
-
-1. **Jobs rate** in jobs/s
-
- - jobs
-
-2. **Jobs** in jobs
-
- - using
- - ready
- - reserved
- - delayed
- - buried
-
-3. **Connections** in connections
-
- - using
- - waiting
- - watching
-
-4. **Commands** in commands/s
-
- - deletes
- - pauses
-
-5. **Pause** in seconds
-
- - since
- - left
-
-## Configuration
-
-Edit the `python.d/beanstalk.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/beanstalk.conf
-```
-
-Sample:
-
-```yaml
-host : '127.0.0.1'
-port : 11300
-```
-
-If no configuration is given, module will attempt to connect to beanstalkd on `127.0.0.1:11300` address
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `beanstalk` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `beanstalk` module in debug mode:
-
-```bash
-./python.d.plugin beanstalk debug trace
-```
-
diff --git a/collectors/python.d.plugin/beanstalk/README.md b/collectors/python.d.plugin/beanstalk/README.md
new file mode 120000
index 00000000000000..4efe13889a9669
--- /dev/null
+++ b/collectors/python.d.plugin/beanstalk/README.md
@@ -0,0 +1 @@
+integrations/beanstalk.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/beanstalk/integrations/beanstalk.md b/collectors/python.d.plugin/beanstalk/integrations/beanstalk.md
new file mode 100644
index 00000000000000..5095c0c284a6d2
--- /dev/null
+++ b/collectors/python.d.plugin/beanstalk/integrations/beanstalk.md
@@ -0,0 +1,219 @@
+
+
+# Beanstalk
+
+
+
+
+
+Plugin: python.d.plugin
+Module: beanstalk
+
+
+
+## Overview
+
+Monitor Beanstalk metrics to enhance job queueing and processing efficiency. Track job rates, processing times, and queue lengths for better task management.
+
+The collector uses the `beanstalkc` python module to connect to a `beanstalkd` service and gather metrics.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+If no configuration is given, the module will attempt to connect to beanstalkd at `127.0.0.1:11300`.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Beanstalk instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| beanstalk.cpu_usage | user, system | cpu time |
+| beanstalk.jobs_rate | total, timeouts | jobs/s |
+| beanstalk.connections_rate | connections | connections/s |
+| beanstalk.commands_rate | put, peek, peek-ready, peek-delayed, peek-buried, reserve, use, watch, ignore, delete, release, bury, kick, stats, stats-job, stats-tube, list-tubes, list-tube-used, list-tubes-watched, pause-tube | commands/s |
+| beanstalk.current_tubes | tubes | tubes |
+| beanstalk.current_jobs | urgent, ready, reserved, delayed, buried | jobs |
+| beanstalk.current_connections | written, producers, workers, waiting | connections |
+| beanstalk.binlog | written, migrated | records/s |
+| beanstalk.uptime | uptime | seconds |
+
+### Per tube
+
+Metrics related to Beanstalk tubes. Each tube produces its own set of the following metrics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| beanstalk.jobs_rate | jobs | jobs/s |
+| beanstalk.jobs | urgent, ready, reserved, delayed, buried | jobs |
+| beanstalk.connections | using, waiting, watching | connections |
+| beanstalk.commands | deletes, pauses | commands/s |
+| beanstalk.pause | since, left | seconds |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ beanstalk_server_buried_jobs ](https://github.com/netdata/netdata/blob/master/health/health.d/beanstalkd.conf) | beanstalk.current_jobs | number of buried jobs across all tubes. You need to manually kick them so they can be processed. Presence of buried jobs in a tube does not affect new jobs. |
+
+
+## Setup
+
+### Prerequisites
+
+#### beanstalkc python module
+
+The collector requires the `beanstalkc` python module to be installed.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/beanstalk.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/beanstalk.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| host | IP or URL to a beanstalk service. | 127.0.0.1 | no |
+| port | Port of the beanstalk service. | 11300 | no |
+
+
+
+#### Examples
+
+##### Remote beanstalk server
+
+A basic remote beanstalk server
+
+```yaml
+remote:
+ name: 'beanstalk'
+ host: '1.2.3.4'
+ port: 11300
+
+```
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+Config
+
+```yaml
+localhost:
+ name: 'local_beanstalk'
+ host: '127.0.0.1'
+ port: 11300
+
+remote_job:
+ name: 'remote_beanstalk'
+ host: '192.0.2.1'
+ port: 11300
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `beanstalk` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin beanstalk debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/beanstalk/metadata.yaml b/collectors/python.d.plugin/beanstalk/metadata.yaml
index b6ff2f11686336..7dff9cb3a1440b 100644
--- a/collectors/python.d.plugin/beanstalk/metadata.yaml
+++ b/collectors/python.d.plugin/beanstalk/metadata.yaml
@@ -8,7 +8,7 @@ modules:
link: "https://beanstalkd.github.io/"
categories:
- data-collection.message-brokers
- - data-collection.task-queues
+ #- data-collection.task-queues
icon_filename: "beanstalk.svg"
related_resources:
integrations:
diff --git a/collectors/python.d.plugin/bind_rndc/README.md b/collectors/python.d.plugin/bind_rndc/README.md
deleted file mode 100644
index aa173f385204ac..00000000000000
--- a/collectors/python.d.plugin/bind_rndc/README.md
+++ /dev/null
@@ -1,102 +0,0 @@
-
-
-# ISC Bind collector
-
-Collects Name server summary performance statistics using `rndc` tool.
-
-## Requirements
-
-- Version of bind must be 9.6 +
-- Netdata must have permissions to run `rndc stats`
-
-It produces:
-
-1. **Name server statistics**
-
- - requests
- - responses
- - success
- - auth_answer
- - nonauth_answer
- - nxrrset
- - failure
- - nxdomain
- - recursion
- - duplicate
- - rejections
-
-2. **Incoming queries**
-
- - RESERVED0
- - A
- - NS
- - CNAME
- - SOA
- - PTR
- - MX
- - TXT
- - X25
- - AAAA
- - SRV
- - NAPTR
- - A6
- - DS
- - RSIG
- - DNSKEY
- - SPF
- - ANY
- - DLV
-
-3. **Outgoing queries**
-
-- Same as Incoming queries
-
-## Configuration
-
-Edit the `python.d/bind_rndc.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/bind_rndc.conf
-```
-
-Sample:
-
-```yaml
-local:
- named_stats_path : '/var/log/bind/named.stats'
-```
-
-If no configuration is given, module will attempt to read named.stats file at `/var/log/bind/named.stats`
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `bind_rndc` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `bind_rndc` module in debug mode:
-
-```bash
-./python.d.plugin bind_rndc debug trace
-```
-
diff --git a/collectors/python.d.plugin/bind_rndc/README.md b/collectors/python.d.plugin/bind_rndc/README.md
new file mode 120000
index 00000000000000..03a182ae8d736b
--- /dev/null
+++ b/collectors/python.d.plugin/bind_rndc/README.md
@@ -0,0 +1 @@
+integrations/isc_bind_rndc.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/bind_rndc/integrations/isc_bind_rndc.md b/collectors/python.d.plugin/bind_rndc/integrations/isc_bind_rndc.md
new file mode 100644
index 00000000000000..163f8282c3818e
--- /dev/null
+++ b/collectors/python.d.plugin/bind_rndc/integrations/isc_bind_rndc.md
@@ -0,0 +1,215 @@
+
+
+# ISC Bind (RNDC)
+
+
+
+
+
+Plugin: python.d.plugin
+Module: bind_rndc
+
+
+
+## Overview
+
+Monitor ISC Bind (RNDC) performance for optimal DNS server operations. Monitor query rates, response times, and error rates to ensure reliable DNS service delivery.
+
+This collector uses the `rndc` tool to dump statistics to the `named.stats` file, then reads that file to gather Bind Name Server summary performance metrics.
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+If no configuration is given, the collector will attempt to read the `named.stats` file at `/var/log/bind/named.stats`.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per ISC Bind (RNDC) instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| bind_rndc.name_server_statistics | requests, rejected_queries, success, failure, responses, duplicate, recursion, nxrrset, nxdomain, non_auth_answer, auth_answer, dropped_queries | stats |
+| bind_rndc.incoming_queries | a dimension per incoming query type | queries |
+| bind_rndc.outgoing_queries | a dimension per outgoing query type | queries |
+| bind_rndc.stats_size | stats_size | MiB |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ bind_rndc_stats_file_size ](https://github.com/netdata/netdata/blob/master/health/health.d/bind_rndc.conf) | bind_rndc.stats_size | BIND statistics-file size |
+
+
+## Setup
+
+### Prerequisites
+
+#### Minimum bind version and permissions
+
+Version of bind must be >=9.6 and the Netdata user must have permissions to run `rndc stats`
+
+#### Setup log rotate for bind stats
+
+BIND appends logs at EVERY RUN. It is NOT RECOMMENDED to set `update_every` below 30 sec.
+It is STRONGLY RECOMMENDED to create a `bind-rndc.conf` file for logrotate.
+
+To set up BIND to dump stats do the following:
+
+1. Add to 'named.conf.options' options {}:
+`statistics-file "/var/log/bind/named.stats";`
+
+2. Create bind/ directory in /var/log:
+`cd /var/log/ && mkdir bind`
+
+3. Change owner of directory to 'bind' user:
+`chown bind bind/`
+
+4. RELOAD (NOT restart) BIND:
+`systemctl reload bind9.service`
+
+5. Run `rndc stats` as root to dump the statistics (BIND will create `named.stats` in the new directory)
+
+To allow Netdata to run 'rndc stats' change '/etc/bind/rndc.key' group to netdata:
+`chown :netdata rndc.key`
+
+Last, BUT NOT least, is to create bind-rndc.conf in logrotate.d/:
+```
+/var/log/bind/named.stats {
+
+ daily
+ rotate 4
+ compress
+ delaycompress
+ create 0644 bind bind
+ missingok
+ postrotate
+ rndc reload > /dev/null
+ endscript
+}
+```
+To test your logrotate conf file run as root:
+`logrotate -d /etc/logrotate.d/bind-rndc.conf` (`-d` runs logrotate in debug dry-run mode)
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/bind_rndc.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/bind_rndc.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| named_stats_path | Path to the named stats file, after being dumped by `rndc`. | /var/log/bind/named.stats | no |
+
+
+
+#### Examples
+
+##### Local bind stats
+
+Define a local path to bind stats file
+
+```yaml
+local:
+ named_stats_path: '/var/log/bind/named.stats'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `bind_rndc` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin bind_rndc debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/bind_rndc/metadata.yaml b/collectors/python.d.plugin/bind_rndc/metadata.yaml
index 1e9fb24fe00b5f..e3568e448b4ea5 100644
--- a/collectors/python.d.plugin/bind_rndc/metadata.yaml
+++ b/collectors/python.d.plugin/bind_rndc/metadata.yaml
@@ -4,7 +4,7 @@ modules:
plugin_name: python.d.plugin
module_name: bind_rndc
monitored_instance:
- name: ISCBind (RNDC)
+ name: ISC Bind (RNDC)
link: "https://www.isc.org/bind/"
categories:
- data-collection.dns-and-dhcp-servers
diff --git a/collectors/python.d.plugin/boinc/README.md b/collectors/python.d.plugin/boinc/README.md
deleted file mode 100644
index ea439775445689..00000000000000
--- a/collectors/python.d.plugin/boinc/README.md
+++ /dev/null
@@ -1,64 +0,0 @@
-
-
-# BOINC collector
-
-Monitors task counts for the Berkeley Open Infrastructure Networking Computing (BOINC) distributed computing client using the same RPC interface that the BOINC monitoring GUI does.
-
-It provides charts tracking the total number of tasks and active tasks, as well as ones tracking each of the possible states for tasks.
-
-## Configuration
-
-Edit the `python.d/boinc.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/boinc.conf
-```
-
-BOINC requires use of a password to access it's RPC interface. You can
-find this password in the `gui_rpc_auth.cfg` file in your BOINC directory.
-
-By default, the module will try to auto-detect the password by looking
-in `/var/lib/boinc` for this file (this is the location most Linux
-distributions use for a system-wide BOINC installation), so things may
-just work without needing configuration for the local system.
-
-You can monitor remote systems as well:
-
-```yaml
-remote:
- hostname: some-host
- password: some-password
-```
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `boinc` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `boinc` module in debug mode:
-
-```bash
-./python.d.plugin boinc debug trace
-```
-
diff --git a/collectors/python.d.plugin/boinc/README.md b/collectors/python.d.plugin/boinc/README.md
new file mode 120000
index 00000000000000..22c10ca17c42e8
--- /dev/null
+++ b/collectors/python.d.plugin/boinc/README.md
@@ -0,0 +1 @@
+integrations/boinc.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/boinc/integrations/boinc.md b/collectors/python.d.plugin/boinc/integrations/boinc.md
new file mode 100644
index 00000000000000..d6874d4559c2eb
--- /dev/null
+++ b/collectors/python.d.plugin/boinc/integrations/boinc.md
@@ -0,0 +1,204 @@
+
+
+# BOINC
+
+
+
+
+
+Plugin: python.d.plugin
+Module: boinc
+
+
+
+## Overview
+
+This collector monitors task counts for the Berkeley Open Infrastructure for Network Computing (BOINC) distributed computing client.
+
+It uses the same RPC interface that the BOINC monitoring GUI does.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+By default, the module will try to auto-detect the password to the RPC interface by looking in `/var/lib/boinc` for the `gui_rpc_auth.cfg` file (this is the location most Linux distributions use for a system-wide BOINC installation), so things may just work without needing configuration for a local system.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per BOINC instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| boinc.tasks | Total, Active | tasks |
+| boinc.states | New, Downloading, Ready to Run, Compute Errors, Uploading, Uploaded, Aborted, Failed Uploads | tasks |
+| boinc.sched | Uninitialized, Preempted, Scheduled | tasks |
+| boinc.process | Uninitialized, Executing, Suspended, Aborted, Quit, Copy Pending | tasks |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ boinc_total_tasks ](https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf) | boinc.tasks | average number of total tasks over the last 10 minutes |
+| [ boinc_active_tasks ](https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf) | boinc.tasks | average number of active tasks over the last 10 minutes |
+| [ boinc_compute_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf) | boinc.states | average number of compute errors over the last 10 minutes |
+| [ boinc_upload_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/boinc.conf) | boinc.states | average number of failed uploads over the last 10 minutes |
+
+
+## Setup
+
+### Prerequisites
+
+#### Boinc RPC interface
+
+BOINC requires use of a password to access its RPC interface. You can find this password in the `gui_rpc_auth.cfg` file in your BOINC directory.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/boinc.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/boinc.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| hostname | Define a hostname where boinc is running. | localhost | no |
+| port | The port of boinc RPC interface. | | no |
+| password | Provide a password to connect to a boinc RPC interface. | | no |
+
+
+
+#### Examples
+
+##### Configuration of a remote boinc instance
+
+A basic JOB configuration for a remote boinc instance
+
+```yaml
+remote:
+ hostname: '1.2.3.4'
+ port: 1234
+ password: 'some-password'
+
+```
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+Config
+
+```yaml
+localhost:
+ name: 'local'
+ hostname: '127.0.0.1'
+ port: 1234
+ password: 'some-password'
+
+remote_job:
+ name: 'remote'
+ hostname: '192.0.2.1'
+ port: 1234
+ password: 'some-other-password'
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `boinc` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin boinc debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/ceph/README.md b/collectors/python.d.plugin/ceph/README.md
deleted file mode 100644
index 555491ad7b2661..00000000000000
--- a/collectors/python.d.plugin/ceph/README.md
+++ /dev/null
@@ -1,71 +0,0 @@
-
-
-# CEPH collector
-
-Monitors the ceph cluster usage and consumption data of a server, and produces:
-
-- Cluster statistics (usage, available, latency, objects, read/write rate)
-- OSD usage
-- OSD latency
-- Pool usage
-- Pool read/write operations
-- Pool read/write rate
-- number of objects per pool
-
-## Requirements
-
-- `rados` python module
-- Granting read permissions to ceph group from keyring file
-
-```shell
-# chmod 640 /etc/ceph/ceph.client.admin.keyring
-```
-
-## Configuration
-
-Edit the `python.d/ceph.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/ceph.conf
-```
-
-Sample:
-
-```yaml
-local:
- config_file: '/etc/ceph/ceph.conf'
- keyring_file: '/etc/ceph/ceph.client.admin.keyring'
-```
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `ceph` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `ceph` module in debug mode:
-
-```bash
-./python.d.plugin ceph debug trace
-```
-
diff --git a/collectors/python.d.plugin/ceph/README.md b/collectors/python.d.plugin/ceph/README.md
new file mode 120000
index 00000000000000..654248b701f7bb
--- /dev/null
+++ b/collectors/python.d.plugin/ceph/README.md
@@ -0,0 +1 @@
+integrations/ceph.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/ceph/integrations/ceph.md b/collectors/python.d.plugin/ceph/integrations/ceph.md
new file mode 100644
index 00000000000000..cfda01fbe785fa
--- /dev/null
+++ b/collectors/python.d.plugin/ceph/integrations/ceph.md
@@ -0,0 +1,194 @@
+
+
+# Ceph
+
+
+
+
+
+Plugin: python.d.plugin
+Module: ceph
+
+
+
+## Overview
+
+This collector monitors Ceph metrics about Cluster statistics, OSD usage, latency and Pool statistics.
+
+Uses the `rados` python module to connect to a Ceph cluster.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Ceph instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ceph.general_usage | avail, used | KiB |
+| ceph.general_objects | cluster | objects |
+| ceph.general_bytes | read, write | KiB/s |
+| ceph.general_operations | read, write | operations |
+| ceph.general_latency | apply, commit | milliseconds |
+| ceph.pool_usage | a dimension per Ceph Pool | KiB |
+| ceph.pool_objects | a dimension per Ceph Pool | objects |
+| ceph.pool_read_bytes | a dimension per Ceph Pool | KiB/s |
+| ceph.pool_write_bytes | a dimension per Ceph Pool | KiB/s |
+| ceph.pool_read_operations | a dimension per Ceph Pool | operations |
+| ceph.pool_write_operations | a dimension per Ceph Pool | operations |
+| ceph.osd_usage | a dimension per Ceph OSD | KiB |
+| ceph.osd_size | a dimension per Ceph OSD | KiB |
+| ceph.apply_latency | a dimension per Ceph OSD | milliseconds |
+| ceph.commit_latency | a dimension per Ceph OSD | milliseconds |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ ceph_cluster_space_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/ceph.conf) | ceph.general_usage | cluster disk space utilization |
+
+
+## Setup
+
+### Prerequisites
+
+#### `rados` python module
+
+Make sure the `rados` python module is installed
+
+#### Granting the ceph group read permission on the keyring file
+
+Execute: `chmod 640 /etc/ceph/ceph.client.admin.keyring`
+
+#### Create a specific rados_id
+
+You can optionally create a rados_id to use instead of admin
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/ceph.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/ceph.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| config_file | Ceph config file | | yes |
+| keyring_file | Ceph keyring file. The netdata user must be added to the ceph group, and the keyring file must have group read permission. | | yes |
+| rados_id | A rados user id to use for connecting to the Ceph cluster. | admin | no |
+
+
+
+#### Examples
+
+##### Basic local Ceph cluster
+
+A basic configuration to connect to a local Ceph cluster.
+
+```yaml
+local:
+ config_file: '/etc/ceph/ceph.conf'
+ keyring_file: '/etc/ceph/ceph.client.admin.keyring'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `ceph` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin ceph debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/changefinder/README.md b/collectors/python.d.plugin/changefinder/README.md
deleted file mode 100644
index 0e9bab88757fda..00000000000000
--- a/collectors/python.d.plugin/changefinder/README.md
+++ /dev/null
@@ -1,241 +0,0 @@
-
-
-# Online change point detection with Netdata
-
-This collector uses the Python [changefinder](https://github.com/shunsukeaihara/changefinder) library to
-perform [online](https://en.wikipedia.org/wiki/Online_machine_learning) [changepoint detection](https://en.wikipedia.org/wiki/Change_detection)
-on your Netdata charts and/or dimensions.
-
-Instead of this collector just _collecting_ data, it also does some computation on the data it collects to return a
-changepoint score for each chart or dimension you configure it to work on. This is
-an [online](https://en.wikipedia.org/wiki/Online_machine_learning) machine learning algorithm so there is no batch step
-to train the model, instead it evolves over time as more data arrives. That makes this particular algorithm quite cheap
-to compute at each step of data collection (see the notes section below for more details) and it should scale fairly
-well to work on lots of charts or hosts (if running on a parent node for example).
-
-> As this is a somewhat unique collector and involves often subjective concepts like changepoints and anomalies, we would love to hear any feedback on it from the community. Please let us know on the [community forum](https://community.netdata.cloud/t/changefinder-collector-feedback/972) or drop us a note at [analytics-ml-team@netdata.cloud](mailto:analytics-ml-team@netdata.cloud) for any and all feedback, both positive and negative. This sort of feedback is priceless to help us make complex features more useful.
-
-## Charts
-
-Two charts are available:
-
-### ChangeFinder Scores (`changefinder.scores`)
-
-This chart shows the percentile of the score that is output from the ChangeFinder library (it is turned off by default
-but available with `show_scores: true`).
-
-A high observed score is more likely to be a valid changepoint worth exploring, even more so when multiple charts or
-dimensions have high changepoint scores at the same time or very close together.
-
-### ChangeFinder Flags (`changefinder.flags`)
-
-This chart shows `1` or `0` if the latest score has a percentile value that exceeds the `cf_threshold` threshold. By
-default, any scores that are in the 99th or above percentile will raise a flag on this chart.
-
-The raw changefinder score itself can be a little noisy and so limiting ourselves to just periods where it surpasses
-the 99th percentile can help manage the "[signal to noise ratio](https://en.wikipedia.org/wiki/Signal-to-noise_ratio)"
-better.
-
-The `cf_threshold` parameter might be one you want to play around with to tune things specifically for the workloads on
-your node and the specific charts you want to monitor. For example, maybe the 95th percentile might work better for you
-than the 99th percentile.
-
-Below is an example of the chart produced by this collector. The first 3/4 of the period looks normal in that we see a
-few individual changes being picked up somewhat randomly over time. But then at around 14:59 towards the end of the
-chart we see two periods with 'spikes' of multiple changes for a small period of time. This is the sort of pattern that
-might be a sign something on the system that has changed sufficiently enough to merit some investigation.
-
-
-
-## Requirements
-
-- This collector will only work with Python 3 and requires the packages below be installed.
-
-```bash
-# become netdata user
-sudo su -s /bin/bash netdata
-# install required packages for the netdata user
-pip3 install --user numpy==1.19.5 changefinder==0.03 scipy==1.5.4
-```
-
-**Note**: if you need to tell Netdata to use Python 3 then you can pass the below command in the python plugin section
-of your `netdata.conf` file.
-
-```yaml
-[ plugin:python.d ]
- # update every = 1
- command options = -ppython3
-```
-
-## Configuration
-
-Install the Python requirements above, enable the collector and restart Netdata.
-
-```bash
-cd /etc/netdata/
-sudo ./edit-config python.d.conf
-# Set `changefinder: no` to `changefinder: yes`
-sudo systemctl restart netdata
-```
-
-The configuration for the changefinder collector defines how it will behave on your system and might take some
-experimentation with over time to set it optimally for your node. Out of the box, the config comes with
-some [sane defaults](https://www.netdata.cloud/blog/redefining-monitoring-netdata/) to get you started that try to
-balance the flexibility and power of the ML models with the goal of being as cheap as possible in term of cost on the
-node resources.
-
-_**Note**: If you are unsure about any of the below configuration options then it's best to just ignore all this and
-leave the `changefinder.conf` file alone to begin with. Then you can return to it later if you would like to tune things
-a bit more once the collector is running for a while and you have a feeling for its performance on your node._
-
-Edit the `python.d/changefinder.conf` configuration file using `edit-config` from the your
-agent's [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is usually at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/changefinder.conf
-```
-
-The default configuration should look something like this. Here you can see each parameter (with sane defaults) and some
-information about each one and what it does.
-
-```yaml
-# -
-# JOBS (data collection sources)
-
-# Pull data from local Netdata node.
-local:
-
- # A friendly name for this job.
- name: 'local'
-
- # What host to pull data from.
- host: '127.0.0.1:19999'
-
- # What charts to pull data for - A regex like 'system\..*|' or 'system\..*|apps.cpu|apps.mem' etc.
- charts_regex: 'system\..*'
-
- # Charts to exclude, useful if you would like to exclude some specific charts.
- # Note: should be a ',' separated string like 'chart.name,chart.name'.
- charts_to_exclude: ''
-
- # Get ChangeFinder scores 'per_dim' or 'per_chart'.
- mode: 'per_chart'
-
- # Default parameters that can be passed to the changefinder library.
- cf_r: 0.5
- cf_order: 1
- cf_smooth: 15
-
- # The percentile above which scores will be flagged.
- cf_threshold: 99
-
- # The number of recent scores to use when calculating the percentile of the changefinder score.
- n_score_samples: 14400
-
- # Set to true if you also want to chart the percentile scores in addition to the flags.
- # Mainly useful for debugging or if you want to dive deeper on how the scores are evolving over time.
- show_scores: false
-```
-
-## Troubleshooting
-
-To see any relevant log messages you can use a command like below.
-
-```bash
-grep 'changefinder' /var/log/netdata/error.log
-```
-
-If you would like to log in as `netdata` user and run the collector in debug mode to see more detail.
-
-```bash
-# become netdata user
-sudo su -s /bin/bash netdata
-# run collector in debug using `nolock` option if netdata is already running the collector itself.
-/usr/libexec/netdata/plugins.d/python.d.plugin changefinder debug trace nolock
-```
-
-## Notes
-
-- It may take an hour or two (depending on your choice of `n_score_samples`) for the collector to 'settle' into it's
- typical behaviour in terms of the trained models and scores you will see in the normal running of your node. Mainly
- this is because it can take a while to build up a proper distribution of previous scores in over to convert the raw
- score returned by the ChangeFinder algorithm into a percentile based on the most recent `n_score_samples` that have
- already been produced. So when you first turn the collector on, it will have a lot of flags in the beginning and then
- should 'settle down' once it has built up enough history. This is a typical characteristic of online machine learning
- approaches which need some initial window of time before they can be useful.
-- As this collector does most of the work in Python itself, you may want to try it out first on a test or development
- system to get a sense of its performance characteristics on a node similar to where you would like to use it.
-- On a development n1-standard-2 (2 vCPUs, 7.5 GB memory) vm running Ubuntu 18.04 LTS and not doing any work some of the
- typical performance characteristics we saw from running this collector (with defaults) were:
- - A runtime (`netdata.runtime_changefinder`) of ~30ms.
- - Typically ~1% additional cpu usage.
- - About ~85mb of ram (`apps.mem`) being continually used by the `python.d.plugin` under default configuration.
-
-## Useful links and further reading
-
-- [PyPi changefinder](https://pypi.org/project/changefinder/) reference page.
-- [GitHub repo](https://github.com/shunsukeaihara/changefinder) for the changefinder library.
-- Relevant academic papers:
- - Yamanishi K, Takeuchi J. A unifying framework for detecting outliers and change points from nonstationary time
- series data. 8th ACM SIGKDD international conference on Knowledge discovery and data mining - KDD02. 2002:
- 676. ([pdf](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.12.3469&rep=rep1&type=pdf))
- - Kawahara Y, Sugiyama M. Sequential Change-Point Detection Based on Direct Density-Ratio Estimation. SIAM
- International Conference on Data Mining. 2009:
- 389–400. ([pdf](https://onlinelibrary.wiley.com/doi/epdf/10.1002/sam.10124))
- - Liu S, Yamada M, Collier N, Sugiyama M. Change-point detection in time-series data by relative density-ratio
- estimation. Neural Networks. Jul.2013 43:72–83. [PubMed: 23500502] ([pdf](https://arxiv.org/pdf/1203.0453.pdf))
- - T. Iwata, K. Nakamura, Y. Tokusashi, and H. Matsutani, “Accelerating Online Change-Point Detection Algorithm using
- 10 GbE FPGA NIC,” Proc. International European Conference on Parallel and Distributed Computing (Euro-Par’18)
- Workshops, vol.11339, pp.506–517, Aug.
- 2018 ([pdf](https://www.arc.ics.keio.ac.jp/~matutani/papers/iwata_heteropar2018.pdf))
-- The [ruptures](https://github.com/deepcharles/ruptures) python package is also a good place to learn more about
- changepoint detection (mostly offline as opposed to online but deals with similar concepts).
-- A nice [blog post](https://techrando.com/2019/08/14/a-brief-introduction-to-change-point-detection-using-python/)
- showing some of the other options and libraries for changepoint detection in Python.
-- [Bayesian changepoint detection](https://github.com/hildensia/bayesian_changepoint_detection) library - we may explore
- implementing a collector for this or integrating this approach into this collector at a future date if there is
- interest and it proves computationaly feasible.
-- You might also find the
- Netdata [anomalies collector](https://github.com/netdata/netdata/tree/master/collectors/python.d.plugin/anomalies)
- interesting.
-- [Anomaly Detection](https://en.wikipedia.org/wiki/Anomaly_detection) wikipedia page.
-- [Anomaly Detection YouTube playlist](https://www.youtube.com/playlist?list=PL6Zhl9mK2r0KxA6rB87oi4kWzoqGd5vp0)
- maintained by [andrewm4894](https://github.com/andrewm4894/) from Netdata.
-- [awesome-TS-anomaly-detection](https://github.com/rob-med/awesome-TS-anomaly-detection) Github list of useful tools,
- libraries and resources.
-- [Mendeley public group](https://www.mendeley.com/community/interesting-anomaly-detection-papers/) with some
- interesting anomaly detection papers we have been reading.
-- Good [blog post](https://www.anodot.com/blog/what-is-anomaly-detection/) from Anodot on time series anomaly detection.
- Anodot also have some great whitepapers in this space too that some may find useful.
-- Novelty and outlier detection in
- the [scikit-learn documentation](https://scikit-learn.org/stable/modules/outlier_detection.html).
-
-### Troubleshooting
-
-To troubleshoot issues with the `changefinder` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `changefinder` module in debug mode:
-
-```bash
-./python.d.plugin changefinder debug trace
-```
-
diff --git a/collectors/python.d.plugin/changefinder/README.md b/collectors/python.d.plugin/changefinder/README.md
new file mode 120000
index 00000000000000..0ca704eb1f491f
--- /dev/null
+++ b/collectors/python.d.plugin/changefinder/README.md
@@ -0,0 +1 @@
+integrations/python.d_changefinder.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/changefinder/integrations/python.d_changefinder.md b/collectors/python.d.plugin/changefinder/integrations/python.d_changefinder.md
new file mode 100644
index 00000000000000..c338c93741a55a
--- /dev/null
+++ b/collectors/python.d.plugin/changefinder/integrations/python.d_changefinder.md
@@ -0,0 +1,217 @@
+
+
+# python.d changefinder
+
+Plugin: python.d.plugin
+Module: changefinder
+
+
+
+## Overview
+
+This collector uses the Python [changefinder](https://github.com/shunsukeaihara/changefinder) library to
+perform [online](https://en.wikipedia.org/wiki/Online_machine_learning) [changepoint detection](https://en.wikipedia.org/wiki/Change_detection)
+on your Netdata charts and/or dimensions.
+
+
+Instead of this collector just _collecting_ data, it also does some computation on the data it collects to return a changepoint score for each chart or dimension you configure it to work on. This is an [online](https://en.wikipedia.org/wiki/Online_machine_learning) machine learning algorithm so there is no batch step to train the model, instead it evolves over time as more data arrives. That makes this particular algorithm quite cheap to compute at each step of data collection (see the notes section below for more details) and it should scale fairly well to work on lots of charts or hosts (if running on a parent node for example).
+
+### Notes
+
+- It may take an hour or two (depending on your choice of `n_score_samples`) for the collector to 'settle' into its
+ typical behaviour in terms of the trained models and scores you will see in the normal running of your node. Mainly
+ this is because it can take a while to build up a proper distribution of previous scores in order to convert the raw
+ score returned by the ChangeFinder algorithm into a percentile based on the most recent `n_score_samples` that have
+ already been produced. So when you first turn the collector on, it will have a lot of flags in the beginning and then
+ should 'settle down' once it has built up enough history. This is a typical characteristic of online machine learning
+ approaches which need some initial window of time before they can be useful.
+- As this collector does most of the work in Python itself, you may want to try it out first on a test or development
+ system to get a sense of its performance characteristics on a node similar to where you would like to use it.
+- On a development n1-standard-2 (2 vCPUs, 7.5 GB memory) vm running Ubuntu 18.04 LTS and not doing any work some of the
+ typical performance characteristics we saw from running this collector (with defaults) were:
+ - A runtime (`netdata.runtime_changefinder`) of ~30ms.
+ - Typically ~1% additional cpu usage.
+ - About ~85mb of ram (`apps.mem`) being continually used by the `python.d.plugin` under default configuration.
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+By default this collector will work over all `system.*` charts.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per python.d changefinder instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| changefinder.scores | a dimension per chart | score |
+| changefinder.flags | a dimension per chart | flag |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Python Requirements
+
+This collector will only work with Python 3 and requires the packages below to be installed.
+
+```bash
+# become netdata user
+sudo su -s /bin/bash netdata
+# install required packages for the netdata user
+pip3 install --user numpy==1.19.5 changefinder==0.03 scipy==1.5.4
+```
+
+**Note**: if you need to tell Netdata to use Python 3 then you can pass the below command in the python plugin section
+of your `netdata.conf` file.
+
+```yaml
+[ plugin:python.d ]
+ # update every = 1
+ command options = -ppython3
+```
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/changefinder.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/changefinder.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| charts_regex | what charts to pull data for - A regex like `system\..*\|` or `system\..*\|apps.cpu\|apps.mem` etc. | system\..* | yes |
+| charts_to_exclude | charts to exclude, useful if you would like to exclude some specific charts. note: should be a ',' separated string like 'chart.name,chart.name'. | | no |
+| mode | get ChangeFinder scores 'per_dim' or 'per_chart'. | per_chart | yes |
+| cf_r | default parameters that can be passed to the changefinder library. | 0.5 | no |
+| cf_order | default parameters that can be passed to the changefinder library. | 1 | no |
+| cf_smooth | default parameters that can be passed to the changefinder library. | 15 | no |
+| cf_threshold | the percentile above which scores will be flagged. | 99 | no |
+| n_score_samples | the number of recent scores to use when calculating the percentile of the changefinder score. | 14400 | no |
+| show_scores | set to true if you also want to chart the percentile scores in addition to the flags. (mainly useful for debugging or if you want to dive deeper on how the scores are evolving over time) | no | no |
+
+
+
+#### Examples
+
+##### Default
+
+Default configuration.
+
+```yaml
+local:
+ name: 'local'
+ host: '127.0.0.1:19999'
+ charts_regex: 'system\..*'
+ charts_to_exclude: ''
+ mode: 'per_chart'
+ cf_r: 0.5
+ cf_order: 1
+ cf_smooth: 15
+ cf_threshold: 99
+ n_score_samples: 14400
+ show_scores: false
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `changefinder` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin changefinder debug trace
+ ```
+
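+### Debug Mode
+
+To see more detail, log in as the `netdata` user and run the collector in debug mode.
+
+```bash
+# become netdata user
+sudo su -s /bin/bash netdata
+# run collector in debug using `nolock` option if netdata is already running the collector itself.
+/usr/libexec/netdata/plugins.d/python.d.plugin changefinder debug trace nolock
+```
+
+### Log Messages
+
+To see any relevant log messages you can use a command like below.
+
+```bash
+grep 'changefinder' /var/log/netdata/error.log
+grep 'changefinder' /var/log/netdata/collector.log
+```
+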
+
diff --git a/collectors/python.d.plugin/changefinder/metadata.yaml b/collectors/python.d.plugin/changefinder/metadata.yaml
index 6dcd903e72a890..170d9146a117ec 100644
--- a/collectors/python.d.plugin/changefinder/metadata.yaml
+++ b/collectors/python.d.plugin/changefinder/metadata.yaml
@@ -5,55 +5,187 @@ modules:
module_name: changefinder
monitored_instance:
name: python.d changefinder
- link: ''
+ link: ""
categories:
- data-collection.other
- icon_filename: ''
+ icon_filename: ""
related_resources:
integrations:
list: []
info_provided_to_referring_integrations:
- description: ''
- keywords: []
+ description: ""
+ keywords:
+ - change detection
+ - anomaly detection
+ - machine learning
+ - ml
most_popular: false
overview:
data_collection:
- metrics_description: ''
- method_description: ''
+ metrics_description: |
+ This collector uses the Python [changefinder](https://github.com/shunsukeaihara/changefinder) library to
+ perform [online](https://en.wikipedia.org/wiki/Online_machine_learning) [changepoint detection](https://en.wikipedia.org/wiki/Change_detection)
+ on your Netdata charts and/or dimensions.
+ method_description: |
+ Instead of this collector just _collecting_ data, it also does some computation on the data it collects to return a
+ changepoint score for each chart or dimension you configure it to work on. This is
+ an [online](https://en.wikipedia.org/wiki/Online_machine_learning) machine learning algorithm so there is no batch step
+ to train the model, instead it evolves over time as more data arrives. That makes this particular algorithm quite cheap
+ to compute at each step of data collection (see the notes section below for more details) and it should scale fairly
+ well to work on lots of charts or hosts (if running on a parent node for example).
+
+ ### Notes
+ - It may take an hour or two (depending on your choice of `n_score_samples`) for the collector to 'settle' into its
+ typical behaviour in terms of the trained models and scores you will see in the normal running of your node. Mainly
+ this is because it can take a while to build up a proper distribution of previous scores in order to convert the raw
+ score returned by the ChangeFinder algorithm into a percentile based on the most recent `n_score_samples` that have
+ already been produced. So when you first turn the collector on, it will have a lot of flags in the beginning and then
+ should 'settle down' once it has built up enough history. This is a typical characteristic of online machine learning
+ approaches which need some initial window of time before they can be useful.
+ - As this collector does most of the work in Python itself, you may want to try it out first on a test or development
+ system to get a sense of its performance characteristics on a node similar to where you would like to use it.
+ - On a development n1-standard-2 (2 vCPUs, 7.5 GB memory) vm running Ubuntu 18.04 LTS and not doing any work some of the
+ typical performance characteristics we saw from running this collector (with defaults) were:
+ - A runtime (`netdata.runtime_changefinder`) of ~30ms.
+ - Typically ~1% additional cpu usage.
+ - About ~85mb of ram (`apps.mem`) being continually used by the `python.d.plugin` under default configuration.
supported_platforms:
include: []
exclude: []
multi_instance: true
additional_permissions:
- description: ''
+ description: ""
default_behavior:
auto_detection:
- description: ''
+ description: "By default this collector will work over all `system.*` charts."
limits:
- description: ''
+ description: ""
performance_impact:
- description: ''
+ description: ""
setup:
prerequisites:
- list: []
+ list:
+ - title: Python Requirements
+ description: |
+ This collector will only work with Python 3 and requires the packages below to be installed.
+
+ ```bash
+ # become netdata user
+ sudo su -s /bin/bash netdata
+ # install required packages for the netdata user
+ pip3 install --user numpy==1.19.5 changefinder==0.03 scipy==1.5.4
+ ```
+
+ **Note**: if you need to tell Netdata to use Python 3 then you can pass the below command in the python plugin section
+ of your `netdata.conf` file.
+
+ ```yaml
+ [ plugin:python.d ]
+ # update every = 1
+ command options = -ppython3
+ ```
configuration:
file:
- name: ''
- description: ''
+ name: python.d/changefinder.conf
+ description: ""
options:
- description: ''
+ description: |
+ There are 2 sections:
+
+ * Global variables
+ * One or more JOBS that can define multiple different instances to monitor.
+
+ The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+ Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+ Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
folding:
- title: ''
+ title: "Config options"
enabled: true
- list: []
+ list:
+ - name: charts_regex
+ description: what charts to pull data for - A regex like `system\..*|` or `system\..*|apps.cpu|apps.mem` etc.
+ default_value: "system\\..*"
+ required: true
+ - name: charts_to_exclude
+ description: |
+ charts to exclude, useful if you would like to exclude some specific charts.
+ note: should be a ',' separated string like 'chart.name,chart.name'.
+ default_value: ""
+ required: false
+ - name: mode
+ description: get ChangeFinder scores 'per_dim' or 'per_chart'.
+ default_value: "per_chart"
+ required: true
+ - name: cf_r
+ description: default parameters that can be passed to the changefinder library.
+ default_value: 0.5
+ required: false
+ - name: cf_order
+ description: default parameters that can be passed to the changefinder library.
+ default_value: 1
+ required: false
+ - name: cf_smooth
+ description: default parameters that can be passed to the changefinder library.
+ default_value: 15
+ required: false
+ - name: cf_threshold
+ description: the percentile above which scores will be flagged.
+ default_value: 99
+ required: false
+ - name: n_score_samples
+ description: the number of recent scores to use when calculating the percentile of the changefinder score.
+ default_value: 14400
+ required: false
+ - name: show_scores
+ description: |
+ set to true if you also want to chart the percentile scores in addition to the flags. (mainly useful for debugging or if you want to dive deeper on how the scores are evolving over time)
+ default_value: false
+ required: false
examples:
folding:
enabled: true
- title: ''
- list: []
+ title: "Config"
+ list:
+ - name: Default
+ description: Default configuration.
+ folding:
+ enabled: false
+ config: |
+ local:
+ name: 'local'
+ host: '127.0.0.1:19999'
+ charts_regex: 'system\..*'
+ charts_to_exclude: ''
+ mode: 'per_chart'
+ cf_r: 0.5
+ cf_order: 1
+ cf_smooth: 15
+ cf_threshold: 99
+ n_score_samples: 14400
+ show_scores: false
troubleshooting:
problems:
- list: []
+ list:
+ - name: "Debug Mode"
+ description: |
+ To see more detail, log in as the `netdata` user and run the collector in debug mode.
+
+ ```bash
+ # become netdata user
+ sudo su -s /bin/bash netdata
+ # run collector in debug using `nolock` option if netdata is already running the collector itself.
+ /usr/libexec/netdata/plugins.d/python.d.plugin changefinder debug trace nolock
+ ```
+ - name: "Log Messages"
+ description: |
+ To see any relevant log messages you can use a command like below.
+
+ ```bash
+ grep 'changefinder' /var/log/netdata/error.log
+ grep 'changefinder' /var/log/netdata/collector.log
+ ```
alerts: []
metrics:
folding:
diff --git a/collectors/python.d.plugin/dovecot/README.md b/collectors/python.d.plugin/dovecot/README.md
deleted file mode 100644
index 2397b74789c319..00000000000000
--- a/collectors/python.d.plugin/dovecot/README.md
+++ /dev/null
@@ -1,128 +0,0 @@
-
-
-# Dovecot collector
-
-Provides statistics information from Dovecot server.
-
-Statistics are taken from dovecot socket by executing `EXPORT global` command.
-More information about dovecot stats can be found on [project wiki page.](http://wiki2.dovecot.org/Statistics)
-
-Module isn't compatible with new statistic api (v2.3), but you are still able to use the module with Dovecot v2.3
-by following [upgrading steps.](https://wiki2.dovecot.org/Upgrading/2.3).
-
-**Requirement:**
-Dovecot UNIX socket with R/W permissions for user `netdata` or Dovecot with configured TCP/IP socket.
-
-Module gives information with following charts:
-
-1. **sessions**
-
- - active sessions
-
-2. **logins**
-
- - logins
-
-3. **commands** - number of IMAP commands
-
- - commands
-
-4. **Faults**
-
- - minor
- - major
-
-5. **Context Switches**
-
- - voluntary
- - involuntary
-
-6. **disk** in bytes/s
-
- - read
- - write
-
-7. **bytes** in bytes/s
-
- - read
- - write
-
-8. **number of syscalls** in syscalls/s
-
- - read
- - write
-
-9. **lookups** - number of lookups per second
-
- - path
- - attr
-
-10. **hits** - number of cache hits
-
- - hits
-
-11. **attempts** - authorization attempts
-
- - success
- - failure
-
-12. **cache** - cached authorization hits
-
- - hit
- - miss
-
-## Configuration
-
-Edit the `python.d/dovecot.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/dovecot.conf
-```
-
-Sample:
-
-```yaml
-localtcpip:
- name : 'local'
- host : '127.0.0.1'
- port : 24242
-
-localsocket:
- name : 'local'
- socket : '/var/run/dovecot/stats'
-```
-
-If no configuration is given, module will attempt to connect to dovecot using unix socket localized in `/var/run/dovecot/stats`
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `dovecot` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `dovecot` module in debug mode:
-
-```bash
-./python.d.plugin dovecot debug trace
-```
-
diff --git a/collectors/python.d.plugin/dovecot/README.md b/collectors/python.d.plugin/dovecot/README.md
new file mode 120000
index 00000000000000..c4749cedce0686
--- /dev/null
+++ b/collectors/python.d.plugin/dovecot/README.md
@@ -0,0 +1 @@
+integrations/dovecot.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/dovecot/integrations/dovecot.md b/collectors/python.d.plugin/dovecot/integrations/dovecot.md
new file mode 100644
index 00000000000000..4e7952765057fe
--- /dev/null
+++ b/collectors/python.d.plugin/dovecot/integrations/dovecot.md
@@ -0,0 +1,197 @@
+
+
+# Dovecot
+
+
+
+
+
+Plugin: python.d.plugin
+Module: dovecot
+
+
+
+## Overview
+
+This collector monitors Dovecot metrics about sessions, logins, commands, page faults and more.
+
+It uses the dovecot socket and executes the `EXPORT global` command to get the statistics.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+If no configuration is given, the collector will attempt to connect to Dovecot using the UNIX socket located at `/var/run/dovecot/stats`.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Dovecot instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| dovecot.sessions | active sessions | number |
+| dovecot.logins | logins | number |
+| dovecot.commands | commands | commands |
+| dovecot.faults | minor, major | faults |
+| dovecot.context_switches | voluntary, involuntary | switches |
+| dovecot.io | read, write | KiB/s |
+| dovecot.net | read, write | kilobits/s |
+| dovecot.syscalls | read, write | syscalls/s |
+| dovecot.lookup | path, attr | number/s |
+| dovecot.cache | hits | hits/s |
+| dovecot.auth | ok, failed | attempts |
+| dovecot.auth_cache | hit, miss | number |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Dovecot configuration
+
+The Dovecot UNIX socket should have R/W permissions for user netdata, or Dovecot should be configured with a TCP/IP socket.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/dovecot.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/dovecot.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| socket | Use this socket to communicate with Dovecot. | /var/run/dovecot/stats | no |
+| host | Instead of using a socket, you can point the collector to an IP address for Dovecot statistics. | | no |
+| port | Used in combination with host, configures the port Dovecot listens on. | | no |
+
+
+
+#### Examples
+
+##### Local TCP
+
+A basic TCP configuration.
+
+Config
+
+```yaml
+localtcpip:
+ name: 'local'
+ host: '127.0.0.1'
+ port: 24242
+
+```
+
+
+##### Local socket
+
+A basic local socket configuration
+
+Config
+
+```yaml
+localsocket:
+ name: 'local'
+ socket: '/var/run/dovecot/stats'
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `dovecot` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin dovecot debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/example/README.md b/collectors/python.d.plugin/example/README.md
deleted file mode 100644
index 63ec7a2985a6d5..00000000000000
--- a/collectors/python.d.plugin/example/README.md
+++ /dev/null
@@ -1,38 +0,0 @@
-
-
-# Example module in Python
-
-You can add custom data collectors using Python.
-
-Netdata provides an [example python data collection module](https://github.com/netdata/netdata/tree/master/collectors/python.d.plugin/example).
-
-If you want to write your own collector, read our [writing a new Python module](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/README.md#how-to-write-a-new-module) tutorial.
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `example` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `example` module in debug mode:
-
-```bash
-./python.d.plugin example debug trace
-```
-
diff --git a/collectors/python.d.plugin/example/README.md b/collectors/python.d.plugin/example/README.md
new file mode 120000
index 00000000000000..55877a99ab64d6
--- /dev/null
+++ b/collectors/python.d.plugin/example/README.md
@@ -0,0 +1 @@
+integrations/example_collector.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/example/integrations/example_collector.md b/collectors/python.d.plugin/example/integrations/example_collector.md
new file mode 100644
index 00000000000000..7dded67ba95832
--- /dev/null
+++ b/collectors/python.d.plugin/example/integrations/example_collector.md
@@ -0,0 +1,171 @@
+
+
+# Example collector
+
+Plugin: python.d.plugin
+Module: example
+
+
+
+## Overview
+
+Example collector that generates some random numbers as metrics.
+
+If you want to write your own collector, read our [writing a new Python module](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/README.md#how-to-write-a-new-module) tutorial.
+
+
+The `get_data()` function uses `random.randint()` to generate a random number which will be collected as a metric.
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Example collector instance
+
+These metrics refer to the entire monitored application.
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| example.random | random | number |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/example.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/example.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| num_lines | The number of lines to create. | 4 | no |
+| lower | The lower bound of numbers to randomly sample from. | 0 | no |
+| upper | The upper bound of numbers to randomly sample from. | 100 | no |
+| update_every | Sets the default data collection frequency. | 1 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic example configuration.
+
+```yaml
+four_lines:
+ name: "Four Lines"
+ update_every: 1
+ priority: 60000
+ penalty: yes
+ autodetection_retry: 0
+ num_lines: 4
+ lower: 0
+ upper: 100
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `example` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin example debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/exim/README.md b/collectors/python.d.plugin/exim/README.md
deleted file mode 100644
index bc00ab7c6e1566..00000000000000
--- a/collectors/python.d.plugin/exim/README.md
+++ /dev/null
@@ -1,64 +0,0 @@
-
-
-# Exim collector
-
-Simple module executing `exim -bpc` to grab exim queue.
-This command can take a lot of time to finish its execution thus it is not recommended to run it every second.
-
-## Requirements
-
-The module uses the `exim` binary, which can only be executed as root by default. We need to allow other users to `exim` binary. We solve that adding `queue_list_requires_admin` statement in exim configuration and set to `false`, because it is `true` by default. On many Linux distributions, the default location of `exim` configuration is in `/etc/exim.conf`.
-
-1. Edit the `exim` configuration with your preferred editor and add:
-`queue_list_requires_admin = false`
-2. Restart `exim` and Netdata
-
-*WHM (CPanel) server*
-
-On a WHM server, you can reconfigure `exim` over the WHM interface with the following steps.
-
-1. Login to WHM
-2. Navigate to Service Configuration --> Exim Configuration Manager --> tab Advanced Editor
-3. Scroll down to the button **Add additional configuration setting** and click on it.
-4. In the new dropdown which will appear above we need to find and choose:
-`queue_list_requires_admin` and set to `false`
-5. Scroll to the end and click the **Save** button.
-
-It produces only one chart:
-
-1. **Exim Queue Emails**
-
- - emails
-
-Configuration is not needed.
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `exim` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `exim` module in debug mode:
-
-```bash
-./python.d.plugin exim debug trace
-```
-
diff --git a/collectors/python.d.plugin/exim/README.md b/collectors/python.d.plugin/exim/README.md
new file mode 120000
index 00000000000000..f1f2ef9f927dd8
--- /dev/null
+++ b/collectors/python.d.plugin/exim/README.md
@@ -0,0 +1 @@
+integrations/exim.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/exim/integrations/exim.md b/collectors/python.d.plugin/exim/integrations/exim.md
new file mode 100644
index 00000000000000..f0ae33d3eeb881
--- /dev/null
+++ b/collectors/python.d.plugin/exim/integrations/exim.md
@@ -0,0 +1,181 @@
+
+
+# Exim
+
+
+
+
+
+Plugin: python.d.plugin
+Module: exim
+
+
+
+## Overview
+
+This collector monitors the Exim mail queue.
+
+It uses the `exim` command line binary to get the statistics.
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+Assuming setup prerequisites are met, the collector will try to gather statistics using the method described above, even without any configuration.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Exim instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| exim.qemails | emails | emails |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Exim configuration - local installation
+
+The module uses the `exim` binary, which can only be executed as root by default. We need to allow other users to run the `exim` binary. We do that by adding the `queue_list_requires_admin` statement to the exim configuration and setting it to `false`, because it is `true` by default. On many Linux distributions, the default location of the `exim` configuration is `/etc/exim.conf`.
+
+1. Edit the `exim` configuration with your preferred editor and add:
+`queue_list_requires_admin = false`
+2. Restart `exim` and Netdata
+
+
+#### Exim configuration - WHM (CPanel) server
+
+On a WHM server, you can reconfigure `exim` over the WHM interface with the following steps.
+
+1. Login to WHM
+2. Navigate to Service Configuration --> Exim Configuration Manager --> tab Advanced Editor
+3. Scroll down to the button **Add additional configuration setting** and click on it.
+4. In the new dropdown that appears above, find and choose:
+`queue_list_requires_admin` and set it to `false`.
+5. Scroll to the end and click the **Save** button.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/exim.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/exim.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| command | Path and command to the `exim` binary | exim -bpc | no |
+
+
+
+#### Examples
+
+##### Local exim install
+
+A basic local exim install
+
+```yaml
+local:
+ command: 'exim -bpc'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `exim` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin exim debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/fail2ban/README.md b/collectors/python.d.plugin/fail2ban/README.md
deleted file mode 100644
index 41276d5f760558..00000000000000
--- a/collectors/python.d.plugin/fail2ban/README.md
+++ /dev/null
@@ -1,105 +0,0 @@
-
-
-# Fail2ban collector
-
-Monitors the fail2ban log file to show all bans for all active jails.
-
-## Requirements
-
-The `fail2ban.log` file must be readable by the user `netdata`:
-
-- change the file ownership and access permissions.
-- update `/etc/logrotate.d/fail2ban` to persists the changes after rotating the log file.
-
-
- Click to expand the instruction.
-
-To change the file ownership and access permissions, execute the following:
-
-```shell
-sudo chown root:netdata /var/log/fail2ban.log
-sudo chmod 640 /var/log/fail2ban.log
-```
-
-To persist the changes after rotating the log file, add `create 640 root netdata` to the `/etc/logrotate.d/fail2ban`:
-
-```shell
-/var/log/fail2ban.log {
-
- weekly
- rotate 4
- compress
-
- delaycompress
- missingok
- postrotate
- fail2ban-client flushlogs 1>/dev/null
- endscript
-
- # If fail2ban runs as non-root it still needs to have write access
- # to logfiles.
- # create 640 fail2ban adm
- create 640 root netdata
-}
-```
-
-
-
-## Charts
-
-- Failed attempts in attempts/s
-- Bans in bans/s
-- Banned IP addresses (since the last restart of netdata) in ips
-
-## Configuration
-
-Edit the `python.d/fail2ban.conf` configuration file using `edit-config` from the
-Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/fail2ban.conf
-```
-
-Sample:
-
-```yaml
-local:
- log_path: '/var/log/fail2ban.log'
- conf_path: '/etc/fail2ban/jail.local'
- exclude: 'dropbear apache'
-```
-
-If no configuration is given, module will attempt to read log file at `/var/log/fail2ban.log` and conf file
-at `/etc/fail2ban/jail.local`. If conf file is not found default jail is `ssh`.
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `fail2ban` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `fail2ban` module in debug mode:
-
-```bash
-./python.d.plugin fail2ban debug trace
-```
-
diff --git a/collectors/python.d.plugin/fail2ban/README.md b/collectors/python.d.plugin/fail2ban/README.md
new file mode 120000
index 00000000000000..642a8bcf533105
--- /dev/null
+++ b/collectors/python.d.plugin/fail2ban/README.md
@@ -0,0 +1 @@
+integrations/fail2ban.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/fail2ban/integrations/fail2ban.md b/collectors/python.d.plugin/fail2ban/integrations/fail2ban.md
new file mode 100644
index 00000000000000..a7116be5e47067
--- /dev/null
+++ b/collectors/python.d.plugin/fail2ban/integrations/fail2ban.md
@@ -0,0 +1,209 @@
+
+
+# Fail2ban
+
+
+
+
+
+Plugin: python.d.plugin
+Module: fail2ban
+
+
+
+## Overview
+
+Monitor Fail2ban performance for prime intrusion prevention operations. Monitor ban counts, jail statuses, and failed login attempts to ensure robust network security.
+
+
+It collects metrics by reading the default log and configuration files of Fail2ban.
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The `fail2ban.log` file must be readable by the user `netdata`.
+ - change the file ownership and access permissions.
+ - update `/etc/logrotate.d/fail2ban` to persist the changes after rotating the log file.
+
+To change the file ownership and access permissions, execute the following:
+
+```shell
+sudo chown root:netdata /var/log/fail2ban.log
+sudo chmod 640 /var/log/fail2ban.log
+```
+
+To persist the changes after rotating the log file, add `create 640 root netdata` to the `/etc/logrotate.d/fail2ban`:
+
+```shell
+/var/log/fail2ban.log {
+
+ weekly
+ rotate 4
+ compress
+
+ delaycompress
+ missingok
+ postrotate
+ fail2ban-client flushlogs 1>/dev/null
+ endscript
+
+ # If fail2ban runs as non-root it still needs to have write access
+ # to logfiles.
+ # create 640 fail2ban adm
+ create 640 root netdata
+}
+```
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+By default, the collector will attempt to read the log file at `/var/log/fail2ban.log` and the conf file at `/etc/fail2ban/jail.local`.
+If the conf file is not found, the default jail is `ssh`.
+
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Fail2ban instance
+
+These metrics refer to the entire monitored application.
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| fail2ban.failed_attempts | a dimension per jail | attempts/s |
+| fail2ban.bans | a dimension per jail | bans/s |
+| fail2ban.banned_ips | a dimension per jail | ips |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/fail2ban.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/fail2ban.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| log_path | path to fail2ban.log. | /var/log/fail2ban.log | no |
+| conf_path | path to jail.local/jail.conf. | /etc/fail2ban/jail.local | no |
+| conf_dir | path to jail.d/. | /etc/fail2ban/jail.d/ | no |
+| exclude | jails you want to exclude from autodetection. | | no |
+| update_every | Sets the default data collection frequency. | 1 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic example configuration.
+
+```yaml
+local:
+ log_path: '/var/log/fail2ban.log'
+ conf_path: '/etc/fail2ban/jail.local'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `fail2ban` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin fail2ban debug trace
+ ```
+
+
+
+
+
+
diff --git a/collectors/python.d.plugin/fail2ban/metadata.yaml b/collectors/python.d.plugin/fail2ban/metadata.yaml
index 80aa68b624b472..61f762679c9cda 100644
--- a/collectors/python.d.plugin/fail2ban/metadata.yaml
+++ b/collectors/python.d.plugin/fail2ban/metadata.yaml
@@ -35,29 +35,29 @@ modules:
The `fail2ban.log` file must be readable by the user `netdata`.
- change the file ownership and access permissions.
- update `/etc/logrotate.d/fail2ban`` to persist the changes after rotating the log file.
-
+
To change the file ownership and access permissions, execute the following:
-
+
```shell
sudo chown root:netdata /var/log/fail2ban.log
sudo chmod 640 /var/log/fail2ban.log
```
-
+
To persist the changes after rotating the log file, add `create 640 root netdata` to the `/etc/logrotate.d/fail2ban`:
-
+
```shell
/var/log/fail2ban.log {
-
+
weekly
rotate 4
compress
-
+
delaycompress
missingok
postrotate
fail2ban-client flushlogs 1>/dev/null
endscript
-
+
# If fail2ban runs as non-root it still needs to have write access
# to logfiles.
# create 640 fail2ban adm
@@ -67,7 +67,8 @@ modules:
default_behavior:
auto_detection:
description: |
- By default the collector will attempt to read log file at /var/log/fail2ban.log and conf file at /etc/fail2ban/jail.local. If conf file is not found default jail is ssh.
+ By default the collector will attempt to read log file at /var/log/fail2ban.log and conf file at /etc/fail2ban/jail.local.
+ If the conf file is not found, the default jail is ssh.
limits:
description: ""
performance_impact:
@@ -77,19 +78,19 @@ modules:
list: []
configuration:
file:
- name: ""
+ name: python.d/fail2ban.conf
description: ""
options:
description: |
There are 2 sections:
-
+
* Global variables
* One or more JOBS that can define multiple different instances to monitor.
-
+
The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
-
+
Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
-
+
Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
folding:
title: Config options
@@ -146,7 +147,26 @@ modules:
conf_path: '/etc/fail2ban/jail.local'
troubleshooting:
problems:
- list: []
+ list:
+ - name: Debug Mode
+ description: |
+ To troubleshoot issues with the `fail2ban` module, run the `python.d.plugin` with the debug option enabled.
+ The output will show the results of the data collection job or error messages explaining why the collector isn't working.
+
+ First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
+ not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
+ plugin's directory, switch to the `netdata` user.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ sudo su -s /bin/bash netdata
+ ```
+
+ Now you can manually run the `fail2ban` module in debug mode:
+
+ ```bash
+ ./python.d.plugin fail2ban debug trace
+ ```
alerts: []
metrics:
folding:
diff --git a/collectors/python.d.plugin/gearman/README.md b/collectors/python.d.plugin/gearman/README.md
deleted file mode 100644
index 329c34726c5d7c..00000000000000
--- a/collectors/python.d.plugin/gearman/README.md
+++ /dev/null
@@ -1,73 +0,0 @@
-
-
-# Gearman collector
-
-Monitors Gearman worker statistics. A chart is shown for each job as well as one showing a summary of all workers.
-
-Note: Charts may show as a line graph rather than an area
-graph if you load Netdata with no jobs running. To change
-this go to "Settings" > "Which dimensions to show?" and
-select "All".
-
-Plugin can obtain data from tcp socket **OR** unix socket.
-
-**Requirement:**
-Socket MUST be readable by netdata user.
-
-It produces:
-
- * Workers queued
- * Workers idle
- * Workers running
-
-## Configuration
-
-Edit the `python.d/gearman.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/gearman.conf
-```
-
-```yaml
-localhost:
- name : 'local'
- host : 'localhost'
- port : 4730
-
- # TLS information can be provided as well
- tls : no
- cert : /path/to/cert
- key : /path/to/key
-```
-
-When no configuration file is found, module tries to connect to TCP/IP socket: `localhost:4730`.
-
-### Troubleshooting
-
-To troubleshoot issues with the `gearman` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `gearman` module in debug mode:
-
-```bash
-./python.d.plugin gearman debug trace
-```
-
diff --git a/collectors/python.d.plugin/gearman/README.md b/collectors/python.d.plugin/gearman/README.md
new file mode 120000
index 00000000000000..70189d69860f35
--- /dev/null
+++ b/collectors/python.d.plugin/gearman/README.md
@@ -0,0 +1 @@
+integrations/gearman.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/gearman/integrations/gearman.md b/collectors/python.d.plugin/gearman/integrations/gearman.md
new file mode 100644
index 00000000000000..3923d14017749c
--- /dev/null
+++ b/collectors/python.d.plugin/gearman/integrations/gearman.md
@@ -0,0 +1,210 @@
+
+
+# Gearman
+
+
+
+
+
+Plugin: python.d.plugin
+Module: gearman
+
+
+
+## Overview
+
+Monitor Gearman metrics for proficient system task distribution. Track job counts, worker statuses, and queue lengths for effective distributed task management.
+
+This collector connects to a Gearman instance via either TCP or unix socket.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+When no configuration file is found, the collector tries to connect to TCP/IP socket: localhost:4730.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Gearman instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| gearman.total_jobs | Pending, Running | Jobs |
+
+### Per gearman job
+
+Metrics related to Gearman jobs. Each job produces its own set of the following metrics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| gearman.single_job | Pending, Idle, Running | Jobs |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ gearman_workers_queued ](https://github.com/netdata/netdata/blob/master/health/health.d/gearman.conf) | gearman.single_job | average number of queued jobs over the last 10 minutes |
+
+
+## Setup
+
+### Prerequisites
+
+#### Socket permissions
+
+The gearman UNIX socket should have read permission for user netdata.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/gearman.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/gearman.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| host | Hostname or IP address where gearman is running. | localhost | no |
+| port | TCP port on which gearman is listening. | 4730 | no |
+| tls | Use TLS to connect to gearman. | false | no |
+| cert | Provide a certificate file if needed to connect to a TLS gearman instance. | | no |
+| key | Provide a key file if needed to connect to a TLS gearman instance. | | no |
+
+
+
+#### Examples
+
+##### Local gearman service
+
+A basic host and port gearman configuration for localhost.
+
+```yaml
+localhost:
+ name: 'local'
+ host: 'localhost'
+ port: 4730
+
+```
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+Config
+
+```yaml
+localhost:
+ name: 'local'
+ host: 'localhost'
+ port: 4730
+
+remote:
+ name: 'remote'
+ host: '192.0.2.1'
+ port: 4730
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `gearman` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin gearman debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/go_expvar/README.md b/collectors/python.d.plugin/go_expvar/README.md
deleted file mode 100644
index f86fa6d04e53a7..00000000000000
--- a/collectors/python.d.plugin/go_expvar/README.md
+++ /dev/null
@@ -1,342 +0,0 @@
-
-
-# Go applications collector
-
-Monitors Go application that exposes its metrics with the use of `expvar` package from the Go standard library. The package produces charts for Go runtime memory statistics and optionally any number of custom charts.
-
-The `go_expvar` module produces the following charts:
-
-1. **Heap allocations** in kB
-
- - alloc: size of objects allocated on the heap
- - inuse: size of allocated heap spans
-
-2. **Stack allocations** in kB
-
- - inuse: size of allocated stack spans
-
-3. **MSpan allocations** in kB
-
- - inuse: size of allocated mspan structures
-
-4. **MCache allocations** in kB
-
- - inuse: size of allocated mcache structures
-
-5. **Virtual memory** in kB
-
- - sys: size of reserved virtual address space
-
-6. **Live objects**
-
- - live: number of live objects in memory
-
-7. **GC pauses average** in ns
-
- - avg: average duration of all GC stop-the-world pauses
-
-## Monitoring Go applications
-
-Netdata can be used to monitor running Go applications that expose their metrics with
-the use of the [expvar package](https://golang.org/pkg/expvar/) included in Go standard library.
-
-The `expvar` package exposes these metrics over HTTP and is very easy to use.
-Consider this minimal sample below:
-
-```go
-package main
-
-import (
- _ "expvar"
- "net/http"
-)
-
-func main() {
- http.ListenAndServe("127.0.0.1:8080", nil)
-}
-```
-
-When imported this way, the `expvar` package registers a HTTP handler at `/debug/vars` that
-exposes Go runtime's memory statistics in JSON format. You can inspect the output by opening
-the URL in your browser (or by using `wget` or `curl`).
-
-Sample output:
-
-```json
-{
-"cmdline": ["./expvar-demo-binary"],
-"memstats": {"Alloc":630856,"TotalAlloc":630856,"Sys":3346432,"Lookups":27, }
-}
-```
-
-You can of course expose and monitor your own variables as well.
-Here is a sample Go application that exposes a few custom variables:
-
-```go
-package main
-
-import (
- "expvar"
- "net/http"
- "runtime"
- "time"
-)
-
-func main() {
-
- tick := time.NewTicker(1 * time.Second)
- num_go := expvar.NewInt("runtime.goroutines")
- counters := expvar.NewMap("counters")
- counters.Set("cnt1", new(expvar.Int))
- counters.Set("cnt2", new(expvar.Float))
-
- go http.ListenAndServe(":8080", nil)
-
- for {
- select {
- case <- tick.C:
- num_go.Set(int64(runtime.NumGoroutine()))
- counters.Add("cnt1", 1)
- counters.AddFloat("cnt2", 1.452)
- }
- }
-}
-```
-
-Apart from the runtime memory stats, this application publishes two counters and the
-number of currently running Goroutines and updates these stats every second.
-
-In the next section, we will cover how to monitor and chart these exposed stats with
-the use of `netdata`s `go_expvar` module.
-
-### Using Netdata go_expvar module
-
-The `go_expvar` module is disabled by default. To enable it, edit `python.d.conf` (to edit it on your system run
-`/etc/netdata/edit-config python.d.conf`), and change the `go_expvar` variable to `yes`:
-
-```
-# Enable / Disable python.d.plugin modules
-#default_run: yes
-#
-# If "default_run" = "yes" the default for all modules is enabled (yes).
-# Setting any of these to "no" will disable it.
-#
-# If "default_run" = "no" the default for all modules is disabled (no).
-# Setting any of these to "yes" will enable it.
-...
-go_expvar: yes
-...
-```
-
-Next, we need to edit the module configuration file (found at `/etc/netdata/python.d/go_expvar.conf` by default) (to
-edit it on your system run `/etc/netdata/edit-config python.d/go_expvar.conf`). The module configuration consists of
-jobs, where each job can be used to monitor a separate Go application. Let's see a sample job configuration:
-
-```
-# /etc/netdata/python.d/go_expvar.conf
-
-app1:
- name : 'app1'
- url : 'http://127.0.0.1:8080/debug/vars'
- collect_memstats: true
- extra_charts: {}
-```
-
-Let's go over each of the defined options:
-
-```
-name: 'app1'
-```
-
-This is the job name that will appear at the Netdata dashboard.
-If not defined, the job_name (top level key) will be used.
-
-```
-url: 'http://127.0.0.1:8080/debug/vars'
-```
-
-This is the URL of the expvar endpoint. As the expvar handler can be installed
-in a custom path, the whole URL has to be specified. This value is mandatory.
-
-```
-collect_memstats: true
-```
-
-Whether to enable collecting stats about Go runtime's memory. You can find more
-information about the exposed values at the [runtime package docs](https://golang.org/pkg/runtime/#MemStats).
-
-```
-extra_charts: {}
-```
-
-Enables the user to specify custom expvars to monitor and chart.
-Will be explained in more detail below.
-
-**Note: if `collect_memstats` is disabled and no `extra_charts` are defined, the plugin will
-disable itself, as there will be no data to collect!**
-
-Apart from these options, each job supports options inherited from Netdata's `python.d.plugin`
-and its base `UrlService` class. These are:
-
-```
-update_every: 1 # the job's data collection frequency
-priority: 60000 # the job's order on the dashboard
-user: admin # use when the expvar endpoint is protected by HTTP Basic Auth
-password: sekret # use when the expvar endpoint is protected by HTTP Basic Auth
-```
-
-### Monitoring custom vars with go_expvar
-
-Now, memory stats might be useful, but what if you want Netdata to monitor some custom values
-that your Go application exposes? The `go_expvar` module can do that as well with the use of
-the `extra_charts` configuration variable.
-
-The `extra_charts` variable is a YaML list of Netdata chart definitions.
-Each chart definition has the following keys:
-
-```
-id: Netdata chart ID
-options: a key-value mapping of chart options
-lines: a list of line definitions
-```
-
-**Note: please do not use dots in the chart or line ID field.
-See [this issue](https://github.com/netdata/netdata/pull/1902#issuecomment-284494195) for explanation.**
-
-Please see these two links to the official Netdata documentation for more information about the values:
-
-- [External plugins - charts](https://github.com/netdata/netdata/blob/master/collectors/plugins.d/README.md#chart)
-- [Chart variables](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/README.md#global-variables-order-and-chart)
-
-**Line definitions**
-
-Each chart can define multiple lines (dimensions).
-A line definition is a key-value mapping of line options.
-Each line can have the following options:
-
-```
-# mandatory
-expvar_key: the name of the expvar as present in the JSON output of /debug/vars endpoint
-expvar_type: value type; supported are "float" or "int"
-id: the id of this line/dimension in Netdata
-
-# optional - Netdata defaults are used if these options are not defined
-name: ''
-algorithm: absolute
-multiplier: 1
-divisor: 100 if expvar_type == float, 1 if expvar_type == int
-hidden: False
-```
-
-Please see the following link for more information about the options and their default values:
-[External plugins - dimensions](https://github.com/netdata/netdata/blob/master/collectors/plugins.d/README.md#dimension)
-
-Apart from top-level expvars, this plugin can also parse expvars stored in a multi-level map;
-All dicts in the resulting JSON document are then flattened to one level.
-Expvar names are joined together with '.' when flattening.
-
-Example:
-
-```
-{
- "counters": {"cnt1": 1042, "cnt2": 1512.9839999999983},
- "runtime.goroutines": 5
-}
-```
-
-In the above case, the exported variables will be available under `runtime.goroutines`,
-`counters.cnt1` and `counters.cnt2` expvar_keys. If the flattening results in a key collision,
-the first defined key wins and all subsequent keys with the same name are ignored.
-
-## Enable the collector
-
-The `go_expvar` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d.conf
-```
-
-Change the value of the `go_expvar` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl
-restart netdata`, or the appropriate method for your system, to finish enabling the `go_expvar` collector.
-
-## Configuration
-
-Edit the `python.d/go_expvar.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/go_expvar.conf
-```
-
-The configuration below matches the second Go application described above.
-Netdata will monitor and chart memory stats for the application, as well as a custom chart of
-running goroutines and two dummy counters.
-
-```
-app1:
- name : 'app1'
- url : 'http://127.0.0.1:8080/debug/vars'
- collect_memstats: true
- extra_charts:
- - id: "runtime_goroutines"
- options:
- name: num_goroutines
- title: "runtime: number of goroutines"
- units: goroutines
- family: runtime
- context: expvar.runtime.goroutines
- chart_type: line
- lines:
- - {expvar_key: 'runtime.goroutines', expvar_type: int, id: runtime_goroutines}
- - id: "foo_counters"
- options:
- name: counters
- title: "some random counters"
- units: awesomeness
- family: counters
- context: expvar.foo.counters
- chart_type: line
- lines:
- - {expvar_key: 'counters.cnt1', expvar_type: int, id: counters_cnt1}
- - {expvar_key: 'counters.cnt2', expvar_type: float, id: counters_cnt2}
-```
-
-**Netdata charts example**
-
-The images below show how do the final charts in Netdata look.
-
-
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `go_expvar` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `go_expvar` module in debug mode:
-
-```bash
-./python.d.plugin go_expvar debug trace
-```
-
diff --git a/collectors/python.d.plugin/go_expvar/README.md b/collectors/python.d.plugin/go_expvar/README.md
new file mode 120000
index 00000000000000..f28a82f343c268
--- /dev/null
+++ b/collectors/python.d.plugin/go_expvar/README.md
@@ -0,0 +1 @@
+integrations/go_applications_expvar.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md b/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md
new file mode 100644
index 00000000000000..8d61fa2ae9fe99
--- /dev/null
+++ b/collectors/python.d.plugin/go_expvar/integrations/go_applications_expvar.md
@@ -0,0 +1,335 @@
+
+
+# Go applications (EXPVAR)
+
+
+
+
+
+Plugin: python.d.plugin
+Module: go_expvar
+
+
+
+## Overview
+
+This collector monitors Go applications that expose their metrics with the use of the `expvar` package from the Go standard library. It produces charts for Go runtime memory statistics and optionally any number of custom charts.
+
+It connects via http to gather the metrics exposed via the `expvar` package.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Go applications (EXPVAR) instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| expvar.memstats.heap | alloc, inuse | KiB |
+| expvar.memstats.stack | inuse | KiB |
+| expvar.memstats.mspan | inuse | KiB |
+| expvar.memstats.mcache | inuse | KiB |
+| expvar.memstats.live_objects | live | objects |
+| expvar.memstats.sys | sys | KiB |
+| expvar.memstats.gc_pauses | avg | ns |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Enable the go_expvar collector
+
+The `go_expvar` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file.
+
+```bash
+cd /etc/netdata # Replace this path with your Netdata config directory, if different
+sudo ./edit-config python.d.conf
+```
+
+Change the value of the `go_expvar` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
+
+
+#### Sample `expvar` usage in a Go application
+
+The `expvar` package exposes metrics over HTTP and is very easy to use.
+Consider this minimal sample below:
+
+```go
+package main
+
+import (
+ _ "expvar"
+ "net/http"
+)
+
+func main() {
+ http.ListenAndServe("127.0.0.1:8080", nil)
+}
+```
+
+When imported this way, the `expvar` package registers an HTTP handler at `/debug/vars` that
+exposes Go runtime's memory statistics in JSON format. You can inspect the output by opening
+the URL in your browser (or by using `wget` or `curl`).
+
+Sample output:
+
+```json
+{
+"cmdline": ["./expvar-demo-binary"],
+"memstats": {"Alloc":630856,"TotalAlloc":630856,"Sys":3346432,"Lookups":27, ...}
+}
+```
+
+You can of course expose and monitor your own variables as well.
+Here is a sample Go application that exposes a few custom variables:
+
+```go
+package main
+
+import (
+ "expvar"
+ "net/http"
+ "runtime"
+ "time"
+)
+
+func main() {
+
+ tick := time.NewTicker(1 * time.Second)
+ num_go := expvar.NewInt("runtime.goroutines")
+ counters := expvar.NewMap("counters")
+ counters.Set("cnt1", new(expvar.Int))
+ counters.Set("cnt2", new(expvar.Float))
+
+ go http.ListenAndServe(":8080", nil)
+
+ for {
+ select {
+ case <- tick.C:
+ num_go.Set(int64(runtime.NumGoroutine()))
+ counters.Add("cnt1", 1)
+ counters.AddFloat("cnt2", 1.452)
+ }
+ }
+}
+```
+
+Apart from the runtime memory stats, this application publishes two counters and the
+number of currently running Goroutines and updates these stats every second.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/go_expvar.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/go_expvar.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified. Each JOB can be used to monitor a different Go application.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| url | The URL and port of the expvar endpoint. Please include the whole path of the endpoint, as the expvar handler can be installed in a non-standard location. | | yes |
+| user | If the URL is password protected, this is the username to use. | | no |
+| pass | If the URL is password protected, this is the password to use. | | no |
+| collect_memstats | Enables charts for Go runtime's memory statistics. | | no |
+| extra_charts | Defines extra data/charts to monitor, please see the example below. | | no |
+
+
+
+#### Examples
+
+##### Monitor a Go app1 application
+
+The example below sets a configuration for a Go application, called `app1`. Besides the `memstats`, the application also exposes two counters and the number of currently running Goroutines and updates these stats every second.
+
+The `go_expvar` collector can monitor these as well with the use of the `extra_charts` configuration variable.
+
+The `extra_charts` variable is a YAML list of Netdata chart definitions.
+Each chart definition has the following keys:
+
+```
+id: Netdata chart ID
+options: a key-value mapping of chart options
+lines: a list of line definitions
+```
+
+**Note: please do not use dots in the chart or line ID field.
+See [this issue](https://github.com/netdata/netdata/pull/1902#issuecomment-284494195) for explanation.**
+
+Please see these two links to the official Netdata documentation for more information about the values:
+
+- [External plugins - charts](https://github.com/netdata/netdata/blob/master/collectors/plugins.d/README.md#chart)
+- [Chart variables](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/README.md#global-variables-order-and-chart)
+
+**Line definitions**
+
+Each chart can define multiple lines (dimensions).
+A line definition is a key-value mapping of line options.
+Each line can have the following options:
+
+```
+# mandatory
+expvar_key: the name of the expvar as present in the JSON output of /debug/vars endpoint
+expvar_type: value type; supported are "float" or "int"
+id: the id of this line/dimension in Netdata
+
+# optional - Netdata defaults are used if these options are not defined
+name: ''
+algorithm: absolute
+multiplier: 1
+divisor: 100 if expvar_type == float, 1 if expvar_type == int
+hidden: False
+```
+
+Please see the following link for more information about the options and their default values:
+[External plugins - dimensions](https://github.com/netdata/netdata/blob/master/collectors/plugins.d/README.md#dimension)
+
+Apart from top-level expvars, this plugin can also parse expvars stored in a multi-level map.
+All dicts in the resulting JSON document are then flattened to one level.
+Expvar names are joined together with '.' when flattening.
+
+Example:
+
+```
+{
+ "counters": {"cnt1": 1042, "cnt2": 1512.9839999999983},
+ "runtime.goroutines": 5
+}
+```
+
+In the above case, the exported variables will be available under `runtime.goroutines`,
+`counters.cnt1` and `counters.cnt2` expvar_keys. If the flattening results in a key collision,
+the first defined key wins and all subsequent keys with the same name are ignored.
+
+
+```yaml
+app1:
+ name : 'app1'
+ url : 'http://127.0.0.1:8080/debug/vars'
+ collect_memstats: true
+ extra_charts:
+ - id: "runtime_goroutines"
+ options:
+ name: num_goroutines
+ title: "runtime: number of goroutines"
+ units: goroutines
+ family: runtime
+ context: expvar.runtime.goroutines
+ chart_type: line
+ lines:
+ - {expvar_key: 'runtime.goroutines', expvar_type: int, id: runtime_goroutines}
+ - id: "foo_counters"
+ options:
+ name: counters
+ title: "some random counters"
+ units: awesomeness
+ family: counters
+ context: expvar.foo.counters
+ chart_type: line
+ lines:
+ - {expvar_key: 'counters.cnt1', expvar_type: int, id: counters_cnt1}
+ - {expvar_key: 'counters.cnt2', expvar_type: float, id: counters_cnt2}
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `go_expvar` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin go_expvar debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/go_expvar/metadata.yaml b/collectors/python.d.plugin/go_expvar/metadata.yaml
index 92669dd9c087bc..9419b024a654eb 100644
--- a/collectors/python.d.plugin/go_expvar/metadata.yaml
+++ b/collectors/python.d.plugin/go_expvar/metadata.yaml
@@ -4,7 +4,7 @@ modules:
plugin_name: python.d.plugin
module_name: go_expvar
monitored_instance:
- name: Go applications
+ name: Go applications (EXPVAR)
link: "https://pkg.go.dev/expvar"
categories:
- data-collection.apm
@@ -39,6 +39,16 @@ modules:
setup:
prerequisites:
list:
+ - title: "Enable the go_expvar collector"
+ description: |
+ The `go_expvar` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file.
+
+ ```bash
+ cd /etc/netdata # Replace this path with your Netdata config directory, if different
+ sudo ./edit-config python.d.conf
+ ```
+
+ Change the value of the `go_expvar` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
- title: "Sample `expvar` usage in a Go application"
description: |
The `expvar` package exposes metrics over HTTP and is very easy to use.
diff --git a/collectors/python.d.plugin/haproxy/haproxy.chart.py b/collectors/python.d.plugin/haproxy/haproxy.chart.py
index 6f94c9a0712cd3..f412febb7849d4 100644
--- a/collectors/python.d.plugin/haproxy/haproxy.chart.py
+++ b/collectors/python.d.plugin/haproxy/haproxy.chart.py
@@ -44,6 +44,7 @@
'bctime',
'health_sup',
'health_sdown',
+ 'health_smaint',
'health_bdown',
'health_idle'
]
@@ -167,6 +168,10 @@
'options': [None, 'Backend Servers In UP State', 'health servers', 'health', 'haproxy_hs.up', 'line'],
'lines': []
},
+ 'health_smaint': {
+ 'options': [None, 'Backend Servers In MAINT State', 'maintenance servers', 'health', 'haproxy_hs.maint', 'line'],
+ 'lines': []
+ },
'health_bdown': {
'options': [None, 'Is Backend Failed?', 'boolean', 'health', 'haproxy_hb.down', 'line'],
'lines': []
@@ -267,6 +272,8 @@ def _get_stat_data(self):
if server_status(server, name, 'UP')])
stat_data['hsdown_' + idx] = len([server for server in self.data['servers']
if server_status(server, name, 'DOWN')])
+ stat_data['hsmaint_' + idx] = len([server for server in self.data['servers']
+ if server_status(server, name, 'MAINT')])
stat_data['hbdown_' + idx] = 1 if backend.get('status') == 'DOWN' else 0
for metric in BACKEND_METRICS:
stat_data['_'.join(['backend', metric, idx])] = backend.get(metric) or 0
@@ -321,6 +328,7 @@ def create_charts(self):
BACKEND_METRICS[metric]['divisor']])
self.definitions['health_sup']['lines'].append(['hsup_' + idx, name, 'absolute'])
self.definitions['health_sdown']['lines'].append(['hsdown_' + idx, name, 'absolute'])
+ self.definitions['health_smaint']['lines'].append(['hsmaint_' + idx, name, 'absolute'])
self.definitions['health_bdown']['lines'].append(['hbdown_' + idx, name, 'absolute'])
@@ -352,7 +360,7 @@ def is_server(server):
def server_status(server, backend_name, status='DOWN'):
- return server.get('# pxname') == backend_name and server.get('status') == status
+ return server.get('# pxname') == backend_name and server.get('status').partition(' ')[0] == status
def url_remove_params(url):
diff --git a/collectors/python.d.plugin/hddtemp/README.md b/collectors/python.d.plugin/hddtemp/README.md
deleted file mode 100644
index b42da734627250..00000000000000
--- a/collectors/python.d.plugin/hddtemp/README.md
+++ /dev/null
@@ -1,61 +0,0 @@
-
-
-# Hard drive temperature collector
-
-Monitors disk temperatures from one or more `hddtemp` daemons.
-
-**Requirement:**
-Running `hddtemp` in daemonized mode with access on tcp port
-
-It produces one chart **Temperature** with dynamic number of dimensions (one per disk)
-
-## Configuration
-
-Edit the `python.d/hddtemp.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/hddtemp.conf
-```
-
-Sample:
-
-```yaml
-update_every: 3
-host: "127.0.0.1"
-port: 7634
-```
-
-If no configuration is given, module will attempt to connect to hddtemp daemon on `127.0.0.1:7634` address
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `hddtemp` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `hddtemp` module in debug mode:
-
-```bash
-./python.d.plugin hddtemp debug trace
-```
-
diff --git a/collectors/python.d.plugin/hddtemp/README.md b/collectors/python.d.plugin/hddtemp/README.md
new file mode 120000
index 00000000000000..95c7593f803357
--- /dev/null
+++ b/collectors/python.d.plugin/hddtemp/README.md
@@ -0,0 +1 @@
+integrations/hdd_temperature.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md b/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md
new file mode 100644
index 00000000000000..4a1504f0777d41
--- /dev/null
+++ b/collectors/python.d.plugin/hddtemp/integrations/hdd_temperature.md
@@ -0,0 +1,217 @@
+
+
+# HDD temperature
+
+
+
+
+
+Plugin: python.d.plugin
+Module: hddtemp
+
+
+
+## Overview
+
+This collector monitors disk temperatures.
+
+
+It uses the `hddtemp` daemon to gather the metrics.
+
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+By default, this collector will attempt to connect to the `hddtemp` daemon on `127.0.0.1:7634`
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per HDD temperature instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| hddtemp.temperatures | a dimension per disk | Celsius |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Run `hddtemp` in daemon mode
+
+You can execute `hddtemp` in TCP/IP daemon mode by using the `-d` argument.
+
+So running `hddtemp -d` would run the daemon, by default on port 7634.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/hddtemp.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/hddtemp.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+By default this collector will try to autodetect disks (autodetection works only for disks whose names start with "sd"). However, this can be overridden by setting the option `devices` to an array of desired disks.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 1 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | no |
+| devices | Array of desired disks to detect, in case their name doesn't start with `sd`. | | no |
+| host | The IP or HOSTNAME to connect to. | localhost | yes |
+| port | The port to connect to. | 7634 | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic example configuration.
+
+```yaml
+localhost:
+ name: 'local'
+ host: '127.0.0.1'
+ port: 7634
+
+```
+##### Custom disk names
+
+An example defining the disk names to detect.
+
+Config
+
+```yaml
+localhost:
+ name: 'local'
+ host: '127.0.0.1'
+ port: 7634
+ devices:
+ - customdisk1
+ - customdisk2
+
+```
+
+
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+Config
+
+```yaml
+localhost:
+ name: 'local'
+ host: '127.0.0.1'
+ port: 7634
+
+remote_job:
+ name : 'remote'
+ host : '192.0.2.1'
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `hddtemp` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin hddtemp debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/hddtemp/metadata.yaml b/collectors/python.d.plugin/hddtemp/metadata.yaml
index ee62dc96da4348..d8b56fc66ec9f8 100644
--- a/collectors/python.d.plugin/hddtemp/metadata.yaml
+++ b/collectors/python.d.plugin/hddtemp/metadata.yaml
@@ -105,7 +105,7 @@ modules:
examples:
folding:
enabled: true
- title: ""
+ title: "Config"
list:
- name: Basic
description: A basic example configuration.
diff --git a/collectors/python.d.plugin/hpssa/README.md b/collectors/python.d.plugin/hpssa/README.md
deleted file mode 100644
index 12b25047588d4b..00000000000000
--- a/collectors/python.d.plugin/hpssa/README.md
+++ /dev/null
@@ -1,106 +0,0 @@
-
-
-# HP Smart Storage Arrays collector
-
-Monitors controller, cache module, logical and physical drive state and temperature using `ssacli` tool.
-
-Executed commands:
-
-- `sudo -n ssacli ctrl all show config detail`
-
-## Requirements:
-
-This module uses `ssacli`, which can only be executed by root. It uses
-`sudo` and assumes that it is configured such that the `netdata` user can execute `ssacli` as root without a password.
-
-- Add to your `/etc/sudoers` file:
-
-`which ssacli` shows the full path to the binary.
-
-```bash
-netdata ALL=(root) NOPASSWD: /path/to/ssacli
-```
-
-- Reset Netdata's systemd
- unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux
- distributions with systemd)
-
-The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `ssacli` using `sudo`.
-
-As the `root` user, do the following:
-
-```cmd
-mkdir /etc/systemd/system/netdata.service.d
-echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf
-systemctl daemon-reload
-systemctl restart netdata.service
-```
-
-## Charts
-
-- Controller status
-- Controller temperature
-- Logical drive status
-- Physical drive status
-- Physical drive temperature
-
-## Enable the collector
-
-The `hpssa` collector is disabled by default. To enable it, use `edit-config` from the
-Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf`
-file.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d.conf
-```
-
-Change the value of the `hpssa` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl
-restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
-
-## Configuration
-
-Edit the `python.d/hpssa.conf` configuration file using `edit-config` from the
-Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/hpssa.conf
-```
-
-If `ssacli` cannot be found in the `PATH`, configure it in `hpssa.conf`.
-
-```yaml
-ssacli_path: /usr/sbin/ssacli
-```
-
-Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate
-method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
-
-### Troubleshooting
-
-To troubleshoot issues with the `hpssa` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `hpssa` module in debug mode:
-
-```bash
-./python.d.plugin hpssa debug trace
-```
-
diff --git a/collectors/python.d.plugin/hpssa/README.md b/collectors/python.d.plugin/hpssa/README.md
new file mode 120000
index 00000000000000..82802d8b475b56
--- /dev/null
+++ b/collectors/python.d.plugin/hpssa/README.md
@@ -0,0 +1 @@
+integrations/hp_smart_storage_arrays.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/hpssa/integrations/hp_smart_storage_arrays.md b/collectors/python.d.plugin/hpssa/integrations/hp_smart_storage_arrays.md
new file mode 100644
index 00000000000000..d46cc9065ea5f6
--- /dev/null
+++ b/collectors/python.d.plugin/hpssa/integrations/hp_smart_storage_arrays.md
@@ -0,0 +1,205 @@
+
+
+# HP Smart Storage Arrays
+
+
+
+
+
+Plugin: python.d.plugin
+Module: hpssa
+
+
+
+## Overview
+
+This collector monitors HP Smart Storage Arrays metrics about operational statuses and temperatures.
+
+It uses the command line tool `ssacli`. The exact command used is `sudo -n ssacli ctrl all show config detail`
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+If no configuration is provided, the collector will try to execute the `ssacli` binary.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per HP Smart Storage Arrays instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| hpssa.ctrl_status | ctrl_{adapter slot}_status, cache_{adapter slot}_status, battery_{adapter slot}_status per adapter | Status |
+| hpssa.ctrl_temperature | ctrl_{adapter slot}_temperature, cache_{adapter slot}_temperature per adapter | Celsius |
+| hpssa.ld_status | a dimension per logical drive | Status |
+| hpssa.pd_status | a dimension per physical drive | Status |
+| hpssa.pd_temperature | a dimension per physical drive | Celsius |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Enable the hpssa collector
+
+The `hpssa` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file.
+
+```bash
+cd /etc/netdata # Replace this path with your Netdata config directory, if different
+sudo ./edit-config python.d.conf
+```
+
+Change the value of the `hpssa` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
+
+
+#### Allow user netdata to execute `ssacli` as root.
+
+This module uses `ssacli`, which can only be executed by root. It uses `sudo` and assumes that it is configured such that the `netdata` user can execute `ssacli` as root without a password.
+
+- Add to your `/etc/sudoers` file:
+
+`which ssacli` shows the full path to the binary.
+
+```bash
+netdata ALL=(root) NOPASSWD: /path/to/ssacli
+```
+
+- Reset Netdata's systemd
+ unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux
+ distributions with systemd)
+
+The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `ssacli` using `sudo`.
+
+As the `root` user, do the following:
+
+```cmd
+mkdir /etc/systemd/system/netdata.service.d
+echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf
+systemctl daemon-reload
+systemctl restart netdata.service
+```
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/hpssa.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/hpssa.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| ssacli_path | Path to the `ssacli` command line utility. Configure this if `ssacli` is not in the $PATH | | no |
+| use_sudo | Whether or not to use `sudo` to execute `ssacli` | True | no |
+
+
+
+#### Examples
+
+##### Local simple config
+
+A basic configuration, specifying the path to `ssacli`
+
+```yaml
+local:
+ ssacli_path: /usr/sbin/ssacli
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `hpssa` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin hpssa debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/hpssa/metadata.yaml b/collectors/python.d.plugin/hpssa/metadata.yaml
index dc91f05e441de7..7871cc276635eb 100644
--- a/collectors/python.d.plugin/hpssa/metadata.yaml
+++ b/collectors/python.d.plugin/hpssa/metadata.yaml
@@ -40,6 +40,16 @@ modules:
setup:
prerequisites:
list:
+ - title: 'Enable the hpssa collector'
+ description: |
+ The `hpssa` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file.
+
+ ```bash
+ cd /etc/netdata # Replace this path with your Netdata config directory, if different
+ sudo ./edit-config python.d.conf
+ ```
+
+ Change the value of the `hpssa` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
- title: 'Allow user netdata to execute `ssacli` as root.'
description: |
This module uses `ssacli`, which can only be executed by root. It uses `sudo` and assumes that it is configured such that the `netdata` user can execute `ssacli` as root without a password.
diff --git a/collectors/python.d.plugin/icecast/README.md b/collectors/python.d.plugin/icecast/README.md
deleted file mode 100644
index 25bbf738e2c25a..00000000000000
--- a/collectors/python.d.plugin/icecast/README.md
+++ /dev/null
@@ -1,67 +0,0 @@
-
-
-# Icecast collector
-
-Monitors the number of listeners for active sources.
-
-## Requirements
-
-- icecast version >= 2.4.0
-
-It produces the following charts:
-
-1. **Listeners** in listeners
-
-- source number
-
-## Configuration
-
-Edit the `python.d/icecast.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/icecast.conf
-```
-
-Needs only `url` to server's `/status-json.xsl`
-
-Here is an example for remote server:
-
-```yaml
-remote:
- url : 'http://1.2.3.4:8443/status-json.xsl'
-```
-
-Without configuration, module attempts to connect to `http://localhost:8443/status-json.xsl`
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `icecast` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `icecast` module in debug mode:
-
-```bash
-./python.d.plugin icecast debug trace
-```
-
diff --git a/collectors/python.d.plugin/icecast/README.md b/collectors/python.d.plugin/icecast/README.md
new file mode 120000
index 00000000000000..db3c1b57286867
--- /dev/null
+++ b/collectors/python.d.plugin/icecast/README.md
@@ -0,0 +1 @@
+integrations/icecast.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/icecast/integrations/icecast.md b/collectors/python.d.plugin/icecast/integrations/icecast.md
new file mode 100644
index 00000000000000..12d7d59ee855f1
--- /dev/null
+++ b/collectors/python.d.plugin/icecast/integrations/icecast.md
@@ -0,0 +1,166 @@
+
+
+# Icecast
+
+
+
+
+
+Plugin: python.d.plugin
+Module: icecast
+
+
+
+## Overview
+
+This collector monitors Icecast listener counts.
+
+It connects to an icecast URL and uses the `status-json.xsl` endpoint to retrieve statistics.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+Without configuration, the collector attempts to connect to http://localhost:8443/status-json.xsl
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Icecast instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| icecast.listeners | a dimension for each active source | listeners |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Icecast minimum version
+
+Requires Icecast version 2.4.0 or newer.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/icecast.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/icecast.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| url | The URL (and port) to the icecast server. Needs to also include `/status-json.xsl` | http://localhost:8443/status-json.xsl | no |
+| user | Username to use to connect to `url` if it's password protected. | | no |
+| pass | Password to use to connect to `url` if it's password protected. | | no |
+
+
+
+#### Examples
+
+##### Remote Icecast server
+
+Configure a remote icecast server
+
+```yaml
+remote:
+ url: 'http://1.2.3.4:8443/status-json.xsl'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `icecast` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin icecast debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/ipfs/README.md b/collectors/python.d.plugin/ipfs/README.md
deleted file mode 100644
index c990ae34f0ab15..00000000000000
--- a/collectors/python.d.plugin/ipfs/README.md
+++ /dev/null
@@ -1,74 +0,0 @@
-
-
-# IPFS collector
-
-Collects [`IPFS`](https://ipfs.io) basic information like file system bandwidth, peers and repo metrics.
-
-## Charts
-
-It produces the following charts:
-
-- Bandwidth in `kilobits/s`
-- Peers in `peers`
-- Repo Size in `GiB`
-- Repo Objects in `objects`
-
-## Configuration
-
-Edit the `python.d/ipfs.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/ipfs.conf
-```
-
-
-
-Calls to the following endpoints are disabled due to `IPFS` bugs:
-
-- `/api/v0/stats/repo` (https://github.com/ipfs/go-ipfs/issues/3874)
-- `/api/v0/pin/ls` (https://github.com/ipfs/go-ipfs/issues/7528)
-
-Can be enabled in the collector configuration file.
-
-The configuration needs only `url` to `IPFS` server, here is an example for 2 `IPFS` instances:
-
-```yaml
-localhost:
- url: 'http://localhost:5001'
-
-remote:
- url: 'http://203.0.113.10::5001'
-```
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `ipfs` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `ipfs` module in debug mode:
-
-```bash
-./python.d.plugin ipfs debug trace
-```
-
diff --git a/collectors/python.d.plugin/ipfs/README.md b/collectors/python.d.plugin/ipfs/README.md
new file mode 120000
index 00000000000000..eee6a07b2a0e8b
--- /dev/null
+++ b/collectors/python.d.plugin/ipfs/README.md
@@ -0,0 +1 @@
+integrations/ipfs.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/ipfs/integrations/ipfs.md b/collectors/python.d.plugin/ipfs/integrations/ipfs.md
new file mode 100644
index 00000000000000..77dc745aabf936
--- /dev/null
+++ b/collectors/python.d.plugin/ipfs/integrations/ipfs.md
@@ -0,0 +1,203 @@
+
+
+# IPFS
+
+
+
+
+
+Plugin: python.d.plugin
+Module: ipfs
+
+
+
+## Overview
+
+This collector monitors IPFS server metrics about its quality and performance.
+
+It connects to an HTTP endpoint of the IPFS server to collect the metrics.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+If the endpoint is accessible by the Agent, Netdata will autodetect it.
+
+#### Limits
+
+Calls to the following endpoints are disabled due to IPFS bugs:
+
+/api/v0/stats/repo (https://github.com/ipfs/go-ipfs/issues/3874)
+/api/v0/pin/ls (https://github.com/ipfs/go-ipfs/issues/7528)
+
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per IPFS instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| ipfs.bandwidth | in, out | kilobits/s |
+| ipfs.peers | peers | peers |
+| ipfs.repo_size | avail, size | GiB |
+| ipfs.repo_objects | objects, pinned, recursive_pins | objects |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ ipfs_datastore_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/ipfs.conf) | ipfs.repo_size | IPFS datastore utilization |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/ipfs.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/ipfs.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | The JOB's name as it will appear at the dashboard (by default is the job_name) | job_name | no |
+| url | URL to the IPFS API | no | yes |
+| repoapi | Collect repo metrics. | no | no |
+| pinapi | Set status of IPFS pinned object polling. | no | no |
+
+
+
+#### Examples
+
+##### Basic (default out-of-the-box)
+
+A basic example configuration, one job will run at a time. Autodetect mechanism uses it by default.
+
+```yaml
+localhost:
+ name: 'local'
+ url: 'http://localhost:5001'
+ repoapi: no
+ pinapi: no
+
+```
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+Config
+
+```yaml
+localhost:
+ name: 'local'
+ url: 'http://localhost:5001'
+ repoapi: no
+ pinapi: no
+
+remote_host:
+ name: 'remote'
+ url: 'http://192.0.2.1:5001'
+ repoapi: no
+ pinapi: no
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `ipfs` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin ipfs debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/litespeed/README.md b/collectors/python.d.plugin/litespeed/README.md
deleted file mode 100644
index 1ad5ad42c5be35..00000000000000
--- a/collectors/python.d.plugin/litespeed/README.md
+++ /dev/null
@@ -1,95 +0,0 @@
-
-
-# LiteSpeed collector
-
-Collects web server performance metrics for network, connection, requests, and cache.
-
-It produces:
-
-1. **Network Throughput HTTP** in kilobits/s
-
- - in
- - out
-
-2. **Network Throughput HTTPS** in kilobits/s
-
- - in
- - out
-
-3. **Connections HTTP** in connections
-
- - free
- - used
-
-4. **Connections HTTPS** in connections
-
- - free
- - used
-
-5. **Requests** in requests/s
-
- - requests
-
-6. **Requests In Processing** in requests
-
- - processing
-
-7. **Public Cache Hits** in hits/s
-
- - hits
-
-8. **Private Cache Hits** in hits/s
-
- - hits
-
-9. **Static Hits** in hits/s
-
- - hits
-
-## Configuration
-
-Edit the `python.d/litespeed.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/litespeed.conf
-```
-
-```yaml
-local:
- path : 'PATH'
-```
-
-If no configuration is given, module will use "/tmp/lshttpd/".
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `litespeed` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `litespeed` module in debug mode:
-
-```bash
-./python.d.plugin litespeed debug trace
-```
-
diff --git a/collectors/python.d.plugin/litespeed/README.md b/collectors/python.d.plugin/litespeed/README.md
new file mode 120000
index 00000000000000..e7418b3dcc40c5
--- /dev/null
+++ b/collectors/python.d.plugin/litespeed/README.md
@@ -0,0 +1 @@
+integrations/litespeed.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/litespeed/integrations/litespeed.md b/collectors/python.d.plugin/litespeed/integrations/litespeed.md
new file mode 100644
index 00000000000000..87f2d0b12abadb
--- /dev/null
+++ b/collectors/python.d.plugin/litespeed/integrations/litespeed.md
@@ -0,0 +1,170 @@
+
+
+# Litespeed
+
+
+
+
+
+Plugin: python.d.plugin
+Module: litespeed
+
+
+
+## Overview
+
+Examine Litespeed metrics for insights into web server operations. Analyze request rates, response times, and error rates for efficient web service delivery.
+
+The collector uses the statistics under /tmp/lshttpd to gather the metrics.
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+If no configuration is present, the collector will attempt to read files under /tmp/lshttpd/.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Litespeed instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| litespeed.net_throughput | in, out | kilobits/s |
+| litespeed.net_throughput | in, out | kilobits/s |
+| litespeed.connections | free, used | conns |
+| litespeed.connections | free, used | conns |
+| litespeed.requests | requests | requests/s |
+| litespeed.requests_processing | processing | requests |
+| litespeed.cache | hits | hits/s |
+| litespeed.cache | hits | hits/s |
+| litespeed.static | hits | hits/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/litespeed.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/litespeed.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| path | Use a different path than the default, where the litespeed stats files reside. | /tmp/lshttpd/ | no |
+
+
+
+#### Examples
+
+##### Set the path to statistics
+
+Change the path for the litespeed stats files
+
+```yaml
+localhost:
+ name: 'local'
+ path: '/tmp/lshttpd'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `litespeed` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin litespeed debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/megacli/README.md b/collectors/python.d.plugin/megacli/README.md
deleted file mode 100644
index 1af4d0ea765add..00000000000000
--- a/collectors/python.d.plugin/megacli/README.md
+++ /dev/null
@@ -1,109 +0,0 @@
-
-
-# MegaRAID controller collector
-
-Collects adapter, physical drives and battery stats using `megacli` command-line tool.
-
-Executed commands:
-
-- `sudo -n megacli -LDPDInfo -aAll`
-- `sudo -n megacli -AdpBbuCmd -a0`
-
-## Requirements
-
-The module uses `megacli`, which can only be executed by `root`. It uses
-`sudo` and assumes that it is configured such that the `netdata` user can execute `megacli` as root without a password.
-
-- Add to your `/etc/sudoers` file:
-
-`which megacli` shows the full path to the binary.
-
-```bash
-netdata ALL=(root) NOPASSWD: /path/to/megacli
-```
-
-- Reset Netdata's systemd
- unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux
- distributions with systemd)
-
-The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `megacli` using `sudo`.
-
-
-As the `root` user, do the following:
-
-```cmd
-mkdir /etc/systemd/system/netdata.service.d
-echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf
-systemctl daemon-reload
-systemctl restart netdata.service
-```
-
-## Charts
-
-- Adapter State
-- Physical Drives Media Errors
-- Physical Drives Predictive Failures
-- Battery Relative State of Charge
-- Battery Cycle Count
-
-## Enable the collector
-
-The `megacli` collector is disabled by default. To enable it, use `edit-config` from the
-Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf`
-file.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d.conf
-```
-
-Change the value of the `megacli` setting to `yes`. Save the file and restart the Netdata Agent
-with `sudo systemctl restart netdata`, or the appropriate method for your system.
-
-## Configuration
-
-Edit the `python.d/megacli.conf` configuration file using `edit-config` from the
-Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/megacli.conf
-```
-
-Battery stats disabled by default. To enable them, modify `megacli.conf`.
-
-```yaml
-do_battery: yes
-```
-
-Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate
-method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `megacli` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `megacli` module in debug mode:
-
-```bash
-./python.d.plugin megacli debug trace
-```
-
diff --git a/collectors/python.d.plugin/megacli/README.md b/collectors/python.d.plugin/megacli/README.md
new file mode 120000
index 00000000000000..e5df4d41df2ffc
--- /dev/null
+++ b/collectors/python.d.plugin/megacli/README.md
@@ -0,0 +1 @@
+integrations/megacli.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/megacli/integrations/megacli.md b/collectors/python.d.plugin/megacli/integrations/megacli.md
new file mode 100644
index 00000000000000..0c4af78a9bee04
--- /dev/null
+++ b/collectors/python.d.plugin/megacli/integrations/megacli.md
@@ -0,0 +1,220 @@
+
+
+# MegaCLI
+
+
+
+
+
+Plugin: python.d.plugin
+Module: megacli
+
+
+
+## Overview
+
+Examine MegaCLI metrics with Netdata for insights into RAID controller performance. Improve your RAID controller efficiency with real-time MegaCLI metrics.
+
+Collects adapter, physical drives and battery stats using the megacli command-line tool.
+
+Executed commands:
+
+ - `sudo -n megacli -LDPDInfo -aAll`
+ - `sudo -n megacli -AdpBbuCmd -a0`
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+The module uses megacli, which can only be executed by root. It uses sudo and assumes that it is configured such that the netdata user can execute megacli as root without a password.
+
+### Default Behavior
+
+#### Auto-Detection
+
+After all the permissions are satisfied, netdata should be able to execute commands via the megacli command line utility.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per MegaCLI instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| megacli.adapter_degraded | a dimension per adapter | is degraded |
+| megacli.pd_media_error | a dimension per physical drive | errors/s |
+| megacli.pd_predictive_failure | a dimension per physical drive | failures/s |
+
+### Per battery
+
+Metrics related to Battery Backup Units, each BBU provides its own set of the following metrics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| megacli.bbu_relative_charge | adapter {battery id} | percentage |
+| megacli.bbu_cycle_count | adapter {battery id} | cycle count |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ megacli_adapter_state ](https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf) | megacli.adapter_degraded | adapter is in the degraded state (0: false, 1: true) |
+| [ megacli_pd_media_errors ](https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf) | megacli.pd_media_error | number of physical drive media errors |
+| [ megacli_pd_predictive_failures ](https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf) | megacli.pd_predictive_failure | number of physical drive predictive failures |
+| [ megacli_bbu_relative_charge ](https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf) | megacli.bbu_relative_charge | average battery backup unit (BBU) relative state of charge over the last 10 seconds |
+| [ megacli_bbu_cycle_count ](https://github.com/netdata/netdata/blob/master/health/health.d/megacli.conf) | megacli.bbu_cycle_count | average battery backup unit (BBU) charge cycles count over the last 10 seconds |
+
+
+## Setup
+
+### Prerequisites
+
+#### Grant permissions for netdata, to run megacli as sudoer
+
+The module uses megacli, which can only be executed by root. It uses sudo and assumes that it is configured such that the netdata user can execute megacli as root without a password.
+
+Add the following line to your `/etc/sudoers` file
+(`which megacli` shows the full path to the binary):
+
+```bash
+netdata ALL=(root) NOPASSWD: /path/to/megacli
+```
+
+
+#### Reset Netdata's systemd unit CapabilityBoundingSet (Linux distributions with systemd)
+
+The default CapabilityBoundingSet doesn't allow using sudo, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute megacli using sudo.
+
+As root user, do the following:
+
+```bash
+mkdir /etc/systemd/system/netdata.service.d
+echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf
+systemctl daemon-reload
+systemctl restart netdata.service
+```
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/megacli.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/megacli.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| do_battery | default is no. Battery stats (adds additional call to megacli `megacli -AdpBbuCmd -a0`). | no | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic example configuration per job
+
+```yaml
+job_name:
+ name: myname
+ update_every: 1
+ priority: 60000
+ penalty: yes
+ autodetection_retry: 0
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `megacli` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin megacli debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/megacli/megacli.chart.py b/collectors/python.d.plugin/megacli/megacli.chart.py
index ef35ff63f43742..8222092a800094 100644
--- a/collectors/python.d.plugin/megacli/megacli.chart.py
+++ b/collectors/python.d.plugin/megacli/megacli.chart.py
@@ -91,7 +91,7 @@ def battery_charts(bats):
RE_ADAPTER = re.compile(
- r'Adapter #([0-9]+) State(?:\s+)?: ([a-zA-Z]+)'
+ r'Adapter #([0-9]+) State(?:\s+)?: ([a-zA-Z ]+)'
)
RE_VD = re.compile(
@@ -124,14 +124,14 @@ def find_batteries(d):
class Adapter:
def __init__(self, n, state):
self.id = n
- self.state = int(state == 'Degraded')
+ # TODO: Rewrite all of this
+ self.state = int(state in ("Partially Degraded", "Degraded", "Failed"))
def data(self):
return {
'adapter_{0}_degraded'.format(self.id): self.state,
}
-
class PD:
def __init__(self, n, media_err, predict_fail):
self.id = n
diff --git a/collectors/python.d.plugin/megacli/metadata.yaml b/collectors/python.d.plugin/megacli/metadata.yaml
index f75a8d2ab6f134..4a2ba43ee5ed5f 100644
--- a/collectors/python.d.plugin/megacli/metadata.yaml
+++ b/collectors/python.d.plugin/megacli/metadata.yaml
@@ -27,8 +27,8 @@ modules:
Executed commands:
- sudo -n megacli -LDPDInfo -aAll
- sudo -n megacli -AdpBbuCmd -a0
+ - `sudo -n megacli -LDPDInfo -aAll`
+ - `sudo -n megacli -AdpBbuCmd -a0`
supported_platforms:
include: []
exclude: []
diff --git a/collectors/python.d.plugin/memcached/README.md b/collectors/python.d.plugin/memcached/README.md
deleted file mode 100644
index 612bd49d7d7949..00000000000000
--- a/collectors/python.d.plugin/memcached/README.md
+++ /dev/null
@@ -1,122 +0,0 @@
-
-
-# Memcached collector
-
-Collects memory-caching system performance metrics. It reads server response to stats command ([stats interface](https://github.com/memcached/memcached/wiki/Commands#stats)).
-
-
-1. **Network** in kilobytes/s
-
- - read
- - written
-
-2. **Connections** per second
-
- - current
- - rejected
- - total
-
-3. **Items** in cluster
-
- - current
- - total
-
-4. **Evicted and Reclaimed** items
-
- - evicted
- - reclaimed
-
-5. **GET** requests/s
-
- - hits
- - misses
-
-6. **GET rate** rate in requests/s
-
- - rate
-
-7. **SET rate** rate in requests/s
-
- - rate
-
-8. **DELETE** requests/s
-
- - hits
- - misses
-
-9. **CAS** requests/s
-
- - hits
- - misses
- - bad value
-
-10. **Increment** requests/s
-
- - hits
- - misses
-
-11. **Decrement** requests/s
-
- - hits
- - misses
-
-12. **Touch** requests/s
-
- - hits
- - misses
-
-13. **Touch rate** rate in requests/s
-
- - rate
-
-## Configuration
-
-Edit the `python.d/memcached.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/memcached.conf
-```
-
-Sample:
-
-```yaml
-localtcpip:
- name : 'local'
- host : '127.0.0.1'
- port : 24242
-```
-
-If no configuration is given, module will attempt to connect to memcached instance on `127.0.0.1:11211` address.
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `memcached` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `memcached` module in debug mode:
-
-```bash
-./python.d.plugin memcached debug trace
-```
-
diff --git a/collectors/python.d.plugin/memcached/README.md b/collectors/python.d.plugin/memcached/README.md
new file mode 120000
index 00000000000000..2cb76d33c06255
--- /dev/null
+++ b/collectors/python.d.plugin/memcached/README.md
@@ -0,0 +1 @@
+integrations/memcached.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/memcached/integrations/memcached.md b/collectors/python.d.plugin/memcached/integrations/memcached.md
new file mode 100644
index 00000000000000..113b86c8c96acb
--- /dev/null
+++ b/collectors/python.d.plugin/memcached/integrations/memcached.md
@@ -0,0 +1,215 @@
+
+
+# Memcached
+
+
+
+
+
+Plugin: python.d.plugin
+Module: memcached
+
+
+
+## Overview
+
+Monitor Memcached metrics for proficient in-memory key-value store operations. Track cache hits, misses, and memory usage for efficient data caching.
+
+It reads server response to stats command ([stats interface](https://github.com/memcached/memcached/wiki/Commands#stats)).
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+If no configuration is given, collector will attempt to connect to memcached instance on `127.0.0.1:11211` address.
+
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Memcached instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| memcached.cache | available, used | MiB |
+| memcached.net | in, out | kilobits/s |
+| memcached.connections | current, rejected, total | connections/s |
+| memcached.items | current, total | items |
+| memcached.evicted_reclaimed | reclaimed, evicted | items |
+| memcached.get | hits, misses | requests |
+| memcached.get_rate | rate | requests/s |
+| memcached.set_rate | rate | requests/s |
+| memcached.delete | hits, misses | requests |
+| memcached.cas | hits, misses, bad value | requests |
+| memcached.increment | hits, misses | requests |
+| memcached.decrement | hits, misses | requests |
+| memcached.touch | hits, misses | requests |
+| memcached.touch_rate | rate | requests/s |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ memcached_cache_memory_usage ](https://github.com/netdata/netdata/blob/master/health/health.d/memcached.conf) | memcached.cache | cache memory utilization |
+| [ memcached_cache_fill_rate ](https://github.com/netdata/netdata/blob/master/health/health.d/memcached.conf) | memcached.cache | average rate the cache fills up (positive), or frees up (negative) space over the last hour |
+| [ memcached_out_of_cache_space_time ](https://github.com/netdata/netdata/blob/master/health/health.d/memcached.conf) | memcached.cache | estimated time the cache will run out of space if the system continues to add data at the same rate as the past hour |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/memcached.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/memcached.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| host | the host to connect to. | 127.0.0.1 | no |
+| port | the port to connect to. | 11211 | no |
+| update_every | Sets the default data collection frequency. | 10 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+
+
+
+#### Examples
+
+##### localhost
+
+An example configuration for localhost.
+
+```yaml
+localhost:
+ name: 'local'
+ host: 'localhost'
+ port: 11211
+
+```
+##### localipv4
+
+An example configuration for localipv4.
+
+Config
+
+```yaml
+localhost:
+ name: 'local'
+ host: '127.0.0.1'
+ port: 11211
+
+```
+
+
+##### localipv6
+
+An example configuration for localipv6.
+
+Config
+
+```yaml
+localhost:
+ name: 'local'
+ host: '::1'
+ port: 11211
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `memcached` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin memcached debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/monit/README.md b/collectors/python.d.plugin/monit/README.md
deleted file mode 100644
index f762de0d3702ae..00000000000000
--- a/collectors/python.d.plugin/monit/README.md
+++ /dev/null
@@ -1,78 +0,0 @@
-
-
-# Monit collector
-
-Monit monitoring module. Data is grabbed from stats XML interface (exists for a long time, but not mentioned in official
-documentation). Mostly this plugin shows statuses of monit targets, i.e.
-[statuses of specified checks](https://mmonit.com/monit/documentation/monit.html#Service-checks).
-
-1. **Filesystems**
-
- - Filesystems
- - Directories
- - Files
- - Pipes
-
-2. **Applications**
-
- - Processes (+threads/childs)
- - Programs
-
-3. **Network**
-
- - Hosts (+latency)
- - Network interfaces
-
-## Configuration
-
-Edit the `python.d/monit.conf` configuration file using `edit-config` from the
-Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically
-at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/monit.conf
-```
-
-Sample:
-
-```yaml
-local:
- name: 'local'
- url: 'http://localhost:2812'
- user: : admin
- pass: : monit
-```
-
-If no configuration is given, module will attempt to connect to monit as `http://localhost:2812`.
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `monit` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `monit` module in debug mode:
-
-```bash
-./python.d.plugin monit debug trace
-```
-
diff --git a/collectors/python.d.plugin/monit/README.md b/collectors/python.d.plugin/monit/README.md
new file mode 120000
index 00000000000000..ac69496f40408c
--- /dev/null
+++ b/collectors/python.d.plugin/monit/README.md
@@ -0,0 +1 @@
+integrations/monit.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/monit/integrations/monit.md b/collectors/python.d.plugin/monit/integrations/monit.md
new file mode 100644
index 00000000000000..18219141d609f3
--- /dev/null
+++ b/collectors/python.d.plugin/monit/integrations/monit.md
@@ -0,0 +1,214 @@
+
+
+# Monit
+
+
+
+
+
+Plugin: python.d.plugin
+Module: monit
+
+
+
+## Overview
+
+This collector monitors Monit targets such as filesystems, directories, files, FIFO pipes and more.
+
+
+It gathers data from Monit's XML interface.
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+By default, this collector will attempt to connect to Monit at `http://localhost:2812`.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Monit instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| monit.filesystems | a dimension per target | filesystems |
+| monit.directories | a dimension per target | directories |
+| monit.files | a dimension per target | files |
+| monit.fifos | a dimension per target | pipes |
+| monit.programs | a dimension per target | programs |
+| monit.services | a dimension per target | processes |
+| monit.process_uptime | a dimension per target | seconds |
+| monit.process_threads | a dimension per target | threads |
+| monit.process_childrens | a dimension per target | children |
+| monit.hosts | a dimension per target | hosts |
+| monit.host_latency | a dimension per target | milliseconds |
+| monit.networks | a dimension per target | interfaces |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/monit.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/monit.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 1 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | no |
+| url | The URL to fetch Monit's metrics. | http://localhost:2812 | yes |
+| user | Username in case the URL is password protected. | | no |
+| pass | Password in case the URL is password protected. | | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic configuration example.
+
+```yaml
+localhost:
+ name : 'local'
+ url : 'http://localhost:2812'
+
+```
+##### Basic Authentication
+
+Example using basic username and password in order to authenticate.
+
+Config
+
+```yaml
+localhost:
+ name : 'local'
+ url : 'http://localhost:2812'
+ user: 'foo'
+ pass: 'bar'
+
+```
+
+
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+Config
+
+```yaml
+localhost:
+ name: 'local'
+ url: 'http://localhost:2812'
+
+remote_job:
+ name: 'remote'
+ url: 'http://192.0.2.1:2812'
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `monit` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin monit debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/nsd/README.md b/collectors/python.d.plugin/nsd/README.md
deleted file mode 100644
index ccc4e712b14902..00000000000000
--- a/collectors/python.d.plugin/nsd/README.md
+++ /dev/null
@@ -1,91 +0,0 @@
-
-
-# NSD collector
-
-Uses the `nsd-control stats_noreset` command to provide `nsd` statistics.
-
-## Requirements
-
-- Version of `nsd` must be 4.0+
-- Netdata must have permissions to run `nsd-control stats_noreset`
-
-It produces:
-
-1. **Queries**
-
- - queries
-
-2. **Zones**
-
- - master
- - slave
-
-3. **Protocol**
-
- - udp
- - udp6
- - tcp
- - tcp6
-
-4. **Query Type**
-
- - A
- - NS
- - CNAME
- - SOA
- - PTR
- - HINFO
- - MX
- - NAPTR
- - TXT
- - AAAA
- - SRV
- - ANY
-
-5. **Transfer**
-
- - NOTIFY
- - AXFR
-
-6. **Return Code**
-
- - NOERROR
- - FORMERR
- - SERVFAIL
- - NXDOMAIN
- - NOTIMP
- - REFUSED
- - YXDOMAIN
-
-Configuration is not needed.
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `nsd` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `nsd` module in debug mode:
-
-```bash
-./python.d.plugin nsd debug trace
-```
-
diff --git a/collectors/python.d.plugin/nsd/README.md b/collectors/python.d.plugin/nsd/README.md
new file mode 120000
index 00000000000000..59fcfe49134540
--- /dev/null
+++ b/collectors/python.d.plugin/nsd/README.md
@@ -0,0 +1 @@
+integrations/name_server_daemon.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/nsd/integrations/name_server_daemon.md b/collectors/python.d.plugin/nsd/integrations/name_server_daemon.md
new file mode 100644
index 00000000000000..0e66c44ebe414b
--- /dev/null
+++ b/collectors/python.d.plugin/nsd/integrations/name_server_daemon.md
@@ -0,0 +1,199 @@
+
+
+# Name Server Daemon
+
+
+
+
+
+Plugin: python.d.plugin
+Module: nsd
+
+
+
+## Overview
+
+This collector monitors NSD statistics like queries, zones, protocols, query types and more.
+
+
+It uses the `nsd-control stats_noreset` command to gather metrics.
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+If permissions are satisfied, the collector will be able to run `nsd-control stats_noreset`, thus collecting metrics.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Name Server Daemon instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| nsd.queries | queries | queries/s |
+| nsd.zones | master, slave | zones |
+| nsd.protocols | udp, udp6, tcp, tcp6 | queries/s |
+| nsd.type | A, NS, CNAME, SOA, PTR, HINFO, MX, NAPTR, TXT, AAAA, SRV, ANY | queries/s |
+| nsd.transfer | NOTIFY, AXFR | queries/s |
+| nsd.rcode | NOERROR, FORMERR, SERVFAIL, NXDOMAIN, NOTIMP, REFUSED, YXDOMAIN | queries/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### NSD version
+
+The version of `nsd` must be 4.0+.
+
+
+#### Provide Netdata the permissions to run the command
+
+Netdata must have permissions to run the `nsd-control stats_noreset` command.
+
+You can:
+
+- Add "netdata" user to "nsd" group:
+ ```
+ usermod -aG nsd netdata
+ ```
+- Add Netdata to sudoers
+ 1. Edit the sudoers file:
+ ```
+ visudo -f /etc/sudoers.d/netdata
+ ```
+ 2. Add the entry:
+ ```
+ Defaults:netdata !requiretty
+ netdata ALL=(ALL) NOPASSWD: /usr/sbin/nsd-control stats_noreset
+ ```
+
+ > Note that you will need to set the `command` option to `sudo /usr/sbin/nsd-control stats_noreset` if you use this method.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/nsd.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/nsd.conf
+```
+#### Options
+
+This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize its data collection behavior.
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 30 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| command | The command to run | nsd-control stats_noreset | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic configuration example.
+
+```yaml
+local:
+ name: 'nsd_local'
+ command: 'nsd-control stats_noreset'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `nsd` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin nsd debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/nsd/metadata.yaml b/collectors/python.d.plugin/nsd/metadata.yaml
index bd0a256f3dbf17..f5e2c46b0adc33 100644
--- a/collectors/python.d.plugin/nsd/metadata.yaml
+++ b/collectors/python.d.plugin/nsd/metadata.yaml
@@ -40,6 +40,9 @@ modules:
setup:
prerequisites:
list:
+ - title: NSD version
+ description: |
+ The version of `nsd` must be 4.0+.
- title: Provide Netdata the permissions to run the command
description: |
Netdata must have permissions to run the `nsd-control stats_noreset` command.
diff --git a/collectors/python.d.plugin/openldap/README.md b/collectors/python.d.plugin/openldap/README.md
deleted file mode 100644
index eddf40b2cbb20d..00000000000000
--- a/collectors/python.d.plugin/openldap/README.md
+++ /dev/null
@@ -1,102 +0,0 @@
-
-
-# OpenLDAP collector
-
-Provides statistics information from openldap (slapd) server.
-Statistics are taken from LDAP monitoring interface. Manual page, slapd-monitor(5) is available.
-
-**Requirement:**
-
-- Follow instructions from to activate monitoring interface.
-- Install python ldap module `pip install ldap` or `yum install python-ldap`
-- Modify openldap.conf with your credentials
-
-### Module gives information with following charts:
-
-1. **connections**
-
- - total connections number
-
-2. **Bytes**
-
- - sent
-
-3. **operations**
-
- - completed
- - initiated
-
-4. **referrals**
-
- - sent
-
-5. **entries**
-
- - sent
-
-6. **ldap operations**
-
- - bind
- - search
- - unbind
- - add
- - delete
- - modify
- - compare
-
-7. **waiters**
-
- - read
- - write
-
-## Configuration
-
-Edit the `python.d/openldap.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/openldap.conf
-```
-
-Sample:
-
-```yaml
-openldap:
- name : 'local'
- username : "cn=monitor,dc=superb,dc=eu"
- password : "testpass"
- server : 'localhost'
- port : 389
-```
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `openldap` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `openldap` module in debug mode:
-
-```bash
-./python.d.plugin openldap debug trace
-```
-
diff --git a/collectors/python.d.plugin/openldap/README.md b/collectors/python.d.plugin/openldap/README.md
new file mode 120000
index 00000000000000..45f36b9b92ea6b
--- /dev/null
+++ b/collectors/python.d.plugin/openldap/README.md
@@ -0,0 +1 @@
+integrations/openldap.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/openldap/integrations/openldap.md b/collectors/python.d.plugin/openldap/integrations/openldap.md
new file mode 100644
index 00000000000000..a9480a490c3915
--- /dev/null
+++ b/collectors/python.d.plugin/openldap/integrations/openldap.md
@@ -0,0 +1,215 @@
+
+
+# OpenLDAP
+
+
+
+
+
+Plugin: python.d.plugin
+Module: openldap
+
+
+
+## Overview
+
+This collector monitors OpenLDAP metrics about connections, operations, referrals and more.
+
+Statistics are taken from the monitoring interface of an OpenLDAP (slapd) server.
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This collector doesn't work until all the prerequisites are checked.
+
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per OpenLDAP instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| openldap.total_connections | connections | connections/s |
+| openldap.traffic_stats | sent | KiB/s |
+| openldap.operations_status | completed, initiated | ops/s |
+| openldap.referrals | sent | referrals/s |
+| openldap.entries | sent | entries/s |
+| openldap.ldap_operations | bind, search, unbind, add, delete, modify, compare | ops/s |
+| openldap.waiters | write, read | waiters/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Configure the openLDAP server to expose metrics to monitor it.
+
+Follow instructions from https://www.openldap.org/doc/admin24/monitoringslapd.html to activate monitoring interface.
+
+
+#### Install python-ldap module
+
+Install python ldap module
+
+1. From pip package manager
+
+```bash
+pip install python-ldap
+```
+
+2. With apt package manager (in most deb based distros)
+
+
+```bash
+apt-get install python-ldap
+```
+
+
+3. With yum package manager (in most rpm based distros)
+
+
+```bash
+yum install python-ldap
+```
+
+
+#### Insert credentials for Netdata to access openLDAP server
+
+Use the `ldappasswd` utility to set a password for the username you will use.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/openldap.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/openldap.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| username | The bind user with right to access monitor statistics | | yes |
+| password | The password for the bound user | | yes |
+| server | The listening address of the LDAP server. In case of TLS, use the hostname which the certificate is published for. | | yes |
+| port | The listening port of the LDAP server. Change to 636 port in case of TLS connection. | 389 | yes |
+| use_tls | Make True if a TLS connection is used over ldaps:// | no | no |
+| use_start_tls | Make True if a TLS connection is used over ldap:// | no | no |
+| cert_check | False if you want to ignore certificate check | True | yes |
+| timeout | Number of seconds to wait before timing out if no connection exists | | yes |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic example configuration.
+
+```yaml
+username: "cn=admin"
+password: "pass"
+server: "localhost"
+port: "389"
+cert_check: True
+timeout: 1
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `openldap` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin openldap debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/oracledb/README.md b/collectors/python.d.plugin/oracledb/README.md
deleted file mode 100644
index 315816de0e3445..00000000000000
--- a/collectors/python.d.plugin/oracledb/README.md
+++ /dev/null
@@ -1,115 +0,0 @@
-
-
-# OracleDB collector
-
-Monitors the performance and health metrics of the Oracle database.
-
-## Requirements
-
-- `oracledb` package.
-
-It produces following charts:
-
-- session activity
- - Session Count
- - Session Limit Usage
- - Logons
-- disk activity
- - Physical Disk Reads/Writes
- - Sorts On Disk
- - Full Table Scans
-- database and buffer activity
- - Database Wait Time Ratio
- - Shared Pool Free Memory
- - In-Memory Sorts Ratio
- - SQL Service Response Time
- - User Rollbacks
- - Enqueue Timeouts
-- cache
- - Cache Hit Ratio
- - Global Cache Blocks Events
-- activities
- - Activities
-- wait time
- - Wait Time
-- tablespace
- - Size
- - Usage
- - Usage In Percent
-- allocated space
- - Size
- - Usage
- - Usage In Percent
-
-## prerequisite
-
-To use the Oracle module do the following:
-
-1. Install `oracledb` package ([link](https://python-oracledb.readthedocs.io/en/latest/user_guide/installation.html)).
-
-2. Create a read-only `netdata` user with proper access to your Oracle Database Server.
-
-Connect to your Oracle database with an administrative user and execute:
-
-```SQL
-CREATE USER netdata IDENTIFIED BY ;
-
-GRANT CONNECT TO netdata;
-GRANT SELECT_CATALOG_ROLE TO netdata;
-```
-
-## Configuration
-
-Edit the `python.d/oracledb.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/oracledb.conf
-```
-
-```yaml
-local:
- user: 'netdata'
- password: 'secret'
- server: 'localhost:1521'
- service: 'XE'
-
-
-remote:
- user: 'netdata'
- password: 'secret'
- server: '10.0.0.1:1521'
- service: 'XE'
-```
-
-All parameters are required. Without them module will fail to start.
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `oracledb` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `oracledb` module in debug mode:
-
-```bash
-./python.d.plugin oracledb debug trace
-```
-
diff --git a/collectors/python.d.plugin/oracledb/README.md b/collectors/python.d.plugin/oracledb/README.md
new file mode 120000
index 00000000000000..a75e3611e57644
--- /dev/null
+++ b/collectors/python.d.plugin/oracledb/README.md
@@ -0,0 +1 @@
+integrations/oracle_db.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/oracledb/integrations/oracle_db.md b/collectors/python.d.plugin/oracledb/integrations/oracle_db.md
new file mode 100644
index 00000000000000..30557c0214f221
--- /dev/null
+++ b/collectors/python.d.plugin/oracledb/integrations/oracle_db.md
@@ -0,0 +1,226 @@
+
+
+# Oracle DB
+
+
+
+
+
+Plugin: python.d.plugin
+Module: oracledb
+
+
+
+## Overview
+
+This collector monitors OracleDB database metrics about sessions, tables, memory and more.
+
+It collects the metrics via the supported database client library.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+In order for this collector to work, it needs a read-only user `netdata` in the RDBMS.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+When the requirements are met, databases on the local host on port 1521 will be auto-detected.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+These metrics refer to the entire monitored application.
+
+### Per Oracle DB instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| oracledb.session_count | total, active | sessions |
+| oracledb.session_limit_usage | usage | % |
+| oracledb.logons | logons | events/s |
+| oracledb.physical_disk_read_writes | reads, writes | events/s |
+| oracledb.sorts_on_disks | sorts | events/s |
+| oracledb.full_table_scans | full table scans | events/s |
+| oracledb.database_wait_time_ratio | wait time ratio | % |
+| oracledb.shared_pool_free_memory | free memory | % |
+| oracledb.in_memory_sorts_ratio | in-memory sorts | % |
+| oracledb.sql_service_response_time | time | seconds |
+| oracledb.user_rollbacks | rollbacks | events/s |
+| oracledb.enqueue_timeouts | enqueue timeouts | events/s |
+| oracledb.cache_hit_ration | buffer, cursor, library, row | % |
+| oracledb.global_cache_blocks | corrupted, lost | events/s |
+| oracledb.activity | parse count, execute count, user commits, user rollbacks | events/s |
+| oracledb.wait_time | application, configuration, administrative, concurrency, commit, network, user I/O, system I/O, scheduler, other | ms |
+| oracledb.tablespace_size | a dimension per active tablespace | KiB |
+| oracledb.tablespace_usage | a dimension per active tablespace | KiB |
+| oracledb.tablespace_usage_in_percent | a dimension per active tablespace | % |
+| oracledb.allocated_size | a dimension per active tablespace | B |
+| oracledb.allocated_usage | a dimension per active tablespace | B |
+| oracledb.allocated_usage_in_percent | a dimension per active tablespace | % |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Install the python-oracledb package
+
+You can follow the official guide below to install the required package:
+
+Source: https://python-oracledb.readthedocs.io/en/latest/user_guide/installation.html
+
+
+#### Create a read only user for netdata
+
+Follow the official instructions for your Oracle RDBMS to create a read-only user for netdata. The operation may follow this approach:
+
+Connect to your Oracle database with an administrative user and execute:
+
+```sql
+CREATE USER netdata IDENTIFIED BY <PASSWORD>;
+
+GRANT CONNECT TO netdata;
+GRANT SELECT_CATALOG_ROLE TO netdata;
+```
+
+
+#### Edit the configuration
+
+Edit the configuration file to:
+
+1. Provide a valid user for the netdata collector to access the database.
+2. Specify the network target this database is listening on.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/oracledb.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/oracledb.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| user | The username for the user account. | no | yes |
+| password | The password for the user account. | no | yes |
+| server | The IP address or hostname (and port) of the Oracle Database Server. | no | yes |
+| service | The Oracle Database service name. To view the services available on your server run this query, `select SERVICE_NAME from gv$session where sid in (select sid from V$MYSTAT)`. | no | yes |
+| protocol | one of the strings "tcp" or "tcps" indicating whether to use unencrypted network traffic or encrypted network traffic | no | yes |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic example configuration, two jobs described for two databases.
+
+```yaml
+local:
+ user: 'netdata'
+ password: 'secret'
+ server: 'localhost:1521'
+ service: 'XE'
+ protocol: 'tcps'
+
+remote:
+ user: 'netdata'
+ password: 'secret'
+ server: '10.0.0.1:1521'
+ service: 'XE'
+ protocol: 'tcps'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `oracledb` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin oracledb debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/pandas/README.md b/collectors/python.d.plugin/pandas/README.md
deleted file mode 100644
index 19b11d5be41c74..00000000000000
--- a/collectors/python.d.plugin/pandas/README.md
+++ /dev/null
@@ -1,96 +0,0 @@
-# Ingest structured data (Pandas)
-
-
-
-
-
-[Pandas](https://pandas.pydata.org/) is a de-facto standard in reading and processing most types of structured data in Python.
-If you have metrics appearing in a CSV, JSON, XML, HTML, or [other supported format](https://pandas.pydata.org/docs/user_guide/io.html),
-either locally or via some HTTP endpoint, you can easily ingest and present those metrics in Netdata, by leveraging the Pandas collector.
-
-The collector uses [pandas](https://pandas.pydata.org/) to pull data and do pandas-based
-preprocessing, before feeding to Netdata.
-
-## Requirements
-
-This collector depends on some Python (Python 3 only) packages that can usually be installed via `pip` or `pip3`.
-
-```bash
-sudo pip install pandas requests
-```
-
-Note: If you would like to use [`pandas.read_sql`](https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html) to query a database, you will need to install the below packages as well.
-
-```bash
-sudo pip install 'sqlalchemy<2.0' psycopg2-binary
-```
-
-## Configuration
-
-Below is an example configuration to query some json weather data from [Open-Meteo](https://open-meteo.com),
-do some data wrangling on it and save in format as expected by Netdata.
-
-```yaml
-# example pulling some hourly temperature data
-temperature:
- name: "temperature"
- update_every: 3
- chart_configs:
- - name: "temperature_by_city"
- title: "Temperature By City"
- family: "temperature.today"
- context: "pandas.temperature"
- type: "line"
- units: "Celsius"
- df_steps: >
- pd.DataFrame.from_dict(
- {city: requests.get(
- f'https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lng}&hourly=temperature_2m'
- ).json()['hourly']['temperature_2m']
- for (city,lat,lng)
- in [
- ('dublin', 53.3441, -6.2675),
- ('athens', 37.9792, 23.7166),
- ('london', 51.5002, -0.1262),
- ('berlin', 52.5235, 13.4115),
- ('paris', 48.8567, 2.3510),
- ]
- }
- ); # use dictionary comprehension to make multiple requests;
- df.describe(); # get aggregate stats for each city;
- df.transpose()[['mean', 'max', 'min']].reset_index(); # just take mean, min, max;
- df.rename(columns={'index':'city'}); # some column renaming;
- df.pivot(columns='city').mean().to_frame().reset_index(); # force to be one row per city;
- df.rename(columns={0:'degrees'}); # some column renaming;
- pd.concat([df, df['city']+'_'+df['level_0']], axis=1); # add new column combining city and summary measurement label;
- df.rename(columns={0:'measurement'}); # some column renaming;
- df[['measurement', 'degrees']].set_index('measurement'); # just take two columns we want;
- df.sort_index(); # sort by city name;
- df.transpose(); # transpose so its just one wide row;
-```
-
-`chart_configs` is a list of dictionary objects where each one defines the sequence of `df_steps` to be run using [`pandas`](https://pandas.pydata.org/),
-and the `name`, `title` etc to define the
-[CHART variables](https://github.com/netdata/netdata/blob/master/docs/guides/python-collector.md#create-charts)
-that will control how the results will look in netdata.
-
-The example configuration above would result in a `data` dictionary like the below being collected by Netdata
-at each time step. They keys in this dictionary will be the "dimensions" of the chart.
-
-```javascript
-{'athens_max': 26.2, 'athens_mean': 19.45952380952381, 'athens_min': 12.2, 'berlin_max': 17.4, 'berlin_mean': 10.764285714285714, 'berlin_min': 5.7, 'dublin_max': 15.3, 'dublin_mean': 12.008928571428571, 'dublin_min': 6.6, 'london_max': 18.9, 'london_mean': 12.510714285714286, 'london_min': 5.2, 'paris_max': 19.4, 'paris_mean': 12.054166666666665, 'paris_min': 4.8}
-```
-
-Which, given the above configuration would end up as a chart like below in Netdata.
-
-
-
-## Notes
-- Each line in `df_steps` must return a pandas
-[DataFrame](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html) object (`df`) at each step.
-- You can use
-[this colab notebook](https://colab.research.google.com/drive/1VYrddSegZqGtkWGFuiUbMbUk5f3rW6Hi?usp=sharing)
-to mock up and work on your `df_steps` iteratively before adding them to your config.
-- This collector is expecting one row in the final pandas DataFrame. It is that first row that will be taken
-as the most recent values for each dimension on each chart using (`df.to_dict(orient='records')[0]`).
-See [pd.to_dict()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_dict.html).
diff --git a/collectors/python.d.plugin/pandas/README.md b/collectors/python.d.plugin/pandas/README.md
new file mode 120000
index 00000000000000..2fabe63c15fbdb
--- /dev/null
+++ b/collectors/python.d.plugin/pandas/README.md
@@ -0,0 +1 @@
+integrations/pandas.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/pandas/integrations/pandas.md b/collectors/python.d.plugin/pandas/integrations/pandas.md
new file mode 100644
index 00000000000000..83c5c66b1bb94f
--- /dev/null
+++ b/collectors/python.d.plugin/pandas/integrations/pandas.md
@@ -0,0 +1,365 @@
+
+
+# Pandas
+
+
+
+
+
+Plugin: python.d.plugin
+Module: pandas
+
+
+
+## Overview
+
+[Pandas](https://pandas.pydata.org/) is a de-facto standard in reading and processing most types of structured data in Python.
+If you have metrics appearing in a CSV, JSON, XML, HTML, or [other supported format](https://pandas.pydata.org/docs/user_guide/io.html),
+either locally or via some HTTP endpoint, you can easily ingest and present those metrics in Netdata, by leveraging the Pandas collector.
+
+This collector can be used to collect pretty much anything that can be read by Pandas, and then processed by Pandas.
+
+
+The collector uses [pandas](https://pandas.pydata.org/) to pull data and do pandas-based preprocessing, before feeding to Netdata.
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+This collector is expecting one row in the final pandas DataFrame. It is that first row that will be taken
+as the most recent values for each dimension on each chart using (`df.to_dict(orient='records')[0]`).
+See [pd.to_dict()](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_dict.html).
+
+
+### Per Pandas instance
+
+These metrics refer to the entire monitored application.
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Python Requirements
+
+This collector depends on some Python (Python 3 only) packages that can usually be installed via `pip` or `pip3`.
+
+```bash
+sudo pip install pandas requests
+```
+
+Note: If you would like to use [`pandas.read_sql`](https://pandas.pydata.org/docs/reference/api/pandas.read_sql.html) to query a database, you will need to install the below packages as well.
+
+```bash
+sudo pip install 'sqlalchemy<2.0' psycopg2-binary
+```
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/pandas.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/pandas.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| chart_configs | an array of chart configuration dictionaries | [] | yes |
+| chart_configs.name | name of the chart to be displayed in the dashboard. | None | yes |
+| chart_configs.title | title of the chart to be displayed in the dashboard. | None | yes |
+| chart_configs.family | [family](https://github.com/netdata/netdata/blob/master/docs/cloud/visualize/interact-new-charts.md#families) of the chart to be displayed in the dashboard. | None | yes |
+| chart_configs.context | [context](https://github.com/netdata/netdata/blob/master/docs/cloud/visualize/interact-new-charts.md#contexts) of the chart to be displayed in the dashboard. | None | yes |
+| chart_configs.type | the type of the chart to be displayed in the dashboard. | None | yes |
+| chart_configs.units | the units of the chart to be displayed in the dashboard. | None | yes |
+| chart_configs.df_steps | a series of pandas operations (one per line) that each returns a dataframe. | None | yes |
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+
+
+
+#### Examples
+
+##### Temperature API Example
+
+An example that pulls hourly temperature data and builds two charts: one for today's forecast (mean, min, max) and one for the current temperature.
+
+Config
+
+```yaml
+temperature:
+ name: "temperature"
+ update_every: 5
+ chart_configs:
+ - name: "temperature_forecast_by_city"
+ title: "Temperature By City - Today Forecast"
+ family: "temperature.today"
+ context: "pandas.temperature"
+ type: "line"
+ units: "Celsius"
+ df_steps: >
+ pd.DataFrame.from_dict(
+ {city: requests.get(f'https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lng}&hourly=temperature_2m').json()['hourly']['temperature_2m']
+ for (city,lat,lng)
+ in [
+ ('dublin', 53.3441, -6.2675),
+ ('athens', 37.9792, 23.7166),
+ ('london', 51.5002, -0.1262),
+ ('berlin', 52.5235, 13.4115),
+ ('paris', 48.8567, 2.3510),
+ ('madrid', 40.4167, -3.7033),
+ ('new_york', 40.71, -74.01),
+ ('los_angeles', 34.05, -118.24),
+ ]
+ }
+ );
+ df.describe(); # get aggregate stats for each city;
+ df.transpose()[['mean', 'max', 'min']].reset_index(); # just take mean, min, max;
+ df.rename(columns={'index':'city'}); # some column renaming;
+ df.pivot(columns='city').mean().to_frame().reset_index(); # force to be one row per city;
+ df.rename(columns={0:'degrees'}); # some column renaming;
+ pd.concat([df, df['city']+'_'+df['level_0']], axis=1); # add new column combining city and summary measurement label;
+ df.rename(columns={0:'measurement'}); # some column renaming;
+ df[['measurement', 'degrees']].set_index('measurement'); # just take two columns we want;
+ df.sort_index(); # sort by city name;
+ df.transpose(); # transpose so its just one wide row;
+ - name: "temperature_current_by_city"
+ title: "Temperature By City - Current"
+ family: "temperature.current"
+ context: "pandas.temperature"
+ type: "line"
+ units: "Celsius"
+ df_steps: >
+ pd.DataFrame.from_dict(
+ {city: requests.get(f'https://api.open-meteo.com/v1/forecast?latitude={lat}&longitude={lng}&current_weather=true').json()['current_weather']
+ for (city,lat,lng)
+ in [
+ ('dublin', 53.3441, -6.2675),
+ ('athens', 37.9792, 23.7166),
+ ('london', 51.5002, -0.1262),
+ ('berlin', 52.5235, 13.4115),
+ ('paris', 48.8567, 2.3510),
+ ('madrid', 40.4167, -3.7033),
+ ('new_york', 40.71, -74.01),
+ ('los_angeles', 34.05, -118.24),
+ ]
+ }
+ );
+ df.transpose();
+ df[['temperature']];
+ df.transpose();
+
+```
+
+
+##### API CSV Example
+
+example showing a read_csv from a url and some light pandas data wrangling.
+
+Config
+
+```yaml
+example_csv:
+ name: "example_csv"
+ update_every: 2
+ chart_configs:
+ - name: "london_system_cpu"
+ title: "London System CPU - Ratios"
+ family: "london_system_cpu"
+ context: "pandas"
+ type: "line"
+ units: "n"
+ df_steps: >
+ pd.read_csv('https://london.my-netdata.io/api/v1/data?chart=system.cpu&format=csv&after=-60', storage_options={'User-Agent': 'netdata'});
+ df.drop('time', axis=1);
+ df.mean().to_frame().transpose();
+ df.apply(lambda row: (row.user / row.system), axis = 1).to_frame();
+ df.rename(columns={0:'average_user_system_ratio'});
+ df*100;
+
+```
+
+
+##### API JSON Example
+
+example showing a read_json from a url and some light pandas data wrangling.
+
+Config
+
+```yaml
+example_json:
+ name: "example_json"
+ update_every: 2
+ chart_configs:
+ - name: "london_system_net"
+ title: "London System Net - Total Bandwidth"
+ family: "london_system_net"
+ context: "pandas"
+ type: "area"
+ units: "kilobits/s"
+ df_steps: >
+ pd.DataFrame(requests.get('https://london.my-netdata.io/api/v1/data?chart=system.net&format=json&after=-1').json()['data'], columns=requests.get('https://london.my-netdata.io/api/v1/data?chart=system.net&format=json&after=-1').json()['labels']);
+ df.drop('time', axis=1);
+ abs(df);
+ df.sum(axis=1).to_frame();
+ df.rename(columns={0:'total_bandwidth'});
+
+```
+
+
+##### XML Example
+
+example showing a read_xml from a url and some light pandas data wrangling.
+
+Config
+
+```yaml
+example_xml:
+ name: "example_xml"
+ update_every: 2
+ line_sep: "|"
+ chart_configs:
+ - name: "temperature_forcast"
+ title: "Temperature Forecast"
+ family: "temp"
+ context: "pandas.temp"
+ type: "line"
+ units: "celsius"
+ df_steps: >
+ pd.read_xml('http://metwdb-openaccess.ichec.ie/metno-wdb2ts/locationforecast?lat=54.7210798611;long=-8.7237392806', xpath='./product/time[1]/location/temperature', parser='etree')|
+ df.rename(columns={'value': 'dublin'})|
+ df[['dublin']]|
+
+```
+
+
+##### SQL Example
+
+example showing a read_sql from a postgres database using sqlalchemy.
+
+Config
+
+```yaml
+sql:
+ name: "sql"
+ update_every: 5
+ chart_configs:
+ - name: "sql"
+ title: "SQL Example"
+ family: "sql.example"
+ context: "example"
+ type: "line"
+ units: "percent"
+ df_steps: >
+ pd.read_sql_query(
+ sql='\
+ select \
+ random()*100 as metric_1, \
+ random()*100 as metric_2 \
+ ',
+ con=create_engine('postgresql://localhost/postgres?user=netdata&password=netdata')
+ );
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `pandas` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin pandas debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/pandas/metadata.yaml b/collectors/python.d.plugin/pandas/metadata.yaml
index 28a1d3b212d533..92ee1e986f6d37 100644
--- a/collectors/python.d.plugin/pandas/metadata.yaml
+++ b/collectors/python.d.plugin/pandas/metadata.yaml
@@ -5,7 +5,7 @@ modules:
module_name: pandas
monitored_instance:
name: Pandas
- link: https://learn.netdata.cloud/docs/data-collection/generic-data-collection/structured-data-pandas
+ link: https://pandas.pydata.org/
categories:
- data-collection.generic-data-collection
icon_filename: pandas.png
@@ -26,8 +26,6 @@ modules:
either locally or via some HTTP endpoint, you can easily ingest and present those metrics in Netdata, by leveraging the Pandas collector.
This collector can be used to collect pretty much anything that can be read by Pandas, and then processed by Pandas.
-
- More detailed information can be found in the Netdata documentation [here](https://learn.netdata.cloud/docs/data-collection/generic-data-collection/structured-data-pandas).
method_description: |
The collector uses [pandas](https://pandas.pydata.org/) to pull data and do pandas-based preprocessing, before feeding to Netdata.
supported_platforms:
@@ -92,11 +90,11 @@ modules:
default_value: None
required: true
- name: chart_configs.family
- description: "[family](https://learn.netdata.cloud/docs/data-collection/chart-dimensions-contexts-and-families#family) of the chart to be displayed in the dashboard."
+ description: "[family](https://github.com/netdata/netdata/blob/master/docs/cloud/visualize/interact-new-charts.md#families) of the chart to be displayed in the dashboard."
default_value: None
required: true
- name: chart_configs.context
- description: "[context](https://learn.netdata.cloud/docs/data-collection/chart-dimensions-contexts-and-families#context) of the chart to be displayed in the dashboard."
+ description: "[context](https://github.com/netdata/netdata/blob/master/docs/cloud/visualize/interact-new-charts.md#contexts) of the chart to be displayed in the dashboard."
default_value: None
required: true
- name: chart_configs.type
diff --git a/collectors/python.d.plugin/postfix/README.md b/collectors/python.d.plugin/postfix/README.md
deleted file mode 100644
index ba5565499a65b3..00000000000000
--- a/collectors/python.d.plugin/postfix/README.md
+++ /dev/null
@@ -1,59 +0,0 @@
-
-
-# Postfix collector
-
-Monitors MTA email queue statistics using [postqueue](http://www.postfix.org/postqueue.1.html) tool.
-
-The collector executes `postqueue -p` to get Postfix queue statistics.
-
-## Requirements
-
-Postfix has internal access controls that limit activities on the mail queue. By default, all users are allowed to view
-the queue. If your system is configured with stricter access controls, you need to grant the `netdata` user access to
-view the mail queue. In order to do it, add `netdata` to `authorized_mailq_users` in the `/etc/postfix/main.cf` file.
-
-See the `authorized_mailq_users` setting in
-the [Postfix documentation](https://www.postfix.org/postconf.5.html) for more details.
-
-## Charts
-
-It produces only two charts:
-
-1. **Postfix Queue Emails**
-
- - emails
-
-2. **Postfix Queue Emails Size** in KB
-
- - size
-
-## Configuration
-
-Configuration is not needed.
-### Troubleshooting
-
-To troubleshoot issues with the `postfix` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `postfix` module in debug mode:
-
-```bash
-./python.d.plugin postfix debug trace
-```
-
diff --git a/collectors/python.d.plugin/postfix/README.md b/collectors/python.d.plugin/postfix/README.md
new file mode 120000
index 00000000000000..c62eb5c247bb07
--- /dev/null
+++ b/collectors/python.d.plugin/postfix/README.md
@@ -0,0 +1 @@
+integrations/postfix.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/postfix/integrations/postfix.md b/collectors/python.d.plugin/postfix/integrations/postfix.md
new file mode 100644
index 00000000000000..2bb99922c7744f
--- /dev/null
+++ b/collectors/python.d.plugin/postfix/integrations/postfix.md
@@ -0,0 +1,151 @@
+
+
+# Postfix
+
+
+
+
+
+Plugin: python.d.plugin
+Module: postfix
+
+
+
+## Overview
+
+Keep an eye on Postfix metrics for efficient mail server operations.
+Improve your mail server performance with Netdata's real-time metrics and built-in alerts.
+
+
+Monitors MTA email queue statistics using [postqueue](http://www.postfix.org/postqueue.1.html) tool.
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+Postfix has internal access controls that limit activities on the mail queue. By default, all users are allowed to view the queue. If your system is configured with stricter access controls, you need to grant the `netdata` user access to view the mail queue. In order to do it, add `netdata` to `authorized_mailq_users` in the `/etc/postfix/main.cf` file.
+See the `authorized_mailq_users` setting in the [Postfix documentation](https://www.postfix.org/postconf.5.html) for more details.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+The collector executes `postqueue -p` to get Postfix queue statistics.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Postfix instance
+
+These metrics refer to the entire monitored application.
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| postfix.qemails | emails | emails |
+| postfix.qsize | size | KiB |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+There is no configuration file.
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 1 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `postfix` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin postfix debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/puppet/README.md b/collectors/python.d.plugin/puppet/README.md
deleted file mode 100644
index 3b0c55b97d9336..00000000000000
--- a/collectors/python.d.plugin/puppet/README.md
+++ /dev/null
@@ -1,90 +0,0 @@
-
-
-# Puppet collector
-
-Monitor status of Puppet Server and Puppet DB.
-
-Following charts are drawn:
-
-1. **JVM Heap**
-
- - committed (allocated from OS)
- - used (actual use)
-
-2. **JVM Non-Heap**
-
- - committed (allocated from OS)
- - used (actual use)
-
-3. **CPU Usage**
-
- - execution
- - GC (taken by garbage collection)
-
-4. **File Descriptors**
-
- - max
- - used
-
-## Configuration
-
-Edit the `python.d/puppet.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/puppet.conf
-```
-
-```yaml
-puppetdb:
- url: 'https://fqdn.example.com:8081'
- tls_cert_file: /path/to/client.crt
- tls_key_file: /path/to/client.key
- autodetection_retry: 1
-
-puppetserver:
- url: 'https://fqdn.example.com:8140'
- autodetection_retry: 1
-```
-
-When no configuration is given, module uses `https://fqdn.example.com:8140`.
-
-### notes
-
-- Exact Fully Qualified Domain Name of the node should be used.
-- Usually Puppet Server/DB startup time is VERY long. So, there should
- be quite reasonable retry count.
-- Secure PuppetDB config may require client certificate. Not applies
- to default PuppetDB configuration though.
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `puppet` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `puppet` module in debug mode:
-
-```bash
-./python.d.plugin puppet debug trace
-```
-
diff --git a/collectors/python.d.plugin/puppet/README.md b/collectors/python.d.plugin/puppet/README.md
new file mode 120000
index 00000000000000..b6c4c83f930244
--- /dev/null
+++ b/collectors/python.d.plugin/puppet/README.md
@@ -0,0 +1 @@
+integrations/puppet.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/puppet/integrations/puppet.md b/collectors/python.d.plugin/puppet/integrations/puppet.md
new file mode 100644
index 00000000000000..ca190b576c53de
--- /dev/null
+++ b/collectors/python.d.plugin/puppet/integrations/puppet.md
@@ -0,0 +1,215 @@
+
+
+# Puppet
+
+
+
+
+
+Plugin: python.d.plugin
+Module: puppet
+
+
+
+## Overview
+
+This collector monitors Puppet metrics about JVM Heap, Non-Heap, CPU usage and file descriptors.
+
+
+It uses Puppet's metrics API endpoint to gather the metrics.
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+By default, this collector will use `https://fqdn.example.com:8140` as the URL to look for metrics.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Puppet instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| puppet.jvm | committed, used | MiB |
+| puppet.jvm | committed, used | MiB |
+| puppet.cpu | execution, GC | percentage |
+| puppet.fdopen | used | descriptors |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/puppet.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/puppet.conf
+```
+#### Options
+
+This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize its data collection behavior.
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+> Notes:
+> - Exact Fully Qualified Domain Name of the node should be used.
+> - Puppet Server/DB startup time is usually VERY long, so a reasonably high retry count should be configured.
+> - A secured PuppetDB config may require a client certificate. This does not apply to the default PuppetDB configuration though.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| url | HTTP or HTTPS URL, exact Fully Qualified Domain Name of the node should be used. | https://fqdn.example.com:8081 | yes |
+| tls_verify | Control HTTPS server certificate verification. | False | no |
+| tls_ca_file | Optional CA (bundle) file to use | | no |
+| tls_cert_file | Optional client certificate file | | no |
+| tls_key_file | Optional client key file | | no |
+| update_every | Sets the default data collection frequency. | 30 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic example configuration
+
+```yaml
+puppetserver:
+ url: 'https://fqdn.example.com:8140'
+ autodetection_retry: 1
+
+```
+##### TLS Certificate
+
+An example using a TLS certificate
+
+Config
+
+```yaml
+puppetdb:
+ url: 'https://fqdn.example.com:8081'
+ tls_cert_file: /path/to/client.crt
+ tls_key_file: /path/to/client.key
+ autodetection_retry: 1
+
+```
+
+
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+Config
+
+```yaml
+puppetserver1:
+ url: 'https://fqdn.example.com:8140'
+ autodetection_retry: 1
+
+puppetserver2:
+ url: 'https://fqdn.example2.com:8140'
+ autodetection_retry: 1
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `puppet` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin puppet debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/python.d.plugin.in b/collectors/python.d.plugin/python.d.plugin.in
index 681ceb403ae509..86fea209c5d488 100644
--- a/collectors/python.d.plugin/python.d.plugin.in
+++ b/collectors/python.d.plugin/python.d.plugin.in
@@ -222,8 +222,11 @@ class ModuleConfig:
def __init__(self, name, config=None):
self.name = name
self.config = config or OrderedDict()
+ self.is_stock = False
def load(self, abs_path):
+ if not IS_ATTY:
+ self.is_stock = abs_path.startswith(DIRS.modules_stock_config)
self.config.update(load_config(abs_path) or dict())
def defaults(self):
@@ -242,6 +245,7 @@ class ModuleConfig:
config = OrderedDict()
config.update(job_config)
config['job_name'] = job_name
+ config['__is_stock'] = self.is_stock
for k, v in self.defaults().items():
config.setdefault(k, v)
@@ -309,7 +313,8 @@ class JobsConfigsBuilder:
return None
configs = config.create_jobs()
- self.log.info("[{0}] built {1} job(s) configs".format(module_name, len(configs)))
+ if not config.is_stock:
+ self.log.info("[{0}] built {1} job(s) configs".format(module_name, len(configs)))
self.apply_defaults(configs, self.module_defaults)
self.apply_defaults(configs, self.job_defaults)
@@ -338,6 +343,7 @@ class Job(threading.Thread):
self.autodetection_retry = config['autodetection_retry']
self.checks = self.inf
self.job = None
+ self.is_stock = config.get('__is_stock', False)
self.status = JOB_STATUS_INIT
def is_inited(self):
@@ -350,8 +356,14 @@ class Job(threading.Thread):
return self.job.name
def check(self):
+ if self.is_stock:
+ self.job.logger.mute()
+
ok = self.job.check()
+
+ self.job.logger.unmute()
self.checks -= self.checks != self.inf and not ok
+
return ok
def create(self):
@@ -503,7 +515,6 @@ class FileLockRegistry:
name = "docker" + name[7:]
return name
-
def register(self, name):
name = self.rename(name)
if name in self.locks:
@@ -582,8 +593,8 @@ class Plugin:
try:
statuses = JobsStatuses().from_file(abs_path)
except Exception as error:
- self.log.error("[{0}] config file invalid YAML format: {1}".format(
- module_name, ' '.join([v.strip() for v in str(error).split('\n')])))
+ self.log.error("'{0}' invalid JSON format: {1}".format(
+ abs_path, ' '.join([v.strip() for v in str(error).split('\n')])))
return None
self.log.debug("'{0}' is loaded".format(abs_path))
return statuses
@@ -685,12 +696,14 @@ class Plugin:
try:
ok = job.check()
except Exception as error:
- self.log.warning("{0}[{1}] : unhandled exception on check : {2}, skipping the job".format(
- job.module_name, job.real_name, repr(error)))
+ if not job.is_stock:
+ self.log.warning("{0}[{1}] : unhandled exception on check : {2}, skipping the job".format(
+ job.module_name, job.real_name, repr(error)))
job.status = JOB_STATUS_DROPPED
continue
if not ok:
- self.log.info('{0}[{1}] : check failed'.format(job.module_name, job.real_name))
+ if not job.is_stock:
+ self.log.info('{0}[{1}] : check failed'.format(job.module_name, job.real_name))
job.status = JOB_STATUS_RECOVERING if job.need_to_recheck() else JOB_STATUS_DROPPED
continue
self.log.info('{0}[{1}] : check success'.format(job.module_name, job.real_name))
@@ -876,6 +889,17 @@ def main():
cmd = parse_command_line()
log = PythonDLogger()
+ level = os.getenv('NETDATA_LOG_LEVEL') or str()
+ level = level.lower()
+ if level == 'debug':
+ log.logger.severity = 'DEBUG'
+ elif level == 'info':
+ log.logger.severity = 'INFO'
+ elif level == 'warn' or level == 'warning':
+ log.logger.severity = 'WARNING'
+ elif level == 'err' or level == 'error':
+ log.logger.severity = 'ERROR'
+
if cmd.debug:
log.logger.severity = 'DEBUG'
if cmd.trace:
diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py
index a7acc23b664590..3f122e1d9de7c1 100644
--- a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py
+++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/SimpleService.py
@@ -8,7 +8,7 @@
from bases.charts import Charts, ChartError, create_runtime_chart
from bases.collection import safe_print
-from bases.loggers import PythonDLimitedLogger
+from bases.loggers import PythonDLogger
from third_party.monotonic import monotonic
from time import sleep, time
@@ -62,7 +62,7 @@ def clean_module_name(name):
return name
-class SimpleService(PythonDLimitedLogger, object):
+class SimpleService(PythonDLogger, object):
"""
Prototype of Service class.
Implemented basic functionality to run jobs by `python.d.plugin`
@@ -73,7 +73,7 @@ def __init__(self, configuration, name=''):
:param configuration:
:param name:
"""
- PythonDLimitedLogger.__init__(self)
+ PythonDLogger.__init__(self)
self.configuration = configuration
self.order = list()
self.definitions = dict()
diff --git a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py
index 1faf036a4bac53..76129d376fe620 100644
--- a/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py
+++ b/collectors/python.d.plugin/python_modules/bases/FrameworkServices/UrlService.py
@@ -6,8 +6,6 @@
import urllib3
-from distutils.version import StrictVersion as version
-
from bases.FrameworkServices.SimpleService import SimpleService
try:
@@ -15,28 +13,11 @@
except AttributeError:
pass
-# https://github.com/urllib3/urllib3/blob/master/CHANGES.rst#19-2014-07-04
-# New retry logic and urllib3.util.retry.Retry configuration object. (Issue https://github.com/urllib3/urllib3/pull/326)
-URLLIB3_MIN_REQUIRED_VERSION = '1.9'
URLLIB3_VERSION = urllib3.__version__
URLLIB3 = 'urllib3'
-
-def version_check():
- if version(URLLIB3_VERSION) >= version(URLLIB3_MIN_REQUIRED_VERSION):
- return
-
- err = '{0} version: {1}, minimum required version: {2}, please upgrade'.format(
- URLLIB3,
- URLLIB3_VERSION,
- URLLIB3_MIN_REQUIRED_VERSION,
- )
- raise Exception(err)
-
-
class UrlService(SimpleService):
def __init__(self, configuration=None, name=None):
- version_check()
SimpleService.__init__(self, configuration=configuration, name=name)
self.debug("{0} version: {1}".format(URLLIB3, URLLIB3_VERSION))
self.url = self.configuration.get('url')
diff --git a/collectors/python.d.plugin/python_modules/bases/loggers.py b/collectors/python.d.plugin/python_modules/bases/loggers.py
index 47f196a6de32e0..7ae8ab0c12a288 100644
--- a/collectors/python.d.plugin/python_modules/bases/loggers.py
+++ b/collectors/python.d.plugin/python_modules/bases/loggers.py
@@ -4,6 +4,8 @@
# SPDX-License-Identifier: GPL-3.0-or-later
import logging
+import os
+import stat
import traceback
from sys import exc_info
@@ -15,39 +17,46 @@
from bases.collection import on_try_except_finally, unicode_str
+LOGGING_LEVELS = {
+ 'CRITICAL': 50,
+ 'ERROR': 40,
+ 'WARNING': 30,
+ 'INFO': 20,
+ 'DEBUG': 10,
+ 'NOTSET': 0,
+}
-LOGGING_LEVELS = {'CRITICAL': 50,
- 'ERROR': 40,
- 'WARNING': 30,
- 'INFO': 20,
- 'DEBUG': 10,
- 'NOTSET': 0}
-DEFAULT_LOG_LINE_FORMAT = '%(asctime)s: %(name)s %(levelname)s : %(message)s'
-DEFAULT_LOG_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
+def is_stderr_connected_to_journal():
+ journal_stream = os.environ.get("JOURNAL_STREAM")
+ if not journal_stream:
+ return False
-PYTHON_D_LOG_LINE_FORMAT = '%(asctime)s: %(name)s %(levelname)s: %(module_name)s[%(job_name)s] : %(message)s'
-PYTHON_D_LOG_NAME = 'python.d'
+ colon_index = journal_stream.find(":")
+ if colon_index <= 0:
+ return False
+ device, inode = journal_stream[:colon_index], journal_stream[colon_index + 1:]
-def limiter(log_max_count=30, allowed_in_seconds=60):
- def on_decorator(func):
+ try:
+ device_number, inode_number = os.fstat(2)[stat.ST_DEV], os.fstat(2)[stat.ST_INO]
+ except OSError:
+ return False
- def on_call(*args):
- current_time = args[0]._runtime_counters.start_mono
- lc = args[0]._logger_counters
+ return str(device_number) == device and str(inode_number) == inode
- if lc.logged and lc.logged % log_max_count == 0:
- if current_time - lc.time_to_compare <= allowed_in_seconds:
- lc.dropped += 1
- return
- lc.time_to_compare = current_time
- lc.logged += 1
- func(*args)
+is_journal = is_stderr_connected_to_journal()
+
+DEFAULT_LOG_LINE_FORMAT = '%(asctime)s: %(name)s %(levelname)s : %(message)s'
+PYTHON_D_LOG_LINE_FORMAT = '%(asctime)s: %(name)s %(levelname)s: %(module_name)s[%(job_name)s] : %(message)s'
+
+if is_journal:
+ DEFAULT_LOG_LINE_FORMAT = '%(name)s %(levelname)s : %(message)s'
+ PYTHON_D_LOG_LINE_FORMAT = '%(name)s %(levelname)s: %(module_name)s[%(job_name)s] : %(message)s '
- return on_call
- return on_decorator
+DEFAULT_LOG_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'
+PYTHON_D_LOG_NAME = 'python.d'
def add_traceback(func):
@@ -66,27 +75,16 @@ def on_call(*args):
return on_call
-class LoggerCounters:
- def __init__(self):
- self.logged = 0
- self.dropped = 0
- self.time_to_compare = time()
-
- def __repr__(self):
- return 'LoggerCounter(logged: {logged}, dropped: {dropped})'.format(logged=self.logged,
- dropped=self.dropped)
-
-
class BaseLogger(object):
- def __init__(self, logger_name, log_fmt=DEFAULT_LOG_LINE_FORMAT, date_fmt=DEFAULT_LOG_TIME_FORMAT,
- handler=logging.StreamHandler):
- """
- :param logger_name:
- :param log_fmt:
- :param date_fmt:
- :param handler:
- """
+ def __init__(
+ self,
+ logger_name,
+ log_fmt=DEFAULT_LOG_LINE_FORMAT,
+ date_fmt=DEFAULT_LOG_TIME_FORMAT,
+ handler=logging.StreamHandler,
+ ):
self.logger = logging.getLogger(logger_name)
+ self._muted = False
if not self.has_handlers():
self.severity = 'INFO'
self.logger.addHandler(handler())
@@ -96,11 +94,6 @@ def __repr__(self):
return ''.format(name=self.logger.name)
def set_formatter(self, fmt, date_fmt=DEFAULT_LOG_TIME_FORMAT):
- """
- :param fmt:
- :param date_fmt:
- :return:
- """
if self.has_handlers():
self.logger.handlers[0].setFormatter(logging.Formatter(fmt=fmt, datefmt=date_fmt))
@@ -113,43 +106,48 @@ def severity(self):
@severity.setter
def severity(self, level):
- """
- :param level: or
- :return:
- """
if level in LOGGING_LEVELS:
self.logger.setLevel(LOGGING_LEVELS[level])
+ def _log(self, level, *msg, **kwargs):
+ if not self._muted:
+ self.logger.log(level, ' '.join(map(unicode_str, msg)), **kwargs)
+
def debug(self, *msg, **kwargs):
- self.logger.debug(' '.join(map(unicode_str, msg)), **kwargs)
+ self._log(logging.DEBUG, *msg, **kwargs)
def info(self, *msg, **kwargs):
- self.logger.info(' '.join(map(unicode_str, msg)), **kwargs)
+ self._log(logging.INFO, *msg, **kwargs)
def warning(self, *msg, **kwargs):
- self.logger.warning(' '.join(map(unicode_str, msg)), **kwargs)
+ self._log(logging.WARN, *msg, **kwargs)
def error(self, *msg, **kwargs):
- self.logger.error(' '.join(map(unicode_str, msg)), **kwargs)
+ self._log(logging.ERROR, *msg, **kwargs)
- def alert(self, *msg, **kwargs):
- self.logger.critical(' '.join(map(unicode_str, msg)), **kwargs)
+ def alert(self, *msg, **kwargs):
+ self._log(logging.CRITICAL, *msg, **kwargs)
@on_try_except_finally(on_finally=(exit, 1))
def fatal(self, *msg, **kwargs):
- self.logger.critical(' '.join(map(unicode_str, msg)), **kwargs)
+ self._log(logging.CRITICAL, *msg, **kwargs)
+
+ def mute(self):
+ self._muted = True
+
+ def unmute(self):
+ self._muted = False
class PythonDLogger(object):
- def __init__(self, logger_name=PYTHON_D_LOG_NAME, log_fmt=PYTHON_D_LOG_LINE_FORMAT):
- """
- :param logger_name:
- :param log_fmt:
- """
+ def __init__(
+ self,
+ logger_name=PYTHON_D_LOG_NAME,
+ log_fmt=PYTHON_D_LOG_LINE_FORMAT,
+ ):
self.logger = BaseLogger(logger_name, log_fmt=log_fmt)
self.module_name = 'plugin'
self.job_name = 'main'
- self._logger_counters = LoggerCounters()
_LOG_TRACEBACK = False
@@ -162,45 +160,39 @@ def log_traceback(self, value):
PythonDLogger._LOG_TRACEBACK = value
def debug(self, *msg):
- self.logger.debug(*msg, extra={'module_name': self.module_name,
- 'job_name': self.job_name or self.module_name})
+ self.logger.debug(*msg, extra={
+ 'module_name': self.module_name,
+ 'job_name': self.job_name or self.module_name,
+ })
def info(self, *msg):
- self.logger.info(*msg, extra={'module_name': self.module_name,
- 'job_name': self.job_name or self.module_name})
+ self.logger.info(*msg, extra={
+ 'module_name': self.module_name,
+ 'job_name': self.job_name or self.module_name,
+ })
def warning(self, *msg):
- self.logger.warning(*msg, extra={'module_name': self.module_name,
- 'job_name': self.job_name or self.module_name})
+ self.logger.warning(*msg, extra={
+ 'module_name': self.module_name,
+ 'job_name': self.job_name or self.module_name,
+ })
@add_traceback
def error(self, *msg):
- self.logger.error(*msg, extra={'module_name': self.module_name,
- 'job_name': self.job_name or self.module_name})
+ self.logger.error(*msg, extra={
+ 'module_name': self.module_name,
+ 'job_name': self.job_name or self.module_name,
+ })
@add_traceback
def alert(self, *msg):
- self.logger.alert(*msg, extra={'module_name': self.module_name,
- 'job_name': self.job_name or self.module_name})
+ self.logger.alert(*msg, extra={
+ 'module_name': self.module_name,
+ 'job_name': self.job_name or self.module_name,
+ })
def fatal(self, *msg):
- self.logger.fatal(*msg, extra={'module_name': self.module_name,
- 'job_name': self.job_name or self.module_name})
-
-
-class PythonDLimitedLogger(PythonDLogger):
- @limiter()
- def info(self, *msg):
- PythonDLogger.info(self, *msg)
-
- @limiter()
- def warning(self, *msg):
- PythonDLogger.warning(self, *msg)
-
- @limiter()
- def error(self, *msg):
- PythonDLogger.error(self, *msg)
-
- @limiter()
- def alert(self, *msg):
- PythonDLogger.alert(self, *msg)
+ self.logger.fatal(*msg, extra={
+ 'module_name': self.module_name,
+ 'job_name': self.job_name or self.module_name,
+ })
diff --git a/collectors/python.d.plugin/rethinkdbs/README.md b/collectors/python.d.plugin/rethinkdbs/README.md
deleted file mode 100644
index 527ce4c316c146..00000000000000
--- a/collectors/python.d.plugin/rethinkdbs/README.md
+++ /dev/null
@@ -1,77 +0,0 @@
-
-
-# RethinkDB collector
-
-Collects database server and cluster statistics.
-
-Following charts are drawn:
-
-1. **Connected Servers**
-
- - connected
- - missing
-
-2. **Active Clients**
-
- - active
-
-3. **Queries** per second
-
- - queries
-
-4. **Documents** per second
-
- - documents
-
-## Configuration
-
-Edit the `python.d/rethinkdbs.conf` configuration file using `edit-config` from the
-Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically
-at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/rethinkdbs.conf
-```
-
-```yaml
-localhost:
- name: 'local'
- host: '127.0.0.1'
- port: 28015
- user: "user"
- password: "pass"
-```
-
-When no configuration file is found, module tries to connect to `127.0.0.1:28015`.
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `rethinkdbs` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `rethinkdbs` module in debug mode:
-
-```bash
-./python.d.plugin rethinkdbs debug trace
-```
-
diff --git a/collectors/python.d.plugin/rethinkdbs/README.md b/collectors/python.d.plugin/rethinkdbs/README.md
new file mode 120000
index 00000000000000..78ddcfa18e2c66
--- /dev/null
+++ b/collectors/python.d.plugin/rethinkdbs/README.md
@@ -0,0 +1 @@
+integrations/rethinkdb.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/rethinkdbs/integrations/rethinkdb.md b/collectors/python.d.plugin/rethinkdbs/integrations/rethinkdb.md
new file mode 100644
index 00000000000000..ab51c05149d621
--- /dev/null
+++ b/collectors/python.d.plugin/rethinkdbs/integrations/rethinkdb.md
@@ -0,0 +1,190 @@
+
+
+# RethinkDB
+
+
+
+
+
+Plugin: python.d.plugin
+Module: rethinkdbs
+
+
+
+## Overview
+
+This collector monitors metrics about RethinkDB clusters and database servers.
+
+It uses the `rethinkdb` python module to connect to a RethinkDB server instance and gather statistics.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+When no configuration file is found, the collector tries to connect to 127.0.0.1:28015.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per RethinkDB instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| rethinkdb.cluster_connected_servers | connected, missing | servers |
+| rethinkdb.cluster_clients_active | active | clients |
+| rethinkdb.cluster_queries | queries | queries/s |
+| rethinkdb.cluster_documents | reads, writes | documents/s |
+
+### Per database server
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| rethinkdb.client_connections | connections | connections |
+| rethinkdb.clients_active | active | clients |
+| rethinkdb.queries | queries | queries/s |
+| rethinkdb.documents | reads, writes | documents/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Required python module
+
+The collector requires the `rethinkdb` python module to be installed.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/rethinkdbs.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/rethinkdbs.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| host | Hostname or IP address of the RethinkDB server. | localhost | no |
+| port | Port to connect to the RethinkDB server. | 28015 | no |
+| user | The username to use to connect to the RethinkDB server. | admin | no |
+| password | The password to use to connect to the RethinkDB server. | | no |
+| timeout | Sets the connection timeout for the RethinkDB server. | 2 | no |
+
+
+
+#### Examples
+
+##### Local RethinkDB server
+
+An example of a configuration for a local RethinkDB server
+
+```yaml
+localhost:
+ name: 'local'
+ host: '127.0.0.1'
+ port: 28015
+ user: "user"
+ password: "pass"
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `rethinkdbs` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin rethinkdbs debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/retroshare/README.md b/collectors/python.d.plugin/retroshare/README.md
deleted file mode 100644
index b7f2fcb14d4b47..00000000000000
--- a/collectors/python.d.plugin/retroshare/README.md
+++ /dev/null
@@ -1,70 +0,0 @@
-
-
-# RetroShare collector
-
-Monitors application bandwidth, peers and DHT metrics.
-
-This module will monitor one or more `RetroShare` applications, depending on your configuration.
-
-## Charts
-
-This module produces the following charts:
-
-- Bandwidth in `kilobits/s`
-- Peers in `peers`
-- DHT in `peers`
-
-
-## Configuration
-
-Edit the `python.d/retroshare.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/retroshare.conf
-```
-
-Here is an example for 2 servers:
-
-```yaml
-localhost:
- url : 'http://localhost:9090'
- user : "user"
- password : "pass"
-
-remote:
- url : 'http://203.0.113.1:9090'
- user : "user"
- password : "pass"
-```
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `retroshare` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `retroshare` module in debug mode:
-
-```bash
-./python.d.plugin retroshare debug trace
-```
-
diff --git a/collectors/python.d.plugin/retroshare/README.md b/collectors/python.d.plugin/retroshare/README.md
new file mode 120000
index 00000000000000..4e4c2cdb74d520
--- /dev/null
+++ b/collectors/python.d.plugin/retroshare/README.md
@@ -0,0 +1 @@
+integrations/retroshare.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/retroshare/integrations/retroshare.md b/collectors/python.d.plugin/retroshare/integrations/retroshare.md
new file mode 100644
index 00000000000000..4fc003c6f909eb
--- /dev/null
+++ b/collectors/python.d.plugin/retroshare/integrations/retroshare.md
@@ -0,0 +1,191 @@
+
+
+# RetroShare
+
+
+
+
+
+Plugin: python.d.plugin
+Module: retroshare
+
+
+
+## Overview
+
+This collector monitors RetroShare statistics such as application bandwidth, peers, and DHT metrics.
+
+It connects to the RetroShare web interface to gather metrics.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+The collector will attempt to connect and detect a RetroShare web interface through http://localhost:9090, even without any configuration.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per RetroShare instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| retroshare.bandwidth | Upload, Download | kilobits/s |
+| retroshare.peers | All friends, Connected friends | peers |
+| retroshare.dht | DHT nodes estimated, RS nodes estimated | peers |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ retroshare_dht_working ](https://github.com/netdata/netdata/blob/master/health/health.d/retroshare.conf) | retroshare.dht | number of DHT peers |
+
+
+## Setup
+
+### Prerequisites
+
+#### RetroShare web interface
+
+RetroShare needs to be configured to enable the RetroShare web interface and allow access from the Netdata host.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/retroshare.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/retroshare.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| url | The URL to the RetroShare Web UI. | http://localhost:9090 | no |
+
+
+
+#### Examples
+
+##### Local RetroShare Web UI
+
+A basic configuration for a RetroShare server running on localhost.
+
+Config
+
+```yaml
+localhost:
+ name: 'local retroshare'
+ url: 'http://localhost:9090'
+
+```
+
+
+##### Remote RetroShare Web UI
+
+A basic configuration for a remote RetroShare server.
+
+Config
+
+```yaml
+remote:
+ name: 'remote retroshare'
+ url: 'http://1.2.3.4:9090'
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `retroshare` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin retroshare debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/riakkv/README.md b/collectors/python.d.plugin/riakkv/README.md
deleted file mode 100644
index e822c551ebae8b..00000000000000
--- a/collectors/python.d.plugin/riakkv/README.md
+++ /dev/null
@@ -1,149 +0,0 @@
-
-
-# Riak KV collector
-
-Collects database stats from `/stats` endpoint.
-
-## Requirements
-
-- An accessible `/stats` endpoint. See [the Riak KV configuration reference documentation](https://docs.riak.com/riak/kv/2.2.3/configuring/reference/#client-interfaces)
- for how to enable this.
-
-The following charts are included, which are mostly derived from the metrics
-listed
-[here](https://docs.riak.com/riak/kv/latest/using/reference/statistics-monitoring/index.html#riak-metrics-to-graph).
-
-1. **Throughput** in operations/s
-
-- **KV operations**
- - gets
- - puts
-
-- **Data type updates**
- - counters
- - sets
- - maps
-
-- **Search queries**
- - queries
-
-- **Search documents**
- - indexed
-
-- **Strong consistency operations**
- - gets
- - puts
-
-2. **Latency** in milliseconds
-
-- **KV latency** of the past minute
- - get (mean, median, 95th / 99th / 100th percentile)
- - put (mean, median, 95th / 99th / 100th percentile)
-
-- **Data type latency** of the past minute
- - counter_merge (mean, median, 95th / 99th / 100th percentile)
- - set_merge (mean, median, 95th / 99th / 100th percentile)
- - map_merge (mean, median, 95th / 99th / 100th percentile)
-
-- **Search latency** of the past minute
- - query (median, min, max, 95th / 99th percentile)
- - index (median, min, max, 95th / 99th percentile)
-
-- **Strong consistency latency** of the past minute
- - get (mean, median, 95th / 99th / 100th percentile)
- - put (mean, median, 95th / 99th / 100th percentile)
-
-3. **Erlang VM metrics**
-
-- **System counters**
- - processes
-
-- **Memory allocation** in MB
- - processes.allocated
- - processes.used
-
-4. **General load / health metrics**
-
-- **Siblings encountered in KV operations** during the past minute
- - get (mean, median, 95th / 99th / 100th percentile)
-
-- **Object size in KV operations** during the past minute in KB
- - get (mean, median, 95th / 99th / 100th percentile)
-
-- **Message queue length** in unprocessed messages
- - vnodeq_size (mean, median, 95th / 99th / 100th percentile)
-
-- **Index operations** encountered by Search
- - errors
-
-- **Protocol buffer connections**
- - active
-
-- **Repair operations coordinated by this node**
- - read
-
-- **Active finite state machines by kind**
- - get
- - put
- - secondary_index
- - list_keys
-
-- **Rejected finite state machines**
- - get
- - put
-
-- **Number of writes to Search failed due to bad data format by reason**
- - bad_entry
- - extract_fail
-
-## Configuration
-
-Edit the `python.d/riakkv.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/riakkv.conf
-```
-
-The module needs to be passed the full URL to Riak's stats endpoint.
-For example:
-
-```yaml
-myriak:
- url: http://myriak.example.com:8098/stats
-```
-
-With no explicit configuration given, the module will attempt to connect to
-`http://localhost:8098/stats`.
-
-The default update frequency for the plugin is set to 2 seconds as Riak
-internally updates the metrics every second. If we were to update the metrics
-every second, the resulting graph would contain odd jitter.
-### Troubleshooting
-
-To troubleshoot issues with the `riakkv` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `riakkv` module in debug mode:
-
-```bash
-./python.d.plugin riakkv debug trace
-```
-
diff --git a/collectors/python.d.plugin/riakkv/README.md b/collectors/python.d.plugin/riakkv/README.md
new file mode 120000
index 00000000000000..f43ece09ba3b0f
--- /dev/null
+++ b/collectors/python.d.plugin/riakkv/README.md
@@ -0,0 +1 @@
+integrations/riakkv.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/riakkv/integrations/riakkv.md b/collectors/python.d.plugin/riakkv/integrations/riakkv.md
new file mode 100644
index 00000000000000..2e8279bc3122f9
--- /dev/null
+++ b/collectors/python.d.plugin/riakkv/integrations/riakkv.md
@@ -0,0 +1,220 @@
+
+
+# RiakKV
+
+
+
+
+
+Plugin: python.d.plugin
+Module: riakkv
+
+
+
+## Overview
+
+This collector monitors RiakKV metrics about throughput, latency, resources and more.
+
+
+This collector reads the database stats from the `/stats` endpoint.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+If the /stats endpoint is accessible, RiakKV instances on the local host running on port 8098 will be autodetected.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per RiakKV instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| riak.kv.throughput | gets, puts | operations/s |
+| riak.dt.vnode_updates | counters, sets, maps | operations/s |
+| riak.search | queries | queries/s |
+| riak.search.documents | indexed | documents/s |
+| riak.consistent.operations | gets, puts | operations/s |
+| riak.kv.latency.get | mean, median, 95, 99, 100 | ms |
+| riak.kv.latency.put | mean, median, 95, 99, 100 | ms |
+| riak.dt.latency.counter_merge | mean, median, 95, 99, 100 | ms |
+| riak.dt.latency.set_merge | mean, median, 95, 99, 100 | ms |
+| riak.dt.latency.map_merge | mean, median, 95, 99, 100 | ms |
+| riak.search.latency.query | median, min, 95, 99, 999, max | ms |
+| riak.search.latency.index | median, min, 95, 99, 999, max | ms |
+| riak.consistent.latency.get | mean, median, 95, 99, 100 | ms |
+| riak.consistent.latency.put | mean, median, 95, 99, 100 | ms |
+| riak.vm | processes | total |
+| riak.vm.memory.processes | allocated, used | MB |
+| riak.kv.siblings_encountered.get | mean, median, 95, 99, 100 | siblings |
+| riak.kv.objsize.get | mean, median, 95, 99, 100 | KB |
+| riak.search.vnodeq_size | mean, median, 95, 99, 100 | messages |
+| riak.search.index | errors | errors |
+| riak.core.protobuf_connections | active | connections |
+| riak.core.repairs | read | repairs |
+| riak.core.fsm_active | get, put, secondary index, list keys | fsms |
+| riak.core.fsm_rejected | get, put | fsms |
+| riak.search.index | bad_entry, extract_fail | writes |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ riakkv_1h_kv_get_mean_latency ](https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf) | riak.kv.latency.get | average time between reception of client GET request and subsequent response to client over the last hour |
+| [ riakkv_kv_get_slow ](https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf) | riak.kv.latency.get | average time between reception of client GET request and subsequent response to the client over the last 3 minutes, compared to the average over the last hour |
+| [ riakkv_1h_kv_put_mean_latency ](https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf) | riak.kv.latency.put | average time between reception of client PUT request and subsequent response to the client over the last hour |
+| [ riakkv_kv_put_slow ](https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf) | riak.kv.latency.put | average time between reception of client PUT request and subsequent response to the client over the last 3 minutes, compared to the average over the last hour |
+| [ riakkv_vm_high_process_count ](https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf) | riak.vm | number of processes running in the Erlang VM |
+| [ riakkv_list_keys_active ](https://github.com/netdata/netdata/blob/master/health/health.d/riakkv.conf) | riak.core.fsm_active | number of currently running list keys finite state machines |
+
+
+## Setup
+
+### Prerequisites
+
+#### Configure RiakKV to enable /stats endpoint
+
+You can follow the RiakKV configuration reference documentation for how to enable this.
+
+Source: https://docs.riak.com/riak/kv/2.2.3/configuring/reference/#client-interfaces
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/riakkv.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/riakkv.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| url | The url of the server | no | yes |
+
+
+
+#### Examples
+
+##### Basic (default)
+
+A basic example configuration per job
+
+```yaml
+local:
+  url: 'http://localhost:8098/stats'
+
+```
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+Config
+
+```yaml
+local:
+ url: 'http://localhost:8098/stats'
+
+remote:
+ url: 'http://192.0.2.1:8098/stats'
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `riakkv` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin riakkv debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/samba/README.md b/collectors/python.d.plugin/samba/README.md
deleted file mode 100644
index 8fe133fd510794..00000000000000
--- a/collectors/python.d.plugin/samba/README.md
+++ /dev/null
@@ -1,144 +0,0 @@
-
-
-# Samba collector
-
-Monitors the performance metrics of Samba file sharing using `smbstatus` command-line tool.
-
-Executed commands:
-
-- `sudo -n smbstatus -P`
-
-## Requirements
-
-- `smbstatus` program
-- `sudo` program
-- `smbd` must be compiled with profiling enabled
-- `smbd` must be started either with the `-P 1` option or inside `smb.conf` using `smbd profiling level`
-
-The module uses `smbstatus`, which can only be executed by `root`. It uses
-`sudo` and assumes that it is configured such that the `netdata` user can execute `smbstatus` as root without a
-password.
-
-- Add to your `/etc/sudoers` file:
-
-`which smbstatus` shows the full path to the binary.
-
-```bash
-netdata ALL=(root) NOPASSWD: /path/to/smbstatus
-```
-
-- Reset Netdata's systemd
- unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux
- distributions with systemd)
-
-The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `smbstatus` using `sudo`.
-
-
-As the `root` user, do the following:
-
-```cmd
-mkdir /etc/systemd/system/netdata.service.d
-echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf
-systemctl daemon-reload
-systemctl restart netdata.service
-```
-
-## Charts
-
-1. **Syscall R/Ws** in kilobytes/s
-
- - sendfile
- - recvfile
-
-2. **Smb2 R/Ws** in kilobytes/s
-
- - readout
- - writein
- - readin
- - writeout
-
-3. **Smb2 Create/Close** in operations/s
-
- - create
- - close
-
-4. **Smb2 Info** in operations/s
-
- - getinfo
- - setinfo
-
-5. **Smb2 Find** in operations/s
-
- - find
-
-6. **Smb2 Notify** in operations/s
-
- - notify
-
-7. **Smb2 Lesser Ops** as counters
-
- - tcon
- - negprot
- - tdis
- - cancel
- - logoff
- - flush
- - lock
- - keepalive
- - break
- - sessetup
-
-## Enable the collector
-
-The `samba` collector is disabled by default. To enable it, use `edit-config` from the
-Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf`
-file.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d.conf
-```
-
-Change the value of the `samba` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl
-restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
-
-## Configuration
-
-Edit the `python.d/samba.conf` configuration file using `edit-config` from the
-Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/samba.conf
-```
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `samba` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `samba` module in debug mode:
-
-```bash
-./python.d.plugin samba debug trace
-```
-
diff --git a/collectors/python.d.plugin/samba/README.md b/collectors/python.d.plugin/samba/README.md
new file mode 120000
index 00000000000000..3b63bbab6615bf
--- /dev/null
+++ b/collectors/python.d.plugin/samba/README.md
@@ -0,0 +1 @@
+integrations/samba.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/samba/integrations/samba.md b/collectors/python.d.plugin/samba/integrations/samba.md
new file mode 100644
index 00000000000000..1bd1664ee69440
--- /dev/null
+++ b/collectors/python.d.plugin/samba/integrations/samba.md
@@ -0,0 +1,221 @@
+
+
+# Samba
+
+
+
+
+
+Plugin: python.d.plugin
+Module: samba
+
+
+
+## Overview
+
+This collector monitors the performance metrics of Samba file sharing.
+
+It uses the `smbstatus` command-line tool.
+
+Executed commands:
+
+- `sudo -n smbstatus -P`
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+`smbstatus` is used, which can only be executed by `root`. It uses `sudo` and assumes that it is configured such that the `netdata` user can execute `smbstatus` as root without a password.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+After all the permissions are satisfied, the `smbstatus -P` command is executed.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Samba instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| syscall.rw | sendfile, recvfile | KiB/s |
+| smb2.rw | readout, writein, readin, writeout | KiB/s |
+| smb2.create_close | create, close | operations/s |
+| smb2.get_set_info | getinfo, setinfo | operations/s |
+| smb2.find | find | operations/s |
+| smb2.notify | notify | operations/s |
+| smb2.sm_counters | tcon, negprot, tdis, cancel, logoff, flush, lock, keepalive, break, sessetup | count |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Enable the samba collector
+
+The `samba` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file.
+
+```bash
+cd /etc/netdata # Replace this path with your Netdata config directory, if different
+sudo ./edit-config python.d.conf
+```
+Change the value of the `samba` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
+
+
+#### Permissions and programs
+
+To run the collector you need:
+
+- `smbstatus` program
+- `sudo` program
+- `smbd` must be compiled with profiling enabled
+- `smbd` must be started either with the `-P 1` option or inside `smb.conf` using `smbd profiling level`
+
+The module uses `smbstatus`, which can only be executed by `root`. It uses `sudo` and assumes that it is configured such that the `netdata` user can execute `smbstatus` as root without a password.
+
+- Add to your `/etc/sudoers` file:
+
+ `which smbstatus` shows the full path to the binary.
+
+ ```bash
+ netdata ALL=(root) NOPASSWD: /path/to/smbstatus
+ ```
+
+- Reset Netdata's systemd unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux distributions with systemd)
+
+ The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `smbstatus` using `sudo`.
+
+
+ As the `root` user, do the following:
+
+ ```cmd
+ mkdir /etc/systemd/system/netdata.service.d
+ echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf
+ systemctl daemon-reload
+ systemctl restart netdata.service
+ ```
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/samba.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/samba.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic example configuration.
+
+Config
+
+```yaml
+my_job_name:
+ name: my_name
+ update_every: 1
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `samba` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin samba debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/samba/metadata.yaml b/collectors/python.d.plugin/samba/metadata.yaml
index 43bca208ed194c..ec31e047555773 100644
--- a/collectors/python.d.plugin/samba/metadata.yaml
+++ b/collectors/python.d.plugin/samba/metadata.yaml
@@ -23,9 +23,9 @@ modules:
metrics_description: "This collector monitors the performance metrics of Samba file sharing."
method_description: |
It is using the `smbstatus` command-line tool.
-
+
Executed commands:
-
+
- `sudo -n smbstatus -P`
supported_platforms:
include: []
@@ -44,32 +44,41 @@ modules:
setup:
prerequisites:
list:
+ - title: Enable the samba collector
+ description: |
+ The `samba` collector is disabled by default. To enable it, use `edit-config` from the Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`, to edit the `python.d.conf` file.
+
+ ```bash
+ cd /etc/netdata # Replace this path with your Netdata config directory, if different
+ sudo ./edit-config python.d.conf
+ ```
+ Change the value of the `samba` setting to `yes`. Save the file and restart the Netdata Agent with `sudo systemctl restart netdata`, or the [appropriate method](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for your system.
- title: Permissions and programs
description: |
To run the collector you need:
-
+
- `smbstatus` program
- `sudo` program
- `smbd` must be compiled with profiling enabled
- `smbd` must be started either with the `-P 1` option or inside `smb.conf` using `smbd profiling level`
-
+
The module uses `smbstatus`, which can only be executed by `root`. It uses `sudo` and assumes that it is configured such that the `netdata` user can execute `smbstatus` as root without a password.
-
+
- add to your `/etc/sudoers` file:
-
+
`which smbstatus` shows the full path to the binary.
-
+
```bash
netdata ALL=(root) NOPASSWD: /path/to/smbstatus
```
-
+
- Reset Netdata's systemd unit [CapabilityBoundingSet](https://www.freedesktop.org/software/systemd/man/systemd.exec.html#Capabilities) (Linux distributions with systemd)
-
+
The default CapabilityBoundingSet doesn't allow using `sudo`, and is quite strict in general. Resetting is not optimal, but a next-best solution given the inability to execute `smbstatus` using `sudo`.
-
-
+
+
As the `root` user, do the following:
-
+
```cmd
mkdir /etc/systemd/system/netdata.service.d
echo -e '[Service]\nCapabilityBoundingSet=~' | tee /etc/systemd/system/netdata.service.d/unset-capability-bounding-set.conf
@@ -82,14 +91,14 @@ modules:
options:
description: |
There are 2 sections:
-
+
* Global variables
* One or more JOBS that can define multiple different instances to monitor.
-
+
The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
-
+
Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
-
+
Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
folding:
title: "Config options"
diff --git a/collectors/python.d.plugin/sensors/README.md b/collectors/python.d.plugin/sensors/README.md
deleted file mode 100644
index 7ee31bd6728015..00000000000000
--- a/collectors/python.d.plugin/sensors/README.md
+++ /dev/null
@@ -1,55 +0,0 @@
-
-
-# Linux machine sensors collector
-
-Reads system sensors information (temperature, voltage, electric current, power, etc.).
-
-Charts are created dynamically.
-
-## Configuration
-
-Edit the `python.d/sensors.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/sensors.conf
-```
-
-### possible issues
-
-There have been reports from users that on certain servers, ACPI ring buffer errors are printed by the kernel (`dmesg`)
-when ACPI sensors are being accessed. We are tracking such cases in
-issue [#827](https://github.com/netdata/netdata/issues/827). Please join this discussion for help.
-
-When `lm-sensors` doesn't work on your device (e.g. for RPi temperatures),
-use [the legacy bash collector](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/sensors/README.md)
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `sensors` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `sensors` module in debug mode:
-
-```bash
-./python.d.plugin sensors debug trace
-```
-
diff --git a/collectors/python.d.plugin/sensors/README.md b/collectors/python.d.plugin/sensors/README.md
new file mode 120000
index 00000000000000..4e92b088274370
--- /dev/null
+++ b/collectors/python.d.plugin/sensors/README.md
@@ -0,0 +1 @@
+integrations/linux_sensors_lm-sensors.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md b/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md
new file mode 100644
index 00000000000000..e426c8c839b989
--- /dev/null
+++ b/collectors/python.d.plugin/sensors/integrations/linux_sensors_lm-sensors.md
@@ -0,0 +1,187 @@
+
+
+# Linux Sensors (lm-sensors)
+
+
+
+
+
+Plugin: python.d.plugin
+Module: sensors
+
+
+
+## Overview
+
+Examine Linux Sensors metrics with Netdata for insights into hardware health and performance.
+
+Enhance your system's reliability with real-time hardware health insights.
+
+
+Reads system sensors information (temperature, voltage, electric current, power, etc.) via [lm-sensors](https://hwmon.wiki.kernel.org/lm_sensors).
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+The following types of sensors are auto-detected:
+temperature, fan, voltage, current, power, energy, and humidity.
+
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per chip
+
+Metrics related to chips. Each chip provides a set of the following metrics, each having the chip name in the metric name as reported by `sensors -u`.
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| sensors.temperature | a dimension per sensor | Celsius |
+| sensors.voltage | a dimension per sensor | Volts |
+| sensors.current | a dimension per sensor | Ampere |
+| sensors.power | a dimension per sensor | Watt |
+| sensors.fan | a dimension per sensor | Rotations/min |
+| sensors.energy | a dimension per sensor | Joule |
+| sensors.humidity | a dimension per sensor | Percent |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/sensors.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/sensors.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| types | The types of sensors to collect. | temperature, fan, voltage, current, power, energy, humidity | yes |
+| update_every | Sets the default data collection frequency. | 1 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+
+
+
+#### Examples
+
+##### Default
+
+Default configuration.
+
+```yaml
+types:
+ - temperature
+ - fan
+ - voltage
+ - current
+ - power
+ - energy
+ - humidity
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `sensors` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin sensors debug trace
+ ```
+
+### lm-sensors doesn't work on your device
+
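+When `lm-sensors` doesn't work on your device (e.g. for RPi temperatures), use [the legacy bash collector](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/sensors/README.md).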
+
+### ACPI ring buffer errors are printed
+
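+There have been reports from users that on certain servers, ACPI ring buffer errors are printed by the kernel (`dmesg`) when ACPI sensors are being accessed. We are tracking such cases in issue [#827](https://github.com/netdata/netdata/issues/827). Please join this discussion for help.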
+
+
diff --git a/collectors/python.d.plugin/sensors/metadata.yaml b/collectors/python.d.plugin/sensors/metadata.yaml
index c3f681915a9323..d7cb2206fd4dc1 100644
--- a/collectors/python.d.plugin/sensors/metadata.yaml
+++ b/collectors/python.d.plugin/sensors/metadata.yaml
@@ -117,7 +117,16 @@ modules:
- humidity
troubleshooting:
problems:
- list: []
+ list:
+ - name: lm-sensors doesn't work on your device
+ description: |
+ When `lm-sensors` doesn't work on your device (e.g. for RPi temperatures),
+ use [the legacy bash collector](https://github.com/netdata/netdata/blob/master/collectors/charts.d.plugin/sensors/README.md)
+ - name: ACPI ring buffer errors are printed
+ description: |
+ There have been reports from users that on certain servers, ACPI ring buffer errors are printed by the kernel (`dmesg`)
+ when ACPI sensors are being accessed. We are tracking such cases in issue [#827](https://github.com/netdata/netdata/issues/827).
+ Please join this discussion for help.
alerts: []
metrics:
folding:
diff --git a/collectors/python.d.plugin/sensors/sensors.chart.py b/collectors/python.d.plugin/sensors/sensors.chart.py
index 701bf641472cf0..0d9de3750fad9c 100644
--- a/collectors/python.d.plugin/sensors/sensors.chart.py
+++ b/collectors/python.d.plugin/sensors/sensors.chart.py
@@ -66,7 +66,7 @@
LIMITS = {
'temperature': [-127, 1000],
- 'voltage': [-127, 127],
+ 'voltage': [-400, 400],
'current': [-127, 127],
'fan': [0, 65535]
}
diff --git a/collectors/python.d.plugin/smartd_log/README.md b/collectors/python.d.plugin/smartd_log/README.md
deleted file mode 100644
index e79348b05fd5c4..00000000000000
--- a/collectors/python.d.plugin/smartd_log/README.md
+++ /dev/null
@@ -1,148 +0,0 @@
-
-
-# Storage devices collector
-
-Monitors `smartd` log files to collect HDD/SSD S.M.A.R.T attributes.
-
-## Requirements
-
-- `smartmontools`
-
-It produces following charts for SCSI devices:
-
-1. **Read Error Corrected**
-
-2. **Read Error Uncorrected**
-
-3. **Write Error Corrected**
-
-4. **Write Error Uncorrected**
-
-5. **Verify Error Corrected**
-
-6. **Verify Error Uncorrected**
-
-7. **Temperature**
-
-For ATA devices:
-
-1. **Read Error Rate**
-
-2. **Seek Error Rate**
-
-3. **Soft Read Error Rate**
-
-4. **Write Error Rate**
-
-5. **SATA Interface Downshift**
-
-6. **UDMA CRC Error Count**
-
-7. **Throughput Performance**
-
-8. **Seek Time Performance**
-
-9. **Start/Stop Count**
-
-10. **Power-On Hours Count**
-
-11. **Power Cycle Count**
-
-12. **Unexpected Power Loss**
-
-13. **Spin-Up Time**
-
-14. **Spin-up Retries**
-
-15. **Calibration Retries**
-
-16. **Temperature**
-
-17. **Reallocated Sectors Count**
-
-18. **Reserved Block Count**
-
-19. **Program Fail Count**
-
-20. **Erase Fail Count**
-
-21. **Wear Leveller Worst Case Erase Count**
-
-22. **Unused Reserved NAND Blocks**
-
-23. **Reallocation Event Count**
-
-24. **Current Pending Sector Count**
-
-25. **Offline Uncorrectable Sector Count**
-
-26. **Percent Lifetime Used**
-
-## prerequisite
-
-`smartd` must be running with `-A` option to write smartd attribute information to files.
-
-For this you need to set `smartd_opts` (or `SMARTD_ARGS`, check _smartd.service_ content) in `/etc/default/smartmontools`:
-
-```
-# dump smartd attrs info every 600 seconds
-smartd_opts="-A /var/log/smartd/ -i 600"
-```
-
-You may need to create the smartd directory before smartd will write to it:
-
-```sh
-mkdir -p /var/log/smartd
-```
-
-Otherwise, all the smartd `.csv` files may get written to `/var/lib/smartmontools` (default location). See also for more info on the `-A --attributelog=PREFIX` command.
-
-`smartd` appends logs at every run. It's strongly recommended to use `logrotate` for smartd files.
-
-## Configuration
-
-Edit the `python.d/smartd_log.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/smartd_log.conf
-```
-
-```yaml
-local:
- log_path : '/var/log/smartd/'
-```
-
-If no configuration is given, module will attempt to read log files in `/var/log/smartd/` directory.
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `smartd_log` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `smartd_log` module in debug mode:
-
-```bash
-./python.d.plugin smartd_log debug trace
-```
-
diff --git a/collectors/python.d.plugin/smartd_log/README.md b/collectors/python.d.plugin/smartd_log/README.md
new file mode 120000
index 00000000000000..63aad6c8506180
--- /dev/null
+++ b/collectors/python.d.plugin/smartd_log/README.md
@@ -0,0 +1 @@
+integrations/s.m.a.r.t..md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/smartd_log/integrations/s.m.a.r.t..md b/collectors/python.d.plugin/smartd_log/integrations/s.m.a.r.t..md
new file mode 100644
index 00000000000000..5c5b569e9dc8df
--- /dev/null
+++ b/collectors/python.d.plugin/smartd_log/integrations/s.m.a.r.t..md
@@ -0,0 +1,223 @@
+
+
+# S.M.A.R.T.
+
+
+
+
+
+Plugin: python.d.plugin
+Module: smartd_log
+
+
+
+## Overview
+
+This collector monitors HDD/SSD S.M.A.R.T. metrics about drive health and performance.
+
+
+It reads `smartd` log files to collect the metrics.
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+Upon satisfying the prerequisites, the collector will auto-detect metrics if written in either `/var/log/smartd/` or `/var/lib/smartmontools/`.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+The metrics listed below are split in terms of availability on device type, SCSI or ATA.
+
+### Per S.M.A.R.T. instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit | SCSI | ATA |
+|:------|:----------|:----|:---:|:---:|
+| smartd_log.read_error_rate | a dimension per device | value | | • |
+| smartd_log.seek_error_rate | a dimension per device | value | | • |
+| smartd_log.soft_read_error_rate | a dimension per device | errors | | • |
+| smartd_log.write_error_rate | a dimension per device | value | | • |
+| smartd_log.read_total_err_corrected | a dimension per device | errors | • | |
+| smartd_log.read_total_unc_errors | a dimension per device | errors | • | |
+| smartd_log.write_total_err_corrected | a dimension per device | errors | • | |
+| smartd_log.write_total_unc_errors | a dimension per device | errors | • | |
+| smartd_log.verify_total_err_corrected | a dimension per device | errors | • | |
+| smartd_log.verify_total_unc_errors | a dimension per device | errors | • | |
+| smartd_log.sata_interface_downshift | a dimension per device | events | | • |
+| smartd_log.udma_crc_error_count | a dimension per device | errors | | • |
+| smartd_log.throughput_performance | a dimension per device | value | | • |
+| smartd_log.seek_time_performance | a dimension per device | value | | • |
+| smartd_log.start_stop_count | a dimension per device | events | | • |
+| smartd_log.power_on_hours_count | a dimension per device | hours | | • |
+| smartd_log.power_cycle_count | a dimension per device | events | | • |
+| smartd_log.unexpected_power_loss | a dimension per device | events | | • |
+| smartd_log.spin_up_time | a dimension per device | ms | | • |
+| smartd_log.spin_up_retries | a dimension per device | retries | | • |
+| smartd_log.calibration_retries | a dimension per device | retries | | • |
+| smartd_log.airflow_temperature_celsius | a dimension per device | celsius | | • |
+| smartd_log.temperature_celsius | a dimension per device | celsius | • | • |
+| smartd_log.reallocated_sectors_count | a dimension per device | sectors | | • |
+| smartd_log.reserved_block_count | a dimension per device | percentage | | • |
+| smartd_log.program_fail_count | a dimension per device | errors | | • |
+| smartd_log.erase_fail_count | a dimension per device | failures | | • |
+| smartd_log.wear_leveller_worst_case_erase_count | a dimension per device | erases | | • |
+| smartd_log.unused_reserved_nand_blocks | a dimension per device | blocks | | • |
+| smartd_log.reallocation_event_count | a dimension per device | events | | • |
+| smartd_log.current_pending_sector_count | a dimension per device | sectors | | • |
+| smartd_log.offline_uncorrectable_sector_count | a dimension per device | sectors | | • |
+| smartd_log.percent_lifetime_used | a dimension per device | percentage | | • |
+| smartd_log.media_wearout_indicator | a dimension per device | percentage | | • |
+| smartd_log.nand_writes_1gib | a dimension per device | GiB | | • |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Configure `smartd` to write attribute information to files.
+
+`smartd` must be running with `-A` option to write `smartd` attribute information to files.
+
+For this you need to set `smartd_opts` (or `SMARTD_ARGS`, check _smartd.service_ content) in `/etc/default/smartmontools`:
+
+```
+# dump smartd attrs info every 600 seconds
+smartd_opts="-A /var/log/smartd/ -i 600"
+```
+
+You may need to create the smartd directory before smartd will write to it:
+
+```sh
+mkdir -p /var/log/smartd
+```
+
+Otherwise, all the smartd `.csv` files may get written to `/var/lib/smartmontools` (default location). See the `smartd(8)` manual page for more info on the `-A` (`--attributelog=PREFIX`) option.
+
+`smartd` appends logs at every run. It's strongly recommended to use `logrotate` for smartd files.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/smartd_log.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/smartd_log.conf
+```
+#### Options
+
+This particular collector does not need further configuration to work if permissions are satisfied, but you can always customize its data collection behavior.
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| log_path | path to smartd log files. | /var/log/smartd | yes |
+| exclude_disks | Space-separated patterns. If the pattern is in the drive name, the module will not collect data for it. | | no |
+| age | Time in minutes since the last dump to file. | 30 | no |
+| update_every | Sets the default data collection frequency. | 1 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic configuration example.
+
+```yaml
+custom:
+ name: smartd_log
+ log_path: '/var/log/smartd/'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `smartd_log` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin smartd_log debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/smartd_log/smartd_log.chart.py b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py
index dc4e95decd0bb8..a896164dfc6580 100644
--- a/collectors/python.d.plugin/smartd_log/smartd_log.chart.py
+++ b/collectors/python.d.plugin/smartd_log/smartd_log.chart.py
@@ -39,6 +39,7 @@
ATTR172 = '172'
ATTR173 = '173'
ATTR174 = '174'
+ATTR177 = '177'
ATTR180 = '180'
ATTR183 = '183'
ATTR190 = '190'
@@ -50,6 +51,8 @@
ATTR202 = '202'
ATTR206 = '206'
ATTR233 = '233'
+ATTR241 = '241'
+ATTR242 = '242'
ATTR249 = '249'
ATTR_READ_ERR_COR = 'read-total-err-corrected'
ATTR_READ_ERR_UNC = 'read-total-unc-errors'
@@ -114,6 +117,8 @@
'offline_uncorrectable_sector_count',
'percent_lifetime_used',
'media_wearout_indicator',
+ 'total_lbas_written',
+ 'total_lbas_read',
]
CHARTS = {
@@ -329,7 +334,7 @@
'media_wearout_indicator': {
'options': [None, 'Media Wearout Indicator', 'percentage', 'wear', 'smartd_log.media_wearout_indicator', 'line'],
'lines': [],
- 'attrs': [ATTR233],
+ 'attrs': [ATTR233, ATTR177],
'algo': ABSOLUTE,
},
'nand_writes_1gib': {
@@ -338,6 +343,18 @@
'attrs': [ATTR249],
'algo': ABSOLUTE,
},
+ 'total_lbas_written': {
+ 'options': [None, 'Total LBAs Written', 'sectors', 'wear', 'smartd_log.total_lbas_written', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR241],
+ 'algo': ABSOLUTE,
+ },
+ 'total_lbas_read': {
+ 'options': [None, 'Total LBAs Read', 'sectors', 'wear', 'smartd_log.total_lbas_read', 'line'],
+ 'lines': [],
+ 'attrs': [ATTR242],
+ 'algo': ABSOLUTE,
+ },
}
# NOTE: 'parse_temp' decodes ATA 194 raw value. Not heavily tested. Written by @Ferroin
@@ -519,6 +536,7 @@ def ata_attribute_factory(value):
elif name in [
ATTR1,
ATTR7,
+ ATTR177,
ATTR202,
ATTR206,
ATTR233,
diff --git a/collectors/python.d.plugin/spigotmc/README.md b/collectors/python.d.plugin/spigotmc/README.md
deleted file mode 100644
index f39d9bab679e6b..00000000000000
--- a/collectors/python.d.plugin/spigotmc/README.md
+++ /dev/null
@@ -1,61 +0,0 @@
-
-
-# SpigotMC collector
-
-Performs basic monitoring for Spigot Minecraft servers.
-
-It provides two charts, one tracking server-side ticks-per-second in
-1, 5 and 15 minute averages, and one tracking the number of currently
-active users.
-
-This is not compatible with Spigot plugins which change the format of
-the data returned by the `tps` or `list` console commands.
-
-## Configuration
-
-Edit the `python.d/spigotmc.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/spigotmc.conf
-```
-
-```yaml
-host: localhost
-port: 25575
-password: pass
-```
-
-By default, a connection to port 25575 on the local system is attempted with an empty password.
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `spigotmc` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `spigotmc` module in debug mode:
-
-```bash
-./python.d.plugin spigotmc debug trace
-```
-
diff --git a/collectors/python.d.plugin/spigotmc/README.md b/collectors/python.d.plugin/spigotmc/README.md
new file mode 120000
index 00000000000000..66e5c9c47d0b33
--- /dev/null
+++ b/collectors/python.d.plugin/spigotmc/README.md
@@ -0,0 +1 @@
+integrations/spigotmc.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md b/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md
new file mode 100644
index 00000000000000..55ec8fa224947b
--- /dev/null
+++ b/collectors/python.d.plugin/spigotmc/integrations/spigotmc.md
@@ -0,0 +1,216 @@
+
+
+# SpigotMC
+
+
+
+
+
+Plugin: python.d.plugin
+Module: spigotmc
+
+
+
+## Overview
+
+This collector monitors SpigotMC server performance, in the form of ticks per second average, memory utilization, and active users.
+
+
+It sends the `tps`, `list` and `online` commands to the Server, and gathers the metrics from the responses.
+
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+By default, this collector will attempt to connect to a Spigot server running on the local host on port `25575`.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per SpigotMC instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| spigotmc.tps | 1 Minute Average, 5 Minute Average, 15 Minute Average | ticks |
+| spigotmc.users | Users | users |
+| spigotmc.mem | used, allocated, max | MiB |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Enable the Remote Console Protocol
+
+Under your SpigotMC server's `server.properties` configuration file, you should set `enable-rcon` to `true`.
+
+This will allow the Server to listen and respond to queries over the rcon protocol.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/spigotmc.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/spigotmc.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 1 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| host | The host's IP to connect to. | localhost | yes |
+| port | The port the remote console is listening on. | 25575 | yes |
+| password | Remote console password if any. | | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic configuration example.
+
+```yaml
+local:
+ name: local_server
+ host: 127.0.0.1
+ port: 25575
+
+```
+##### Basic Authentication
+
+An example using basic password for authentication with the remote console.
+
+Config
+
+```yaml
+local:
+ name: local_server_pass
+ host: 127.0.0.1
+ port: 25575
+ password: 'foobar'
+
+```
+
+
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+Config
+
+```yaml
+local_server:
+ name : my_local_server
+ host : 127.0.0.1
+ port: 25575
+
+remote_server:
+ name : another_remote_server
+ host : 192.0.2.1
+ port: 25575
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `spigotmc` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin spigotmc debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/squid/README.md b/collectors/python.d.plugin/squid/README.md
deleted file mode 100644
index da5349184f56c5..00000000000000
--- a/collectors/python.d.plugin/squid/README.md
+++ /dev/null
@@ -1,81 +0,0 @@
-
-
-# Squid collector
-
-Monitors one or more squid instances depending on configuration.
-
-It produces following charts:
-
-1. **Client Bandwidth** in kilobits/s
-
- - in
- - out
- - hits
-
-2. **Client Requests** in requests/s
-
- - requests
- - hits
- - errors
-
-3. **Server Bandwidth** in kilobits/s
-
- - in
- - out
-
-4. **Server Requests** in requests/s
-
- - requests
- - errors
-
-## Configuration
-
-Edit the `python.d/squid.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/squid.conf
-```
-
-```yaml
-priority : 50000
-
-local:
- request : 'cache_object://localhost:3128/counters'
- host : 'localhost'
- port : 3128
-```
-
-Without any configuration module will try to autodetect where squid presents its `counters` data
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `squid` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `squid` module in debug mode:
-
-```bash
-./python.d.plugin squid debug trace
-```
-
diff --git a/collectors/python.d.plugin/squid/README.md b/collectors/python.d.plugin/squid/README.md
new file mode 120000
index 00000000000000..c4e5a03d773053
--- /dev/null
+++ b/collectors/python.d.plugin/squid/README.md
@@ -0,0 +1 @@
+integrations/squid.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/squid/integrations/squid.md b/collectors/python.d.plugin/squid/integrations/squid.md
new file mode 100644
index 00000000000000..6599826da606fb
--- /dev/null
+++ b/collectors/python.d.plugin/squid/integrations/squid.md
@@ -0,0 +1,199 @@
+
+
+# Squid
+
+
+
+
+
+Plugin: python.d.plugin
+Module: squid
+
+
+
+## Overview
+
+This collector monitors statistics about the Squid Clients and Servers, like bandwidth and requests.
+
+
+It collects metrics from the endpoint where Squid exposes its `counters` data.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+By default, this collector will try to autodetect where Squid presents its `counters` data, by trying various configurations.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Squid instance
+
+These metrics refer to each monitored Squid instance.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| squid.clients_net | in, out, hits | kilobits/s |
+| squid.clients_requests | requests, hits, errors | requests/s |
+| squid.servers_net | in, out | kilobits/s |
+| squid.servers_requests | requests, errors | requests/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Configure Squid's Cache Manager
+
+Take a look at [Squid's official documentation](https://wiki.squid-cache.org/Features/CacheManager/Index#controlling-access-to-the-cache-manager) on how to configure access to the Cache Manager.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/squid.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/squid.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 1 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | local | no |
+| host | The host to connect to. | | yes |
+| port | The port to connect to. | | yes |
+| request | The URL to request from Squid. | | yes |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic configuration example.
+
+```yaml
+example_job_name:
+ name: 'local'
+ host: 'localhost'
+ port: 3128
+ request: 'cache_object://localhost:3128/counters'
+
+```
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+Config
+
+```yaml
+local_job:
+ name: 'local'
+ host: '127.0.0.1'
+ port: 3128
+ request: 'cache_object://127.0.0.1:3128/counters'
+
+remote_job:
+ name: 'remote'
+ host: '192.0.2.1'
+ port: 3128
+ request: 'cache_object://192.0.2.1:3128/counters'
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `squid` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin squid debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/tomcat/README.md b/collectors/python.d.plugin/tomcat/README.md
deleted file mode 100644
index 923d6238f78461..00000000000000
--- a/collectors/python.d.plugin/tomcat/README.md
+++ /dev/null
@@ -1,76 +0,0 @@
-
-
-# Apache Tomcat collector
-
-Presents memory utilization of tomcat containers.
-
-Charts:
-
-1. **Requests** per second
-
- - accesses
-
-2. **Volume** in KB/s
-
- - volume
-
-3. **Threads**
-
- - current
- - busy
-
-4. **JVM Free Memory** in MB
-
- - jvm
-
-## Configuration
-
-Edit the `python.d/tomcat.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/tomcat.conf
-```
-
-```yaml
-localhost:
- name : 'local'
- url : 'http://127.0.0.1:8080/manager/status?XML=true'
- user : 'tomcat_username'
- pass : 'secret_tomcat_password'
-```
-
-Without configuration, module attempts to connect to `http://localhost:8080/manager/status?XML=true`, without any credentials.
-So it will probably fail.
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `tomcat` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `tomcat` module in debug mode:
-
-```bash
-./python.d.plugin tomcat debug trace
-```
-
diff --git a/collectors/python.d.plugin/tomcat/README.md b/collectors/python.d.plugin/tomcat/README.md
new file mode 120000
index 00000000000000..997090c35102a9
--- /dev/null
+++ b/collectors/python.d.plugin/tomcat/README.md
@@ -0,0 +1 @@
+integrations/tomcat.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/tomcat/integrations/tomcat.md b/collectors/python.d.plugin/tomcat/integrations/tomcat.md
new file mode 100644
index 00000000000000..883f29dd31ca11
--- /dev/null
+++ b/collectors/python.d.plugin/tomcat/integrations/tomcat.md
@@ -0,0 +1,203 @@
+
+
+# Tomcat
+
+
+
+
+
+Plugin: python.d.plugin
+Module: tomcat
+
+
+
+## Overview
+
+This collector monitors Tomcat metrics about bandwidth, processing time, threads and more.
+
+
+It parses the information provided by the `/manager/status` HTTP endpoint in XML format.
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+You need to provide the username and the password to access the webserver's status page. Create a separate user with read-only rights for this particular endpoint.
+
+### Default Behavior
+
+#### Auto-Detection
+
+If the Netdata Agent and the Tomcat webserver are on the same host, then without configuration the module attempts to connect to `http://localhost:8080/manager/status?XML=true` without any credentials, so it will probably fail.
+
+#### Limits
+
+This module does not support SSL communication. If you want a Netdata Agent to monitor a Tomcat deployment, you shouldn't monitor it over a public network (the public internet), because Netdata sends the credentials over an unencrypted connection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Tomcat instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| tomcat.accesses | accesses, errors | requests/s |
+| tomcat.bandwidth | sent, received | KiB/s |
+| tomcat.processing_time | processing time | seconds |
+| tomcat.threads | current, busy | current threads |
+| tomcat.jvm | free, eden, survivor, tenured, code cache, compressed, metaspace | MiB |
+| tomcat.jvm_eden | used, committed, max | MiB |
+| tomcat.jvm_survivor | used, committed, max | MiB |
+| tomcat.jvm_tenured | used, committed, max | MiB |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Create a read-only `netdata` user, to monitor the `/status` endpoint.
+
+This is necessary for configuring the collector.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/tomcat.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/tomcat.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values. Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options per job
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| url | The URL of the Tomcat server's status endpoint. Always add the suffix ?XML=true. | no | yes |
+| user | A valid user with read permission to access the /manager/status endpoint of the server. Required if the endpoint is password protected | no | no |
+| pass | A valid password for the user in question. Required if the endpoint is password protected | no | no |
+| connector_name | The connector component that communicates with a web connector via the AJP protocol, e.g. `ajp-bio-8009` | | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic example configuration
+
+```yaml
+localhost:
+ name : 'local'
+ url : 'http://localhost:8080/manager/status?XML=true'
+
+```
+##### Using an IPv4 endpoint
+
+A typical configuration using an IPv4 endpoint
+
+Config
+
+```yaml
+local_ipv4:
+ name : 'local'
+ url : 'http://127.0.0.1:8080/manager/status?XML=true'
+
+```
+
+
+##### Using an IPv6 endpoint
+
+A typical configuration using an IPv6 endpoint
+
+Config
+
+```yaml
+local_ipv6:
+ name : 'local'
+ url : 'http://[::1]:8080/manager/status?XML=true'
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `tomcat` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin tomcat debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/tomcat/metadata.yaml b/collectors/python.d.plugin/tomcat/metadata.yaml
index c22f4f58b37035..e685260736f33c 100644
--- a/collectors/python.d.plugin/tomcat/metadata.yaml
+++ b/collectors/python.d.plugin/tomcat/metadata.yaml
@@ -45,7 +45,7 @@ modules:
prerequisites:
list:
- title: Create a read-only `netdata` user, to monitor the `/status` endpoint.
- description: You will need this configuring the collector
+ description: This is necessary for configuring the collector.
configuration:
file:
name: "python.d/tomcat.conf"
diff --git a/collectors/python.d.plugin/tor/README.md b/collectors/python.d.plugin/tor/README.md
deleted file mode 100644
index 15f7e22823f131..00000000000000
--- a/collectors/python.d.plugin/tor/README.md
+++ /dev/null
@@ -1,89 +0,0 @@
-
-
-# Tor collector
-
-Connects to the Tor control port to collect traffic statistics.
-
-## Requirements
-
-- `tor` program
-- `stem` python package
-
-It produces only one chart:
-
-1. **Traffic**
-
- - read
- - write
-
-## Configuration
-
-Edit the `python.d/tor.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/tor.conf
-```
-
-Needs only `control_port`.
-
-Here is an example for local server:
-
-```yaml
-update_every : 1
-priority : 60000
-
-local_tcp:
- name: 'local'
- control_port: 9051
- password: # if required
-
-local_socket:
- name: 'local'
- control_port: '/var/run/tor/control'
- password: # if required
-```
-
-### prerequisite
-
-Add to `/etc/tor/torrc`:
-
-```
-ControlPort 9051
-```
-
-For more options please read the manual.
-
-Without configuration, module attempts to connect to `127.0.0.1:9051`.
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `tor` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `tor` module in debug mode:
-
-```bash
-./python.d.plugin tor debug trace
-```
-
diff --git a/collectors/python.d.plugin/tor/README.md b/collectors/python.d.plugin/tor/README.md
new file mode 120000
index 00000000000000..7c20cd40ad9934
--- /dev/null
+++ b/collectors/python.d.plugin/tor/README.md
@@ -0,0 +1 @@
+integrations/tor.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/tor/integrations/tor.md b/collectors/python.d.plugin/tor/integrations/tor.md
new file mode 100644
index 00000000000000..0e57fa793af378
--- /dev/null
+++ b/collectors/python.d.plugin/tor/integrations/tor.md
@@ -0,0 +1,197 @@
+
+
+# Tor
+
+
+
+
+
+Plugin: python.d.plugin
+Module: tor
+
+
+
+## Overview
+
+This collector monitors Tor bandwidth traffic.
+
+It connects to the Tor control port to collect traffic statistics.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+If no configuration is provided, the collector will try to connect to 127.0.0.1:9051 to detect a running Tor instance.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Tor instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| tor.traffic | read, write | KiB/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Required python module
+
+The `stem` python library needs to be installed.
+
+
+#### Required Tor configuration
+
+Add to /etc/tor/torrc:
+
+ControlPort 9051
+
+For more options please read the manual.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/tor.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/tor.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| control_addr | Tor control IP address | 127.0.0.1 | no |
+| control_port | Tor control port. Can be either a tcp port, or a path to a socket file. | 9051 | no |
+| password | Tor control password | | no |
+
+
+
+#### Examples
+
+##### Local TCP
+
+A basic TCP configuration. `control_addr` is omitted and will default to `127.0.0.1`.
+
+Config
+
+```yaml
+local_tcp:
+ name: 'local'
+ control_port: 9051
+ password: # if required
+
+```
+
+
+##### Local socket
+
+A basic local socket configuration
+
+Config
+
+```yaml
+local_socket:
+ name: 'local'
+ control_port: '/var/run/tor/control'
+ password: # if required
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `tor` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin tor debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/tor/metadata.yaml b/collectors/python.d.plugin/tor/metadata.yaml
index d0ecc1a43d4d70..8647eca2320ae0 100644
--- a/collectors/python.d.plugin/tor/metadata.yaml
+++ b/collectors/python.d.plugin/tor/metadata.yaml
@@ -39,6 +39,9 @@ modules:
setup:
prerequisites:
list:
+ - title: 'Required python module'
+ description: |
+ The `stem` python library needs to be installed.
- title: 'Required Tor configuration'
description: |
Add to /etc/tor/torrc:
diff --git a/collectors/python.d.plugin/uwsgi/README.md b/collectors/python.d.plugin/uwsgi/README.md
deleted file mode 100644
index 393be9fc5886bb..00000000000000
--- a/collectors/python.d.plugin/uwsgi/README.md
+++ /dev/null
@@ -1,75 +0,0 @@
-
-
-# uWSGI collector
-
-Monitors performance metrics exposed by [`Stats Server`](https://uwsgi-docs.readthedocs.io/en/latest/StatsServer.html).
-
-
-Following charts are drawn:
-
-1. **Requests**
-
- - requests per second
- - transmitted data
- - average request time
-
-2. **Memory**
-
- - rss
- - vsz
-
-3. **Exceptions**
-4. **Harakiris**
-5. **Respawns**
-
-## Configuration
-
-Edit the `python.d/uwsgi.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/uwsgi.conf
-```
-
-```yaml
-socket:
- name : 'local'
- socket : '/tmp/stats.socket'
-
-localhost:
- name : 'local'
- host : 'localhost'
- port : 1717
-```
-
-When no configuration file is found, module tries to connect to TCP/IP socket: `localhost:1717`.
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `uwsgi` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `uwsgi` module in debug mode:
-
-```bash
-./python.d.plugin uwsgi debug trace
-```
-
diff --git a/collectors/python.d.plugin/uwsgi/README.md b/collectors/python.d.plugin/uwsgi/README.md
new file mode 120000
index 00000000000000..44b8559492a874
--- /dev/null
+++ b/collectors/python.d.plugin/uwsgi/README.md
@@ -0,0 +1 @@
+integrations/uwsgi.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/uwsgi/integrations/uwsgi.md b/collectors/python.d.plugin/uwsgi/integrations/uwsgi.md
new file mode 100644
index 00000000000000..af58608bd0706a
--- /dev/null
+++ b/collectors/python.d.plugin/uwsgi/integrations/uwsgi.md
@@ -0,0 +1,219 @@
+
+
+# uWSGI
+
+
+
+
+
+Plugin: python.d.plugin
+Module: uwsgi
+
+
+
+## Overview
+
+This collector monitors uWSGI metrics about requests, workers, memory and more.
+
+It collects every metric exposed from the stats server of uWSGI, either from the `stats.socket` or from the web server's TCP/IP socket.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This collector will auto-detect uWSGI instances deployed on the local host, running on port 1717, or exposing stats on socket `/tmp/stats.socket`.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per uWSGI instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| uwsgi.requests | a dimension per worker | requests/s |
+| uwsgi.tx | a dimension per worker | KiB/s |
+| uwsgi.avg_rt | a dimension per worker | milliseconds |
+| uwsgi.memory_rss | a dimension per worker | MiB |
+| uwsgi.memory_vsz | a dimension per worker | MiB |
+| uwsgi.exceptions | exceptions | exceptions |
+| uwsgi.harakiris | harakiris | harakiris |
+| uwsgi.respawns | respawns | respawns |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Enable the uWSGI Stats server
+
+Make sure that your uWSGI exposes its metrics via a Stats server.
+
+Source: https://uwsgi-docs.readthedocs.io/en/latest/StatsServer.html
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/uwsgi.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/uwsgi.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | The JOB's name as it will appear at the dashboard (by default is the job_name) | job_name | no |
+| socket | The 'path/to/uwsgistats.sock' | no | no |
+| host | The host to connect to | no | no |
+| port | The port to connect to | no | no |
+
+
+
+#### Examples
+
+##### Basic (default out-of-the-box)
+
+A basic example configuration, which the autodetection mechanism uses by default. As all JOBs have the same name, only one of them can run at a time.
+
+Config
+
+```yaml
+socket:
+ name : 'local'
+ socket : '/tmp/stats.socket'
+
+localhost:
+ name : 'local'
+ host : 'localhost'
+ port : 1717
+
+localipv4:
+ name : 'local'
+ host : '127.0.0.1'
+ port : 1717
+
+localipv6:
+ name : 'local'
+ host : '::1'
+ port : 1717
+
+```
+
+
+##### Multi-instance
+
+> **Note**: When you define multiple jobs, their names must be unique.
+
+Collecting metrics from local and remote instances.
+
+
+Config
+
+```yaml
+local:
+ name : 'local'
+ host : 'localhost'
+ port : 1717
+
+remote:
+ name : 'remote'
+ host : '192.0.2.1'
+ port : 1717
+
+```
+
+
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `uwsgi` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin uwsgi debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/varnish/README.md b/collectors/python.d.plugin/varnish/README.md
deleted file mode 100644
index d30a9fb1dc9f7c..00000000000000
--- a/collectors/python.d.plugin/varnish/README.md
+++ /dev/null
@@ -1,88 +0,0 @@
-
-
-# Varnish Cache collector
-
-Provides HTTP accelerator global, Backends (VBE) and Storages (SMF, SMA, MSE) statistics using `varnishstat` tool.
-
-Note that both, Varnish-Cache (free and open source) and Varnish-Plus (Commercial/Enterprise version), are supported.
-
-## Requirements
-
-- `netdata` user must be a member of the `varnish` group
-
-## Charts
-
-This module produces the following charts:
-
-- Connections Statistics in `connections/s`
-- Client Requests in `requests/s`
-- All History Hit Rate Ratio in `percent`
-- Current Poll Hit Rate Ratio in `percent`
-- Expired Objects in `expired/s`
-- Least Recently Used Nuked Objects in `nuked/s`
-- Number Of Threads In All Pools in `pools`
-- Threads Statistics in `threads/s`
-- Current Queue Length in `requests`
-- Backend Connections Statistics in `connections/s`
-- Requests To The Backend in `requests/s`
-- ESI Statistics in `problems/s`
-- Memory Usage in `MiB`
-- Uptime in `seconds`
-
-For every backend (VBE):
-
-- Backend Response Statistics in `kilobits/s`
-
-For every storage (SMF, SMA, or MSE):
-
-- Storage Usage in `KiB`
-- Storage Allocated Objects
-
-## Configuration
-
-Edit the `python.d/varnish.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/varnish.conf
-```
-
-Only one parameter is supported:
-
-```yaml
-instance_name: 'name'
-```
-
-The name of the `varnishd` instance to get logs from. If not specified, the host name is used.
-
-
-
-
-### Troubleshooting
-
-To troubleshoot issues with the `varnish` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `varnish` module in debug mode:
-
-```bash
-./python.d.plugin varnish debug trace
-```
-
diff --git a/collectors/python.d.plugin/varnish/README.md b/collectors/python.d.plugin/varnish/README.md
new file mode 120000
index 00000000000000..194be2335a1901
--- /dev/null
+++ b/collectors/python.d.plugin/varnish/README.md
@@ -0,0 +1 @@
+integrations/varnish.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/varnish/integrations/varnish.md b/collectors/python.d.plugin/varnish/integrations/varnish.md
new file mode 100644
index 00000000000000..da74dcf8f0e9c3
--- /dev/null
+++ b/collectors/python.d.plugin/varnish/integrations/varnish.md
@@ -0,0 +1,213 @@
+
+
+# Varnish
+
+
+
+
+
+Plugin: python.d.plugin
+Module: varnish
+
+
+
+## Overview
+
+This collector monitors Varnish metrics about HTTP accelerator global, Backends (VBE) and Storages (SMF, SMA, MSE) statistics.
+
+Note that both Varnish-Cache (free and open source) and Varnish-Plus (Commercial/Enterprise version) are supported.
+
+
+It uses the `varnishstat` tool in order to collect the metrics.
+
+
+This collector is supported on all platforms.
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+`netdata` user must be a member of the `varnish` group.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+By default, if the permissions are satisfied, the `varnishstat` tool will be executed on the host.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Varnish instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| varnish.session_connection | accepted, dropped | connections/s |
+| varnish.client_requests | received | requests/s |
+| varnish.all_time_hit_rate | hit, miss, hitpass | percentage |
+| varnish.current_poll_hit_rate | hit, miss, hitpass | percentage |
+| varnish.cached_objects_expired | objects | expired/s |
+| varnish.cached_objects_nuked | objects | nuked/s |
+| varnish.threads_total | None | number |
+| varnish.threads_statistics | created, failed, limited | threads/s |
+| varnish.threads_queue_len | in queue | requests |
+| varnish.backend_connections | successful, unhealthy, reused, closed, recycled, failed | connections/s |
+| varnish.backend_requests | sent | requests/s |
+| varnish.esi_statistics | errors, warnings | problems/s |
+| varnish.memory_usage | free, allocated | MiB |
+| varnish.uptime | uptime | seconds |
+
+### Per Backend
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| varnish.backend | header, body | kilobits/s |
+
+### Per Storage
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| varnish.storage_usage | free, allocated | KiB |
+| varnish.storage_alloc_objs | allocated | objects |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Provide the necessary permissions
+
+In order for the collector to work, you need to add the `netdata` user to the `varnish` user group, so that it can execute the `varnishstat` tool:
+
+```
+usermod -aG varnish netdata
+```
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/varnish.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/varnish.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| instance_name | the name of the varnishd instance to get logs from. If not specified, the local host name is used. | | yes |
+| update_every | Sets the default data collection frequency. | 10 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+
+
+
+#### Examples
+
+##### Basic
+
+An example configuration.
+
+```yaml
+job_name:
+ instance_name: ''
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `varnish` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin varnish debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/varnish/metadata.yaml b/collectors/python.d.plugin/varnish/metadata.yaml
index aa245c25fcb404..d31c1cf6fccef1 100644
--- a/collectors/python.d.plugin/varnish/metadata.yaml
+++ b/collectors/python.d.plugin/varnish/metadata.yaml
@@ -75,8 +75,8 @@ modules:
enabled: true
list:
- name: instance_name
- description: the name of the varnishd instance to get logs from. If not specified, the host name is used.
- default_value: ''
+ description: the name of the varnishd instance to get logs from. If not specified, the local host name is used.
+ default_value: ""
required: true
- name: update_every
description: Sets the default data collection frequency.
diff --git a/collectors/python.d.plugin/w1sensor/README.md b/collectors/python.d.plugin/w1sensor/README.md
deleted file mode 100644
index ca08b0400985c3..00000000000000
--- a/collectors/python.d.plugin/w1sensor/README.md
+++ /dev/null
@@ -1,50 +0,0 @@
-
-
-# 1-Wire Sensors collector
-
-Monitors sensor temperature.
-
-On Linux these are supported by the wire, w1_gpio, and w1_therm modules.
-Currently temperature sensors are supported and automatically detected.
-
-Charts are created dynamically based on the number of detected sensors.
-
-## Configuration
-
-Edit the `python.d/w1sensor.conf` configuration file using `edit-config` from the Netdata [config
-directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md), which is typically at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/w1sensor.conf
-```
-
-An example of a working configuration can be found in the default [configuration file](https://github.com/netdata/netdata/blob/master/collectors/python.d.plugin/w1sensor/w1sensor.conf) of this collector.
-
-### Troubleshooting
-
-To troubleshoot issues with the `w1sensor` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `w1sensor` module in debug mode:
-
-```bash
-./python.d.plugin w1sensor debug trace
-```
-
diff --git a/collectors/python.d.plugin/w1sensor/README.md b/collectors/python.d.plugin/w1sensor/README.md
new file mode 120000
index 00000000000000..c0fa9cd1bb5f10
--- /dev/null
+++ b/collectors/python.d.plugin/w1sensor/README.md
@@ -0,0 +1 @@
+integrations/1-wire_sensors.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/w1sensor/integrations/1-wire_sensors.md b/collectors/python.d.plugin/w1sensor/integrations/1-wire_sensors.md
new file mode 100644
index 00000000000000..fe3c05ba6fa836
--- /dev/null
+++ b/collectors/python.d.plugin/w1sensor/integrations/1-wire_sensors.md
@@ -0,0 +1,167 @@
+
+
+# 1-Wire Sensors
+
+
+
+
+
+Plugin: python.d.plugin
+Module: w1sensor
+
+
+
+## Overview
+
+Monitor 1-Wire Sensors metrics with Netdata for optimal environmental conditions monitoring. Enhance your environmental monitoring with real-time insights and alerts.
+
+The collector uses the wire, w1_gpio, and w1_therm kernel modules. Currently temperature sensors are supported and automatically detected.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+The collector will try to auto detect available 1-Wire devices.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per 1-Wire Sensors instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| w1sensor.temp | a dimension per sensor | Celsius |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Required Linux kernel modules
+
+Make sure `wire`, `w1_gpio`, and `w1_therm` kernel modules are loaded.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/w1sensor.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/w1sensor.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+| name | Job name. This value will overwrite the `job_name` value. JOBS with the same name are mutually exclusive. Only one of them will be allowed running at any time. This allows autodetection to try several alternatives and pick the one that works. | | no |
+| name_<1-Wire id> | This allows associating a human readable name with a sensor's 1-Wire identifier. | | no |
+
+
+
+#### Examples
+
+##### Provide human readable names
+
+Associate two 1-Wire identifiers with human readable names.
+
+```yaml
+sensors:
+ name_00000022276e: 'Machine room'
+ name_00000022298f: 'Rack 12'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `w1sensor` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin w1sensor debug trace
+ ```
+
+
diff --git a/collectors/python.d.plugin/zscores/README.md b/collectors/python.d.plugin/zscores/README.md
deleted file mode 100644
index dcb685c989d89b..00000000000000
--- a/collectors/python.d.plugin/zscores/README.md
+++ /dev/null
@@ -1,158 +0,0 @@
-# Basic anomaly detection using Z-scores
-
-By using smoothed, rolling [Z-Scores](https://en.wikipedia.org/wiki/Standard_score) for selected metrics or charts you can narrow down your focus and shorten root cause analysis.
-
-This collector uses the [Netdata rest api](https://github.com/netdata/netdata/blob/master/web/api/README.md) to get the `mean` and `stddev`
-for each dimension on specified charts over a time range (defined by `train_secs` and `offset_secs`). For each dimension
-it will calculate a Z-Score as `z = (x - mean) / stddev` (clipped at `z_clip`). Scores are then smoothed over
-time (`z_smooth_n`) and, if `mode: 'per_chart'`, aggregated across dimensions to a smoothed, rolling chart level Z-Score
-at each time step.
-
-## Charts
-
-Two charts are produced:
-
-- **Z-Score** (`zscores.z`): This chart shows the calculated Z-Score per chart (or dimension if `mode='per_dim'`).
-- **Z-Score >3** (`zscores.3stddev`): This chart shows a `1` if the absolute value of the Z-Score is greater than 3 or
- a `0` otherwise.
-
-Below is an example of the charts produced by this collector and a typical example of how they would look when things
-are 'normal' on the system. Most of the zscores tend to bounce randomly around a range typically between 0 to +3 (or -3
-to +3 if `z_abs: 'false'`), a few charts might stay steady at a more constant higher value depending on your
-configuration and the typical workload on your system (typically those charts that do not change that much have a
-smaller range of values on which to calculate a zscore and so tend to have a higher typical zscore).
-
-So really its a combination of the zscores values themselves plus, perhaps more importantly, how they change when
-something strange occurs on your system which can be most useful.
-
-
-
-For example, if we go onto the system and run a command
-like [`stress-ng --all 2`](https://wiki.ubuntu.com/Kernel/Reference/stress-ng) to create some stress, we see many charts
-begin to have zscores that jump outside the typical range. When the absolute zscore for a chart is greater than 3 you
-will see a corresponding line appear on the `zscores.3stddev` chart to make it a bit clearer what charts might be worth
-looking at first (for more background information on why 3 stddev
-see [here](https://en.wikipedia.org/wiki/68%E2%80%9395%E2%80%9399.7_rule#:~:text=In%20the%20empirical%20sciences%20the,99.7%25%20probability%20as%20near%20certainty.))
-.
-
-In the example below we basically took a sledge hammer to our system so its not surprising that lots of charts light up
-after we run the stress command. In a more realistic setting you might just see a handful of charts with strange zscores
-and that could be a good indication of where to look first.
-
-
-
-Then as the issue passes the zscores should settle back down into their normal range again as they are calculated in a
-rolling and smoothed way (as defined by your `zscores.conf` file).
-
-
-
-## Requirements
-
-This collector will only work with Python 3 and requires the below packages be installed.
-
-```bash
-# become netdata user
-sudo su -s /bin/bash netdata
-# install required packages
-pip3 install numpy pandas requests netdata-pandas==0.0.38
-```
-
-## Configuration
-
-Install the underlying Python requirements, Enable the collector and restart Netdata.
-
-```bash
-cd /etc/netdata/
-sudo ./edit-config python.d.conf
-# Set `zscores: no` to `zscores: yes`
-sudo systemctl restart netdata
-```
-
-The configuration for the zscores collector defines how it will behave on your system and might take some
-experimentation with over time to set it optimally. Out of the box, the config comes with
-some [sane defaults](https://www.netdata.cloud/blog/redefining-monitoring-netdata/) to get you started.
-
-If you are unsure about any of the below configuration options then it's best to just ignore all this and leave
-the `zscores.conf` files alone to begin with. Then you can return to it later if you would like to tune things a bit
-more once the collector is running for a while.
-
-Edit the `python.d/zscores.conf` configuration file using `edit-config` from the your
-agent's [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory), which is
-usually at `/etc/netdata`.
-
-```bash
-cd /etc/netdata # Replace this path with your Netdata config directory, if different
-sudo ./edit-config python.d/zscores.conf
-```
-
-The default configuration should look something like this. Here you can see each parameter (with sane defaults) and some
-information about each one and what it does.
-
-```bash
-# what host to pull data from
-host: '127.0.0.1:19999'
-# What charts to pull data for - A regex like 'system\..*|' or 'system\..*|apps.cpu|apps.mem' etc.
-charts_regex: 'system\..*'
-# length of time to base calculations off for mean and stddev
-train_secs: 14400 # use last 4 hours to work out the mean and stddev for the zscore
-# offset preceding latest data to ignore when calculating mean and stddev
-offset_secs: 300 # ignore last 5 minutes of data when calculating the mean and stddev
-# recalculate the mean and stddev every n steps of the collector
-train_every_n: 900 # recalculate mean and stddev every 15 minutes
-# smooth the z score by averaging it over last n values
-z_smooth_n: 15 # take a rolling average of the last 15 zscore values to reduce sensitivity to temporary 'spikes'
-# cap absolute value of zscore (before smoothing) for better stability
-z_clip: 10 # cap each zscore at 10 so as to avoid really large individual zscores swamping any rolling average
-# set z_abs: 'true' to make all zscores be absolute values only.
-z_abs: 'true'
-# burn in period in which to initially calculate mean and stddev on every step
-burn_in: 2 # on startup of the collector continually update the mean and stddev in case any gaps or initial calculations fail to return
-# mode can be to get a zscore 'per_dim' or 'per_chart'
-mode: 'per_chart' # 'per_chart' means individual dimension level smoothed zscores will be aggregated to one zscore per chart per time step
-# per_chart_agg is how you aggregate from dimension to chart when mode='per_chart'
-per_chart_agg: 'mean' # 'absmax' will take the max absolute value across all dimensions but will maintain the sign. 'mean' will just average.
-```
-
-## Notes
-
-- Python 3 is required as the [`netdata-pandas`](https://github.com/netdata/netdata-pandas) package uses python async
- libraries ([asks](https://pypi.org/project/asks/) and [trio](https://pypi.org/project/trio/)) to make asynchronous
- calls to the netdata rest api to get the required data for each chart when calculating the mean and stddev.
-- It may take a few hours or so for the collector to 'settle' into it's typical behaviour in terms of the scores you
- will see in the normal running of your system.
-- The zscore you see for each chart when using `mode: 'per_chart'` as actually an aggregated zscore across all the
- dimensions on the underlying chart.
-- If you set `mode: 'per_dim'` then you will see a zscore for each dimension on each chart as opposed to one per chart.
-- As this collector does some calculations itself in python you may want to try it out first on a test or development
- system to get a sense of its performance characteristics. Most of the work in calculating the mean and stddev will be
- pushed down to the underlying Netdata C libraries via the rest api. But some data wrangling and calculations are then
- done using [Pandas](https://pandas.pydata.org/) and [Numpy](https://numpy.org/) within the collector itself.
-- On a development n1-standard-2 (2 vCPUs, 7.5 GB memory) vm running Ubuntu 18.04 LTS and not doing any work some of the
- typical performance characteristics we saw from running this collector were:
- - A runtime (`netdata.runtime_zscores`) of ~50ms when doing scoring and ~500ms when recalculating the mean and
- stddev.
- - Typically 3%-3.5% cpu usage from scoring, jumping to ~35% for one second when recalculating the mean and stddev.
- - About ~50mb of ram (`apps.mem`) being continually used by the `python.d.plugin`.
-- If you activate this collector on a fresh node, it might take a little while to build up enough data to calculate a
- proper zscore. So until you actually have `train_secs` of available data the mean and stddev calculated will be subject
- to more noise.
-### Troubleshooting
-
-To troubleshoot issues with the `zscores` module, run the `python.d.plugin` with the debug option enabled. The
-output will give you the output of the data collection job or error messages on why the collector isn't working.
-
-First, navigate to your plugins directory, usually they are located under `/usr/libexec/netdata/plugins.d/`. If that's
-not the case on your system, open `netdata.conf` and look for the setting `plugins directory`. Once you're in the
-plugin's directory, switch to the `netdata` user.
-
-```bash
-cd /usr/libexec/netdata/plugins.d/
-sudo su -s /bin/bash netdata
-```
-
-Now you can manually run the `zscores` module in debug mode:
-
-```bash
-./python.d.plugin zscores debug trace
-```
-
diff --git a/collectors/python.d.plugin/zscores/README.md b/collectors/python.d.plugin/zscores/README.md
new file mode 120000
index 00000000000000..159ce078713440
--- /dev/null
+++ b/collectors/python.d.plugin/zscores/README.md
@@ -0,0 +1 @@
+integrations/python.d_zscores.md
\ No newline at end of file
diff --git a/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md b/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md
new file mode 100644
index 00000000000000..9d7d1c3d5ab0d8
--- /dev/null
+++ b/collectors/python.d.plugin/zscores/integrations/python.d_zscores.md
@@ -0,0 +1,195 @@
+
+
+# python.d zscores
+
+Plugin: python.d.plugin
+Module: zscores
+
+
+
+## Overview
+
+By using smoothed, rolling [Z-Scores](https://en.wikipedia.org/wiki/Standard_score) for selected metrics or charts you can narrow down your focus and shorten root cause analysis.
+
+
+This collector uses the [Netdata rest api](https://github.com/netdata/netdata/blob/master/web/api/README.md) to get the `mean` and `stddev`
+for each dimension on specified charts over a time range (defined by `train_secs` and `offset_secs`).
+
+For each dimension it will calculate a Z-Score as `z = (x - mean) / stddev` (clipped at `z_clip`). Scores are then smoothed over
+time (`z_smooth_n`) and, if `mode: 'per_chart'`, aggregated across dimensions to a smoothed, rolling chart level Z-Score at each time step.
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per python.d zscores instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| zscores.z | a dimension per chart or dimension | z |
+| zscores.3stddev | a dimension per chart or dimension | count |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Python Requirements
+
+This collector will only work with Python 3 and requires the below packages be installed.
+
+```bash
+# become netdata user
+sudo su -s /bin/bash netdata
+# install required packages
+pip3 install numpy pandas requests netdata-pandas==0.0.38
+```
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `python.d/zscores.conf`.
+
+
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config python.d/zscores.conf
+```
+#### Options
+
+There are 2 sections:
+
+* Global variables
+* One or more JOBS that can define multiple different instances to monitor.
+
+The following options can be defined globally: priority, penalty, autodetection_retry, update_every, but can also be defined per JOB to override the global values.
+
+Additionally, the following collapsed table contains all the options that can be configured inside a JOB definition.
+
+Every configuration JOB starts with a `job_name` value which will appear in the dashboard, unless a `name` parameter is specified.
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| charts_regex | what charts to pull data for - A regex like `system\..*` or `system\..*\|apps.cpu\|apps.mem` etc. | system\..* | yes |
+| train_secs | length of time (in seconds) to base calculations off for mean and stddev. | 14400 | yes |
+| offset_secs | offset (in seconds) preceding latest data to ignore when calculating mean and stddev. | 300 | yes |
+| train_every_n | recalculate the mean and stddev every n steps of the collector. | 900 | yes |
+| z_smooth_n | smooth the z score (to reduce sensitivity to spikes) by averaging it over last n values. | 15 | yes |
+| z_clip | cap absolute value of zscore (before smoothing) for better stability. | 10 | yes |
+| z_abs | set z_abs: 'true' to make all zscores be absolute values only. | true | yes |
+| burn_in | burn in period in which to initially calculate mean and stddev on every step. | 2 | yes |
+| mode | mode can be to get a zscore 'per_dim' or 'per_chart'. | per_chart | yes |
+| per_chart_agg | per_chart_agg is how you aggregate from dimension to chart when mode='per_chart'. | mean | yes |
+| update_every | Sets the default data collection frequency. | 5 | no |
+| priority | Controls the order of charts at the netdata dashboard. | 60000 | no |
+| autodetection_retry | Sets the job re-check interval in seconds. | 0 | no |
+| penalty | Indicates whether to apply penalty to update_every in case of failures. | yes | no |
+
+
+
+#### Examples
+
+##### Default
+
+Default configuration.
+
+```yaml
+local:
+ name: 'local'
+ host: '127.0.0.1:19999'
+ charts_regex: 'system\..*'
+ charts_to_exclude: 'system.uptime'
+ train_secs: 14400
+ offset_secs: 300
+ train_every_n: 900
+ z_smooth_n: 15
+ z_clip: 10
+ z_abs: 'true'
+ burn_in: 2
+ mode: 'per_chart'
+ per_chart_agg: 'mean'
+
+```
+
+
+## Troubleshooting
+
+### Debug Mode
+
+To troubleshoot issues with the `zscores` collector, run the `python.d.plugin` with the debug option enabled. The output
+should give you clues as to why the collector isn't working.
+
+- Navigate to the `plugins.d` directory, usually at `/usr/libexec/netdata/plugins.d/`. If that's not the case on
+ your system, open `netdata.conf` and look for the `plugins` setting under `[directories]`.
+
+ ```bash
+ cd /usr/libexec/netdata/plugins.d/
+ ```
+
+- Switch to the `netdata` user.
+
+ ```bash
+ sudo -u netdata -s
+ ```
+
+- Run the `python.d.plugin` to debug the collector:
+
+ ```bash
+ ./python.d.plugin zscores debug trace
+ ```
+
+
diff --git a/collectors/slabinfo.plugin/README.md b/collectors/slabinfo.plugin/README.md
deleted file mode 100644
index abcbe1e3fe926d..00000000000000
--- a/collectors/slabinfo.plugin/README.md
+++ /dev/null
@@ -1,36 +0,0 @@
-
-
-# slabinfo.plugin
-
-SLAB is a cache mechanism used by the Kernel to avoid fragmentation.
-
-Each internal structure (process, file descriptor, inode...) is stored within a SLAB.
-
-## configuring Netdata for slabinfo
-
-The plugin is disabled by default because it collects and displays a huge amount of metrics.
-To enable it set `slabinfo = yes` in the `plugins` section of the `netdata.conf` configuration file.
-
-If you are using [our official native DEB/RPM packages](https://github.com/netdata/netdata/blob/master/packaging/installer/methods/packages.md), you will additionally need to install the `netdata-plugin-slabinfo`
-package using your system package manager.
-
-There is currently no configuration needed for the plugin itself.
-
-As `/proc/slabinfo` is only readable by root, this plugin is setuid root.
-
-## For what use
-
-This slabinfo details allows to have clues on actions done on your system.
-In the following screenshot, you can clearly see a `find` done on a ext4 filesystem (the number of `ext4_inode_cache` & `dentry` are rising fast), and a few seconds later, an admin issued a `echo 3 > /proc/sys/vm/drop_cached` as their count dropped.
-
-
-
-
-
diff --git a/collectors/slabinfo.plugin/README.md b/collectors/slabinfo.plugin/README.md
new file mode 120000
index 00000000000000..4d4629a77f86a2
--- /dev/null
+++ b/collectors/slabinfo.plugin/README.md
@@ -0,0 +1 @@
+integrations/linux_kernel_slab_allocator_statistics.md
\ No newline at end of file
diff --git a/collectors/slabinfo.plugin/integrations/linux_kernel_slab_allocator_statistics.md b/collectors/slabinfo.plugin/integrations/linux_kernel_slab_allocator_statistics.md
new file mode 100644
index 00000000000000..ce8115270b076c
--- /dev/null
+++ b/collectors/slabinfo.plugin/integrations/linux_kernel_slab_allocator_statistics.md
@@ -0,0 +1,131 @@
+
+
+# Linux kernel SLAB allocator statistics
+
+
+
+
+
+Plugin: slabinfo.plugin
+Module: slabinfo.plugin
+
+
+
+## Overview
+
+Collects metrics on kernel SLAB cache utilization to monitor the low-level performance impact of workloads in the kernel.
+
+
+The plugin parses `/proc/slabinfo`.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector only supports collecting metrics from a single instance of this integration.
+
+This integration requires read access to `/proc/slabinfo`, which is accessible only to the root user by default. Netdata uses Linux Capabilities to give the plugin access to this file. `CAP_DAC_READ_SEARCH` is added automatically during installation. This capability allows bypassing file read permission checks and directory read and execute permission checks. If file capabilities are not usable, then the plugin is instead installed with the SUID bit set in permissions so that it runs as root.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+Due to the large number of metrics generated by this integration, it is disabled by default and must be manually enabled inside `/etc/netdata/netdata.conf`.
+
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+SLAB cache utilization metrics for the whole system.
+
+### Per Linux kernel SLAB allocator statistics instance
+
+
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| mem.slabmemory | a dimension per cache | B |
+| mem.slabfilling | a dimension per cache | % |
+| mem.slabwaste | a dimension per cache | B |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Minimum setup
+
+If you installed `netdata` using a package manager, it is also necessary to install the package `netdata-plugin-slabinfo`.
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugins]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+The main configuration file.
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| Enable plugin | As described above, the plugin is disabled by default; this option is used to enable it. | no | yes |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/slabinfo.plugin/metadata.yaml b/collectors/slabinfo.plugin/metadata.yaml
index 7d135d611a5579..f19778297db64f 100644
--- a/collectors/slabinfo.plugin/metadata.yaml
+++ b/collectors/slabinfo.plugin/metadata.yaml
@@ -50,7 +50,9 @@ modules:
description: ""
setup:
prerequisites:
- list: []
+ list:
+ - title: Minimum setup
+ description: "If you installed `netdata` using a package manager, it is also necessary to install the package `netdata-plugin-slabinfo`."
configuration:
file:
name: "netdata.conf"
diff --git a/collectors/slabinfo.plugin/slabinfo.c b/collectors/slabinfo.plugin/slabinfo.c
index 25b96e386ef9e0..9b9119a6ea1415 100644
--- a/collectors/slabinfo.plugin/slabinfo.c
+++ b/collectors/slabinfo.plugin/slabinfo.c
@@ -336,12 +336,11 @@ void usage(void) {
}
int main(int argc, char **argv) {
- stderror = stderr;
clocks_init();
+ nd_log_initialize_for_external_plugins("slabinfo.plugin");
program_name = argv[0];
program_version = "0.1";
- error_log_syslog = 0;
int update_every = 1, i, n, freq = 0;
diff --git a/collectors/statsd.plugin/README.md b/collectors/statsd.plugin/README.md
index dd74923ec03414..e3c8f9f81f1c40 100644
--- a/collectors/statsd.plugin/README.md
+++ b/collectors/statsd.plugin/README.md
@@ -36,7 +36,7 @@ Netdata ships with a few synthetic chart definitions to automatically present ap
more uniform way. These synthetic charts are configuration files (you can create your own) that re-arrange
statsd metrics into a more meaningful way.
-On synthetic charts, we can have alarms as with any metric and chart.
+On synthetic charts, we can have alerts as with any metric and chart.
- [K6 load testing tool](https://k6.io)
- **Description:** k6 is a developer-centric, free and open-source load testing tool built for making performance testing a productive and enjoyable experience.
@@ -173,8 +173,8 @@ You can find the configuration at `/etc/netdata/netdata.conf`:
# update every (flushInterval) = 1
# udp messages to process at once = 10
# create private charts for metrics matching = *
- # max private charts allowed = 200
# max private charts hard limit = 1000
+ # cleanup obsolete charts after secs = 0
# private charts memory mode = save
# private charts history = 3996
# histograms and timers percentile (percentThreshold) = 95.00000
@@ -234,13 +234,11 @@ The default behavior is to use the same settings as the rest of the Netdata Agen
- `private charts memory mode`
- `private charts history`
-### Optimize private metric charts visualization and storage
+### Optimize private metric charts storage
-If you have thousands of metrics, each with its own private chart, you may notice that your web browser becomes slow when you view the Netdata dashboard (this is a web browser issue we need to address at the Netdata UI). So, Netdata has a protection to stop creating charts when `max private charts allowed = 200` (soft limit) is reached.
+For optimization reasons, Netdata imposes a hard limit on private metric charts. The limit is set via the `max private charts hard limit` setting (which defaults to 1000 charts). Metrics above this hard limit are still collected, but they can only be used in synthetic charts (once a metric is added to a chart, it will be sent to backend servers too).
-The metrics above this soft limit are still processed by Netdata, can be used in synthetic charts and will be available to be sent to backend time-series databases, up to `max private charts hard limit = 1000`. So, between 200 and 1000 charts, Netdata will still generate charts, but they will automatically be created with `memory mode = none` (Netdata will not maintain a database for them). These metrics will be sent to backend time series databases, if the backend configuration is set to `as collected`.
-
-Metrics above the hard limit are still collected, but they can only be used in synthetic charts (once a metric is added to chart, it will be sent to backend servers too).
+If you have many ephemeral metrics collected (i.e. metrics for which you only collect values for a certain amount of time), you can set the configuration option `set charts as obsolete after secs`. Setting a value in seconds here means that Netdata will mark those metrics (and their private charts) as obsolete after the specified time has passed since the last sent metric value. Those charts will later be deleted according to the setting in `cleanup obsolete charts after secs`. Setting `set charts as obsolete after secs` to 0 (which is also the default value) will disable this functionality.
Example private charts (automatically generated without any configuration):
@@ -348,11 +346,11 @@ Using the above configuration `myapp` should get its own section on the dashboar
- `gaps when not collected = yes|no`, enables or disables gaps on the charts of the application in case that no metrics are collected.
- `memory mode` sets the memory mode for all charts of the application. The default is the global default for Netdata (not the global default for StatsD private charts). We suggest not to use this (we have commented it out in the example) and let your app use the global default for Netdata, which is our dbengine.
-- `history` sets the size of the round robin database for this application. The default is the global default for Netdata (not the global default for StatsD private charts). This is only relevant if you use `memory mode = save`. Read more on our [metrics storage(]/docs/store/change-metrics-storage.md) doc.
+- `history` sets the size of the round-robin database for this application. The default is the global default for Netdata (not the global default for StatsD private charts). This is only relevant if you use `memory mode = save`. Read more on our [metrics storage](/docs/store/change-metrics-storage.md) doc.
`[dictionary]` defines name-value associations. These are used to renaming metrics, when added to synthetic charts. Metric names are also defined at each `dimension` line. However, using the dictionary dimension names can be declared globally, for each app and is the only way to rename dimensions when using patterns. Of course the dictionary can be empty or missing.
-Then, add any number of charts. Each chart should start with `[id]`. The chart will be called `app_name.id`. `family` controls the submenu on the dashboard. `context` controls the alarm templates. `priority` controls the ordering of the charts on the dashboard. The rest of the settings are informational.
+Then, add any number of charts. Each chart should start with `[id]`. The chart will be called `app_name.id`. `family` controls the submenu on the dashboard. `context` controls the alert templates. `priority` controls the ordering of the charts on the dashboard. The rest of the settings are informational.
Add any number of metrics to a chart, using `dimension` lines. These lines accept 5 space separated parameters:
@@ -361,7 +359,7 @@ Add any number of metrics to a chart, using `dimension` lines. These lines accep
3. an optional selector (type) of the value to shown (see below)
4. an optional multiplier
5. an optional divider
-6. optional flags, space separated and enclosed in quotes. All the external plugins `DIMENSION` flags can be used. Currently the only usable flag is `hidden`, to add the dimension, but not show it on the dashboard. This is usually needed to have the values available for percentage calculation, or use them in alarms.
+6. optional flags, space separated and enclosed in quotes. All the external plugins `DIMENSION` flags can be used. Currently, the only usable flag is `hidden`, to add the dimension, but not show it on the dashboard. This is usually needed to have the values available for percentage calculation, or use them in alerts.
So, the format is this:
@@ -439,7 +437,7 @@ Use the dictionary in 2 ways:
1. set `dimension = myapp.metric1 ''` and have at the dictionary `myapp.metric1 = metric1 name`
2. set `dimension = myapp.metric1 'm1'` and have at the dictionary `m1 = metric1 name`
-In both cases, the dimension will be added with ID `myapp.metric1` and will be named `metric1 name`. So, in alarms use either of the 2 as `${myapp.metric1}` or `${metric1 name}`.
+In both cases, the dimension will be added with ID `myapp.metric1` and will be named `metric1 name`. So, in alerts use either of the 2 as `${myapp.metric1}` or `${metric1 name}`.
> keep in mind that if you add multiple times the same StatsD metric to a chart, Netdata will append `TYPE` to the dimension ID, so `myapp.metric1` will be added as `myapp.metric1_last` or `myapp.metric1_events`, etc. If you add multiple times the same metric with the same `TYPE` to a chart, Netdata will also append an incremental counter to the dimension ID, i.e. `myapp.metric1_last1`, `myapp.metric1_last2`, etc.
diff --git a/collectors/statsd.plugin/statsd.c b/collectors/statsd.plugin/statsd.c
index 5422d2905956e3..9cc3a9d97952b8 100644
--- a/collectors/statsd.plugin/statsd.c
+++ b/collectors/statsd.plugin/statsd.c
@@ -95,6 +95,7 @@ typedef enum __attribute__((packed)) statsd_metric_options {
STATSD_METRIC_OPTION_USEFUL = 0x00000080, // set when the charting thread finds the metric useful (i.e. used in a chart)
STATSD_METRIC_OPTION_COLLECTION_FULL_LOGGED = 0x00000100, // set when the collection is full for this metric
STATSD_METRIC_OPTION_UPDATED_CHART_METADATA = 0x00000200, // set when the private chart metadata have been updated via tags
+ STATSD_METRIC_OPTION_OBSOLETE = 0x00004000, // set when the metric is obsoleted
} STATS_METRIC_OPTIONS;
typedef enum __attribute__((packed)) statsd_metric_type {
@@ -117,6 +118,7 @@ typedef struct statsd_metric {
// metadata about data collection
collected_number events; // the number of times this metric has been collected (never resets)
uint32_t count; // the number of times this metric has been collected since the last flush
+ time_t last_collected; // timestamp of the last incoming value
// the actual collected data
union {
@@ -268,6 +270,7 @@ static struct statsd {
collected_number decimal_detail;
uint32_t private_charts;
uint32_t max_private_charts_hard;
+ uint32_t set_obsolete_after;
STATSD_APP *apps;
uint32_t recvmmsg_size;
@@ -476,6 +479,16 @@ static inline int value_is_zinit(const char *value) {
#define is_metric_checked(m) ((m)->options & STATSD_METRIC_OPTION_CHECKED)
#define is_metric_useful_for_collection(m) (!is_metric_checked(m) || ((m)->options & STATSD_METRIC_OPTION_USEFUL))
+static inline void metric_update_counters_and_obsoletion(STATSD_METRIC *m) {
+ m->events++;
+ m->count++;
+ m->last_collected = now_realtime_sec();
+ if (m->st && unlikely(rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE))) {
+ rrdset_isnot_obsolete___safe_from_collector_thread(m->st);
+ m->options &= ~STATSD_METRIC_OPTION_OBSOLETE;
+ }
+}
+
static inline void statsd_process_gauge(STATSD_METRIC *m, const char *value, const char *sampling) {
if(!is_metric_useful_for_collection(m)) return;
@@ -498,8 +511,7 @@ static inline void statsd_process_gauge(STATSD_METRIC *m, const char *value, con
else
m->gauge.value = statsd_parse_float(value, 1.0);
- m->events++;
- m->count++;
+ metric_update_counters_and_obsoletion(m);
}
}
@@ -516,8 +528,7 @@ static inline void statsd_process_counter_or_meter(STATSD_METRIC *m, const char
else {
m->counter.value += llrintndd((NETDATA_DOUBLE) statsd_parse_int(value, 1) / statsd_parse_sampling_rate(sampling));
- m->events++;
- m->count++;
+ metric_update_counters_and_obsoletion(m);
}
}
@@ -559,8 +570,7 @@ static inline void statsd_process_histogram_or_timer(STATSD_METRIC *m, const cha
m->histogram.ext->values[m->histogram.ext->used++] = v;
}
- m->events++;
- m->count++;
+ metric_update_counters_and_obsoletion(m);
}
}
@@ -597,8 +607,7 @@ static inline void statsd_process_set(STATSD_METRIC *m, const char *value) {
#else
dictionary_set(m->set.dict, value, NULL, 0);
#endif
- m->events++;
- m->count++;
+ metric_update_counters_and_obsoletion(m);
}
}
@@ -630,8 +639,7 @@ static inline void statsd_process_dictionary(STATSD_METRIC *m, const char *value
}
t->count++;
- m->events++;
- m->count++;
+ metric_update_counters_and_obsoletion(m);
}
}
@@ -1627,6 +1635,9 @@ static inline RRDSET *statsd_private_rrdset_create(
static inline void statsd_private_chart_gauge(STATSD_METRIC *m) {
netdata_log_debug(D_STATSD, "updating private chart for gauge metric '%s'", m->name);
+ if(m->st && unlikely(rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE)))
+ return;
+
if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) {
m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA;
@@ -1667,6 +1678,9 @@ static inline void statsd_private_chart_gauge(STATSD_METRIC *m) {
static inline void statsd_private_chart_counter_or_meter(STATSD_METRIC *m, const char *dim, const char *family) {
netdata_log_debug(D_STATSD, "updating private chart for %s metric '%s'", dim, m->name);
+ if(m->st && unlikely(rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE)))
+ return;
+
if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) {
m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA;
@@ -1707,6 +1721,9 @@ static inline void statsd_private_chart_counter_or_meter(STATSD_METRIC *m, const
static inline void statsd_private_chart_set(STATSD_METRIC *m) {
netdata_log_debug(D_STATSD, "updating private chart for set metric '%s'", m->name);
+ if(m->st && unlikely(rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE)))
+ return;
+
if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) {
m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA;
@@ -1747,6 +1764,9 @@ static inline void statsd_private_chart_set(STATSD_METRIC *m) {
static inline void statsd_private_chart_dictionary(STATSD_METRIC *m) {
netdata_log_debug(D_STATSD, "updating private chart for dictionary metric '%s'", m->name);
+ if(m->st && unlikely(rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE)))
+ return;
+
if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) {
m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA;
@@ -1790,6 +1810,9 @@ static inline void statsd_private_chart_dictionary(STATSD_METRIC *m) {
static inline void statsd_private_chart_timer_or_histogram(STATSD_METRIC *m, const char *dim, const char *family, const char *units) {
netdata_log_debug(D_STATSD, "updating private chart for %s metric '%s'", dim, m->name);
+ if(m->st && unlikely(rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE)))
+ return;
+
if(unlikely(!m->st || m->options & STATSD_METRIC_OPTION_UPDATED_CHART_METADATA)) {
m->options &= ~STATSD_METRIC_OPTION_UPDATED_CHART_METADATA;
@@ -1842,6 +1865,16 @@ static inline void statsd_private_chart_timer_or_histogram(STATSD_METRIC *m, con
// --------------------------------------------------------------------------------------------------------------------
// statsd flush metrics
+static inline void metric_check_obsoletion(STATSD_METRIC *m) { // called at the end of each statsd_flush_*(): retire the private chart of a metric that stopped receiving values
+ if(statsd.set_obsolete_after && m->st && // feature enabled (0 disables it) and a chart exists; the m->st guard keeps rrdset_flag_check() from dereferencing NULL
+ !rrdset_flag_check(m->st, RRDSET_FLAG_OBSOLETE) && // not already flagged obsolete
+ m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && // only metrics with a private chart are retired here
+ m->last_collected + statsd.set_obsolete_after < now_realtime_sec()) { // no value received for more than set_obsolete_after seconds
+ rrdset_is_obsolete___safe_from_collector_thread(m->st);
+ m->options |= STATSD_METRIC_OPTION_OBSOLETE; // the flush loop in statsd_flush_index_metrics() checks this flag to unlink and delete the metric
+ }
+}
+
static inline void statsd_flush_gauge(STATSD_METRIC *m) {
netdata_log_debug(D_STATSD, "flushing gauge metric '%s'", m->name);
@@ -1855,6 +1888,8 @@ static inline void statsd_flush_gauge(STATSD_METRIC *m) {
if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED))))
statsd_private_chart_gauge(m);
+
+ metric_check_obsoletion(m);
}
static inline void statsd_flush_counter_or_meter(STATSD_METRIC *m, const char *dim, const char *family) {
@@ -1870,6 +1905,8 @@ static inline void statsd_flush_counter_or_meter(STATSD_METRIC *m, const char *d
if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED))))
statsd_private_chart_counter_or_meter(m, dim, family);
+
+ metric_check_obsoletion(m);
}
static inline void statsd_flush_counter(STATSD_METRIC *m) {
@@ -1896,6 +1933,8 @@ static inline void statsd_flush_set(STATSD_METRIC *m) {
if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED))))
statsd_private_chart_set(m);
+
+ metric_check_obsoletion(m);
}
static inline void statsd_flush_dictionary(STATSD_METRIC *m) {
@@ -1924,6 +1963,8 @@ static inline void statsd_flush_dictionary(STATSD_METRIC *m) {
dictionary_entries(m->dictionary.dict));
}
}
+
+ metric_check_obsoletion(m);
}
static inline void statsd_flush_timer_or_histogram(STATSD_METRIC *m, const char *dim, const char *family, const char *units) {
@@ -1977,6 +2018,8 @@ static inline void statsd_flush_timer_or_histogram(STATSD_METRIC *m, const char
if(unlikely(m->options & STATSD_METRIC_OPTION_PRIVATE_CHART_ENABLED && (updated || !(m->options & STATSD_METRIC_OPTION_SHOW_GAPS_WHEN_NOT_COLLECTED))))
statsd_private_chart_timer_or_histogram(m, dim, family, units);
+
+ metric_check_obsoletion(m);
}
static inline void statsd_flush_timer(STATSD_METRIC *m) {
@@ -2283,7 +2326,7 @@ static inline void statsd_flush_index_metrics(STATSD_INDEX *index, void (*flush_
if(unlikely(is_metric_checked(m))) break;
if(unlikely(!(m->options & STATSD_METRIC_OPTION_CHECKED_IN_APPS))) {
- netdata_log_access("NEW STATSD METRIC '%s': '%s'", statsd_metric_type_string(m->type), m->name);
+ nd_log(NDLS_ACCESS, NDLP_DEBUG, "NEW STATSD METRIC '%s': '%s'", statsd_metric_type_string(m->type), m->name);
check_if_metric_is_for_app(index, m);
m->options |= STATSD_METRIC_OPTION_CHECKED_IN_APPS;
}
@@ -2326,8 +2369,20 @@ static inline void statsd_flush_index_metrics(STATSD_INDEX *index, void (*flush_
dfe_done(m);
// flush all the useful metrics
- for(m = index->first_useful; m ; m = m->next_useful) {
+ STATSD_METRIC *m_prev;
+ for(m_prev = m = index->first_useful; m ; m = m->next_useful) {
flush_metric(m);
+ if (m->options & STATSD_METRIC_OPTION_OBSOLETE) {
+ if (m == index->first_useful)
+ index->first_useful = m->next_useful;
+ else
+ m_prev->next_useful = m->next_useful;
+ dictionary_del(index->dict, m->name);
+ index->useful--;
+ index->metrics--;
+ statsd.private_charts--;
+ } else
+ m_prev = m;
}
}
@@ -2447,6 +2502,7 @@ void *statsd_main(void *ptr) {
config_get(CONFIG_SECTION_STATSD, "create private charts for metrics matching", "*"), NULL,
SIMPLE_PATTERN_EXACT, true);
statsd.max_private_charts_hard = (size_t)config_get_number(CONFIG_SECTION_STATSD, "max private charts hard limit", (long long)statsd.max_private_charts_hard);
+ statsd.set_obsolete_after = (size_t)config_get_number(CONFIG_SECTION_STATSD, "set charts as obsolete after secs", (long long)statsd.set_obsolete_after);
statsd.decimal_detail = (collected_number)config_get_number(CONFIG_SECTION_STATSD, "decimal detail", (long long int)statsd.decimal_detail);
statsd.tcp_idle_timeout = (size_t) config_get_number(CONFIG_SECTION_STATSD, "disconnect idle tcp clients after seconds", (long long int)statsd.tcp_idle_timeout);
statsd.private_charts_hidden = (unsigned int)config_get_boolean(CONFIG_SECTION_STATSD, "private charts hidden", statsd.private_charts_hidden);
@@ -2458,7 +2514,7 @@ void *statsd_main(void *ptr) {
}
{
char buffer[314 + 1];
- snprintfz(buffer, 314, "%0.1f%%", statsd.histogram_percentile);
+ snprintfz(buffer, sizeof(buffer) - 1, "%0.1f%%", statsd.histogram_percentile);
statsd.histogram_percentile_str = strdupz(buffer);
}
diff --git a/collectors/systemd-journal.plugin/Makefile.am b/collectors/systemd-journal.plugin/Makefile.am
index fd8f4ab2166c06..48f667c1bc03a7 100644
--- a/collectors/systemd-journal.plugin/Makefile.am
+++ b/collectors/systemd-journal.plugin/Makefile.am
@@ -5,6 +5,11 @@ MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
dist_noinst_DATA = \
README.md \
+ systemd-journal-self-signed-certs.sh \
+ forward_secure_sealing.md \
+ active_journal_centralization_guide_no_encryption.md \
+ passive_journal_centralization_guide_no_encryption.md \
+ passive_journal_centralization_guide_self_signed_certs.md \
$(NULL)
dist_libconfig_DATA = \
diff --git a/collectors/systemd-journal.plugin/README.md b/collectors/systemd-journal.plugin/README.md
index e69de29bb2d1d6..c3c639045d7f30 100644
--- a/collectors/systemd-journal.plugin/README.md
+++ b/collectors/systemd-journal.plugin/README.md
@@ -0,0 +1,472 @@
+
+# `systemd` journal plugin
+
+[KEY FEATURES](#key-features) | [JOURNAL SOURCES](#journal-sources) | [JOURNAL FIELDS](#journal-fields) |
+[PLAY MODE](#play-mode) | [FULL TEXT SEARCH](#full-text-search) | [PERFORMANCE](#query-performance) |
+[CONFIGURATION](#configuration-and-maintenance) | [FAQ](#faq)
+
+The `systemd` journal plugin by Netdata makes viewing, exploring and analyzing `systemd` journal logs simple and
+efficient.
+It automatically discovers available journal sources, allows advanced filtering, offers interactive visual
+representations and supports exploring the logs of both individual servers and the logs on infrastructure wide
+journal centralization servers.
+
+
+
+## Key features
+
+- Works on both **individual servers** and **journal centralization servers**.
+- Supports `persistent` and `volatile` journals.
+- Supports `system`, `user`, `namespaces` and `remote` journals.
+- Allows filtering on **any journal field** or **field value**, for any time-frame.
+- Allows **full text search** (`grep`) on all journal fields, for any time-frame.
+- Provides a **histogram** for log entries over time, with a break down per field-value, for any field and any
+ time-frame.
+- Works directly on journal files, without any other third-party components.
+- Supports coloring log entries, the same way `journalctl` does.
+- In PLAY mode provides the same experience as `journalctl -f`, showing new log entries immediately after they are
+ received.
+
+### Prerequisites
+
+`systemd-journal.plugin` is a Netdata Function Plugin.
+
+To protect your privacy, as with all Netdata Functions, a free Netdata Cloud user account is required to access it.
+For more information check [this discussion](https://github.com/netdata/netdata/discussions/16136).
+
+### Limitations
+
+#### Plugin availability
+
+The following are limitations related to the availability of the plugin:
+
+- Netdata versions prior to 1.44 shipped in a docker container do not include this plugin.
+ The problem is that `libsystemd` is not available in Alpine Linux (there is a `libsystemd`, but it is a dummy that
+ returns failure on all calls). Starting with Netdata version 1.44, Netdata containers use a Debian base image
+ making this plugin available when Netdata is running in a container.
+- For the same reason (lack of `systemd` support for Alpine Linux), the plugin is not available on `static` builds of
+ Netdata (which are based on `muslc`, not `glibc`). If your Netdata is installed in `/opt/netdata` you most likely have
+ a static build of Netdata.
+- On old systemd systems (like CentOS 7), the plugin always runs in "full data query" mode, which makes it slower. The
+ reason is that the systemd API is missing some important calls we need to use the field indexes of `systemd` journal.
+ However, when running in this mode, the plugin offers also negative matches on the data (like filtering for all logs
+ that do not have set some field), and this is the reason "full data query" mode is also offered as an option even on
+ newer versions of `systemd`.
+
+#### `systemd` journal features
+
+The following are limitations related to the features of `systemd` journal:
+
+- This plugin assumes that binary field values are text fields with newlines in them. `systemd-journal` has the ability
+ to support binary fields, without specifying the nature of the binary data. However, binary fields are commonly used
+ to store log entries that include multiple lines of text. The plugin treats all binary fields as multi-line text.
+- This plugin does not support multiple values per field for any given log entry. `systemd` journal has the ability to
+ accept the same field key, multiple times, with multiple values on a single log entry. This plugin will present the
+ last value and ignore the others for this log entry.
+- This plugin will only read journal files located in `/var/log/journal` or `/run/log/journal`. `systemd-journal-remote` has the
+ ability to store journal files anywhere (user configured). If journal files are not located in `/var/log/journal`
+ or `/run/log/journal` (and any of their subdirectories), the plugin will not find them. A simple solution is to link
+ the other directories somewhere inside `/var/log/journal`. The plugin will pick them up, even if a sub-directory of
+ `/var/log/journal` is a link to a directory outside `/var/log/journal`.
+
+Other than the above, this plugin supports all features of `systemd` journals.
+
+## Journal Sources
+
+The plugin automatically detects the available journal sources, based on the journal files available in
+`/var/log/journal` (persistent logs) and `/run/log/journal` (volatile logs).
+
+
+
+The plugin, by default, merges all journal sources together, to provide a unified view of all log messages available.
+
+> To improve query performance, we recommend selecting the relevant journal source, before doing more analysis on the
+> logs.
+
+### `system` journals
+
+`system` journals are the default journals available on all `systemd` based systems.
+
+`system` journals contain:
+
+- kernel log messages (via `kmsg`),
+- audit records, originating from the kernel audit subsystem,
+- messages received by `systemd-journald` via `syslog`,
+- messages received via the standard output and error of service units,
+- structured messages received via the native journal API.
+
+### `user` journals
+
+Unlike `journalctl`, the Netdata plugin allows viewing, exploring and querying the journal files of **all users**.
+
+By default, each user, with a UID outside the range of system users (0 - 999), dynamic service users,
+and the nobody user (65534), will get their own set of `user` journal files. For more information about
+this policy check [Users, Groups, UIDs and GIDs on systemd Systems](https://systemd.io/UIDS-GIDS/).
+
+Keep in mind that `user` journals are merged with the `system` journals when they are propagated to a journal
+centralization server. So, at the centralization server, the `remote` journals contain both the `system` and `user`
+journals of the sender.
+
+### `namespaces` journals
+
+The plugin auto-detects the namespaces available and provides a list of all namespaces at the "sources" list on the UI.
+
+Journal namespaces are both a mechanism for logically isolating the log stream of projects consisting
+of one or more services from the rest of the system and a mechanism for improving performance.
+
+`systemd` service units may be assigned to a specific journal namespace through the `LogNamespace=` unit file setting.
+
+Keep in mind that namespaces require special configuration to be propagated to a journal centralization server.
+This makes them a little more difficult to handle, from the administration perspective.
+
+### `remote` journals
+
+Remote journals are created by `systemd-journal-remote`. This `systemd` feature allows creating logs centralization
+points within your infrastructure, based exclusively on `systemd`.
+
+Usually `remote` journals are named by the IP of the server sending these logs. The Netdata plugin automatically
+extracts these IPs and performs a reverse DNS lookup to find their hostnames. When this is successful,
+`remote` journals are named by the hostnames of the origin servers.
+
+For information about configuring a journal centralization server,
+check [this FAQ item](#how-do-i-configure-a-journal-centralization-server).
+
+## Journal Fields
+
+`systemd` journals are designed to support multiple fields per log entry. The power of `systemd` journals is that,
+unlike other log management systems, it supports dynamic and variable fields for each log message,
+while all fields and their values are indexed for fast querying.
+
+This means that each application can log messages annotated with its own unique fields and values, and `systemd`
+journals will automatically index all of them, without any configuration or manual action.
+
+For a description of the most frequent fields found in `systemd` journals, check `man systemd.journal-fields`.
+
+Fields found in the journal files are automatically added to the UI in multiple places to help you explore
+and filter the data.
+
+The plugin automatically enriches certain fields to make them more user-friendly:
+
+- `_BOOT_ID`: the hex value is annotated with the timestamp of the first message encountered for this boot id.
+- `PRIORITY`: the numeric value is replaced with the human-readable name of each priority.
+- `SYSLOG_FACILITY`: the encoded value is replaced with the human-readable name of each facility.
+- `ERRNO`: the numeric value is annotated with the short name of each value.
+- `_UID`, `_AUDIT_LOGINUID`, `_SYSTEMD_OWNER_UID`, `OBJECT_UID`, `OBJECT_SYSTEMD_OWNER_UID`, `OBJECT_AUDIT_LOGINUID`:
+ the local user database is consulted to annotate them with usernames.
+- `_GID`, `OBJECT_GID`: the local group database is consulted to annotate them with group names.
+- `_CAP_EFFECTIVE`: the encoded value is annotated with a human-readable list of the linux capabilities.
+- `_SOURCE_REALTIME_TIMESTAMP`: the numeric value is annotated with human-readable datetime in UTC.
+- `MESSAGE_ID`: for the known `MESSAGE_ID`s, the value is replaced with the well known name of the event.
+
+The values of all other fields are presented as found in the journals.
+
+> IMPORTANT:
+> The UID and GID annotations are added during presentation and are taken from the server running the plugin.
+> For `remote` sources, the names presented may not reflect the actual user and group names on the origin server.
+> The numeric value will still be visible though, as-is on the origin server.
+
+The annotations are not searchable with full-text search. They are only added for the presentation of the fields.
+
+### Journal fields as columns in the table
+
+All journal fields available in the journal files are offered as columns on the UI. Use the gear button above the table:
+
+
+
+### Journal fields as additional info to each log entry
+
+When you click a log line, the `info` sidebar will open on the right of the screen, to provide the full list of fields
+related to this log line. You can close this `info` sidebar, by selecting the filter icon at its top.
+
+
+
+### Journal fields as filters
+
+The plugin presents a select list of fields as filters to the query, with counters for each of the possible values
+for the field. This list can be used to quickly check which fields and values are available for the entire time-frame
+of the query.
+
+Internally the plugin has:
+
+1. A white-list of fields, to be presented as filters.
+2. A black-list of fields, to prevent them from becoming filters. This list includes fields with a very high
+ cardinality, like timestamps, unique message ids, etc. This is mainly for protecting the server's performance,
+ to avoid building in-memory indexes for fields in which almost every value is unique.
+
+Keep in mind that the values presented in the filters, and their sorting, are affected by the "full data queries"
+setting:
+
+
+
+When "full data queries" is off, empty values are hidden and cannot be selected. This is due to a limitation of
+`libsystemd` that does not allow negative or empty matches. Also, values with zero counters may appear in the list.
+
+When "full data queries" is on, Netdata is applying all filtering to the data (not `libsystemd`), but this means
+that all the data of the entire time-frame, without any filtering applied, have to be read by the plugin to prepare
+the response required. So, "full data queries" can be significantly slower over long time-frames.
+
+### Journal fields as histogram sources
+
+The plugin presents a histogram of the number of log entries across time.
+
+The data source of this histogram can be any of the fields that are available as filters.
+For each of the values this field has, across the entire time-frame of the query, the histogram will get corresponding
+dimensions, showing the number of log entries, per value, over time.
+
+The granularity of the histogram is adjusted automatically to have about 150 columns visible on screen.
+
+The histogram presented by the plugin is interactive:
+
+- **Zoom**, either with the global date-time picker, or the zoom tool in the histogram's toolbox.
+- **Pan**, either with global date-time picker, or by dragging with the mouse the chart to the left or the right.
+- **Click**, to quickly jump to the highlighted point in time in the log entries.
+
+
+
+## PLAY mode
+
+The plugin supports PLAY mode, to continuously update the screen with new log entries found in the journal files.
+Just hit the "play" button at the top of the Netdata dashboard screen.
+
+On centralized log servers, PLAY mode provides a unified view of all the new logs encountered across the entire
+infrastructure,
+from all hosts sending logs to the central logs server via `systemd-journal-remote`.
+
+## Full-text search
+
+The plugin supports searching for any text on all fields of the log entries.
+
+Full text search is combined with the selected filters.
+
+The text box accepts asterisks `*` as wildcards. So, `a*b*c` means match anything that contains `a`, then `b` and
+then `c` with anything between them.
+
+Spaces are treated as OR expressions. So that `a*b c*d` means `a*b OR c*d`.
+
+Negative expressions are supported, by prefixing any string with `!`. Example: `!systemd *` means match anything that
+does not contain `systemd` on any of its fields.
+
+## Query performance
+
+Journal files are designed to be accessed by multiple readers and one writer, concurrently.
+
+Readers (like this Netdata plugin), open the journal files and `libsystemd`, behind the scenes, maps regions
+of the files into memory, to satisfy each query.
+
+On logs aggregation servers, the performance of the queries depends on the following factors:
+
+1. The **number of files** involved in each query.
+
+ This is why we suggest selecting a source when possible.
+
+2. The **speed of the disks** hosting the journal files.
+
+ Querying journal files involves a lot of reading, so the faster the disks, the faster the query will finish.
+
+3. The **memory available** for caching parts of the files.
+
+ Increased memory will help the kernel cache the most frequently used parts of the journal files, avoiding disk I/O
+ and speeding up queries.
+
+4. The **number of filters** applied.
+
+ Queries are significantly faster when just a few filters are selected.
+
+In general, for a faster experience, **keep a low number of rows within the visible timeframe**.
+
+Even on long timeframes, selecting a couple of filters that will result in a **few dozen thousand** log entries
+will provide fast / rapid responses, usually less than a second. To the contrary, viewing timeframes with **millions
+of entries** may result in longer delays.
+
+The plugin aborts journal queries when your browser cancels inflight requests. This allows you to work on the UI
+while there are background queries running.
+
+At the time of this writing, this Netdata plugin is about 25-30 times faster than `journalctl` on queries that access
+multiple journal files, over long time-frames.
+
+During the development of this plugin, we submitted, to `systemd`, a number of patches to improve `journalctl`
+performance by a factor of 14:
+
+-
+-
+-
+
+However, even after these patches are merged, `journalctl` will still be 2x slower than this Netdata plugin,
+on multi-journal queries.
+
+The problem lies in the way `libsystemd` handles multi-journal file queries. To overcome this problem,
+the Netdata plugin queries each file individually and then merges the results to be returned.
+This is transparent, thanks to the `facets` library in `libnetdata` that handles on-the-fly indexing, filtering,
+and searching of any dataset, independently of its source.
+
+## Performance at scale
+
+On busy logs servers, or when querying long timeframes that match millions of log entries, the plugin has a sampling
+algorithm to allow it to respond promptly. It works like this:
+
+1. The latest 500k log entries are queried in full, evaluating all the fields of every single log entry. This evaluation
+ allows counting the unique values per field, updating the counters next to each value at the filters section of the
+ dashboard.
+2. When the latest 500k log entries have been processed and there are more data to read, the plugin divides evenly 500k
+ more log entries to the number of journal files matched by the query. So, it will continue to evaluate all the fields
+ of all log entries, up to the budget per file, aiming to fully query 1 million log entries in total.
+3. When the budget is hit for a given file, the plugin continues to scan log entries, but this time it does not evaluate
+ the fields and their values, so the counters per field and value are not updated. These unsampled log entries are
+ shown in the histogram with the label `[unsampled]`.
+4. The plugin continues to count `[unsampled]` entries until as many as sampled entries have been evaluated and at least
+ 1% of the journal file has been processed.
+5. When the `[unsampled]` budget is exhausted, the plugin stops processing the journal file and based on the processing
+ completed so far and the number of entries in the journal file, it estimates the remaining number of log entries in
+ that file. This is shown as `[estimated]` at the histogram.
+6. In systemd versions 254 or later, the plugin fetches the unique sequence number of each log entry and calculates
+ the percentage of the file matched by the query, versus the total number of the log entries in the journal file.
+7. In systemd versions prior to 254, the plugin estimates the number of entries the journal file contributes to the
+ query, using the number of log entries it matched vs. the total duration the log file has entries for.
+
+The above allows the plugin to respond promptly even when the number of log entries in the journal files is several
+tens of millions, while providing accurate estimations of the log entries over time at the histogram and enough counters
+at the fields filtering section to help users get an overview of the whole timeframe.
+
+Because the latest 500k log entries and 1% of all journal files (which are spread over time) have been fully
+evaluated, including counting the number of appearances for each field value, the plugin usually provides an accurate
+representation of the whole timeframe.
+
+Keep in mind that although the plugin is quite effective and responds promptly when there are hundreds of journal files
+matching a query, response times may be longer when there are several thousands of smaller files. systemd versions 254+
+attempt to solve this problem by allowing `systemd-journal-remote` to create larger files. However, for systemd
+versions prior to 254, `systemd-journal-remote` creates files of up to 32MB each, so on very busy journal
+centralization servers aggregating several thousands of log entries per second, the number of files can quickly grow
+to several tens of thousands. In such setups, the plugin should ideally skip processing journal files
+entirely, relying solely on the estimations of the sequence of files each file is part of. However, this has not been
+implemented yet. To improve the query performance in such setups, the user has to query smaller timeframes.
+
+Another optimization taking place in huge journal centralization points, is the initial scan of the database. The plugin
+needs to know the list of all journal files available, including the details of the first and the last message in each
+of them. When there are several thousands of files in a directory (like it usually happens in `/var/log/journal/remote`),
+directory listing and examination of each file can take a considerable amount of time (even `ls -l` takes minutes).
+To work around this problem, the plugin uses `inotify` to receive file updates immediately and scans the library from
+the newest to the oldest file, allowing the user interface to work immediately after startup, for the most recent
+timeframes.
+
+### Best practices for better performance
+
+systemd-journal has been designed **first to be reliable** and then to be fast. It includes several mechanisms to ensure
+minimal data loss under all conditions (e.g. disk corruption, tampering, forward secure sealing) and despite the fact
+that it utilizes several techniques to require minimal disk footprint (like deduplication of log entries, linking of
+values and fields, compression) the disk footprint of journal files remains significantly higher compared to other log
+management solutions.
+
+The higher disk footprint results in higher disk I/O during querying, since a lot more data have to be read from disk to
+evaluate a query. Query performance at scale can greatly benefit by utilizing a compressed filesystem (ext4, btrfs, zfs)
+to store systemd-journal files.
+
+systemd-journal files are cached by the operating system. There is no database server to serve queries. Each file is
+opened and the query runs by directly accessing the data in it.
+
+Therefore systemd-journal relies on the caching layer of the operating system to optimize query performance. The more
+RAM the system has, although it will not be reported as `used` (it will be reported as `cache`), the faster the queries
+will get. The first time a timeframe is accessed the query performance will be slower, but further queries on the same
+timeframe will be significantly faster since journal data are now cached in memory.
+
+So, on busy logs centralization systems, queries performance can be improved significantly by using a compressed
+filesystem for storing the journal files, and higher amounts of RAM.
+
+## Configuration and maintenance
+
+This Netdata plugin does not require any configuration or maintenance.
+
+## FAQ
+
+### Can I use this plugin on journal centralization servers?
+
+Yes. You can centralize your logs using `systemd-journal-remote`, and then install Netdata
+on this logs centralization server to explore the logs of all your infrastructure.
+
+This plugin will automatically provide multi-node views of your logs and also give you the ability to combine the logs
+of multiple servers, as you see fit.
+
+Check [configuring a logs centralization server](#how-do-i-configure-a-journal-centralization-server).
+
+### Can I use this plugin from a parent Netdata?
+
+Yes. When your nodes are connected to a Netdata parent, all their functions are available
+via the parent's UI. So, from the parent UI, you can access the functions of all your nodes.
+
+Keep in mind that to protect your privacy, in order to access Netdata functions, you need a
+free Netdata Cloud account.
+
+### Is any of my data exposed to Netdata Cloud from this plugin?
+
+No. When you access the agent directly, none of your data passes through Netdata Cloud.
+You need a free Netdata Cloud account only to verify your identity and enable the use of
+Netdata Functions. Once this is done, all the data flow directly from your Netdata agent
+to your web browser.
+
+Also check [this discussion](https://github.com/netdata/netdata/discussions/16136).
+
+When you access Netdata via `https://app.netdata.cloud`, your data travel via Netdata Cloud,
+but they are not stored in Netdata Cloud. This is to allow you to access your Netdata agents from
+anywhere. All communication from/to Netdata Cloud is encrypted.
+
+### What are `volatile` and `persistent` journals?
+
+`systemd` `journald` allows creating both `volatile` journals in a `tmpfs` ram drive,
+and `persistent` journals stored on disk.
+
+`volatile` journals are particularly useful when the system monitored is sensitive to
+disk I/O, or does not have any writable disks at all.
+
+For more information check `man systemd-journald`.
+
+### I centralize my logs with Loki. Why to use Netdata for my journals?
+
+`systemd` journals have almost infinite cardinality at their labels and all of them are indexed,
+even if every single message has unique fields and values.
+
+When you send `systemd` journal logs to Loki, even if you use the `relabel_rules` argument to
+`loki.source.journal` with a JSON format, you need to specify which of the fields from journald
+you want inherited by Loki. This means you need to know the most important fields beforehand.
+At the same time you lose all the flexibility `systemd` journal provides:
+**indexing on all fields and all their values**.
+
+Loki generally assumes that all logs are like a table. All entries in a stream share the same
+fields. But journald does exactly the opposite. Each log entry is unique and may have its own unique fields.
+
+So, Loki and `systemd-journal` are good for different use cases.
+
+`systemd-journal` already runs in your systems. You use it today. It is there inside all your systems
+collecting the system and applications logs. And for its use case, it has advantages over other
+centralization solutions. So, why not use it?
+
+### Is it worth to build a `systemd` logs centralization server?
+
+Yes. It is simple, fast and the software to do it is already in your systems.
+
+For application and system logs, `systemd` journal is ideal and the visibility you can get
+by centralizing your system logs and the use of this Netdata plugin, is unparalleled.
+
+### How do I configure a journal centralization server?
+
+A short summary to get a journal server running can be found below.
+There are two strategies you can apply, when it comes down to a centralized server for `systemd` journal logs.
+
+1. _Active sources_, where the centralized server fetches the logs from each individual server
+2. _Passive sources_, where the centralized server accepts a log stream from an individual server.
+
+For more options and reference to documentation, check `man systemd-journal-remote` and `man systemd-journal-upload`.
+
+#### _passive_ journal centralization without encryption
+
+If you want to set up your own passive journal centralization without encryption, [check out our guide on it](https://github.com/netdata/netdata/blob/master/collectors/systemd-journal.plugin/passive_journal_centralization_guide_no_encryption.md).
+
+#### _passive_ journal centralization with encryption using self-signed certificates
+
+If you want to set up your own passive journal centralization using self-signed certificates for encryption, [check out our guide on it](https://github.com/netdata/netdata/blob/master/collectors/systemd-journal.plugin/passive_journal_centralization_guide_self_signed_certs.md).
+
+#### Limitations when using a logs centralization server
+
+As of this writing `namespaces` support by `systemd` is limited:
+
+- Docker containers cannot log to namespaces. Check [this issue](https://github.com/moby/moby/issues/41879).
+- `systemd-journal-upload` automatically uploads `system` and `user` journals, but not `namespaces` journals. For this
+ you need to spawn a `systemd-journal-upload` per namespace.
diff --git a/collectors/systemd-journal.plugin/active_journal_centralization_guide_no_encryption.md b/collectors/systemd-journal.plugin/active_journal_centralization_guide_no_encryption.md
new file mode 100644
index 00000000000000..cbed1e81e5a7b2
--- /dev/null
+++ b/collectors/systemd-journal.plugin/active_journal_centralization_guide_no_encryption.md
@@ -0,0 +1,126 @@
+# Active journal source without encryption
+
+This page will guide you through creating an active journal source without the use of encryption.
+
+Once you enable an active journal source on a server, `systemd-journal-gatewayd` will expose a REST API on TCP port 19531. This API can be used for querying the logs, exporting the logs, or monitoring new log entries, remotely.
+
+> ⚠️ **IMPORTANT**
+> These instructions will expose your logs to the network, without any encryption or authorization.
+> DO NOT USE THIS ON NON-TRUSTED NETWORKS.
+
+## Configuring an active journal source
+
+On the server whose logs you want to expose, install `systemd-journal-gateway`.
+
+```bash
+# change this according to your distro
+sudo apt-get install systemd-journal-gateway
+```
+
+Optionally, if you want to change the port (the default is `19531`), edit `systemd-journal-gatewayd.socket`
+
+```bash
+# edit the socket file
+sudo systemctl edit systemd-journal-gatewayd.socket
+```
+
+and add the following lines into the instructed place, and choose your desired port; save and exit.
+
+```bash
+[Socket]
+ListenStream=<DESIRED_PORT>
+```
+
+Finally, enable it, so that it will start automatically upon receiving a connection:
+
+```bash
+# enable systemd-journal-gatewayd
+sudo systemctl daemon-reload
+sudo systemctl enable --now systemd-journal-gatewayd.socket
+```
+
+## Using the active journal source
+
+### Simple Logs Explorer
+
+`systemd-journal-gateway` provides a simple HTML5 application to browse the logs.
+
+To use it, open your web browser and navigate to:
+
+```
+http://server.ip:19531/browse
+```
+
+A simple page like this will be presented:
+
+
+
+### Use it with `curl`
+
+`man systemd-journal-gatewayd` documents the supported API methods and provides examples to query the API using `curl` commands.
+
+### Copying the logs to a central journals server
+
+`systemd-journal-remote` has the ability to query instances of `systemd-journal-gatewayd` to fetch their logs, so that the central server fetches the logs, instead of waiting for the individual servers to push their logs to it.
+
+However, this kind of logs centralization has a key problem: **there is no guarantee that there will be no gaps in the logs replicated**. Theoretically, the REST API of `systemd-journal-gatewayd` supports querying past data, and `systemd-journal-remote` could keep track of the state of replication and automatically continue from the point it stopped last time. But it does not. So, currently the best logs centralization option is to use a **passive** centralization, where the clients push their logs to the server.
+
+Given these limitations, if you still want to configure an **active** journals centralization, this is what you need to do:
+
+On the centralization server install `systemd-journal-remote`:
+
+```bash
+# change this according to your distro
+sudo apt-get install systemd-journal-remote
+```
+
+Then, copy `systemd-journal-remote.service` to configure it for querying the active source:
+
+```bash
+# replace "clientX" with the name of the active client node
+sudo cp /lib/systemd/system/systemd-journal-remote.service /etc/systemd/system/systemd-journal-remote-clientX.service
+
+# edit it to make sure the ExecStart line is like this:
+# ExecStart=/usr/lib/systemd/systemd-journal-remote --url http://clientX:19531/entries?follow
+sudo nano /etc/systemd/system/systemd-journal-remote-clientX.service
+
+# reload systemd
+sudo systemctl daemon-reload
+```
+
+```bash
+# enable systemd-journal-remote
+sudo systemctl enable --now systemd-journal-remote-clientX.service
+```
+
+You can repeat this process to create as many `systemd-journal-remote` services as you have active sources.
+
+## Verify it works
+
+To verify the central server is receiving logs, run this on the central server:
+
+```bash
+sudo ls -l /var/log/journal/remote/
+```
+
+You should see new files from the client's hostname or IP.
+
+Also, the status of any of the new services (`systemctl status systemd-journal-remote-clientX`) should show something like this:
+
+```bash
+● systemd-journal-clientX.service - Fetching systemd journal logs from 192.168.2.146
+ Loaded: loaded (/etc/systemd/system/systemd-journal-clientX.service; enabled; preset: disabled)
+ Drop-In: /usr/lib/systemd/system/service.d
+ └─10-timeout-abort.conf
+ Active: active (running) since Wed 2023-10-18 07:35:52 EEST; 23min ago
+ Main PID: 77959 (systemd-journal)
+ Tasks: 2 (limit: 6928)
+ Memory: 7.7M
+ CPU: 518ms
+ CGroup: /system.slice/systemd-journal-clientX.service
+ ├─77959 /usr/lib/systemd/systemd-journal-remote --url "http://192.168.2.146:19531/entries?follow"
+ └─77962 curl "-HAccept: application/vnd.fdo.journal" --silent --show-error "http://192.168.2.146:19531/entries?follow"
+
+Oct 18 07:35:52 systemd-journal-server systemd[1]: Started systemd-journal-clientX.service - Fetching systemd journal logs from 192.168.2.146.
+Oct 18 07:35:52 systemd-journal-server systemd-journal-remote[77959]: Spawning curl http://192.168.2.146:19531/entries?follow...
+```
diff --git a/collectors/systemd-journal.plugin/forward_secure_sealing.md b/collectors/systemd-journal.plugin/forward_secure_sealing.md
new file mode 100644
index 00000000000000..b41570d68c29ea
--- /dev/null
+++ b/collectors/systemd-journal.plugin/forward_secure_sealing.md
@@ -0,0 +1,80 @@
+# Forward Secure Sealing (FSS) in Systemd-Journal
+
+Forward Secure Sealing (FSS) is a feature in the systemd journal designed to detect log file tampering.
+Given that attackers often try to hide their actions by modifying or deleting log file entries,
+FSS provides administrators with a mechanism to identify any such unauthorized alterations.
+
+## Importance
+Logs are a crucial component of system monitoring and auditing. Ensuring their integrity means administrators can trust
+the data, detect potential breaches, and trace actions back to their origins. Traditional methods to maintain this
+integrity involve writing logs to external systems or printing them out. While these methods are effective, they are
+not foolproof. FSS offers a more streamlined approach, allowing for log verification directly on the local system.
+
+## How FSS Works
+FSS operates by "sealing" binary logs at regular intervals. This seal is a cryptographic operation, ensuring that any
+tampering with the logs prior to the sealing can be detected. If an attacker modifies logs before they are sealed,
+these changes become a permanent part of the sealed record, highlighting any malicious activity.
+
+The technology behind FSS is based on "Forward Secure Pseudo Random Generators" (FSPRG), a concept stemming from
+academic research.
+
+Two keys are central to FSS:
+
+- **Sealing Key**: Kept on the system, used to seal the logs.
+- **Verification Key**: Stored securely off-system, used to verify the sealed logs.
+
+Every so often, the sealing key is regenerated in a non-reversible process, ensuring that old keys are obsolete and the
+latest logs are sealed with a fresh key. The off-site verification key can regenerate any past sealing key, allowing
+administrators to verify older seals. If logs are tampered with, verification will fail, alerting administrators to the
+breach.
+
+## Enabling FSS
+To enable FSS, use the following command:
+
+```bash
+journalctl --setup-keys
+```
+
+By default, systemd will seal the logs every 15 minutes. However, this interval can be adjusted using a flag during key
+generation. For example, to seal logs every 10 seconds:
+
+```bash
+journalctl --setup-keys --interval=10s
+```
+
+## Verifying Journals
+After enabling FSS, you can verify the integrity of your logs using the verification key:
+
+```bash
+journalctl --verify --verify-key=<VERIFICATION_KEY>
+```
+
+If any discrepancies are found, you'll be alerted, indicating potential tampering.
+
+## Disabling FSS
+Should you wish to disable FSS:
+
+**Delete the Sealing Key**: This stops new log entries from being sealed.
+
+```bash
+sudo rm /var/log/journal/$(cat /etc/machine-id)/fss
+```
+
+**Rotate and Prune the Journals**: This will start a new unsealed journal and can remove old sealed journals.
+
+```bash
+journalctl --rotate --vacuum-time=1s
+```
+
+
+**Adjust Systemd Configuration (Optional)**: If you've made changes to facilitate FSS in `/etc/systemd/journald.conf`,
+consider reverting or adjusting those. Restart the systemd-journald service afterward:
+
+```bash
+systemctl restart systemd-journald
+```
+
+## Conclusion
+FSS is a significant advancement in maintaining log integrity. While not a replacement for all traditional integrity
+methods, it offers a valuable tool in the battle against unauthorized log tampering. By integrating FSS into your log
+management strategy, you ensure a more transparent, reliable, and tamper-evident logging system.
diff --git a/collectors/systemd-journal.plugin/passive_journal_centralization_guide_no_encryption.md b/collectors/systemd-journal.plugin/passive_journal_centralization_guide_no_encryption.md
new file mode 100644
index 00000000000000..b70c22033e1aef
--- /dev/null
+++ b/collectors/systemd-journal.plugin/passive_journal_centralization_guide_no_encryption.md
@@ -0,0 +1,150 @@
+# Passive journal centralization without encryption
+
+This page will guide you through creating a passive journal centralization setup without the use of encryption.
+
+Once you centralize your infrastructure logs to a server, Netdata will automatically detect all the logs from all servers and organize them in sources.
+With the setup described in this document, journal files are identified by the IPs of the clients sending the logs. Netdata will automatically do
+reverse DNS lookups to find the names of the servers and name the sources on the dashboard accordingly.
+
+A _passive_ journal server waits for clients to push their logs to it, so in this setup we will:
+
+1. configure `systemd-journal-remote` on the server, to listen for incoming connections.
+2. configure `systemd-journal-upload` on the clients, to push their logs to the server.
+
+> ⚠️ **IMPORTANT**
+> These instructions will copy your logs to a central server, without any encryption or authorization.
+> DO NOT USE THIS ON NON-TRUSTED NETWORKS.
+
+## Server configuration
+
+On the centralization server install `systemd-journal-remote`:
+
+```bash
+# change this according to your distro
+sudo apt-get install systemd-journal-remote
+```
+
+Make sure the journal transfer protocol is `http`:
+
+```bash
+sudo cp /lib/systemd/system/systemd-journal-remote.service /etc/systemd/system/
+
+# edit it to make sure it says:
+# --listen-http=-3
+# not:
+# --listen-https=-3
+sudo nano /etc/systemd/system/systemd-journal-remote.service
+
+# reload systemd
+sudo systemctl daemon-reload
+```
+
+Optionally, if you want to change the port (the default is `19532`), edit `systemd-journal-remote.socket`
+
+```bash
+# edit the socket file
+sudo systemctl edit systemd-journal-remote.socket
+```
+
+and add the following lines into the instructed place, and choose your desired port; save and exit.
+
+```bash
+[Socket]
+ListenStream=<DESIRED_PORT>
+```
+
+Finally, enable it, so that it will start automatically upon receiving a connection:
+
+```bash
+# enable systemd-journal-remote
+sudo systemctl enable --now systemd-journal-remote.socket
+sudo systemctl enable systemd-journal-remote.service
+```
+
+`systemd-journal-remote` is now listening for incoming journals from remote hosts.
+
+## Client configuration
+
+On the clients, install `systemd-journal-remote` (it includes `systemd-journal-upload`):
+
+```bash
+# change this according to your distro
+sudo apt-get install systemd-journal-remote
+```
+
+Edit `/etc/systemd/journal-upload.conf` and set the IP address and the port of the server, like so:
+
+```conf
+[Upload]
+URL=http://centralization.server.ip:19532
+```
+
+Edit `systemd-journal-upload`, and add `Restart=always` to make sure the client will keep trying to push logs, even if the server is temporarily not there, like this:
+
+```bash
+sudo systemctl edit systemd-journal-upload
+```
+
+At the top, add:
+
+```conf
+[Service]
+Restart=always
+```
+
+Enable and start `systemd-journal-upload`, like this:
+
+```bash
+sudo systemctl enable systemd-journal-upload
+sudo systemctl start systemd-journal-upload
+```
+
+## Verify it works
+
+To verify the central server is receiving logs, run this on the central server:
+
+```bash
+sudo ls -l /var/log/journal/remote/
+```
+
+You should see new files from the client's IP.
+
+Also, `systemctl status systemd-journal-remote` should show something like this:
+
+```bash
+systemd-journal-remote.service - Journal Remote Sink Service
+ Loaded: loaded (/etc/systemd/system/systemd-journal-remote.service; indirect; preset: disabled)
+ Active: active (running) since Sun 2023-10-15 14:29:46 EEST; 2h 24min ago
+TriggeredBy: ● systemd-journal-remote.socket
+ Docs: man:systemd-journal-remote(8)
+ man:journal-remote.conf(5)
+ Main PID: 2118153 (systemd-journal)
+ Status: "Processing requests..."
+ Tasks: 1 (limit: 154152)
+ Memory: 2.2M
+ CPU: 71ms
+ CGroup: /system.slice/systemd-journal-remote.service
+ └─2118153 /usr/lib/systemd/systemd-journal-remote --listen-http=-3 --output=/var/log/journal/remote/
+```
+
+Note the `Status: "Processing requests..."` and the PID under `CGroup`.
+
+On the client `systemctl status systemd-journal-upload` should show something like this:
+
+```bash
+● systemd-journal-upload.service - Journal Remote Upload Service
+ Loaded: loaded (/lib/systemd/system/systemd-journal-upload.service; enabled; vendor preset: disabled)
+ Drop-In: /etc/systemd/system/systemd-journal-upload.service.d
+ └─override.conf
+ Active: active (running) since Sun 2023-10-15 10:39:04 UTC; 3h 17min ago
+ Docs: man:systemd-journal-upload(8)
+ Main PID: 4169 (systemd-journal)
+ Status: "Processing input..."
+ Tasks: 1 (limit: 13868)
+ Memory: 3.5M
+ CPU: 1.081s
+ CGroup: /system.slice/systemd-journal-upload.service
+ └─4169 /lib/systemd/systemd-journal-upload --save-state
+```
+
+Note the `Status: "Processing input..."` and the PID under `CGroup`.
diff --git a/collectors/systemd-journal.plugin/passive_journal_centralization_guide_self_signed_certs.md b/collectors/systemd-journal.plugin/passive_journal_centralization_guide_self_signed_certs.md
new file mode 100644
index 00000000000000..722d1ceae4e546
--- /dev/null
+++ b/collectors/systemd-journal.plugin/passive_journal_centralization_guide_self_signed_certs.md
@@ -0,0 +1,250 @@
+# Passive journal centralization with encryption using self-signed certificates
+
+This page will guide you through creating a **passive** journal centralization setup using **self-signed certificates** for encryption and authorization.
+
+Once you centralize your infrastructure logs to a server, Netdata will automatically detect all the logs from all servers and organize them in sources. With the setup described in this document, on recent systemd versions, Netdata will automatically name all remote sources using the names of the clients, as they are described at their certificates (on older versions, the names will be IPs or reverse DNS lookups of the IPs).
+
+A **passive** journal server waits for clients to push their logs to it, so in this setup we will:
+
+1. configure a certificate authority and issue self-signed certificates for your servers.
+2. configure `systemd-journal-remote` on the server, to listen for incoming connections.
+3. configure `systemd-journal-upload` on the clients, to push their logs to the server.
+
+Keep in mind that the authorization involved works like this:
+
+1. The server (`systemd-journal-remote`) validates that the client (`systemd-journal-upload`) uses a trusted certificate (a certificate issued by the same certificate authority as its own).
+ So, **the server will accept logs from any client having a valid certificate**.
+2. The client (`systemd-journal-upload`) validates that the receiver (`systemd-journal-remote`) uses a trusted certificate (like the server does) and it also checks that the hostname or IP of the URL specified in its configuration matches one of the names or IPs of the server it gets connected to. So, **the client validates that it connected to the right server**, by checking the URL hostname against the names and IPs listed in the server's certificate.
+
+This means that if both certificates are issued by the same certificate authority, only the client can potentially reject the server.
+
+## Self-signed certificates
+
+To simplify the process of creating and managing self-signed certificates, we have created [this bash script](https://github.com/netdata/netdata/blob/master/collectors/systemd-journal.plugin/systemd-journal-self-signed-certs.sh).
+
+This helps to also automate the distribution of the certificates to your servers (it generates a new bash script for each of your servers, which includes everything required, including the certificates).
+
+We suggest keeping this script and all the involved certificates at the journals centralization server, in the directory `/etc/ssl/systemd-journal`, so that you can make future changes as required. If you prefer to keep the certificate authority and all the certificates at a more secure location, just use the script on that location.
+
+On the server that will issue the certificates (usually the centralization server), do the following:
+
+```bash
+# install systemd-journal-remote to add the users and groups required and openssl for the certs
+# change this according to your distro
+sudo apt-get install systemd-journal-remote openssl
+
+# download the script and make it executable
+curl >systemd-journal-self-signed-certs.sh "https://raw.githubusercontent.com/netdata/netdata/master/collectors/systemd-journal.plugin/systemd-journal-self-signed-certs.sh"
+chmod 750 systemd-journal-self-signed-certs.sh
+```
+
+To create certificates for your servers, run this:
+
+```bash
+sudo ./systemd-journal-self-signed-certs.sh "server1" "DNS:hostname1" "IP:10.0.0.1"
+```
+
+Where:
+
+ - `server1` is the canonical name of the server. On newer systemd versions, this name will be used by `systemd-journal-remote` and Netdata when you view the logs on the dashboard.
+ - `DNS:hostname1` is a DNS name that the server is reachable at. Add `"DNS:xyz"` multiple times to define multiple DNS names for the server.
+ - `IP:10.0.0.1` is an IP that the server is reachable at. Add `"IP:xyz"` multiple times to define multiple IPs for the server.
+
+Repeat this process to create the certificates for all your servers. You can add servers as required, at any time in the future.
+
+Existing certificates are never re-generated. Typically certificates need to be revoked and new ones to be issued. But `systemd-journal-remote` tools do not support handling revocations. So, the only option you have to re-issue a certificate is to delete its files in `/etc/ssl/systemd-journal` and run the script again to create a new one.
+
+Once you run the script for each of your servers, in `/etc/ssl/systemd-journal` you will find shell scripts named `runme-on-XXX.sh`, where `XXX` are the canonical names of your servers.
+
+These `runme-on-XXX.sh` include everything to install the certificates, fix their file permissions to be accessible by `systemd-journal-remote` and `systemd-journal-upload`, and update `/etc/systemd/journal-remote.conf` and `/etc/systemd/journal-upload.conf`.
+
+You can copy and paste (or `scp`) these scripts on your server and each of your clients:
+
+```bash
+sudo scp /etc/ssl/systemd-journal/runme-on-XXX.sh XXX:/tmp/
+```
+
+For the rest of this guide, we assume that you have copied the right `runme-on-XXX.sh` at the `/tmp` of all the servers for which you issued certificates.
+
+### note about certificates file permissions
+
+It is worth noting that `systemd-journal` certificates need to be owned by `systemd-journal-remote:systemd-journal`.
+
+Both the user `systemd-journal-remote` and the group `systemd-journal` are automatically added by the `systemd-journal-remote` package. However, `systemd-journal-upload` (and `systemd-journal-gatewayd`, which is not used in this guide) use dynamic users. Thankfully they are added to the `systemd-journal` group.
+
+So, having the certificates owned by `systemd-journal-remote:systemd-journal` satisfies both `systemd-journal-remote`, which is not in the `systemd-journal` group, and `systemd-journal-upload` (and `systemd-journal-gatewayd`), which use dynamic users.
+
+You don't need to do anything about it (the scripts take care of everything), but it is worth noting how this works.
+
+## Server configuration
+
+On the centralization server install `systemd-journal-remote`:
+
+```bash
+# change this according to your distro
+sudo apt-get install systemd-journal-remote
+```
+
+Make sure the journal transfer protocol is `https`:
+
+```bash
+sudo cp /lib/systemd/system/systemd-journal-remote.service /etc/systemd/system/
+
+# edit it to make sure it says:
+# --listen-https=-3
+# not:
+# --listen-http=-3
+sudo nano /etc/systemd/system/systemd-journal-remote.service
+
+# reload systemd
+sudo systemctl daemon-reload
+```
+
+Optionally, if you want to change the port (the default is `19532`), edit `systemd-journal-remote.socket`
+
+```bash
+# edit the socket file
+sudo systemctl edit systemd-journal-remote.socket
+```
+
+and add the following lines into the instructed place, and choose your desired port; save and exit.
+
+```bash
+[Socket]
+ListenStream=<DESIRED_PORT>
+```
+
+Next, run the `runme-on-XXX.sh` script on the server:
+
+```bash
+# if you run the certificate authority on the server:
+sudo /etc/ssl/systemd-journal/runme-on-XXX.sh
+
+# if you run the certificate authority elsewhere,
+# assuming you have copied the runme-on-XXX.sh script (as described above):
+sudo bash /tmp/runme-on-XXX.sh
+```
+
+This will install the certificates in `/etc/ssl/systemd-journal`, set the right file permissions, and update `/etc/systemd/journal-remote.conf` and `/etc/systemd/journal-upload.conf` to use the right certificate files.
+
+Finally, enable it, so that it will start automatically upon receiving a connection:
+
+```bash
+# enable systemd-journal-remote
+sudo systemctl enable --now systemd-journal-remote.socket
+sudo systemctl enable systemd-journal-remote.service
+```
+
+`systemd-journal-remote` is now listening for incoming journals from remote hosts.
+
+> When done, remember to `rm /tmp/runme-on-*.sh` to make sure your certificates are secure.
+
+## Client configuration
+
+On the clients, install `systemd-journal-remote` (it includes `systemd-journal-upload`):
+
+```bash
+# change this according to your distro
+sudo apt-get install systemd-journal-remote
+```
+
+Edit `/etc/systemd/journal-upload.conf` and set the IP address and the port of the server, like so:
+
+```conf
+[Upload]
+URL=https://centralization.server.ip:19532
+```
+
+Make sure that `centralization.server.ip` is one of the `DNS:` or `IP:` parameters you defined when you created the centralization server certificates. If it is not, the client may refuse to connect.
+
+Next, edit `systemd-journal-upload.service`, and add `Restart=always` to make sure the client will keep trying to push logs, even if the server is temporarily not there, like this:
+
+```bash
+sudo systemctl edit systemd-journal-upload.service
+```
+
+At the top, add:
+
+```conf
+[Service]
+Restart=always
+```
+
+Enable `systemd-journal-upload.service`, like this:
+
+```bash
+sudo systemctl enable systemd-journal-upload.service
+```
+
+Assuming that you have in `/tmp` the relevant `runme-on-XXX.sh` script for this client, run:
+
+```bash
+sudo bash /tmp/runme-on-XXX.sh
+```
+
+This will install the certificates in `/etc/ssl/systemd-journal`, set the right file permissions, and update `/etc/systemd/journal-remote.conf` and `/etc/systemd/journal-upload.conf` to use the right certificate files.
+
+Finally, restart `systemd-journal-upload.service`:
+
+```bash
+sudo systemctl restart systemd-journal-upload.service
+```
+
+The client should now be pushing logs to the central server.
+
+> When done, remember to `rm /tmp/runme-on-*.sh` to make sure your certificates are secure.
+
+Here it is in action, in Netdata:
+
+
+
+
+## Verify it works
+
+To verify the central server is receiving logs, run this on the central server:
+
+```bash
+sudo ls -l /var/log/journal/remote/
+```
+
+Depending on the `systemd` version you use, you should see new files from the clients' canonical names (as defined at their certificates) or IPs.
+
+Also, `systemctl status systemd-journal-remote` should show something like this:
+
+```bash
+systemd-journal-remote.service - Journal Remote Sink Service
+ Loaded: loaded (/etc/systemd/system/systemd-journal-remote.service; indirect; preset: disabled)
+ Active: active (running) since Sun 2023-10-15 14:29:46 EEST; 2h 24min ago
+TriggeredBy: ● systemd-journal-remote.socket
+ Docs: man:systemd-journal-remote(8)
+ man:journal-remote.conf(5)
+ Main PID: 2118153 (systemd-journal)
+ Status: "Processing requests..."
+ Tasks: 1 (limit: 154152)
+ Memory: 2.2M
+ CPU: 71ms
+ CGroup: /system.slice/systemd-journal-remote.service
+ └─2118153 /usr/lib/systemd/systemd-journal-remote --listen-https=-3 --output=/var/log/journal/remote/
+```
+
+Note the `Status: "Processing requests..."` and the PID under `CGroup`.
+
+On the client `systemctl status systemd-journal-upload` should show something like this:
+
+```bash
+● systemd-journal-upload.service - Journal Remote Upload Service
+ Loaded: loaded (/lib/systemd/system/systemd-journal-upload.service; enabled; vendor preset: disabled)
+ Drop-In: /etc/systemd/system/systemd-journal-upload.service.d
+ └─override.conf
+ Active: active (running) since Sun 2023-10-15 10:39:04 UTC; 3h 17min ago
+ Docs: man:systemd-journal-upload(8)
+ Main PID: 4169 (systemd-journal)
+ Status: "Processing input..."
+ Tasks: 1 (limit: 13868)
+ Memory: 3.5M
+ CPU: 1.081s
+ CGroup: /system.slice/systemd-journal-upload.service
+ └─4169 /lib/systemd/systemd-journal-upload --save-state
+```
+
+Note the `Status: "Processing input..."` and the PID under `CGroup`.
diff --git a/collectors/systemd-journal.plugin/systemd-internals.h b/collectors/systemd-journal.plugin/systemd-internals.h
new file mode 100644
index 00000000000000..e1ae44d4f1b058
--- /dev/null
+++ b/collectors/systemd-journal.plugin/systemd-internals.h
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_COLLECTORS_SYSTEMD_INTERNALS_H
+#define NETDATA_COLLECTORS_SYSTEMD_INTERNALS_H
+
+#include "collectors/all.h"
+#include "libnetdata/libnetdata.h"
+
+#include
+#include
+#include
+
+// Name, description and tunables of the "systemd-journal" function.
+// NOTE(review): the timeout is presumably expressed in seconds - confirm where it is consumed.
+#define SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION "View, search and analyze systemd journal entries."
+#define SYSTEMD_JOURNAL_FUNCTION_NAME "systemd-journal"
+#define SYSTEMD_JOURNAL_DEFAULT_TIMEOUT 60
+#define SYSTEMD_JOURNAL_ENABLE_ESTIMATIONS_FILE_PERCENTAGE 0.01
+#define SYSTEMD_JOURNAL_EXECUTE_WATCHER_PENDING_EVERY_MS 250
+// 5 minutes, expressed in microseconds
+#define SYSTEMD_JOURNAL_ALL_FILES_SCAN_EVERY_USEC (5 * 60 * USEC_PER_SEC)
+
+// Name, description and default timeout of the "systemd-list-units" function
+// (its entry point is only declared when ENABLE_SYSTEMD_DBUS is defined).
+#define SYSTEMD_UNITS_FUNCTION_DESCRIPTION "View the status of systemd units"
+#define SYSTEMD_UNITS_FUNCTION_NAME "systemd-list-units"
+#define SYSTEMD_UNITS_DEFAULT_TIMEOUT 30
+
+// Per-thread fstat cache: two thread-local counters and the switches that
+// enable/disable the cache on the calling thread. All are defined elsewhere.
+// NOTE(review): the counters presumably count all calls vs. calls answered from the cache - confirm at the definition.
+extern __thread size_t fstat_thread_calls;
+extern __thread size_t fstat_thread_cached_responses;
+void fstat_cache_enable_on_thread(void);
+void fstat_cache_disable_on_thread(void);
+
+// Mutex taken around writes to stdout (see send_newline_and_flush() in this header).
+extern netdata_mutex_t stdout_mutex;
+
+// Outcome codes of a journal operation.
+// NOTE(review): presumably the result of running a query against journal files - confirm against the users of this type.
+typedef enum {
+ ND_SD_JOURNAL_NO_FILE_MATCHED,
+ ND_SD_JOURNAL_FAILED_TO_OPEN,
+ ND_SD_JOURNAL_FAILED_TO_SEEK,
+ ND_SD_JOURNAL_TIMED_OUT,
+ ND_SD_JOURNAL_OK,
+ ND_SD_JOURNAL_NOT_MODIFIED,
+ ND_SD_JOURNAL_CANCELLED,
+} ND_SD_JOURNAL_STATUS;
+
+// Classification of a journal file's source (stored in journal_file.source_type).
+// Every non-zero value occupies its own bit, so values can be OR-ed together.
+typedef enum {
+ SDJF_NONE = 0,
+ SDJF_ALL = (1 << 0),
+ SDJF_LOCAL_ALL = (1 << 1),
+ SDJF_REMOTE_ALL = (1 << 2),
+ SDJF_LOCAL_SYSTEM = (1 << 3),
+ SDJF_LOCAL_USER = (1 << 4),
+ SDJF_LOCAL_NAMESPACE = (1 << 5),
+ SDJF_LOCAL_OTHER = (1 << 6),
+} SD_JOURNAL_FILE_SOURCE_TYPE;
+
+// Metadata kept for one journal file.
+// NOTE(review): the per-field notes are inferred from field names and types -
+// confirm them against the code that fills this structure.
+struct journal_file {
+ const char *filename;
+ size_t filename_len;
+ STRING *source;
+ SD_JOURNAL_FILE_SOURCE_TYPE source_type; // SDJF_* classification of the file
+ usec_t file_last_modified_ut; // usec_t: microseconds
+ usec_t msg_first_ut; // presumably the timestamp of the oldest message in the file
+ usec_t msg_last_ut; // presumably the timestamp of the newest message in the file
+ size_t size;
+ bool logged_failure; // presumably set so that a failure is logged only once per file
+ bool logged_journalctl_failure;
+ usec_t max_journal_vs_realtime_delta_ut; // see JOURNAL_VS_REALTIME_DELTA_* below
+
+ usec_t last_scan_monotonic_ut;
+ usec_t last_scan_header_vs_last_modified_ut;
+
+ uint64_t first_seqnum;
+ uint64_t last_seqnum;
+ sd_id128_t first_writer_id;
+ sd_id128_t last_writer_id;
+
+ uint64_t messages_in_file;
+};
+
+// Names of the aggregated journal sources.
+// NOTE(review): they appear to pair with the SDJF_* flags of SD_JOURNAL_FILE_SOURCE_TYPE - confirm where they are matched.
+#define SDJF_SOURCE_ALL_NAME "all"
+#define SDJF_SOURCE_LOCAL_NAME "all-local-logs"
+#define SDJF_SOURCE_LOCAL_SYSTEM_NAME "all-local-system-logs"
+#define SDJF_SOURCE_LOCAL_USERS_NAME "all-local-user-logs"
+#define SDJF_SOURCE_LOCAL_OTHER_NAME "all-uncategorized"
+#define SDJF_SOURCE_NAMESPACES_NAME "all-local-namespaces"
+#define SDJF_SOURCE_REMOTES_NAME "all-remote-systems"
+
+// Flags passed to sd_journal_open_files(); none are set.
+#define ND_SD_JOURNAL_OPEN_FLAGS (0)
+
+// Bounds, in microseconds, for the latency assumed between the journal and the realtime clock.
+#define JOURNAL_VS_REALTIME_DELTA_DEFAULT_UT (5 * USEC_PER_SEC) // assume always 5 seconds latency
+#define JOURNAL_VS_REALTIME_DELTA_MAX_UT (2 * 60 * USEC_PER_SEC) // up to 2 minutes latency
+
+// Shared dictionaries, defined in the plugin's .c files.
+// boot_ids_to_first_ut holds usec_t values looked up by boot id string.
+extern DICTIONARY *journal_files_registry;
+extern DICTIONARY *used_hashes_registry;
+extern DICTIONARY *function_query_status_dict;
+extern DICTIONARY *boot_ids_to_first_ut;
+
+// Journal files registry: comparators, JSON output and directory scanning.
+int journal_file_dict_items_backward_compar(const void *a, const void *b);
+int journal_file_dict_items_forward_compar(const void *a, const void *b);
+void buffer_json_journal_versions(BUFFER *wb);
+void available_journal_file_sources_to_json_array(BUFFER *wb);
+bool journal_files_completed_once(void);
+void journal_files_registry_update(void);
+void journal_directory_scan_recursively(DICTIONARY *files, DICTIONARY *dirs, const char *dirname, int depth);
+
+// Maps the PRIORITY field of a row to a facets severity.
+FACET_ROW_SEVERITY syslog_priority_to_facet_severity(FACETS *facets, FACET_ROW *row, void *data);
+
+// Facets callbacks: a dynamic row id and in-place value transformations that
+// rewrite the contents of wb into a more readable form.
+void netdata_systemd_journal_dynamic_row_id(FACETS *facets, BUFFER *json_array, FACET_ROW_KEY_VALUE *rkv, FACET_ROW *row, void *data);
+void netdata_systemd_journal_transform_priority(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data);
+void netdata_systemd_journal_transform_syslog_facility(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data);
+void netdata_systemd_journal_transform_errno(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data);
+void netdata_systemd_journal_transform_boot_id(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data);
+void netdata_systemd_journal_transform_uid(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data);
+void netdata_systemd_journal_transform_gid(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data);
+void netdata_systemd_journal_transform_cap_effective(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data);
+void netdata_systemd_journal_transform_timestamp_usec(FACETS *facets, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope, void *data);
+
+// Called while resolving the first timestamp of a boot id; returns a usec_t timestamp.
+usec_t journal_file_update_annotation_boot_id(sd_journal *j, struct journal_file *jf, const char *boot_id);
+
+// Fixed-size table of journal directories (at most MAX_JOURNAL_DIRECTORIES entries).
+#define MAX_JOURNAL_DIRECTORIES 100
+struct journal_directory {
+ char *path;
+};
+extern struct journal_directory journal_directories[MAX_JOURNAL_DIRECTORIES];
+
+// Initialization and the entry point of the systemd-journal function.
+void journal_init_files_and_directories(void);
+void journal_init_query_status(void);
+void function_systemd_journal(const char *transaction, char *function, int timeout, bool *cancelled);
+void journal_file_update_header(const char *filename, struct journal_file *jf);
+
+// Known MESSAGE_ID values: registry initialization and the matching transformation.
+void netdata_systemd_journal_message_ids_init(void);
+void netdata_systemd_journal_transform_message_id(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused);
+
+// Thread entry point of the journal watcher.
+void *journal_watcher_main(void *arg);
+
+// The systemd units function is only available when built with systemd D-Bus support.
+#ifdef ENABLE_SYSTEMD_DBUS
+void function_systemd_units(const char *transaction, char *function, int timeout, bool *cancelled);
+#endif
+
+// Write a single newline to stdout and flush it, holding stdout_mutex for the
+// whole write + flush so the output cannot interleave with other writers that
+// take the same mutex.
+static inline void send_newline_and_flush(void) {
+    netdata_mutex_lock(&stdout_mutex);
+
+    fputs("\n", stdout);
+    fflush(stdout);
+
+    netdata_mutex_unlock(&stdout_mutex);
+}
+
+// Split a journal field of the form "KEY=VALUE" into its key and value.
+//
+// data / data_length   the raw field and its size in bytes
+// key / key_length     receive the start and length of KEY (not NUL terminated)
+// value / value_length receive the start and length of VALUE (may be empty)
+//
+// Returns false, leaving the outputs untouched, when there is no '=' within
+// the first data_length bytes (or when data is NULL or empty).
+static inline bool parse_journal_field(const char *data, size_t data_length, const char **key, size_t *key_length, const char **value, size_t *value_length) {
+    if(unlikely(!data || !data_length))
+        return false;
+
+    // Journal payloads are length-delimited and may contain binary data, so
+    // the search for '=' has to be bounded by data_length: an unbounded
+    // strchr() could read past the payload, and an '=' found beyond it would
+    // make the value length computation below wrap around.
+    const char *equal = (const char *)memchr(data, '=', data_length);
+    if(unlikely(!equal))
+        return false;
+
+    size_t kl = (size_t)(equal - data);
+
+    *key = data;
+    *key_length = kl;
+    *value = equal + 1;
+    *value_length = data_length - kl - 1; // kl < data_length, so this cannot underflow
+
+    return true;
+}
+
+#endif //NETDATA_COLLECTORS_SYSTEMD_INTERNALS_H
diff --git a/collectors/systemd-journal.plugin/systemd-journal-annotations.c b/collectors/systemd-journal.plugin/systemd-journal-annotations.c
new file mode 100644
index 00000000000000..b12356110c90b9
--- /dev/null
+++ b/collectors/systemd-journal.plugin/systemd-journal-annotations.c
@@ -0,0 +1,719 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "systemd-internals.h"
+
+// Maps an errno value (the array index) to the string "<number> (<SYMBOL>)".
+// The trailing comment of each entry is the matching human readable description.
+// Indexes without an initializer (0, 41 and 58) are NULL, so every lookup has to
+// check the result, as netdata_systemd_journal_transform_errno() does.
+// NOTE(review): 41 and 58 are presumably left out because Linux does not assign
+// these numbers - confirm against the kernel's errno headers.
+const char *errno_map[] = {
+ [1] = "1 (EPERM)", // "Operation not permitted",
+ [2] = "2 (ENOENT)", // "No such file or directory",
+ [3] = "3 (ESRCH)", // "No such process",
+ [4] = "4 (EINTR)", // "Interrupted system call",
+ [5] = "5 (EIO)", // "Input/output error",
+ [6] = "6 (ENXIO)", // "No such device or address",
+ [7] = "7 (E2BIG)", // "Argument list too long",
+ [8] = "8 (ENOEXEC)", // "Exec format error",
+ [9] = "9 (EBADF)", // "Bad file descriptor",
+ [10] = "10 (ECHILD)", // "No child processes",
+ [11] = "11 (EAGAIN)", // "Resource temporarily unavailable",
+ [12] = "12 (ENOMEM)", // "Cannot allocate memory",
+ [13] = "13 (EACCES)", // "Permission denied",
+ [14] = "14 (EFAULT)", // "Bad address",
+ [15] = "15 (ENOTBLK)", // "Block device required",
+ [16] = "16 (EBUSY)", // "Device or resource busy",
+ [17] = "17 (EEXIST)", // "File exists",
+ [18] = "18 (EXDEV)", // "Invalid cross-device link",
+ [19] = "19 (ENODEV)", // "No such device",
+ [20] = "20 (ENOTDIR)", // "Not a directory",
+ [21] = "21 (EISDIR)", // "Is a directory",
+ [22] = "22 (EINVAL)", // "Invalid argument",
+ [23] = "23 (ENFILE)", // "Too many open files in system",
+ [24] = "24 (EMFILE)", // "Too many open files",
+ [25] = "25 (ENOTTY)", // "Inappropriate ioctl for device",
+ [26] = "26 (ETXTBSY)", // "Text file busy",
+ [27] = "27 (EFBIG)", // "File too large",
+ [28] = "28 (ENOSPC)", // "No space left on device",
+ [29] = "29 (ESPIPE)", // "Illegal seek",
+ [30] = "30 (EROFS)", // "Read-only file system",
+ [31] = "31 (EMLINK)", // "Too many links",
+ [32] = "32 (EPIPE)", // "Broken pipe",
+ [33] = "33 (EDOM)", // "Numerical argument out of domain",
+ [34] = "34 (ERANGE)", // "Numerical result out of range",
+ [35] = "35 (EDEADLK)", // "Resource deadlock avoided",
+ [36] = "36 (ENAMETOOLONG)", // "File name too long",
+ [37] = "37 (ENOLCK)", // "No locks available",
+ [38] = "38 (ENOSYS)", // "Function not implemented",
+ [39] = "39 (ENOTEMPTY)", // "Directory not empty",
+ [40] = "40 (ELOOP)", // "Too many levels of symbolic links",
+ [42] = "42 (ENOMSG)", // "No message of desired type",
+ [43] = "43 (EIDRM)", // "Identifier removed",
+ [44] = "44 (ECHRNG)", // "Channel number out of range",
+ [45] = "45 (EL2NSYNC)", // "Level 2 not synchronized",
+ [46] = "46 (EL3HLT)", // "Level 3 halted",
+ [47] = "47 (EL3RST)", // "Level 3 reset",
+ [48] = "48 (ELNRNG)", // "Link number out of range",
+ [49] = "49 (EUNATCH)", // "Protocol driver not attached",
+ [50] = "50 (ENOCSI)", // "No CSI structure available",
+ [51] = "51 (EL2HLT)", // "Level 2 halted",
+ [52] = "52 (EBADE)", // "Invalid exchange",
+ [53] = "53 (EBADR)", // "Invalid request descriptor",
+ [54] = "54 (EXFULL)", // "Exchange full",
+ [55] = "55 (ENOANO)", // "No anode",
+ [56] = "56 (EBADRQC)", // "Invalid request code",
+ [57] = "57 (EBADSLT)", // "Invalid slot",
+ [59] = "59 (EBFONT)", // "Bad font file format",
+ [60] = "60 (ENOSTR)", // "Device not a stream",
+ [61] = "61 (ENODATA)", // "No data available",
+ [62] = "62 (ETIME)", // "Timer expired",
+ [63] = "63 (ENOSR)", // "Out of streams resources",
+ [64] = "64 (ENONET)", // "Machine is not on the network",
+ [65] = "65 (ENOPKG)", // "Package not installed",
+ [66] = "66 (EREMOTE)", // "Object is remote",
+ [67] = "67 (ENOLINK)", // "Link has been severed",
+ [68] = "68 (EADV)", // "Advertise error",
+ [69] = "69 (ESRMNT)", // "Srmount error",
+ [70] = "70 (ECOMM)", // "Communication error on send",
+ [71] = "71 (EPROTO)", // "Protocol error",
+ [72] = "72 (EMULTIHOP)", // "Multihop attempted",
+ [73] = "73 (EDOTDOT)", // "RFS specific error",
+ [74] = "74 (EBADMSG)", // "Bad message",
+ [75] = "75 (EOVERFLOW)", // "Value too large for defined data type",
+ [76] = "76 (ENOTUNIQ)", // "Name not unique on network",
+ [77] = "77 (EBADFD)", // "File descriptor in bad state",
+ [78] = "78 (EREMCHG)", // "Remote address changed",
+ [79] = "79 (ELIBACC)", // "Can not access a needed shared library",
+ [80] = "80 (ELIBBAD)", // "Accessing a corrupted shared library",
+ [81] = "81 (ELIBSCN)", // ".lib section in a.out corrupted",
+ [82] = "82 (ELIBMAX)", // "Attempting to link in too many shared libraries",
+ [83] = "83 (ELIBEXEC)", // "Cannot exec a shared library directly",
+ [84] = "84 (EILSEQ)", // "Invalid or incomplete multibyte or wide character",
+ [85] = "85 (ERESTART)", // "Interrupted system call should be restarted",
+ [86] = "86 (ESTRPIPE)", // "Streams pipe error",
+ [87] = "87 (EUSERS)", // "Too many users",
+ [88] = "88 (ENOTSOCK)", // "Socket operation on non-socket",
+ [89] = "89 (EDESTADDRREQ)", // "Destination address required",
+ [90] = "90 (EMSGSIZE)", // "Message too long",
+ [91] = "91 (EPROTOTYPE)", // "Protocol wrong type for socket",
+ [92] = "92 (ENOPROTOOPT)", // "Protocol not available",
+ [93] = "93 (EPROTONOSUPPORT)", // "Protocol not supported",
+ [94] = "94 (ESOCKTNOSUPPORT)", // "Socket type not supported",
+ [95] = "95 (ENOTSUP)", // "Operation not supported",
+ [96] = "96 (EPFNOSUPPORT)", // "Protocol family not supported",
+ [97] = "97 (EAFNOSUPPORT)", // "Address family not supported by protocol",
+ [98] = "98 (EADDRINUSE)", // "Address already in use",
+ [99] = "99 (EADDRNOTAVAIL)", // "Cannot assign requested address",
+ [100] = "100 (ENETDOWN)", // "Network is down",
+ [101] = "101 (ENETUNREACH)", // "Network is unreachable",
+ [102] = "102 (ENETRESET)", // "Network dropped connection on reset",
+ [103] = "103 (ECONNABORTED)", // "Software caused connection abort",
+ [104] = "104 (ECONNRESET)", // "Connection reset by peer",
+ [105] = "105 (ENOBUFS)", // "No buffer space available",
+ [106] = "106 (EISCONN)", // "Transport endpoint is already connected",
+ [107] = "107 (ENOTCONN)", // "Transport endpoint is not connected",
+ [108] = "108 (ESHUTDOWN)", // "Cannot send after transport endpoint shutdown",
+ [109] = "109 (ETOOMANYREFS)", // "Too many references: cannot splice",
+ [110] = "110 (ETIMEDOUT)", // "Connection timed out",
+ [111] = "111 (ECONNREFUSED)", // "Connection refused",
+ [112] = "112 (EHOSTDOWN)", // "Host is down",
+ [113] = "113 (EHOSTUNREACH)", // "No route to host",
+ [114] = "114 (EALREADY)", // "Operation already in progress",
+ [115] = "115 (EINPROGRESS)", // "Operation now in progress",
+ [116] = "116 (ESTALE)", // "Stale file handle",
+ [117] = "117 (EUCLEAN)", // "Structure needs cleaning",
+ [118] = "118 (ENOTNAM)", // "Not a XENIX named type file",
+ [119] = "119 (ENAVAIL)", // "No XENIX semaphores available",
+ [120] = "120 (EISNAM)", // "Is a named type file",
+ [121] = "121 (EREMOTEIO)", // "Remote I/O error",
+ [122] = "122 (EDQUOT)", // "Disk quota exceeded",
+ [123] = "123 (ENOMEDIUM)", // "No medium found",
+ [124] = "124 (EMEDIUMTYPE)", // "Wrong medium type",
+ [125] = "125 (ECANCELED)", // "Operation canceled",
+ [126] = "126 (ENOKEY)", // "Required key not available",
+ [127] = "127 (EKEYEXPIRED)", // "Key has expired",
+ [128] = "128 (EKEYREVOKED)", // "Key has been revoked",
+ [129] = "129 (EKEYREJECTED)", // "Key was rejected by service",
+ [130] = "130 (EOWNERDEAD)", // "Owner died",
+ [131] = "131 (ENOTRECOVERABLE)", // "State not recoverable",
+ [132] = "132 (ERFKILL)", // "Operation not possible due to RF-kill",
+ [133] = "133 (EHWPOISON)", // "Memory page has hardware error",
+};
+
+// Maps a Linux capability number (the array index, which is also the bit
+// position tested by netdata_systemd_journal_transform_cap_effective()) to its
+// name without the "CAP_" prefix.
+// The last four entries use literal numbers, with the macro name kept in a comment.
+// NOTE(review): presumably so the file still builds with kernel headers that
+// predate these capabilities - confirm.
+const char *linux_capabilities[] = {
+ [CAP_CHOWN] = "CHOWN",
+ [CAP_DAC_OVERRIDE] = "DAC_OVERRIDE",
+ [CAP_DAC_READ_SEARCH] = "DAC_READ_SEARCH",
+ [CAP_FOWNER] = "FOWNER",
+ [CAP_FSETID] = "FSETID",
+ [CAP_KILL] = "KILL",
+ [CAP_SETGID] = "SETGID",
+ [CAP_SETUID] = "SETUID",
+ [CAP_SETPCAP] = "SETPCAP",
+ [CAP_LINUX_IMMUTABLE] = "LINUX_IMMUTABLE",
+ [CAP_NET_BIND_SERVICE] = "NET_BIND_SERVICE",
+ [CAP_NET_BROADCAST] = "NET_BROADCAST",
+ [CAP_NET_ADMIN] = "NET_ADMIN",
+ [CAP_NET_RAW] = "NET_RAW",
+ [CAP_IPC_LOCK] = "IPC_LOCK",
+ [CAP_IPC_OWNER] = "IPC_OWNER",
+ [CAP_SYS_MODULE] = "SYS_MODULE",
+ [CAP_SYS_RAWIO] = "SYS_RAWIO",
+ [CAP_SYS_CHROOT] = "SYS_CHROOT",
+ [CAP_SYS_PTRACE] = "SYS_PTRACE",
+ [CAP_SYS_PACCT] = "SYS_PACCT",
+ [CAP_SYS_ADMIN] = "SYS_ADMIN",
+ [CAP_SYS_BOOT] = "SYS_BOOT",
+ [CAP_SYS_NICE] = "SYS_NICE",
+ [CAP_SYS_RESOURCE] = "SYS_RESOURCE",
+ [CAP_SYS_TIME] = "SYS_TIME",
+ [CAP_SYS_TTY_CONFIG] = "SYS_TTY_CONFIG",
+ [CAP_MKNOD] = "MKNOD",
+ [CAP_LEASE] = "LEASE",
+ [CAP_AUDIT_WRITE] = "AUDIT_WRITE",
+ [CAP_AUDIT_CONTROL] = "AUDIT_CONTROL",
+ [CAP_SETFCAP] = "SETFCAP",
+ [CAP_MAC_OVERRIDE] = "MAC_OVERRIDE",
+ [CAP_MAC_ADMIN] = "MAC_ADMIN",
+ [CAP_SYSLOG] = "SYSLOG",
+ [CAP_WAKE_ALARM] = "WAKE_ALARM",
+ [CAP_BLOCK_SUSPEND] = "BLOCK_SUSPEND",
+ [37 /*CAP_AUDIT_READ*/] = "AUDIT_READ",
+ [38 /*CAP_PERFMON*/] = "PERFMON",
+ [39 /*CAP_BPF*/] = "BPF",
+ [40 /* CAP_CHECKPOINT_RESTORE */] = "CHECKPOINT_RESTORE",
+};
+
+// Translate a syslog facility number (the value LOG_FAC() yields) into its
+// lowercase name. Returns NULL for numbers that have no name here.
+static const char *syslog_facility_to_name(int facility) {
+    static const char *const names[] = {
+        [LOG_FAC(LOG_KERN)]     = "kern",
+        [LOG_FAC(LOG_USER)]     = "user",
+        [LOG_FAC(LOG_MAIL)]     = "mail",
+        [LOG_FAC(LOG_DAEMON)]   = "daemon",
+        [LOG_FAC(LOG_AUTH)]     = "auth",
+        [LOG_FAC(LOG_SYSLOG)]   = "syslog",
+        [LOG_FAC(LOG_LPR)]      = "lpr",
+        [LOG_FAC(LOG_NEWS)]     = "news",
+        [LOG_FAC(LOG_UUCP)]     = "uucp",
+        [LOG_FAC(LOG_CRON)]     = "cron",
+        [LOG_FAC(LOG_AUTHPRIV)] = "authpriv",
+        [LOG_FAC(LOG_FTP)]      = "ftp",
+        [LOG_FAC(LOG_LOCAL0)]   = "local0",
+        [LOG_FAC(LOG_LOCAL1)]   = "local1",
+        [LOG_FAC(LOG_LOCAL2)]   = "local2",
+        [LOG_FAC(LOG_LOCAL3)]   = "local3",
+        [LOG_FAC(LOG_LOCAL4)]   = "local4",
+        [LOG_FAC(LOG_LOCAL5)]   = "local5",
+        [LOG_FAC(LOG_LOCAL6)]   = "local6",
+        [LOG_FAC(LOG_LOCAL7)]   = "local7",
+    };
+
+    if(facility < 0 || (size_t)facility >= sizeof(names) / sizeof(names[0]))
+        return NULL;
+
+    // slots between the designated entries are NULL, which means "unknown"
+    return names[facility];
+}
+
+// Translate a syslog priority (LOG_EMERG .. LOG_DEBUG) into its display name.
+// Returns NULL for any other number.
+static const char *syslog_priority_to_name(int priority) {
+    static const char *const names[] = {
+        [LOG_EMERG]   = "panic",
+        [LOG_ALERT]   = "alert",
+        [LOG_CRIT]    = "critical",
+        [LOG_ERR]     = "error",
+        [LOG_WARNING] = "warning",
+        [LOG_NOTICE]  = "notice",
+        [LOG_INFO]    = "info",
+        [LOG_DEBUG]   = "debug",
+    };
+
+    if(priority < 0 || (size_t)priority >= sizeof(names) / sizeof(names[0]))
+        return NULL;
+
+    return names[priority];
+}
+
+// Derive the severity of a row from its PRIORITY field.
+//
+// The thresholds mirror get_log_colors() of systemd:
+// https://github.com/systemd/systemd/blob/aab9e4b2b86905a15944a1ac81e471b5b7075932/src/basic/terminal-util.c#L1501
+//
+// Rows without a PRIORITY, or with an empty one, are reported as NORMAL.
+FACET_ROW_SEVERITY syslog_priority_to_facet_severity(FACETS *facets __maybe_unused, FACET_ROW *row, void *data __maybe_unused) {
+    FACET_ROW_KEY_VALUE *rkv = dictionary_get(row->dict, "PRIORITY");
+    if(!rkv || rkv->empty)
+        return FACET_ROW_SEVERITY_NORMAL;
+
+    int level = str2i(buffer_tostring(rkv->wb));
+
+    // lower numbers are more severe
+    if(level <= LOG_ERR)
+        return FACET_ROW_SEVERITY_CRITICAL;
+
+    if(level <= LOG_WARNING)
+        return FACET_ROW_SEVERITY_WARNING;
+
+    if(level <= LOG_NOTICE)
+        return FACET_ROW_SEVERITY_NOTICE;
+
+    if(level >= LOG_DEBUG)
+        return FACET_ROW_SEVERITY_DEBUG;
+
+    // what remains is LOG_INFO
+    return FACET_ROW_SEVERITY_NORMAL;
+}
+
+// Format a uid into buffer as "<uid> (<username>)", or as just "<uid>" when
+// the lookup fails or yields an empty name. Returns buffer.
+static char *uid_to_username(uid_t uid, char *buffer, size_t buffer_size) {
+    static __thread char scratch[1024 + 1]; // string storage for getpwuid_r()
+    struct passwd entry, *found = NULL;
+
+    int rc = getpwuid_r(uid, &entry, scratch, sizeof(scratch), &found);
+    bool have_name = (rc == 0 && found && entry.pw_name && *entry.pw_name);
+
+    if(have_name)
+        snprintfz(buffer, buffer_size - 1, "%u (%s)", uid, entry.pw_name);
+    else
+        snprintfz(buffer, buffer_size - 1, "%u", uid);
+
+    return buffer;
+}
+
+// Format a gid into buffer as "<gid> (<groupname>)", or as just "<gid>" when
+// the lookup fails or yields an empty name. Returns buffer.
+static char *gid_to_groupname(gid_t gid, char* buffer, size_t buffer_size) {
+    static __thread char scratch[1024]; // string storage for getgrgid_r()
+    struct group entry, *found = NULL;
+
+    int rc = getgrgid_r(gid, &entry, scratch, sizeof(scratch), &found);
+    bool have_name = (rc == 0 && found && entry.gr_name && *entry.gr_name);
+
+    if(have_name)
+        snprintfz(buffer, buffer_size - 1, "%u (%s)", gid, entry.gr_name);
+    else
+        snprintfz(buffer, buffer_size - 1, "%u", gid);
+
+    return buffer;
+}
+
+// Replace a numeric syslog facility in wb with its name (e.g. "3" -> "daemon").
+// Values that do not start with a digit, or that map to no known facility,
+// are left untouched.
+void netdata_systemd_journal_transform_syslog_facility(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) {
+    const char *v = buffer_tostring(wb);
+
+    // <ctype.h> classifiers require a value representable as unsigned char
+    // (or EOF); passing a plain char holding a byte >= 0x80 is undefined
+    // behavior where char is signed. This also rejects the empty string.
+    if(!isdigit((unsigned char)*v))
+        return;
+
+    const char *name = syslog_facility_to_name(str2i(v));
+    if(name) {
+        buffer_flush(wb);
+        buffer_strcat(wb, name);
+    }
+}
+
+void netdata_systemd_journal_transform_priority(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) {
+ if(scope == FACETS_TRANSFORM_FACET_SORT)
+ return;
+
+ const char *v = buffer_tostring(wb);
+ if(*v && isdigit(*v)) {
+ int priority = str2i(buffer_tostring(wb));
+ const char *name = syslog_priority_to_name(priority);
+ if (name) {
+ buffer_flush(wb);
+ buffer_strcat(wb, name);
+ }
+ }
+}
+
+// Replace a numeric errno in wb with its errno_map entry (e.g. "2" -> "2 (ENOENT)").
+// Nothing is changed for the FACET_SORT scope, for values that do not start
+// with a digit, and for numbers that have no entry in errno_map.
+void netdata_systemd_journal_transform_errno(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) {
+    if(scope == FACETS_TRANSFORM_FACET_SORT)
+        return;
+
+    const char *v = buffer_tostring(wb);
+
+    // <ctype.h> classifiers require a value representable as unsigned char
+    // (or EOF); passing a plain char holding a byte >= 0x80 is undefined
+    // behavior where char is signed. This also rejects the empty string.
+    if(!isdigit((unsigned char)*v))
+        return;
+
+    unsigned err_no = str2u(v);
+    if(err_no == 0 || err_no >= sizeof(errno_map) / sizeof(*errno_map))
+        return;
+
+    // errno_map has unassigned (NULL) slots
+    const char *name = errno_map[err_no];
+    if(name) {
+        buffer_flush(wb);
+        buffer_strcat(wb, name);
+    }
+}
+
+// ----------------------------------------------------------------------------
+// UID and GID transformation
+
+// Number of slots in each of the uid and gid hashtables below.
+#define UID_GID_HASHTABLE_SIZE 10000
+
+// One cached id -> string translation; entries that fall on the same slot are chained.
+struct word_t2str_hashtable_entry {
+ struct word_t2str_hashtable_entry *next; // next entry on the same slot, or NULL
+ Word_t hash; // the key; the users in this file store the uid / gid itself
+ size_t len; // length of str, excluding the terminating NUL
+ char str[]; // NUL terminated text, allocated together with the entry
+};
+
+// Fixed-size chained hashtable, protected by its own spinlock.
+struct word_t2str_hashtable {
+ SPINLOCK spinlock;
+ size_t size; // slots in use; the slot index is computed as hash % size
+ struct word_t2str_hashtable_entry *hashtable[UID_GID_HASHTABLE_SIZE];
+};
+
+// Cache used by uid_to_username_cached(); all slots start empty.
+struct word_t2str_hashtable uid_hashtable = {
+ .size = UID_GID_HASHTABLE_SIZE,
+};
+
+// Cache used by gid_to_groupname_cached(); all slots start empty.
+struct word_t2str_hashtable gid_hashtable = {
+ .size = UID_GID_HASHTABLE_SIZE,
+};
+
+// Find the link that holds, or should hold, the entry for hash.
+//
+// Returns the address of the pointer referring to the matching entry, or, when
+// there is no match, the address of the NULL pointer terminating the chain, so
+// the caller can store a new entry through it. No locking is done here.
+struct word_t2str_hashtable_entry **word_t2str_hashtable_slot(struct word_t2str_hashtable *ht, Word_t hash) {
+    struct word_t2str_hashtable_entry **link;
+
+    for(link = &ht->hashtable[hash % ht->size]; *link ; link = &(*link)->next) {
+        if((*link)->hash == hash)
+            break;
+    }
+
+    return link;
+}
+
+// Return the cached "<uid> (<username>)" text for uid, resolving and caching
+// it on first use. *length receives the length of the returned string.
+// The lookup and the insertion run under uid_hashtable.spinlock.
+const char *uid_to_username_cached(uid_t uid, size_t *length) {
+    spinlock_lock(&uid_hashtable.spinlock);
+
+    struct word_t2str_hashtable_entry **slot = word_t2str_hashtable_slot(&uid_hashtable, uid);
+    if(*slot == NULL) {
+        static __thread char text[1024];
+        const char *resolved = uid_to_username(uid, text, sizeof(text));
+        size_t resolved_len = strlen(resolved);
+
+        // the entry and its string (including the NUL) share one allocation
+        struct word_t2str_hashtable_entry *created = callocz(1, sizeof(*created) + resolved_len + 1);
+        created->hash = uid;
+        created->len = resolved_len;
+        memcpy(created->str, resolved, resolved_len + 1);
+
+        *slot = created;
+    }
+
+    spinlock_unlock(&uid_hashtable.spinlock);
+
+    *length = (*slot)->len;
+    return (*slot)->str;
+}
+
+// Return the cached "<gid> (<groupname>)" text for gid, resolving and caching
+// it on first use. *length receives the length of the returned string.
+// The lookup and the insertion run under gid_hashtable.spinlock.
+const char *gid_to_groupname_cached(gid_t gid, size_t *length) {
+    spinlock_lock(&gid_hashtable.spinlock);
+
+    struct word_t2str_hashtable_entry **slot = word_t2str_hashtable_slot(&gid_hashtable, gid);
+    if(*slot == NULL) {
+        static __thread char text[1024];
+        const char *resolved = gid_to_groupname(gid, text, sizeof(text));
+        size_t resolved_len = strlen(resolved);
+
+        // the entry and its string (including the NUL) share one allocation
+        struct word_t2str_hashtable_entry *created = callocz(1, sizeof(*created) + resolved_len + 1);
+        created->hash = gid;
+        created->len = resolved_len;
+        memcpy(created->str, resolved, resolved_len + 1);
+
+        *slot = created;
+    }
+
+    spinlock_unlock(&gid_hashtable.spinlock);
+
+    *length = (*slot)->len;
+    return (*slot)->str;
+}
+
+// Boot id string -> usec_t timestamp, read by netdata_systemd_journal_transform_boot_id().
+// Starts as NULL; it is not created in this part of the file.
+DICTIONARY *boot_ids_to_first_ut = NULL;
+
+// Annotate a boot id with the first timestamp known for that boot.
+//
+// The timestamp is looked up in boot_ids_to_first_ut. On a miss, and only when
+// HAVE_SD_JOURNAL_RESTART_FIELDS is not defined, every file of
+// journal_files_registry is opened and handed to
+// journal_file_update_annotation_boot_id().
+//
+// For the FACET, FACET_SORT and HISTOGRAM scopes the boot id is replaced by the
+// RFC 3339 timestamp; for every other scope " (<timestamp>) " is appended to it.
+// The buffer is left untouched when it does not start with a hex digit, or when
+// no usable timestamp (neither 0 nor UINT64_MAX) is available.
+void netdata_systemd_journal_transform_boot_id(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) {
+    const char *boot_id = buffer_tostring(wb);
+
+    // <ctype.h> classifiers require a value representable as unsigned char
+    // (or EOF); passing a plain char holding a byte >= 0x80 is undefined
+    // behavior where char is signed. This also rejects the empty string.
+    if(!isxdigit((unsigned char)*boot_id))
+        return;
+
+    usec_t ut = UINT64_MAX;
+    usec_t *p_ut = dictionary_get(boot_ids_to_first_ut, boot_id);
+    if(!p_ut) {
+#ifndef HAVE_SD_JOURNAL_RESTART_FIELDS
+        struct journal_file *jf;
+        dfe_start_read(journal_files_registry, jf) {
+            const char *files[2] = {
+                    [0] = jf_dfe.name,
+                    [1] = NULL,
+            };
+
+            sd_journal *j = NULL;
+            int r = sd_journal_open_files(&j, files, ND_SD_JOURNAL_OPEN_FLAGS);
+            if(r < 0 || !j) {
+                internal_error(true, "JOURNAL: while looking for the first timestamp of boot_id '%s', "
+                                     "sd_journal_open_files('%s') returned %d",
+                               boot_id, jf_dfe.name, r);
+                continue;
+            }
+
+            // NOTE(review): ut is overwritten on every iteration, so the value
+            // returned for the last file wins - confirm this is intended.
+            ut = journal_file_update_annotation_boot_id(j, jf, boot_id);
+            sd_journal_close(j);
+        }
+        dfe_done(jf);
+#endif
+    }
+    else
+        ut = *p_ut;
+
+    if(!ut || ut == UINT64_MAX)
+        return;
+
+    char buffer[RFC3339_MAX_LENGTH];
+    rfc3339_datetime_ut(buffer, sizeof(buffer), ut, 0, true);
+
+    switch(scope) {
+        default:
+        case FACETS_TRANSFORM_DATA:
+        case FACETS_TRANSFORM_VALUE:
+            buffer_sprintf(wb, " (%s) ", buffer);
+            break;
+
+        case FACETS_TRANSFORM_FACET:
+        case FACETS_TRANSFORM_FACET_SORT:
+        case FACETS_TRANSFORM_HISTOGRAM:
+            buffer_flush(wb);
+            buffer_sprintf(wb, "%s", buffer);
+            break;
+    }
+}
+
+// Replace a numeric uid in wb with "<uid> (<username>)", using the uid cache.
+// Nothing is changed for the FACET_SORT scope or for values that do not start
+// with a digit.
+void netdata_systemd_journal_transform_uid(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) {
+    if(scope == FACETS_TRANSFORM_FACET_SORT)
+        return;
+
+    const char *v = buffer_tostring(wb);
+
+    // <ctype.h> classifiers require a value representable as unsigned char
+    // (or EOF); passing a plain char holding a byte >= 0x80 is undefined
+    // behavior where char is signed. This also rejects the empty string.
+    if(!isdigit((unsigned char)*v))
+        return;
+
+    // uid_t is unsigned: parse it as unsigned, so ids above INT_MAX do not
+    // pass through a signed int
+    uid_t uid = str2u(v);
+
+    size_t len;
+    const char *name = uid_to_username_cached(uid, &len);
+    buffer_contents_replace(wb, name, len);
+}
+
+// Replace a numeric gid in wb with "<gid> (<groupname>)", using the gid cache.
+// Nothing is changed for the FACET_SORT scope or for values that do not start
+// with a digit.
+void netdata_systemd_journal_transform_gid(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) {
+    if(scope == FACETS_TRANSFORM_FACET_SORT)
+        return;
+
+    const char *v = buffer_tostring(wb);
+
+    // <ctype.h> classifiers require a value representable as unsigned char
+    // (or EOF); passing a plain char holding a byte >= 0x80 is undefined
+    // behavior where char is signed. This also rejects the empty string.
+    if(!isdigit((unsigned char)*v))
+        return;
+
+    // gid_t is unsigned: parse it as unsigned, so ids above INT_MAX do not
+    // pass through a signed int
+    gid_t gid = str2u(v);
+
+    size_t len;
+    const char *name = gid_to_groupname_cached(gid, &len);
+    buffer_contents_replace(wb, name, len);
+}
+
+// Append the names of the capabilities set in a hexadecimal capability mask,
+// e.g. "3" becomes "3 (CHOWN | DAC_OVERRIDE)".
+//
+// Nothing is changed for the FACET_SORT scope, when the value is not entirely
+// a hexadecimal number (which also covers values that were already
+// annotated), or when the mask is zero.
+void netdata_systemd_journal_transform_cap_effective(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) {
+    if(scope == FACETS_TRANSFORM_FACET_SORT)
+        return;
+
+    const char *v = buffer_tostring(wb);
+
+    // The mask is hexadecimal, so it may start with a-f: gate on isxdigit(),
+    // not isdigit(). The cast is needed because <ctype.h> classifiers require
+    // a value representable as unsigned char (or EOF).
+    if(!isxdigit((unsigned char)*v))
+        return;
+
+    // strtoull(): the mask is 64 bits wide, while unsigned long may be 32 bits.
+    // Requiring the whole string to be consumed keeps ordinary words that
+    // merely start with a hex digit from being decoded.
+    char *end = NULL;
+    uint64_t cap = strtoull(v, &end, 16);
+    if(!cap || end == v || *end)
+        return;
+
+    buffer_fast_strcat(wb, " (", 2);
+
+    size_t added = 0;
+    for(size_t i = 0; i < sizeof(linux_capabilities) / sizeof(linux_capabilities[0]); i++) {
+        if(!linux_capabilities[i] || !(cap & (1ULL << i)))
+            continue;
+
+        if(added++)
+            buffer_fast_strcat(wb, " | ", 3);
+
+        buffer_strcat(wb, linux_capabilities[i]);
+    }
+
+    buffer_fast_strcat(wb, ")", 1);
+}
+
+// Append the RFC 3339 rendering (with 6 fractional digits) of a microseconds
+// timestamp, e.g. "<usec>" becomes "<usec> (<datetime>)".
+// Nothing is changed for the FACET_SORT scope, for values that do not start
+// with a digit, and for a zero timestamp.
+void netdata_systemd_journal_transform_timestamp_usec(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) {
+    if(scope == FACETS_TRANSFORM_FACET_SORT)
+        return;
+
+    const char *v = buffer_tostring(wb);
+
+    // <ctype.h> classifiers require a value representable as unsigned char
+    // (or EOF); passing a plain char holding a byte >= 0x80 is undefined
+    // behavior where char is signed. This also rejects the empty string.
+    if(!isdigit((unsigned char)*v))
+        return;
+
+    uint64_t ut = str2ull(v, NULL);
+    if(!ut)
+        return;
+
+    char buffer[RFC3339_MAX_LENGTH];
+    rfc3339_datetime_ut(buffer, sizeof(buffer), ut, 6, true);
+    buffer_sprintf(wb, " (%s)", buffer);
+}
+
+// ----------------------------------------------------------------------------
+
+// Build the dynamic id of a row, store it in rkv->wb and append it to json_array.
+//
+// The identifier is the first non-empty field among CONTAINER_NAME,
+// SYSLOG_IDENTIFIER and _COMM. The result is "identifier[pid]", just
+// "identifier" when the pid text is empty, or FACET_VALUE_UNSET when there is
+// no identifier.
+void netdata_systemd_journal_dynamic_row_id(FACETS *facets __maybe_unused, BUFFER *json_array, FACET_ROW_KEY_VALUE *rkv, FACET_ROW *row, void *data __maybe_unused) {
+    static const char *const identifier_keys[] = {
+        "CONTAINER_NAME",
+        "SYSLOG_IDENTIFIER",
+        "_COMM",
+    };
+
+    FACET_ROW_KEY_VALUE *pid_rkv = dictionary_get(row->dict, "_PID");
+    const char *pid = FACET_VALUE_UNSET;
+    if(pid_rkv)
+        pid = buffer_tostring(pid_rkv->wb);
+
+    // stop at the first key that is present and not empty
+    const char *identifier = NULL;
+    for(size_t i = 0; !identifier && i < sizeof(identifier_keys) / sizeof(identifier_keys[0]); i++) {
+        FACET_ROW_KEY_VALUE *candidate = dictionary_get(row->dict, identifier_keys[i]);
+        if(candidate && !candidate->empty)
+            identifier = buffer_tostring(candidate->wb);
+    }
+
+    buffer_flush(rkv->wb);
+
+    bool has_identifier = identifier && *identifier;
+    bool has_pid = pid && *pid;
+
+    if(!has_identifier)
+        buffer_strcat(rkv->wb, FACET_VALUE_UNSET);
+    else if(has_pid)
+        buffer_sprintf(rkv->wb, "%s[%s]", identifier, pid);
+    else
+        buffer_sprintf(rkv->wb, "%s", identifier);
+
+    buffer_json_add_array_item_string(json_array, buffer_tostring(rkv->wb));
+}
+
+
+// ----------------------------------------------------------------------------
+
+// Value stored per known journal message id: the short label to show for it.
+struct message_id_info {
+ const char *msg;
+};
+
+// Message id (hex string) -> struct message_id_info; created and filled by
+// netdata_systemd_journal_message_ids_init().
+static DICTIONARY *known_journal_messages_ids = NULL;
+
+void netdata_systemd_journal_message_ids_init(void) {
+ known_journal_messages_ids = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE);
+
+ struct message_id_info i = { 0 };
+ i.msg = "Journal start"; dictionary_set(known_journal_messages_ids, "f77379a8490b408bbe5f6940505a777b", &i, sizeof(i));
+ i.msg = "Journal stop"; dictionary_set(known_journal_messages_ids, "d93fb3c9c24d451a97cea615ce59c00b", &i, sizeof(i));
+ i.msg = "Journal dropped"; dictionary_set(known_journal_messages_ids, "a596d6fe7bfa4994828e72309e95d61e", &i, sizeof(i));
+ i.msg = "Journal missed"; dictionary_set(known_journal_messages_ids, "e9bf28e6e834481bb6f48f548ad13606", &i, sizeof(i));
+ i.msg = "Journal usage"; dictionary_set(known_journal_messages_ids, "ec387f577b844b8fa948f33cad9a75e6", &i, sizeof(i));
+ i.msg = "Coredump"; dictionary_set(known_journal_messages_ids, "fc2e22bc6ee647b6b90729ab34a250b1", &i, sizeof(i));
+ i.msg = "Truncated core"; dictionary_set(known_journal_messages_ids, "5aadd8e954dc4b1a8c954d63fd9e1137", &i, sizeof(i));
+ i.msg = "Backtrace"; dictionary_set(known_journal_messages_ids, "1f4e0a44a88649939aaea34fc6da8c95", &i, sizeof(i));
+ i.msg = "Session start"; dictionary_set(known_journal_messages_ids, "8d45620c1a4348dbb17410da57c60c66", &i, sizeof(i));
+ i.msg = "Session stop"; dictionary_set(known_journal_messages_ids, "3354939424b4456d9802ca8333ed424a", &i, sizeof(i));
+ i.msg = "Seat start"; dictionary_set(known_journal_messages_ids, "fcbefc5da23d428093f97c82a9290f7b", &i, sizeof(i));
+ i.msg = "Seat stop"; dictionary_set(known_journal_messages_ids, "e7852bfe46784ed0accde04bc864c2d5", &i, sizeof(i));
+ i.msg = "Machine start"; dictionary_set(known_journal_messages_ids, "24d8d4452573402496068381a6312df2", &i, sizeof(i));
+ i.msg = "Machine stop"; dictionary_set(known_journal_messages_ids, "58432bd3bace477cb514b56381b8a758", &i, sizeof(i));
+ i.msg = "Time change"; dictionary_set(known_journal_messages_ids, "c7a787079b354eaaa9e77b371893cd27", &i, sizeof(i));
+ i.msg = "Timezone change"; dictionary_set(known_journal_messages_ids, "45f82f4aef7a4bbf942ce861d1f20990", &i, sizeof(i));
+ i.msg = "Tainted"; dictionary_set(known_journal_messages_ids, "50876a9db00f4c40bde1a2ad381c3a1b", &i, sizeof(i));
+ i.msg = "Startup finished"; dictionary_set(known_journal_messages_ids, "b07a249cd024414a82dd00cd181378ff", &i, sizeof(i));
+ i.msg = "User startup finished"; dictionary_set(known_journal_messages_ids, "eed00a68ffd84e31882105fd973abdd1", &i, sizeof(i));
+ i.msg = "Sleep start"; dictionary_set(known_journal_messages_ids, "6bbd95ee977941e497c48be27c254128", &i, sizeof(i));
+ i.msg = "Sleep stop"; dictionary_set(known_journal_messages_ids, "8811e6df2a8e40f58a94cea26f8ebf14", &i, sizeof(i));
+ i.msg = "Shutdown"; dictionary_set(known_journal_messages_ids, "98268866d1d54a499c4e98921d93bc40", &i, sizeof(i));
+ i.msg = "Factory reset"; dictionary_set(known_journal_messages_ids, "c14aaf76ec284a5fa1f105f88dfb061c", &i, sizeof(i));
+ i.msg = "Crash exit"; dictionary_set(known_journal_messages_ids, "d9ec5e95e4b646aaaea2fd05214edbda", &i, sizeof(i));
+ i.msg = "Crash failed"; dictionary_set(known_journal_messages_ids, "3ed0163e868a4417ab8b9e210407a96c", &i, sizeof(i));
+ i.msg = "Crash freeze"; dictionary_set(known_journal_messages_ids, "645c735537634ae0a32b15a7c6cba7d4", &i, sizeof(i));
+ i.msg = "Crash no coredump"; dictionary_set(known_journal_messages_ids, "5addb3a06a734d3396b794bf98fb2d01", &i, sizeof(i));
+ i.msg = "Crash no fork"; dictionary_set(known_journal_messages_ids, "5c9e98de4ab94c6a9d04d0ad793bd903", &i, sizeof(i));
+ i.msg = "Crash unknown signal"; dictionary_set(known_journal_messages_ids, "5e6f1f5e4db64a0eaee3368249d20b94", &i, sizeof(i));
+ i.msg = "Crash systemd signal"; dictionary_set(known_journal_messages_ids, "83f84b35ee264f74a3896a9717af34cb", &i, sizeof(i));
+ i.msg = "Crash process signal"; dictionary_set(known_journal_messages_ids, "3a73a98baf5b4b199929e3226c0be783", &i, sizeof(i));
+ i.msg = "Crash waitpid failed"; dictionary_set(known_journal_messages_ids, "2ed18d4f78ca47f0a9bc25271c26adb4", &i, sizeof(i));
+ i.msg = "Crash coredump failed"; dictionary_set(known_journal_messages_ids, "56b1cd96f24246c5b607666fda952356", &i, sizeof(i));
+ i.msg = "Crash coredump pid"; dictionary_set(known_journal_messages_ids, "4ac7566d4d7548f4981f629a28f0f829", &i, sizeof(i));
+ i.msg = "Crash shell fork failed"; dictionary_set(known_journal_messages_ids, "38e8b1e039ad469291b18b44c553a5b7", &i, sizeof(i));
+ i.msg = "Crash execle failed"; dictionary_set(known_journal_messages_ids, "872729b47dbe473eb768ccecd477beda", &i, sizeof(i));
+ i.msg = "Selinux failed"; dictionary_set(known_journal_messages_ids, "658a67adc1c940b3b3316e7e8628834a", &i, sizeof(i));
+ i.msg = "Battery low warning"; dictionary_set(known_journal_messages_ids, "e6f456bd92004d9580160b2207555186", &i, sizeof(i));
+ i.msg = "Battery low poweroff"; dictionary_set(known_journal_messages_ids, "267437d33fdd41099ad76221cc24a335", &i, sizeof(i));
+ i.msg = "Core mainloop failed"; dictionary_set(known_journal_messages_ids, "79e05b67bc4545d1922fe47107ee60c5", &i, sizeof(i));
+ i.msg = "Core no xdgdir path"; dictionary_set(known_journal_messages_ids, "dbb136b10ef4457ba47a795d62f108c9", &i, sizeof(i));
+ i.msg = "Core capability bounding user"; dictionary_set(known_journal_messages_ids, "ed158c2df8884fa584eead2d902c1032", &i, sizeof(i));
+ i.msg = "Core capability bounding"; dictionary_set(known_journal_messages_ids, "42695b500df048298bee37159caa9f2e", &i, sizeof(i));
+ i.msg = "Core disable privileges"; dictionary_set(known_journal_messages_ids, "bfc2430724ab44499735b4f94cca9295", &i, sizeof(i));
+ i.msg = "Core start target failed"; dictionary_set(known_journal_messages_ids, "59288af523be43a28d494e41e26e4510", &i, sizeof(i));
+ i.msg = "Core isolate target failed"; dictionary_set(known_journal_messages_ids, "689b4fcc97b4486ea5da92db69c9e314", &i, sizeof(i));
+ i.msg = "Core fd set failed"; dictionary_set(known_journal_messages_ids, "5ed836f1766f4a8a9fc5da45aae23b29", &i, sizeof(i));
+ i.msg = "Core pid1 environment"; dictionary_set(known_journal_messages_ids, "6a40fbfbd2ba4b8db02fb40c9cd090d7", &i, sizeof(i));
+ i.msg = "Core manager allocate"; dictionary_set(known_journal_messages_ids, "0e54470984ac419689743d957a119e2e", &i, sizeof(i));
+ i.msg = "Smack failed write"; dictionary_set(known_journal_messages_ids, "d67fa9f847aa4b048a2ae33535331adb", &i, sizeof(i));
+ i.msg = "Shutdown error"; dictionary_set(known_journal_messages_ids, "af55a6f75b544431b72649f36ff6d62c", &i, sizeof(i));
+ i.msg = "Valgrind helper fork"; dictionary_set(known_journal_messages_ids, "d18e0339efb24a068d9c1060221048c2", &i, sizeof(i));
+ i.msg = "Unit starting"; dictionary_set(known_journal_messages_ids, "7d4958e842da4a758f6c1cdc7b36dcc5", &i, sizeof(i));
+ i.msg = "Unit started"; dictionary_set(known_journal_messages_ids, "39f53479d3a045ac8e11786248231fbf", &i, sizeof(i));
+ i.msg = "Unit failed"; dictionary_set(known_journal_messages_ids, "be02cf6855d2428ba40df7e9d022f03d", &i, sizeof(i));
+ i.msg = "Unit stopping"; dictionary_set(known_journal_messages_ids, "de5b426a63be47a7b6ac3eaac82e2f6f", &i, sizeof(i));
+ i.msg = "Unit stopped"; dictionary_set(known_journal_messages_ids, "9d1aaa27d60140bd96365438aad20286", &i, sizeof(i));
+ i.msg = "Unit reloading"; dictionary_set(known_journal_messages_ids, "d34d037fff1847e6ae669a370e694725", &i, sizeof(i));
+ i.msg = "Unit reloaded"; dictionary_set(known_journal_messages_ids, "7b05ebc668384222baa8881179cfda54", &i, sizeof(i));
+ i.msg = "Unit restart scheduled"; dictionary_set(known_journal_messages_ids, "5eb03494b6584870a536b337290809b3", &i, sizeof(i));
+ i.msg = "Unit resources"; dictionary_set(known_journal_messages_ids, "ae8f7b866b0347b9af31fe1c80b127c0", &i, sizeof(i));
+ i.msg = "Unit success"; dictionary_set(known_journal_messages_ids, "7ad2d189f7e94e70a38c781354912448", &i, sizeof(i));
+ i.msg = "Unit skipped"; dictionary_set(known_journal_messages_ids, "0e4284a0caca4bfc81c0bb6786972673", &i, sizeof(i));
+ i.msg = "Unit failure result"; dictionary_set(known_journal_messages_ids, "d9b373ed55a64feb8242e02dbe79a49c", &i, sizeof(i));
+ i.msg = "Spawn failed"; dictionary_set(known_journal_messages_ids, "641257651c1b4ec9a8624d7a40a9e1e7", &i, sizeof(i));
+ i.msg = "Unit process exit"; dictionary_set(known_journal_messages_ids, "98e322203f7a4ed290d09fe03c09fe15", &i, sizeof(i));
+ i.msg = "Forward syslog missed"; dictionary_set(known_journal_messages_ids, "0027229ca0644181a76c4e92458afa2e", &i, sizeof(i));
+ i.msg = "Overmounting"; dictionary_set(known_journal_messages_ids, "1dee0369c7fc4736b7099b38ecb46ee7", &i, sizeof(i));
+ i.msg = "Unit oomd kill"; dictionary_set(known_journal_messages_ids, "d989611b15e44c9dbf31e3c81256e4ed", &i, sizeof(i));
+ i.msg = "Unit out of memory"; dictionary_set(known_journal_messages_ids, "fe6faa94e7774663a0da52717891d8ef", &i, sizeof(i));
+ i.msg = "Lid opened"; dictionary_set(known_journal_messages_ids, "b72ea4a2881545a0b50e200e55b9b06f", &i, sizeof(i));
+ i.msg = "Lid closed"; dictionary_set(known_journal_messages_ids, "b72ea4a2881545a0b50e200e55b9b070", &i, sizeof(i));
+ i.msg = "System docked"; dictionary_set(known_journal_messages_ids, "f5f416b862074b28927a48c3ba7d51ff", &i, sizeof(i));
+ i.msg = "System undocked"; dictionary_set(known_journal_messages_ids, "51e171bd585248568110144c517cca53", &i, sizeof(i));
+ i.msg = "Power key"; dictionary_set(known_journal_messages_ids, "b72ea4a2881545a0b50e200e55b9b071", &i, sizeof(i));
+ i.msg = "Power key long press"; dictionary_set(known_journal_messages_ids, "3e0117101eb243c1b9a50db3494ab10b", &i, sizeof(i));
+ i.msg = "Reboot key"; dictionary_set(known_journal_messages_ids, "9fa9d2c012134ec385451ffe316f97d0", &i, sizeof(i));
+ i.msg = "Reboot key long press"; dictionary_set(known_journal_messages_ids, "f1c59a58c9d943668965c337caec5975", &i, sizeof(i));
+ i.msg = "Suspend key"; dictionary_set(known_journal_messages_ids, "b72ea4a2881545a0b50e200e55b9b072", &i, sizeof(i));
+ i.msg = "Suspend key long press"; dictionary_set(known_journal_messages_ids, "bfdaf6d312ab4007bc1fe40a15df78e8", &i, sizeof(i));
+ i.msg = "Hibernate key"; dictionary_set(known_journal_messages_ids, "b72ea4a2881545a0b50e200e55b9b073", &i, sizeof(i));
+ i.msg = "Hibernate key long press"; dictionary_set(known_journal_messages_ids, "167836df6f7f428e98147227b2dc8945", &i, sizeof(i));
+ i.msg = "Invalid configuration"; dictionary_set(known_journal_messages_ids, "c772d24e9a884cbeb9ea12625c306c01", &i, sizeof(i));
+ i.msg = "Dnssec failure"; dictionary_set(known_journal_messages_ids, "1675d7f172174098b1108bf8c7dc8f5d", &i, sizeof(i));
+ i.msg = "Dnssec trust anchor revoked"; dictionary_set(known_journal_messages_ids, "4d4408cfd0d144859184d1e65d7c8a65", &i, sizeof(i));
+ i.msg = "Dnssec downgrade"; dictionary_set(known_journal_messages_ids, "36db2dfa5a9045e1bd4af5f93e1cf057", &i, sizeof(i));
+ i.msg = "Unsafe user name"; dictionary_set(known_journal_messages_ids, "b61fdac612e94b9182285b998843061f", &i, sizeof(i));
+ i.msg = "Mount point path not suitable"; dictionary_set(known_journal_messages_ids, "1b3bb94037f04bbf81028e135a12d293", &i, sizeof(i));
+ i.msg = "Device path not suitable"; dictionary_set(known_journal_messages_ids, "010190138f494e29a0ef6669749531aa", &i, sizeof(i));
+ i.msg = "Nobody user unsuitable"; dictionary_set(known_journal_messages_ids, "b480325f9c394a7b802c231e51a2752c", &i, sizeof(i));
+ i.msg = "Systemd udev settle deprecated"; dictionary_set(known_journal_messages_ids, "1c0454c1bd2241e0ac6fefb4bc631433", &i, sizeof(i));
+ i.msg = "Time sync"; dictionary_set(known_journal_messages_ids, "7c8a41f37b764941a0e1780b1be2f037", &i, sizeof(i));
+ i.msg = "Time bump"; dictionary_set(known_journal_messages_ids, "7db73c8af0d94eeb822ae04323fe6ab6", &i, sizeof(i));
+ i.msg = "Shutdown scheduled"; dictionary_set(known_journal_messages_ids, "9e7066279dc8403da79ce4b1a69064b2", &i, sizeof(i));
+ i.msg = "Shutdown canceled"; dictionary_set(known_journal_messages_ids, "249f6fb9e6e2428c96f3f0875681ffa3", &i, sizeof(i));
+ i.msg = "TPM pcr extend"; dictionary_set(known_journal_messages_ids, "3f7d5ef3e54f4302b4f0b143bb270cab", &i, sizeof(i));
+ i.msg = "Memory trim"; dictionary_set(known_journal_messages_ids, "f9b0be465ad540d0850ad32172d57c21", &i, sizeof(i));
+ i.msg = "Sysv generator deprecated"; dictionary_set(known_journal_messages_ids, "a8fa8dacdb1d443e9503b8be367a6adb", &i, sizeof(i));
+
+ // gnome
+ // https://gitlab.gnome.org/GNOME/gnome-session/-/blob/main/gnome-session/gsm-manager.c
+ i.msg = "Gnome SM startup succeeded"; dictionary_set(known_journal_messages_ids, "0ce153587afa4095832d233c17a88001", &i, sizeof(i));
+ i.msg = "Gnome SM unrecoverable failure"; dictionary_set(known_journal_messages_ids, "10dd2dc188b54a5e98970f56499d1f73", &i, sizeof(i));
+
+ // gnome-shell
+ // https://gitlab.gnome.org/GNOME/gnome-shell/-/blob/main/js/ui/main.js#L56
+ i.msg = "Gnome shell started";dictionary_set(known_journal_messages_ids, "f3ea493c22934e26811cd62abe8e203a", &i, sizeof(i));
+
+ // flathub
+ // https://docs.flatpak.org/de/latest/flatpak-command-reference.html
+ i.msg = "Flatpak cache"; dictionary_set(known_journal_messages_ids, "c7b39b1e006b464599465e105b361485", &i, sizeof(i));
+
+ // ???
+ i.msg = "Flathub pulls"; dictionary_set(known_journal_messages_ids, "75ba3deb0af041a9a46272ff85d9e73e", &i, sizeof(i));
+ i.msg = "Flathub pull errors"; dictionary_set(known_journal_messages_ids, "f02bce89a54e4efab3a94a797d26204a", &i, sizeof(i));
+
+ // ??
+ i.msg = "Boltd starting"; dictionary_set(known_journal_messages_ids, "dd11929c788e48bdbb6276fb5f26b08a", &i, sizeof(i));
+
+ // Netdata
+ i.msg = "Netdata connection from child"; dictionary_set(known_journal_messages_ids, "ed4cdb8f1beb4ad3b57cb3cae2d162fa", &i, sizeof(i));
+ i.msg = "Netdata connection to parent"; dictionary_set(known_journal_messages_ids, "6e2e3839067648968b646045dbf28d66", &i, sizeof(i));
+ i.msg = "Netdata alert transition"; dictionary_set(known_journal_messages_ids, "9ce0cb58ab8b44df82c4bf1ad9ee22de", &i, sizeof(i));
+ i.msg = "Netdata alert notification"; dictionary_set(known_journal_messages_ids, "6db0018e83e34320ae2a659d78019fb7", &i, sizeof(i));
+}
+
+// Facets transformation callback for MESSAGE_ID values.
+//
+// The id currently held in `wb` is looked up in known_journal_messages_ids.
+// Unknown ids are left untouched. For known ids:
+//  - FACET, FACET_SORT and HISTOGRAM scopes: the buffer is flushed and the
+//    human-readable name is written in place of the id;
+//  - every other scope (DATA, VALUE, ...): " (name)" is written with
+//    buffer_sprintf() without flushing the id first.
+void netdata_systemd_journal_transform_message_id(FACETS *facets __maybe_unused, BUFFER *wb, FACETS_TRANSFORMATION_SCOPE scope __maybe_unused, void *data __maybe_unused) {
+    struct message_id_info *known = dictionary_get(known_journal_messages_ids, buffer_tostring(wb));
+    if(!known)
+        return;
+
+    bool replace_id = (scope == FACETS_TRANSFORM_FACET ||
+                       scope == FACETS_TRANSFORM_FACET_SORT ||
+                       scope == FACETS_TRANSFORM_HISTOGRAM);
+
+    if(replace_id) {
+        buffer_flush(wb);
+        buffer_strcat(wb, known->msg);
+    }
+    else {
+        // FACETS_TRANSFORM_DATA, FACETS_TRANSFORM_VALUE and any other scope
+        buffer_sprintf(wb, " (%s)", known->msg);
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+// Row-rendering callback: adds one JSON object to `json_array` whose only
+// member, "value", is the text currently held in the row key's buffer.
+static void netdata_systemd_journal_rich_message(FACETS *facets __maybe_unused, BUFFER *json_array, FACET_ROW_KEY_VALUE *rkv, FACET_ROW *row __maybe_unused, void *data __maybe_unused) {
+    buffer_json_add_array_item_object(json_array);
+    {
+        const char *message = buffer_tostring(rkv->wb);
+        buffer_json_member_add_string(json_array, "value", message);
+    }
+    buffer_json_object_close(json_array);
+}
diff --git a/collectors/systemd-journal.plugin/systemd-journal-files.c b/collectors/systemd-journal.plugin/systemd-journal-files.c
new file mode 100644
index 00000000000000..56496df2258b3d
--- /dev/null
+++ b/collectors/systemd-journal.plugin/systemd-journal-files.c
@@ -0,0 +1,857 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "systemd-internals.h"
+
+// size of the buffers used in this file to build the "source" name of a journal file
+#define SYSTEMD_JOURNAL_MAX_SOURCE_LEN 64
+// journal_directory_scan_recursively() stops descending when its depth exceeds this
+#define VAR_LOG_JOURNAL_MAX_DEPTH 10
+
+// the directories scanned by journal_files_registry_update(); the first entry with a NULL path ends the list
+struct journal_directory journal_directories[MAX_JOURNAL_DIRECTORIES] = { 0 };
+// registry of struct journal_file entries, keyed by the full path of each journal file
+DICTIONARY *journal_files_registry = NULL;
+// NOTE(review): not referenced in this part of the file - purpose to be confirmed at its users
+DICTIONARY *used_hashes_registry = NULL;
+
+// added to the registry version reported by buffer_json_journal_versions()
+static usec_t systemd_journal_session = 0;
+
+// Adds a "versions" object to `wb` with a single member, "sources", computed
+// as systemd_journal_session plus the current version of journal_files_registry.
+void buffer_json_journal_versions(BUFFER *wb) {
+    buffer_json_member_add_object(wb, "versions");
+
+    uint64_t sources_version = systemd_journal_session + dictionary_version(journal_files_registry);
+    buffer_json_member_add_uint64(wb, "sources", sources_version);
+
+    buffer_json_object_close(wb);
+}
+
+// Parses a 128-bit id written as 32 hexadecimal characters.
+//
+// Leading whitespace in `in` is skipped and at most the next 32 characters
+// are handed to sd_id128_from_string(); anything after them is ignored.
+//
+// Returns true and stores the id in *ret on success. Returns false (leaving
+// *ret untouched) when fewer than 32 characters are available or when they do
+// not parse as an id.
+static bool journal_sd_id128_parse(const char *in, sd_id128_t *ret) {
+    // <ctype.h> classification functions are only defined for values
+    // representable as unsigned char (or EOF), so never pass a plain char
+    while(isspace((unsigned char)*in))
+        in++;
+
+    char uuid[33];
+    strncpyz(uuid, in, 32);
+    uuid[32] = '\0';
+
+    if(strlen(uuid) != 32)
+        return false;
+
+    sd_id128_t parsed;
+    if(sd_id128_from_string(uuid, &parsed) != 0)
+        return false;
+
+    *ret = parsed;
+    return true;
+}
+
+// Looks for `header` (case-insensitively) in `output`, skips the whitespace
+// that follows it and parses the decimal number found there into *number.
+// Returns true when a number was found; *number is untouched otherwise.
+static bool journalctl_output_get_number(const char *output, const char *header, uint64_t *number) {
+    const char *data = strcasestr(output, header);
+    if(!data)
+        return false;
+
+    data += strlen(header);
+    while(isspace((unsigned char)*data))
+        data++;
+
+    if(!isdigit((unsigned char)*data))
+        return false;
+
+    // sequence numbers are 64-bit; strtoul() would truncate where long is 32-bit
+    *number = strtoull(data, NULL, 10);
+    return true;
+}
+
+// Fills in jf->first_writer_id, jf->first_seqnum, jf->last_seqnum and (when
+// both sequence numbers are known) jf->messages_in_file, by running
+// "journalctl --header --file '<filename>'" and parsing its output.
+//
+// When any of the three values cannot be read, a notice is logged once per
+// journal file (tracked with jf->logged_journalctl_failure).
+//
+// NOTE: the only call to this function in journal_file_update_header() is
+// currently commented out.
+static void journal_file_get_header_from_journalctl(const char *filename, struct journal_file *jf) {
+    // unfortunately, our capabilities are not inherited by journalctl
+    // so, it fails to give us the information we need.
+
+    bool read_writer = false, read_head = false, read_tail = false;
+
+    char cmd[FILENAME_MAX * 2];
+    snprintfz(cmd, sizeof(cmd), "journalctl --header --file '%s'", filename);
+
+    // the filename is wrapped in single quotes inside the command line;
+    // a single quote in it would terminate the quoting, so such names are
+    // never executed and are reported through the failure path below
+    CLEAN_BUFFER *wb = strchr(filename, '\'') ? NULL : run_command_and_get_output_to_buffer(cmd, 1024);
+    if(wb) {
+        const char *s = buffer_tostring(wb);
+
+        const char *sequential_id_header = "Sequential Number ID:";
+        const char *sequential_id_data = strcasestr(s, sequential_id_header);
+        if(sequential_id_data) {
+            sequential_id_data += strlen(sequential_id_header);
+            if(journal_sd_id128_parse(sequential_id_data, &jf->first_writer_id))
+                read_writer = true;
+        }
+
+        uint64_t seqnum;
+
+        if(journalctl_output_get_number(s, "Head sequential number:", &seqnum)) {
+            jf->first_seqnum = seqnum;
+            read_head = (seqnum != 0);
+        }
+
+        if(journalctl_output_get_number(s, "Tail sequential number:", &seqnum)) {
+            jf->last_seqnum = seqnum;
+            read_tail = (seqnum != 0);
+        }
+
+        // both ends of the range are inclusive, as in journal_file_update_header()
+        if(read_head && read_tail && jf->last_seqnum > jf->first_seqnum)
+            jf->messages_in_file = jf->last_seqnum - jf->first_seqnum + 1;
+    }
+
+    // report once per file when ANY of the three values is missing
+    if(!jf->logged_journalctl_failure && (!read_writer || !read_head || !read_tail)) {
+
+        nd_log(NDLS_COLLECTORS, NDLP_NOTICE,
+               "Failed to read %s%s%s from journalctl's output on filename '%s', using the command: %s",
+               read_writer?"":"writer id,",
+               read_head?"":"head id,",
+               read_tail?"":"tail id,",
+               filename, cmd);
+
+        jf->logged_journalctl_failure = true;
+    }
+}
+
+// Finds the realtime timestamp of the first entry of `boot_id` in the open
+// journal `j` and stores it in the boot_ids_to_first_ut dictionary.
+//
+// The matches of `j` are flushed and replaced with a single "_BOOT_ID=<id>"
+// match, which is left in place when the function returns.
+//
+// Returns the timestamp in microseconds, or UINT64_MAX on any failure (errno
+// is then set to the negated return code of the failing sd_journal call).
+usec_t journal_file_update_annotation_boot_id(sd_journal *j, struct journal_file *jf, const char *boot_id) {
+    usec_t first_ut = UINT64_MAX;
+    int rc;
+
+    char match[100];
+    size_t match_len = snprintfz(match, sizeof(match), "_BOOT_ID=%s", boot_id);
+
+    sd_journal_flush_matches(j);
+
+    if((rc = sd_journal_add_match(j, match, match_len)) < 0) {
+        errno = -rc;
+        internal_error(true,
+                       "JOURNAL: while looking for the first timestamp of boot_id '%s', "
+                       "sd_journal_add_match('%s') on file '%s' returned %d",
+                       boot_id, match, jf->filename, rc);
+        return UINT64_MAX;
+    }
+
+    if((rc = sd_journal_seek_head(j)) < 0) {
+        errno = -rc;
+        internal_error(true,
+                       "JOURNAL: while looking for the first timestamp of boot_id '%s', "
+                       "sd_journal_seek_head() on file '%s' returned %d",
+                       boot_id, jf->filename, rc);
+        return UINT64_MAX;
+    }
+
+    if((rc = sd_journal_next(j)) < 0) {
+        errno = -rc;
+        internal_error(true,
+                       "JOURNAL: while looking for the first timestamp of boot_id '%s', "
+                       "sd_journal_next() on file '%s' returned %d",
+                       boot_id, jf->filename, rc);
+        return UINT64_MAX;
+    }
+
+    rc = sd_journal_get_realtime_usec(j, &first_ut);
+    if(rc < 0 || first_ut == 0 || first_ut == UINT64_MAX) {
+        errno = -rc;
+        // -EADDRNOTAVAIL is the one failure that is not reported
+        internal_error(rc != -EADDRNOTAVAIL,
+                       "JOURNAL: while looking for the first timestamp of boot_id '%s', "
+                       "sd_journal_get_realtime_usec() on file '%s' returned %d",
+                       boot_id, jf->filename, rc);
+        return UINT64_MAX;
+    }
+
+    // past the check above, first_ut is neither 0 nor UINT64_MAX
+    dictionary_set(boot_ids_to_first_ut, boot_id, &first_ut, sizeof(first_ut));
+    return first_ut;
+}
+
+// Enumerates the unique _BOOT_ID values of the open journal `j` and calls
+// journal_file_update_annotation_boot_id() for each of them.
+//
+// Compiled to a no-op unless HAVE_SD_JOURNAL_RESTART_FIELDS is defined.
+static void journal_file_get_boot_id_annotations(sd_journal *j __maybe_unused, struct journal_file *jf __maybe_unused) {
+#ifdef HAVE_SD_JOURNAL_RESTART_FIELDS
+    sd_journal_flush_matches(j);
+
+    int rc = sd_journal_query_unique(j, "_BOOT_ID");
+    if(rc < 0) {
+        errno = -rc;
+        internal_error(true,
+                       "JOURNAL: while querying for the unique _BOOT_ID values, "
+                       "sd_journal_query_unique() on file '%s' returned %d",
+                       jf->filename, rc);
+        errno = -rc;
+        return;
+    }
+
+    // The ids are collected first and processed afterwards: the per-id lookup
+    // changes the matches and the read position of `j`
+    // (presumably unsafe to do while the unique enumeration is running).
+    DICTIONARY *boot_ids = dictionary_create(DICT_OPTION_SINGLE_THREADED);
+
+    const void *field = NULL;
+    size_t field_length;
+
+    SD_JOURNAL_FOREACH_UNIQUE(j, field, field_length) {
+        const char *key, *value;
+        size_t key_length, value_length;
+
+        // only values that are exactly 32 characters long are accepted as boot ids
+        if(parse_journal_field(field, field_length, &key, &key_length, &value, &value_length) &&
+           value_length == 32) {
+            char boot_id[33];
+            memcpy(boot_id, value, 32);
+            boot_id[32] = '\0';
+
+            dictionary_set(boot_ids, boot_id, NULL, 0);
+        }
+    }
+
+    void *unused;
+    dfe_start_read(boot_ids, unused) {
+        journal_file_update_annotation_boot_id(j, jf, unused_dfe.name);
+    }
+    dfe_done(unused);
+
+    dictionary_destroy(boot_ids);
+#endif
+}
+
+// Extracts the writer id, the first sequence number and the timestamp of the
+// first message from a journal filename of the form
+//
+//     ...@<32 chars writer id>-<16 hex digits seqnum>-<16 hex digits timestamp>.journal
+//
+// Returns true and sets all three outputs only when the whole pattern
+// matches; on false the outputs are untouched.
+static bool journal_file_parse_header_from_filename(const char *filename, sd_id128_t *writer_id, uint64_t *seqnum, uint64_t *first_msg_ut) {
+    const char *at = strchr(filename, '@');
+    if(!at)
+        return false;
+
+    const char *dash_seqnum = strchr(at + 1, '-');
+    if(!dash_seqnum)
+        return false;
+
+    const char *dash_first_msg_ut = strchr(dash_seqnum + 1, '-');
+    if(!dash_first_msg_ut)
+        return false;
+
+    const char *dot_journal = strstr(dash_first_msg_ut + 1, ".journal");
+    if(!dot_journal)
+        return false;
+
+    // the three parts must have their exact, fixed widths
+    if(dash_seqnum - at - 1 != 32 ||
+       dash_first_msg_ut - dash_seqnum - 1 != 16 ||
+       dot_journal - dash_first_msg_ut - 1 != 16)
+        return false;
+
+    sd_id128_t writer;
+    if(!journal_sd_id128_parse(at + 1, &writer))
+        return false;
+
+    // both numbers are 16 hex digits (64 bits): strtoul() would overflow
+    // where unsigned long is 32 bits wide, so strtoull() is required
+    char *endptr = NULL;
+    uint64_t sn = strtoull(dash_seqnum + 1, &endptr, 16);
+    if(endptr != dash_first_msg_ut)
+        return false;
+
+    uint64_t ts = strtoull(dash_first_msg_ut + 1, &endptr, 16);
+    if(endptr != dot_journal)
+        return false;
+
+    *writer_id = writer;
+    *seqnum = sn;
+    *first_msg_ut = ts;
+    return true;
+}
+
+// Refreshes the header information cached in `jf` for the journal file
+// `filename`: timestamps of the first and last message, first and last
+// sequence numbers and writer ids, the number of messages and the boot id
+// annotations.
+//
+// The work is skipped when the file has not been modified since the last
+// time its header was scanned. When the file cannot be opened, the file
+// modification time is used as the timestamp of the last message.
+void journal_file_update_header(const char *filename, struct journal_file *jf) {
+    // the header has already been scanned for this modification time
+    if(jf->last_scan_header_vs_last_modified_ut == jf->file_last_modified_ut)
+        return;
+
+    fstat_cache_enable_on_thread();
+
+    const char *files[2] = {
+            [0] = filename,
+            [1] = NULL,
+    };
+
+    sd_journal *j = NULL;
+    if(sd_journal_open_files(&j, files, ND_SD_JOURNAL_OPEN_FLAGS) < 0 || !j) {
+        netdata_log_error("JOURNAL: cannot open file '%s' to update msg_ut", filename);
+        fstat_cache_disable_on_thread();
+
+        if(!jf->logged_failure) {
+            netdata_log_error("cannot open journal file '%s', using file timestamps to understand time-frame.", filename);
+            jf->logged_failure = true;
+        }
+
+        jf->msg_first_ut = 0;
+        jf->msg_last_ut = jf->file_last_modified_ut;
+        jf->last_scan_header_vs_last_modified_ut = jf->file_last_modified_ut;
+        return;
+    }
+
+    usec_t first_ut = 0, last_ut = 0;
+    uint64_t first_seqnum = 0, last_seqnum = 0;
+    sd_id128_t first_writer_id = SD_ID128_NULL, last_writer_id = SD_ID128_NULL;
+
+    // first message of the file
+    if(sd_journal_seek_head(j) < 0 || sd_journal_next(j) < 0 || sd_journal_get_realtime_usec(j, &first_ut) < 0 || !first_ut) {
+        internal_error(true, "cannot find the timestamp of the first message in '%s'", filename);
+        first_ut = 0;
+    }
+#ifdef HAVE_SD_JOURNAL_GET_SEQNUM
+    else {
+        if(sd_journal_get_seqnum(j, &first_seqnum, &first_writer_id) < 0 || !first_seqnum) {
+            internal_error(true, "cannot find the first seqnums of the first message in '%s'", filename);
+            first_seqnum = 0;
+            memset(&first_writer_id, 0, sizeof(first_writer_id));
+        }
+    }
+#endif
+
+    // last message of the file
+    if(sd_journal_seek_tail(j) < 0 || sd_journal_previous(j) < 0 || sd_journal_get_realtime_usec(j, &last_ut) < 0 || !last_ut) {
+        internal_error(true, "cannot find the timestamp of the last message in '%s'", filename);
+        last_ut = jf->file_last_modified_ut;
+    }
+#ifdef HAVE_SD_JOURNAL_GET_SEQNUM
+    else {
+        if(sd_journal_get_seqnum(j, &last_seqnum, &last_writer_id) < 0 || !last_seqnum) {
+            internal_error(true, "cannot find the last seqnums of the last message in '%s'", filename);
+            last_seqnum = 0;
+            memset(&last_writer_id, 0, sizeof(last_writer_id));
+        }
+    }
+#endif
+
+    if(first_ut > last_ut) {
+        internal_error(true, "timestamps are flipped in file '%s'", filename);
+        usec_t t = first_ut;
+        first_ut = last_ut;
+        last_ut = t;
+    }
+
+    if(!first_seqnum || !first_ut) {
+        // extract these from the filename - if possible
+        sd_id128_t writer;
+        uint64_t seqnum, ts;
+
+        if(journal_file_parse_header_from_filename(filename, &writer, &seqnum, &ts)) {
+            first_seqnum = seqnum;
+            first_writer_id = writer;
+            first_ut = ts;
+        }
+    }
+
+    jf->first_seqnum = first_seqnum;
+    jf->last_seqnum = last_seqnum;
+
+    jf->first_writer_id = first_writer_id;
+    jf->last_writer_id = last_writer_id;
+
+    jf->msg_first_ut = first_ut;
+    jf->msg_last_ut = last_ut;
+
+    if(!jf->msg_last_ut)
+        jf->msg_last_ut = jf->file_last_modified_ut;
+
+    // the number of messages can be derived from the sequence numbers
+    // only when both of them have been assigned by the same writer
+    if(last_seqnum > first_seqnum) {
+        if(!sd_id128_equal(first_writer_id, last_writer_id)) {
+            jf->messages_in_file = 0;
+            nd_log(NDLS_COLLECTORS, NDLP_NOTICE,
+                   "The writers of the first and the last message in file '%s' differ."
+                   , filename);
+        }
+        else
+            jf->messages_in_file = last_seqnum - first_seqnum + 1;
+    }
+    else
+        jf->messages_in_file = 0;
+
+//    if(!jf->messages_in_file)
+//        journal_file_get_header_from_journalctl(filename, jf);
+
+    journal_file_get_boot_id_annotations(j, jf);
+    sd_journal_close(j);
+    fstat_cache_disable_on_thread();
+
+    jf->last_scan_header_vs_last_modified_ut = jf->file_last_modified_ut;
+
+    nd_log(NDLS_COLLECTORS, NDLP_DEBUG,
+           "Journal file header updated '%s'",
+           jf->filename);
+}
+
+// Builds the name of a journal source and returns it as a STRING.
+//
+// The name is `prefix` (optional, may be NULL) followed by the characters in
+// [s, e), truncated so that the whole name, including its terminator, fits in
+// `max_len` bytes. Every character that is not alphanumeric, '-', '.' or ':'
+// is replaced with '_'.
+static STRING *string_strdupz_source(const char *s, const char *e, size_t max_len, const char *prefix) {
+    // always leave room for the terminator, even for max_len == 0
+    size_t size = max_len ? max_len : 1;
+    char buf[size];
+    size_t used = 0;
+
+    if(prefix) {
+        size_t prefix_len = strlen(prefix);
+        if(prefix_len > size - 1)
+            prefix_len = size - 1; // a prefix longer than the buffer is truncated, not overflowed
+
+        memcpy(buf, prefix, prefix_len);
+        used = prefix_len;
+    }
+
+    // the space available for the payload is what remains after the prefix;
+    // the terminator goes right after the payload, so that the name is
+    // truncated only when it really exceeds the buffer
+    size_t len = (e > s) ? (size_t)(e - s) : 0;
+    if(len > size - 1 - used)
+        len = size - 1 - used;
+
+    memcpy(&buf[used], s, len);
+    used += len;
+    buf[used] = '\0';
+
+    for(size_t i = 0; buf[i] ;i++) {
+        // <ctype.h> functions are only defined for unsigned char values
+        if(!isalnum((unsigned char)buf[i]) && buf[i] != '-' && buf[i] != '.' && buf[i] != ':')
+            buf[i] = '_';
+    }
+
+    return string_strdupz(buf);
+}
+
+// Insert callback for journal file entries (presumably registered on journal_files_registry - the registration is not in this part of the file).
+// The dictionary item name is used as the filename, and from it the callback
+// derives the source type flags and, when possible, a source name for the file.
+static void files_registry_insert_cb(const DICTIONARY_ITEM *item, void *value, void *data __maybe_unused) {
+ struct journal_file *jf = value;
+ jf->filename = dictionary_acquired_item_name(item);
+ jf->filename_len = strlen(jf->filename);
+ jf->source_type = SDJF_ALL;
+
+ // based on the filename
+ // decide the source to show to the user
+ // s points to the last '/' of the path, i.e. the start of the file's basename
+ const char *s = strrchr(jf->filename, '/');
+ if(s) {
+ // any path containing "/remote/" is treated as a remote journal
+ if(strstr(jf->filename, "/remote/")) {
+ jf->source_type |= SDJF_REMOTE_ALL;
+
+ if(strncmp(s, "/remote-", 8) == 0) {
+ s = &s[8]; // skip "/remote-"
+
+ // the source name ends at the first '@', or at ".journal" when there is no '@'
+ char *e = strchr(s, '@');
+ if(!e)
+ e = strstr(s, ".journal");
+
+ if(e) {
+ // a name made only of digits, '.' and ':' is handled as an IP address
+ const char *d = s;
+ for(; d < e && (isdigit(*d) || *d == '.' || *d == ':') ; d++) ;
+ if(d == e) {
+ // a valid IP address
+ char ip[e - s + 1];
+ memcpy(ip, s, e - s);
+ ip[e - s] = '\0';
+ char buf[SYSTEMD_JOURNAL_MAX_SOURCE_LEN];
+ if(ip_to_hostname(ip, buf, sizeof(buf)))
+ jf->source = string_strdupz_source(buf, &buf[strlen(buf)], SYSTEMD_JOURNAL_MAX_SOURCE_LEN, "remote-");
+ else {
+ // no hostname found: fall back to the IP itself
+ internal_error(true, "Cannot find the hostname for IP '%s'", ip);
+ jf->source = string_strdupz_source(s, e, SYSTEMD_JOURNAL_MAX_SOURCE_LEN, "remote-");
+ }
+ }
+ else
+ jf->source = string_strdupz_source(s, e, SYSTEMD_JOURNAL_MAX_SOURCE_LEN, "remote-");
+ }
+ }
+ }
+ else {
+ jf->source_type |= SDJF_LOCAL_ALL;
+
+ // walk backwards over the parent directory name, looking for a '.'
+ const char *t = s - 1;
+ while(t >= jf->filename && *t != '.' && *t != '/')
+ t--;
+
+ // a '.' in the parent directory name: what follows it is used as the namespace name
+ if(t >= jf->filename && *t == '.') {
+ jf->source_type |= SDJF_LOCAL_NAMESPACE;
+ jf->source = string_strdupz_source(t + 1, s, SYSTEMD_JOURNAL_MAX_SOURCE_LEN, "namespace-");
+ }
+ else if(strncmp(s, "/system", 7) == 0)
+ jf->source_type |= SDJF_LOCAL_SYSTEM;
+
+ else if(strncmp(s, "/user", 5) == 0)
+ jf->source_type |= SDJF_LOCAL_USER;
+
+ else
+ jf->source_type |= SDJF_LOCAL_OTHER;
+ }
+ }
+ else
+ // no '/' at all in the filename
+ jf->source_type |= SDJF_LOCAL_ALL | SDJF_LOCAL_OTHER;
+
+ // use the file modification time as the timestamp of the last message
+ jf->msg_last_ut = jf->file_last_modified_ut;
+
+ nd_log(NDLS_COLLECTORS, NDLP_DEBUG,
+ "Journal file added to the journal files registry: '%s'",
+ jf->filename);
+}
+
+// Conflict callback for journal file entries: merges a freshly scanned entry
+// (`new_value`) into the entry already stored (`old_value`).
+//
+// The stored scan time is moved forward when the new one is later. When the
+// file has a newer modification time, the stored modification time, size and
+// last message timestamp are refreshed from it.
+//
+// Always returns false.
+static bool files_registry_conflict_cb(const DICTIONARY_ITEM *item, void *old_value, void *new_value, void *data __maybe_unused) {
+    struct journal_file *existing = old_value;
+    struct journal_file *incoming = new_value;
+
+    if(existing->last_scan_monotonic_ut < incoming->last_scan_monotonic_ut)
+        existing->last_scan_monotonic_ut = incoming->last_scan_monotonic_ut;
+
+    // the file has not been modified since it was last seen
+    if(incoming->file_last_modified_ut <= existing->file_last_modified_ut)
+        return false;
+
+    existing->file_last_modified_ut = incoming->file_last_modified_ut;
+    existing->size = incoming->size;
+    existing->msg_last_ut = existing->file_last_modified_ut;
+
+    nd_log(NDLS_COLLECTORS, NDLP_DEBUG,
+           "Journal file updated to the journal files registry '%s'",
+           existing->filename);
+
+    return false;
+}
+
+// Totals for all the journal files that belong to one source,
+// as aggregated by available_journal_file_sources_to_json_array().
+struct journal_file_source {
+ usec_t first_ut; // earliest first-message timestamp among the files (0 is treated as "not set" when merging)
+ usec_t last_ut; // latest last-message timestamp among the files
+ size_t count; // number of files merged into this entry
+ uint64_t size; // sum of the sizes of the files
+};
+
+// Formats `size` (bytes) into `dst` using binary units.
+//
+// Sizes up to and including 1024 are printed as an integer number of bytes
+// ("B"). Larger sizes are printed with two decimals in the largest unit
+// (KiB, MiB, GiB, TiB) that the size strictly exceeds.
+static void human_readable_size_ib(uint64_t size, char *dst, size_t dst_len) {
+    const uint64_t kib = 1024ULL;
+    const uint64_t mib = 1024ULL * 1024;
+    const uint64_t gib = 1024ULL * 1024 * 1024;
+    const uint64_t tib = 1024ULL * 1024 * 1024 * 1024;
+    const double bytes = (double)size;
+
+    if(size <= kib) {
+        snprintfz(dst, dst_len, "%"PRIu64" B", size);
+        return;
+    }
+
+    if(size <= mib) {
+        snprintfz(dst, dst_len, "%0.2f KiB", bytes / 1024.0);
+        return;
+    }
+
+    if(size <= gib) {
+        snprintfz(dst, dst_len, "%0.2f MiB", bytes / 1024.0 / 1024.0);
+        return;
+    }
+
+    if(size <= tib) {
+        snprintfz(dst, dst_len, "%0.2f GiB", bytes / 1024.0 / 1024.0 / 1024.0);
+        return;
+    }
+
+    snprintfz(dst, dst_len, "%0.2f TiB", bytes / 1024.0 / 1024.0 / 1024.0 / 1024.0);
+}
+
+// Appends one component of a human readable duration to `dst`.
+//
+//  dst, dst_len - the output buffer and its size
+//  pos          - current write offset in dst (advanced by what is written)
+//  remaining    - what is still to be printed, in the units of `duration`
+//                 (reduced to the remainder after this component)
+//  duration     - the size of this component (e.g. 3600 for hours)
+//  one, many    - singular and plural names of the component
+//  printed      - set to true once any component has been printed
+//
+// A component is printed when at least one whole unit of it fits in
+// `remaining` (so exactly 60 seconds is "1 min" and a single second is
+// "1 sec"). Components after the first are separated with ", " and the last
+// one (no remainder left) is additionally introduced with "and ". "and " is
+// never emitted for the first component, so a lone component prints as
+// "5 secs" rather than "and 5 secs".
+#define print_duration(dst, dst_len, pos, remaining, duration, one, many, printed) do { \
+    if((remaining) >= (duration)) { \
+        uint64_t _count = (remaining) / (duration); \
+        uint64_t _rem = (remaining) - (_count * (duration)); \
+        (pos) += snprintfz(&(dst)[pos], (dst_len) - (pos), "%s%s%"PRIu64" %s", \
+                           (printed) ? ", " : "", \
+                           ((printed) && !_rem) ? "and " : "", \
+                           _count, _count > 1 ? (many) : (one)); \
+        (remaining) = _rem; \
+        (printed) = true; \
+    } \
+} while(0)
+
+// Writes a human readable description of `duration_s` (seconds) into `dst`.
+//
+// Negative durations are described by their absolute value. The text is
+// built with print_duration(), one component at a time, from the largest to
+// the smallest: years (365 days), months (30 days), days, hours, minutes and
+// seconds. `dst` is left as an empty string when no component is printed.
+static void human_readable_duration_s(time_t duration_s, char *dst, size_t dst_len) {
+    time_t remaining_s = (duration_s < 0) ? -duration_s : duration_s;
+    size_t written = 0;
+    bool any_printed = false;
+
+    dst[0] = 0;
+
+    print_duration(dst, dst_len, written, remaining_s, 86400 * 365, "year", "years", any_printed);
+    print_duration(dst, dst_len, written, remaining_s, 86400 * 30, "month", "months", any_printed);
+    print_duration(dst, dst_len, written, remaining_s, 86400 * 1, "day", "days", any_printed);
+    print_duration(dst, dst_len, written, remaining_s, 3600 * 1, "hour", "hours", any_printed);
+    print_duration(dst, dst_len, written, remaining_s, 60 * 1, "min", "mins", any_printed);
+    print_duration(dst, dst_len, written, remaining_s, 1, "sec", "secs", any_printed);
+}
+
+// Dictionary walkthrough callback: appends to the JSON array in `data` (a
+// BUFFER) one object describing the source stored in `entry` (a
+// struct journal_file_source), with the members:
+//   "id", "name" - the dictionary item name
+//   "pill"       - the total size of the source, human readable
+//   "info"       - number of files, total size and the time they cover
+// Always returns 1.
+static int journal_file_to_json_array_cb(const DICTIONARY_ITEM *item, void *entry, void *data) {
+    struct journal_file_source *source = entry;
+    BUFFER *wb = data;
+
+    const char *name = dictionary_acquired_item_name(item);
+
+    // prepare all the texts first, then emit the JSON object in one go
+    char size_for_humans[100];
+    human_readable_size_ib(source->size, size_for_humans, sizeof(size_for_humans));
+
+    time_t covered_s = (time_t)((source->last_ut - source->first_ut) / USEC_PER_SEC);
+    char duration_for_humans[1024];
+    human_readable_duration_s(covered_s, duration_for_humans, sizeof(duration_for_humans));
+
+    char info[1024];
+    snprintfz(info, sizeof(info), "%zu files, with a total size of %s, covering %s",
+              source->count, size_for_humans, duration_for_humans);
+
+    buffer_json_add_array_item_object(wb);
+    buffer_json_member_add_string(wb, "id", name);
+    buffer_json_member_add_string(wb, "name", name);
+    buffer_json_member_add_string(wb, "pill", size_for_humans);
+    buffer_json_member_add_string(wb, "info", info);
+    buffer_json_object_close(wb); // options object
+
+    return 1;
+}
+
+// Dictionary conflict callback that accumulates the totals of a source:
+// `new_value` (one more struct journal_file_source) is merged into the entry
+// already stored, `old_value`.
+//
+// Counts and sizes are summed. first_ut becomes the earliest and last_ut the
+// latest of the known timestamps; a timestamp of 0 means "not known" and
+// never wins over a known one.
+//
+// Always returns false.
+static bool journal_file_merge_sizes(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value , void *data __maybe_unused) {
+    struct journal_file_source *jfs = old_value, *njfs = new_value;
+    jfs->count += njfs->count;
+    jfs->size += njfs->size;
+
+    // when the stored first_ut is 0 (the first file merged had no known first
+    // timestamp) any known timestamp must replace it; comparing with '<' alone
+    // would keep it at 0 forever and inflate the duration covered by the source
+    if(njfs->first_ut && (!jfs->first_ut || njfs->first_ut < jfs->first_ut))
+        jfs->first_ut = njfs->first_ut;
+
+    if(njfs->last_ut && njfs->last_ut > jfs->last_ut)
+        jfs->last_ut = njfs->last_ut;
+
+    return false;
+}
+
+// Appends to the JSON array in `wb` one object per available journal source.
+//
+// Every file in journal_files_registry is accounted to the "all" source, to
+// each source group whose flag is set in its source_type, and to its own
+// named source when it has one. The totals are accumulated in a temporary
+// dictionary (merged by journal_file_merge_sizes()), which is then walked in
+// sorted order to generate the output.
+void available_journal_file_sources_to_json_array(BUFFER *wb) {
+    // the source groups a file may belong to, selected by its source_type flags
+    const struct {
+        uint64_t flag;
+        const char *name;
+    } groups[] = {
+            { SDJF_LOCAL_ALL,       SDJF_SOURCE_LOCAL_NAME },
+            { SDJF_LOCAL_SYSTEM,    SDJF_SOURCE_LOCAL_SYSTEM_NAME },
+            { SDJF_LOCAL_USER,      SDJF_SOURCE_LOCAL_USERS_NAME },
+            { SDJF_LOCAL_OTHER,     SDJF_SOURCE_LOCAL_OTHER_NAME },
+            { SDJF_LOCAL_NAMESPACE, SDJF_SOURCE_NAMESPACES_NAME },
+            { SDJF_REMOTE_ALL,      SDJF_SOURCE_REMOTES_NAME },
+    };
+
+    DICTIONARY *totals = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_NAME_LINK_DONT_CLONE|DICT_OPTION_DONT_OVERWRITE_VALUE);
+    dictionary_register_conflict_callback(totals, journal_file_merge_sizes, NULL);
+
+    struct journal_file *jf;
+    dfe_start_read(journal_files_registry, jf) {
+        struct journal_file_source one = {
+                .first_ut = jf->msg_first_ut,
+                .last_ut = jf->msg_last_ut,
+                .count = 1,
+                .size = jf->size,
+        };
+
+        // every file belongs to the "all" source
+        dictionary_set(totals, SDJF_SOURCE_ALL_NAME, &one, sizeof(one));
+
+        for(size_t g = 0; g < sizeof(groups) / sizeof(groups[0]); g++) {
+            if(jf->source_type & groups[g].flag)
+                dictionary_set(totals, groups[g].name, &one, sizeof(one));
+        }
+
+        if(jf->source)
+            dictionary_set(totals, string2str(jf->source), &one, sizeof(one));
+    }
+    dfe_done(jf);
+
+    dictionary_sorted_walkthrough_read(totals, journal_file_to_json_array_cb, wb);
+
+    dictionary_destroy(totals);
+}
+
+// Delete callback for journal file entries: reports the removal of the
+// journal file and releases the source name held by the entry.
+static void files_registry_delete_cb(const DICTIONARY_ITEM *item, void *value, void *data __maybe_unused) {
+    struct journal_file *removed = value;
+    const char *removed_name = dictionary_acquired_item_name(item);
+
+    // both are referenced explicitly, so that they count as used
+    // even if internal_error() expands to nothing
+    (void)removed;
+    (void)removed_name;
+
+    internal_error(true, "removed journal file '%s'", removed_name);
+    string_freez(removed->source);
+}
+
+// Recursively scans `dirname` for files whose name ends in ".journal".
+//
+//  files   - when not NULL, every journal file found is added to this
+//            dictionary, keyed by its full path
+//  dirs    - the directories already visited, keyed by path; a directory
+//            found in it is not scanned again (this also protects against
+//            symbolic link loops)
+//  dirname - the directory to scan
+//  depth   - the nesting level of `dirname` (0 for the top level); nothing
+//            is scanned beyond VAR_LOG_JOURNAL_MAX_DEPTH
+//
+// Symbolic links are followed, both to directories and to regular files.
+// Directories that cannot be opened are skipped; the failure is logged unless
+// the directory does not exist or is not a directory.
+void journal_directory_scan_recursively(DICTIONARY *files, DICTIONARY *dirs, const char *dirname, int depth) {
+    static const char *ext = ".journal";
+    static const ssize_t ext_len = sizeof(".journal") - 1;
+
+    if (depth > VAR_LOG_JOURNAL_MAX_DEPTH)
+        return;
+
+    DIR *dir;
+    struct dirent *entry;
+    char full_path[FILENAME_MAX];
+
+    // Open the directory.
+    if ((dir = opendir(dirname)) == NULL) {
+        if(errno != ENOENT && errno != ENOTDIR)
+            netdata_log_error("Cannot opendir() '%s'", dirname);
+        return;
+    }
+
+    // dictionary_set() gives back the flag stored for this directory;
+    // a flag that is already true means the directory has been visited
+    bool existing = false;
+    bool *found = dictionary_set(dirs, dirname, &existing, sizeof(existing));
+    if(*found) {
+        // the directory is open at this point - do not leak the handle
+        closedir(dir);
+        return;
+    }
+    *found = true;
+
+    // Read each entry in the directory.
+    while ((entry = readdir(dir)) != NULL) {
+        if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+            continue;
+
+        ssize_t len = snprintfz(full_path, sizeof(full_path), "%s/%s", dirname, entry->d_name);
+
+        if (entry->d_type == DT_DIR) {
+            // children are one level deeper than this directory; the depth of
+            // this directory itself must stay the same for all of its entries
+            journal_directory_scan_recursively(files, dirs, full_path, depth + 1);
+        }
+        else if (entry->d_type == DT_REG && len > ext_len && strcmp(full_path + len - ext_len, ext) == 0) {
+            if(files)
+                dictionary_set(files, full_path, NULL, 0);
+
+            send_newline_and_flush();
+        }
+        else if (entry->d_type == DT_LNK) {
+            // stat() follows the link, so `info` describes its target
+            struct stat info;
+            if (stat(full_path, &info) == -1)
+                continue;
+
+            if (S_ISDIR(info.st_mode)) {
+                // The symbolic link points to a directory
+                char resolved_path[FILENAME_MAX + 1];
+                if (realpath(full_path, resolved_path) != NULL) {
+                    journal_directory_scan_recursively(files, dirs, resolved_path, depth + 1);
+                }
+            }
+            else if(S_ISREG(info.st_mode) && len > ext_len && strcmp(full_path + len - ext_len, ext) == 0) {
+                if(files)
+                    dictionary_set(files, full_path, NULL, 0);
+
+                send_newline_and_flush();
+            }
+        }
+    }
+
+    closedir(dir);
+}
+
+// number of scans completed by journal_files_registry_update()
+static size_t journal_files_scans = 0;
+
+// Returns true once at least one scan of the journal files has completed.
+bool journal_files_completed_once(void) {
+    return journal_files_scans != 0;
+}
+
+// qsort() comparator for an array of journal file paths (const char *).
+//
+// Ordering:
+//  1. paths without a '@' sort before paths that have one; two such paths
+//     are ordered by strcmp();
+//  2. paths with a '@' are ordered by the hexadecimal number that follows
+//     the last '-' found after the '@', largest first;
+//  3. equal numbers fall back to reverse strcmp() order.
+//
+// Paths that have a '@' but no '-' after it are ordered by strcmp().
+int filenames_compar(const void *a, const void *b) {
+    const char *p1 = *(const char **)a;
+    const char *p2 = *(const char **)b;
+
+    const char *at1 = strchr(p1, '@');
+    const char *at2 = strchr(p2, '@');
+
+    if(!at1 && at2)
+        return -1;
+
+    if(at1 && !at2)
+        return 1;
+
+    if(!at1 && !at2)
+        return strcmp(p1, p2);
+
+    const char *dash1 = strrchr(at1, '-');
+    const char *dash2 = strrchr(at2, '-');
+
+    if(!dash1 || !dash2)
+        return strcmp(p1, p2);
+
+    // the numbers are 64-bit wide (16 hex digits); strtoul() would saturate
+    // them all to ULONG_MAX where unsigned long is 32 bits
+    uint64_t ts1 = strtoull(dash1 + 1, NULL, 16);
+    uint64_t ts2 = strtoull(dash2 + 1, NULL, 16);
+
+    if(ts1 > ts2)
+        return -1;
+
+    if(ts1 < ts2)
+        return 1;
+
+    return -strcmp(p1, p2);
+}
+
+// Rescans the configured journal directories and brings
+// journal_files_registry in sync with what is found on disk.
+//
+// The call is a no-op when the scan lock cannot be taken. Otherwise:
+//  1. all the journal files under journal_directories are collected;
+//  2. they are sorted with filenames_compar() and each one that can be
+//     stat()ed is inserted or refreshed in the registry, followed by a
+//     header update;
+//  3. registry entries not seen by this scan are deleted;
+//  4. the counter of completed scans is incremented.
+void journal_files_registry_update(void) {
+    static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER;
+
+    // the lock is already held - skip this scan instead of waiting for it
+    if(!spinlock_trylock(&spinlock))
+        return;
+
+    usec_t scan_monotonic_ut = now_monotonic_usec();
+
+    DICTIONARY *found_files = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE);
+    DICTIONARY *visited_dirs = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_DONT_OVERWRITE_VALUE);
+
+    // the list of directories ends at the first entry without a path
+    for(unsigned d = 0; d < MAX_JOURNAL_DIRECTORIES && journal_directories[d].path; d++)
+        journal_directory_scan_recursively(found_files, visited_dirs, journal_directories[d].path, 0);
+
+    // flatten the names of the files found into an array, to sort them
+    const char **sorted = mallocz(sizeof(const char *) * dictionary_entries(found_files));
+    size_t used = 0;
+
+    void *unused;
+    dfe_start_read(found_files, unused) {
+        if(used >= dictionary_entries(found_files)) continue;
+        sorted[used++] = unused_dfe.name;
+    }
+    dfe_done(unused);
+
+    qsort(sorted, used, sizeof(const char *), filenames_compar);
+
+    for(size_t i = 0; i < used ;i++) {
+        const char *full_path = sorted[i];
+
+        struct stat info;
+        if (stat(full_path, &info) == -1)
+            continue;
+
+        struct journal_file t = {
+                .file_last_modified_ut = info.st_mtim.tv_sec * USEC_PER_SEC + info.st_mtim.tv_nsec / NSEC_PER_USEC,
+                .last_scan_monotonic_ut = scan_monotonic_ut,
+                .size = info.st_size,
+                .max_journal_vs_realtime_delta_ut = JOURNAL_VS_REALTIME_DELTA_DEFAULT_UT,
+        };
+        struct journal_file *registered = dictionary_set(journal_files_registry, full_path, &t, sizeof(t));
+        journal_file_update_header(registered->filename, registered);
+    }
+
+    freez(sorted);
+    dictionary_destroy(found_files);
+    dictionary_destroy(visited_dirs);
+
+    // entries that were not touched by this scan are no longer on disk
+    struct journal_file *jf;
+    dfe_start_write(journal_files_registry, jf) {
+        if(jf->last_scan_monotonic_ut < scan_monotonic_ut)
+            dictionary_del(journal_files_registry, jf_dfe.name);
+    }
+    dfe_done(jf);
+
+    journal_files_scans++;
+    spinlock_unlock(&spinlock);
+
+    internal_error(true,
+                   "Journal library scan completed in %.3f ms",
+                   (double)(now_monotonic_usec() - scan_monotonic_ut) / (double)USEC_PER_MS);
+}
+
+// ----------------------------------------------------------------------------
+
+int journal_file_dict_items_backward_compar(const void *a, const void *b) { // qsort() comparator: larger timestamps first
+    const DICTIONARY_ITEM **item_a = (const DICTIONARY_ITEM **)a;
+    const DICTIONARY_ITEM **item_b = (const DICTIONARY_ITEM **)b;
+
+    struct journal_file *left = dictionary_acquired_item_value(*item_a);
+    struct journal_file *right = dictionary_acquired_item_value(*item_b);
+
+    // Three keys are compared in turn. Each one is ordered descending:
+    // the item with the larger value sorts earlier (-1). The next key
+    // is consulted only when the current one is equal on both sides.
+
+    // 1st key: timestamp of the last message
+    if(left->msg_last_ut != right->msg_last_ut)
+        return (left->msg_last_ut < right->msg_last_ut) ? 1 : -1;
+
+    // 2nd key: last modification time of the file
+    if(left->file_last_modified_ut != right->file_last_modified_ut)
+        return (left->file_last_modified_ut < right->file_last_modified_ut) ? 1 : -1;
+
+    // 3rd key: timestamp of the first message
+    if(left->msg_first_ut != right->msg_first_ut)
+        return (left->msg_first_ut < right->msg_first_ut) ? 1 : -1;
+
+    // all three keys are equal:
+    // the two items are equivalent for sorting purposes
+
+    return 0;
+}
+
+int journal_file_dict_items_forward_compar(const void *a, const void *b) { // qsort() comparator: smaller timestamps first
+    return journal_file_dict_items_backward_compar(b, a); // swapping the operands yields the negated backward result
+}
+
+static bool boot_id_conflict_cb(const DICTIONARY_ITEM *item, void *old_value, void *new_value, void *data __maybe_unused) {
+    // dictionary conflict callback: keep the smaller of the stored and the incoming usec_t value
+    usec_t *stored_ut = old_value;
+    const usec_t candidate_ut = *(usec_t *)new_value;
+
+    if(candidate_ut >= *stored_ut)
+        return false; // the stored value is already the smaller one - nothing changed
+
+    *stored_ut = candidate_ut;
+    return true; // the stored value has been replaced
+}
+
+void journal_init_files_and_directories(void) { // fill journal_directories[] and create the dictionaries used by the plugin
+ unsigned d = 0; // next free index in journal_directories[]
+
+ // ------------------------------------------------------------------------
+ // setup the journal directories
+
+ journal_directories[d++].path = strdupz("/run/log/journal");
+ journal_directories[d++].path = strdupz("/var/log/journal");
+ // when a host prefix is configured, also add the same two directories below it
+ if(*netdata_configured_host_prefix) {
+ char path[PATH_MAX];
+ snprintfz(path, sizeof(path), "%s/var/log/journal", netdata_configured_host_prefix);
+ journal_directories[d++].path = strdupz(path);
+ snprintfz(path, sizeof(path), "%s/run/log/journal", netdata_configured_host_prefix);
+ journal_directories[d++].path = strdupz(path);
+ }
+
+ // terminate the list
+ journal_directories[d].path = NULL; // NOTE(review): assumes the array holds at least 5 entries - confirm against MAX_JOURNAL_DIRECTORIES
+
+ // ------------------------------------------------------------------------
+ // initialize the used hashes files registry
+
+ used_hashes_registry = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE);
+ // current realtime clock, truncated to a whole second
+ systemd_journal_session = (now_realtime_usec() / USEC_PER_SEC) * USEC_PER_SEC;
+ // registry of journal files: fixed-size values of struct journal_file
+ journal_files_registry = dictionary_create_advanced(
+ DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
+ NULL, sizeof(struct journal_file));
+
+ dictionary_register_insert_callback(journal_files_registry, files_registry_insert_cb, NULL);
+ dictionary_register_delete_callback(journal_files_registry, files_registry_delete_cb, NULL);
+ dictionary_register_conflict_callback(journal_files_registry, files_registry_conflict_cb, NULL);
+ // fixed-size usec_t values; on conflict boot_id_conflict_cb keeps the smaller one
+ boot_ids_to_first_ut = dictionary_create_advanced(
+ DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE,
+ NULL, sizeof(usec_t));
+
+ dictionary_register_conflict_callback(boot_ids_to_first_ut, boot_id_conflict_cb, NULL);
+
+}
diff --git a/collectors/systemd-journal.plugin/systemd-journal-fstat.c b/collectors/systemd-journal.plugin/systemd-journal-fstat.c
new file mode 100644
index 00000000000000..45ea78174c9be7
--- /dev/null
+++ b/collectors/systemd-journal.plugin/systemd-journal-fstat.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "systemd-internals.h"
+
+
+// ----------------------------------------------------------------------------
+// fstat64 overloading to speed up libsystemd
+// https://github.com/systemd/systemd/pull/29261
+
+#include
+#include
+
+#define FSTAT_CACHE_MAX 1024
+struct fdstat64_cache_entry { // one cache slot per file descriptor number, used by the fstat64() override
+ bool enabled; // set when a thread with caching enabled claims the slot for its session
+ bool updated; // true once stat/ret/err_no hold the result of a real fstat64() call
+ int err_no; // errno as it was right after the real call
+ struct stat64 stat; // the cached result buffer
+ int ret; // the cached return value
+ size_t cached_count; // how many times this slot answered a call from the cache
+ size_t session; // caching session that owns the slot
+};
+
+struct fdstat64_cache_entry fstat64_cache[FSTAT_CACHE_MAX] = {0 }; // indexed by fd; only fds below FSTAT_CACHE_MAX are cached
+__thread size_t fstat_thread_calls = 0; // per thread: calls to the fstat64() override
+__thread size_t fstat_thread_cached_responses = 0; // per thread: calls answered from the cache
+static __thread bool enable_thread_fstat = false; // per thread: whether new cache slots may be claimed
+static __thread size_t fstat_caching_thread_session = 0; // per thread: the session id obtained at the last enable/disable
+static size_t fstat_caching_global_session = 0; // process-wide session counter, incremented atomically
+
+void fstat_cache_enable_on_thread(void) { // start a new caching session for the calling thread
+ fstat_caching_thread_session = __atomic_add_fetch(&fstat_caching_global_session, 1, __ATOMIC_ACQUIRE); // a fresh id: slots of older sessions no longer match
+ enable_thread_fstat = true;
+}
+
+void fstat_cache_disable_on_thread(void) { // stop caching for the calling thread
+ fstat_caching_thread_session = __atomic_add_fetch(&fstat_caching_global_session, 1, __ATOMIC_RELEASE); // a fresh id: slots filled so far no longer match this thread
+ enable_thread_fstat = false;
+}
+
+int fstat64(int fd, struct stat64 *buf) { // overrides fstat64(): repeated calls on one fd within a caching session are answered from fstat64_cache
+    static int (*real_fstat)(int, struct stat64 *) = NULL;
+    if (!real_fstat)
+        real_fstat = dlsym(RTLD_NEXT, "fstat64"); // the next definition of fstat64 in library search order
+    if (!real_fstat) {
+        errno = ENOSYS; // nothing to forward to: fail the call instead of calling through a NULL pointer
+        return -1;
+    }
+    fstat_thread_calls++;
+    if(fd >= 0 && fd < FSTAT_CACHE_MAX) {
+        if(enable_thread_fstat && fstat64_cache[fd].session != fstat_caching_thread_session) {
+            fstat64_cache[fd].session = fstat_caching_thread_session; // claim the slot for this thread's session
+            fstat64_cache[fd].enabled = true;
+            fstat64_cache[fd].updated = false; // whatever the slot held belongs to another session
+        }
+        if(fstat64_cache[fd].enabled && fstat64_cache[fd].updated && fstat64_cache[fd].session == fstat_caching_thread_session) {
+            fstat_thread_cached_responses++; // cache hit: replay errno, buffer and return value of the real call
+            errno = fstat64_cache[fd].err_no;
+            *buf = fstat64_cache[fd].stat;
+            fstat64_cache[fd].cached_count++;
+            return fstat64_cache[fd].ret;
+        }
+    }
+
+    int ret = real_fstat(fd, buf);
+    if(fd >= 0 && fd < FSTAT_CACHE_MAX && fstat64_cache[fd].enabled && fstat64_cache[fd].session == fstat_caching_thread_session) {
+        fstat64_cache[fd].ret = ret; // remember the outcome for the rest of this session
+        fstat64_cache[fd].updated = true;
+        fstat64_cache[fd].err_no = errno;
+        fstat64_cache[fd].stat = *buf;
+    }
+
+    return ret;
+}
diff --git a/collectors/systemd-journal.plugin/systemd-journal-self-signed-certs.sh b/collectors/systemd-journal.plugin/systemd-journal-self-signed-certs.sh
new file mode 100755
index 00000000000000..ada735f1fbc215
--- /dev/null
+++ b/collectors/systemd-journal.plugin/systemd-journal-self-signed-certs.sh
@@ -0,0 +1,267 @@
+#!/usr/bin/env bash
+
+me="${0}"
+dst="/etc/ssl/systemd-journal"
+
+show_usage() {
+ cat <&2 "directory set to: ${dst}"
+ shift
+ ;;
+
+ *)
+ break 2
+ ;;
+ esac
+
+ shift
+done
+
+if [ -z "${1}" ]; then
+ show_usage
+ exit 1
+fi
+
+
+# Define a regular expression pattern for a valid canonical name
+valid_canonical_name_pattern="^[a-zA-Z0-9][a-zA-Z0-9.-]+$"
+
+# Check if ${1} matches the pattern
+if [[ ! "${1}" =~ ${valid_canonical_name_pattern} ]]; then
+ echo "Certificate name '${1}' is not valid."
+ exit 1
+fi
+
+# -----------------------------------------------------------------------------
+# Create the CA
+
+# stop on all errors
+set -e
+# everything below creates and chowns files, so root is required
+if [ $UID -ne 0 ]
+then
+ echo >&2 "Hey! sudo me: sudo ${me}"
+ exit 1
+fi
+# the group and the user that will own the certificates must already exist
+if ! getent group systemd-journal >/dev/null 2>&1; then
+ echo >&2 "Missing system group: systemd-journal. Did you install systemd-journald?"
+ exit 1
+fi
+
+if ! getent passwd systemd-journal-remote >/dev/null 2>&1; then
+ echo >&2 "Missing system user: systemd-journal-remote. Did you install systemd-journal-remote?"
+ exit 1
+fi
+# create the destination directory on first use; ownership and mode are set only at creation time
+if [ ! -d "${dst}" ]
+then
+ mkdir -p "${dst}"
+ chown systemd-journal-remote:systemd-journal "${dst}"
+ chmod 750 "${dst}"
+fi
+# all files from here on are created relative to the destination directory
+cd "${dst}"
+
+test ! -f ca.conf && cat >ca.conf <serial
+
+if [ ! -f ca.pem -o ! -f ca.key ]; then # (re)create the CA when either its certificate or its key is missing
+ echo >&2 "Generating ca.pem ..."
+ # self-signed (-x509) CA certificate with an unencrypted (-nodes) 2048-bit RSA key
+ openssl req -newkey rsa:2048 -days 3650 -x509 -nodes -out ca.pem -keyout ca.key -subj "/CN=systemd-journal-remote-ca/"
+ chown systemd-journal-remote:systemd-journal ca.pem
+ chmod 0640 ca.pem
+fi
+
+# -----------------------------------------------------------------------------
+# Create a server certificate
+
+generate_server_certificate() {
+    # $1 is the certificate common name; any further arguments become the subjectAltName list
+    local cn="${1}"; shift
+    if [[ -f "${cn}.pem" && -f "${cn}.key" ]]; then
+        echo >&2 "certificates for ${cn} are already available."
+    else
+        # write the extensions file handed to 'openssl ca' (it holds just an empty line without extra names)
+        if [[ -n "${*}" ]]; then
+            echo "subjectAltName = $(echo "${@}" | tr " " ",")" >"${cn}.conf"
+        else
+            echo >"${cn}.conf"
+        fi
+        echo >&2 "Generating server: ${cn}.pem and ${cn}.key ..."
+        openssl req -newkey rsa:2048 -nodes -out "${cn}.csr" -keyout "${cn}.key" -subj "/CN=${cn}/"
+        openssl ca -batch -config ca.conf -notext -in "${cn}.csr" -out "${cn}.pem" -extfile "${cn}.conf"
+    fi
+
+    # ownership and permissions are enforced on every run, for new and pre-existing files alike
+    chown systemd-journal-remote:systemd-journal "${cn}.pem" "${cn}.key"
+    chmod 0640 "${cn}.pem" "${cn}.key"
+}
+
+
+# -----------------------------------------------------------------------------
+# Create a script to install the certificate on each server
+
+generate_install_script() {
+ local cn="${1}"
+ local dst="/etc/ssl/systemd-journal"
+
+ cat >"runme-on-${cn}.sh" <&2 "Hey! sudo me: sudo \${0}"
+ exit 1
+fi
+
+# make sure the systemd-journal group exists
+# all certificates will be owned by this group
+if ! getent group systemd-journal >/dev/null 2>&1; then
+ echo >&2 "Missing system group: systemd-journal. Did you install systemd-journald?"
+ exit 1
+fi
+
+if ! getent passwd systemd-journal-remote >/dev/null 2>&1; then
+ echo >&2 "Missing system user: systemd-journal-remote. Did you install systemd-journal-remote?"
+ exit 1
+fi
+
+if [ ! -d ${dst} ]; then
+ echo >&2 "creating directory: ${dst}"
+ mkdir -p "${dst}"
+fi
+chown systemd-journal-remote:systemd-journal "${dst}"
+chmod 750 "${dst}"
+cd "${dst}"
+
+echo >&2 "saving trusted certificate file as: ${dst}/ca.pem"
+cat >ca.pem <&2 "saving server ${cn} certificate file as: ${dst}/${cn}.pem"
+cat >"${cn}.pem" <&2 "saving server ${cn} key file as: ${dst}/${cn}.key"
+cat >"${cn}.key" <&2 "updating the certificates in \${cfg}"
+ sed -i "s|^#\\?\\s*ServerKeyFile=.*$|ServerKeyFile=${dst}/${cn}.key|" \${cfg}
+ sed -i "s|^#\\?\\s*ServerCertificateFile=.*$|ServerCertificateFile=${dst}/${cn}.pem|" \${cfg}
+ sed -i "s|^#\\?\\s*TrustedCertificateFile=.*$|TrustedCertificateFile=${dst}/ca.pem|" \${cfg}
+ fi
+done
+
+echo >&2 "certificates installed - you may need to restart services to active them"
+echo >&2
+echo >&2 "If this is a central server:"
+echo >&2 "# systemctl restart systemd-journal-remote.socket"
+echo >&2
+echo >&2 "If this is a passive client:"
+echo >&2 "# systemctl restart systemd-journal-upload.service"
+echo >&2
+echo >&2 "If this is an active client:"
+echo >&2 "# systemctl restart systemd-journal-gateway.socket"
+EOFC1
+
+ chmod 0700 "runme-on-${cn}.sh"
+}
+
+# -----------------------------------------------------------------------------
+# Create the client certificates
+
+generate_server_certificate "${@}"
+generate_install_script "${1}"
+
+
+# Set ANSI escape code for colors
+yellow_color="\033[1;33m"
+green_color="\033[0;32m"
+# Reset ANSI color after the message
+reset_color="\033[0m"
+
+# the path is quoted, so a destination directory containing spaces or glob characters is listed correctly
+echo >&2 -e "use this script to install it on ${1}: ${yellow_color}$(ls "${dst}/runme-on-${1}.sh")${reset_color}"
+echo >&2 "copy it to your server ${1}, like this:"
+echo >&2 -e "# ${green_color}scp ${dst}/runme-on-${1}.sh ${1}:/tmp/${reset_color}"
+echo >&2 "and then run it on that server to install the certificates"
+echo >&2
diff --git a/collectors/systemd-journal.plugin/systemd-journal-watcher.c b/collectors/systemd-journal.plugin/systemd-journal-watcher.c
new file mode 100644
index 00000000000000..ed41f624744b01
--- /dev/null
+++ b/collectors/systemd-journal.plugin/systemd-journal-watcher.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "systemd-internals.h"
+#include
+
+#define EVENT_SIZE (sizeof(struct inotify_event))
+#define INITIAL_WATCHES 256
+
+#define WATCH_FOR (IN_CREATE | IN_MODIFY | IN_DELETE | IN_DELETE_SELF | IN_MOVED_FROM | IN_MOVED_TO | IN_UNMOUNT)
+
+typedef struct watch_entry { // one element of Watcher.watchList
+ int slot; // index of this entry in the watch list, assigned when the entry is first created
+
+ int wd; // inotify watch descriptor; -1 while the entry is unused
+ char *path; // dynamically allocated path of the watched directory; NULL while unused
+
+ struct watch_entry *next; // link to the next unused entry on the free list
+} WatchEntry;
+
+typedef struct { // state of the inotify-based journal directory watcher
+ WatchEntry *watchList; // growable array of watch entries
+ WatchEntry *freeList; // singly linked list of released entries, reused before the array grows
+ int watchCount; // number of array elements handed out so far (released ones included)
+ int watchListSize; // allocated capacity of watchList, in elements
+
+ size_t errors; // watches that could not be added although the directory exists
+
+ DICTIONARY *pending; // full paths of .journal files with events not yet processed
+} Watcher;
+
+static WatchEntry *get_slot(Watcher *watcher) { // returns an unused watch entry: a recycled one when available, a new one otherwise
+    WatchEntry *recycled = watcher->freeList;
+    if (recycled != NULL) {
+        // pop the head of the free list
+        watcher->freeList = recycled->next;
+        recycled->next = NULL;
+        return recycled;
+    }
+
+    // the array is full: double its capacity
+    // (the free list is empty at this point, so it holds no pointers into the old array)
+    if (watcher->watchListSize == watcher->watchCount) {
+        watcher->watchListSize *= 2;
+        watcher->watchList = reallocz(watcher->watchList, watcher->watchListSize * sizeof(WatchEntry));
+    }
+
+    const int index = watcher->watchCount++;
+    WatchEntry *fresh = &watcher->watchList[index];
+
+    fresh->slot = index;
+    fresh->wd = -1;
+    fresh->path = NULL;
+    fresh->next = NULL;
+
+    return fresh;
+}
+
+static void free_slot(Watcher *watcher, WatchEntry *t) { // release an entry: free its path, mark it unused and put it on the free list
+    freez(t->path);
+    t->path = NULL;
+    t->wd = -1; // -1 marks the entry as not in use
+
+    // push it at the head of the free list
+    t->next = watcher->freeList;
+    watcher->freeList = t;
+}
+
+static int add_watch(Watcher *watcher, int inotifyFd, const char *path) { // watch 'path'; returns the watch descriptor, or -1 on failure
+    WatchEntry *entry = get_slot(watcher);
+    entry->wd = inotify_add_watch(inotifyFd, path, WATCH_FOR);
+
+    if (entry->wd != -1) {
+        entry->path = strdupz(path);
+
+        nd_log(NDLS_COLLECTORS, NDLP_DEBUG,
+               "JOURNAL WATCHER: watching directory: '%s'",
+               path);
+
+        return entry->wd;
+    }
+
+    nd_log(NDLS_COLLECTORS, NDLP_ERR,
+           "JOURNAL WATCHER: cannot watch directory: '%s'",
+           path);
+
+    free_slot(watcher, entry); // hands the entry back; its wd stays -1
+
+    // the failure counts as an error only when the directory exists,
+    // i.e. when the watch should have been possible
+    struct stat info;
+    if(stat(path, &info) == 0 && S_ISDIR(info.st_mode))
+        watcher->errors++;
+
+    return -1;
+}
+
+static void remove_watch(Watcher *watcher, int inotifyFd, int wd) { // drop the watch with descriptor 'wd' and release its entry
+    for (int idx = 0; idx < watcher->watchCount; idx++) {
+        WatchEntry *entry = &watcher->watchList[idx];
+        if (entry->wd != wd)
+            continue;
+
+        nd_log(NDLS_COLLECTORS, NDLP_DEBUG,
+               "JOURNAL WATCHER: removing watch from directory: '%s'",
+               entry->path);
+
+        inotify_rm_watch(inotifyFd, entry->wd);
+        free_slot(watcher, entry);
+        return;
+    }
+
+    nd_log(NDLS_COLLECTORS, NDLP_WARNING,
+           "JOURNAL WATCHER: cannot find directory watch %d to remove.",
+           wd);
+}
+
+static void free_watches(Watcher *watcher, int inotifyFd) { // remove every active watch, then free the watch list and the pending dictionary
+    for (int idx = 0; idx < watcher->watchCount; idx++) {
+        WatchEntry *entry = &watcher->watchList[idx];
+        if (entry->wd == -1) continue; // entry not in use
+        inotify_rm_watch(inotifyFd, entry->wd);
+        free_slot(watcher, entry);
+    }
+
+    freez(watcher->watchList);
+    watcher->watchList = NULL;
+    dictionary_destroy(watcher->pending);
+    watcher->pending = NULL;
+}
+
+static char* get_path_from_wd(Watcher *watcher, int wd) { // path of the entry with watch descriptor 'wd', or NULL when there is none
+    for (int idx = 0; idx < watcher->watchCount; idx++) {
+        WatchEntry *entry = &watcher->watchList[idx];
+        if (entry->wd == wd) return entry->path;
+    }
+    return NULL;
+}
+
+static bool is_directory_watched(Watcher *watcher, const char *path) { // true when an active watch exists for exactly this path
+    for (int idx = 0; idx < watcher->watchCount; idx++) {
+        const WatchEntry *entry = &watcher->watchList[idx];
+        if (entry->wd == -1) continue; // unused entries are skipped before their path is looked at
+        if (strcmp(entry->path, path) == 0) return true;
+    }
+    return false;
+}
+
+static void watch_directory_and_subdirectories(Watcher *watcher, int inotifyFd, const char *basePath) { // add watches for the directories found by scanning basePath
+ DICTIONARY *dirs = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE);
+ // files == NULL: only the directories dictionary is passed to the scan
+ journal_directory_scan_recursively(NULL, dirs, basePath, 0);
+
+ void *x;
+ dfe_start_read(dirs, x) {
+ const char *dirname = x_dfe.name;
+ // Check if this directory is already being watched
+ if (!is_directory_watched(watcher, dirname)) {
+ add_watch(watcher, inotifyFd, dirname); // failures are counted in watcher->errors by add_watch()
+ }
+ }
+ dfe_done(x);
+
+ dictionary_destroy(dirs);
+}
+
+static bool is_subpath(const char *path, const char *subpath) {
+    // true when 'subpath' equals 'path', or continues it with a '/'
+    const size_t prefix_len = strlen(path);
+
+    if (strncmp(path, subpath, prefix_len) != 0)
+        return false; // 'subpath' does not begin with 'path'
+
+    const char boundary = subpath[prefix_len]; // in bounds: the first prefix_len characters matched
+    return boundary == '\0' || boundary == '/';
+}
+
+void remove_directory_watch(Watcher *watcher, int inotifyFd, const char *dirPath) { // forget a directory that was deleted or moved away, with everything below it
+    for (int i = 0; i < watcher->watchCount; ++i) {
+        WatchEntry *t = &watcher->watchList[i];
+        if (t->wd != -1 && is_subpath(dirPath, t->path)) { // the watch is on dirPath itself or on a directory below it
+            inotify_rm_watch(inotifyFd, t->wd);
+            free_slot(watcher, t);
+        }
+    }
+
+    struct journal_file *jf;
+    dfe_start_write(journal_files_registry, jf) {
+        if(is_subpath(dirPath, jf->filename)) // the journal file lives somewhere below dirPath
+            dictionary_del(journal_files_registry, jf->filename);
+    }
+    dfe_done(jf);
+
+    dictionary_garbage_collect(journal_files_registry);
+}
+
+void process_event(Watcher *watcher, int inotifyFd, struct inotify_event *event) { // handle one inotify event of a watched directory
+    if(event->mask & IN_DELETE_SELF) { // refers to the watched directory itself, so it carries no name: check it before the length test
+        remove_watch(watcher, inotifyFd, event->wd);
+        return;
+    }
+
+    if(!event->len) { // no name present: event->name must not be read
+        nd_log(NDLS_COLLECTORS, NDLP_NOTICE,
+               "JOURNAL WATCHER: received event with mask %u and len %u (this is zero) - ignoring it.",
+               event->mask, event->len);
+        return;
+    }
+
+    char *dirPath = get_path_from_wd(watcher, event->wd);
+    if(!dirPath) {
+        nd_log(NDLS_COLLECTORS, NDLP_NOTICE,
+               "JOURNAL WATCHER: received event with mask %u and len %u for path: '%s' - "
+               "but we can't find its watch descriptor - ignoring it."
+               , event->mask, event->len, event->name);
+        return;
+    }
+
+    static __thread char fullPath[PATH_MAX];
+    snprintfz(fullPath, sizeof(fullPath), "%s/%s", dirPath, event->name);
+    // fullPath contains the full path to the file
+
+    size_t len = strlen(event->name);
+
+    if(event->mask & IN_ISDIR) {
+        if (event->mask & (IN_DELETE | IN_MOVED_FROM)) {
+            // A directory is deleted or moved out
+            nd_log(NDLS_COLLECTORS, NDLP_DEBUG,
+                   "JOURNAL WATCHER: Directory deleted or moved out: '%s'",
+                   fullPath);
+
+            // Drop the watches and the registry entries that belong to this directory
+            remove_directory_watch(watcher, inotifyFd, fullPath);
+        }
+        else if (event->mask & (IN_CREATE | IN_MOVED_TO)) {
+            // A new directory is created or moved in
+            nd_log(NDLS_COLLECTORS, NDLP_DEBUG,
+                   "JOURNAL WATCHER: New directory created or moved in: '%s'",
+                   fullPath);
+
+            // Start watching the new directory - recursive watch
+            watch_directory_and_subdirectories(watcher, inotifyFd, fullPath);
+        }
+        else
+            nd_log(NDLS_COLLECTORS, NDLP_WARNING,
+                   "JOURNAL WATCHER: Received unhandled event with mask %u for directory '%s'",
+                   event->mask, fullPath);
+    }
+    else if(len > sizeof(".journal") - 1 && strcmp(&event->name[len - (sizeof(".journal") - 1)], ".journal") == 0) {
+        // It is a file that ends in .journal
+        // add it to our pending list; process_pending() examines it later
+        dictionary_set(watcher->pending, fullPath, NULL, 0);
+    }
+    else
+        nd_log(NDLS_COLLECTORS, NDLP_DEBUG,
+               "JOURNAL WATCHER: ignoring event with mask %u for file '%s'",
+               event->mask, fullPath);
+}
+
+static void process_pending(Watcher *watcher) { // apply the queued .journal file events to journal_files_registry and empty the queue
+ void *x;
+ dfe_start_write(watcher->pending, x) {
+ struct stat info;
+ const char *fullPath = x_dfe.name; // the dictionary key is the full path of the file
+
+ if(stat(fullPath, &info) != 0) {
+ nd_log(NDLS_COLLECTORS, NDLP_DEBUG,
+ "JOURNAL WATCHER: file '%s' no longer exists, removing it from the registry",
+ fullPath);
+
+ dictionary_del(journal_files_registry, fullPath);
+ }
+ else if(S_ISREG(info.st_mode)) {
+ nd_log(NDLS_COLLECTORS, NDLP_DEBUG,
+ "JOURNAL WATCHER: file '%s' has been added/updated, updating the registry",
+ fullPath);
+ // same fields as the ones filled in by journal_files_registry_update()
+ struct journal_file t = {
+ .file_last_modified_ut = info.st_mtim.tv_sec * USEC_PER_SEC +
+ info.st_mtim.tv_nsec / NSEC_PER_USEC,
+ .last_scan_monotonic_ut = now_monotonic_usec(),
+ .size = info.st_size,
+ .max_journal_vs_realtime_delta_ut = JOURNAL_VS_REALTIME_DELTA_DEFAULT_UT,
+ };
+ struct journal_file *jf = dictionary_set(journal_files_registry, fullPath, &t, sizeof(t));
+ journal_file_update_header(jf->filename, jf);
+ }
+ // paths that exist but are not regular files are dropped from the queue without any registry change
+ dictionary_del(watcher->pending, fullPath);
+ }
+ dfe_done(x);
+
+ dictionary_garbage_collect(watcher->pending);
+}
+
+void *journal_watcher_main(void *arg __maybe_unused) { // thread entry point: watch the journal directories with inotify, restarting on failures
+    while(1) {
+        Watcher watcher = {
+            .watchList = mallocz(INITIAL_WATCHES * sizeof(WatchEntry)),
+            .freeList = NULL,
+            .watchCount = 0,
+            .watchListSize = INITIAL_WATCHES,
+            .pending = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE|DICT_OPTION_SINGLE_THREADED),
+            .errors = 0,
+        };
+
+        int inotifyFd = inotify_init();
+        if (inotifyFd < 0) {
+            nd_log(NDLS_COLLECTORS, NDLP_ERR, "inotify_init() failed.");
+            free_watches(&watcher, inotifyFd); // no watches exist yet; this only frees the memory
+            return NULL;
+        }
+
+        for (unsigned i = 0; i < MAX_JOURNAL_DIRECTORIES; i++) {
+            if (!journal_directories[i].path) break;
+            watch_directory_and_subdirectories(&watcher, inotifyFd, journal_directories[i].path);
+        }
+
+        usec_t last_headers_update_ut = now_monotonic_usec();
+        struct buffered_reader reader;
+        reader.read_buffer[0] = '\0'; // start from an empty buffer: the reader was used uninitialized before
+        reader.read_len = 0;
+        reader.pos = 0;
+        while (1) {
+            buffered_reader_ret_t rc = buffered_reader_read_timeout(
+                    &reader, inotifyFd, SYSTEMD_JOURNAL_EXECUTE_WATCHER_PENDING_EVERY_MS, false);
+
+            if (rc != BUFFERED_READER_READ_OK && rc != BUFFERED_READER_READ_POLL_TIMEOUT) {
+                nd_log(NDLS_COLLECTORS, NDLP_CRIT,
+                       "JOURNAL WATCHER: cannot read inotify events, buffered_reader_read_timeout() returned %d - "
+                       "restarting the watcher.",
+                       rc);
+                break;
+            }
+
+            if(rc == BUFFERED_READER_READ_OK) {
+                bool unmount_event = false;
+                ssize_t i = 0;
+                while (i < reader.read_len) {
+                    struct inotify_event *event = (struct inotify_event *) &reader.read_buffer[i];
+                    if(event->mask & IN_UNMOUNT) {
+                        unmount_event = true; // a watched filesystem went away: restart from scratch
+                        break;
+                    }
+                    process_event(&watcher, inotifyFd, event);
+                    i += (ssize_t)EVENT_SIZE + event->len; // events are variable-length: header plus name
+                }
+
+                reader.read_buffer[0] = '\0';
+                reader.read_len = 0;
+                reader.pos = 0;
+                if(unmount_event)
+                    break;
+            }
+
+            usec_t ut = now_monotonic_usec();
+            if (dictionary_entries(watcher.pending) && (rc == BUFFERED_READER_READ_POLL_TIMEOUT ||
+                last_headers_update_ut + (SYSTEMD_JOURNAL_EXECUTE_WATCHER_PENDING_EVERY_MS * USEC_PER_MS) <= ut)) {
+                process_pending(&watcher);
+                last_headers_update_ut = ut;
+            }
+
+            if(watcher.errors) {
+                nd_log(NDLS_COLLECTORS, NDLP_NOTICE,
+                       "JOURNAL WATCHER: there were errors in setting up inotify watches - restarting the watcher.");
+                break; // actually restart, as the message says, instead of logging on every iteration
+            }
+        }
+
+        free_watches(&watcher, inotifyFd); // remove the watches while the descriptor is still open
+        close(inotifyFd);
+
+        // this will scan the directories and cleanup the registry
+        journal_files_registry_update();
+
+        sleep_usec(5 * USEC_PER_SEC);
+    }
+
+    return NULL;
+}
diff --git a/collectors/systemd-journal.plugin/systemd-journal.c b/collectors/systemd-journal.plugin/systemd-journal.c
index 304ff244ada4ed..f812b2161ecdad 100644
--- a/collectors/systemd-journal.plugin/systemd-journal.c
+++ b/collectors/systemd-journal.plugin/systemd-journal.c
@@ -5,400 +5,1903 @@
* GPL v3+
*/
-// TODO - 1) MARKDOC
+#include "systemd-internals.h"
-#include "collectors/all.h"
-#include "libnetdata/libnetdata.h"
-#include "libnetdata/required_dummies.h"
+/*
+ * TODO
+ *
+ * _UDEV_DEVLINK is frequently set more than once per field - support multi-value facets
+ *
+ */
-#ifndef SD_JOURNAL_ALL_NAMESPACES
-#define JOURNAL_NAMESPACE SD_JOURNAL_LOCAL_ONLY
-#else
-#define JOURNAL_NAMESPACE SD_JOURNAL_ALL_NAMESPACES
+#define FACET_MAX_VALUE_LENGTH 8192
+
+#define SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION "View, search and analyze systemd journal entries."
+#define SYSTEMD_JOURNAL_FUNCTION_NAME "systemd-journal"
+#define SYSTEMD_JOURNAL_DEFAULT_TIMEOUT 60
+#define SYSTEMD_JOURNAL_MAX_PARAMS 1000
+#define SYSTEMD_JOURNAL_DEFAULT_QUERY_DURATION (1 * 3600)
+#define SYSTEMD_JOURNAL_DEFAULT_ITEMS_PER_QUERY 200
+#define SYSTEMD_JOURNAL_DEFAULT_ITEMS_SAMPLING 1000000
+#define SYSTEMD_JOURNAL_SAMPLING_SLOTS 1000
+#define SYSTEMD_JOURNAL_SAMPLING_RECALIBRATE 10000
+
+#define JOURNAL_PARAMETER_HELP "help"
+#define JOURNAL_PARAMETER_AFTER "after"
+#define JOURNAL_PARAMETER_BEFORE "before"
+#define JOURNAL_PARAMETER_ANCHOR "anchor"
+#define JOURNAL_PARAMETER_LAST "last"
+#define JOURNAL_PARAMETER_QUERY "query"
+#define JOURNAL_PARAMETER_FACETS "facets"
+#define JOURNAL_PARAMETER_HISTOGRAM "histogram"
+#define JOURNAL_PARAMETER_DIRECTION "direction"
+#define JOURNAL_PARAMETER_IF_MODIFIED_SINCE "if_modified_since"
+#define JOURNAL_PARAMETER_DATA_ONLY "data_only"
+#define JOURNAL_PARAMETER_SOURCE "source"
+#define JOURNAL_PARAMETER_INFO "info"
+#define JOURNAL_PARAMETER_ID "id"
+#define JOURNAL_PARAMETER_PROGRESS "progress"
+#define JOURNAL_PARAMETER_SLICE "slice"
+#define JOURNAL_PARAMETER_DELTA "delta"
+#define JOURNAL_PARAMETER_TAIL "tail"
+#define JOURNAL_PARAMETER_SAMPLING "sampling"
+
+#define JOURNAL_KEY_ND_JOURNAL_FILE "ND_JOURNAL_FILE"
+#define JOURNAL_KEY_ND_JOURNAL_PROCESS "ND_JOURNAL_PROCESS"
+
+#define JOURNAL_DEFAULT_SLICE_MODE true
+#define JOURNAL_DEFAULT_DIRECTION FACETS_ANCHOR_DIRECTION_BACKWARD
+
+#define SYSTEMD_ALWAYS_VISIBLE_KEYS NULL
+
+#define SYSTEMD_KEYS_EXCLUDED_FROM_FACETS \
+ "!MESSAGE_ID" \
+ "|*MESSAGE*" \
+ "|*_RAW" \
+ "|*_USEC" \
+ "|*_NSEC" \
+ "|*TIMESTAMP*" \
+ "|*_ID" \
+ "|*_ID_*" \
+ "|__*" \
+ ""
+
+#define SYSTEMD_KEYS_INCLUDED_IN_FACETS \
+ \
+ /* --- USER JOURNAL FIELDS --- */ \
+ \
+ /* "|MESSAGE" */ \
+ "|MESSAGE_ID" \
+ "|PRIORITY" \
+ "|CODE_FILE" \
+ /* "|CODE_LINE" */ \
+ "|CODE_FUNC" \
+ "|ERRNO" \
+ /* "|INVOCATION_ID" */ \
+ /* "|USER_INVOCATION_ID" */ \
+ "|SYSLOG_FACILITY" \
+ "|SYSLOG_IDENTIFIER" \
+ /* "|SYSLOG_PID" */ \
+ /* "|SYSLOG_TIMESTAMP" */ \
+ /* "|SYSLOG_RAW" */ \
+ /* "!DOCUMENTATION" */ \
+ /* "|TID" */ \
+ "|UNIT" \
+ "|USER_UNIT" \
+ "|UNIT_RESULT" /* undocumented */ \
+ \
+ \
+ /* --- TRUSTED JOURNAL FIELDS --- */ \
+ \
+ /* "|_PID" */ \
+ "|_UID" \
+ "|_GID" \
+ "|_COMM" \
+ "|_EXE" \
+ /* "|_CMDLINE" */ \
+ "|_CAP_EFFECTIVE" \
+ /* "|_AUDIT_SESSION" */ \
+ "|_AUDIT_LOGINUID" \
+ "|_SYSTEMD_CGROUP" \
+ "|_SYSTEMD_SLICE" \
+ "|_SYSTEMD_UNIT" \
+ "|_SYSTEMD_USER_UNIT" \
+ "|_SYSTEMD_USER_SLICE" \
+ "|_SYSTEMD_SESSION" \
+ "|_SYSTEMD_OWNER_UID" \
+ "|_SELINUX_CONTEXT" \
+ /* "|_SOURCE_REALTIME_TIMESTAMP" */ \
+ "|_BOOT_ID" \
+ "|_MACHINE_ID" \
+ /* "|_SYSTEMD_INVOCATION_ID" */ \
+ "|_HOSTNAME" \
+ "|_TRANSPORT" \
+ "|_STREAM_ID" \
+ /* "|LINE_BREAK" */ \
+ "|_NAMESPACE" \
+ "|_RUNTIME_SCOPE" \
+ \
+ \
+ /* --- KERNEL JOURNAL FIELDS --- */ \
+ \
+ /* "|_KERNEL_DEVICE" */ \
+ "|_KERNEL_SUBSYSTEM" \
+ /* "|_UDEV_SYSNAME" */ \
+ "|_UDEV_DEVNODE" \
+ /* "|_UDEV_DEVLINK" */ \
+ \
+ \
+ /* --- LOGGING ON BEHALF --- */ \
+ \
+ "|OBJECT_UID" \
+ "|OBJECT_GID" \
+ "|OBJECT_COMM" \
+ "|OBJECT_EXE" \
+ /* "|OBJECT_CMDLINE" */ \
+ /* "|OBJECT_AUDIT_SESSION" */ \
+ "|OBJECT_AUDIT_LOGINUID" \
+ "|OBJECT_SYSTEMD_CGROUP" \
+ "|OBJECT_SYSTEMD_SESSION" \
+ "|OBJECT_SYSTEMD_OWNER_UID" \
+ "|OBJECT_SYSTEMD_UNIT" \
+ "|OBJECT_SYSTEMD_USER_UNIT" \
+ \
+ \
+ /* --- CORE DUMPS --- */ \
+ \
+ "|COREDUMP_COMM" \
+ "|COREDUMP_UNIT" \
+ "|COREDUMP_USER_UNIT" \
+ "|COREDUMP_SIGNAL_NAME" \
+ "|COREDUMP_CGROUP" \
+ \
+ \
+ /* --- DOCKER --- */ \
+ \
+ "|CONTAINER_ID" \
+ /* "|CONTAINER_ID_FULL" */ \
+ "|CONTAINER_NAME" \
+ "|CONTAINER_TAG" \
+ "|IMAGE_NAME" /* undocumented */ \
+ /* "|CONTAINER_PARTIAL_MESSAGE" */ \
+ \
+ \
+ /* --- NETDATA --- */ \
+ \
+ "|ND_NIDL_NODE" \
+ "|ND_NIDL_CONTEXT" \
+ "|ND_LOG_SOURCE" \
+ /*"|ND_MODULE" */ \
+ "|ND_ALERT_NAME" \
+ "|ND_ALERT_CLASS" \
+ "|ND_ALERT_COMPONENT" \
+ "|ND_ALERT_TYPE" \
+ \
+ ""
+
+// ----------------------------------------------------------------------------
+
+typedef struct function_query_status { // request parameters, sampling state and progress counters of one journal query
+ bool *cancelled; // a pointer to the cancelling boolean
+ usec_t stop_monotonic_ut;
+
+ usec_t started_monotonic_ut;
+
+ // request
+ SD_JOURNAL_FILE_SOURCE_TYPE source_type;
+ SIMPLE_PATTERN *sources;
+ usec_t after_ut; // start of the requested timeframe
+ usec_t before_ut; // end of the requested timeframe
+
+ struct {
+ usec_t start_ut;
+ usec_t stop_ut;
+ } anchor;
+
+ FACETS_ANCHOR_DIRECTION direction;
+ size_t entries; // lower bound for the per-file and per-time-slot sampling thresholds
+ usec_t if_modified_since;
+ bool delta;
+ bool tail;
+ bool data_only; // sampling is disabled when set
+ bool slice; // sampling is disabled when not set
+ size_t sampling; // sampling budget; 0 means sampling is disabled
+ size_t filters;
+ usec_t last_modified;
+ const char *query;
+ const char *histogram;
+
+ struct {
+ usec_t start_ut; // the starting time of the query - we start from this
+ usec_t stop_ut; // the ending time of the query - we stop at this
+ usec_t first_msg_ut;
+
+ sd_id128_t first_msg_writer;
+ uint64_t first_msg_seqnum;
+ } query_file;
+ // sampling state for the whole query
+ struct {
+ uint32_t enable_after_samples; // set to sampling / 2
+ uint32_t slots; // histogram slots, kept within 2 .. SYSTEMD_JOURNAL_SAMPLING_SLOTS
+ uint32_t sampled;
+ uint32_t unsampled;
+ uint32_t estimated;
+ } samples;
+ // sampling state for the current file; sampling_file_init() resets all members except enable_after_samples
+ struct {
+ uint32_t enable_after_samples; // (sampling / 4) / files_matched, but not less than entries
+ uint32_t every;
+ uint32_t skipped;
+ uint32_t recalibrate;
+ uint32_t sampled;
+ uint32_t unsampled;
+ uint32_t estimated;
+ } samples_per_file;
+ // sampling state per time slot of the requested timeframe
+ struct {
+ usec_t start_ut; // copy of after_ut
+ usec_t end_ut; // copy of before_ut
+ usec_t step_ut; // width of a time slot, at least 1
+ uint32_t enable_after_samples; // (sampling / 4) / samples.slots, but not less than entries
+ uint32_t sampled[SYSTEMD_JOURNAL_SAMPLING_SLOTS];
+ uint32_t unsampled[SYSTEMD_JOURNAL_SAMPLING_SLOTS];
+ } samples_per_time_slot;
+
+ // per file progress info
+ // size_t cached_count;
+
+ // progress statistics
+ usec_t matches_setup_ut;
+ size_t rows_useful;
+ size_t rows_read;
+ size_t bytes_read;
+ size_t files_matched; // sampling is disabled when this is 0
+ size_t file_working;
+} FUNCTION_QUERY_STATUS;
+
+static void log_fqs(FUNCTION_QUERY_STATUS *fqs, const char *msg) {
+ netdata_log_error("ERROR: %s, on query "
+ "timeframe [%"PRIu64" - %"PRIu64"], "
+ "anchor [%"PRIu64" - %"PRIu64"], "
+ "if_modified_since %"PRIu64", "
+ "data_only:%s, delta:%s, tail:%s, direction:%s"
+ , msg
+ , fqs->after_ut, fqs->before_ut
+ , fqs->anchor.start_ut, fqs->anchor.stop_ut
+ , fqs->if_modified_since
+ , fqs->data_only ? "true" : "false"
+ , fqs->delta ? "true" : "false"
+ , fqs->tail ? "tail" : "false"
+ , fqs->direction == FACETS_ANCHOR_DIRECTION_FORWARD ? "forward" : "backward");
+}
+
+static inline bool netdata_systemd_journal_seek_to(sd_journal *j, usec_t timestamp) {
+    // seek to 'timestamp'; when that fails, fall back to the tail of the journal
+    if(sd_journal_seek_realtime_usec(j, timestamp) >= 0)
+        return true;
+
+    netdata_log_error("SYSTEMD-JOURNAL: Failed to seek to %" PRIu64, timestamp);
+    if(sd_journal_seek_tail(j) >= 0)
+        return true; // the fallback worked
+    netdata_log_error("SYSTEMD-JOURNAL: Failed to seek to journal's tail");
+    return false;
+}
+
+#define JD_SOURCE_REALTIME_TIMESTAMP "_SOURCE_REALTIME_TIMESTAMP"
+
+// ----------------------------------------------------------------------------
+// sampling support
+
+static void sampling_query_init(FUNCTION_QUERY_STATUS *fqs, FACETS *facets) { // decide whether sampling applies to this query and compute its thresholds
+    if(!fqs->sampling)
+        return;
+
+    if(!fqs->slice) {
+        // the user is doing a full data query
+        // disable sampling
+        fqs->sampling = 0;
+        return;
+    }
+
+    if(fqs->data_only) {
+        // the user is doing a data query
+        // disable sampling
+        fqs->sampling = 0;
+        return;
+    }
+
+    if(!fqs->files_matched) {
+        // no files have been matched
+        // disable sampling
+        fqs->sampling = 0;
+        return;
+    }
+
+    fqs->samples.slots = facets_histogram_slots(facets); // clamped to 2 .. SYSTEMD_JOURNAL_SAMPLING_SLOTS below
+    if(fqs->samples.slots < 2) fqs->samples.slots = 2;
+    if(fqs->samples.slots > SYSTEMD_JOURNAL_SAMPLING_SLOTS)
+        fqs->samples.slots = SYSTEMD_JOURNAL_SAMPLING_SLOTS;
+
+    if(!fqs->after_ut || !fqs->before_ut || fqs->after_ut >= fqs->before_ut) {
+        // we don't have enough information for sampling
+        fqs->sampling = 0;
+        return;
+    }
+
+    usec_t delta = fqs->before_ut - fqs->after_ut;
+    usec_t step = delta / fqs->samples.slots; // the clamped count: never zero, never above the per-slot array size
+    step = (step > 1) ? step - 1 : 1;         // same "minus one, at least 1" rule, without the unsigned underflow at 0
+
+    fqs->samples_per_time_slot.start_ut = fqs->after_ut;
+    fqs->samples_per_time_slot.end_ut = fqs->before_ut;
+    fqs->samples_per_time_slot.step_ut = step;
+
+    // the minimum number of rows to enable sampling
+    fqs->samples.enable_after_samples = fqs->sampling / 2;
+
+    size_t files_matched = fqs->files_matched;
+    if(!files_matched)
+        files_matched = 1;
+
+    // the minimum number of rows per file to enable sampling
+    fqs->samples_per_file.enable_after_samples = (fqs->sampling / 4) / files_matched;
+    if(fqs->samples_per_file.enable_after_samples < fqs->entries)
+        fqs->samples_per_file.enable_after_samples = fqs->entries;
+
+    // the minimum number of rows per time slot to enable sampling
+    fqs->samples_per_time_slot.enable_after_samples = (fqs->sampling / 4) / fqs->samples.slots;
+    if(fqs->samples_per_time_slot.enable_after_samples < fqs->entries)
+        fqs->samples_per_time_slot.enable_after_samples = fqs->entries;
+}
+
+// Reset the per-file sampling counters before a journal file is queried.
+static void sampling_file_init(FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf __maybe_unused) {
+    // state of the "1 every N" sampler
+    fqs->samples_per_file.every = fqs->samples_per_file.skipped = fqs->samples_per_file.recalibrate = 0;
+
+    // row counters
+    fqs->samples_per_file.sampled = fqs->samples_per_file.unsampled = fqs->samples_per_file.estimated = 0;
+}
+
+// Rows of the current file seen so far (sampled + unsampled); returns at least 1.
+static size_t sampling_file_lines_scanned_so_far(FUNCTION_QUERY_STATUS *fqs) {
+    size_t scanned = fqs->samples_per_file.sampled + fqs->samples_per_file.unsampled;
+    return scanned ? scanned : 1;
+}
+
+static void sampling_running_file_query_overlapping_timeframe_ut(
+ FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf, FACETS_ANCHOR_DIRECTION direction,
+ usec_t msg_ut, usec_t *after_ut, usec_t *before_ut) {
+ // Outputs: *after_ut (oldest edge) and *before_ut (newest edge) of the window computed below.
+ // find the overlap of the query and file timeframes
+ // taking into account the first message we encountered
+ // (query_file.start_ut is where the scan begins and query_file.stop_ut where it ends, in scan direction)
+ usec_t oldest_ut, newest_ut;
+ if(direction == FACETS_ANCHOR_DIRECTION_FORWARD) {
+ // the first message we know (oldest)
+ oldest_ut = fqs->query_file.first_msg_ut ? fqs->query_file.first_msg_ut : jf->msg_first_ut;
+ if(!oldest_ut) oldest_ut = fqs->query_file.start_ut;
+
+ if(jf->msg_last_ut)
+ newest_ut = MIN(fqs->query_file.stop_ut, jf->msg_last_ut);
+ else if(jf->file_last_modified_ut)
+ newest_ut = MIN(fqs->query_file.stop_ut, jf->file_last_modified_ut);
+ else
+ newest_ut = fqs->query_file.stop_ut;
+
+ if(msg_ut < oldest_ut)
+ oldest_ut = msg_ut - 1; // the current message is older than the window: extend it
+ }
+ else /* BACKWARD */ {
+ // the latest message we know (newest)
+ newest_ut = fqs->query_file.first_msg_ut ? fqs->query_file.first_msg_ut : jf->msg_last_ut;
+ if(!newest_ut) newest_ut = fqs->query_file.start_ut;
+
+ if(jf->msg_first_ut)
+ oldest_ut = MAX(fqs->query_file.stop_ut, jf->msg_first_ut);
+ else
+ oldest_ut = fqs->query_file.stop_ut;
+
+ if(newest_ut < msg_ut)
+ newest_ut = msg_ut + 1; // the current message is newer than the window: extend it
+ }
+
+ *after_ut = oldest_ut;
+ *before_ut = newest_ut;
+}
+
+// Fraction of the file's expected time window already walked by the running query,
+// measured from the edge of the window the scan started from.
+static double sampling_running_file_query_progress_by_time(FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf,
+                                                           FACETS_ANCHOR_DIRECTION direction, usec_t msg_ut) {
+    usec_t window_from_ut, window_to_ut;
+    sampling_running_file_query_overlapping_timeframe_ut(fqs, jf, direction, msg_ut,
+                                                         &window_from_ut, &window_to_ut);
+
+    // forward scans move away from the oldest edge, backward scans away from the newest one
+    usec_t walked_ut = (direction == FACETS_ANCHOR_DIRECTION_FORWARD)
+                           ? msg_ut - window_from_ut
+                           : window_to_ut - msg_ut;
+
+    usec_t window_ut = window_to_ut - window_from_ut;
+    return (double)walked_ut / (double)window_ut;
+}
+
+// Time the running file query still has to cover, from msg_ut to the far edge of the expected window.
+// Optionally reports the total duration of the window and the [start, end] of the remaining part.
+static usec_t sampling_running_file_query_remaining_time(FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf,
+                                                         FACETS_ANCHOR_DIRECTION direction, usec_t msg_ut,
+                                                         usec_t *total_time_ut, usec_t *remaining_start_ut,
+                                                         usec_t *remaining_end_ut) {
+    usec_t window_from_ut, window_to_ut;
+    sampling_running_file_query_overlapping_timeframe_ut(fqs, jf, direction, msg_ut,
+                                                         &window_from_ut, &window_to_ut);
+
+    // msg_ut is a timestamp we have actually read,
+    // so the window is stretched to strictly contain it
+    if(window_from_ut >= msg_ut)
+        window_from_ut = msg_ut - 1;
+
+    if(window_to_ut <= msg_ut)
+        window_to_ut = msg_ut + 1;
+
+    // the part of the window the scan has not reached yet
+    bool forward = (direction == FACETS_ANCHOR_DIRECTION_FORWARD);
+    usec_t left_from_ut = forward ? msg_ut : window_from_ut;
+    usec_t left_to_ut = forward ? window_to_ut : msg_ut;
+
+    if(total_time_ut) {
+        if(window_to_ut > window_from_ut)
+            *total_time_ut = window_to_ut - window_from_ut;
+        else
+            *total_time_ut = 1;
+    }
+
+    if(remaining_start_ut)
+        *remaining_start_ut = left_from_ut;
+
+    if(remaining_end_ut)
+        *remaining_end_ut = left_to_ut;
+
+    // the remaining duration
+    return left_to_ut - left_from_ut;
+}
+
+// Estimate the rows the running file query has not read yet, assuming rows are evenly spread over time;
+// always returns at least 1.
+static size_t sampling_running_file_query_estimate_remaining_lines_by_time(FUNCTION_QUERY_STATUS *fqs,
+                                                                           struct journal_file *jf,
+                                                                           FACETS_ANCHOR_DIRECTION direction,
+                                                                           usec_t msg_ut) {
+    size_t scanned_lines = sampling_file_lines_scanned_so_far(fqs);
+
+    // Calculate the proportion of time covered
+    usec_t total_time_ut, remaining_start_ut, remaining_end_ut;
+    usec_t remaining_time_ut = sampling_running_file_query_remaining_time(fqs, jf, direction, msg_ut, &total_time_ut,
+                                                                          &remaining_start_ut, &remaining_end_ut);
+    if (total_time_ut == 0) total_time_ut = 1;
+    double proportion_by_time = (double) (total_time_ut - remaining_time_ut) / (double) total_time_ut;
+
+    if (proportion_by_time == 0 || proportion_by_time > 1.0 || !isfinite(proportion_by_time))
+        proportion_by_time = 1.0;
+
+    // Estimate the total number of lines in the file
+    size_t expected_matching_logs_by_time = (size_t)((double)scanned_lines / proportion_by_time);
+    if(jf->messages_in_file && expected_matching_logs_by_time > jf->messages_in_file)
+        expected_matching_logs_by_time = jf->messages_in_file;
+
+    // Calculate the estimated number of remaining lines. The cap above can bring the estimate below the rows
+    // already scanned, so compare first: the unsigned subtraction must not wrap around to a huge value.
+    size_t remaining_logs_by_time = (expected_matching_logs_by_time > scanned_lines) ? expected_matching_logs_by_time - scanned_lines : 1;
+
+// nd_log(NDLS_COLLECTORS, NDLP_INFO,
+// "JOURNAL ESTIMATION: '%s' "
+// "scanned_lines=%zu [sampled=%zu, unsampled=%zu, estimated=%zu], "
+// "file [%"PRIu64" - %"PRIu64", duration %"PRId64", known lines in file %zu], "
+// "query [%"PRIu64" - %"PRIu64", duration %"PRId64"], "
+// "first message read from the file at %"PRIu64", current message at %"PRIu64", "
+// "proportion of time %.2f %%, "
+// "expected total lines in file %zu, "
+// "remaining lines %zu, "
+// "remaining time %"PRIu64" [%"PRIu64" - %"PRIu64", duration %"PRId64"]"
+// , jf->filename
+// , scanned_lines, fqs->samples_per_file.sampled, fqs->samples_per_file.unsampled, fqs->samples_per_file.estimated
+// , jf->msg_first_ut, jf->msg_last_ut, jf->msg_last_ut - jf->msg_first_ut, jf->messages_in_file
+// , fqs->query_file.start_ut, fqs->query_file.stop_ut, fqs->query_file.stop_ut - fqs->query_file.start_ut
+// , fqs->query_file.first_msg_ut, msg_ut
+// , proportion_by_time * 100.0
+// , expected_matching_logs_by_time
+// , remaining_logs_by_time
+// , remaining_time_ut, remaining_start_ut, remaining_end_ut, remaining_end_ut - remaining_start_ut
+// );
+
+    return remaining_logs_by_time;
+}
+
+// Estimate the rows the running file query has not read yet. When sequence numbers are available, the share
+// of scanned rows among the entries walked so far is projected on the message count of the file;
+// when that gives no result, the time based estimation is used instead.
+static size_t sampling_running_file_query_estimate_remaining_lines(sd_journal *j, FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf, FACETS_ANCHOR_DIRECTION direction, usec_t msg_ut) {
+    size_t expected_matching_logs_by_seqnum = 0;
+    size_t remaining_logs_by_seqnum = 0;
+
+#ifdef HAVE_SD_JOURNAL_GET_SEQNUM
+    uint64_t current_msg_seqnum;
+    sd_id128_t current_msg_writer;
+    if(!fqs->query_file.first_msg_seqnum || sd_journal_get_seqnum(j, &current_msg_seqnum, &current_msg_writer) < 0) {
+        // no usable sequence numbers for this file query
+        fqs->query_file.first_msg_seqnum = 0;
+        fqs->query_file.first_msg_writer = SD_ID128_NULL;
+    }
+    else if(jf->messages_in_file) {
+        size_t scanned_lines = sampling_file_lines_scanned_so_far(fqs);
+
+        // rows scanned, over the sequence numbers between the edge of the file and the current entry
+        double proportion_of_all_lines_so_far;
+        if(direction == FACETS_ANCHOR_DIRECTION_FORWARD)
+            proportion_of_all_lines_so_far = (double)scanned_lines / (double)(current_msg_seqnum - jf->first_seqnum);
+        else
+            proportion_of_all_lines_so_far = (double)scanned_lines / (double)(jf->last_seqnum - current_msg_seqnum);
+
+        if(proportion_of_all_lines_so_far > 1.0)
+            proportion_of_all_lines_so_far = 1.0;
+
+        expected_matching_logs_by_seqnum = (size_t)(proportion_of_all_lines_so_far * (double)jf->messages_in_file);
+
+        // the projection can be smaller than the rows already scanned; compare first,
+        // so that the unsigned subtraction cannot wrap around to a huge value
+        remaining_logs_by_seqnum = (expected_matching_logs_by_seqnum > scanned_lines) ? expected_matching_logs_by_seqnum - scanned_lines : 1;
+    }
#endif
-#include
-#include
+    if(remaining_logs_by_seqnum)
+        return remaining_logs_by_seqnum;
-#define FACET_MAX_VALUE_LENGTH 8192
+    return sampling_running_file_query_estimate_remaining_lines_by_time(fqs, jf, direction, msg_ut);
+}
+
+// Set N of "sample 1 row every N" for this file: the estimated remaining rows over the samples wanted from it.
+static void sampling_decide_file_sampling_every(sd_journal *j, FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf, FACETS_ANCHOR_DIRECTION direction, usec_t msg_ut) {
+    size_t remaining = sampling_running_file_query_estimate_remaining_lines(j, fqs, jf, direction, msg_ut);
+
+    // half of the sampling budget, split evenly among the matched files (at least 1 sample)
+    size_t files = fqs->files_matched ? fqs->files_matched : 1;
+    size_t budget = (fqs->sampling / 2) / files;
+    if(!budget)
+        budget = 1;
+
+    fqs->samples_per_file.every = remaining / budget;
+    if(fqs->samples_per_file.every < 1) fqs->samples_per_file.every = 1;
+}
+
+typedef enum {
+ SAMPLING_STOP_AND_ESTIMATE = -1, // stop reading the file; the query loops then estimate the remaining rows
+ SAMPLING_FULL = 0, // the query loops process the row with all its fields
+ SAMPLING_SKIP_FIELDS = 1, // the query loops report the row as unsampled, without reading its fields
+} sampling_t;
+
+static inline sampling_t is_row_in_sample(sd_journal *j, FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf, usec_t msg_ut, FACETS_ANCHOR_DIRECTION direction, bool candidate_to_keep) { // decide how the row at msg_ut is handled
+ if(!fqs->sampling || candidate_to_keep) // sampling is disabled, or the caller flagged the row as a candidate to keep
+ return SAMPLING_FULL;
+ // clamp the timestamp into the sampling timeframe, then find the time slot it falls in
+ if(unlikely(msg_ut < fqs->samples_per_time_slot.start_ut))
+ msg_ut = fqs->samples_per_time_slot.start_ut;
+ if(unlikely(msg_ut > fqs->samples_per_time_slot.end_ut))
+ msg_ut = fqs->samples_per_time_slot.end_ut;
+
+ size_t slot = (msg_ut - fqs->samples_per_time_slot.start_ut) / fqs->samples_per_time_slot.step_ut;
+ if(slot >= fqs->samples.slots)
+ slot = fqs->samples.slots - 1;
+
+ bool should_sample = false;
+ // while any of the 3 counters (query, file, time slot) is below its threshold, every row is sampled
+ if(fqs->samples.sampled < fqs->samples.enable_after_samples ||
+ fqs->samples_per_file.sampled < fqs->samples_per_file.enable_after_samples ||
+ fqs->samples_per_time_slot.sampled[slot] < fqs->samples_per_time_slot.enable_after_samples)
+ should_sample = true;
+
+ else if(fqs->samples_per_file.recalibrate >= SYSTEMD_JOURNAL_SAMPLING_RECALIBRATE || !fqs->samples_per_file.every) {
+ // this is the first to be unsampled for this file
+ sampling_decide_file_sampling_every(j, fqs, jf, direction, msg_ut);
+ fqs->samples_per_file.recalibrate = 0;
+ should_sample = true;
+ }
+ else {
+ // we sample 1 every fqs->samples_per_file.every
+ if(fqs->samples_per_file.skipped >= fqs->samples_per_file.every) {
+ fqs->samples_per_file.skipped = 0;
+ should_sample = true;
+ }
+ else
+ fqs->samples_per_file.skipped++;
+ }
+
+ if(should_sample) {
+ fqs->samples.sampled++;
+ fqs->samples_per_file.sampled++;
+ fqs->samples_per_time_slot.sampled[slot]++;
+
+ return SAMPLING_FULL;
+ }
+ // the row is not sampled: update the unsampled counters
+ fqs->samples_per_file.recalibrate++;
+
+ fqs->samples.unsampled++;
+ fqs->samples_per_file.unsampled++;
+ fqs->samples_per_time_slot.unsampled[slot]++;
+
+ if(fqs->samples_per_file.unsampled > fqs->samples_per_file.sampled) { // more rows skipped than sampled in this file
+ double progress_by_time = sampling_running_file_query_progress_by_time(fqs, jf, direction, msg_ut);
+
+ if(progress_by_time > SYSTEMD_JOURNAL_ENABLE_ESTIMATIONS_FILE_PERCENTAGE)
+ return SAMPLING_STOP_AND_ESTIMATE;
+ }
+
+ return SAMPLING_SKIP_FIELDS;
+}
+
+// Estimate the rows of the part of the file the query will not read, hand them to facets and add them to the counters.
+static void sampling_update_running_query_file_estimates(FACETS *facets, sd_journal *j, FUNCTION_QUERY_STATUS *fqs, struct journal_file *jf, usec_t msg_ut, FACETS_ANCHOR_DIRECTION direction) {
+    usec_t left_from_ut, left_to_ut; // the time window the query did not read
+    sampling_running_file_query_remaining_time(fqs, jf, direction, msg_ut, NULL, &left_from_ut, &left_to_ut);
+    size_t left_rows = sampling_running_file_query_estimate_remaining_lines(j, fqs, jf, direction, msg_ut);
+    facets_update_estimations(facets, left_from_ut, left_to_ut, left_rows);
+    fqs->samples_per_file.estimated += left_rows;
+    fqs->samples.estimated += left_rows;
+}
+
+// ----------------------------------------------------------------------------
+
+static inline size_t netdata_systemd_journal_process_row(sd_journal *j, FACETS *facets, struct journal_file *jf, usec_t *msg_ut) {
+ const void *data;
+ size_t length, bytes = 0;
+ // Add all the fields of the current journal entry to facets; returns the sum of the field lengths read.
+ facets_add_key_value_length(facets, JOURNAL_KEY_ND_JOURNAL_FILE, sizeof(JOURNAL_KEY_ND_JOURNAL_FILE) - 1, jf->filename, jf->filename_len);
+ // *msg_ut is lowered to the value of _SOURCE_REALTIME_TIMESTAMP, when the entry carries an earlier one.
+ SD_JOURNAL_FOREACH_DATA(j, data, length) {
+ const char *key, *value;
+ size_t key_length, value_length;
+
+ if(!parse_journal_field(data, length, &key, &key_length, &value, &value_length))
+ continue;
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ usec_t origin_journal_ut = *msg_ut;
+#endif
+ if(unlikely(key_length == sizeof(JD_SOURCE_REALTIME_TIMESTAMP) - 1 &&
+ memcmp(key, JD_SOURCE_REALTIME_TIMESTAMP, sizeof(JD_SOURCE_REALTIME_TIMESTAMP) - 1) == 0)) {
+ usec_t ut = str2ull(value, NULL);
+ if(ut && ut < *msg_ut) {
+ usec_t delta = *msg_ut - ut;
+ *msg_ut = ut;
+
+ if(delta > JOURNAL_VS_REALTIME_DELTA_MAX_UT)
+ delta = JOURNAL_VS_REALTIME_DELTA_MAX_UT;
+
+ // update max_journal_vs_realtime_delta_ut if the delta increased
+ usec_t expected = jf->max_journal_vs_realtime_delta_ut;
+ do { // compare-and-swap loop: a failed exchange reloads `expected`, and the check is repeated
+ if(delta <= expected)
+ break;
+ } while(!__atomic_compare_exchange_n(&jf->max_journal_vs_realtime_delta_ut, &expected, delta, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
+
+ internal_error(delta > expected,
+ "increased max_journal_vs_realtime_delta_ut from %"PRIu64" to %"PRIu64", "
+ "journal %"PRIu64", actual %"PRIu64" (delta %"PRIu64")"
+ , expected, delta, origin_journal_ut, *msg_ut, origin_journal_ut - (*msg_ut));
+ }
+ }
+
+ bytes += length;
+ facets_add_key_value_length(facets, key, key_length, value, value_length <= FACET_MAX_VALUE_LENGTH ? value_length : FACET_MAX_VALUE_LENGTH); // the length passed is capped at FACET_MAX_VALUE_LENGTH
+ }
+
+ return bytes;
+}
+
+#define FUNCTION_PROGRESS_UPDATE_ROWS(rows_read, rows) __atomic_fetch_add(&(rows_read), rows, __ATOMIC_RELAXED)
+#define FUNCTION_PROGRESS_UPDATE_BYTES(bytes_read, bytes) __atomic_fetch_add(&(bytes_read), bytes, __ATOMIC_RELAXED)
+#define FUNCTION_PROGRESS_EVERY_ROWS (1ULL << 13)
+#define FUNCTION_DATA_ONLY_CHECK_EVERY_ROWS (1ULL << 7)
+
+// Tell whether the running query must stop: cancellation is checked first, then the monotonic deadline.
+static inline ND_SD_JOURNAL_STATUS check_stop(const bool *cancelled, const usec_t *stop_monotonic_ut) {
+    ND_SD_JOURNAL_STATUS verdict = ND_SD_JOURNAL_OK;
+    if(cancelled && __atomic_load_n(cancelled, __ATOMIC_RELAXED)) {
+        internal_error(true, "Function has been cancelled");
+        verdict = ND_SD_JOURNAL_CANCELLED;
+    }
+    else if(now_monotonic_usec() > __atomic_load_n(stop_monotonic_ut, __ATOMIC_RELAXED)) {
+        internal_error(true, "Function timed out");
+        verdict = ND_SD_JOURNAL_TIMED_OUT;
+    }
+    return verdict;
+}
+
+ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_backward(
+ sd_journal *j, BUFFER *wb __maybe_unused, FACETS *facets,
+ struct journal_file *jf, FUNCTION_QUERY_STATUS *fqs) {
+ // Read the journal from newest to oldest, passing the rows of the query window to facets; returns the final status.
+ usec_t anchor_delta = __atomic_load_n(&jf->max_journal_vs_realtime_delta_ut, __ATOMIC_RELAXED);
+ // NOTE(review): the start is pushed anchor_delta later, presumably because rows may get an earlier source timestamp - confirm
+ usec_t start_ut = ((fqs->data_only && fqs->anchor.start_ut) ? fqs->anchor.start_ut : fqs->before_ut) + anchor_delta;
+ usec_t stop_ut = (fqs->data_only && fqs->anchor.stop_ut) ? fqs->anchor.stop_ut : fqs->after_ut;
+ bool stop_when_full = (fqs->data_only && !fqs->anchor.stop_ut);
+
+ fqs->query_file.start_ut = start_ut;
+ fqs->query_file.stop_ut = stop_ut;
+
+ if(!netdata_systemd_journal_seek_to(j, start_ut))
+ return ND_SD_JOURNAL_FAILED_TO_SEEK;
+
+ size_t errors_no_timestamp = 0;
+ usec_t latest_msg_ut = 0; // the biggest timestamp we have seen so far
+ usec_t first_msg_ut = 0; // the first message we got from the db
+ size_t row_counter = 0, last_row_counter = 0, rows_useful = 0;
+ size_t bytes = 0, last_bytes = 0;
+
+ usec_t last_usec_from = 0;
+ usec_t last_usec_to = 0;
+
+ ND_SD_JOURNAL_STATUS status = ND_SD_JOURNAL_OK;
+
+ facets_rows_begin(facets);
+ while (status == ND_SD_JOURNAL_OK && sd_journal_previous(j) > 0) {
+ usec_t msg_ut = 0;
+ if(sd_journal_get_realtime_usec(j, &msg_ut) < 0 || !msg_ut) {
+ errors_no_timestamp++;
+ continue;
+ }
+
+ if (unlikely(msg_ut > start_ut))
+ continue;
+
+ if (unlikely(msg_ut < stop_ut))
+ break;
+
+ if(unlikely(msg_ut > latest_msg_ut))
+ latest_msg_ut = msg_ut;
+
+ if(unlikely(!first_msg_ut)) {
+ first_msg_ut = msg_ut;
+ fqs->query_file.first_msg_ut = msg_ut;
+
+#ifdef HAVE_SD_JOURNAL_GET_SEQNUM
+ if(sd_journal_get_seqnum(j, &fqs->query_file.first_msg_seqnum, &fqs->query_file.first_msg_writer) < 0) {
+ fqs->query_file.first_msg_seqnum = 0;
+ fqs->query_file.first_msg_writer = SD_ID128_NULL;
+ }
+#endif
+ }
+
+ sampling_t sample = is_row_in_sample(j, fqs, jf, msg_ut,
+ FACETS_ANCHOR_DIRECTION_BACKWARD,
+ facets_row_candidate_to_keep(facets, msg_ut));
+
+ if(sample == SAMPLING_FULL) {
+ bytes += netdata_systemd_journal_process_row(j, facets, jf, &msg_ut);
+
+ // make sure each line gets a unique timestamp
+ if(unlikely(msg_ut >= last_usec_from && msg_ut <= last_usec_to))
+ msg_ut = --last_usec_from;
+ else
+ last_usec_from = last_usec_to = msg_ut;
+
+ if(facets_row_finished(facets, msg_ut))
+ rows_useful++;
+
+ row_counter++;
+ if(unlikely((row_counter % FUNCTION_DATA_ONLY_CHECK_EVERY_ROWS) == 0 &&
+ stop_when_full &&
+ facets_rows(facets) >= fqs->entries)) {
+ // stop the data only query
+ usec_t oldest = facets_row_oldest_ut(facets);
+ if(oldest && msg_ut < (oldest - anchor_delta))
+ break;
+ }
+
+ if(unlikely(row_counter % FUNCTION_PROGRESS_EVERY_ROWS == 0)) {
+ FUNCTION_PROGRESS_UPDATE_ROWS(fqs->rows_read, row_counter - last_row_counter);
+ last_row_counter = row_counter;
+
+ FUNCTION_PROGRESS_UPDATE_BYTES(fqs->bytes_read, bytes - last_bytes);
+ last_bytes = bytes;
+
+ status = check_stop(fqs->cancelled, &fqs->stop_monotonic_ut);
+ }
+ }
+ else if(sample == SAMPLING_SKIP_FIELDS) // NOTE(review): these rows do not advance row_counter, so check_stop() is not reached for them
+ facets_row_finished_unsampled(facets, msg_ut);
+ else {
+ sampling_update_running_query_file_estimates(facets, j, fqs, jf, msg_ut, FACETS_ANCHOR_DIRECTION_BACKWARD);
+ break;
+ }
+ }
+ // account for the rows and bytes read since the last progress update
+ FUNCTION_PROGRESS_UPDATE_ROWS(fqs->rows_read, row_counter - last_row_counter);
+ FUNCTION_PROGRESS_UPDATE_BYTES(fqs->bytes_read, bytes - last_bytes);
+
+ fqs->rows_useful += rows_useful;
+
+ if(errors_no_timestamp)
+ netdata_log_error("SYSTEMD-JOURNAL: %zu lines did not have timestamps", errors_no_timestamp);
+
+ if(latest_msg_ut > fqs->last_modified)
+ fqs->last_modified = latest_msg_ut;
+
+ return status;
+}
+
+ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_forward(
+ sd_journal *j, BUFFER *wb __maybe_unused, FACETS *facets,
+ struct journal_file *jf, FUNCTION_QUERY_STATUS *fqs) {
+ // Read the journal from oldest to newest, passing the rows of the query window to facets; returns the final status.
+ usec_t anchor_delta = __atomic_load_n(&jf->max_journal_vs_realtime_delta_ut, __ATOMIC_RELAXED);
+ // NOTE(review): the stop is pushed anchor_delta later, presumably because rows may get an earlier source timestamp - confirm
+ usec_t start_ut = (fqs->data_only && fqs->anchor.start_ut) ? fqs->anchor.start_ut : fqs->after_ut;
+ usec_t stop_ut = ((fqs->data_only && fqs->anchor.stop_ut) ? fqs->anchor.stop_ut : fqs->before_ut) + anchor_delta;
+ bool stop_when_full = (fqs->data_only && !fqs->anchor.stop_ut);
+
+ fqs->query_file.start_ut = start_ut;
+ fqs->query_file.stop_ut = stop_ut;
+
+ if(!netdata_systemd_journal_seek_to(j, start_ut))
+ return ND_SD_JOURNAL_FAILED_TO_SEEK;
+
+ size_t errors_no_timestamp = 0;
+ usec_t latest_msg_ut = 0; // the biggest timestamp we have seen so far
+ usec_t first_msg_ut = 0; // the first message we got from the db
+ size_t row_counter = 0, last_row_counter = 0, rows_useful = 0;
+ size_t bytes = 0, last_bytes = 0;
+
+ usec_t last_usec_from = 0;
+ usec_t last_usec_to = 0;
+
+ ND_SD_JOURNAL_STATUS status = ND_SD_JOURNAL_OK;
+
+ facets_rows_begin(facets);
+ while (status == ND_SD_JOURNAL_OK && sd_journal_next(j) > 0) {
+ usec_t msg_ut = 0;
+ if(sd_journal_get_realtime_usec(j, &msg_ut) < 0 || !msg_ut) {
+ errors_no_timestamp++;
+ continue;
+ }
+
+ if (unlikely(msg_ut < start_ut))
+ continue;
+
+ if (unlikely(msg_ut > stop_ut))
+ break;
+
+ if(likely(msg_ut > latest_msg_ut))
+ latest_msg_ut = msg_ut;
+
+ if(unlikely(!first_msg_ut)) {
+ first_msg_ut = msg_ut;
+ fqs->query_file.first_msg_ut = msg_ut; // NOTE(review): unlike the backward query, first_msg_seqnum is not captured here - confirm it is intended
+ }
+
+ sampling_t sample = is_row_in_sample(j, fqs, jf, msg_ut,
+ FACETS_ANCHOR_DIRECTION_FORWARD,
+ facets_row_candidate_to_keep(facets, msg_ut));
+
+ if(sample == SAMPLING_FULL) {
+ bytes += netdata_systemd_journal_process_row(j, facets, jf, &msg_ut);
+
+ // make sure each line gets a unique timestamp
+ if(unlikely(msg_ut >= last_usec_from && msg_ut <= last_usec_to))
+ msg_ut = ++last_usec_to;
+ else
+ last_usec_from = last_usec_to = msg_ut;
+
+ if(facets_row_finished(facets, msg_ut))
+ rows_useful++;
+
+ row_counter++;
+ if(unlikely((row_counter % FUNCTION_DATA_ONLY_CHECK_EVERY_ROWS) == 0 &&
+ stop_when_full &&
+ facets_rows(facets) >= fqs->entries)) {
+ // stop the data only query
+ usec_t newest = facets_row_newest_ut(facets);
+ if(newest && msg_ut > (newest + anchor_delta))
+ break;
+ }
+
+ if(unlikely(row_counter % FUNCTION_PROGRESS_EVERY_ROWS == 0)) {
+ FUNCTION_PROGRESS_UPDATE_ROWS(fqs->rows_read, row_counter - last_row_counter);
+ last_row_counter = row_counter;
+
+ FUNCTION_PROGRESS_UPDATE_BYTES(fqs->bytes_read, bytes - last_bytes);
+ last_bytes = bytes;
+
+ status = check_stop(fqs->cancelled, &fqs->stop_monotonic_ut);
+ }
+ }
+ else if(sample == SAMPLING_SKIP_FIELDS) // NOTE(review): these rows do not advance row_counter, so check_stop() is not reached for them
+ facets_row_finished_unsampled(facets, msg_ut);
+ else {
+ sampling_update_running_query_file_estimates(facets, j, fqs, jf, msg_ut, FACETS_ANCHOR_DIRECTION_FORWARD);
+ break;
+ }
+ }
+ // account for the rows and bytes read since the last progress update
+ FUNCTION_PROGRESS_UPDATE_ROWS(fqs->rows_read, row_counter - last_row_counter);
+ FUNCTION_PROGRESS_UPDATE_BYTES(fqs->bytes_read, bytes - last_bytes);
+
+ fqs->rows_useful += rows_useful;
+
+ if(errors_no_timestamp)
+ netdata_log_error("SYSTEMD-JOURNAL: %zu lines did not have timestamps", errors_no_timestamp);
+
+ if(latest_msg_ut > fqs->last_modified)
+ fqs->last_modified = latest_msg_ut;
+
+ return status;
+}
+
+// Return true when the first entry read by stepping back from `seek_to` does not carry the timestamp
+// `last_modified`, i.e. data have been modified since that timestamp.
+// Returns false when any of the two timestamps is zero, or when seeking fails.
+bool netdata_systemd_journal_check_if_modified_since(sd_journal *j, usec_t seek_to, usec_t last_modified) {
+    if(!last_modified || !seek_to || !netdata_systemd_journal_seek_to(j, seek_to))
+        return false;
+
+    // walk backwards until an entry gives us its timestamp
+    usec_t newest_ut = 0;
+    for(;;) {
+        if(sd_journal_previous(j) <= 0)
+            break; // nothing (more) to read
+        usec_t entry_ut;
+        if(sd_journal_get_realtime_usec(j, &entry_ut) >= 0) {
+            newest_ut = entry_ut;
+            break;
+        }
+    }
+
+    return newest_ut != last_modified;
+}
+
+#ifdef HAVE_SD_JOURNAL_RESTART_FIELDS
+static bool netdata_systemd_filtering_by_journal(sd_journal *j, FACETS *facets, FUNCTION_QUERY_STATUS *fqs) {
+ const char *field = NULL;
+ const void *data = NULL;
+ size_t data_length;
+ size_t added_keys = 0;
+ size_t failures = 0;
+ size_t filters_added = 0;
+ // Add the selected facet values as journal matches; returns true if any was added (or on failure, unfiltered).
+ SD_JOURNAL_FOREACH_FIELD(j, field) { // for each key
+ bool interesting;
+
+ if(fqs->data_only)
+ interesting = facets_key_name_is_filter(facets, field);
+ else
+ interesting = facets_key_name_is_facet(facets, field);
+
+ if(interesting) {
+ if(sd_journal_query_unique(j, field) >= 0) {
+ bool added_this_key = false;
+ size_t added_values = 0;
+
+ SD_JOURNAL_FOREACH_UNIQUE(j, data, data_length) { // for each value of the key
+ const char *key, *value;
+ size_t key_length, value_length;
+
+ if(!parse_journal_field(data, data_length, &key, &key_length, &value, &value_length))
+ continue;
+
+ facets_add_possible_value_name_to_key(facets, key, key_length, value, value_length);
+
+ if(!facets_key_name_value_length_is_selected(facets, key, key_length, value, value_length))
+ continue;
+
+ if(added_keys && !added_this_key) {
+ if(sd_journal_add_conjunction(j) < 0) // key AND key AND key
+ failures++;
+
+ added_this_key = true;
+ added_keys++;
+ }
+ else if(added_values)
+ if(sd_journal_add_disjunction(j) < 0) // value OR value OR value
+ failures++;
+
+ if(sd_journal_add_match(j, data, data_length) < 0)
+ failures++;
+
+ if(!added_keys) { // the very first key that gets a match
+ added_keys++;
+ added_this_key = true;
+ }
+
+ added_values++;
+ filters_added++;
+ }
+ }
+ }
+ }
+ // on any failure, all the matches are dropped and true is returned, so the caller still queries the file
+ if(failures) {
+ log_fqs(fqs, "failed to setup journal filter, will run the full query.");
+ sd_journal_flush_matches(j);
+ return true;
+ }
+
+ return filters_added ? true : false;
+}
+#endif // HAVE_SD_JOURNAL_RESTART_FIELDS
+
+static ND_SD_JOURNAL_STATUS netdata_systemd_journal_query_one_file(
+ const char *filename, BUFFER *wb, FACETS *facets,
+ struct journal_file *jf, FUNCTION_QUERY_STATUS *fqs) {
+ // Open a single journal file, run the query on it in the direction of fqs->direction, and close it.
+ sd_journal *j = NULL;
+ errno = 0;
+
+ fstat_cache_enable_on_thread();
+
+ const char *paths[2] = {
+ [0] = filename,
+ [1] = NULL,
+ };
+
+ if(sd_journal_open_files(&j, paths, ND_SD_JOURNAL_OPEN_FLAGS) < 0 || !j) {
+ netdata_log_error("JOURNAL: cannot open file '%s' for query", filename);
+ fstat_cache_disable_on_thread();
+ return ND_SD_JOURNAL_FAILED_TO_OPEN;
+ }
+
+ ND_SD_JOURNAL_STATUS status;
+ bool matches_filters = true;
+
+#ifdef HAVE_SD_JOURNAL_RESTART_FIELDS
+ if(fqs->slice) {
+ usec_t started = now_monotonic_usec();
+ // the file is queried when journal matches were set up, or when the query has no filters at all
+ matches_filters = netdata_systemd_filtering_by_journal(j, facets, fqs) || !fqs->filters;
+ usec_t ended = now_monotonic_usec();
+
+ fqs->matches_setup_ut += (ended - started);
+ }
+#endif // HAVE_SD_JOURNAL_RESTART_FIELDS
+
+ if(matches_filters) {
+ if(fqs->direction == FACETS_ANCHOR_DIRECTION_FORWARD)
+ status = netdata_systemd_journal_query_forward(j, wb, facets, jf, fqs);
+ else
+ status = netdata_systemd_journal_query_backward(j, wb, facets, jf, fqs);
+ }
+ else
+ status = ND_SD_JOURNAL_NO_FILE_MATCHED;
+ // release the journal handle and the per-thread fstat cache on every path that opened the file
+ sd_journal_close(j);
+ fstat_cache_disable_on_thread();
+
+ return status;
+}
+
+// Decide whether a journal file takes part in the query: its source must be selected and its messages
+// (widened by the maximum journal vs realtime delta) must overlap the timeframe of the query.
+static bool jf_is_mine(struct journal_file *jf, FUNCTION_QUERY_STATUS *fqs) {
+    bool source_selected = (fqs->source_type == SDJF_NONE && !fqs->sources) ||
+                           (jf->source_type & fqs->source_type) ||
+                           (fqs->sources && simple_pattern_matches(fqs->sources, string2str(jf->source)));
+    if(!source_selected)
+        return false;
+
+    if(!jf->msg_first_ut || !jf->msg_last_ut)
+        // the file is not scanned yet, or the timestamps have not been updated,
+        // so we don't know if it can contribute or not - let's add it.
+        return true;
+
+    // widen the timeframe of the file, without letting the unsigned subtraction wrap around
+    usec_t anchor_delta = JOURNAL_VS_REALTIME_DELTA_MAX_UT;
+    usec_t first_ut = (jf->msg_first_ut > anchor_delta) ? jf->msg_first_ut - anchor_delta : 0;
+
+    return (jf->msg_last_ut + anchor_delta) >= fqs->after_ut && first_ut <= fqs->before_ut;
+}
+
+static int netdata_systemd_journal_query(BUFFER *wb, FACETS *facets, FUNCTION_QUERY_STATUS *fqs) {
+ ND_SD_JOURNAL_STATUS status = ND_SD_JOURNAL_NO_FILE_MATCHED;
+ struct journal_file *jf;
+
+ fqs->files_matched = 0;
+ fqs->file_working = 0;
+ fqs->rows_useful = 0;
+ fqs->rows_read = 0;
+ fqs->bytes_read = 0;
+
+ size_t files_used = 0;
+ size_t files_max = dictionary_entries(journal_files_registry);
+ const DICTIONARY_ITEM *file_items[files_max];
+
+ // count the files
+ bool files_are_newer = false;
+ dfe_start_read(journal_files_registry, jf) {
+ if(!jf_is_mine(jf, fqs))
+ continue;
+
+ file_items[files_used++] = dictionary_acquired_item_dup(journal_files_registry, jf_dfe.item);
+
+ if(jf->msg_last_ut > fqs->if_modified_since)
+ files_are_newer = true;
+ }
+ dfe_done(jf);
+
+ fqs->files_matched = files_used;
+
+ if(fqs->if_modified_since && !files_are_newer) {
+ buffer_flush(wb);
+ return HTTP_RESP_NOT_MODIFIED;
+ }
-#define SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION "View, search and analyze systemd journal entries."
-#define SYSTEMD_JOURNAL_FUNCTION_NAME "systemd-journal"
-#define SYSTEMD_JOURNAL_DEFAULT_TIMEOUT 30
-#define SYSTEMD_JOURNAL_MAX_PARAMS 100
-#define SYSTEMD_JOURNAL_DEFAULT_QUERY_DURATION (3 * 3600)
-#define SYSTEMD_JOURNAL_DEFAULT_ITEMS_PER_QUERY 200
+ // sort the files, so that they are optimal for facets
+ if(files_used >= 2) {
+ if (fqs->direction == FACETS_ANCHOR_DIRECTION_BACKWARD)
+ qsort(file_items, files_used, sizeof(const DICTIONARY_ITEM *),
+ journal_file_dict_items_backward_compar);
+ else
+ qsort(file_items, files_used, sizeof(const DICTIONARY_ITEM *),
+ journal_file_dict_items_forward_compar);
+ }
-#define JOURNAL_PARAMETER_HELP "help"
-#define JOURNAL_PARAMETER_AFTER "after"
-#define JOURNAL_PARAMETER_BEFORE "before"
-#define JOURNAL_PARAMETER_ANCHOR "anchor"
-#define JOURNAL_PARAMETER_LAST "last"
-#define JOURNAL_PARAMETER_QUERY "query"
+ bool partial = false;
+ usec_t query_started_ut = now_monotonic_usec();
+ usec_t started_ut = query_started_ut;
+ usec_t ended_ut = started_ut;
+ usec_t duration_ut = 0, max_duration_ut = 0;
-#define SYSTEMD_ALWAYS_VISIBLE_KEYS NULL
-#define SYSTEMD_KEYS_EXCLUDED_FROM_FACETS NULL
-#define SYSTEMD_KEYS_INCLUDED_IN_FACETS \
- "_TRANSPORT" \
- "|SYSLOG_IDENTIFIER" \
- "|SYSLOG_FACILITY" \
- "|PRIORITY" \
- "|_HOSTNAME" \
- "|_RUNTIME_SCOPE" \
- "|_PID" \
- "|_UID" \
- "|_GID" \
- "|_SYSTEMD_UNIT" \
- "|_SYSTEMD_SLICE" \
- "|_SYSTEMD_USER_SLICE" \
- "|_COMM" \
- "|_EXE" \
- "|_SYSTEMD_CGROUP" \
- "|_SYSTEMD_USER_UNIT" \
- "|USER_UNIT" \
- "|UNIT" \
- ""
+ sampling_query_init(fqs, facets);
-static netdata_mutex_t mutex = NETDATA_MUTEX_INITIALIZER;
-static bool plugin_should_exit = false;
+ buffer_json_member_add_array(wb, "_journal_files");
+ for(size_t f = 0; f < files_used ;f++) {
+ const char *filename = dictionary_acquired_item_name(file_items[f]);
+ jf = dictionary_acquired_item_value(file_items[f]);
-DICTIONARY *uids = NULL;
-DICTIONARY *gids = NULL;
+ if(!jf_is_mine(jf, fqs))
+ continue;
+ started_ut = ended_ut;
-// ----------------------------------------------------------------------------
+ // do not even try to do the query if we expect it to pass the timeout
+ if(ended_ut > (query_started_ut + (fqs->stop_monotonic_ut - query_started_ut) * 3 / 4) &&
+ ended_ut + max_duration_ut * 2 >= fqs->stop_monotonic_ut) {
-int systemd_journal_query(BUFFER *wb, FACETS *facets, usec_t after_ut, usec_t before_ut, usec_t stop_monotonic_ut) {
- sd_journal *j;
- int r;
+ partial = true;
+ status = ND_SD_JOURNAL_TIMED_OUT;
+ break;
+ }
- // Open the system journal for reading
- r = sd_journal_open(&j, JOURNAL_NAMESPACE);
- if (r < 0)
- return HTTP_RESP_INTERNAL_SERVER_ERROR;
+ fqs->file_working++;
+ // fqs->cached_count = 0;
+
+ size_t fs_calls = fstat_thread_calls;
+ size_t fs_cached = fstat_thread_cached_responses;
+ size_t rows_useful = fqs->rows_useful;
+ size_t rows_read = fqs->rows_read;
+ size_t bytes_read = fqs->bytes_read;
+ size_t matches_setup_ut = fqs->matches_setup_ut;
+
+ sampling_file_init(fqs, jf);
+
+ ND_SD_JOURNAL_STATUS tmp_status = netdata_systemd_journal_query_one_file(filename, wb, facets, jf, fqs);
+
+// nd_log(NDLS_COLLECTORS, NDLP_INFO,
+// "JOURNAL ESTIMATION FINAL: '%s' "
+// "total lines %zu [sampled=%zu, unsampled=%zu, estimated=%zu], "
+// "file [%"PRIu64" - %"PRIu64", duration %"PRId64", known lines in file %zu], "
+// "query [%"PRIu64" - %"PRIu64", duration %"PRId64"], "
+// , jf->filename
+// , fqs->samples_per_file.sampled + fqs->samples_per_file.unsampled + fqs->samples_per_file.estimated
+// , fqs->samples_per_file.sampled, fqs->samples_per_file.unsampled, fqs->samples_per_file.estimated
+// , jf->msg_first_ut, jf->msg_last_ut, jf->msg_last_ut - jf->msg_first_ut, jf->messages_in_file
+// , fqs->query_file.start_ut, fqs->query_file.stop_ut, fqs->query_file.stop_ut - fqs->query_file.start_ut
+// );
+
+ rows_useful = fqs->rows_useful - rows_useful;
+ rows_read = fqs->rows_read - rows_read;
+ bytes_read = fqs->bytes_read - bytes_read;
+ matches_setup_ut = fqs->matches_setup_ut - matches_setup_ut;
+ fs_calls = fstat_thread_calls - fs_calls;
+ fs_cached = fstat_thread_cached_responses - fs_cached;
+
+ ended_ut = now_monotonic_usec();
+ duration_ut = ended_ut - started_ut;
+
+ if(duration_ut > max_duration_ut)
+ max_duration_ut = duration_ut;
+
+ buffer_json_add_array_item_object(wb); // journal file
+ {
+ // information about the file
+ buffer_json_member_add_string(wb, "_filename", filename);
+ buffer_json_member_add_uint64(wb, "_source_type", jf->source_type);
+ buffer_json_member_add_string(wb, "_source", string2str(jf->source));
+ buffer_json_member_add_uint64(wb, "_last_modified_ut", jf->file_last_modified_ut);
+ buffer_json_member_add_uint64(wb, "_msg_first_ut", jf->msg_first_ut);
+ buffer_json_member_add_uint64(wb, "_msg_last_ut", jf->msg_last_ut);
+ buffer_json_member_add_uint64(wb, "_journal_vs_realtime_delta_ut", jf->max_journal_vs_realtime_delta_ut);
+
+ // information about the current use of the file
+ buffer_json_member_add_uint64(wb, "duration_ut", ended_ut - started_ut);
+ buffer_json_member_add_uint64(wb, "rows_read", rows_read);
+ buffer_json_member_add_uint64(wb, "rows_useful", rows_useful);
+ buffer_json_member_add_double(wb, "rows_per_second", (double) rows_read / (double) duration_ut * (double) USEC_PER_SEC);
+ buffer_json_member_add_uint64(wb, "bytes_read", bytes_read);
+ buffer_json_member_add_double(wb, "bytes_per_second", (double) bytes_read / (double) duration_ut * (double) USEC_PER_SEC);
+ buffer_json_member_add_uint64(wb, "duration_matches_ut", matches_setup_ut);
+ buffer_json_member_add_uint64(wb, "fstat_query_calls", fs_calls);
+ buffer_json_member_add_uint64(wb, "fstat_query_cached_responses", fs_cached);
+
+ if(fqs->sampling) {
+ buffer_json_member_add_object(wb, "_sampling");
+ {
+ buffer_json_member_add_uint64(wb, "sampled", fqs->samples_per_file.sampled);
+ buffer_json_member_add_uint64(wb, "unsampled", fqs->samples_per_file.unsampled);
+ buffer_json_member_add_uint64(wb, "estimated", fqs->samples_per_file.estimated);
+ }
+ buffer_json_object_close(wb); // _sampling
+ }
+ }
+ buffer_json_object_close(wb); // journal file
- facets_rows_begin(facets);
+ bool stop = false;
+ switch(tmp_status) {
+ case ND_SD_JOURNAL_OK:
+ case ND_SD_JOURNAL_NO_FILE_MATCHED:
+ status = (status == ND_SD_JOURNAL_OK) ? ND_SD_JOURNAL_OK : tmp_status;
+ break;
- bool timed_out = false;
- size_t row_counter = 0;
- sd_journal_seek_realtime_usec(j, before_ut);
- SD_JOURNAL_FOREACH_BACKWARDS(j) {
- row_counter++;
+ case ND_SD_JOURNAL_FAILED_TO_OPEN:
+ case ND_SD_JOURNAL_FAILED_TO_SEEK:
+ partial = true;
+ if(status == ND_SD_JOURNAL_NO_FILE_MATCHED)
+ status = tmp_status;
+ break;
- uint64_t msg_ut;
- sd_journal_get_realtime_usec(j, &msg_ut);
- if (msg_ut < after_ut)
+ case ND_SD_JOURNAL_CANCELLED:
+ case ND_SD_JOURNAL_TIMED_OUT:
+ partial = true;
+ stop = true;
+ status = tmp_status;
break;
- const void *data;
- size_t length;
- SD_JOURNAL_FOREACH_DATA(j, data, length) {
- const char *key = data;
- const char *equal = strchr(key, '=');
- if(unlikely(!equal))
- continue;
+ case ND_SD_JOURNAL_NOT_MODIFIED:
+ internal_fatal(true, "this should never be returned here");
+ break;
+ }
- const char *value = ++equal;
- size_t key_length = value - key; // including '\0'
+ if(stop)
+ break;
+ }
+ buffer_json_array_close(wb); // _journal_files
- char key_copy[key_length];
- memcpy(key_copy, key, key_length - 1);
- key_copy[key_length - 1] = '\0';
+ // release the files
+ for(size_t f = 0; f < files_used ;f++)
+ dictionary_acquired_item_release(journal_files_registry, file_items[f]);
- size_t value_length = length - key_length; // without '\0'
- facets_add_key_value_length(facets, key_copy, value, value_length <= FACET_MAX_VALUE_LENGTH ? value_length : FACET_MAX_VALUE_LENGTH);
+ switch (status) {
+ case ND_SD_JOURNAL_OK:
+ if(fqs->if_modified_since && !fqs->rows_useful) {
+ buffer_flush(wb);
+ return HTTP_RESP_NOT_MODIFIED;
}
+ break;
- facets_row_finished(facets, msg_ut);
+ case ND_SD_JOURNAL_TIMED_OUT:
+ case ND_SD_JOURNAL_NO_FILE_MATCHED:
+ break;
- if((row_counter % 100) == 0 && now_monotonic_usec() > stop_monotonic_ut) {
- timed_out = true;
- break;
- }
- }
+ case ND_SD_JOURNAL_CANCELLED:
+ buffer_flush(wb);
+ return HTTP_RESP_CLIENT_CLOSED_REQUEST;
- sd_journal_close(j);
+ case ND_SD_JOURNAL_NOT_MODIFIED:
+ buffer_flush(wb);
+ return HTTP_RESP_NOT_MODIFIED;
+
+ default:
+ case ND_SD_JOURNAL_FAILED_TO_OPEN:
+ case ND_SD_JOURNAL_FAILED_TO_SEEK:
+ buffer_flush(wb);
+ return HTTP_RESP_INTERNAL_SERVER_ERROR;
+ }
buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
- buffer_json_member_add_boolean(wb, "partial", timed_out);
+ buffer_json_member_add_boolean(wb, "partial", partial);
buffer_json_member_add_string(wb, "type", "table");
- buffer_json_member_add_time_t(wb, "update_every", 1);
- buffer_json_member_add_string(wb, "help", SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION);
- facets_report(facets, wb);
+ // build a message for the query
+ if(!fqs->data_only) {
+ CLEAN_BUFFER *msg = buffer_create(0, NULL);
+ CLEAN_BUFFER *msg_description = buffer_create(0, NULL);
+ ND_LOG_FIELD_PRIORITY msg_priority = NDLP_INFO;
+
+ if(!journal_files_completed_once()) {
+ buffer_strcat(msg, "Journals are still being scanned. ");
+ buffer_strcat(msg_description
+ , "LIBRARY SCAN: The journal files are still being scanned, you are probably viewing incomplete data. ");
+ msg_priority = NDLP_WARNING;
+ }
+
+ if(partial) {
+ buffer_strcat(msg, "Query timed-out, incomplete data. ");
+ buffer_strcat(msg_description
+ , "QUERY TIMEOUT: The query timed out and may not include all the data of the selected window. ");
+ msg_priority = NDLP_WARNING;
+ }
+
+ if(fqs->samples.estimated || fqs->samples.unsampled) {
+ double percent = (double) (fqs->samples.sampled * 100.0 /
+ (fqs->samples.estimated + fqs->samples.unsampled + fqs->samples.sampled));
+ buffer_sprintf(msg, "%.2f%% real data", percent);
+ buffer_sprintf(msg_description, "ACTUAL DATA: The filters counters reflect %0.2f%% of the data. ", percent);
+ msg_priority = MIN(msg_priority, NDLP_NOTICE);
+ }
+
+ if(fqs->samples.unsampled) {
+ double percent = (double) (fqs->samples.unsampled * 100.0 /
+ (fqs->samples.estimated + fqs->samples.unsampled + fqs->samples.sampled));
+ buffer_sprintf(msg, ", %.2f%% unsampled", percent);
+ buffer_sprintf(msg_description
+ , "UNSAMPLED DATA: %0.2f%% of the events exist and have been counted, but their values have not been evaluated, so they are not included in the filters counters. "
+ , percent);
+ msg_priority = MIN(msg_priority, NDLP_NOTICE);
+ }
+
+ if(fqs->samples.estimated) {
+ double percent = (double) (fqs->samples.estimated * 100.0 /
+ (fqs->samples.estimated + fqs->samples.unsampled + fqs->samples.sampled));
+ buffer_sprintf(msg, ", %.2f%% estimated", percent);
+ buffer_sprintf(msg_description
+ , "ESTIMATED DATA: The query selected a large amount of data, so to avoid delaying too much, the presented data are estimated by %0.2f%%. "
+ , percent);
+ msg_priority = MIN(msg_priority, NDLP_NOTICE);
+ }
+
+ buffer_json_member_add_object(wb, "message");
+ if(buffer_tostring(msg)) {
+ buffer_json_member_add_string(wb, "title", buffer_tostring(msg));
+ buffer_json_member_add_string(wb, "description", buffer_tostring(msg_description));
+ buffer_json_member_add_string(wb, "status", nd_log_id2priority(msg_priority));
+ }
+ // else send an empty object if there is nothing to tell
+ buffer_json_object_close(wb); // message
+ }
+
+ if(!fqs->data_only) {
+ buffer_json_member_add_time_t(wb, "update_every", 1);
+ buffer_json_member_add_string(wb, "help", SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION);
+ }
+
+ if(!fqs->data_only || fqs->tail)
+ buffer_json_member_add_uint64(wb, "last_modified", fqs->last_modified);
+
+ facets_sort_and_reorder_keys(facets);
+ facets_report(facets, wb, used_hashes_registry);
+
+ buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + (fqs->data_only ? 3600 : 0));
+
+ buffer_json_member_add_object(wb, "_fstat_caching");
+ {
+ buffer_json_member_add_uint64(wb, "calls", fstat_thread_calls);
+ buffer_json_member_add_uint64(wb, "cached", fstat_thread_cached_responses);
+ }
+ buffer_json_object_close(wb); // _fstat_caching
+
+ if(fqs->sampling) {
+ buffer_json_member_add_object(wb, "_sampling");
+ {
+ buffer_json_member_add_uint64(wb, "sampled", fqs->samples.sampled);
+ buffer_json_member_add_uint64(wb, "unsampled", fqs->samples.unsampled);
+ buffer_json_member_add_uint64(wb, "estimated", fqs->samples.estimated);
+ }
+ buffer_json_object_close(wb); // _sampling
+ }
- buffer_json_member_add_time_t(wb, "expires", now_realtime_sec());
buffer_json_finalize(wb);
return HTTP_RESP_OK;
}
-static void systemd_journal_function_help(const char *transaction) {
- pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600);
- fprintf(stdout,
+static void netdata_systemd_journal_function_help(const char *transaction) { // formats the plain-text help page into a BUFFER and hands it to pluginsd_function_result_to_stdout() for `transaction`
+ BUFFER *wb = buffer_create(0, NULL); // receives the formatted help text; released with buffer_free() at the end of this function
+ buffer_sprintf(wb,
"%s / %s\n"
"\n"
"%s\n"
"\n"
- "The following filters are supported:\n"
+ "The following parameters are supported:\n"
"\n"
- " help\n"
+ " "JOURNAL_PARAMETER_HELP"\n"
" Shows this help message.\n"
"\n"
- " before:TIMESTAMP\n"
+ " "JOURNAL_PARAMETER_INFO"\n"
+ " Request initial configuration information about the plugin.\n"
+ " The key entity returned is the required_params array, which includes\n"
+ " all the available systemd journal sources.\n"
+ " When `"JOURNAL_PARAMETER_INFO"` is requested, all other parameters are ignored.\n"
+ "\n"
+ " "JOURNAL_PARAMETER_ID":STRING\n"
+ " Caller supplied unique ID of the request.\n"
+ " This can be used later to request a progress report of the query.\n"
+ " Optional, but if omitted no `"JOURNAL_PARAMETER_PROGRESS"` can be requested.\n"
+ "\n"
+ " "JOURNAL_PARAMETER_PROGRESS"\n"
+ " Request a progress report (the `id` of a running query is required).\n"
+ " When `"JOURNAL_PARAMETER_PROGRESS"` is requested, only parameter `"JOURNAL_PARAMETER_ID"` is used.\n"
+ "\n"
+ " "JOURNAL_PARAMETER_DATA_ONLY":true or "JOURNAL_PARAMETER_DATA_ONLY":false\n"
+ " Quickly respond with data requested, without generating a\n"
+ " `histogram`, `facets` counters and `items`.\n"
+ "\n"
+ " "JOURNAL_PARAMETER_DELTA":true or "JOURNAL_PARAMETER_DELTA":false\n"
+ " When doing data only queries, include deltas for histogram, facets and items.\n"
+ "\n"
+ " "JOURNAL_PARAMETER_TAIL":true or "JOURNAL_PARAMETER_TAIL":false\n"
+ " When doing data only queries, respond with the newest messages,\n"
+ " and up to the anchor, but calculate deltas (if requested) for\n"
+ " the duration [anchor - before].\n"
+ "\n"
+ " "JOURNAL_PARAMETER_SLICE":true or "JOURNAL_PARAMETER_SLICE":false\n"
+ " When it is turned on, the plugin is executing filtering via libsystemd,\n"
+ " utilizing all the available indexes of the journal files.\n"
+ " When it is off, only the time constraint is handled by libsystemd and\n"
+ " all filtering is done by the plugin.\n"
+ " The default is: %s\n"
+ "\n"
+ " "JOURNAL_PARAMETER_SOURCE":SOURCE\n"
+ " Query only the specified journal sources.\n"
+ " Do an `"JOURNAL_PARAMETER_INFO"` query to find the sources.\n"
+ "\n"
+ " "JOURNAL_PARAMETER_BEFORE":TIMESTAMP_IN_SECONDS\n"
" Absolute or relative (to now) timestamp in seconds, to start the query.\n"
" The query is always executed from the most recent to the oldest log entry.\n"
" If not given the default is: now.\n"
"\n"
- " after:TIMESTAMP\n"
+ " "JOURNAL_PARAMETER_AFTER":TIMESTAMP_IN_SECONDS\n"
" Absolute or relative (to `before`) timestamp in seconds, to end the query.\n"
" If not given, the default is %d.\n"
"\n"
- " last:ITEMS\n"
+ " "JOURNAL_PARAMETER_LAST":ITEMS\n"
" The number of items to return.\n"
" The default is %d.\n"
"\n"
- " anchor:NUMBER\n"
- " The `timestamp` of the item last received, to return log entries after that.\n"
- " If not given, the query will return the top `ITEMS` from the most recent.\n"
+ " "JOURNAL_PARAMETER_SAMPLING":ITEMS\n"
+ " The number of log entries to sample to estimate facets counters and histogram.\n"
+ " The default is %d.\n"
+ "\n"
+ " "JOURNAL_PARAMETER_ANCHOR":TIMESTAMP_IN_MICROSECONDS\n"
+ " Return items relative to this timestamp.\n"
+ " The exact items to be returned depend on the query `"JOURNAL_PARAMETER_DIRECTION"`.\n"
+ "\n"
+ " "JOURNAL_PARAMETER_DIRECTION":forward or "JOURNAL_PARAMETER_DIRECTION":backward\n"
+ " When set to `backward` (default) the items returned are the newest before the\n"
+ " `"JOURNAL_PARAMETER_ANCHOR"`, (or `"JOURNAL_PARAMETER_BEFORE"` if `"JOURNAL_PARAMETER_ANCHOR"` is not set)\n"
+ " When set to `forward` the items returned are the oldest after the\n"
+ " `"JOURNAL_PARAMETER_ANCHOR"`, (or `"JOURNAL_PARAMETER_AFTER"` if `"JOURNAL_PARAMETER_ANCHOR"` is not set)\n"
+ " The default is: %s\n"
+ "\n"
+ " "JOURNAL_PARAMETER_QUERY":SIMPLE_PATTERN\n"
+ " Do a full text search to find the log entries matching the pattern given.\n"
+ " The plugin is searching for matches on all fields of the database.\n"
+ "\n"
+ " "JOURNAL_PARAMETER_IF_MODIFIED_SINCE":TIMESTAMP_IN_MICROSECONDS\n"
+ " Each successful response, includes a `last_modified` field.\n"
+ " By providing the timestamp to the `"JOURNAL_PARAMETER_IF_MODIFIED_SINCE"` parameter,\n"
+ " the plugin will return 200 with a successful response, or 304 if the source has not\n"
+ " been modified since that timestamp.\n"
+ "\n"
+ " "JOURNAL_PARAMETER_HISTOGRAM":facet_id\n"
+ " Use the given `facet_id` for the histogram.\n"
+ " This parameter is ignored in `"JOURNAL_PARAMETER_DATA_ONLY"` mode.\n"
+ "\n"
+ " "JOURNAL_PARAMETER_FACETS":facet_id1,facet_id2,facet_id3,...\n"
+ " Add the given facets to the list of fields for which analysis is required.\n"
+ " The plugin will offer both a histogram and facet value counters for its values.\n"
+ " This parameter is ignored in `"JOURNAL_PARAMETER_DATA_ONLY"` mode.\n"
"\n"
" facet_id:value_id1,value_id2,value_id3,...\n"
" Apply filters to the query, based on the facet IDs returned.\n"
" Each `facet_id` can be given once, but multiple `facet_ids` can be given.\n"
"\n"
- "Filters can be combined. Each filter can be given only one time.\n"
, program_name
, SYSTEMD_JOURNAL_FUNCTION_NAME
, SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION
+ , JOURNAL_DEFAULT_SLICE_MODE ? "true" : "false" // slice
, -SYSTEMD_JOURNAL_DEFAULT_QUERY_DURATION
, SYSTEMD_JOURNAL_DEFAULT_ITEMS_PER_QUERY
+ , SYSTEMD_JOURNAL_DEFAULT_ITEMS_SAMPLING // sampling
+ , JOURNAL_DEFAULT_DIRECTION == FACETS_ANCHOR_DIRECTION_BACKWARD ? "backward" : "forward" // direction
);
- pluginsd_function_result_end_to_stdout();
-}
-
-static const char *syslog_facility_to_name(int facility) {
- switch (facility) {
- case LOG_FAC(LOG_KERN): return "kern";
- case LOG_FAC(LOG_USER): return "user";
- case LOG_FAC(LOG_MAIL): return "mail";
- case LOG_FAC(LOG_DAEMON): return "daemon";
- case LOG_FAC(LOG_AUTH): return "auth";
- case LOG_FAC(LOG_SYSLOG): return "syslog";
- case LOG_FAC(LOG_LPR): return "lpr";
- case LOG_FAC(LOG_NEWS): return "news";
- case LOG_FAC(LOG_UUCP): return "uucp";
- case LOG_FAC(LOG_CRON): return "cron";
- case LOG_FAC(LOG_AUTHPRIV): return "authpriv";
- case LOG_FAC(LOG_FTP): return "ftp";
- case LOG_FAC(LOG_LOCAL0): return "local0";
- case LOG_FAC(LOG_LOCAL1): return "local1";
- case LOG_FAC(LOG_LOCAL2): return "local2";
- case LOG_FAC(LOG_LOCAL3): return "local3";
- case LOG_FAC(LOG_LOCAL4): return "local4";
- case LOG_FAC(LOG_LOCAL5): return "local5";
- case LOG_FAC(LOG_LOCAL6): return "local6";
- case LOG_FAC(LOG_LOCAL7): return "local7";
- default: return NULL;
- }
-}
-
-static const char *syslog_priority_to_name(int priority) {
- switch (priority) {
- case LOG_ALERT: return "alert";
- case LOG_CRIT: return "critical";
- case LOG_DEBUG: return "debug";
- case LOG_EMERG: return "panic";
- case LOG_ERR: return "error";
- case LOG_INFO: return "info";
- case LOG_NOTICE: return "notice";
- case LOG_WARNING: return "warning";
- default: return NULL;
- }
-}
-
-static char *uid_to_username(uid_t uid, char *buffer, size_t buffer_size) {
- struct passwd pw, *result;
- char tmp[1024 + 1];
-
- if (getpwuid_r(uid, &pw, tmp, 1024, &result) != 0 || result == NULL)
- return NULL;
-
- strncpy(buffer, pw.pw_name, buffer_size - 1);
- buffer[buffer_size - 1] = '\0'; // Null-terminate just in case
- return buffer;
-}
-static char *gid_to_groupname(gid_t gid, char* buffer, size_t buffer_size) {
- struct group grp, *result;
- char tmp[1024 + 1];
+ netdata_mutex_lock(&stdout_mutex); // stdout_mutex is held only around the call that emits the response
+ pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "text/plain", now_realtime_sec() + 3600, wb); // NOTE(review): now + 3600 is presumably the response expiration time - confirm against the helper
+ netdata_mutex_unlock(&stdout_mutex);
- if (getgrgid_r(gid, &grp, tmp, 1024, &result) != 0 || result == NULL)
- return NULL;
-
- strncpy(buffer, grp.gr_name, buffer_size - 1);
- buffer[buffer_size - 1] = '\0'; // Null-terminate just in case
- return buffer;
+ buffer_free(wb);
}
-static void systemd_journal_transform_syslog_facility(FACETS *facets __maybe_unused, BUFFER *wb, void *data __maybe_unused) {
- const char *v = buffer_tostring(wb);
- if(*v && isdigit(*v)) {
- int facility = str2i(buffer_tostring(wb));
- const char *name = syslog_facility_to_name(facility);
- if (name) {
- buffer_flush(wb);
- buffer_strcat(wb, name);
- }
- }
-}
+DICTIONARY *function_query_status_dict = NULL; // looked up by progress id in function_systemd_journal_progress(), where the item values are read as FUNCTION_QUERY_STATUS
-static void systemd_journal_transform_priority(FACETS *facets __maybe_unused, BUFFER *wb, void *data __maybe_unused) {
- const char *v = buffer_tostring(wb);
- if(*v && isdigit(*v)) {
- int priority = str2i(buffer_tostring(wb));
- const char *name = syslog_priority_to_name(priority);
- if (name) {
- buffer_flush(wb);
- buffer_strcat(wb, name);
- }
+// Reports the progress of the query registered under `progress_id` in function_query_status_dict.
+// wb: caller-provided buffer; it is flushed and rebuilt here as the JSON response (it is not freed here).
+// transaction: passed to the pluginsd stdout helpers together with the response.
+// progress_id: key to look up; NULL or empty is answered with HTTP_RESP_BAD_REQUEST, an unknown id with HTTP_RESP_NOT_FOUND.
+// Side effect: moves the query's stop_monotonic_ut to at least 10 seconds from now.
+static void function_systemd_journal_progress(BUFFER *wb, const char *transaction, const char *progress_id) {
+ if(!progress_id || !(*progress_id)) {
+ netdata_mutex_lock(&stdout_mutex);
+ pluginsd_function_json_error_to_stdout(transaction, HTTP_RESP_BAD_REQUEST, "missing progress id");
+ netdata_mutex_unlock(&stdout_mutex);
+ return;
}
-}
-
-static void systemd_journal_transform_uid(FACETS *facets __maybe_unused, BUFFER *wb, void *data) {
- DICTIONARY *cache = data;
- const char *v = buffer_tostring(wb);
- if(*v && isdigit(*v)) {
- const char *sv = dictionary_get(cache, v);
- if(!sv) {
- char buf[1024 + 1];
- int uid = str2i(buffer_tostring(wb));
- const char *name = uid_to_username(uid, buf, 1024);
- if (!name)
- name = v;
- sv = dictionary_set(cache, v, (void *)name, strlen(name) + 1);
- }
+ const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(function_query_status_dict, progress_id); // acquired here, released at the end of this function
- buffer_flush(wb);
- buffer_strcat(wb, sv);
+ if(!item) {
+ netdata_mutex_lock(&stdout_mutex);
+ pluginsd_function_json_error_to_stdout(transaction, HTTP_RESP_NOT_FOUND, "progress id is not found here");
+ netdata_mutex_unlock(&stdout_mutex);
+ return;
}
-}
-
-static void systemd_journal_transform_gid(FACETS *facets __maybe_unused, BUFFER *wb, void *data) {
- DICTIONARY *cache = data;
- const char *v = buffer_tostring(wb);
- if(*v && isdigit(*v)) {
- const char *sv = dictionary_get(cache, v);
- if(!sv) {
- char buf[1024 + 1];
- int gid = str2i(buffer_tostring(wb));
- const char *name = gid_to_groupname(gid, buf, 1024);
- if (!name)
- name = v;
- sv = dictionary_set(cache, v, (void *)name, strlen(name) + 1);
- }
+ FUNCTION_QUERY_STATUS *fqs = dictionary_acquired_item_value(item);
- buffer_flush(wb);
- buffer_strcat(wb, sv);
- }
-}
+ usec_t now_monotonic_ut = now_monotonic_usec();
+ if(now_monotonic_ut + 10 * USEC_PER_SEC > fqs->stop_monotonic_ut) // NOTE(review): presumably keeps a query that is being polled from timing out - confirm
+ fqs->stop_monotonic_ut = now_monotonic_ut + 10 * USEC_PER_SEC;
-static void systemd_journal_dynamic_row_id(FACETS *facets __maybe_unused, BUFFER *json_array, FACET_ROW_KEY_VALUE *rkv, FACET_ROW *row, void *data __maybe_unused) {
- FACET_ROW_KEY_VALUE *pid_rkv = dictionary_get(row->dict, "_PID");
- const char *pid = pid_rkv ? buffer_tostring(pid_rkv->wb) : FACET_VALUE_UNSET;
+ usec_t duration_ut = now_monotonic_ut - fqs->started_monotonic_ut;
- FACET_ROW_KEY_VALUE *syslog_identifier_rkv = dictionary_get(row->dict, "SYSLOG_IDENTIFIER");
- const char *identifier = syslog_identifier_rkv ? buffer_tostring(syslog_identifier_rkv->wb) : FACET_VALUE_UNSET;
+ size_t files_matched = fqs->files_matched;
+ size_t file_working = fqs->file_working;
+ if(file_working > files_matched) // never report "file N of M" with N > M
+ files_matched = file_working;
- if(strcmp(identifier, FACET_VALUE_UNSET) == 0) {
- FACET_ROW_KEY_VALUE *comm_rkv = dictionary_get(row->dict, "_COMM");
- identifier = comm_rkv ? buffer_tostring(comm_rkv->wb) : FACET_VALUE_UNSET;
- }
+ size_t rows_read = __atomic_load_n(&fqs->rows_read, __ATOMIC_RELAXED);
+ size_t bytes_read = __atomic_load_n(&fqs->bytes_read, __ATOMIC_RELAXED);
+ // files_matched and duration_ut can both still be zero when the report is requested;
+ // report 0 instead of dividing by zero (0/0 is NaN, x/0 is inf)
+ double progress_percent = files_matched ? (double)file_working * 100.0 / (double)files_matched : 0.0;
+ double rows_per_sec = duration_ut ? (double)rows_read / (double)duration_ut * (double)USEC_PER_SEC : 0.0;
+ double bytes_per_sec = duration_ut ? (double)bytes_read / (double)duration_ut * (double)USEC_PER_SEC : 0.0;
- buffer_flush(rkv->wb);
+ buffer_flush(wb);
+ buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY);
+ buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
+ buffer_json_member_add_string(wb, "type", "table");
+ buffer_json_member_add_uint64(wb, "running_duration_usec", duration_ut);
+ buffer_json_member_add_double(wb, "progress", progress_percent);
+ char msg[1024 + 1];
+ snprintfz(msg, sizeof(msg) - 1,
+ "Read %zu rows (%0.0f rows/s), "
+ "data %0.1f MB (%0.1f MB/s), "
+ "file %zu of %zu",
+ rows_read, rows_per_sec,
+ (double)bytes_read / 1024.0 / 1024.0, bytes_per_sec / 1024.0 / 1024.0,
+ file_working, files_matched
+ );
+ buffer_json_member_add_string(wb, "message", msg);
+ buffer_json_finalize(wb);
- if(strcmp(pid, FACET_VALUE_UNSET) == 0)
- buffer_strcat(rkv->wb, identifier);
- else
- buffer_sprintf(rkv->wb, "%s[%s]", identifier, pid);
+ netdata_mutex_lock(&stdout_mutex); // stdout_mutex is held only around the call that emits the response
+ pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "application/json", now_realtime_sec() + 1, wb);
+ netdata_mutex_unlock(&stdout_mutex);
- buffer_json_add_array_item_string(json_array, buffer_tostring(rkv->wb));
+ dictionary_acquired_item_release(function_query_status_dict, item); // drop the reference taken by dictionary_get_and_acquire_item()
}
-static void function_systemd_journal(const char *transaction, char *function, char *line_buffer __maybe_unused, int line_max __maybe_unused, int timeout __maybe_unused) {
- char *words[SYSTEMD_JOURNAL_MAX_PARAMS] = { NULL };
- size_t num_words = quoted_strings_splitter_pluginsd(function, words, SYSTEMD_JOURNAL_MAX_PARAMS);
+void function_systemd_journal(const char *transaction, char *function, int timeout, bool *cancelled) {
+ fstat_thread_calls = 0;
+ fstat_thread_cached_responses = 0;
BUFFER *wb = buffer_create(0, NULL);
buffer_flush(wb);
- buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_NEWLINE_ON_ARRAY_ITEMS);
-
- FACETS *facets = facets_create(50, 0, FACETS_OPTION_ALL_KEYS_FTS,
+ buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY);
+
+ usec_t now_monotonic_ut = now_monotonic_usec();
+ FUNCTION_QUERY_STATUS tmp_fqs = {
+ .cancelled = cancelled,
+ .started_monotonic_ut = now_monotonic_ut,
+ .stop_monotonic_ut = now_monotonic_ut + (timeout * USEC_PER_SEC),
+ };
+ FUNCTION_QUERY_STATUS *fqs = NULL;
+ const DICTIONARY_ITEM *fqs_item = NULL;
+
+ FACETS *facets = facets_create(50, FACETS_OPTION_ALL_KEYS_FTS,
SYSTEMD_ALWAYS_VISIBLE_KEYS,
SYSTEMD_KEYS_INCLUDED_IN_FACETS,
SYSTEMD_KEYS_EXCLUDED_FROM_FACETS);
+ facets_accepted_param(facets, JOURNAL_PARAMETER_INFO);
+ facets_accepted_param(facets, JOURNAL_PARAMETER_SOURCE);
facets_accepted_param(facets, JOURNAL_PARAMETER_AFTER);
facets_accepted_param(facets, JOURNAL_PARAMETER_BEFORE);
facets_accepted_param(facets, JOURNAL_PARAMETER_ANCHOR);
+ facets_accepted_param(facets, JOURNAL_PARAMETER_DIRECTION);
facets_accepted_param(facets, JOURNAL_PARAMETER_LAST);
facets_accepted_param(facets, JOURNAL_PARAMETER_QUERY);
+ facets_accepted_param(facets, JOURNAL_PARAMETER_FACETS);
+ facets_accepted_param(facets, JOURNAL_PARAMETER_HISTOGRAM);
+ facets_accepted_param(facets, JOURNAL_PARAMETER_IF_MODIFIED_SINCE);
+ facets_accepted_param(facets, JOURNAL_PARAMETER_DATA_ONLY);
+ facets_accepted_param(facets, JOURNAL_PARAMETER_ID);
+ facets_accepted_param(facets, JOURNAL_PARAMETER_PROGRESS);
+ facets_accepted_param(facets, JOURNAL_PARAMETER_DELTA);
+ facets_accepted_param(facets, JOURNAL_PARAMETER_TAIL);
+ facets_accepted_param(facets, JOURNAL_PARAMETER_SAMPLING);
+
+#ifdef HAVE_SD_JOURNAL_RESTART_FIELDS
+ facets_accepted_param(facets, JOURNAL_PARAMETER_SLICE);
+#endif // HAVE_SD_JOURNAL_RESTART_FIELDS
// register the fields in the order you want them on the dashboard
- facets_register_dynamic_key(facets, "ND_JOURNAL_PROCESS", FACET_KEY_OPTION_NO_FACET|FACET_KEY_OPTION_VISIBLE|FACET_KEY_OPTION_FTS,
- systemd_journal_dynamic_row_id, NULL);
+ facets_register_row_severity(facets, syslog_priority_to_facet_severity, NULL);
+
+ facets_register_key_name(facets, "_HOSTNAME",
+ FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_VISIBLE);
+
+ facets_register_dynamic_key_name(facets, JOURNAL_KEY_ND_JOURNAL_PROCESS,
+ FACET_KEY_OPTION_NEVER_FACET | FACET_KEY_OPTION_VISIBLE,
+ netdata_systemd_journal_dynamic_row_id, NULL);
+
+ facets_register_key_name(facets, "MESSAGE",
+ FACET_KEY_OPTION_NEVER_FACET | FACET_KEY_OPTION_MAIN_TEXT |
+ FACET_KEY_OPTION_VISIBLE | FACET_KEY_OPTION_FTS);
+
+// facets_register_dynamic_key_name(facets, "MESSAGE",
+// FACET_KEY_OPTION_NEVER_FACET | FACET_KEY_OPTION_MAIN_TEXT | FACET_KEY_OPTION_RICH_TEXT |
+// FACET_KEY_OPTION_VISIBLE | FACET_KEY_OPTION_FTS,
+// netdata_systemd_journal_rich_message, NULL);
+
+ facets_register_key_name_transformation(facets, "PRIORITY",
+ FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW |
+ FACET_KEY_OPTION_EXPANDED_FILTER,
+ netdata_systemd_journal_transform_priority, NULL);
+
+ facets_register_key_name_transformation(facets, "SYSLOG_FACILITY",
+ FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW |
+ FACET_KEY_OPTION_EXPANDED_FILTER,
+ netdata_systemd_journal_transform_syslog_facility, NULL);
+
+ facets_register_key_name_transformation(facets, "ERRNO",
+ FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW,
+ netdata_systemd_journal_transform_errno, NULL);
+
+ facets_register_key_name(facets, JOURNAL_KEY_ND_JOURNAL_FILE,
+ FACET_KEY_OPTION_NEVER_FACET);
+
+ facets_register_key_name(facets, "SYSLOG_IDENTIFIER",
+ FACET_KEY_OPTION_FACET);
+
+ facets_register_key_name(facets, "UNIT",
+ FACET_KEY_OPTION_FACET);
+
+ facets_register_key_name(facets, "USER_UNIT",
+ FACET_KEY_OPTION_FACET);
+
+ facets_register_key_name_transformation(facets, "MESSAGE_ID",
+ FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW |
+ FACET_KEY_OPTION_EXPANDED_FILTER,
+ netdata_systemd_journal_transform_message_id, NULL);
+
+ facets_register_key_name_transformation(facets, "_BOOT_ID",
+ FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW,
+ netdata_systemd_journal_transform_boot_id, NULL);
- facets_register_key(facets, "MESSAGE",
- FACET_KEY_OPTION_NO_FACET|FACET_KEY_OPTION_MAIN_TEXT|FACET_KEY_OPTION_VISIBLE|FACET_KEY_OPTION_FTS);
+ facets_register_key_name_transformation(facets, "_SYSTEMD_OWNER_UID",
+ FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW,
+ netdata_systemd_journal_transform_uid, NULL);
- facets_register_key_transformation(facets, "PRIORITY", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS,
- systemd_journal_transform_priority, NULL);
+ facets_register_key_name_transformation(facets, "_UID",
+ FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW,
+ netdata_systemd_journal_transform_uid, NULL);
- facets_register_key_transformation(facets, "SYSLOG_FACILITY", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS,
- systemd_journal_transform_syslog_facility, NULL);
+ facets_register_key_name_transformation(facets, "OBJECT_SYSTEMD_OWNER_UID",
+ FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW,
+ netdata_systemd_journal_transform_uid, NULL);
- facets_register_key(facets, "SYSLOG_IDENTIFIER", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS);
- facets_register_key(facets, "UNIT", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS);
- facets_register_key(facets, "USER_UNIT", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS);
+ facets_register_key_name_transformation(facets, "OBJECT_UID",
+ FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW,
+ netdata_systemd_journal_transform_uid, NULL);
- facets_register_key_transformation(facets, "_UID", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS,
- systemd_journal_transform_uid, uids);
+ facets_register_key_name_transformation(facets, "_GID",
+ FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW,
+ netdata_systemd_journal_transform_gid, NULL);
- facets_register_key_transformation(facets, "_GID", FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS,
- systemd_journal_transform_gid, gids);
+ facets_register_key_name_transformation(facets, "OBJECT_GID",
+ FACET_KEY_OPTION_FACET | FACET_KEY_OPTION_TRANSFORM_VIEW,
+ netdata_systemd_journal_transform_gid, NULL);
+ facets_register_key_name_transformation(facets, "_CAP_EFFECTIVE",
+ FACET_KEY_OPTION_TRANSFORM_VIEW,
+ netdata_systemd_journal_transform_cap_effective, NULL);
+
+ facets_register_key_name_transformation(facets, "_AUDIT_LOGINUID",
+ FACET_KEY_OPTION_TRANSFORM_VIEW,
+ netdata_systemd_journal_transform_uid, NULL);
+
+ facets_register_key_name_transformation(facets, "OBJECT_AUDIT_LOGINUID",
+ FACET_KEY_OPTION_TRANSFORM_VIEW,
+ netdata_systemd_journal_transform_uid, NULL);
+
+ facets_register_key_name_transformation(facets, "_SOURCE_REALTIME_TIMESTAMP",
+ FACET_KEY_OPTION_TRANSFORM_VIEW,
+ netdata_systemd_journal_transform_timestamp_usec, NULL);
+
+ // ------------------------------------------------------------------------
+ // parse the parameters
+
+ bool info = false, data_only = false, progress = false, slice = JOURNAL_DEFAULT_SLICE_MODE, delta = false, tail = false;
time_t after_s = 0, before_s = 0;
usec_t anchor = 0;
+ usec_t if_modified_since = 0;
size_t last = 0;
+ FACETS_ANCHOR_DIRECTION direction = JOURNAL_DEFAULT_DIRECTION;
const char *query = NULL;
+ const char *chart = NULL;
+ SIMPLE_PATTERN *sources = NULL;
+ const char *progress_id = NULL;
+ SD_JOURNAL_FILE_SOURCE_TYPE source_type = SDJF_ALL;
+ size_t filters = 0;
+ size_t sampling = SYSTEMD_JOURNAL_DEFAULT_ITEMS_SAMPLING;
- buffer_json_member_add_object(wb, "request");
- buffer_json_member_add_object(wb, "filters");
+ buffer_json_member_add_object(wb, "_request");
+ char *words[SYSTEMD_JOURNAL_MAX_PARAMS] = { NULL };
+ size_t num_words = quoted_strings_splitter_pluginsd(function, words, SYSTEMD_JOURNAL_MAX_PARAMS);
for(int i = 1; i < SYSTEMD_JOURNAL_MAX_PARAMS ;i++) {
- const char *keyword = get_word(words, num_words, i);
+ char *keyword = get_word(words, num_words, i);
if(!keyword) break;
if(strcmp(keyword, JOURNAL_PARAMETER_HELP) == 0) {
- systemd_journal_function_help(transaction);
+ netdata_systemd_journal_function_help(transaction);
goto cleanup;
}
- else if(strncmp(keyword, JOURNAL_PARAMETER_AFTER ":", strlen(JOURNAL_PARAMETER_AFTER ":")) == 0) {
- after_s = str2l(&keyword[strlen(JOURNAL_PARAMETER_AFTER ":")]);
+ else if(strcmp(keyword, JOURNAL_PARAMETER_INFO) == 0) {
+ info = true;
+ }
+ else if(strcmp(keyword, JOURNAL_PARAMETER_PROGRESS) == 0) {
+ progress = true;
+ }
+ else if(strncmp(keyword, JOURNAL_PARAMETER_DELTA ":", sizeof(JOURNAL_PARAMETER_DELTA ":") - 1) == 0) {
+ char *v = &keyword[sizeof(JOURNAL_PARAMETER_DELTA ":") - 1];
+
+ if(strcmp(v, "false") == 0 || strcmp(v, "no") == 0 || strcmp(v, "0") == 0)
+ delta = false;
+ else
+ delta = true;
+ }
+ else if(strncmp(keyword, JOURNAL_PARAMETER_TAIL ":", sizeof(JOURNAL_PARAMETER_TAIL ":") - 1) == 0) {
+ char *v = &keyword[sizeof(JOURNAL_PARAMETER_TAIL ":") - 1];
+
+ if(strcmp(v, "false") == 0 || strcmp(v, "no") == 0 || strcmp(v, "0") == 0)
+ tail = false;
+ else
+ tail = true;
+ }
+ else if(strncmp(keyword, JOURNAL_PARAMETER_SAMPLING ":", sizeof(JOURNAL_PARAMETER_SAMPLING ":") - 1) == 0) {
+ sampling = str2ul(&keyword[sizeof(JOURNAL_PARAMETER_SAMPLING ":") - 1]);
+ }
+ else if(strncmp(keyword, JOURNAL_PARAMETER_DATA_ONLY ":", sizeof(JOURNAL_PARAMETER_DATA_ONLY ":") - 1) == 0) {
+ char *v = &keyword[sizeof(JOURNAL_PARAMETER_DATA_ONLY ":") - 1];
+
+ if(strcmp(v, "false") == 0 || strcmp(v, "no") == 0 || strcmp(v, "0") == 0)
+ data_only = false;
+ else
+ data_only = true;
+ }
+ else if(strncmp(keyword, JOURNAL_PARAMETER_SLICE ":", sizeof(JOURNAL_PARAMETER_SLICE ":") - 1) == 0) {
+ char *v = &keyword[sizeof(JOURNAL_PARAMETER_SLICE ":") - 1];
+
+ if(strcmp(v, "false") == 0 || strcmp(v, "no") == 0 || strcmp(v, "0") == 0)
+ slice = false;
+ else
+ slice = true;
+ }
+ else if(strncmp(keyword, JOURNAL_PARAMETER_ID ":", sizeof(JOURNAL_PARAMETER_ID ":") - 1) == 0) {
+ char *id = &keyword[sizeof(JOURNAL_PARAMETER_ID ":") - 1];
+
+ if(*id)
+ progress_id = id;
+ }
+ else if(strncmp(keyword, JOURNAL_PARAMETER_SOURCE ":", sizeof(JOURNAL_PARAMETER_SOURCE ":") - 1) == 0) {
+ const char *value = &keyword[sizeof(JOURNAL_PARAMETER_SOURCE ":") - 1];
+
+ buffer_json_member_add_array(wb, JOURNAL_PARAMETER_SOURCE);
+
+ BUFFER *sources_list = buffer_create(0, NULL);
+
+ source_type = SDJF_NONE;
+ while(value) {
+ char *sep = strchr(value, ',');
+ if(sep)
+ *sep++ = '\0';
+
+ buffer_json_add_array_item_string(wb, value);
+
+ if(strcmp(value, SDJF_SOURCE_ALL_NAME) == 0) {
+ source_type |= SDJF_ALL;
+ value = NULL;
+ }
+ else if(strcmp(value, SDJF_SOURCE_LOCAL_NAME) == 0) {
+ source_type |= SDJF_LOCAL_ALL;
+ value = NULL;
+ }
+ else if(strcmp(value, SDJF_SOURCE_REMOTES_NAME) == 0) {
+ source_type |= SDJF_REMOTE_ALL;
+ value = NULL;
+ }
+ else if(strcmp(value, SDJF_SOURCE_NAMESPACES_NAME) == 0) {
+ source_type |= SDJF_LOCAL_NAMESPACE;
+ value = NULL;
+ }
+ else if(strcmp(value, SDJF_SOURCE_LOCAL_SYSTEM_NAME) == 0) {
+ source_type |= SDJF_LOCAL_SYSTEM;
+ value = NULL;
+ }
+ else if(strcmp(value, SDJF_SOURCE_LOCAL_USERS_NAME) == 0) {
+ source_type |= SDJF_LOCAL_USER;
+ value = NULL;
+ }
+ else if(strcmp(value, SDJF_SOURCE_LOCAL_OTHER_NAME) == 0) {
+ source_type |= SDJF_LOCAL_OTHER;
+ value = NULL;
+ }
+ else {
+ // else, match the source, whatever it is
+ if(buffer_strlen(sources_list))
+ buffer_strcat(sources_list, ",");
+
+ buffer_strcat(sources_list, value);
+ }
+
+ value = sep;
+ }
+
+ if(buffer_strlen(sources_list)) {
+ simple_pattern_free(sources);
+ sources = simple_pattern_create(buffer_tostring(sources_list), ",", SIMPLE_PATTERN_EXACT, false);
+ }
+
+ buffer_free(sources_list);
+
+ buffer_json_array_close(wb); // source
+ }
+ else if(strncmp(keyword, JOURNAL_PARAMETER_AFTER ":", sizeof(JOURNAL_PARAMETER_AFTER ":") - 1) == 0) {
+ after_s = str2l(&keyword[sizeof(JOURNAL_PARAMETER_AFTER ":") - 1]);
+ }
+ else if(strncmp(keyword, JOURNAL_PARAMETER_BEFORE ":", sizeof(JOURNAL_PARAMETER_BEFORE ":") - 1) == 0) {
+ before_s = str2l(&keyword[sizeof(JOURNAL_PARAMETER_BEFORE ":") - 1]);
+ }
+ else if(strncmp(keyword, JOURNAL_PARAMETER_IF_MODIFIED_SINCE ":", sizeof(JOURNAL_PARAMETER_IF_MODIFIED_SINCE ":") - 1) == 0) {
+ if_modified_since = str2ull(&keyword[sizeof(JOURNAL_PARAMETER_IF_MODIFIED_SINCE ":") - 1], NULL);
}
- else if(strncmp(keyword, JOURNAL_PARAMETER_BEFORE ":", strlen(JOURNAL_PARAMETER_BEFORE ":")) == 0) {
- before_s = str2l(&keyword[strlen(JOURNAL_PARAMETER_BEFORE ":")]);
+ else if(strncmp(keyword, JOURNAL_PARAMETER_ANCHOR ":", sizeof(JOURNAL_PARAMETER_ANCHOR ":") - 1) == 0) {
+ anchor = str2ull(&keyword[sizeof(JOURNAL_PARAMETER_ANCHOR ":") - 1], NULL);
}
- else if(strncmp(keyword, JOURNAL_PARAMETER_ANCHOR ":", strlen(JOURNAL_PARAMETER_ANCHOR ":")) == 0) {
- anchor = str2ull(&keyword[strlen(JOURNAL_PARAMETER_ANCHOR ":")], NULL);
+ else if(strncmp(keyword, JOURNAL_PARAMETER_DIRECTION ":", sizeof(JOURNAL_PARAMETER_DIRECTION ":") - 1) == 0) {
+ direction = strcasecmp(&keyword[sizeof(JOURNAL_PARAMETER_DIRECTION ":") - 1], "forward") == 0 ? FACETS_ANCHOR_DIRECTION_FORWARD : FACETS_ANCHOR_DIRECTION_BACKWARD;
}
- else if(strncmp(keyword, JOURNAL_PARAMETER_LAST ":", strlen(JOURNAL_PARAMETER_LAST ":")) == 0) {
- last = str2ul(&keyword[strlen(JOURNAL_PARAMETER_LAST ":")]);
+ else if(strncmp(keyword, JOURNAL_PARAMETER_LAST ":", sizeof(JOURNAL_PARAMETER_LAST ":") - 1) == 0) {
+ last = str2ul(&keyword[sizeof(JOURNAL_PARAMETER_LAST ":") - 1]);
}
- else if(strncmp(keyword, JOURNAL_PARAMETER_QUERY ":", strlen(JOURNAL_PARAMETER_QUERY ":")) == 0) {
- query= &keyword[strlen(JOURNAL_PARAMETER_QUERY ":")];
+ else if(strncmp(keyword, JOURNAL_PARAMETER_QUERY ":", sizeof(JOURNAL_PARAMETER_QUERY ":") - 1) == 0) {
+ query= &keyword[sizeof(JOURNAL_PARAMETER_QUERY ":") - 1];
+ }
+ else if(strncmp(keyword, JOURNAL_PARAMETER_HISTOGRAM ":", sizeof(JOURNAL_PARAMETER_HISTOGRAM ":") - 1) == 0) {
+ chart = &keyword[sizeof(JOURNAL_PARAMETER_HISTOGRAM ":") - 1];
+ }
+ else if(strncmp(keyword, JOURNAL_PARAMETER_FACETS ":", sizeof(JOURNAL_PARAMETER_FACETS ":") - 1) == 0) {
+ char *value = &keyword[sizeof(JOURNAL_PARAMETER_FACETS ":") - 1];
+ if(*value) {
+ buffer_json_member_add_array(wb, JOURNAL_PARAMETER_FACETS);
+
+ while(value) {
+ char *sep = strchr(value, ',');
+ if(sep)
+ *sep++ = '\0';
+
+ facets_register_facet_id(facets, value, FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS|FACET_KEY_OPTION_REORDER);
+ buffer_json_add_array_item_string(wb, value);
+
+ value = sep;
+ }
+
+ buffer_json_array_close(wb); // JOURNAL_PARAMETER_FACETS
+ }
}
else {
char *value = strchr(keyword, ':');
@@ -412,8 +1915,9 @@ static void function_systemd_journal(const char *transaction, char *function, ch
if(sep)
*sep++ = '\0';
- facets_register_facet_filter(facets, keyword, value, FACET_KEY_OPTION_REORDER);
+ facets_register_facet_id_filter(facets, keyword, value, FACET_KEY_OPTION_FACET|FACET_KEY_OPTION_FTS|FACET_KEY_OPTION_REORDER);
buffer_json_add_array_item_string(wb, value);
+ filters++;
value = sep;
}
@@ -423,18 +1927,31 @@ static void function_systemd_journal(const char *transaction, char *function, ch
}
}
- buffer_json_object_close(wb); // filters
+ // ------------------------------------------------------------------------
+ // put this request into the progress db
+
+ if(progress_id && *progress_id) {
+ fqs_item = dictionary_set_and_acquire_item(function_query_status_dict, progress_id, &tmp_fqs, sizeof(tmp_fqs));
+ fqs = dictionary_acquired_item_value(fqs_item);
+ }
+ else {
+ // no progress id given, proceed without registering our progress in the dictionary
+ fqs = &tmp_fqs;
+ fqs_item = NULL;
+ }
+
+ // ------------------------------------------------------------------------
+ // validate parameters
- time_t expires = now_realtime_sec() + 1;
- time_t now_s;
+ time_t now_s = now_realtime_sec();
+ time_t expires = now_s + 1;
if(!after_s && !before_s) {
- now_s = now_realtime_sec();
before_s = now_s;
after_s = before_s - SYSTEMD_JOURNAL_DEFAULT_QUERY_DURATION;
}
else
- rrdr_relative_window_to_absolute(&after_s, &before_s, &now_s, false);
+ rrdr_relative_window_to_absolute(&after_s, &before_s, now_s);
if(after_s > before_s) {
time_t tmp = after_s;
@@ -448,149 +1965,175 @@ static void function_systemd_journal(const char *transaction, char *function, ch
if(!last)
last = SYSTEMD_JOURNAL_DEFAULT_ITEMS_PER_QUERY;
- buffer_json_member_add_time_t(wb, "after", after_s);
- buffer_json_member_add_time_t(wb, "before", before_s);
- buffer_json_member_add_uint64(wb, "anchor", anchor);
- buffer_json_member_add_uint64(wb, "last", last);
- buffer_json_member_add_string(wb, "query", query);
- buffer_json_member_add_time_t(wb, "timeout", timeout);
- buffer_json_object_close(wb); // request
-
- facets_set_items(facets, last);
- facets_set_anchor(facets, anchor);
- facets_set_query(facets, query);
- int response = systemd_journal_query(wb, facets, after_s * USEC_PER_SEC, before_s * USEC_PER_SEC,
- now_monotonic_usec() + (timeout - 1) * USEC_PER_SEC);
- if(response != HTTP_RESP_OK) {
- pluginsd_function_json_error(transaction, response, "failed");
- goto cleanup;
+ // ------------------------------------------------------------------------
+ // set query time-frame, anchors and direction
+
+ fqs->after_ut = after_s * USEC_PER_SEC;
+ fqs->before_ut = (before_s * USEC_PER_SEC) + USEC_PER_SEC - 1;
+ fqs->if_modified_since = if_modified_since;
+ fqs->data_only = data_only;
+ fqs->delta = (fqs->data_only) ? delta : false;
+ fqs->tail = (fqs->data_only && fqs->if_modified_since) ? tail : false;
+ fqs->sources = sources;
+ fqs->source_type = source_type;
+ fqs->entries = last;
+ fqs->last_modified = 0;
+ fqs->filters = filters;
+ fqs->query = (query && *query) ? query : NULL;
+ fqs->histogram = (chart && *chart) ? chart : NULL;
+ fqs->direction = direction;
+ fqs->anchor.start_ut = anchor;
+ fqs->anchor.stop_ut = 0;
+ fqs->sampling = sampling;
+
+ if(fqs->anchor.start_ut && fqs->tail) {
+ // a tail request
+ // we need the top X entries from BEFORE
+ // but, we need to calculate the facets and the
+ // histogram up to the anchor
+ fqs->direction = direction = FACETS_ANCHOR_DIRECTION_BACKWARD;
+ fqs->anchor.start_ut = 0;
+ fqs->anchor.stop_ut = anchor;
}
- pluginsd_function_result_begin_to_stdout(transaction, HTTP_RESP_OK, "application/json", expires);
- fwrite(buffer_tostring(wb), buffer_strlen(wb), 1, stdout);
+ if(anchor && anchor < fqs->after_ut) {
+ log_fqs(fqs, "received anchor is too small for query timeframe, ignoring anchor");
+ anchor = 0;
+ fqs->anchor.start_ut = 0;
+ fqs->anchor.stop_ut = 0;
+ fqs->direction = direction = FACETS_ANCHOR_DIRECTION_BACKWARD;
+ }
+ else if(anchor > fqs->before_ut) {
+ log_fqs(fqs, "received anchor is too big for query timeframe, ignoring anchor");
+ anchor = 0;
+ fqs->anchor.start_ut = 0;
+ fqs->anchor.stop_ut = 0;
+ fqs->direction = direction = FACETS_ANCHOR_DIRECTION_BACKWARD;
+ }
- pluginsd_function_result_end_to_stdout();
+ facets_set_anchor(facets, fqs->anchor.start_ut, fqs->anchor.stop_ut, fqs->direction);
-cleanup:
- facets_destroy(facets);
- buffer_free(wb);
-}
+ facets_set_additional_options(facets,
+ ((fqs->data_only) ? FACETS_OPTION_DATA_ONLY : 0) |
+ ((fqs->delta) ? FACETS_OPTION_SHOW_DELTAS : 0));
-static void *reader_main(void *arg __maybe_unused) {
- char buffer[PLUGINSD_LINE_MAX + 1];
+ // ------------------------------------------------------------------------
+ // set the rest of the query parameters
- char *s = NULL;
- while(!plugin_should_exit && (s = fgets(buffer, PLUGINSD_LINE_MAX, stdin))) {
- char *words[PLUGINSD_MAX_WORDS] = { NULL };
- size_t num_words = quoted_strings_splitter_pluginsd(buffer, words, PLUGINSD_MAX_WORDS);
+ facets_set_items(facets, fqs->entries);
+ facets_set_query(facets, fqs->query);
- const char *keyword = get_word(words, num_words, 0);
+#ifdef HAVE_SD_JOURNAL_RESTART_FIELDS
+ fqs->slice = slice;
+ if(slice)
+ facets_enable_slice_mode(facets);
+#else
+ fqs->slice = false;
+#endif
- if(keyword && strcmp(keyword, PLUGINSD_KEYWORD_FUNCTION) == 0) {
- char *transaction = get_word(words, num_words, 1);
- char *timeout_s = get_word(words, num_words, 2);
- char *function = get_word(words, num_words, 3);
+ if(fqs->histogram)
+ facets_set_timeframe_and_histogram_by_id(facets, fqs->histogram, fqs->after_ut, fqs->before_ut);
+ else
+ facets_set_timeframe_and_histogram_by_name(facets, "PRIORITY", fqs->after_ut, fqs->before_ut);
- if(!transaction || !*transaction || !timeout_s || !*timeout_s || !function || !*function) {
- netdata_log_error("Received incomplete %s (transaction = '%s', timeout = '%s', function = '%s'). Ignoring it.",
- keyword,
- transaction?transaction:"(unset)",
- timeout_s?timeout_s:"(unset)",
- function?function:"(unset)");
- }
- else {
- int timeout = str2i(timeout_s);
- if(timeout <= 0) timeout = SYSTEMD_JOURNAL_DEFAULT_TIMEOUT;
- netdata_mutex_lock(&mutex);
+ // ------------------------------------------------------------------------
+ // complete the request object
+
+ buffer_json_member_add_boolean(wb, JOURNAL_PARAMETER_INFO, false);
+ buffer_json_member_add_boolean(wb, JOURNAL_PARAMETER_SLICE, fqs->slice);
+ buffer_json_member_add_boolean(wb, JOURNAL_PARAMETER_DATA_ONLY, fqs->data_only);
+ buffer_json_member_add_boolean(wb, JOURNAL_PARAMETER_PROGRESS, false);
+ buffer_json_member_add_boolean(wb, JOURNAL_PARAMETER_DELTA, fqs->delta);
+ buffer_json_member_add_boolean(wb, JOURNAL_PARAMETER_TAIL, fqs->tail);
+ buffer_json_member_add_uint64(wb, JOURNAL_PARAMETER_SAMPLING, fqs->sampling);
+ buffer_json_member_add_string(wb, JOURNAL_PARAMETER_ID, progress_id);
+ buffer_json_member_add_uint64(wb, "source_type", fqs->source_type);
+ buffer_json_member_add_uint64(wb, JOURNAL_PARAMETER_AFTER, fqs->after_ut / USEC_PER_SEC);
+ buffer_json_member_add_uint64(wb, JOURNAL_PARAMETER_BEFORE, fqs->before_ut / USEC_PER_SEC);
+ buffer_json_member_add_uint64(wb, "if_modified_since", fqs->if_modified_since);
+ buffer_json_member_add_uint64(wb, JOURNAL_PARAMETER_ANCHOR, anchor);
+ buffer_json_member_add_string(wb, JOURNAL_PARAMETER_DIRECTION, fqs->direction == FACETS_ANCHOR_DIRECTION_FORWARD ? "forward" : "backward");
+ buffer_json_member_add_uint64(wb, JOURNAL_PARAMETER_LAST, fqs->entries);
+ buffer_json_member_add_string(wb, JOURNAL_PARAMETER_QUERY, fqs->query);
+ buffer_json_member_add_string(wb, JOURNAL_PARAMETER_HISTOGRAM, fqs->histogram);
+ buffer_json_object_close(wb); // request
- if(strncmp(function, SYSTEMD_JOURNAL_FUNCTION_NAME, strlen(SYSTEMD_JOURNAL_FUNCTION_NAME)) == 0)
- function_systemd_journal(transaction, function, buffer, PLUGINSD_LINE_MAX + 1, timeout);
- else
- pluginsd_function_json_error(transaction, HTTP_RESP_NOT_FOUND, "No function with this name found in systemd-journal.plugin.");
+ buffer_json_journal_versions(wb);
- fflush(stdout);
- netdata_mutex_unlock(&mutex);
+ // ------------------------------------------------------------------------
+ // run the request
+
+ int response;
+
+ if(info) {
+ facets_accepted_parameters_to_json_array(facets, wb, false);
+ buffer_json_member_add_array(wb, "required_params");
+ {
+ buffer_json_add_array_item_object(wb);
+ {
+ buffer_json_member_add_string(wb, "id", "source");
+ buffer_json_member_add_string(wb, "name", "source");
+ buffer_json_member_add_string(wb, "help", "Select the SystemD Journal source to query");
+ buffer_json_member_add_string(wb, "type", "multiselect");
+ buffer_json_member_add_array(wb, "options");
+ {
+ available_journal_file_sources_to_json_array(wb);
+ }
+ buffer_json_array_close(wb); // options array
}
+ buffer_json_object_close(wb); // required params object
}
- else
- netdata_log_error("Received unknown command: %s", keyword?keyword:"(unset)");
- }
-
- if(!s || feof(stdin) || ferror(stdin)) {
- plugin_should_exit = true;
- netdata_log_error("Received error on stdin.");
- }
-
- exit(1);
-}
-
-int main(int argc __maybe_unused, char **argv __maybe_unused) {
- stderror = stderr;
- clocks_init();
-
- program_name = "systemd-journal.plugin";
-
- // disable syslog
- error_log_syslog = 0;
-
- // set errors flood protection to 100 logs per hour
- error_log_errors_per_period = 100;
- error_log_throttle_period = 3600;
-
- // initialize the threads
- netdata_threads_init_for_external_plugins(0); // set the default threads stack size here
-
- uids = dictionary_create(0);
- gids = dictionary_create(0);
+ buffer_json_array_close(wb); // required_params array
- // ------------------------------------------------------------------------
- // debug
+ facets_table_config(wb);
- if(argc == 2 && strcmp(argv[1], "debug") == 0) {
- char buf[] = "systemd-journal after:-86400 before:0 last:500";
- function_systemd_journal("123", buf, "", 0, 30);
- exit(1);
+ buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
+ buffer_json_member_add_string(wb, "type", "table");
+ buffer_json_member_add_string(wb, "help", SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION);
+ buffer_json_finalize(wb);
+ response = HTTP_RESP_OK;
+ goto output;
}
- // ------------------------------------------------------------------------
+ if(progress) {
+ function_systemd_journal_progress(wb, transaction, progress_id);
+ goto cleanup;
+ }
- netdata_thread_t reader_thread;
- netdata_thread_create(&reader_thread, "SDJ_READER", NETDATA_THREAD_OPTION_DONT_LOG, reader_main, NULL);
+ response = netdata_systemd_journal_query(wb, facets, fqs);
// ------------------------------------------------------------------------
+ // handle error response
- time_t started_t = now_monotonic_sec();
-
- size_t iteration;
- usec_t step = 1000 * USEC_PER_MS;
- bool tty = isatty(fileno(stderr)) == 1;
-
- netdata_mutex_lock(&mutex);
- fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " GLOBAL \"%s\" %d \"%s\"\n",
- SYSTEMD_JOURNAL_FUNCTION_NAME, SYSTEMD_JOURNAL_DEFAULT_TIMEOUT, SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION);
-
- heartbeat_t hb;
- heartbeat_init(&hb);
- for(iteration = 0; 1 ; iteration++) {
- netdata_mutex_unlock(&mutex);
- heartbeat_next(&hb, step);
- netdata_mutex_lock(&mutex);
+ if(response != HTTP_RESP_OK) {
+ netdata_mutex_lock(&stdout_mutex);
+ pluginsd_function_json_error_to_stdout(transaction, response, "failed");
+ netdata_mutex_unlock(&stdout_mutex);
+ goto cleanup;
+ }
- if(!tty)
- fprintf(stdout, "\n");
+output:
+ netdata_mutex_lock(&stdout_mutex);
+ pluginsd_function_result_to_stdout(transaction, response, "application/json", expires, wb);
+ netdata_mutex_unlock(&stdout_mutex);
- fflush(stdout);
+cleanup:
+ simple_pattern_free(sources);
+ facets_destroy(facets);
+ buffer_free(wb);
- time_t now = now_monotonic_sec();
- if(now - started_t > 86400)
- break;
+ if(fqs_item) {
+ dictionary_del(function_query_status_dict, dictionary_acquired_item_name(fqs_item));
+ dictionary_acquired_item_release(function_query_status_dict, fqs_item);
+ dictionary_garbage_collect(function_query_status_dict);
}
+}
- dictionary_destroy(uids);
- dictionary_destroy(gids);
-
- exit(0);
+void journal_init_query_status(void) { // creates the global dictionary that tracks queries registered under a progress id
+ function_query_status_dict = dictionary_create_advanced(
+ DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, // option semantics are defined by the dictionary library
+ NULL, sizeof(FUNCTION_QUERY_STATUS)); // the fixed size given is that of one FUNCTION_QUERY_STATUS
}
diff --git a/collectors/systemd-journal.plugin/systemd-main.c b/collectors/systemd-journal.plugin/systemd-main.c
new file mode 100644
index 00000000000000..d335fd82befb3c
--- /dev/null
+++ b/collectors/systemd-journal.plugin/systemd-main.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "systemd-internals.h"
+#include "libnetdata/required_dummies.h"
+
+#define SYSTEMD_JOURNAL_WORKER_THREADS 5 // worker count handed to functions_evloop_init() in main()
+
+netdata_mutex_t stdout_mutex = NETDATA_MUTEX_INITIALIZER; // held while main() prints the function registrations to stdout; also passed to the event loop
+static bool plugin_should_exit = false; // main() loops until this becomes true; the event loop receives a pointer to it
+
+int main(int argc __maybe_unused, char **argv __maybe_unused) { // plugin entry point: initialize, optionally run one debug query, register functions, then loop until plugin_should_exit
+ clocks_init();
+ netdata_thread_set_tag("SDMAIN");
+ nd_log_initialize_for_external_plugins("systemd-journal.plugin");
+
+ netdata_configured_host_prefix = getenv("NETDATA_HOST_PREFIX");
+ if(verify_netdata_host_prefix() == -1) exit(1);
+
+ // ------------------------------------------------------------------------
+ // initialization
+
+ netdata_systemd_journal_message_ids_init();
+ journal_init_query_status();
+ journal_init_files_and_directories();
+
+ // ------------------------------------------------------------------------
+ // debug: with "debug" as the only argument, run one hard-coded journal query in-process and exit
+
+ if(argc == 2 && strcmp(argv[1], "debug") == 0) {
+ journal_files_registry_update();
+
+ bool cancelled = false;
+ char buf[] = "systemd-journal after:-8640000 before:0 direction:backward last:200 data_only:false slice:true source:all";
+ // char buf[] = "systemd-journal after:1695332964 before:1695937764 direction:backward last:100 slice:true source:all DHKucpqUoe1:PtVoyIuX.MU";
+ // char buf[] = "systemd-journal after:1694511062 before:1694514662 anchor:1694514122024403";
+ function_systemd_journal("123", buf, 600, &cancelled);
+// function_systemd_units("123", "systemd-units", 600, &cancelled);
+ exit(1); // a debug run always exits with status 1
+ }
+#ifdef ENABLE_SYSTEMD_DBUS
+ if(argc == 2 && strcmp(argv[1], "debug-units") == 0) { // same idea for the systemd-units function
+ bool cancelled = false;
+ function_systemd_units("123", "systemd-units", 600, &cancelled);
+ exit(1);
+ }
+#endif
+
+ // ------------------------------------------------------------------------
+ // watcher thread: journal_watcher_main() runs in its own thread
+
+ netdata_thread_t watcher_thread;
+ netdata_thread_create(&watcher_thread, "SDWATCH",
+ NETDATA_THREAD_OPTION_DONT_LOG, journal_watcher_main, NULL);
+
+ // ------------------------------------------------------------------------
+ // the event loop for functions: it is given the worker count, stdout_mutex and a pointer to plugin_should_exit
+
+ struct functions_evloop_globals *wg =
+ functions_evloop_init(SYSTEMD_JOURNAL_WORKER_THREADS, "SDJ", &stdout_mutex, &plugin_should_exit);
+
+ functions_evloop_add_function(wg, SYSTEMD_JOURNAL_FUNCTION_NAME, function_systemd_journal,
+ SYSTEMD_JOURNAL_DEFAULT_TIMEOUT);
+
+#ifdef ENABLE_SYSTEMD_DBUS
+ functions_evloop_add_function(wg, SYSTEMD_UNITS_FUNCTION_NAME, function_systemd_units,
+ SYSTEMD_UNITS_DEFAULT_TIMEOUT);
+#endif
+
+ // ------------------------------------------------------------------------
+ // register functions to netdata: one FUNCTION line per function on stdout, written under stdout_mutex
+
+ netdata_mutex_lock(&stdout_mutex);
+
+ fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " GLOBAL \"%s\" %d \"%s\"\n",
+ SYSTEMD_JOURNAL_FUNCTION_NAME, SYSTEMD_JOURNAL_DEFAULT_TIMEOUT, SYSTEMD_JOURNAL_FUNCTION_DESCRIPTION);
+
+#ifdef ENABLE_SYSTEMD_DBUS
+ fprintf(stdout, PLUGINSD_KEYWORD_FUNCTION " GLOBAL \"%s\" %d \"%s\"\n",
+ SYSTEMD_UNITS_FUNCTION_NAME, SYSTEMD_UNITS_DEFAULT_TIMEOUT, SYSTEMD_UNITS_FUNCTION_DESCRIPTION);
+#endif
+
+ fflush(stdout);
+ netdata_mutex_unlock(&stdout_mutex);
+
+ // ------------------------------------------------------------------------
+ // main loop: rescan the journal files periodically and, when not on a tty, call send_newline_and_flush() about once per second
+ usec_t step_ut = 100 * USEC_PER_MS; // heartbeat period: 100 milliseconds
+ usec_t send_newline_ut = 0;
+ usec_t since_last_scan_ut = SYSTEMD_JOURNAL_ALL_FILES_SCAN_EVERY_USEC * 2; // something big to trigger scanning at start
+ bool tty = isatty(fileno(stderr)) == 1; // NOTE(review): this tests stderr, not stdout - confirm that is intended
+
+ heartbeat_t hb;
+ heartbeat_init(&hb);
+ while(!plugin_should_exit) {
+
+ if(since_last_scan_ut > SYSTEMD_JOURNAL_ALL_FILES_SCAN_EVERY_USEC) {
+ journal_files_registry_update();
+ since_last_scan_ut = 0;
+ }
+
+ usec_t dt_ut = heartbeat_next(&hb, step_ut); // presumably the time elapsed since the previous tick - it is added to both timers below
+ since_last_scan_ut += dt_ut;
+ send_newline_ut += dt_ut;
+
+ if(!tty && send_newline_ut > USEC_PER_SEC) {
+ send_newline_and_flush();
+ send_newline_ut = 0;
+ }
+ }
+
+ exit(0);
+}
diff --git a/collectors/systemd-journal.plugin/systemd-units.c b/collectors/systemd-journal.plugin/systemd-units.c
new file mode 100644
index 00000000000000..dac15881748ab3
--- /dev/null
+++ b/collectors/systemd-journal.plugin/systemd-units.c
@@ -0,0 +1,1965 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "systemd-internals.h"
+
+#ifdef ENABLE_SYSTEMD_DBUS
+#include
+
+#define SYSTEMD_UNITS_MAX_PARAMS 10 // presumably the cap on parsed function parameters - usage is outside this excerpt, confirm
+#define SYSTEMD_UNITS_DBUS_TYPES "(ssssssouso)" // presumably the D-Bus type signature of one unit record - confirm against the sd-bus calls
+
+// ----------------------------------------------------------------------------
+// copied from systemd: string-table.h
+
+typedef char sd_char; // character type used by the string helpers below
+#define XCONCATENATE(x, y) x ## y // raw token paste
+#define CONCATENATE(x, y) XCONCATENATE(x, y) // token paste after macro-expanding both arguments
+// VOID_0: what ELEMENTSOF() yields for a non-array; a void expression, or a void pointer when __COVERITY__ is defined
+#ifndef __COVERITY__
+# define VOID_0 ((void)0)
+#else
+# define VOID_0 ((void*)0)
+#endif
+// ELEMENTSOF(x): element count of array x; when typeof(x) is compatible with typeof(&*(x)) it yields VOID_0 instead of a count
+#define ELEMENTSOF(x) \
+ (__builtin_choose_expr( \
+ !__builtin_types_compatible_p(typeof(x), typeof(&*(x))), \
+ sizeof(x)/sizeof((x)[0]), \
+ VOID_0))
+// CMP(a, b): three-way comparison giving -1, 0 or 1; each operand is copied once into a temporary whose name is made unique with __COUNTER__
+#define UNIQ_T(x, uniq) CONCATENATE(__unique_prefix_, CONCATENATE(x, uniq))
+#define UNIQ __COUNTER__
+#define __CMP(aq, a, bq, b) \
+ ({ \
+ const typeof(a) UNIQ_T(A, aq) = (a); \
+ const typeof(b) UNIQ_T(B, bq) = (b); \
+ UNIQ_T(A, aq) < UNIQ_T(B, bq) ? -1 : \
+ UNIQ_T(A, aq) > UNIQ_T(B, bq) ? 1 : 0; \
+ })
+#define CMP(a, b) __CMP(UNIQ, (a), UNIQ, (b))
+
+static inline int strcmp_ptr(const sd_char *a, const sd_char *b) { // strcmp() that tolerates NULL arguments
+ if (a && b)
+ return strcmp(a, b);
+
+ return CMP(a, b); // at least one is NULL: compare the pointer values, so two NULLs compare equal
+}
+
+static inline bool streq_ptr(const sd_char *a, const sd_char *b) { // true when the strings are equal or both pointers are NULL
+ return strcmp_ptr(a, b) == 0;
+}
+
+ssize_t string_table_lookup(const char * const *table, size_t len, const char *key) { // returns the index of key within the first len entries of table, or -EINVAL
+ if (!key || !*key) // a NULL or empty key never matches
+ return -EINVAL;
+
+ for (size_t i = 0; i < len; ++i)
+ if (streq_ptr(table[i], key)) // NULL table slots are safe here: streq_ptr() accepts NULL
+ return (ssize_t) i;
+
+ return -EINVAL;
+}
+
+/* For basic lookup tables with strictly enumerated entries: given an array called name##_table, these macros generate its lookup functions. name##_to_string() returns NULL for out-of-range values. */
+#define _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \
+ scope const char *name##_to_string(type i) { \
+ if (i < 0 || i >= (type) ELEMENTSOF(name##_table)) \
+ return NULL; \
+ return name##_table[i]; \
+ }
+// name##_from_string(): searches via string_table_lookup(); a miss returns its -EINVAL result cast to type
+#define _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope) \
+ scope type name##_from_string(const char *s) { \
+ return (type) string_table_lookup(name##_table, ELEMENTSOF(name##_table), s); \
+ }
+// generates both directions with the given scope qualifier
+#define _DEFINE_STRING_TABLE_LOOKUP(name,type,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_TO_STRING(name,type,scope) \
+ _DEFINE_STRING_TABLE_LOOKUP_FROM_STRING(name,type,scope)
+// public form: passes an empty scope, so no storage-class qualifier is put on the generated functions
+#define DEFINE_STRING_TABLE_LOOKUP(name,type) _DEFINE_STRING_TABLE_LOOKUP(name,type,)
+
+// ----------------------------------------------------------------------------
+// copied from systemd: unit-def.h
+
+typedef enum UnitType { // kinds of systemd unit; the values index unit_type_table
+ UNIT_SERVICE,
+ UNIT_MOUNT,
+ UNIT_SWAP,
+ UNIT_SOCKET,
+ UNIT_TARGET,
+ UNIT_DEVICE,
+ UNIT_AUTOMOUNT,
+ UNIT_TIMER,
+ UNIT_PATH,
+ UNIT_SLICE,
+ UNIT_SCOPE,
+ _UNIT_TYPE_MAX, // number of real unit types; used as the size of unit_type_table
+ _UNIT_TYPE_INVALID = -EINVAL, // the value unit_type_from_string() yields for an unknown name
+} UnitType;
+
+typedef enum UnitLoadState { // load states of a unit; the values index unit_load_state_table
+ UNIT_STUB,
+ UNIT_LOADED,
+ UNIT_NOT_FOUND, /* error condition #1: unit file not found */
+ UNIT_BAD_SETTING, /* error condition #2: we couldn't parse some essential unit file setting */
+ UNIT_ERROR, /* error condition #3: other "system" error, catchall for the rest */
+ UNIT_MERGED,
+ UNIT_MASKED,
+ _UNIT_LOAD_STATE_MAX, // number of real load states; used as the size of unit_load_state_table
+ _UNIT_LOAD_STATE_INVALID = -EINVAL, // the value unit_load_state_from_string() yields for an unknown name
+} UnitLoadState;
+
+typedef enum UnitActiveState { // activation states of a unit; the values index unit_active_state_table
+ UNIT_ACTIVE,
+ UNIT_RELOADING,
+ UNIT_INACTIVE,
+ UNIT_FAILED,
+ UNIT_ACTIVATING,
+ UNIT_DEACTIVATING,
+ UNIT_MAINTENANCE,
+ _UNIT_ACTIVE_STATE_MAX, // number of real active states; used as the size of unit_active_state_table
+ _UNIT_ACTIVE_STATE_INVALID = -EINVAL, // the value unit_active_state_from_string() yields for an unknown name
+} UnitActiveState;
+
+typedef enum AutomountState { // states of an automount unit; the values index automount_state_table
+ AUTOMOUNT_DEAD,
+ AUTOMOUNT_WAITING,
+ AUTOMOUNT_RUNNING,
+ AUTOMOUNT_FAILED,
+ _AUTOMOUNT_STATE_MAX, // number of real states; used as the size of automount_state_table
+ _AUTOMOUNT_STATE_INVALID = -EINVAL, // the value automount_state_from_string() yields for an unknown name
+} AutomountState;
+
+typedef enum DeviceState { // states of a device unit; the values index device_state_table
+ DEVICE_DEAD,
+ DEVICE_TENTATIVE, /* mounted or swapped, but not (yet) announced by udev */
+ DEVICE_PLUGGED, /* announced by udev */
+ _DEVICE_STATE_MAX, // number of real states; used as the size of device_state_table
+ _DEVICE_STATE_INVALID = -EINVAL, // the value device_state_from_string() yields for an unknown name
+} DeviceState;
+
+typedef enum MountState { // states of a mount unit; the values index mount_state_table
+ MOUNT_DEAD,
+ MOUNT_MOUNTING, /* /usr/bin/mount is running, but the mount is not done yet. */
+ MOUNT_MOUNTING_DONE, /* /usr/bin/mount is running, and the mount is done. */
+ MOUNT_MOUNTED,
+ MOUNT_REMOUNTING,
+ MOUNT_UNMOUNTING,
+ MOUNT_REMOUNTING_SIGTERM,
+ MOUNT_REMOUNTING_SIGKILL,
+ MOUNT_UNMOUNTING_SIGTERM,
+ MOUNT_UNMOUNTING_SIGKILL,
+ MOUNT_FAILED,
+ MOUNT_CLEANING,
+ _MOUNT_STATE_MAX, // number of real states; used as the size of mount_state_table
+ _MOUNT_STATE_INVALID = -EINVAL, // the value mount_state_from_string() yields for an unknown name
+} MountState;
+
+typedef enum PathState { // states of a path unit; the values index path_state_table
+ PATH_DEAD,
+ PATH_WAITING,
+ PATH_RUNNING,
+ PATH_FAILED,
+ _PATH_STATE_MAX, // number of real states; used as the size of path_state_table
+ _PATH_STATE_INVALID = -EINVAL, // the value path_state_from_string() yields for an unknown name
+} PathState;
+
+typedef enum ScopeState { // states of a scope unit
+ SCOPE_DEAD,
+ SCOPE_START_CHOWN,
+ SCOPE_RUNNING,
+ SCOPE_ABANDONED,
+ SCOPE_STOP_SIGTERM,
+ SCOPE_STOP_SIGKILL,
+ SCOPE_FAILED,
+ _SCOPE_STATE_MAX, // one past the last real state, i.e. the number of states
+ _SCOPE_STATE_INVALID = -EINVAL, // negative sentinel, outside the range of real states
+} ScopeState;
+
+typedef enum ServiceState { // states of a service unit
+ SERVICE_DEAD,
+ SERVICE_CONDITION,
+ SERVICE_START_PRE,
+ SERVICE_START,
+ SERVICE_START_POST,
+ SERVICE_RUNNING,
+ SERVICE_EXITED, /* Nothing is running anymore, but RemainAfterExit is true hence this is OK */
+ SERVICE_RELOAD, /* Reloading via ExecReload= */
+ SERVICE_RELOAD_SIGNAL, /* Reloading via SIGHUP requested */
+ SERVICE_RELOAD_NOTIFY, /* Waiting for READY=1 after RELOADING=1 notify */
+ SERVICE_STOP, /* No STOP_PRE state, instead just register multiple STOP executables */
+ SERVICE_STOP_WATCHDOG,
+ SERVICE_STOP_SIGTERM,
+ SERVICE_STOP_SIGKILL,
+ SERVICE_STOP_POST,
+ SERVICE_FINAL_WATCHDOG, /* In case the STOP_POST executable needs to be aborted. */
+ SERVICE_FINAL_SIGTERM, /* In case the STOP_POST executable hangs, we shoot that down, too */
+ SERVICE_FINAL_SIGKILL,
+ SERVICE_FAILED,
+ SERVICE_DEAD_BEFORE_AUTO_RESTART,
+ SERVICE_FAILED_BEFORE_AUTO_RESTART,
+ SERVICE_DEAD_RESOURCES_PINNED, /* Like SERVICE_DEAD, but with pinned resources */
+ SERVICE_AUTO_RESTART,
+ SERVICE_AUTO_RESTART_QUEUED,
+ SERVICE_CLEANING,
+ _SERVICE_STATE_MAX, // one past the last real state, i.e. the number of states
+ _SERVICE_STATE_INVALID = -EINVAL, // negative sentinel, outside the range of real states
+} ServiceState;
+
+typedef enum SliceState { // states of a slice unit
+ SLICE_DEAD,
+ SLICE_ACTIVE,
+ _SLICE_STATE_MAX, // one past the last real state, i.e. the number of states
+ _SLICE_STATE_INVALID = -EINVAL, // negative sentinel, outside the range of real states
+} SliceState;
+
+typedef enum SocketState { // states of a socket unit
+ SOCKET_DEAD,
+ SOCKET_START_PRE,
+ SOCKET_START_CHOWN,
+ SOCKET_START_POST,
+ SOCKET_LISTENING,
+ SOCKET_RUNNING,
+ SOCKET_STOP_PRE,
+ SOCKET_STOP_PRE_SIGTERM,
+ SOCKET_STOP_PRE_SIGKILL,
+ SOCKET_STOP_POST,
+ SOCKET_FINAL_SIGTERM,
+ SOCKET_FINAL_SIGKILL,
+ SOCKET_FAILED,
+ SOCKET_CLEANING,
+ _SOCKET_STATE_MAX, // one past the last real state, i.e. the number of states
+ _SOCKET_STATE_INVALID = -EINVAL, // negative sentinel, outside the range of real states
+} SocketState;
+
+typedef enum SwapState { // states of a swap unit
+ SWAP_DEAD,
+ SWAP_ACTIVATING, /* /sbin/swapon is running, but the swap not yet enabled. */
+ SWAP_ACTIVATING_DONE, /* /sbin/swapon is running, and the swap is done. */
+ SWAP_ACTIVE,
+ SWAP_DEACTIVATING,
+ SWAP_DEACTIVATING_SIGTERM,
+ SWAP_DEACTIVATING_SIGKILL,
+ SWAP_FAILED,
+ SWAP_CLEANING,
+ _SWAP_STATE_MAX, // one past the last real state, i.e. the number of states
+ _SWAP_STATE_INVALID = -EINVAL, // negative sentinel, outside the range of real states
+} SwapState;
+
+typedef enum TargetState { // states of a target unit
+ TARGET_DEAD,
+ TARGET_ACTIVE,
+ _TARGET_STATE_MAX, // one past the last real state, i.e. the number of states
+ _TARGET_STATE_INVALID = -EINVAL, // negative sentinel, outside the range of real states
+} TargetState;
+
+typedef enum TimerState { // states of a timer unit
+ TIMER_DEAD,
+ TIMER_WAITING,
+ TIMER_RUNNING,
+ TIMER_ELAPSED,
+ TIMER_FAILED,
+ _TIMER_STATE_MAX, // one past the last real state, i.e. the number of states
+ _TIMER_STATE_INVALID = -EINVAL, // negative sentinel, outside the range of real states
+} TimerState;
+
+typedef enum FreezerState { // freezer states (running, freezing, frozen, thawing)
+ FREEZER_RUNNING,
+ FREEZER_FREEZING,
+ FREEZER_FROZEN,
+ FREEZER_THAWING,
+ _FREEZER_STATE_MAX, // one past the last real state, i.e. the number of states
+ _FREEZER_STATE_INVALID = -EINVAL, // negative sentinel, outside the range of real states
+} FreezerState;
+
+// ----------------------------------------------------------------------------
+// copied from systemd: unit-def.c
+
+static const char* const unit_type_table[_UNIT_TYPE_MAX] = { // name of each UnitType, indexed by enum value
+ [UNIT_SERVICE] = "service",
+ [UNIT_SOCKET] = "socket",
+ [UNIT_TARGET] = "target",
+ [UNIT_DEVICE] = "device",
+ [UNIT_MOUNT] = "mount",
+ [UNIT_AUTOMOUNT] = "automount",
+ [UNIT_SWAP] = "swap",
+ [UNIT_TIMER] = "timer",
+ [UNIT_PATH] = "path",
+ [UNIT_SLICE] = "slice",
+ [UNIT_SCOPE] = "scope",
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_type, UnitType); // generates unit_type_to_string() and unit_type_from_string()
+
+static const char* const unit_load_state_table[_UNIT_LOAD_STATE_MAX] = { // name of each UnitLoadState, indexed by enum value
+ [UNIT_STUB] = "stub",
+ [UNIT_LOADED] = "loaded",
+ [UNIT_NOT_FOUND] = "not-found",
+ [UNIT_BAD_SETTING] = "bad-setting",
+ [UNIT_ERROR] = "error",
+ [UNIT_MERGED] = "merged",
+ [UNIT_MASKED] = "masked"
+};
+
+DEFINE_STRING_TABLE_LOOKUP(unit_load_state, UnitLoadState); // generates unit_load_state_to_string() and unit_load_state_from_string()
+
+// Textual name of every UnitActiveState, as found in the active_state field of a unit.
+static const char* const unit_active_state_table[_UNIT_ACTIVE_STATE_MAX] = {
+    [UNIT_ACTIVE] = "active",
+    [UNIT_RELOADING] = "reloading",
+    [UNIT_INACTIVE] = "inactive",
+    [UNIT_FAILED] = "failed",
+    [UNIT_ACTIVATING] = "activating",
+    [UNIT_DEACTIVATING] = "deactivating",
+    [UNIT_MAINTENANCE] = "maintenance",
+};
+
+// The macro is defined outside this section; it is expected to provide
+// unit_active_state_from_string(), which bus_parse_unit_info() calls.
+DEFINE_STRING_TABLE_LOOKUP(unit_active_state, UnitActiveState);
+
+// Textual name of every AutomountState (the sub_state of automount units).
+static const char* const automount_state_table[_AUTOMOUNT_STATE_MAX] = {
+    [AUTOMOUNT_DEAD] = "dead",
+    [AUTOMOUNT_WAITING] = "waiting",
+    [AUTOMOUNT_RUNNING] = "running",
+    [AUTOMOUNT_FAILED] = "failed"
+};
+
+// The macro is defined outside this section; it is expected to provide
+// automount_state_from_string(), which bus_parse_unit_info() calls.
+DEFINE_STRING_TABLE_LOOKUP(automount_state, AutomountState);
+
+// Textual name of every DeviceState (the sub_state of device units).
+static const char* const device_state_table[_DEVICE_STATE_MAX] = {
+    [DEVICE_DEAD] = "dead",
+    [DEVICE_TENTATIVE] = "tentative",
+    [DEVICE_PLUGGED] = "plugged",
+};
+
+// The macro is defined outside this section; it is expected to provide
+// device_state_from_string(), which bus_parse_unit_info() calls.
+DEFINE_STRING_TABLE_LOOKUP(device_state, DeviceState);
+
+// Textual name of every MountState (the sub_state of mount units).
+static const char* const mount_state_table[_MOUNT_STATE_MAX] = {
+    [MOUNT_DEAD] = "dead",
+    [MOUNT_MOUNTING] = "mounting",
+    [MOUNT_MOUNTING_DONE] = "mounting-done",
+    [MOUNT_MOUNTED] = "mounted",
+    [MOUNT_REMOUNTING] = "remounting",
+    [MOUNT_UNMOUNTING] = "unmounting",
+    [MOUNT_REMOUNTING_SIGTERM] = "remounting-sigterm",
+    [MOUNT_REMOUNTING_SIGKILL] = "remounting-sigkill",
+    [MOUNT_UNMOUNTING_SIGTERM] = "unmounting-sigterm",
+    [MOUNT_UNMOUNTING_SIGKILL] = "unmounting-sigkill",
+    [MOUNT_FAILED] = "failed",
+    [MOUNT_CLEANING] = "cleaning",
+};
+
+// The macro is defined outside this section; it is expected to provide
+// mount_state_from_string(), which bus_parse_unit_info() calls.
+DEFINE_STRING_TABLE_LOOKUP(mount_state, MountState);
+
+// Textual name of every PathState (the sub_state of path units).
+static const char* const path_state_table[_PATH_STATE_MAX] = {
+    [PATH_DEAD] = "dead",
+    [PATH_WAITING] = "waiting",
+    [PATH_RUNNING] = "running",
+    [PATH_FAILED] = "failed"
+};
+
+// The macro is defined outside this section; it is expected to provide
+// path_state_from_string(), which bus_parse_unit_info() calls.
+DEFINE_STRING_TABLE_LOOKUP(path_state, PathState);
+
+// Textual name of every ScopeState (the sub_state of scope units).
+static const char* const scope_state_table[_SCOPE_STATE_MAX] = {
+    [SCOPE_DEAD] = "dead",
+    [SCOPE_START_CHOWN] = "start-chown",
+    [SCOPE_RUNNING] = "running",
+    [SCOPE_ABANDONED] = "abandoned",
+    [SCOPE_STOP_SIGTERM] = "stop-sigterm",
+    [SCOPE_STOP_SIGKILL] = "stop-sigkill",
+    [SCOPE_FAILED] = "failed",
+};
+
+// The macro is defined outside this section; it is expected to provide
+// scope_state_from_string(), which bus_parse_unit_info() calls.
+DEFINE_STRING_TABLE_LOOKUP(scope_state, ScopeState);
+
+// Textual name of every ServiceState (the sub_state of service units).
+static const char* const service_state_table[_SERVICE_STATE_MAX] = {
+    [SERVICE_DEAD] = "dead",
+    [SERVICE_CONDITION] = "condition",
+    [SERVICE_START_PRE] = "start-pre",
+    [SERVICE_START] = "start",
+    [SERVICE_START_POST] = "start-post",
+    [SERVICE_RUNNING] = "running",
+    [SERVICE_EXITED] = "exited",
+    [SERVICE_RELOAD] = "reload",
+    [SERVICE_RELOAD_SIGNAL] = "reload-signal",
+    [SERVICE_RELOAD_NOTIFY] = "reload-notify",
+    [SERVICE_STOP] = "stop",
+    [SERVICE_STOP_WATCHDOG] = "stop-watchdog",
+    [SERVICE_STOP_SIGTERM] = "stop-sigterm",
+    [SERVICE_STOP_SIGKILL] = "stop-sigkill",
+    [SERVICE_STOP_POST] = "stop-post",
+    [SERVICE_FINAL_WATCHDOG] = "final-watchdog",
+    [SERVICE_FINAL_SIGTERM] = "final-sigterm",
+    [SERVICE_FINAL_SIGKILL] = "final-sigkill",
+    [SERVICE_FAILED] = "failed",
+    [SERVICE_DEAD_BEFORE_AUTO_RESTART] = "dead-before-auto-restart",
+    [SERVICE_FAILED_BEFORE_AUTO_RESTART] = "failed-before-auto-restart",
+    [SERVICE_DEAD_RESOURCES_PINNED] = "dead-resources-pinned",
+    [SERVICE_AUTO_RESTART] = "auto-restart",
+    [SERVICE_AUTO_RESTART_QUEUED] = "auto-restart-queued",
+    [SERVICE_CLEANING] = "cleaning",
+};
+
+// The macro is defined outside this section; it is expected to provide
+// service_state_from_string(), which bus_parse_unit_info() calls.
+DEFINE_STRING_TABLE_LOOKUP(service_state, ServiceState);
+
+// Textual name of every SliceState (the sub_state of slice units).
+static const char* const slice_state_table[_SLICE_STATE_MAX] = {
+    [SLICE_DEAD] = "dead",
+    [SLICE_ACTIVE] = "active"
+};
+
+// The macro is defined outside this section; it is expected to provide
+// slice_state_from_string(), which bus_parse_unit_info() calls.
+DEFINE_STRING_TABLE_LOOKUP(slice_state, SliceState);
+
+// Textual name of every SocketState (the sub_state of socket units).
+static const char* const socket_state_table[_SOCKET_STATE_MAX] = {
+    [SOCKET_DEAD] = "dead",
+    [SOCKET_START_PRE] = "start-pre",
+    [SOCKET_START_CHOWN] = "start-chown",
+    [SOCKET_START_POST] = "start-post",
+    [SOCKET_LISTENING] = "listening",
+    [SOCKET_RUNNING] = "running",
+    [SOCKET_STOP_PRE] = "stop-pre",
+    [SOCKET_STOP_PRE_SIGTERM] = "stop-pre-sigterm",
+    [SOCKET_STOP_PRE_SIGKILL] = "stop-pre-sigkill",
+    [SOCKET_STOP_POST] = "stop-post",
+    [SOCKET_FINAL_SIGTERM] = "final-sigterm",
+    [SOCKET_FINAL_SIGKILL] = "final-sigkill",
+    [SOCKET_FAILED] = "failed",
+    [SOCKET_CLEANING] = "cleaning",
+};
+
+// The macro is defined outside this section; it is expected to provide
+// socket_state_from_string(), which bus_parse_unit_info() calls.
+DEFINE_STRING_TABLE_LOOKUP(socket_state, SocketState);
+
+// Textual name of every SwapState (the sub_state of swap units).
+static const char* const swap_state_table[_SWAP_STATE_MAX] = {
+    [SWAP_DEAD] = "dead",
+    [SWAP_ACTIVATING] = "activating",
+    [SWAP_ACTIVATING_DONE] = "activating-done",
+    [SWAP_ACTIVE] = "active",
+    [SWAP_DEACTIVATING] = "deactivating",
+    [SWAP_DEACTIVATING_SIGTERM] = "deactivating-sigterm",
+    [SWAP_DEACTIVATING_SIGKILL] = "deactivating-sigkill",
+    [SWAP_FAILED] = "failed",
+    [SWAP_CLEANING] = "cleaning",
+};
+
+// The macro is defined outside this section; it is expected to provide
+// swap_state_from_string(), which bus_parse_unit_info() calls.
+DEFINE_STRING_TABLE_LOOKUP(swap_state, SwapState);
+
+// Textual name of every TargetState (the sub_state of target units).
+static const char* const target_state_table[_TARGET_STATE_MAX] = {
+    [TARGET_DEAD] = "dead",
+    [TARGET_ACTIVE] = "active"
+};
+
+// The macro is defined outside this section; it is expected to provide
+// target_state_from_string(), which bus_parse_unit_info() calls.
+DEFINE_STRING_TABLE_LOOKUP(target_state, TargetState);
+
+// Textual name of every TimerState (the sub_state of timer units).
+static const char* const timer_state_table[_TIMER_STATE_MAX] = {
+    [TIMER_DEAD] = "dead",
+    [TIMER_WAITING] = "waiting",
+    [TIMER_RUNNING] = "running",
+    [TIMER_ELAPSED] = "elapsed",
+    [TIMER_FAILED] = "failed"
+};
+
+// The macro is defined outside this section; it is expected to provide
+// timer_state_from_string(), which bus_parse_unit_info() calls.
+DEFINE_STRING_TABLE_LOOKUP(timer_state, TimerState);
+
+// Textual name of every FreezerState (value of the "FreezerState" unit property).
+static const char* const freezer_state_table[_FREEZER_STATE_MAX] = {
+    [FREEZER_RUNNING] = "running",
+    [FREEZER_FREEZING] = "freezing",
+    [FREEZER_FROZEN] = "frozen",
+    [FREEZER_THAWING] = "thawing",
+};
+
+// The macro is defined outside this section; it is expected to provide
+// freezer_state_from_string(), which update_freezer_state() calls.
+DEFINE_STRING_TABLE_LOOKUP(freezer_state, FreezerState);
+
+// ----------------------------------------------------------------------------
+// our code
+
+// Storage for the value of one unit property. Which union member is meaningful
+// is decided by the value_type of the matching unit_attributes[] entry.
+typedef struct UnitAttribute {
+    union {
+        int boolean;        // SD_BUS_TYPE_BOOLEAN
+        char *str;          // SD_BUS_TYPE_STRING / SD_BUS_TYPE_OBJECT_PATH; heap copy, released by systemd_unit_free_property()
+        uint64_t uint64;    // SD_BUS_TYPE_UINT64
+        int64_t int64;      // SD_BUS_TYPE_INT64
+        uint32_t uint32;    // SD_BUS_TYPE_UINT32
+        int32_t int32;      // SD_BUS_TYPE_INT32
+        double dbl;         // SD_BUS_TYPE_DOUBLE
+    };
+} UnitAttribute;
+
+// UnitInfo is defined after unit_attributes[], because its size depends on the table.
+struct UnitInfo;
+// Optional per-attribute callback; systemd_unit_get_property() invokes it after
+// storing a freshly read value into the unit.
+typedef void (*attribute_handler_t)(struct UnitInfo *u, UnitAttribute *ua);
+
+// declared here so that unit_attributes[] can reference it
+static void update_freezer_state(struct UnitInfo *u, UnitAttribute *ua);
+
+// Table of the unit properties that are kept from the Properties.GetAll reply.
+// The index of an entry is its "slot": UnitInfo.attributes[] is indexed the same way.
+struct {
+    const char *member;             // property name, compared against the names found in the reply
+    char value_type;                // sd-bus type code the value must have, otherwise the value is ignored
+
+    const char *show_as;            // presumably the name used when presenting the value (consumer is outside this section)
+    const char *info;               // presumably the human readable description (consumer is outside this section)
+    RRDF_FIELD_OPTIONS options;
+    RRDF_FIELD_FILTER filter;
+
+    attribute_handler_t handler;    // optional; called by systemd_unit_get_property() after a value is stored
+} unit_attributes[] = {
+    {
+        .member = "Type",
+        .value_type = SD_BUS_TYPE_STRING,
+        .show_as = "ServiceType",
+        .info = "Service Type",
+        .options = RRDF_FIELD_OPTS_VISIBLE,
+        .filter = RRDF_FIELD_FILTER_MULTISELECT,
+    }, {
+        .member = "Result",
+        .value_type = SD_BUS_TYPE_STRING,
+        .show_as = "Result",
+        .info = "Result",
+        .options = RRDF_FIELD_OPTS_VISIBLE,
+        .filter = RRDF_FIELD_FILTER_MULTISELECT,
+    }, {
+        .member = "UnitFileState",
+        .value_type = SD_BUS_TYPE_STRING,
+        .show_as = "Enabled",
+        .info = "Unit File State",
+        .options = RRDF_FIELD_OPTS_NONE,
+        .filter = RRDF_FIELD_FILTER_MULTISELECT,
+    }, {
+        .member = "UnitFilePreset",
+        .value_type = SD_BUS_TYPE_STRING,
+        .show_as = "Preset",
+        .info = "Unit File Preset",
+        .options = RRDF_FIELD_OPTS_NONE,
+        .filter = RRDF_FIELD_FILTER_MULTISELECT,
+    }, {
+        .member = "FreezerState",
+        .value_type = SD_BUS_TYPE_STRING,
+        .show_as = "FreezerState",
+        .info = "Freezer State",
+        .options = RRDF_FIELD_OPTS_NONE,
+        .filter = RRDF_FIELD_FILTER_MULTISELECT,
+        .handler = update_freezer_state,
+    },
+// NOTE(review): the disabled entries below look like further properties (with their
+// D-Bus signatures) that could be collected; they use a '.signature' field that the
+// struct above does not have, so they need adapting before being enabled.
+//    { .member = "Id", .signature = "s", },
+//    { .member = "LoadState", .signature = "s", },
+//    { .member = "ActiveState", .signature = "s", },
+//    { .member = "SubState", .signature = "s", },
+//    { .member = "Description", .signature = "s", },
+//    { .member = "Following", .signature = "s", },
+//    { .member = "Documentation", .signature = "as", },
+//    { .member = "FragmentPath", .signature = "s", },
+//    { .member = "SourcePath", .signature = "s", },
+//    { .member = "ControlGroup", .signature = "s", },
+//    { .member = "DropInPaths", .signature = "as", },
+//    { .member = "LoadError", .signature = "(ss)", },
+//    { .member = "TriggeredBy", .signature = "as", },
+//    { .member = "Triggers", .signature = "as", },
+//    { .member = "InactiveExitTimestamp", .signature = "t", },
+//    { .member = "InactiveExitTimestampMonotonic", .signature = "t", },
+//    { .member = "ActiveEnterTimestamp", .signature = "t", },
+//    { .member = "ActiveExitTimestamp", .signature = "t", },
+//    { .member = "RuntimeMaxUSec", .signature = "t", },
+//    { .member = "InactiveEnterTimestamp", .signature = "t", },
+//    { .member = "NeedDaemonReload", .signature = "b", },
+//    { .member = "Transient", .signature = "b", },
+//    { .member = "ExecMainPID", .signature = "u", },
+//    { .member = "MainPID", .signature = "u", },
+//    { .member = "ControlPID", .signature = "u", },
+//    { .member = "StatusText", .signature = "s", },
+//    { .member = "PIDFile", .signature = "s", },
+//    { .member = "StatusErrno", .signature = "i", },
+//    { .member = "FileDescriptorStoreMax", .signature = "u", },
+//    { .member = "NFileDescriptorStore", .signature = "u", },
+//    { .member = "ExecMainStartTimestamp", .signature = "t", },
+//    { .member = "ExecMainExitTimestamp", .signature = "t", },
+//    { .member = "ExecMainCode", .signature = "i", },
+//    { .member = "ExecMainStatus", .signature = "i", },
+//    { .member = "LogNamespace", .signature = "s", },
+//    { .member = "ConditionTimestamp", .signature = "t", },
+//    { .member = "ConditionResult", .signature = "b", },
+//    { .member = "Conditions", .signature = "a(sbbsi)", },
+//    { .member = "AssertTimestamp", .signature = "t", },
+//    { .member = "AssertResult", .signature = "b", },
+//    { .member = "Asserts", .signature = "a(sbbsi)", },
+//    { .member = "NextElapseUSecRealtime", .signature = "t", },
+//    { .member = "NextElapseUSecMonotonic", .signature = "t", },
+//    { .member = "NAccepted", .signature = "u", },
+//    { .member = "NConnections", .signature = "u", },
+//    { .member = "NRefused", .signature = "u", },
+//    { .member = "Accept", .signature = "b", },
+//    { .member = "Listen", .signature = "a(ss)", },
+//    { .member = "SysFSPath", .signature = "s", },
+//    { .member = "Where", .signature = "s", },
+//    { .member = "What", .signature = "s", },
+//    { .member = "MemoryCurrent", .signature = "t", },
+//    { .member = "MemoryAvailable", .signature = "t", },
+//    { .member = "DefaultMemoryMin", .signature = "t", },
+//    { .member = "DefaultMemoryLow", .signature = "t", },
+//    { .member = "DefaultStartupMemoryLow", .signature = "t", },
+//    { .member = "MemoryMin", .signature = "t", },
+//    { .member = "MemoryLow", .signature = "t", },
+//    { .member = "StartupMemoryLow", .signature = "t", },
+//    { .member = "MemoryHigh", .signature = "t", },
+//    { .member = "StartupMemoryHigh", .signature = "t", },
+//    { .member = "MemoryMax", .signature = "t", },
+//    { .member = "StartupMemoryMax", .signature = "t", },
+//    { .member = "MemorySwapMax", .signature = "t", },
+//    { .member = "StartupMemorySwapMax", .signature = "t", },
+//    { .member = "MemoryZSwapMax", .signature = "t", },
+//    { .member = "StartupMemoryZSwapMax", .signature = "t", },
+//    { .member = "MemoryLimit", .signature = "t", },
+//    { .member = "CPUUsageNSec", .signature = "t", },
+//    { .member = "TasksCurrent", .signature = "t", },
+//    { .member = "TasksMax", .signature = "t", },
+//    { .member = "IPIngressBytes", .signature = "t", },
+//    { .member = "IPEgressBytes", .signature = "t", },
+//    { .member = "IOReadBytes", .signature = "t", },
+//    { .member = "IOWriteBytes", .signature = "t", },
+//    { .member = "ExecCondition", .signature = "a(sasbttttuii)", },
+//    { .member = "ExecConditionEx", .signature = "a(sasasttttuii)", },
+//    { .member = "ExecStartPre", .signature = "a(sasbttttuii)", },
+//    { .member = "ExecStartPreEx", .signature = "a(sasasttttuii)", },
+//    { .member = "ExecStart", .signature = "a(sasbttttuii)", },
+//    { .member = "ExecStartEx", .signature = "a(sasasttttuii)", },
+//    { .member = "ExecStartPost", .signature = "a(sasbttttuii)", },
+//    { .member = "ExecStartPostEx", .signature = "a(sasasttttuii)", },
+//    { .member = "ExecReload", .signature = "a(sasbttttuii)", },
+//    { .member = "ExecReloadEx", .signature = "a(sasasttttuii)", },
+//    { .member = "ExecStopPre", .signature = "a(sasbttttuii)", },
+//    { .member = "ExecStop", .signature = "a(sasbttttuii)", },
+//    { .member = "ExecStopEx", .signature = "a(sasasttttuii)", },
+//    { .member = "ExecStopPost", .signature = "a(sasbttttuii)", },
+//    { .member = "ExecStopPostEx", .signature = "a(sasasttttuii)", },
+};
+
+// number of entries in unit_attributes[]; also the dimension of UnitInfo.attributes[]
+#define _UNIT_ATTRIBUTE_MAX (sizeof(unit_attributes) / sizeof(unit_attributes[0]))
+
+// Everything known about one systemd unit; the units form a doubly linked list.
+typedef struct UnitInfo {
+    // fields read from the ListUnits reply by bus_parse_unit_info();
+    // in the list built by systemd_units_get_all() every string is a private heap copy
+    char *id;
+    char *type;                 // text after the last '.' of id, or "unknown"
+    char *description;
+    char *load_state;
+    char *active_state;
+    char *sub_state;
+    char *following;
+    char *unit_path;            // D-Bus object path used to query the unit's properties
+    uint32_t job_id;
+    char *job_type;
+    char *job_path;
+
+    // enum forms of type, load_state, active_state and of the "FreezerState" property
+    UnitType UnitType;
+    UnitLoadState UnitLoadState;
+    UnitActiveState UnitActiveState;
+    FreezerState FreezerState;
+
+    // enum form of sub_state; only the member matching UnitType is set
+    union {
+        AutomountState AutomountState;
+        DeviceState DeviceState;
+        MountState MountState;
+        PathState PathState;
+        ScopeState ScopeState;
+        ServiceState ServiceState;
+        SliceState SliceState;
+        SocketState SocketState;
+        SwapState SwapState;
+        TargetState TargetState;
+        TimerState TimerState;
+    };
+
+    // property values, one per unit_attributes[] entry (same index)
+    struct UnitAttribute attributes[_UNIT_ATTRIBUTE_MAX];
+
+    FACET_ROW_SEVERITY severity;
+    uint32_t prio;              // updated by systemd_unit_priority()
+
+    struct UnitInfo *prev, *next;
+} UnitInfo;
+
+// Attribute handler of the "FreezerState" property: translates the string
+// value held by the attribute into the FreezerState enum kept on the unit.
+static void update_freezer_state(UnitInfo *u, UnitAttribute *ua) {
+    const char *state_name = ua->str;
+    u->FreezerState = freezer_state_from_string(state_name);
+}
+
+// ----------------------------------------------------------------------------
+// common helpers
+
+// Log a failed sd-bus call.
+//   r   - the negative errno-style code the call returned
+//   msg - name of the call that failed
+static void log_dbus_error(int r, const char *msg) {
+    const char *reason = strerror(-r);
+    netdata_log_error("SYSTEMD_UNITS: %s failed with error %d (%s)", msg, r, reason);
+}
+
+// ----------------------------------------------------------------------------
+// attributes management
+
+// Find the index in unit_attributes[] of the property called s.
+// Returns -EINVAL when s is NULL, empty, or not a property we collect.
+static inline ssize_t unit_property_slot_from_string(const char *s) {
+    ssize_t slot = -EINVAL;
+
+    if(s && *s) {
+        size_t idx = 0;
+        while(idx < _UNIT_ATTRIBUTE_MAX) {
+            if(streq_ptr(unit_attributes[idx].member, s)) {
+                slot = (ssize_t)idx;
+                break;
+            }
+            idx++;
+        }
+    }
+
+    return slot;
+}
+
+// Name of the property stored at slot i of unit_attributes[],
+// or NULL when i is not a valid slot.
+static inline const char *unit_property_name_to_string_from_slot(ssize_t i) {
+    if(i < 0 || i >= (ssize_t)_UNIT_ATTRIBUTE_MAX)
+        return NULL;
+
+    return unit_attributes[i].member;
+}
+
+// Release whatever the attribute owns. Only string-like values (strings and
+// object paths) hold heap memory; every other type is left untouched.
+static inline void systemd_unit_free_property(char type, struct UnitAttribute *at) {
+    if(type != SD_BUS_TYPE_STRING && type != SD_BUS_TYPE_OBJECT_PATH)
+        return;
+
+    freez(at->str);
+    at->str = NULL;
+}
+
+// Read the value the message cursor points at and store it as property 'name' of u.
+//
+// Properties that are not listed in unit_attributes[], or whose type differs from
+// the one the table expects, are skipped. Arrays and unsupported types are skipped too.
+// After a value is stored, the handler of the attribute (if any) is called.
+//
+// Returns 0 when the value was stored or skipped, or the negative code of the
+// sd-bus call that failed (the failure has been logged).
+static int systemd_unit_get_property(sd_bus_message *m, UnitInfo *u, const char *name) {
+    char type;
+    int r = sd_bus_message_peek_type(m, &type, NULL);
+    if(r < 0) {
+        log_dbus_error(r, "sd_bus_message_peek_type()");
+        return r;
+    }
+
+    ssize_t slot = unit_property_slot_from_string(name);
+    if(slot < 0) {
+        // not a property we keep
+        sd_bus_message_skip(m, NULL);
+        return 0;
+    }
+
+    UnitAttribute *value = &u->attributes[slot];
+    const char expected = unit_attributes[slot].value_type;
+
+    // drop the previous value first, so a skipped value never leaves a stale one behind
+    systemd_unit_free_property(expected, value);
+
+    if(expected != type) {
+        netdata_log_error("Type of field '%s' expected to be '%c' but found '%c'. Ignoring field.",
+                          unit_attributes[slot].member, expected, type);
+        sd_bus_message_skip(m, NULL);
+        return 0;
+    }
+
+    // scalar types are read straight into the union member selected here
+    void *scalar = NULL;
+
+    switch(type) {
+        case SD_BUS_TYPE_STRING:
+        case SD_BUS_TYPE_OBJECT_PATH: {
+            char *text;
+
+            r = sd_bus_message_read_basic(m, type, &text);
+            if(r < 0) {
+                log_dbus_error(r, "sd_bus_message_read_basic()");
+                return r;
+            }
+
+            // empty strings are stored as NULL
+            if(text && *text)
+                value->str = strdupz(text);
+
+            break;
+        }
+
+        case SD_BUS_TYPE_BOOLEAN:
+            scalar = &value->boolean;
+            break;
+
+        case SD_BUS_TYPE_UINT64:
+            scalar = &value->uint64;
+            break;
+
+        case SD_BUS_TYPE_INT64:
+            scalar = &value->int64;
+            break;
+
+        case SD_BUS_TYPE_UINT32:
+            scalar = &value->uint32;
+            break;
+
+        case SD_BUS_TYPE_INT32:
+            scalar = &value->int32;
+            break;
+
+        case SD_BUS_TYPE_DOUBLE:
+            scalar = &value->dbl;
+            break;
+
+        case SD_BUS_TYPE_ARRAY:
+            internal_error(true, "member '%s' is an array", name);
+            sd_bus_message_skip(m, NULL);
+            return 0;
+
+        default:
+            internal_error(true, "unknown field type '%c' for key '%s'", type, name);
+            sd_bus_message_skip(m, NULL);
+            return 0;
+    }
+
+    if(scalar) {
+        r = sd_bus_message_read_basic(m, type, scalar);
+        if(r < 0) {
+            log_dbus_error(r, "sd_bus_message_read_basic()");
+            return r;
+        }
+    }
+
+    attribute_handler_t on_update = unit_attributes[slot].handler;
+    if(on_update)
+        on_update(u, value);
+
+    return 0;
+}
+
+// Fetch all the D-Bus properties of unit u (Properties.GetAll with an empty
+// interface name) and pass every name/value pair to systemd_unit_get_property().
+//
+// Returns 0 on success, or the negative code of the first sd-bus call that
+// failed (the failure has been logged).
+static int systemd_unit_get_all_properties(sd_bus *bus, UnitInfo *u) {
+    _cleanup_(sd_bus_message_unrefp) sd_bus_message *m = NULL;
+    _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+    int r;
+
+    r = sd_bus_call_method(bus,
+                           "org.freedesktop.systemd1",
+                           u->unit_path,
+                           "org.freedesktop.DBus.Properties",
+                           "GetAll",
+                           &error,
+                           &m,
+                           "s", "");
+    if (r < 0) {
+        log_dbus_error(r, "sd_bus_call_method(p1)");
+        return r;
+    }
+
+    // the reply is an array of { property name -> variant } dictionary entries
+    r = sd_bus_message_enter_container(m, SD_BUS_TYPE_ARRAY, "{sv}");
+    if (r < 0) {
+        log_dbus_error(r, "sd_bus_message_enter_container(p2)");
+        return r;
+    }
+
+    while ((r = sd_bus_message_enter_container(m, SD_BUS_TYPE_DICT_ENTRY, "sv")) > 0) {
+        const char *member, *contents;
+
+        r = sd_bus_message_read_basic(m, SD_BUS_TYPE_STRING, &member);
+        if (r < 0) {
+            log_dbus_error(r, "sd_bus_message_read_basic(p3)");
+            return r;
+        }
+
+        // the variant has to be entered with the signature of what it carries
+        r = sd_bus_message_peek_type(m, NULL, &contents);
+        if (r < 0) {
+            log_dbus_error(r, "sd_bus_message_peek_type(p4)");
+            return r;
+        }
+
+        r = sd_bus_message_enter_container(m, SD_BUS_TYPE_VARIANT, contents);
+        if (r < 0) {
+            log_dbus_error(r, "sd_bus_message_enter_container(p5)");
+            return r;
+        }
+
+        // systemd_unit_get_property() logs its own failures. When it fails the
+        // value has not been consumed, so there is no point in trying to leave
+        // the containers: stop parsing this reply.
+        r = systemd_unit_get_property(m, u, member);
+        if (r < 0)
+            return r;
+
+        // leave the variant
+        r = sd_bus_message_exit_container(m);
+        if(r < 0) {
+            log_dbus_error(r, "sd_bus_message_exit_container(p6)");
+            return r;
+        }
+
+        // leave the dictionary entry
+        r = sd_bus_message_exit_container(m);
+        if(r < 0) {
+            log_dbus_error(r, "sd_bus_message_exit_container(p7)");
+            return r;
+        }
+    }
+    if(r < 0) {
+        log_dbus_error(r, "sd_bus_message_enter_container(p8)");
+        return r;
+    }
+
+    // leave the array
+    r = sd_bus_message_exit_container(m);
+    if(r < 0) {
+        log_dbus_error(r, "sd_bus_message_exit_container(p9)");
+        return r;
+    }
+
+    return 0;
+}
+
+// Collect the properties of every unit of the list starting at base.
+// This is best effort: a unit whose properties cannot be read is left as it is.
+static void systemd_units_get_all_properties(sd_bus *bus, UnitInfo *base) {
+    UnitInfo *unit = base;
+
+    while(unit) {
+        systemd_unit_get_all_properties(bus, unit);
+        unit = unit->next;
+    }
+}
+
+
+
+// ----------------------------------------------------------------------------
+// main unit info
+
+// Read the next unit of a ListUnits reply into u and derive its enum fields.
+//
+// The string members of u point to memory owned by the message (type points
+// inside id, or to a string literal), so the caller has to copy what it keeps.
+//
+// Returns what sd_bus_message_read() returned: > 0 when a unit was read,
+// 0 or negative otherwise, in which case only u->type (NULL) is meaningful.
+int bus_parse_unit_info(sd_bus_message *message, UnitInfo *u) {
+    assert(message);
+    assert(u);
+
+    u->type = NULL;
+
+    int rc = sd_bus_message_read(message, SYSTEMD_UNITS_DBUS_TYPES,
+                                 &u->id, &u->description, &u->load_state, &u->active_state,
+                                 &u->sub_state, &u->following, &u->unit_path,
+                                 &u->job_id, &u->job_type, &u->job_path);
+    if(rc <= 0)
+        return rc;
+
+    // the unit type is whatever follows the last dot of the id
+    char *suffix = u->id ? strrchr(u->id, '.') : NULL;
+    u->type = suffix ? &suffix[1] : "unknown";
+
+    u->UnitType = unit_type_from_string(u->type);
+    u->UnitLoadState = unit_load_state_from_string(u->load_state);
+    u->UnitActiveState = unit_active_state_from_string(u->active_state);
+
+    // the meaning of sub_state depends on the unit type;
+    // for an unrecognized type the sub-state union is left as it was
+    const char *sub = u->sub_state;
+    switch(u->UnitType) {
+        case UNIT_SERVICE:   u->ServiceState   = service_state_from_string(sub);   break;
+        case UNIT_SOCKET:    u->SocketState    = socket_state_from_string(sub);    break;
+        case UNIT_TARGET:    u->TargetState    = target_state_from_string(sub);    break;
+        case UNIT_DEVICE:    u->DeviceState    = device_state_from_string(sub);    break;
+        case UNIT_MOUNT:     u->MountState     = mount_state_from_string(sub);     break;
+        case UNIT_AUTOMOUNT: u->AutomountState = automount_state_from_string(sub); break;
+        case UNIT_SWAP:      u->SwapState      = swap_state_from_string(sub);      break;
+        case UNIT_TIMER:     u->TimerState     = timer_state_from_string(sub);     break;
+        case UNIT_PATH:      u->PathState      = path_state_from_string(sub);      break;
+        case UNIT_SLICE:     u->SliceState     = slice_state_from_string(sub);     break;
+        case UNIT_SCOPE:     u->ScopeState     = scope_state_from_string(sub);     break;
+
+        default:
+            break;
+    }
+
+    return rc;
+}
+
+// Value (0-15) of the hexadecimal digit c, in either letter case.
+// Any character that is not a hexadecimal digit gives 0.
+static int hex_to_int(char c) {
+    int digit = 0;
+
+    if (c >= '0' && c <= '9')
+        digit = c - '0';
+    else if (c >= 'a' && c <= 'f')
+        digit = 10 + (c - 'a');
+    else if (c >= 'A' && c <= 'F')
+        digit = 10 + (c - 'A');
+
+    return digit;
+}
+
+// Un-escape hex sequences (\xNN) in txt, in place.
+//
+// Every backslash followed by 'x' and two hexadecimal digits is replaced by the
+// byte having that value; everything else is copied unchanged. The result is
+// never longer than the input. NULL and empty strings are left alone.
+static void txt_decode(char *txt) {
+    if(!txt || !*txt)
+        return;
+
+    char *src = txt, *dst = txt;
+
+    size_t id_len = strlen(src);
+    size_t s = 0, d = 0;
+    for(; s < id_len ; s++) {
+        // The look-ahead cannot run past the terminator: each test is reached only
+        // when the previous character was not the NUL byte.
+        // isxdigit() needs a value representable as unsigned char; passing a plain
+        // char is undefined for negative values (bytes >= 0x80 with signed char).
+        if(src[s] == '\\' && src[s + 1] == 'x' &&
+           isxdigit((unsigned char)src[s + 2]) && isxdigit((unsigned char)src[s + 3])) {
+            int value = (hex_to_int(src[s + 2]) << 4) + hex_to_int(src[s + 3]);
+            dst[d++] = (char)value;
+            s += 3; // skip "xNN"; the loop increment skips the backslash
+        }
+        else
+            dst[d++] = src[s];
+    }
+    dst[d] = '\0';
+}
+
+// heap copy of s; a NULL or empty s is stored as "-"
+static inline char *systemd_units_strdupz_or_dash(const char *s) {
+    return strdupz((s && *s) ? s : "-");
+}
+
+// Ask systemd (over the system bus) for the list of its units and return them
+// as a doubly linked list of heap allocated UnitInfo, with their properties loaded.
+//
+// On failure the error is logged and whatever has been collected up to that
+// point is returned (NULL when nothing was). The list has to be released with
+// systemd_units_free_all().
+static UnitInfo *systemd_units_get_all(void) {
+    _cleanup_(sd_bus_unrefp) sd_bus *bus = NULL;
+    _cleanup_(sd_bus_error_free) sd_bus_error error = SD_BUS_ERROR_NULL;
+    _cleanup_(sd_bus_message_unrefp) sd_bus_message *reply = NULL;
+
+    UnitInfo *units = NULL;
+
+    int r = sd_bus_default_system(&bus);
+    if (r < 0) {
+        log_dbus_error(r, "sd_bus_default_system()");
+        return units;
+    }
+
+    // ListUnits returns all the units; ListUnitsFiltered could be used instead
+    // to get only the units matching a filter
+    r = sd_bus_call_method(bus,
+                           "org.freedesktop.systemd1",         /* service to contact */
+                           "/org/freedesktop/systemd1",        /* object path */
+                           "org.freedesktop.systemd1.Manager", /* interface name */
+                           "ListUnits",                        /* method name */
+                           &error,                             /* object to return error in */
+                           &reply,                             /* return message on success */
+                           NULL);                              /* input signature */
+    if (r < 0) {
+        log_dbus_error(r, "sd_bus_call_method()");
+        return units;
+    }
+
+    r = sd_bus_message_enter_container(reply, SD_BUS_TYPE_ARRAY, SYSTEMD_UNITS_DBUS_TYPES);
+    if (r < 0) {
+        log_dbus_error(r, "sd_bus_message_enter_container()");
+        return units;
+    }
+
+    UnitInfo parsed;
+    memset(&parsed, 0, sizeof(parsed));
+    while ((r = bus_parse_unit_info(reply, &parsed)) > 0) {
+        // start from a plain copy (enum states, job_id, NULL list links) ...
+        UnitInfo *unit = callocz(1, sizeof(parsed));
+        *unit = parsed;
+
+        // ... then replace every string, which points into the reply, with a private copy
+        unit->id = systemd_units_strdupz_or_dash(parsed.id);
+        unit->type = systemd_units_strdupz_or_dash(parsed.type);
+        unit->description = systemd_units_strdupz_or_dash(parsed.description);
+        unit->load_state = systemd_units_strdupz_or_dash(parsed.load_state);
+        unit->active_state = systemd_units_strdupz_or_dash(parsed.active_state);
+        unit->sub_state = systemd_units_strdupz_or_dash(parsed.sub_state);
+        unit->following = systemd_units_strdupz_or_dash(parsed.following);
+        unit->unit_path = systemd_units_strdupz_or_dash(parsed.unit_path);
+        unit->job_type = systemd_units_strdupz_or_dash(parsed.job_type);
+        unit->job_path = systemd_units_strdupz_or_dash(parsed.job_path);
+        unit->job_id = parsed.job_id;
+
+        // id and description may carry \xNN escapes
+        txt_decode(unit->id);
+        txt_decode(unit->description);
+
+        DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(units, unit, prev, next);
+        memset(&parsed, 0, sizeof(parsed));
+    }
+    if (r < 0) {
+        log_dbus_error(r, "sd_bus_message_read()");
+        return units;
+    }
+
+    r = sd_bus_message_exit_container(reply);
+    if (r < 0) {
+        log_dbus_error(r, "sd_bus_message_exit_container()");
+        return units;
+    }
+
+    systemd_units_get_all_properties(bus, units);
+
+    return units;
+}
+
+// Release a list of units: every unit, its strings and its property values.
+void systemd_units_free_all(UnitInfo *base) {
+    UnitInfo *unit;
+
+    while((unit = base) != NULL) {
+        // unlinking the head moves base to the next unit
+        DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(base, unit, prev, next);
+
+        void *owned[] = {
+            (void *)unit->id,
+            (void *)unit->type,
+            (void *)unit->description,
+            (void *)unit->load_state,
+            (void *)unit->active_state,
+            (void *)unit->sub_state,
+            (void *)unit->following,
+            (void *)unit->unit_path,
+            (void *)unit->job_type,
+            (void *)unit->job_path,
+        };
+        for(size_t k = 0; k < sizeof(owned) / sizeof(owned[0]); k++)
+            freez(owned[k]);
+
+        for(size_t slot = 0; slot < _UNIT_ATTRIBUTE_MAX; slot++)
+            systemd_unit_free_property(unit_attributes[slot].value_type, &unit->attributes[slot]);
+
+        freez(unit);
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+// Reply to the function call 'transaction' with the plain text help of the function.
+// NOTE(review): the text of `info` mentions "systemd journal sources"; it looks
+// copied from the journal function - confirm the wording for this function.
+static void netdata_systemd_units_function_help(const char *transaction) {
+    BUFFER *help = buffer_create(0, NULL);
+
+    buffer_sprintf(help,
+                   "%s / %s\n"
+                   "\n"
+                   "%s\n"
+                   "\n"
+                   "The following parameters are supported:\n"
+                   "\n"
+                   " help\n"
+                   " Shows this help message.\n"
+                   "\n"
+                   " info\n"
+                   " Request initial configuration information about the plugin.\n"
+                   " The key entity returned is the required_params array, which includes\n"
+                   " all the available systemd journal sources.\n"
+                   " When `info` is requested, all other parameters are ignored.\n"
+                   "\n",
+                   program_name,
+                   SYSTEMD_UNITS_FUNCTION_NAME,
+                   SYSTEMD_UNITS_FUNCTION_DESCRIPTION);
+
+    // the response is written to stdout while holding the stdout mutex
+    netdata_mutex_lock(&stdout_mutex);
+    time_t expires = now_realtime_sec() + 3600;
+    pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "text/plain", expires, help);
+    netdata_mutex_unlock(&stdout_mutex);
+
+    buffer_free(help);
+}
+
+// Reply to the function call 'transaction' with a small JSON document
+// describing the function: status, type ("table") and help text.
+// NOTE(review): the payload is JSON but is sent as "text/plain" - confirm this is intended.
+static void netdata_systemd_units_function_info(const char *transaction) {
+    BUFFER *json = buffer_create(0, NULL);
+
+    buffer_json_initialize(json, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY);
+    buffer_json_member_add_uint64(json, "status", HTTP_RESP_OK);
+    buffer_json_member_add_string(json, "type", "table");
+    buffer_json_member_add_string(json, "help", SYSTEMD_UNITS_FUNCTION_DESCRIPTION);
+    buffer_json_finalize(json);
+
+    // the response is written to stdout while holding the stdout mutex
+    netdata_mutex_lock(&stdout_mutex);
+    time_t expires = now_realtime_sec() + 3600;
+    pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "text/plain", expires, json);
+    netdata_mutex_unlock(&stdout_mutex);
+
+    buffer_free(json);
+}
+
+// ----------------------------------------------------------------------------
+
+// Fold severity and unit type into u->prio, so that units order by severity
+// first (critical lowest), then by unit type, then by the value u->prio had.
+//   units - multiplier applied to the severity/type rank
+// NOTE(review): this presumably expects u->prio to hold an ordinal smaller than
+// 'units' on entry - confirm against the caller.
+static void systemd_unit_priority(UnitInfo *u, size_t units) {
+    uint32_t rank;
+
+    switch(u->severity) {
+        case FACET_ROW_SEVERITY_CRITICAL: rank = 0; break;
+        case FACET_ROW_SEVERITY_NOTICE:   rank = 2; break;
+        case FACET_ROW_SEVERITY_NORMAL:   rank = 3; break;
+        case FACET_ROW_SEVERITY_DEBUG:    rank = 4; break;
+
+        // unknown severities rank like warnings
+        case FACET_ROW_SEVERITY_WARNING:
+        default:
+            rank = 1;
+            break;
+    }
+
+    rank *= (uint32_t)(_UNIT_TYPE_MAX + 1);
+    rank += (uint32_t)u->UnitType;
+
+    u->prio += rank * units;
+}
+
+// Raise a value towards target, but never above max:
+// evaluates to (current) unless (current) < (target), in which case it
+// evaluates to the smaller of (target) and (max).
+// Statement expression (GNU extension); the arguments may be evaluated more than once.
+#define if_less(current, max, target) ({ \
+    typeof(current) _wanted = (current); \
+    if((current) < (target)) \
+        _wanted = (target) > (max) ? (max) : (target); \
+    _wanted; \
+})
+
+// Replace a value only when it is still FACET_ROW_SEVERITY_NORMAL:
+// evaluates to (current) unless (current) == FACET_ROW_SEVERITY_NORMAL, in which
+// case it evaluates to the smaller of (target) and (max).
+// Statement expression (GNU extension); the arguments may be evaluated more than once.
+#define if_normal(current, max, target) ({ \
+    typeof(current) _wanted = (current); \
+    if((current) == FACET_ROW_SEVERITY_NORMAL) \
+        _wanted = (target) > (max) ? (max) : (target); \
+    _wanted; \
+})
+
+FACET_ROW_SEVERITY system_unit_severity(UnitInfo *u) {
+ FACET_ROW_SEVERITY severity, max_severity;
+
+ switch(u->UnitLoadState) {
+ case UNIT_ERROR:
+ case UNIT_BAD_SETTING:
+ severity = FACET_ROW_SEVERITY_CRITICAL;
+ max_severity = FACET_ROW_SEVERITY_CRITICAL;
+ break;
+
+ default:
+ severity = FACET_ROW_SEVERITY_WARNING;
+ max_severity = FACET_ROW_SEVERITY_CRITICAL;
+ break;
+
+ case UNIT_NOT_FOUND:
+ severity = FACET_ROW_SEVERITY_NOTICE;
+ max_severity = FACET_ROW_SEVERITY_NOTICE;
+ break;
+
+ case UNIT_LOADED:
+ severity = FACET_ROW_SEVERITY_NORMAL;
+ max_severity = FACET_ROW_SEVERITY_CRITICAL;
+ break;
+
+ case UNIT_MERGED:
+ case UNIT_MASKED:
+ case UNIT_STUB:
+ severity = FACET_ROW_SEVERITY_DEBUG;
+ max_severity = FACET_ROW_SEVERITY_DEBUG;
+ break;
+ }
+
+ switch(u->UnitActiveState) {
+ case UNIT_FAILED:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL);
+ break;
+
+ default:
+ case UNIT_RELOADING:
+ case UNIT_ACTIVATING:
+ case UNIT_DEACTIVATING:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case UNIT_MAINTENANCE:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE);
+ break;
+
+ case UNIT_ACTIVE:
+ break;
+
+ case UNIT_INACTIVE:
+ severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG);
+ break;
+ }
+
+ switch(u->FreezerState) {
+ default:
+ case FREEZER_FROZEN:
+ case FREEZER_FREEZING:
+ case FREEZER_THAWING:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case FREEZER_RUNNING:
+ break;
+ }
+
+ switch(u->UnitType) {
+ case UNIT_SERVICE:
+ switch(u->ServiceState) {
+ case SERVICE_FAILED:
+ case SERVICE_FAILED_BEFORE_AUTO_RESTART:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL);
+ break;
+
+ default:
+ case SERVICE_STOP:
+ case SERVICE_STOP_WATCHDOG:
+ case SERVICE_STOP_SIGTERM:
+ case SERVICE_STOP_SIGKILL:
+ case SERVICE_STOP_POST:
+ case SERVICE_FINAL_WATCHDOG:
+ case SERVICE_FINAL_SIGTERM:
+ case SERVICE_FINAL_SIGKILL:
+ case SERVICE_AUTO_RESTART:
+ case SERVICE_AUTO_RESTART_QUEUED:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case SERVICE_CONDITION:
+ case SERVICE_START_PRE:
+ case SERVICE_START:
+ case SERVICE_START_POST:
+ case SERVICE_RELOAD:
+ case SERVICE_RELOAD_SIGNAL:
+ case SERVICE_RELOAD_NOTIFY:
+ case SERVICE_DEAD_RESOURCES_PINNED:
+ case SERVICE_CLEANING:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE);
+ break;
+
+ case SERVICE_EXITED:
+ case SERVICE_RUNNING:
+ break;
+
+ case SERVICE_DEAD:
+ case SERVICE_DEAD_BEFORE_AUTO_RESTART:
+ severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG);
+ break;
+ }
+ break;
+
+ case UNIT_MOUNT:
+ switch(u->MountState) {
+ case MOUNT_FAILED:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL);
+ break;
+
+ default:
+ case MOUNT_REMOUNTING_SIGTERM:
+ case MOUNT_REMOUNTING_SIGKILL:
+ case MOUNT_UNMOUNTING_SIGTERM:
+ case MOUNT_UNMOUNTING_SIGKILL:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case MOUNT_MOUNTING:
+ case MOUNT_MOUNTING_DONE:
+ case MOUNT_REMOUNTING:
+ case MOUNT_UNMOUNTING:
+ case MOUNT_CLEANING:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE);
+ break;
+
+ case MOUNT_MOUNTED:
+ break;
+
+ case MOUNT_DEAD:
+ severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG);
+ break;
+ }
+ break;
+
+ case UNIT_SWAP:
+ switch(u->SwapState) {
+ case SWAP_FAILED:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL);
+ break;
+
+ default:
+ case SWAP_DEACTIVATING_SIGTERM:
+ case SWAP_DEACTIVATING_SIGKILL:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case SWAP_ACTIVATING:
+ case SWAP_ACTIVATING_DONE:
+ case SWAP_DEACTIVATING:
+ case SWAP_CLEANING:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE);
+ break;
+
+ case SWAP_ACTIVE:
+ break;
+
+ case SWAP_DEAD:
+ severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG);
+ break;
+ }
+ break;
+
+ case UNIT_SOCKET:
+ switch(u->SocketState) {
+ case SOCKET_FAILED:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL);
+ break;
+
+ default:
+ case SOCKET_STOP_PRE_SIGTERM:
+ case SOCKET_STOP_PRE_SIGKILL:
+ case SOCKET_FINAL_SIGTERM:
+ case SOCKET_FINAL_SIGKILL:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case SOCKET_START_PRE:
+ case SOCKET_START_CHOWN:
+ case SOCKET_START_POST:
+ case SOCKET_STOP_PRE:
+ case SOCKET_STOP_POST:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE);
+ break;
+
+ case SOCKET_RUNNING:
+ case SOCKET_LISTENING:
+ break;
+
+ case SOCKET_DEAD:
+ severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG);
+ break;
+ }
+ break;
+
+ case UNIT_TARGET:
+ switch(u->TargetState) {
+ default:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case TARGET_ACTIVE:
+ break;
+
+ case TARGET_DEAD:
+ severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG);
+ break;
+ }
+ break;
+
+ case UNIT_DEVICE:
+ switch(u->DeviceState) {
+ default:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case DEVICE_TENTATIVE:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE);
+ break;
+
+ case DEVICE_PLUGGED:
+ break;
+
+ case DEVICE_DEAD:
+ severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG);
+ break;
+ }
+ break;
+
+ case UNIT_AUTOMOUNT:
+ switch(u->AutomountState) {
+ case AUTOMOUNT_FAILED:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL);
+ break;
+
+ default:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case AUTOMOUNT_WAITING:
+ case AUTOMOUNT_RUNNING:
+ break;
+
+ case AUTOMOUNT_DEAD:
+ severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG);
+ break;
+ }
+ break;
+
+ case UNIT_TIMER:
+ switch(u->TimerState) {
+ case TIMER_FAILED:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL);
+ break;
+
+ default:
+ case TIMER_ELAPSED:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case TIMER_WAITING:
+ case TIMER_RUNNING:
+ break;
+
+ case TIMER_DEAD:
+ severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG);
+ break;
+ }
+ break;
+
+ case UNIT_PATH:
+ switch(u->PathState) {
+ case PATH_FAILED:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL);
+ break;
+
+ default:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case PATH_WAITING:
+ case PATH_RUNNING:
+ break;
+
+ case PATH_DEAD:
+ severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG);
+ break;
+ }
+ break;
+
+ case UNIT_SLICE:
+ switch(u->SliceState) {
+ default:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case SLICE_ACTIVE:
+ break;
+
+ case SLICE_DEAD:
+ severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG);
+ break;
+ }
+ break;
+
+ case UNIT_SCOPE:
+ switch(u->ScopeState) {
+ case SCOPE_FAILED:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_CRITICAL);
+ break;
+
+ default:
+ case SCOPE_STOP_SIGTERM:
+ case SCOPE_STOP_SIGKILL:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+
+ case SCOPE_ABANDONED:
+ case SCOPE_START_CHOWN:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_NOTICE);
+ break;
+
+ case SCOPE_RUNNING:
+ break;
+
+ case SCOPE_DEAD:
+ severity = if_normal(severity, max_severity, FACET_ROW_SEVERITY_DEBUG);
+ break;
+ }
+ break;
+
+ default:
+ severity = if_less(severity, max_severity, FACET_ROW_SEVERITY_WARNING);
+ break;
+ }
+
+ u->severity = severity;
+ return severity;
+}
+
+// qsort() comparator over an array of UnitInfo pointers: orders them by unit id, ignoring case.
+int unit_info_compar(const void *a, const void *b) {
+    UnitInfo *const *lhs = a; // qsort() passes the addresses of two array slots,
+    UnitInfo *const *rhs = b; // and each slot holds a pointer to a UnitInfo
+    return strcasecmp((*lhs)->id, (*rhs)->id);
+}
+
+// Gives every unit its rank in case-insensitive id order (prio), then calls system_unit_severity()
+// and systemd_unit_priority() on it. An empty list (base == NULL) is a no-op.
+void systemd_units_assign_priority(UnitInfo *base) {
+    size_t units = 0, c = 0, prio = 0;
+    for(UnitInfo *u = base; u ; u = u->next)
+        units++;
+    if(!units) return; // nothing to rank, and a zero-length VLA below would be undefined behavior
+
+    UnitInfo *array[units]; // flat view of the linked list, so that it can be qsort()ed
+    for(UnitInfo *u = base; u ; u = u->next)
+        array[c++] = u;
+    qsort(array, units, sizeof(UnitInfo *), unit_info_compar);
+    for(c = 0; c < units ; c++) {
+        array[c]->prio = prio++; // position in the sorted order
+        system_unit_severity(array[c]);
+        systemd_unit_priority(array[c], units);
+    }
+}
+
+// Handler of the systemd units function. "info" and "help" keywords are answered by their dedicated helpers;
+// any other request collects all units, ranks them, and sends one JSON table (data rows, column definitions,
+// charts and group-by presets) to stdout via pluginsd_function_result_to_stdout().
+// The timeout and cancelled parameters are accepted but not used by this implementation.
+void function_systemd_units(const char *transaction, char *function, int timeout, bool *cancelled) {
+    char *words[SYSTEMD_UNITS_MAX_PARAMS] = { NULL };
+    size_t num_words = quoted_strings_splitter_pluginsd(function, words, SYSTEMD_UNITS_MAX_PARAMS);
+    for(int i = 1; i < SYSTEMD_UNITS_MAX_PARAMS ;i++) { // word 0 is skipped - presumably the function name itself
+        char *keyword = get_word(words, num_words, i);
+        if(!keyword) break;
+        if(strcmp(keyword, "info") == 0) {
+            netdata_systemd_units_function_info(transaction);
+            return;
+        }
+        else if(strcmp(keyword, "help") == 0) {
+            netdata_systemd_units_function_help(transaction);
+            return;
+        }
+    }
+
+    UnitInfo *base = systemd_units_get_all();
+    systemd_units_assign_priority(base);
+
+    BUFFER *wb = buffer_create(0, NULL);
+    buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY);
+
+    buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK);
+    buffer_json_member_add_string(wb, "type", "table");
+    buffer_json_member_add_time_t(wb, "update_every", 10);
+    buffer_json_member_add_string(wb, "help", SYSTEMD_UNITS_FUNCTION_DESCRIPTION);
+    buffer_json_member_add_array(wb, "data");
+
+    size_t count[_UNIT_ATTRIBUTE_MAX] = { 0 };
+    struct UnitAttribute max[_UNIT_ATTRIBUTE_MAX];
+    for(ssize_t i = 0; i < (ssize_t)_UNIT_ATTRIBUTE_MAX ;i++) max[i].uint64 = 0; // defined maxima even when no unit is listed
+    for(UnitInfo *u = base; u ;u = u->next) {
+        buffer_json_add_array_item_array(wb); // one row; item order must match the column definitions below
+        {
+            buffer_json_add_array_item_string(wb, u->id);
+            buffer_json_add_array_item_object(wb);
+            {
+                buffer_json_member_add_string(wb, "severity", facets_severity_to_string(u->severity));
+            }
+            buffer_json_object_close(wb);
+
+            buffer_json_add_array_item_string(wb, u->type);
+            buffer_json_add_array_item_string(wb, u->description);
+            buffer_json_add_array_item_string(wb, u->load_state);
+            buffer_json_add_array_item_string(wb, u->active_state);
+            buffer_json_add_array_item_string(wb, u->sub_state);
+            buffer_json_add_array_item_string(wb, u->following);
+            buffer_json_add_array_item_string(wb, u->unit_path);
+            buffer_json_add_array_item_uint64(wb, u->job_id);
+            buffer_json_add_array_item_string(wb, u->job_type);
+            buffer_json_add_array_item_string(wb, u->job_path);
+
+            for(ssize_t i = 0; i < (ssize_t)_UNIT_ATTRIBUTE_MAX ;i++) { // numeric types also track the per-column maximum
+                switch(unit_attributes[i].value_type) {
+                    case SD_BUS_TYPE_OBJECT_PATH:
+                    case SD_BUS_TYPE_STRING:
+                        buffer_json_add_array_item_string(wb, u->attributes[i].str && *u->attributes[i].str ? u->attributes[i].str : "-");
+                        break;
+
+                    case SD_BUS_TYPE_UINT64:
+                        buffer_json_add_array_item_uint64(wb, u->attributes[i].uint64);
+                        if(!count[i]++) max[i].uint64 = 0;
+                        max[i].uint64 = MAX(max[i].uint64, u->attributes[i].uint64);
+                        break;
+
+                    case SD_BUS_TYPE_UINT32:
+                        buffer_json_add_array_item_uint64(wb, u->attributes[i].uint32);
+                        if(!count[i]++) max[i].uint32 = 0;
+                        max[i].uint32 = MAX(max[i].uint32, u->attributes[i].uint32);
+                        break;
+
+                    case SD_BUS_TYPE_INT64: // NOTE(review): signed value sent through the uint64 emitter - confirm negatives are impossible
+                        buffer_json_add_array_item_uint64(wb, u->attributes[i].int64);
+                        if(!count[i]++) max[i].uint64 = 0;
+                        max[i].int64 = MAX(max[i].int64, u->attributes[i].int64);
+                        break;
+
+                    case SD_BUS_TYPE_INT32: // NOTE(review): signed value sent through the uint64 emitter - confirm negatives are impossible
+                        buffer_json_add_array_item_uint64(wb, u->attributes[i].int32);
+                        if(!count[i]++) max[i].int32 = 0;
+                        max[i].int32 = MAX(max[i].int32, u->attributes[i].int32);
+                        break;
+
+                    case SD_BUS_TYPE_DOUBLE:
+                        buffer_json_add_array_item_double(wb, u->attributes[i].dbl);
+                        if(!count[i]++) max[i].dbl = 0.0;
+                        max[i].dbl = MAX(max[i].dbl, u->attributes[i].dbl);
+                        break;
+
+                    case SD_BUS_TYPE_BOOLEAN:
+                        buffer_json_add_array_item_boolean(wb, u->attributes[i].boolean);
+                        break;
+
+                    default:
+                        break;
+                }
+            }
+
+            buffer_json_add_array_item_uint64(wb, u->prio);
+            buffer_json_add_array_item_uint64(wb, 1); // count
+        }
+        buffer_json_array_close(wb);
+    }
+
+    buffer_json_array_close(wb); // data
+
+    buffer_json_member_add_object(wb, "columns");
+    {
+        size_t field_id = 0;
+        buffer_rrdf_table_add_field(wb, field_id++, "id", "Unit ID",
+                RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE,
+                RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_WRAP | RRDF_FIELD_OPTS_FULL_WIDTH,
+                NULL);
+
+        buffer_rrdf_table_add_field(
+                wb, field_id++,
+                "rowOptions", "rowOptions",
+                RRDF_FIELD_TYPE_NONE,
+                RRDR_FIELD_VISUAL_ROW_OPTIONS,
+                RRDF_FIELD_TRANSFORM_NONE, 0, NULL, NAN,
+                RRDF_FIELD_SORT_FIXED,
+                NULL,
+                RRDF_FIELD_SUMMARY_COUNT,
+                RRDF_FIELD_FILTER_NONE,
+                RRDF_FIELD_OPTS_DUMMY,
+                NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "type", "Unit Type",
+                RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_EXPANDED_FILTER,
+                NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "description", "Unit Description",
+                RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE,
+                RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_WRAP | RRDF_FIELD_OPTS_FULL_WIDTH,
+                NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "loadState", "Unit Load State",
+                RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_EXPANDED_FILTER,
+                NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "activeState", "Unit Active State",
+                RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_EXPANDED_FILTER,
+                NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "subState", "Unit Sub State",
+                RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_EXPANDED_FILTER,
+                NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "following", "Unit Following",
+                RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE,
+                RRDF_FIELD_OPTS_WRAP,
+                NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "path", "Unit Path",
+                RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE,
+                RRDF_FIELD_OPTS_WRAP | RRDF_FIELD_OPTS_FULL_WIDTH,
+                NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "jobId", "Unit Job ID",
+                RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE,
+                RRDF_FIELD_OPTS_NONE,
+                NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "jobType", "Unit Job Type",
+                RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT,
+                RRDF_FIELD_OPTS_NONE,
+                NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "jobPath", "Unit Job Path",
+                RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE,
+                RRDF_FIELD_OPTS_WRAP | RRDF_FIELD_OPTS_FULL_WIDTH,
+                NULL);
+
+        for(ssize_t i = 0; i < (ssize_t)_UNIT_ATTRIBUTE_MAX ;i++) { // one column per attribute, same order as in the rows
+            char key[256], name[256];
+
+            if(unit_attributes[i].show_as)
+                snprintfz(key, sizeof(key), "%s", unit_attributes[i].show_as);
+            else
+                snprintfz(key, sizeof(key), "attribute%s", unit_property_name_to_string_from_slot(i));
+
+            if(unit_attributes[i].info)
+                snprintfz(name, sizeof(name), "%s", unit_attributes[i].info);
+            else
+                snprintfz(name, sizeof(name), "Attribute %s", unit_property_name_to_string_from_slot(i));
+
+            RRDF_FIELD_OPTIONS options = unit_attributes[i].options;
+            RRDF_FIELD_FILTER filter = unit_attributes[i].filter;
+
+            switch(unit_attributes[i].value_type) {
+                case SD_BUS_TYPE_OBJECT_PATH:
+                case SD_BUS_TYPE_STRING:
+                    buffer_rrdf_table_add_field(wb, field_id++, key, name,
+                            RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                            0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                            RRDF_FIELD_SUMMARY_COUNT, filter,
+                            RRDF_FIELD_OPTS_WRAP | options,
+                            NULL);
+                    break;
+
+                case SD_BUS_TYPE_INT32:
+                case SD_BUS_TYPE_UINT32:
+                case SD_BUS_TYPE_INT64:
+                case SD_BUS_TYPE_UINT64: {
+                    double m = NAN; // stays defined even if a case label is added above without a matching branch below
+                    if(unit_attributes[i].value_type == SD_BUS_TYPE_UINT64)
+                        m = (double)max[i].uint64;
+                    else if(unit_attributes[i].value_type == SD_BUS_TYPE_INT64)
+                        m = (double)max[i].int64;
+                    else if(unit_attributes[i].value_type == SD_BUS_TYPE_UINT32)
+                        m = (double)max[i].uint32;
+                    else if(unit_attributes[i].value_type == SD_BUS_TYPE_INT32)
+                        m = (double)max[i].int32;
+
+                    buffer_rrdf_table_add_field(wb, field_id++, key, name,
+                            RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                            0, NULL, m, RRDF_FIELD_SORT_ASCENDING, NULL,
+                            RRDF_FIELD_SUMMARY_SUM, filter,
+                            RRDF_FIELD_OPTS_WRAP | options,
+                            NULL);
+                }
+                    break;
+
+                case SD_BUS_TYPE_DOUBLE:
+                    buffer_rrdf_table_add_field(wb, field_id++, key, name,
+                            RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                            2, NULL, max[i].dbl, RRDF_FIELD_SORT_ASCENDING, NULL,
+                            RRDF_FIELD_SUMMARY_SUM, filter,
+                            RRDF_FIELD_OPTS_WRAP | options,
+                            NULL);
+                    break;
+
+                case SD_BUS_TYPE_BOOLEAN:
+                    buffer_rrdf_table_add_field(wb, field_id++, key, name,
+                            RRDF_FIELD_TYPE_BOOLEAN, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                            0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                            RRDF_FIELD_SUMMARY_COUNT, filter,
+                            RRDF_FIELD_OPTS_WRAP | options,
+                            NULL);
+                    break;
+
+                default:
+                    break;
+            }
+        }
+
+        buffer_rrdf_table_add_field(wb, field_id++, "priority", "Priority",
+                RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE,
+                RRDF_FIELD_OPTS_NONE,
+                NULL);
+
+        buffer_rrdf_table_add_field(wb, field_id++, "count", "Count",
+                RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE,
+                0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL,
+                RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE,
+                RRDF_FIELD_OPTS_NONE,
+                NULL);
+    }
+
+    buffer_json_object_close(wb); // columns
+    buffer_json_member_add_string(wb, "default_sort_column", "priority");
+
+    buffer_json_member_add_object(wb, "charts");
+    {
+        buffer_json_member_add_object(wb, "count");
+        {
+            buffer_json_member_add_string(wb, "name", "count");
+            buffer_json_member_add_string(wb, "type", "stacked-bar");
+            buffer_json_member_add_array(wb, "columns");
+            {
+                buffer_json_add_array_item_string(wb, "count");
+            }
+            buffer_json_array_close(wb);
+        }
+        buffer_json_object_close(wb);
+    }
+    buffer_json_object_close(wb); // charts
+
+    buffer_json_member_add_array(wb, "default_charts");
+    {
+        buffer_json_add_array_item_array(wb);
+        buffer_json_add_array_item_string(wb, "count");
+        buffer_json_add_array_item_string(wb, "activeState");
+        buffer_json_array_close(wb);
+        buffer_json_add_array_item_array(wb);
+        buffer_json_add_array_item_string(wb, "count");
+        buffer_json_add_array_item_string(wb, "subState");
+        buffer_json_array_close(wb);
+    }
+    buffer_json_array_close(wb);
+
+    buffer_json_member_add_object(wb, "group_by");
+    {
+        buffer_json_member_add_object(wb, "type");
+        {
+            buffer_json_member_add_string(wb, "name", "Top Down Tree");
+            buffer_json_member_add_array(wb, "columns");
+            {
+                buffer_json_add_array_item_string(wb, "type");
+                buffer_json_add_array_item_string(wb, "loadState");
+                buffer_json_add_array_item_string(wb, "activeState");
+                buffer_json_add_array_item_string(wb, "subState");
+            }
+            buffer_json_array_close(wb);
+        }
+        buffer_json_object_close(wb);
+
+        buffer_json_member_add_object(wb, "subState");
+        {
+            buffer_json_member_add_string(wb, "name", "Bottom Up Tree");
+            buffer_json_member_add_array(wb, "columns");
+            {
+                buffer_json_add_array_item_string(wb, "subState");
+                buffer_json_add_array_item_string(wb, "activeState");
+                buffer_json_add_array_item_string(wb, "loadState");
+                buffer_json_add_array_item_string(wb, "type");
+            }
+            buffer_json_array_close(wb);
+        }
+        buffer_json_object_close(wb);
+    }
+    buffer_json_object_close(wb); // group_by
+
+    buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1);
+    buffer_json_finalize(wb);
+
+    netdata_mutex_lock(&stdout_mutex); // the response is written to stdout while holding stdout_mutex
+    pluginsd_function_result_to_stdout(transaction, HTTP_RESP_OK, "application/json", now_realtime_sec() + 1, wb);
+    netdata_mutex_unlock(&stdout_mutex);
+
+    buffer_free(wb);
+    systemd_units_free_all(base);
+}
+
+#endif // ENABLE_SYSTEMD_DBUS
diff --git a/collectors/tc.plugin/README.md b/collectors/tc.plugin/README.md
deleted file mode 100644
index de5fd4743ceee6..00000000000000
--- a/collectors/tc.plugin/README.md
+++ /dev/null
@@ -1,209 +0,0 @@
-
-
-# tc.plugin
-
-Live demo - **[see it in action here](https://registry.my-netdata.io/#menu_tc)** !
-
-
-
-Netdata monitors `tc` QoS classes for all interfaces.
-
-If you also use [FireQOS](http://firehol.org/tutorial/fireqos-new-user/) it will collect interface and class names.
-
-There is a [shell helper](https://raw.githubusercontent.com/netdata/netdata/master/collectors/tc.plugin/tc-qos-helper.sh.in) for this (all parsing is done by the plugin in `C` code - this shell script is just a configuration for the command to run to get `tc` output).
-
-The source of the tc plugin is [here](https://raw.githubusercontent.com/netdata/netdata/master/collectors/tc.plugin/plugin_tc.c). It is somewhat complex, because a state machine was needed to keep track of all the `tc` classes, including the pseudo classes tc dynamically creates.
-
-## Motivation
-
-One category of metrics missing in Linux monitoring, is bandwidth consumption for each open socket (inbound and outbound traffic). So, you cannot tell how much bandwidth your web server, your database server, your backup, your ssh sessions, etc are using.
-
-To solve this problem, the most *adventurous* Linux monitoring tools install kernel modules to capture all traffic, analyze it and provide reports per application. A lot of work, CPU intensive and with a great degree of risk (due to the kernel modules involved which might affect the stability of the whole system). Not to mention that such solutions are probably better suited for a core linux router in your network.
-
-Others use NFACCT, the netfilter accounting module which is already part of the Linux firewall. However, this would require configuring a firewall on every system you want to measure bandwidth (just FYI, I do install a firewall on every server - and I strongly advise you to do so too - but configuring accounting on all servers seems overkill when you don't really need it for billing purposes).
-
-**There is however a much simpler approach**.
-
-## QoS
-
-One of the features the Linux kernel has, but it is rarely used, is its ability to **apply QoS on traffic**. Even most interesting is that it can apply QoS to **both inbound and outbound traffic**.
-
-QoS is about 2 features:
-
-1. **Classify traffic**
-
- Classification is the process of organizing traffic in groups, called **classes**. Classification can evaluate every aspect of network packets, like source and destination ports, source and destination IPs, netfilter marks, etc.
-
- When you classify traffic, you just assign a label to it. Of course classes have some properties themselves (like queuing mechanisms), but let's say it is that simple: **a label**. For example **I call `web server` traffic, the traffic from my server's tcp/80, tcp/443 and to my server's tcp/80, tcp/443, while I call `web surfing` all other tcp/80 and tcp/443 traffic**. You can use any combinations you like. There is no limit.
-
-2. **Apply traffic shaping rules to these classes**
-
- Traffic shaping is used to control how network interface bandwidth should be shared among the classes. Normally, you need to do this, when there is not enough bandwidth to satisfy all the demand, or when you want to control the supply of bandwidth to certain services. Of course classification is sufficient for monitoring traffic, but traffic shaping is also quite important, as we will explain in the next section.
-
-## Why you want QoS
-
-1. **Monitoring the bandwidth used by services**
-
- Netdata provides wonderful real-time charts, like this one (wait to see the orange `rsync` part):
-
- 
-
-2. **Ensure sensitive administrative tasks will not starve for bandwidth**
-
- Have you tried to ssh to a server when the network is congested? If you have, you already know it does not work very well. QoS can guarantee that services like ssh, dns, ntp, etc will always have a small supply of bandwidth. So, no matter what happens, you will be able to ssh to your server and DNS will always work.
-
-3. **Ensure administrative tasks will not monopolize all the bandwidth**
-
- Services like backups, file copies, database dumps, etc can easily monopolize all the available bandwidth. It is common for example a nightly backup, or a huge file transfer to negatively influence the end-user experience. QoS can fix that.
-
-4. **Ensure each end-user connection will get a fair cut of the available bandwidth.**
-
- Several QoS queuing disciplines in Linux do this automatically, without any configuration from you. The result is that new sockets are favored over older ones, so that users will get a snappier experience, while others are transferring large amounts of traffic.
-
-5. **Protect the servers from DDoS attacks.**
-
- When your system is under a DDoS attack, it will get a lot more bandwidth compared to the one it can handle and probably your applications will crash. Setting a limit on the inbound traffic using QoS, will protect your servers (throttle the requests) and depending on the size of the attack may allow your legitimate users to access the server, while the attack is taking place.
-
- Using QoS together with a [SYNPROXY](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md) will provide a great degree of protection against most DDoS attacks. Actually when I wrote that article, a few folks tried to DDoS the Netdata demo site to see in real-time the SYNPROXY operation. They did not do it right, but anyway a great deal of requests reached the Netdata server. What saved Netdata was QoS. The Netdata demo server has QoS installed, so the requests were throttled and the server did not even reach the point of resource starvation. Read about it [here](https://github.com/netdata/netdata/blob/master/collectors/proc.plugin/README.md).
-
-On top of all these, QoS is extremely light. You will configure it once, and this is it. It will not bother you again and it will not use any noticeable CPU resources, especially on application and database servers.
-
-```
-- ensure administrative tasks (like ssh, dns, etc) will always have a small but guaranteed bandwidth. So, no matter what happens, I will be able to ssh to my server and DNS will work.
-
-- ensure other administrative tasks will not monopolize all the available bandwidth. So, my nightly backup will not hurt my users, a developer that is copying files over the net will not get all the available bandwidth, etc.
-
-- ensure each end-user connection will get a fair cut of the available bandwidth.
-```
-
-Once **traffic classification** is applied, we can use **[netdata](https://github.com/netdata/netdata)** to visualize the bandwidth consumption per class in real-time (no configuration is needed for Netdata - it will figure it out).
-
-QoS, is extremely light. You will configure it once, and this is it. It will not bother you again and it will not use any noticeable CPU resources, especially on application and database servers.
-
-This is QoS from a home linux router. Check these features:
-
-1. It is real-time (per second updates)
-2. QoS really works in Linux - check that the `background` traffic is squeezed when `surfing` needs it.
-
-
-
----
-
-## QoS in Linux?
-
-Of course, `tc` is probably **the most undocumented, complicated and unfriendly** command in Linux.
-
-For example, do you know that for matching a simple port range in `tc`, e.g. all the high ports, from 1025 to 65535 inclusive, you have to match these:
-
-```
-1025/0xffff
-1026/0xfffe
-1028/0xfffc
-1032/0xfff8
-1040/0xfff0
-1056/0xffe0
-1088/0xffc0
-1152/0xff80
-1280/0xff00
-1536/0xfe00
-2048/0xf800
-4096/0xf000
-8192/0xe000
-16384/0xc000
-32768/0x8000
-```
-
-To do it the hard way, you can go through the [tc configuration steps](#qos-configuration-with-tc). An easier way is to use **[FireQOS](https://firehol.org/tutorial/fireqos-new-user/)**, a tool that simplifies QoS management in Linux.
-
-## Qos Configuration with FireHOL
-
-The **[FireHOL](https://firehol.org/)** package already distributes **[FireQOS](https://firehol.org/tutorial/fireqos-new-user/)**. Check the **[FireQOS tutorial](https://firehol.org/tutorial/fireqos-new-user/)** to learn how to write your own QoS configuration.
-
-With **[FireQOS](https://firehol.org/tutorial/fireqos-new-user/)**, it is **really simple for everyone to use QoS in Linux**. Just install the package `firehol`. It should already be available for your distribution. If not, check the **[FireHOL Installation Guide](https://firehol.org/installing/)**. After that, you will have the `fireqos` command which uses a configuration like the following `/etc/firehol/fireqos.conf`, used at the Netdata demo site:
-
-```sh
- # configure the Netdata ports
- server_netdata_ports="tcp/19999"
-
- interface eth0 world bidirectional ethernet balanced rate 50Mbit
- class arp
- match arp
-
- class icmp
- match icmp
-
- class dns commit 1Mbit
- server dns
- client dns
-
- class ntp
- server ntp
- client ntp
-
- class ssh commit 2Mbit
- server ssh
- client ssh
-
- class rsync commit 2Mbit max 10Mbit
- server rsync
- client rsync
-
- class web_server commit 40Mbit
- server http
- server netdata
-
- class client
- client surfing
-
- class nms commit 1Mbit
- match input src 10.2.3.5
-```
-
-Nothing more is needed. You just run `fireqos start` to apply this configuration, restart Netdata and you have real-time visualization of the bandwidth consumption of your applications. FireQOS is not a daemon. It will just convert the configuration to `tc` commands. It will run them and it will exit.
-
-**IMPORTANT**: If you copy this configuration to apply it to your system, please adapt the speeds - experiment in non-production environments to learn the tool, before applying it on your servers.
-
-And this is what you are going to get:
-
-
-
-## QoS Configuration with tc
-
-First, setup the tc rules in rc.local using commands to assign different QoS markings to different classids. You can see one such example in [github issue #4563](https://github.com/netdata/netdata/issues/4563#issuecomment-455711973).
-
-Then, map the classids to names by creating `/etc/iproute2/tc_cls`. For example:
-
-```
-2:1 Standard
-2:8 LowPriorityData
-2:10 HighThroughputData
-2:16 OAM
-2:18 LowLatencyData
-2:24 BroadcastVideo
-2:26 MultimediaStreaming
-2:32 RealTimeInteractive
-2:34 MultimediaConferencing
-2:40 Signalling
-2:46 Telephony
-2:48 NetworkControl
-```
-
-Add the following configuration option in `/etc/netdata.conf`:
-
-```\[plugin:tc]
- enable show all classes and qdiscs for all interfaces = yes
-```
-
-Finally, create `/etc/netdata/tc-qos-helper.conf` with this content:
-`tc_show="class"`
-
-Please note, that by default Netdata will enable monitoring metrics only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Set `yes` for a chart instead of `auto` to enable it permanently. You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics for all internal Netdata plugins.
-
-
diff --git a/collectors/tc.plugin/README.md b/collectors/tc.plugin/README.md
new file mode 120000
index 00000000000000..2a20ff2622e306
--- /dev/null
+++ b/collectors/tc.plugin/README.md
@@ -0,0 +1 @@
+integrations/tc_qos_classes.md
\ No newline at end of file
diff --git a/collectors/tc.plugin/integrations/tc_qos_classes.md b/collectors/tc.plugin/integrations/tc_qos_classes.md
new file mode 100644
index 00000000000000..7a665066085aec
--- /dev/null
+++ b/collectors/tc.plugin/integrations/tc_qos_classes.md
@@ -0,0 +1,171 @@
+
+
+# tc QoS classes
+
+
+
+
+
+Plugin: tc.plugin
+Module: tc.plugin
+
+
+
+## Overview
+
+Examine tc metrics to gain insights into Linux traffic control operations. Study packet flow rates, queue lengths, and drop rates to optimize network traffic flow.
+
+The plugin uses the `tc` command to collect information about traffic control.
+
+This collector is only supported on the following platforms:
+
+- Linux
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs access to the `tc` command to get the necessary metrics. To achieve this, Netdata modifies the permissions of the file `/usr/libexec/netdata/plugins.d/tc-qos-helper.sh`.
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per network device direction
+
+Metrics related to QoS network device directions. Each direction (in/out) produces its own set of the following metrics.
+
+Labels:
+
+| Label | Description |
+|:-----------|:----------------|
+| device | The network interface. |
+| device_name | The network interface name |
+| group | The device family |
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| tc.qos | a dimension per class | kilobits/s |
+| tc.qos_packets | a dimension per class | packets/s |
+| tc.qos_dropped | a dimension per class | packets/s |
+| tc.qos_tokens | a dimension per class | tokens |
+| tc.qos_ctokens | a dimension per class | ctokens |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Create `tc-qos-helper.conf`
+
+In order to view tc classes, you need to create the file `/etc/netdata/tc-qos-helper.conf` with content:
+
+```conf
+tc_show="class"
+```
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:tc]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config option
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| script to run to get tc values | Path to script `tc-qos-helper.sh` | /usr/libexec/netdata/plugins.d/tc-qos-helper.sh | no |
+| enable show all classes and qdiscs for all interfaces | yes/no flag to control what data is presented. | yes | no |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic example configuration using classes defined in `/etc/iproute2/tc_cls`.
+
+An example of class IDs mapped to names in that file can be:
+
+```conf
+2:1 Standard
+2:8 LowPriorityData
+2:10 HighThroughputData
+2:16 OAM
+2:18 LowLatencyData
+2:24 BroadcastVideo
+2:26 MultimediaStreaming
+2:32 RealTimeInteractive
+2:34 MultimediaConferencing
+2:40 Signalling
+2:46 Telephony
+2:48 NetworkControl
+```
+
+You can read more about setting up the tc rules in rc.local in this [GitHub issue](https://github.com/netdata/netdata/issues/4563#issuecomment-455711973).
+
+
+```ini
+[plugin:tc]
+ script to run to get tc values = /usr/libexec/netdata/plugins.d/tc-qos-helper.sh
+ enable show all classes and qdiscs for all interfaces = yes
+
+```
+
diff --git a/collectors/tc.plugin/metadata.yaml b/collectors/tc.plugin/metadata.yaml
index dcd03e470825e5..f4039a8c555f2c 100644
--- a/collectors/tc.plugin/metadata.yaml
+++ b/collectors/tc.plugin/metadata.yaml
@@ -36,7 +36,14 @@ modules:
description: ""
setup:
prerequisites:
- list: []
+ list:
+ - title: Create `tc-qos-helper.conf`
+ description: |
+ In order to view tc classes, you need to create the file `/etc/netdata/tc-qos-helper.conf` with content:
+
+ ```conf
+ tc_show="class"
+ ```
configuration:
file:
name: "netdata.conf"
@@ -52,16 +59,42 @@ modules:
description: Path to script `tc-qos-helper.sh`
default_value: "usr/libexec/netdata/plugins.d/tc-qos-helper.s"
required: false
+ - name: enable show all classes and qdiscs for all interfaces
+ description: yes/no flag to control what data is presented.
+ default_value: "yes"
+ required: false
examples:
folding:
enabled: false
title: "Config"
list:
- name: Basic
- description: A basic example configuration.
+ description: |
+ A basic example configuration using classes defined in `/etc/iproute2/tc_cls`.
+
+ An example of class IDs mapped to names in that file can be:
+
+ ```conf
+ 2:1 Standard
+ 2:8 LowPriorityData
+ 2:10 HighThroughputData
+ 2:16 OAM
+ 2:18 LowLatencyData
+ 2:24 BroadcastVideo
+ 2:26 MultimediaStreaming
+ 2:32 RealTimeInteractive
+ 2:34 MultimediaConferencing
+ 2:40 Signalling
+ 2:46 Telephony
+ 2:48 NetworkControl
+ ```
+
+ You can read more about setting up the tc rules in rc.local in this [GitHub issue](https://github.com/netdata/netdata/issues/4563#issuecomment-455711973).
+
config: |
[plugin:tc]
script to run to get tc values = /usr/libexec/netdata/plugins.d/tc-qos-helper.sh
+ enable show all classes and qdiscs for all interfaces = yes
troubleshooting:
problems:
list: []
diff --git a/collectors/tc.plugin/tc-qos-helper.sh.in b/collectors/tc.plugin/tc-qos-helper.sh.in
index 97d4d016db9785..3298c39a30cf40 100755
--- a/collectors/tc.plugin/tc-qos-helper.sh.in
+++ b/collectors/tc.plugin/tc-qos-helper.sh.in
@@ -2,54 +2,113 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2017 Costa Tsaousis
+# (C) 2023 Netdata Inc.
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This script is a helper to allow netdata collect tc data.
# tc output parsing has been implemented in C, inside netdata
# This script allows setting names to dimensions.
-export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin"
+export PATH="${PATH}:/sbin:/usr/sbin:/usr/local/sbin:@sbindir_POST@"
export LC_ALL=C
+cmd_line="'${0}' $(printf "'%s' " "${@}")"
+
# -----------------------------------------------------------------------------
-# logging functions
+# logging
-PROGRAM_NAME="$(basename "$0")"
+PROGRAM_NAME="$(basename "${0}")"
PROGRAM_NAME="${PROGRAM_NAME/.plugin/}"
-logdate() {
- date "+%Y-%m-%d %H:%M:%S"
+# these should be the same as syslog() priorities
+NDLP_EMERG=0 # system is unusable
+NDLP_ALERT=1 # action must be taken immediately
+NDLP_CRIT=2 # critical conditions
+NDLP_ERR=3 # error conditions
+NDLP_WARN=4 # warning conditions
+NDLP_NOTICE=5 # normal but significant condition
+NDLP_INFO=6 # informational
+NDLP_DEBUG=7 # debug-level messages
+
+# the max (numerically) log level we will log
+LOG_LEVEL=$NDLP_INFO
+
+set_log_min_priority() { # set LOG_LEVEL to the numeric NDLP_* value named by NETDATA_LOG_LEVEL
+ case "${NETDATA_LOG_LEVEL,,}" in # ',,' lowercases the value, so the match is case-insensitive
+ "emerg" | "emergency")
+ LOG_LEVEL=$NDLP_EMERG
+ ;;
+
+ "alert")
+ LOG_LEVEL=$NDLP_ALERT
+ ;;
+
+ "crit" | "critical")
+ LOG_LEVEL=$NDLP_CRIT
+ ;;
+
+ "err" | "error")
+ LOG_LEVEL=$NDLP_ERR
+ ;;
+
+ "warn" | "warning")
+ LOG_LEVEL=$NDLP_WARN
+ ;;
+
+ "notice")
+ LOG_LEVEL=$NDLP_NOTICE
+ ;;
+
+ "info")
+ LOG_LEVEL=$NDLP_INFO
+ ;;
+
+ "debug")
+ LOG_LEVEL=$NDLP_DEBUG
+ ;;
+ esac # no default arm: an empty or unrecognized value leaves LOG_LEVEL unchanged
+}
-log() {
- local status="${1}"
- shift
+set_log_min_priority
- echo >&2 "$(logdate): ${PROGRAM_NAME}: ${status}: ${*}"
+log() {
+ local level="${1}"
+ shift 1
+
+ [[ -n "$level" && -n "$LOG_LEVEL" && "$level" -gt "$LOG_LEVEL" ]] && return
+
+ systemd-cat-native --log-as-netdata --newline="--NEWLINE--" <
-
-# timex.plugin
-
-This plugin monitors the system kernel clock synchronization state.
-
-This plugin creates the following charts:
-
-- System clock synchronization state according to the system kernel
-- System clock status which gives the value of the `time_status` variable in the kernel
-- Computed time offset between local system and reference clock
-
-This is obtained from the information provided by the [ntp_adjtime()](https://man7.org/linux/man-pages/man2/adjtimex.2.html) system call.
-An unsynchronized clock may indicate a hardware clock error, or an issue with UTC synchronization.
-
-## Configuration
-
-Edit the `netdata.conf` configuration file using [`edit-config`](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#use-edit-config-to-edit-configuration-files) from the [Netdata config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory), which is typically at `/etc/netdata`.
-
-Scroll down to the `[plugin:timex]` section to find the available options:
-
-```ini
-[plugin:timex]
- # update every = 1
- # clock synchronization state = yes
- # time offset = yes
-```
diff --git a/collectors/timex.plugin/README.md b/collectors/timex.plugin/README.md
new file mode 120000
index 00000000000000..89c1bd0d410674
--- /dev/null
+++ b/collectors/timex.plugin/README.md
@@ -0,0 +1 @@
+integrations/timex.md
\ No newline at end of file
diff --git a/collectors/timex.plugin/integrations/timex.md b/collectors/timex.plugin/integrations/timex.md
new file mode 100644
index 00000000000000..754b2368ce4856
--- /dev/null
+++ b/collectors/timex.plugin/integrations/timex.md
@@ -0,0 +1,143 @@
+
+
+# Timex
+
+
+
+
+
+Plugin: timex.plugin
+Module: timex.plugin
+
+
+
+## Overview
+
+Examine Timex metrics to gain insights into system clock operations. Study time sync status, clock drift, and adjustments to ensure accurate system timekeeping.
+
+It uses the `adjtimex` system call on Linux and `ntp_adjtime` on FreeBSD or macOS to monitor the system kernel clock synchronization state.
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+
+### Default Behavior
+
+#### Auto-Detection
+
+This integration doesn't support auto-detection.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Timex instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| system.clock_sync_state | state | state |
+| system.clock_status | unsync, clockerr | status |
+| system.clock_sync_offset | offset | milliseconds |
+
+
+
+## Alerts
+
+
+The following alerts are available:
+
+| Alert name | On metric | Description |
+|:------------|:----------|:------------|
+| [ system_clock_sync_state ](https://github.com/netdata/netdata/blob/master/health/health.d/timex.conf) | system.clock_sync_state | when set to 0, the system kernel believes the system clock is not properly synchronized to a reliable server |
+
+
+## Setup
+
+### Prerequisites
+
+No action required.
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:timex]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+At least one option ('clock synchronization state', 'time offset') needs to be enabled for this collector to run.
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 1 | no |
+| clock synchronization state | Make chart showing system clock synchronization state. | yes | yes |
+| time offset | Make chart showing computed time offset between local system and reference clock | yes | yes |
+
+
+
+#### Examples
+
+##### Basic
+
+A basic configuration example.
+
+Config
+
+```yaml
+[plugin:timex]
+ update every = 1
+ clock synchronization state = yes
+ time offset = yes
+
+```
+
+
+
diff --git a/collectors/xenstat.plugin/README.md b/collectors/xenstat.plugin/README.md
deleted file mode 100644
index 8d17a33cd7c3b4..00000000000000
--- a/collectors/xenstat.plugin/README.md
+++ /dev/null
@@ -1,57 +0,0 @@
-
-
-# xenstat.plugin
-
-`xenstat.plugin` collects XenServer and XCP-ng statistics.
-
-## Prerequisites
-
-1. install `xen-dom0-libs-devel` and `yajl-devel` using the package manager of your system.
- Note: On Cent-OS systems you will need `centos-release-xen` repository and the required package for xen is `xen-devel`
-
-2. re-install Netdata from source. The installer will detect that the required libraries are now available and will also build xenstat.plugin.
-
-Keep in mind that `libxenstat` requires root access, so the plugin is setuid to root.
-
-## Charts
-
-The plugin provides XenServer and XCP-ng host and domains statistics:
-
-Host:
-
-1. Number of domains.
-
-Domain:
-
-1. CPU.
-2. Memory.
-3. Networks.
-4. VBDs.
-
-## Configuration
-
-If you need to disable xenstat for Netdata, edit /etc/netdata/netdata.conf and set:
-
-```
-[plugins]
- xenstat = no
-```
-
-## Debugging
-
-You can run the plugin by hand:
-
-```
-sudo /usr/libexec/netdata/plugins.d/xenstat.plugin 1 debug
-```
-
-You will get verbose output on what the plugin does.
-
-
diff --git a/collectors/xenstat.plugin/README.md b/collectors/xenstat.plugin/README.md
new file mode 120000
index 00000000000000..32fe4d213ca2e1
--- /dev/null
+++ b/collectors/xenstat.plugin/README.md
@@ -0,0 +1 @@
+integrations/xen_xcp-ng.md
\ No newline at end of file
diff --git a/collectors/xenstat.plugin/integrations/xen_xcp-ng.md b/collectors/xenstat.plugin/integrations/xen_xcp-ng.md
new file mode 100644
index 00000000000000..17dc8d78570470
--- /dev/null
+++ b/collectors/xenstat.plugin/integrations/xen_xcp-ng.md
@@ -0,0 +1,176 @@
+
+
+# Xen XCP-ng
+
+
+
+
+
+Plugin: xenstat.plugin
+Module: xenstat.plugin
+
+
+
+## Overview
+
+This collector monitors XenServer and XCP-ng host and domains statistics.
+
+
+
+This collector is supported on all platforms.
+
+This collector supports collecting metrics from multiple instances of this integration, including remote instances.
+
+The plugin needs setuid.
+
+### Default Behavior
+
+#### Auto-Detection
+
+This plugin requires the `xen-dom0-libs-devel` and `yajl-devel` libraries to be installed.
+
+#### Limits
+
+The default configuration for this integration does not impose any limits on data collection.
+
+#### Performance Impact
+
+The default configuration for this integration is not expected to impose a significant performance impact on the system.
+
+
+## Metrics
+
+Metrics grouped by *scope*.
+
+The scope defines the instance that the metric belongs to. An instance is uniquely identified by a set of labels.
+
+
+
+### Per Xen XCP-ng instance
+
+These metrics refer to the entire monitored application.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| xenstat.mem | free, used | MiB |
+| xenstat.domains | domains | domains |
+| xenstat.cpus | cpus | cpus |
+| xenstat.cpu_freq | frequency | MHz |
+
+### Per xendomain
+
+Metrics related to Xen domains. Each domain provides its own set of the following metrics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| xendomain.states | running, blocked, paused, shutdown, crashed, dying | boolean |
+| xendomain.cpu | used | percentage |
+| xendomain.mem | maximum, current | MiB |
+| xendomain.vcpu | a dimension per vcpu | percentage |
+
+### Per xendomain vbd
+
+Metrics related to Xen domain Virtual Block Device. Each VBD provides its own set of the following metrics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| xendomain.oo_req_vbd | requests | requests/s |
+| xendomain.requests_vbd | read, write | requests/s |
+| xendomain.sectors_vbd | read, write | sectors/s |
+
+### Per xendomain network
+
+Metrics related to Xen domain network interfaces. Each network interface provides its own set of the following metrics.
+
+This scope has no labels.
+
+Metrics:
+
+| Metric | Dimensions | Unit |
+|:------|:----------|:----|
+| xendomain.bytes_network | received, sent | kilobits/s |
+| xendomain.packets_network | received, sent | packets/s |
+| xendomain.errors_network | received, sent | errors/s |
+| xendomain.drops_network | received, sent | drops/s |
+
+
+
+## Alerts
+
+There are no alerts configured by default for this integration.
+
+
+## Setup
+
+### Prerequisites
+
+#### Libraries
+
+1. Install `xen-dom0-libs-devel` and `yajl-devel` using the package manager of your system.
+
+ Note: On CentOS systems you will need the `centos-release-xen` repository, and the required package for Xen is `xen-devel`.
+
+2. Re-install Netdata from source. The installer will detect that the required libraries are now available and will also build xenstat.plugin.
+
+
+
+### Configuration
+
+#### File
+
+The configuration file name for this integration is `netdata.conf`.
+Configuration for this specific integration is located in the `[plugin:xenstat]` section within that file.
+
+The file format is a modified INI syntax. The general structure is:
+
+```ini
+[section1]
+ option1 = some value
+ option2 = some other value
+
+[section2]
+ option3 = some third value
+```
+You can edit the configuration file using the `edit-config` script from the
+Netdata [config directory](https://github.com/netdata/netdata/blob/master/docs/configure/nodes.md#the-netdata-config-directory).
+
+```bash
+cd /etc/netdata 2>/dev/null || cd /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```
+#### Options
+
+
+
+Config options
+
+| Name | Description | Default | Required |
+|:----|:-----------|:-------|:--------:|
+| update every | Data collection frequency. | 1 | no |
+
+
+
+#### Examples
+There are no configuration examples.
+
+
diff --git a/collectors/xenstat.plugin/metadata.yaml b/collectors/xenstat.plugin/metadata.yaml
index 493183694b556f..e5527dbb17fdba 100644
--- a/collectors/xenstat.plugin/metadata.yaml
+++ b/collectors/xenstat.plugin/metadata.yaml
@@ -4,7 +4,7 @@ modules:
plugin_name: xenstat.plugin
module_name: xenstat.plugin
monitored_instance:
- name: Xen/XCP-ng
+ name: Xen XCP-ng
link: "https://xenproject.org/"
categories:
- data-collection.containers-and-vms
diff --git a/collectors/xenstat.plugin/xenstat_plugin.c b/collectors/xenstat.plugin/xenstat_plugin.c
index acd072605f28fd..319396d4359ff4 100644
--- a/collectors/xenstat.plugin/xenstat_plugin.c
+++ b/collectors/xenstat.plugin/xenstat_plugin.c
@@ -920,20 +920,14 @@ static void xenstat_send_domain_metrics() {
}
int main(int argc, char **argv) {
- stderror = stderr;
clocks_init();
// ------------------------------------------------------------------------
// initialization of netdata plugin
- program_name = "xenstat.plugin";
+ program_name = PLUGIN_XENSTAT_NAME;
- // disable syslog
- error_log_syslog = 0;
-
- // set errors flood protection to 100 logs per hour
- error_log_errors_per_period = 100;
- error_log_throttle_period = 3600;
+ nd_log_initialize_for_external_plugins(PLUGIN_XENSTAT_NAME);
// ------------------------------------------------------------------------
// parse command line parameters
diff --git a/configure.ac b/configure.ac
index ac0d7bff6f138e..8f1e4115ce029d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -54,6 +54,8 @@ else
AC_CHECK_TOOL([AR], [ar])
fi
+CFLAGS="$CFLAGS -fexceptions"
+
# -----------------------------------------------------------------------------
# configurable options
@@ -75,6 +77,18 @@ AC_ARG_ENABLE(
,
[enable_plugin_systemd_journal="detect"]
)
+AC_ARG_ENABLE(
+ [logsmanagement],
+ [AS_HELP_STRING([--disable-logsmanagement], [Disable logsmanagement @<:@default autodetect@:>@])],
+ ,
+ [enable_logsmanagement="detect"]
+)
+AC_ARG_ENABLE(
+ [logsmanagement_tests],
+ [AS_HELP_STRING([--enable-logsmanagement-tests], [Enable logsmanagement tests @<:@default disabled@:>@])],
+ ,
+ [enable_logsmanagement_tests="no"]
+)
AC_ARG_ENABLE(
[plugin-cups],
[AS_HELP_STRING([--enable-plugin-cups], [enable cups plugin @<:@default autodetect@:>@])],
@@ -207,6 +221,12 @@ AC_ARG_ENABLE(
,
[enable_ml="detect"]
)
+AC_ARG_ENABLE(
+ [gtests],
+ [AS_HELP_STRING([--enable-gtests], [Enable google tests @<:@default no@:>@])],
+ ,
+ [enable_gtests="no"]
+)
AC_ARG_ENABLE(
[aclk_ssl_debug],
[AS_HELP_STRING([--enable-aclk-ssl-debug], [Enables possibility for SSL key logging @<:@default no@:>@])],
@@ -555,16 +575,91 @@ OPTIONAL_UV_LIBS="${UV_LIBS}"
AC_CHECK_LIB(
[lz4],
- [LZ4_initStream],
+ [LZ4_createStream],
[LZ4_LIBS_FAST="-llz4"]
)
AC_CHECK_LIB(
[lz4],
- [LZ4_compress_default],
+ [LZ4_compress_fast_continue],
[LZ4_LIBS="-llz4"]
)
+# -----------------------------------------------------------------------------
+# libcurl
+
+PKG_CHECK_MODULES(
+ [LIBCURL],
+ [libcurl],
+ [AC_CHECK_LIB(
+ [curl],
+ [curl_easy_init],
+ [have_libcurl=yes],
+ [have_libcurl=no]
+ )],
+ [have_libcurl=no]
+)
+
+if test "x$have_libcurl" = "xyes"; then
+ AC_DEFINE([HAVE_CURL], [1], [libcurl usability])
+ OPTIONAL_CURL_LIBS="-lcurl"
+fi
+
+# -----------------------------------------------------------------------------
+# PCRE2
+
+PKG_CHECK_MODULES(
+ [LIBPCRE2],
+ [libpcre2-8],
+ [AC_CHECK_LIB(
+ [pcre2-8],
+ [pcre2_compile_8],
+ [have_libpcre2=yes],
+ [have_libpcre2=no]
+ )],
+ [have_libpcre2=no]
+)
+
+if test "x$have_libpcre2" = "xyes"; then
+ AC_DEFINE([HAVE_PCRE2], [1], [PCRE2 usability])
+ OPTIONAL_PCRE2_LIBS="-lpcre2-8"
+fi
+
+AM_CONDITIONAL([ENABLE_LOG2JOURNAL], [test "${have_libpcre2}" = "yes"])
+
+# -----------------------------------------------------------------------------
+# zstd
+
+AC_CHECK_LIB([zstd], [ZSTD_createCStream, ZSTD_createDStream],
+ [LIBZSTD_FOUND=yes],
+ [LIBZSTD_FOUND=no])
+
+if test "x$LIBZSTD_FOUND" = "xyes"; then
+ AC_DEFINE([ENABLE_ZSTD], [1], [libzstd usability])
+ OPTIONAL_ZSTD_LIBS="-lzstd"
+fi
+
+# -----------------------------------------------------------------------------
+# brotli
+
+AC_CHECK_LIB([brotlienc], [BrotliEncoderCreateInstance, BrotliEncoderCompressStream],
+ [LIBBROTLIENC_FOUND=yes],
+ [LIBBROTLIENC_FOUND=no])
+
+if test "x$LIBBROTLIENC_FOUND" = "xyes"; then
+ AC_DEFINE([ENABLE_BROTLIENC], [1], [libbrotlienc usability])
+ OPTIONAL_BROTLIENC_LIBS="-lbrotlienc"
+fi
+
+AC_CHECK_LIB([brotlidec], [BrotliDecoderCreateInstance, BrotliDecoderDecompressStream],
+ [LIBBROTLIDEC_FOUND=yes],
+ [LIBBROTLIDEC_FOUND=no])
+
+if test "x$LIBBROTLIDEC_FOUND" = "xyes"; then
+ AC_DEFINE([ENABLE_BROTLIDEC], [1], [libbrotlidec usability])
+ OPTIONAL_BROTLIDEC_LIBS="-lbrotlidec"
+fi
+
# -----------------------------------------------------------------------------
# zlib
@@ -650,10 +745,10 @@ AC_C_BIGENDIAN([],
[AC_MSG_ERROR([Could not find out system endiannnes])])
AC_CHECK_SIZEOF(void *)
-if test "$ac_cv_sizeof_void_p" = 8; then
+if test "$ac_cv_sizeof_void_p" = 8; then
AC_MSG_RESULT(Detected 64-bit Build Environment)
LIBJUDY_CFLAGS="$LIBJUDY_CFLAGS -DJU_64BIT"
-else
+else
AC_MSG_RESULT(Detected 32-bit Build Environment)
LIBJUDY_CFLAGS="$LIBJUDY_CFLAGS -UJU_64BIT"
fi
@@ -702,7 +797,7 @@ if test "${enable_lz4}" != "no"; then
AC_TRY_LINK(
[ #include ],
[
- LZ4_stream_t* stream = LZ4_initStream(NULL, 0);
+ LZ4_stream_t* stream = LZ4_createStream();
],
[ enable_lz4="yes"],
[ enable_lz4="no" ]
@@ -769,6 +864,26 @@ fi
AC_MSG_RESULT([${enable_jsonc}])
AM_CONDITIONAL([ENABLE_JSONC], [test "${enable_jsonc}" = "yes"])
+# -----------------------------------------------------------------------------
+# libyaml
+
+PKG_CHECK_MODULES(
+ [LIBYAML],
+ [yaml-0.1],
+ [AC_CHECK_LIB(
+ [yaml],
+ [yaml_parser_initialize],
+ [have_libyaml=yes],
+ [have_libyaml=no]
+ )],
+ [have_libyaml=no]
+)
+
+if test "x$have_libyaml" = "xyes"; then
+ AC_DEFINE([HAVE_LIBYAML], [1], [libyaml usability])
+ OPTIONAL_YAML_LIBS="-lyaml"
+fi
+
# -----------------------------------------------------------------------------
# YAML
@@ -835,6 +950,27 @@ if test "${enable_pedantic}" = "yes"; then
CFLAGS="${CFLAGS} -pedantic -Wall -Wextra -Wno-long-long"
fi
+# -----------------------------------------------------------------------------
+# dlsym check
+
+AC_MSG_CHECKING(whether we can use dlsym)
+OLD_LIBS="${LIBS}"
+LIBS="-ldl"
+AC_LINK_IFELSE([AC_LANG_SOURCE([[
+ #include
+ static void *(*libc_malloc)(size_t);
+ int main() {
+ libc_malloc = dlsym(RTLD_NEXT, "malloc");
+ }
+]])], CAN_USE_DLSYM=yes, CAN_USE_DLSYM=no)
+LIBS="${OLD_LIBS}"
+AC_MSG_RESULT($CAN_USE_DLSYM)
+
+if test "x$CAN_USE_DLSYM" = xyes; then
+ AC_DEFINE([HAVE_DLSYM], [1], [dlsym usability])
+ OPTIONAL_DL_LIBS="-ldl"
+fi
+AC_SUBST([OPTIONAL_DL_LIBS])
# -----------------------------------------------------------------------------
# memory allocation library
@@ -903,9 +1039,6 @@ if test "${enable_h2o}" != "no"; then
else
can_build_h2o="no"
fi
- if test "${with_zlib}" != "yes"; then
- can_build_h2o="no"
- fi
AC_MSG_RESULT([${can_build_h2o}])
if test "${can_build_h2o}" = "no" -a "${enable_h2o}" = "yes"; then
@@ -1063,7 +1196,6 @@ fi
AC_MSG_RESULT([${enable_plugin_apps}])
AM_CONDITIONAL([ENABLE_PLUGIN_APPS], [test "${enable_plugin_apps}" = "yes"])
-
# -----------------------------------------------------------------------------
# freeipmi.plugin - libipmimonitoring
@@ -1141,8 +1273,44 @@ fi
AC_MSG_RESULT([${enable_plugin_systemd_journal}])
AM_CONDITIONAL([ENABLE_PLUGIN_SYSTEMD_JOURNAL], [test "${enable_plugin_systemd_journal}" = "yes"])
+AC_CHECK_LIB([systemd], [sd_journal_open_files_fd], [have_sd_journal_open_files_fd=yes], [have_sd_journal_open_files_fd=no])
+if test "${have_sd_journal_open_files_fd}" = "yes"; then
+ AC_DEFINE([HAVE_SD_JOURNAL_OPEN_FILES_FD], [1], [sd_journal_open_files_fd usability])
+fi
+
+AC_CHECK_LIB([systemd], [sd_journal_restart_fields], [have_sd_journal_restart_fields=yes], [have_sd_journal_restart_fields=no])
+if test "${have_sd_journal_restart_fields}" = "yes"; then
+ AC_DEFINE([HAVE_SD_JOURNAL_RESTART_FIELDS], [1], [sd_journal_restart_fields usability])
+fi
+
+AC_CHECK_LIB([systemd], [sd_journal_get_seqnum], [have_sd_journal_get_seqnum=yes], [have_sd_journal_get_seqnum=no])
+if test "${have_sd_journal_get_seqnum}" = "yes"; then
+ AC_DEFINE([HAVE_SD_JOURNAL_GET_SEQNUM], [1], [sd_journal_get_seqnum usability])
+fi
+
+AC_CHECK_LIB([systemd], [sd_bus_default_system, sd_bus_call_method, sd_bus_message_enter_container, sd_bus_message_read, sd_bus_message_exit_container],
+ [SYSTEMD_DBUS_FOUND=yes],
+ [SYSTEMD_DBUS_FOUND=no])
+
+if test "x$SYSTEMD_DBUS_FOUND" = "xyes"; then
+ AC_DEFINE([ENABLE_SYSTEMD_DBUS], [1], [libsystemd dbus usability])
+fi
+
AC_MSG_NOTICE([OPTIONAL_SYSTEMD_LIBS is set to: ${OPTIONAL_SYSTEMD_LIBS}])
+if test "${enable_plugin_systemd_journal}" = "yes"; then
+ AC_MSG_CHECKING([for SD_JOURNAL_OS_ROOT in systemd])
+ AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM(
+ [[#include ]],
+ [[int x = SD_JOURNAL_OS_ROOT;]]
+ )],
+ [AC_DEFINE(HAVE_SD_JOURNAL_OS_ROOT, 1, [Define if SD_JOURNAL_OS_ROOT is available])
+ AC_MSG_RESULT(yes)],
+ [AC_MSG_RESULT(no)]
+ )
+fi
+
LIBS="${LIBS_BAK}"
# -----------------------------------------------------------------------------
@@ -1371,17 +1539,17 @@ AM_CONDITIONAL([ENABLE_PLUGIN_PERF], [test "${enable_plugin_perf}" = "yes"])
# -----------------------------------------------------------------------------
# gtest/gmock
-AC_MSG_CHECKING([if gtest and gmock can be found])
+if test "${enable_gtests}" = "yes"; then
+ AC_MSG_CHECKING([if gtest can be found])
-PKG_CHECK_MODULES([GTEST], [gtest], [have_gtest=yes], [have_gtest=no])
-PKG_CHECK_MODULES([GMOCK], [gmock], [have_gmock=yes], [have_gmock=no])
+ PKG_CHECK_MODULES([GTEST], [gtest], [have_gtest=yes], [have_gtest=no])
-if test "${have_gtest}" = "yes" -a "${have_gmock}" = "yes"; then
- OPTIONAL_GTEST_CFLAGS="${GTEST_CFLAGS} ${GMOCK_CFLAGS}"
- OPTIONAL_GTEST_LIBS="${GTEST_LIBS} ${GMOCK_LIBS}"
- have_gtest="yes"
-else
- have_gtest="no"
+ if test "${have_gtest}" = "yes"; then
+ OPTIONAL_GTEST_CFLAGS="${GTEST_CFLAGS}"
+ OPTIONAL_GTEST_LIBS="${GTEST_LIBS}"
+
+ AC_DEFINE([HAVE_GTEST], [1], [gtest availability])
+ fi
fi
# -----------------------------------------------------------------------------
@@ -1426,6 +1594,56 @@ if test "${build_ml}" = "yes"; then
fi
+# -----------------------------------------------------------------------------
+# logsmanagement
+
+LIBS_BAK="${LIBS}"
+
+# Check if submodules have not been fetched. Fail if Logs Management was explicitly requested.
+
+AC_MSG_CHECKING([if git submodules are present for logs management functionality])
+if test -f "fluent-bit/CMakeLists.txt"; then
+ AC_MSG_RESULT([yes])
+ have_logsmanagement_submodules="yes"
+else
+ AC_MSG_RESULT([no])
+ have_logsmanagement_submodules="no"
+fi
+
+if test "${enable_logsmanagement}" != "no" -a "${have_logsmanagement_submodules}" = "no"; then
+ AC_MSG_WARN([Logs management cannot be built because the required git submodules are missing.])
+fi
+
+if test "${enable_logsmanagement}" != "no" -a "x$CAN_USE_DLSYM" = xno; then
+ AC_MSG_WARN([Logs management cannot be built because dlsym cannot be used.])
+fi
+
+# Decide if we should build Logs Management
+if test "${enable_logsmanagement}" != "no" -a "${have_logsmanagement_submodules}" = "yes" -a "x$CAN_USE_DLSYM" = xyes; then
+ build_logsmanagement="yes"
+else
+ build_logsmanagement="no"
+fi
+
+AM_CONDITIONAL([ENABLE_LOGSMANAGEMENT], [test "${build_logsmanagement}" = "yes"])
+if test "${build_logsmanagement}" = "yes"; then
+ AC_DEFINE([ENABLE_LOGSMANAGEMENT], [1], [enable logs management functionality])
+fi
+
+# Decide if we should build Logs Management tests.
+if test "${build_logsmanagement}" = "yes" -a "${enable_logsmanagement_tests}" = "yes"; then
+ build_logsmanagement_tests="yes"
+else
+ build_logsmanagement_tests="no"
+fi
+
+AM_CONDITIONAL([ENABLE_LOGSMANAGEMENT_TESTS], [test "${build_logsmanagement_tests}" = "yes"])
+if test "${build_logsmanagement_tests}" = "yes"; then
+ AC_DEFINE([ENABLE_LOGSMANAGEMENT_TESTS], [1], [logs management tests])
+fi
+
+LIBS="${LIBS_BAK}"
+
# -----------------------------------------------------------------------------
# debugfs.plugin
@@ -1523,18 +1741,6 @@ PKG_CHECK_MODULES(
[have_libssl=no]
)
-PKG_CHECK_MODULES(
- [LIBCURL],
- [libcurl],
- [AC_CHECK_LIB(
- [curl],
- [curl_easy_init],
- [have_libcurl=yes],
- [have_libcurl=no]
- )],
- [have_libcurl=no]
-)
-
PKG_CHECK_MODULES(
[AWS_CPP_SDK_CORE],
[aws-cpp-sdk-core],
@@ -1803,27 +2009,6 @@ AC_LANG_POP([C++])
# -----------------------------------------------------------------------------
-AC_MSG_CHECKING(whether we can use dlsym)
-OLD_LIBS="${LIBS}"
-LIBS="-ldl"
-AC_LINK_IFELSE([AC_LANG_SOURCE([[
- #include
- static void *(*libc_malloc)(size_t);
- int main() {
- libc_malloc = dlsym(RTLD_NEXT, "malloc");
- }
-]])], CAN_USE_DLSYM=yes, CAN_USE_DLSYM=no)
-LIBS="${OLD_LIBS}"
-AC_MSG_RESULT($CAN_USE_DLSYM)
-
-if test "x$CAN_USE_DLSYM" = xyes; then
- AC_DEFINE([HAVE_DLSYM], [1], [dlsym usability])
- OPTIONAL_DL_LIBS="-ldl"
-fi
-AC_SUBST([OPTIONAL_DL_LIBS])
-
-# -----------------------------------------------------------------------------
-
AC_DEFINE_UNQUOTED([NETDATA_USER], ["${with_user}"], [use this user to drop privileged])
@@ -1877,6 +2062,12 @@ AC_SUBST([OPTIONAL_MATH_LIBS])
AC_SUBST([OPTIONAL_DATACHANNEL_LIBS])
AC_SUBST([OPTIONAL_UV_LIBS])
AC_SUBST([OPTIONAL_LZ4_LIBS])
+AC_SUBST([OPTIONAL_BROTLIENC_LIBS])
+AC_SUBST([OPTIONAL_BROTLIDEC_LIBS])
+AC_SUBST([OPTIONAL_YAML_LIBS])
+AC_SUBST([OPTIONAL_CURL_LIBS])
+AC_SUBST([OPTIONAL_PCRE2_LIBS])
+AC_SUBST([OPTIONAL_ZSTD_LIBS])
AC_SUBST([OPTIONAL_SSL_LIBS])
AC_SUBST([OPTIONAL_JSONC_LIBS])
AC_SUBST([OPTIONAL_YAML_LIBS])
@@ -1965,6 +2156,7 @@ AC_CONFIG_FILES([
collectors/freebsd.plugin/Makefile
collectors/freeipmi.plugin/Makefile
collectors/cups.plugin/Makefile
+ collectors/log2journal/Makefile
collectors/idlejitter.plugin/Makefile
collectors/macos.plugin/Makefile
collectors/nfacct.plugin/Makefile
@@ -2003,14 +2195,18 @@ AC_CONFIG_FILES([
libnetdata/aral/Makefile
libnetdata/avl/Makefile
libnetdata/buffer/Makefile
+ libnetdata/buffered_reader/Makefile
libnetdata/clocks/Makefile
libnetdata/completion/Makefile
libnetdata/config/Makefile
+ libnetdata/datetime/Makefile
libnetdata/dictionary/Makefile
libnetdata/ebpf/Makefile
libnetdata/eval/Makefile
libnetdata/facets/Makefile
+ libnetdata/functions_evloop/Makefile
libnetdata/july/Makefile
+ libnetdata/line_splitter/Makefile
libnetdata/locks/Makefile
libnetdata/log/Makefile
libnetdata/onewayalloc/Makefile
@@ -2024,6 +2220,7 @@ AC_CONFIG_FILES([
libnetdata/storage_number/tests/Makefile
libnetdata/threads/Makefile
libnetdata/url/Makefile
+ libnetdata/uuid/Makefile
libnetdata/json/Makefile
libnetdata/health/Makefile
libnetdata/worker_utilization/Makefile
@@ -2034,6 +2231,7 @@ AC_CONFIG_FILES([
web/Makefile
web/api/Makefile
web/api/badges/Makefile
+ web/api/ilove/Makefile
web/api/exporters/Makefile
web/api/exporters/shell/Makefile
web/api/exporters/prometheus/Makefile
@@ -2064,6 +2262,7 @@ AC_CONFIG_FILES([
web/server/static/Makefile
claim/Makefile
spawn/Makefile
+ logsmanagement/Makefile
])
AC_OUTPUT
diff --git a/contrib/debian/control b/contrib/debian/control
index 4f819ac1800428..4163aa1df5cf1b 100644
--- a/contrib/debian/control
+++ b/contrib/debian/control
@@ -4,6 +4,7 @@ Build-Depends: debhelper (>= 9.20160709),
dpkg-dev (>= 1.13.19),
zlib1g-dev,
uuid-dev,
+ libcurl4-openssl-dev,
libelf-dev,
libuv1-dev,
liblz4-dev,
@@ -15,6 +16,7 @@ Build-Depends: debhelper (>= 9.20160709),
libipmimonitoring-dev,
libnetfilter-acct-dev,
libsnappy-dev,
+ libpcre2-dev,
libprotobuf-dev,
libprotoc-dev,
libsystemd-dev,
@@ -24,7 +26,9 @@ Build-Depends: debhelper (>= 9.20160709),
automake,
pkg-config,
curl,
- protobuf-compiler
+ protobuf-compiler,
+ bison,
+ flex
Section: net
Priority: optional
Maintainer: Netdata Builder
@@ -55,7 +59,8 @@ Conflicts: netdata-core,
netdata-web
Suggests: netdata-plugin-cups (= ${source:Version}),
netdata-plugin-freeipmi (= ${source:Version})
-Recommends: netdata-plugin-systemd-journal (= ${source:Version})
+Recommends: netdata-plugin-systemd-journal (= ${source:Version}),
+ netdata-plugin-logs-management (= ${source:Version})
Description: real-time charts for system monitoring
Netdata is a daemon that collects data in realtime (per second)
and presents a web site to view and analyze them. The presentation
@@ -201,3 +206,13 @@ Conflicts: netdata (<< ${source:Version})
Description: The systemd-journal collector for the Netdata Agent
This plugin allows the Netdata Agent to present logs from the systemd
journal on Netdata Cloud or the local Agent dashboard.
+
+Package: netdata-plugin-logs-management
+Architecture: any
+Depends: ${shlibs:Depends},
+ netdata (= ${source:Version})
+Pre-Depends: libcap2-bin, adduser
+Conflicts: netdata (<< ${source:Version})
+Description: The logs-management plugin for the Netdata Agent
+ This plugin allows the Netdata Agent to collect logs from the system
+ and parse them to extract metrics.
diff --git a/contrib/debian/netdata-plugin-logs-management.postinst b/contrib/debian/netdata-plugin-logs-management.postinst
new file mode 100644
index 00000000000000..0565b54e86b1ef
--- /dev/null
+++ b/contrib/debian/netdata-plugin-logs-management.postinst
@@ -0,0 +1,17 @@
+#!/bin/sh
+
+set -e
+
+case "$1" in
+ configure|reconfigure)
+ chown root:netdata /usr/libexec/netdata/plugins.d/logs-management.plugin
+ chmod 0750 /usr/libexec/netdata/plugins.d/logs-management.plugin
+ if ! setcap "cap_dac_read_search=eip cap_syslog=eip" /usr/libexec/netdata/plugins.d/logs-management.plugin; then
+ chmod -f 4750 /usr/libexec/netdata/plugins.d/logs-management.plugin
+ fi
+ ;;
+esac
+
+#DEBHELPER#
+
+exit 0
diff --git a/contrib/debian/netdata-plugin-logs-management.preinst b/contrib/debian/netdata-plugin-logs-management.preinst
new file mode 100644
index 00000000000000..fcabb415aaa039
--- /dev/null
+++ b/contrib/debian/netdata-plugin-logs-management.preinst
@@ -0,0 +1,13 @@
+#!/bin/sh
+
+set -e
+
+case "$1" in
+ install)
+ if ! getent group netdata > /dev/null; then
+ addgroup --quiet --system netdata
+ fi
+ ;;
+esac
+
+#DEBHELPER#
diff --git a/contrib/debian/netdata-plugin-perf.postinst b/contrib/debian/netdata-plugin-perf.postinst
index 76905878ef87ff..aa4f0f8d882b0f 100644
--- a/contrib/debian/netdata-plugin-perf.postinst
+++ b/contrib/debian/netdata-plugin-perf.postinst
@@ -7,16 +7,10 @@ case "$1" in
chown root:netdata /usr/libexec/netdata/plugins.d/perf.plugin
chmod 0750 /usr/libexec/netdata/plugins.d/perf.plugin
- if capsh --supports=cap_perfmon 2>/dev/null; then
- setcap cap_perfmon+ep /usr/libexec/netdata/plugins.d/perf.plugin
- ret="$?"
- else
- setcap cap_sys_admin+ep /usr/libexec/netdata/plugins.d/perf.plugin
- ret="$?"
- fi
-
- if [ "${ret}" -ne 0 ]; then
- chmod -f 4750 /usr/libexec/netdata/plugins.d/perf.plugin
+ if ! setcap cap_perfmon+ep /usr/libexec/netdata/plugins.d/perf.plugin 2>/dev/null; then
+ if ! setcap cap_sys_admin+ep /usr/libexec/netdata/plugins.d/perf.plugin 2>/dev/null; then
+ chmod -f 4750 /usr/libexec/netdata/plugins.d/perf.plugin
+ fi
fi
;;
esac
diff --git a/contrib/debian/rules b/contrib/debian/rules
index c1dbb6f619499d..d0aa353943fd95 100755
--- a/contrib/debian/rules
+++ b/contrib/debian/rules
@@ -128,7 +128,17 @@ override_dh_install:
# Add systemd-journal plugin install rules
mkdir -p $(TOP)-plugin-systemd-journal/usr/libexec/netdata/plugins.d/
mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/systemd-journal.plugin \
- $(TOP)-plugin-systemd-journal/usr/libexec/netdata/plugins.d/systemd-journal.plugin; \
+ $(TOP)-plugin-systemd-journal/usr/libexec/netdata/plugins.d/systemd-journal.plugin
+
+ # Add logs-management plugin install rules
+ mkdir -p $(TOP)-plugin-logs-management/usr/libexec/netdata/plugins.d/
+ mv -f $(TEMPTOP)/usr/libexec/netdata/plugins.d/logs-management.plugin \
+ $(TOP)-plugin-logs-management/usr/libexec/netdata/plugins.d/logs-management.plugin
+ mkdir -p $(TOP)-plugin-logs-management/usr/lib/netdata/conf.d/
+ mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/logsmanagement.d.conf \
+ $(TOP)-plugin-logs-management/usr/lib/netdata/conf.d/logsmanagement.d.conf
+ mv -f $(TEMPTOP)/usr/lib/netdata/conf.d/logsmanagement.d/ \
+ $(TOP)-plugin-logs-management/usr/lib/netdata/conf.d/logsmanagement.d/
# Set the rest of the software in the main package
#
@@ -221,6 +231,9 @@ override_dh_fixperms:
# systemd-journal
chmod 4750 $(TOP)-plugin-systemd-journal/usr/libexec/netdata/plugins.d/systemd-journal.plugin
+ # logs-management
+ chmod 4750 $(TOP)-plugin-logs-management/usr/libexec/netdata/plugins.d/logs-management.plugin
+
override_dh_installlogrotate:
cp system/logrotate/netdata debian/netdata.logrotate
dh_installlogrotate
diff --git a/coverity-scan.sh b/coverity-scan.sh
index 2050d13cdb40a2..8466b21a7f9cac 100755
--- a/coverity-scan.sh
+++ b/coverity-scan.sh
@@ -40,7 +40,7 @@ set -e
INSTALL_DIR="/opt"
# the version of coverity to use
-COVERITY_BUILD_VERSION="${COVERITY_BUILD_VERSION:-cov-analysis-linux64-2022.12.2}"
+COVERITY_BUILD_VERSION="${COVERITY_BUILD_VERSION:-cov-analysis-linux64-2023.6.2}"
# TODO: For some reasons this does not fully load on Debian 10 (Haven't checked if it happens on other distros yet), it breaks
source packaging/installer/functions.sh || echo "Failed to fully load the functions library"
diff --git a/daemon/README.md b/daemon/README.md
index 3fb33e5c74335f..0707a406c132be 100644
--- a/daemon/README.md
+++ b/daemon/README.md
@@ -38,7 +38,7 @@ The command line options of the Netdata 1.10.0 version are the following:
Support : https://github.com/netdata/netdata/issues
License : https://github.com/netdata/netdata/blob/master/LICENSE.md
- Twitter : https://twitter.com/linuxnetdata
+ Twitter : https://twitter.com/netdatahq
LinkedIn : https://linkedin.com/company/netdata-cloud/
Facebook : https://facebook.com/linuxnetdata/
@@ -143,6 +143,8 @@ For most Netdata programs (including standard external plugins shipped by netdat
| `ERROR` | Something that might disable a part of netdata.
The log line includes `errno` (if it is not zero). |
| `FATAL` | Something prevented a program from running.
The log line includes `errno` (if it is not zero) and the program exited. |
+The `FATAL` and `ERROR` messages will always appear in the logs, while `INFO` messages can be filtered using the [severity level](https://github.com/netdata/netdata/tree/master/daemon/config#logs-section-options) option.
+
So, when auto-detection of data collection fail, `ERROR` lines are logged and the relevant modules are disabled, but the
program continues to run.
diff --git a/daemon/analytics.c b/daemon/analytics.c
index 9323c8e8a08665..b026e34f8fa6f0 100644
--- a/daemon/analytics.c
+++ b/daemon/analytics.c
@@ -109,6 +109,7 @@ void analytics_free_data(void)
freez(analytics_data.netdata_config_use_private_registry);
freez(analytics_data.netdata_config_oom_score);
freez(analytics_data.netdata_prebuilt_distro);
+ freez(analytics_data.netdata_fail_reason);
}
/*
@@ -127,7 +128,7 @@ void analytics_set_data(char **name, char *value)
/*
* Set a string data with a value
*/
-void analytics_set_data_str(char **name, char *value)
+void analytics_set_data_str(char **name, const char *value)
{
size_t value_string_len;
if (*name) {
@@ -148,7 +149,7 @@ void analytics_log_prometheus(void)
if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.prometheus_hits < ANALYTICS_MAX_PROMETHEUS_HITS)) {
analytics_data.prometheus_hits++;
char b[21];
- snprintfz(b, 20, "%zu", analytics_data.prometheus_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.prometheus_hits);
analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, b);
}
}
@@ -161,7 +162,7 @@ void analytics_log_shell(void)
if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.shell_hits < ANALYTICS_MAX_SHELL_HITS)) {
analytics_data.shell_hits++;
char b[21];
- snprintfz(b, 20, "%zu", analytics_data.shell_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.shell_hits);
analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, b);
}
}
@@ -174,7 +175,7 @@ void analytics_log_json(void)
if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.json_hits < ANALYTICS_MAX_JSON_HITS)) {
analytics_data.json_hits++;
char b[21];
- snprintfz(b, 20, "%zu", analytics_data.json_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.json_hits);
analytics_set_data(&analytics_data.netdata_allmetrics_json_used, b);
}
}
@@ -187,7 +188,7 @@ void analytics_log_dashboard(void)
if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.dashboard_hits < ANALYTICS_MAX_DASHBOARD_HITS)) {
analytics_data.dashboard_hits++;
char b[21];
- snprintfz(b, 20, "%zu", analytics_data.dashboard_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.dashboard_hits);
analytics_set_data(&analytics_data.netdata_dashboard_used, b);
}
}
@@ -197,7 +198,7 @@ void analytics_log_dashboard(void)
*/
void analytics_report_oom_score(long long int score){
char b[21];
- snprintfz(b, 20, "%lld", score);
+ snprintfz(b, sizeof(b) - 1, "%lld", score);
analytics_set_data(&analytics_data.netdata_config_oom_score, b);
}
@@ -221,11 +222,11 @@ void analytics_mirrored_hosts(void)
}
rrd_unlock();
- snprintfz(b, 20, "%zu", count);
+ snprintfz(b, sizeof(b) - 1, "%zu", count);
analytics_set_data(&analytics_data.netdata_mirrored_host_count, b);
- snprintfz(b, 20, "%zu", reachable);
+ snprintfz(b, sizeof(b) - 1, "%zu", reachable);
analytics_set_data(&analytics_data.netdata_mirrored_hosts_reachable, b);
- snprintfz(b, 20, "%zu", unreachable);
+ snprintfz(b, sizeof(b) - 1, "%zu", unreachable);
analytics_set_data(&analytics_data.netdata_mirrored_hosts_unreachable, b);
}
@@ -280,7 +281,7 @@ void analytics_collectors(void)
.plugin = rrdset_plugin_name(st),
.module = rrdset_module_name(st)
};
- snprintfz(name, 499, "%s:%s", col.plugin, col.module);
+ snprintfz(name, sizeof(name) - 1, "%s:%s", col.plugin, col.module);
dictionary_set(dict, name, &col, sizeof(struct collector));
}
rrdset_foreach_done(st);
@@ -296,7 +297,7 @@ void analytics_collectors(void)
{
char b[21];
- snprintfz(b, 20, "%d", ap.c);
+ snprintfz(b, sizeof(b) - 1, "%d", ap.c);
analytics_set_data(&analytics_data.netdata_collectors_count, b);
}
@@ -401,7 +402,7 @@ void analytics_charts(void)
analytics_data.charts_count = c;
{
char b[21];
- snprintfz(b, 20, "%zu", c);
+ snprintfz(b, sizeof(b) - 1, "%zu", c);
analytics_set_data(&analytics_data.netdata_charts_count, b);
}
}
@@ -426,7 +427,7 @@ void analytics_metrics(void)
analytics_data.metrics_count = dimensions;
{
char b[21];
- snprintfz(b, 20, "%zu", dimensions);
+ snprintfz(b, sizeof(b) - 1, "%zu", dimensions);
analytics_set_data(&analytics_data.netdata_metrics_count, b);
}
}
@@ -453,11 +454,11 @@ void analytics_alarms(void)
}
foreach_rrdcalc_in_rrdhost_done(rc);
- snprintfz(b, 20, "%zu", alarm_normal);
+ snprintfz(b, sizeof(b) - 1, "%zu", alarm_normal);
analytics_set_data(&analytics_data.netdata_alarms_normal, b);
- snprintfz(b, 20, "%zu", alarm_warn);
+ snprintfz(b, sizeof(b) - 1, "%zu", alarm_warn);
analytics_set_data(&analytics_data.netdata_alarms_warning, b);
- snprintfz(b, 20, "%zu", alarm_crit);
+ snprintfz(b, sizeof(b) - 1, "%zu", alarm_crit);
analytics_set_data(&analytics_data.netdata_alarms_critical, b);
}
@@ -539,19 +540,19 @@ void analytics_gather_mutable_meta_data(void)
{
char b[21];
- snprintfz(b, 20, "%zu", analytics_data.prometheus_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.prometheus_hits);
analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, b);
- snprintfz(b, 20, "%zu", analytics_data.shell_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.shell_hits);
analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, b);
- snprintfz(b, 20, "%zu", analytics_data.json_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.json_hits);
analytics_set_data(&analytics_data.netdata_allmetrics_json_used, b);
- snprintfz(b, 20, "%zu", analytics_data.dashboard_hits);
+ snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.dashboard_hits);
analytics_set_data(&analytics_data.netdata_dashboard_used, b);
- snprintfz(b, 20, "%zu", rrdhost_hosts_available());
+ snprintfz(b, sizeof(b) - 1, "%zu", rrdhost_hosts_available());
analytics_set_data(&analytics_data.netdata_config_hosts_available, b);
}
}
@@ -663,10 +664,10 @@ void set_late_global_environment(struct rrdhost_system_info *system_info)
#ifdef ENABLE_DBENGINE
{
char b[16];
- snprintfz(b, 15, "%d", default_rrdeng_page_cache_mb);
+ snprintfz(b, sizeof(b) - 1, "%d", default_rrdeng_page_cache_mb);
analytics_set_data(&analytics_data.netdata_config_page_cache_size, b);
- snprintfz(b, 15, "%d", default_multidb_disk_quota_mb);
+ snprintfz(b, sizeof(b) - 1, "%d", default_multidb_disk_quota_mb);
analytics_set_data(&analytics_data.netdata_config_multidb_disk_quota, b);
}
#endif
@@ -823,11 +824,10 @@ void get_system_timezone(void)
}
}
-void set_global_environment()
-{
+void set_global_environment() {
{
char b[16];
- snprintfz(b, 15, "%d", default_rrd_update_every);
+ snprintfz(b, sizeof(b) - 1, "%d", default_rrd_update_every);
setenv("NETDATA_UPDATE_EVERY", b, 1);
}
@@ -842,7 +842,6 @@ void set_global_environment()
setenv("NETDATA_LIB_DIR", verify_or_create_required_directory(netdata_configured_varlib_dir), 1);
setenv("NETDATA_LOCK_DIR", verify_or_create_required_directory(netdata_configured_lock_dir), 1);
setenv("NETDATA_LOG_DIR", verify_or_create_required_directory(netdata_configured_log_dir), 1);
- setenv("HOME", verify_or_create_required_directory(netdata_configured_home_dir), 1);
setenv("NETDATA_HOST_PREFIX", netdata_configured_host_prefix, 1);
{
@@ -899,6 +898,7 @@ void set_global_environment()
analytics_set_data(&analytics_data.netdata_config_use_private_registry, "null");
analytics_set_data(&analytics_data.netdata_config_oom_score, "null");
analytics_set_data(&analytics_data.netdata_prebuilt_distro, "null");
+ analytics_set_data(&analytics_data.netdata_fail_reason, "null");
analytics_data.prometheus_hits = 0;
analytics_data.shell_hits = 0;
@@ -920,16 +920,14 @@ void set_global_environment()
freez(default_port);
// set the path we need
- char path[1024 + 1], *p = getenv("PATH");
- if (!p)
- p = "/bin:/usr/bin";
- snprintfz(path, 1024, "%s:%s", p, "/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin");
+ char path[4096], *p = getenv("PATH");
+ if (!p) p = "/bin:/usr/bin";
+ snprintfz(path, sizeof(path), "%s:%s", p, "/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin");
setenv("PATH", config_get(CONFIG_SECTION_ENV_VARS, "PATH", path), 1);
// python options
p = getenv("PYTHONPATH");
- if (!p)
- p = "";
+ if (!p) p = "";
setenv("PYTHONPATH", config_get(CONFIG_SECTION_ENV_VARS, "PYTHONPATH", p), 1);
// disable buffering for python plugins
@@ -939,41 +937,51 @@ void set_global_environment()
setenv("LC_ALL", "C", 1);
}
-void send_statistics(const char *action, const char *action_result, const char *action_data)
-{
+void send_statistics(const char *action, const char *action_result, const char *action_data) {
static char *as_script;
if (netdata_anonymous_statistics_enabled == -1) {
char *optout_file = mallocz(
sizeof(char) *
(strlen(netdata_configured_user_config_dir) + strlen(".opt-out-from-anonymous-statistics") + 2));
+
sprintf(optout_file, "%s/%s", netdata_configured_user_config_dir, ".opt-out-from-anonymous-statistics");
+
if (likely(access(optout_file, R_OK) != 0)) {
as_script = mallocz(
sizeof(char) *
(strlen(netdata_configured_primary_plugins_dir) + strlen("anonymous-statistics.sh") + 2));
+
sprintf(as_script, "%s/%s", netdata_configured_primary_plugins_dir, "anonymous-statistics.sh");
+
if (unlikely(access(as_script, R_OK) != 0)) {
netdata_anonymous_statistics_enabled = 0;
- netdata_log_info("Anonymous statistics script %s not found.", as_script);
+
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "Statistics script '%s' not found.",
+ as_script);
+
freez(as_script);
- } else {
- netdata_anonymous_statistics_enabled = 1;
}
- } else {
+ else
+ netdata_anonymous_statistics_enabled = 1;
+ }
+ else {
netdata_anonymous_statistics_enabled = 0;
as_script = NULL;
}
+
freez(optout_file);
}
- if (!netdata_anonymous_statistics_enabled)
- return;
- if (!action)
+
+ if (!netdata_anonymous_statistics_enabled || !action)
return;
+
if (!action_result)
action_result = "";
if (!action_data)
action_data = "";
+
char *command_to_run = mallocz(
sizeof(char) * (strlen(action) + strlen(action_result) + strlen(action_data) + strlen(as_script) +
analytics_data.data_length + (ANALYTICS_NO_OF_ITEMS * 3) + 15));
@@ -981,7 +989,7 @@ void send_statistics(const char *action, const char *action_result, const char *
sprintf(
command_to_run,
- "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ",
+ "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ",
as_script,
action,
action_result,
@@ -1024,9 +1032,12 @@ void send_statistics(const char *action, const char *action_result, const char *
analytics_data.netdata_config_is_private_registry,
analytics_data.netdata_config_use_private_registry,
analytics_data.netdata_config_oom_score,
- analytics_data.netdata_prebuilt_distro);
+ analytics_data.netdata_prebuilt_distro,
+ analytics_data.netdata_fail_reason);
- netdata_log_info("%s '%s' '%s' '%s'", as_script, action, action_result, action_data);
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "%s '%s' '%s' '%s'",
+ as_script, action, action_result, action_data);
FILE *fp_child_input;
FILE *fp_child_output = netdata_popen(command_to_run, &command_pid, &fp_child_input);
@@ -1035,11 +1046,21 @@ void send_statistics(const char *action, const char *action_result, const char *
char *s = fgets(buffer, 4, fp_child_output);
int exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid);
if (exit_code)
- netdata_log_error("Execution of anonymous statistics script returned %d.", exit_code);
- if (s && strncmp(buffer, "200", 3))
- netdata_log_error("Execution of anonymous statistics script returned http code %s.", buffer);
- } else {
- netdata_log_error("Failed to run anonymous statistics script %s.", as_script);
+
+ nd_log(NDLS_DAEMON, NDLP_NOTICE,
+ "Statistics script returned error: %d",
+ exit_code);
+
+ if (s && strncmp(buffer, "200", 3) != 0)
+ nd_log(NDLS_DAEMON, NDLP_NOTICE,
+ "Statistics script returned http code: %s",
+ buffer);
+
}
+ else
+ nd_log(NDLS_DAEMON, NDLP_NOTICE,
+ "Failed to run statistics script: %s.",
+ as_script);
+
freez(command_to_run);
}
diff --git a/daemon/analytics.h b/daemon/analytics.h
index 34418316fcf776..0a5cc458d75208 100644
--- a/daemon/analytics.h
+++ b/daemon/analytics.h
@@ -18,7 +18,7 @@
#define ANALYTICS_MAX_DASHBOARD_HITS 255
/* Needed to calculate the space needed for parameters */
-#define ANALYTICS_NO_OF_ITEMS 39
+#define ANALYTICS_NO_OF_ITEMS 40
struct analytics_data {
char *netdata_config_stream_enabled;
@@ -60,6 +60,7 @@ struct analytics_data {
char *netdata_config_use_private_registry;
char *netdata_config_oom_score;
char *netdata_prebuilt_distro;
+ char *netdata_fail_reason;
size_t data_length;
diff --git a/daemon/anonymous-statistics.sh.in b/daemon/anonymous-statistics.sh.in
index 6b27dfea42204c..d12e7e32ae3b1f 100755
--- a/daemon/anonymous-statistics.sh.in
+++ b/daemon/anonymous-statistics.sh.in
@@ -68,6 +68,7 @@ NETDATA_IS_PRIVATE_REGISTRY="${39}"
NETDATA_USE_PRIVATE_REGISTRY="${40}"
NETDATA_CONFIG_OOM_SCORE="${41}"
NETDATA_PREBUILT_DISTRO="${42}"
+NETDATA_FAIL_REASON="${43}"
[ -z "$NETDATA_REGISTRY_UNIQUE_ID" ] && NETDATA_REGISTRY_UNIQUE_ID="00000000-0000-0000-0000-000000000000"
@@ -175,7 +176,8 @@ REQ_BODY="$(cat << EOF
"mirrored_host_count": ${NETDATA_MIRRORED_HOST_COUNT},
"mirrored_hosts_reachable": ${NETDATA_MIRRORED_HOSTS_REACHABLE},
"mirrored_hosts_unreachable": ${NETDATA_MIRRORED_HOSTS_UNREACHABLE},
- "exporting_connectors": ${NETDATA_EXPORTING_CONNECTORS}
+ "exporting_connectors": ${NETDATA_EXPORTING_CONNECTORS},
+ "netdata_fail_reason": ${NETDATA_FAIL_REASON}
}
}
EOF
diff --git a/daemon/buildinfo.c b/daemon/buildinfo.c
index 4bc1e72a4e9f87..41af56af89dbb6 100644
--- a/daemon/buildinfo.c
+++ b/daemon/buildinfo.c
@@ -48,6 +48,7 @@ typedef enum __attribute__((packed)) {
BIB_FEATURE_CLOUD,
BIB_FEATURE_HEALTH,
BIB_FEATURE_STREAMING,
+ BIB_FEATURE_BACKFILLING,
BIB_FEATURE_REPLICATION,
BIB_FEATURE_STREAMING_COMPRESSION,
BIB_FEATURE_CONTEXTS,
@@ -66,6 +67,7 @@ typedef enum __attribute__((packed)) {
BIB_CONNECTIVITY_NATIVE_HTTPS,
BIB_CONNECTIVITY_TLS_HOST_VERIFY,
BIB_LIB_LZ4,
+ BIB_LIB_ZSTD,
BIB_LIB_ZLIB,
BIB_LIB_JUDY,
BIB_LIB_DLIB,
@@ -99,6 +101,7 @@ typedef enum __attribute__((packed)) {
BIB_PLUGIN_SLABINFO,
BIB_PLUGIN_XEN,
BIB_PLUGIN_XEN_VBD_ERROR,
+ BIB_PLUGIN_LOGS_MANAGEMENT,
BIB_EXPORT_AWS_KINESIS,
BIB_EXPORT_GCP_PUBSUB,
BIB_EXPORT_MONGOC,
@@ -340,7 +343,7 @@ static struct {
.json = "cpu_frequency",
.value = "unknown",
},
- [BIB_HW_RAM_SIZE] = {
+ [BIB_HW_ARCHITECTURE] = {
.category = BIC_HARDWARE,
.type = BIT_STRING,
.analytics = NULL,
@@ -348,7 +351,7 @@ static struct {
.json = "cpu_architecture",
.value = "unknown",
},
- [BIB_HW_DISK_SPACE] = {
+ [BIB_HW_RAM_SIZE] = {
.category = BIC_HARDWARE,
.type = BIT_STRING,
.analytics = NULL,
@@ -356,7 +359,7 @@ static struct {
.json = "ram",
.value = "unknown",
},
- [BIB_HW_ARCHITECTURE] = {
+ [BIB_HW_DISK_SPACE] = {
.category = BIC_HARDWARE,
.type = BIT_STRING,
.analytics = NULL,
@@ -484,6 +487,14 @@ static struct {
.json = "streaming",
.value = NULL,
},
+ [BIB_FEATURE_BACKFILLING] = {
+ .category = BIC_FEATURE,
+ .type = BIT_BOOLEAN,
+ .analytics = NULL,
+ .print = "Back-filling (of higher database tiers)",
+ .json = "back-filling",
+ .value = NULL,
+ },
[BIB_FEATURE_REPLICATION] = {
.category = BIC_FEATURE,
.type = BIT_BOOLEAN,
@@ -498,7 +509,7 @@ static struct {
.analytics = "Stream Compression",
.print = "Streaming and Replication Compression",
.json = "stream-compression",
- .value = "none",
+ .value = NULL,
},
[BIB_FEATURE_CONTEXTS] = {
.category = BIC_FEATURE,
@@ -628,6 +639,14 @@ static struct {
.json = "lz4",
.value = NULL,
},
+ [BIB_LIB_ZSTD] = {
+ .category = BIC_LIBS,
+ .type = BIT_BOOLEAN,
+ .analytics = NULL,
+ .print = "ZSTD (fast, lossless compression algorithm)",
+ .json = "zstd",
+ .value = NULL,
+ },
[BIB_LIB_ZLIB] = {
.category = BIC_LIBS,
.type = BIT_BOOLEAN,
@@ -893,6 +912,14 @@ static struct {
.json = "xen-vbd-error",
.value = NULL,
},
+ [BIB_PLUGIN_LOGS_MANAGEMENT] = {
+ .category = BIC_PLUGINS,
+ .type = BIT_BOOLEAN,
+ .analytics = "Logs Management",
+ .print = "Logs Management",
+ .json = "logs-management",
+ .value = NULL,
+ },
[BIB_EXPORT_MONGOC] = {
.category = BIC_EXPORTERS,
.type = BIT_BOOLEAN,
@@ -1029,6 +1056,23 @@ static void build_info_set_value(BUILD_INFO_SLOT slot, const char *value) {
BUILD_INFO[slot].value = value;
}
+static void build_info_append_value(BUILD_INFO_SLOT slot, const char *value) {
+ size_t size = BUILD_INFO[slot].value ? strlen(BUILD_INFO[slot].value) + 1 : 0;
+ size += strlen(value);
+ char buf[size + 1];
+
+ if(BUILD_INFO[slot].value) {
+ strcpy(buf, BUILD_INFO[slot].value);
+ strcat(buf, " ");
+ strcat(buf, value);
+ }
+ else
+ strcpy(buf, value);
+
+ freez((void *)BUILD_INFO[slot].value);
+ BUILD_INFO[slot].value = strdupz(buf);
+}
+
static void build_info_set_value_strdupz(BUILD_INFO_SLOT slot, const char *value) {
if(!value) value = "";
build_info_set_value(slot, strdupz(value));
@@ -1075,14 +1119,21 @@ __attribute__((constructor)) void initialize_build_info(void) {
build_info_set_status(BIB_FEATURE_HEALTH, true);
build_info_set_status(BIB_FEATURE_STREAMING, true);
+ build_info_set_status(BIB_FEATURE_BACKFILLING, true);
build_info_set_status(BIB_FEATURE_REPLICATION, true);
-#ifdef ENABLE_RRDPUSH_COMPRESSION
build_info_set_status(BIB_FEATURE_STREAMING_COMPRESSION, true);
-#ifdef ENABLE_LZ4
- build_info_set_value(BIB_FEATURE_STREAMING_COMPRESSION, "lz4");
+
+#ifdef ENABLE_BROTLI
+ build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "brotli");
#endif
+#ifdef ENABLE_ZSTD
+ build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "zstd");
#endif
+#ifdef ENABLE_LZ4
+ build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "lz4");
+#endif
+ build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "gzip");
build_info_set_status(BIB_FEATURE_CONTEXTS, true);
build_info_set_status(BIB_FEATURE_TIERING, true);
@@ -1117,6 +1168,9 @@ __attribute__((constructor)) void initialize_build_info(void) {
#ifdef ENABLE_LZ4
build_info_set_status(BIB_LIB_LZ4, true);
#endif
+#ifdef ENABLE_ZSTD
+ build_info_set_status(BIB_LIB_ZSTD, true);
+#endif
build_info_set_status(BIB_LIB_ZLIB, true);
@@ -1198,6 +1252,9 @@ __attribute__((constructor)) void initialize_build_info(void) {
#ifdef HAVE_XENSTAT_VBD_ERROR
build_info_set_status(BIB_PLUGIN_XEN_VBD_ERROR, true);
#endif
+#ifdef ENABLE_LOGSMANAGEMENT
+ build_info_set_status(BIB_PLUGIN_LOGS_MANAGEMENT, true);
+#endif
build_info_set_status(BIB_EXPORT_PROMETHEUS_EXPORTER, true);
build_info_set_status(BIB_EXPORT_GRAPHITE, true);
@@ -1234,7 +1291,7 @@ __attribute__((constructor)) void initialize_build_info(void) {
// ----------------------------------------------------------------------------
// system info
-int get_system_info(struct rrdhost_system_info *system_info, bool log);
+int get_system_info(struct rrdhost_system_info *system_info);
static void populate_system_info(void) {
static bool populated = false;
static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER;
@@ -1257,7 +1314,7 @@ static void populate_system_info(void) {
}
else {
system_info = callocz(1, sizeof(struct rrdhost_system_info));
- get_system_info(system_info, false);
+ get_system_info(system_info);
free_system_info = true;
}
diff --git a/daemon/commands.c b/daemon/commands.c
index a8afb5a0017bb7..ed544224ec91bf 100644
--- a/daemon/commands.c
+++ b/daemon/commands.c
@@ -142,10 +142,10 @@ static cmd_status_t cmd_reload_health_execute(char *args, char **message)
(void)args;
(void)message;
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("COMMAND: Reloading HEALTH configuration.");
health_reload();
- error_log_limit_reset();
+ nd_log_limits_reset();
return CMD_STATUS_SUCCESS;
}
@@ -155,11 +155,11 @@ static cmd_status_t cmd_save_database_execute(char *args, char **message)
(void)args;
(void)message;
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("COMMAND: Saving databases.");
rrdhost_save_all();
netdata_log_info("COMMAND: Databases saved.");
- error_log_limit_reset();
+ nd_log_limits_reset();
return CMD_STATUS_SUCCESS;
}
@@ -169,10 +169,9 @@ static cmd_status_t cmd_reopen_logs_execute(char *args, char **message)
(void)args;
(void)message;
- error_log_limit_unlimited();
- netdata_log_info("COMMAND: Reopening all log files.");
- reopen_all_log_files();
- error_log_limit_reset();
+ nd_log_limits_unlimited();
+ nd_log_reopen_log_files();
+ nd_log_limits_reset();
return CMD_STATUS_SUCCESS;
}
@@ -182,7 +181,7 @@ static cmd_status_t cmd_exit_execute(char *args, char **message)
(void)args;
(void)message;
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("COMMAND: Cleaning up to exit.");
netdata_cleanup_and_exit(0);
exit(0);
diff --git a/daemon/common.h b/daemon/common.h
index 4a3905924c17dd..b1739879f7b8f6 100644
--- a/daemon/common.h
+++ b/daemon/common.h
@@ -28,6 +28,7 @@
#define config_generate(buffer, only_changed) appconfig_generate(&netdata_config, buffer, only_changed)
+#define config_section_destroy(section) appconfig_section_destroy_non_loaded(&netdata_config, section)
#define config_section_option_destroy(section, name) appconfig_section_option_destroy_non_loaded(&netdata_config, section, name)
// ----------------------------------------------------------------------------
diff --git a/daemon/config/README.md b/daemon/config/README.md
index bc5a5885c10f18..11ba2a1bc7de1b 100644
--- a/daemon/config/README.md
+++ b/daemon/config/README.md
@@ -72,40 +72,40 @@ Please note that your data history will be lost if you have modified `history` p
### [global] section options
-| setting | default | info |
-|:-------------------------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| process scheduling policy | `keep` | See [Netdata process scheduling policy](https://github.com/netdata/netdata/blob/master/daemon/README.md#netdata-process-scheduling-policy) |
-| OOM score | `0` | |
-| glibc malloc arena max for plugins | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). |
-| glibc malloc arena max for Netdata | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). |
-| hostname | auto-detected | The hostname of the computer running Netdata. |
-| host access prefix | empty | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). |
-| timezone | auto-detected | The timezone retrieved from the environment variable |
-| run as user | `netdata` | The user Netdata will run as. |
-| pthread stack size | auto-detected | |
+| setting | default | info |
+|:----------------------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| process scheduling policy | `keep` | See [Netdata process scheduling policy](https://github.com/netdata/netdata/blob/master/daemon/README.md#netdata-process-scheduling-policy) |
+| OOM score | `0` | |
+| glibc malloc arena max for plugins | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). |
+| glibc malloc arena max for Netdata | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). |
+| hostname | auto-detected | The hostname of the computer running Netdata. |
+| host access prefix | empty | This is used in Docker environments where /proc, /sys, etc. have to be accessed via another path. You may also have to set the SYS_PTRACE capability on the Docker container for this to work. Check [issue 43](https://github.com/netdata/netdata/issues/43). |
+| timezone | auto-detected | The timezone retrieved from the environment variable |
+| run as user | `netdata` | The user Netdata will run as. |
+| pthread stack size | auto-detected | |
### [db] section options
-| setting | default | info |
-|:---------------------------------------------:|:----------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| setting | default | info |
+|:---------------------------------------------:|:----------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`.
`save`: Netdata will save its round robin database on exit and load it on startup.
`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`).
`ram`: The round-robin database will be temporary and it will be lost when Netdata exits.
`alloc`: Similar to `ram`, but can significantly reduce memory usage, when combined with a low retention and does not support KSM.
`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. Not to be used together with streaming. |
-| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](https://github.com/netdata/netdata/blob/master/database/README.md) for more information. |
-| storage tiers | `1` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. |
-| dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching for _Tier 0_ Netdata metric values. |
-| dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated for caching Netdata metric values of the **`N`** tier.
`N belongs to [1..4]` ||
- | dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). |
-| dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. |
-| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well.
`N belongs to [1..4]` |
-| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). These metrics stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). |
-| dbengine tier **`N`** update every iterations | `60` | The down sampling value of each tier from the previous one. For each Tier, the greater by one Tier has N (equal to 60 by default) less data points of any metric it collects. This setting can take values from `2` up to `255`.
`N belongs to [1..4]` |
-| dbengine tier **`N`** back fill | `New` | Specifies the strategy of recreating missing data on each Tier from the exact lower Tier.
`New`: Sees the latest point on each Tier and save new points to it only if the exact lower Tier has available points for it's observation window (`dbengine tier N update every iterations` window).
`none`: No back filling is applied.
`N belongs to [1..4]` |
-| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](https://github.com/netdata/netdata/blob/master/database/README.md#ksm) |
-| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions |
-| gap when lost iterations above | `1` | |
-| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. |
-| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions |
-| delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. |
-| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. |
+| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](https://github.com/netdata/netdata/blob/master/database/README.md) for more information. |
+| storage tiers | `1` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. |
+| dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching for _Tier 0_ Netdata metric values. |
+| dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated for caching Netdata metric values of the **`N`** tier. <br/>`N belongs to [1..4]` |
+| dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). |
+| dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. |
+| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well. <br/>`N belongs to [1..4]` |
+| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). These metrics are stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). |
+| dbengine tier **`N`** update every iterations | `60` | The down sampling value of each tier from the previous one. Each Tier keeps N (equal to 60 by default) times fewer data points of any metric than the Tier directly below it. This setting can take values from `2` up to `255`. <br/>`N belongs to [1..4]` |
+| dbengine tier **`N`** back fill | `New` | Specifies the strategy of recreating missing data on each Tier from the exact lower Tier. <br/>`New`: Sees the latest point on each Tier and saves new points to it only if the exact lower Tier has available points for its observation window (`dbengine tier N update every iterations` window). <br/>`none`: No back filling is applied. <br/>`N belongs to [1..4]` |
+| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](https://github.com/netdata/netdata/blob/master/database/README.md#ksm) |
+| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions |
+| gap when lost iterations above | `1` | |
+| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. |
+| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions |
+| delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. |
+| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. |
> ### Info
>
@@ -113,32 +113,33 @@ Please note that your data history will be lost if you have modified `history` p
### [directories] section options
-| setting | default | info |
-|:-------------------:|:------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| config | `/etc/netdata` | The directory configuration files are kept. |
-| stock config | `/usr/lib/netdata/conf.d` | |
-| log | `/var/log/netdata` | The directory in which the [log files](https://github.com/netdata/netdata/blob/master/daemon/README.md#log-files) are kept. |
-| web | `/usr/share/netdata/web` | The directory the web static files are kept. |
-| cache | `/var/cache/netdata` | The directory the memory database will be stored if and when Netdata exits. Netdata will re-read the database when it will start again, to continue from the same point. |
-| lib | `/var/lib/netdata` | Contains the alarm log and the Netdata instance GUID. |
-| home | `/var/cache/netdata` | Contains the db files for the collected metrics. |
-| lock | `/var/lib/netdata/lock` | Contains the data collectors lock files. |
-| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. |
-| health config | `/etc/netdata/health.d` | The directory containing the user alarm configuration files, to override the stock configurations |
-| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alarm configuration files for each collector |
-| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](https://github.com/netdata/netdata/blob/master/registry/README.md) database and GUID that uniquely identifies each Netdata Agent |
+| setting | default | info |
+|:-------------------:|:------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| config | `/etc/netdata` | The directory where configuration files are kept. |
+| stock config | `/usr/lib/netdata/conf.d` | |
+| log | `/var/log/netdata` | The directory in which the [log files](https://github.com/netdata/netdata/blob/master/daemon/README.md#log-files) are kept. |
+| web | `/usr/share/netdata/web` | The directory where the web static files are kept. |
+| cache | `/var/cache/netdata` | The directory where the memory database is stored if and when Netdata exits. Netdata re-reads the database when it starts again, to continue from the same point. |
+| lib | `/var/lib/netdata` | Contains the alert log and the Netdata instance GUID. |
+| home | `/var/cache/netdata` | Contains the db files for the collected metrics. |
+| lock | `/var/lib/netdata/lock` | Contains the data collectors lock files. |
+| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directories where plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. |
+| health config | `/etc/netdata/health.d` | The directory containing the user alert configuration files, to override the stock configurations |
+| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alert configuration files for each collector |
+| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](https://github.com/netdata/netdata/blob/master/registry/README.md) database and GUID that uniquely identifies each Netdata Agent |
### [logs] section options
-| setting | default | info |
-|:----------------------------------:|:-----------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| setting | default | info |
+|:----------------------------------:|:-----------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](https://github.com/netdata/netdata/blob/master/daemon/README.md#debugging). |
| debug | `/var/log/netdata/debug.log` | The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](https://github.com/netdata/netdata/blob/master/daemon/README.md#debugging). |
-| error | `/var/log/netdata/error.log` | The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. |
-| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. |
-| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. |
-| errors flood protection period | `1200` | Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`. |
-| errors to trigger flood protection | `200` | Number of errors written to the log in `errors flood protection period` sec before flood protection is activated. |
+| error | `/var/log/netdata/error.log` | The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. |
+| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. |
+| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. |
+| errors flood protection period | `1200` | Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`. |
+| errors to trigger flood protection | `200` | Number of errors written to the log in `errors flood protection period` sec before flood protection is activated. |
+| severity level | `info` | Controls which log messages are logged, with error being the most important. Supported values: `info` and `error`. |
### [environment variables] section options
@@ -163,20 +164,20 @@ Please note that your data history will be lost if you have modified `history` p
This section controls the general behavior of the health monitoring capabilities of Netdata.
-Specific alarms are configured in per-collector config files under the `health.d` directory. For more info, see [health
+Specific alerts are configured in per-collector config files under the `health.d` directory. For more info, see [health
monitoring](https://github.com/netdata/netdata/blob/master/health/README.md).
-[Alarm notifications](https://github.com/netdata/netdata/blob/master/health/notifications/README.md) are configured in `health_alarm_notify.conf`.
+[Alert notifications](https://github.com/netdata/netdata/blob/master/health/notifications/README.md) are configured in `health_alarm_notify.conf`.
-| setting | default | info |
-|:----------------------------------------------:|:------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| enabled | `yes` | Set to `no` to disable all alarms and notifications |
-| in memory max health log entries | 1000 | Size of the alarm history held in RAM |
-| script to execute on alarm | `/usr/libexec/netdata/plugins.d/alarm-notify.sh` | The script that sends alarm notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). |
-| run at least every seconds | `10` | Controls how often all alarm conditions should be evaluated. |
-| postpone alarms during hibernation for seconds | `60` | Prevents false alarms. May need to be increased if you get alarms during hibernation. |
-| health log history | `432000` | Specifies the history of alarm events (in seconds) kept in the agent's sqlite database. |
-| enabled alarms | * | Defines which alarms to load from both user and stock directories. This is a [simple pattern](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md) list of alarm or template names. Can be used to disable specific alarms. For example, `enabled alarms = !oom_kill *` will load all alarms except `oom_kill`. |
+| setting | default | info |
+|:----------------------------------------------:|:------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| enabled | `yes` | Set to `no` to disable all alerts and notifications |
+| in memory max health log entries | 1000 | Size of the alert history held in RAM |
+| script to execute on alarm | `/usr/libexec/netdata/plugins.d/alarm-notify.sh` | The script that sends alert notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). |
+| run at least every seconds | `10` | Controls how often all alert conditions should be evaluated. |
+| postpone alarms during hibernation for seconds | `60` | Prevents false alerts. May need to be increased if you get alerts during hibernation. |
+| health log history | `432000` | Specifies the history of alert events (in seconds) kept in the agent's sqlite database. |
+| enabled alarms | * | Defines which alerts to load from both user and stock directories. This is a [simple pattern](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md) list of alert or template names. Can be used to disable specific alerts. For example, `enabled alarms = !oom_kill *` will load all alerts except `oom_kill`. |
### [web] section options
@@ -222,10 +223,10 @@ for all internal Netdata plugins.
External plugins will have only 2 options at `netdata.conf`:
-| setting | default | info |
-|:---------------:|:--------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------|
+| setting | default | info |
+|:---------------:|:--------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| update every | the value of `[global].update every` setting | The frequency in seconds the plugin should collect values. For more information check the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). |
-| command options | - | Additional command line options to pass to the plugin. | |
+| command options | - | Additional command line options to pass to the plugin. |
External plugins that need additional configuration may support a dedicated file in `/etc/netdata`. Check their
documentation.
diff --git a/daemon/daemon.c b/daemon/daemon.c
index c7f0b51c6d5608..c82cb29edabb64 100644
--- a/daemon/daemon.c
+++ b/daemon/daemon.c
@@ -31,22 +31,6 @@ void get_netdata_execution_path(void) {
dirname(netdata_exe_path);
}
-static void chown_open_file(int fd, uid_t uid, gid_t gid) {
- if(fd == -1) return;
-
- struct stat buf;
-
- if(fstat(fd, &buf) == -1) {
- netdata_log_error("Cannot fstat() fd %d", fd);
- return;
- }
-
- if((buf.st_uid != uid || buf.st_gid != gid) && S_ISREG(buf.st_mode)) {
- if(fchown(fd, uid, gid) == -1)
- netdata_log_error("Cannot fchown() fd %d.", fd);
- }
-}
-
static void fix_directory_file_permissions(const char *dirname, uid_t uid, gid_t gid, bool recursive)
{
char filename[FILENAME_MAX + 1];
@@ -124,9 +108,6 @@ int become_user(const char *username, int pid_fd) {
uid_t uid = pw->pw_uid;
gid_t gid = pw->pw_gid;
- if (am_i_root)
- netdata_log_info("I am root, so checking permissions");
-
prepare_required_directories(uid, gid);
if(pidfile[0]) {
@@ -150,9 +131,9 @@ int become_user(const char *username, int pid_fd) {
}
}
+ nd_log_chown_log_files(uid, gid);
chown_open_file(STDOUT_FILENO, uid, gid);
chown_open_file(STDERR_FILENO, uid, gid);
- chown_open_file(stdaccess_fd, uid, gid);
chown_open_file(pid_fd, uid, gid);
if(supplementary_groups && ngroups > 0) {
@@ -229,7 +210,7 @@ static void oom_score_adj(void) {
// check the environment
char *s = getenv("OOMScoreAdjust");
if(!s || !*s) {
- snprintfz(buf, 30, "%d", (int)wanted_score);
+ snprintfz(buf, sizeof(buf) - 1, "%d", (int)wanted_score);
s = buf;
}
@@ -264,7 +245,7 @@ static void oom_score_adj(void) {
int written = 0;
int fd = open("/proc/self/oom_score_adj", O_WRONLY);
if(fd != -1) {
- snprintfz(buf, 30, "%d", (int)wanted_score);
+ snprintfz(buf, sizeof(buf) - 1, "%d", (int)wanted_score);
ssize_t len = strlen(buf);
if(len > 0 && write(fd, buf, (size_t)len) == len) written = 1;
close(fd);
diff --git a/daemon/event_loop.c b/daemon/event_loop.c
index fb38791546d4b5..93bac97d0a6812 100644
--- a/daemon/event_loop.c
+++ b/daemon/event_loop.c
@@ -52,6 +52,7 @@ void register_libuv_worker_jobs() {
worker_register_job_name(UV_EVENT_HOST_CONTEXT_LOAD, "metadata load host context");
worker_register_job_name(UV_EVENT_METADATA_STORE, "metadata store host");
worker_register_job_name(UV_EVENT_METADATA_CLEANUP, "metadata cleanup");
+ worker_register_job_name(UV_EVENT_METADATA_ML_LOAD, "metadata load ml models");
// netdatacli
worker_register_job_name(UV_EVENT_SCHEDULE_CMD, "schedule command");
diff --git a/daemon/event_loop.h b/daemon/event_loop.h
index 1ff1c2c1cb816a..c1821c64617cc8 100644
--- a/daemon/event_loop.h
+++ b/daemon/event_loop.h
@@ -44,6 +44,7 @@ enum event_loop_job {
UV_EVENT_HOST_CONTEXT_LOAD,
UV_EVENT_METADATA_STORE,
UV_EVENT_METADATA_CLEANUP,
+ UV_EVENT_METADATA_ML_LOAD,
// netdatacli
UV_EVENT_SCHEDULE_CMD,
diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c
index ce8d41402601a2..9fb1df5f8ecffe 100644
--- a/daemon/global_statistics.c
+++ b/daemon/global_statistics.c
@@ -65,6 +65,11 @@ static struct global_statistics {
uint64_t backfill_queries_made;
uint64_t backfill_db_points_read;
+ uint64_t tier0_hot_gorilla_buffers;
+
+ uint64_t tier0_disk_compressed_bytes;
+ uint64_t tier0_disk_uncompressed_bytes;
+
uint64_t db_points_stored_per_tier[RRD_STORAGE_TIERS];
} global_statistics = {
@@ -80,6 +85,10 @@ static struct global_statistics {
.api_data_queries_made = 0,
.api_data_db_points_read = 0,
.api_data_result_points_generated = 0,
+
+ .tier0_hot_gorilla_buffers = 0,
+ .tier0_disk_compressed_bytes = 0,
+ .tier0_disk_uncompressed_bytes = 0,
};
void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array) {
@@ -108,6 +117,18 @@ void global_statistics_backfill_query_completed(size_t points_read) {
__atomic_fetch_add(&global_statistics.backfill_db_points_read, points_read, __ATOMIC_RELAXED);
}
+void global_statistics_gorilla_buffer_add_hot() { // Record one more tier 0 hot gorilla buffer in the global statistics.
+ __atomic_fetch_add(&global_statistics.tier0_hot_gorilla_buffers, 1, __ATOMIC_RELAXED); // relaxed atomic increment; the counter is read back in global_statistics_copy()
+}
+
+void global_statistics_tier0_disk_compressed_bytes(uint32_t size) { // Add `size` to the running tier0_disk_compressed_bytes total.
+ __atomic_fetch_add(&global_statistics.tier0_disk_compressed_bytes, size, __ATOMIC_RELAXED); // relaxed atomic add into a uint64_t accumulator
+}
+
+void global_statistics_tier0_disk_uncompressed_bytes(uint32_t size) { // Add `size` to the running tier0_disk_uncompressed_bytes total.
+ __atomic_fetch_add(&global_statistics.tier0_disk_uncompressed_bytes, size, __ATOMIC_RELAXED); // relaxed atomic add into a uint64_t accumulator
+}
+
void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source) {
switch(query_source) {
case QUERY_SOURCE_API_DATA:
@@ -210,6 +231,11 @@ static inline void global_statistics_copy(struct global_statistics *gs, uint8_t
gs->backfill_queries_made = __atomic_load_n(&global_statistics.backfill_queries_made, __ATOMIC_RELAXED);
gs->backfill_db_points_read = __atomic_load_n(&global_statistics.backfill_db_points_read, __ATOMIC_RELAXED);
+ gs->tier0_hot_gorilla_buffers = __atomic_load_n(&global_statistics.tier0_hot_gorilla_buffers, __ATOMIC_RELAXED);
+
+ gs->tier0_disk_compressed_bytes = __atomic_load_n(&global_statistics.tier0_disk_compressed_bytes, __ATOMIC_RELAXED);
+ gs->tier0_disk_uncompressed_bytes = __atomic_load_n(&global_statistics.tier0_disk_uncompressed_bytes, __ATOMIC_RELAXED);
+
for(size_t tier = 0; tier < storage_tiers ;tier++)
gs->db_points_stored_per_tier[tier] = __atomic_load_n(&global_statistics.db_points_stored_per_tier[tier], __ATOMIC_RELAXED);
@@ -816,7 +842,7 @@ static void global_statistics_charts(void) {
for(size_t tier = 0; tier < storage_tiers ;tier++) {
char buf[30 + 1];
- snprintfz(buf, 30, "tier%zu", tier);
+ snprintfz(buf, sizeof(buf) - 1, "tier%zu", tier);
rds[tier] = rrddim_add(st_points_stored, buf, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
}
@@ -828,6 +854,72 @@ static void global_statistics_charts(void) {
}
ml_update_global_statistics_charts(gs.ml_models_consulted);
+
+ // ----------------------------------------------------------------
+
+#ifdef ENABLE_DBENGINE
+ if (tier_page_type[0] == PAGE_GORILLA_METRICS)
+ {
+ static RRDSET *st_tier0_gorilla_pages = NULL;
+ static RRDDIM *rd_num_gorilla_pages = NULL;
+
+ if (unlikely(!st_tier0_gorilla_pages)) {
+ st_tier0_gorilla_pages = rrdset_create_localhost(
+ "netdata"
+ , "tier0_gorilla_pages"
+ , NULL
+ , "tier0_gorilla_pages"
+ , NULL
+ , "Number of gorilla_pages"
+ , "count"
+ , "netdata"
+ , "stats"
+ , 131004
+ , localhost->rrd_update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rd_num_gorilla_pages = rrddim_add(st_tier0_gorilla_pages, "count", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+
+ rrddim_set_by_pointer(st_tier0_gorilla_pages, rd_num_gorilla_pages, (collected_number)gs.tier0_hot_gorilla_buffers);
+
+ rrdset_done(st_tier0_gorilla_pages);
+ }
+
+ if (tier_page_type[0] == PAGE_GORILLA_METRICS)
+ {
+ static RRDSET *st_tier0_compression_info = NULL;
+
+ static RRDDIM *rd_compressed_bytes = NULL;
+ static RRDDIM *rd_uncompressed_bytes = NULL;
+
+ if (unlikely(!st_tier0_compression_info)) {
+ st_tier0_compression_info = rrdset_create_localhost(
+ "netdata"
+ , "tier0_compression_info"
+ , NULL
+ , "tier0_compression_info"
+ , NULL
+ , "Tier 0 compression info"
+ , "bytes"
+ , "netdata"
+ , "stats"
+ , 131005
+ , localhost->rrd_update_every
+ , RRDSET_TYPE_LINE
+ );
+
+ rd_compressed_bytes = rrddim_add(st_tier0_compression_info, "compressed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_uncompressed_bytes = rrddim_add(st_tier0_compression_info, "uncompressed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set_by_pointer(st_tier0_compression_info, rd_compressed_bytes, (collected_number)gs.tier0_disk_compressed_bytes);
+ rrddim_set_by_pointer(st_tier0_compression_info, rd_uncompressed_bytes, (collected_number)gs.tier0_disk_uncompressed_bytes);
+
+ rrdset_done(st_tier0_compression_info);
+ }
+#endif
}
// ----------------------------------------------------------------------------
@@ -1881,8 +1973,6 @@ static void dbengine2_statistics_charts(void) {
static RRDDIM *rd_mrg_metrics = NULL;
static RRDDIM *rd_mrg_acquired = NULL;
static RRDDIM *rd_mrg_collected = NULL;
- static RRDDIM *rd_mrg_with_retention = NULL;
- static RRDDIM *rd_mrg_without_retention = NULL;
static RRDDIM *rd_mrg_multiple_writers = NULL;
if (unlikely(!st_mrg_metrics)) {
@@ -1903,8 +1993,6 @@ static void dbengine2_statistics_charts(void) {
rd_mrg_metrics = rrddim_add(st_mrg_metrics, "all", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rd_mrg_acquired = rrddim_add(st_mrg_metrics, "acquired", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rd_mrg_collected = rrddim_add(st_mrg_metrics, "collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- rd_mrg_with_retention = rrddim_add(st_mrg_metrics, "with retention", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- rd_mrg_without_retention = rrddim_add(st_mrg_metrics, "without retention", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rd_mrg_multiple_writers = rrddim_add(st_mrg_metrics, "multi-collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
}
priority++;
@@ -1912,8 +2000,6 @@ static void dbengine2_statistics_charts(void) {
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_metrics, (collected_number)mrg_stats.entries);
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_acquired, (collected_number)mrg_stats.entries_referenced);
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_collected, (collected_number)mrg_stats.writers);
- rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_with_retention, (collected_number)mrg_stats.entries_with_retention);
- rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_without_retention, (collected_number)mrg_stats.entries - (collected_number)mrg_stats.entries_with_retention);
rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_multiple_writers, (collected_number)mrg_stats.writers_conflicts);
rrdset_done(st_mrg_metrics);
@@ -2681,9 +2767,12 @@ static void dbengine2_statistics_charts(void) {
static void update_strings_charts() {
static RRDSET *st_ops = NULL, *st_entries = NULL, *st_mem = NULL;
- static RRDDIM *rd_ops_inserts = NULL, *rd_ops_deletes = NULL, *rd_ops_searches = NULL, *rd_ops_duplications = NULL, *rd_ops_releases = NULL;
- static RRDDIM *rd_entries_entries = NULL, *rd_entries_refs = NULL;
+ static RRDDIM *rd_ops_inserts = NULL, *rd_ops_deletes = NULL;
+ static RRDDIM *rd_entries_entries = NULL;
static RRDDIM *rd_mem = NULL;
+#ifdef NETDATA_INTERNAL_CHECKS
+ static RRDDIM *rd_entries_refs = NULL, *rd_ops_releases = NULL, *rd_ops_duplications = NULL, *rd_ops_searches = NULL;
+#endif
size_t inserts, deletes, searches, entries, references, memory, duplications, releases;
@@ -2706,16 +2795,20 @@ static void update_strings_charts() {
rd_ops_inserts = rrddim_add(st_ops, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rd_ops_deletes = rrddim_add(st_ops, "deletes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+#ifdef NETDATA_INTERNAL_CHECKS
rd_ops_searches = rrddim_add(st_ops, "searches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rd_ops_duplications = rrddim_add(st_ops, "duplications", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rd_ops_releases = rrddim_add(st_ops, "releases", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+#endif
}
rrddim_set_by_pointer(st_ops, rd_ops_inserts, (collected_number)inserts);
rrddim_set_by_pointer(st_ops, rd_ops_deletes, (collected_number)deletes);
+#ifdef NETDATA_INTERNAL_CHECKS
rrddim_set_by_pointer(st_ops, rd_ops_searches, (collected_number)searches);
rrddim_set_by_pointer(st_ops, rd_ops_duplications, (collected_number)duplications);
rrddim_set_by_pointer(st_ops, rd_ops_releases, (collected_number)releases);
+#endif
rrdset_done(st_ops);
if (unlikely(!st_entries)) {
@@ -2734,11 +2827,15 @@ static void update_strings_charts() {
, RRDSET_TYPE_AREA);
rd_entries_entries = rrddim_add(st_entries, "entries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+#ifdef NETDATA_INTERNAL_CHECKS
rd_entries_refs = rrddim_add(st_entries, "references", NULL, 1, -1, RRD_ALGORITHM_ABSOLUTE);
+#endif
}
rrddim_set_by_pointer(st_entries, rd_entries_entries, (collected_number)entries);
+#ifdef NETDATA_INTERNAL_CHECKS
rrddim_set_by_pointer(st_entries, rd_entries_refs, (collected_number)references);
+#endif
rrdset_done(st_entries);
if (unlikely(!st_mem)) {
@@ -2813,6 +2910,7 @@ struct dictionary_stats dictionary_stats_category_rrdhealth = { .name = "health"
struct dictionary_stats dictionary_stats_category_functions = { .name = "functions" };
struct dictionary_stats dictionary_stats_category_replication = { .name = "replication" };
+#ifdef DICT_WITH_STATS
struct dictionary_categories {
struct dictionary_stats *stats;
const char *family;
@@ -3165,6 +3263,13 @@ static void update_dictionary_category_charts(struct dictionary_categories *c) {
}
}
+// Update the charts of every entry in dictionary_categories[].
+static void dictionary_statistics(void) {
+    // walk the table until the first entry whose .stats pointer is NULL
+    struct dictionary_categories *category = dictionary_categories;
+
+    while(category->stats) {
+        update_dictionary_category_charts(category);
+        category++;
+    }
+}
+#endif // DICT_WITH_STATS
+
#ifdef NETDATA_TRACE_ALLOCATIONS
struct memory_trace_data {
@@ -3304,12 +3409,6 @@ static void malloc_trace_statistics(void) {
}
#endif
-static void dictionary_statistics(void) {
- for(int i = 0; dictionary_categories[i].stats ;i++) {
- update_dictionary_category_charts(&dictionary_categories[i]);
- }
-}
-
// ---------------------------------------------------------------------------------------------------------------------
// worker utilization
@@ -3432,6 +3531,7 @@ static struct worker_utilization all_workers_utilization[] = {
{ .name = "TC", .family = "workers plugin tc", .priority = 1000000 },
{ .name = "TIMEX", .family = "workers plugin timex", .priority = 1000000 },
{ .name = "IDLEJITTER", .family = "workers plugin idlejitter", .priority = 1000000 },
+ { .name = "LOGSMANAGPLG",.family = "workers plugin logs management", .priority = 1000000 },
{ .name = "RRDCONTEXT", .family = "workers contexts", .priority = 1000000 },
{ .name = "REPLICATION", .family = "workers replication sender", .priority = 1000000 },
{ .name = "SERVICE", .family = "workers service", .priority = 1000000 },
@@ -4171,8 +4271,10 @@ void *global_statistics_main(void *ptr)
worker_is_busy(WORKER_JOB_STRINGS);
update_strings_charts();
+#ifdef DICT_WITH_STATS
worker_is_busy(WORKER_JOB_DICTIONARIES);
dictionary_statistics();
+#endif
#ifdef NETDATA_TRACE_ALLOCATIONS
worker_is_busy(WORKER_JOB_MALLOC_TRACE);
diff --git a/daemon/global_statistics.h b/daemon/global_statistics.h
index 7bdb153dd9692b..44717c6cf4efed 100644
--- a/daemon/global_statistics.h
+++ b/daemon/global_statistics.h
@@ -45,6 +45,11 @@ void global_statistics_sqlite3_query_completed(bool success, bool busy, bool loc
void global_statistics_sqlite3_row_completed(void);
void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array);
+// tier 0 counters: hot gorilla buffers and compressed / uncompressed bytes on disk
+// (void) gives the first declaration a real prototype, so calls are argument-checked
+void global_statistics_gorilla_buffer_add_hot(void);
+
+void global_statistics_tier0_disk_compressed_bytes(uint32_t size);
+void global_statistics_tier0_disk_uncompressed_bytes(uint32_t size);
+
void global_statistics_web_request_completed(uint64_t dt,
uint64_t bytes_received,
uint64_t bytes_sent,
diff --git a/daemon/main.c b/daemon/main.c
index 6ddf57aa174bb8..7351bd65c34331 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -4,6 +4,8 @@
#include "buildinfo.h"
#include "static_threads.h"
+#include "database/engine/page_test.h"
+
#if defined(ENV32BIT)
#warning COMPILING 32BIT NETDATA
#endif
@@ -313,7 +315,7 @@ void netdata_cleanup_and_exit(int ret) {
const char *prev_msg = NULL;
bool timeout = false;
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret);
send_statistics("EXIT", ret?"ERROR":"OK","-");
@@ -371,6 +373,10 @@ void netdata_cleanup_and_exit(int ret) {
SERVICE_REPLICATION // replication has to be stopped after STREAMING, because it cleans up ARAL
, 3 * USEC_PER_SEC);
+ delta_shutdown_time("prepare metasync shutdown");
+
+ metadata_sync_shutdown_prepare();
+
delta_shutdown_time("disable ML detection and training threads");
ml_stop_threads();
@@ -396,10 +402,6 @@ void netdata_cleanup_and_exit(int ret) {
rrdhost_cleanup_all();
- delta_shutdown_time("prepare metasync shutdown");
-
- metadata_sync_shutdown_prepare();
-
delta_shutdown_time("stop aclk threads");
timeout = !service_wait_exit(
@@ -422,6 +424,13 @@ void netdata_cleanup_and_exit(int ret) {
delta_shutdown_time("flush dbengine tiers");
for (size_t tier = 0; tier < storage_tiers; tier++)
rrdeng_prepare_exit(multidb_ctx[tier]);
+
+ for (size_t tier = 0; tier < storage_tiers; tier++) {
+ if (!multidb_ctx[tier])
+ continue;
+ completion_wait_for(&multidb_ctx[tier]->quiesce.completion);
+ completion_destroy(&multidb_ctx[tier]->quiesce.completion);
+ }
}
#endif
@@ -440,16 +449,20 @@ void netdata_cleanup_and_exit(int ret) {
delta_shutdown_time("wait for dbengine collectors to finish");
size_t running = 1;
- while(running) {
+ size_t count = 10;
+ while(running && count) {
running = 0;
for (size_t tier = 0; tier < storage_tiers; tier++)
running += rrdeng_collectors_running(multidb_ctx[tier]);
if(running) {
- error_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS);
- error_limit(&erl, "waiting for %zu collectors to finish", running);
+ nd_log_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS);
+ nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
+ "waiting for %zu collectors to finish", running);
// sleep_usec(100 * USEC_PER_MS);
+ cleanup_destroyed_dictionaries();
}
+ count--;
}
delta_shutdown_time("wait for dbengine main cache to finish flushing");
@@ -613,8 +626,14 @@ int killpid(pid_t pid) {
int ret;
netdata_log_debug(D_EXIT, "Request to kill pid %d", pid);
+ int signal = SIGTERM;
+//#ifdef NETDATA_INTERNAL_CHECKS
+// if(service_running(SERVICE_COLLECTORS))
+// signal = SIGABRT;
+//#endif
+
errno = 0;
- ret = kill(pid, SIGTERM);
+ ret = kill(pid, signal);
if (ret == -1) {
switch(errno) {
case ESRCH:
@@ -661,7 +680,7 @@ static void set_nofile_limit(struct rlimit *rl) {
}
void cancel_main_threads() {
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
int i, found = 0;
usec_t max = 5 * USEC_PER_SEC, step = 100000;
@@ -751,7 +770,7 @@ int help(int exitcode) {
" | '-' '-' '-' '-' real-time performance monitoring, done right! \n"
" +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->\n"
"\n"
- " Copyright (C) 2016-2022, Netdata, Inc. \n"
+ " Copyright (C) 2016-2023, Netdata, Inc. \n"
" Released under GNU General Public License v3 or later.\n"
" All rights reserved.\n"
"\n"
@@ -761,7 +780,7 @@ int help(int exitcode) {
" Support : https://github.com/netdata/netdata/issues\n"
" License : https://github.com/netdata/netdata/blob/master/LICENSE.md\n"
"\n"
- " Twitter : https://twitter.com/linuxnetdata\n"
+ " Twitter : https://twitter.com/netdatahq\n"
" LinkedIn : https://linkedin.com/company/netdata-cloud/\n"
" Facebook : https://facebook.com/linuxnetdata/\n"
"\n"
@@ -787,8 +806,7 @@ int help(int exitcode) {
" -W stacksize=N Set the stacksize (in bytes).\n\n"
" -W debug_flags=N Set runtime tracing to debug.log.\n\n"
" -W unittest Run internal unittests and exit.\n\n"
- " -W sqlite-check Check metadata database integrity and exit.\n\n"
- " -W sqlite-fix Check metadata database integrity, fix if needed and exit.\n\n"
+ " -W sqlite-meta-recover Run recovery on the metadata database and exit.\n\n"
" -W sqlite-compact Reclaim metadata database unused space and exit.\n\n"
#ifdef ENABLE_DBENGINE
" -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n"
@@ -841,40 +859,49 @@ static void security_init(){
#endif
+// Read the logging settings from the configuration and hand them to the nd_log layer:
+// facility, flood protection (logs per period), priority level, and one output per
+// log source (debug, daemon, collectors, access, health and, optionally, ACLK).
static void log_init(void) {
+ nd_log_set_facility(config_get(CONFIG_SECTION_LOGS, "facility", "daemon"));
+
+ // flood protection: start from the compiled-in defaults, let the config override them
+ time_t period = ND_LOG_DEFAULT_THROTTLE_PERIOD;
+ size_t logs = ND_LOG_DEFAULT_THROTTLE_LOGS;
+ period = config_get_number(CONFIG_SECTION_LOGS, "logs flood protection period", period);
+ logs = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "logs to trigger flood protection", (long long int)logs);
+ nd_log_set_flood_protection(logs, period);
+
+ nd_log_set_priority_level(config_get(CONFIG_SECTION_LOGS, "level", NDLP_INFO_STR));
+
char filename[FILENAME_MAX + 1];
snprintfz(filename, FILENAME_MAX, "%s/debug.log", netdata_configured_log_dir);
- stdout_filename = config_get(CONFIG_SECTION_LOGS, "debug", filename);
+ nd_log_set_user_settings(NDLS_DEBUG, config_get(CONFIG_SECTION_LOGS, "debug", filename));
- snprintfz(filename, FILENAME_MAX, "%s/error.log", netdata_configured_log_dir);
- stderr_filename = config_get(CONFIG_SECTION_LOGS, "error", filename);
+ // daemon, collector and health logs default to "journal" when stderr is connected
+ // to the journal, otherwise to a file in the configured log directory
+ bool with_journal = is_stderr_connected_to_journal() /* || nd_log_journal_socket_available() */;
+ if(with_journal)
+ snprintfz(filename, FILENAME_MAX, "journal");
+ else
+ snprintfz(filename, FILENAME_MAX, "%s/daemon.log", netdata_configured_log_dir);
+ nd_log_set_user_settings(NDLS_DAEMON, config_get(CONFIG_SECTION_LOGS, "daemon", filename));
- snprintfz(filename, FILENAME_MAX, "%s/collector.log", netdata_configured_log_dir);
- stdcollector_filename = config_get(CONFIG_SECTION_LOGS, "collector", filename);
+ if(with_journal)
+ snprintfz(filename, FILENAME_MAX, "journal");
+ else
+ snprintfz(filename, FILENAME_MAX, "%s/collector.log", netdata_configured_log_dir);
+ nd_log_set_user_settings(NDLS_COLLECTORS, config_get(CONFIG_SECTION_LOGS, "collector", filename));
+ // the access log always defaults to a file, regardless of with_journal
snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir);
- stdaccess_filename = config_get(CONFIG_SECTION_LOGS, "access", filename);
+ nd_log_set_user_settings(NDLS_ACCESS, config_get(CONFIG_SECTION_LOGS, "access", filename));
- snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir);
- stdhealth_filename = config_get(CONFIG_SECTION_LOGS, "health", filename);
+ if(with_journal)
+ snprintfz(filename, FILENAME_MAX, "journal");
+ else
+ snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir);
+ nd_log_set_user_settings(NDLS_HEALTH, config_get(CONFIG_SECTION_LOGS, "health", filename));
#ifdef ENABLE_ACLK
aclklog_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "conversation log", CONFIG_BOOLEAN_NO);
if (aclklog_enabled) {
snprintfz(filename, FILENAME_MAX, "%s/aclk.log", netdata_configured_log_dir);
- aclklog_filename = config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename);
+ nd_log_set_user_settings(NDLS_ACLK, config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename));
}
#endif
-
- char deffacility[8];
- snprintfz(deffacility,7,"%s","daemon");
- facility_log = config_get(CONFIG_SECTION_LOGS, "facility", deffacility);
-
- error_log_throttle_period = config_get_number(CONFIG_SECTION_LOGS, "errors flood protection period", error_log_throttle_period);
- error_log_errors_per_period = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "errors to trigger flood protection", (long long int)error_log_errors_per_period);
- error_log_errors_per_period_backup = error_log_errors_per_period;
-
- setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors flood protection period" , ""), 1);
- setenv("NETDATA_ERRORS_PER_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors to trigger flood protection", ""), 1);
}
char *initialize_lock_directory_path(char *prefix)
@@ -1046,6 +1073,36 @@ static void backwards_compatible_config() {
config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics",
CONFIG_SECTION_DB, "enable zero metrics");
+ config_move(CONFIG_SECTION_LOGS, "error",
+ CONFIG_SECTION_LOGS, "daemon");
+
+ config_move(CONFIG_SECTION_LOGS, "severity level",
+ CONFIG_SECTION_LOGS, "level");
+
+ config_move(CONFIG_SECTION_LOGS, "errors to trigger flood protection",
+ CONFIG_SECTION_LOGS, "logs to trigger flood protection");
+
+ config_move(CONFIG_SECTION_LOGS, "errors flood protection period",
+ CONFIG_SECTION_LOGS, "logs flood protection period");
+ config_move(CONFIG_SECTION_HEALTH, "is ephemeral",
+ CONFIG_SECTION_GLOBAL, "is ephemeral node");
+
+ config_move(CONFIG_SECTION_HEALTH, "has unstable connection",
+ CONFIG_SECTION_GLOBAL, "has unstable connection");
+}
+
+// Store this machine's hostname in buf (capacity buf_size bytes).
+// Returns 0 on success, non-zero when the hostname could not be determined.
+//
+// When a host access prefix is configured, <prefix>/etc/hostname is tried first and
+// its content is passed through trim(); if no prefix is set, or the file cannot be
+// read, gethostname() is used instead.
+static int get_hostname(char *buf, size_t buf_size) {
+    if (netdata_configured_host_prefix && *netdata_configured_host_prefix) {
+        char filename[FILENAME_MAX + 1];
+        snprintfz(filename, FILENAME_MAX, "%s/etc/hostname", netdata_configured_host_prefix);
+
+        if (!read_file(filename, buf, buf_size)) {
+            trim(buf);
+            return 0;
+        }
+    }
+
+    int rc = gethostname(buf, buf_size);
+
+    if (buf_size) {
+        if (rc)
+            // buf is indeterminate after a failure, yet the caller still uses it as a
+            // default value: hand back an empty string instead of garbage
+            buf[0] = '\0';
+        else
+            // POSIX leaves it unspecified whether a truncated name is null-terminated
+            buf[buf_size - 1] = '\0';
+    }
+
+    return rc;
+}
static void get_netdata_configured_variables() {
@@ -1054,10 +1111,12 @@ static void get_netdata_configured_variables() {
// ------------------------------------------------------------------------
// get the hostname
+ netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", "");
+ verify_netdata_host_prefix();
+
char buf[HOSTNAME_MAX + 1];
- if(gethostname(buf, HOSTNAME_MAX) == -1){
+ if (get_hostname(buf, HOSTNAME_MAX))
netdata_log_error("Cannot get machine hostname.");
- }
netdata_configured_hostname = config_get(CONFIG_SECTION_GLOBAL, "hostname", buf);
netdata_log_debug(D_OPTIONS, "hostname set to '%s'", netdata_configured_hostname);
@@ -1108,8 +1167,6 @@ static void get_netdata_configured_variables() {
netdata_configured_web_dir = config_get(CONFIG_SECTION_DIRECTORIES, "web", netdata_configured_web_dir);
netdata_configured_cache_dir = config_get(CONFIG_SECTION_DIRECTORIES, "cache", netdata_configured_cache_dir);
netdata_configured_varlib_dir = config_get(CONFIG_SECTION_DIRECTORIES, "lib", netdata_configured_varlib_dir);
- char *env_home=getenv("HOME");
- netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", env_home?env_home:netdata_configured_home_dir);
netdata_configured_lock_dir = initialize_lock_directory_path(netdata_configured_varlib_dir);
@@ -1119,6 +1176,16 @@ static void get_netdata_configured_variables() {
}
#ifdef ENABLE_DBENGINE
+ // ------------------------------------------------------------------------
+ // get default Database Engine page type
+
+    // "gorilla" switches tier 0 to PAGE_GORILLA_METRICS; "raw" leaves tier_page_type[0]
+    // untouched; any other value is reported and otherwise ignored
+    const char *page_type = config_get(CONFIG_SECTION_DB, "dbengine page type", "raw");
+    if (strcmp(page_type, "gorilla") == 0) {
+        tier_page_type[0] = PAGE_GORILLA_METRICS;
+    } else if (strcmp(page_type, "raw") != 0) {
+        // fixed: the message used to open the quoted value with two apostrophes (''%s')
+        netdata_log_error("Invalid dbengine page type '%s' given. Defaulting to 'raw'.", page_type);
+    }
+
// ------------------------------------------------------------------------
// get default Database Engine page cache size in MiB
@@ -1157,10 +1224,6 @@ static void get_netdata_configured_variables() {
default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE;
}
#endif
- // ------------------------------------------------------------------------
-
- netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", "");
- verify_netdata_host_prefix();
// --------------------------------------------------------------------
// get KSM settings
@@ -1180,6 +1243,7 @@ static void get_netdata_configured_variables() {
// --------------------------------------------------------------------
rrdset_free_obsolete_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s);
+ rrdhost_free_ephemeral_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup ephemeral hosts after secs", rrdhost_free_ephemeral_time_s);
// Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short
// cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at
// https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information.
@@ -1258,7 +1322,7 @@ static inline void coverity_remove_taint(char *s)
(void)s;
}
-int get_system_info(struct rrdhost_system_info *system_info, bool log) {
+int get_system_info(struct rrdhost_system_info *system_info) {
char *script;
script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("system-info.sh") + 2));
sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "system-info.sh");
@@ -1290,11 +1354,7 @@ int get_system_info(struct rrdhost_system_info *system_info, bool log) {
if(unlikely(rrdhost_set_system_info_variable(system_info, line, value))) {
netdata_log_error("Unexpected environment variable %s=%s", line, value);
- }
- else {
- if(log)
- netdata_log_info("%s=%s", line, value);
-
+ } else {
setenv(line, value, 1);
}
}
@@ -1333,6 +1393,8 @@ int julytest(void);
int pluginsd_parser_unittest(void);
void replication_initialize(void);
void bearer_tokens_init(void);
+int unittest_rrdpush_compressions(void);
+int uuid_unittest(void);
int main(int argc, char **argv) {
// initialize the system clocks
@@ -1342,8 +1404,6 @@ int main(int argc, char **argv) {
usec_t started_ut = now_monotonic_usec();
usec_t last_ut = started_ut;
const char *prev_msg = NULL;
- // Initialize stderror avoiding coredump when netdata_log_info() or netdata_log_error() is called
- stderror = stderr;
int i;
int config_loaded = 0;
@@ -1435,14 +1495,14 @@ int main(int argc, char **argv) {
#ifdef ENABLE_DBENGINE
char* createdataset_string = "createdataset=";
char* stresstest_string = "stresstest=";
-#endif
- if(strcmp(optarg, "sqlite-check") == 0) {
- sql_init_database(DB_CHECK_INTEGRITY, 0);
- return 0;
+
+ if(strcmp(optarg, "pgd-tests") == 0) {
+ return pgd_test(argc, argv);
}
+#endif
- if(strcmp(optarg, "sqlite-fix") == 0) {
- sql_init_database(DB_CHECK_FIX_DB, 0);
+ if(strcmp(optarg, "sqlite-meta-recover") == 0) {
+ sql_init_database(DB_CHECK_RECOVER, 0);
return 0;
}
@@ -1495,6 +1555,8 @@ int main(int argc, char **argv) {
return 1;
if (ctx_unittest())
return 1;
+ if (uuid_unittest())
+ return 1;
fprintf(stderr, "\n\nALL TESTS PASSED\n\n");
return 0;
}
@@ -1509,7 +1571,7 @@ int main(int argc, char **argv) {
unittest_running = true;
return aral_unittest(10000);
}
- else if(strcmp(optarg, "stringtest") == 0) {
+ else if(strcmp(optarg, "stringtest") == 0) {
unittest_running = true;
return string_unittest(10000);
}
@@ -1521,6 +1583,10 @@ int main(int argc, char **argv) {
unittest_running = true;
return buffer_unittest();
}
+ else if(strcmp(optarg, "uuidtest") == 0) {
+ unittest_running = true;
+ return uuid_unittest();
+ }
#ifdef ENABLE_DBENGINE
else if(strcmp(optarg, "mctest") == 0) {
unittest_running = true;
@@ -1550,6 +1616,10 @@ int main(int argc, char **argv) {
unittest_running = true;
return pluginsd_parser_unittest();
}
+ else if(strcmp(optarg, "rrdpush_compressions_test") == 0) {
+ unittest_running = true;
+ return unittest_rrdpush_compressions();
+ }
else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) {
optarg += strlen(createdataset_string);
unsigned history_seconds = strtoul(optarg, NULL, 0);
@@ -1851,7 +1921,7 @@ int main(int argc, char **argv) {
{
char buf[20 + 1];
- snprintfz(buf, 20, "%d", libuv_worker_threads);
+ snprintfz(buf, sizeof(buf) - 1, "%d", libuv_worker_threads);
setenv("UV_THREADPOOL_SIZE", buf, 1);
}
@@ -1894,12 +1964,15 @@ int main(int argc, char **argv) {
// get log filenames and settings
log_init();
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
// initialize the log files
- open_all_log_files();
+ nd_log_initialize();
+ netdata_log_info("Netdata agent version \""VERSION"\" is starting");
ieee754_doubles = is_system_ieee754_double();
+ if(!ieee754_doubles)
+ globally_disabled_capabilities |= STREAM_CAP_IEEE754;
aral_judy_init();
@@ -1909,6 +1982,8 @@ int main(int argc, char **argv) {
replication_initialize();
+ rrd_functions_inflight_init();
+
// --------------------------------------------------------------------
// get the certificate and start security
@@ -1938,8 +2013,6 @@ int main(int argc, char **argv) {
signals_block();
signals_init(); // setup the signals we want to use
- dyn_conf_init();
-
// --------------------------------------------------------------------
// check which threads are enabled and initialize them
@@ -2005,6 +2078,18 @@ int main(int argc, char **argv) {
if(become_daemon(dont_fork, user) == -1)
fatal("Cannot daemonize myself.");
+ // The "HOME" env var points to the root's home dir because Netdata starts as root. Can't use "HOME".
+ struct passwd *pw = getpwuid(getuid());
+ if (config_exists(CONFIG_SECTION_DIRECTORIES, "home") || !pw || !pw->pw_dir) {
+ netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", netdata_configured_home_dir);
+ } else {
+ netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", pw->pw_dir);
+ }
+
+ setenv("HOME", netdata_configured_home_dir, 1);
+
+ dyn_conf_init();
+
netdata_log_info("netdata started on pid %d.", getpid());
delta_startup_time("initialize threads after fork");
@@ -2036,7 +2121,7 @@ int main(int argc, char **argv) {
netdata_anonymous_statistics_enabled=-1;
struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info));
__atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED);
- get_system_info(system_info, true);
+ get_system_info(system_info);
(void) registry_get_this_machine_guid();
system_info->hops = 0;
get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist);
@@ -2073,7 +2158,7 @@ int main(int argc, char **argv) {
// ------------------------------------------------------------------------
// enable log flood protection
- error_log_limit_reset();
+ nd_log_limits_reset();
// Load host labels
delta_startup_time("collect host labels");
diff --git a/daemon/service.c b/daemon/service.c
index a25e2a26b8da45..8a65de66c11c25 100644
--- a/daemon/service.c
+++ b/daemon/service.c
@@ -76,43 +76,55 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) {
rrddim_free(st, rd);
}
-static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) {
+// Archive the dimensions of chart `st` that are candidates for removal.
+//
+// all_dimensions: when true every dimension is a candidate; when false only the ones
+// flagged RRDDIM_FLAG_OBSOLETE are, and the call returns immediately
+// if the chart does not carry RRDSET_FLAG_OBSOLETE_DIMENSIONS.
+//
+// Returns true when every candidate was archived (or there was nothing to do).
+// Returns false when at least one candidate remains; in that case
+// RRDSET_FLAG_OBSOLETE_DIMENSIONS is set again on the chart.
+static inline bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) {
+ if(!all_dimensions && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS))
+ return true;
+
worker_is_busy(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS);
+ // cleared up-front; re-set at the end if some candidate could not be archived
+ rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS);
+
RRDDIM *rd;
time_t now = now_realtime_sec();
- bool done_all_dimensions = true;
+ size_t dim_candidates = 0;
+ size_t dim_archives = 0;
dfe_start_write(st->rrddim_root_index, rd) {
- if(unlikely(
- all_dimensions ||
- (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->collector.last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now))
- )) {
-
- if(dictionary_acquired_item_references(rd_dfe.item) == 1) {
- netdata_log_info("Removing obsolete dimension '%s' (%s) of '%s' (%s).", rrddim_name(rd), rrddim_id(rd), rrdset_name(st), rrdset_id(st));
- svc_rrddim_obsolete_to_archive(rd);
+ bool candidate = (all_dimensions || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE));
+
+ if(candidate) {
+ dim_candidates++;
+
+ // the age check applies to every candidate, also when all_dimensions is true
+ if(rd->collector.last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now) {
+ size_t references = dictionary_acquired_item_references(rd_dfe.item);
+ // presumably a count of 1 means this traversal holds the only reference - TODO confirm
+ if(references == 1) {
+// netdata_log_info("Removing obsolete dimension 'host:%s/chart:%s/dim:%s'",
+// rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd));
+ svc_rrddim_obsolete_to_archive(rd);
+ dim_archives++;
+ }
+// else
+// netdata_log_info("Cannot remove obsolete dimension 'host:%s/chart:%s/dim:%s'",
+// rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd));
}
- else
- done_all_dimensions = false;
}
- else
- done_all_dimensions = false;
}
dfe_done(rd);
- return done_all_dimensions;
-}
+ // candidates that are too recent or still referenced are left for a later pass
+ if(dim_archives != dim_candidates) {
+ rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS);
+ return false;
+ }
-static void svc_rrdset_obsolete_to_archive(RRDSET *st) {
- worker_is_busy(WORKER_JOB_ARCHIVE_CHART);
+ return true;
+}
+static void svc_rrdset_obsolete_to_free(RRDSET *st) {
if(!svc_rrdset_archive_obsolete_dimensions(st, true))
return;
- rrdset_flag_set(st, RRDSET_FLAG_ARCHIVED);
- rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE);
+ worker_is_busy(WORKER_JOB_FREE_CHART);
rrdcalc_unlink_all_rrdset_alerts(st);
@@ -130,53 +142,83 @@ static void svc_rrdset_obsolete_to_archive(RRDSET *st) {
worker_is_busy(WORKER_JOB_SAVE_CHART);
rrdset_save(st);
}
-
- worker_is_busy(WORKER_JOB_FREE_CHART);
- rrdset_free(st);
}
+
+ rrdset_free(st);
}
-static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) {
+// Process the charts of `host` that are flagged obsolete or have obsolete dimensions.
+//
+// Does nothing unless the host carries RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS or
+// RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS. Both flags are cleared before the scan and
+// each one is set again afterwards when its candidates were not all processed.
+static inline void svc_rrdhost_cleanup_charts_marked_obsolete(RRDHOST *host) {
+ if(!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS))
+ return;
+
worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS);
+ rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS);
+
+ // "full" counts whole obsolete charts, "partial" counts charts with obsolete dimensions
+ size_t full_candidates = 0;
+ size_t full_archives = 0;
+ size_t partial_candidates = 0;
+ size_t partial_archives = 0;
+
time_t now = now_realtime_sec();
RRDSET *st;
rrdset_foreach_reentrant(st, host) {
if(rrdset_is_replicating(st))
continue;
- if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)
- && st->last_accessed_time_s + rrdset_free_obsolete_time_s < now
- && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now
- && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now
- )) {
- svc_rrdset_obsolete_to_archive(st);
+ // read the flags once so both decisions below see the same snapshot
+ RRDSET_FLAGS flags = rrdset_flag_get(st);
+ bool obsolete_chart = flags & RRDSET_FLAG_OBSOLETE;
+ bool obsolete_dims = flags & RRDSET_FLAG_OBSOLETE_DIMENSIONS;
+
+ if(obsolete_dims) {
+ partial_candidates++;
+
+ if(svc_rrdset_archive_obsolete_dimensions(st, false))
+ partial_archives++;
}
- else if(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) {
- rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS);
- svc_rrdset_archive_obsolete_dimensions(st, false);
+
+ if(obsolete_chart) {
+ full_candidates++;
+
+ // only charts not accessed, updated or collected for rrdset_free_obsolete_time_s seconds
+ if(unlikely( st->last_accessed_time_s + rrdset_free_obsolete_time_s < now
+ && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now
+ && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now
+ )) {
+ // NOTE(review): svc_rrdset_obsolete_to_free() returns early, without freeing, when
+ // not all dimensions could be archived, yet full_archives is still incremented
+ // here - confirm such charts are retried through another path
+ svc_rrdset_obsolete_to_free(st);
+ full_archives++;
+ }
}
}
rrdset_foreach_done(st);
+
+ // keep the host flagged while work remains, so the next pass looks at it again
+ if(partial_archives != partial_candidates)
+ rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS);
+
+ if(full_archives != full_candidates)
+ rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS);
}
-static void svc_rrdset_check_obsoletion(RRDHOST *host) {
+// Mark as obsolete the charts of `host` whose last stored entry is older than the
+// host's child_connect_time, once the grace period after that time has elapsed.
+// Charts that are currently replicating are skipped.
+static void svc_rrdhost_detect_obsolete_charts(RRDHOST *host) {
worker_is_busy(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK);
time_t now = now_realtime_sec();
time_t last_entry_t;
RRDSET *st;
+
+ // read once, so every chart in the loop is compared against the same value
+ time_t child_connect_time = host->child_connect_time;
+
rrdset_foreach_read(st, host) {
if(rrdset_is_replicating(st))
continue;
last_entry_t = rrdset_last_entry_s(st);
- if(last_entry_t && last_entry_t < host->child_connect_time &&
- host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every
- < now)
+ // the chart has data, none of it newer than child_connect_time, and the grace period
+ // (a fixed delay plus a number of the chart's own update intervals) is over
+ if (last_entry_t && last_entry_t < child_connect_time &&
+ child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT +
+ (ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every) <
+ now)
- rrdset_is_obsolete(st);
+ rrdset_is_obsolete___safe_from_collector_thread(st);
}
rrdset_foreach_done(st);
}
@@ -191,24 +233,24 @@ static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() {
if(rrdhost_receiver_replicating_charts(host) || rrdhost_sender_replicating_charts(host))
continue;
- if(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS)) {
- rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS);
- svc_rrdhost_cleanup_obsolete_charts(host);
- }
+ svc_rrdhost_cleanup_charts_marked_obsolete(host);
- if(host != localhost
- && host->trigger_chart_obsoletion_check
- && (
- (
- host->child_last_chart_command
- && host->child_last_chart_command + host->health.health_delay_up_to < now_realtime_sec()
- )
- || (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec())
- )
- ) {
- svc_rrdset_check_obsoletion(host);
+ if (host == localhost)
+ continue;
+
+ netdata_mutex_lock(&host->receiver_lock);
+
+ time_t now = now_realtime_sec();
+
+ if (host->trigger_chart_obsoletion_check &&
+ ((host->child_last_chart_command &&
+ host->child_last_chart_command + host->health.health_delay_up_to < now) ||
+ (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now))) {
+ svc_rrdhost_detect_obsolete_charts(host);
host->trigger_chart_obsoletion_check = 0;
}
+
+ netdata_mutex_unlock(&host->receiver_lock);
}
rrd_unlock();
@@ -227,22 +269,45 @@ static void svc_rrdhost_cleanup_orphan_hosts(RRDHOST *protected_host) {
if(!rrdhost_should_be_removed(host, protected_host, now))
continue;
- netdata_log_info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid);
+ bool is_archived = rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED);
+ if (!is_archived) {
+ netdata_log_info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid);
- if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST)
- /* don't delete multi-host DB host files */
- && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance))
- ) {
- worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS);
- rrdhost_delete_charts(host);
+ if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST)
+ /* don't delete multi-host DB host files */
+ && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance))
+ ) {
+ worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS);
+ rrdhost_delete_charts(host);
+ }
+ else {
+ worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS);
+ rrdhost_save_charts(host);
+ }
}
- else {
- worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS);
- rrdhost_save_charts(host);
+
+ bool force = false;
+
+ if (rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST) && now - host->last_connected > rrdhost_free_ephemeral_time_s)
+ force = true;
+
+ if (!force && is_archived)
+ continue;
+
+ if (force) {
+ netdata_log_info("Host '%s' with machine guid '%s' is archived, ephemeral clean up.", rrdhost_hostname(host), host->machine_guid);
}
worker_is_busy(WORKER_JOB_FREE_HOST);
- rrdhost_free___while_having_rrd_wrlock(host, false);
+#ifdef ENABLE_ACLK
+ // in case we have a cloud connection, we inform the cloud that
+ // a child disconnected
+ if (netdata_cloud_enabled && force) {
+ aclk_host_state_update(host, 0, 0);
+ unregister_node(host->machine_guid);
+ }
+#endif
+ rrdhost_free___while_having_rrd_wrlock(host, force);
goto restart_after_removal;
}
diff --git a/daemon/signals.c b/daemon/signals.c
index ae28874cc5b37e..4f22543342bed6 100644
--- a/daemon/signals.c
+++ b/daemon/signals.c
@@ -42,7 +42,7 @@ static void signal_handler(int signo) {
if(signals_waiting[i].action == NETDATA_SIGNAL_FATAL) {
char buffer[200 + 1];
- snprintfz(buffer, 200, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name);
+ snprintfz(buffer, sizeof(buffer) - 1, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name);
if(write(STDERR_FILENO, buffer, strlen(buffer)) == -1) {
// nothing to do - we cannot write but there is no way to complain about it
;
@@ -203,28 +203,28 @@ void signals_handle(void) {
switch (signals_waiting[i].action) {
case NETDATA_SIGNAL_RELOAD_HEALTH:
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("SIGNAL: Received %s. Reloading HEALTH configuration...", name);
- error_log_limit_reset();
+ nd_log_limits_reset();
execute_command(CMD_RELOAD_HEALTH, NULL, NULL);
break;
case NETDATA_SIGNAL_SAVE_DATABASE:
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("SIGNAL: Received %s. Saving databases...", name);
- error_log_limit_reset();
+ nd_log_limits_reset();
execute_command(CMD_SAVE_DATABASE, NULL, NULL);
break;
case NETDATA_SIGNAL_REOPEN_LOGS:
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("SIGNAL: Received %s. Reopening all log files...", name);
- error_log_limit_reset();
+ nd_log_limits_reset();
execute_command(CMD_REOPEN_LOGS, NULL, NULL);
break;
case NETDATA_SIGNAL_EXIT_CLEANLY:
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
netdata_log_info("SIGNAL: Received %s. Cleaning up to exit...", name);
commands_exit();
netdata_cleanup_and_exit(0);
diff --git a/daemon/static_threads.c b/daemon/static_threads.c
index 830b854e6acd3a..0ce010d6edd0d0 100644
--- a/daemon/static_threads.c
+++ b/daemon/static_threads.c
@@ -61,7 +61,7 @@ const struct netdata_static_thread static_threads_common[] = {
.config_name = "netdata monitoring",
.env_name = "NETDATA_INTERNALS_MONITORING",
.global_variable = &global_statistics_enabled,
- .enabled = 1,
+ .enabled = 0,
.thread = NULL,
.init_routine = NULL,
.start_routine = global_statistics_main
@@ -69,10 +69,10 @@ const struct netdata_static_thread static_threads_common[] = {
{
.name = "STATS_WORKERS",
.config_section = CONFIG_SECTION_PLUGINS,
- .config_name = "netdata monitoring",
+ .config_name = "netdata monitoring extended",
.env_name = "NETDATA_INTERNALS_MONITORING",
.global_variable = &global_statistics_enabled,
- .enabled = 1,
+ .enabled = 0,
.thread = NULL,
.init_routine = NULL,
.start_routine = global_statistics_workers_main
@@ -80,10 +80,10 @@ const struct netdata_static_thread static_threads_common[] = {
{
.name = "STATS_SQLITE3",
.config_section = CONFIG_SECTION_PLUGINS,
- .config_name = "netdata monitoring",
+ .config_name = "netdata monitoring extended",
.env_name = "NETDATA_INTERNALS_MONITORING",
.global_variable = &global_statistics_enabled,
- .enabled = 1,
+ .enabled = 0,
.thread = NULL,
.init_routine = NULL,
.start_routine = global_statistics_sqlite3_main
diff --git a/daemon/unit_test.c b/daemon/unit_test.c
index b8d229316b12bc..8f44be39bb70b4 100644
--- a/daemon/unit_test.c
+++ b/daemon/unit_test.c
@@ -97,7 +97,7 @@ static int check_number_printing(void) {
int i, failed = 0;
for(i = 0; values[i].correct ; i++) {
print_netdata_double(netdata, values[i].n);
- snprintfz(system, 512, "%0.12" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)values[i].n);
+ snprintfz(system, sizeof(system) - 1, "%0.12" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)values[i].n);
int ok = 1;
if(strcmp(netdata, values[i].correct) != 0) {
@@ -319,7 +319,7 @@ void benchmark_storage_number(int loop, int multiplier) {
for(i = 0; i < loop ;i++) {
n *= multiplier;
if(n > storage_number_positive_max) n = storage_number_positive_min;
- snprintfz(buffer, 100, NETDATA_DOUBLE_FORMAT, n);
+ snprintfz(buffer, sizeof(buffer) - 1, NETDATA_DOUBLE_FORMAT, n);
}
}
@@ -507,7 +507,7 @@ int unit_test_buffer() {
const char *fmt = "string1: %s\nstring2: %s\nstring3: %s\nstring4: %s";
buffer_sprintf(wb, fmt, string, string, string, string);
- snprintfz(final, 9000, fmt, string, string, string, string);
+ snprintfz(final, sizeof(final) - 1, fmt, string, string, string, string);
const char *s = buffer_tostring(wb);
@@ -1272,7 +1272,7 @@ int run_test(struct test *test)
default_rrd_update_every = test->update_every;
char name[101];
- snprintfz(name, 100, "unittest-%s", test->name);
+ snprintfz(name, sizeof(name) - 1, "unittest-%s", test->name);
// create the chart
RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1
@@ -1534,7 +1534,7 @@ int unit_test(long delay, long shift)
repeat++;
char name[101];
- snprintfz(name, 100, "unittest-%d-%ld-%ld", repeat, delay, shift);
+ snprintfz(name, sizeof(name) - 1, "unittest-%d-%ld-%ld", repeat, delay, shift);
//debug_flags = 0xffffffff;
default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC;
@@ -1681,13 +1681,6 @@ int test_sqlite(void) {
rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL);
if (rc != SQLITE_OK)
goto error;
- buffer_flush(sql);
-
- buffer_sprintf(sql, INDEX_ACLK_ALERT, uuid_str, uuid_str);
- rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL);
- if (rc != SQLITE_OK)
- goto error;
- buffer_flush(sql);
buffer_free(sql);
fprintf(stderr,"SQLite is OK\n");
@@ -1831,30 +1824,29 @@ static RRDHOST *dbengine_rrdhost_find_or_create(char *name)
/* We don't want to drop metrics when generating load, we prefer to block data generation itself */
return rrdhost_find_or_create(
- name
- , name
- , name
- , os_type
- , netdata_configured_timezone
- , netdata_configured_abbrev_timezone
- , netdata_configured_utc_offset
- , ""
- , program_name
- , program_version
- , default_rrd_update_every
- , default_rrd_history_entries
- , RRD_MEMORY_MODE_DBENGINE
- , default_health_enabled
- , default_rrdpush_enabled
- , default_rrdpush_destination
- , default_rrdpush_api_key
- , default_rrdpush_send_charts_matching
- , default_rrdpush_enable_replication
- , default_rrdpush_seconds_to_replicate
- , default_rrdpush_replication_step
- , NULL
- , 0
- );
+ name,
+ name,
+ name,
+ os_type,
+ netdata_configured_timezone,
+ netdata_configured_abbrev_timezone,
+ netdata_configured_utc_offset,
+ "",
+ program_name,
+ program_version,
+ default_rrd_update_every,
+ default_rrd_history_entries,
+ RRD_MEMORY_MODE_DBENGINE,
+ default_health_enabled,
+ default_rrdpush_enabled,
+ default_rrdpush_destination,
+ default_rrdpush_api_key,
+ default_rrdpush_send_charts_matching,
+ default_rrdpush_enable_replication,
+ default_rrdpush_seconds_to_replicate,
+ default_rrdpush_replication_step,
+ NULL,
+ 0);
}
// constants for test_dbengine
@@ -1878,7 +1870,7 @@ static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDI
char name[101];
for (i = 0 ; i < CHARTS ; ++i) {
- snprintfz(name, 100, "dbengine-chart-%d", i);
+ snprintfz(name, sizeof(name) - 1, "dbengine-chart-%d", i);
// create the chart
st[i] = rrdset_create(host, "netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest",
@@ -1886,7 +1878,7 @@ static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDI
rrdset_flag_set(st[i], RRDSET_FLAG_DEBUG);
rrdset_flag_set(st[i], RRDSET_FLAG_STORE_FIRST);
for (j = 0 ; j < DIMS ; ++j) {
- snprintfz(name, 100, "dim-%d", j);
+ snprintfz(name, sizeof(name) - 1, "dim-%d", j);
rd[i][j] = rrddim_add(st[i], name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
}
@@ -2108,6 +2100,14 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS]
return errors + value_errors + time_errors;
}
+void test_dbengine_charts_and_dims_are_not_collected(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS]) { // test helper: resets rrdcontexts.collected on all CHARTS charts and their DIMS dimensions
+ for(int c = 0; c < CHARTS ; c++) {
+ st[c]->rrdcontexts.collected = false; // chart-level flag
+ for(int d = 0; d < DIMS ; d++)
+ rd[c][d]->rrdcontexts.collected = false; // dimension-level flag of chart c
+ }
+}
+
int test_dbengine(void)
{
fprintf(stderr, "%s() running...\n", __FUNCTION__ );
@@ -2117,7 +2117,7 @@ int test_dbengine(void)
RRDDIM *rd[CHARTS][DIMS];
time_t time_start[REGIONS], time_end[REGIONS];
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
fprintf(stderr, "\nRunning DB-engine test\n");
default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
@@ -2135,6 +2135,7 @@ int test_dbengine(void)
time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]);
errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
+ test_dbengine_charts_and_dims_are_not_collected(st, rd);
current_region = 1; //this is the second region of data
update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 3 seconds
@@ -2152,6 +2153,7 @@ int test_dbengine(void)
time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]);
errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
+ test_dbengine_charts_and_dims_are_not_collected(st, rd);
current_region = 2; //this is the third region of data
update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 1 seconds
@@ -2169,6 +2171,7 @@ int test_dbengine(void)
time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]);
errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
+ test_dbengine_charts_and_dims_are_not_collected(st, rd);
for (current_region = 0 ; current_region < REGIONS ; ++current_region) {
errors += test_dbengine_check_rrdr(st, rd, current_region, time_start[current_region], time_end[current_region]);
@@ -2343,7 +2346,7 @@ void generate_dbengine_dataset(unsigned history_seconds)
(1024 * 1024);
default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100;
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
fprintf(stderr, "Initializing localhost with hostname 'dbengine-dataset'");
host = dbengine_rrdhost_find_or_create("dbengine-dataset");
@@ -2518,7 +2521,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi
unsigned i, j;
time_t time_start, test_duration;
- error_log_limit_unlimited();
+ nd_log_limits_unlimited();
if (!TEST_DURATION_SEC)
TEST_DURATION_SEC = 10;
diff --git a/database/contexts/api_v1.c b/database/contexts/api_v1.c
index bc7fee496de64d..f144e6f7b881f6 100644
--- a/database/contexts/api_v1.c
+++ b/database/contexts/api_v1.c
@@ -213,7 +213,7 @@ static inline int rrdinstance_to_json_callback(const DICTIONARY_ITEM *item, void
buffer_json_array_close(wb);
}
- if(options & RRDCONTEXT_OPTION_SHOW_LABELS && ri->rrdlabels && dictionary_entries(ri->rrdlabels)) {
+ if(options & RRDCONTEXT_OPTION_SHOW_LABELS && ri->rrdlabels && rrdlabels_entries(ri->rrdlabels)) {
buffer_json_member_add_object(wb, "labels");
rrdlabels_to_buffer_json_members(ri->rrdlabels, wb);
buffer_json_object_close(wb);
@@ -366,7 +366,7 @@ int rrdcontext_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before, R
RRDCONTEXT *rc = rrdcontext_acquired_value(rca);
if(after != 0 && before != 0)
- rrdr_relative_window_to_absolute(&after, &before, NULL, false);
+ rrdr_relative_window_to_absolute_query(&after, &before, NULL, false);
buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
struct rrdcontext_to_json t_contexts = {
@@ -403,7 +403,7 @@ int rrdcontexts_to_json(RRDHOST *host, BUFFER *wb, time_t after, time_t before,
uuid_unparse(*host->node_id, node_uuid);
if(after != 0 && before != 0)
- rrdr_relative_window_to_absolute(&after, &before, NULL, false);
+ rrdr_relative_window_to_absolute_query(&after, &before, NULL, false);
buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(host));
diff --git a/database/contexts/api_v2.c b/database/contexts/api_v2.c
index 08739160d94dad..3ca49a319522b2 100644
--- a/database/contexts/api_v2.c
+++ b/database/contexts/api_v2.c
@@ -184,6 +184,7 @@ struct alert_v2_entry {
RRDCALC *tmp;
STRING *name;
+ STRING *summary;
size_t ati;
@@ -315,6 +316,7 @@ static void alerts_v2_insert_callback(const DICTIONARY_ITEM *item __maybe_unused
struct alert_v2_entry *t = value;
RRDCALC *rc = t->tmp;
t->name = rc->name;
+ t->summary = rc->summary;
t->ati = ctl->alerts.ati++;
t->nodes = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_VALUE_LINK_DONT_CLONE|DICT_OPTION_NAME_LINK_DONT_CLONE);
@@ -355,6 +357,7 @@ static void alert_instances_v2_insert_callback(const DICTIONARY_ITEM *item __may
t->status = rc->status;
t->flags = rc->run_flags;
t->info = rc->info;
+ t->summary = rc->summary;
t->value = rc->value;
t->last_updated = rc->last_updated;
t->last_status_change = rc->last_status_change;
@@ -418,7 +421,7 @@ static FTS_MATCH rrdcontext_to_json_v2_full_text_search(struct rrdcontext_to_jso
dfe_done(rm);
size_t label_searches = 0;
- if(unlikely(ri->rrdlabels && dictionary_entries(ri->rrdlabels) &&
+ if(unlikely(ri->rrdlabels && rrdlabels_entries(ri->rrdlabels) &&
rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, q, ':', &label_searches))) {
ctl->q.fts.searches += label_searches;
ctl->q.fts.char_searches += label_searches;
@@ -504,7 +507,7 @@ static bool rrdcontext_matches_alert(struct rrdcontext_to_json_v2_data *ctl, RRD
if (ctl->options & (CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES | CONTEXT_V2_OPTION_ALERTS_WITH_VALUES)) {
char key[20 + 1];
- snprintfz(key, 20, "%p", rcl);
+ snprintfz(key, sizeof(key) - 1, "%p", rcl);
struct sql_alert_instance_v2_entry z = {
.ati = ati,
@@ -613,10 +616,10 @@ static void rrdhost_receiver_to_json(BUFFER *wb, RRDHOST_STATUS *s, const char *
buffer_json_member_add_object(wb, "source");
{
char buf[1024 + 1];
- snprintfz(buf, 1024, "[%s]:%d%s", s->ingest.peers.local.ip, s->ingest.peers.local.port, s->ingest.ssl ? ":SSL" : "");
+ snprintfz(buf, sizeof(buf) - 1, "[%s]:%d%s", s->ingest.peers.local.ip, s->ingest.peers.local.port, s->ingest.ssl ? ":SSL" : "");
buffer_json_member_add_string(wb, "local", buf);
- snprintfz(buf, 1024, "[%s]:%d%s", s->ingest.peers.peer.ip, s->ingest.peers.peer.port, s->ingest.ssl ? ":SSL" : "");
+ snprintfz(buf, sizeof(buf) - 1, "[%s]:%d%s", s->ingest.peers.peer.ip, s->ingest.peers.peer.port, s->ingest.ssl ? ":SSL" : "");
buffer_json_member_add_string(wb, "remote", buf);
stream_capabilities_to_json_array(wb, s->ingest.capabilities, "capabilities");
@@ -656,10 +659,10 @@ static void rrdhost_sender_to_json(BUFFER *wb, RRDHOST_STATUS *s, const char *ke
buffer_json_member_add_object(wb, "destination");
{
char buf[1024 + 1];
- snprintfz(buf, 1024, "[%s]:%d%s", s->stream.peers.local.ip, s->stream.peers.local.port, s->stream.ssl ? ":SSL" : "");
+ snprintfz(buf, sizeof(buf) - 1, "[%s]:%d%s", s->stream.peers.local.ip, s->stream.peers.local.port, s->stream.ssl ? ":SSL" : "");
buffer_json_member_add_string(wb, "local", buf);
- snprintfz(buf, 1024, "[%s]:%d%s", s->stream.peers.peer.ip, s->stream.peers.peer.port, s->stream.ssl ? ":SSL" : "");
+ snprintfz(buf, sizeof(buf) - 1, "[%s]:%d%s", s->stream.peers.peer.ip, s->stream.peers.peer.port, s->stream.ssl ? ":SSL" : "");
buffer_json_member_add_string(wb, "remote", buf);
stream_capabilities_to_json_array(wb, s->stream.capabilities, "capabilities");
@@ -671,6 +674,7 @@ static void rrdhost_sender_to_json(BUFFER *wb, RRDHOST_STATUS *s, const char *ke
buffer_json_member_add_uint64(wb, "metadata", s->stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_METADATA]);
buffer_json_member_add_uint64(wb, "functions", s->stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_FUNCTIONS]);
buffer_json_member_add_uint64(wb, "replication", s->stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_REPLICATION]);
+ buffer_json_member_add_uint64(wb, "dyncfg", s->stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_DYNCFG]);
}
buffer_json_object_close(wb); // traffic
@@ -682,7 +686,7 @@ static void rrdhost_sender_to_json(BUFFER *wb, RRDHOST_STATUS *s, const char *ke
{
if (d->ssl) {
- snprintfz(buf, 1024, "%s:SSL", string2str(d->destination));
+ snprintfz(buf, sizeof(buf) - 1, "%s:SSL", string2str(d->destination));
buffer_json_member_add_string(wb, "destination", buf);
}
else
@@ -1009,8 +1013,8 @@ void buffer_json_agents_v2(BUFFER *wb, struct query_timings *timings, time_t now
STORAGE_ENGINE *eng = localhost->db[tier].eng;
if (!eng) continue;
- size_t max = storage_engine_disk_space_max(eng->backend, localhost->db[tier].instance);
- size_t used = storage_engine_disk_space_used(eng->backend, localhost->db[tier].instance);
+ uint64_t max = storage_engine_disk_space_max(eng->backend, localhost->db[tier].instance);
+ uint64_t used = storage_engine_disk_space_used(eng->backend, localhost->db[tier].instance);
time_t first_time_s = storage_engine_global_first_time_s(eng->backend, localhost->db[tier].instance);
size_t currently_collected_metrics = storage_engine_collected_metrics(eng->backend, localhost->db[tier].instance);
@@ -1280,6 +1284,7 @@ static void contexts_v2_alert_config_to_json_from_sql_alert_config_data(struct s
buffer_json_member_add_string(wb, "component", t->component);
buffer_json_member_add_string(wb, "type", t->type);
buffer_json_member_add_string(wb, "info", t->info);
+ buffer_json_member_add_string(wb, "summary", t->summary);
// buffer_json_member_add_string(wb, "source", t->source); // moved to alert instance
}
@@ -1343,6 +1348,7 @@ static int contexts_v2_alert_instance_to_json_callback(const DICTIONARY_ITEM *it
buffer_json_member_add_string(wb, "units", string2str(t->units));
buffer_json_member_add_string(wb, "fami", string2str(t->family));
buffer_json_member_add_string(wb, "info", string2str(t->info));
+ buffer_json_member_add_string(wb, "sum", string2str(t->summary));
buffer_json_member_add_string(wb, "ctx", string2str(t->context));
buffer_json_member_add_string(wb, "st", rrdcalc_status2string(t->status));
buffer_json_member_add_uuid(wb, "tr_i", &t->last_transition_id);
@@ -1397,6 +1403,7 @@ static void contexts_v2_alerts_to_json(BUFFER *wb, struct rrdcontext_to_json_v2_
{
buffer_json_member_add_uint64(wb, "ati", t->ati);
buffer_json_member_add_string(wb, "nm", string2str(t->name));
+ buffer_json_member_add_string(wb, "sum", string2str(t->summary));
buffer_json_member_add_uint64(wb, "cr", t->critical);
buffer_json_member_add_uint64(wb, "wr", t->warning);
@@ -1438,6 +1445,7 @@ struct sql_alert_transition_fixed_size {
char units[SQL_TRANSITION_DATA_SMALL_STRING];
char exec[SQL_TRANSITION_DATA_BIG_STRING];
char info[SQL_TRANSITION_DATA_BIG_STRING];
+ char summary[SQL_TRANSITION_DATA_BIG_STRING];
char classification[SQL_TRANSITION_DATA_SMALL_STRING];
char type[SQL_TRANSITION_DATA_SMALL_STRING];
char component[SQL_TRANSITION_DATA_SMALL_STRING];
@@ -1477,6 +1485,7 @@ static struct sql_alert_transition_fixed_size *contexts_v2_alert_transition_dup(
strncpyz(n->units, t->units ? t->units : "", sizeof(n->units) - 1);
strncpyz(n->exec, t->exec ? t->exec : "", sizeof(n->exec) - 1);
strncpyz(n->info, t->info ? t->info : "", sizeof(n->info) - 1);
+ strncpyz(n->summary, t->summary ? t->summary : "", sizeof(n->summary) - 1);
strncpyz(n->classification, t->classification ? t->classification : "", sizeof(n->classification) - 1);
strncpyz(n->type, t->type ? t->type : "", sizeof(n->type) - 1);
strncpyz(n->component, t->component ? t->component : "", sizeof(n->component) - 1);
@@ -1734,6 +1743,7 @@ static void contexts_v2_alert_transitions_to_json(BUFFER *wb, struct rrdcontext_
buffer_json_member_add_time_t(wb, "when", t->when_key);
buffer_json_member_add_string(wb, "info", *t->info ? t->info : "");
+ buffer_json_member_add_string(wb, "summary", *t->summary ? t->summary : "");
buffer_json_member_add_string(wb, "units", *t->units ? t->units : NULL);
buffer_json_member_add_object(wb, "new");
{
@@ -1934,7 +1944,9 @@ int rrdcontext_to_json_v2(BUFFER *wb, struct api_v2_contexts_request *req, CONTE
}
if(req->after || req->before) {
- ctl.window.relative = rrdr_relative_window_to_absolute(&ctl.window.after, &ctl.window.before, &ctl.now, false);
+ ctl.window.relative = rrdr_relative_window_to_absolute_query(&ctl.window.after, &ctl.window.before, &ctl.now
+ , false
+ );
ctl.window.enabled = !(mode & CONTEXTS_V2_ALERT_TRANSITIONS);
}
else
@@ -2023,7 +2035,7 @@ int rrdcontext_to_json_v2(BUFFER *wb, struct api_v2_contexts_request *req, CONTE
}
else {
buffer_strcat(wb, "query interrupted");
- resp = HTTP_RESP_BACKEND_FETCH_FAILED;
+ resp = HTTP_RESP_CLIENT_CLOSED_REQUEST;
}
goto cleanup;
}
diff --git a/database/contexts/instance.c b/database/contexts/instance.c
index 7e572fb80a6510..39837dbf677d67 100644
--- a/database/contexts/instance.c
+++ b/database/contexts/instance.c
@@ -35,7 +35,7 @@ inline STRING *rrdinstance_acquired_units_dup(RRDINSTANCE_ACQUIRED *ria) {
return string_dup(ri->units);
}
-inline DICTIONARY *rrdinstance_acquired_labels(RRDINSTANCE_ACQUIRED *ria) {
+inline RRDLABELS *rrdinstance_acquired_labels(RRDINSTANCE_ACQUIRED *ria) {
RRDINSTANCE *ri = rrdinstance_acquired_value(ria);
return ri->rrdlabels;
}
@@ -68,7 +68,7 @@ inline time_t rrdinstance_acquired_update_every(RRDINSTANCE_ACQUIRED *ria) {
static void rrdinstance_free(RRDINSTANCE *ri) {
if(rrd_flag_check(ri, RRD_FLAG_OWN_LABELS))
- dictionary_destroy(ri->rrdlabels);
+ rrdlabels_destroy(ri->rrdlabels);
rrdmetrics_destroy_from_rrdinstance(ri);
string_freez(ri->id);
@@ -211,7 +211,7 @@ static bool rrdinstance_conflict_callback(const DICTIONARY_ITEM *item __maybe_un
ri->rrdset = ri_new->rrdset;
if(ri->rrdset && rrd_flag_check(ri, RRD_FLAG_OWN_LABELS)) {
- DICTIONARY *old = ri->rrdlabels;
+ RRDLABELS *old = ri->rrdlabels;
ri->rrdlabels = ri->rrdset->rrdlabels;
rrd_flag_clear(ri, RRD_FLAG_OWN_LABELS);
rrdlabels_destroy(old);
@@ -329,11 +329,11 @@ inline void rrdinstance_from_rrdset(RRDSET *st) {
RRDINSTANCE_ACQUIRED *ria = (RRDINSTANCE_ACQUIRED *)dictionary_set_and_acquire_item(rc->rrdinstances, string2str(tri.id), &tri, sizeof(tri));
- RRDCONTEXT_ACQUIRED *rca_old = st->rrdcontext;
- RRDINSTANCE_ACQUIRED *ria_old = st->rrdinstance;
+ RRDCONTEXT_ACQUIRED *rca_old = st->rrdcontexts.rrdcontext;
+ RRDINSTANCE_ACQUIRED *ria_old = st->rrdcontexts.rrdinstance;
- st->rrdcontext = rca;
- st->rrdinstance = ria;
+ st->rrdcontexts.rrdcontext = rca;
+ st->rrdcontexts.rrdinstance = ria;
if(rca == rca_old) {
rrdcontext_release(rca_old);
@@ -354,16 +354,16 @@ inline void rrdinstance_from_rrdset(RRDSET *st) {
// migrate all dimensions to the new metrics
RRDDIM *rd;
rrddim_foreach_read(rd, st) {
- if (!rd->rrdmetric) continue;
+ if (!rd->rrdcontexts.rrdmetric) continue;
- RRDMETRIC *rm_old = rrdmetric_acquired_value(rd->rrdmetric);
+ RRDMETRIC *rm_old = rrdmetric_acquired_value(rd->rrdcontexts.rrdmetric);
rrd_flags_replace(rm_old, RRD_FLAG_DELETED|RRD_FLAG_UPDATED|RRD_FLAG_LIVE_RETENTION|RRD_FLAG_UPDATE_REASON_UNUSED|RRD_FLAG_UPDATE_REASON_ZERO_RETENTION);
rm_old->rrddim = NULL;
rm_old->first_time_s = 0;
rm_old->last_time_s = 0;
- rrdmetric_release(rd->rrdmetric);
- rd->rrdmetric = NULL;
+ rrdmetric_release(rd->rrdcontexts.rrdmetric);
+ rd->rrdcontexts.rrdmetric = NULL;
rrdmetric_from_rrddim(rd);
}
@@ -406,12 +406,12 @@ inline void rrdinstance_from_rrdset(RRDSET *st) {
#define rrdset_get_rrdinstance(st) rrdset_get_rrdinstance_with_trace(st, __FUNCTION__);
static inline RRDINSTANCE *rrdset_get_rrdinstance_with_trace(RRDSET *st, const char *function) {
- if(unlikely(!st->rrdinstance)) {
+ if(unlikely(!st->rrdcontexts.rrdinstance)) {
netdata_log_error("RRDINSTANCE: RRDSET '%s' is not linked to an RRDINSTANCE at %s()", rrdset_id(st), function);
return NULL;
}
- RRDINSTANCE *ri = rrdinstance_acquired_value(st->rrdinstance);
+ RRDINSTANCE *ri = rrdinstance_acquired_value(st->rrdcontexts.rrdinstance);
if(unlikely(!ri)) {
netdata_log_error("RRDINSTANCE: RRDSET '%s' lost its link to an RRDINSTANCE at %s()", rrdset_id(st), function);
return NULL;
@@ -439,14 +439,17 @@ inline void rrdinstance_rrdset_is_freed(RRDSET *st) {
rrdinstance_trigger_updates(ri, __FUNCTION__ );
- rrdinstance_release(st->rrdinstance);
- st->rrdinstance = NULL;
+ rrdinstance_release(st->rrdcontexts.rrdinstance);
+ st->rrdcontexts.rrdinstance = NULL;
- rrdcontext_release(st->rrdcontext);
- st->rrdcontext = NULL;
+ rrdcontext_release(st->rrdcontexts.rrdcontext);
+ st->rrdcontexts.rrdcontext = NULL;
+ st->rrdcontexts.collected = false;
}
inline void rrdinstance_rrdset_has_updated_retention(RRDSET *st) {
+ st->rrdcontexts.collected = false;
+
RRDINSTANCE *ri = rrdset_get_rrdinstance(st);
if(unlikely(!ri)) return;
@@ -455,8 +458,10 @@ inline void rrdinstance_rrdset_has_updated_retention(RRDSET *st) {
}
inline void rrdinstance_updated_rrdset_name(RRDSET *st) {
+ st->rrdcontexts.collected = false;
+
// the chart may not be initialized when this is called
- if(unlikely(!st->rrdinstance)) return;
+ if(unlikely(!st->rrdcontexts.rrdinstance)) return;
RRDINSTANCE *ri = rrdset_get_rrdinstance(st);
if(unlikely(!ri)) return;
@@ -491,10 +496,12 @@ inline void rrdinstance_updated_rrdset_flags_no_action(RRDINSTANCE *ri, RRDSET *
}
inline void rrdinstance_updated_rrdset_flags(RRDSET *st) {
+ st->rrdcontexts.collected = false;
+
RRDINSTANCE *ri = rrdset_get_rrdinstance(st);
if(unlikely(!ri)) return;
- if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_ARCHIVED|RRDSET_FLAG_OBSOLETE)))
+ if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)))
rrd_flag_set_archived(ri);
rrdinstance_updated_rrdset_flags_no_action(ri, st);
@@ -503,6 +510,11 @@ inline void rrdinstance_updated_rrdset_flags(RRDSET *st) {
}
inline void rrdinstance_collected_rrdset(RRDSET *st) {
+ if(st->rrdcontexts.collected)
+ return;
+
+ st->rrdcontexts.collected = true;
+
RRDINSTANCE *ri = rrdset_get_rrdinstance(st);
if(unlikely(!ri)) {
rrdcontext_updated_rrdset(st);
diff --git a/database/contexts/internal.h b/database/contexts/internal.h
index 04ad0883a36212..293659fdd0a00d 100644
--- a/database/contexts/internal.h
+++ b/database/contexts/internal.h
@@ -230,7 +230,7 @@ typedef struct rrdinstance {
time_t update_every_s; // data collection frequency
RRDSET *rrdset; // pointer to RRDSET when collected, or NULL
- DICTIONARY *rrdlabels; // linked to RRDSET->chart_labels or own version
+ RRDLABELS *rrdlabels; // linked to RRDSET->chart_labels or own version
struct rrdcontext *rc;
DICTIONARY *rrdmetrics;
diff --git a/database/contexts/metric.c b/database/contexts/metric.c
index 55efde4e9f6d33..0f0785972dfe1a 100644
--- a/database/contexts/metric.c
+++ b/database/contexts/metric.c
@@ -239,10 +239,10 @@ void rrdmetric_from_rrddim(RRDDIM *rd) {
if(unlikely(!rd->rrdset->rrdhost))
fatal("RRDMETRIC: rrdset '%s' does not have a rrdhost", rrdset_id(rd->rrdset));
- if(unlikely(!rd->rrdset->rrdinstance))
+ if(unlikely(!rd->rrdset->rrdcontexts.rrdinstance))
fatal("RRDMETRIC: rrdset '%s' does not have a rrdinstance", rrdset_id(rd->rrdset));
- RRDINSTANCE *ri = rrdinstance_acquired_value(rd->rrdset->rrdinstance);
+ RRDINSTANCE *ri = rrdinstance_acquired_value(rd->rrdset->rrdcontexts.rrdinstance);
RRDMETRIC trm = {
.id = string_dup(rd->id),
@@ -254,20 +254,21 @@ void rrdmetric_from_rrddim(RRDDIM *rd) {
RRDMETRIC_ACQUIRED *rma = (RRDMETRIC_ACQUIRED *)dictionary_set_and_acquire_item(ri->rrdmetrics, string2str(trm.id), &trm, sizeof(trm));
- if(rd->rrdmetric)
- rrdmetric_release(rd->rrdmetric);
+ if(rd->rrdcontexts.rrdmetric)
+ rrdmetric_release(rd->rrdcontexts.rrdmetric);
- rd->rrdmetric = rma;
+ rd->rrdcontexts.rrdmetric = rma;
+ rd->rrdcontexts.collected = false;
}
#define rrddim_get_rrdmetric(rd) rrddim_get_rrdmetric_with_trace(rd, __FUNCTION__)
static inline RRDMETRIC *rrddim_get_rrdmetric_with_trace(RRDDIM *rd, const char *function) {
- if(unlikely(!rd->rrdmetric)) {
+ if(unlikely(!rd->rrdcontexts.rrdmetric)) {
netdata_log_error("RRDMETRIC: RRDDIM '%s' is not linked to an RRDMETRIC at %s()", rrddim_id(rd), function);
return NULL;
}
- RRDMETRIC *rm = rrdmetric_acquired_value(rd->rrdmetric);
+ RRDMETRIC *rm = rrdmetric_acquired_value(rd->rrdcontexts.rrdmetric);
if(unlikely(!rm)) {
netdata_log_error("RRDMETRIC: RRDDIM '%s' lost the link to its RRDMETRIC at %s()", rrddim_id(rd), function);
return NULL;
@@ -288,11 +289,14 @@ inline void rrdmetric_rrddim_is_freed(RRDDIM *rd) {
rm->rrddim = NULL;
rrdmetric_trigger_updates(rm, __FUNCTION__ );
- rrdmetric_release(rd->rrdmetric);
- rd->rrdmetric = NULL;
+ rrdmetric_release(rd->rrdcontexts.rrdmetric);
+ rd->rrdcontexts.rrdmetric = NULL;
+ rd->rrdcontexts.collected = false;
}
inline void rrdmetric_updated_rrddim_flags(RRDDIM *rd) {
+ rd->rrdcontexts.collected = false;
+
RRDMETRIC *rm = rrddim_get_rrdmetric(rd);
if(unlikely(!rm)) return;
@@ -305,6 +309,11 @@ inline void rrdmetric_updated_rrddim_flags(RRDDIM *rd) {
}
inline void rrdmetric_collected_rrddim(RRDDIM *rd) {
+ if(rd->rrdcontexts.collected)
+ return;
+
+ rd->rrdcontexts.collected = true;
+
RRDMETRIC *rm = rrddim_get_rrdmetric(rd);
if(unlikely(!rm)) return;
@@ -316,4 +325,3 @@ inline void rrdmetric_collected_rrddim(RRDDIM *rd) {
rrdmetric_trigger_updates(rm, __FUNCTION__ );
}
-
diff --git a/database/contexts/query_target.c b/database/contexts/query_target.c
index 829640b90902d5..95abc3e654ae2e 100644
--- a/database/contexts/query_target.c
+++ b/database/contexts/query_target.c
@@ -835,8 +835,8 @@ static ssize_t query_context_add(void *data, RRDCONTEXT_ACQUIRED *rca, bool quer
if(query_instance_add(qtl, qn, qc, qt->request.ria, queryable_context, false))
added++;
}
- else if(unlikely(qtl->st && qtl->st->rrdcontext == rca && qtl->st->rrdinstance)) {
- if(query_instance_add(qtl, qn, qc, qtl->st->rrdinstance, queryable_context, false))
+ else if(unlikely(qtl->st && qtl->st->rrdcontexts.rrdcontext == rca && qtl->st->rrdcontexts.rrdinstance)) {
+ if(query_instance_add(qtl, qn, qc, qtl->st->rrdcontexts.rrdinstance, queryable_context, false))
added++;
}
else {
@@ -894,11 +894,11 @@ static ssize_t query_node_add(void *data, RRDHOST *host, bool queryable_host) {
qn->node_id[0] = '\0';
// is the chart given valid?
- if(unlikely(qtl->st && (!qtl->st->rrdinstance || !qtl->st->rrdcontext))) {
+ if(unlikely(qtl->st && (!qtl->st->rrdcontexts.rrdinstance || !qtl->st->rrdcontexts.rrdcontext))) {
netdata_log_error("QUERY TARGET: RRDSET '%s' given, but it is not linked to rrdcontext structures. Linking it now.", rrdset_name(qtl->st));
rrdinstance_from_rrdset(qtl->st);
- if(unlikely(qtl->st && (!qtl->st->rrdinstance || !qtl->st->rrdcontext))) {
+ if(unlikely(qtl->st && (!qtl->st->rrdcontexts.rrdinstance || !qtl->st->rrdcontexts.rrdcontext))) {
netdata_log_error("QUERY TARGET: RRDSET '%s' given, but failed to be linked to rrdcontext structures. Switching to context query.",
rrdset_name(qtl->st));
@@ -918,7 +918,7 @@ static ssize_t query_node_add(void *data, RRDHOST *host, bool queryable_host) {
}
else if(unlikely(qtl->st)) {
// single chart data queries
- if(query_context_add(qtl, qtl->st->rrdcontext, true))
+ if(query_context_add(qtl, qtl->st->rrdcontexts.rrdcontext, true))
added++;
}
else {
@@ -1052,8 +1052,9 @@ QUERY_TARGET *query_target_create(QUERY_TARGET_REQUEST *qtr) {
if(query_target_has_percentage_of_group(qt))
qt->window.options &= ~RRDR_OPTION_PERCENTAGE;
- qt->internal.relative = rrdr_relative_window_to_absolute(&qt->window.after, &qt->window.before, &qt->window.now,
- unittest_running);
+ qt->internal.relative = rrdr_relative_window_to_absolute_query(&qt->window.after, &qt->window.before
+ , &qt->window.now, unittest_running
+ );
// prepare our local variables - we need these across all these functions
QUERY_TARGET_LOCALS qtl = {
diff --git a/database/contexts/rrdcontext.c b/database/contexts/rrdcontext.c
index 8538d17f28919b..9dee39be2a437d 100644
--- a/database/contexts/rrdcontext.c
+++ b/database/contexts/rrdcontext.c
@@ -224,26 +224,31 @@ void rrdcontext_hub_checkpoint_command(void *ptr) {
struct ctxs_checkpoint *cmd = ptr;
if(!rrdhost_check_our_claim_id(cmd->claim_id)) {
- netdata_log_error("RRDCONTEXT: received checkpoint command for claim_id '%s', node id '%s', but this is not our claim id. Ours '%s', received '%s'. Ignoring command.",
- cmd->claim_id, cmd->node_id,
- localhost->aclk_state.claimed_id?localhost->aclk_state.claimed_id:"NOT SET",
- cmd->claim_id);
+ nd_log(NDLS_DAEMON, NDLP_WARNING,
+ "RRDCONTEXT: received checkpoint command for claim_id '%s', node id '%s', "
+ "but this is not our claim id. Ours '%s', received '%s'. Ignoring command.",
+ cmd->claim_id, cmd->node_id,
+ localhost->aclk_state.claimed_id?localhost->aclk_state.claimed_id:"NOT SET",
+ cmd->claim_id);
return;
}
RRDHOST *host = rrdhost_find_by_node_id(cmd->node_id);
if(!host) {
- netdata_log_error("RRDCONTEXT: received checkpoint command for claim id '%s', node id '%s', but there is no node with such node id here. Ignoring command.",
- cmd->claim_id,
- cmd->node_id);
+ nd_log(NDLS_DAEMON, NDLP_WARNING,
+ "RRDCONTEXT: received checkpoint command for claim id '%s', node id '%s', "
+ "but there is no node with such node id here. Ignoring command.",
+ cmd->claim_id, cmd->node_id);
return;
}
if(rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS)) {
- netdata_log_info("RRDCONTEXT: received checkpoint command for claim id '%s', node id '%s', while node '%s' has an active context streaming.",
- cmd->claim_id, cmd->node_id, rrdhost_hostname(host));
+ nd_log(NDLS_DAEMON, NDLP_NOTICE,
+ "RRDCONTEXT: received checkpoint command for claim id '%s', node id '%s', "
+ "while node '%s' has an active context streaming.",
+ cmd->claim_id, cmd->node_id, rrdhost_hostname(host));
// disable it temporarily, so that our worker will not attempt to send messages in parallel
rrdhost_flag_clear(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS);
@@ -252,8 +257,10 @@ void rrdcontext_hub_checkpoint_command(void *ptr) {
uint64_t our_version_hash = rrdcontext_version_hash(host);
if(cmd->version_hash != our_version_hash) {
- netdata_log_error("RRDCONTEXT: received version hash %"PRIu64" for host '%s', does not match our version hash %"PRIu64". Sending snapshot of all contexts.",
- cmd->version_hash, rrdhost_hostname(host), our_version_hash);
+ nd_log(NDLS_DAEMON, NDLP_NOTICE,
+ "RRDCONTEXT: received version hash %"PRIu64" for host '%s', does not match our version hash %"PRIu64". "
+ "Sending snapshot of all contexts.",
+ cmd->version_hash, rrdhost_hostname(host), our_version_hash);
#ifdef ENABLE_ACLK
// prepare the snapshot
@@ -275,41 +282,55 @@ void rrdcontext_hub_checkpoint_command(void *ptr) {
#endif
}
- internal_error(true, "RRDCONTEXT: host '%s' enabling streaming of contexts", rrdhost_hostname(host));
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "RRDCONTEXT: host '%s' enabling streaming of contexts",
+ rrdhost_hostname(host));
+
rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS);
char node_str[UUID_STR_LEN];
uuid_unparse_lower(*host->node_id, node_str);
- netdata_log_access("ACLK REQ [%s (%s)]: STREAM CONTEXTS ENABLED", node_str, rrdhost_hostname(host));
+ nd_log(NDLS_ACCESS, NDLP_DEBUG,
+ "ACLK REQ [%s (%s)]: STREAM CONTEXTS ENABLED",
+ node_str, rrdhost_hostname(host));
}
void rrdcontext_hub_stop_streaming_command(void *ptr) {
struct stop_streaming_ctxs *cmd = ptr;
if(!rrdhost_check_our_claim_id(cmd->claim_id)) {
- netdata_log_error("RRDCONTEXT: received stop streaming command for claim_id '%s', node id '%s', but this is not our claim id. Ours '%s', received '%s'. Ignoring command.",
- cmd->claim_id, cmd->node_id,
- localhost->aclk_state.claimed_id?localhost->aclk_state.claimed_id:"NOT SET",
- cmd->claim_id);
+ nd_log(NDLS_DAEMON, NDLP_WARNING,
+ "RRDCONTEXT: received stop streaming command for claim_id '%s', node id '%s', "
+ "but this is not our claim id. Ours '%s', received '%s'. Ignoring command.",
+ cmd->claim_id, cmd->node_id,
+ localhost->aclk_state.claimed_id?localhost->aclk_state.claimed_id:"NOT SET",
+ cmd->claim_id);
return;
}
RRDHOST *host = rrdhost_find_by_node_id(cmd->node_id);
if(!host) {
- netdata_log_error("RRDCONTEXT: received stop streaming command for claim id '%s', node id '%s', but there is no node with such node id here. Ignoring command.",
- cmd->claim_id, cmd->node_id);
+ nd_log(NDLS_DAEMON, NDLP_WARNING,
+ "RRDCONTEXT: received stop streaming command for claim id '%s', node id '%s', "
+ "but there is no node with such node id here. Ignoring command.",
+ cmd->claim_id, cmd->node_id);
return;
}
if(!rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS)) {
- netdata_log_error("RRDCONTEXT: received stop streaming command for claim id '%s', node id '%s', but node '%s' does not have active context streaming. Ignoring command.",
- cmd->claim_id, cmd->node_id, rrdhost_hostname(host));
+ nd_log(NDLS_DAEMON, NDLP_NOTICE,
+ "RRDCONTEXT: received stop streaming command for claim id '%s', node id '%s', "
+ "but node '%s' does not have active context streaming. Ignoring command.",
+ cmd->claim_id, cmd->node_id, rrdhost_hostname(host));
return;
}
- internal_error(true, "RRDCONTEXT: host '%s' disabling streaming of contexts", rrdhost_hostname(host));
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "RRDCONTEXT: host '%s' disabling streaming of contexts",
+ rrdhost_hostname(host));
+
rrdhost_flag_clear(host, RRDHOST_FLAG_ACLK_STREAM_CONTEXTS);
}
diff --git a/database/contexts/rrdcontext.h b/database/contexts/rrdcontext.h
index 0bcdb68ded4859..9c497a5a5ee7c6 100644
--- a/database/contexts/rrdcontext.h
+++ b/database/contexts/rrdcontext.h
@@ -40,7 +40,7 @@ const char *rrdinstance_acquired_name(RRDINSTANCE_ACQUIRED *ria);
bool rrdinstance_acquired_has_name(RRDINSTANCE_ACQUIRED *ria);
const char *rrdinstance_acquired_units(RRDINSTANCE_ACQUIRED *ria);
STRING *rrdinstance_acquired_units_dup(RRDINSTANCE_ACQUIRED *ria);
-DICTIONARY *rrdinstance_acquired_labels(RRDINSTANCE_ACQUIRED *ria);
+RRDLABELS *rrdinstance_acquired_labels(RRDINSTANCE_ACQUIRED *ria);
DICTIONARY *rrdinstance_acquired_functions(RRDINSTANCE_ACQUIRED *ria);
RRDHOST *rrdinstance_acquired_rrdhost(RRDINSTANCE_ACQUIRED *ria);
RRDSET *rrdinstance_acquired_rrdset(RRDINSTANCE_ACQUIRED *ria);
@@ -432,6 +432,7 @@ struct sql_alert_transition_data {
const char *units;
const char *exec;
const char *info;
+ const char *summary;
const char *classification;
const char *type;
const char *component;
@@ -472,6 +473,7 @@ struct sql_alert_config_data {
const char *classification;
const char *component;
const char *type;
+ const char *summary;
struct {
struct {
@@ -531,6 +533,7 @@ struct sql_alert_instance_v2_entry {
RRDCALC_STATUS status;
RRDCALC_FLAGS flags;
STRING *info;
+ STRING *summary;
NETDATA_DOUBLE value;
time_t last_updated;
time_t last_status_change;
diff --git a/database/engine/cache.c b/database/engine/cache.c
index 7a9ccf8d1bb32d..eb1c35298d560c 100644
--- a/database/engine/cache.c
+++ b/database/engine/cache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
#include "cache.h"
/* STATES AND TRANSITIONS
@@ -1170,9 +1171,10 @@ static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evic
if(all_of_them && !filter) {
pgc_ll_lock(cache, &cache->clean);
if(cache->clean.stats->entries) {
- error_limit_static_global_var(erl, 1, 0);
- error_limit(&erl, "DBENGINE CACHE: cannot free all clean pages, %zu are still in the clean queue",
- cache->clean.stats->entries);
+ nd_log_limit_static_global_var(erl, 1, 0);
+ nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE,
+ "DBENGINE CACHE: cannot free all clean pages, %zu are still in the clean queue",
+ cache->clean.stats->entries);
}
pgc_ll_unlock(cache, &cache->clean);
}
@@ -1801,7 +1803,7 @@ PGC *pgc_create(const char *name,
cache->aral = callocz(cache->config.partitions, sizeof(ARAL *));
for(size_t part = 0; part < cache->config.partitions ; part++) {
char buf[100 +1];
- snprintfz(buf, 100, "%s[%zu]", name, part);
+ snprintfz(buf, sizeof(buf) - 1, "%s[%zu]", name, part);
cache->aral[part] = aral_create(
buf,
sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page,
@@ -1860,7 +1862,7 @@ void pgc_destroy(PGC *cache) {
freez(cache->aral);
#endif
-
+ freez(cache->index);
freez(cache);
}
}
@@ -2517,7 +2519,7 @@ void unittest_stress_test(void) {
for(size_t i = 0; i < pgc_uts.collect_threads ;i++) {
collect_thread_ids[i] = i;
char buffer[100 + 1];
- snprintfz(buffer, 100, "COLLECT_%zu", i);
+ snprintfz(buffer, sizeof(buffer) - 1, "COLLECT_%zu", i);
netdata_thread_create(&collect_threads[i], buffer,
NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
unittest_stress_test_collector, &collect_thread_ids[i]);
@@ -2529,7 +2531,7 @@ void unittest_stress_test(void) {
for(size_t i = 0; i < pgc_uts.query_threads ;i++) {
query_thread_ids[i] = i;
char buffer[100 + 1];
- snprintfz(buffer, 100, "QUERY_%zu", i);
+ snprintfz(buffer, sizeof(buffer) - 1, "QUERY_%zu", i);
initstate_r(1, pgc_uts.rand_statebufs, 1024, &pgc_uts.random_data[i]);
netdata_thread_create(&queries_threads[i], buffer,
NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
diff --git a/database/engine/cache.h b/database/engine/cache.h
index 1486fdc1667136..7cd7c0636f6fe0 100644
--- a/database/engine/cache.h
+++ b/database/engine/cache.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef DBENGINE_CACHE_H
#define DBENGINE_CACHE_H
@@ -71,8 +72,8 @@ struct pgc_statistics {
PGC_CACHE_LINE_PADDING(3);
- size_t entries; // all the entries (includes clean, dirty, host)
- size_t size; // all the entries (includes clean, dirty, host)
+ size_t entries; // all the entries (includes clean, dirty, hot)
+ size_t size; // all the entries (includes clean, dirty, hot)
size_t evicting_entries;
size_t evicting_size;
diff --git a/database/engine/datafile.c b/database/engine/datafile.c
index d5c1285be17822..7322039cd3688a 100644
--- a/database/engine/datafile.c
+++ b/database/engine/datafile.c
@@ -112,7 +112,7 @@ bool datafile_acquire_for_deletion(struct rrdengine_datafile *df) {
"but it has %u lockers (oc:%u, pd:%u), "
"%zu clean and %zu hot open cache pages "
"- will be deleted shortly "
- "(scanned open cache in %llu usecs)",
+ "(scanned open cache in %"PRIu64" usecs)",
df->fileno, df->ctx->config.tier,
df->users.lockers,
df->users.lockers_by_reason[DATAFILE_ACQUIRE_OPEN_CACHE],
@@ -129,7 +129,7 @@ bool datafile_acquire_for_deletion(struct rrdengine_datafile *df) {
"but it has %u lockers (oc:%u, pd:%u), "
"%zu clean and %zu hot open cache pages "
"- will be deleted now "
- "(scanned open cache in %llu usecs)",
+ "(scanned open cache in %"PRIu64" usecs)",
df->fileno, df->ctx->config.tier,
df->users.lockers,
df->users.lockers_by_reason[DATAFILE_ACQUIRE_OPEN_CACHE],
@@ -143,7 +143,7 @@ bool datafile_acquire_for_deletion(struct rrdengine_datafile *df) {
internal_error(true, "DBENGINE: datafile %u of tier %d "
"has %u lockers (oc:%u, pd:%u), "
"%zu clean and %zu hot open cache pages "
- "(scanned open cache in %llu usecs)",
+ "(scanned open cache in %"PRIu64" usecs)",
df->fileno, df->ctx->config.tier,
df->users.lockers,
df->users.lockers_by_reason[DATAFILE_ACQUIRE_OPEN_CACHE],
@@ -160,7 +160,7 @@ bool datafile_acquire_for_deletion(struct rrdengine_datafile *df) {
void generate_datafilepath(struct rrdengine_datafile *datafile, char *str, size_t maxlen)
{
- (void) snprintfz(str, maxlen, "%s/" DATAFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL DATAFILE_EXTENSION,
+ (void) snprintfz(str, maxlen - 1, "%s/" DATAFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL DATAFILE_EXTENSION,
datafile->ctx->config.dbfiles_path, datafile->tier, datafile->fileno);
}
@@ -338,7 +338,8 @@ static int load_data_file(struct rrdengine_datafile *datafile)
ctx_fs_error(ctx);
return fd;
}
- netdata_log_info("DBENGINE: initializing data file \"%s\".", path);
+
+ nd_log_daemon(NDLP_DEBUG, "DBENGINE: initializing data file \"%s\".", path);
ret = check_file_properties(file, &file_size, sizeof(struct rrdeng_df_sb));
if (ret)
@@ -354,7 +355,8 @@ static int load_data_file(struct rrdengine_datafile *datafile)
datafile->file = file;
datafile->pos = file_size;
- netdata_log_info("DBENGINE: data file \"%s\" initialized (size:%"PRIu64").", path, file_size);
+ nd_log_daemon(NDLP_DEBUG, "DBENGINE: data file \"%s\" initialized (size:%" PRIu64 ").", path, file_size);
+
return 0;
error:
@@ -422,6 +424,7 @@ static int scan_data_files(struct rrdengine_instance *ctx)
ctx->atomic.last_fileno = datafiles[matched_files - 1]->fileno;
+ netdata_log_info("DBENGINE: loading %d data/journal of tier %d...", matched_files, ctx->config.tier);
for (failed_to_load = 0, i = 0 ; i < matched_files ; ++i) {
uint8_t must_delete_pair = 0;
@@ -479,14 +482,18 @@ int create_new_datafile_pair(struct rrdengine_instance *ctx, bool having_lock)
int ret;
char path[RRDENG_PATH_MAX];
- netdata_log_info("DBENGINE: creating new data and journal files in path %s", ctx->config.dbfiles_path);
+ nd_log(NDLS_DAEMON, NDLP_DEBUG,
+ "DBENGINE: creating new data and journal files in path %s",
+ ctx->config.dbfiles_path);
+
datafile = datafile_alloc_and_init(ctx, 1, fileno);
ret = create_data_file(datafile);
if(ret)
goto error_after_datafile;
generate_datafilepath(datafile, path, sizeof(path));
- netdata_log_info("DBENGINE: created data file \"%s\".", path);
+ nd_log(NDLS_DAEMON, NDLP_INFO,
+ "DBENGINE: created data file \"%s\".", path);
journalfile = journalfile_alloc_and_init(datafile);
ret = journalfile_create(journalfile, datafile);
@@ -494,7 +501,8 @@ int create_new_datafile_pair(struct rrdengine_instance *ctx, bool having_lock)
goto error_after_journalfile;
journalfile_v1_generate_path(datafile, path, sizeof(path));
- netdata_log_info("DBENGINE: created journal file \"%s\".", path);
+ nd_log(NDLS_DAEMON, NDLP_INFO,
+ "DBENGINE: created journal file \"%s\".", path);
ctx_current_disk_space_increase(ctx, datafile->pos + journalfile->unsafe.pos);
datafile_list_insert(ctx, datafile, having_lock);
diff --git a/database/engine/journalfile.c b/database/engine/journalfile.c
index abb9d2eb951ab7..9005b81ca2116d 100644
--- a/database/engine/journalfile.c
+++ b/database/engine/journalfile.c
@@ -67,7 +67,7 @@ void journalfile_v2_generate_path(struct rrdengine_datafile *datafile, char *str
void journalfile_v1_generate_path(struct rrdengine_datafile *datafile, char *str, size_t maxlen)
{
- (void) snprintfz(str, maxlen, "%s/" WALFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL WALFILE_EXTENSION,
+ (void) snprintfz(str, maxlen - 1, "%s/" WALFILE_PREFIX RRDENG_FILE_NUMBER_PRINT_TMPL WALFILE_EXTENSION,
datafile->ctx->config.dbfiles_path, datafile->tier, datafile->fileno);
}
@@ -169,7 +169,7 @@ static void njfv2idx_add(struct rrdengine_datafile *datafile) {
*PValue = datafile;
break;
}
- } while(0);
+ } while(1);
rw_spinlock_write_unlock(&datafile->ctx->njfv2idx.spinlock);
}
@@ -1013,7 +1013,7 @@ void journalfile_v2_populate_retention_to_mrg(struct rrdengine_instance *ctx, st
journalfile_v2_data_release(journalfile);
usec_t ended_ut = now_monotonic_usec();
- netdata_log_info("DBENGINE: journal v2 of tier %d, datafile %u populated, size: %0.2f MiB, metrics: %0.2f k, %0.2f ms"
+ nd_log_daemon(NDLP_DEBUG, "DBENGINE: journal v2 of tier %d, datafile %u populated, size: %0.2f MiB, metrics: %0.2f k, %0.2f ms"
, ctx->config.tier, journalfile->datafile->fileno
, (double)data_size / 1024 / 1024
, (double)entries / 1000
@@ -1073,7 +1073,8 @@ int journalfile_v2_load(struct rrdengine_instance *ctx, struct rrdengine_journal
return 1;
}
- netdata_log_info("DBENGINE: checking integrity of '%s'", path_v2);
+ nd_log_daemon(NDLP_DEBUG, "DBENGINE: checking integrity of '%s'", path_v2);
+
usec_t validation_start_ut = now_monotonic_usec();
int rc = journalfile_v2_validate(data_start, journal_v2_file_size, journal_v1_file_size);
if (unlikely(rc)) {
@@ -1104,7 +1105,7 @@ int journalfile_v2_load(struct rrdengine_instance *ctx, struct rrdengine_journal
usec_t finished_ut = now_monotonic_usec();
- netdata_log_info("DBENGINE: journal v2 '%s' loaded, size: %0.2f MiB, metrics: %0.2f k, "
+ nd_log_daemon(NDLP_DEBUG, "DBENGINE: journal v2 '%s' loaded, size: %0.2f MiB, metrics: %0.2f k, "
"mmap: %0.2f ms, validate: %0.2f ms"
, path_v2
, (double)journal_v2_file_size / 1024 / 1024
@@ -1535,13 +1536,13 @@ int journalfile_load(struct rrdengine_instance *ctx, struct rrdengine_journalfil
}
ctx_io_read_op_bytes(ctx, sizeof(struct rrdeng_jf_sb));
- netdata_log_info("DBENGINE: loading journal file '%s'", path);
+ nd_log_daemon(NDLP_DEBUG, "DBENGINE: loading journal file '%s'", path);
max_id = journalfile_iterate_transactions(ctx, journalfile);
__atomic_store_n(&ctx->atomic.transaction_id, MAX(__atomic_load_n(&ctx->atomic.transaction_id, __ATOMIC_RELAXED), max_id + 1), __ATOMIC_RELAXED);
- netdata_log_info("DBENGINE: journal file '%s' loaded (size:%"PRIu64").", path, file_size);
+ nd_log_daemon(NDLP_DEBUG, "DBENGINE: journal file '%s' loaded (size:%" PRIu64 ").", path, file_size);
bool is_last_file = (ctx_last_fileno_get(ctx) == journalfile->datafile->fileno);
if (is_last_file && journalfile->datafile->pos <= rrdeng_target_data_file_size(ctx) / 3) {
diff --git a/database/engine/metric.c b/database/engine/metric.c
index 0b248c09b55999..735ae7ace13e78 100644
--- a/database/engine/metric.c
+++ b/database/engine/metric.c
@@ -1,30 +1,44 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
#include "metric.h"
typedef int32_t REFCOUNT;
#define REFCOUNT_DELETING (-100)
-typedef enum __attribute__ ((__packed__)) {
- METRIC_FLAG_HAS_RETENTION = (1 << 0),
-} METRIC_FLAGS;
-
struct metric {
uuid_t uuid; // never changes
Word_t section; // never changes
- time_t first_time_s; //
- time_t latest_time_s_clean; // archived pages latest time
- time_t latest_time_s_hot; // latest time of the currently collected page
- uint32_t latest_update_every_s; //
+ time_t first_time_s; // the timestamp of the oldest point in the database
+ time_t latest_time_s_clean; // the timestamp of the newest point in the database
+ time_t latest_time_s_hot; // the timestamp of the latest point that has been collected (not yet stored)
+ uint32_t latest_update_every_s; // the latest data collection frequency
pid_t writer;
uint8_t partition;
- METRIC_FLAGS flags;
REFCOUNT refcount;
- SPINLOCK spinlock; // protects all variable members
// THIS IS allocated with malloc()
// YOU HAVE TO INITIALIZE IT YOURSELF !
};
+#define set_metric_field_with_condition(field, value, condition) ({ \
+ typeof(field) _current = __atomic_load_n(&(field), __ATOMIC_RELAXED); \
+ typeof(field) _wanted = value; \
+ bool did_it = true; \
+ \
+ do { \
+ if((condition) && (_current != _wanted)) { \
+ ; \
+ } \
+ else { \
+ did_it = false; \
+ break; \
+ } \
+ } while(!__atomic_compare_exchange_n(&(field), &_current, _wanted, \
+ false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)); \
+ \
+ did_it; \
+})
+
static struct aral_statistics mrg_aral_statistics;
struct mrg {
@@ -73,9 +87,6 @@ static inline void MRG_STATS_DELETE_MISS(MRG *mrg, size_t partition) {
#define mrg_index_write_lock(mrg, partition) rw_spinlock_write_lock(&(mrg)->index[partition].rw_spinlock)
#define mrg_index_write_unlock(mrg, partition) rw_spinlock_write_unlock(&(mrg)->index[partition].rw_spinlock)
-#define metric_lock(metric) spinlock_lock(&(metric)->spinlock)
-#define metric_unlock(metric) spinlock_unlock(&(metric)->spinlock)
-
static inline void mrg_stats_size_judyl_change(MRG *mrg, size_t mem_before_judyl, size_t mem_after_judyl, size_t partition) {
if(mem_after_judyl > mem_before_judyl)
__atomic_add_fetch(&mrg->index[partition].stats.size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED);
@@ -97,40 +108,34 @@ static inline size_t uuid_partition(MRG *mrg __maybe_unused, uuid_t *uuid) {
return *n % mrg->partitions;
}
-static inline bool metric_has_retention_unsafe(MRG *mrg __maybe_unused, METRIC *metric) {
- size_t partition = metric->partition;
+static inline time_t mrg_metric_get_first_time_s_smart(MRG *mrg __maybe_unused, METRIC *metric) {
+ time_t first_time_s = __atomic_load_n(&metric->first_time_s, __ATOMIC_RELAXED);
- bool has_retention = (metric->first_time_s > 0 || metric->latest_time_s_clean > 0 || metric->latest_time_s_hot > 0);
+ if(first_time_s <= 0) {
+ first_time_s = __atomic_load_n(&metric->latest_time_s_clean, __ATOMIC_RELAXED);
+ if(first_time_s <= 0)
+ first_time_s = __atomic_load_n(&metric->latest_time_s_hot, __ATOMIC_RELAXED);
- if(has_retention && !(metric->flags & METRIC_FLAG_HAS_RETENTION)) {
- metric->flags |= METRIC_FLAG_HAS_RETENTION;
- __atomic_add_fetch(&mrg->index[partition].stats.entries_with_retention, 1, __ATOMIC_RELAXED);
- }
- else if(!has_retention && (metric->flags & METRIC_FLAG_HAS_RETENTION)) {
- metric->flags &= ~METRIC_FLAG_HAS_RETENTION;
- __atomic_sub_fetch(&mrg->index[partition].stats.entries_with_retention, 1, __ATOMIC_RELAXED);
+ if(first_time_s <= 0)
+ first_time_s = 0;
+ else
+ __atomic_store_n(&metric->first_time_s, first_time_s, __ATOMIC_RELAXED);
}
- return has_retention;
+ return first_time_s;
}
-static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric, bool having_spinlock) {
+static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric) {
size_t partition = metric->partition;
+ REFCOUNT expected = metric->refcount;
REFCOUNT refcount;
- if(!having_spinlock)
- metric_lock(metric);
-
- if(unlikely(metric->refcount < 0))
- fatal("METRIC: refcount is %d (negative) during acquire", metric->refcount);
-
- refcount = ++metric->refcount;
-
- // update its retention flags
- metric_has_retention_unsafe(mrg, metric);
+ do {
+ if(expected < 0)
+ fatal("METRIC: refcount is %d (negative) during acquire", metric->refcount);
- if(!having_spinlock)
- metric_unlock(metric);
+ refcount = expected + 1;
+ } while(!__atomic_compare_exchange_n(&metric->refcount, &expected, refcount, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
if(refcount == 1)
__atomic_add_fetch(&mrg->index[partition].stats.entries_referenced, 1, __ATOMIC_RELAXED);
@@ -141,28 +146,25 @@ static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric, b
}
static inline bool metric_release_and_can_be_deleted(MRG *mrg __maybe_unused, METRIC *metric) {
- bool ret = true;
size_t partition = metric->partition;
+ REFCOUNT expected = metric->refcount;
REFCOUNT refcount;
- metric_lock(metric);
-
- if(unlikely(metric->refcount <= 0))
- fatal("METRIC: refcount is %d (zero or negative) during release", metric->refcount);
-
- refcount = --metric->refcount;
-
- if(likely(metric_has_retention_unsafe(mrg, metric) || refcount != 0))
- ret = false;
+ do {
+ if(expected <= 0)
+ fatal("METRIC: refcount is %d (zero or negative) during release", metric->refcount);
- metric_unlock(metric);
+ refcount = expected - 1;
+ } while(!__atomic_compare_exchange_n(&metric->refcount, &expected, refcount, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
if(unlikely(!refcount))
__atomic_sub_fetch(&mrg->index[partition].stats.entries_referenced, 1, __ATOMIC_RELAXED);
__atomic_sub_fetch(&mrg->index[partition].stats.current_references, 1, __ATOMIC_RELAXED);
- return ret;
+ time_t first, last, ue;
+ mrg_metric_get_retention(mrg, metric, &first, &last, &ue);
+ return (!first || !last || first > last);
}
static inline METRIC *metric_add_and_acquire(MRG *mrg, MRG_ENTRY *entry, bool *ret) {
@@ -192,7 +194,7 @@ static inline METRIC *metric_add_and_acquire(MRG *mrg, MRG_ENTRY *entry, bool *r
if(unlikely(*PValue != NULL)) {
METRIC *metric = *PValue;
- metric_acquire(mrg, metric, false);
+ metric_acquire(mrg, metric);
MRG_STATS_DUPLICATE_ADD(mrg, partition);
@@ -215,10 +217,8 @@ static inline METRIC *metric_add_and_acquire(MRG *mrg, MRG_ENTRY *entry, bool *r
metric->latest_update_every_s = entry->latest_update_every_s;
metric->writer = 0;
metric->refcount = 0;
- metric->flags = 0;
metric->partition = partition;
- spinlock_init(&metric->spinlock);
- metric_acquire(mrg, metric, true); // no spinlock use required here
+ metric_acquire(mrg, metric);
*PValue = metric;
MRG_STATS_ADDED_METRIC(mrg, partition);
@@ -252,7 +252,7 @@ static inline METRIC *metric_get_and_acquire(MRG *mrg, uuid_t *uuid, Word_t sect
METRIC *metric = *PValue;
- metric_acquire(mrg, metric, false);
+ metric_acquire(mrg, metric);
mrg_index_read_unlock(mrg, partition);
@@ -363,7 +363,7 @@ inline bool mrg_metric_release_and_delete(MRG *mrg, METRIC *metric) {
}
inline METRIC *mrg_metric_dup(MRG *mrg, METRIC *metric) {
- metric_acquire(mrg, metric, false);
+ metric_acquire(mrg, metric);
return metric;
}
@@ -389,10 +389,7 @@ inline bool mrg_metric_set_first_time_s(MRG *mrg __maybe_unused, METRIC *metric,
if(unlikely(first_time_s < 0))
return false;
- metric_lock(metric);
- metric->first_time_s = first_time_s;
- metric_has_retention_unsafe(mrg, metric);
- metric_unlock(metric);
+ __atomic_store_n(&metric->first_time_s, first_time_s, __ATOMIC_RELAXED);
return true;
}
@@ -405,112 +402,56 @@ inline void mrg_metric_expand_retention(MRG *mrg __maybe_unused, METRIC *metric,
internal_fatal(last_time_s > max_acceptable_collected_time(),
"DBENGINE METRIC: metric last time is in the future");
- if(unlikely(first_time_s < 0))
- first_time_s = 0;
-
- if(unlikely(last_time_s < 0))
- last_time_s = 0;
-
- if(unlikely(update_every_s < 0))
- update_every_s = 0;
-
- if(unlikely(!first_time_s && !last_time_s && !update_every_s))
- return;
+ if(first_time_s > 0)
+ set_metric_field_with_condition(metric->first_time_s, first_time_s, _current <= 0 || _wanted < _current);
- metric_lock(metric);
-
- if(unlikely(first_time_s && (!metric->first_time_s || first_time_s < metric->first_time_s)))
- metric->first_time_s = first_time_s;
-
- if(likely(last_time_s && (!metric->latest_time_s_clean || last_time_s > metric->latest_time_s_clean))) {
- metric->latest_time_s_clean = last_time_s;
-
- if(likely(update_every_s))
- metric->latest_update_every_s = (uint32_t) update_every_s;
+ if(last_time_s > 0) {
+ if(set_metric_field_with_condition(metric->latest_time_s_clean, last_time_s, _current <= 0 || _wanted > _current) &&
+ update_every_s > 0)
+ // set the latest update every too
+ set_metric_field_with_condition(metric->latest_update_every_s, update_every_s, true);
}
- else if(unlikely(!metric->latest_update_every_s && update_every_s))
- metric->latest_update_every_s = (uint32_t) update_every_s;
-
- metric_has_retention_unsafe(mrg, metric);
- metric_unlock(metric);
+ else if(update_every_s > 0)
+ // set it only if it is invalid
+ set_metric_field_with_condition(metric->latest_update_every_s, update_every_s, _current <= 0);
}
inline bool mrg_metric_set_first_time_s_if_bigger(MRG *mrg __maybe_unused, METRIC *metric, time_t first_time_s) {
internal_fatal(first_time_s < 0, "DBENGINE METRIC: timestamp is negative");
-
- bool ret = false;
-
- metric_lock(metric);
- if(first_time_s > metric->first_time_s) {
- metric->first_time_s = first_time_s;
- ret = true;
- }
- metric_has_retention_unsafe(mrg, metric);
- metric_unlock(metric);
-
- return ret;
+ return set_metric_field_with_condition(metric->first_time_s, first_time_s, _wanted > _current);
}
inline time_t mrg_metric_get_first_time_s(MRG *mrg __maybe_unused, METRIC *metric) {
- time_t first_time_s;
-
- metric_lock(metric);
-
- if(unlikely(!metric->first_time_s)) {
- if(metric->latest_time_s_clean)
- metric->first_time_s = metric->latest_time_s_clean;
-
- else if(metric->latest_time_s_hot)
- metric->first_time_s = metric->latest_time_s_hot;
- }
-
- first_time_s = metric->first_time_s;
-
- metric_unlock(metric);
-
- return first_time_s;
+ return mrg_metric_get_first_time_s_smart(mrg, metric);
}
inline void mrg_metric_get_retention(MRG *mrg __maybe_unused, METRIC *metric, time_t *first_time_s, time_t *last_time_s, time_t *update_every_s) {
- metric_lock(metric);
-
- if(unlikely(!metric->first_time_s)) {
- if(metric->latest_time_s_clean)
- metric->first_time_s = metric->latest_time_s_clean;
-
- else if(metric->latest_time_s_hot)
- metric->first_time_s = metric->latest_time_s_hot;
- }
-
- *first_time_s = metric->first_time_s;
- *last_time_s = MAX(metric->latest_time_s_clean, metric->latest_time_s_hot);
- *update_every_s = metric->latest_update_every_s;
+ time_t clean = __atomic_load_n(&metric->latest_time_s_clean, __ATOMIC_RELAXED);
+ time_t hot = __atomic_load_n(&metric->latest_time_s_hot, __ATOMIC_RELAXED);
- metric_unlock(metric);
+ *last_time_s = MAX(clean, hot);
+ *first_time_s = mrg_metric_get_first_time_s_smart(mrg, metric);
+ *update_every_s = __atomic_load_n(&metric->latest_update_every_s, __ATOMIC_RELAXED);
}
inline bool mrg_metric_set_clean_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric, time_t latest_time_s) {
internal_fatal(latest_time_s < 0, "DBENGINE METRIC: timestamp is negative");
- if(unlikely(latest_time_s < 0))
- return false;
-
- metric_lock(metric);
-
// internal_fatal(latest_time_s > max_acceptable_collected_time(),
// "DBENGINE METRIC: metric latest time is in the future");
// internal_fatal(metric->latest_time_s_clean > latest_time_s,
// "DBENGINE METRIC: metric new clean latest time is older than the previous one");
- metric->latest_time_s_clean = latest_time_s;
+ if(latest_time_s > 0) { // zero is now rejected too: the removed check only refused negative timestamps
+ if(set_metric_field_with_condition(metric->latest_time_s_clean, latest_time_s, true)) {
+ set_metric_field_with_condition(metric->first_time_s, latest_time_s, _current <= 0 || _wanted < _current); // presumably _current/_wanted name the stored and requested values inside the helper — confirm against its definition
- if(unlikely(!metric->first_time_s))
- metric->first_time_s = latest_time_s;
+ return true;
+ }
+ }
- metric_has_retention_unsafe(mrg, metric);
- metric_unlock(metric);
- return true;
+ return false; // reached for non-positive timestamps, or when the first set_metric_field_with_condition() call returns false
}
// returns true when metric still has retention
@@ -518,7 +459,6 @@ inline bool mrg_metric_zero_disk_retention(MRG *mrg __maybe_unused, METRIC *metr
Word_t section = mrg_metric_section(mrg, metric);
bool do_again = false;
size_t countdown = 5;
- bool ret = true;
do {
time_t min_first_time_s = LONG_MAX;
@@ -547,22 +487,20 @@ inline bool mrg_metric_zero_disk_retention(MRG *mrg __maybe_unused, METRIC *metr
if (min_first_time_s == LONG_MAX)
min_first_time_s = 0;
- metric_lock(metric);
- if (--countdown && !min_first_time_s && metric->latest_time_s_hot)
+ if (--countdown && !min_first_time_s && __atomic_load_n(&metric->latest_time_s_hot, __ATOMIC_RELAXED))
do_again = true;
else {
internal_error(!countdown, "METRIC: giving up on updating the retention of metric without disk retention");
do_again = false;
- metric->first_time_s = min_first_time_s;
- metric->latest_time_s_clean = max_end_time_s;
-
- ret = metric_has_retention_unsafe(mrg, metric);
+ set_metric_field_with_condition(metric->first_time_s, min_first_time_s, true);
+ set_metric_field_with_condition(metric->latest_time_s_clean, max_end_time_s, true);
}
- metric_unlock(metric);
} while(do_again);
- return ret;
+ time_t first, last, ue;
+ mrg_metric_get_retention(mrg, metric, &first, &last, &ue);
+ return (first && last && first < last);
}
inline bool mrg_metric_set_hot_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric, time_t latest_time_s) {
@@ -571,88 +509,80 @@ inline bool mrg_metric_set_hot_latest_time_s(MRG *mrg __maybe_unused, METRIC *me
// internal_fatal(latest_time_s > max_acceptable_collected_time(),
// "DBENGINE METRIC: metric latest time is in the future");
- if(unlikely(latest_time_s < 0))
- return false;
-
- metric_lock(metric);
- metric->latest_time_s_hot = latest_time_s;
-
- if(unlikely(!metric->first_time_s))
- metric->first_time_s = latest_time_s;
+ if(likely(latest_time_s > 0)) {
+ __atomic_store_n(&metric->latest_time_s_hot, latest_time_s, __ATOMIC_RELAXED);
+ return true;
+ }
- metric_has_retention_unsafe(mrg, metric);
- metric_unlock(metric);
- return true;
+ return false;
}
inline time_t mrg_metric_get_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric) {
- time_t max;
- metric_lock(metric);
- max = MAX(metric->latest_time_s_clean, metric->latest_time_s_hot);
- metric_unlock(metric);
- return max;
+ time_t clean = __atomic_load_n(&metric->latest_time_s_clean, __ATOMIC_RELAXED);
+ time_t hot = __atomic_load_n(&metric->latest_time_s_hot, __ATOMIC_RELAXED);
+ // two independent relaxed loads replace the removed locked read; the newer timestamp is returned
+ return MAX(clean, hot);
}
inline bool mrg_metric_set_update_every(MRG *mrg __maybe_unused, METRIC *metric, time_t update_every_s) {
internal_fatal(update_every_s < 0, "DBENGINE METRIC: timestamp is negative");
- if(update_every_s <= 0)
- return false;
-
- metric_lock(metric);
- metric->latest_update_every_s = (uint32_t) update_every_s;
- metric_unlock(metric);
+ if(update_every_s > 0)
+ return set_metric_field_with_condition(metric->latest_update_every_s, update_every_s, true); // condition is the constant `true`; the result is now whatever the helper returns instead of an unconditional true
- return true;
+ return false; // non-positive update_every_s is rejected, as in the removed code
}
inline bool mrg_metric_set_update_every_s_if_zero(MRG *mrg __maybe_unused, METRIC *metric, time_t update_every_s) {
internal_fatal(update_every_s < 0, "DBENGINE METRIC: timestamp is negative");
- if(update_every_s <= 0)
- return false;
-
- metric_lock(metric);
- if(!metric->latest_update_every_s)
- metric->latest_update_every_s = (uint32_t) update_every_s;
- metric_unlock(metric);
+ if(update_every_s > 0)
+ return set_metric_field_with_condition(metric->latest_update_every_s, update_every_s, _current <= 0); // presumably _current is the stored value inside the helper — confirm; NOTE(review): the removed code returned true even when the field was left untouched
- return true;
+ return false; // non-positive update_every_s is rejected, as in the removed code
}
inline time_t mrg_metric_get_update_every_s(MRG *mrg __maybe_unused, METRIC *metric) {
- time_t update_every_s;
-
- metric_lock(metric);
- update_every_s = metric->latest_update_every_s;
- metric_unlock(metric);
-
- return update_every_s;
+ return __atomic_load_n(&metric->latest_update_every_s, __ATOMIC_RELAXED); // a single relaxed atomic load replaces the removed locked read
}
inline bool mrg_metric_set_writer(MRG *mrg, METRIC *metric) {
- bool done = false;
- metric_lock(metric);
- if(!metric->writer) {
- metric->writer = gettid();
+ pid_t expected = __atomic_load_n(&metric->writer, __ATOMIC_RELAXED);
+ pid_t wanted = gettid();
+ bool done = true;
+ // claim the writer slot only while it is unowned (0); a failed compare-exchange refreshes `expected` and the check is repeated
+ do {
+ if(expected != 0) {
+ done = false;
+ break;
+ }
+ } while(!__atomic_compare_exchange_n(&metric->writer, &expected, wanted, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
+ // count the outcome in the per-partition statistics: a new writer or a conflict
+ if(done)
__atomic_add_fetch(&mrg->index[metric->partition].stats.writers, 1, __ATOMIC_RELAXED);
- done = true;
- }
else
__atomic_add_fetch(&mrg->index[metric->partition].stats.writers_conflicts, 1, __ATOMIC_RELAXED);
- metric_unlock(metric);
+
return done;
}
inline bool mrg_metric_clear_writer(MRG *mrg, METRIC *metric) {
- bool done = false;
- metric_lock(metric);
- if(metric->writer) {
- metric->writer = 0;
+ // this function can be called from a thread other than the writer's, so the stored id is not compared with gettid()
+
+ pid_t expected = __atomic_load_n(&metric->writer, __ATOMIC_RELAXED);
+ pid_t wanted = 0;
+ bool done = true;
+ // reset the writer to 0 only if one is set; a failed compare-exchange refreshes `expected` and the check is repeated
+ do {
+ if(!expected) {
+ done = false;
+ break;
+ }
+ } while(!__atomic_compare_exchange_n(&metric->writer, &expected, wanted, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED));
+ // the writers counter is decremented only when this call actually cleared a writer
+ if(done)
__atomic_sub_fetch(&mrg->index[metric->partition].stats.writers, 1, __ATOMIC_RELAXED);
- done = true;
- }
- metric_unlock(metric);
+
return done;
}
@@ -662,27 +592,30 @@ inline void mrg_update_metric_retention_and_granularity_by_uuid(
time_t update_every_s, time_t now_s)
{
if(unlikely(last_time_s > now_s)) {
- error_limit_static_global_var(erl, 1, 0);
- error_limit(&erl, "DBENGINE JV2: wrong last time on-disk (%ld - %ld, now %ld), "
- "fixing last time to now",
- first_time_s, last_time_s, now_s);
+ nd_log_limit_static_global_var(erl, 1, 0);
+ nd_log_limit(&erl, NDLS_DAEMON, NDLP_WARNING,
+ "DBENGINE JV2: wrong last time on-disk (%ld - %ld, now %ld), "
+ "fixing last time to now",
+ first_time_s, last_time_s, now_s);
last_time_s = now_s;
}
if (unlikely(first_time_s > last_time_s)) {
- error_limit_static_global_var(erl, 1, 0);
- error_limit(&erl, "DBENGINE JV2: wrong first time on-disk (%ld - %ld, now %ld), "
- "fixing first time to last time",
- first_time_s, last_time_s, now_s);
+ nd_log_limit_static_global_var(erl, 1, 0);
+ nd_log_limit(&erl, NDLS_DAEMON, NDLP_WARNING,
+ "DBENGINE JV2: wrong first time on-disk (%ld - %ld, now %ld), "
+ "fixing first time to last time",
+ first_time_s, last_time_s, now_s);
first_time_s = last_time_s;
}
if (unlikely(first_time_s == 0 || last_time_s == 0)) {
- error_limit_static_global_var(erl, 1, 0);
- error_limit(&erl, "DBENGINE JV2: zero on-disk timestamps (%ld - %ld, now %ld), "
- "using them as-is",
- first_time_s, last_time_s, now_s);
+ nd_log_limit_static_global_var(erl, 1, 0);
+ nd_log_limit(&erl, NDLS_DAEMON, NDLP_WARNING,
+ "DBENGINE JV2: zero on-disk timestamps (%ld - %ld, now %ld), "
+ "using them as-is",
+ first_time_s, last_time_s, now_s);
}
bool added = false;
@@ -710,7 +643,6 @@ inline void mrg_get_statistics(MRG *mrg, struct mrg_statistics *s) {
for(size_t i = 0; i < mrg->partitions ;i++) {
s->entries += __atomic_load_n(&mrg->index[i].stats.entries, __ATOMIC_RELAXED);
s->entries_referenced += __atomic_load_n(&mrg->index[i].stats.entries_referenced, __ATOMIC_RELAXED);
- s->entries_with_retention += __atomic_load_n(&mrg->index[i].stats.entries_with_retention, __ATOMIC_RELAXED);
s->size += __atomic_load_n(&mrg->index[i].stats.size, __ATOMIC_RELAXED);
s->current_references += __atomic_load_n(&mrg->index[i].stats.current_references, __ATOMIC_RELAXED);
s->additions += __atomic_load_n(&mrg->index[i].stats.additions, __ATOMIC_RELAXED);
@@ -900,7 +832,7 @@ int mrg_unittest(void) {
pthread_t th[threads];
for(size_t i = 0; i < threads ; i++) {
char buf[15 + 1];
- snprintfz(buf, 15, "TH[%zu]", i);
+ snprintfz(buf, sizeof(buf) - 1, "TH[%zu]", i);
netdata_thread_create(&th[i], buf,
NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG,
mrg_stress, &t);
@@ -923,7 +855,7 @@ int mrg_unittest(void) {
netdata_log_info("DBENGINE METRIC: did %zu additions, %zu duplicate additions, "
"%zu deletions, %zu wrong deletions, "
"%zu successful searches, %zu wrong searches, "
- "in %llu usecs",
+ "in %"PRIu64" usecs",
stats.additions, stats.additions_duplicate,
stats.deletions, stats.delete_misses,
stats.search_hits, stats.search_misses,
diff --git a/database/engine/metric.h b/database/engine/metric.h
index 5d5ebd7b139f6a..dbb9493019cfb1 100644
--- a/database/engine/metric.h
+++ b/database/engine/metric.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef DBENGINE_METRIC_H
#define DBENGINE_METRIC_H
@@ -35,9 +36,6 @@ struct mrg_statistics {
size_t entries_referenced;
- MRG_CACHE_LINE_PADDING(1);
- size_t entries_with_retention;
-
MRG_CACHE_LINE_PADDING(2);
size_t current_references;
diff --git a/database/engine/page.c b/database/engine/page.c
new file mode 100644
index 00000000000000..b7a3934835e974
--- /dev/null
+++ b/database/engine/page.c
@@ -0,0 +1,679 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "page.h"
+
+#include "libnetdata/libnetdata.h"
+
+typedef enum __attribute__((packed)) {
+ PAGE_OPTION_ALL_VALUES_EMPTY = (1 << 0), // set by pgd_create(); cleared by pgd_append_point() on the first value that is not empty
+} PAGE_OPTIONS;
+
+typedef enum __attribute__((packed)) {
+ PGD_STATE_CREATED_FROM_COLLECTOR = (1 << 0), // assigned by pgd_create()
+ PGD_STATE_CREATED_FROM_DISK = (1 << 1), // assigned by pgd_create_from_disk_data()
+ PGD_STATE_SCHEDULED_FOR_FLUSHING = (1 << 2), // assigned by pgd_disk_footprint()
+ PGD_STATE_FLUSHED_TO_DISK = (1 << 3), // assigned by pgd_copy_to_extent()
+} PGD_STATES;
+
+typedef struct {
+ uint8_t *data; // buffer holding the page payload
+ uint32_t size; // size of data, in bytes
+} page_raw_t;
+
+
+typedef struct {
+ size_t num_buffers; // number of gorilla buffers currently attached to the writer
+ gorilla_writer_t *writer; // allocated from aral_gorilla_writer[aral_index]
+ int aral_index; // index of the gorilla ARALs this page allocates from (gettid() % 4 at creation)
+} page_gorilla_t;
+
+struct pgd {
+ // the page type
+ uint8_t type;
+
+ // options related to the page
+ PAGE_OPTIONS options;
+ // bitmask of PGD_STATES: where the page came from and how far flushing has progressed
+ PGD_STATES states;
+
+ // the number of used slots in the page
+ uint32_t used;
+
+ // the total number of slots available in the page
+ uint32_t slots;
+ // raw: PAGE_METRICS / PAGE_TIER pages and gorilla pages loaded from disk; gorilla: gorilla pages created by a collector
+ union {
+ page_raw_t raw;
+ page_gorilla_t gorilla;
+ };
+};
+
+// ----------------------------------------------------------------------------
+// memory management
+
+struct {
+ ARAL *aral_pgd; // allocator for struct pgd objects
+ ARAL *aral_data[RRD_STORAGE_TIERS]; // per-tier allocators for buffers of tier_page_size[tier] bytes
+ ARAL *aral_gorilla_buffer[4]; // allocators for buffers of GORILLA_BUFFER_SIZE bytes
+ ARAL *aral_gorilla_writer[4]; // allocators for gorilla_writer_t objects
+} pgd_alloc_globals = {};
+
+static ARAL *pgd_aral_data_lookup(size_t size) // returns the per-tier ARAL whose page size equals `size` exactly, or NULL when no tier matches
+{
+ for (size_t tier = 0; tier < storage_tiers; tier++)
+ if (size == tier_page_size[tier])
+ return pgd_alloc_globals.aral_data[tier];
+ // no tier uses this exact size; callers fall back to mallocz()/freez()
+ return NULL;
+}
+
+void pgd_init_arals(void) // creates every ARAL stored in pgd_alloc_globals; no guard against repeated calls is visible here
+{
+ // pgd aral
+ {
+ char buf[20 + 1];
+ snprintfz(buf, sizeof(buf) - 1, "pgd");
+
+ // FIXME: add stats
+ pgd_alloc_globals.aral_pgd = aral_create(
+ buf,
+ sizeof(struct pgd),
+ 64,
+ 512 * (sizeof(struct pgd)),
+ pgc_aral_statistics(),
+ NULL, NULL, false, false);
+ }
+
+ // tier page aral
+ {
+ for (size_t i = storage_tiers; i > 0 ;i--)
+ {
+ size_t tier = storage_tiers - i; // i counts down, so tier runs from 0 to storage_tiers - 1
+
+ char buf[20 + 1];
+ snprintfz(buf, sizeof(buf) - 1, "tier%zu-pages", tier);
+ // element size is the tier's page size, which is what pgd_aral_data_lookup() matches on
+ pgd_alloc_globals.aral_data[tier] = aral_create(
+ buf,
+ tier_page_size[tier],
+ 64,
+ 512 * (tier_page_size[tier]),
+ pgc_aral_statistics(),
+ NULL, NULL, false, false);
+ }
+ }
+
+ // gorilla buffers aral
+ for (size_t i = 0; i != 4; i++) { // 4 matches the array length declared in pgd_alloc_globals
+ char buf[20 + 1];
+ snprintfz(buf, sizeof(buf) - 1, "gbuffer-%zu", i);
+
+ // FIXME: add stats
+ pgd_alloc_globals.aral_gorilla_buffer[i] = aral_create(
+ buf,
+ GORILLA_BUFFER_SIZE,
+ 64,
+ 512 * GORILLA_BUFFER_SIZE,
+ pgc_aral_statistics(),
+ NULL, NULL, false, false);
+ }
+
+ // gorilla writers aral
+ for (size_t i = 0; i != 4; i++) { // 4 matches the array length declared in pgd_alloc_globals
+ char buf[20 + 1];
+ snprintfz(buf, sizeof(buf) - 1, "gwriter-%zu", i);
+
+ // FIXME: add stats
+ pgd_alloc_globals.aral_gorilla_writer[i] = aral_create(
+ buf,
+ sizeof(gorilla_writer_t),
+ 64,
+ 512 * sizeof(gorilla_writer_t),
+ pgc_aral_statistics(),
+ NULL, NULL, false, false);
+ }
+}
+
+static void *pgd_data_aral_alloc(size_t size) // allocates a page data buffer of `size` bytes; release it with pgd_data_aral_free() and the same size
+{
+ ARAL *ar = pgd_aral_data_lookup(size);
+ if (!ar)
+ return mallocz(size); // size matches no tier page size: use the general allocator
+ else
+ return aral_mallocz(ar);
+}
+
+static void pgd_data_aral_free(void *page, size_t size) // releases a buffer from pgd_data_aral_alloc(); `size` selects the allocator, so it must be the size used at allocation
+{
+ ARAL *ar = pgd_aral_data_lookup(size);
+ if (!ar)
+ freez(page); // size matches no tier page size, so the buffer came from mallocz()
+ else
+ aral_freez(ar, page);
+}
+
+// ----------------------------------------------------------------------------
+// management api
+
+PGD *pgd_create(uint8_t type, uint32_t slots) // allocates an empty page for a collector and returns it; release it with pgd_free(). Unknown types are fatal.
+{
+ PGD *pg = aral_mallocz(pgd_alloc_globals.aral_pgd);
+ pg->type = type;
+ pg->used = 0;
+ pg->slots = slots;
+ pg->options = PAGE_OPTION_ALL_VALUES_EMPTY; // nothing has been appended yet
+ pg->states = PGD_STATE_CREATED_FROM_COLLECTOR;
+ // allocate the type-specific storage
+ switch (type) {
+ case PAGE_METRICS:
+ case PAGE_TIER: {
+ uint32_t size = slots * page_type_size[type];
+ // a zero size means zero slots or a zero entry in page_type_size[] for this type
+ internal_fatal(!size || slots == 1,
+ "DBENGINE: invalid number of slots (%u) or page type (%u)", slots, type);
+
+ pg->raw.size = size;
+ pg->raw.data = pgd_data_aral_alloc(size);
+ break;
+ }
+ case PAGE_GORILLA_METRICS: {
+ internal_fatal(slots == 1,
+ "DBENGINE: invalid number of slots (%u) or page type (%u)", slots, type);
+ // the requested slot count is overridden for gorilla pages
+ pg->slots = 8 * GORILLA_BUFFER_SLOTS;
+
+ // allocate new gorilla writer
+ pg->gorilla.aral_index = gettid() % 4; // 4 is the length of the gorilla ARAL arrays in pgd_alloc_globals
+ pg->gorilla.writer = aral_mallocz(pgd_alloc_globals.aral_gorilla_writer[pg->gorilla.aral_index]);
+
+ // allocate new gorilla buffer
+ gorilla_buffer_t *gbuf = aral_mallocz(pgd_alloc_globals.aral_gorilla_buffer[pg->gorilla.aral_index]);
+ memset(gbuf, 0, GORILLA_BUFFER_SIZE);
+ global_statistics_gorilla_buffer_add_hot();
+
+ *pg->gorilla.writer = gorilla_writer_init(gbuf, GORILLA_BUFFER_SLOTS);
+ pg->gorilla.num_buffers = 1;
+
+ break;
+ }
+ default:
+ fatal("Unknown page type: %u", type); // was "%uc", which printed a stray 'c' after the number
+ }
+
+ return pg;
+}
+
+PGD *pgd_create_from_disk_data(uint8_t type, void *base, uint32_t size) // builds a read-only page from a private copy of `size` bytes at `base`; returns PGD_EMPTY when there is not enough data for one slot
+{
+ if (!size)
+ return PGD_EMPTY;
+
+ if (size < page_type_size[type])
+ return PGD_EMPTY;
+
+ PGD *pg = aral_mallocz(pgd_alloc_globals.aral_pgd);
+
+ pg->type = type;
+ pg->states = PGD_STATE_CREATED_FROM_DISK;
+ pg->options = ~PAGE_OPTION_ALL_VALUES_EMPTY; // pages loaded from disk are never flagged as all-empty
+
+ switch (type)
+ {
+ case PAGE_METRICS:
+ case PAGE_TIER:
+ pg->raw.size = size;
+ pg->used = size / page_type_size[type]; // every stored slot counts as used
+ pg->slots = pg->used;
+
+ pg->raw.data = pgd_data_aral_alloc(size);
+ memcpy(pg->raw.data, base, size);
+ break;
+ case PAGE_GORILLA_METRICS:
+ internal_fatal(size == 0, "Asked to create page with 0 data!!!");
+ internal_fatal(size % sizeof(uint32_t), "Unaligned gorilla buffer size");
+ internal_fatal(size % GORILLA_BUFFER_SIZE, "Expected size to be a multiple of %zu-bytes", GORILLA_BUFFER_SIZE);
+
+ pg->raw.data = mallocz(size); // plain mallocz(): pgd_free() releases this buffer with freez()
+ pg->raw.size = size;
+
+ // no zero-fill is needed first:
+ // the copy below overwrites all `size` bytes of the new buffer
+ memcpy(pg->raw.data, base, size);
+
+ uint32_t total_entries = gorilla_buffer_patch((void *) pg->raw.data);
+
+ pg->used = total_entries;
+ pg->slots = pg->used;
+ break;
+ default:
+ fatal("Unknown page type: %u", type); // was "%uc", which printed a stray 'c' after the number
+ }
+
+ return pg;
+}
+
+void pgd_free(PGD *pg) // releases a page and its type-specific storage; NULL and PGD_EMPTY are accepted and ignored
+{
+ if (!pg)
+ return;
+
+ if (pg == PGD_EMPTY)
+ return;
+
+ switch (pg->type)
+ {
+ case PAGE_METRICS:
+ case PAGE_TIER:
+ pgd_data_aral_free(pg->raw.data, pg->raw.size);
+ break;
+ case PAGE_GORILLA_METRICS: {
+ if (pg->states & PGD_STATE_CREATED_FROM_DISK)
+ {
+ internal_fatal(pg->raw.data == NULL, "Tried to free gorilla PGD loaded from disk with NULL data");
+ freez(pg->raw.data); // allocated with mallocz() in pgd_create_from_disk_data()
+ pg->raw.data = NULL;
+ }
+ else if ((pg->states & PGD_STATE_CREATED_FROM_COLLECTOR) ||
+ (pg->states & PGD_STATE_SCHEDULED_FOR_FLUSHING) ||
+ (pg->states & PGD_STATE_FLUSHED_TO_DISK))
+ {
+ internal_fatal(pg->gorilla.writer == NULL,
+ "PGD does not have an active gorilla writer");
+
+ internal_fatal(pg->gorilla.num_buffers == 0,
+ "PGD does not have any gorilla buffers allocated");
+ // detach buffers one at a time until the writer reports none left
+ while (true) {
+ gorilla_buffer_t *gbuf = gorilla_writer_drop_head_buffer(pg->gorilla.writer);
+ if (!gbuf)
+ break;
+ aral_freez(pgd_alloc_globals.aral_gorilla_buffer[pg->gorilla.aral_index], gbuf);
+ pg->gorilla.num_buffers -= 1;
+ }
+
+ internal_fatal(pg->gorilla.num_buffers != 0,
+ "Could not free all gorilla writer buffers");
+
+ aral_freez(pgd_alloc_globals.aral_gorilla_writer[pg->gorilla.aral_index], pg->gorilla.writer);
+ pg->gorilla.writer = NULL;
+ } else {
+ fatal("pgd_free() called on gorilla page with unsupported state");
+ // TODO: should we support any other states?
+ // if (!(pg->states & PGD_STATE_FLUSHED_TO_DISK))
+ // fatal("pgd_free() is not supported yet for pages flushed to disk");
+ }
+
+ break;
+ }
+ default:
+ fatal("Unknown page type: %u", pg->type); // was "%uc", which printed a stray 'c' after the number
+ }
+ // finally release the PGD itself
+ aral_freez(pgd_alloc_globals.aral_pgd, pg);
+}
+
+// ----------------------------------------------------------------------------
+// utility functions
+
+uint32_t pgd_type(PGD *pg) // returns the page type; unlike the other accessors, pg is dereferenced without a NULL / PGD_EMPTY check
+{
+ return pg->type;
+}
+
+bool pgd_is_empty(PGD *pg) // true for NULL, PGD_EMPTY, a page with no used slots, or a page still flagged PAGE_OPTION_ALL_VALUES_EMPTY
+{
+ if (!pg)
+ return true;
+
+ if (pg == PGD_EMPTY)
+ return true;
+
+ if (pg->used == 0)
+ return true;
+ // slots were appended, but none of them held a value that exists
+ if (pg->options & PAGE_OPTION_ALL_VALUES_EMPTY)
+ return true;
+
+ return false;
+}
+
+uint32_t pgd_slots_used(PGD *pg) // returns the number of used slots; 0 for NULL and PGD_EMPTY
+{
+ if (!pg)
+ return 0;
+
+ if (pg == PGD_EMPTY)
+ return 0;
+
+ return pg->used;
+}
+
+uint32_t pgd_memory_footprint(PGD *pg) // returns the bytes held in memory by the page (PGD plus its storage); 0 for NULL and PGD_EMPTY
+{
+ if (!pg)
+ return 0;
+
+ if (pg == PGD_EMPTY)
+ return 0;
+
+ size_t footprint = 0; // NOTE(review): computed as size_t but returned as uint32_t
+ switch (pg->type) {
+ case PAGE_METRICS:
+ case PAGE_TIER:
+ footprint = sizeof(PGD) + pg->raw.size;
+ break;
+ case PAGE_GORILLA_METRICS: {
+ if (pg->states & PGD_STATE_CREATED_FROM_DISK)
+ footprint = sizeof(PGD) + pg->raw.size; // disk-loaded gorilla pages keep their data in raw
+ else
+ footprint = sizeof(PGD) + sizeof(gorilla_writer_t) + (pg->gorilla.num_buffers * GORILLA_BUFFER_SIZE);
+
+ break;
+ }
+ default:
+ fatal("Unknown page type: %u", pg->type); // was "%uc", which printed a stray 'c' after the number
+ }
+
+ return footprint;
+}
+
+uint32_t pgd_disk_footprint(PGD *pg) // returns the bytes the page needs on disk (0 when it has no used slots) and marks the page PGD_STATE_SCHEDULED_FOR_FLUSHING
+{
+ if (!pgd_slots_used(pg))
+ return 0; // also covers NULL and PGD_EMPTY; the state is left untouched on this path
+
+ size_t size = 0;
+
+ switch (pg->type) {
+ case PAGE_METRICS:
+ case PAGE_TIER: {
+ uint32_t used_size = pg->used * page_type_size[pg->type]; // only the used slots are written
+ internal_fatal(used_size > pg->raw.size, "Wrong disk footprint page size");
+ size = used_size;
+
+ break;
+ }
+ case PAGE_GORILLA_METRICS: {
+ if (pg->states & PGD_STATE_CREATED_FROM_COLLECTOR ||
+ pg->states & PGD_STATE_SCHEDULED_FOR_FLUSHING ||
+ pg->states & PGD_STATE_FLUSHED_TO_DISK)
+ {
+ internal_fatal(!pg->gorilla.writer,
+ "pgd_disk_footprint() not implemented for NULL gorilla writers");
+
+ internal_fatal(pg->gorilla.num_buffers == 0,
+ "Gorilla writer does not have any buffers");
+ // whole buffers are counted, not only the bytes the writer has filled
+ size = pg->gorilla.num_buffers * GORILLA_BUFFER_SIZE;
+ // statistics are updated only while the state is still CREATED_FROM_COLLECTOR, i.e. before it is overwritten below
+ if (pg->states & PGD_STATE_CREATED_FROM_COLLECTOR) {
+ global_statistics_tier0_disk_compressed_bytes(gorilla_writer_nbytes(pg->gorilla.writer));
+ global_statistics_tier0_disk_uncompressed_bytes(gorilla_writer_entries(pg->gorilla.writer) * sizeof(storage_number));
+ }
+ } else if (pg->states & PGD_STATE_CREATED_FROM_DISK) {
+ size = pg->raw.size;
+ } else {
+ fatal("Asked disk footprint on unknown page state");
+ }
+
+ break;
+ }
+ default:
+ fatal("Unknown page type: %u", pg->type); // was "%uc", which printed a stray 'c' after the number
+ }
+
+ internal_fatal(pg->states & PGD_STATE_CREATED_FROM_DISK,
+ "Disk footprint asked for page created from disk.");
+ pg->states = PGD_STATE_SCHEDULED_FOR_FLUSHING; // NOTE(review): plain assignment, so every previously set state bit is dropped
+ return size;
+}
+
+void pgd_copy_to_extent(PGD *pg, uint8_t *dst, uint32_t dst_size) // writes the page's on-disk form into dst (dst_size bytes) and marks the page PGD_STATE_FLUSHED_TO_DISK
+{
+ internal_fatal(pgd_disk_footprint(pg) != dst_size, "Wrong disk footprint size requested (need %u, available %u)",
+ pgd_disk_footprint(pg), dst_size); // NOTE(review): pgd_disk_footprint() overwrites pg->states and is evaluated inside this check's arguments — confirm that is intended
+
+ switch (pg->type) {
+ case PAGE_METRICS:
+ case PAGE_TIER:
+ memcpy(dst, pg->raw.data, dst_size);
+ break;
+ case PAGE_GORILLA_METRICS: {
+ if ((pg->states & PGD_STATE_SCHEDULED_FOR_FLUSHING) == 0)
+ fatal("Copying to extent is supported only for PGDs that are scheduled for flushing.");
+
+ internal_fatal(!pg->gorilla.writer,
+ "pgd_copy_to_extent() not implemented for NULL gorilla writers");
+
+ internal_fatal(pg->gorilla.num_buffers == 0,
+ "pgd_copy_to_extent() gorilla writer does not have any buffers");
+
+ bool ok = gorilla_writer_serialize(pg->gorilla.writer, dst, dst_size);
+ UNUSED(ok); // ok is otherwise referenced only by the internal_fatal() below
+ internal_fatal(!ok,
+ "pgd_copy_to_extent() tried to serialize pg=%p, gw=%p (with dst_size=%u bytes, num_buffers=%zu)",
+ pg, pg->gorilla.writer, dst_size, pg->gorilla.num_buffers);
+ break;
+ }
+ default:
+ fatal("Unknown page type: %u", pg->type); // was "%uc", which printed a stray 'c' after the number
+ }
+
+ pg->states = PGD_STATE_FLUSHED_TO_DISK; // plain assignment: replaces every previously set state bit
+}
+
+// ----------------------------------------------------------------------------
+// data collection
+
+void pgd_append_point(PGD *pg, // collector-created page to append one point to
+ usec_t point_in_time_ut __maybe_unused, // not used by this implementation
+ NETDATA_DOUBLE n, // the value; stored as sum_value for PAGE_TIER
+ NETDATA_DOUBLE min_value, // used by PAGE_TIER only
+ NETDATA_DOUBLE max_value, // used by PAGE_TIER only
+ uint16_t count, // used by PAGE_TIER only
+ uint16_t anomaly_count, // used by PAGE_TIER only
+ SN_FLAGS flags, // packed together with n for PAGE_METRICS and PAGE_GORILLA_METRICS
+ uint32_t expected_slot) // must equal pg->used, otherwise the call is fatal
+{
+ if (unlikely(pg->used >= pg->slots))
+ fatal("DBENGINE: attempted to write beyond page size (page type %u, slots %u, used %u)",
+ pg->type, pg->slots, pg->used /* FIXME:, pg->size */);
+
+ if (unlikely(pg->used != expected_slot))
+ fatal("DBENGINE: page is not aligned to expected slot (used %u, expected %u)",
+ pg->used, expected_slot);
+
+ if (!(pg->states & PGD_STATE_CREATED_FROM_COLLECTOR))
+ fatal("DBENGINE: collection on page not created from a collector");
+
+ if (pg->states & PGD_STATE_SCHEDULED_FOR_FLUSHING)
+ fatal("Data collection on page already scheduled for flushing");
+ // every branch stores the point, bumps pg->used and clears ALL_VALUES_EMPTY on the first value that is not empty
+ switch (pg->type) {
+ case PAGE_METRICS: {
+ storage_number *tier0_metric_data = (storage_number *)pg->raw.data;
+ storage_number t = pack_storage_number(n, flags);
+ tier0_metric_data[pg->used++] = t;
+
+ if ((pg->options & PAGE_OPTION_ALL_VALUES_EMPTY) && does_storage_number_exist(t))
+ pg->options &= ~PAGE_OPTION_ALL_VALUES_EMPTY;
+
+ break;
+ }
+ case PAGE_TIER: {
+ storage_number_tier1_t *tier12_metric_data = (storage_number_tier1_t *)pg->raw.data;
+ storage_number_tier1_t t;
+ t.sum_value = (float) n;
+ t.min_value = (float) min_value;
+ t.max_value = (float) max_value;
+ t.anomaly_count = anomaly_count;
+ t.count = count;
+ tier12_metric_data[pg->used++] = t;
+ // for tier pages, "empty" means n is NaN
+ if ((pg->options & PAGE_OPTION_ALL_VALUES_EMPTY) && fpclassify(n) != FP_NAN)
+ pg->options &= ~PAGE_OPTION_ALL_VALUES_EMPTY;
+
+ break;
+ }
+ case PAGE_GORILLA_METRICS: {
+ pg->used++;
+ storage_number t = pack_storage_number(n, flags);
+
+ if ((pg->options & PAGE_OPTION_ALL_VALUES_EMPTY) && does_storage_number_exist(t))
+ pg->options &= ~PAGE_OPTION_ALL_VALUES_EMPTY;
+
+ bool ok = gorilla_writer_write(pg->gorilla.writer, t);
+ if (!ok) { // the write failed: attach a fresh zeroed buffer and retry once
+ gorilla_buffer_t *new_buffer = aral_mallocz(pgd_alloc_globals.aral_gorilla_buffer[pg->gorilla.aral_index]);
+ memset(new_buffer, 0, GORILLA_BUFFER_SIZE);
+
+ gorilla_writer_add_buffer(pg->gorilla.writer, new_buffer, GORILLA_BUFFER_SLOTS);
+ pg->gorilla.num_buffers += 1;
+ global_statistics_gorilla_buffer_add_hot();
+
+ ok = gorilla_writer_write(pg->gorilla.writer, t);
+ internal_fatal(ok == false, "Failed to writer value in newly allocated gorilla buffer.");
+ }
+ break;
+ }
+ default:
+ fatal("DBENGINE: unknown page type id %d", pg->type);
+ break;
+ }
+}
+
+// ----------------------------------------------------------------------------
+// querying with cursor
+
+static void pgdc_seek(PGDC *pgdc, uint32_t position) // sets pgdc->slots and, for gorilla pages, positions the reader by decoding and discarding `position` values; pgdc->pgd must be a real page
+{
+ PGD *pg = pgdc->pgd;
+
+ switch (pg->type) {
+ case PAGE_METRICS:
+ case PAGE_TIER:
+ pgdc->slots = pgdc->pgd->used; // these pages are indexed directly, so there is nothing to skip
+ break;
+ case PAGE_GORILLA_METRICS: {
+ if (pg->states & PGD_STATE_CREATED_FROM_DISK) {
+ pgdc->slots = pgdc->pgd->slots;
+ pgdc->gr = gorilla_reader_init((void *) pg->raw.data);
+ } else {
+ if (!(pg->states & PGD_STATE_CREATED_FROM_COLLECTOR) &&
+ !(pg->states & PGD_STATE_SCHEDULED_FOR_FLUSHING) &&
+ !(pg->states & PGD_STATE_FLUSHED_TO_DISK))
+ fatal("pgdc_seek() called on a gorilla page that is in none of the supported states."); // the old text blamed "pages created from disk", which are handled by the branch above
+
+ if (!pg->gorilla.writer)
+ fatal("Seeking from a page without an active gorilla writer is not supported (yet).");
+
+ pgdc->slots = gorilla_writer_entries(pg->gorilla.writer);
+ pgdc->gr = gorilla_writer_get_reader(pg->gorilla.writer);
+ }
+ // only the local skip count is clamped; pgdc->position keeps the value set by the caller
+ if (position > pgdc->slots)
+ position = pgdc->slots;
+ // the reader is advanced by reading values in order and discarding them
+ for (uint32_t i = 0; i != position; i++) {
+ uint32_t value;
+
+ bool ok = gorilla_reader_read(&pgdc->gr, &value);
+
+ if (!ok) {
+ // this is fine, the reader will return empty points
+ break;
+ }
+ }
+
+ break;
+ }
+ default:
+ fatal("DBENGINE: unknown page type id %d", pg->type);
+ break;
+ }
+}
+
+void pgdc_reset(PGDC *pgdc, PGD *pgd, uint32_t position) // binds the cursor to pgd at position; seeks only for a real page and a position other than UINT32_MAX
+{
+ // pgd might be null and position equal to UINT32_MAX
+
+ pgdc->pgd = pgd;
+ pgdc->position = position;
+ // on the early returns below, pgdc->slots and pgdc->gr are left as they were
+ if (!pgd)
+ return;
+
+ if (pgd == PGD_EMPTY)
+ return;
+
+ if (position == UINT32_MAX)
+ return;
+
+ pgdc_seek(pgdc, position);
+}
+
+bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position, STORAGE_POINT *sp) // decodes the point at the cursor into *sp and advances; returns false and fills *sp with an empty point when nothing can be read
+{
+ if (!pgdc->pgd || pgdc->pgd == PGD_EMPTY || pgdc->position >= pgdc->slots)
+ {
+ storage_point_empty(*sp, sp->start_time_s, sp->end_time_s); // reads the times already stored in *sp by the caller
+ return false;
+ }
+
+ internal_fatal(pgdc->position != expected_position, "Wrong expected cursor position");
+
+ switch (pgdc->pgd->type)
+ {
+ case PAGE_METRICS: {
+ storage_number *array = (storage_number *) pgdc->pgd->raw.data;
+ storage_number n = array[pgdc->position++];
+ // a tier 0 slot holds one sample, so min, max and sum are the same value
+ sp->min = sp->max = sp->sum = unpack_storage_number(n);
+ sp->flags = (SN_FLAGS)(n & SN_USER_FLAGS);
+ sp->count = 1;
+ sp->anomaly_count = is_storage_number_anomalous(n) ? 1 : 0;
+
+ return true;
+ }
+ case PAGE_TIER: {
+ storage_number_tier1_t *array = (storage_number_tier1_t *) pgdc->pgd->raw.data;
+ storage_number_tier1_t n = array[pgdc->position++];
+ // flags are derived from the stored anomaly count
+ sp->flags = n.anomaly_count ? SN_FLAG_NONE : SN_FLAG_NOT_ANOMALOUS;
+ sp->count = n.count;
+ sp->anomaly_count = n.anomaly_count;
+ sp->min = n.min_value;
+ sp->max = n.max_value;
+ sp->sum = n.sum_value;
+
+ return true;
+ }
+ case PAGE_GORILLA_METRICS: {
+ pgdc->position++; // advanced even when the read below fails
+
+ uint32_t n = 666666666; // placeholder; only used when the read succeeds
+ bool ok = gorilla_reader_read(&pgdc->gr, &n);
+ if (ok) {
+ sp->min = sp->max = sp->sum = unpack_storage_number(n);
+ sp->flags = (SN_FLAGS)(n & SN_USER_FLAGS);
+ sp->count = 1;
+ sp->anomaly_count = is_storage_number_anomalous(n) ? 1 : 0;
+ } else {
+ storage_point_empty(*sp, sp->start_time_s, sp->end_time_s);
+ }
+
+ return ok;
+ }
+ default: {
+ static bool logged = false; // the error is logged once per process, not once per page
+ if (!logged)
+ {
+ netdata_log_error("DBENGINE: unknown page type %d found. Cannot decode it. Ignoring its metrics.", pgd_type(pgdc->pgd));
+ logged = true;
+ }
+
+ storage_point_empty(*sp, sp->start_time_s, sp->end_time_s);
+ return false;
+ }
+ }
+}
diff --git a/database/engine/page.h b/database/engine/page.h
new file mode 100644
index 00000000000000..32c87c58072272
--- /dev/null
+++ b/database/engine/page.h
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef DBENGINE_PAGE_H
+#define DBENGINE_PAGE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "libnetdata/libnetdata.h"
+
+typedef struct pgd_cursor {
+ struct pgd *pgd; // page being read; may be NULL or PGD_EMPTY
+ uint32_t position; // index of the next point to return
+ uint32_t slots; // number of readable points; set when pgdc_reset() seeks
+ // reader state, used for gorilla pages only
+ gorilla_reader_t gr;
+} PGDC;
+
+#include "rrdengine.h"
+
+typedef struct pgd PGD;
+// sentinel for "no data": returned by pgd_create_from_disk_data() and accepted by the accessors and pgd_free()
+#define PGD_EMPTY (PGD *)(-1)
+// creates the allocators used by the functions below
+void pgd_init_arals(void);
+// lifecycle
+PGD *pgd_create(uint8_t type, uint32_t slots);
+PGD *pgd_create_from_disk_data(uint8_t type, void *base, uint32_t size);
+void pgd_free(PGD *pg);
+// accessors; pgd_is_empty() and pgd_slots_used() accept NULL and PGD_EMPTY, pgd_type() does not
+uint32_t pgd_type(PGD *pg);
+bool pgd_is_empty(PGD *pg);
+uint32_t pgd_slots_used(PGD *pg);
+// sizes in bytes; pgd_disk_footprint() also marks the page as scheduled for flushing
+uint32_t pgd_memory_footprint(PGD *pg);
+uint32_t pgd_disk_footprint(PGD *pg);
+// serialises the page into dst and marks it as flushed to disk
+void pgd_copy_to_extent(PGD *pg, uint8_t *dst, uint32_t dst_size);
+// data collection: appends one point; expected_slot must equal the number of slots already used
+void pgd_append_point(PGD *pg,
+ usec_t point_in_time_ut,
+ NETDATA_DOUBLE n,
+ NETDATA_DOUBLE min_value,
+ NETDATA_DOUBLE max_value,
+ uint16_t count,
+ uint16_t anomaly_count,
+ SN_FLAGS flags,
+ uint32_t expected_slot);
+// querying with a cursor
+void pgdc_reset(PGDC *pgdc, PGD *pgd, uint32_t position);
+bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position, STORAGE_POINT *sp);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // DBENGINE_PAGE_H
diff --git a/database/engine/page_test.cc b/database/engine/page_test.cc
new file mode 100644
index 00000000000000..d61299bc4870d8
--- /dev/null
+++ b/database/engine/page_test.cc
@@ -0,0 +1,405 @@
+#include "page.h"
+#include "page_test.h"
+
+#ifdef HAVE_GTEST
+
+#include <gtest/gtest.h>
+#include <limits>
+#include <random>
+
+// Equality for STORAGE_POINT.
+//
+// Two points are equal when all of the following members match:
+//   min, max, sum, start_time_s, end_time_s, count, flags.
+//
+// Members are compared with the built-in ==; if min/max/sum are floating
+// point, a NaN in any of them makes the two points compare unequal.
+//
+// NOTE(review): anomaly_count is not part of the comparison;
+// confirm that this is intended.
+bool operator==(const STORAGE_POINT lhs, const STORAGE_POINT rhs) {
+    const bool same_values =
+        lhs.min == rhs.min &&
+        lhs.max == rhs.max &&
+        lhs.sum == rhs.sum;
+
+    const bool same_window =
+        lhs.start_time_s == rhs.start_time_s &&
+        lhs.end_time_s == rhs.end_time_s;
+
+    const bool same_meta = lhs.count == rhs.count && lhs.flags == rhs.flags;
+
+    return same_values && same_window && same_meta;
+}
+
+// TODO: use value-parameterized tests
+// http://google.github.io/googletest/advanced.html#value-parameterized-tests
+static uint8_t page_type = PAGE_GORILLA_METRICS; // page type the tests pass to pgd_create() and slots_for_page() switches on
+
+// Returns the slot count the tests request for the current page_type:
+// PAGE_METRICS always yields 1024 (n is ignored), PAGE_GORILLA_METRICS yields n.
+static size_t slots_for_page(size_t n) {
+    switch (page_type) {
+        case PAGE_METRICS: return 1024;
+        case PAGE_GORILLA_METRICS: return n;
+        default: // any other type is a test setup error; report the numeric type only
+            fatal("Slots requested for unsupported page: %u", (unsigned int)page_type);
+    }
+}
+
+TEST(PGD, EmptyOrNull) {
+    // A NULL page and the PGD_EMPTY sentinel are both expected to look like a
+    // page that holds no data: empty, zero slots used, zero footprints.
+    auto expect_no_data = [](PGD *page) {
+        EXPECT_TRUE(pgd_is_empty(page));
+        EXPECT_EQ(pgd_slots_used(page), 0);
+        EXPECT_EQ(pgd_memory_footprint(page), 0);
+        EXPECT_EQ(pgd_disk_footprint(page), 0);
+    };
+
+    PGDC pgdc;
+    STORAGE_POINT point;
+
+    // --- NULL page ---
+    PGD *page = NULL;
+    expect_no_data(page);
+    pgdc_reset(&pgdc, page, 0);
+    EXPECT_FALSE(pgdc_get_next_point(&pgdc, 0, &point));
+    pgd_free(page);
+
+    // --- PGD_EMPTY sentinel ---
+    page = PGD_EMPTY;
+    expect_no_data(page);
+    // at this point the cursor was last reset with the NULL page
+    EXPECT_FALSE(pgdc_get_next_point(&pgdc, 0, &point));
+    pgdc_reset(&pgdc, page, 0);
+    EXPECT_FALSE(pgdc_get_next_point(&pgdc, 0, &point));
+    pgd_free(page);
+}
+
+TEST(PGD, Create) {
+    const size_t capacity = slots_for_page(1024 * 1024);
+    PGD *page = pgd_create(page_type, capacity);
+
+    // a freshly created page reports the requested type and holds nothing
+    EXPECT_EQ(pgd_type(page), page_type);
+    EXPECT_TRUE(pgd_is_empty(page));
+    EXPECT_EQ(pgd_slots_used(page), 0);
+
+    // fill every slot; the page must be non-empty after each append
+    size_t slot = 0;
+    while (slot != capacity) {
+        pgd_append_point(page, slot, slot, 0, 0, 1, 1, SN_DEFAULT_FLAGS, slot);
+        EXPECT_FALSE(pgd_is_empty(page));
+        ++slot;
+    }
+    EXPECT_EQ(pgd_slots_used(page), capacity);
+    // one append beyond the filled page is expected to kill the process (any message)
+    EXPECT_DEATH(pgd_append_point(page, capacity, capacity, 0, 0, 1, 1, SN_DEFAULT_FLAGS, capacity), ".*");
+    pgd_free(page);
+}
+
+// Fills a page completely and checks what cursors starting at different
+// positions read back from it.
+TEST(PGD, CursorFullPage) {
+    size_t slots = slots_for_page(1024 * 1024);
+    PGD *pg = pgd_create(page_type, slots);
+
+    // Point number `slot` is stored with the value `slot`, so every point
+    // read back can be matched against the position it was read from.
+    for (size_t slot = 0; slot != slots; slot++)
+        pgd_append_point(pg, slot, slot, 0, 0, 1, 1, SN_DEFAULT_FLAGS, slot);
+
+    // Reads from `first_slot` to the end of the page with a fresh cursor,
+    // checks every returned point, then expects the cursor to be exhausted.
+    auto read_to_end = [&](size_t first_slot) {
+        PGDC cursor;
+        pgdc_reset(&cursor, pg, first_slot);
+
+        STORAGE_POINT sp;
+        for (size_t slot = first_slot; slot != slots; slot++) {
+            EXPECT_TRUE(pgdc_get_next_point(&cursor, slot, &sp));
+
+            // static_cast needs its target type spelled out; size_t matches
+            // the type of `slot`.
+            EXPECT_EQ(slot, static_cast<size_t>(sp.min));
+            EXPECT_EQ(sp.min, sp.max);
+            EXPECT_EQ(sp.min, sp.sum);
+            EXPECT_EQ(sp.count, 1);
+            EXPECT_EQ(sp.anomaly_count, 0);
+        }
+
+        // a read past the last slot must fail
+        EXPECT_FALSE(pgdc_get_next_point(&cursor, slots, &sp));
+    };
+
+    // Full scan, performed twice with independent cursors.
+    for (size_t i = 0; i != 2; i++)
+        read_to_end(0);
+
+    // Scan of the second half only, again performed twice.
+    for (size_t i = 0; i != 2; i++)
+        read_to_end(slots / 2);
+
+    // out of bounds seek: a cursor reset to a position beyond the page is
+    // expected to return nothing
+    {
+        PGDC cursor;
+        pgdc_reset(&cursor, pg, 2 * slots);
+
+        STORAGE_POINT sp;
+        EXPECT_FALSE(pgdc_get_next_point(&cursor, 2 * slots, &sp));
+    }
+
+    pgd_free(pg);
+}
+
+TEST(PGD, CursorHalfPage) {
+    size_t slots = slots_for_page(1024 * 1024);
+    PGD *pg = pgd_create(page_type, slots);
+
+    PGDC cursor;
+    STORAGE_POINT sp;
+
+    // fill the 1st half of the page
+    for (size_t slot = 0; slot != slots / 2; slot++)
+        pgd_append_point(pg, slot, slot, 0, 0, 1, 1, SN_DEFAULT_FLAGS, slot);
+
+    // a cursor over the half-filled page must return exactly those points
+    pgdc_reset(&cursor, pg, 0);
+    for (size_t slot = 0; slot != slots / 2; slot++) {
+        EXPECT_TRUE(pgdc_get_next_point(&cursor, slot, &sp));
+
+        // static_cast needs its target type spelled out; size_t matches `slot`
+        EXPECT_EQ(slot, static_cast<size_t>(sp.min));
+        EXPECT_EQ(sp.min, sp.max);
+        EXPECT_EQ(sp.min, sp.sum);
+        EXPECT_EQ(sp.count, 1);
+        EXPECT_EQ(sp.anomaly_count, 0);
+    }
+    EXPECT_FALSE(pgdc_get_next_point(&cursor, slots / 2, &sp));
+
+    // reset pgdc to the end of the page, we should not be getting more
+    // points even if the page has grown in between.
+    pgdc_reset(&cursor, pg, slots / 2);
+
+    // grow the page after the cursor has been positioned
+    for (size_t slot = slots / 2; slot != slots; slot++)
+        pgd_append_point(pg, slot, slot, 0, 0, 1, 1, SN_DEFAULT_FLAGS, slot);
+
+    for (size_t slot = slots / 2; slot != slots; slot++)
+        EXPECT_FALSE(pgdc_get_next_point(&cursor, slot, &sp));
+
+    EXPECT_FALSE(pgdc_get_next_point(&cursor, slots, &sp));
+    pgd_free(pg);
+}
+
+TEST(PGD, MemoryFootprint) {
+ size_t slots = slots_for_page(1024 * 1024);
+ PGD *pg = pgd_create(page_type, slots);
+
+ uint32_t footprint = 0;
+ switch (pgd_type(pg)) {
+ case PAGE_METRICS:
+ footprint = slots * sizeof(uint32_t);
+ break;
+ case PAGE_GORILLA_METRICS:
+ footprint = 128 * sizeof(uint32_t);
+ break;
+ default:
+ fatal("Uknown page type: %uc", pgd_type(pg));
+ }
+ EXPECT_NEAR(pgd_memory_footprint(pg), footprint, 128);
+
+ std::random_device rand_dev;
+ std::mt19937 gen(rand_dev());
+ std::uniform_int_distribution