From f19038eff6ade7392655f1209e507ae77b2032d1 Mon Sep 17 00:00:00 2001 From: "nengli.ln" Date: Tue, 9 Jun 2026 15:18:54 +0800 Subject: [PATCH 1/5] feat: add RSS memory leak nightly workflow and tests Cherry-picked and squashed 15 RSS-related commits from memory-leak branch: - Add memory-leak-nightly.yml workflow (cron + DingTalk notify) - Add tests/memory_leak/ with test_memory_leak.py (RSS monitor) - Add gtest binaries: test_node_create.cc, test_temporary_tables.cc - Wire tests/CMakeLists.txt and tests/memory_leak/CMakeLists.txt Excluded valgrind/ASAN-related changes (edge_columns.h fix, neug-test.yml valgrind_memcheck job, ASAN compile flags). --- .github/workflows/memory-leak-nightly.yml | 261 ++++++++++++++ tests/CMakeLists.txt | 3 +- tests/memory_leak/CMakeLists.txt | 10 + tests/memory_leak/test_memory_leak.py | 394 +++++++++++++++++++++ tests/memory_leak/test_node_create.cc | 234 ++++++++++++ tests/memory_leak/test_temporary_tables.cc | 343 ++++++++++++++++++ 6 files changed, 1244 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/memory-leak-nightly.yml create mode 100644 tests/memory_leak/CMakeLists.txt create mode 100644 tests/memory_leak/test_memory_leak.py create mode 100644 tests/memory_leak/test_node_create.cc create mode 100644 tests/memory_leak/test_temporary_tables.cc diff --git a/.github/workflows/memory-leak-nightly.yml b/.github/workflows/memory-leak-nightly.yml new file mode 100644 index 000000000..e0055b4c6 --- /dev/null +++ b/.github/workflows/memory-leak-nightly.yml @@ -0,0 +1,261 @@ +name: Memory Leak Nightly + +on: + workflow_dispatch: + inputs: + outer_iters: + description: 'NEUG_LEAK_OUTER_ITERS for test_node_create' + required: false + default: '1000' + inner_rows: + description: 'NEUG_LEAK_INNER_ROWS for test_node_create' + required: false + default: '1000' + rss_iters: + description: '--iters for tests/memory_leak/test_memory_leak.py' + required: false + default: '2000' + schedule: + # UTC 19:00 = UTC+8 03:00 every day + 
- cron: '0 19 * * *' + +concurrency: + group: ${{ github.workflow }} + cancel-in-progress: true + +jobs: + memory-leak: + name: Build NeuG (RELEASE, no mimalloc) and run leak checks + # NOTE: temporarily removed `github.repository == 'alibaba/neug'` guard for debugging on forks. + if: github.event_name == 'workflow_dispatch' || github.ref == 'refs/heads/main' + # NOTE: temporarily switched from the self-hosted runner to a GitHub-hosted + # ubuntu-22.04 runner for fork debugging; the job still runs inside the + # private neug-dev container image configured below. + # Original config: + # runs-on: [self-hosted, daily, linux, x64] + # container: + # image: neug-registry.cn-hongkong.cr.aliyuncs.com/neug/neug-dev:v0.1.2 + runs-on: ubuntu-22.04 + container: + image: neug-registry.cn-hongkong.cr.aliyuncs.com/neug/neug-dev:v0.1.2 + timeout-minutes: 180 + # In container mode GitHub Actions defaults `run:` shell to `sh -e {0}` + # (dash on debian-based images), which does NOT support `set -o pipefail`. + # Force bash so the same `set -o pipefail` snippets used elsewhere keep + # working and pipeline failures are not silently swallowed by `tee`. 
+ defaults: + run: + shell: bash + env: + OUTER_ITERS: ${{ github.event.inputs.outer_iters || '10' }} + INNER_ROWS: ${{ github.event.inputs.inner_rows || '10' }} + RSS_ITERS: ${{ github.event.inputs.rss_iters || '20' }} + LOG_DIR: ${{ github.workspace }}/memory-leak-logs + + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + fetch-depth: 0 + + - name: Cache CCache + uses: actions/cache@v4 + with: + path: ~/.cache/ccache + key: ${{ runner.os }}-ccache-${{ github.ref_name }}-${{ hashFiles('CMakeLists.txt', 'cmake/**', 'proto/**', '.github/workflows/neug-test.yml') }} + restore-keys: | + ${{ runner.os }}-ccache-${{ github.ref_name }}- + ${{ runner.os }}-ccache-main- + ${{ runner.os }}-ccache- + + - name: Setup CCache + run: | + sudo apt-get update && sudo apt-get install -y ccache + mkdir -p ~/.cache/ccache + { + echo "CCACHE_DIR=$HOME/.cache/ccache" + echo "CCACHE_BASEDIR=${GITHUB_WORKSPACE}" + echo "CCACHE_COMPRESS=true" + echo "CCACHE_COMPRESSLEVEL=6" + echo "CCACHE_MAXSIZE=5G" + } >> "$GITHUB_ENV" + ccache --set-config=base_dir=${GITHUB_WORKSPACE} + ccache --set-config=compiler_check=content + ccache --set-config=compression=true + ccache --max-size=5G + ccache --zero-stats + + - name: Increase the maximum number of opened files + run: | + ulimit -n 65535 || true + sudo chmod -R 777 /etc/security/* || true + echo "* soft nofile 1048576" | sudo tee -a /etc/security/limits.conf + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + + - name: Prepare log dir + run: mkdir -p "$LOG_DIR" + + # ---------------------------------------------------------------- + # Build NeuG via tools/python_bind/make build (mirrors neug-test.yml). + # The build produces: + # - the python extension under tools/python_bind/build/lib* + # - the C++ binaries (incl. 
memory-leak gtest) under + # tools/python_bind/build/neug_py_bind/tests/memory_leak/ + # ---------------------------------------------------------------- + - name: Build NeuG (RELEASE, BUILD_TEST=ON, WITH_MIMALLOC=OFF) + run: | + . /home/neug/.neug_env + cd ${GITHUB_WORKSPACE}/ + echo "CCache stats before build" + ccache --show-stats + export CI=ON # Mark CI environment to enable CI-specific settings(use system level arrow) + export BUILD_TYPE=RELEASE + export CMAKE_BUILD_PARALLEL_LEVEL=$(printf '%s\n' "$(nproc)" 32 | awk 'NR==1 || $0&1 | tee "$LOG_DIR/test_node_create.log" + + # ---------------------------------------------------------------- + # 1b) gtest -- test_temporary_tables (scan / ddl / temp_insert) + # ---------------------------------------------------------------- + - name: gtest -- test_temporary_tables + env: + NEUG_RUN_MEMORY_LEAK_TESTS: '1' + NEUG_LEAK_OUTER_ITERS: ${{ env.OUTER_ITERS }} + NEUG_LEAK_INNER_ROWS: ${{ env.INNER_ROWS }} + NEUG_LEAK_BULK_ROWS: '100000' + NEUG_LEAK_SCAN_ITERS: '200' + GLOG_logtostderr: '1' + run: | + set -o pipefail + ./tools/python_bind/build/neug_py_bind/tests/memory_leak/test_temporary_tables \ + --gtest_color=no \ + 2>&1 | tee "$LOG_DIR/test_temporary_tables.log" + + # ---------------------------------------------------------------- + # 2) Python-side RSS workload — reuse the same build above + # ---------------------------------------------------------------- + - name: python3 tests/memory_leak/test_memory_leak.py + env: + PYTHONPATH: ${{ github.workspace }}/tools/python_bind:${{ github.workspace }}/tools/python_bind/build/neug_py_bind + run: | + set -o pipefail + python3 tests/memory_leak/test_memory_leak.py \ + --iters "$RSS_ITERS" --sample 100 \ + --slope-threshold 0.5 --delta-threshold 5.0 \ + --exit-on-leak \ + 2>&1 | tee "$LOG_DIR/test_memory_leak_py.log" + + # ---------------------------------------------------------------- + # 3) Always upload logs for postmortem + # 
---------------------------------------------------------------- + - name: Summarize verdicts + if: always() + run: | + echo '## Memory Leak Nightly Summary' >> "$GITHUB_STEP_SUMMARY" + echo '' >> "$GITHUB_STEP_SUMMARY" + if [ -f "$LOG_DIR/test_node_create.log" ]; then + echo '### gtest test_node_create' >> "$GITHUB_STEP_SUMMARY" + echo '```' >> "$GITHUB_STEP_SUMMARY" + tail -n 20 "$LOG_DIR/test_node_create.log" >> "$GITHUB_STEP_SUMMARY" || true + echo '```' >> "$GITHUB_STEP_SUMMARY" + fi + if [ -f "$LOG_DIR/test_temporary_tables.log" ]; then + echo '### gtest test_temporary_tables' >> "$GITHUB_STEP_SUMMARY" + echo '```' >> "$GITHUB_STEP_SUMMARY" + tail -n 20 "$LOG_DIR/test_temporary_tables.log" >> "$GITHUB_STEP_SUMMARY" || true + echo '```' >> "$GITHUB_STEP_SUMMARY" + fi + if [ -f "$LOG_DIR/test_memory_leak_py.log" ]; then + echo '### tests/memory_leak/test_memory_leak.py' >> "$GITHUB_STEP_SUMMARY" + echo '```' >> "$GITHUB_STEP_SUMMARY" + tail -n 20 "$LOG_DIR/test_memory_leak_py.log" >> "$GITHUB_STEP_SUMMARY" || true + echo '```' >> "$GITHUB_STEP_SUMMARY" + fi + + - name: Upload logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: memory-leak-nightly-${{ github.run_id }} + path: ${{ env.LOG_DIR }} + if-no-files-found: warn + retention-days: 14 + + # ============================================================ + # Job: send DingTalk notification for the nightly memory-leak run. + # Mirrors benchmark.yml's notify-benchmark-result style. + # Only runs on the canonical alibaba/neug repo to avoid fork noise. + # ============================================================ + notify-memory-leak-result: + needs: [memory-leak] + if: always() && github.repository == 'alibaba/neug' + runs-on: ubuntu-latest + steps: + - name: Send DingTalk notification + env: + DINGTALK_TOKEN: ${{ secrets.DINGTALK_TOKEN }} + run: | + if [ -z "$DINGTALK_TOKEN" ]; then + echo "DINGTALK_TOKEN not set, skipping notification." 
+ exit 0 + fi + WEBHOOK_URL="https://oapi.dingtalk.com/robot/send?access_token=${DINGTALK_TOKEN}" + STATUS="${{ needs.memory-leak.result }}" + if [[ "$STATUS" == "success" ]]; then + TITLE="[Nightly Report] NeuG Memory Leak Passed" + else + TITLE="[Nightly Report] NeuG Memory Leak Failed" + fi + curl -s -X POST "$WEBHOOK_URL" \ + -H 'Content-Type: application/json' \ + -d "{ + \"msgtype\": \"markdown\", + \"markdown\": { + \"title\": \"$TITLE\", + \"text\": \"## $TITLE\n\n- **memory-leak**: ${STATUS}\n- **Repo**: ${{ github.repository }}\n- **Branch**: ${{ github.ref_name }}\n- **Commit**: ${{ github.sha }}\n- [View Run](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }})\" + } + }" diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 466dc104f..dfeaa304d 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -4,4 +4,5 @@ add_subdirectory(storage) add_subdirectory(transaction) add_subdirectory(unittest) add_subdirectory(utils) -add_subdirectory(main) \ No newline at end of file +add_subdirectory(main) +add_subdirectory(memory_leak) \ No newline at end of file diff --git a/tests/memory_leak/CMakeLists.txt b/tests/memory_leak/CMakeLists.txt new file mode 100644 index 000000000..0a6fce1b1 --- /dev/null +++ b/tests/memory_leak/CMakeLists.txt @@ -0,0 +1,10 @@ +# Memory-leak smoke tests. These tests are GTEST_SKIP()-ed by default and +# only execute when the environment variable NEUG_RUN_MEMORY_LEAK_TESTS=1 is +# set. See test_node_create.cc for details. +# +# `add_neug_test` registers the binary with ctest, but the test will report +# as PASS/SKIPPED when the env var is unset, so day-to-day `ctest` runs are +# unaffected. 
+ +add_neug_test(test_node_create test_node_create.cc) +add_neug_test(test_temporary_tables test_temporary_tables.cc) diff --git a/tests/memory_leak/test_memory_leak.py b/tests/memory_leak/test_memory_leak.py new file mode 100644 index 000000000..6f09ba8ca --- /dev/null +++ b/tests/memory_leak/test_memory_leak.py @@ -0,0 +1,394 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import annotations + +import argparse +import ctypes +import ctypes.util +import gc +import os +import platform +import shutil +import sys +import tempfile +from contextlib import contextmanager +from typing import Callable, Dict, List, Tuple + +# --------------------------------------------------------------------------- +# Platform guard — script is meaningful only on Linux/glibc. +# --------------------------------------------------------------------------- +if not sys.platform.startswith("linux"): + sys.stderr.write( + f"[test_memory_leak] platform '{sys.platform}' is not supported; " + "this script requires Linux/glibc.\n" + ) + sys.exit(2) + + +# --------------------------------------------------------------------------- +# RSS sampling and malloc_trim helpers. 
# ---------------------------------------------------------------------------
_LIBC = None


def _libc():
    """Return the process-wide libc handle, loading it on first use.

    The first call resolves the C library (falling back to ``libc.so.6``
    when ``find_library`` finds nothing) and declares the ``malloc_trim``
    prototype; later calls reuse the cached handle.
    """
    global _LIBC
    if _LIBC is not None:
        return _LIBC
    lib_name = ctypes.util.find_library("c") or "libc.so.6"
    _LIBC = ctypes.CDLL(lib_name, use_errno=True)
    _LIBC.malloc_trim.argtypes = [ctypes.c_size_t]
    _LIBC.malloc_trim.restype = ctypes.c_int
    return _LIBC


def malloc_trim(pad: int = 0) -> int:
    """Invoke libc's ``malloc_trim(pad)`` and pass its integer result through."""
    handle = _libc()
    return handle.malloc_trim(pad)


def get_rss_mb() -> float:
    """Read ``VmRSS`` from /proc/self/status and convert it from kB to MB.

    Raises ``RuntimeError`` when the status file has no ``VmRSS:`` line.
    """
    with open("/proc/self/status", "r") as status:
        rss_line = next(
            (entry for entry in status if entry.startswith("VmRSS:")), None
        )
    if rss_line is None:
        raise RuntimeError("VmRSS not found in /proc/self/status")
    # Line looks like "VmRSS:\t 123456 kB"; the second field is the kB count.
    return int(rss_line.split()[1]) / 1024.0


def linear_slope_per_1000(samples: List[Tuple[int, float]]) -> float:
    """Least-squares slope of RSS over cycle index, scaled to MB per 1000 cycles.

    ``samples`` holds ``(cycle_index, rss_mb)`` pairs. The result is 0.0 when
    fewer than two samples are given or when all cycle indices coincide
    (zero denominator).
    """
    count = len(samples)
    if count < 2:
        return 0.0
    cycles = [point[0] for point in samples]
    rss_values = [point[1] for point in samples]
    sum_x = sum(cycles)
    sum_y = sum(rss_values)
    sum_xx = sum(x * x for x in cycles)
    sum_xy = sum(x * y for x, y in zip(cycles, rss_values))
    denominator = count * sum_xx - sum_x * sum_x
    if denominator == 0:
        return 0.0
    return (count * sum_xy - sum_x * sum_y) / denominator * 1000.0


# ---------------------------------------------------------------------------
# Scenario harness.
+# --------------------------------------------------------------------------- +class ScenarioResult: + __slots__ = ( + "name", + "iters", + "sample", + "base", + "before_close", + "after_close", + "after_trim", + "slope", + ) + + def __init__(self, name: str, iters: int, sample: int): + self.name = name + self.iters = iters + self.sample = sample + self.base = 0.0 + self.before_close = 0.0 + self.after_close = 0.0 + self.after_trim = 0.0 + self.slope = 0.0 + + @property + def delta(self) -> float: + return self.after_trim - self.base + + def is_leak(self, slope_threshold: float, delta_threshold: float) -> bool: + return self.slope > slope_threshold or self.delta > delta_threshold + + +@contextmanager +def isolated_dbdir(prefix: str): + path = tempfile.mkdtemp(prefix=f"neug_rss_{prefix}_") + try: + yield path + finally: + shutil.rmtree(path, ignore_errors=True) + + +# --------------------------------------------------------------------------- +# Scenarios — each takes (iters, sample) and returns ScenarioResult. +# --------------------------------------------------------------------------- +def scenario_steady(iters: int, sample: int) -> ScenarioResult: + """A. 
Long-running fixed query on a stable Database/Connection.""" + from neug import Database # imported lazily so platform guard runs first + + res = ScenarioResult("steady", iters, sample) + with isolated_dbdir("steady") as db_path: + db = Database(db_path=db_path, mode="w") + conn = db.connect() + conn.execute("CREATE NODE TABLE n(id INT64 PRIMARY KEY, name STRING);") + conn.execute("CREATE (a:n {id: 1, name: 'a'});", access_mode="i") + + gc.collect() + res.base = get_rss_mb() + samples: List[Tuple[int, float]] = [(0, res.base)] + + for i in range(1, iters + 1): + conn.execute("MATCH (a:n) RETURN a.id, a.name;", access_mode="r") + if i % sample == 0: + samples.append((i, get_rss_mb())) + + res.before_close = get_rss_mb() + conn.close() + db.close() + del conn, db + gc.collect() + res.after_close = get_rss_mb() + malloc_trim(0) + res.after_trim = get_rss_mb() + res.slope = linear_slope_per_1000(samples) + return res + + +def scenario_lifecycle(iters: int, sample: int) -> ScenarioResult: + """B. Repeated Database open/close cycles.""" + from neug import Database + + res = ScenarioResult("lifecycle", iters, sample) + with isolated_dbdir("lifecycle") as db_path: + # Pre-create the schema once so each open does not perform DDL. 
+ seed = Database(db_path=db_path, mode="w") + seed_conn = seed.connect() + seed_conn.execute("CREATE NODE TABLE n(id INT64 PRIMARY KEY, name STRING);") + seed_conn.close() + seed.close() + del seed_conn, seed + gc.collect() + + res.base = get_rss_mb() + samples: List[Tuple[int, float]] = [(0, res.base)] + + for i in range(1, iters + 1): + db = Database(db_path=db_path, mode="w") + conn = db.connect() + conn.execute("MATCH (a:n) RETURN count(a);", access_mode="r") + conn.close() + db.close() + del conn, db + if i % sample == 0: + gc.collect() + samples.append((i, get_rss_mb())) + + gc.collect() + res.before_close = get_rss_mb() + # nothing left to close in this scenario + res.after_close = res.before_close + malloc_trim(0) + res.after_trim = get_rss_mb() + res.slope = linear_slope_per_1000(samples) + return res + + +def scenario_ddl(iters: int, sample: int) -> ScenarioResult: + """C. CREATE NODE TABLE / DROP TABLE alternating cycles.""" + from neug import Database + + res = ScenarioResult("ddl", iters, sample) + with isolated_dbdir("ddl") as db_path: + db = Database(db_path=db_path, mode="w") + conn = db.connect() + + gc.collect() + res.base = get_rss_mb() + samples: List[Tuple[int, float]] = [(0, res.base)] + + for i in range(1, iters + 1): + tname = f"t_{i}" + conn.execute( + f"CREATE NODE TABLE {tname}(id INT64 PRIMARY KEY, name STRING);", + access_mode="s", + ) + conn.execute(f"DROP TABLE {tname};", access_mode="s") + if i % sample == 0: + samples.append((i, get_rss_mb())) + + res.before_close = get_rss_mb() + conn.close() + db.close() + del conn, db + gc.collect() + res.after_close = get_rss_mb() + malloc_trim(0) + res.after_trim = get_rss_mb() + res.slope = linear_slope_per_1000(samples) + return res + + +def scenario_cache(iters: int, sample: int) -> ScenarioResult: + """D. 
Distinct query texts to exercise planner / query cache.""" + from neug import Database + + res = ScenarioResult("cache", iters, sample) + with isolated_dbdir("cache") as db_path: + db = Database(db_path=db_path, mode="w") + conn = db.connect() + conn.execute("CREATE NODE TABLE n(id INT64 PRIMARY KEY, name STRING);") + + gc.collect() + res.base = get_rss_mb() + samples: List[Tuple[int, float]] = [(0, res.base)] + + for i in range(1, iters + 1): + # Inject the loop counter into the query text so each call has a + # distinct hash; this stresses any cache keyed by query text. + conn.execute( + f"MATCH (a:n) WHERE a.id = {i} RETURN a.id, a.name;", + access_mode="r", + ) + if i % sample == 0: + samples.append((i, get_rss_mb())) + + res.before_close = get_rss_mb() + conn.close() + db.close() + del conn, db + gc.collect() + res.after_close = get_rss_mb() + malloc_trim(0) + res.after_trim = get_rss_mb() + res.slope = linear_slope_per_1000(samples) + return res + + +SCENARIOS: Dict[str, Callable[[int, int], ScenarioResult]] = { + "steady": scenario_steady, + "lifecycle": scenario_lifecycle, + "ddl": scenario_ddl, + "cache": scenario_cache, +} + + +# --------------------------------------------------------------------------- +# CLI / reporting. +# --------------------------------------------------------------------------- +def format_summary( + results: List[ScenarioResult], + slope_threshold: float, + delta_threshold: float, +) -> str: + header = ( + f"{'scenario':<11} {'iters':>7} {'base':>9} {'before':>9} " + f"{'after_close':>12} {'after_trim':>11} {'delta':>8} " + f"{'slope/1k':>10} verdict" + ) + lines = [header, "-" * len(header)] + for r in results: + verdict = ( + "LEAK?" 
if r.is_leak(slope_threshold, delta_threshold) else "ok" + ) + lines.append( + f"{r.name:<11} {r.iters:>7d} " + f"{r.base:>9.2f} {r.before_close:>9.2f} " + f"{r.after_close:>12.2f} {r.after_trim:>11.2f} " + f"{r.delta:>+8.2f} {r.slope:>+10.3f} {verdict}" + ) + lines.append("") + lines.append( + f"thresholds: slope > {slope_threshold:.2f} MB/1k cycles " + f"OR after_trim-base > {delta_threshold:.2f} MB => LEAK?" + ) + return "\n".join(lines) + + +def parse_args(argv: List[str]) -> argparse.Namespace: + p = argparse.ArgumentParser( + prog="test_memory_leak", + description=( + "Run RSS-based memory-leak smoke tests against the NeuG Python " + "API. Linux/glibc only. Build NeuG in RELEASE with " + "WITH_MIMALLOC=OFF for meaningful results." + ), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + p.add_argument( + "--iters", type=int, default=2000, + help="Number of loop cycles per scenario.", + ) + p.add_argument( + "--sample", type=int, default=100, + help="Sampling interval — collect RSS every N cycles.", + ) + p.add_argument( + "--scenarios", nargs="+", default=list(SCENARIOS.keys()), + choices=list(SCENARIOS.keys()), + help="Subset of scenarios to run.", + ) + p.add_argument( + "--slope-threshold", type=float, default=0.5, + help="Slope (MB per 1000 cycles) above which the scenario is flagged.", + ) + p.add_argument( + "--delta-threshold", type=float, default=5.0, + help="after_trim - base (MB) above which the scenario is flagged.", + ) + p.add_argument( + "--exit-on-leak", action="store_true", + help="Exit with code 1 when any scenario is flagged as LEAK?.", + ) + return p.parse_args(argv) + + +def main(argv: List[str]) -> int: + args = parse_args(argv) + if args.sample <= 0 or args.iters <= 0: + sys.stderr.write("[test_memory_leak] --iters and --sample must be > 0\n") + return 2 + + print( + f"[test_memory_leak] platform={platform.platform()} " + f"pid={os.getpid()} iters={args.iters} sample={args.sample}" + ) + print(f"[test_memory_leak] 
scenarios={args.scenarios}") + + results: List[ScenarioResult] = [] + for name in args.scenarios: + fn = SCENARIOS[name] + print(f"[test_memory_leak] running scenario: {name} ...", flush=True) + results.append(fn(args.iters, args.sample)) + + print() + print(format_summary(results, args.slope_threshold, args.delta_threshold)) + + if args.exit_on_leak: + leaked = any( + r.is_leak(args.slope_threshold, args.delta_threshold) + for r in results + ) + if leaked: + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/tests/memory_leak/test_node_create.cc b/tests/memory_leak/test_node_create.cc new file mode 100644 index 000000000..eddbfe92d --- /dev/null +++ b/tests/memory_leak/test_node_create.cc @@ -0,0 +1,234 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * NeuG counterpart of DuckDB's tests/memory_leak/test_appender.cpp. + * + * Mirrors the same structure: a single long-running test case that hammers + * the write path with many small batches and triggers a checkpoint every N + * outer iterations. DuckDB exercises its `duckdb_appender_*` C-API; NeuG + * has no Appender equivalent, so we use the closest thing — repeated Cypher + * `CREATE (:test {...})` statements through Connection::Query. + * + * The test does **not** verify memory consumption itself. 
Like the DuckDB + * original it is meant to be observed externally via valgrind / massif / + * heaptrack / RSS sampling (see tests/memory_leak/test_memory_leak.py and + * bin/rss_workload.cc). + * + * Default-skipped: set environment variable + * + * NEUG_RUN_MEMORY_LEAK_TESTS=1 + * + * to actually run. Iteration counts can be lowered for smoke runs via + * + * NEUG_LEAK_OUTER_ITERS= (default 1000) + * NEUG_LEAK_INNER_ROWS= (default 1000) + * NEUG_LEAK_CHECKPOINT_EVERY= (default 500, 0 to disable) + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "neug/main/connection.h" +#include "neug/main/neug_db.h" + +namespace neug { +namespace test { +namespace { + +// Counterpart to DuckDB's TestMemoryLeaks() guard. Returns true only when +// the user has explicitly opted in via NEUG_RUN_MEMORY_LEAK_TESTS=1. Any +// other value (including unset) keeps the test skipped, so default +// `ctest`/`make test` runs are unaffected. +bool RunMemoryLeakTests() { + const char* env = std::getenv("NEUG_RUN_MEMORY_LEAK_TESTS"); + return env != nullptr && std::string(env) == "1"; +} + +int EnvInt(const char* name, int fallback) { + const char* env = std::getenv(name); + if (env == nullptr || env[0] == '\0') { + return fallback; + } + try { + return std::stoi(env); + } catch (...) { + return fallback; + } +} + +// Counterpart to DuckDB's rand_str(): random alphanumeric ASCII (no quotes, +// no escape characters) so we can safely splice it into Cypher literals. 
+void RandStr(std::mt19937& rng, char* dest, std::size_t length) { + static const char charset[] = + "0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + std::uniform_int_distribution dist(0, sizeof(charset) - 2); + for (std::size_t i = 0; i < length; ++i) { + dest[i] = charset[dist(rng)]; + } + dest[length] = '\0'; +} + +NeugDBConfig MakeConfig(const std::string& dir) { + NeugDBConfig config; + config.data_dir = dir; + config.mode = DBMode::READ_WRITE; + // Match the spirit of DuckDB's `set memory_limit='100mb'` — keep the + // background out of our way and let the inner loop drive everything. + config.enable_auto_compaction = false; + config.compact_on_close = false; + config.compact_csr = true; + config.checkpoint_on_close = true; + return config; +} + +void OpenAndConnect(std::unique_ptr& db, + std::shared_ptr& conn, + const std::string& dir) { + db = std::make_unique(); + ASSERT_TRUE(db->Open(MakeConfig(dir))) + << "Failed to open NeuG database at " << dir; + conn = db->Connect(); + ASSERT_NE(conn, nullptr); +} + +void CloseAndRelease(std::unique_ptr& db, + std::shared_ptr& conn) { + if (conn) { + conn->Close(); + conn.reset(); + } + if (db) { + db->Close(); + db.reset(); + } +} + +} // namespace + +// Cypher equivalent of DuckDB's "Test repeated appending small chunks to a +// table" — long-running write workload meant for external memory observers. 
+TEST(MemoryLeakTest, RepeatedCreateNodeChunks) { + if (!RunMemoryLeakTests()) { + GTEST_SKIP() << "memory-leak tests are skipped by default; " + "set NEUG_RUN_MEMORY_LEAK_TESTS=1 to enable."; + } + + const int outer_iters = EnvInt("NEUG_LEAK_OUTER_ITERS", 1000); + const int inner_rows = EnvInt("NEUG_LEAK_INNER_ROWS", 1000); + const int checkpoint_every = EnvInt("NEUG_LEAK_CHECKPOINT_EVERY", 500); + ASSERT_GT(outer_iters, 0); + ASSERT_GT(inner_rows, 0); + + const std::filesystem::path db_path = + std::filesystem::temp_directory_path() / "neug_create_node_leak_test"; + if (std::filesystem::exists(db_path)) { + std::filesystem::remove_all(db_path); + } + std::filesystem::create_directories(db_path); + + std::unique_ptr db; + std::shared_ptr conn; + OpenAndConnect(db, conn, db_path.string()); + + // Schema mirrors DuckDB's "test(col1 varchar, col2 varchar, col3 bigint, + // col4 bigint, col5 double)" with an extra primary key required by NeuG. + { + auto res = conn->Query( + "CREATE NODE TABLE test (id INT64, col1 STRING, col2 STRING, " + "col3 INT64, col4 INT64, col5 DOUBLE, PRIMARY KEY(id));", + "schema"); + ASSERT_TRUE(res) << "Failed to create test table: " + << (res ? "" : res.error().ToString()); + } + + std::mt19937 rng(0xC0FFEEu); + int64_t n1 = 0; + double d1 = 0.5; + int64_t pk = 0; + + const auto t0 = std::chrono::steady_clock::now(); + + for (int i = 0; i < outer_iters; ++i) { + for (int j = 0; j < inner_rows; ++j) { + char str[41]; + RandStr(rng, str, sizeof(str) - 1); + + // Build the CREATE statement. We splice values directly because + // (a) col1 is random alphanumeric so no escaping is needed and + // (b) we want every iteration to produce a distinct query text, + // stressing planner / query-cache code paths just like the + // DuckDB original stresses the appender chunk allocator. 
+ std::string q; + q.reserve(192); + q.append("CREATE (:test {id: "); + q.append(std::to_string(pk++)); + q.append(", col1: '"); + q.append(str); + q.append("', col2: 'hello', col3: "); + q.append(std::to_string(n1++)); + q.append(", col4: "); + q.append(std::to_string(n1++)); + q.append(", col5: "); + q.append(std::to_string(d1)); + q.append("});"); + d1 += 1.25; + + auto res = conn->Query(q, "insert"); + // DuckDB's test FAILs hard on appender errors; mirror that. + ASSERT_TRUE(res) << "CREATE failed at i=" << i << ", j=" << j << ": " + << res.error().ToString(); + } + + if (checkpoint_every > 0 && i % checkpoint_every == 0) { + std::printf("completed %d\n", i); + // NeuG does not expose a CHECKPOINT Cypher statement, so we mimic + // DuckDB's `duckdb_query("checkpoint", ...)` by closing and + // reopening the database — `checkpoint_on_close=true` in the config + // ensures the on-disk image is flushed. + CloseAndRelease(db, conn); + OpenAndConnect(db, conn, db_path.string()); + } + } + + CloseAndRelease(db, conn); + + const auto t1 = std::chrono::steady_clock::now(); + const auto secs = + std::chrono::duration_cast(t1 - t0).count(); + std::printf("[memory_leak] outer=%d inner=%d total_rows=%lld elapsed=%llds\n", + outer_iters, inner_rows, + static_cast(outer_iters) * inner_rows, + static_cast(secs)); + + // Cleanup the on-disk database so repeated runs start fresh. + std::error_code ec; + std::filesystem::remove_all(db_path, ec); + + // Like DuckDB's `REQUIRE(1 == 1)` — the real verdict is delivered + // out-of-band by valgrind / massif / RSS sampling. + EXPECT_TRUE(true); +} + +} // namespace test +} // namespace neug diff --git a/tests/memory_leak/test_temporary_tables.cc b/tests/memory_leak/test_temporary_tables.cc new file mode 100644 index 000000000..27aceab28 --- /dev/null +++ b/tests/memory_leak/test_temporary_tables.cc @@ -0,0 +1,343 @@ +/** Copyright 2020 Alibaba Group Holding Limited. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * NeuG counterpart of DuckDB's tests/memory_leak/test_temporary_tables.cpp. + * + * The DuckDB original contains three [memoryleak]-tagged Catch2 cases that + * exercise three distinct allocation paths: + * + * 1. "Test in-memory database scanning from tables" — bulk-load 1M rows, + * then loop `SELECT *` forever to stress the scan/result allocator. + * 2. "Rollback create table" — repeatedly BEGIN / CREATE TABLE / ROLLBACK + * to stress the catalog DDL undo path. + * 3. "DB temporary table insertion" — persistent 1M-row source table + + * a loop of BEGIN / CREATE OR REPLACE TEMPORARY TABLE / INSERT FROM + * source / ROLLBACK to stress temp-table + insert + rollback. + * + * NeuG has no client-side BEGIN/ROLLBACK statements and no temporary table + * concept, so we keep the *shape* of each scenario and substitute the + * closest available primitive: + * + * - "ROLLBACK" → `DROP TABLE` (catalog deletion). The two are not + * semantically identical (rollback unwinds an in-flight tx, drop + * commits a deletion), but both stress the catalog alloc/free path + * that is the actual subject of the leak test. + * - "TEMPORARY TABLE" → ordinary `NODE TABLE` that is dropped at the + * end of every iteration. + * - "SELECT *" → `MATCH (n:t1) RETURN n.id, n.s`. 
+ * + * Like the DuckDB original these tests do not assert anything about + * memory usage themselves — the verdict is delivered out-of-band by + * valgrind / massif / heaptrack / RSS sampling. Set + * + * NEUG_RUN_MEMORY_LEAK_TESTS=1 + * + * to actually run. Iteration counts are tunable via: + * + * NEUG_LEAK_OUTER_ITERS (default 1000) + * NEUG_LEAK_INNER_ROWS (default 1000) + * NEUG_LEAK_BULK_ROWS (default 100000) -- big-table size + * NEUG_LEAK_SCAN_ITERS (default 200) -- scan loop count + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "neug/main/connection.h" +#include "neug/main/neug_db.h" + +namespace neug { +namespace test { +namespace { + +bool RunMemoryLeakTests() { + const char* env = std::getenv("NEUG_RUN_MEMORY_LEAK_TESTS"); + return env != nullptr && std::string(env) == "1"; +} + +int EnvInt(const char* name, int fallback) { + const char* env = std::getenv(name); + if (env == nullptr || env[0] == '\0') { + return fallback; + } + try { + return std::stoi(env); + } catch (...) 
{ + return fallback; + } +} + +void RandStr(std::mt19937& rng, char* dest, std::size_t length) { + static const char charset[] = + "0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + std::uniform_int_distribution dist(0, sizeof(charset) - 2); + for (std::size_t i = 0; i < length; ++i) { + dest[i] = charset[dist(rng)]; + } + dest[length] = '\0'; +} + +NeugDBConfig MakeConfig(const std::string& dir) { + NeugDBConfig config; + config.data_dir = dir; + config.mode = DBMode::READ_WRITE; + config.enable_auto_compaction = false; + config.compact_on_close = false; + config.compact_csr = true; + config.checkpoint_on_close = true; + return config; +} + +std::filesystem::path FreshDbDir(const char* tag) { + auto p = std::filesystem::temp_directory_path() / + (std::string("neug_temp_table_leak_") + tag); + if (std::filesystem::exists(p)) { + std::filesystem::remove_all(p); + } + std::filesystem::create_directories(p); + return p; +} + +void OpenAndConnect(std::unique_ptr& db, + std::shared_ptr& conn, + const std::string& dir) { + db = std::make_unique(); + ASSERT_TRUE(db->Open(MakeConfig(dir))) + << "Failed to open NeuG database at " << dir; + conn = db->Connect(); + ASSERT_NE(conn, nullptr); +} + +void CloseAndRelease(std::unique_ptr& db, + std::shared_ptr& conn) { + if (conn) { + conn->Close(); + conn.reset(); + } + if (db) { + db->Close(); + db.reset(); + } +} + +// Drain a query result so the executor really materialises every row, +// matching what DuckDB's `con.Query("SELECT * ...")` does implicitly. +void DrainResult(QueryResult& qr) { + std::size_t rows = 0; + for (auto it = qr.begin(); it != qr.end(); ++it) { + ++rows; + } + (void)rows; +} + +} // namespace + +// ============================================================================= +// 1) DuckDB: "Test in-memory database scanning from tables" +// NeuG variant: bulk-load N rows once, then loop MATCH ... RETURN to stress +// the scan and result-allocator paths. 
+// ============================================================================= +TEST(MemoryLeakTempTableTest, RepeatedScanLargeTable) { + if (!RunMemoryLeakTests()) { + GTEST_SKIP() << "memory-leak tests are skipped by default; " + "set NEUG_RUN_MEMORY_LEAK_TESTS=1 to enable."; + } + const int bulk_rows = EnvInt("NEUG_LEAK_BULK_ROWS", 100000); + const int scan_iters = EnvInt("NEUG_LEAK_SCAN_ITERS", 200); + ASSERT_GT(bulk_rows, 0); + ASSERT_GT(scan_iters, 0); + + const auto db_path = FreshDbDir("scan"); + std::unique_ptr db; + std::shared_ptr conn; + OpenAndConnect(db, conn, db_path.string()); + + ASSERT_TRUE(conn->Query( + "CREATE NODE TABLE t1 (id INT64, s STRING, PRIMARY KEY(id));", "schema")) + << "create t1 failed"; + + // One-shot bulk insert, equivalent to DuckDB's + // create table t1 as select i, concat('thisisalongstring', i) ... + std::mt19937 rng(0xBADCAFEu); + for (int i = 0; i < bulk_rows; ++i) { + char str[33]; + RandStr(rng, str, sizeof(str) - 1); + std::string q; + q.reserve(96); + q.append("CREATE (:t1 {id: "); + q.append(std::to_string(i)); + q.append(", s: 'thisisalongstring"); + q.append(str); + q.append("'});"); + auto res = conn->Query(q, "insert"); + ASSERT_TRUE(res) << "bulk insert failed at i=" << i << ": " + << res.error().ToString(); + } + + // Loop scans. DuckDB does `while(true)` and relies on an external + // observer to kill the process; we cap at NEUG_LEAK_SCAN_ITERS so the + // test reaches a well-defined end inside ctest while still giving + // valgrind a long enough window. 
+ const auto t0 = std::chrono::steady_clock::now(); + for (int i = 0; i < scan_iters; ++i) { + auto res = conn->Query("MATCH (n:t1) RETURN n.id, n.s", "read"); + ASSERT_TRUE(res) << "scan failed at i=" << i << ": " + << res.error().ToString(); + DrainResult(res.value()); + } + const auto t1 = std::chrono::steady_clock::now(); + + CloseAndRelease(db, conn); + std::error_code ec; + std::filesystem::remove_all(db_path, ec); + + const auto secs = + std::chrono::duration_cast(t1 - t0).count(); + std::printf("[memleak/scan] bulk_rows=%d scan_iters=%d elapsed=%llds\n", + bulk_rows, scan_iters, static_cast(secs)); + EXPECT_TRUE(true); +} + +// ============================================================================= +// 2) DuckDB: "Rollback create table" +// NeuG variant: CREATE TABLE / DROP TABLE alternating cycles (no rollback). +// ============================================================================= +TEST(MemoryLeakTempTableTest, RepeatedCreateDropTable) { + if (!RunMemoryLeakTests()) { + GTEST_SKIP() << "memory-leak tests are skipped by default; " + "set NEUG_RUN_MEMORY_LEAK_TESTS=1 to enable."; + } + const int outer_iters = EnvInt("NEUG_LEAK_OUTER_ITERS", 1000); + ASSERT_GT(outer_iters, 0); + + const auto db_path = FreshDbDir("ddl"); + std::unique_ptr db; + std::shared_ptr conn; + OpenAndConnect(db, conn, db_path.string()); + + const auto t0 = std::chrono::steady_clock::now(); + for (int i = 0; i < outer_iters; ++i) { + std::string tname = "t_" + std::to_string(i); + { + std::string q = + "CREATE NODE TABLE " + tname + + " (id INT64, name STRING, PRIMARY KEY(id));"; + auto res = conn->Query(q, "schema"); + ASSERT_TRUE(res) << "CREATE failed at i=" << i << ": " + << res.error().ToString(); + } + { + std::string q = "DROP TABLE " + tname + ";"; + auto res = conn->Query(q, "schema"); + ASSERT_TRUE(res) << "DROP failed at i=" << i << ": " + << res.error().ToString(); + } + } + const auto t1 = std::chrono::steady_clock::now(); + + CloseAndRelease(db, conn); + 
std::error_code ec; + std::filesystem::remove_all(db_path, ec); + + const auto secs = + std::chrono::duration_cast(t1 - t0).count(); + std::printf("[memleak/ddl] outer_iters=%d elapsed=%llds\n", outer_iters, + static_cast(secs)); + EXPECT_TRUE(true); +} + +// ============================================================================= +// 3) DuckDB: "DB temporary table insertion" +// NeuG variant: persistent t_src (bulk loaded once) + repeated cycles of +// CREATE NODE TABLE t_sink +// MATCH (s:t_src) CREATE (:t_sink {...}) -- copy-into +// DROP TABLE t_sink +// ============================================================================= +TEST(MemoryLeakTempTableTest, RepeatedTempTableInsertion) { + if (!RunMemoryLeakTests()) { + GTEST_SKIP() << "memory-leak tests are skipped by default; " + "set NEUG_RUN_MEMORY_LEAK_TESTS=1 to enable."; + } + // Use a smaller default for the source table since each outer iteration + // copies it in full. Override via env var if you want to scale up. + const int outer_iters = EnvInt("NEUG_LEAK_OUTER_ITERS", 200); + const int inner_rows = EnvInt("NEUG_LEAK_INNER_ROWS", 1000); + ASSERT_GT(outer_iters, 0); + ASSERT_GT(inner_rows, 0); + + const auto db_path = FreshDbDir("temp_insert"); + + std::unique_ptr db; + std::shared_ptr conn; + OpenAndConnect(db, conn, db_path.string()); + + // Persistent source table — populated once, never dropped. 
+ ASSERT_TRUE(conn->Query( + "CREATE NODE TABLE t_src (id INT64, PRIMARY KEY(id));", "schema")) + << "create t_src failed"; + for (int i = 0; i < inner_rows; ++i) { + std::string q = "CREATE (:t_src {id: " + std::to_string(i) + "});"; + auto res = conn->Query(q, "insert"); + ASSERT_TRUE(res) << "seed failed at i=" << i << ": " + << res.error().ToString(); + } + + const auto t0 = std::chrono::steady_clock::now(); + for (int i = 0; i < outer_iters; ++i) { + { + auto res = conn->Query( + "CREATE NODE TABLE t_sink (id INT64, PRIMARY KEY(id));", "schema"); + ASSERT_TRUE(res) << "CREATE t_sink failed at i=" << i << ": " + << res.error().ToString(); + } + { + // copy-into via single Cypher statement — closest analogue of + // DuckDB's "INSERT INTO t2 SELECT * FROM t1". + auto res = conn->Query( + "MATCH (s:t_src) CREATE (:t_sink {id: s.id});", "insert"); + ASSERT_TRUE(res) << "copy-into failed at i=" << i << ": " + << res.error().ToString(); + } + { + auto res = conn->Query("DROP TABLE t_sink;", "schema"); + ASSERT_TRUE(res) << "DROP t_sink failed at i=" << i << ": " + << res.error().ToString(); + } + } + const auto t1 = std::chrono::steady_clock::now(); + + CloseAndRelease(db, conn); + std::error_code ec; + std::filesystem::remove_all(db_path, ec); + + const auto secs = + std::chrono::duration_cast(t1 - t0).count(); + std::printf( + "[memleak/temp_insert] outer=%d src_rows=%d elapsed=%llds\n", + outer_iters, inner_rows, static_cast(secs)); + EXPECT_TRUE(true); +} + +} // namespace test +} // namespace neug From d74b0d1701d643de390cc1149567b892f9d6f890 Mon Sep 17 00:00:00 2001 From: "nengli.ln" Date: Tue, 9 Jun 2026 15:51:15 +0800 Subject: [PATCH 2/5] update --- tests/memory_leak/test_node_create.cc | 24 -------------------- tests/memory_leak/test_temporary_tables.cc | 26 ---------------------- 2 files changed, 50 deletions(-) diff --git a/tests/memory_leak/test_node_create.cc b/tests/memory_leak/test_node_create.cc index eddbfe92d..39c309f0c 100644 --- 
a/tests/memory_leak/test_node_create.cc +++ b/tests/memory_leak/test_node_create.cc @@ -54,10 +54,6 @@ namespace neug { namespace test { namespace { -// Counterpart to DuckDB's TestMemoryLeaks() guard. Returns true only when -// the user has explicitly opted in via NEUG_RUN_MEMORY_LEAK_TESTS=1. Any -// other value (including unset) keeps the test skipped, so default -// `ctest`/`make test` runs are unaffected. bool RunMemoryLeakTests() { const char* env = std::getenv("NEUG_RUN_MEMORY_LEAK_TESTS"); return env != nullptr && std::string(env) == "1"; @@ -75,8 +71,6 @@ int EnvInt(const char* name, int fallback) { } } -// Counterpart to DuckDB's rand_str(): random alphanumeric ASCII (no quotes, -// no escape characters) so we can safely splice it into Cypher literals. void RandStr(std::mt19937& rng, char* dest, std::size_t length) { static const char charset[] = "0123456789" @@ -93,8 +87,6 @@ NeugDBConfig MakeConfig(const std::string& dir) { NeugDBConfig config; config.data_dir = dir; config.mode = DBMode::READ_WRITE; - // Match the spirit of DuckDB's `set memory_limit='100mb'` — keep the - // background out of our way and let the inner loop drive everything. config.enable_auto_compaction = false; config.compact_on_close = false; config.compact_csr = true; @@ -126,8 +118,6 @@ void CloseAndRelease(std::unique_ptr& db, } // namespace -// Cypher equivalent of DuckDB's "Test repeated appending small chunks to a -// table" — long-running write workload meant for external memory observers. TEST(MemoryLeakTest, RepeatedCreateNodeChunks) { if (!RunMemoryLeakTests()) { GTEST_SKIP() << "memory-leak tests are skipped by default; " @@ -151,8 +141,6 @@ TEST(MemoryLeakTest, RepeatedCreateNodeChunks) { std::shared_ptr conn; OpenAndConnect(db, conn, db_path.string()); - // Schema mirrors DuckDB's "test(col1 varchar, col2 varchar, col3 bigint, - // col4 bigint, col5 double)" with an extra primary key required by NeuG. 
{ auto res = conn->Query( "CREATE NODE TABLE test (id INT64, col1 STRING, col2 STRING, " @@ -174,11 +162,6 @@ TEST(MemoryLeakTest, RepeatedCreateNodeChunks) { char str[41]; RandStr(rng, str, sizeof(str) - 1); - // Build the CREATE statement. We splice values directly because - // (a) col1 is random alphanumeric so no escaping is needed and - // (b) we want every iteration to produce a distinct query text, - // stressing planner / query-cache code paths just like the - // DuckDB original stresses the appender chunk allocator. std::string q; q.reserve(192); q.append("CREATE (:test {id: "); @@ -195,17 +178,12 @@ TEST(MemoryLeakTest, RepeatedCreateNodeChunks) { d1 += 1.25; auto res = conn->Query(q, "insert"); - // DuckDB's test FAILs hard on appender errors; mirror that. ASSERT_TRUE(res) << "CREATE failed at i=" << i << ", j=" << j << ": " << res.error().ToString(); } if (checkpoint_every > 0 && i % checkpoint_every == 0) { std::printf("completed %d\n", i); - // NeuG does not expose a CHECKPOINT Cypher statement, so we mimic - // DuckDB's `duckdb_query("checkpoint", ...)` by closing and - // reopening the database — `checkpoint_on_close=true` in the config - // ensures the on-disk image is flushed. CloseAndRelease(db, conn); OpenAndConnect(db, conn, db_path.string()); } @@ -225,8 +203,6 @@ TEST(MemoryLeakTest, RepeatedCreateNodeChunks) { std::error_code ec; std::filesystem::remove_all(db_path, ec); - // Like DuckDB's `REQUIRE(1 == 1)` — the real verdict is delivered - // out-of-band by valgrind / massif / RSS sampling. 
EXPECT_TRUE(true); } diff --git a/tests/memory_leak/test_temporary_tables.cc b/tests/memory_leak/test_temporary_tables.cc index 27aceab28..f194aa218 100644 --- a/tests/memory_leak/test_temporary_tables.cc +++ b/tests/memory_leak/test_temporary_tables.cc @@ -141,8 +141,6 @@ void CloseAndRelease(std::unique_ptr& db, } } -// Drain a query result so the executor really materialises every row, -// matching what DuckDB's `con.Query("SELECT * ...")` does implicitly. void DrainResult(QueryResult& qr) { std::size_t rows = 0; for (auto it = qr.begin(); it != qr.end(); ++it) { @@ -153,11 +151,6 @@ void DrainResult(QueryResult& qr) { } // namespace -// ============================================================================= -// 1) DuckDB: "Test in-memory database scanning from tables" -// NeuG variant: bulk-load N rows once, then loop MATCH ... RETURN to stress -// the scan and result-allocator paths. -// ============================================================================= TEST(MemoryLeakTempTableTest, RepeatedScanLargeTable) { if (!RunMemoryLeakTests()) { GTEST_SKIP() << "memory-leak tests are skipped by default; " @@ -177,8 +170,6 @@ TEST(MemoryLeakTempTableTest, RepeatedScanLargeTable) { "CREATE NODE TABLE t1 (id INT64, s STRING, PRIMARY KEY(id));", "schema")) << "create t1 failed"; - // One-shot bulk insert, equivalent to DuckDB's - // create table t1 as select i, concat('thisisalongstring', i) ... std::mt19937 rng(0xBADCAFEu); for (int i = 0; i < bulk_rows; ++i) { char str[33]; @@ -195,10 +186,6 @@ TEST(MemoryLeakTempTableTest, RepeatedScanLargeTable) { << res.error().ToString(); } - // Loop scans. DuckDB does `while(true)` and relies on an external - // observer to kill the process; we cap at NEUG_LEAK_SCAN_ITERS so the - // test reaches a well-defined end inside ctest while still giving - // valgrind a long enough window. 
const auto t0 = std::chrono::steady_clock::now(); for (int i = 0; i < scan_iters; ++i) { auto res = conn->Query("MATCH (n:t1) RETURN n.id, n.s", "read"); @@ -219,10 +206,6 @@ TEST(MemoryLeakTempTableTest, RepeatedScanLargeTable) { EXPECT_TRUE(true); } -// ============================================================================= -// 2) DuckDB: "Rollback create table" -// NeuG variant: CREATE TABLE / DROP TABLE alternating cycles (no rollback). -// ============================================================================= TEST(MemoryLeakTempTableTest, RepeatedCreateDropTable) { if (!RunMemoryLeakTests()) { GTEST_SKIP() << "memory-leak tests are skipped by default; " @@ -267,13 +250,6 @@ TEST(MemoryLeakTempTableTest, RepeatedCreateDropTable) { EXPECT_TRUE(true); } -// ============================================================================= -// 3) DuckDB: "DB temporary table insertion" -// NeuG variant: persistent t_src (bulk loaded once) + repeated cycles of -// CREATE NODE TABLE t_sink -// MATCH (s:t_src) CREATE (:t_sink {...}) -- copy-into -// DROP TABLE t_sink -// ============================================================================= TEST(MemoryLeakTempTableTest, RepeatedTempTableInsertion) { if (!RunMemoryLeakTests()) { GTEST_SKIP() << "memory-leak tests are skipped by default; " @@ -312,8 +288,6 @@ TEST(MemoryLeakTempTableTest, RepeatedTempTableInsertion) { << res.error().ToString(); } { - // copy-into via single Cypher statement — closest analogue of - // DuckDB's "INSERT INTO t2 SELECT * FROM t1". 
auto res = conn->Query( "MATCH (s:t_src) CREATE (:t_sink {id: s.id});", "insert"); ASSERT_TRUE(res) << "copy-into failed at i=" << i << ": " From 4fa6c8f3d4234b695521a5c0e6c150d9d89e60e9 Mon Sep 17 00:00:00 2001 From: "nengli.ln" Date: Wed, 10 Jun 2026 10:23:36 +0800 Subject: [PATCH 3/5] u --- .github/workflows/memory-leak-nightly.yml | 7 ------- 1 file changed, 7 deletions(-) diff --git a/.github/workflows/memory-leak-nightly.yml b/.github/workflows/memory-leak-nightly.yml index e0055b4c6..85b96f8a2 100644 --- a/.github/workflows/memory-leak-nightly.yml +++ b/.github/workflows/memory-leak-nightly.yml @@ -99,13 +99,6 @@ jobs: - name: Prepare log dir run: mkdir -p "$LOG_DIR" - # ---------------------------------------------------------------- - # Build NeuG via tools/python_bind/make build (mirrors neug-test.yml). - # The build produces: - # - the python extension under tools/python_bind/build/lib* - # - the C++ binaries (incl. memory-leak gtest) under - # tools/python_bind/build/neug_py_bind/tests/memory_leak/ - # ---------------------------------------------------------------- - name: Build NeuG (RELEASE, BUILD_TEST=ON, WITH_MIMALLOC=OFF) run: | . 
/home/neug/.neug_env From 988b2bf305d9ef9e0c214b6aa2280a630be568e9 Mon Sep 17 00:00:00 2001 From: "nengli.ln" Date: Wed, 10 Jun 2026 14:45:22 +0800 Subject: [PATCH 4/5] fix build_dir --- .github/workflows/memory-leak-nightly.yml | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/memory-leak-nightly.yml b/.github/workflows/memory-leak-nightly.yml index 85b96f8a2..2bbe655fc 100644 --- a/.github/workflows/memory-leak-nightly.yml +++ b/.github/workflows/memory-leak-nightly.yml @@ -129,7 +129,7 @@ jobs: - name: Verify memory-leak gtest binaries exist run: | set -e - BUILD_DIR="${GITHUB_WORKSPACE}/tools/python_bind/build/neug_py_bind" + BUILD_DIR="${GITHUB_WORKSPACE}/build" ls -la "$BUILD_DIR/tests/memory_leak/" || true test -x "$BUILD_DIR/tests/memory_leak/test_node_create" test -x "$BUILD_DIR/tests/memory_leak/test_temporary_tables" @@ -147,7 +147,7 @@ jobs: GLOG_logtostderr: '1' run: | set -o pipefail - ./tools/python_bind/build/neug_py_bind/tests/memory_leak/test_node_create \ + ./build/tests/memory_leak/test_node_create \ --gtest_color=no \ 2>&1 | tee "$LOG_DIR/test_node_create.log" @@ -164,7 +164,7 @@ jobs: GLOG_logtostderr: '1' run: | set -o pipefail - ./tools/python_bind/build/neug_py_bind/tests/memory_leak/test_temporary_tables \ + ./build/tests/memory_leak/test_temporary_tables \ --gtest_color=no \ 2>&1 | tee "$LOG_DIR/test_temporary_tables.log" @@ -173,7 +173,10 @@ jobs: # ---------------------------------------------------------------- - name: python3 tests/memory_leak/test_memory_leak.py env: - PYTHONPATH: ${{ github.workspace }}/tools/python_bind:${{ github.workspace }}/tools/python_bind/build/neug_py_bind + # `tools/python_bind` makes `import neug` work; neug/__init__.py + # then auto-discovers the build/lib.* extdir produced by setup.py, + # so an explicit binary dir on PYTHONPATH is unnecessary. 
+ PYTHONPATH: ${{ github.workspace }}/tools/python_bind run: | set -o pipefail python3 tests/memory_leak/test_memory_leak.py \ From eb89371ce2c5b9a63cc86d5bbdc23c5324b1d0f9 Mon Sep 17 00:00:00 2001 From: "nengli.ln" Date: Wed, 10 Jun 2026 15:40:16 +0800 Subject: [PATCH 5/5] add timeout minutes --- .github/workflows/memory-leak-nightly.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/memory-leak-nightly.yml b/.github/workflows/memory-leak-nightly.yml index 2bbe655fc..8a6551978 100644 --- a/.github/workflows/memory-leak-nightly.yml +++ b/.github/workflows/memory-leak-nightly.yml @@ -37,7 +37,7 @@ jobs: runs-on: ubuntu-22.04 container: image: neug-registry.cn-hongkong.cr.aliyuncs.com/neug/neug-dev:v0.1.2 - timeout-minutes: 180 + timeout-minutes: 300 # In container mode GitHub Actions defaults `run:` shell to `sh -e {0}` # (dash on debian-based images), which does NOT support `set -o pipefail`. # Force bash so the same `set -o pipefail` snippets used elsewhere keep