From 41f36171d15f85ee7e957f5b64cfbc0a62b3778e Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Sun, 29 Jun 2025 17:01:29 -0500 Subject: [PATCH 1/9] test pr-specific cached frozen env --- .github/workflows/pr.yaml | 167 +------------------------------------- 1 file changed, 4 insertions(+), 163 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 4f0a51802..dfadbcbbd 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -12,186 +12,27 @@ concurrency: jobs: pr-builder: needs: - - check-nightly-ci - - changed-files - - checks - conda-cpp-build - - devcontainer - - docs-build - conda-cpp-tests - - conda-python-tests - - conda-python-distributed-tests - - wheel-build-libucxx - - wheel-build-ucxx - - wheel-tests-ucxx - - wheel-build-distributed-ucxx - - wheel-tests-distributed-ucxx - - telemetry-setup secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.08 if: always() with: needs: ${{ toJSON(needs) }} - check-nightly-ci: - runs-on: ubuntu-latest - env: - RAPIDS_GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - steps: - - name: Check if nightly CI is passing - uses: rapidsai/shared-actions/check_nightly_success/dispatch@main - with: - repo: ucxx - telemetry-setup: - runs-on: ubuntu-latest - continue-on-error: true - env: - OTEL_SERVICE_NAME: "pr-ucxx" - steps: - - name: Telemetry setup - # This gate is here and not at the job level because we need the job to not be skipped, - # since other jobs depend on it. - if: ${{ vars.TELEMETRY_ENABLED == 'true' }} - uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@main - changed-files: - secrets: inherit - needs: telemetry-setup - uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-25.08 - with: - files_yaml: | - test_cpp: - - '**' - - '!.devcontainer/**' - - '!.pre-commit-config.yaml' - - '!.shellcheckrc' - - '!README.md' - - '!ci/release/update-version.sh' - - '!docs/**' - - '!python/**' - test_python: - - '**' - - '!.devcontainer/**' - - '!.pre-commit-config.yaml' - - '!.shellcheckrc' - - '!README.md' - - '!ci/release/update-version.sh' - - '!docs/**' - checks: - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.08 - needs: telemetry-setup - with: - enable_check_generated_files: false - ignored_pr_jobs: telemetry-summarize conda-cpp-build: - needs: checks secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.08 with: build_type: pull-request script: ci/build_cpp.sh - docs-build: - needs: conda-cpp-build - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-25.08 - with: - build_type: pull-request - node_type: "gpu-l4-latest-1" - arch: "amd64" - container_image: "rapidsai/ci-conda:latest" - script: "ci/build_docs.sh" + conda-cpp-tests: - needs: [conda-cpp-build, changed-files] + needs: [conda-cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-25.08 - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@msarahan/add-prepare-conda-env with: build_type: pull-request container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" script: ci/test_cpp.sh - conda-python-tests: - needs: [conda-cpp-build, changed-files] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.08 - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python - with: - build_type: pull-request - container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" - script: ci/test_python.sh - conda-python-distributed-tests: - needs: [conda-cpp-build, changed-files] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-25.08 - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python - with: - build_type: pull-request - script: "ci/test_python_distributed.sh" - container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" - wheel-build-libucxx: - needs: checks - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 - with: - build_type: pull-request - script: ci/build_wheel_libucxx.sh - # build for every combination of arch and CUDA version, but only for the latest Python - matrix_filter: group_by([.ARCH, (.CUDA_VER|split(".")|map(tonumber)|.[0])]) | map(max_by(.PY_VER|split(".")|map(tonumber))) - package-name: libucxx - package-type: cpp - wheel-build-ucxx: - needs: wheel-build-libucxx - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 - with: - build_type: pull-request - script: ci/build_wheel_ucxx.sh - package-name: ucxx - package-type: python - wheel-tests-ucxx: - needs: [wheel-build-ucxx, changed-files] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python - with: - build_type: pull-request - container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" - script: ci/test_wheel_ucxx.sh - wheel-build-distributed-ucxx: - needs: checks - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-25.08 - with: - build_type: pull-request - script: ci/build_wheel_distributed_ucxx.sh - package-name: distributed_ucxx - package-type: python - wheel-tests-distributed-ucxx: - needs: [wheel-build-ucxx, wheel-build-distributed-ucxx, changed-files] - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-25.08 - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python - with: - build_type: pull-request - container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" - script: ci/test_wheel_distributed_ucxx.sh - devcontainer: - secrets: inherit - needs: [checks, telemetry-setup] - uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-25.08 - with: - arch: '["amd64"]' - cuda: '["12.9"]' - build_command: | - sccache -z; - build-all --verbose; - sccache -s; - - telemetry-summarize: - # This job must use a self-hosted runner to record telemetry traces. - runs-on: linux-amd64-cpu4 - needs: pr-builder - if: ${{ vars.TELEMETRY_ENABLED == 'true' && !cancelled() }} - continue-on-error: true - steps: - - name: Telemetry summarize - uses: rapidsai/shared-actions/telemetry-dispatch-summarize@main + \ No newline at end of file From ad192e70588228be13e8e7232814b63db66bfa09 Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Sun, 29 Jun 2025 17:10:55 -0500 Subject: [PATCH 2/9] change branch name for parseability --- .github/workflows/pr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index dfadbcbbd..735c00927 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -30,7 +30,7 @@ jobs: conda-cpp-tests: needs: [conda-cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@msarahan/add-prepare-conda-env + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@msarahan-add-prepare-conda-env with: build_type: pull-request container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" From 02ae5aec0b3194cfa5e4860fa303c6b4086d96a5 Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Sun, 29 Jun 2025 17:19:55 -0500 Subject: [PATCH 3/9] wrong branch name --- .github/workflows/pr.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 735c00927..25fbf2da1 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -30,7 +30,7 @@ jobs: conda-cpp-tests: needs: [conda-cpp-build] secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@msarahan-add-prepare-conda-env + uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@msarahan-add-prepare-conda-action with: build_type: pull-request container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" From 7cb96046cffed9a66a34d8573af3cbfdfa106971 Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Sun, 29 Jun 2025 18:52:14 -0500 Subject: [PATCH 4/9] empty commit to pick up new shared-workflows From 1194ff238e986b49346bec90ec769130ecebca58 Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Wed, 2 Jul 2025 11:23:55 -0500 Subject: [PATCH 5/9] add looser lower bound for librmm to avoid pre-release version conflicts --- conda/recipes/ucxx/recipe.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/recipes/ucxx/recipe.yaml b/conda/recipes/ucxx/recipe.yaml index f1cef306c..f3b9b3e7b 100644 --- a/conda/recipes/ucxx/recipe.yaml +++ b/conda/recipes/ucxx/recipe.yaml @@ -88,7 +88,7 @@ outputs: - ucx run: - ${{ pin_compatible("cuda-version", upper_bound="x", lower_bound="x") }} - - ${{ pin_compatible("librmm", upper_bound="x.x") }} + - ${{ pin_compatible("librmm", upper_bound="x.x", lower_bound="x.x.x") }} - ucx >=1.15.0,<1.19.0 run_exports: - ${{ pin_subpackage("libucxx", upper_bound="x.x") }} From c7910d5168e6ce5ab60dcbf294f4f85566e3a695 Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Wed, 2 Jul 2025 12:27:18 -0500 Subject: [PATCH 6/9] remove env creation stuff from conda test scripts --- ci/test_cpp.sh | 14 -------------- ci/test_python.sh | 14 -------------- 2 files changed, 28 deletions(-) diff --git a/ci/test_cpp.sh b/ci/test_cpp.sh index 65fe59b6d..95a73c9e2 100755 --- a/ci/test_cpp.sh +++ b/ci/test_cpp.sh @@ -7,20 +7,6 @@ set -euo pipefail source "$(dirname "$0")/test_common.sh" -rapids-logger "Downloading artifacts from previous jobs" -CPP_CHANNEL=$(rapids-download-conda-from-github cpp) - -rapids-logger "Create test conda environment" -. /opt/conda/etc/profile.d/conda.sh - -rapids-dependency-file-generator \ - --output conda \ - --file-key test_cpp \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch)" \ - --prepend-channel "${CPP_CHANNEL}" \ - | tee env.yaml - -rapids-mamba-retry env create --yes -f env.yaml -n test conda activate test rapids-print-env diff --git a/ci/test_python.sh b/ci/test_python.sh index de0d24510..011c6620b 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -7,20 +7,6 @@ set -euo pipefail source "$(dirname "$0")/test_common.sh" -rapids-logger "Downloading artifacts from previous jobs" -CPP_CHANNEL=$(rapids-download-conda-from-github cpp) - -rapids-logger "Create test conda environment" -. /opt/conda/etc/profile.d/conda.sh - -rapids-dependency-file-generator \ - --output conda \ - --file-key test_python \ - --matrix "cuda=${RAPIDS_CUDA_VERSION%.*};arch=$(arch);py=${RAPIDS_PY_VERSION}" \ - --prepend-channel "${CPP_CHANNEL}" \ - | tee env.yaml - -rapids-mamba-retry env create --yes -f env.yaml -n test conda activate test rapids-print-env From f862ecc814e8c843c81994109625bdc6c861e456 Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Wed, 2 Jul 2025 12:34:22 -0500 Subject: [PATCH 7/9] enable conda python tests to check lockfile behavior --- .github/workflows/pr.yaml | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 25fbf2da1..585123fb9 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -10,23 +10,12 @@ concurrency: cancel-in-progress: true jobs: - pr-builder: - needs: - - conda-cpp-build - - conda-cpp-tests - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-25.08 - if: always() - with: - needs: ${{ toJSON(needs) }} - conda-cpp-build: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-build.yaml@branch-25.08 with: build_type: pull-request script: ci/build_cpp.sh - conda-cpp-tests: needs: [conda-cpp-build] secrets: inherit @@ -35,4 +24,19 @@ jobs: build_type: pull-request container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" script: ci/test_cpp.sh - \ No newline at end of file + conda-python-tests: + needs: [conda-cpp-build] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@msarahan-add-prepare-conda-action + with: + build_type: pull-request + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" + script: ci/test_python.sh + conda-python-distributed-tests: + needs: [conda-cpp-build] + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@msarahan-add-prepare-conda-action + with: + build_type: pull-request + script: "ci/test_python_distributed.sh" + container-options: "--cap-add CAP_SYS_PTRACE --shm-size=8g --ulimit=nofile=1000000:1000000" From 6557ba8882430a8609f2e6af4fc45800c229812f Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Wed, 2 Jul 2025 13:01:55 -0500 Subject: [PATCH 8/9] add conda init if /opt/conda exists --- ci/test_common.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/test_common.sh b/ci/test_common.sh index 7c6c80643..d72bb506f 100755 --- a/ci/test_common.sh +++ b/ci/test_common.sh @@ -232,3 +232,7 @@ run_distributed_ucxx_tests_internal() { log_command "${CMD_LINE}" UCXPY_PROGRESS_MODE=${PROGRESS_MODE} UCXPY_ENABLE_DELAYED_SUBMISSION=${ENABLE_DELAYED_SUBMISSION} UCXPY_ENABLE_PYTHON_FUTURE=${ENABLE_PYTHON_FUTURE} timeout 10m python -m pytest -vs python/distributed-ucxx/distributed_ucxx/tests_internal/ } + +if [ -d /opt/conda ]; then + conda init --all --system +fi \ No newline at end of file From 992717d28b45f1aa0b8a67aba863acd908651037 Mon Sep 17 00:00:00 2001 From: Michael Sarahan Date: Wed, 2 Jul 2025 13:15:20 -0500 Subject: [PATCH 9/9] source conda.sh in common --- ci/test_common.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ci/test_common.sh b/ci/test_common.sh index d72bb506f..3896b926c 100755 --- a/ci/test_common.sh +++ b/ci/test_common.sh @@ -235,4 +235,6 @@ run_distributed_ucxx_tests_internal() { if [ -d /opt/conda ]; then conda init --all --system + . /etc/profile.d/conda.sh + hash -r fi \ No newline at end of file