Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions .github/workflows/build_ais.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: AIS # Mono-workflow - May be advantageous to split up
run-name: Build, Test, and Analyze AIS
env:
AIS_MOUNT_PATH: /mnt/ais/ext4
AIS_DOCKER_REGISTRY: ghcr.io/rocm/rocfile
AIS_DOCKER_REGISTRY: ghcr.io/rocm/hipfile
AIS_CI_IMAGE_NAME: ais_ci_${{inputs.platform}}
AIS_PR_BASE_URL: https://github.com/ROCm/rocFile/pull
AIS_PR_BASE_URL: https://github.com/ROCm/hipFile/pull
on:
workflow_call:
inputs:
Expand Down Expand Up @@ -401,3 +401,8 @@ jobs:
if: ${{ always() }}
run: |
docker stop ${AIS_CONTAINER_NAME}
# Calls the reusable workflow ./.github/workflows/hipfile-nvidia.yml once the
# build_AIS_image job has finished, forwarding this workflow's `platform` input.
Run_hipFile_NVIDIA:
uses: ./.github/workflows/hipfile-nvidia.yml
needs: build_AIS_image
with:
platform: ${{inputs.platform}}
147 changes: 147 additions & 0 deletions .github/workflows/hipfile-nvidia.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
# Reusable workflow (workflow_call): starts the AIS CI dev image on an NVIDIA
# runner, configures and builds hipFile with CMAKE_HIP_PLATFORM=nvidia, runs
# the ctest suites labelled "unit" and "system", and uploads logs on failure.
name: hipFile NVIDIA
run-name: Build and run tests on NVIDIA
env:
  AIS_MOUNT_PATH: /mnt/ais/ext4
  AIS_DOCKER_REGISTRY: ghcr.io/rocm/hipfile
  AIS_CI_IMAGE_NAME: ais_ci_${{inputs.platform}}
on:
  workflow_call:
    inputs:
      # Platform suffix used to pick the CI image (ais_ci_<platform>_dev) and
      # to name the uploaded log artifact.
      platform:
        required: true
        type: string
permissions:
  contents: read
  packages: read
jobs:
  NVIDIA_tests:
    runs-on: [linux, NVIDIA]
    steps:
      # The ref is read from the GITHUB_REF environment variable rather than
      # being expanded into the script text via an expression, so its content
      # is never parsed as shell code. sed strips every non-digit character.
      - name: Get PR number and store it as a environment variable
        run: |
          echo "AIS_PR_NUMBER=$(echo "${GITHUB_REF}" | sed 's|[^0-9]||g')" >> "$GITHUB_ENV"
      # Image tag = the ref with every non-alphanumeric character replaced by
      # '-'. The container name combines the PR number exported by the
      # previous step with the job id.
      - name: Set AIS CI image environment variables
        run: |
          echo "AIS_CI_DEV_IMAGE=${{ env.AIS_DOCKER_REGISTRY }}/${{ env.AIS_CI_IMAGE_NAME }}_dev:$(echo "${GITHUB_REF}" \
            | sed 's|[^a-zA-Z0-9]|-|g')" >> "$GITHUB_ENV"
          echo "AIS_CONTAINER_NAME=${AIS_PR_NUMBER}_${{ github.job }}" >> "$GITHUB_ENV"
      - name: Fetching code repository...
        uses: actions/checkout@v5
      - name: Authenticating to GitHub Container Registry
        uses: docker/login-action@v3.6.0
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}
      # Detach the container and run separate commands to it.
      # Thus we can make separate explicit steps in the Github CI
      # as if we were able to parameterize the container image in the first place.
      # The checkout is mounted read-only at /mnt/ais; AIS_MOUNT_PATH is
      # mounted at /mnt/ais-fs. --rm removes the container once it is stopped.
      - name: Starting Docker Container
        run: |
          docker run \
            -dt \
            --rm \
            --ipc host \
            -e NVIDIA_GDS=enabled \
            --runtime=nvidia \
            --gpus all \
            --pull always \
            --cap-add=CAP_SYSLOG \
            -v "$(pwd)":/mnt/ais:ro \
            -v "${{ env.AIS_MOUNT_PATH }}":/mnt/ais-fs \
            --name "${AIS_CONTAINER_NAME}" \
            "${AIS_CI_DEV_IMAGE}"
      # The source mount is read-only, so build from a copy inside the container.
      - name: Make copy of the code repository
        run: |
          docker exec \
            "${AIS_CONTAINER_NAME}" \
            /bin/bash -c '
              cp -R /mnt/ais /ais
              mkdir /ais/build
            '
      # mktemp prints the created directory; it is exported as ROCTMPDIR for
      # the configure and clean-up steps.
      - name: Make temporary directory to run tests in
        run: |
          ROCTMPDIR=$(docker exec \
            "${AIS_CONTAINER_NAME}" \
            /bin/bash -c 'mktemp -d -p /mnt/ais-fs/ci')
          echo "ROCTMPDIR=${ROCTMPDIR}" >> "$GITHUB_ENV"
      # The script is double-quoted so the runner's shell expands ${ROCTMPDIR}
      # before it reaches the container; every inner double quote must
      # therefore be escaped, including the ones around -Werror.
      - name: Generate build files for hipFile targeting the NVIDIA platform
        run: |
          docker exec \
            -w /ais/build \
            "${AIS_CONTAINER_NAME}" \
            /bin/bash -c "
              cmake \
                -DCMAKE_CXX_COMPILER=g++ \
                -DCMAKE_CXX_FLAGS=\"-Werror\" \
                -DBUILD_ROCFILE=OFF \
                -DBUILD_HIPFILE=ON \
                -DCMAKE_HIP_PLATFORM=nvidia \
                -DBUILD_AIS_DOCS=ON \
                -DAIS_CAPABLE_DIR=\"${ROCTMPDIR}\" \
                ..
            "
      - name: Build hipFile for the NVIDIA platform
        run: |
          docker exec \
            -w /ais/build \
            "${AIS_CONTAINER_NAME}" \
            /bin/bash -c '
              cmake --build . --parallel
            '
      # Epoch seconds taken inside the container; used later to limit the
      # dmesg output to messages logged since the tests started.
      - name: Get start time of tests
        run: |
          AIS_START_TIME=$(docker exec \
            "${AIS_CONTAINER_NAME}" \
            /bin/bash -c 'date +%s')
          echo "AIS_START_TIME=${AIS_START_TIME}" >> "$GITHUB_ENV"
      - name: Test hipFile unit and system tests for the NVIDIA platform
        id: unit
        run: |
          docker exec \
            -w /ais/build \
            "${AIS_CONTAINER_NAME}" \
            /bin/bash -c '
              ctest -V -L "unit|system" --parallel
            '
      # Runs only when the test step itself failed: archives every cufile.log
      # found under the build directory, then appends the dmesg output.
      - name: Gather logs
        if: ${{ failure() && steps.unit.conclusion == 'failure' }}
        id: gather
        run: |
          docker exec \
            -w /ais/build \
            "${AIS_CONTAINER_NAME}" \
            /bin/bash -c "
              find -name cufile.log -print0 | tar -cf nvidia-logs.tar --null -T -
              dmesg -T --since \"@${AIS_START_TIME}\" > dmesg.log
              tar -rf nvidia-logs.tar dmesg.log
            "
      - name: Copy nvidia logs from container
        if: ${{ failure() && steps.gather.conclusion == 'success' }}
        id: copy_log
        run: |
          docker cp \
            "${AIS_CONTAINER_NAME}":/ais/build/nvidia-logs.tar \
            nvidia-logs-${{inputs.platform}}.tar
      - name: Upload nvidia logs
        if: ${{ failure() && steps.copy_log.conclusion == 'success' }}
        uses: actions/upload-artifact@v5
        with:
          name: nvidia-logs-${{inputs.platform}}.tar
          path: nvidia-logs-${{inputs.platform}}.tar
          retention-days: 7
      # $ROCTMPDIR is expanded by the runner's shell; the -n guard keeps
      # rm -rf from running with an empty path.
      - name: Clean up temporary directory
        if: ${{ always() }}
        run: |
          docker exec \
            -w /ais/build \
            "${AIS_CONTAINER_NAME}" \
            /bin/bash -c "
              if [ -n \"$ROCTMPDIR\" ] && [ -d \"$ROCTMPDIR\" ]
              then
                rm -rf \"$ROCTMPDIR\"
              fi
            "
      - name: Cleanup & Stop the Docker container
        if: ${{ always() }}
        run: |
          docker stop "${AIS_CONTAINER_NAME}"
2 changes: 1 addition & 1 deletion hipfile/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ endif()

ais_gtest_discover_tests(
hipfile_tests
WORKING_DIRECTORY ${AIS_CAPABLE_DIR}
PROPERTIES LABELS unit
TEST_LIST hipfile_unit_tests
)
Expand All @@ -81,6 +80,7 @@ endif()

gtest_discover_tests(
hipfile_system_tests
EXTRA_ARGS --ais-capable-dir ${AIS_CAPABLE_DIR}
WORKING_DIRECTORY ${AIS_CAPABLE_DIR}
PROPERTIES LABELS system
)
Expand Down
Loading