# Skip to content

# wip: tunercheck action #12

# wip: tunercheck action

# wip: tunercheck action #12

# Workflow file for this run

# Build Checks: installs the CUDA toolkit and the AWS EFA software stack,
# builds the plugin from the checked-out source, and then runs the
# `show-tuner-decisions` tool against the freshly built tuner library once per
# forced product name.
name: Build Checks
on: [push, pull_request]

# The workflow token only needs read access.
permissions:
  contents: read
  pull-requests: read

jobs:
  tuner-decisions-check:
    runs-on: ubuntu-22.04
    steps:
      - name: Install Dependencies
        run: |
          # Drop the old NVIDIA repo key and register the repo through the
          # cuda-keyring package instead.
          sudo apt-key del 7fa2af80
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb
          sudo apt-get update
          # -y is required: there is no TTY on the runner, so apt-get would
          # otherwise abort at its confirmation prompt.
          sudo apt-get install -y cuda-toolkit libhwloc-dev
          # uv is used by the check steps below to run show-tuner-decisions.
          pip install uv

      - name: Fetch and Install EFA Installer
        run: |
          # --fail makes an HTTP error stop the step here rather than saving
          # an error page that tar would then choke on.
          curl --fail --location --remote-name \
            https://efa-installer.amazonaws.com/aws-efa-installer-latest.tar.gz
          tar -xf aws-efa-installer-*.tar.gz
          pushd aws-efa-installer/
          # NOTE(review): --skip-kmod presumably skips the kernel module, which
          # a hosted runner cannot use; confirm against the installer docs.
          sudo ./efa_installer.sh -y --skip-kmod
          popd

      - uses: actions/checkout@v4

      - name: Build Plugin
        run: |
          set -x
          # actions/checkout@v4 places the plugin source in $PWD, so build
          # in place. The MPI and libfabric paths point under /opt/amazon,
          # which is presumably where the EFA installer step put them.
          ./autogen.sh
          ./configure --with-mpi=/opt/amazon/openmpi \
                      --with-libfabric=/opt/amazon/efa \
                      --with-cuda=/usr/local/cuda/ \
                      --enable-platform-aws \
                      --prefix="$PWD/install"
          make -j 2
          make install

      # The two checks below are identical except for the forced product name.
      # NOTE(review): OFI_NCCL_FORCE_PRODUCT_NAME presumably makes the tuner
      # behave as if it were running on that instance type; confirm.
      - name: P5en Check
        working-directory: contrib/python
        env:
          OFI_NCCL_FORCE_PRODUCT_NAME: p5en.48xlarge
        run: |
          uv run show-tuner-decisions ../../install/lib/libnccl-ofi-tuner.so \
            --min-ranks-per-node 1 --max-ranks-per-node 8 \
            --min-nnodes 2 --max-nnodes 2048

      - name: P5 Check
        working-directory: contrib/python
        env:
          OFI_NCCL_FORCE_PRODUCT_NAME: p5.48xlarge
        run: |
          uv run show-tuner-decisions ../../install/lib/libnccl-ofi-tuner.so \
            --min-ranks-per-node 1 --max-ranks-per-node 8 \
            --min-nnodes 2 --max-nnodes 2048