Tensor cuda is complete, and nn cuda is too #47

Workflow file for this run

	name: CI

	on:
	push:
	branches: [ "main" ]
	pull_request:
	branches: [ "main" ]

	permissions:
	contents: read
	checks: write
	pull-requests: write

	jobs:
	build-and-test:
	name: ${{ matrix.os }}-${{ matrix.compiler }}-${{ matrix.build_type }}
	runs-on: ${{ matrix.os }}

	strategy:
	fail-fast: false
	matrix:
	os: [ubuntu-latest]
	compiler: [clang-20, gcc-14]
	build_type: [Release, Debug]

	steps:
	- uses: actions/checkout@v4

	- name: Set up compiler (Clang)
	if: matrix.compiler == 'clang-20'
	uses: egor-tensin/setup-clang@v1
	with:
	version: 20
	platform: x64

	- name: Set up compiler (GCC)
	if: matrix.compiler == 'gcc-14'
	uses: egor-tensin/setup-gcc@v1
	with:
	version: 14
	platform: x64

	- name: Set compiler environment variables
	run: \|
	if [[ "${{ matrix.compiler }}" == clang* ]]; then
	echo "CC=clang" >> $GITHUB_ENV
	echo "CXX=clang++" >> $GITHUB_ENV
	else
	echo "CC=gcc" >> $GITHUB_ENV
	echo "CXX=g++" >> $GITHUB_ENV
	fi

	- name: Install OpenMP
	run: \|
	sudo apt-get update
	if [[ "${{ matrix.compiler }}" == clang* ]]; then
	sudo apt-get install -y libomp-20-dev
	else
	sudo apt-get install -y libgomp1
	fi

	- name: Set OpenMP environment for Clang
	if: startsWith(matrix.compiler, 'clang')
	run: \|
	echo "OpenMP_ROOT=/usr/lib/llvm-20" >> $GITHUB_ENV

	- name: Set up Python
	uses: actions/setup-python@v5
	with:
	python-version: '3.11'

	- name: Cache Hugging Face model
	id: cache-model
	uses: actions/cache@v4
	with:
	path: ~/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B
	key: ${{ runner.os }}-hf-model-meta-llama-Llama-3.2-1B-v1
	restore-keys: \|
	${{ runner.os }}-hf-model-meta-llama-Llama-3.2-1B-

	- name: Download model from Hugging Face
	if: steps.cache-model.outputs.cache-hit != 'true'
	env:
	HF_TOKEN: ${{ secrets.HF_TOKEN }}
	run: \|
	pip install -q huggingface_hub
	python -c "
	from huggingface_hub import snapshot_download
	snapshot_download(
	repo_id='meta-llama/Llama-3.2-1B',
	token='${{ secrets.HF_TOKEN }}',
	allow_patterns=['*.safetensors', 'tokenizer.json', 'config.json']
	)
	"

	- name: Create model symlink
	run: \|
	mkdir -p tests
	ln -sf ~/.cache/huggingface/hub/models--meta-llama--Llama-3.2-1B/snapshots/*/. tests/model

	- name: Cache CMake dependencies
	uses: actions/cache@v4
	with:
	path: .cmake/fetchcontent
	key: ${{ runner.os }}-${{ matrix.compiler }}-cmake-${{ hashFiles('**/CMakeLists.txt') }}
	restore-keys: \|
	${{ runner.os }}-${{ matrix.compiler }}-cmake-

	- name: Configure CMake
	run: cmake -S . -B build -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}

	- name: Build
	run: cmake --build build --config ${{ matrix.build_type }} --parallel

	- name: Test
	run: ctest --test-dir build --output-on-failure --output-junit test-results.xml

	- name: Publish Test Results
	uses: EnricoMi/publish-unit-test-result-action@v2
	if: always()
	with:
	files: build/test-results.xml
	check_name: Test Results (${{ matrix.os }}-${{ matrix.compiler }}-${{ matrix.build_type }})

	# CUDA compilation check (no GPU required, just verifies code compiles)
	cuda-build:
	name: CUDA Build Check
	runs-on: ubuntu-latest

	steps:
	- uses: actions/checkout@v4

	- name: Install CUDA Toolkit
	uses: Jimver/cuda-toolkit@v0.2.21
	id: cuda-toolkit
	with:
	cuda: '12.8.0'
	method: 'network'
	sub-packages: '["nvcc", "cudart", "thrust"]'
	non-cuda-sub-packages: '["libcublas", "libcublas-dev"]'

	- name: Install OpenMP
	run: \|
	sudo apt-get update
	sudo apt-get install -y libgomp1

	- name: Cache CMake dependencies
	uses: actions/cache@v4
	with:
	path: .cmake/fetchcontent
	key: ${{ runner.os }}-cuda-cmake-${{ hashFiles('**/CMakeLists.txt') }}
	restore-keys: \|
	${{ runner.os }}-cuda-cmake-

	- name: Configure CMake with CUDA
	run: \|
	cmake -S . -B build \
	-DCMAKE_BUILD_TYPE=Release \
	-DTENSOR_BUILD_CUDA=ON \
	-DSKIP_CUDA_TESTS=ON \
	-DCMAKE_CUDA_ARCHITECTURES=89

	- name: Build (including CUDA)
	run: cmake --build build --config Release --parallel

	- name: Run tests (CUDA tests will be skipped)
	run: ctest --test-dir build --output-on-failure

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Tensor cuda is complete, and nn cuda is too #47

Workflow file

Tensor cuda is complete, and nn cuda is too #47

Uh oh!

Workflow file for this run