# Workflow file for CI run #23:
# "demo.py: cap diffuse n_steps_list on CI to prevent GPU timeout"


name: CI

# Triggers: pushes and pull requests targeting main, plus a weekly scheduled run.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  schedule:
    - cron: '0 6 * * 1'  # every Monday at 06:00 UTC

# Least-privilege default for GITHUB_TOKEN. Jobs that need more (see `pages`)
# declare their own `permissions` block, which replaces this one for that job.
permissions:
  contents: read

jobs:
  # Configure, build and run the CMake test target on an Apple Silicon runner.
  build-and-test:
    name: Build & test (Apple Silicon, Metal)
    # macos-14 = M1 runner — actual Apple Silicon hardware, Metal compute works.
    # macos-15 also works but currently 10x more expensive on GitHub's billing.
    runs-on: macos-14
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      # libomp enables OpenMP fallback for CPU benchmarks.
      # cmake from Homebrew ensures a recent enough version (3.21+).
      - name: Install dependencies
        run: brew install cmake libomp
        env:
          HOMEBREW_NO_AUTO_UPDATE: '1'
      # Ensure the Metal shader compiler is accessible via xcrun.
      # This is a no-op on runners that already have Xcode configured.
      - name: Bootstrap Xcode
        run: sudo xcodebuild -runFirstLaunch
      - name: Configure CMake
        run: cmake -B build
      - name: Build
        run: cmake --build build --parallel
      - name: Test (GPU)
        run: cmake --build build --target test -- ARGS="--output-on-failure"

  # Lint, format-check and type-check the Python sources.
  python-quality:
    name: Python quality (Ruff + Ty)
    # Runs on a plain Linux runner — no Metal device needed.
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v5
      - name: Ruff lint
        run: uvx ruff check python/ demo.py wave_demo.py
      - name: Ruff format check
        run: uvx ruff format --check python/ demo.py wave_demo.py
      - name: Ty type check
        run: uvx --with numpy ty check python/

  # Build the pybind11 module and exercise each exposed operation once.
  python-bindings:
    name: Python bindings smoke test (Apple Silicon, Metal)
    runs-on: macos-14
    needs: build-and-test  # skip if the core build fails
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Install dependencies
        run: brew install cmake libomp
        env:
          HOMEBREW_NO_AUTO_UPDATE: '1'
      - name: Bootstrap Xcode
        run: sudo xcodebuild -runFirstLaunch
      - name: Set up Python venv
        run: |
          python3 -m venv .venv
          .venv/bin/pip install --quiet numpy pybind11
      # Point CMake at the pybind11 installed in the venv so the Python module is built.
      - name: Configure & build with pybind11
        run: |
          PYBIND11_DIR=$(.venv/bin/python3 -c "import pybind11; print(pybind11.get_cmake_dir())")
          cmake -B build -Dpybind11_DIR="$PYBIND11_DIR"
          cmake --build build --parallel
      # The script is fed to python3 on stdin via a quoted heredoc, so the shell
      # performs no expansion inside it.
      - name: Smoke test Python bindings
        run: |
          .venv/bin/python3 - <<'EOF'
          import sys
          sys.path.insert(0, "build")
          import numpy as np
          import m1_gpu_ops as metal
          ctx = metal.MetalContext()
          ctx.load_library("build/02-GeneralArrayOperations/ops.metallib")
          ctx.load_library("build/03-2DKernels/ops.metallib")
          ctx.load_library("build/04-Compute/ops.metallib")
          ctx.load_library("build/05-WavePropagation/ops.metallib")
          # 1D ops — verify values, not just shapes
          x = np.ones(1024, dtype=np.float32)
          y = np.ones(1024, dtype=np.float32) * 2
          assert np.allclose(metal.add_arrays(ctx, x, y), 3.0), "add_arrays wrong values"
          assert np.allclose(metal.saxpy(ctx, 2.0, x, y), 4.0), "saxpy wrong values"
          # 2D Laplacian — shape + numerical correctness (flat field → zero interior)
          u = np.random.rand(64, 64).astype(np.float32)
          lap = metal.laplacian2d(ctx, u)
          assert lap.shape == (64, 64), "laplacian2d shape wrong"
          flat = np.ones((64, 64), dtype=np.float32)
          lap_flat = metal.laplacian2d(ctx, flat)
          assert np.allclose(lap_flat[1:-1, 1:-1], 0.0, atol=1e-5), "laplacian2d nonzero on flat field"
          # Mandelbrot — signature: (ctx, width, height, x_min, x_max, y_min, y_max, max_iter)
          img = metal.mandelbrot(ctx, 128, 128, -2.0, 1.0, -1.5, 1.5, 256)
          assert img.shape == (128, 128), "mandelbrot shape wrong"
          # N-body (one step) — pos_mass: (N,4) [x,y,z,mass], velocities: (N,4)
          pos_mass = np.random.rand(64, 4).astype(np.float32)
          pos_mass[:, 3] = 1.0 # mass column
          vel = np.zeros((64, 4), dtype=np.float32)
          pos2, vel2 = metal.nbody_step(ctx, pos_mass, vel, dt=0.01, softening=0.1)
          assert pos2.shape == (64, 4), f"nbody_step shape wrong: {pos2.shape}"
          # Elastic wave (small grid, few steps)
          nx, nz = 50, 50
          vp = np.full((nx, nz), 3000.0, dtype=np.float32)
          vs = np.full((nx, nz), 1800.0, dtype=np.float32)
          rho = np.full((nx, nz), 2700.0, dtype=np.float32)
          # Source wavelet: 20 samples, single unit impulse at sample 5.
          wvl = np.zeros(20, dtype=np.float32)
          wvl[5] = 1.0
          rx = np.array([30, 35, 40], dtype=np.int32)
          rz = np.array([25, 25, 25], dtype=np.int32)
          sv, sz, _, _ = metal.elastic_wave_propagate(
              ctx, vp, vs, rho, 25, 10, wvl, rx, rz,
              dx=10.0, dz=10.0, dt=1e-3, n_boundary=5
          )
          assert sv.shape == (3, 20), f"seismogram shape wrong: {sv.shape}"
          print("All smoke tests passed.")
          EOF

  # Render the demo scripts to HTML, add an index page, and publish to GitHub Pages.
  pages:
    name: Render notebooks → GitHub Pages
    runs-on: macos-14
    needs: build-and-test
    # Only publish on main branch pushes and scheduled runs — not on PRs.
    if: github.event_name != 'pull_request'
    # A job-level `permissions` block sets every scope not listed here to `none`,
    # so `contents: read` must be spelled out for actions/checkout to work on
    # non-public repositories. `pages` and `id-token` are required by deploy-pages.
    permissions:
      contents: read
      pages: write
      id-token: write
    # Serialize deployments so a push and the scheduled run cannot publish
    # concurrently; an in-progress deployment is allowed to finish.
    concurrency:
      group: pages
      cancel-in-progress: false
    environment:
      name: github-pages
      url: ${{ steps.deploy.outputs.page_url }}
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - name: Install dependencies
        run: brew install cmake libomp
        env:
          HOMEBREW_NO_AUTO_UPDATE: '1'
      - name: Bootstrap Xcode
        run: sudo xcodebuild -runFirstLaunch
      - name: Set up Python venv
        run: |
          python3 -m venv .venv
          .venv/bin/pip install --quiet \
            numpy pybind11 matplotlib jupytext nbconvert
      - name: Build with pybind11
        run: |
          PYBIND11_DIR=$(.venv/bin/python3 -c "import pybind11; print(pybind11.get_cmake_dir())")
          cmake -B build -Dpybind11_DIR="$PYBIND11_DIR"
          cmake --build build --parallel
      - name: Render notebooks to HTML
        # Run cells with exec() in-process — avoids the Jupyter kernel subprocess
        # which cannot reliably access the Metal GPU on macOS CI runners.
        # plt.show() is patched to capture figures as base64 PNG cell outputs.
        env:
          MPLBACKEND: Agg
        run: .venv/bin/python3 scripts/render_notebooks.py _site demo.py wave_demo.py
      # Writes _site/index.html linking to demo.html and wave_demo.html, stamped
      # with the date of the run.
      - name: Create index page
        run: |
          .venv/bin/python3 - << 'PYEOF'
          import datetime, pathlib
          date = datetime.date.today().isoformat()
          html = """<!DOCTYPE html>
          <html lang="en">
          <head>
          <meta charset="UTF-8">
          <meta name="viewport" content="width=device-width, initial-scale=1.0">
          <title>m1-gpu-cpp demos</title>
          <style>
          body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif;
          max-width: 680px; margin: 48px auto; padding: 0 24px;
          color: #24292e; line-height: 1.6; }
          h1 { font-size: 1.6em; border-bottom: 1px solid #e1e4e8; padding-bottom: 10px; }
          .card { border: 1px solid #e1e4e8; border-radius: 6px;
          padding: 16px 20px; margin: 12px 0; }
          .card h2 { font-size: 1.1em; margin: 0 0 6px; }
          .card h2 a { color: #0366d6; text-decoration: none; }
          .card h2 a:hover { text-decoration: underline; }
          .card p { margin: 0; color: #586069; font-size: 0.9em; }
          footer { margin-top: 32px; color: #586069; font-size: 0.85em; }
          </style>
          </head>
          <body>
          <h1>m1-gpu-cpp &mdash; Metal GPU Computing Demos</h1>
          <p>GPU-accelerated scientific computing on Apple Silicon using Metal Shading
          Language, exposed to Python via pybind11.</p>
          <div class="card">
          <h2><a href="demo.html">General operations demo</a></h2>
          <p>1D/2D array ops &middot; Laplacian stencils &middot; Mandelbrot
          &middot; N-body &middot; heat diffusion</p>
          </div>
          <div class="card">
          <h2><a href="wave_demo.html">Elastic wave propagation demo</a></h2>
          <p>2D elastic FD (Virieux staggered-grid) &middot; seismograms
          &middot; wavefield snapshots</p>
          </div>
          <footer>
          Rendered DATE_PLACEHOLDER by CI &middot;
          <a href="https://github.com/larsgeb/m1-gpu-cpp">source on GitHub</a>
          </footer>
          </body>
          </html>
          """.replace("DATE_PLACEHOLDER", date)
          # Explicit encoding so the file matches the charset declared in the page.
          pathlib.Path("_site/index.html").write_text(html, encoding="utf-8")
          PYEOF
      - name: Upload Pages artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: _site
      - name: Deploy to GitHub Pages
        id: deploy
        uses: actions/deploy-pages@v4