Skip to content

Commit dedbc75

Browse files
[cross-repo from workflow#407] server + workflow: publish self-serve Helm charts for Kubernetes deployments (#60)
1 parent d980718 commit dedbc75

45 files changed

Lines changed: 3418 additions & 30 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
# Validates the durable-workflow Helm chart: static lint/render/schema checks
# plus a chart-testing lint and a kind-based smoke run.
name: Helm Chart Validation

# Triggered by changes to the chart, the smoke script, or this workflow file;
# can also be started manually.
on:
  push:
    branches:
      - main
    paths:
      - "k8s/helm/**"
      - "scripts/helm-chart-kind-smoke.sh"
      - ".github/workflows/helm-chart-validation.yml"
  pull_request:
    paths:
      - "k8s/helm/**"
      - "scripts/helm-chart-kind-smoke.sh"
      - ".github/workflows/helm-chart-validation.yml"
  workflow_dispatch:

# The workflow token only needs to read repository contents.
permissions:
  contents: read

env:
  CHART_PATH: k8s/helm/durable-workflow
  HELM_VERSION: v3.16.2
  KUBECONFORM_VERSION: v0.6.7
  # Space-separated list; the schema-validation matrix step loops over it.
  KUBE_VERSIONS: "1.27.0 1.28.0 1.29.0 1.30.0"
  # Includes the run id and attempt so each run gets its own kind cluster name.
  HELM_CT_CLUSTER_NAME: dw-helm-ct-${{ github.run_id }}-${{ github.run_attempt }}

jobs:
  # Static checks: lint the chart, render every CI values fixture, and validate
  # the rendered manifests with kubeconform.
  lint-and-template:
    name: Lint, render, and validate manifests
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - name: Checkout server
        uses: actions/checkout@v6

      - name: Set up Helm
        uses: azure/setup-helm@v4.2.0
        with:
          version: ${{ env.HELM_VERSION }}

      # Lint once with chart defaults, then once per ci/*-values.yaml fixture.
      - name: Helm lint
        run: |
          helm lint "${CHART_PATH}"
          for f in "${CHART_PATH}"/ci/*-values.yaml; do
            echo "::group::helm lint with $(basename "$f")"
            helm lint "${CHART_PATH}" -f "$f"
            echo "::endgroup::"
          done

      # Writes one rendered/<fixture>.yaml per fixture; later steps read them.
      - name: Render every CI fixture
        run: |
          mkdir -p rendered
          for f in "${CHART_PATH}"/ci/*-values.yaml; do
            name="$(basename "$f" .yaml)"
            echo "::group::helm template ${name}"
            helm template "rel-${name}" "${CHART_PATH}" \
              --namespace durable-workflow \
              -f "$f" \
              > "rendered/${name}.yaml"
            echo "::endgroup::"
          done

      - name: Validate rendered manifests against Kubernetes schemas
        run: |
          docker run --rm \
            -v "${PWD}/rendered:/manifests:ro" \
            ghcr.io/yannh/kubeconform:${KUBECONFORM_VERSION} \
            -strict \
            -summary \
            -schema-location default \
            -schema-location 'https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json' \
            -kubernetes-version 1.29.0 \
            /manifests

      # Uses the same schema locations as the step above, so a resource that
      # resolves there cannot fail here merely because its schema is missing.
      - name: Validate against the supported Kubernetes version matrix
        run: |
          for kv in ${KUBE_VERSIONS}; do
            echo "::group::kubeconform against ${kv}"
            docker run --rm \
              -v "${PWD}/rendered:/manifests:ro" \
              ghcr.io/yannh/kubeconform:${KUBECONFORM_VERSION} \
              -strict \
              -summary \
              -schema-location default \
              -schema-location 'https://raw.githubusercontent.com/datreeio/CRDs-catalog/main/{{.Group}}/{{.ResourceKind}}_{{.ResourceAPIVersion}}.json' \
              -kubernetes-version "${kv}" \
              /manifests
            echo "::endgroup::"
          done

      # Best effort: runs even after a failed step and never fails the job.
      - name: Upload rendered manifests
        if: always()
        continue-on-error: true
        uses: actions/upload-artifact@v3.2.2
        with:
          name: helm-rendered-manifests
          path: rendered/
          if-no-files-found: error

  # Cluster checks: chart-testing lint, then the smoke script against a kind
  # cluster created by this job.
  ct-lint-install:
    name: chart-testing lint + install (kind)
    runs-on: ubuntu-latest
    timeout-minutes: 30
    env:
      # Single source for the node image used by both the kind setup step and
      # the smoke script, which reuses that cluster.
      KIND_NODE_IMAGE: kindest/node:v1.29.4
    steps:
      - name: Checkout server
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"

      - name: Set up Helm
        uses: azure/setup-helm@v4.2.0
        with:
          version: ${{ env.HELM_VERSION }}

      - name: Set up chart-testing
        uses: helm/chart-testing-action@v2.6.1

      # Runs only when JOB_CONTAINER_NAME is set. Looks up the Docker network
      # of that container and exports it as KIND_EXPERIMENTAL_DOCKER_NETWORK
      # for the following steps.
      - name: Configure kind networking for containerized runners
        if: env.JOB_CONTAINER_NAME != ''
        run: |
          workflow_network="$(docker inspect -f '{{range $k, $v := .NetworkSettings.Networks}}{{printf "%s" $k}}{{end}}' "${JOB_CONTAINER_NAME}")"
          if [ -z "${workflow_network}" ]; then
            echo "unable to determine workflow network for ${JOB_CONTAINER_NAME}" >&2
            exit 1
          fi
          echo "KIND_EXPERIMENTAL_DOCKER_NETWORK=${workflow_network}" >> "${GITHUB_ENV}"

      - name: Set up kind
        uses: helm/kind-action@v1.10.0
        with:
          version: v0.23.0
          node_image: ${{ env.KIND_NODE_IMAGE }}
          cluster_name: ${{ env.HELM_CT_CLUSTER_NAME }}
          wait: 120s

      - name: Export internal kind kubeconfig for containerized runners
        if: env.JOB_CONTAINER_NAME != ''
        run: kind export kubeconfig --name "${HELM_CT_CLUSTER_NAME}" --internal

      # The branch name is passed through the environment instead of being
      # interpolated into the script text. The variable is deliberately not
      # CT_-prefixed, since chart-testing reads CT_* variables as configuration.
      - name: ct lint
        env:
          DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
        run: |
          ct lint \
            --target-branch "${DEFAULT_BRANCH}" \
            --chart-dirs k8s/helm \
            --charts "${CHART_PATH}" \
            --validate-maintainers=false

      # NOTE(review): the K8S_HELM_SMOKE_* variables are consumed by the smoke
      # script, which is not visible here; REUSE_CLUSTER presumably makes it
      # use the cluster created above rather than creating its own. Confirm.
      - name: Run kind smoke script
        env:
          K8S_HELM_SMOKE_KIND_NODE_IMAGE: ${{ env.KIND_NODE_IMAGE }}
          K8S_HELM_SMOKE_CLUSTER: ${{ env.HELM_CT_CLUSTER_NAME }}
          K8S_HELM_SMOKE_REUSE_CLUSTER: "1"
          K8S_HELM_SMOKE_IMAGE: durableworkflow/server:helm-smoke-${{ github.run_id }}-${{ github.run_attempt }}
          K8S_HELM_SMOKE_ARTIFACT_DIR: ${{ runner.temp }}/helm-smoke
        run: scripts/helm-chart-kind-smoke.sh

      # On failure, print the last 200 lines of each known diagnostics file
      # that exists in the smoke artifact directory.
      - name: Print smoke diagnostics
        if: failure()
        run: |
          artifact_dir="${{ runner.temp }}/helm-smoke"
          if [ ! -d "${artifact_dir}" ]; then
            echo "no smoke artifacts collected"
            exit 0
          fi

          for file in \
            helm-status.txt \
            events.txt \
            describe.txt \
            resources.txt \
            migration-job.log \
            server.log \
            worker.log \
            mysql.log \
            redis.log \
            port-forward.log; do
            path="${artifact_dir}/${file}"
            if [ ! -f "${path}" ]; then
              continue
            fi
            echo "::group::${file}"
            tail -n 200 "${path}" || cat "${path}" || true
            echo "::endgroup::"
          done

      # Best effort: a missing artifact directory is ignored and an upload
      # error does not fail the job further.
      - name: Upload smoke artifacts
        if: failure()
        continue-on-error: true
        uses: actions/upload-artifact@v3.2.2
        with:
          name: helm-chart-kind-smoke
          path: ${{ runner.temp }}/helm-smoke
          if-no-files-found: ignore

.github/workflows/kubernetes-validation.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ jobs:
6464

6565
- name: Upload Kubernetes smoke artifacts
6666
if: failure()
67-
uses: actions/upload-artifact@v4
67+
uses: actions/upload-artifact@v3.2.2
6868
with:
6969
name: kubernetes-kind-smoke-artifacts
7070
path: ${{ runner.temp }}/k8s-kind-smoke-artifacts

.github/workflows/server-perf.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ jobs:
6868

6969
- name: Upload perf artifacts
7070
if: always()
71-
uses: actions/upload-artifact@v7
71+
uses: actions/upload-artifact@v3.2.2
7272
with:
7373
name: server-perf-smoke
7474
path: build/perf/
@@ -106,7 +106,7 @@ jobs:
106106

107107
- name: Upload perf artifacts
108108
if: always()
109-
uses: actions/upload-artifact@v7
109+
uses: actions/upload-artifact@v3.2.2
110110
with:
111111
name: server-perf-soak
112112
path: build/perf/

Dockerfile

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,16 @@ RUN chmod +x /usr/local/bin/server-bootstrap /usr/local/bin/server-ensure-sqlite
7474

7575
# Route cache is safe at build time (no env dependency).
7676
# Config cache is deferred to the entrypoint so runtime env vars take effect.
77-
RUN php artisan route:cache
77+
RUN php artisan route:cache \
78+
&& mkdir -p \
79+
storage/logs \
80+
storage/framework/cache/data \
81+
storage/framework/sessions \
82+
storage/framework/views \
83+
storage/framework/testing \
84+
bootstrap/cache \
85+
&& chown -R 1000:1000 storage bootstrap/cache \
86+
&& chmod -R ug+rwX storage bootstrap/cache
7887

7988
LABEL org.opencontainers.image.title="Durable Workflow Server" \
8089
org.opencontainers.image.description="Standalone Durable Workflow server" \

README.md

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ external MySQL or PostgreSQL plus 2 or 3 API nodes behind a stateless load
188188
balancer, shared Redis, and independently scaled external workers. The first
189189
contract requires exactly one scheduler or maintenance runner. SQLite,
190190
Redis-less multi-node mode, duplicate schedulers, rolling upgrades,
191-
active/active multi-region, Helm charts, and provider-specific failover
191+
active/active multi-region, Helm-based Kubernetes deployments, and provider-specific failover
192192
semantics are not part of that first contract.
193193

194194
The CI harness in `docker-compose.small-cluster.yml` runs the MySQL and
@@ -1159,16 +1159,23 @@ docker run --rm --entrypoint sh ghcr.io/durable-workflow/server:0.2.0 -lc \
11591159

11601160
### Kubernetes
11611161

1162-
The raw manifests intentionally stay Kubernetes-native instead of shipping a
1163-
Helm chart. Use Kustomize overlays or direct patches for environment-specific
1164-
names, images, registry secrets, and scaling policy; revisit Helm only when an
1165-
operator needs chart versioning and a chart/image compatibility matrix.
1162+
The published Helm chart in [`k8s/helm/durable-workflow/`](k8s/helm/durable-workflow/)
1163+
is the recommended self-serve path for Kubernetes deployments. The raw
1164+
manifests remain the inspectable low-level alternative for teams that
1165+
intentionally do not want Helm in the rollout.
1166+
1167+
Both paths share the same external-persistence, singleton-scheduler, and
1168+
`/api/ready` readiness contracts. Use Helm values, Kustomize overlays, or
1169+
direct patches for environment-specific names, images, registry secrets, and
1170+
scaling policy.
11661171

11671172
The public manifests default to the pinned Docker Hub image
11681173
`durableworkflow/server:0.2`. For production, patch every workload to the exact
11691174
Docker Hub or GHCR tag or digest you intend to run before applying it. See
1170-
[`k8s/README.md`](k8s/README.md) for the raw-manifest support boundary and
1171-
image-pinning contract.
1175+
[`k8s/README.md`](k8s/README.md) for the raw-manifest support boundary,
1176+
[`docs/helm-validation.md`](docs/helm-validation.md) for the Helm contract and
1177+
validation harness, and [`k8s/helm/durable-workflow/docs/UPGRADING.md`](k8s/helm/durable-workflow/docs/UPGRADING.md)
1178+
for chart upgrade steps.
11721179

11731180
The supported apply order is configuration first, migration second, and
11741181
long-running workloads last. The helper script enforces that order, deletes any

docs/ha-failover-validation.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -326,11 +326,11 @@ pass; the topology itself is part of the product risk:
326326
- Synchronous cross-region database replication (RPO=0).
327327
- Duplicate scheduler/maintenance runners as a steady-state topology.
328328
- Engine-enforced region-pinned task queues as a routing axis.
329-
- Multi-cluster Helm charts and provider-specific managed-Kubernetes
330-
validation. The
331-
[`k8s/`](https://github.com/durable-workflow/server/tree/main/k8s)
332-
manifests stay raw and inspectable; provider-specific HA on top of
333-
them remains a support-led design pass.
329+
- Multi-cluster Helm topologies and provider-specific managed-Kubernetes
330+
validation. The single-cluster self-serve Helm contract lives in
331+
[`docs/helm-validation.md`](helm-validation.md) and
332+
[`k8s/helm/durable-workflow/`](../k8s/helm/durable-workflow/); provider-specific
333+
or multi-cluster HA on top of it remains a support-led design pass.
334334
- Strong "five-nines" or "zero-downtime" SLA promises beyond the
335335
bounded recovery times above. The contract is *bounded recovery
336336
during named events*, not an uptime promise that depends on the

0 commit comments

Comments
 (0)