Monitor Self-Hosted Runners #9685

Workflow file for this run

.github/workflows/monitor_runners.yml at 8b40e91

	# Runs every 15 minutes plus once a week on Monday, and can also be started
	# manually with an option to force the weekly report.
	name: Monitor Self-Hosted Runners

	on:
	  # push: # uncomment for testing if on a branch in the azure_scripts repo
	  schedule:
	    # A cron expression needs five fields: minute hour day-of-month month day-of-week.
	    - cron: '*/15 * * * *' # every 15 minutes
	    # The "Check self-hosted runners" step compares github.event.schedule with
	    # this exact string to detect the weekly run; keep the two in sync.
	    - cron: '0 9 * * 1' # weekly report every Monday at 9 AM UTC
	  workflow_dispatch:
	    inputs:
	      send_weekly_report:
	        description: 'Send a weekly report to Zulip'
	        required: false
	        default: false
	        type: boolean

	env:
	  ZULIP_SERVER: "https://leanprover.zulipchat.com"
	  ZULIP_CHANNEL: "CI admins"
	  STATE_FILE: "runner-state.json"
	  STATS_FILE: "runner-stats.json"
	  # Cache namespaces: runs on master use the "-v4" keys, every other ref uses
	  # the "-testing" keys.
	  CACHE_KEY: ${{ github.ref == 'refs/heads/master' && 'runner-monitor-state-v4' || 'runner-monitor-state-testing' }}
	  STATS_CACHE_KEY: ${{ github.ref == 'refs/heads/master' && 'runner-monitor-stats-v4' || 'runner-monitor-stats-testing' }}

	jobs:
	# Single job that restores cached state, polls the runners, manages labels,
	# reports to Zulip and saves the state again.
	monitor-runners:
	runs-on: ubuntu-latest
	steps:
	# Restore the state file written by an earlier run. The primary key ends in
	# the current run id; restore-keys supplies the bare prefix as a fallback.
	- name: Restore previous state
	id: cache-restore
	uses: actions/cache/restore@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
	with:
	path: ${{ env.STATE_FILE }}
	key: ${{ env.CACHE_KEY }}-${{ github.run_id }}
	restore-keys: ${{ env.CACHE_KEY }}

	# Same restore pattern for the statistics history file.
	- name: Restore previous stats
	id: stats-cache-restore
	uses: actions/cache/restore@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
	with:
	path: ${{ env.STATS_FILE }}
	key: ${{ env.STATS_CACHE_KEY }}-${{ github.run_id }}
	restore-keys: ${{ env.STATS_CACHE_KEY }}

	# Bootstrap an empty state file when the restore step's cache-hit output is empty.
	# NOTE(review): presumably cache-hit is empty only when nothing at all was
	# restored (and 'false' on a restore-keys match) - confirm against the
	# actions/cache documentation.
	- name: Create empty state
	if: steps.cache-restore.outputs.cache-hit == ''
	run: \|
	echo "No previous state file found, creating empty state"
	echo '{"last_run": "", "runners": {}}' > "${{ env.STATE_FILE }}"

	# Bootstrap an empty stats file under the same condition for the stats cache.
	- name: Create empty stats
	if: steps.stats-cache-restore.outputs.cache-hit == ''
	run: \|
	echo "No previous stats file found, creating empty stats"
	echo '{"runners": {}, "last_cleanup": ""}' > "${{ env.STATS_FILE }}"

	- name: Check self-hosted runners
	id: check-runners
	run: \|
	current_time=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
	echo "Current time: $current_time"
	echo "Fetching organization runners..."

	# Get all self-hosted runners for the organization (the MONITOR_RUNNERS_GITHUB_TOKEN requires admin:org permissions)
	response=$(curl -s -H "Authorization: token ${{ secrets.MONITOR_RUNNERS_GITHUB_TOKEN }}" \
	-H "Accept: application/vnd.github+json" \
	"https://api.github.com/orgs/${{ github.repository_owner }}/actions/runners")

	# Save response for label management step
	echo "$response" > runners_response.json

	# Load previous state and stats
	previous_state=$(cat "${{ env.STATE_FILE }}")
	previous_stats=$(cat "${{ env.STATS_FILE }}")

	echo "::group::previous_state"
	echo "$previous_state"
	echo "::endgroup::"

	echo "::group::previous_stats"
	echo "$previous_stats"
	echo "::endgroup::"

	# Initialize arrays for notifications
	newly_offline_runners=()
	persistent_offline_runners=()
	back_online_runners=()

	# Initialize new stats structure
	new_stats=$(echo "$previous_stats" \| jq --arg time "$current_time" '.last_cleanup = $time')

	# Clean up stats older than 7 days
	seven_days_ago=$(date -u -d "7 days ago" +"%Y-%m-%dT%H:%M:%SZ")
	new_stats=$(echo "$new_stats" \| jq --arg cutoff "$seven_days_ago" '
	.runners = (.runners // {} \|
	to_entries \|
	map(select(.value.history) \|
	.value.history = (.value.history \| map(select(.timestamp >= $cutoff)))
	) \|
	from_entries
	)
	')
	echo "::group::new_stats after 7-day cleanup"
	echo "$new_stats"
	echo "::endgroup::"

	# Process current runners
	echo "$response" \| jq -r '.runners[] \| "\(.name)\|\(.status)\|\(.busy)\|\(.labels \| map(.name) \| join(","))"' \| while IFS='\|' read -r name status busy labels; do
	if [ -z "$name" ]; then continue; fi
	echo "::group::Processing runner: $name"

	# Determine runner state (Idle, Active, Offline)
	if [ "$status" != "online" ]; then
	runner_state="Offline"
	elif [ "$busy" = "true" ]; then
	runner_state="Active"
	else
	runner_state="Idle"
	fi
	echo "runner_state: $runner_state"

	# Get previous state for this runner
	prev_status=$(echo "$previous_state" \| jq -r --arg name "$name" '.runners[$name].status // "unknown"')
	prev_consecutive=$(echo "$previous_state" \| jq -r --arg name "$name" '.runners[$name].consecutive_offline // 0')

	echo "prev_status: $prev_status"
	echo "prev_consecutive: $prev_consecutive"

	if [ "$status" != "online" ]; then
	# Runner is offline
	if [ "$prev_status" = "online" ] \|\| [ "$prev_status" = "unknown" ]; then
	# First time offline
	consecutive_offline=1
	echo "NEWLY_OFFLINE: $name (labels: $labels)" \| tee -a newly_offline.tmp
	else
	# Still offline, increment counter
	consecutive_offline=$((prev_consecutive + 1))
	if [ "$consecutive_offline" -ge 2 ]; then
	echo "PERSISTENT_OFFLINE: $name,$consecutive_offline,$labels" \| tee -a persistent_offline.tmp
	fi
	fi
	else
	# Runner is online
	consecutive_offline=0
	if [ "$prev_status" = "offline" ] && [ "$prev_consecutive" -ge 2 ]; then
	# Runner came back online
	echo "BACK_ONLINE: $name,$prev_consecutive,$labels" \| tee -a back_online.tmp
	fi
	fi

	# Update state for this runner
	echo "updated state:"
	echo "$name,$status,$consecutive_offline,$labels" \| tee -a current_runners.tmp

	# Update stats with current state
	echo "updated stats:"
	echo "$name,$runner_state,$labels" \| tee -a current_runner_states.tmp
	echo "::endgroup::"
	done

	# Update stats file with all current runner states
	while IFS=',' read -r name state labels; do
	if [ -n "$name" ]; then
	new_stats=$(echo "$new_stats" \| jq --arg name "$name" --arg state "$state" --arg time "$current_time" --arg labels "$labels" '
	.runners[$name] = (.runners[$name] // {"history": [], "labels": ""}) \|
	.runners[$name].labels = $labels \|
	.runners[$name].history += [{"timestamp": $time, "state": $state}]
	')
	fi
	done < current_runner_states.tmp

	echo "::group::new_stats after processing"
	echo "$new_stats"
	echo "::endgroup::"

	# Save updated stats
	echo "$new_stats" > "${{ env.STATS_FILE }}"

	# Build new state file
	new_state=$(echo "$previous_state" \| jq --arg time "$current_time" '.last_run = $time \| .runners = {}')

	if [ -f "current_runners.tmp" ]; then
	while IFS=',' read -r name status consecutive labels; do
	if [ -n "$name" ]; then
	new_state=$(echo "$new_state" \| jq --arg name "$name" --arg status "$status" --argjson consecutive "$consecutive" --arg labels "$labels" \
	'.runners[$name] = {"status": $status, "consecutive_offline": $consecutive, "labels": $labels}')
	fi
	done < current_runners.tmp

	echo "::group::Formatted current runners info"
	cat current_runners.tmp
	echo "::endgroup::"
	fi

	# Save new state
	echo "$new_state" > "${{ env.STATE_FILE }}"

	# Prepare notification messages
	notification_message=""

	# Check for runners that came back online
	if [ -f "back_online.tmp" ] && [ -s "back_online.tmp" ]; then
	notification_message+="✅ [Runners](https://github.com/organizations/${{ github.repository_owner }}/settings/actions/runners) back online:\n\n"
	while IFS=',' read -r name prev_consecutive labels; do
	name=$(echo "$name" \| sed 's/BACK_ONLINE: //')
	if [ -n "$labels" ] && [ "$labels" != "" ]; then
	notification_message+="- \`$name\` (was offline for ${prev_consecutive} checks, labels: \`$labels\`)\n"
	else
	notification_message+="- \`$name\` (was offline for ${prev_consecutive} checks, no labels)\n"
	fi
	done < back_online.tmp
	notification_message+="\n"
	fi

	# Check for persistently offline runners (≥2 consecutive runs)
	if [ -f "persistent_offline.tmp" ] && [ -s "persistent_offline.tmp" ]; then
	notification_message+="⚠️ [Runners](https://github.com/organizations/${{ github.repository_owner }}/settings/actions/runners) offline for multiple checks:\n\n"
	while IFS=',' read -r name consecutive labels; do
	name=$(echo "$name" \| sed 's/PERSISTENT_OFFLINE: //')
	if [ -n "$labels" ] && [ "$labels" != "" ]; then
	notification_message+="- \`$name\` (${consecutive} consecutive checks, labels: \`$labels\`)\n"
	else
	notification_message+="- \`$name\` (${consecutive} consecutive checks, no labels)\n"
	fi
	done < persistent_offline.tmp
	notification_message+="\n"
	fi

	# Check if this is a weekly report trigger
	is_weekly_report="false"
	if [ "${{ github.event.schedule }}" = "0 9 * * 1" ] \|\| [ "${{ toJSON(inputs.send_weekly_report) }}" = "true" ]; then
	is_weekly_report="true"
	fi

	echo "is_weekly_report=$is_weekly_report" >> $GITHUB_OUTPUT

	# Set should_notify output
	if [ -n "$notification_message" ]; then
	echo "should_notify=true" >> $GITHUB_OUTPUT
	else
	echo "should_notify=false" >> $GITHUB_OUTPUT
	fi

	# Save message to output
	echo "message<<EOF" >> $GITHUB_OUTPUT
	echo -e "$notification_message" >> $GITHUB_OUTPUT
	echo "EOF" >> $GITHUB_OUTPUT

	# Clean up temp files
	rm -f newly_offline.tmp persistent_offline.tmp back_online.tmp current_runners.tmp current_runner_states.tmp

	- name: Find out if there are active bors batches
	id: bors_active
	run: \|
	response=$(curl -sf "https://mathlib-bors-ca18eefec4cb.herokuapp.com/api/active-batches") \|\| { echo "result=true" >> "$GITHUB_OUTPUT"; exit 0; }
	length=$(echo "$response" \| jq '.batch_ids \| length')
	echo "result=$([ "$length" -gt 0 ] && echo true \|\| echo false)" >> "$GITHUB_OUTPUT"

	- name: Manage runner labels based on bors status
	id: manage-labels
	run: \|
	bors_active="${{ steps.bors_active.outputs.result }}"
	echo "Bors active: $bors_active"

	# Load the runners response
	response=$(cat runners_response.json)

	# Initialize outputs
	label_summary=""
	label_errors=""

	# Helper function to add a label to a runner
	add_label() {
	local runner_id=$1
	local runner_name=$2
	local label=$3

	local update_response=$(curl -s -X POST \
	-H "Authorization: token ${{ secrets.MONITOR_RUNNERS_GITHUB_TOKEN }}" \
	-H "Accept: application/vnd.github+json" \
	"https://api.github.com/orgs/${{ github.repository_owner }}/actions/runners/$runner_id/labels" \
	-d "{\"labels\":[\"$label\"]}")

	if echo "$update_response" \| jq -e '.labels' > /dev/null 2>&1; then
	label_summary+="🏷️ Added \`$label\` label to runner \`$runner_name\`\n"
	return 0
	else
	echo "ERROR: Failed to add label $label to runner $runner_name"
	echo "Response: $update_response"
	label_errors+="❌ Failed to add \`$label\` label to runner \`$runner_name\`\n"
	return 1
	fi
	}

	# Helper function to remove a label from a runner
	remove_label() {
	local runner_id=$1
	local runner_name=$2
	local label=$3

	local update_response=$(curl -s -X DELETE \
	-H "Authorization: token ${{ secrets.MONITOR_RUNNERS_GITHUB_TOKEN }}" \
	-H "Accept: application/vnd.github+json" \
	"https://api.github.com/orgs/${{ github.repository_owner }}/actions/runners/$runner_id/labels/$label")

	# DELETE returns 204 No Content on success, or 404/422 on error
	if [ $? -eq 0 ]; then
	label_summary+="🏷️ Removed \`$label\` label from runner \`$runner_name\`\n"
	return 0
	else
	echo "ERROR: Failed to remove label $label from runner $runner_name"
	echo "Response: $update_response"
	label_errors+="❌ Failed to remove \`$label\` label from runner \`$runner_name\`\n"
	return 1
	fi
	}

	# Extract runner data (id, name, custom labels)
	echo "Extracting runner data..."
	runner_data=$(echo "$response" \| jq -r '.runners[] \| "\(.id)\|\(.name)\|\([.labels[] \| select(.type == "custom") \| .name] \| join(","))"')

	if [ -z "$runner_data" ]; then
	echo "ERROR: No runners found"
	label_errors+="❌ Label Management Error: No runners found in organization\n\n"
	else
	# Ensure all runners have 'bors' label
	echo "Checking if all runners have 'bors' label..."
	while IFS='\|' read -r runner_id runner_name labels; do
	# Check if 'bors' is present
	if [[ ",$labels," != ",bors," ]]; then
	echo "Adding 'bors' label to runner: $runner_name"
	add_label "$runner_id" "$runner_name" "bors"
	fi
	done <<< "$runner_data"

	# Now manage 'pr' label based on bors status
	if [ "$bors_active" = "true" ]; then
	echo "Managing 'pr' labels (bors active - one runner should NOT have 'pr')..."

	# Check if there's already a runner without 'pr'
	runner_without_pr=$(echo "$response" \| jq -r '
	.runners[] \|
	select(
	([.labels[] \| select(.type == "custom") \| .name] \| contains(["pr"]) \| not)
	) \|
	.name
	' \| head -n 1)

	if [ -n "$runner_without_pr" ]; then
	echo "Runner '$runner_without_pr' already lacks 'pr' label - no changes needed"
	label_summary+="✅ Runner \`$runner_without_pr\` already lacks \`pr\` label (no changes needed)\n"
	else
	echo "All runners have 'pr' - selecting one to remove it from"

	# Find an idle runner first, fall back to any online runner
	selected_runner=$(echo "$response" \| jq -r '.runners[] \| select(.status == "online" and .busy == false) \| "\(.id)\|\(.name)"' \| head -n 1)

	if [ -z "$selected_runner" ]; then
	echo "No idle runner found, selecting any online runner"
	selected_runner=$(echo "$response" \| jq -r '.runners[] \| select(.status == "online") \| "\(.id)\|\(.name)"' \| head -n 1)
	fi

	if [ -z "$selected_runner" ]; then
	echo "ERROR: No online runners found"
	label_errors+="❌ Label Management Error: No online runners available to remove \`pr\` label from\n\n"
	else
	IFS='\|' read -r runner_id runner_name <<< "$selected_runner"
	echo "Removing 'pr' label from runner: $runner_name"
	remove_label "$runner_id" "$runner_name" "pr"
	fi
	fi

	else
	echo "Managing 'pr' labels (bors inactive - all runners should have 'pr')..."

	all_have_pr=true
	while IFS='\|' read -r runner_id runner_name labels; do
	# Check if 'pr' is present
	if [[ ",$labels," != ",pr," ]]; then
	all_have_pr=false
	echo "Adding 'pr' label to runner: $runner_name"
	add_label "$runner_id" "$runner_name" "pr"
	fi
	done <<< "$runner_data"

	if [ "$all_have_pr" = true ]; then
	echo "All runners already have 'pr' label"
	label_summary+="✅ All runners already have \`pr\` label\n"
	fi
	fi
	fi

	# Save summary to output (for logging)
	echo "label_summary<<EOF" >> $GITHUB_OUTPUT
	echo -e "$label_summary" >> $GITHUB_OUTPUT
	echo "EOF" >> $GITHUB_OUTPUT

	# Only set error output if there are actual errors
	if [ -n "$label_errors" ]; then
	echo "label_errors<<EOF" >> $GITHUB_OUTPUT
	echo -e "$label_errors" >> $GITHUB_OUTPUT
	echo "EOF" >> $GITHUB_OUTPUT
	echo "has_label_errors=true" >> $GITHUB_OUTPUT
	else
	echo "label_errors=" >> $GITHUB_OUTPUT
	echo "has_label_errors=false" >> $GITHUB_OUTPUT
	fi

	# Clean up
	rm -f runners_response.json

	# Builds the statistics report from the stats file on every run and exposes it
	# as the `weekly_message` output; whether it is sent is decided by a later step.
	- name: Generate stats report
	id: weekly-stats
	run: \|
	echo "Generating statistics report..."

	# Load stats
	stats=$(cat "${{ env.STATS_FILE }}")

	# Get list of runners that have data
	runners=$(echo "$stats" \| jq -r '.runners \| keys[]' \| sort)

	# Create temporary file for weekly report content
	weekly_report_file="weekly_report.tmp"

	# Write report header (tee's stdout is redirected, so this only creates/truncates the file)
	tee > "$weekly_report_file" << 'EOF'
	📊 Weekly Runner Statistics Report

	EOF

	echo "Period: Last 7 days • Generated: $(date -u +'%Y-%m-%d %H:%M UTC')" \| tee -a "$weekly_report_file"
	echo "" \| tee -a "$weekly_report_file"

	if [ -z "$runners" ]; then
	echo "No runner data available for the past 7 days." \| tee -a "$weekly_report_file"
	else
	# Write table header
	cat >> "$weekly_report_file" << 'EOF'
	\| Runner \| Idle \| Active \| Offline \| Labels \|
	\|--------\|------\|---------\|---------\|--------\|
	EOF

	# Process each runner and write to temp file
	# (the loop runs in a pipeline subshell; rows are appended to the report file)
	echo "$runners" \| while read -r runner; do
	if [ -z "$runner" ]; then continue; fi
	echo "Processing stats for runner: $runner"

	# Calculate percentages using jq
	# Output format: idle%|active%|offline%|labels, each percentage rounded to two decimals
	runner_stats=$(echo "$stats" \| jq -r --arg runner "$runner" '
	.runners[$runner] as $data \|
	($data.history \| length) as $total \|
	if $total == 0 then
	"0.0\|0.0\|0.0\|\($data.labels // "")"
	else
	($data.history \| map(select(.state == "Idle")) \| length) as $idle \|
	($data.history \| map(select(.state == "Active")) \| length) as $active \|
	($data.history \| map(select(.state == "Offline")) \| length) as $offline \|
	(($idle * 100.0 / $total) \| .*100 \| round / 100) as $idle_pct \|
	(($active * 100.0 / $total) \| .*100 \| round / 100) as $active_pct \|
	(($offline * 100.0 / $total) \| .*100 \| round / 100) as $offline_pct \|
	"\($idle_pct)\|\($active_pct)\|\($offline_pct)\|\($data.labels // "")"
	end
	')

	IFS='\|' read -r idle_pct active_pct offline_pct labels <<< "$runner_stats"

	# Format labels for display
	if [ -z "$labels" ] \|\| [ "$labels" = "null" ]; then
	labels_display="-"
	else
	labels_display="\`$labels\`"
	fi

	echo "\| \`$runner\` \| ${idle_pct}% \| ${active_pct}% \| ${offline_pct}% \| $labels_display \|" \| tee -a "$weekly_report_file"
	done

	# Calculate concurrent state statistics
	# Output format: all_idle%|all_busy%, as a share of the distinct sample timestamps
	echo "Calculating concurrent state statistics..."
	concurrent_stats=$(echo "$stats" \| jq -r '
	. as $root \|
	# Get all unique timestamps across all runners
	[.runners[].history[].timestamp] \| unique \| sort as $timestamps \|

	# For each timestamp, check the state of each runner.
	# If a runner does not have a state recorded at a specific timestamp,
	# we use the most recent state before that timestamp,
	# or default to "Offline" if no prior state exists.
	$timestamps \| map(. as $ts \|
	[$root.runners[] \| .history \| map(select(.timestamp <= $ts)) \| last \| .state // "Offline"] \|
	{
	timestamp: $ts,
	all_idle: (all(. == "Idle")),
	all_busy: (all(. == "Active")),
	states: .
	}
	) \|

	# Calculate percentages
	length as $total \|
	if $total == 0 then
	"0.0\|0.0"
	else
	(map(select(.all_idle)) \| length) as $all_idle_count \|
	(map(select(.all_busy)) \| length) as $all_busy_count \|
	(($all_idle_count * 100.0 / $total) \| .*100 \| round / 100) as $all_idle_pct \|
	(($all_busy_count * 100.0 / $total) \| .*100 \| round / 100) as $all_busy_pct \|
	"\($all_idle_pct)\|\($all_busy_pct)"
	end
	')

	IFS='\|' read -r all_idle_pct all_busy_pct <<< "$concurrent_stats"

	# Add concurrent statistics to report
	# (unquoted heredoc delimiter: the ${...} references below are expanded by the shell)
	cat >> "$weekly_report_file" << EOF

	Overall Statistics:
	- All runners idle: ${all_idle_pct}% of monitoring periods
	- All runners busy: ${all_busy_pct}% of monitoring periods

	EOF

	# Add legend and footer
	# (unquoted heredoc delimiter: the $(...) data-point count below is evaluated by the shell)
	tee >> "$weekly_report_file" << EOF

	Legend:
	• Idle: Runner online but not executing jobs
	• Active: Runner online and executing jobs
	• Offline: Runner not responding

	Statistics based on $(echo "$stats" \| jq -r '[.runners[].history[]] \| length') data points collected every 15 minutes.
	EOF
	fi

	# Publish the whole report file as the multi-line `weekly_message` output
	echo "weekly_message<<EOF" >> $GITHUB_OUTPUT
	cat "$weekly_report_file" >> $GITHUB_OUTPUT
	echo "EOF" >> $GITHUB_OUTPUT

	# Clean up temp file
	rm -f "$weekly_report_file"

	# Posts the runner status message to the 'Runner Status' topic, only when the
	# check-runners step set should_notify to 'true'.
	- name: Send status message on Zulip
	if: steps.check-runners.outputs.should_notify == 'true'
	uses: zulip/github-actions-zulip/send-message@e4c8f27c732ba9bd98ac6be0583096dea82feea5 # v1.0.2
	with:
	api-key: ${{ secrets.ZULIP_MONITOR_RUNNERS_API_KEY }}
	email: ${{ secrets.ZULIP_MONITOR_RUNNERS_BOT_EMAIL }}
	organization-url: ${{ env.ZULIP_SERVER }}
	to: ${{ env.ZULIP_CHANNEL }}
	type: 'stream'
	topic: 'Runner Status'
	content: \|
	${{ steps.check-runners.outputs.message }}

	# Posts label management errors to the same topic, only when the manage-labels
	# step set has_label_errors to 'true'.
	- name: Send label management notification on Zulip
	if: steps.manage-labels.outputs.has_label_errors == 'true'
	uses: zulip/github-actions-zulip/send-message@e4c8f27c732ba9bd98ac6be0583096dea82feea5 # v1.0.2
	with:
	api-key: ${{ secrets.ZULIP_MONITOR_RUNNERS_API_KEY }}
	email: ${{ secrets.ZULIP_MONITOR_RUNNERS_BOT_EMAIL }}
	organization-url: ${{ env.ZULIP_SERVER }}
	to: ${{ env.ZULIP_CHANNEL }}
	type: 'stream'
	topic: 'Runner Status'
	content: \|
	${{ steps.manage-labels.outputs.label_errors }}

	# Posts the generated statistics report to the 'Weekly Runner Report' topic,
	# only when the check-runners step set is_weekly_report to 'true'.
	- name: Send weekly report on Zulip
	if: steps.check-runners.outputs.is_weekly_report == 'true'
	uses: zulip/github-actions-zulip/send-message@e4c8f27c732ba9bd98ac6be0583096dea82feea5 # v1.0.2
	with:
	api-key: ${{ secrets.ZULIP_MONITOR_RUNNERS_API_KEY }}
	email: ${{ secrets.ZULIP_MONITOR_RUNNERS_BOT_EMAIL }}
	organization-url: ${{ env.ZULIP_SERVER }}
	to: ${{ env.ZULIP_CHANNEL }}
	type: 'stream'
	topic: 'Weekly Runner Report'
	content: \|
	${{ steps.weekly-stats.outputs.weekly_message }}

	# Saves the state file under a key that ends in the current run id. always()
	# lets the step run even after an earlier step failed; manually dispatched
	# runs are excluded by the event_name check.
	- name: Save state to cache
	if: always() && github.event_name != 'workflow_dispatch'
	uses: actions/cache/save@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
	with:
	path: ${{ env.STATE_FILE }}
	key: ${{ env.CACHE_KEY }}-${{ github.run_id }}

	# Same save rule for the statistics history file.
	- name: Save stats to cache
	if: always() && github.event_name != 'workflow_dispatch'
	uses: actions/cache/save@0400d5f644dc74513175e3cd8d07132dd4860809 # v4.2.4
	with:
	path: ${{ env.STATS_FILE }}
	key: ${{ env.STATS_CACHE_KEY }}-${{ github.run_id }}

	# Uploads both JSON files as a run artifact, for every trigger and regardless
	# of earlier failures.
	- name: upload files as artifact
	if: always()
	uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
	with:
	name: monitor_runners_artifact
	path: \|
	${{ env.STATE_FILE }}
	${{ env.STATS_FILE }}

	- name: Log summary
	run: \|
	echo "=== Runner Monitor Summary ==="
	echo "::group::State file contents:"
	cat "${{ env.STATE_FILE }}" \| jq .
	echo "::endgroup::"

	echo "::group::Stats file contents:"
	cat "${{ env.STATS_FILE }}" \| jq .
	echo "::endgroup::"

	echo ""
	echo "=== Statistics Summary ==="
	runner_count=$(cat "${{ env.STATS_FILE }}" \| jq '.runners \| keys \| length')
	echo "Runners tracked: $runner_count"
	data_points=$(cat "${{ env.STATS_FILE }}" \| jq '[.runners[].history[]] \| length')
	echo "Total data points: $data_points"

	echo ""
	echo "=== Label Management Summary ==="
	echo "Bors active: ${{ steps.bors_active.outputs.result }}"
	cat << 'EOF'
	${{ steps.manage-labels.outputs.label_summary }}
	EOF
	if [ "${{ steps.manage-labels.outputs.has_label_errors }}" = "true" ]; then
	echo "⚠️ Label management errors occurred (see Zulip notification)"
	fi

	echo ""
	echo "::group::=== 7-Day Statistics Report ==="
	cat << 'EOF'
	${{ steps.weekly-stats.outputs.weekly_message }}
	EOF
	echo "::endgroup::"

	if [ "${{ steps.check-runners.outputs.should_notify }}" = "true" ]; then
	echo ""
	echo "📢 Status notifications sent to Zulip"
	else
	echo ""
	echo "✅ No status notifications needed - all runners stable"
	fi

	if [ "${{ steps.check-runners.outputs.is_weekly_report }}" = "true" ]; then
	echo "📊 Weekly report sent to Zulip"
	else
	echo "📊 Weekly report generated but not sent (not scheduled weekly run)"
	fi

	# Separate job that runs only for schedule-triggered events and is granted
	# `actions: write` for the keepalive action.
	# NOTE(review): presumably this stops GitHub from disabling the scheduled
	# workflow after a period of repository inactivity - confirm in the action's README.
	workflow-keepalive:
	if: github.event_name == 'schedule'
	runs-on: ubuntu-latest
	permissions:
	actions: write
	steps:
	- uses: liskin/gh-workflow-keepalive@f72ff1a1336129f29bf0166c0fd0ca6cf1bcb38c # v1.2.1

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Monitor Self-Hosted Runners #9685

Workflow file

Monitor Self-Hosted Runners #9685

Uh oh!

Jobs

Run details

Workflow file for this run