Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/actions/docker-build/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -289,11 +289,13 @@ runs:

# Run parser with all discovered stages
# Usage: parse_buildkit_output.py <output_json> <stage1_name:log_file> [stage2_name:log_file] ... [--metadata=<file>]
# Note: sccache stats are parsed directly from the build logs (from 'use-sccache.sh show-stats' output)
set +e
python3 .github/scripts/parse_buildkit_output.py \
"$COMPREHENSIVE_JSON" \
"${STAGE_ARGS[@]}" \
"--metadata=${CONTAINER_METADATA}"
"--metadata=${CONTAINER_METADATA}" \
--debug
PARSER_EXIT_CODE=$?
set -e

Expand All @@ -315,4 +317,3 @@ runs:
name: build-metrics-${{ inputs.framework }}-${{ inputs.target }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}
path: build-metrics/build-${{ inputs.framework }}-${{ env.PLATFORM_ARCH }}-${{ github.run_id }}-${{ job.check_run_id }}.json
retention-days: 7

153 changes: 152 additions & 1 deletion .github/scripts/parse_buildkit_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
Parse BuildKit output to extract detailed step-by-step metadata.
BuildKit provides rich information about each build step including timing,
cache status, sizes, and layer IDs.

Also parses sccache statistics from the build log output.
"""

import json
Expand All @@ -14,6 +16,113 @@
from typing import Any, Dict, List


# One "=== sccache statistics AFTER <name> ===" section and the text that follows it.
# The body stops at the next BuildKit step marker (DONE / CACHED / "[stage ...]"),
# at the next sccache header, or at end of input. Stopping at the next header
# matters when several builds print their stats inside the same BuildKit step:
# without it those sections would be merged under the first build's name.
_SCCACHE_SECTION_RE = re.compile(
    r"=== sccache statistics AFTER ([^=]+) ===(.*?)"
    r"(?="
    r"=== sccache statistics AFTER "
    r"|#\d+\s+DONE|#\d+\s+CACHED|#\d+\s+\[|$)",
    re.DOTALL,
)

# BuildKit line prefixes: "#43 103.6 " (step + timestamp) and "#43 " (step only).
_BUILDKIT_TIMED_PREFIX_RE = re.compile(r"^#\d+\s+[\d.]+\s+")
_BUILDKIT_STEP_PREFIX_RE = re.compile(r"^#\d+\s+")

# "Key Name   Value" or "Key Name   Value unit", key and value separated by 2+ spaces.
# Key can contain: letters, numbers, (), /, space, hyphen, plus.
_SCCACHE_STAT_RE = re.compile(
    r"^([A-Za-z][A-Za-z0-9() /+\-]+?)\s{2,}([\d.]+)\s*(%|s)?\s*$"
)


def _sccache_key(key_raw: str, unit: Any) -> str:
    """Turn a human-readable sccache label (plus optional unit) into a snake_case key."""
    key = key_raw.lower()
    key = re.sub(r"[+()]", "", key)  # Remove plus and parentheses
    key = re.sub(r"[/\-\s]+", "_", key)  # Replace /, -, spaces with _
    key = re.sub(r"_+", "_", key)  # Collapse multiple underscores
    key = key.strip("_")  # Remove leading/trailing underscores
    if unit == "%":
        return key + "_percent"
    if unit == "s":
        return key + "_seconds"
    return key


def _sccache_value(value_str: str) -> Any:
    """Convert a matched value to int/float, keeping the raw text if it is not a number."""
    try:
        return float(value_str) if "." in value_str else int(value_str)
    except ValueError:
        # e.g. version strings such as "0.8.2"
        return value_str


def parse_sccache_from_log(log_content: str, debug: bool = False) -> Dict[str, Any]:
    """
    Parse sccache statistics from build log output.

    In BuildKit logs, lines are prefixed with step numbers like:
        #43 103.6 === sccache statistics AFTER Dynamo ===
        #43 103.6 Compile requests                   2097
        #43 103.6 Cache hits                         1670
        #43 103.6 Cache hits rate                  100.00 %

    Only the LAST statistics section in the log is used (final stats).

    Args:
        log_content: Full text of a build log.
        debug: When True, print parsing diagnostics to stderr.

    Returns:
        A dict with "build_name" plus one snake_case entry per parsed statistic
        ("%" values get a "_percent" suffix, "s" values a "_seconds" suffix).
        Returns an empty dict when the log contains no sccache section.
    """
    sections = _SCCACHE_SECTION_RE.findall(log_content)

    if not sections:
        if debug:
            print("DEBUG: No sccache sections found in log", file=sys.stderr)
        return {}

    # Use the last sccache section (final stats)
    build_name, stats_block = sections[-1]
    sccache_data: Dict[str, Any] = {"build_name": build_name.strip()}

    if debug:
        print(
            f"DEBUG: Found sccache section for '{build_name.strip()}'", file=sys.stderr
        )
        print(
            f"DEBUG: Stats block (first 1000 chars):\n{stats_block[:1000]}",
            file=sys.stderr,
        )

    # Parse each statistic line
    for raw_line in stats_block.split("\n"):
        line = raw_line.strip()
        if not line:
            continue

        # Remove BuildKit prefix if present: "#43 103.6 " or "#43 "
        line = _BUILDKIT_TIMED_PREFIX_RE.sub("", line)
        line = _BUILDKIT_STEP_PREFIX_RE.sub("", line)
        line = line.strip()
        if not line:
            continue

        # Skip section headers like "Non-cacheable reasons:"
        if line.endswith(":"):
            if debug:
                print(f"DEBUG: Skipping section header: '{line}'", file=sys.stderr)
            continue

        # Examples of lines that match:
        #   Compile requests                   2097
        #   Cache hits (C/C++)                  890
        #   Cache hits rate                  100.00 %
        #   Average cache read hit            0.050 s
        match = _SCCACHE_STAT_RE.match(line)
        if match is None:
            if debug and any(c.isalpha() for c in line):
                # Print non-matching lines in debug mode for troubleshooting
                print(f"DEBUG: No match for line: '{line}'", file=sys.stderr)
            continue

        key = _sccache_key(match.group(1).strip(), match.group(3))
        sccache_data[key] = _sccache_value(match.group(2))

        if debug:
            print(f"DEBUG: Parsed: {key} = {sccache_data[key]}", file=sys.stderr)

    return sccache_data


class BuildKitParser:
"""Parser for BuildKit output logs"""

Expand Down Expand Up @@ -187,9 +296,12 @@ def main():
# Parse arguments to find stage logs and metadata
stage_logs = [] # List of (stage_name, log_file) tuples
container_metadata_file = None
debug_mode = "--debug" in sys.argv

for arg in sys.argv[2:]:
if arg.startswith("--metadata="):
if arg == "--debug":
continue
elif arg.startswith("--metadata="):
container_metadata_file = arg.split("=", 1)[1]
elif ":" in arg:
stage_name, log_file = arg.split(":", 1)
Expand All @@ -209,6 +321,7 @@ def main():
total_steps = 0
total_cached = 0
total_size = 0
sccache_data = {}

# Parse each stage log
for stage_name, log_file in stage_logs:
Expand All @@ -235,6 +348,15 @@ def main():
total_cached += stage_data["container"]["cached_steps"]
total_size += stage_data["container"]["total_size_transferred_bytes"]

# Extract sccache stats from the log (last one wins)
log_sccache = parse_sccache_from_log(log_content, debug=debug_mode)
if log_sccache:
sccache_data = log_sccache
print(
f"✅ Found sccache stats in {stage_name} log: {len(log_sccache)} metrics",
file=sys.stderr,
)

print(
f"✅ Parsed {stage_name} stage: {stage_data['container']['total_steps']} steps",
file=sys.stderr,
Expand Down Expand Up @@ -274,6 +396,16 @@ def main():
except Exception as e:
print(f"Warning: Could not read container metadata: {e}", file=sys.stderr)

# Add sccache statistics (parsed from build logs)
if sccache_data:
build_data["container"]["sccache"] = sccache_data
print(
f"✅ sccache metrics added to container: {len(sccache_data)} metrics",
file=sys.stderr,
)
else:
print("ℹ️ No sccache stats found in build logs", file=sys.stderr)

# Output JSON
try:
with open(output_json, "w") as f:
Expand All @@ -298,6 +430,25 @@ def main():
file=sys.stderr,
)

# Print sccache summary if available
if "sccache" in container:
sccache = container["sccache"]
print("", file=sys.stderr)
print("🔨 sccache Summary:", file=sys.stderr)
if "compile_requests" in sccache:
print(
f" Compile Requests: {sccache['compile_requests']}", file=sys.stderr
)
if "cache_hits" in sccache:
print(f" Cache Hits: {sccache['cache_hits']}", file=sys.stderr)
if "cache_misses" in sccache:
print(f" Cache Misses: {sccache['cache_misses']}", file=sys.stderr)
if "cache_hits_rate_percent" in sccache:
print(
f" Cache Hit Rate: {sccache['cache_hits_rate_percent']:.2f}%",
file=sys.stderr,
)


if __name__ == "__main__":
main()
102 changes: 99 additions & 3 deletions container/use-sccache.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@ usage() {
Usage: $0 [COMMAND] [OPTIONS]

Commands:
install Install sccache binary (requires ARCH_ALT environment variable)
show-stats Display sccache statistics with optional build name
help Show this help message
install Install sccache binary (requires ARCH_ALT environment variable)
show-stats Display sccache statistics with optional build name
export-stats-json Export sccache statistics as JSON to a file
help Show this help message

Environment variables:
USE_SCCACHE Set to 'true' to enable sccache
Expand All @@ -31,6 +32,8 @@ Examples:
ARCH_ALT=x86_64 $0 install
# Show stats with build name
$0 show-stats "UCX"
# Export stats to JSON file
$0 export-stats-json /path/to/output.json "BuildName"
EOF
}

Expand Down Expand Up @@ -59,6 +62,95 @@ show_stats() {
fi
}

export_stats_json() {
    # Export `sccache --show-stats` output as a flat JSON object.
    #
    # Arguments:
    #   $1  output file path               (default: /tmp/sccache-stats.json)
    #   $2  build name, stored under "build_name" (default: unknown)
    #
    # When sccache is not installed a placeholder file is written and the
    # function returns 0, so the surrounding build is not failed.
    local output_file="${1:-/tmp/sccache-stats.json}"
    local build_name="${2:-unknown}"

    # JSON-escape backslashes and double quotes so an arbitrary build name
    # cannot produce an invalid document.
    local build_name_json
    build_name_json=${build_name//\\/\\\\}
    build_name_json=${build_name_json//\"/\\\"}

    if ! command -v sccache >/dev/null 2>&1; then
        # Create empty stats file so COPY commands don't fail
        printf '{"build_name": "%s", "sccache_available": false}\n' "$build_name_json" > "$output_file"
        echo "ℹ️ sccache not available, created placeholder: $output_file"
        return 0  # Don't fail the build
    fi

    # Get raw stats output
    local stats_output
    stats_output=$(sccache --show-stats 2>&1)

    # The shell writes the opening brace, the build name and the closing brace;
    # awk only emits ",\n  \"key\": value" for each parsed statistic. Because the
    # comma is written BEFORE every field, the result is valid JSON even when
    # no statistic line could be parsed (e.g. sccache printed only an error).
    {
        printf '{\n  "build_name": "%s"' "$build_name_json"

        printf '%s\n' "$stats_output" | awk '
            # Candidate lines have the form "Key   Value" or "Key   Value unit"
            /^[A-Za-z]/ {
                key = ""
                value = ""
                unit = ""

                # The first purely numeric field is the value; every field
                # before it belongs to the key.
                for (i = 1; i <= NF; i++) {
                    if ($i ~ /^[0-9.]+$/) {
                        value = $i
                        # Optional unit directly after the value
                        if (i + 1 <= NF && ($(i + 1) == "%" || $(i + 1) == "s")) {
                            unit = $(i + 1)
                        }
                        for (j = 1; j < i; j++) {
                            key = (key == "") ? $j : (key " " $j)
                        }
                        break
                    }
                }

                if (key != "" && value != "") {
                    # Convert key to snake_case
                    gsub(/[()]/, "", key)   # Remove parentheses
                    gsub(/\//, "_", key)    # Replace / with _
                    gsub(/ /, "_", key)     # Replace spaces with _
                    key = tolower(key)

                    # Add unit suffix if applicable
                    if (unit == "%") {
                        key = key "_percent"
                    } else if (unit == "s") {
                        key = key "_seconds"
                    }

                    # Only emit a bare token when it is a valid JSON number
                    # (no leading zeros, at most one decimal point). Anything
                    # else, such as a version string like 0.8.2, is quoted.
                    if (value ~ /^(0|[1-9][0-9]*)(\.[0-9]+)?$/) {
                        printf ",\n  \"%s\": %s", key, value
                    } else {
                        printf ",\n  \"%s\": \"%s\"", key, value
                    }
                }
            }
        '

        printf '\n}\n'
    } > "$output_file"

    echo "✅ sccache stats exported to: $output_file"
}

main() {
case "${1:-help}" in
install)
Expand All @@ -72,6 +164,10 @@ main() {
shift # Remove the command from arguments
show_stats "$@" # Pass all remaining arguments
;;
export-stats-json)
shift # Remove the command from arguments
export_stats_json "$@" # Pass all remaining arguments
;;
help|--help|-h)
usage
;;
Expand Down
Loading