diff --git a/.buildkite/commands/build-wda.sh b/.buildkite/commands/build-wda.sh
new file mode 100755
index 000000000000..5b079b0c6c30
--- /dev/null
+++ b/.buildkite/commands/build-wda.sh
@@ -0,0 +1,71 @@
+#!/usr/bin/env bash
+# Clone and build WebDriverAgent for iOS Simulator testing.
+#
+# Skips the build only when a usable build-for-testing artifact already exists.
+#
+# Required (one of):
+#   SIMULATOR_UDID   Simulator UDID for the build destination
+#   SIMULATOR_NAME   Simulator name for the build destination (e.g., iPhone 16)
+#
+# Optional:
+#   WEBDRIVERAGENT_REPO_URL  Repo URL (default: appium/WebDriverAgent)
+#   WEBDRIVERAGENT_REF       Git ref or commit to build (default: current remote HEAD / existing checkout)
+
+set -euo pipefail
+
+if [[ -z "${SIMULATOR_UDID:-}" && -z "${SIMULATOR_NAME:-}" ]]; then
+  echo "Error: set SIMULATOR_UDID or SIMULATOR_NAME" >&2
+  exit 1
+fi
+
+WDA_DIR=".build/WebDriverAgent"
+WDA_PROJECT="${WDA_DIR}/WebDriverAgent.xcodeproj"
+WDA_DERIVED_DATA="${WDA_DIR}/DerivedData"
+WEBDRIVERAGENT_REPO_URL="${WEBDRIVERAGENT_REPO_URL:-https://github.com/appium/WebDriverAgent.git}"
+WEBDRIVERAGENT_REF="${WEBDRIVERAGENT_REF:-}"
+
+if [[ -n "${SIMULATOR_UDID:-}" ]]; then
+  DESTINATION="platform=iOS Simulator,id=${SIMULATOR_UDID}"
+else
+  DESTINATION="platform=iOS Simulator,name=${SIMULATOR_NAME}"
+fi
+
+ensure_wda_checkout() {
+  mkdir -p .build
+
+  if [[ ! -d "${WDA_DIR}/.git" ]]; then
+    git clone --depth 1 "${WEBDRIVERAGENT_REPO_URL}" "${WDA_DIR}"
+  fi
+
+  if [[ -n "${WEBDRIVERAGENT_REF}" ]]; then
+    git -C "${WDA_DIR}" fetch --depth 1 origin "${WEBDRIVERAGENT_REF}"
+    # Check out FETCH_HEAD: in a shallow single-branch clone the fetched branch
+    # or tag name may not resolve locally, but FETCH_HEAD always does.
+    git -C "${WDA_DIR}" checkout --detach FETCH_HEAD
+  fi
+}
+
+has_built_artifacts() {
+  [[ -d "${WDA_DERIVED_DATA}/Build/Products" ]] && \
+    find "${WDA_DERIVED_DATA}/Build/Products" -name '*.xctestrun' -print -quit | grep -q .
+}
+
+ensure_wda_checkout
+
+if [[ -d "$WDA_PROJECT" ]] && has_built_artifacts; then
+  echo "WebDriverAgent already built, skipping."
+  exit 0
+fi
+
+xcodebuild build-for-testing \
+  -project "$WDA_PROJECT" \
+  -scheme WebDriverAgentRunner \
+  -destination "$DESTINATION" \
+  -derivedDataPath "$WDA_DERIVED_DATA" \
+  CODE_SIGNING_ALLOWED=NO \
+  | tail -1
+
+if ! has_built_artifacts; then
+  echo "Error: WebDriverAgent build completed without an .xctestrun artifact" >&2
+  exit 1
+fi
diff --git a/.buildkite/commands/run-ai-e2e-tests.sh b/.buildkite/commands/run-ai-e2e-tests.sh
new file mode 100755
index 000000000000..21a03fde6c6c
--- /dev/null
+++ b/.buildkite/commands/run-ai-e2e-tests.sh
@@ -0,0 +1,151 @@
+#!/usr/bin/env bash
+# Run AI-driven E2E tests on an iOS Simulator using simulator-llm-pilot.
+#
+# This script manages the full lifecycle:
+#   1. Check for "Testing" label on PR (Buildkite only, skips if missing)
+#   2. Download build artifacts and install app (Buildkite only)
+#   3. Install the simulator-llm-pilot gem from GitHub
+#   4. Run tests (gem handles simulator, WDA, agent loop, and results)
+#
+# The gem provides a sandboxed agent that drives the simulator through a
+# fixed set of tools (tap, swipe, type, REST API, etc.) — no arbitrary
+# code execution, no shell access.
+#
+# Required environment variables:
+#   ANTHROPIC_API_KEY                 Claude API key
+#   SIMULATOR_LLM_PILOT_SITE_URL      WordPress test site URL
+#   SIMULATOR_LLM_PILOT_USERNAME      WordPress username
+#   SIMULATOR_LLM_PILOT_APP_PASSWORD  WordPress application password
+#
+# Optional environment variables:
+#   APP                               wordpress | jetpack (default: jetpack)
+#   SIMULATOR_NAME                    Simulator to boot if none running (default: iPhone 16)
+#   TEST_DIR                          Test directory (default: Tests/AgentTests/ui-tests)
+#   SIMULATOR_LLM_PILOT_REPO_URL      Remote repo URL for simulator-llm-pilot
+#   SIMULATOR_LLM_PILOT_SOURCE_PATH   Local source checkout override for simulator-llm-pilot
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+cd "$REPO_ROOT"
+
+normalize_site_url() {
+  local site_url="$1"
+  if [[ "$site_url" == http://* || "$site_url" == https://* ]]; then
+    printf '%s' "$site_url"
+  else
+    printf 'https://%s' "$site_url"
+  fi
+}
+
+# ── Label gate (Buildkite only) ─────────────────────────────────────
+# Gate on BUILDKITE, not on the labels variable itself: a PR with no labels has
+# an empty BUILDKITE_PULL_REQUEST_LABELS and must be skipped, not tested.
+if [[ -n "${BUILDKITE:-}" ]]; then
+  echo "--- Checking for 'Testing' label"
+
+  if ! echo ",${BUILDKITE_PULL_REQUEST_LABELS:-}," | grep -qF ",Testing,"; then
+    echo "PR does not have the 'Testing' label. Skipping."
+    echo "Add the label and re-run this step to trigger AI E2E tests."
+    exit 0
+  fi
+  echo "'Testing' label found."
+fi
+
+# ── Required env vars ────────────────────────────────────────────────
+: "${ANTHROPIC_API_KEY:?Set ANTHROPIC_API_KEY}"
+: "${SIMULATOR_LLM_PILOT_SITE_URL:?Set SIMULATOR_LLM_PILOT_SITE_URL}"
+: "${SIMULATOR_LLM_PILOT_USERNAME:?Set SIMULATOR_LLM_PILOT_USERNAME}"
+: "${SIMULATOR_LLM_PILOT_APP_PASSWORD:?Set SIMULATOR_LLM_PILOT_APP_PASSWORD}"
+export SIMULATOR_LLM_PILOT_SITE_URL="$(normalize_site_url "$SIMULATOR_LLM_PILOT_SITE_URL")"
+
+# ── Defaults ─────────────────────────────────────────────────────────
+APP="${APP:-jetpack}"
+export SIMULATOR_NAME="${SIMULATOR_NAME:-iPhone 16}"
+TEST_DIR="${TEST_DIR:-Tests/AgentTests/ui-tests}"
+SIMULATOR_LLM_PILOT_REPO_URL="${SIMULATOR_LLM_PILOT_REPO_URL:-https://github.com/Automattic/simulator-llm-pilot.git}"
+SIMULATOR_LLM_PILOT_SOURCE_PATH="${SIMULATOR_LLM_PILOT_SOURCE_PATH:-}"
+
+case "$APP" in
+  wordpress) APP_BUNDLE_ID="org.wordpress"; APP_DISPLAY_NAME="WordPress" ;;
+  jetpack) APP_BUNDLE_ID="com.automattic.jetpack"; APP_DISPLAY_NAME="Jetpack" ;;
+  *) echo "Error: APP must be 'wordpress' or 'jetpack', got '$APP'" >&2; exit 1 ;;
+esac
+
+APP_INSTRUCTIONS_FILE="${REPO_ROOT}/Tests/AgentTests/app-instructions.md"
+
+# ── Artifact download (Buildkite only) ───────────────────────────────
+if [[ -n "${BUILDKITE:-}" ]]; then
+  echo "--- Downloading Build Artifacts"
+  download_artifact "build-products-${APP}.tar"
+  tar -xf "build-products-${APP}.tar"
+
+  echo "--- Setting up Gems"
+  install_gems
+fi
+
+# ── Install simulator-llm-pilot ──────────────────────────────────────
+echo "--- Installing simulator-llm-pilot"
+bash Scripts/ci/install-simulator-llm-pilot.sh
+echo "simulator-llm-pilot $(simulator-llm-pilot version)"
+
+# ── Resolve simulator (always); install app (Buildkite only) ─────────
+echo "--- Setting up Simulator"
+
+UDID="$(ruby Scripts/ci/find-booted-simulator.rb "$SIMULATOR_NAME" 2>/dev/null || true)"
+if [[ -z "$UDID" ]]; then
+  echo "No booted simulator named '$SIMULATOR_NAME' found. Booting..."
+  xcrun simctl boot "$SIMULATOR_NAME" 2>/dev/null || true
+  UDID="$(ruby Scripts/ci/find-booted-simulator.rb "$SIMULATOR_NAME" 30 1 2>/dev/null || true)"
+fi
+
+if [[ -z "$UDID" ]]; then
+  echo "Error: could not find a booted simulator named '$SIMULATOR_NAME'" >&2
+  exit 1
+fi
+
+export SIMULATOR_UDID="$UDID"
+echo "Simulator UDID: $UDID"
+
+if [[ -n "${BUILDKITE:-}" ]]; then
+  APP_PATH=$(find DerivedData/Build/Products -name "${APP_DISPLAY_NAME}.app" -path "*Debug-iphonesimulator*" | head -1)
+  if [[ -z "$APP_PATH" ]]; then
+    echo "Error: ${APP_DISPLAY_NAME}.app not found in build products" >&2
+    exit 1
+  fi
+  echo "Installing $APP_PATH on simulator..."
+  xcrun simctl install "$UDID" "$APP_PATH"
+fi
+
+# ── Build WebDriverAgent (if not present) ────────────────────────────
+echo "--- Building WebDriverAgent"
+# Use the absolute SCRIPT_DIR: a relative "$0" no longer resolves after the
+# earlier `cd "$REPO_ROOT"`.
+"${SCRIPT_DIR}/build-wda.sh"
+
+# ── Run tests ────────────────────────────────────────────────────────
+echo "--- Running AI E2E Tests"
+
+TIMESTAMP="$(date +%Y-%m-%d-%H%M)"
+RESULTS_DIR="Tests/AgentTests/results/${TIMESTAMP}"
+
+EXIT_CODE=0
+simulator-llm-pilot run "$TEST_DIR" \
+  --app-bundle-id "$APP_BUNDLE_ID" \
+  --app-name "$APP_DISPLAY_NAME" \
+  --app-instructions-file "$APP_INSTRUCTIONS_FILE" \
+  --simulator-udid "$UDID" \
+  --results-dir "$RESULTS_DIR" \
+  || EXIT_CODE=$?
+ +# ── Report results ─────────────────────────────────────────────────── +echo "--- Results" +RESULTS_FILE="${RESULTS_DIR}/results.md" +if [[ -f "$RESULTS_FILE" ]]; then + cat "$RESULTS_FILE" +else + echo "Warning: no results.md found at $RESULTS_FILE" +fi + +exit "$EXIT_CODE" diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 16ef55c325bf..0116632a3f22 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -138,6 +138,24 @@ steps: command: .buildkite/commands/lint-localized-strings-format.sh plugins: [$CI_TOOLKIT_PLUGIN] + ################# + # AI E2E Tests (requires "Testing" label on PR) + ################# + - label: "🤖 AI E2E Tests" + command: .buildkite/commands/run-ai-e2e-tests.sh + depends_on: "build_jetpack" + if: "build.pull_request.id != null" + soft_fail: true + timeout_in_minutes: 60 + plugins: [$CI_TOOLKIT_PLUGIN] + env: + APP: jetpack + artifact_paths: + - "Tests/AgentTests/results/**/*" + notify: + - github_commit_status: + context: "AI E2E Tests" + ################# # Claude Build Analysis - dynamically uploaded so Build result conditions evaluate at runtime after the wait ################# diff --git a/.claude/skills/ai-test-runner/SKILL.md b/.claude/skills/ai-test-runner/SKILL.md index c8f611c1dd9d..0828d9640920 100644 --- a/.claude/skills/ai-test-runner/SKILL.md +++ b/.claude/skills/ai-test-runner/SKILL.md @@ -96,7 +96,7 @@ Use the ios-sim-navigation skill for WDA interaction reference. ## Context -- App Bundle ID: +- App Bundle ID: - WDA Session ID: - Simulator UDID: - Test file: (absolute path) @@ -117,7 +117,7 @@ Use the ios-sim-navigation skill for WDA interaction reference. 2. 
**Relaunch the app** for a clean state: ```bash - xcrun simctl launch --terminate-running-process \ + xcrun simctl launch --terminate-running-process \ -ui-test-site-url \ -ui-test-site-user \ -ui-test-site-pass diff --git a/.claude/skills/ios-sim-navigation/SKILL.md b/.claude/skills/ios-sim-navigation/SKILL.md index 0ec4ab7efab1..96cb6311aeb4 100644 --- a/.claude/skills/ios-sim-navigation/SKILL.md +++ b/.claude/skills/ios-sim-navigation/SKILL.md @@ -366,7 +366,7 @@ If actions consistently fail or the tree looks unexpected, the app may have cras xcrun simctl list devices booted # Re-launch the app -xcrun simctl launch +xcrun simctl launch ``` After re-launching, create a new WDA session before continuing. diff --git a/Scripts/ci/find-booted-simulator.rb b/Scripts/ci/find-booted-simulator.rb new file mode 100755 index 000000000000..67f33992366a --- /dev/null +++ b/Scripts/ci/find-booted-simulator.rb @@ -0,0 +1,38 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require 'json' +require 'open3' + +requested_name = ARGV[0].to_s +wait_seconds = ARGV[1].to_f +poll_interval = ARGV[2].to_f +poll_interval = 1.0 if poll_interval <= 0 +deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + [wait_seconds, 0].max + +loop do + output, status = Open3.capture2('xcrun', 'simctl', 'list', 'devices', 'booted', '-j') + exit 1 unless status.success? + + data = JSON.parse(output) + devices = data.fetch('devices', {}).each_value.flat_map do |list| + list.select { |device| device['state'] == 'Booted' } + end + + device = if requested_name.empty? 
+             devices.first
+           else
+             devices.find { |entry| entry['name'] == requested_name }
+           end
+
+  if device
+    print(device['udid'])
+    exit 0
+  end
+
+  break if wait_seconds <= 0 || Process.clock_gettime(Process::CLOCK_MONOTONIC) >= deadline
+
+  sleep poll_interval
+end
+
+exit 1
diff --git a/Scripts/ci/install-simulator-llm-pilot.sh b/Scripts/ci/install-simulator-llm-pilot.sh
new file mode 100644
index 000000000000..35f3b23bd2d7
--- /dev/null
+++ b/Scripts/ci/install-simulator-llm-pilot.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# Build the simulator-llm-pilot gem from source and install it.
+#
+# Source selection, in order:
+#   1. SIMULATOR_LLM_PILOT_SOURCE_PATH, when set
+#   2. ../simulator-llm-pilot next to this repo, when it has the gemspec
+#   3. a shallow clone of SIMULATOR_LLM_PILOT_REPO_URL
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+DEFAULT_LOCAL_GEM_PATH="$(cd "$REPO_ROOT/.." && pwd)/simulator-llm-pilot"
+
+SIMULATOR_LLM_PILOT_REPO_URL="${SIMULATOR_LLM_PILOT_REPO_URL:-https://github.com/Automattic/simulator-llm-pilot.git}"
+SIMULATOR_LLM_PILOT_SOURCE_PATH="${SIMULATOR_LLM_PILOT_SOURCE_PATH:-}"
+
+build_dir="$(mktemp -d)"
+trap 'rm -rf "$build_dir"' EXIT
+
+source_path="${SIMULATOR_LLM_PILOT_SOURCE_PATH}"
+if [[ -z "$source_path" && -f "${DEFAULT_LOCAL_GEM_PATH}/simulator-llm-pilot.gemspec" ]]; then
+  source_path="${DEFAULT_LOCAL_GEM_PATH}"
+fi
+
+if [[ -n "$source_path" ]]; then
+  echo "Using local simulator-llm-pilot source at ${source_path}"
+  if [[ -d "${source_path}/.git" ]]; then
+    source_revision="$(git -C "${source_path}" rev-parse HEAD)"
+    # Name stdin explicitly (-f -): tar's default archive varies by platform.
+    git -C "${source_path}" archive HEAD | tar -xf - -C "$build_dir"
+  else
+    source_revision="local-filesystem"
+    tar -cf - -C "${source_path}" . | tar -xf - -C "$build_dir"
+  fi
+else
+  echo "Cloning simulator-llm-pilot from ${SIMULATOR_LLM_PILOT_REPO_URL}"
+  git clone --depth 1 "${SIMULATOR_LLM_PILOT_REPO_URL}" "$build_dir"
+  source_revision="$(git -C "$build_dir" rev-parse HEAD)"
+fi
+
+pushd "$build_dir" >/dev/null
+gem build simulator-llm-pilot.gemspec >/dev/null
+shopt -s nullglob
+gem_files=(simulator-llm-pilot-*.gem)
+shopt -u nullglob
+
+if [[ ${#gem_files[@]} -ne 1 ]]; then
+  echo "Error: expected exactly one built simulator-llm-pilot gem, found ${#gem_files[@]}" >&2
+  exit 1
+fi
+
+gem install --no-document --force "./${gem_files[0]}"
+popd >/dev/null
+
+echo "Installed simulator-llm-pilot from ${source_revision}"
diff --git a/Tests/AgentTests/app-instructions.md b/Tests/AgentTests/app-instructions.md
new file mode 100644
index 000000000000..fc9bc168c3b3
--- /dev/null
+++ b/Tests/AgentTests/app-instructions.md
@@ -0,0 +1,13 @@
+## Login
+
+This app uses a self-hosted WordPress site login flow. The app password is
+passed via launch arguments — NEVER type a password manually.
+
+- NEVER tap "Continue with WordPress.com", NEVER enter WordPress.com
+  email/password, and NEVER request a login link.
+- Tap "Enter your existing site address", then enter the site host first
+  (without scheme, for example `example.com`). If the app rejects the
+  host-only form, try the full site URL once.
+- If you reach any WordPress.com email/password screen, back out and
+  return to the self-hosted flow.
+- If the app is already logged in (e.g., My Site tab visible), skip login.