Fetch from products.json endpoints #19
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Runs `npm run pipeline` on a schedule (or on demand) and publishes the generated
# `data/` directory to the `data` branch. Opens an issue when the run fails.
name: Fetch from products.json endpoints

on:
  # Run weekly on Wednesday at 15:00 UTC (7 AM PST / 8 AM PDT)
  schedule:
    - cron: '0 15 * * 3'
  # Allow manual trigger
  workflow_dispatch:

permissions:
  contents: write # Required for committing changes
  issues: write # Required for creating issues on failure

# A run that starts while another run in this group is active waits for it;
# the in-progress run is never cancelled.
concurrency:
  group: data-branch-writer
  cancel-in-progress: false

jobs:
  extract-and-process:
    runs-on: ubuntu-latest
    env:
      PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          # Use the PAT when the secret is configured, otherwise the default workflow token.
          token: ${{ env.PAT_TOKEN != '' && env.PAT_TOKEN || github.token }}

      - name: Setup Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'npm'

      - name: Install dependencies
        run: npm ci

      - name: Create .env file
        env:
          CONVEX_URL: ${{ secrets.CONVEX_URL }}
        run: |
          # The secret is passed through the environment so the shell never parses its value.
          printf 'CONVEX_URL=%s\n' "$CONVEX_URL" > .env

      - name: Run pipeline
        id: pipeline
        # Later steps branch on the recorded exit code, so a pipeline failure must not stop the job here.
        continue-on-error: true
        run: |
          set +e # Don't exit on error
          npm run pipeline
          EXIT_CODE=$?
          echo "📊 Pipeline exit code: $EXIT_CODE"
          echo "exit_code=$EXIT_CODE" >> "$GITHUB_OUTPUT"
          exit $EXIT_CODE

      - name: Update latest symlinks
        if: steps.pipeline.outputs.exit_code == '0'
        run: npm run update-symlinks

      - name: Get current date
        id: date
        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"

      - name: Commit and push results
        if: steps.pipeline.outputs.exit_code == '0'
        run: |
          DATA_BRANCH="data"
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

          # Debug: Show what files exist
          echo "📁 Checking data directory contents:"
          ls -la data/ || echo "data/ directory doesn't exist"

          # Prepare a separate worktree for the data branch so we don't push artifacts to main.
          git fetch origin "${DATA_BRANCH}" || true
          rm -rf ../data-branch
          if git show-ref --verify --quiet "refs/remotes/origin/${DATA_BRANCH}"; then
            git worktree add ../data-branch "origin/${DATA_BRANCH}"
          else
            # First run: the remote branch does not exist yet, so start an empty orphan branch.
            git worktree add ../data-branch --detach
            cd ../data-branch
            git checkout --orphan "${DATA_BRANCH}"
            git rm -rf . || true
            cd -
          fi

          # Mirror the freshly generated data/ into the worktree (removing files that no longer exist).
          rsync -a --delete --exclude '.git' data/ ../data-branch/data/

          cd ../data-branch
          cat > README.md << 'EOF'
          This branch contains generated YogaMatLabData pipeline outputs under `data/`.
          It is updated automatically by GitHub Actions.
          EOF
          git add -A README.md

          # Use --force to override .gitignore (data/ is ignored locally but not in CI)
          # Exclude volatile per-run state files that cause merge conflicts when multiple runs happen.
          git add --force data/ ':(exclude)data/.hash-registry.json' ':(exclude)data/changes/latest-changeset.json'

          # Debug: Show what's staged
          echo "📦 Staged changes:"
          git status

          # Check if there are changes to commit
          if git diff --staged --quiet; then
            echo "⚠️ No changes to commit"
          else
            echo "✅ Changes detected, preparing commit..."

            # Read changeset summary for commit message.
            # The changeset is excluded from staging above but is still present in the worktree.
            CHANGESET_FILE="data/changes/latest-changeset.json"
            if [ -f "$CHANGESET_FILE" ]; then
              NEW_PRODUCTS=$(jq -r '.summary.newProducts' "$CHANGESET_FILE")
              REMOVED_PRODUCTS=$(jq -r '.summary.removedProducts' "$CHANGESET_FILE")
              PRICE_CHANGES=$(jq -r '.summary.priceChanges' "$CHANGESET_FILE")
              TOTAL=$(jq -r '.summary.totalChanges' "$CHANGESET_FILE")

              # printf writes the values verbatim, so no sed templating (which would
              # break on values containing "/" or "&") is needed.
              printf '%s\n' \
                "Data update: ${{ steps.date.outputs.date }}" \
                "" \
                "Changes detected:" \
                "- New products: ${NEW_PRODUCTS}" \
                "- Removed products: ${REMOVED_PRODUCTS}" \
                "- Price changes: ${PRICE_CHANGES}" \
                "- Total changes: ${TOTAL}" \
                "" \
                "Generated with YogaMatLab Data Pipeline" \
                "Run: ${{ github.run_number }}" \
                > /tmp/commit_msg.txt

              git commit -F /tmp/commit_msg.txt
            else
              git commit -m "Data update: ${{ steps.date.outputs.date }}" \
                -m "Generated with YogaMatLab Data Pipeline" \
                -m "Run: ${{ github.run_number }}"
            fi

            # The worktree HEAD may be detached, so push it explicitly to the data branch.
            git push origin HEAD:"${DATA_BRANCH}"
          fi

      - name: Upload extraction logs
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: extraction-logs-${{ steps.date.outputs.date }}
          path: logs/
          retention-days: 30

      - name: Create issue on failure
        # Runs when any earlier step failed, or when the pipeline itself failed
        # (which continue-on-error hides from the job status). Skipped on cancellation.
        if: failure() || (success() && steps.pipeline.outputs.exit_code != '0')
        uses: actions/github-script@v7
        with:
          script: |
            // The date step may not have run if an early step failed; fall back to today's UTC date.
            const date = '${{ steps.date.outputs.date }}' || new Date().toISOString().slice(0, 10);
            const runUrl = `https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}`;
            const runNumber = '${{ github.run_number }}';

            const body = [
              `The weekly extraction pipeline failed on ${date}.`,
              '',
              `**Run details:** ${runUrl}`,
              '',
              `**Date:** ${date}`,
              `**Run number:** ${runNumber}`,
              '',
              'Please check the logs for details.',
              '',
              'This issue was automatically created by GitHub Actions.',
            ].join('\n');

            // Await the request so an API error fails this step instead of being lost.
            await github.rest.issues.create({
              owner: context.repo.owner,
              repo: context.repo.repo,
              title: `Pipeline failed: ${date}`,
              body,
              labels: ['pipeline-failure', 'automated'],
            });

      - name: Post summary
        if: always()
        run: |
          RUN_DATE="${{ steps.date.outputs.date }}"
          STATS_FILE="data/aggregated/${RUN_DATE}/stats.json"
          CHANGESET_FILE="data/changes/latest-changeset.json"
          BRANDS_FILE="data/raw/${RUN_DATE}/_summary.json"

          {
            echo "## Pipeline Execution Summary"
            echo ""
            echo "**Date:** ${RUN_DATE}"
            echo "**Status:** ${{ steps.pipeline.outputs.exit_code == '0' && '✅ Success' || '❌ Failed' }}"
            echo ""
          } >> "$GITHUB_STEP_SUMMARY"

          if [ -f "$STATS_FILE" ]; then
            {
              echo "### Statistics"
              echo ""
              echo "- **Total Products:** $(jq -r '.totalProducts' "$STATS_FILE")"
              echo "- **Total Brands:** $(jq -r '.totalBrands' "$STATS_FILE")"
              echo "- **Price Range:** \$$(jq -r '.priceStats.min' "$STATS_FILE") - \$$(jq -r '.priceStats.max' "$STATS_FILE")"
            } >> "$GITHUB_STEP_SUMMARY"
          fi

          if [ -f "$CHANGESET_FILE" ]; then
            {
              echo ""
              echo "### Changes"
              echo ""
              echo "- **New Products:** $(jq -r '.summary.newProducts' "$CHANGESET_FILE")"
              echo "- **Removed Products:** $(jq -r '.summary.removedProducts' "$CHANGESET_FILE")"
              echo "- **Price Changes:** $(jq -r '.summary.priceChanges' "$CHANGESET_FILE")"
            } >> "$GITHUB_STEP_SUMMARY"
          fi

          # Show per-brand breakdown
          if [ -f "$BRANDS_FILE" ]; then
            {
              echo ""
              echo "### Brand Breakdown"
              echo ""
            } >> "$GITHUB_STEP_SUMMARY"

            # One "brand|success|count|error" record per result; error is last so it may itself contain "|".
            jq -r '.results[] | "\(.brandName)|\(.success)|\(.productCount)|\(.error // "")"' "$BRANDS_FILE" | while IFS='|' read -r brand success count error; do
              if [ "$success" = "true" ]; then
                echo "- ✅ **$brand**: $count products" >> "$GITHUB_STEP_SUMMARY"
              else
                if [ -n "$error" ]; then
                  echo "- ❌ **$brand**: 0 products (${error})" >> "$GITHUB_STEP_SUMMARY"
                else
                  echo "- ❌ **$brand**: 0 products (failed)" >> "$GITHUB_STEP_SUMMARY"
                fi
              fi
            done
          fi