Changes from all commits
18 commits
1b0d849
sources: Clean up versionInfo() methods
tsibley Oct 29, 2025
1507cb1
sources/fetch: Override static .Subresource property instead of the .…
tsibley Nov 3, 2025
8ea8214
Revert "Simplify allowed pathBuilder functions"
tsibley Oct 15, 2025
a1b9dd6
endpoints/sources: Preserve query by default in canonicalizeDataset()
tsibley Oct 15, 2025
bb97f45
async: Don't define .useAsync() if .use() isn't defined
tsibley Oct 30, 2025
711b172
async: Add .allAsync() when .all() exists, e.g. on app.routeAsync()
tsibley Oct 30, 2025
7cd951b
endpoints/sources: Support async callbacks in canonicalizeDataset()
tsibley Oct 30, 2025
5ac2e88
endpoints/charon: Fix dataset canonicalization to account for the cur…
tsibley Oct 30, 2025
929840c
endpoints/sources: Convert Dataset.resolve() to an async method
tsibley Nov 3, 2025
9c2ab23
sources/models: Convert Subresource.baseName property to an async method
tsibley Nov 3, 2025
ec79698
resourceIndexer: Correct expected filenames for fetchInventoryLocal()
tsibley Nov 3, 2025
957f7b3
templateLiterals: Add re for safe-by-construction RegExps
tsibley Nov 3, 2025
d028447
static-site[list-resources]: Link to historical datasets by URL path …
tsibley Nov 3, 2025
c22e233
static-site[list-resources]: Actually use the computed "sortingName" …
tsibley Nov 3, 2025
22bf7fb
static-site[list-resources]: Refactor "groupName" (i.e. pathogen name…
tsibley Nov 3, 2025
b900c41
static-site[list-resources]: Enable customization of sorting by "grou…
tsibley Nov 3, 2025
4e59d79
Expose Nextclade dataset reference trees under /nextclade/…
tsibley Nov 3, 2025
04468c1
🚧 logo and grouping improvements
victorlin Dec 4, 2025
2 changes: 1 addition & 1 deletion .github/workflows/index-resources.yml
@@ -85,7 +85,7 @@ jobs:
           node resourceIndexer/main.js \
             --gzip --output resources.json.gz \
             --resourceTypes dataset intermediate \
-            --collections core staging
+            --collections core staging nextclade
       - name: Upload the new index, overwriting the existing index
         if: ${{ startsWith(env.RESOURCE_INDEX, 's3://') }}
         run: |
6 changes: 0 additions & 6 deletions data/manifest_core.json
@@ -582,12 +582,6 @@
       "default": "open"
     }
   },
-  "nextclade": {
-    "dataset": {
-      "sars-cov-2": "",
-      "default": "sars-cov-2"
-    }
-  },
   "nipah": {
     "resolution": {
       "all": "",
2 changes: 1 addition & 1 deletion env/production/config.json
@@ -110,6 +110,6 @@
   "OIDC_GROUPS_CLAIM": "cognito:groups",
   "SESSION_COOKIE_DOMAIN": "nextstrain.org",
   "GROUPS_DATA_FILE": "groups.json",
-  "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v8.json.gz",
+  "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v9.json.gz",
   "PLAUSIBLE_ANALYTICS_DOMAIN": "nextstrain.org"
 }
2 changes: 1 addition & 1 deletion env/testing/config.json
@@ -108,5 +108,5 @@
   "OIDC_USERNAME_CLAIM": "cognito:username",
   "OIDC_GROUPS_CLAIM": "cognito:groups",
   "GROUPS_DATA_FILE": "groups.json",
-  "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v8.json.gz"
+  "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v9.json.gz"
 }
2 changes: 1 addition & 1 deletion resourceIndexer/inventory.js
@@ -86,7 +86,7 @@ const fetchInventoryRemote = async ({bucket, prefix, name, save}) => {
 /**
  * Parse an on-disk inventory. This expects the following files to be present:
  * - `./devData/${name}.manifest.json`
- * - `./devData/${name}.inventory.csv.gz`
+ * - `./devData/${name}-*.csv.gz`
  *
  * Returns an object with properties:
  * - inventory: object[] list of entries in the inventory, using the schema to define keys
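Taken together with the Nextclade additions below, the expected ./devData/ layout for --local runs looks roughly like this (the timestamped inventory filenames are illustrative; only the `${name}-*.csv.gz` pattern is prescribed):

devData/
  core.manifest.json
  core-2025-11-03T00-00Z.csv.gz        (illustrative match for ${name}-*.csv.gz)
  staging.manifest.json
  staging-2025-11-03T00-00Z.csv.gz
  nextclade/
    index.json
    <dataset path>/<version tag>/pathogen.json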
12 changes: 7 additions & 5 deletions resourceIndexer/main.js
@@ -2,6 +2,7 @@
 import { ArgumentParser } from 'argparse';
 import fs from 'fs';
 import { coreS3Data, stagingS3Data } from "./coreStagingS3.js";
+import { NextcladeData } from "./nextclade.js";
 import zlib from 'zlib';
 import { promisify } from 'util';
 import { ResourceIndexerError } from './errors.js';
@@ -19,8 +20,8 @@ const gzip = promisify(zlib.gzip)
  * (sub-)class and resourcePath to parallel the information in the Resource
  * (sub-)class.
  *
- * Currently only sources {core, staging} and resource types {dataset,
- * intermediate} are part of the index.
+ * Currently only sources {core, staging, nextclade} and resource types
+ * {dataset, intermediate} are part of the index.
  *
  * As an example, the core WNV/NA (nextstrain.org/WNV/NA) dataset is indexed
  * like so:
@@ -34,6 +35,7 @@ const gzip = promisify(zlib.gzip)
 const COLLECTIONS = [
   coreS3Data,
   stagingS3Data,
+  new NextcladeData(),
 ];
 
 function parseArgs() {
@@ -47,13 +49,13 @@ function parseArgs() {
   `,
   });
   argparser.addArgument("--local", {action: 'storeTrue',
-    help: 'Access a local copy of S3 inventories within ./devData/. See docstring of fetchInventoryLocal() for expected filenames.'})
+    help: 'Access a local copy of S3 inventories ({core,staging}.manifest.json and {core,staging}-*.csv.gz) and Nextclade indexes (nextclade/index.json and nextclade/**/pathogen.json) within ./devData/ instead of downloading them'})
   argparser.addArgument("--collections", {metavar: "<name>", type: "string", nargs: '+', choices: COLLECTIONS.map((c) => c.name),
     help: "Only fetch data from a subset of collections. Source names are those defined in COLLECTIONS"});
   argparser.addArgument("--resourceTypes", {metavar: "<name>", type: "string", nargs: '+', choices: ['dataset', 'intermediate'],
     help: "Only index data matching specified resource types"});
   argparser.addArgument("--save-inventories", {action: 'storeTrue',
-    help: "Save the fetched inventories + manifest files to ./devData so that future invocations can use --local"})
+    help: "Save a local copy of S3 inventories and Nextclade indexes to ./devData/ so that future invocations can use --local"})
   argparser.addArgument("--output", {metavar: "<json>", required: true})
   argparser.addArgument("--indent", {action: 'storeTrue', help: 'Indent the output JSON'})
   argparser.addArgument("--gzip", {action: 'storeTrue', help: 'GZip the output JSON'})
@@ -118,4 +120,4 @@ async function main(args) {
     output = await gzip(output)
   }
   fs.writeFileSync(args.output, output);
-}
+}
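The workflow change above exercises the same CLI. For local development, a plausible invocation pair (every flag shown is defined in parseArgs() above; the particular combination is illustrative):

# First run: fetch from the network, saving local copies under ./devData/
node resourceIndexer/main.js \
  --save-inventories \
  --collections nextclade \
  --resourceTypes dataset \
  --indent --output resources.json

# Later runs: reuse the saved copies instead of the network
node resourceIndexer/main.js \
  --local \
  --collections nextclade \
  --resourceTypes dataset \
  --indent --output resources.json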
150 changes: 150 additions & 0 deletions resourceIndexer/nextclade.js
@@ -0,0 +1,150 @@
/**
 * Index Nextclade dataset reference trees, including past versions.
 *
 * Transforms Nextclade's own index for use with our resourceIndexer/… and
 * src/resourceIndex.js framework.
 */
import { strict as assert } from "assert";
import { DateTime } from "luxon";
import { readFile, writeFile, mkdir } from "node:fs/promises";
import path from "node:path";

import { fetch } from "../src/fetch.js";
import { NextcladeSource } from "../src/sources/nextclade.js";
import { rootDirFullPath } from "../src/utils/index.js";


const LOCAL_DATA = path.relative(".", path.join(rootDirFullPath, "devData", "nextclade"));
const LOCAL_INDEX = path.join(LOCAL_DATA, "index.json");


/* All class members are part of the "collection" interface expected by
 * resourceIndexer/main.js and use its terminology for arguments and return
 * values. This interface is kind of a weird fit for things that aren't S3
 * inventories, so the chain of methods and way they pass values are a bit
 * contrived.
 */
export class NextcladeData {
  #source;

  name = "nextclade";

  async collect({local, save}) {
    if (local) {
      console.log(`Reading ${LOCAL_INDEX}`);
      this.#source = new NextcladeSource(JSON.parse(await readFile(LOCAL_INDEX)));
    }
    else {
      this.#source = new NextcladeSource();

      if (save) {
        console.log(`Saving ${LOCAL_INDEX}`);
        await mkdir(path.dirname(LOCAL_INDEX), {recursive: true});
        await writeFile(LOCAL_INDEX, JSON.stringify(await this.#source._index(), null, 2));
      }
    }

    const datasetPaths = await this.#source.availableDatasets();

    return (await Promise.all(
      datasetPaths.map(async (datasetPath) => {
        const dataset = this.#source.dataset(datasetPath.split("/"));
        const indexDataset = await dataset._indexDataset();

        /* Sort and collapse versions per our documented behaviour:
         *
         * > All times are UTC. A datestamp refers to datasets uploaded
         * > between 00h00 and 23h59 UTC on that day.
         *
         * > If multiple datasets are uploaded on the same day we take the most
         * > recent.
         *
         * See <https://docs.nextstrain.org/page/guides/snapshots.html#details-for-dataset-maintainers>.
         */
        const datesSeen = new Set();
        const indexVersions =
          indexDataset.versions
            .map(v => ({...v, _timestamp: DateTime.fromISO(v.updatedAt, {zone:"UTC"})}))
            .toSorted((a, b) => b._timestamp - a._timestamp)
            .map(v => ({...v, _date: v._timestamp.toISODate()}))
            .filter(v => !datesSeen.has(v._date) && datesSeen.add(v._date));
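        /* For example (illustrative timestamps): versions updated at
         * 2025-10-01T04:00Z, 2025-10-01T18:00Z, and 2025-09-30T12:00Z sort
         * newest-first and collapse to two entries: the 18:00Z version
         * datestamped 2025-10-01 and the 12:00Z version datestamped
         * 2025-09-30.
         */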

        // Produce one resourceIndexer/main.js "item" per dataset version
        return (await Promise.all(
          indexVersions.map(async (indexVersion) => {
            const versionMetaPath = `${indexDataset.path}/${indexVersion.tag}/pathogen.json`;

            const localFile = path.join(LOCAL_DATA, versionMetaPath);

            let versionMeta;

            if (local) {
              console.log(`Reading ${localFile}`);
              versionMeta = JSON.parse(await readFile(localFile));
            }
            else {
              const remoteUrl = await this.#source.urlFor(versionMetaPath);

              console.log(`Fetching ${remoteUrl}`);
              const response = await fetch(remoteUrl, {cache: "no-cache"});
              assert(response.status === 200);

              versionMeta = await response.json();

              if (save) {
                console.log(`Saving ${localFile}`);
                await mkdir(path.dirname(localFile), {recursive: true});
                await writeFile(localFile, JSON.stringify(versionMeta, null, 2));
              }
            }

            /* This filter must be *after* we fetch the version's own
             * pathogen.json. Because versions are filtered to one-per-day
             * *before* we fetch, it's possible there's an older version from
             * the same day that *does* include a treeJson, and we'd miss it.
             * The fix would be fetching *all* versions and only then filtering
             * to one-per-day (i.e. in createResource() below).
             *
             * Doing so, however, seems unnecessary. The scenario seems
             * unlikely and it's not entirely clear how we'd want to interpret
             * such a dataset update anyway (e.g. was the earlier version on
             * the same day in error?).
             *
             * Also note that this filters out some datasets entirely: those
             * that don't have a reference tree at all.
             * -trs, 27 Oct 2025
             */
            if (!versionMeta.files.treeJson)
              return;

            // One "item" produced by collect()
            return {
              // Used by resourceIndexer/main.js
              source: this.#source.name,
              resourceType: "dataset",
              resourcePath: datasetPath,

              // Used in createResource() below
              version: {
                date: indexVersion._date,
                fileUrls: {
                  main: await this.#source.urlFor(`${indexDataset.path}/${indexVersion.tag}/${versionMeta.files.treeJson}`)
                }
              },
            };
          })
        )).flat();
      })
    )).flat();
  }

  categorise(item) {
    return item;
  }

  createResource(resourceType, resourcePath, items) {
    return {
      versions: items.map(i => i.version),
    };
  }
}
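For orientation, a minimal sketch of how the "collection" interface described at the top of this file is consumed. This is hypothetical code, not the actual resourceIndexer/main.js (which also applies the --collections and --resourceTypes filters); it assumes COLLECTIONS, local, and save are in scope inside an async function:

// Hypothetical sketch of the consuming side.
const grouped = new Map();

for (const collection of COLLECTIONS) {
  // collect() returns "items"; filter(Boolean) drops the versions skipped
  // above (e.g. those without a treeJson).
  for (const item of (await collection.collect({local, save})).filter(Boolean)) {
    const {source, resourceType, resourcePath} = collection.categorise(item);
    const key = JSON.stringify([source, resourceType, resourcePath]);
    if (!grouped.has(key)) grouped.set(key, {collection, items: []});
    grouped.get(key).items.push(item);
  }
}

// One resource per (source, resourceType, resourcePath) group, built by
// the collection that produced the grouped items.
const resources = [...grouped].map(([key, {collection, items}]) => {
  const [source, resourceType, resourcePath] = JSON.parse(key);
  return {source, resourceType, resourcePath,
          ...collection.createResource(resourceType, resourcePath, items)};
});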
11 changes: 10 additions & 1 deletion src/app.js
@@ -14,6 +14,7 @@ const {
   errors,
   fetch,
   groups,
+  nextclade,
   openid,
   pathogenRepos,
   schemas,
@@ -68,7 +69,6 @@ charon.setup(app);
  *   /monkeypox
  *   /mpox
  *   /ncov
- *   /nextclade
  *   /rsv
  *   /rubella
  *   /seasonal-flu
@@ -92,6 +92,15 @@ core.setup(app);
 staging.setup(app);
 
 
+/* Nextclade reference datasets
+ *
+ * Routes:
+ *   /nextclade
+ *   /nextclade/*
+ */
+nextclade.setup(app);
+
+
 /* Community on GitHub
  *
  * Routes:
26 changes: 19 additions & 7 deletions src/async.js
@@ -228,13 +228,25 @@ function addAsync(app) {
     return addAsync(this.route.apply(this, arguments));
   };
 
-  app.useAsync = function() {
-    const fn = arguments[arguments.length - 1];
-    assert.ok(typeof fn === 'function',
-      'Last argument to `useAsync()` must be a function');
-    const args = wrapArgs(arguments);
-    return app.use.apply(app, args);
-  };
+  if (app.use) {
+    app.useAsync = function() {
+      const fn = arguments[arguments.length - 1];
+      assert.ok(typeof fn === 'function',
+        'Last argument to `useAsync()` must be a function');
+      const args = wrapArgs(arguments);
+      return app.use.apply(app, args);
+    };
+  }
+
+  if (app.all) {
+    app.allAsync = function() {
+      const fn = arguments[arguments.length - 1];
+      assert.ok(typeof fn === 'function',
+        'Last argument to `allAsync()` must be a function');
+      const args = wrapArgs(arguments);
+      return app.all.apply(app, args);
+    };
+  }
 
   app.deleteAsync = function() {
     const fn = arguments[arguments.length - 1];
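The .use()/.all() guards matter because addAsync() is applied not just to apps but also to the objects returned by route()/routeAsync() (see routeAsync above), which have .all() but no .use(). A hypothetical usage sketch; buildContext() and handle() are illustrative names:

// Hypothetical usage, assuming an Express app wrapped with addAsync():
app.useAsync(async (req, res, next) => {    // app has .use(), so .useAsync() exists
  req.context = await buildContext(req);    // buildContext() is illustrative
  next();
});

app.routeAsync("/charon/getDataset")        // returns a Route wrapped by addAsync();
  .allAsync(async (req, res) => {           // a Route has .all() but no .use(), so it
    res.json(await handle(req));            // gets .allAsync() and, correctly, no .useAsync()
  });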
22 changes: 14 additions & 8 deletions src/endpoints/charon/index.js
@@ -1,5 +1,5 @@
 import { BadRequest, isHttpError } from '../../httpErrors.js';
-import { splitPrefixIntoParts } from '../../utils/prefix.js';
+import { splitPrefixIntoParts, joinPartsIntoPrefix } from '../../utils/prefix.js';
 import { setSource, setDataset, canonicalizeDataset, setNarrative } from '../sources.js';
 import './setAvailableDatasets.js'; // sets globals
 export { getAvailable } from './getAvailable.js';
@@ -25,13 +25,19 @@ const setSourceFromPrefix = setSource(req => {
 
 const setDatasetFromPrefix = setDataset(req => req.context.splitPrefixIntoParts.prefixParts.join("/"));
 
-const canonicalizeDatasetPrefix = canonicalizeDataset((req, resolvedPrefix) => {
-  // A absolute base is required but we won't use it, so use something bogus.
-  const resolvedUrl = new URL(req.originalUrl, "http://x");
-  resolvedUrl.searchParams.set("prefix", resolvedPrefix);
-
-  return resolvedUrl.pathname + resolvedUrl.search;
-});
+/**
+ * Leave the URL path (e.g. /charon/getDataset) unchanged; only the "prefix"
+ * query param is updated with the resolved dataset path.
+ */
+const canonicalizeDatasetPrefix = canonicalizeDataset(async (req, path) => ({
+  query: {
+    ...req.query,
+    prefix: await joinPartsIntoPrefix({
+      source: req.context.source,
+      prefixParts: path.split("/")
+    }),
+  }
+}));
 
 const setNarrativeFromPrefix = setNarrative(req => {
   const {prefixParts} = req.context.splitPrefixIntoParts;