From 1b0d849be5c1f3a3d5e69be4da6fb45041d7ffc2 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 29 Oct 2025 16:15:58 -0700 Subject: [PATCH 01/18] sources: Clean up versionInfo() methods Removes the mixed use of a versionDescriptor function param and instance property in favor of just the instance property. Moves Resource.versionInfo()'s JSDoc where it's supposed to be: just prior to the method instead of just inside the method. --- src/sources/core.js | 7 +++---- src/sources/models.js | 25 ++++++++++++------------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/src/sources/core.js b/src/sources/core.js index db8882e04..01d7df691 100644 --- a/src/sources/core.js +++ b/src/sources/core.js @@ -138,18 +138,17 @@ class CoreDataset extends Dataset { * * We only want to do this for core datasets, not staging. * - * @param {(string|false)} versionDescriptor from the URL * @throws {BadRequest || NotFound} * @returns {([string, Object]|[null, undefined])} [0]: versionDate [1]: * versionUrls */ - versionInfo(versionDescriptor) { + versionInfo() { if (this.source.name!=='core') { - return super.versionInfo(versionDescriptor); + return super.versionInfo(); } - if (!versionDescriptor) { + if (!this.versionDescriptor) { return [null, undefined]; } diff --git a/src/sources/models.js b/src/sources/models.js index ed6f8d236..24984854e 100644 --- a/src/sources/models.js +++ b/src/sources/models.js @@ -180,18 +180,17 @@ class Resource { get baseName() { return this.baseParts.join("_"); } - versionInfo(versionDescriptor) { - /** - * Interrogates the resource index to find the appropriate version of the - * resource and associated subresource URLs by comparing to - * this.versionDescriptor. - * This method should be overridden by subclasses when they are used to - * handle URLs which extract version descriptors. 
- * @param {(string|false)} versionDescriptor from the URL string - * @throws {BadRequest} - * @returns {([string, Object]|[null, undefined])} [0]: versionDate [1]: versionUrls - */ - if (versionDescriptor) { + /** + * Interrogates the resource index to find the appropriate version of the + * resource and associated subresource URLs by comparing to + * this.versionDescriptor. + * This method should be overridden by subclasses when they are used to + * handle URLs which extract version descriptors. + * @throws {BadRequest} + * @returns {([string, Object]|[null, undefined])} [0]: versionDate [1]: versionUrls + */ + versionInfo() { + if (this.versionDescriptor) { throw new BadRequest(`This resource cannot handle versioned dataset requests (version descriptor requested: "${this.versionDescriptor}")`) } return [null, undefined]; @@ -241,7 +240,7 @@ class Subresource { * this as needed. */ - const versionUrls = this.resource.versionInfo(this.resource.versionDescriptor)[1]; + const versionUrls = this.resource.versionInfo()[1]; if (versionUrls) { if (!['HEAD', 'GET'].includes(method)) { From 1507cb1162e870ba8e85b58190ae85dfb46754ef Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Nov 2025 10:03:44 -0800 Subject: [PATCH 02/18] sources/fetch: Override static .Subresource property instead of the .subresource() method This makes .subresources() work as designed (though no behaviour change is expected). This change was missed in "sources: Refactor subresource construction into Resource base class" (ef13cd64), which introduced the static .Subresource property. 
--- src/sources/fetch.js | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/sources/fetch.js b/src/sources/fetch.js index f948f3276..e0c6416b6 100644 --- a/src/sources/fetch.js +++ b/src/sources/fetch.js @@ -46,6 +46,10 @@ class UrlDefinedSource extends Source { } class UrlDefinedDataset extends Dataset { + static get Subresource() { + return UrlDefinedDatasetSubresource; + } + // eslint-disable-next-line no-unused-vars assertValidPathParts(pathParts) { // Override check for underscores (_), as we want to allow arbitrary @@ -63,9 +67,6 @@ class UrlDefinedDataset extends Dataset { const version = this.versionDescriptor ? `@${this.versionDescriptor}` : "" return this.baseParts.join("/") + version; } - subresource(type) { - return new UrlDefinedDatasetSubresource(this, type); - } async exists() { /* Assume existence. There's little benefit to checking with extra * requests when we don't have a natural fallback page (e.g. the Group page @@ -94,6 +95,10 @@ class UrlDefinedDatasetSubresource extends DatasetSubresource { } class UrlDefinedNarrative extends Narrative { + static get Subresource() { + return UrlDefinedNarrativeSubresource; + } + // eslint-disable-next-line no-unused-vars assertValidPathParts(pathParts) { // Override check for underscores (_), as we want to allow arbitrary @@ -108,9 +113,6 @@ class UrlDefinedNarrative extends Narrative { const version = this.versionDescriptor ? `@${this.versionDescriptor}` : "" return this.baseParts.join("/") + version; } - subresource(type) { - return new UrlDefinedNarrativeSubresource(this, type); - } async exists() { /* Assume existence. There's little benefit to checking with extra * requests when we don't have a natural fallback page (e.g. 
the Group page From 8ea82146691300cb00d9eb31abfdee118e60ce92 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 15 Oct 2025 12:13:46 -0700 Subject: [PATCH 03/18] Revert "Simplify allowed pathBuilder functions" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit d467229db1fa139cf55cdeb87bdf7c5331f1370d. I'm in this canonicalization code again for a new source and want to address the query param handling centrally (rather than proliferate it further) as mentioned in my original review.¹ Doing so includes going back to a cleaner pathBuilder interface for routes that don't need "req". ¹ --- src/endpoints/sources.js | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/src/endpoints/sources.js b/src/endpoints/sources.js index 6819952ac..f464c745b 100644 --- a/src/endpoints/sources.js +++ b/src/endpoints/sources.js @@ -59,7 +59,9 @@ const canonicalizeDataset = (pathBuilder) => (req, res, next) => { const version = dataset.versionDescriptor ? `@${dataset.versionDescriptor}` : ''; - const canonicalPath = pathBuilder(req, resolvedDataset.pathParts.join("/") + version); + const canonicalPath = pathBuilder.length >= 2 + ? pathBuilder(req, resolvedDataset.pathParts.join("/") + version) + : pathBuilder(resolvedDataset.pathParts.join("/") + version); /* 307 Temporary Redirect preserves request method, unlike 302 Found, which * is important since this middleware function may be used in non-GET routes. @@ -358,13 +360,27 @@ function receiveSubresource(subresourceExtractor) { * @returns {String} Path for {@link Source#dataset} or {@link Source#narrative} */ +/* Confused about the duplication below? It's the documented way to handle + * overloaded (e.g. arity-dependent) function signatures.¹ Note that it relies + * on the "nestled" or "cuddled" end and start comment markers. 
+ * -trs, 16 June 2022 + * + * ¹ https://github.com/jsdoc/jsdoc/issues/1017 + */ /** * @callback pathBuilder - * @param {express.request} req - * @param {String} path - Canonical path (not including query) for the dataset - * within the context of the current {@link Source} - * @returns {String} Fully-specified path (including query) to redirect to - */ + * + * @param {String} path - Canonical path for the dataset within the context of + * the current {@link Source} + * @returns {String} Fully-specified path to redirect to + *//** +* @callback pathBuilder +* +* @param {express.request} req +* @param {String} path - Canonical path for the dataset within the context of +* the current {@link Source} +* @returns {String} Fully-specified path to redirect to +*/ /** * @callback subresourceExtractor From a1b9dd65a8bf8003ae59a94049fc5081ef86c884 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Wed, 15 Oct 2025 14:25:46 -0700 Subject: [PATCH 04/18] endpoints/sources: Preserve query by default in canonicalizeDataset() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Preserving by default makes more sense for most of our routes, and for routes where it does not (e.g. /charon/…), adjusting the query via the callback is now simpler and more declarative like the other routes. Motivated by being in this canonicalization code again for a new source and wanting to address the query param handling centrally (rather than proliferate it further) as mentioned in my original review.¹ No external behaviour change; effectively a different approach to "Preserve URL queries across redirects" (1c7838ae). 
¹ --- src/endpoints/charon/index.js | 17 ++++++++++------- src/endpoints/sources.js | 32 +++++++++++++++++++++----------- src/routing/core.js | 7 +------ src/routing/staging.js | 7 +------ 4 files changed, 33 insertions(+), 30 deletions(-) diff --git a/src/endpoints/charon/index.js b/src/endpoints/charon/index.js index d3fb38307..1e9744f5f 100644 --- a/src/endpoints/charon/index.js +++ b/src/endpoints/charon/index.js @@ -25,13 +25,16 @@ const setSourceFromPrefix = setSource(req => { const setDatasetFromPrefix = setDataset(req => req.context.splitPrefixIntoParts.prefixParts.join("/")); -const canonicalizeDatasetPrefix = canonicalizeDataset((req, resolvedPrefix) => { - // A absolute base is required but we won't use it, so use something bogus. - const resolvedUrl = new URL(req.originalUrl, "http://x"); - resolvedUrl.searchParams.set("prefix", resolvedPrefix); - - return resolvedUrl.pathname + resolvedUrl.search; -}); +/** + * Leave the URL path (e.g. /charon/getDataset) unchanged with only the + * "prefix" query param updated with the resolved dataset path. + */ +const canonicalizeDatasetPrefix = canonicalizeDataset((req, path) => ({ + query: { + ...req.query, + prefix: path, + } +})); const setNarrativeFromPrefix = setNarrative(req => { const {prefixParts} = req.context.splitPrefixIntoParts; diff --git a/src/endpoints/sources.js b/src/endpoints/sources.js index f464c745b..5bc463f4f 100644 --- a/src/endpoints/sources.js +++ b/src/endpoints/sources.js @@ -1,4 +1,5 @@ import contentDisposition from 'content-disposition'; +import url from 'url'; import { NotFound } from '../httpErrors.js'; @@ -46,12 +47,13 @@ const setDataset = (pathExtractor) => (req, res, next) => { /** * Generate Express middleware that redirects to the canonical path for the * current {@link Dataset} if it is not fully resolved. Any provided version - * descriptor is included in the redirect. + * descriptor is included in the redirect. 
Original query params are preserved + * across the redirect unless overridden by canonicalBuilder. * - * @param {pathBuilder} pathBuilder - Function to build a fully-specified path + * @param {canonicalBuilder} canonicalBuilder - Function to build a fully-specified path or URL object suitable for {@link url#format} * @returns {expressMiddleware} */ -const canonicalizeDataset = (pathBuilder) => (req, res, next) => { +const canonicalizeDataset = (canonicalBuilder) => (req, res, next) => { const dataset = req.context.dataset; const resolvedDataset = dataset.resolve(); @@ -59,14 +61,22 @@ const canonicalizeDataset = (pathBuilder) => (req, res, next) => { const version = dataset.versionDescriptor ? `@${dataset.versionDescriptor}` : ''; - const canonicalPath = pathBuilder.length >= 2 - ? pathBuilder(req, resolvedDataset.pathParts.join("/") + version) - : pathBuilder(resolvedDataset.pathParts.join("/") + version); + let canonical = canonicalBuilder.length >= 2 + ? canonicalBuilder(req, resolvedDataset.pathParts.join("/") + version) + : canonicalBuilder(resolvedDataset.pathParts.join("/") + version); + + // Convert plain path string to an object for url.format() + if (typeof canonical === "string") + canonical = {pathname: canonical} + + // Default to current path and current query + canonical.pathname ??= req.baseUrl + req.path; // baseUrl is really "basePath" + canonical.query ??= req.query; /* 307 Temporary Redirect preserves request method, unlike 302 Found, which * is important since this middleware function may be used in non-GET routes. 
*/ - return res.redirect(307, canonicalPath); + return res.redirect(307, url.format(canonical)); }; @@ -368,18 +378,18 @@ function receiveSubresource(subresourceExtractor) { * ¹ https://github.com/jsdoc/jsdoc/issues/1017 */ /** - * @callback pathBuilder + * @callback canonicalBuilder * * @param {String} path - Canonical path for the dataset within the context of * the current {@link Source} - * @returns {String} Fully-specified path to redirect to + * @returns {String|Object} Fully-specified path to redirect to or object suitable for {@link url#format} *//** -* @callback pathBuilder +* @callback canonicalBuilder * * @param {express.request} req * @param {String} path - Canonical path for the dataset within the context of * the current {@link Source} -* @returns {String} Fully-specified path to redirect to +* @returns {String|Object} Fully-specified path to redirect to or object suitable for {@link url#format} */ /** diff --git a/src/routing/core.js b/src/routing/core.js index 44763ccdf..b512591d1 100644 --- a/src/routing/core.js +++ b/src/routing/core.js @@ -1,5 +1,3 @@ -import url from 'url'; - import * as endpoints from '../endpoints/index.js'; import * as sources from '../sources/index.js'; @@ -69,10 +67,7 @@ export function setup(app) { app.use([coreBuildRoutes, "/narratives/*"], setSource(req => new CoreSource())); // eslint-disable-line no-unused-vars app.routeAsync(coreBuildRoutes) - .all( - setDataset(req => req.path), - canonicalizeDataset((req, path) => url.format({pathname: `/${path}`, query: req.query})) - ) + .all(setDataset(req => req.path), canonicalizeDataset(path => `/${path}`)) .getAsync(getDataset) .putAsync(putDataset) .deleteAsync(deleteDataset) diff --git a/src/routing/staging.js b/src/routing/staging.js index 43ad444d9..931c35cb6 100644 --- a/src/routing/staging.js +++ b/src/routing/staging.js @@ -1,5 +1,3 @@ -import url from 'url'; - import * as endpoints from '../endpoints/index.js'; import * as sources from '../sources/index.js'; @@ -42,10 
+40,7 @@ export function setup(app) { ; app.routeAsync("/staging/*") - .all( - setDataset(req => req.params[0]), - canonicalizeDataset((req, path) => url.format({pathname: `/staging/${path}`, query: req.query})) - ) + .all(setDataset(req => req.params[0]), canonicalizeDataset(path => `/staging/${path}`)) .getAsync(getDataset) .putAsync(putDataset) .deleteAsync(deleteDataset) From bb97f45295730e6e3bb4fc3c533290fd4796eabd Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 30 Oct 2025 12:36:42 -0700 Subject: [PATCH 05/18] async: Don't define .useAsync() if .use() isn't defined The same async wrapper is applied to the Express application instance, Router instances, and Route instances (i.e. from .route()). The latter don't have .use(). Breaks much earlier at runtime (e.g. when setting up routes), crashing the server, rather than allowing the server to start and erroring on requests to the affected route. --- src/async.js | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/async.js b/src/async.js index 9862eaee5..df773f2bf 100644 --- a/src/async.js +++ b/src/async.js @@ -228,13 +228,15 @@ function addAsync(app) { return addAsync(this.route.apply(this, arguments)); }; - app.useAsync = function() { - const fn = arguments[arguments.length - 1]; - assert.ok(typeof fn === 'function', - 'Last argument to `useAsync()` must be a function'); - const args = wrapArgs(arguments); - return app.use.apply(app, args); - }; + if (app.use) { + app.useAsync = function() { + const fn = arguments[arguments.length - 1]; + assert.ok(typeof fn === 'function', + 'Last argument to `useAsync()` must be a function'); + const args = wrapArgs(arguments); + return app.use.apply(app, args); + }; + } app.deleteAsync = function() { const fn = arguments[arguments.length - 1]; assert.ok(typeof fn === 'function', From 711b172e679cba57e42d2bb3204de5058094dcfc Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 30 Oct 2025 12:42:23 -0700 Subject: [PATCH 06/18] async: Add .allAsync() when .all() 
exists, e.g. on app.routeAsync() We use .all() to avoid repeating middleware for each HTTP method on a route. I need to start using async middleware in those places. --- src/async.js | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/async.js b/src/async.js index df773f2bf..13272be41 100644 --- a/src/async.js +++ b/src/async.js @@ -238,6 +238,16 @@ function addAsync(app) { }; } + if (app.all) { + app.allAsync = function() { + const fn = arguments[arguments.length - 1]; + assert.ok(typeof fn === 'function', + 'Last argument to `allAsync()` must be a function'); + const args = wrapArgs(arguments); + return app.all.apply(app, args); + }; + } + app.deleteAsync = function() { const fn = arguments[arguments.length - 1]; assert.ok(typeof fn === 'function', From 7cd951bdd84716f79453fb09f7649c5ddef091c6 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 30 Oct 2025 12:47:25 -0700 Subject: [PATCH 07/18] endpoints/sources: Support async callbacks in canonicalizeDataset() Useful to be flexible here so the callback can be written less awkwardly, and necessary to fix a bug (coming soon). --- src/endpoints/sources.js | 26 +++++++++++++++++++++----- src/routing/core.js | 2 +- src/routing/staging.js | 2 +- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/endpoints/sources.js b/src/endpoints/sources.js index 5bc463f4f..b45fe55e7 100644 --- a/src/endpoints/sources.js +++ b/src/endpoints/sources.js @@ -50,10 +50,10 @@ const setDataset = (pathExtractor) => (req, res, next) => { * descriptor is included in the redirect. Original query params are preserved * across the redirect unless overridden by canonicalBuilder. 
* - * @param {canonicalBuilder} canonicalBuilder - Function to build a fully-specified path or URL object suitable for {@link url#format} - * @returns {expressMiddleware} + * @param {canonicalBuilder|canonicalBuilderAsync} canonicalBuilder - Function to build a fully-specified path or URL object suitable for {@link url#format} + * @returns {expressMiddlewareAsync} */ -const canonicalizeDataset = (canonicalBuilder) => (req, res, next) => { +const canonicalizeDataset = (canonicalBuilder) => async (req, res, next) => { const dataset = req.context.dataset; const resolvedDataset = dataset.resolve(); @@ -62,8 +62,8 @@ const canonicalizeDataset = (canonicalBuilder) => (req, res, next) => { const version = dataset.versionDescriptor ? `@${dataset.versionDescriptor}` : ''; let canonical = canonicalBuilder.length >= 2 - ? canonicalBuilder(req, resolvedDataset.pathParts.join("/") + version) - : canonicalBuilder(resolvedDataset.pathParts.join("/") + version); + ? await canonicalBuilder(req, resolvedDataset.pathParts.join("/") + version) + : await canonicalBuilder(resolvedDataset.pathParts.join("/") + version); // Convert plain path string to an object for url.format() if (typeof canonical === "string") @@ -391,6 +391,22 @@ function receiveSubresource(subresourceExtractor) { * the current {@link Source} * @returns {String|Object} Fully-specified path to redirect to or object suitable for {@link url#format} */ +/** + * @callback canonicalBuilderAsync + * + * @async + * @param {String} path - Canonical path for the dataset within the context of + * the current {@link Source} + * @returns {String|Object} Fully-specified path to redirect to or object suitable for {@link url#format} + *//** +* @callback canonicalBuilderAsync +* +* @async +* @param {express.request} req +* @param {String} path - Canonical path for the dataset within the context of +* the current {@link Source} + * @returns {String|Object} Fully-specified path to redirect to or object suitable for {@link url#format} 
+*/ /** * @callback subresourceExtractor diff --git a/src/routing/core.js b/src/routing/core.js index b512591d1..74641919e 100644 --- a/src/routing/core.js +++ b/src/routing/core.js @@ -67,7 +67,7 @@ export function setup(app) { app.use([coreBuildRoutes, "/narratives/*"], setSource(req => new CoreSource())); // eslint-disable-line no-unused-vars app.routeAsync(coreBuildRoutes) - .all(setDataset(req => req.path), canonicalizeDataset(path => `/${path}`)) + .allAsync(setDataset(req => req.path), canonicalizeDataset(path => `/${path}`)) .getAsync(getDataset) .putAsync(putDataset) .deleteAsync(deleteDataset) diff --git a/src/routing/staging.js b/src/routing/staging.js index 931c35cb6..499ed1a72 100644 --- a/src/routing/staging.js +++ b/src/routing/staging.js @@ -40,7 +40,7 @@ export function setup(app) { ; app.routeAsync("/staging/*") - .all(setDataset(req => req.params[0]), canonicalizeDataset(path => `/staging/${path}`)) + .allAsync(setDataset(req => req.params[0]), canonicalizeDataset(path => `/staging/${path}`)) .getAsync(getDataset) .putAsync(putDataset) .deleteAsync(deleteDataset) From 5ac2e88a501e2d2686fe5f31727dd9ae7f40f1b3 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Thu, 30 Oct 2025 12:51:53 -0700 Subject: [PATCH 08/18] endpoints/charon: Fix dataset canonicalization to account for the current Source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes a subtle bug where changing the dataset *in Auspice* to a non-canonical path caused a redirection that switched sources, e.g. staging → core. For example, if you were on /staging/measles/genome and switched measles to enterovirus, Auspice made a /charon/getDataset request for /staging/enterovirus which *should* have canonicalized to /staging/enterovirus/d68/genome but *was* canonicalized to /enterovirus/d68/genome. 
This was demonstrable by: $ curl -IL https://nextstrain.org/charon/getDataset?prefix=/staging/enterovirus -so /dev/null -w '%{url_effective}\n' https://nextstrain.org/charon/getDataset?prefix=enterovirus%2Fd68%2Fgenome Note that the "prefix" query param was missing "staging/". --- src/endpoints/charon/index.js | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/endpoints/charon/index.js b/src/endpoints/charon/index.js index 1e9744f5f..23f10fba0 100644 --- a/src/endpoints/charon/index.js +++ b/src/endpoints/charon/index.js @@ -1,5 +1,5 @@ import { BadRequest, isHttpError } from '../../httpErrors.js'; -import { splitPrefixIntoParts } from '../../utils/prefix.js'; +import { splitPrefixIntoParts, joinPartsIntoPrefix } from '../../utils/prefix.js'; import { setSource, setDataset, canonicalizeDataset, setNarrative } from '../sources.js'; import './setAvailableDatasets.js'; // sets globals export { getAvailable } from './getAvailable.js'; @@ -29,10 +29,13 @@ const setDatasetFromPrefix = setDataset(req => req.context.splitPrefixIntoParts. * Leave the URL path (e.g. /charon/getDataset) unchanged with only the * "prefix" query param updated with the resolved dataset path. */ -const canonicalizeDatasetPrefix = canonicalizeDataset((req, path) => ({ +const canonicalizeDatasetPrefix = canonicalizeDataset(async (req, path) => ({ query: { ...req.query, - prefix: path, + prefix: await joinPartsIntoPrefix({ + source: req.context.source, + prefixParts: path.split("/") + }), } })); From 929840cbbc37d826cae6a01b9d81f62cd4af8973 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Nov 2025 09:57:42 -0800 Subject: [PATCH 09/18] endpoints/sources: Convert Dataset.resolve() to an async method So we can more easily do external lookups for canonicalization. 
--- src/endpoints/sources.js | 2 +- src/sources/core.js | 4 ++-- src/sources/models.js | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/endpoints/sources.js b/src/endpoints/sources.js index b45fe55e7..cbd0c13af 100644 --- a/src/endpoints/sources.js +++ b/src/endpoints/sources.js @@ -55,7 +55,7 @@ const setDataset = (pathExtractor) => (req, res, next) => { */ const canonicalizeDataset = (canonicalBuilder) => async (req, res, next) => { const dataset = req.context.dataset; - const resolvedDataset = dataset.resolve(); + const resolvedDataset = await dataset.resolve(); if (dataset === resolvedDataset) return next(); diff --git a/src/sources/core.js b/src/sources/core.js index 01d7df691..8ac9517fd 100644 --- a/src/sources/core.js +++ b/src/sources/core.js @@ -95,7 +95,7 @@ class CoreStagingSource extends CoreSource { class CoreDataset extends Dataset { /* NOTE: This class is also used for staging datasets */ - resolve() { + async resolve() { /* XXX TODO: Reimplement this in terms of methods on the source, not by * breaking encapsulation by using a process-wide global. 
* -trs, 26 Oct 2021 (based on a similar comment 5 Sept 2019) @@ -123,7 +123,7 @@ class CoreDataset extends Dataset { if (nextDefaultPart) { const dataset = new this.constructor(this.source, [...prefixParts, nextDefaultPart], this.versionDescriptor); - return dataset.resolve(); + return await dataset.resolve(); } return this; diff --git a/src/sources/models.js b/src/sources/models.js index 24984854e..325443be8 100644 --- a/src/sources/models.js +++ b/src/sources/models.js @@ -306,7 +306,7 @@ class Dataset extends Resource { * * @returns {Dataset} */ - resolve() { + async resolve() { return this; } From 9c2ab2343ad1dc421baf167b0413c8bbabeb8a2c Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Nov 2025 09:59:52 -0800 Subject: [PATCH 10/18] sources/models: Convert Subresource.baseName property to an async method So we can more easily construct or lookup the value based on external information. I didn't bubble this up to Resource.baseName because it was not necessary for my purposes, but I wouldn't be surprised if we find a need to do that in the future. 
--- src/sources/fetch.js | 4 ++-- src/sources/models.js | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/sources/fetch.js b/src/sources/fetch.js index e0c6416b6..0389a49ce 100644 --- a/src/sources/fetch.js +++ b/src/sources/fetch.js @@ -80,7 +80,7 @@ class UrlDefinedDataset extends Dataset { } class UrlDefinedDatasetSubresource extends DatasetSubresource { - get baseName() { + async baseName() { const type = this.type; const baseName = this.resource.baseName; @@ -126,7 +126,7 @@ class UrlDefinedNarrative extends Narrative { } class UrlDefinedNarrativeSubresource extends NarrativeSubresource { - get baseName() { + async baseName() { return this.resource.baseName; } } diff --git a/src/sources/models.js b/src/sources/models.js index 325443be8..e866c3337 100644 --- a/src/sources/models.js +++ b/src/sources/models.js @@ -252,9 +252,9 @@ class Subresource { throw new NotFound(`This version of the resource does not have a subresource for ${this.type}`); } - return await this.resource.source.urlFor(this.baseName, method, headers); + return await this.resource.source.urlFor(await this.baseName(), method, headers); } - get baseName() { + async baseName() { throw new Error("baseName() must be implemented by Subresource subclasses"); } get mediaType() { @@ -337,7 +337,7 @@ class DatasetSubresource extends Subresource { "application/octet-stream; q=0.01", ].join(", "); - get baseName() { + async baseName() { return this.type === "main" ? 
`${this.resource.baseName}.json` : `${this.resource.baseName}_${this.type}.json`; @@ -392,7 +392,7 @@ class NarrativeSubresource extends Subresource { "text/*; q=0.1", ].join(", "); - get baseName() { + async baseName() { return `${this.resource.baseName}.md`; } From ec796985c1e60a09e6781ff969a21c1bc7d3d200 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Nov 2025 11:50:42 -0800 Subject: [PATCH 11/18] resourceIndexer: Correct expected filenames for fetchInventoryLocal() Missed in "Derive inventory path from key in manifest" (4b2257cb). --- resourceIndexer/inventory.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resourceIndexer/inventory.js b/resourceIndexer/inventory.js index 13975ebf6..4b1946460 100644 --- a/resourceIndexer/inventory.js +++ b/resourceIndexer/inventory.js @@ -86,7 +86,7 @@ const fetchInventoryRemote = async ({bucket, prefix, name, save}) => { /** * Parse an on-disk inventory. This expects the following files to be present: * - `./devData/${name}.manifest.json` - * - `./devData/${name}.inventory.csv.gz` + * - `./devData/${name}-*.csv.gz` * * Returns an object with properties: * - inventory: object[] list of entries in the inventory, using the schema to define keys From 957f7b3932b8b3b9b7a3fce98b6c2ff5a15fd744 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Nov 2025 10:13:22 -0800 Subject: [PATCH 12/18] templateLiterals: Add re for safe-by-construction RegExps I will use this in new code that's forthcoming. --- src/templateLiterals.js | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/templateLiterals.js b/src/templateLiterals.js index d206022c1..c491684f2 100644 --- a/src/templateLiterals.js +++ b/src/templateLiterals.js @@ -1,3 +1,6 @@ +import escapeStringRegexp from 'escape-string-regexp'; + + /** * Safe-by-construction URI strings via [template literals]{@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals#tagged_templates}. 
* @@ -21,3 +24,21 @@ export function uri(literalParts, ...exprParts) { literalParts[0] ); } + + +/** + * Safe-by-construction RegExps from strings via [template literals]{@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals#tagged_templates}. + * + * Interpolations in the template literal are automatically escaped so no regex + * metachars are interpreted. + * + * @returns RegExp + */ +export function re(literalParts, ...exprParts) { + /* See comment above in uri() about the data structures involved. + */ + return literalParts.slice(1).reduce( + (re, literalPart, idx) => new RegExp(`${re.source}${escapeStringRegexp(exprParts[idx])}${literalPart}`), + new RegExp(literalParts[0]) + ); +} From d028447488aa89b2120aa66db1f7b0c436bb0f30 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Nov 2025 10:15:26 -0800 Subject: [PATCH 13/18] static-site[list-resources]: Link to historical datasets by URL path not name This bug had no visible effect on "core" datasets where the URL path (e.g. /a/b/c) and name (a/b/c) are equivalent. For other sources, however, like "staging", the URL path (e.g. /staging/a/b/c) and name (a/b/c) differ. The bug remained latent because historical versions were only ever enabled for the "core" source. I noticed when adding UI for a "nextclade" source that also enabled versions. 
--- .../list-resources/modal-contents-dataset-history.tsx | 2 +- static-site/components/list-resources/modal_draw.js | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/static-site/components/list-resources/modal-contents-dataset-history.tsx b/static-site/components/list-resources/modal-contents-dataset-history.tsx index adccb04c3..f6ac6d7bb 100644 --- a/static-site/components/list-resources/modal-contents-dataset-history.tsx +++ b/static-site/components/list-resources/modal-contents-dataset-history.tsx @@ -42,7 +42,7 @@ export function DatasetHistory({ diff --git a/static-site/components/list-resources/modal_draw.js b/static-site/components/list-resources/modal_draw.js index 24c7502ee..1681f8595 100644 --- a/static-site/components/list-resources/modal_draw.js +++ b/static-site/components/list-resources/modal_draw.js @@ -180,7 +180,7 @@ export default function modal_draw(ref, resource, lightGrey) { }) // @ts-expect-error no-unused-vars .on("click", function (e, d) { - window.open(`/${resource.name}@${d.data.version}`, "_blank"); // TEST! + window.open(`${resource.url}@${d.data.version}`, "_blank"); // TEST! }); /** @@ -244,7 +244,7 @@ export default function modal_draw(ref, resource, lightGrey) { }) .on("click", function (e) { const { datum } = getVersion(e); - window.open(`/${resource.name}@${datum.data.version}`, "_blank"); + window.open(`${resource.url}@${datum.data.version}`, "_blank"); }); function selectSnapshot(selection, selectedDatum) { From c22e233167ef54d46646d38acd227c17ea78cf25 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Nov 2025 10:23:45 -0800 Subject: [PATCH 14/18] static-site[list-resources]: Actually use the computed "sortingName" to sort The "sortingName" was computed for every resource by _sortableName(), but then not used in the actual sort operations. Datasets with recency timeframes (e.g. 2y, 6y, 12y, etc) are now sorted as expected in the UI instead of being ASCII-betical. 
--- static-site/components/list-resources/use-sort-and-filter.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/static-site/components/list-resources/use-sort-and-filter.ts b/static-site/components/list-resources/use-sort-and-filter.ts index 78915db76..0eb2132ed 100644 --- a/static-site/components/list-resources/use-sort-and-filter.ts +++ b/static-site/components/list-resources/use-sort-and-filter.ts @@ -85,7 +85,7 @@ export default function useSortAndFilter( ) { // resources updated on the same day or without a last updated date // sort alphabetically - return _lexicographicSort(a.name, b.name); + return _lexicographicSort(a.sortingName, b.sortingName); } else { return _newestFirstSort(a.lastUpdated, b.lastUpdated); } @@ -100,7 +100,7 @@ export default function useSortAndFilter( groupB.groupName.toLowerCase(), ); const _sortResources = (a: Resource, b: Resource) => - _lexicographicSort(a.name, b.name); + _lexicographicSort(a.sortingName, b.sortingName); const resourceGroups = sortAndFilter(groups, _sortGroups, _sortResources); setState(resourceGroups); } From 22bf7fb16c0be95656853fed6cfa49c5f2c5260f Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Nov 2025 10:38:30 -0800 Subject: [PATCH 15/18] static-site[list-resources]: Refactor "groupName" (i.e. pathogen name) construction Parameterize it so that callers can lump resources in the UI by names other than the first slash-delimited part of the resource name. I'll be using this shortly. 
--- src/resourceIndex.js | 7 ++++++ .../list-resources/listResourcesApi.tsx | 25 ++++++++++++++----- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/resourceIndex.js b/src/resourceIndex.js index fb01b65b8..5e2a53f28 100644 --- a/src/resourceIndex.js +++ b/src/resourceIndex.js @@ -216,6 +216,13 @@ class ListResources { if (this.groupHistory) { key = "pathVersions"; valuePairs = _resources + /* XXX TODO: This comparison relies on use of the default + * groupNameBuilder.¹ Currently the only sources that index + * intermediates use the default. + * -trs, 3 Nov 2025 + * + * ¹in static-site/components/list-resources/listResourcesApi.tsx + */ .filter(([name,]) => name.split('/')[0]===this.groupHistory) .map(_formatIntermediates); } else { diff --git a/static-site/components/list-resources/listResourcesApi.tsx b/static-site/components/list-resources/listResourcesApi.tsx index 18f354cbd..0521644e5 100644 --- a/static-site/components/list-resources/listResourcesApi.tsx +++ b/static-site/components/list-resources/listResourcesApi.tsx @@ -30,11 +30,18 @@ interface ResourceListingIntermediates { export async function listResourcesAPI( sourceId: string, resourceType: ResourceType, - {versioned, groupDisplayNames, groupUrl, groupUrlTooltip}: { + { + versioned, + groupNameBuilder = (name: string) => name.split("/")[0]!, // eslint-disable-line @typescript-eslint/no-non-null-assertion + groupDisplayNames, + groupUrl, + groupUrlTooltip + }: { /** Report prior versions of each resource. * TODO: infer this from the API data itself */ versioned: boolean, + groupNameBuilder?: (name: string) => string, groupDisplayNames?: Record, groupUrl?: (groupName: string) => string, groupUrlTooltip?: (groupName: string) => string @@ -52,8 +59,8 @@ export async function listResourcesAPI( } const groups = Object.entries( areDatasets(data) ? 
- groupDatasetsByPathogen(data.pathVersions, urlBuilder, versioned) : - groupIntermediatesByPathogen(data.latestVersions) + groupDatasetsByPathogen(data.pathVersions, urlBuilder, versioned, groupNameBuilder) : + groupIntermediatesByPathogen(data.latestVersions, groupNameBuilder) ).map(([groupName, resources]) => { const group = resourceGroup(groupName, resources); if (groupDisplayNames && groupName in groupDisplayNames) { @@ -113,6 +120,9 @@ function groupDatasetsByPathogen( /** boolean controlling addition of version-specific fields */ versioned: boolean, + + /** constructs the name (e.g. pathogen) under which to group a dataset */ + groupNameBuilder: (name: string) => string, ): Record { return Object.entries(pathVersions).reduce( (store: Record, [name, dates]) => { @@ -122,7 +132,7 @@ function groupDatasetsByPathogen( throw new InternalError(`Name is not properly formatted: '${name}'`); } - const groupName = nameParts[0]; + const groupName = groupNameBuilder(name); const resourceDetails: Resource = { name, @@ -159,7 +169,10 @@ function groupDatasetsByPathogen( } function groupIntermediatesByPathogen( - latestVersions: ResourceListingIntermediates['latestVersions'] + latestVersions: ResourceListingIntermediates['latestVersions'], + + /** constructs the name (e.g. 
pathogen) under which to group a file */ + groupNameBuilder: (name: string) => string, ): Record { return Object.entries(latestVersions).reduce( (store: Record, [baseName, d]) => { @@ -168,7 +181,7 @@ function groupIntermediatesByPathogen( if (baseParts[0] === undefined) { throw new InternalError(`Resource is not properly formatted (empty name)`); } - const groupName = baseParts[0]; + const groupName = groupNameBuilder(baseName); for (const [filename, urlDatePair] of Object.entries(d)) { if (filename==='mostRecentlyIndexed') continue; const nameParts = [...baseParts, filename] From b900c41e274dfc56b52fe5d5ab5debb7a50490e6 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Nov 2025 10:51:00 -0800 Subject: [PATCH 16/18] static-site[list-resources]: Enable customization of sorting by "groupName" I'll be using this shortly to statically sort one set of resource groups after another. --- static-site/app/groups/[group]/page.tsx | 1 + static-site/app/groups/available.tsx | 1 + static-site/components/list-resources/listResourcesApi.tsx | 6 ++++++ static-site/components/list-resources/types.ts | 1 + .../components/list-resources/use-sort-and-filter.ts | 4 ++-- 5 files changed, 11 insertions(+), 2 deletions(-) diff --git a/static-site/app/groups/[group]/page.tsx b/static-site/app/groups/[group]/page.tsx index 07d22f7ff..11d1cf8a0 100644 --- a/static-site/app/groups/[group]/page.tsx +++ b/static-site/app/groups/[group]/page.tsx @@ -123,6 +123,7 @@ export default function IndividualGroupPage({ return [{ groupName: group, groupDisplayName: group, + sortingGroupName: group, resources, nResources: resources.length, nVersions: undefined, diff --git a/static-site/app/groups/available.tsx b/static-site/app/groups/available.tsx index 2a3eba459..694c0077e 100644 --- a/static-site/app/groups/available.tsx +++ b/static-site/app/groups/available.tsx @@ -99,6 +99,7 @@ export default function Available(): React.ReactElement { return { groupName, groupDisplayName: groupName, + 
sortingGroupName: groupName, groupUrl: `/groups/${groupName}`, groupUrlTooltip: `Click to view the page for ${groupName}`, resources: filteredResources, diff --git a/static-site/components/list-resources/listResourcesApi.tsx b/static-site/components/list-resources/listResourcesApi.tsx index 0521644e5..fcef971bd 100644 --- a/static-site/components/list-resources/listResourcesApi.tsx +++ b/static-site/components/list-resources/listResourcesApi.tsx @@ -34,6 +34,7 @@ export async function listResourcesAPI( versioned, groupNameBuilder = (name: string) => name.split("/")[0]!, // eslint-disable-line @typescript-eslint/no-non-null-assertion groupDisplayNames, + groupSortableName, groupUrl, groupUrlTooltip }: { @@ -43,6 +44,7 @@ export async function listResourcesAPI( versioned: boolean, groupNameBuilder?: (name: string) => string, groupDisplayNames?: Record, + groupSortableName?: (group: Group) => string, groupUrl?: (groupName: string) => string, groupUrlTooltip?: (groupName: string) => string } @@ -66,6 +68,9 @@ export async function listResourcesAPI( if (groupDisplayNames && groupName in groupDisplayNames) { group.groupDisplayName = groupDisplayNames[groupName]; } + if (groupSortableName) { + group.sortingGroupName = groupSortableName(group); + } if (groupUrl) { group.groupUrl = groupUrl(groupName); } @@ -94,6 +99,7 @@ function resourceGroup(groupName: string, resources: Resource[]): Group { const groupInfo: Group = { groupName, + sortingGroupName: groupName, groupImgSrc: nextstrainLogoSmall.src, groupImgAlt: "nextstrain logo", resources, diff --git a/static-site/components/list-resources/types.ts b/static-site/components/list-resources/types.ts index 9e372f6bc..92a57c349 100644 --- a/static-site/components/list-resources/types.ts +++ b/static-site/components/list-resources/types.ts @@ -10,6 +10,7 @@ export type SortMethod = "lastUpdated" | "alphabetical"; export type Group = { groupName: string; + sortingGroupName: string; groupImgSrc?: string; groupImgAlt?: string; 
nResources: number; diff --git a/static-site/components/list-resources/use-sort-and-filter.ts b/static-site/components/list-resources/use-sort-and-filter.ts index 0eb2132ed..cafa41e5f 100644 --- a/static-site/components/list-resources/use-sort-and-filter.ts +++ b/static-site/components/list-resources/use-sort-and-filter.ts @@ -96,8 +96,8 @@ export default function useSortAndFilter( const groups = originalData; const _sortGroups = (groupA: Group, groupB: Group) => _lexicographicSort( - groupA.groupName.toLowerCase(), - groupB.groupName.toLowerCase(), + groupA.sortingGroupName.toLowerCase(), + groupB.sortingGroupName.toLowerCase(), ); const _sortResources = (a: Resource, b: Resource) => _lexicographicSort(a.sortingName, b.sortingName); From 4e59d79ca1d092f2bfb39256d7ea08b354ecc504 Mon Sep 17 00:00:00 2001 From: Thomas Sibley Date: Mon, 3 Nov 2025 10:52:24 -0800 Subject: [PATCH 17/18] =?UTF-8?q?Expose=20Nextclade=20dataset=20reference?= =?UTF-8?q?=20trees=20under=20/nextclade/=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The new Nextclade source class and related classes in the Source/Resource/Subresource framework provide access to the "latest" Nextclade dataset reference trees and resolve a myriad of supported aliases. The resource indexer is extended with a new "resource collection" for Nextclade that essentially transforms the existing Nextclade indexes into what our resource indexer expects. This, in turn, fits into existing resourceIndexer/ and src/resourceIndex.js code to provide access to historical versions of Nextclade dataset reference trees. Bumps the resource index version to v9 since changes were made to resourceIndex/. The static-site/app/nextclade/… files were largely copied from static-site/app/staging/… and then modified to refer to the "nextclade" source instead. There is a lot of boilerplate and duplication. 
But that appears to be the way it's been done for other usages, and I don't have time to make it better so close to the eve of my departure. The biggest differences are in the resources.tsx file. Resolves: --- .github/workflows/index-resources.yml | 2 +- data/manifest_core.json | 6 - env/production/config.json | 2 +- env/testing/config.json | 2 +- resourceIndexer/main.js | 12 +- resourceIndexer/nextclade.js | 150 ++++++++++ src/app.js | 11 +- src/resourceIndex.js | 2 + src/routing/core.js | 1 - src/routing/index.js | 1 + src/routing/nextclade.js | 29 ++ src/sources/index.js | 1 + src/sources/nextclade.js | 264 ++++++++++++++++++ src/utils/prefix.js | 3 + .../nextclade/[[...nextclade]]/content.tsx | 55 ++++ .../[[...nextclade]]/error-banner.tsx | 43 +++ .../nextclade/[[...nextclade]]/not-found.tsx | 20 ++ .../app/nextclade/[[...nextclade]]/page.tsx | 69 +++++ .../nextclade/[[...nextclade]]/resources.tsx | 42 +++ 19 files changed, 699 insertions(+), 16 deletions(-) create mode 100644 resourceIndexer/nextclade.js create mode 100644 src/routing/nextclade.js create mode 100644 src/sources/nextclade.js create mode 100644 static-site/app/nextclade/[[...nextclade]]/content.tsx create mode 100644 static-site/app/nextclade/[[...nextclade]]/error-banner.tsx create mode 100644 static-site/app/nextclade/[[...nextclade]]/not-found.tsx create mode 100644 static-site/app/nextclade/[[...nextclade]]/page.tsx create mode 100644 static-site/app/nextclade/[[...nextclade]]/resources.tsx diff --git a/.github/workflows/index-resources.yml b/.github/workflows/index-resources.yml index 37e97928d..d0a4d6ba9 100644 --- a/.github/workflows/index-resources.yml +++ b/.github/workflows/index-resources.yml @@ -85,7 +85,7 @@ jobs: node resourceIndexer/main.js \ --gzip --output resources.json.gz \ --resourceTypes dataset intermediate \ - --collections core staging + --collections core staging nextclade - name: Upload the new index, overwriting the existing index if: ${{ 
startsWith(env.RESOURCE_INDEX, 's3://') }} run: | diff --git a/data/manifest_core.json b/data/manifest_core.json index 7f6592620..310a51414 100644 --- a/data/manifest_core.json +++ b/data/manifest_core.json @@ -582,12 +582,6 @@ "default": "open" } }, - "nextclade": { - "dataset": { - "sars-cov-2": "", - "default": "sars-cov-2" - } - }, "nipah": { "resolution": { "all": "", diff --git a/env/production/config.json b/env/production/config.json index 4cc577385..c9a476ceb 100644 --- a/env/production/config.json +++ b/env/production/config.json @@ -110,6 +110,6 @@ "OIDC_GROUPS_CLAIM": "cognito:groups", "SESSION_COOKIE_DOMAIN": "nextstrain.org", "GROUPS_DATA_FILE": "groups.json", - "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v8.json.gz", + "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v9.json.gz", "PLAUSIBLE_ANALYTICS_DOMAIN": "nextstrain.org" } diff --git a/env/testing/config.json b/env/testing/config.json index 5175b807e..8e2437ce1 100644 --- a/env/testing/config.json +++ b/env/testing/config.json @@ -108,5 +108,5 @@ "OIDC_USERNAME_CLAIM": "cognito:username", "OIDC_GROUPS_CLAIM": "cognito:groups", "GROUPS_DATA_FILE": "groups.json", - "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v8.json.gz" + "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v9.json.gz" } diff --git a/resourceIndexer/main.js b/resourceIndexer/main.js index 8f5ac0e19..6b7f78bae 100644 --- a/resourceIndexer/main.js +++ b/resourceIndexer/main.js @@ -2,6 +2,7 @@ import { ArgumentParser } from 'argparse'; import fs from 'fs'; import { coreS3Data, stagingS3Data } from "./coreStagingS3.js"; +import { NextcladeData } from "./nextclade.js"; import zlib from 'zlib'; import { promisify } from 'util'; import { ResourceIndexerError } from './errors.js'; @@ -19,8 +20,8 @@ const gzip = promisify(zlib.gzip) * (sub-)class and resourcePath to parallel the information in the Resource * (sub-)class. 
* - * Currently only sources {core, staging} and resource types {dataset, - * intermediate} are part of the index. + * Currently only sources {core, staging, nextclade} and resource types + * {dataset, intermediate} are part of the index. * * As an example, the core WNV/NA (nextstrain.org/WNV/NA) dataset is indexed * like so: @@ -34,6 +35,7 @@ const gzip = promisify(zlib.gzip) const COLLECTIONS = [ coreS3Data, stagingS3Data, + new NextcladeData(), ]; function parseArgs() { @@ -47,13 +49,13 @@ function parseArgs() { `, }); argparser.addArgument("--local", {action: 'storeTrue', - help: 'Access a local copy of S3 inventories within ./devData/. See docstring of fetchInventoryLocal() for expected filenames.'}) + help: 'Access a local copy of S3 inventories ({core,staging}.manifest.json and {core,staging}-*.csv.gz) and Nextclade indexes (nextclade/index.json and nextclade/**/pathogen.json) within ./devData/ instead of downloading them'}) argparser.addArgument("--collections", {metavar: "", type: "string", nargs: '+', choices: COLLECTIONS.map((c) => c.name), help: "Only fetch data from a subset of collections. 
Source names are those defined in COLLECTIONS"}); argparser.addArgument("--resourceTypes", {metavar: "", type: "string", nargs: '+', choices: ['dataset', 'intermediate'], help: "Only index data matching specified resource types"}); argparser.addArgument("--save-inventories", {action: 'storeTrue', - help: "Save the fetched inventories + manifest files to ./devData so that future invocations can use --local"}); + help: "Save a local copy of S3 inventories and Nextclade indexes to ./devData/ so that future invocations can use --local"}); argparser.addArgument("--output", {metavar: "", required: true}) argparser.addArgument("--indent", {action: 'storeTrue', help: 'Indent the output JSON'}) argparser.addArgument("--gzip", {action: 'storeTrue', help: 'GZip the output JSON'}) @@ -118,4 +120,4 @@ async function main(args) { output = await gzip(output) } fs.writeFileSync(args.output, output); -} \ No newline at end of file +} diff --git a/resourceIndexer/nextclade.js b/resourceIndexer/nextclade.js new file mode 100644 index 000000000..8058fe24c --- /dev/null +++ b/resourceIndexer/nextclade.js @@ -0,0 +1,150 @@ +/** + * Index Nextclade dataset reference trees, including past versions. + * + * Transforms Nextclade's own index for use with our resourceIndexer/… and + * src/resourceIndex.js framework. + */ +import { strict as assert } from "assert"; +import { DateTime } from "luxon"; +import { readFile, writeFile, mkdir } from "node:fs/promises"; +import path from "node:path"; + +import { fetch } from "../src/fetch.js"; +import { NextcladeSource } from "../src/sources/nextclade.js"; +import { rootDirFullPath } from "../src/utils/index.js"; + + +const LOCAL_DATA = path.relative(".", path.join(rootDirFullPath, "devData", "nextclade")); +const LOCAL_INDEX = path.join(LOCAL_DATA, "index.json"); + + +/* All class members are part of the "collection" interface expected by + * resourceIndexer/main.js and use its terminology for arguments and return + * values. 
This interface is kind of a weird fit for things that aren't S3 + * inventories, so the chain of methods and way they pass values are a bit + * contrived. + */ +export class NextcladeData { + #source; + + name = "nextclade"; + + async collect({local, save}) { + if (local) { + console.log(`Reading ${LOCAL_INDEX}`); + this.#source = new NextcladeSource(JSON.parse(await readFile(LOCAL_INDEX))); + } + else { + this.#source = new NextcladeSource(); + + if (save) { + console.log(`Saving ${LOCAL_INDEX}`); + await mkdir(path.dirname(LOCAL_INDEX), {recursive: true}); + await writeFile(LOCAL_INDEX, JSON.stringify(await this.#source._index(), null, 2)); + } + } + + const datasetPaths = await this.#source.availableDatasets(); + + return (await Promise.all( + datasetPaths.map(async (datasetPath) => { + const dataset = this.#source.dataset(datasetPath.split("/")); + const indexDataset = await dataset._indexDataset(); + + /* Sort and collapse versions per our documented behaviour: + * + * > All times are UTC. A datestamp refers to datasets uploaded + * > between 00h00 and 23h59 UTC on that day. + * + * > If multiple datasets are uploaded on the same day we take the most + * > recent. + * + * See . 
+ */ + const datesSeen = new Set(); + const indexVersions = + indexDataset.versions + .map(v => ({...v, _timestamp: DateTime.fromISO(v.updatedAt, {zone:"UTC"})})) + .toSorted((a, b) => b._timestamp - a._timestamp) + .map(v => ({...v, _date: v._timestamp.toISODate()})) + .filter(v => !datesSeen.has(v._date) && datesSeen.add(v._date)) + + // Produce one resourceIndexer/main.js "item" per dataset version + return (await Promise.all( + indexVersions.map(async (indexVersion) => { + const versionMetaPath = `${indexDataset.path}/${indexVersion.tag}/pathogen.json`; + + const localFile = path.join(LOCAL_DATA, versionMetaPath); + + let versionMeta; + + if (local) { + console.log(`Reading ${localFile}`); + versionMeta = JSON.parse(await readFile(localFile)); + } + else { + const remoteUrl = await this.#source.urlFor(versionMetaPath); + + console.log(`Fetching ${remoteUrl}`); + const response = await fetch(remoteUrl, {cache: "no-cache"}); + assert(response.status === 200); + + versionMeta = await response.json(); + + if (save) { + console.log(`Saving ${localFile}`); + await mkdir(path.dirname(localFile), {recursive: true}); + await writeFile(localFile, JSON.stringify(versionMeta, null, 2)); + } + } + + /* This filter must be *after* we fetch the version's own + * pathogen.json. Because versions are filtered to one-per-day + * *before* we fetch, it's possible there's an older version from + * the same day that *does* include a treeJson, and we'd miss it. + * The fix would be fetching *all* versions and only then filtering + * to one-per-day (i.e. in createResource() below). + * + * Doing so, however, seems unnecessary. The scenario seems + * unlikely and it's not entirely clear how we'd want to interpret + * such a dataset update anyway (e.g. was the earlier version on + * the same day in error?). + * + * Also note that this filters out some datasets entirely: those + * that don't have a reference tree at all. 
+ * -trs, 27 Oct 2025 + */ + if (!versionMeta.files.treeJson) + return; + + // One "item" produced by collect() + return { + // Used by resourceIndexer/main.js + source: this.#source.name, + resourceType: "dataset", + resourcePath: datasetPath, + + // Used in createResource() below + version: { + date: indexVersion._date, + fileUrls: { + main: await this.#source.urlFor(`${indexDataset.path}/${indexVersion.tag}/${versionMeta.files.treeJson}`) + } + }, + }; + }) + )).flat(); + }) + )).flat(); + } + + categorise(item) { + return item; + } + + createResource(resourceType, resourcePath, items) { + return { + versions: items.map(i => i.version), + }; + } +} diff --git a/src/app.js b/src/app.js index d704fd46d..9869ac070 100644 --- a/src/app.js +++ b/src/app.js @@ -14,6 +14,7 @@ const { errors, fetch, groups, + nextclade, openid, pathogenRepos, schemas, @@ -68,7 +69,6 @@ charon.setup(app); * /monkeypox * /mpox * /ncov - * /nextclade * /rsv * /rubella * /seasonal-flu @@ -92,6 +92,15 @@ core.setup(app); staging.setup(app); +/* Nextclade reference datasets + * + * Routes: + * /nextclade + * /nextclade/* + */ +nextclade.setup(app); + + /* Community on GitHub * * Routes: diff --git a/src/resourceIndex.js b/src/resourceIndex.js index 5e2a53f28..71ee931d8 100644 --- a/src/resourceIndex.js +++ b/src/resourceIndex.js @@ -191,6 +191,8 @@ class ListResources { return "" case "staging": return "staging/" + case "nextclade": + return "nextclade/" default: throw new InternalServerError(`Source "${name}" does not have a corresponding prefix`) } diff --git a/src/routing/core.js b/src/routing/core.js index 74641919e..37b2ac0ff 100644 --- a/src/routing/core.js +++ b/src/routing/core.js @@ -40,7 +40,6 @@ const coreBuildPaths = [ "/monkeypox", // Not actively updated, but YYYY-MM-DD URLs remain & don't redirect "/mpox", // monkeypox URLs will redirect to /mpox (except for datestamped URLs) "/ncov", - "/nextclade", "/nipah", "/norovirus", "/oropouche", diff --git a/src/routing/index.js 
b/src/routing/index.js index ae743a0ff..899ff693a 100644 --- a/src/routing/index.js +++ b/src/routing/index.js @@ -8,6 +8,7 @@ export * as errors from "./errors.js"; export * as fetch from './fetch.js'; export * as groups from './groups.js'; export * as listResources from './listResources.js'; +export * as nextclade from './nextclade.js'; export * as openid from './openid.js'; export * as pathogenRepos from './pathogenRepos.js'; export * as schemas from './schemas.js'; diff --git a/src/routing/nextclade.js b/src/routing/nextclade.js new file mode 100644 index 000000000..f8c6f3b02 --- /dev/null +++ b/src/routing/nextclade.js @@ -0,0 +1,29 @@ +import * as endpoints from '../endpoints/index.js'; +import * as sources from '../sources/index.js'; + +const { + setSource, + setDataset, + canonicalizeDataset, + getDataset, + optionsDataset, +} = endpoints.sources; + +const { + NextcladeSource, +} = sources; + + +export function setup(app) { + app.use("/nextclade", setSource(req => new NextcladeSource())); // eslint-disable-line no-unused-vars + + app.routeAsync("/nextclade") + .getAsync(endpoints.nextJsApp.handleRequest) + ; + + app.routeAsync("/nextclade/*") + .allAsync(setDataset(req => req.params[0]), canonicalizeDataset(path => `/nextclade/${path}`)) + .getAsync(getDataset) + .optionsAsync(optionsDataset) + ; +} diff --git a/src/sources/index.js b/src/sources/index.js index 979f6dec6..e6c14b04f 100644 --- a/src/sources/index.js +++ b/src/sources/index.js @@ -5,3 +5,4 @@ export { CoreSource, CoreStagingSource } from './core.js'; export { CommunitySource } from './community.js'; export { UrlDefinedSource } from './fetch.js'; export { GroupSource } from './groups.js'; +export { NextcladeSource } from './nextclade.js'; diff --git a/src/sources/nextclade.js b/src/sources/nextclade.js new file mode 100644 index 000000000..b0bbf0fdb --- /dev/null +++ b/src/sources/nextclade.js @@ -0,0 +1,264 @@ +import * as authz from '../authz/index.js'; +import { fetch } from '../fetch.js'; 
+import { NotFound } from '../httpErrors.js'; +import { ResourceVersions } from '../resourceIndex.js'; +import { re } from '../templateLiterals.js'; +import { Source, Dataset, DatasetSubresource } from './models.js'; + +// We privilege our own collection since this is our site +const NEXTSTRAIN_COLLECTION_ID = "nextstrain"; +const NEXTSTRAIN_COLLECTION_PREFIX = re`^${NEXTSTRAIN_COLLECTION_ID}/`; + +/* We hardcode what collections to expose so that adding a new collection + * upstream in Nextclade's index doesn't automatically and unexpectedly expose + * it here too. The idea is that we should opt-in knowingly by adding the new + * collection here and testing things work. This hardcoding also lets us + * implement the NextcladeDataset.baseName getter without an additional async + * lookup operation. + * -trs, 3 Nov 2025 + */ +const COLLECTION_IDS = new Set([NEXTSTRAIN_COLLECTION_ID, "community"]); + + +/** + * Nextclade dataset reference trees. + * + * Decisions we made about behaviour: + * + * • Drop the leading nextstrain/ from dataset names, but accept it as an + * alias by redirecting, e.g. + * + * https://nextstrain.org/nextclade/nextstrain/mpox/clade-iib + * → https://nextstrain.org/nextclade/mpox/clade-iib + * + * • Accept the index's shortcut names, but expand them by redirection to the + * canonical name, e.g. + * + * https://nextstrain.org/nextclade/hMPXV + * → https://nextstrain.org/nextclade/mpox/clade-iib + * + * Some shortcuts have "_" in their name (e.g. flu_h1n1pdm_na); accept + * those both as-is and with s{_}{/}g applied (e.g. flu/h1n1pdm/na). + * + * • Prefer full names (minus leading nextstrain/) as the canonical name + * + * See also . + * + * The convention in this file is that underscored names are used for + * extensions to the sources models and private fields are used for internal + * data. This helps keep clear what's part of the sources interface and what's + * not. 
+ */ +export class NextcladeSource extends Source { + #index; + #indexDatasets; + + /* This constructor param is only used by the resourceIndexer/ code when + * loading the Nextclade index from disk instead of the network. + */ + constructor(index) { + super(); + if (index) + this.#index = index; + } + + get name() { return "nextclade"; } + async baseUrl() { return "https://data.clades.nextstrain.org/v3/"; } + + async _index() { + /* Source instances are constructed for each request, so this + * instance-local cache results in one index fetch per request. The + * fetch()-level HTTP caching results in conditional fetches to the + * upstream that mostly return as 304 Not Modified. This seems Fine, at + * least For Now. + * -trs, 16 Oct 2025 + */ + return this.#index ??= await (await fetch(await this.urlFor("index.json"), {cache: "no-cache"})).json(); + } + + dataset(pathParts, versionDescriptor) { + return new NextcladeDataset(this, pathParts, versionDescriptor); + } + + async availableDatasets() { + return (await this._indexDatasets()) + .map(({path}) => path.replace(NEXTSTRAIN_COLLECTION_PREFIX, "")); + } + + async _datasetAliases() { + return new Map( + (await this._indexDatasets()) + .flatMap(({path, shortcuts}) => [ + /* Canonicalize nextstrain/a/b/c → a/b/c since we're on nextstrain.org + * after all. + */ + [path, path.replace(NEXTSTRAIN_COLLECTION_PREFIX, "")], + + /* Include index-defined shortcuts under the permutations of a) + * removing the leading "nextstrain/" and b) replacing underscores (_) + * with slashes (/). Spell out all permutations so an + * iterative/recursive alias resolution is not necessary. + */ + ...((shortcuts ?? 
[]).flatMap(shortcut => [ + [ + shortcut + .replace(NEXTSTRAIN_COLLECTION_PREFIX, "") + .replace(/_/g, "/"), + path.replace(NEXTSTRAIN_COLLECTION_PREFIX, "") + ], + [ + shortcut + .replace(NEXTSTRAIN_COLLECTION_PREFIX, ""), + path.replace(NEXTSTRAIN_COLLECTION_PREFIX, "") + ], + [ + shortcut + .replace(/_/g, "/"), + path.replace(NEXTSTRAIN_COLLECTION_PREFIX, "") + ], + [ + shortcut, + path.replace(NEXTSTRAIN_COLLECTION_PREFIX, "") + ], + ])), + ]) + .filter(([alias, path]) => alias !== path) + ); + } + + async _indexDatasets() { + return this.#indexDatasets ??= + (await this._index()) + .collections + .filter(c => COLLECTION_IDS.has(c.meta.id)) + .flatMap(c => c.datasets) + .filter(d => d.files.treeJson); + } + + async getInfo() { + return { + title: "Nextclade reference dataset trees", + showDatasets: true, + showNarratives: false, + }; + } + + get authzPolicy() { + return [ + {tag: authz.tags.Visibility.Public, role: "*", allow: [authz.actions.Read]}, + ]; + } + get authzTags() { + return new Set([ + authz.tags.Type.Source, + authz.tags.Visibility.Public, + ]); + } + get authzTagsToPropagate() { + return new Set([ + authz.tags.Visibility.Public, + ]); + } +} + +class NextcladeDataset extends Dataset { + static get Subresource() { + return NextcladeDatasetSubresource; + } + + // eslint-disable-next-line no-unused-vars + assertValidPathParts(pathParts) { + // Override check for underscores (_), as we want to allow Nextclade + // dataset paths that include them. There is no risk of "confused deputy" + // problems as this source 1) only allows fixed datasets from an index and + // 2) uses slashes (/) not underscores (_) when joining path parts. + } + get baseParts() { + return this.pathParts.slice(); + } + get baseName() { + const explicitCollections = new Set( + Array.from(COLLECTION_IDS) + .filter(id => id !== NEXTSTRAIN_COLLECTION_ID) + ); + + return explicitCollections.has(this.baseParts[0]) + ? 
this.baseParts.join("/") + : `${NEXTSTRAIN_COLLECTION_ID}/${this.baseParts.join("/")}`; + } + + async resolve() { + /* Resolve using a complete and static map of aliases for all paths we + * support. This avoids the need for recursive resolving like other + * sources which dynamically determine supported aliases. + */ + const aliases = await this.source._datasetAliases(); + + const resolvedPath = aliases.get(this.pathParts.join("/")); + if (resolvedPath) + return new this.constructor(this.source, resolvedPath.split("/"), this.versionDescriptor); + + return this; + } + + versionInfo() { + /* Copied wholesale from src/sources/core.js. This is part of the tension + * between the Source/Resource/Subresource framework and the + * resourceIndexer/ResourceVersions/ListResources framework. + * -trs, 23 Oct 2025 + */ + if (!this.versionDescriptor) { + return [null, undefined]; + } + + const versions = new ResourceVersions(this.source.name, 'dataset', this.pathParts.join("/")); + const versionDate = versions.versionDateFromDescriptor(this.versionDescriptor); + const versionUrls = versionDate ? versions.subresourceUrls(versionDate) : undefined + return [versionDate, versionUrls]; + } + + async _indexDataset() { + /* XXX TODO: Consider making this resolve this.versionDescriptor to a + * specific version and returning the fetched pathogen.json for that + * version. + * + * This would be most appropriate in the case of abandoning the bolted-on + * versionInfo() method for version resolution that's more integrated into + * the Source/Resource/Subresource framework instead of done completely + * separately. 
+ * -trs, 3 Nov 2025 + */ + return (await this.source._indexDatasets()) + .find(d => d.path === this.baseName); + } +} + + +class NextcladeDatasetSubresource extends DatasetSubresource { + constructor(resource, type) { + super(resource, type); + + if (this.type !== "main") + throw new NotFound(`Nextclade datasets do not provide a '${this.type}' sidecar`); + } + + async baseName() { + /* Note that this method ignores this.resource.versionDescriptor because + * it's not expected to be called if that property has a value; it expects + * that its caller, the Subresource.url() method, will instead go thru the + * this.resource.versionInfo() code path in that case. See also the + * comment in ._indexDataset() above. + * -trs, 3 Nov 2025 + */ + const indexed = await this.resource._indexDataset(); + + if (!indexed) + throw new NotFound(`Dataset '${this.resource.baseName}' is not in Nextclade's index (or does not have a tree)`); + + /* The version tag here is the "latest" version of the dataset. See also + * . 
+ * -trs, 3 Nov 2025 + */ + return `${indexed.path}/${indexed.version.tag}/${indexed.files.treeJson}`; + } +} diff --git a/src/utils/prefix.js b/src/utils/prefix.js index 00af05057..c8120e52d 100644 --- a/src/utils/prefix.js +++ b/src/utils/prefix.js @@ -9,6 +9,7 @@ import * as sources from '../sources/index.js'; const sourceNameToClass = new Map([ ["core", sources.CoreSource], ["staging", sources.CoreStagingSource], + ["nextclade", sources.NextcladeSource], ["community", sources.CommunitySource], ["fetch", sources.UrlDefinedSource], ["groups", sources.GroupSource], @@ -44,6 +45,7 @@ const splitPrefixIntoParts = (prefix) => { switch (prefixParts[0]) { case "community": case "staging": + case "nextclade": case "fetch": sourceName = prefixParts.shift(); break; @@ -119,6 +121,7 @@ const joinPartsIntoPrefix = async ({source, prefixParts, isNarrative = false}) = switch (sourceName) { case "community": case "staging": + case "nextclade": case "fetch": leadingParts.push(sourceName); break; diff --git a/static-site/app/nextclade/[[...nextclade]]/content.tsx b/static-site/app/nextclade/[[...nextclade]]/content.tsx new file mode 100644 index 000000000..6330d4ec0 --- /dev/null +++ b/static-site/app/nextclade/[[...nextclade]]/content.tsx @@ -0,0 +1,55 @@ +import React from "react"; + +import { TitledMetadata } from "../../types"; +import FlexCenter from "../../../components/flex-center"; +import { FocusParagraphCentered } from "../../../components/focus-paragraph"; +import NextcladeResourceListing from "./resources"; +import { SmallSpacer, HugeSpacer } from "../../../components/spacers"; + +/** + * A React Server component that generates the contents of the + * /nextclade page. + * + * This is abstracted out into a distinct component so that it can + * also be used in the "./not-found.tsx" component, to render the + * /nextclade page content beneath an error banner, when a bad URL is + * requested. 
+ */ +export default function NextcladePageContent({ + metadata, +}: { + /** + * A Metadata object, that is assumed to have a `title` key with a + * string value + */ + metadata: TitledMetadata; +}): React.ReactElement { + const title = metadata.title; + + return ( + <> + + + +

{title}

+ + + + + + Part of{" "} +
Nextclade datasets produced by the{" "} + core Nextstrain team and broader Nextclade community. + + + + + + + + ); +} diff --git a/static-site/app/nextclade/[[...nextclade]]/error-banner.tsx b/static-site/app/nextclade/[[...nextclade]]/error-banner.tsx new file mode 100644 index 000000000..38fd6a121 --- /dev/null +++ b/static-site/app/nextclade/[[...nextclade]]/error-banner.tsx @@ -0,0 +1,43 @@ +"use client"; + +import React from "react"; +import { useParams } from "next/navigation"; + +import ErrorMessage from "../../../components/error-message"; + +/** + * A React Client component that detects if the requested URL + * contains path elements past `/nextclade`, and, if so, returns a + * component that displays an error message banner. If additional + * path elements are not detected, returns null. + * + * N.b., the way this component is used, we only render it when we've + * already determined that there _is_ a need to display an error + * message. In other words, it is fully expected that the `else` + * branch of the conditional will never actually execute. + */ +export default function ErrorBanner(): React.ReactElement | null { + const params = useParams(); + + if (params && params["nextclade"]) { + // n.b., I don't think `params["nextclade"]` is ever going to be + // anything other than a list, but let's make the type checker + // happy… + const path = + typeof params["nextclade"] === "string" + ? params["nextclade"] + : params["nextclade"].join("/"); + + const resourceType = path.startsWith("narratives") + ? "narrative" + : "dataset"; + + const title = `The Nextclade ${resourceType} "nextstrain.org/nextclade/${path}" doesn't exist.`; + const contents =

Here is the Nextclade reference trees page instead.

; + + return ; + } else { + // this will never happen + return null; + } +} diff --git a/static-site/app/nextclade/[[...nextclade]]/not-found.tsx b/static-site/app/nextclade/[[...nextclade]]/not-found.tsx new file mode 100644 index 000000000..e57bc4904 --- /dev/null +++ b/static-site/app/nextclade/[[...nextclade]]/not-found.tsx @@ -0,0 +1,20 @@ +import React from "react"; + +import { ErrorBanner } from "../../../components/error-banner"; + +import NextcladePageContent from "./content"; +import { metadata } from "./page"; + +/** + * A React Server component that renders the usual `/nextclade` page + * content, with an error banner up-top explaining that the requested + * dataset doesn't actually exist. + */ +export default function FourOhFour(): React.ReactElement { + return ( + <> + + + + ); +} diff --git a/static-site/app/nextclade/[[...nextclade]]/page.tsx b/static-site/app/nextclade/[[...nextclade]]/page.tsx new file mode 100644 index 000000000..700bd80b6 --- /dev/null +++ b/static-site/app/nextclade/[[...nextclade]]/page.tsx @@ -0,0 +1,69 @@ +import React from "react"; + +import type { TitledMetadata } from "../../types"; +import { ValidateUrl } from "../../../components/error-banner"; + +import NextcladePageContent from "./content"; + +const title = "Nextclade reference trees"; + +export const metadata: TitledMetadata = { + title, +}; + +/** + * A React Server Component for `/nextclade` + * + * A note about how this page works: + * + * We expect three different types of requests for resources under + * `/nextclade`: + * + * 1) Requests for real, existing datasets (e.g., `/nextclade/measles/genome/WHO-2012`) — + * these requests are handled by the Express-level router, and this + * Next.js page never sees them + * + * 2) Requests for the plain `/nextclade` page — that request is handled + * by this page, and we expect it to return a resource listing of + * Nextclade reference trees, with an HTTP status code of 200 + * + * 3) Requests for some longer URL that 
does NOT correspond to a real, + * existing dataset (e.g., `/nextclade/foo`) — in this case, we want + * to display the same resource listing as the base `/nextclade` + * page, but to also include an error banner indicating that the + * requested resource (`nextstrain.org/nextclade/foo` in our example) + * does not exist. We also want the HTTP status code for the + * response to this request to be a 404 + * + * We accomplish this as follows: + * + * Requests of type #1 are handled completely at the Express level, + * and this page never sees them. + * + * Requests of type #2 and type #3 _are_ handled by this page. It uses + * the `<ValidateUrl>` component to detect whether the + * requested URL was the plain `/nextclade` or whether there are + * additional path components beyond that (again, `/nextclade/foo` in + * our example). If there _are_ additional path elements, + * `<ValidateUrl>` detects that and calls Next.js's + * `notFound()` method, which results in the `./not-found.tsx` page + * being rendered and returned. If there are not additional path + * elements (i.e., if the request was for `/nextclade`), + * `<ValidateUrl>` returns nothing, and the + * `<NextcladePageContent>` component delivers the desired resource + * listing. + * + * If the `./not-found.tsx` page is rendered, it handles the display + * of the error banner; it also uses the `<NextcladePageContent>` + * component to render the same resource listing as the default case. + * However, because it has been invoked via the Next.js `notFound()` + * method, it will return a 404 status code. 
+ */ +export default function NextcladePage(): React.ReactElement { + return ( + <> + + + + ); +} diff --git a/static-site/app/nextclade/[[...nextclade]]/resources.tsx b/static-site/app/nextclade/[[...nextclade]]/resources.tsx new file mode 100644 index 000000000..6f2a73d15 --- /dev/null +++ b/static-site/app/nextclade/[[...nextclade]]/resources.tsx @@ -0,0 +1,42 @@ +"use client"; +// Note: this is only in a separate file as it needs to be run client side +// and we want to run as much of the parent page server-side as possible. + +import React from "react"; +import ListResources from "../../../components/list-resources"; +import { listResourcesAPI } from "../../../components/list-resources/listResourcesApi"; +import { Group } from "../../../components/list-resources/types"; + +export default function NextcladeResourceListing(): React.ReactElement { + return ( + + ) +} + +async function nextcladeDatasetResourceGroups() { + return await listResourcesAPI('nextclade', 'dataset', { + versioned: true, + + /* For dataset "community/a/b/c", use "community/a" as the grouping instead + * of just "community". + */ + groupNameBuilder: (name: string): string => { + return name.startsWith("community/") + ? name.split("/").slice(0, 2).join("/") + : name.split("/")[0]!; // eslint-disable-line @typescript-eslint/no-non-null-assertion + }, + + // Sort "community/…" datasets after ours + groupSortableName: (group: Group): string => { + const name = group.groupName; + return name.startsWith("community/") ? 
`001 ${name}` : + `000 ${name}` ; + }, + }); +} From 04468c18665f4c5dc13de7cf1bbf1c5c1a7b252d Mon Sep 17 00:00:00 2001 From: Victor Lin Date: Thu, 4 Dec 2025 15:39:23 -0800 Subject: [PATCH 18/18] =?UTF-8?q?=F0=9F=9A=A7=20logo=20and=20grouping=20im?= =?UTF-8?q?provements?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../app/nextclade/[[...nextclade]]/resources.tsx | 11 ++++++++++- .../app/pathogens/[[...pathogens]]/resources.tsx | 2 ++ static-site/app/pathogens/files/resources.tsx | 2 ++ .../app/staging/[[...staging]]/resources.tsx | 6 +++++- .../list-resources/listResourcesApi.tsx | 16 +++++++++++----- 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/static-site/app/nextclade/[[...nextclade]]/resources.tsx b/static-site/app/nextclade/[[...nextclade]]/resources.tsx index 6f2a73d15..78ce8fa44 100644 --- a/static-site/app/nextclade/[[...nextclade]]/resources.tsx +++ b/static-site/app/nextclade/[[...nextclade]]/resources.tsx @@ -6,6 +6,7 @@ import React from "react"; import ListResources from "../../../components/list-resources"; import { listResourcesAPI } from "../../../components/list-resources/listResourcesApi"; import { Group } from "../../../components/list-resources/types"; +import nextstrainLogoSmall from "../../../static/logos/nextstrain-logo-small.png"; export default function NextcladeResourceListing(): React.ReactElement { return ( @@ -28,7 +29,7 @@ async function nextcladeDatasetResourceGroups() { */ groupNameBuilder: (name: string): string => { return name.startsWith("community/") - ? name.split("/").slice(0, 2).join("/") + ? name.split("/").slice(1, 3).join("/") : name.split("/")[0]!; // eslint-disable-line @typescript-eslint/no-non-null-assertion }, @@ -38,5 +39,13 @@ async function nextcladeDatasetResourceGroups() { return name.startsWith("community/") ? 
`001 ${name}` : `000 ${name}` ; }, + + // Add Nextstrain logo for core datasets + groupImg: (group: Group) => { + const isOfficialDataset = group.resources.some((r) => !r.name.startsWith('community/')); + return isOfficialDataset + ? { src: nextstrainLogoSmall.src, alt: "nextstrain logo" } + : undefined; + }, }); } diff --git a/static-site/app/pathogens/[[...pathogens]]/resources.tsx b/static-site/app/pathogens/[[...pathogens]]/resources.tsx index d8cf47e2f..f6bb41c36 100644 --- a/static-site/app/pathogens/[[...pathogens]]/resources.tsx +++ b/static-site/app/pathogens/[[...pathogens]]/resources.tsx @@ -6,6 +6,7 @@ import React from "react"; import ListResources from "../../../components/list-resources"; import { listResourcesAPI } from "../../../components/list-resources/listResourcesApi"; import { coreResources } from "../../../content/resource-listing"; +import nextstrainLogoSmall from "../../../static/logos/nextstrain-logo-small.png"; /** @@ -31,6 +32,7 @@ async function _coreDatasetResourceGroups() { groupDisplayNames: coreResources["coreGroupDisplayNames"], groupUrl: (groupName: string) => `/${groupName}`, groupUrlTooltip: (groupName: string) => `Click to load the default (and most recent) analysis for ${coreResources["coreGroupDisplayNames"][groupName] || groupName}`, + groupImg: () => ({ src: nextstrainLogoSmall.src, alt: "nextstrain logo" }), }; return await listResourcesAPI('core', 'dataset', opts); } \ No newline at end of file diff --git a/static-site/app/pathogens/files/resources.tsx b/static-site/app/pathogens/files/resources.tsx index 6b3e32666..ad55405b0 100644 --- a/static-site/app/pathogens/files/resources.tsx +++ b/static-site/app/pathogens/files/resources.tsx @@ -6,6 +6,7 @@ import React from "react"; import ListResources from "../../../components/list-resources"; import { listResourcesAPI } from "../../../components/list-resources/listResourcesApi"; import { coreResources } from "../../../content/resource-listing"; +import nextstrainLogoSmall 
from "../../../static/logos/nextstrain-logo-small.png"; /** * A React Client Component which wraps the ListResources component to list @@ -27,6 +28,7 @@ async function _coreDatasetResourceGroups() { const opts = { versioned: false, groupDisplayNames: coreResources["coreGroupDisplayNames"], + groupImg: () => ({ src: nextstrainLogoSmall.src, alt: "nextstrain logo" }), }; return await listResourcesAPI('core', 'intermediate', opts); } \ No newline at end of file diff --git a/static-site/app/staging/[[...staging]]/resources.tsx b/static-site/app/staging/[[...staging]]/resources.tsx index 9ec536334..11c5de9c5 100644 --- a/static-site/app/staging/[[...staging]]/resources.tsx +++ b/static-site/app/staging/[[...staging]]/resources.tsx @@ -5,6 +5,7 @@ import React from "react"; import ListResources from "../../../components/list-resources"; import { listResourcesAPI } from "../../../components/list-resources/listResourcesApi"; +import nextstrainLogoSmall from "../../../static/logos/nextstrain-logo-small.png"; export default function StagingPathogenResourceListing(): React.ReactElement { return ( @@ -19,6 +20,9 @@ export default function StagingPathogenResourceListing(): React.ReactElement { } async function stagingDatasetResourceGroups() { - const opts = {versioned: false}; + const opts = { + versioned: false, + groupImg: () => ({ src: nextstrainLogoSmall.src, alt: "nextstrain logo" }), + }; return await listResourcesAPI('staging', 'dataset', opts); } \ No newline at end of file diff --git a/static-site/components/list-resources/listResourcesApi.tsx b/static-site/components/list-resources/listResourcesApi.tsx index fcef971bd..a6231f59b 100644 --- a/static-site/components/list-resources/listResourcesApi.tsx +++ b/static-site/components/list-resources/listResourcesApi.tsx @@ -1,7 +1,6 @@ import { ResourceType, Resource, Group, PathVersionsForGroup, FetchGroupHistory } from "./types"; import { InternalError } from "../error-boundary"; import fetchAndParseJSON from 
"../../util/fetch-and-parse-json"; -import nextstrainLogoSmall from "../../static/logos/nextstrain-logo-small.png"; interface APIWrapper { [resourceType: string]: { @@ -36,7 +35,8 @@ export async function listResourcesAPI( groupDisplayNames, groupSortableName, groupUrl, - groupUrlTooltip + groupUrlTooltip, + groupImg }: { /** Report prior versions of each resource. * TODO: infer this from the API data itself @@ -46,7 +46,8 @@ export async function listResourcesAPI( groupDisplayNames?: Record, groupSortableName?: (group: Group) => string, groupUrl?: (groupName: string) => string, - groupUrlTooltip?: (groupName: string) => string + groupUrlTooltip?: (groupName: string) => string, + groupImg?: (group: Group) => { src: string; alt: string } | undefined } ): Promise { const requestPath = `/list-resources/${sourceId}/${resourceType}`; @@ -77,6 +78,13 @@ export async function listResourcesAPI( if (groupUrlTooltip) { group.groupUrlTooltip = groupUrlTooltip(groupName); } + if (groupImg) { + const img = groupImg(group); + if (img) { + group.groupImgSrc = img.src; + group.groupImgAlt = img.alt; + } + } if (resourceType==='intermediate' && sourceId==='core') { group.fetchHistory = fetchIntermediateGroupHistoryFactory(sourceId, groupName); } @@ -100,8 +108,6 @@ function resourceGroup(groupName: string, resources: Resource[]): Group { const groupInfo: Group = { groupName, sortingGroupName: groupName, - groupImgSrc: nextstrainLogoSmall.src, - groupImgAlt: "nextstrain logo", resources, nResources: resources.length, nVersions,