diff --git a/.github/workflows/index-resources.yml b/.github/workflows/index-resources.yml index 37e97928d..d0a4d6ba9 100644 --- a/.github/workflows/index-resources.yml +++ b/.github/workflows/index-resources.yml @@ -85,7 +85,7 @@ jobs: node resourceIndexer/main.js \ --gzip --output resources.json.gz \ --resourceTypes dataset intermediate \ - --collections core staging + --collections core staging nextclade - name: Upload the new index, overwriting the existing index if: ${{ startsWith(env.RESOURCE_INDEX, 's3://') }} run: | diff --git a/data/manifest_core.json b/data/manifest_core.json index 7f6592620..310a51414 100644 --- a/data/manifest_core.json +++ b/data/manifest_core.json @@ -582,12 +582,6 @@ "default": "open" } }, - "nextclade": { - "dataset": { - "sars-cov-2": "", - "default": "sars-cov-2" - } - }, "nipah": { "resolution": { "all": "", diff --git a/env/production/config.json b/env/production/config.json index 4cc577385..c9a476ceb 100644 --- a/env/production/config.json +++ b/env/production/config.json @@ -110,6 +110,6 @@ "OIDC_GROUPS_CLAIM": "cognito:groups", "SESSION_COOKIE_DOMAIN": "nextstrain.org", "GROUPS_DATA_FILE": "groups.json", - "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v8.json.gz", + "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v9.json.gz", "PLAUSIBLE_ANALYTICS_DOMAIN": "nextstrain.org" } diff --git a/env/testing/config.json b/env/testing/config.json index 5175b807e..8e2437ce1 100644 --- a/env/testing/config.json +++ b/env/testing/config.json @@ -108,5 +108,5 @@ "OIDC_USERNAME_CLAIM": "cognito:username", "OIDC_GROUPS_CLAIM": "cognito:groups", "GROUPS_DATA_FILE": "groups.json", - "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v8.json.gz" + "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v9.json.gz" } diff --git a/resourceIndexer/inventory.js b/resourceIndexer/inventory.js index 13975ebf6..4b1946460 100644 --- a/resourceIndexer/inventory.js +++ b/resourceIndexer/inventory.js @@ -86,7 +86,7 @@ const 
fetchInventoryRemote = async ({bucket, prefix, name, save}) => { /** * Parse an on-disk inventory. This expects the following files to be present: * - `./devData/${name}.manifest.json` - * - `./devData/${name}.inventory.csv.gz` + * - `./devData/${name}-*.csv.gz` * * Returns an object with properties: * - inventory: object[] list of entries in the inventory, using the schema to define keys diff --git a/resourceIndexer/main.js b/resourceIndexer/main.js index 8f5ac0e19..6b7f78bae 100644 --- a/resourceIndexer/main.js +++ b/resourceIndexer/main.js @@ -2,6 +2,7 @@ import { ArgumentParser } from 'argparse'; import fs from 'fs'; import { coreS3Data, stagingS3Data } from "./coreStagingS3.js"; +import { NextcladeData } from "./nextclade.js"; import zlib from 'zlib'; import { promisify } from 'util'; import { ResourceIndexerError } from './errors.js'; @@ -19,8 +20,8 @@ const gzip = promisify(zlib.gzip) * (sub-)class and resourcePath to parallel the information in the Resource * (sub-)class. * - * Currently only sources {core, staging} and resource types {dataset, - * intermediate} are part of the index. + * Currently only sources {core, staging, nextclade} and resource types + * {dataset, intermediate} are part of the index. * * As an example, the core WNV/NA (nextstrain.org/WNV/NA) dataset is indexed * like so: @@ -34,6 +35,7 @@ const gzip = promisify(zlib.gzip) const COLLECTIONS = [ coreS3Data, stagingS3Data, + new NextcladeData(), ]; function parseArgs() { @@ -47,13 +49,13 @@ function parseArgs() { `, }); argparser.addArgument("--local", {action: 'storeTrue', - help: 'Access a local copy of S3 inventories within ./devData/. 
See docstring of fetchInventoryLocal() for expected filenames.'}) + help: 'Access a local copy of S3 inventories ({core,staging}.manifest.json and {core,staging}-*.csv.gz) and Nextclade indexes (nextclade/index.json and nextclade/**/pathogen.json) within ./devData/ instead of downloading them'}) argparser.addArgument("--collections", {metavar: "", type: "string", nargs: '+', choices: COLLECTIONS.map((c) => c.name), help: "Only fetch data from a subset of collections. Source names are those defined in COLLECTIONS"}); argparser.addArgument("--resourceTypes", {metavar: "", type: "string", nargs: '+', choices: ['dataset', 'intermediate'], help: "Only index data matching specified resource types"}); argparser.addArgument("--save-inventories", {action: 'storeTrue', - help: "Save the fetched inventories + manifest files to ./devData so that future invocations can use --local"}); + help: "Save a local copy of S3 inventories and Nextclade indexes to ./devData/ so that future invocations can use --local"}); argparser.addArgument("--output", {metavar: "", required: true}) argparser.addArgument("--indent", {action: 'storeTrue', help: 'Indent the output JSON'}) argparser.addArgument("--gzip", {action: 'storeTrue', help: 'GZip the output JSON'}) @@ -118,4 +120,4 @@ async function main(args) { output = await gzip(output) } fs.writeFileSync(args.output, output); -} \ No newline at end of file +} diff --git a/resourceIndexer/nextclade.js b/resourceIndexer/nextclade.js new file mode 100644 index 000000000..8058fe24c --- /dev/null +++ b/resourceIndexer/nextclade.js @@ -0,0 +1,150 @@ +/** + * Index Nextclade dataset reference trees, including past versions. + * + * Transforms Nextclade's own index for use with our resourceIndexer/… and + * src/resourceIndex.js framework. 
+ */ +import { strict as assert } from "assert"; +import { DateTime } from "luxon"; +import { readFile, writeFile, mkdir } from "node:fs/promises"; +import path from "node:path"; + +import { fetch } from "../src/fetch.js"; +import { NextcladeSource } from "../src/sources/nextclade.js"; +import { rootDirFullPath } from "../src/utils/index.js"; + + +const LOCAL_DATA = path.relative(".", path.join(rootDirFullPath, "devData", "nextclade")); +const LOCAL_INDEX = path.join(LOCAL_DATA, "index.json"); + + +/* All class members are part of the "collection" interface expected by + * resourceIndexer/main.js and use its terminology for arguments and return + * values. This interface is kind of a weird fit for things that aren't S3 + * inventories, so the chain of methods and way they pass values are a bit + * contrived. + */ +export class NextcladeData { + #source; + + name = "nextclade"; + + async collect({local, save}) { + if (local) { + console.log(`Reading ${LOCAL_INDEX}`); + this.#source = new NextcladeSource(JSON.parse(await readFile(LOCAL_INDEX))); + } + else { + this.#source = new NextcladeSource(); + + if (save) { + console.log(`Saving ${LOCAL_INDEX}`); + await mkdir(path.dirname(LOCAL_INDEX), {recursive: true}); + await writeFile(LOCAL_INDEX, JSON.stringify(await this.#source._index(), null, 2)); + } + } + + const datasetPaths = await this.#source.availableDatasets(); + + return (await Promise.all( + datasetPaths.map(async (datasetPath) => { + const dataset = this.#source.dataset(datasetPath.split("/")); + const indexDataset = await dataset._indexDataset(); + + /* Sort and collapse versions per our documented behaviour: + * + * > All times are UTC. A datestamp refers to datasets uploaded + * > between 00h00 and 23h59 UTC on that day. + * + * > If multiple datasets are uploaded on the same day we take the most + * > recent. + * + * See . 
+ */ + const datesSeen = new Set(); + const indexVersions = + indexDataset.versions + .map(v => ({...v, _timestamp: DateTime.fromISO(v.updatedAt, {zone:"UTC"})})) + .toSorted((a, b) => b._timestamp - a._timestamp) + .map(v => ({...v, _date: v._timestamp.toISODate()})) + .filter(v => !datesSeen.has(v._date) && datesSeen.add(v._date)) + + // Produce one resourceIndexer/main.js "item" per dataset version + return (await Promise.all( + indexVersions.map(async (indexVersion) => { + const versionMetaPath = `${indexDataset.path}/${indexVersion.tag}/pathogen.json`; + + const localFile = path.join(LOCAL_DATA, versionMetaPath); + + let versionMeta; + + if (local) { + console.log(`Reading ${localFile}`); + versionMeta = JSON.parse(await readFile(localFile)); + } + else { + const remoteUrl = await this.#source.urlFor(versionMetaPath); + + console.log(`Fetching ${remoteUrl}`); + const response = await fetch(remoteUrl, {cache: "no-cache"}); + assert(response.status === 200); + + versionMeta = await response.json(); + + if (save) { + console.log(`Saving ${localFile}`); + await mkdir(path.dirname(localFile), {recursive: true}); + await writeFile(localFile, JSON.stringify(versionMeta, null, 2)); + } + } + + /* This filter must be *after* we fetch the version's own + * pathogen.json. Because versions are filtered to one-per-day + * *before* we fetch, it's possible there's an older version from + * the same day that *does* include a treeJson, and we'd miss it. + * The fix would be fetching *all* versions and only then filtering + * to one-per-day (i.e. in createResource() below). + * + * Doing so, however, seems unnecessary. The scenario seems + * unlikely and it's not entirely clear how we'd want to interpret + * such a dataset update anyway (e.g. was the earlier version on + * the same day in error?). + * + * Also note that this filters out some datasets entirely: those + * that don't have a reference tree at all. 
+ * -trs, 27 Oct 2025 + */ + if (!versionMeta.files.treeJson) + return; + + // One "item" produced by collect() + return { + // Used by resourceIndexer/main.js + source: this.#source.name, + resourceType: "dataset", + resourcePath: datasetPath, + + // Used in createResource() below + version: { + date: indexVersion._date, + fileUrls: { + main: await this.#source.urlFor(`${indexDataset.path}/${indexVersion.tag}/${versionMeta.files.treeJson}`) + } + }, + }; + }) + )).flat(); + }) + )).flat(); + } + + categorise(item) { + return item; + } + + createResource(resourceType, resourcePath, items) { + return { + versions: items.map(i => i.version), + }; + } +} diff --git a/src/app.js b/src/app.js index d704fd46d..9869ac070 100644 --- a/src/app.js +++ b/src/app.js @@ -14,6 +14,7 @@ const { errors, fetch, groups, + nextclade, openid, pathogenRepos, schemas, @@ -68,7 +69,6 @@ charon.setup(app); * /monkeypox * /mpox * /ncov - * /nextclade * /rsv * /rubella * /seasonal-flu @@ -92,6 +92,15 @@ core.setup(app); staging.setup(app); +/* Nextclade reference datasets + * + * Routes: + * /nextclade + * /nextclade/* + */ +nextclade.setup(app); + + /* Community on GitHub * * Routes: diff --git a/src/async.js b/src/async.js index 9862eaee5..13272be41 100644 --- a/src/async.js +++ b/src/async.js @@ -228,13 +228,25 @@ function addAsync(app) { return addAsync(this.route.apply(this, arguments)); }; - app.useAsync = function() { - const fn = arguments[arguments.length - 1]; - assert.ok(typeof fn === 'function', - 'Last argument to `useAsync()` must be a function'); - const args = wrapArgs(arguments); - return app.use.apply(app, args); - }; + if (app.use) { + app.useAsync = function() { + const fn = arguments[arguments.length - 1]; + assert.ok(typeof fn === 'function', + 'Last argument to `useAsync()` must be a function'); + const args = wrapArgs(arguments); + return app.use.apply(app, args); + }; + } + + if (app.all) { + app.allAsync = function() { + const fn = arguments[arguments.length - 1]; 
+ assert.ok(typeof fn === 'function', + 'Last argument to `allAsync()` must be a function'); + const args = wrapArgs(arguments); + return app.all.apply(app, args); + }; + } app.deleteAsync = function() { const fn = arguments[arguments.length - 1]; diff --git a/src/endpoints/charon/index.js b/src/endpoints/charon/index.js index d3fb38307..23f10fba0 100644 --- a/src/endpoints/charon/index.js +++ b/src/endpoints/charon/index.js @@ -1,5 +1,5 @@ import { BadRequest, isHttpError } from '../../httpErrors.js'; -import { splitPrefixIntoParts } from '../../utils/prefix.js'; +import { splitPrefixIntoParts, joinPartsIntoPrefix } from '../../utils/prefix.js'; import { setSource, setDataset, canonicalizeDataset, setNarrative } from '../sources.js'; import './setAvailableDatasets.js'; // sets globals export { getAvailable } from './getAvailable.js'; @@ -25,13 +25,19 @@ const setSourceFromPrefix = setSource(req => { const setDatasetFromPrefix = setDataset(req => req.context.splitPrefixIntoParts.prefixParts.join("/")); -const canonicalizeDatasetPrefix = canonicalizeDataset((req, resolvedPrefix) => { - // A absolute base is required but we won't use it, so use something bogus. - const resolvedUrl = new URL(req.originalUrl, "http://x"); - resolvedUrl.searchParams.set("prefix", resolvedPrefix); - - return resolvedUrl.pathname + resolvedUrl.search; -}); +/** + * Leave the URL path (e.g. /charon/getDataset) unchanged with only the + * "prefix" query param updated with the resolved dataset path. 
+ */ +const canonicalizeDatasetPrefix = canonicalizeDataset(async (req, path) => ({ + query: { + ...req.query, + prefix: await joinPartsIntoPrefix({ + source: req.context.source, + prefixParts: path.split("/") + }), + } +})); const setNarrativeFromPrefix = setNarrative(req => { const {prefixParts} = req.context.splitPrefixIntoParts; diff --git a/src/endpoints/sources.js b/src/endpoints/sources.js index 6819952ac..cbd0c13af 100644 --- a/src/endpoints/sources.js +++ b/src/endpoints/sources.js @@ -1,4 +1,5 @@ import contentDisposition from 'content-disposition'; +import url from 'url'; import { NotFound } from '../httpErrors.js'; @@ -46,25 +47,36 @@ const setDataset = (pathExtractor) => (req, res, next) => { /** * Generate Express middleware that redirects to the canonical path for the * current {@link Dataset} if it is not fully resolved. Any provided version - * descriptor is included in the redirect. + * descriptor is included in the redirect. Original query params are preserved + * across the redirect unless overridden by canonicalBuilder. * - * @param {pathBuilder} pathBuilder - Function to build a fully-specified path - * @returns {expressMiddleware} + * @param {canonicalBuilder|canonicalBuilderAsync} canonicalBuilder - Function to build a fully-specified path or URL object suitable for {@link url#format} + * @returns {expressMiddlewareAsync} */ -const canonicalizeDataset = (pathBuilder) => (req, res, next) => { +const canonicalizeDataset = (canonicalBuilder) => async (req, res, next) => { const dataset = req.context.dataset; - const resolvedDataset = dataset.resolve(); + const resolvedDataset = await dataset.resolve(); if (dataset === resolvedDataset) return next(); const version = dataset.versionDescriptor ? `@${dataset.versionDescriptor}` : ''; - const canonicalPath = pathBuilder(req, resolvedDataset.pathParts.join("/") + version); + let canonical = canonicalBuilder.length >= 2 + ? 
await canonicalBuilder(req, resolvedDataset.pathParts.join("/") + version) + : await canonicalBuilder(resolvedDataset.pathParts.join("/") + version); + + // Convert plain path string to an object for url.format() + if (typeof canonical === "string") + canonical = {pathname: canonical} + + // Default to current path and current query + canonical.pathname ??= req.baseUrl + req.path; // baseUrl is really "basePath" + canonical.query ??= req.query; /* 307 Temporary Redirect preserves request method, unlike 302 Found, which * is important since this middleware function may be used in non-GET routes. */ - return res.redirect(307, canonicalPath); + return res.redirect(307, url.format(canonical)); }; @@ -358,13 +370,43 @@ function receiveSubresource(subresourceExtractor) { * @returns {String} Path for {@link Source#dataset} or {@link Source#narrative} */ -/** - * @callback pathBuilder - * @param {express.request} req - * @param {String} path - Canonical path (not including query) for the dataset - * within the context of the current {@link Source} - * @returns {String} Fully-specified path (including query) to redirect to +/* Confused about the duplication below? It's the documented way to handle + * overloaded (e.g. arity-dependent) function signatures.¹ Note that it relies + * on the "nestled" or "cuddled" end and start comment markers. 
+ * -trs, 16 June 2022 + * + * ¹ https://github.com/jsdoc/jsdoc/issues/1017 */ +/** + * @callback canonicalBuilder + * + * @param {String} path - Canonical path for the dataset within the context of + * the current {@link Source} + * @returns {String|Object} Fully-specified path to redirect to or object suitable for {@link url#format} + *//** +* @callback canonicalBuilder +* +* @param {express.request} req +* @param {String} path - Canonical path for the dataset within the context of +* the current {@link Source} +* @returns {String|Object} Fully-specified path to redirect to or object suitable for {@link url#format} +*/ +/** + * @callback canonicalBuilderAsync + * + * @async + * @param {String} path - Canonical path for the dataset within the context of + * the current {@link Source} + * @returns {String|Object} Fully-specified path to redirect to or object suitable for {@link url#format} + *//** +* @callback canonicalBuilderAsync +* +* @async +* @param {express.request} req +* @param {String} path - Canonical path for the dataset within the context of +* the current {@link Source} + * @returns {String|Object} Fully-specified path to redirect to or object suitable for {@link url#format} +*/ /** * @callback subresourceExtractor diff --git a/src/resourceIndex.js b/src/resourceIndex.js index fb01b65b8..71ee931d8 100644 --- a/src/resourceIndex.js +++ b/src/resourceIndex.js @@ -191,6 +191,8 @@ class ListResources { return "" case "staging": return "staging/" + case "nextclade": + return "nextclade/" default: throw new InternalServerError(`Source "${name}" does not have a corresponding prefix`) } @@ -216,6 +218,13 @@ class ListResources { if (this.groupHistory) { key = "pathVersions"; valuePairs = _resources + /* XXX TODO: This comparison relies on use of the default + * groupNameBuilder.¹ Currently the only sources that index + * intermediates use the default. 
+ * -trs, 3 Nov 2025 + * + * ¹in static-site/components/list-resources/listResourcesApi.ts + */ .filter(([name,]) => name.split('/')[0]===this.groupHistory) .map(_formatIntermediates); } else { diff --git a/src/routing/core.js b/src/routing/core.js index 44763ccdf..37b2ac0ff 100644 --- a/src/routing/core.js +++ b/src/routing/core.js @@ -1,5 +1,3 @@ -import url from 'url'; - import * as endpoints from '../endpoints/index.js'; import * as sources from '../sources/index.js'; @@ -42,7 +40,6 @@ const coreBuildPaths = [ "/monkeypox", // Not actively updated, but YYYY-MM-DD URLs remain & don't redirect "/mpox", // monkeypox URLs will redirect to /mpox (except for datestamped URLs) "/ncov", - "/nextclade", "/nipah", "/norovirus", "/oropouche", @@ -69,10 +66,7 @@ export function setup(app) { app.use([coreBuildRoutes, "/narratives/*"], setSource(req => new CoreSource())); // eslint-disable-line no-unused-vars app.routeAsync(coreBuildRoutes) - .all( - setDataset(req => req.path), - canonicalizeDataset((req, path) => url.format({pathname: `/${path}`, query: req.query})) - ) + .allAsync(setDataset(req => req.path), canonicalizeDataset(path => `/${path}`)) .getAsync(getDataset) .putAsync(putDataset) .deleteAsync(deleteDataset) diff --git a/src/routing/index.js b/src/routing/index.js index ae743a0ff..899ff693a 100644 --- a/src/routing/index.js +++ b/src/routing/index.js @@ -8,6 +8,7 @@ export * as errors from "./errors.js"; export * as fetch from './fetch.js'; export * as groups from './groups.js'; export * as listResources from './listResources.js'; +export * as nextclade from './nextclade.js'; export * as openid from './openid.js'; export * as pathogenRepos from './pathogenRepos.js'; export * as schemas from './schemas.js'; diff --git a/src/routing/nextclade.js b/src/routing/nextclade.js new file mode 100644 index 000000000..f8c6f3b02 --- /dev/null +++ b/src/routing/nextclade.js @@ -0,0 +1,29 @@ +import * as endpoints from '../endpoints/index.js'; +import * as sources from 
'../sources/index.js'; + +const { + setSource, + setDataset, + canonicalizeDataset, + getDataset, + optionsDataset, +} = endpoints.sources; + +const { + NextcladeSource, +} = sources; + + +export function setup(app) { + app.use("/nextclade", setSource(req => new NextcladeSource())); // eslint-disable-line no-unused-vars + + app.routeAsync("/nextclade") + .getAsync(endpoints.nextJsApp.handleRequest) + ; + + app.routeAsync("/nextclade/*") + .allAsync(setDataset(req => req.params[0]), canonicalizeDataset(path => `/nextclade/${path}`)) + .getAsync(getDataset) + .optionsAsync(optionsDataset) + ; +} diff --git a/src/routing/staging.js b/src/routing/staging.js index 43ad444d9..499ed1a72 100644 --- a/src/routing/staging.js +++ b/src/routing/staging.js @@ -1,5 +1,3 @@ -import url from 'url'; - import * as endpoints from '../endpoints/index.js'; import * as sources from '../sources/index.js'; @@ -42,10 +40,7 @@ export function setup(app) { ; app.routeAsync("/staging/*") - .all( - setDataset(req => req.params[0]), - canonicalizeDataset((req, path) => url.format({pathname: `/staging/${path}`, query: req.query})) - ) + .allAsync(setDataset(req => req.params[0]), canonicalizeDataset(path => `/staging/${path}`)) .getAsync(getDataset) .putAsync(putDataset) .deleteAsync(deleteDataset) diff --git a/src/sources/core.js b/src/sources/core.js index db8882e04..8ac9517fd 100644 --- a/src/sources/core.js +++ b/src/sources/core.js @@ -95,7 +95,7 @@ class CoreStagingSource extends CoreSource { class CoreDataset extends Dataset { /* NOTE: This class is also used for staging datasets */ - resolve() { + async resolve() { /* XXX TODO: Reimplement this in terms of methods on the source, not by * breaking encapsulation by using a process-wide global. 
* -trs, 26 Oct 2021 (based on a similar comment 5 Sept 2019) @@ -123,7 +123,7 @@ class CoreDataset extends Dataset { if (nextDefaultPart) { const dataset = new this.constructor(this.source, [...prefixParts, nextDefaultPart], this.versionDescriptor); - return dataset.resolve(); + return await dataset.resolve(); } return this; @@ -138,18 +138,17 @@ class CoreDataset extends Dataset { * * We only want to do this for core datasets, not staging. * - * @param {(string|false)} versionDescriptor from the URL * @throws {BadRequest || NotFound} * @returns {([string, Object]|[null, undefined])} [0]: versionDate [1]: * versionUrls */ - versionInfo(versionDescriptor) { + versionInfo() { if (this.source.name!=='core') { - return super.versionInfo(versionDescriptor); + return super.versionInfo(); } - if (!versionDescriptor) { + if (!this.versionDescriptor) { return [null, undefined]; } diff --git a/src/sources/fetch.js b/src/sources/fetch.js index f948f3276..0389a49ce 100644 --- a/src/sources/fetch.js +++ b/src/sources/fetch.js @@ -46,6 +46,10 @@ class UrlDefinedSource extends Source { } class UrlDefinedDataset extends Dataset { + static get Subresource() { + return UrlDefinedDatasetSubresource; + } + // eslint-disable-next-line no-unused-vars assertValidPathParts(pathParts) { // Override check for underscores (_), as we want to allow arbitrary @@ -63,9 +67,6 @@ class UrlDefinedDataset extends Dataset { const version = this.versionDescriptor ? `@${this.versionDescriptor}` : "" return this.baseParts.join("/") + version; } - subresource(type) { - return new UrlDefinedDatasetSubresource(this, type); - } async exists() { /* Assume existence. There's little benefit to checking with extra * requests when we don't have a natural fallback page (e.g. 
the Group page @@ -79,7 +80,7 @@ class UrlDefinedDataset extends Dataset { } class UrlDefinedDatasetSubresource extends DatasetSubresource { - get baseName() { + async baseName() { const type = this.type; const baseName = this.resource.baseName; @@ -94,6 +95,10 @@ class UrlDefinedDatasetSubresource extends DatasetSubresource { } class UrlDefinedNarrative extends Narrative { + static get Subresource() { + return UrlDefinedNarrativeSubresource; + } + // eslint-disable-next-line no-unused-vars assertValidPathParts(pathParts) { // Override check for underscores (_), as we want to allow arbitrary @@ -108,9 +113,6 @@ class UrlDefinedNarrative extends Narrative { const version = this.versionDescriptor ? `@${this.versionDescriptor}` : "" return this.baseParts.join("/") + version; } - subresource(type) { - return new UrlDefinedNarrativeSubresource(this, type); - } async exists() { /* Assume existence. There's little benefit to checking with extra * requests when we don't have a natural fallback page (e.g. 
the Group page @@ -124,7 +126,7 @@ class UrlDefinedNarrative extends Narrative { } class UrlDefinedNarrativeSubresource extends NarrativeSubresource { - get baseName() { + async baseName() { return this.resource.baseName; } } diff --git a/src/sources/index.js b/src/sources/index.js index 979f6dec6..e6c14b04f 100644 --- a/src/sources/index.js +++ b/src/sources/index.js @@ -5,3 +5,4 @@ export { CoreSource, CoreStagingSource } from './core.js'; export { CommunitySource } from './community.js'; export { UrlDefinedSource } from './fetch.js'; export { GroupSource } from './groups.js'; +export { NextcladeSource } from './nextclade.js'; diff --git a/src/sources/models.js b/src/sources/models.js index ed6f8d236..e866c3337 100644 --- a/src/sources/models.js +++ b/src/sources/models.js @@ -180,18 +180,17 @@ class Resource { get baseName() { return this.baseParts.join("_"); } - versionInfo(versionDescriptor) { - /** - * Interrogates the resource index to find the appropriate version of the - * resource and associated subresource URLs by comparing to - * this.versionDescriptor. - * This method should be overridden by subclasses when they are used to - * handle URLs which extract version descriptors. - * @param {(string|false)} versionDescriptor from the URL string - * @throws {BadRequest} - * @returns {([string, Object]|[null, undefined])} [0]: versionDate [1]: versionUrls - */ - if (versionDescriptor) { + /** + * Interrogates the resource index to find the appropriate version of the + * resource and associated subresource URLs by comparing to + * this.versionDescriptor. + * This method should be overridden by subclasses when they are used to + * handle URLs which extract version descriptors. 
+ * @throws {BadRequest} + * @returns {([string, Object]|[null, undefined])} [0]: versionDate [1]: versionUrls + */ + versionInfo() { + if (this.versionDescriptor) { throw new BadRequest(`This resource cannot handle versioned dataset requests (version descriptor requested: "${this.versionDescriptor}")`) } return [null, undefined]; @@ -241,7 +240,7 @@ class Subresource { * this as needed. */ - const versionUrls = this.resource.versionInfo(this.resource.versionDescriptor)[1]; + const versionUrls = this.resource.versionInfo()[1]; if (versionUrls) { if (!['HEAD', 'GET'].includes(method)) { @@ -253,9 +252,9 @@ class Subresource { throw new NotFound(`This version of the resource does not have a subresource for ${this.type}`); } - return await this.resource.source.urlFor(this.baseName, method, headers); + return await this.resource.source.urlFor(await this.baseName(), method, headers); } - get baseName() { + async baseName() { throw new Error("baseName() must be implemented by Subresource subclasses"); } get mediaType() { @@ -307,7 +306,7 @@ class Dataset extends Resource { * * @returns {Dataset} */ - resolve() { + async resolve() { return this; } @@ -338,7 +337,7 @@ class DatasetSubresource extends Subresource { "application/octet-stream; q=0.01", ].join(", "); - get baseName() { + async baseName() { return this.type === "main" ? 
`${this.resource.baseName}.json` : `${this.resource.baseName}_${this.type}.json`; @@ -393,7 +392,7 @@ class NarrativeSubresource extends Subresource { "text/*; q=0.1", ].join(", "); - get baseName() { + async baseName() { return `${this.resource.baseName}.md`; } diff --git a/src/sources/nextclade.js b/src/sources/nextclade.js new file mode 100644 index 000000000..b0bbf0fdb --- /dev/null +++ b/src/sources/nextclade.js @@ -0,0 +1,264 @@ +import * as authz from '../authz/index.js'; +import { fetch } from '../fetch.js'; +import { NotFound } from '../httpErrors.js'; +import { ResourceVersions } from '../resourceIndex.js'; +import { re } from '../templateLiterals.js'; +import { Source, Dataset, DatasetSubresource } from './models.js'; + +// We privilege our own collection since this is our site +const NEXTSTRAIN_COLLECTION_ID = "nextstrain"; +const NEXTSTRAIN_COLLECTION_PREFIX = re`^${NEXTSTRAIN_COLLECTION_ID}/`; + +/* We hardcode what collections to expose so that adding a new collection + * upstream in Nextclade's index doesn't automatically and unexpectedly expose + * it here too. The idea is that we should opt-in knowingly by adding the new + * collection here and testing things work. This hardcoding also lets us + * implement the NextcladeDataset.baseName getter without an additional async + * lookup operation. + * -trs, 3 Nov 2025 + */ +const COLLECTION_IDS = new Set([NEXTSTRAIN_COLLECTION_ID, "community"]); + + +/** + * Nextclade dataset reference trees. + * + * Decisions we made about behaviour: + * + * • Drop the leading nextstrain/ from dataset names, but accept it as an + * alias by redirecting, e.g. + * + * https://nextstrain.org/nextclade/nextstrain/mpox/clade-iib + * → https://nextstrain.org/nextclade/mpox/clade-iib + * + * • Accept the index's shortcut names, but expand them by redirection to the + * canonical name, e.g. 
+ * + * https://nextstrain.org/nextclade/hMPXV + * → https://nextstrain.org/nextclade/mpox/clade-iib + * + * Some shortcuts have "_" in their name (e.g. flu_h1n1pdm_na); accept + * those both as-is and with s{_}{/}g applied (e.g. flu/h1n1pdm/na). + * + * • Prefer full names (minus leading nextstrain/) as the canonical name + * + * See also . + * + * The convention in this file is that underscored names are used for + * extensions to the sources models and private fields are used for internal + * data. This helps keep clear what's part of the sources interface and what's + * not. + */ +export class NextcladeSource extends Source { + #index; + #indexDatasets; + + /* This constructor param is only used by the resourceIndexer/ code when + * loading the Nextclade index from disk instead of the network. + */ + constructor(index) { + super(); + if (index) + this.#index = index; + } + + get name() { return "nextclade"; } + async baseUrl() { return "https://data.clades.nextstrain.org/v3/"; } + + async _index() { + /* Source instances are constructed for each request, so this + * instance-local cache results in one index fetch per request. The + * fetch()-level HTTP caching results in conditional fetches to the + * upstream that mostly return as 304 Not Modified. This seems Fine, at + * least For Now. + * -trs, 16 Oct 2025 + */ + return this.#index ??= await (await fetch(await this.urlFor("index.json"), {cache: "no-cache"})).json(); + } + + dataset(pathParts, versionDescriptor) { + return new NextcladeDataset(this, pathParts, versionDescriptor); + } + + async availableDatasets() { + return (await this._indexDatasets()) + .map(({path}) => path.replace(NEXTSTRAIN_COLLECTION_PREFIX, "")); + } + + async _datasetAliases() { + return new Map( + (await this._indexDatasets()) + .flatMap(({path, shortcuts}) => [ + /* Canonicalize nextstrain/a/b/c → a/b/c since we're on nextstrain.org + * after all. 
+ */ + [path, path.replace(NEXTSTRAIN_COLLECTION_PREFIX, "")], + + /* Include index-defined shortcuts under the permutations of a) + * removing the leading "nextstrain/" and b) replacing underscores (_) + * with slashes (/). Spell out all permutations so an + * iterative/recursive alias resolution is not necessary. + */ + ...((shortcuts ?? []).flatMap(shortcut => [ + [ + shortcut + .replace(NEXTSTRAIN_COLLECTION_PREFIX, "") + .replace(/_/g, "/"), + path.replace(NEXTSTRAIN_COLLECTION_PREFIX, "") + ], + [ + shortcut + .replace(NEXTSTRAIN_COLLECTION_PREFIX, ""), + path.replace(NEXTSTRAIN_COLLECTION_PREFIX, "") + ], + [ + shortcut + .replace(/_/g, "/"), + path.replace(NEXTSTRAIN_COLLECTION_PREFIX, "") + ], + [ + shortcut, + path.replace(NEXTSTRAIN_COLLECTION_PREFIX, "") + ], + ])), + ]) + .filter(([alias, path]) => alias !== path) + ); + } + + async _indexDatasets() { + return this.#indexDatasets ??= + (await this._index()) + .collections + .filter(c => COLLECTION_IDS.has(c.meta.id)) + .flatMap(c => c.datasets) + .filter(d => d.files.treeJson); + } + + async getInfo() { + return { + title: "Nextclade reference dataset trees", + showDatasets: true, + showNarratives: false, + }; + } + + get authzPolicy() { + return [ + {tag: authz.tags.Visibility.Public, role: "*", allow: [authz.actions.Read]}, + ]; + } + get authzTags() { + return new Set([ + authz.tags.Type.Source, + authz.tags.Visibility.Public, + ]); + } + get authzTagsToPropagate() { + return new Set([ + authz.tags.Visibility.Public, + ]); + } +} + +class NextcladeDataset extends Dataset { + static get Subresource() { + return NextcladeDatasetSubresource; + } + + // eslint-disable-next-line no-unused-vars + assertValidPathParts(pathParts) { + // Override check for underscores (_), as we want to allow Nextclade + // dataset paths that include them. 
There is no risk of "confused deputy" + // problems as this source 1) only allows fixed datasets from an index and + // 2) uses slashes (/) not underscores (_) when joining path parts. + } + get baseParts() { + return this.pathParts.slice(); + } + get baseName() { + const explicitCollections = new Set( + Array.from(COLLECTION_IDS) + .filter(id => id !== NEXTSTRAIN_COLLECTION_ID) + ); + + return explicitCollections.has(this.baseParts[0]) + ? this.baseParts.join("/") + : `${NEXTSTRAIN_COLLECTION_ID}/${this.baseParts.join("/")}`; + } + + async resolve() { + /* Resolve using a complete and static map of aliases for all paths we + * support. This avoids the need for recursive resolving like other + * sources which dynamically determine supported aliases. + */ + const aliases = await this.source._datasetAliases(); + + const resolvedPath = aliases.get(this.pathParts.join("/")); + if (resolvedPath) + return new this.constructor(this.source, resolvedPath.split("/"), this.versionDescriptor); + + return this; + } + + versionInfo() { + /* Copied wholesale from src/sources/core.js. This is part of the tension + * between the Source/Resource/Subresource framework and the + * resourceIndexer/ResourceVersions/ListResources framework. + * -trs, 23 Oct 2025 + */ + if (!this.versionDescriptor) { + return [null, undefined]; + } + + const versions = new ResourceVersions(this.source.name, 'dataset', this.pathParts.join("/")); + const versionDate = versions.versionDateFromDescriptor(this.versionDescriptor); + const versionUrls = versionDate ? versions.subresourceUrls(versionDate) : undefined + return [versionDate, versionUrls]; + } + + async _indexDataset() { + /* XXX TODO: Consider making this resolve this.versionDescriptor to a + * specific version and returning the fetched pathogen.json for that + * version. 
+ * + * This would be most appropriate in the case of abandoning the bolted-on + * versionInfo() method for version resolution that's more integrated into + * the Source/Resource/Subresource framework instead of done completely + * separately. + * -trs, 3 Nov 2025 + */ + return (await this.source._indexDatasets()) + .find(d => d.path === this.baseName); + } +} + + +class NextcladeDatasetSubresource extends DatasetSubresource { + constructor(resource, type) { + super(resource, type); + + if (this.type !== "main") + throw new NotFound(`Nextclade datasets do not provide a '${this.type}' sidecar`); + } + + async baseName() { + /* Note that this method ignores this.resource.versionDescriptor because + * it's not expected to be called if that property has a value; it expects + * that its caller, the Subresource.url() method, will instead go thru the + * this.resource.versionInfo() code path in that case. See also the + * comment in ._indexDataset() above. + * -trs, 3 Nov 2025 + */ + const indexed = await this.resource._indexDataset(); + + if (!indexed) + throw new NotFound(`Dataset '${this.resource.baseName}' is not in Nextclade's index (or does not have a tree)`); + + /* The version tag here is the "latest" version of the dataset. See also + * . + * -trs, 3 Nov 2025 + */ + return `${indexed.path}/${indexed.version.tag}/${indexed.files.treeJson}`; + } +} diff --git a/src/templateLiterals.js b/src/templateLiterals.js index d206022c1..c491684f2 100644 --- a/src/templateLiterals.js +++ b/src/templateLiterals.js @@ -1,3 +1,6 @@ +import escapeStringRegexp from 'escape-string-regexp'; + + /** * Safe-by-construction URI strings via [template literals]{@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals#tagged_templates}. 
* @@ -21,3 +24,21 @@ export function uri(literalParts, ...exprParts) { literalParts[0] ); } + + +/** + * Safe-by-construction RegExps from strings via [template literals]{@link https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Template_literals#tagged_templates}. + * + * Interpolations in the template literal are automatically escaped so no regex + * metachars are interpreted. + * + * @returns RegExp + */ +export function re(literalParts, ...exprParts) { + /* See comment above in uri() about the data structures involved. + */ + return literalParts.slice(1).reduce( + (re, literalPart, idx) => new RegExp(`${re.source}${escapeStringRegexp(exprParts[idx])}${literalPart}`), + new RegExp(literalParts[0]) + ); +} diff --git a/src/utils/prefix.js b/src/utils/prefix.js index 00af05057..c8120e52d 100644 --- a/src/utils/prefix.js +++ b/src/utils/prefix.js @@ -9,6 +9,7 @@ import * as sources from '../sources/index.js'; const sourceNameToClass = new Map([ ["core", sources.CoreSource], ["staging", sources.CoreStagingSource], + ["nextclade", sources.NextcladeSource], ["community", sources.CommunitySource], ["fetch", sources.UrlDefinedSource], ["groups", sources.GroupSource], @@ -44,6 +45,7 @@ const splitPrefixIntoParts = (prefix) => { switch (prefixParts[0]) { case "community": case "staging": + case "nextclade": case "fetch": sourceName = prefixParts.shift(); break; @@ -119,6 +121,7 @@ const joinPartsIntoPrefix = async ({source, prefixParts, isNarrative = false}) = switch (sourceName) { case "community": case "staging": + case "nextclade": case "fetch": leadingParts.push(sourceName); break; diff --git a/static-site/app/groups/[group]/page.tsx b/static-site/app/groups/[group]/page.tsx index 07d22f7ff..11d1cf8a0 100644 --- a/static-site/app/groups/[group]/page.tsx +++ b/static-site/app/groups/[group]/page.tsx @@ -123,6 +123,7 @@ export default function IndividualGroupPage({ return [{ groupName: group, groupDisplayName: group, + sortingGroupName: group, resources, 
nResources: resources.length, nVersions: undefined, diff --git a/static-site/app/groups/available.tsx b/static-site/app/groups/available.tsx index 2a3eba459..694c0077e 100644 --- a/static-site/app/groups/available.tsx +++ b/static-site/app/groups/available.tsx @@ -99,6 +99,7 @@ export default function Available(): React.ReactElement { return { groupName, groupDisplayName: groupName, + sortingGroupName: groupName, groupUrl: `/groups/${groupName}`, groupUrlTooltip: `Click to view the page for ${groupName}`, resources: filteredResources, diff --git a/static-site/app/nextclade/[[...nextclade]]/content.tsx b/static-site/app/nextclade/[[...nextclade]]/content.tsx new file mode 100644 index 000000000..6330d4ec0 --- /dev/null +++ b/static-site/app/nextclade/[[...nextclade]]/content.tsx @@ -0,0 +1,55 @@ +import React from "react"; + +import { TitledMetadata } from "../../types"; +import FlexCenter from "../../../components/flex-center"; +import { FocusParagraphCentered } from "../../../components/focus-paragraph"; +import NextcladeResourceListing from "./resources"; +import { SmallSpacer, HugeSpacer } from "../../../components/spacers"; + +/** + * A React Server component that generates the contents of the + * /nextclade page. + * + * This is abstracted out into a distinct component so that it can + * also be used in the "./not-found.tsx" component, to render the + * /nextclade page content beneath an error banner, when a bad URL is + * requested. + */ +export default function NextcladePageContent({ + metadata, +}: { + /** + * A Metadata object, that is assumed to have a `title` key with a + * string value + */ + metadata: TitledMetadata; +}): React.ReactElement { + const title = metadata.title; + + return ( + <> + + + +

{title}

+ + + + + + Part of{" "} + Nextclade datasets produced by the{" "} + core Nextstrain team and broader Nextclade community. + + + + + + + + ); +} diff --git a/static-site/app/nextclade/[[...nextclade]]/error-banner.tsx b/static-site/app/nextclade/[[...nextclade]]/error-banner.tsx new file mode 100644 index 000000000..38fd6a121 --- /dev/null +++ b/static-site/app/nextclade/[[...nextclade]]/error-banner.tsx @@ -0,0 +1,43 @@ +"use client"; + +import React from "react"; +import { useParams } from "next/navigation"; + +import ErrorMessage from "../../../components/error-message"; + +/** + * A React Client component that detects if the requested URL + * contains path elements past `/nextclade`, and, if so, returns a + * component that displays an error message banner. If additional + * path elements are not detected, returns null. + * + * N.b., the way this component is used, we only render it when we've + * already determined that there _is_ a need to display an error + * message. In other words, it is fully expected that the `else` + * branch of the conditional will never actually execute. + */ +export default function ErrorBanner(): React.ReactElement | null { + const params = useParams(); + + if (params && params["nextclade"]) { + // n.b., I don't think `params["nextclade"]` is ever going to be + // anything other than a list, but let's make the type checker + // happy… + const path = + typeof params["nextclade"] === "string" + ? params["nextclade"] + : params["nextclade"].join("/"); + + const resourceType = path.startsWith("narratives") + ? "narrative" + : "dataset"; + + const title = `The Nextclade ${resourceType} "nextstrain.org/nextclade/${path}" doesn't exist.`; + const contents =

Here is the Nextclade reference trees page instead.

; + + return ; + } else { + // this will never happen + return null; + } +} diff --git a/static-site/app/nextclade/[[...nextclade]]/not-found.tsx b/static-site/app/nextclade/[[...nextclade]]/not-found.tsx new file mode 100644 index 000000000..e57bc4904 --- /dev/null +++ b/static-site/app/nextclade/[[...nextclade]]/not-found.tsx @@ -0,0 +1,20 @@ +import React from "react"; + +import { ErrorBanner } from "../../../components/error-banner"; + +import NextcladePageContent from "./content"; +import { metadata } from "./page"; + +/** + * A React Server component that renders the usual `/nextclade` page + * content, with an error banner up-top explaining that the requested + * dataset doesn't actually exist. + */ +export default function FourOhFour(): React.ReactElement { + return ( + <> + + + + ); +} diff --git a/static-site/app/nextclade/[[...nextclade]]/page.tsx b/static-site/app/nextclade/[[...nextclade]]/page.tsx new file mode 100644 index 000000000..700bd80b6 --- /dev/null +++ b/static-site/app/nextclade/[[...nextclade]]/page.tsx @@ -0,0 +1,69 @@ +import React from "react"; + +import type { TitledMetadata } from "../../types"; +import { ValidateUrl } from "../../../components/error-banner"; + +import NextcladePageContent from "./content"; + +const title = "Nextclade reference trees"; + +export const metadata: TitledMetadata = { + title, +}; + +/** + * A React Server Component for `/nextclade` + * + * A note about how this page works: + * + * We expect three different types of requests for resources under + * `/nextclade`: + * + * 1) Requests for real, existing datasets (e.g., `/nextclade/measles/genome/WHO-2012`) — + * these requests are handled by the Express-level router, and this + * Next.js page never sees them + * + * 2) Requests for the plain `/nextclade` page — that request is handled + * by this page, and we expect it to return a resource listing of + * Nextclade reference trees, with an HTTP status code of 200 + * + * 3) Requests for some longer URL that 
does NOT correspond to a real, + * existing dataset (e.g., `/nextclade/foo`) — in this case, we want + * to display the same resource listing as the base `/nextclade` + * page, but to also include an error banner indicating that the + * requested resource (`nextstrain.org/nextclade/foo` in our example) + * does not exist. We also want the HTTP status code for the + * response to this request to be a 404 + * + * We accomplish this as follows: + * + * Requests of type #1 are handled completely at the Express level, + * and this page never sees them. + * + * Requests of type #2 and type #3 _are_ handled by this page. It uses + * the `<ValidateUrl>` component to detect whether the + * requested URL was the plain `/nextclade` or whether there are + * additional path components beyond that (again, `/nextclade/foo` in + * our example). If there _are_ additional path elements, + * `<ValidateUrl>` detects that and calls Next.js's + * `notFound()` method, which results in the `./not-found.tsx` page + * being rendered and returned. If there are not additional path + * elements (i.e., if the request was for `/nextclade`), + * `<ValidateUrl>` returns nothing, and the + * `<NextcladePageContent>` component delivers the desired resource + * listing. + * + * If the `./not-found.tsx` page is rendered, it handles the display + * of the error banner; it also uses the `<NextcladePageContent>` + * component to render the same resource listing as the default case. + * However, because it has been invoked via the Next.js `notFound()` + * method, it will return a 404 status code. 
+ */ +export default function NextcladePage(): React.ReactElement { + return ( + <> + + + + ); +} diff --git a/static-site/app/nextclade/[[...nextclade]]/resources.tsx b/static-site/app/nextclade/[[...nextclade]]/resources.tsx new file mode 100644 index 000000000..78ce8fa44 --- /dev/null +++ b/static-site/app/nextclade/[[...nextclade]]/resources.tsx @@ -0,0 +1,51 @@ +"use client"; +// Note: this is only in a separate file as it needs to be run client side +// and we want to run as much of the parent page server-side as possible. + +import React from "react"; +import ListResources from "../../../components/list-resources"; +import { listResourcesAPI } from "../../../components/list-resources/listResourcesApi"; +import { Group } from "../../../components/list-resources/types"; +import nextstrainLogoSmall from "../../../static/logos/nextstrain-logo-small.png"; + +export default function NextcladeResourceListing(): React.ReactElement { + return ( + + ) +} + +async function nextcladeDatasetResourceGroups() { + return await listResourcesAPI('nextclade', 'dataset', { + versioned: true, + + /* For dataset "community/a/b/c", use "community/a" as the grouping instead + * of just "community". + */ + groupNameBuilder: (name: string): string => { + return name.startsWith("community/") + ? name.split("/").slice(1, 3).join("/") + : name.split("/")[0]!; // eslint-disable-line @typescript-eslint/no-non-null-assertion + }, + + // Sort "community/…" datasets after ours + groupSortableName: (group: Group): string => { + const name = group.groupName; + return name.startsWith("community/") ? `001 ${name}` : + `000 ${name}` ; + }, + + // Add Nextstrain logo for core datasets + groupImg: (group: Group) => { + const isOfficialDataset = group.resources.some((r) => !r.name.startsWith('community/')); + return isOfficialDataset + ? 
{ src: nextstrainLogoSmall.src, alt: "nextstrain logo" } + : undefined; + }, + }); +} diff --git a/static-site/app/pathogens/[[...pathogens]]/resources.tsx b/static-site/app/pathogens/[[...pathogens]]/resources.tsx index d8cf47e2f..f6bb41c36 100644 --- a/static-site/app/pathogens/[[...pathogens]]/resources.tsx +++ b/static-site/app/pathogens/[[...pathogens]]/resources.tsx @@ -6,6 +6,7 @@ import React from "react"; import ListResources from "../../../components/list-resources"; import { listResourcesAPI } from "../../../components/list-resources/listResourcesApi"; import { coreResources } from "../../../content/resource-listing"; +import nextstrainLogoSmall from "../../../static/logos/nextstrain-logo-small.png"; /** @@ -31,6 +32,7 @@ async function _coreDatasetResourceGroups() { groupDisplayNames: coreResources["coreGroupDisplayNames"], groupUrl: (groupName: string) => `/${groupName}`, groupUrlTooltip: (groupName: string) => `Click to load the default (and most recent) analysis for ${coreResources["coreGroupDisplayNames"][groupName] || groupName}`, + groupImg: () => ({ src: nextstrainLogoSmall.src, alt: "nextstrain logo" }), }; return await listResourcesAPI('core', 'dataset', opts); } \ No newline at end of file diff --git a/static-site/app/pathogens/files/resources.tsx b/static-site/app/pathogens/files/resources.tsx index 6b3e32666..ad55405b0 100644 --- a/static-site/app/pathogens/files/resources.tsx +++ b/static-site/app/pathogens/files/resources.tsx @@ -6,6 +6,7 @@ import React from "react"; import ListResources from "../../../components/list-resources"; import { listResourcesAPI } from "../../../components/list-resources/listResourcesApi"; import { coreResources } from "../../../content/resource-listing"; +import nextstrainLogoSmall from "../../../static/logos/nextstrain-logo-small.png"; /** * A React Client Component which wraps the ListResources component to list @@ -27,6 +28,7 @@ async function _coreDatasetResourceGroups() { const opts = { versioned: false, 
groupDisplayNames: coreResources["coreGroupDisplayNames"], + groupImg: () => ({ src: nextstrainLogoSmall.src, alt: "nextstrain logo" }), }; return await listResourcesAPI('core', 'intermediate', opts); } \ No newline at end of file diff --git a/static-site/app/staging/[[...staging]]/resources.tsx b/static-site/app/staging/[[...staging]]/resources.tsx index 9ec536334..11c5de9c5 100644 --- a/static-site/app/staging/[[...staging]]/resources.tsx +++ b/static-site/app/staging/[[...staging]]/resources.tsx @@ -5,6 +5,7 @@ import React from "react"; import ListResources from "../../../components/list-resources"; import { listResourcesAPI } from "../../../components/list-resources/listResourcesApi"; +import nextstrainLogoSmall from "../../../static/logos/nextstrain-logo-small.png"; export default function StagingPathogenResourceListing(): React.ReactElement { return ( @@ -19,6 +20,9 @@ export default function StagingPathogenResourceListing(): React.ReactElement { } async function stagingDatasetResourceGroups() { - const opts = {versioned: false}; + const opts = { + versioned: false, + groupImg: () => ({ src: nextstrainLogoSmall.src, alt: "nextstrain logo" }), + }; return await listResourcesAPI('staging', 'dataset', opts); } \ No newline at end of file diff --git a/static-site/components/list-resources/listResourcesApi.tsx b/static-site/components/list-resources/listResourcesApi.tsx index 18f354cbd..a6231f59b 100644 --- a/static-site/components/list-resources/listResourcesApi.tsx +++ b/static-site/components/list-resources/listResourcesApi.tsx @@ -1,7 +1,6 @@ import { ResourceType, Resource, Group, PathVersionsForGroup, FetchGroupHistory } from "./types"; import { InternalError } from "../error-boundary"; import fetchAndParseJSON from "../../util/fetch-and-parse-json"; -import nextstrainLogoSmall from "../../static/logos/nextstrain-logo-small.png"; interface APIWrapper { [resourceType: string]: { @@ -30,14 +29,25 @@ interface ResourceListingIntermediates { export async 
function listResourcesAPI( sourceId: string, resourceType: ResourceType, - {versioned, groupDisplayNames, groupUrl, groupUrlTooltip}: { + { + versioned, + groupNameBuilder = (name: string) => name.split("/")[0]!, // eslint-disable-line @typescript-eslint/no-non-null-assertion + groupDisplayNames, + groupSortableName, + groupUrl, + groupUrlTooltip, + groupImg + }: { /** Report prior versions of each resource. * TODO: infer this from the API data itself */ versioned: boolean, + groupNameBuilder?: (name: string) => string, groupDisplayNames?: Record, + groupSortableName?: (group: Group) => string, groupUrl?: (groupName: string) => string, - groupUrlTooltip?: (groupName: string) => string + groupUrlTooltip?: (groupName: string) => string, + groupImg?: (group: Group) => { src: string; alt: string } | undefined } ): Promise { const requestPath = `/list-resources/${sourceId}/${resourceType}`; @@ -52,19 +62,29 @@ export async function listResourcesAPI( } const groups = Object.entries( areDatasets(data) ? 
- groupDatasetsByPathogen(data.pathVersions, urlBuilder, versioned) : - groupIntermediatesByPathogen(data.latestVersions) + groupDatasetsByPathogen(data.pathVersions, urlBuilder, versioned, groupNameBuilder) : + groupIntermediatesByPathogen(data.latestVersions, groupNameBuilder) ).map(([groupName, resources]) => { const group = resourceGroup(groupName, resources); if (groupDisplayNames && groupName in groupDisplayNames) { group.groupDisplayName = groupDisplayNames[groupName]; } + if (groupSortableName) { + group.sortingGroupName = groupSortableName(group); + } if (groupUrl) { group.groupUrl = groupUrl(groupName); } if (groupUrlTooltip) { group.groupUrlTooltip = groupUrlTooltip(groupName); } + if (groupImg) { + const img = groupImg(group); + if (img) { + group.groupImgSrc = img.src; + group.groupImgAlt = img.alt; + } + } if (resourceType==='intermediate' && sourceId==='core') { group.fetchHistory = fetchIntermediateGroupHistoryFactory(sourceId, groupName); } @@ -87,8 +107,7 @@ function resourceGroup(groupName: string, resources: Resource[]): Group { const groupInfo: Group = { groupName, - groupImgSrc: nextstrainLogoSmall.src, - groupImgAlt: "nextstrain logo", + sortingGroupName: groupName, resources, nResources: resources.length, nVersions, @@ -113,6 +132,9 @@ function groupDatasetsByPathogen( /** boolean controlling addition of version-specific fields */ versioned: boolean, + + /** constructs the name (e.g. 
pathogen) under which to group a dataset */ + groupNameBuilder: (name: string) => string, ): Record { return Object.entries(pathVersions).reduce( (store: Record, [name, dates]) => { @@ -122,7 +144,7 @@ function groupDatasetsByPathogen( throw new InternalError(`Name is not properly formatted: '${name}'`); } - const groupName = nameParts[0]; + const groupName = groupNameBuilder(name); const resourceDetails: Resource = { name, @@ -159,7 +181,10 @@ function groupDatasetsByPathogen( } function groupIntermediatesByPathogen( - latestVersions: ResourceListingIntermediates['latestVersions'] + latestVersions: ResourceListingIntermediates['latestVersions'], + + /** constructs the name (e.g. pathogen) under which to group a file */ + groupNameBuilder: (name: string) => string, ): Record { return Object.entries(latestVersions).reduce( (store: Record, [baseName, d]) => { @@ -168,7 +193,7 @@ function groupIntermediatesByPathogen( if (baseParts[0] === undefined) { throw new InternalError(`Resource is not properly formatted (empty name)`); } - const groupName = baseParts[0]; + const groupName = groupNameBuilder(baseName); for (const [filename, urlDatePair] of Object.entries(d)) { if (filename==='mostRecentlyIndexed') continue; const nameParts = [...baseParts, filename] diff --git a/static-site/components/list-resources/modal-contents-dataset-history.tsx b/static-site/components/list-resources/modal-contents-dataset-history.tsx index adccb04c3..f6ac6d7bb 100644 --- a/static-site/components/list-resources/modal-contents-dataset-history.tsx +++ b/static-site/components/list-resources/modal-contents-dataset-history.tsx @@ -42,7 +42,7 @@ export function DatasetHistory({ diff --git a/static-site/components/list-resources/modal_draw.js b/static-site/components/list-resources/modal_draw.js index 24c7502ee..1681f8595 100644 --- a/static-site/components/list-resources/modal_draw.js +++ b/static-site/components/list-resources/modal_draw.js @@ -180,7 +180,7 @@ export default function 
modal_draw(ref, resource, lightGrey) { }) // @ts-expect-error no-unused-vars .on("click", function (e, d) { - window.open(`/${resource.name}@${d.data.version}`, "_blank"); // TEST! + window.open(`${resource.url}@${d.data.version}`, "_blank"); // TEST! }); /** @@ -244,7 +244,7 @@ export default function modal_draw(ref, resource, lightGrey) { }) .on("click", function (e) { const { datum } = getVersion(e); - window.open(`/${resource.name}@${datum.data.version}`, "_blank"); + window.open(`${resource.url}@${datum.data.version}`, "_blank"); }); function selectSnapshot(selection, selectedDatum) { diff --git a/static-site/components/list-resources/types.ts b/static-site/components/list-resources/types.ts index 9e372f6bc..92a57c349 100644 --- a/static-site/components/list-resources/types.ts +++ b/static-site/components/list-resources/types.ts @@ -10,6 +10,7 @@ export type SortMethod = "lastUpdated" | "alphabetical"; export type Group = { groupName: string; + sortingGroupName: string; groupImgSrc?: string; groupImgAlt?: string; nResources: number; diff --git a/static-site/components/list-resources/use-sort-and-filter.ts b/static-site/components/list-resources/use-sort-and-filter.ts index 78915db76..cafa41e5f 100644 --- a/static-site/components/list-resources/use-sort-and-filter.ts +++ b/static-site/components/list-resources/use-sort-and-filter.ts @@ -85,7 +85,7 @@ export default function useSortAndFilter( ) { // resources updated on the same day or without a last updated date // sort alphabetically - return _lexicographicSort(a.name, b.name); + return _lexicographicSort(a.sortingName, b.sortingName); } else { return _newestFirstSort(a.lastUpdated, b.lastUpdated); } @@ -96,11 +96,11 @@ export default function useSortAndFilter( const groups = originalData; const _sortGroups = (groupA: Group, groupB: Group) => _lexicographicSort( - groupA.groupName.toLowerCase(), - groupB.groupName.toLowerCase(), + groupA.sortingGroupName.toLowerCase(), + groupB.sortingGroupName.toLowerCase(), 
); const _sortResources = (a: Resource, b: Resource) => - _lexicographicSort(a.name, b.name); + _lexicographicSort(a.sortingName, b.sortingName); const resourceGroups = sortAndFilter(groups, _sortGroups, _sortResources); setState(resourceGroups); }