Changes from all commits
18 commits
1b0d849
sources: Clean up versionInfo() methods
tsibley Oct 29, 2025
1507cb1
sources/fetch: Override static .Subresource property instead of the .…
tsibley Nov 3, 2025
8ea8214
Revert "Simplify allowed pathBuilder functions"
tsibley Oct 15, 2025
a1b9dd6
endpoints/sources: Preserve query by default in canonicalizeDataset()
tsibley Oct 15, 2025
bb97f45
async: Don't define .useAsync() if .use() isn't defined
tsibley Oct 30, 2025
711b172
async: Add .allAsync() when .all() exists, e.g. on app.routeAsync()
tsibley Oct 30, 2025
7cd951b
endpoints/sources: Support async callbacks in canonicalizeDataset()
tsibley Oct 30, 2025
5ac2e88
endpoints/charon: Fix dataset canonicalization to account for the cur…
tsibley Oct 30, 2025
929840c
endpoints/sources: Convert Dataset.resolve() to an async method
tsibley Nov 3, 2025
9c2ab23
sources/models: Convert Subresource.baseName property to an async method
tsibley Nov 3, 2025
ec79698
resourceIndexer: Correct expected filenames for fetchInventoryLocal()
tsibley Nov 3, 2025
957f7b3
templateLiterals: Add re for safe-by-construction RegExps
tsibley Nov 3, 2025
d028447
static-site[list-resources]: Link to historical datasets by URL path …
tsibley Nov 3, 2025
c22e233
static-site[list-resources]: Actually use the computed "sortingName" …
tsibley Nov 3, 2025
22bf7fb
static-site[list-resources]: Refactor "groupName" (i.e. pathogen name…
tsibley Nov 3, 2025
b900c41
static-site[list-resources]: Enable customization of sorting by "grou…
tsibley Nov 3, 2025
4e59d79
Expose Nextclade dataset reference trees under /nextclade/…
tsibley Nov 3, 2025
04468c1
🚧 logo and grouping improvements
victorlin Dec 4, 2025
2 changes: 1 addition & 1 deletion .github/workflows/index-resources.yml
@@ -85,7 +85,7 @@ jobs:
           node resourceIndexer/main.js \
             --gzip --output resources.json.gz \
             --resourceTypes dataset intermediate \
-            --collections core staging
+            --collections core staging nextclade
       - name: Upload the new index, overwriting the existing index
         if: ${{ startsWith(env.RESOURCE_INDEX, 's3://') }}
         run: |
6 changes: 0 additions & 6 deletions data/manifest_core.json
@@ -582,12 +582,6 @@
       "default": "open"
     }
   },
-  "nextclade": {
-    "dataset": {
-      "sars-cov-2": "",
-      "default": "sars-cov-2"
-    }
-  },
   "nipah": {
     "resolution": {
       "all": "",
2 changes: 1 addition & 1 deletion env/production/config.json
@@ -110,6 +110,6 @@
   "OIDC_GROUPS_CLAIM": "cognito:groups",
   "SESSION_COOKIE_DOMAIN": "nextstrain.org",
   "GROUPS_DATA_FILE": "groups.json",
-  "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v8.json.gz",
+  "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v9.json.gz",
   "PLAUSIBLE_ANALYTICS_DOMAIN": "nextstrain.org"
 }
2 changes: 1 addition & 1 deletion env/testing/config.json
@@ -108,5 +108,5 @@
   "OIDC_USERNAME_CLAIM": "cognito:username",
   "OIDC_GROUPS_CLAIM": "cognito:groups",
   "GROUPS_DATA_FILE": "groups.json",
-  "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v8.json.gz"
+  "RESOURCE_INDEX": "s3://nextstrain-inventories/resources/v9.json.gz"
 }
2 changes: 1 addition & 1 deletion resourceIndexer/inventory.js
@@ -86,7 +86,7 @@ const fetchInventoryRemote = async ({bucket, prefix, name, save}) => {
 /**
  * Parse an on-disk inventory. This expects the following files to be present:
  * - `./devData/${name}.manifest.json`
- * - `./devData/${name}.inventory.csv.gz`
+ * - `./devData/${name}-*.csv.gz`
  *
  * Returns an object with properties:
  * - inventory: object[] list of entries in the inventory, using the schema to define keys
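Taken together with the Nextclade additions below, the expected ./devData/ layout for --local runs looks roughly like this (the timestamped inventory filenames are illustrative; only the `${name}-*.csv.gz` pattern is prescribed):

devData/
  core.manifest.json
  core-2025-11-03T00-00Z.csv.gz        (illustrative match for ${name}-*.csv.gz)
  staging.manifest.json
  staging-2025-11-03T00-00Z.csv.gz
  nextclade/
    index.json
    <dataset path>/<version tag>/pathogen.json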
12 changes: 7 additions & 5 deletions resourceIndexer/main.js
@@ -2,6 +2,7 @@
 import { ArgumentParser } from 'argparse';
 import fs from 'fs';
 import { coreS3Data, stagingS3Data } from "./coreStagingS3.js";
+import { NextcladeData } from "./nextclade.js";
 import zlib from 'zlib';
 import { promisify } from 'util';
 import { ResourceIndexerError } from './errors.js';
@@ -19,8 +20,8 @@ const gzip = promisify(zlib.gzip)
  * (sub-)class and resourcePath to parallel the information in the Resource
  * (sub-)class.
  *
- * Currently only sources {core, staging} and resource types {dataset,
- * intermediate} are part of the index.
+ * Currently only sources {core, staging, nextclade} and resource types
+ * {dataset, intermediate} are part of the index.
  *
  * As an example, the core WNV/NA (nextstrain.org/WNV/NA) dataset is indexed
  * like so:
@@ -34,6 +35,7 @@ const gzip = promisify(zlib.gzip)
 const COLLECTIONS = [
   coreS3Data,
   stagingS3Data,
+  new NextcladeData(),
 ];
 
 function parseArgs() {
@@ -47,13 +49,13 @@ function parseArgs() {
   `,
   });
   argparser.addArgument("--local", {action: 'storeTrue',
-    help: 'Access a local copy of S3 inventories within ./devData/. See docstring of fetchInventoryLocal() for expected filenames.'})
+    help: 'Access a local copy of S3 inventories ({core,staging}.manifest.json and {core,staging}-*.csv.gz) and Nextclade indexes (nextclade/index.json and nextclade/**/pathogen.json) within ./devData/ instead of downloading them'})
   argparser.addArgument("--collections", {metavar: "<name>", type: "string", nargs: '+', choices: COLLECTIONS.map((c) => c.name),
     help: "Only fetch data from a subset of collections. Source names are those defined in COLLECTIONS"});
   argparser.addArgument("--resourceTypes", {metavar: "<name>", type: "string", nargs: '+', choices: ['dataset', 'intermediate'],
     help: "Only index data matching specified resource types"});
   argparser.addArgument("--save-inventories", {action: 'storeTrue',
-    help: "Save the fetched inventories + manifest files to ./devData so that future invocations can use --local"})
+    help: "Save a local copy of S3 inventories and Nextclade indexes to ./devData/ so that future invocations can use --local"})
   argparser.addArgument("--output", {metavar: "<json>", required: true})
   argparser.addArgument("--indent", {action: 'storeTrue', help: 'Indent the output JSON'})
   argparser.addArgument("--gzip", {action: 'storeTrue', help: 'GZip the output JSON'})
@@ -118,4 +120,4 @@ async function main(args) {
     output = await gzip(output)
   }
   fs.writeFileSync(args.output, output);
-}
+}
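The workflow change above exercises the same CLI. For local development, a plausible invocation pair (every flag shown is defined in parseArgs() above; the particular combination is illustrative):

# First run: fetch from the network, saving local copies under ./devData/
node resourceIndexer/main.js \
  --save-inventories \
  --collections nextclade \
  --resourceTypes dataset \
  --indent --output resources.json

# Later runs: reuse the saved copies instead of the network
node resourceIndexer/main.js \
  --local \
  --collections nextclade \
  --resourceTypes dataset \
  --indent --output resources.json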
150 changes: 150 additions & 0 deletions resourceIndexer/nextclade.js
@@ -0,0 +1,150 @@
/**
 * Index Nextclade dataset reference trees, including past versions.
 *
 * Transforms Nextclade's own index for use with our resourceIndexer/… and
 * src/resourceIndex.js framework.
 */
import { strict as assert } from "assert";
import { DateTime } from "luxon";
import { readFile, writeFile, mkdir } from "node:fs/promises";
import path from "node:path";

import { fetch } from "../src/fetch.js";
import { NextcladeSource } from "../src/sources/nextclade.js";
import { rootDirFullPath } from "../src/utils/index.js";


const LOCAL_DATA = path.relative(".", path.join(rootDirFullPath, "devData", "nextclade"));
const LOCAL_INDEX = path.join(LOCAL_DATA, "index.json");


/* All class members are part of the "collection" interface expected by
 * resourceIndexer/main.js and use its terminology for arguments and return
 * values. This interface is kind of a weird fit for things that aren't S3
 * inventories, so the chain of methods and way they pass values are a bit
 * contrived.
 */
export class NextcladeData {
  #source;

  name = "nextclade";

  async collect({local, save}) {
    if (local) {
      console.log(`Reading ${LOCAL_INDEX}`);
      this.#source = new NextcladeSource(JSON.parse(await readFile(LOCAL_INDEX)));
    }
    else {
      this.#source = new NextcladeSource();

      if (save) {
        console.log(`Saving ${LOCAL_INDEX}`);
        await mkdir(path.dirname(LOCAL_INDEX), {recursive: true});
        await writeFile(LOCAL_INDEX, JSON.stringify(await this.#source._index(), null, 2));
      }
    }

    const datasetPaths = await this.#source.availableDatasets();

    return (await Promise.all(
      datasetPaths.map(async (datasetPath) => {
        const dataset = this.#source.dataset(datasetPath.split("/"));
        const indexDataset = await dataset._indexDataset();

        /* Sort and collapse versions per our documented behaviour:
         *
         * > All times are UTC. A datestamp refers to datasets uploaded
         * > between 00h00 and 23h59 UTC on that day.
         *
         * > If multiple datasets are uploaded on the same day we take the most
         * > recent.
         *
         * See <https://docs.nextstrain.org/page/guides/snapshots.html#details-for-dataset-maintainers>.
         */
        const datesSeen = new Set();
        const indexVersions =
          indexDataset.versions
            .map(v => ({...v, _timestamp: DateTime.fromISO(v.updatedAt, {zone:"UTC"})}))
            .toSorted((a, b) => b._timestamp - a._timestamp)
            .map(v => ({...v, _date: v._timestamp.toISODate()}))
            .filter(v => !datesSeen.has(v._date) && datesSeen.add(v._date));
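        /* For example (illustrative timestamps): versions updated at
         * 2025-10-01T04:00Z, 2025-10-01T18:00Z, and 2025-09-30T12:00Z sort
         * newest-first and collapse to two entries: the 18:00Z version
         * datestamped 2025-10-01 and the 12:00Z version datestamped
         * 2025-09-30.
         */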

        // Produce one resourceIndexer/main.js "item" per dataset version
        return (await Promise.all(
          indexVersions.map(async (indexVersion) => {
            const versionMetaPath = `${indexDataset.path}/${indexVersion.tag}/pathogen.json`;

            const localFile = path.join(LOCAL_DATA, versionMetaPath);

            let versionMeta;

            if (local) {
              console.log(`Reading ${localFile}`);
              versionMeta = JSON.parse(await readFile(localFile));
            }
            else {
              const remoteUrl = await this.#source.urlFor(versionMetaPath);

              console.log(`Fetching ${remoteUrl}`);
              const response = await fetch(remoteUrl, {cache: "no-cache"});
              assert(response.status === 200);

              versionMeta = await response.json();

              if (save) {
                console.log(`Saving ${localFile}`);
                await mkdir(path.dirname(localFile), {recursive: true});
                await writeFile(localFile, JSON.stringify(versionMeta, null, 2));
              }
            }

            /* This filter must be *after* we fetch the version's own
             * pathogen.json. Because versions are filtered to one-per-day
             * *before* we fetch, it's possible there's an older version from
             * the same day that *does* include a treeJson, and we'd miss it.
             * The fix would be fetching *all* versions and only then filtering
             * to one-per-day (i.e. in createResource() below).
             *
             * Doing so, however, seems unnecessary. The scenario seems
             * unlikely and it's not entirely clear how we'd want to interpret
             * such a dataset update anyway (e.g. was the earlier version on
             * the same day in error?).
             *
             * Also note that this filters out some datasets entirely: those
             * that don't have a reference tree at all.
             * -trs, 27 Oct 2025
             */
            if (!versionMeta.files.treeJson)
              return;

            // One "item" produced by collect()
            return {
              // Used by resourceIndexer/main.js
              source: this.#source.name,
              resourceType: "dataset",
              resourcePath: datasetPath,

              // Used in createResource() below
              version: {
                date: indexVersion._date,
                fileUrls: {
                  main: await this.#source.urlFor(`${indexDataset.path}/${indexVersion.tag}/${versionMeta.files.treeJson}`)
                }
              },
            };
          })
        )).flat();
      })
    )).flat();
  }

  categorise(item) {
    return item;
  }

  createResource(resourceType, resourcePath, items) {
    return {
      versions: items.map(i => i.version),
    };
  }
}
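For orientation, a minimal sketch of how the "collection" interface described at the top of this file is consumed. This is hypothetical code, not the actual resourceIndexer/main.js (which also applies the --collections and --resourceTypes filters); it assumes COLLECTIONS, local, and save are in scope inside an async function:

// Hypothetical sketch of the consuming side.
const grouped = new Map();

for (const collection of COLLECTIONS) {
  // collect() returns "items"; filter(Boolean) drops the versions skipped
  // above (e.g. those without a treeJson).
  for (const item of (await collection.collect({local, save})).filter(Boolean)) {
    const {source, resourceType, resourcePath} = collection.categorise(item);
    const key = JSON.stringify([source, resourceType, resourcePath]);
    if (!grouped.has(key)) grouped.set(key, {collection, items: []});
    grouped.get(key).items.push(item);
  }
}

// One resource per (source, resourceType, resourcePath) group, built by
// the collection that produced the grouped items.
const resources = [...grouped].map(([key, {collection, items}]) => {
  const [source, resourceType, resourcePath] = JSON.parse(key);
  return {source, resourceType, resourcePath,
          ...collection.createResource(resourceType, resourcePath, items)};
});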
11 changes: 10 additions & 1 deletion src/app.js
@@ -14,6 +14,7 @@ const {
   errors,
   fetch,
   groups,
+  nextclade,
   openid,
   pathogenRepos,
   schemas,
@@ -68,7 +69,6 @@ charon.setup(app);
  *   /monkeypox
  *   /mpox
  *   /ncov
- *   /nextclade
  *   /rsv
  *   /rubella
  *   /seasonal-flu
@@ -92,6 +92,15 @@ core.setup(app);
 staging.setup(app);
 
 
+/* Nextclade reference datasets
+ *
+ * Routes:
+ *   /nextclade
+ *   /nextclade/*
+ */
+nextclade.setup(app);
+
+
 /* Community on GitHub
  *
  * Routes:
26 changes: 19 additions & 7 deletions src/async.js
@@ -228,13 +228,25 @@ function addAsync(app) {
     return addAsync(this.route.apply(this, arguments));
   };
 
-  app.useAsync = function() {
-    const fn = arguments[arguments.length - 1];
-    assert.ok(typeof fn === 'function',
-      'Last argument to `useAsync()` must be a function');
-    const args = wrapArgs(arguments);
-    return app.use.apply(app, args);
-  };
+  if (app.use) {
+    app.useAsync = function() {
+      const fn = arguments[arguments.length - 1];
+      assert.ok(typeof fn === 'function',
+        'Last argument to `useAsync()` must be a function');
+      const args = wrapArgs(arguments);
+      return app.use.apply(app, args);
+    };
+  }
+
+  if (app.all) {
+    app.allAsync = function() {
+      const fn = arguments[arguments.length - 1];
+      assert.ok(typeof fn === 'function',
+        'Last argument to `allAsync()` must be a function');
+      const args = wrapArgs(arguments);
+      return app.all.apply(app, args);
+    };
+  }
 
   app.deleteAsync = function() {
     const fn = arguments[arguments.length - 1];
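The .use()/.all() guards matter because addAsync() is applied not just to apps but also to the objects returned by route()/routeAsync() (see routeAsync above), which have .all() but no .use(). A hypothetical usage sketch; buildContext() and handle() are illustrative names:

// Hypothetical usage, assuming an Express app wrapped with addAsync():
app.useAsync(async (req, res, next) => {    // app has .use(), so .useAsync() exists
  req.context = await buildContext(req);    // buildContext() is illustrative
  next();
});

app.routeAsync("/charon/getDataset")        // returns a Route wrapped by addAsync();
  .allAsync(async (req, res) => {           // a Route has .all() but no .use(), so it
    res.json(await handle(req));            // gets .allAsync() and, correctly, no .useAsync()
  });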
22 changes: 14 additions & 8 deletions src/endpoints/charon/index.js
@@ -1,5 +1,5 @@
 import { BadRequest, isHttpError } from '../../httpErrors.js';
-import { splitPrefixIntoParts } from '../../utils/prefix.js';
+import { splitPrefixIntoParts, joinPartsIntoPrefix } from '../../utils/prefix.js';
 import { setSource, setDataset, canonicalizeDataset, setNarrative } from '../sources.js';
 import './setAvailableDatasets.js'; // sets globals
 export { getAvailable } from './getAvailable.js';
@@ -25,13 +25,19 @@ const setSourceFromPrefix = setSource(req => {
 
 const setDatasetFromPrefix = setDataset(req => req.context.splitPrefixIntoParts.prefixParts.join("/"));
 
-const canonicalizeDatasetPrefix = canonicalizeDataset((req, resolvedPrefix) => {
-  // A absolute base is required but we won't use it, so use something bogus.
-  const resolvedUrl = new URL(req.originalUrl, "http://x");
-  resolvedUrl.searchParams.set("prefix", resolvedPrefix);
-
-  return resolvedUrl.pathname + resolvedUrl.search;
-});
+/**
+ * Leave the URL path (e.g. /charon/getDataset) unchanged; only the "prefix"
+ * query param is updated with the resolved dataset path.
+ */
+const canonicalizeDatasetPrefix = canonicalizeDataset(async (req, path) => ({
+  query: {
+    ...req.query,
+    prefix: await joinPartsIntoPrefix({
+      source: req.context.source,
+      prefixParts: path.split("/")
+    }),
+  }
+}));
 
 const setNarrativeFromPrefix = setNarrative(req => {
   const {prefixParts} = req.context.splitPrefixIntoParts;