diff --git a/Makefile b/Makefile index e613211..3328754 100644 --- a/Makefile +++ b/Makefile @@ -1,28 +1,17 @@ PYTHON ?= python3 -ZENSICAL ?= zensical - -.PHONY: update-branches -update-branches: - @echo "Updating GitHub branch links in docs..." - @$(PYTHON) scripts/update_doc_links.py - -.PHONY: prepare -prepare: - @$(PYTHON) scripts/prepare_docs.py +ZENSICAL ?= $(if $(wildcard ./venv/bin/zensical),./venv/bin/zensical,zensical) .PHONY: build -build: prepare +build: @$(ZENSICAL) build --clean .PHONY: serve -serve: prepare +serve: @$(ZENSICAL) serve .PHONY: help help: @echo "Makefile targets:" - @echo " prepare - Stage local docs plus imported upstream tool docs into .generated/docs" @echo " build - Build the Zensical site with the active Python environment" @echo " serve - Serve the Zensical site with the active Python environment" - @echo " update-branches - Update docs to point to configured development branches for repos" @echo " help - Show this message" diff --git a/docs/calypr/data/git-drs.md b/docs/calypr/data/git-drs.md index 72a3b86..7e8e0a5 100644 --- a/docs/calypr/data/git-drs.md +++ b/docs/calypr/data/git-drs.md @@ -20,15 +20,15 @@ The following guide details the steps a data contributor must take to submit a p CALYPR project management is handled using standard Git workflows. you will need the **Large File Storage (LFS)** plugin to track genomic data files and the **Git-DRS** plugin to interface with CALYPR's storage and indexing systems. -Visit the [Quick Start Guide](../quick-start.md) for detailed, OS-specific installation instructions for these tools. +Visit the [Quick Start Guide](/calypr/quick-start/) for detailed, OS-specific installation instructions for these tools. | Tool | Purpose | | :--- | :--- | | **git-drs** | Manages large file tracking, storage, and DRS indexing. | | **forge** | Handles metadata validation, transformation (ETL), and publishing. 
| -| **data-client** | Administrative tool for managing [collaborators and access requests](../../tools/data-client/docs/access_requests.md). | +| **data-client** | Administrative tool for managing [collaborators and access requests](/tools/data-client/docs/access_requests/). | {: .caption } ## Git DRS Workflows -For complete Git DRS documentation including project initialization, file management, and upload workflows, see the [Git DRS Quick Start](../../tools/git-drs/quickstart.md). +For complete Git DRS documentation including project initialization, file management, and upload workflows, see the [Git DRS Quick Start](/tools/git-drs/quickstart/). diff --git a/docs/calypr/data/integration.md b/docs/calypr/data/integration.md index 912000d..6369f31 100644 --- a/docs/calypr/data/integration.md +++ b/docs/calypr/data/integration.md @@ -5,7 +5,7 @@ Converting tabular data (CSV, TSV, spreadsheet, database table) into FHIR (Fast ## Overview -When you create and upload files, you can tag them with identifiers to establish an initial skeleton graph. You can then retrieve that data using the [git-drs](../../tools/git-drs/index.md) command line tool and enhance the metadata using [forge](../../tools/forge/docs/index.md) to create a more complete graph representing your study. +When you create and upload files, you can tag them with identifiers to establish an initial skeleton graph. You can then retrieve that data using the [git-drs](/tools/git-drs/) command line tool and enhance the metadata using [forge](/tools/forge/docs/) to create a more complete graph representing your study. You may work with data in its "native" JSON format or convert it to a tabular format for integration. The system automatically re-converts tabular data back to JSON for submission. 
diff --git a/docs/calypr/data/managing-metadata.md b/docs/calypr/data/managing-metadata.md index f53fcbf..abb95af 100644 --- a/docs/calypr/data/managing-metadata.md +++ b/docs/calypr/data/managing-metadata.md @@ -93,7 +93,7 @@ Example: ## Validating Metadata -To ensure that the FHIR files you added are valid and graph-consistent, use [Forge validation](../../tools/forge/docs/validation.md). +To ensure that the FHIR files you added are valid and graph-consistent, use [Forge validation](/tools/forge/docs/validation/). ```bash forge validate data --path META @@ -192,4 +192,4 @@ Automated tools or CI processes must: * Confirm every DocumentReference.url matches an existing file path. * Check proper .ndjson formatting. ---- \ No newline at end of file +--- diff --git a/docs/calypr/index.md b/docs/calypr/index.md index 9fe89d6..1876a16 100644 --- a/docs/calypr/index.md +++ b/docs/calypr/index.md @@ -3,7 +3,7 @@ Welcome to the **CALYPR Platform**. CALYPR is a next-generation genomic data science ecosystem designed to bridge the gap between massive, centralized data commons and the agile, distributed workflows of modern researchers. !!! info "Private Beta" - CALYPR platform is currently in a private beta phase. We are actively working with a select group of research partners to refine the platform. If you encounter any issues or have feature requests, please reach out to the team. The individual [tools](../tools/index.md) are available for public use. + CALYPR platform is currently in a private beta phase. We are actively working with a select group of research partners to refine the platform. If you encounter any issues or have feature requests, please reach out to the team. The individual [tools](/tools/) are available for public use. 
--- @@ -28,11 +28,11 @@ Whether you are working with a few genomes or petabyte-scale cohorts, CALYPR's a CALYPR acts as the "connective tissue" between your research environment and the cloud: * **Data Commons ([Gen3](https://gen3.org)):** Provides the robust backend for metadata management, indexing, and authentication. -* **Data Service ([Syfon](../tools/syfon/index.md)):** Implements the DRS-facing storage layer, handling object registration, presigned upload and download URLs, bucket routing, and server-side access control between clients and object storage. -* **Version Control ([Git-DRS](../tools/git-drs/index.md)):** Manages the check-in and check-out operations for large files, allowing you to treat remote DRS objects as local files. -* **Metadata Orchestration ([Forge](../tools/forge/index.md)):** Streamlines the validation, publishing, and harmonizing of genomic metadata. -* **Compute ([Funnel](../tools/funnel/index.md)):** Executes complex pipelines across distributed environments using standardized task definitions. -* **Graph Insights ([GRIP](../tools/grip/index.md)):** Enables high-performance queries across heterogeneous datasets once integrated. +* **Data Service ([Syfon](/tools/syfon/)):** Implements the DRS-facing storage layer, handling object registration, presigned upload and download URLs, bucket routing, and server-side access control between clients and object storage. +* **Version Control ([Git-DRS](/tools/git-drs/)):** Manages the check-in and check-out operations for large files, allowing you to treat remote DRS objects as local files. +* **Metadata Orchestration ([Forge](/tools/forge/)):** Streamlines the validation, publishing, and harmonizing of genomic metadata. +* **Compute ([Funnel](/tools/funnel/)):** Executes complex pipelines across distributed environments using standardized task definitions. +* **Graph Insights ([GRIP](/tools/grip/)):** Enables high-performance queries across heterogeneous datasets once integrated. 
--- diff --git a/docs/calypr/project-management/calypr-admin/approve-requests.md b/docs/calypr/project-management/calypr-admin/approve-requests.md index 2577139..5044002 100644 --- a/docs/calypr/project-management/calypr-admin/approve-requests.md +++ b/docs/calypr/project-management/calypr-admin/approve-requests.md @@ -64,7 +64,7 @@ We use Gen3's role based access control (RBAC) to manage access to data. * /programs/ucl * /programs/manchester -Designated users within each institution have privileges to update requests. "Update" in this context means setting the status of a user's request to [SIGNED]. +Designated users within each institution have privileges to update requests. "Update" in this context means setting the status of a user's request to `SIGNED`. Since this approach relies on Gen3's [Requestor](https://github.com/uc-cdis/requestor/blob/master/docs/functionality_and_flow.md#example-backend-flow) for all assignments of policies to users we get the following benefits: diff --git a/docs/calypr/project-management/publishing-project.md b/docs/calypr/project-management/publishing-project.md index 3262d8b..65f4502 100644 --- a/docs/calypr/project-management/publishing-project.md +++ b/docs/calypr/project-management/publishing-project.md @@ -35,7 +35,7 @@ Successful output: Check job status: forge status \ Get all job ids: forge list -📖 More details: [Forge](../../tools/forge/docs/index.md) +📖 More details: [Forge](/tools/forge/docs/) --- @@ -50,4 +50,4 @@ After completing the workflow: * Sower job completed without errors * Data searchable in CALYPR web interface * Can query patients/observations in Gen3 -* Files accessible via S3 (no duplicate storage) \ No newline at end of file +* Files accessible via S3 (no duplicate storage) diff --git a/docs/calypr/quick-start.md b/docs/calypr/quick-start.md index 1696c18..da694bc 100644 --- a/docs/calypr/quick-start.md +++ b/docs/calypr/quick-start.md @@ -3,7 +3,7 @@ title: Quick Start Guide --- !!! 
info "Private Beta" - CALYPR platform is currently in a private beta phase. We are actively working with a select group of research partners to refine the platform. If you encounter any issues or have feature requests, please reach out to the team. The individual [tools](../tools/index.md) are available for public use. + CALYPR platform is currently in a private beta phase. We are actively working with a select group of research partners to refine the platform. If you encounter any issues or have feature requests, please reach out to the team. The individual [tools](/tools/) are available for public use. --- @@ -50,7 +50,7 @@ To interact with CALYPR, you need API credentials from the Gen3 data commons. Yo API credentials expire after 30 days, so you'll need to download fresh credentials regularly. -**Learn More:** [Download Gen3 API Credentials](../tools/git-drs/quickstart.md#2-get-credentials) — Step-by-step instructions and setup context +**Learn More:** [Download Gen3 API Credentials](/tools/git-drs/quickstart/#2-get-credentials) — Step-by-step instructions and setup context --- @@ -65,7 +65,7 @@ Git-DRS lets you: When you push files, Git-DRS uploads them to S3, registers DRS records in Gen3, and stores only lightweight pointer files in your Git repository. -**Learn More:** [Git-DRS Complete Documentation](../tools/git-drs/quickstart.md) — Installation, setup, and detailed workflows +**Learn More:** [Git-DRS Complete Documentation](/tools/git-drs/quickstart/) — Installation, setup, and detailed workflows --- @@ -80,7 +80,7 @@ Forge helps you: While you can upload files before metadata, adding metadata early maximizes the value of your data by making it discoverable and queryable. 
-**Learn More:** [Forge Documentation](../tools/forge/docs/index.md) — Installation, validation, and publishing workflows +**Learn More:** [Forge Documentation](/tools/forge/docs/) — Installation, validation, and publishing workflows --- @@ -95,7 +95,7 @@ Funnel enables you to: Funnel is typically used for production pipelines and large-scale analysis. For exploratory work, you might run analyses locally first. -**Learn More:** [Funnel Documentation](../tools/funnel/docs/index.md) — Task definitions, execution, and cluster integration +**Learn More:** [Funnel Documentation](/tools/funnel/docs/docs/) — Task definitions, execution, and cluster integration --- @@ -110,7 +110,7 @@ GRIP allows you to: GRIP is most useful after you've integrated metadata and established relationships between entities. -**Learn More:** [GRIP Documentation](../tools/grip/docs/index.md) — Query syntax, graph traversals, and examples +**Learn More:** [GRIP Documentation](/tools/grip/) — Query syntax, graph traversals, and examples --- @@ -126,10 +126,10 @@ Now that you understand the basic CALYPR workflow, here are some recommended nex ### 🔧 Tool Documentation -- **[Git-DRS Complete Guide](../tools/git-drs/quickstart.md)** - Comprehensive Git-DRS documentation -- **[Forge Reference](../tools/forge/docs/index.md)** - Metadata validation and publishing -- **[Funnel Workflows](../tools/funnel/docs/index.md)** - Task execution and pipeline management -- **[GRIP Queries](../tools/grip/docs/index.md)** - Graph-based data queries +- **[Git-DRS Complete Guide](/tools/git-drs/quickstart/)** - Comprehensive Git-DRS documentation +- **[Forge Reference](/tools/forge/docs/)** - Metadata validation and publishing +- **[Funnel Workflows](/tools/funnel/docs/docs/)** - Task execution and pipeline management +- **[GRIP Queries](/tools/grip/)** - Graph-based data queries ### 🆘 Get Help diff --git a/docs/calypr/troubleshooting.md b/docs/calypr/troubleshooting.md index 9b69555..1e7a37f 100644 --- 
a/docs/calypr/troubleshooting.md +++ b/docs/calypr/troubleshooting.md @@ -58,4 +58,4 @@ git push --- !!! tip "Getting Help" - If your issue isn't listed here, please reach out to our team at [support@calypr.org](mailto:support@calypr.org) or search the individual tool documentation in the [Tools Section](../tools/index.md). + If your issue isn't listed here, please reach out to our team at [support@calypr.org](mailto:support@calypr.org) or search the individual tool documentation in the [Tools Section](/tools/). diff --git a/docs/tools/funnel/docs/compute/gcp-batch.md b/docs/tools/funnel/docs/compute/gcp-batch.md index abee0a9..c04abd7 100644 --- a/docs/tools/funnel/docs/compute/gcp-batch.md +++ b/docs/tools/funnel/docs/compute/gcp-batch.md @@ -6,23 +6,37 @@ menu: weight: 20 --- -!!! warning "Current Limitations ⚠️" - - Latest Funnel release ([v0.11.11](https://github.com/ohsu-comp-bio/funnel/releases/tag/v0.11.11)) requires specific bucket prefixing in the inputs and outputs - - e.g. `/mnt/disks//` instead of `/` - - Nextflow workflows are currently not supported (as Nextflow expects root-level working directories → `/work`) - # Overview -The following steps illustrate how to run a TES tasks via GCP Batch utilizing Google Storage Buckets. +The following steps illustrate how to run TES tasks via GCP Batch utilizing Google Storage Buckets. + +GCS buckets are automatically mounted inside task containers. Input and output paths use standard filesystem paths (e.g. `/input/file.txt`) — no `/mnt/disks/` prefixing required. Nextflow workflows are supported via the TES executor `workdir` field. # Quick Start +## 1. Download Funnel + ```sh -curl -fsSL https://calypr.github.io/funnel/install.sh | bash +curl -fsSL https://calypr.org/funnel/install.sh | bash +``` + +## 2. Start Server + +
+ Config Example -funnel server run --Compute "gcp-batch" \ - --GCPBatch.Project "example-project" \ - --GCPBatch.Location "us-central1" +```yaml +Compute: gcp-batch + +GCPBatch: + Project: example-project + Location: us-central1 +``` + +
+ +```sh +funnel server run --Compute "gcp-batch" --GCPBatch.Project "example-project" --GCPBatch.Location "us-central1" ``` ## 3. Submit Task @@ -35,22 +49,23 @@ funnel server run --Compute "gcp-batch" \ "name": "Input/Output Test", "inputs": [ { - "url": "gs://tes-batch-integration/README.md", - "path": "/mnt/disks/tes-batch-integration/README.md" + "url": "gs://my-bucket/input/README.md", + "path": "/input/README.md" } ], "outputs": [ { - "url": "gs://tes-batch-integration/README.md.sha256", - "path": "/mnt/disks/tes-batch-integration/README.md.sha256" + "url": "gs://my-bucket/output/README.md.sha256", + "path": "/output/README.md.sha256" } ], "executors": [ { "image": "alpine", "command": [ - "sha256sum", - "/mnt/disks/tes-batch-integration/README.md | tee /mnt/disks/tes-batch-integration/README.md.sha256" + "sh", + "-c", + "sha256sum /input/README.md | tee /output/README.md.sha256" ] } ] @@ -75,8 +90,9 @@ funnel task get "executors": [ { "command": [ - "sha256sum", - "/mnt/disks/tes-batch-integration/README.md | tee /mnt/disks/tes-batch-integration/README.md.sha256" + "sh", + "-c", + "sha256sum /input/README.md | tee /output/README.md.sha256" ], "image": "alpine" } @@ -84,15 +100,15 @@ funnel task get "id": "d6f0tgpurbu7o23pgj20", "inputs": [ { - "path": "/mnt/disks/tes-batch-integration/README.md", - "url": "gs://tes-batch-integration/README.md" + "path": "/input/README.md", + "url": "gs://my-bucket/input/README.md" } ], - "name": "GCP Batch Task Example", + "name": "Input/Output Test", "outputs": [ { - "path": "/mnt/disks/tes-batch-integration/README.md.sha256", - "url": "gs://tes-batch-integration/README.md.sha256" + "path": "/output/README.md.sha256", + "url": "gs://my-bucket/output/README.md.sha256" } ], "state": "COMPLETE" @@ -102,8 +118,24 @@ funnel task get ## 5. 
Verify Outputs ```sh -gsutil cat gs://tes-batch-integration/README.md.sha256 -9b9916cea5348edd6ad78893231edb81fc96772d1dd99fae9c2a64f84646cb1c /mnt/disks/tes-batch-integration/README.md +gsutil cat gs://my-bucket/output/README.md.sha256 +9b9916cea5348edd6ad78893231edb81fc96772d1dd99fae9c2a64f84646cb1c /input/README.md +``` + +# Nextflow + +Nextflow tasks can specify a working directory via the TES executor `workdir` field, which maps to Docker's `--workdir` flag inside the GCP Batch container: + +```json +{ + "executors": [ + { + "image": "nextflow/nextflow:latest", + "command": ["nextflow", "run", "main.nf"], + "workdir": "/work" + } + ] +} ``` # Additional Resources diff --git a/docs/tools/funnel/docs/compute/grid-engine.md b/docs/tools/funnel/docs/compute/grid-engine.md index d5b5921..f885c55 100644 --- a/docs/tools/funnel/docs/compute/grid-engine.md +++ b/docs/tools/funnel/docs/compute/grid-engine.md @@ -54,4 +54,3 @@ The following variables are available for use in the template: See https://golang.org/pkg/text/template for information on creating templates. -[ge]: http://gridscheduler.sourceforge.net/documentation.html diff --git a/docs/tools/funnel/docs/development/developers.md b/docs/tools/funnel/docs/development/developers.md index 2d19f0a..c18a4ec 100644 --- a/docs/tools/funnel/docs/development/developers.md +++ b/docs/tools/funnel/docs/development/developers.md @@ -36,7 +36,7 @@ A Funnel development environment includes: - [gRPC Gateway][gateway] for HTTP communication. - [Angular][angular] and [SASS][sass] for the web dashboard. - [GNU Make][make] for development tasks. -- [Docker][docker] for executing task containers (tested with v1.12, v1.13). +- [Docker](https://docker.io) for executing task containers (tested with v1.12, v1.13). - [dep][dep] for Go dependency vendoring. - [Make][make] for development/build commands. - [NodeJS][node] and [NPM][npm] for web dashboard development. 
@@ -64,8 +64,6 @@ mock interfaces in test code, for example, to mock the Google Cloud APIs. [grpc]: http://www.grpc.io/ [sass]: http://sass-lang.com/ [make]: https://www.gnu.org/software/make/ -[docker]: https://docker.io -[python]: https://www.python.org/ [dep]: https://golang.github.io/dep/ [node]: https://nodejs.org [npm]: https://www.npmjs.com/ diff --git a/docs/tools/funnel/docs/docs.md b/docs/tools/funnel/docs/docs.md index 016da15..cfc87e4 100644 --- a/docs/tools/funnel/docs/docs.md +++ b/docs/tools/funnel/docs/docs.md @@ -19,7 +19,7 @@ output files to upload, state, and logs. The API allows you to create, get, list Tasks are accessed via the `funnel task` command. Additionally, the HTTP API is based on a standardized GA4GH protocol, and any client that is compatible with that API will work with Funnel. -There's a lot more you can do with the task API. See the [tasks docs](./tasks.md) for more. +There's a lot more you can do with the task API. See the [tasks docs](/tools/funnel/docs/tasks/) for more. ### Server @@ -32,7 +32,7 @@ Storage provides access to file systems such as S3, Google Storage, and local fi Tasks define locations where files should be downloaded from and uploaded to. Workers handle the downloading/uploading. -See the [storage docs](./storage/index.md) for more information on configuring storage backends. +See the [storage docs](/tools/funnel/docs/storage/) for more information on configuring storage backends. ### Worker @@ -66,6 +66,6 @@ external system for scheduling tasks and managing cluster resources, such as AWS or HPC systems like HTCondor, Slurm, Grid Engine, etc. Funnel provides integration with these services that doesn't include nodes or scheduling by Funnel. -See [Deploying a cluster](./compute/deployment.md) for more information about running a cluster of nodes. +See [Deploying a cluster](/tools/funnel/docs/compute/deployment/) for more information about running a cluster of nodes. 
-The node is accessible via the `funnel node` command. \ No newline at end of file +The node is accessible via the `funnel node` command. diff --git a/docs/tools/funnel/docs/download.md b/docs/tools/funnel/docs/download.md index 2b5d961..abfbcd8 100644 --- a/docs/tools/funnel/docs/download.md +++ b/docs/tools/funnel/docs/download.md @@ -32,4 +32,3 @@ Funnel requires Go 1.21+. Check out the [development docs][dev] for more detail. [dev]: ./development/developers.md -[docker]: https://docker.io diff --git a/docs/tools/funnel/docs/metrics/prometheus.md b/docs/tools/funnel/docs/metrics/prometheus.md index 1b3495b..c59894b 100644 --- a/docs/tools/funnel/docs/metrics/prometheus.md +++ b/docs/tools/funnel/docs/metrics/prometheus.md @@ -32,5 +32,4 @@ Funnel exports these metrics: of bytes of disk space available by all nodes. [prom]: https://prometheus.io/ -[gauge]: https://prometheus.io/docs/concepts/metric_types/#gauge [graf]: https://grafana.com/ diff --git a/docs/tools/funnel/docs/storage/index.md b/docs/tools/funnel/docs/storage/index.md index 905d95b..8e2b7ab 100644 --- a/docs/tools/funnel/docs/storage/index.md +++ b/docs/tools/funnel/docs/storage/index.md @@ -10,12 +10,12 @@ Each input/output URL in a task is resolved by its URL scheme (for example `s3:/ ### Choosing a storage backend -- Use [Local](./local.md) for files on worker-accessible disks. -- Use [HTTP(S)](./http.md) for public URLs and presigned object links. -- Use [FTP](./ftp.md) for FTP-hosted files. -- Use [S3](./s3.md) for Amazon S3 and S3-compatible object stores. -- Use [OpenStack Swift](./swift.md) for Swift object storage. -- Use [Google Storage](./google-storage.md) for Google Cloud Storage. +- Use [Local](/tools/funnel/docs/storage/local/) for files on worker-accessible disks. +- Use [HTTP(S)](/tools/funnel/docs/storage/http/) for public URLs and presigned object links. +- Use [FTP](/tools/funnel/docs/storage/ftp/) for FTP-hosted files. 
+- Use [S3](/tools/funnel/docs/storage/s3/) for Amazon S3 and S3-compatible object stores. +- Use [OpenStack Swift](/tools/funnel/docs/storage/swift/) for Swift object storage. +- Use [Google Storage](/tools/funnel/docs/storage/google-storage/) for Google Cloud Storage. ### URL schemes diff --git a/docs/tools/git-drs/docs/quickstart.md b/docs/tools/git-drs/docs/quickstart.md index a999bf3..cbb1b11 100644 --- a/docs/tools/git-drs/docs/quickstart.md +++ b/docs/tools/git-drs/docs/quickstart.md @@ -5,7 +5,7 @@ This page is a deeper walkthrough of the current `git-drs` workflow. !!! note "Git LFS is optional" `git-drs` does not require Git LFS for normal setup, tracking, push, or pull workflows. - Git LFS compatibility is still supported for older repos and mixed environments. See [Git LFS Compatibility](git-lfs.md) if you need that mode. + Git LFS compatibility is still supported for older repos and mixed environments. See [Git LFS Compatibility](/tools/git-drs/docs/git-lfs/) if you need that mode. ## Prerequisites @@ -17,7 +17,7 @@ Visit [https://git-scm.com](https://git-scm.com) to download and install Git for ## Install Git DRS -Use the project installer or release workflow described in the main [Quick Start](../quickstart.md) and [Installation Guide](../installation.md). +Use the project installer or release workflow described in the main [Quick Start](/tools/git-drs/quickstart/) and [Installation Guide](/tools/git-drs/installation/). One installer path is: @@ -118,7 +118,7 @@ git add .gitattributes git commit -m "Track BAM files" ``` -If you are working in a legacy mixed setup that still depends on Git LFS concepts, see [Git LFS Compatibility](git-lfs.md). +If you are working in a legacy mixed setup that still depends on Git LFS concepts, see [Git LFS Compatibility](/tools/git-drs/docs/git-lfs/). ### Add, Commit, and Push Data @@ -286,7 +286,7 @@ git drs remote list ### Cross-Remote Promotion -Transfer and promotion workflows depend on the current command surface and deployment conventions. 
Use the main [Commands Reference](../commands.md) and your environment-specific process rather than older `fetch`-based examples. +Transfer and promotion workflows depend on the current command surface and deployment conventions. Use the main [Commands Reference](/tools/git-drs/commands/) and your environment-specific process rather than older `fetch`-based examples. ## Command Quick Reference @@ -308,6 +308,6 @@ Transfer and promotion workflows depend on the current command surface and deplo ## Further Reading -- [Troubleshooting](troubleshooting.md) — Common issues and solutions -- [Developer Guide](developer-guide.md) — Architecture, command reference, and internals -- [Git LFS Compatibility](git-lfs.md) — Optional compatibility notes for legacy mixed setups +- [Troubleshooting](/tools/git-drs/docs/troubleshooting/) — Common issues and solutions +- [Developer Guide](/tools/git-drs/docs/developer-guide/) — Architecture, command reference, and internals +- [Git LFS Compatibility](/tools/git-drs/docs/git-lfs/) — Optional compatibility notes for legacy mixed setups diff --git a/docs/tools/git-drs/index.md b/docs/tools/git-drs/index.md index 2bfa719..2172384 100644 --- a/docs/tools/git-drs/index.md +++ b/docs/tools/git-drs/index.md @@ -17,25 +17,25 @@ What Git-DRS does: Use the page that matches your goal: -- [Quick Start](quickstart.md) for the shortest path to a working machine and repository -- [Getting Started](getting-started.md) for the workflow model after first setup -- [Installation Guide](installation.md) if you only need install or source-build details -- [Commands Reference](commands.md) if you already know the workflow and need exact syntax -- [Troubleshooting](troubleshooting.md) if a real workflow is failing +- [Quick Start](quickstart/) for the shortest path to a working machine and repository +- [Getting Started](getting-started/) for the workflow model after first setup +- [Installation Guide](installation/) if you only need install or source-build details +- [Commands Reference](commands/) if you already 
know the workflow and need exact syntax +- [Troubleshooting](troubleshooting/) if a real workflow is failing ## Suggested Path For a new user: -1. [Quick Start](quickstart.md) -2. [Getting Started](getting-started.md) -3. [Commands Reference](commands.md) only when you need exact flags or edge-case behavior +1. [Quick Start](quickstart/) +2. [Getting Started](getting-started/) +3. [Commands Reference](commands/) only when you need exact flags or edge-case behavior ## Reference Deeper detail lives under the reference section: -- [Bucket Mapping](docs/bucket-mapping.md) -- [Removing Files](docs/remove-files.md) -- [How It Works](docs/index.md) -- [Developer Guide](docs/developer-guide.md) +- [Bucket Mapping](docs/bucket-mapping/) +- [Removing Files](docs/remove-files/) +- [How It Works](docs/) +- [Developer Guide](docs/developer-guide/) diff --git a/docs/tools/grip/docs/clients.md b/docs/tools/grip/docs/clients.md index 986755e..62f7e5b 100644 --- a/docs/tools/grip/docs/clients.md +++ b/docs/tools/grip/docs/clients.md @@ -9,7 +9,7 @@ menu: # Getting Started -GRIP has an API for making graph queries using structured data. Queries are defined using a series of step [operations](./queries/index.md). +GRIP has an API for making graph queries using structured data. Queries are defined using a series of step [operations](/tools/grip/docs/queries/). ## Install the Python Client @@ -89,13 +89,13 @@ This represents the vertex we queried for above. All vertexes in the system will * _\_id_: This represents the global identifier for this vertex. In order to draw edges between different vertexes from different data sets we need an identifier that can be constructed from available data. Often, the `_id` will be the field that you query on as a starting point for a traversal. * _\_label_: The label represents the type of the vertex. All vertexes with a given label will share many property keys and edge labels, and form a logical group within the system. 
-The data on a query result can be accessed as properties on the result object; for example `result[0].data.symbol` would return: +The data on a query result can be accessed as properties on the result object; for example `result[0].data.symbol` would return: ```python u'TP53' ``` -You can also do a `has` query with a list of items using `gripql.within([...])` (other conditions exist, see the `Conditions` section below): +You can also do a `has` query with a list of items using `gripql.within([...])` (other conditions exist, see the `Conditions` section below): ```python result = G.V().hasLabel("Gene").has(gripql.within("symbol", ["TP53", "BRCA1"])).render({"_id": "_id", "symbol":"symbol"}).execute() @@ -134,7 +134,7 @@ Additionally, we have provided `TranscriptFor` as an argument to `.in_()`. This ] ``` -View a list of all available query operations [here](./queries/index.md). +View a list of all available query operations [here](/tools/grip/docs/queries/). ### Using the command line diff --git a/docs/tools/grip/docs/databases.md b/docs/tools/grip/docs/databases.md index 3ad6b01..7519e66 100644 --- a/docs/tools/grip/docs/databases.md +++ b/docs/tools/grip/docs/databases.md @@ -104,7 +104,7 @@ Drivers: # SQLite -GRIP supports storing vertices and edges in [SQLite] +GRIP supports storing vertices and edges in [SQLite](https://sqlite.org/) Config: @@ -116,5 +116,3 @@ Drivers: Sqlite: DBName: tester/sqliteDB ``` - -[psql]: https://sqlite.org/ diff --git a/docs/tools/grip/docs/queries/jsonpath.md b/docs/tools/grip/docs/queries/jsonpath.md index 2f26511..d711d90 100644 --- a/docs/tools/grip/docs/queries/jsonpath.md +++ b/docs/tools/grip/docs/queries/jsonpath.md @@ -60,11 +60,11 @@ Below is a table of field and the values they would reference in subsequent trav | _id | "NM_007294.3:c.4963_4981delTGGCCTGACCCCAGAAG" | | _label | "variant" | | type | "deletion" | -| publications[0].pmid | 29480828 | -| publications[:].pmid | [29480828, 23666017] | -| publications.pmid | 
[29480828, 23666017] | -| $gene.symbol.hugo | "BRCA1" | -| $gene.transcripts[0] | "ENST00000471181.7" | +| `publications[0].pmid` | 29480828 | +| `publications[:].pmid` | `[29480828, 23666017]` | +| `publications.pmid` | `[29480828, 23666017]` | +| `$gene.symbol.hugo` | "BRCA1" | +| `$gene.transcripts[0]` | "ENST00000471181.7" | ## Usage Example: diff --git a/docs/tools/index.md b/docs/tools/index.md index 24d47a2..9dbd964 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -4,29 +4,29 @@ The CALYPR platform provides a suite of powerful, open-source tools designed to --- -### [Git-DRS](git-drs/index.md) +### [Git-DRS](/tools/git-drs/) **The Version Control Layer.** Git-DRS is a specialized extension for Git that manages massive genomic datasets using the GA4GH Data Repository Service (DRS) standard. It allows researchers to track, version, and share petabyte-scale files as easily as code, replacing heavy binaries with lightweight pointer files that resolve to immutable cloud objects. -### [Syfon](syfon/index.md) +### [Syfon](/tools/syfon/) **The Data Service Layer.** Syfon is CALYPR's DRS and storage mediation service. It handles object registration, presigned upload and download URLs, bucket routing, authentication modes, and the server-side configuration that lets higher-level tools move data cleanly between repositories, commons services, and object storage. -### [Funnel](funnel/index.md) +### [Funnel](/tools/funnel/) **The Compute Layer.** Funnel is a distributed task execution engine that implements the GA4GH Task Execution Service (TES) API. It provides a standardized way to run Docker-based analysis pipelines across diverse environments—including Kubernetes, AWS, and Google Cloud—ensuring that your workflows are portable and independent of the underlying infrastructure. 
-### [GRIP](grip/index.md) +### [GRIP](/tools/grip/) **The Discovery Layer.** GRIP (Graph Resource Integration Platform) is a high-performance graph database and query engine designed for complex biological data. It enables analysts to integrate heterogeneous datasets into a unified knowledge graph and perform sophisticated queries that reveal deep relational insights across multi-omic cohorts. -### [Forge](forge/index.md) +### [Forge](/tools/forge/) **Project formatting** Forge scans a data repository to build an integrated FHIR based graph of samples and all the files connected to the project. It is responsible for schema checking and database loading. You can use it client side to verify and debug your project and on the server side, it is used to load databases. -### [Data Client](data-client/index.md) +### [Data Client](/tools/data-client/) A client command line interface for interfacing with the Calypr system. -### [Sifter](sifter/index.md) +### [Sifter](/tools/sifter/) **Data Transformation** Sifter is a tool for rapid data extraction and transformation. diff --git a/docs/tools/sifter/docs/index.md b/docs/tools/sifter/docs/index.md index 5e94f14..edcc89b 100644 --- a/docs/tools/sifter/docs/index.md +++ b/docs/tools/sifter/docs/index.md @@ -109,36 +109,36 @@ Various fields in the script file will be be parsed using a [Mustache](https://m # Inputs The input block defines the various data extractors that will be used to open resources and create streams of JSON messages for processing. 
The possible input engines include: - [AVRO](./inputs/avro.md) - [JSON](./inputs/json.md) - [XML](./inputs/xml.md) - [SQL-dump](./inputs/xml.md) - [SQLite](./inputs/sqlite.md) - [TSV/CSV](./inputs/table.md) - [GLOB](./inputs/glob.md) +- [AVRO](/tools/sifter/docs/inputs/avro/) +- [JSON](/tools/sifter/docs/inputs/json/) +- [XML](/tools/sifter/docs/inputs/xml/) +- [SQL-dump](/tools/sifter/docs/inputs/sqldump/) +- [SQLite](/tools/sifter/docs/inputs/sqlite/) +- [TSV/CSV](/tools/sifter/docs/inputs/table/) +- [GLOB](/tools/sifter/docs/inputs/glob/) For any other file types, there is also a plugin option to allow the user to call their own code for opening files. # Pipeline The `pipelines` defined a set of named processing pipelines that can be used to transform data. Each pipeline starts with a `from` statement that defines where data comes from. It then defines a linear set of transforms that are chained togeather to do processing. Pipelines may used `emit` steps to output messages to disk. 
The possible data transform steps include: -- [Accumulate](./transforms/accumulate.md) -- [Clean](./transforms/clean.md) -- [Distinct](./transforms/distinct.md) -- [Field Parse](./transforms/fieldParse.md) -- [Field Process](./transforms/fieldProcess.md) -- [Field Type](./transforms/fieldType.md) -- [Filter](./transforms/filter.md) -- [FlatMap](./transforms/flatmap.md) -- [Hash](./transforms/hash.md) -- [Lookup](./transforms/lookup.md) -- [Value Mapping](./transforms/map.md) -- [Object Validation](./transforms/objectValidate.md) -- [Project](./transforms/project.md) -- [Reduce](./transforms/reduce.md) -- [Regex](./transforms/reduce.md) -- [Split](./transforms/split.md) -- [UUID Generation](./transforms/uuid.md) +- [Accumulate](/tools/sifter/docs/transforms/accumulate/) +- [Clean](/tools/sifter/docs/transforms/clean/) +- [Distinct](/tools/sifter/docs/transforms/distinct/) +- [Field Parse](/tools/sifter/docs/transforms/fieldParse/) +- [Field Process](/tools/sifter/docs/transforms/fieldProcess/) +- [Field Type](/tools/sifter/docs/transforms/fieldType/) +- [Filter](/tools/sifter/docs/transforms/filter/) +- [FlatMap](/tools/sifter/docs/transforms/flatmap/) +- [Hash](/tools/sifter/docs/transforms/hash/) +- [Lookup](/tools/sifter/docs/transforms/lookup/) +- [Value Mapping](/tools/sifter/docs/transforms/map/) +- [Object Validation](/tools/sifter/docs/transforms/objectValidate/) +- [Project](/tools/sifter/docs/transforms/project/) +- [Reduce](/tools/sifter/docs/transforms/reduce/) +- [Regex](/tools/sifter/docs/transforms/reduce/) +- [Split](/tools/sifter/docs/transforms/split/) +- [UUID Generation](/tools/sifter/docs/transforms/uuid/) Additionally, users are able to define their one transform step types using the `plugin` step. 
diff --git a/docs/tools/sifter/docs/inputs/sqldump.md b/docs/tools/sifter/docs/inputs/sqldump.md index 5fdbd9a..a5168a6 100644 --- a/docs/tools/sifter/docs/inputs/sqldump.md +++ b/docs/tools/sifter/docs/inputs/sqldump.md @@ -11,7 +11,7 @@ Scan file produced produced from sqldump. | Name | Type | Description | |-------|---|--------| | path | string | Path to the SQL dump file | -| tables | []string | Names of tables to read out | +| tables | `[]string` | Names of tables to read out | ## Example diff --git a/docs/tools/sifter/docs/inputs/table.md b/docs/tools/sifter/docs/inputs/table.md index a931eb5..87e2d89 100644 --- a/docs/tools/sifter/docs/inputs/table.md +++ b/docs/tools/sifter/docs/inputs/table.md @@ -13,7 +13,7 @@ Extract data from tabular file, includiong TSV and CSV files. |-------|---|--------| | path | string | File to be transformed | | rowSkip | int | Number of header rows to skip | -| columns | []string | Manually set names of columns | +| columns | `[]string` | Manually set names of columns | | extraColumns | string | Columns beyond originally declared columns will be placed in this array | | sep | string | Separator \\t for TSVs or , for CSVs | diff --git a/docs/tools/sifter/docs/transforms/clean.md b/docs/tools/sifter/docs/transforms/clean.md index fcff2c0..0fa4c54 100644 --- a/docs/tools/sifter/docs/transforms/clean.md +++ b/docs/tools/sifter/docs/transforms/clean.md @@ -14,7 +14,7 @@ Remove fields that don't appear in the desingated list. 
| name | Type | Description | | --- | --- | --- | -| fields | [] string | Fields to keep | +| fields | `[]string` | Fields to keep | | removeEmpty | bool | Fields with empty values will also be removed | | storeExtra | string | Field name to store removed fields | diff --git a/docs/tools/sifter/docs/transforms/fieldProcess.md b/docs/tools/sifter/docs/transforms/fieldProcess.md index 7ed2bae..c661a3d 100644 --- a/docs/tools/sifter/docs/transforms/fieldProcess.md +++ b/docs/tools/sifter/docs/transforms/fieldProcess.md @@ -14,7 +14,7 @@ each of the items in the array will become an independent row. | name | Type | Description | | --- | --- | --- | | field | string | Name of field to be processed | -| mapping | map[string]string | Project templated values into child element | +| mapping | `map[string]string` | Project templated values into child element | | itemField | string | If processing an array of non-dict elements, create a dict as `{itemField:element}` | diff --git a/docs/tools/sifter/docs/transforms/lookup.md b/docs/tools/sifter/docs/transforms/lookup.md index 08cb802..bacc729 100644 --- a/docs/tools/sifter/docs/transforms/lookup.md +++ b/docs/tools/sifter/docs/transforms/lookup.md @@ -12,7 +12,7 @@ Using key from current row, get values from a reference source | --- | --- | --- | | replace | string (field path) | Field to replace | | lookup | string (template string) | Key to use for looking up data | -| copy | map[string]string | Copy values from record that was found by lookup. The Key/Value record uses the Key as the destination field and copies the field from the retrieved records using the field named in Value | +| copy | `map[string]string` | Copy values from record that was found by lookup. 
The Key/Value record uses the Key as the destination field and copies the field from the retrieved records using the field named in Value | | tsv | TSVTable | TSV translation table file | | json | JSONTable | JSON data file | | table | LookupTable | Inline lookup table | diff --git a/docs/tools/sifter/docs/transforms/project.md b/docs/tools/sifter/docs/transforms/project.md index 6ad7cc9..2be4346 100644 --- a/docs/tools/sifter/docs/transforms/project.md +++ b/docs/tools/sifter/docs/transforms/project.md @@ -12,8 +12,8 @@ Populate row with templated values | name | Type | Description | | --- | --- | --- | -| mapping | map[string]any | New fields to be generated from template | -| rename | map[string]string | Rename field (no template engine) | +| mapping | `map[string]any` | New fields to be generated from template | +| rename | `map[string]string` | Rename field (no template engine) | # Example diff --git a/docs/tools/sifter/docs/transforms/reduce.md b/docs/tools/sifter/docs/transforms/reduce.md index 77d18bc..5343149 100644 --- a/docs/tools/sifter/docs/transforms/reduce.md +++ b/docs/tools/sifter/docs/transforms/reduce.md @@ -18,7 +18,7 @@ Using key from rows, reduce matched records into a single entry | method | string | Method name | | python | string | Python code string | | gpython | string | Python code string run using (https://github.com/go-python/gpython) | -| init | map[string]any | Data to use for first reduce | +| init | `map[string]any` | Data to use for first reduce | ## Example diff --git a/netlify.toml b/netlify.toml index 3cbeff4..654ee21 100644 --- a/netlify.toml +++ b/netlify.toml @@ -1,3 +1,3 @@ [build] - command = "python scripts/prepare_docs.py && zensical build --clean" + command = "zensical build --clean" publish = "site" diff --git a/requirements.txt b/requirements.txt index d87ca68..2ede9fd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ zensical termynal +mkdocs-multirepo-plugin diff --git a/scripts/README.md 
b/scripts/README.md deleted file mode 100644 index 76a3d2a..0000000 --- a/scripts/README.md +++ /dev/null @@ -1,69 +0,0 @@ -Usage ------ - -This folder contains helper scripts related to the documentation site. The main utility is -`update_doc_links.py` which updates GitHub links in the site documentation to point at the -current development branch for selected `calypr` repositories. - -Quick start ------------ - -From the repository root you can run the Makefile target: - -```bash -make update-branches -``` - -Or run the script directly: - -```bash -python3 scripts/update_doc_links.py -``` - -What the script does --------------------- - -- Looks for markdown and HTML files under `docs/` and `overrides/`. -- For each repository configured, queries remote branches with `git ls-remote --heads`. -- Chooses a preferred branch according to the preference list and rewrites links such as: - - `https://github.com/calypr//tree//...` - - `https://github.com/calypr//blob//...` - - `https://raw.githubusercontent.com/calypr///...` -- Creates a backup of any file it modifies; backups are named `.bak`, `.bak1`, `.bak2`, ... - -Configuration -------------- - -The script reads a JSON configuration mapping repository names to an ordered list of -preferred branch names. The script looks for a config in this order and uses the first -one it finds: - -1. `scripts/branch_config.json` -2. `branch_config.json` at the repo root -3. Built-in defaults inside the script - -Example `scripts/branch_config.json`: - -```json -{ - "git-drs": ["development", "develop", "main", "master"], - "syfon": ["development", "develop", "main", "master"] -} -``` - -Edit this JSON to add repositories or change branch priorities. - -Notes & next steps ------------------- - -- The script is intentionally conservative; it will only update recognized URL forms. -- If you want a preview-only run, or automatic commits, we can add a `--dry-run` or - `--commit` flag in a follow-up change. 
- -Contact -------- - -If you need changes to the update behavior (different URL patterns, different directories, -or automatic commits), open an issue or pull request describing the desired behavior so -the maintainers can review it. - diff --git a/scripts/branch_config.json b/scripts/branch_config.json deleted file mode 100644 index 4ee737a..0000000 --- a/scripts/branch_config.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "git-drs": ["development", "develop", "main", "master"], - "syfon": ["development", "develop", "main", "master"] -} - diff --git a/scripts/prepare_docs.py b/scripts/prepare_docs.py deleted file mode 100644 index 42a34eb..0000000 --- a/scripts/prepare_docs.py +++ /dev/null @@ -1,254 +0,0 @@ -#!/usr/bin/env python3 -"""Stage the Zensical docs tree with fetched upstream tool docs.""" - -from __future__ import annotations - -import json -import os -import shutil -import subprocess -import tempfile -from pathlib import Path - -from termynal.markdown import get_default_css, get_default_js - - -ROOT = Path(__file__).resolve().parents[1] -SOURCE_DOCS = ROOT / "docs" -GENERATED_ROOT = ROOT / ".generated" -GENERATED_DOCS = GENERATED_ROOT / "docs" -BRANCH_CONFIG = ROOT / "scripts" / "branch_config.json" - -DEFAULT_BRANCH_PREFERENCES = ["development", "develop", "main", "master"] -PREFER_LOCAL_ENV = "CALYPR_IMPORT_PREFER_LOCAL" - -REPO_IMPORTS = { - "git-drs": { - "repo_url": "https://github.com/calypr/git-drs.git", - "local_dir": ROOT.parent / "git-drs", - "mappings": [ - ("docs/quickstart.md", "tools/git-drs/quickstart.md"), - ("docs/getting-started.md", "tools/git-drs/getting-started.md"), - ("docs/installation.md", "tools/git-drs/installation.md"), - ("docs/commands.md", "tools/git-drs/commands.md"), - ("docs/troubleshooting.md", "tools/git-drs/troubleshooting.md"), - ("docs/bucket-mapping.md", "tools/git-drs/docs/bucket-mapping.md"), - ("docs/git-lfs.md", "tools/git-drs/docs/git-lfs.md"), - ("docs/developer-guide.md", 
"tools/git-drs/docs/developer-guide.md"), - ("docs/remove-files.md", "tools/git-drs/docs/remove-files.md"), - ("docs/troubleshooting.md", "tools/git-drs/docs/troubleshooting.md"), - ], - }, - "syfon": { - "repo_url": "https://github.com/calypr/syfon.git", - "local_dir": ROOT.parent / "syfon", - "mappings": [ - ("docs/index.md", "tools/syfon/index.md"), - ("docs/quickstart.md", "tools/syfon/quickstart.md"), - ("docs/configuration.md", "tools/syfon/configuration.md"), - ("docs/deployment.md", "tools/syfon/deployment.md"), - ("docs/kubernetes-deployment.md", "tools/syfon/kubernetes-deployment.md"), - ("docs/local-deployment.md", "tools/syfon/local-deployment.md"), - ("docs/encryption.md", "tools/syfon/encryption.md"), - ("docs/troubleshooting.md", "tools/syfon/troubleshooting.md"), - ("docs/images/syfon-logo.png", "tools/syfon/images/syfon-logo.png"), - ], - }, -} - -MARKDOWN_REWRITES: dict[tuple[str, str], list[tuple[str, str]]] = { - ("git-drs", "tools/git-drs/getting-started.md"): [ - ("(remove-files.md)", "(docs/remove-files.md)"), - ], - ("git-drs", "tools/git-drs/docs/bucket-mapping.md"): [ - ("(getting-started.md)", "(../getting-started.md)"), - ("(commands.md)", "(../commands.md)"), - ("(troubleshooting.md)", "(../troubleshooting.md)"), - ], - ("git-drs", "tools/git-drs/docs/troubleshooting.md"): [ - ("(getting-started.md)", "(../getting-started.md)"), - ("(commands.md)", "(../commands.md)"), - ], -} - - -def load_branch_preferences() -> dict[str, list[str]]: - if not BRANCH_CONFIG.exists(): - return {} - with BRANCH_CONFIG.open(encoding="utf-8") as handle: - data = json.load(handle) - return { - name: [str(branch) for branch in branches] - for name, branches in data.items() - if isinstance(branches, list) - } - - -def run_git(*args: str, cwd: Path | None = None) -> str: - completed = subprocess.run( - ["git", *args], - cwd=cwd, - check=True, - text=True, - capture_output=True, - ) - return completed.stdout - - -def copy_source_docs() -> None: - if 
GENERATED_ROOT.exists(): - shutil.rmtree(GENERATED_ROOT) - GENERATED_DOCS.mkdir(parents=True, exist_ok=True) - - for source in SOURCE_DOCS.rglob("*"): - relative = source.relative_to(SOURCE_DOCS) - if any(part.startswith(".") for part in relative.parts): - continue - destination = GENERATED_DOCS / relative - if source.is_dir(): - destination.mkdir(parents=True, exist_ok=True) - continue - destination.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(source, destination) - - -def prefer_local_repos() -> bool: - value = os.environ.get(PREFER_LOCAL_ENV, "1").strip().lower() - return value not in {"0", "false", "no", "off"} - - -def local_repo_path(repo_name: str, repo_config: dict) -> Path | None: - env_name = f"CALYPR_IMPORT_{repo_name.upper().replace('-', '_')}_DIR" - override = os.environ.get(env_name) - if override: - candidate = Path(override).expanduser().resolve() - return candidate if candidate.exists() else None - candidate = Path(repo_config["local_dir"]).resolve() - return candidate if candidate.exists() else None - - -def choose_remote_branch(repo_name: str, repo_url: str, preferences: dict[str, list[str]]) -> str: - preferred = preferences.get(repo_name, DEFAULT_BRANCH_PREFERENCES) - output = run_git("ls-remote", "--heads", repo_url) - branches: set[str] = set() - for line in output.splitlines(): - parts = line.split() - if len(parts) != 2 or not parts[1].startswith("refs/heads/"): - continue - branches.add(parts[1].removeprefix("refs/heads/")) - - for branch in preferred: - if branch in branches: - return branch - if "main" in branches: - return "main" - if not branches: - raise RuntimeError(f"no remote branches found for {repo_name}") - return sorted(branches)[0] - - -def copy_mapping(source_root: Path, source_relative: str, destination_relative: str) -> None: - source = source_root / source_relative - if not source.exists(): - raise FileNotFoundError(f"missing source import: {source}") - destination = GENERATED_DOCS / destination_relative - 
destination.parent.mkdir(parents=True, exist_ok=True) - shutil.copy2(source, destination) - - -def rewrite_imported_markdown(repo_name: str) -> None: - for (rewrite_repo, destination_relative), replacements in MARKDOWN_REWRITES.items(): - if rewrite_repo != repo_name: - continue - destination = GENERATED_DOCS / destination_relative - if not destination.exists() or destination.suffix.lower() != ".md": - continue - content = destination.read_text(encoding="utf-8") - updated = content - for old, new in replacements: - updated = updated.replace(old, new) - if updated != content: - destination.write_text(updated, encoding="utf-8") - - -def import_from_local_repo(repo_name: str, repo_root: Path, repo_config: dict) -> dict[str, str]: - print(f"[prepare-docs] importing {repo_name} from local repo {repo_root}") - for source_relative, destination_relative in repo_config["mappings"]: - copy_mapping(repo_root, source_relative, destination_relative) - rewrite_imported_markdown(repo_name) - return {"mode": "local", "source": str(repo_root)} - - -def import_from_remote_repo( - repo_name: str, - repo_url: str, - repo_config: dict, - preferences: dict[str, list[str]], -) -> dict[str, str]: - branch = choose_remote_branch(repo_name, repo_url, preferences) - sparse_paths = sorted({source for source, _ in repo_config["mappings"]}) - print(f"[prepare-docs] importing {repo_name} from {repo_url} branch {branch}") - - with tempfile.TemporaryDirectory(prefix=f"calypr-docs-{repo_name}-") as tmpdir: - clone_root = Path(tmpdir) / repo_name - subprocess.run( - [ - "git", - "clone", - "--branch", - branch, - "--depth", - "1", - "--filter=blob:none", - "--sparse", - repo_url, - str(clone_root), - ], - check=True, - ) - subprocess.run( - ["git", "sparse-checkout", "set", "--no-cone", *sparse_paths], - cwd=clone_root, - check=True, - ) - for source_relative, destination_relative in repo_config["mappings"]: - copy_mapping(clone_root, source_relative, destination_relative) - 
rewrite_imported_markdown(repo_name) - - return {"mode": "remote", "source": repo_url, "branch": branch} - - -def write_termynal_assets() -> None: - (GENERATED_DOCS / "termynal.css").write_text(get_default_css(), encoding="utf-8") - (GENERATED_DOCS / "termynal.js").write_text(get_default_js(), encoding="utf-8") - - -def main() -> int: - copy_source_docs() - branch_preferences = load_branch_preferences() - manifest: dict[str, dict[str, str]] = {} - - for repo_name, repo_config in REPO_IMPORTS.items(): - repo_root = local_repo_path(repo_name, repo_config) if prefer_local_repos() else None - if repo_root is not None: - manifest[repo_name] = import_from_local_repo(repo_name, repo_root, repo_config) - continue - manifest[repo_name] = import_from_remote_repo( - repo_name, - repo_config["repo_url"], - repo_config, - branch_preferences, - ) - - write_termynal_assets() - GENERATED_ROOT.mkdir(parents=True, exist_ok=True) - (GENERATED_ROOT / "imports.json").write_text( - json.dumps(manifest, indent=2, sort_keys=True) + "\n", - encoding="utf-8", - ) - print(f"[prepare-docs] staged docs at {GENERATED_DOCS}") - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/scripts/update_doc_links.py b/scripts/update_doc_links.py deleted file mode 100644 index b212611..0000000 --- a/scripts/update_doc_links.py +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/env python3 -""" -Update links in the docs that point to GitHub repos to use a chosen branch for each repo. - -This script looks for occurrences of URLs like: - - https://github.com/calypr//tree//... - - https://github.com/calypr//blob//... - - https://raw.githubusercontent.com/calypr///... - -It queries the remote repository for available branches (via `git ls-remote --heads`) and -selects the preferred branch according to a priority list (development, develop, main, master). -It then updates markdown/html files under `docs/` and `overrides/` replacing the branch segment -with the selected branch. 
- -Usage: python3 scripts/update_doc_links.py - -Exit codes: 0 on success, non-zero on error. -""" -import json -import re -import subprocess -import sys -from pathlib import Path - - -# Default configuration (used if no config file is present) -DEFAULT_REPOS = { - "git-drs": ["development", "develop", "main", "master"], - "syfon": ["development", "develop", "main", "master"], -} - -ROOT = Path(__file__).resolve().parents[1] -DOC_PATHS = [ROOT / "docs", ROOT / "overrides"] -CONFIG_PATHS = [ROOT / "scripts" / "branch_config.json", ROOT / "branch_config.json"] - -def get_remote_branches(repo: str): - """Return a set of branch names available in the remote repo. - Uses `git ls-remote --heads` to avoid needing GitHub API tokens. - """ - repo_url = f"https://github.com/calypr/{repo}.git" - try: - out = subprocess.check_output(["git", "ls-remote", "--heads", repo_url], text=True) - except subprocess.CalledProcessError as e: - # Return None to indicate an unexpected failure querying the remote - print(f"ERROR: failed to query remote for {repo}: {repr(e)}") - return None - except Exception as e: - print(f"ERROR: unexpected error querying remote for {repo}: {repr(e)}") - return None - branches = set() - for line in out.splitlines(): - parts = line.strip().split() - if len(parts) >= 2 and parts[1].startswith("refs/heads/"): - branches.add(parts[1][len("refs/heads/"):]) - return branches - -def choose_branch(available: set, preferred: list): - for p in preferred: - if p in available: - return p - # fallback: pick 'main' if present, else first available - if "main" in available: - return "main" - return sorted(available)[0] if available else None - -def update_file(path: Path, repo: str, branch: str): - text = path.read_text(encoding="utf-8") - orig = text - - # replace github.com links with tree/blob branch updated - # patterns: /tree//, /blob//, raw.githubusercontent.com/// - pattern1 = re.compile(rf"(https://github\.com/calypr/{re.escape(repo)}/(?:tree|blob)/)([^/\s]+)") - 
text = pattern1.sub(rf"\1{branch}", text) - - # Handle raw.githubusercontent links in a single pass for both: - # - .../{repo}/refs/heads//... (sometimes used) - # - .../{repo}//... - # Using one pattern avoids treating the literal "refs" path segment as a branch name. - pattern_raw = re.compile( - rf"(https://raw\.githubusercontent\.com/calypr/{re.escape(repo)}/)(?:refs/heads/)?([^/\s]+)" - ) - text = pattern_raw.sub(rf"\1{branch}", text) - - # Also replace bare repository links (exact) to point to the chosen branch tree view - # but only when the link is the repo root (no trailing path) - pattern3 = re.compile(rf"(https://github\.com/calypr/{re.escape(repo)})(?![\w\-/])") - text = pattern3.sub(rf"https://github.com/calypr/{repo}/tree/{branch}", text) - - if text != orig: - # choose a unique backup filename - i = 0 - while True: - suffix = ".bak" if i == 0 else f".bak{i}" - backup = path.with_suffix(path.suffix + suffix) - if not backup.exists(): - break - i += 1 - # write backup and new file - backup.write_text(orig, encoding="utf-8") - path.write_text(text, encoding="utf-8") - print(f"Updated {path} (backup at {backup.name})") - return True - return False - -def find_files(root: Path): - for p in root.rglob("*.md"): - yield p - for p in root.rglob("*.html"): - yield p - -def main(): - any_changes = False - any_errors = False - # Load configuration from JSON if present - repos = DEFAULT_REPOS - for p in CONFIG_PATHS: - if p.exists(): - try: - repos = json.loads(p.read_text(encoding="utf-8")) - print(f"Loaded config from {p}") - except Exception as e: - print(f"Warning: failed to load config {p}: {e}. 
Using defaults.") - break - - for repo, prefs in repos.items(): - print(f"Processing repo: {repo}") - branches = get_remote_branches(repo) - if branches is None: - # get_remote_branches logged the error already - any_errors = True - print(f" Error: remote branch query failed for {repo}; skipping") - continue - if not branches: - print(f" Warning: no remote branches found for {repo}; skipping") - continue - chosen = choose_branch(branches, prefs) - if not chosen: - print(f" Warning: couldn't choose branch for {repo}; skipping") - continue - print(f" Selected branch: {chosen}") - - # scan files - for docroot in DOC_PATHS: - if not docroot.exists(): - continue - for f in find_files(docroot): - try: - changed = update_file(f, repo, chosen) - any_changes = any_changes or changed - except Exception as e: - any_errors = True - print(f" ERROR updating file {f}: {repr(e)}") - - if any_changes: - print("Done. Files were modified. Review .bak files created next to edited files.") - else: - print("Done. No changes necessary.") - - if any_errors: - print("Completed with errors. 
See messages above.") - return 2 - return 0 - -if __name__ == '__main__': - sys.exit(main()) - diff --git a/zensical.toml b/zensical.toml index f7f5bbd..4651955 100644 --- a/zensical.toml +++ b/zensical.toml @@ -1,7 +1,39 @@ +[plugins.multirepo] +cleanup = true + +[[plugins.multirepo.nav_repos]] +name = "git-drs" +import_url = "https://github.com/calypr/git-drs?branch=development" +imports = [ + "docs/index.md", + "docs/quickstart.md", + "docs/getting-started.md", + "docs/installation.md", + "docs/commands.md", + "docs/troubleshooting.md", + "docs/bucket-mapping.md", + "docs/git-lfs.md", + "docs/developer-guide.md", + "docs/remove-files.md", +] + +[[plugins.multirepo.nav_repos]] +name = "syfon" +import_url = "https://github.com/calypr/syfon?branch=development" +imports = [ + "docs/index.md", + "docs/quickstart.md", + "docs/local-deployment.md", + "docs/kubernetes-deployment.md", + "docs/configuration.md", + "docs/encryption.md", + "docs/troubleshooting.md", + "docs/images/syfon-logo.png", +] + [project] site_name = "CALYPR" site_url = "https://calypr.org" -docs_dir = ".generated/docs" nav = [ { "Home" = "index.md" }, @@ -78,6 +110,7 @@ nav = [ { "Compute" = [ { "Deploying a Cluster" = "tools/funnel/docs/compute/deployment.md" }, { "AWS Batch" = "tools/funnel/docs/compute/aws-batch.md" }, + { "GCP Batch" = "tools/funnel/docs/compute/gcp-batch.md" }, { "Kubernetes" = "tools/funnel/docs/compute/kubernetes.md" }, { "HTCondor" = "tools/funnel/docs/compute/htcondor.md" }, { "Slurm" = "tools/funnel/docs/compute/slurm.md" }, @@ -220,7 +253,6 @@ primary = "custom" title = "bash" buttons = "macos" prompt_literal_start = ["$"] -include_assets = false [project.markdown_extensions.toc] permalink = true [project.markdown_extensions.pymdownx.details]