diff --git a/.gitignore b/.gitignore index 583b596..b77a15b 100644 --- a/.gitignore +++ b/.gitignore @@ -134,8 +134,15 @@ data/ uv.lock -# Quarto +# Docs +docs/.quarto/ docs/_site/ - -# created by quartodoc -docs/api \ No newline at end of file +docs/_site_old/ +docs/_freeze/ +docs/api/ +docs/objects.json +docs/**/*.quarto_ipynb +docs/examples/*_files/ +docs/examples/getting-started.qmd +docs/examples/detect-on-dataframes.qmd +docs/examples/example-water-level.qmd \ No newline at end of file diff --git a/Makefile b/Makefile index 460f4cf..8d4d915 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ LIB = src/tsod -.PHONY: check build lint format test coverage docs clean +.PHONY: check build lint format test coverage docs convert-notebooks clean check: lint test @@ -19,7 +19,10 @@ test: coverage: uv run pytest --cov-report html --cov=$(LIB) tests/ -docs: +convert-notebooks: + uv run python scripts/convert_docs_notebooks.py + +docs: convert-notebooks cd docs && uv run quartodoc build uv run quarto render docs diff --git a/docs/.gitignore b/docs/.gitignore deleted file mode 100644 index ad29309..0000000 --- a/docs/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -/.quarto/ -**/*.quarto_ipynb diff --git a/docs/Makefile b/docs/Makefile deleted file mode 100644 index 7d837ac..0000000 --- a/docs/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -# Minimal makefile for Quarto documentation -# - -.PHONY: help api build preview clean - -help: - @echo "Please use 'make ' where is one of:" - @echo " api to generate API documentation with quartodoc" - @echo " build to build the documentation as HTML" - @echo " preview to preview the documentation with live reload" - @echo " clean to remove generated files" - -api: - uv run quartodoc build - -build: api - uv run quarto render - -preview: api - uv run quarto preview - -clean: - rm -rf _site api objects.json diff --git a/docs/_quarto.yml b/docs/_quarto.yml index 34bab29..ad4e431 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -2,13 +2,15 @@ 
project: type: website website: - title: "tsod" + title: "" page-footer: "© 2025 DHI Group" repo-url: https://github.com/DHI/tsod repo-actions: [edit] repo-subdir: docs + page-navigation: true navbar: + logo: https://raw.githubusercontent.com/DHI/tsod/main/images/logo/tsod.png tools: - icon: github menu: @@ -17,13 +19,28 @@ website: - text: Report a Bug url: https://github.com/DHI/tsod/issues left: - - href: index.qmd - text: Home - - href: getting-started.qmd - text: Getting Started - - href: design.qmd - - href: api/index.qmd - text: API Reference + - text: Home + href: index.qmd + - text: User Guide + href: user-guide/getting-started.qmd + - text: Examples + href: examples/index.qmd + - text: API Reference + href: api/index.qmd + + sidebar: + - title: "User Guide" + style: docked + contents: + - user-guide/getting-started.qmd + - user-guide/design.qmd + - title: "Examples" + style: docked + contents: + - href: examples/index.qmd + - href: examples/getting-started.qmd + - href: examples/detect-on-dataframes.qmd + - href: examples/example-water-level.qmd filters: - interlinks @@ -63,6 +80,5 @@ quartodoc: format: html: theme: cosmo - toc: true - ipynb: + css: style.css toc: true diff --git a/docs/examples/_metadata.yml b/docs/examples/_metadata.yml new file mode 100644 index 0000000..db46aa1 --- /dev/null +++ b/docs/examples/_metadata.yml @@ -0,0 +1,6 @@ +page-layout: full +execute: + freeze: auto # Cache executed outputs; only re-run cells when source changes. +format: + html: + fig-responsive: true diff --git a/docs/examples/docs_config.json b/docs/examples/docs_config.json new file mode 100644 index 0000000..f846ae7 --- /dev/null +++ b/docs/examples/docs_config.json @@ -0,0 +1,19 @@ +{ + "examples": [ + { + "notebook": "Getting started.ipynb", + "title": "Getting started", + "description": "Basic anomaly detection using RangeDetector, GradientDetector, and more." 
+ }, + { + "notebook": "Detect on DataFrames.ipynb", + "title": "Detect on DataFrames", + "description": "Apply a detector to all columns of a DataFrame at once." + }, + { + "notebook": "Example Water Level.ipynb", + "title": "Example Water Level", + "description": "Clean a real water level time series using a combined detector." + } + ] +} \ No newline at end of file diff --git a/docs/examples/index.qmd b/docs/examples/index.qmd new file mode 100644 index 0000000..7b0dea2 --- /dev/null +++ b/docs/examples/index.qmd @@ -0,0 +1,7 @@ +--- +title: "Examples" +page-layout: article +listing: + type: default + fields: [image, title, description] +--- diff --git a/docs/index.qmd b/docs/index.qmd index 5577cf2..8cf217c 100644 --- a/docs/index.qmd +++ b/docs/index.qmd @@ -20,12 +20,13 @@ format-links: false Install **tsod** with [`pip`](https://pypi.org/project/tsod/) and get up and running in minutes - +[**Getting started**](user-guide/getting-started.qmd) ## {{< fa brands python >}} **It's just Python** Use familiar Python workflows to integrate anomaly detection into your models and pipelines +[**API Reference**](api/index.qmd) ::: @@ -33,13 +34,14 @@ Use familiar Python workflows to integrate anomaly detection into your models an ## {{< fa solid ruler >}} **Rule-based detectors** -Choose from detectors like `RangeDetector` and `ConstantValueDetector` to identify anomalies. +Choose from detectors like `RangeDetector` and `ConstantValueDetector` to identify anomalies. 
Explore example notebooks in [**Examples**](examples/index.qmd) ## {{< fa scale-balanced >}} **Open Source, MIT** **tsod** is licensed under MIT and the source code is available on [GitHub](https://github.com/DHI/tsod) +[**Design philosophy**](user-guide/design.qmd) ::: diff --git a/docs/style.css b/docs/style.css new file mode 100644 index 0000000..6cca368 --- /dev/null +++ b/docs/style.css @@ -0,0 +1,12 @@ +#quarto-content.page-layout-full main.content.column-body > #title-block-header + p { + display: none; +} + +#quarto-content.page-layout-full main.content.column-body { + max-width: min(1600px, calc(100vw - 4rem)); +} + +.cell-output-display img, .quarto-figure img { + max-width: 100%; + height: auto; +} \ No newline at end of file diff --git a/docs/design.qmd b/docs/user-guide/design.qmd similarity index 97% rename from docs/design.qmd rename to docs/user-guide/design.qmd index ce25eb4..411108c 100644 --- a/docs/design.qmd +++ b/docs/user-guide/design.qmd @@ -1,41 +1,41 @@ -# Design philosophy - - -## {{< fa brands python >}} Familiar - -tsod aims to use a syntax familiar to users of scientific computing libraries such as Pandas & sckit-learn. - -## {{< fa download >}} Easy to install - -```bash -$ pip install tsod -``` - - -## {{< fa brands osi >}} Open Source​ -tsod is an open source project licensed under the MIT license. -The software is provided free of charge with the source code available for inspection and modification. - -Contributions are welcome! - -## {{< fa comments >}} Easy to collaborate -By developing tsod on GitHub along with a completely open discussion, we believe that the collaboration between developers and end-users results in a useful library. - -## {{< fa list-ol >}} Reproducible -By providing the historical versions of tsod on PyPI it is possible to reproduce the behaviour of an older existing system, based on an older version. 
- -**Install specific version** - -```bash -pip install tsod==0.2.0 -``` - -## {{< fa brands github >}} Easy access to new features -Features are being added all the time, by developers at DHI in offices all around the globe as well as external contributors using tsod in their work. -These new features are always available from the [main branch on GitHub](https://github.com/DHI/tsod) and thanks to automated testing, it is always possible to verify that the tests passes before downloading a new development version. - -**Install development version** - -```bash -$ pip install https://github.com/DHI/tsod/archive/main.zip +# Design philosophy + + +## {{< fa brands python >}} Familiar + +tsod aims to use a syntax familiar to users of scientific computing libraries such as Pandas & scikit-learn. + +## {{< fa download >}} Easy to install + +```bash +$ pip install tsod +``` + + +## {{< fa brands osi >}} Open Source +tsod is an open source project licensed under the MIT license. +The software is provided free of charge with the source code available for inspection and modification. + +Contributions are welcome! + +## {{< fa comments >}} Easy to collaborate +By developing tsod on GitHub along with a completely open discussion, we believe that the collaboration between developers and end-users results in a useful library. + +## {{< fa list-ol >}} Reproducible +By providing the historical versions of tsod on PyPI it is possible to reproduce the behaviour of an older existing system, based on an older version. + +**Install specific version** + +```bash +pip install tsod==0.2.0 +``` + +## {{< fa brands github >}} Easy access to new features +Features are being added all the time, by developers at DHI in offices all around the globe as well as external contributors using tsod in their work. 
+These new features are always available from the [main branch on GitHub](https://github.com/DHI/tsod) and thanks to automated testing, it is always possible to verify that the tests pass before downloading a new development version. + +**Install development version** + +```bash +$ pip install https://github.com/DHI/tsod/archive/main.zip ``` \ No newline at end of file diff --git a/docs/getting-started.qmd b/docs/user-guide/getting-started.qmd similarity index 81% rename from docs/getting-started.qmd rename to docs/user-guide/getting-started.qmd index 5c2a9c0..dbd7195 100644 --- a/docs/getting-started.qmd +++ b/docs/user-guide/getting-started.qmd @@ -1,53 +1,59 @@ -Getting started -=============== - -![](https://raw.githubusercontent.com/DHI/tsod/main/images/anomaly.png) - -Sensors often provide faulty or missing observations. These anomalies must be detected automatically and replaced with more feasible values before feeding the data to numerical simulation engines as boundary conditions or real time decision systems. - -This package aims to provide examples and algorithms for detecting anomalies in time series data specifically tailored to DHI users and the water domain. It is simple to install and deploy operationally and is accessible to everyone (open-source). - -`tsod` is library for timeseries data. The format of a timeseries is always a [](`pandas.Series`) and in some cases with a [](`pandas.DatetimeIndex`) - -1. Get data in the form of a a [](`pandas.Series`) (see Data formats below) -2. Select one or more detectors e.g. [](`~tsod.RangeDetector`) or [](`~tsod.ConstantValueDetector`) -3. Define parameters (e.g. min/max, max rate of change) or... -4. Fit parameters based on normal data, i.e. without outliers -5. Detect outliers in any dataset - -Example -------- - -```{python} -import pandas as pd -from tsod import RangeDetector -rd = RangeDetector(max_value=2.0) -data = pd.Series([0.0, 1.0, 3.0]) # 3.0 is out of range i.e. 
an anomaly -anom = rd.detect(data) -anom -``` - -```{python} -data[anom] # get anomalous data -``` - -```{python} -data[~anom] # get normal data -``` - - -Saving and loading ------------------- -Save a configured detector -```python -cd = CombinedDetector([ConstantValueDetector(), RangeDetector()]) -cd.fit(normal_data) -cd.save("detector.joblib") -``` - -... and then later load it from disk -```python -my_detector = tsod.load("detector.joblib") -my_detector.detect(some_data) -``` +--- +title: Getting started +execute: + enabled: false +--- + +Getting started +=============== + +![](https://raw.githubusercontent.com/DHI/tsod/main/images/anomaly.png) + +Sensors often provide faulty or missing observations. These anomalies must be detected automatically and replaced with more feasible values before feeding the data to numerical simulation engines as boundary conditions or real time decision systems. + +This package aims to provide examples and algorithms for detecting anomalies in time series data specifically tailored to DHI users and the water domain. It is simple to install and deploy operationally and is accessible to everyone (open-source). + +`tsod` is a library for time series data. The supported input formats are [](`pandas.Series`) and [](`pandas.DataFrame`) (single or multicolumn), and the output type always matches the input type. + +1. Get data in the form of a [](`pandas.Series`) or [](`pandas.DataFrame`). +2. Select one or more detectors e.g. [](`~tsod.RangeDetector`) or [](`~tsod.ConstantValueDetector`) +3. Define parameters (e.g. min/max, max rate of change) or... +4. Fit parameters based on normal data, i.e. without outliers +5. Detect outliers in any dataset + +Example +------- + +```{python} +import pandas as pd +from tsod import RangeDetector +rd = RangeDetector(max_value=2.0) +data = pd.Series([0.0, 1.0, 3.0]) # 3.0 is out of range i.e. 
an anomaly +anom = rd.detect(data) +anom +``` + +```{python} +data[anom] # get anomalous data +``` + +```{python} +data[~anom] # get normal data +``` + + +Saving and loading +------------------ +Save a configured detector +```python +cd = CombinedDetector([ConstantValueDetector(), RangeDetector()]) +cd.fit(normal_data) +cd.save("detector.joblib") +``` + +... and then later load it from disk +```python +my_detector = tsod.load("detector.joblib") +my_detector.detect(some_data) +``` \ No newline at end of file diff --git a/notebooks/Detect on DataFrames.ipynb b/notebooks/Detect on DataFrames.ipynb index 9926670..ac89887 100644 --- a/notebooks/Detect on DataFrames.ipynb +++ b/notebooks/Detect on DataFrames.ipynb @@ -5,7 +5,15 @@ "id": "50760804", "metadata": {}, "source": [ - "# Apply detector to entire DataFrame at once " + "# Detect on Dataframes" + ] + }, + { + "cell_type": "markdown", + "id": "c02271c2", + "metadata": {}, + "source": [ + "### **Apply detector to entire DataFrame at once**" ] }, { @@ -30,7 +38,7 @@ "id": "3076a82c", "metadata": {}, "source": [ - "### Prepare example data" + "### **Prepare example data**" ] }, { @@ -105,7 +113,7 @@ "id": "34a9c664", "metadata": {}, "source": [ - "### Detect anomalies using preset range and constant gradient rules" + "### **Detect anomalies using preset range and constant gradient rules**" ] }, { @@ -152,7 +160,7 @@ "id": "7e583b7c", "metadata": {}, "source": [ - "### Add gradient detector that has been fitted on a known good period" + "### **Add gradient detector that has been fitted on a known good period**" ] }, { @@ -218,7 +226,7 @@ "id": "637771d1", "metadata": {}, "source": [ - "### Cleaned data" + "### **Cleaned data**" ] }, { diff --git a/notebooks/Example Water Level.ipynb b/notebooks/Example Water Level.ipynb index f84f4dd..c7de1a6 100644 --- a/notebooks/Example Water Level.ipynb +++ b/notebooks/Example Water Level.ipynb @@ -4,7 +4,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Clean water 
level data" + "# Example Water Level" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### **Clean water level data**" ] }, { @@ -48,6 +55,11 @@ "ax.grid(True)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/notebooks/Getting started.ipynb b/notebooks/Getting started.ipynb index 25efbb6..679875e 100644 --- a/notebooks/Getting started.ipynb +++ b/notebooks/Getting started.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Getting started" + ] + }, { "cell_type": "code", "execution_count": 1, @@ -33,7 +40,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Range" + "### **Range**" ] }, { @@ -64,7 +71,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Constant value" + "### **Constant value**" ] }, { @@ -95,7 +102,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Combination" + "### **Combination**" ] }, { @@ -129,7 +136,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Constant gradient" + "### **Constant gradient**" ] }, { @@ -160,7 +167,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Gradient" + "### **Gradient**" ] }, { @@ -221,7 +228,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Rolling standard deviation\n", + "### **Rolling standard deviation**\n", "\n", "Can be used to detect sudden large variations" ] @@ -292,7 +299,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Diff\n", + "### **Diff**\n", "\n", "The diff detector detects sudden changes, without consideration of the time elapsed " ] @@ -344,7 +351,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Hampel filter\n", + "### **Hampel filter**\n", "\n", "Detects outliers by comparing each point to the its surrounding window using median absolute deviation." 
"""Convert notebooks listed in docs_config.json to .qmd files for the docs site.

For every entry in ``docs/examples/docs_config.json`` this script:

1. converts ``notebooks/<notebook>`` to ``docs/examples/<slug>.qmd`` with
   ``quarto convert``;
2. forces the front-matter ``title`` to the configured value and adds the
   configured ``description`` when the page has none;
3. rewrites ``../tests/data/`` references so they resolve from
   ``docs/examples/``.

Afterwards it deletes generated pages that are no longer listed in the config
and regenerates the entries of the "Examples" sidebar in ``docs/_quarto.yml``
in config order.
"""

import json
import re
import subprocess
import sys
from pathlib import Path
from typing import Iterable, List

repo_root = Path(__file__).resolve().parent.parent
# Central config that decides which notebooks become docs pages.
config_path = repo_root / "docs/examples/docs_config.json"
# Folder where generated example .qmd files are written.
examples_dir = repo_root / "docs/examples"
# Site config whose "Examples" sidebar is regenerated from the config order.
quarto_yml_path = repo_root / "docs/_quarto.yml"

# Leading YAML front matter: an opening "---" line, any lines, a closing "---".
_FRONT_MATTER_RE = re.compile(r"\A---[ \t]*\n(?P<body>(?:.*\n)*?)---[ \t]*(?:\n|\Z)")

# The 'Examples' sidebar section. Indentation is matched rather than
# hard-coded; items must be indented at least as far as the "contents:" key so
# a following sibling section is never swallowed.
_EXAMPLES_SIDEBAR_RE = re.compile(
    r'^(?P<head>[ \t]*- title: "Examples"[ \t]*\n'
    r"[ \t]+style: docked[ \t]*\n"
    r"(?P<pad>[ \t]+)contents:[ \t]*\n)"
    r"(?P<items>(?:(?P=pad)[ \t]*- .*\n)*)",
    re.MULTILINE,
)


def slugify(stem: str) -> str:
    """Return a URL-safe slug: lowercase, other character runs become ``-``."""
    return re.sub(r"[^a-z0-9]+", "-", stem.lower()).strip("-")


def yaml_quote(text: str) -> str:
    """Return *text* as a double-quoted YAML scalar.

    A JSON string is a valid YAML double-quoted scalar, so quotes, backslashes
    and control characters are all escaped correctly.
    """
    return json.dumps(text, ensure_ascii=False)


def set_front_matter(qmd: str, title: str, description: str = "") -> str:
    """Return *qmd* with its front-matter title set and description ensured.

    Only the leading front-matter block is inspected, so ``title:`` or
    ``description:`` lines in the document body are never touched. A front
    matter block is created when the document has none.

    Args:
        qmd: Full text of the .qmd document.
        title: Title to write, replacing any existing top-level ``title:``.
        description: Added right after the title unless empty or the front
            matter already has a top-level ``description:``.
    """
    title_line = f"title: {yaml_quote(title)}"
    match = _FRONT_MATTER_RE.match(qmd)
    if match is None:
        lines: List[str] = []
        rest = "\n" + qmd  # blank line between new front matter and content
    else:
        lines = match.group("body").splitlines()
        rest = qmd[match.end():]

    for index, line in enumerate(lines):
        if line.startswith("title:"):
            lines[index] = title_line
            title_at = index
            break
    else:
        lines.insert(0, title_line)
        title_at = 0

    if description and not any(line.startswith("description:") for line in lines):
        lines.insert(title_at + 1, f"description: {yaml_quote(description)}")

    return "---\n" + "\n".join(lines) + "\n---\n" + rest


def replace_examples_sidebar(quarto_yml: str, page_names: Iterable[str]) -> str:
    """Return *quarto_yml* with the "Examples" sidebar contents regenerated.

    The new contents are ``examples/index.qmd`` followed by one entry per name
    in *page_names*, in the given order. New entries reuse the indentation of
    the existing entries (or two spaces past ``contents:`` if there are none).

    Raises:
        ValueError: If no "Examples" sidebar section is found, instead of
            silently leaving the sidebar stale.
    """
    match = _EXAMPLES_SIDEBAR_RE.search(quarto_yml)
    if match is None:
        raise ValueError('no "Examples" sidebar section (title/style/contents) found')

    existing = match.group("items")
    if existing:
        item_indent = existing[: len(existing) - len(existing.lstrip(" \t"))]
    else:
        item_indent = match.group("pad") + "  "

    hrefs = ["examples/index.qmd"] + [f"examples/{name}" for name in page_names]
    items = "".join(f"{item_indent}- href: {href}\n" for href in hrefs)
    return quarto_yml[: match.start("items")] + items + quarto_yml[match.end("items"):]


def load_examples(path: Path) -> list:
    """Return the ordered ``examples`` list from the JSON config at *path*.

    Exits with an error message if the file is missing or the list is empty.
    """
    if not path.exists():
        sys.exit(f"Missing config file: {path}")
    config = json.loads(path.read_text(encoding="utf-8"))
    examples = config.get("examples", [])
    if not examples:
        sys.exit("Config must contain a non-empty 'examples' array.")
    return examples


def convert_notebook(notebook_path: Path, output_path: Path, title: str, description: str) -> None:
    """Convert one notebook to .qmd via Quarto and post-process the result."""
    subprocess.run(
        ["uv", "run", "quarto", "convert", str(notebook_path), "--output", str(output_path)],
        cwd=repo_root,
        check=True,
    )
    qmd = output_path.read_text(encoding="utf-8")
    # Title and description come from the config so the listing shows stable text.
    qmd = set_front_matter(qmd, title, description)
    # Notebooks reference "../tests/data/" relative to notebooks/; the .qmd
    # files are executed from docs/examples/, one level deeper.
    qmd = qmd.replace("../tests/data/", "../../tests/data/")
    output_path.write_text(qmd, encoding="utf-8")


def main() -> None:
    """Regenerate all example pages, prune stale ones and update the sidebar."""
    examples = load_examples(config_path)
    examples_dir.mkdir(parents=True, exist_ok=True)

    managed_outputs: List[str] = []  # list (not set) to preserve config order

    for entry in examples:
        notebook_name = entry.get("notebook", "").strip()
        title = entry.get("title", "").strip()
        if not notebook_name or not title:
            sys.exit("Each example entry must include non-empty 'notebook' and 'title' values.")

        notebook_path = repo_root / "notebooks" / notebook_name
        if not notebook_path.exists():
            sys.exit(f"Notebook not found: {notebook_path}")

        # Convert notebook filename to URL-safe slug for the output .qmd.
        slug = slugify(Path(notebook_name).stem)
        output_path = examples_dir / f"{slug}.qmd"
        # Refuse names that would overwrite the index page or another example.
        if not slug or output_path.name == "index.qmd" or output_path.name in managed_outputs:
            sys.exit(f"Notebook name does not map to a unique page name: {notebook_name}")
        managed_outputs.append(output_path.name)

        convert_notebook(notebook_path, output_path, title, entry.get("description", "").strip())

    # Keep the index page; remove generated pages no longer present in config.
    keep = {"index.qmd", *managed_outputs}
    for qmd_file in examples_dir.glob("*.qmd"):
        if qmd_file.name not in keep:
            qmd_file.unlink()

    # Update the Examples sidebar in _quarto.yml (entries follow config order).
    original_yml = quarto_yml_path.read_text(encoding="utf-8")
    try:
        updated_yml = replace_examples_sidebar(original_yml, managed_outputs)
    except ValueError as err:
        sys.exit(f"{quarto_yml_path}: {err}")
    if updated_yml != original_yml:  # avoid touching the file when nothing changed
        quarto_yml_path.write_text(updated_yml, encoding="utf-8")


if __name__ == "__main__":
    main()