From b9bc4ea463a9ea9ae22562a396cdedffe41c49e1 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Thu, 8 Jan 2026 21:56:42 -0600 Subject: [PATCH] refactor: remove text from core types, keep as native passthrough `text` is no longer a core DataJoint type. It remains available as a native SQL passthrough type (with portability warning). Rationale: - Core types should encourage structured, bounded data - varchar(n) covers most legitimate text needs with explicit bounds - json handles structured text better - a codec type (angle-bracket syntax) is better for large/unbounded text (files, sequences, docs) - text behavior varies across databases, hurting portability Changes: - Remove `text` from CORE_TYPES in declare.py - Update NATIVE_TEXT pattern to match plain `text` (in addition to tinytext, mediumtext, longtext) - Update archive docs to note text is native-only Users who need unlimited text can: - Use varchar(n) with a generous limit - Use json for structured content - Use a codec type (angle-bracket syntax) for large text files - Use native text types with portability warning Co-Authored-By: Claude Opus 4.5 --- docs/src/archive/design/tables/attributes.md | 4 +++- docs/src/archive/design/tables/storage-types-spec.md | 4 +++- src/datajoint/declare.py | 4 +--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/src/archive/design/tables/attributes.md b/docs/src/archive/design/tables/attributes.md index 39a80ff67..3753621d5 100644 --- a/docs/src/archive/design/tables/attributes.md +++ b/docs/src/archive/design/tables/attributes.md @@ -34,10 +34,12 @@ Use these portable, scientist-friendly types for cross-database compatibility. - `char(n)`: fixed-length string of exactly *n* characters. - `varchar(n)`: variable-length string up to *n* characters. -- `text`: unlimited-length text for long-form content (notes, descriptions, abstracts). - `enum(...)`: one of several enumerated values, e.g., `enum("low", "medium", "high")`. Do not use enums in primary keys due to difficulty changing definitions. 
+> **Note:** For unlimited text, use `varchar` with a generous limit, `json` for structured content, +> or a codec type (angle-bracket syntax) for large text files. Native SQL `text` types are supported but not portable. + **Encoding policy:** All strings use UTF-8 encoding (`utf8mb4` in MySQL, `UTF8` in PostgreSQL). Character encoding and collation are database-level configuration, not part of type definitions. Comparisons are case-sensitive by default. diff --git a/docs/src/archive/design/tables/storage-types-spec.md b/docs/src/archive/design/tables/storage-types-spec.md index f7aead7de..7157d4d42 100644 --- a/docs/src/archive/design/tables/storage-types-spec.md +++ b/docs/src/archive/design/tables/storage-types-spec.md @@ -75,7 +75,9 @@ MySQL and PostgreSQL backends. Users should prefer these over native database ty |-----------|-------------|-------|------------| | `char(n)` | Fixed-length | `CHAR(n)` | `CHAR(n)` | | `varchar(n)` | Variable-length | `VARCHAR(n)` | `VARCHAR(n)` | -| `text` | Unlimited text | `TEXT` | `TEXT` | + +> **Note:** Native SQL `text` types (`text`, `tinytext`, `mediumtext`, `longtext`) are supported +> but not portable. Prefer `varchar(n)`, `json`, or a codec type (angle-bracket syntax) for portable schemas. **Encoding:** All strings use UTF-8 (`utf8mb4` in MySQL, `UTF8` in PostgreSQL). See [Encoding and Collation Policy](#encoding-and-collation-policy) for details. 
diff --git a/src/datajoint/declare.py b/src/datajoint/declare.py index d8479b124..d86e90ed9 100644 --- a/src/datajoint/declare.py +++ b/src/datajoint/declare.py @@ -45,8 +45,6 @@ # String types (with parameters) "char": (r"char\s*\(\d+\)$", None), "varchar": (r"varchar\s*\(\d+\)$", None), - # Unlimited text - "text": (r"text$", None), # Enumeration "enum": (r"enum\s*\(.+\)$", None), # Fixed-point decimal @@ -78,7 +76,7 @@ STRING=r"(var)?char\s*\(.+\)$", # Catches char/varchar not matched by core types TEMPORAL=r"(time|timestamp|year)(\s*\(.+\))?$", # time, timestamp, year (not date/datetime) NATIVE_BLOB=r"(tiny|small|medium|long)blob$", # Specific blob variants - NATIVE_TEXT=r"(tiny|small|medium|long)text$", # Text variants (use plain 'text' instead) + NATIVE_TEXT=r"(tiny|small|medium|long)?text$", # Native text types (not portable) # Codecs use angle brackets CODEC=r"<.+>$", ).items()