Skip to content

Commit b65ea42

Browse files
committed
refactor: add user id to dataset splits (#9677)
1 parent 9f449a0 commit b65ea42

File tree

5 files changed: +186 additions, -135 deletions

scripts/ddl/postgresql_schema.sql

Lines changed: 149 additions & 132 deletions
Original file line numberDiff line numberDiff line change
@@ -18,56 +18,6 @@ CREATE TABLE public.annotation_configs (
1818
);
1919

2020

21-
-- Table: dataset_splits
22-
-- ---------------------
23-
CREATE TABLE public.dataset_splits (
24-
id bigserial NOT NULL,
25-
name VARCHAR NOT NULL,
26-
description VARCHAR,
27-
color VARCHAR NOT NULL,
28-
metadata JSONB NOT NULL,
29-
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
30-
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
31-
CONSTRAINT pk_dataset_splits PRIMARY KEY (id),
32-
CONSTRAINT uq_dataset_splits_name
33-
UNIQUE (name)
34-
);
35-
36-
37-
-- Table: datasets
38-
-- ---------------
39-
CREATE TABLE public.datasets (
40-
id serial NOT NULL,
41-
name VARCHAR NOT NULL,
42-
description VARCHAR,
43-
metadata JSONB NOT NULL,
44-
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
45-
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
46-
CONSTRAINT pk_datasets PRIMARY KEY (id),
47-
CONSTRAINT uq_datasets_name
48-
UNIQUE (name)
49-
);
50-
51-
52-
-- Table: dataset_versions
53-
-- -----------------------
54-
CREATE TABLE public.dataset_versions (
55-
id serial NOT NULL,
56-
dataset_id INTEGER NOT NULL,
57-
description VARCHAR,
58-
metadata JSONB NOT NULL,
59-
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
60-
CONSTRAINT pk_dataset_versions PRIMARY KEY (id),
61-
CONSTRAINT fk_dataset_versions_dataset_id_datasets FOREIGN KEY
62-
(dataset_id)
63-
REFERENCES public.datasets (id)
64-
ON DELETE CASCADE
65-
);
66-
67-
CREATE INDEX ix_dataset_versions_dataset_id ON public.dataset_versions
68-
USING btree (dataset_id);
69-
70-
7121
-- Table: generative_models
7222
-- ------------------------
7323
CREATE TABLE public.generative_models (
@@ -331,87 +281,6 @@ CREATE INDEX ix_spans_trace_rowid ON public.spans
331281
USING btree (trace_rowid);
332282

333283

334-
-- Table: dataset_examples
335-
-- -----------------------
336-
CREATE TABLE public.dataset_examples (
337-
id serial NOT NULL,
338-
dataset_id INTEGER NOT NULL,
339-
span_rowid INTEGER,
340-
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
341-
CONSTRAINT pk_dataset_examples PRIMARY KEY (id),
342-
CONSTRAINT fk_dataset_examples_dataset_id_datasets FOREIGN KEY
343-
(dataset_id)
344-
REFERENCES public.datasets (id)
345-
ON DELETE CASCADE,
346-
CONSTRAINT fk_dataset_examples_span_rowid_spans FOREIGN KEY
347-
(span_rowid)
348-
REFERENCES public.spans (id)
349-
ON DELETE SET NULL
350-
);
351-
352-
CREATE INDEX ix_dataset_examples_dataset_id ON public.dataset_examples
353-
USING btree (dataset_id);
354-
CREATE INDEX ix_dataset_examples_span_rowid ON public.dataset_examples
355-
USING btree (span_rowid);
356-
357-
358-
-- Table: dataset_example_revisions
359-
-- --------------------------------
360-
CREATE TABLE public.dataset_example_revisions (
361-
id serial NOT NULL,
362-
dataset_example_id INTEGER NOT NULL,
363-
dataset_version_id INTEGER NOT NULL,
364-
input JSONB NOT NULL,
365-
output JSONB NOT NULL,
366-
metadata JSONB NOT NULL,
367-
revision_kind VARCHAR NOT NULL,
368-
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
369-
CONSTRAINT pk_dataset_example_revisions PRIMARY KEY (id),
370-
CONSTRAINT uq_dataset_example_revisions_dataset_example_id_dataset_bbf2
371-
UNIQUE (dataset_example_id, dataset_version_id),
372-
CHECK (((revision_kind)::text = ANY ((ARRAY[
373-
'CREATE'::character varying,
374-
'PATCH'::character varying,
375-
'DELETE'::character varying
376-
])::text[]))),
377-
CONSTRAINT fk_dataset_example_revisions_dataset_example_id_dataset_c72a
378-
FOREIGN KEY
379-
(dataset_example_id)
380-
REFERENCES public.dataset_examples (id)
381-
ON DELETE CASCADE,
382-
CONSTRAINT fk_dataset_example_revisions_dataset_version_id_dataset_3a56
383-
FOREIGN KEY
384-
(dataset_version_id)
385-
REFERENCES public.dataset_versions (id)
386-
ON DELETE CASCADE
387-
);
388-
389-
CREATE INDEX ix_dataset_example_revisions_dataset_version_id ON public.dataset_example_revisions
390-
USING btree (dataset_version_id);
391-
392-
393-
-- Table: dataset_splits_dataset_examples
394-
-- --------------------------------------
395-
CREATE TABLE public.dataset_splits_dataset_examples (
396-
dataset_split_id BIGINT NOT NULL,
397-
dataset_example_id BIGINT NOT NULL,
398-
CONSTRAINT pk_dataset_splits_dataset_examples PRIMARY KEY (dataset_split_id, dataset_example_id),
399-
CONSTRAINT fk_dataset_splits_dataset_examples_dataset_example_id_d_63b2
400-
FOREIGN KEY
401-
(dataset_example_id)
402-
REFERENCES public.dataset_examples (id)
403-
ON DELETE CASCADE,
404-
CONSTRAINT fk_dataset_splits_dataset_examples_dataset_split_id_dat_a90c
405-
FOREIGN KEY
406-
(dataset_split_id)
407-
REFERENCES public.dataset_splits (id)
408-
ON DELETE CASCADE
409-
);
410-
411-
CREATE INDEX ix_dataset_splits_dataset_examples_dataset_example_id ON public.dataset_splits_dataset_examples
412-
USING btree (dataset_example_id);
413-
414-
415284
-- Table: span_costs
416285
-- -----------------
417286
CREATE TABLE public.span_costs (
@@ -565,6 +434,155 @@ CREATE TABLE public.dataset_labels (
565434
);
566435

567436

437+
-- Table: dataset_splits
438+
-- ---------------------
439+
CREATE TABLE public.dataset_splits (
440+
id bigserial NOT NULL,
441+
user_id INTEGER,
442+
name VARCHAR NOT NULL,
443+
description VARCHAR,
444+
color VARCHAR NOT NULL,
445+
metadata JSONB NOT NULL,
446+
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
447+
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
448+
CONSTRAINT pk_dataset_splits PRIMARY KEY (id),
449+
CONSTRAINT uq_dataset_splits_name
450+
UNIQUE (name),
451+
CONSTRAINT fk_dataset_splits_user_id_users FOREIGN KEY
452+
(user_id)
453+
REFERENCES public.users (id)
454+
ON DELETE SET NULL
455+
);
456+
457+
CREATE INDEX ix_dataset_splits_user_id ON public.dataset_splits
458+
USING btree (user_id);
459+
460+
461+
-- Table: datasets
462+
-- ---------------
463+
CREATE TABLE public.datasets (
464+
id serial NOT NULL,
465+
name VARCHAR NOT NULL,
466+
description VARCHAR,
467+
metadata JSONB NOT NULL,
468+
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
469+
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
470+
user_id INTEGER,
471+
CONSTRAINT pk_datasets PRIMARY KEY (id),
472+
CONSTRAINT uq_datasets_name
473+
UNIQUE (name),
474+
CONSTRAINT fk_datasets_user_id_users FOREIGN KEY
475+
(user_id)
476+
REFERENCES public.users (id)
477+
ON DELETE SET NULL
478+
);
479+
480+
481+
-- Table: dataset_examples
482+
-- -----------------------
483+
CREATE TABLE public.dataset_examples (
484+
id serial NOT NULL,
485+
dataset_id INTEGER NOT NULL,
486+
span_rowid INTEGER,
487+
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
488+
CONSTRAINT pk_dataset_examples PRIMARY KEY (id),
489+
CONSTRAINT fk_dataset_examples_dataset_id_datasets FOREIGN KEY
490+
(dataset_id)
491+
REFERENCES public.datasets (id)
492+
ON DELETE CASCADE,
493+
CONSTRAINT fk_dataset_examples_span_rowid_spans FOREIGN KEY
494+
(span_rowid)
495+
REFERENCES public.spans (id)
496+
ON DELETE SET NULL
497+
);
498+
499+
CREATE INDEX ix_dataset_examples_dataset_id ON public.dataset_examples
500+
USING btree (dataset_id);
501+
CREATE INDEX ix_dataset_examples_span_rowid ON public.dataset_examples
502+
USING btree (span_rowid);
503+
504+
505+
-- Table: dataset_splits_dataset_examples
506+
-- --------------------------------------
507+
CREATE TABLE public.dataset_splits_dataset_examples (
508+
dataset_split_id BIGINT NOT NULL,
509+
dataset_example_id BIGINT NOT NULL,
510+
CONSTRAINT pk_dataset_splits_dataset_examples PRIMARY KEY (dataset_split_id, dataset_example_id),
511+
CONSTRAINT fk_dataset_splits_dataset_examples_dataset_example_id_d_63b2
512+
FOREIGN KEY
513+
(dataset_example_id)
514+
REFERENCES public.dataset_examples (id)
515+
ON DELETE CASCADE,
516+
CONSTRAINT fk_dataset_splits_dataset_examples_dataset_split_id_dat_a90c
517+
FOREIGN KEY
518+
(dataset_split_id)
519+
REFERENCES public.dataset_splits (id)
520+
ON DELETE CASCADE
521+
);
522+
523+
CREATE INDEX ix_dataset_splits_dataset_examples_dataset_example_id ON public.dataset_splits_dataset_examples
524+
USING btree (dataset_example_id);
525+
526+
527+
-- Table: dataset_versions
528+
-- -----------------------
529+
CREATE TABLE public.dataset_versions (
530+
id serial NOT NULL,
531+
dataset_id INTEGER NOT NULL,
532+
description VARCHAR,
533+
metadata JSONB NOT NULL,
534+
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
535+
user_id INTEGER,
536+
CONSTRAINT pk_dataset_versions PRIMARY KEY (id),
537+
CONSTRAINT fk_dataset_versions_dataset_id_datasets FOREIGN KEY
538+
(dataset_id)
539+
REFERENCES public.datasets (id)
540+
ON DELETE CASCADE,
541+
CONSTRAINT fk_dataset_versions_user_id_users FOREIGN KEY
542+
(user_id)
543+
REFERENCES public.users (id)
544+
ON DELETE SET NULL
545+
);
546+
547+
CREATE INDEX ix_dataset_versions_dataset_id ON public.dataset_versions
548+
USING btree (dataset_id);
549+
550+
551+
-- Table: dataset_example_revisions
552+
-- --------------------------------
553+
CREATE TABLE public.dataset_example_revisions (
554+
id serial NOT NULL,
555+
dataset_example_id INTEGER NOT NULL,
556+
dataset_version_id INTEGER NOT NULL,
557+
input JSONB NOT NULL,
558+
output JSONB NOT NULL,
559+
metadata JSONB NOT NULL,
560+
revision_kind VARCHAR NOT NULL,
561+
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
562+
CONSTRAINT pk_dataset_example_revisions PRIMARY KEY (id),
563+
CONSTRAINT uq_dataset_example_revisions_dataset_example_id_dataset_bbf2
564+
UNIQUE (dataset_example_id, dataset_version_id),
565+
CHECK (((revision_kind)::text = ANY ((ARRAY[
566+
'CREATE'::character varying,
567+
'PATCH'::character varying,
568+
'DELETE'::character varying
569+
])::text[]))),
570+
CONSTRAINT fk_dataset_example_revisions_dataset_example_id_dataset_c72a
571+
FOREIGN KEY
572+
(dataset_example_id)
573+
REFERENCES public.dataset_examples (id)
574+
ON DELETE CASCADE,
575+
CONSTRAINT fk_dataset_example_revisions_dataset_version_id_dataset_3a56
576+
FOREIGN KEY
577+
(dataset_version_id)
578+
REFERENCES public.dataset_versions (id)
579+
ON DELETE CASCADE
580+
);
581+
582+
CREATE INDEX ix_dataset_example_revisions_dataset_version_id ON public.dataset_example_revisions
583+
USING btree (dataset_version_id);
584+
585+
568586
-- Table: datasets_dataset_labels
569587
-- ------------------------------
570588
CREATE TABLE public.datasets_dataset_labels (
@@ -739,7 +757,6 @@ CREATE TABLE public.experiment_tags (
739757
user_id INTEGER,
740758
name VARCHAR NOT NULL,
741759
description VARCHAR,
742-
color VARCHAR NOT NULL,
743760
CONSTRAINT pk_experiment_tags PRIMARY KEY (id),
744761
CONSTRAINT uq_experiment_tags_dataset_id_name
745762
UNIQUE (dataset_id, name),

src/phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,13 @@ def upgrade() -> None:
5656
op.create_table(
5757
"dataset_splits",
5858
sa.Column("id", _Integer, primary_key=True),
59+
sa.Column(
60+
"user_id",
61+
sa.Integer,
62+
sa.ForeignKey("users.id", ondelete="SET NULL"),
63+
nullable=True,
64+
index=True,
65+
),
5966
sa.Column("name", sa.String, nullable=False, unique=True),
6067
sa.Column("description", sa.String, nullable=True),
6168
sa.Column("color", sa.String, nullable=False),

src/phoenix/db/models.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,6 +1230,12 @@ class DatasetExampleRevision(HasId):
12301230

12311231
class DatasetSplit(HasId):
12321232
__tablename__ = "dataset_splits"
1233+
1234+
user_id: Mapped[Optional[int]] = mapped_column(
1235+
ForeignKey("users.id", ondelete="SET NULL"),
1236+
nullable=True,
1237+
index=True,
1238+
)
12331239
name: Mapped[str] = mapped_column(String, nullable=False, unique=True)
12341240
description: Mapped[Optional[str]]
12351241
color: Mapped[str] = mapped_column(String, nullable=False)

src/phoenix/server/api/mutations/dataset_split_mutations.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from phoenix.server.api.auth import IsLocked, IsNotReadOnly
1414
from phoenix.server.api.context import Context
1515
from phoenix.server.api.exceptions import BadRequest, Conflict, NotFound
16+
from phoenix.server.api.helpers.playground_users import get_user
1617
from phoenix.server.api.queries import Query
1718
from phoenix.server.api.types.DatasetSplit import DatasetSplit, to_gql_dataset_split
1819
from phoenix.server.api.types.node import from_global_id_with_expected_type
@@ -89,13 +90,15 @@ class DatasetSplitMutationMixin:
8990
async def create_dataset_split(
9091
self, info: Info[Context, None], input: CreateDatasetSplitInput
9192
) -> DatasetSplitMutationPayload:
93+
user_id = get_user(info)
9294
validated_name = _validated_name(input.name)
9395
async with info.context.db() as session:
9496
dataset_split_orm = models.DatasetSplit(
9597
name=validated_name,
9698
description=input.description,
9799
color=input.color,
98100
metadata_=input.metadata or {},
101+
user_id=user_id,
99102
)
100103
session.add(dataset_split_orm)
101104
try:
@@ -319,6 +322,7 @@ async def remove_dataset_examples_from_dataset_splits(
319322
async def create_dataset_split_with_examples(
320323
self, info: Info[Context, None], input: CreateDatasetSplitWithExamplesInput
321324
) -> DatasetSplitMutationPayload:
325+
user_id = get_user(info)
322326
validated_name = _validated_name(input.name)
323327
unique_example_rowids: set[int] = set()
324328
for example_gid in input.example_ids:
@@ -345,6 +349,7 @@ async def create_dataset_split_with_examples(
345349
description=input.description or None,
346350
color=input.color,
347351
metadata_=input.metadata or {},
352+
user_id=user_id,
348353
)
349354
session.add(dataset_split_orm)
350355
try:

0 commit comments

Comments
 (0)