Skip to content

Commit 63cbf9b

Browse files
committed
fix: make sure one row per client is returned from the _current CTE prior to the join, and add sample_id
1 parent ce88134 commit 63cbf9b

File tree

6 files changed

+83
-47
lines changed

6 files changed

+83
-47
lines changed

sql_generators/terms_of_use/templates/terms_of_use_events_v1/metadata.yaml.jinja

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ friendly_name: Terms of Use - Events
33
description: |-
44
This dataset contains events related to Terms of Use.
55

6+
Clustered by: sample_id
7+
Partitioned by: submission_timestamp by day
8+
69
owners:
710
811
labels:
@@ -18,5 +21,8 @@ bigquery:
1821
field: submission_timestamp
1922
require_partition_filter: false
2023
expiration_days: null
24+
clustering:
25+
fields:
26+
- sample_id
2127
monitoring:
2228
enabled: true

sql_generators/terms_of_use/templates/terms_of_use_events_v1/query.sql.jinja

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
SELECT
33
submission_timestamp,
44
metrics.uuid.messaging_system_client_id AS legacy_telemetry_client_id,
5+
sample_id,
56
metrics.text2.messaging_system_message_id,
67
metrics.string.messaging_system_event,
78
metrics.string.messaging_system_event_source,

sql_generators/terms_of_use/templates/terms_of_use_events_v1/schema.yaml.jinja

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ fields:
1313
description: |
1414
Legacy client identifier.
1515

16+
- mode: NULLABLE
17+
name: sample_id
18+
type: INTEGER
19+
description: |
20+
A number, 0-99, that samples by client_id and allows filtering data
21+
for analysis. It is a pipeline-generated artifact that should match between pings.
22+
1623
- mode: NULLABLE
1724
name: messaging_system_message_id
1825
type: STRING

sql_generators/terms_of_use/templates/terms_of_use_status_v1/metadata.yaml.jinja

Lines changed: 5 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ friendly_name: Terms of Use - Status
33
description: |-
44
This dataset contains the most recent Terms of Use status for each client.
55

6-
Cluster by: ??
6+
Clustered by: sample_id
77

88
owners:
99
@@ -12,16 +12,12 @@ labels:
1212
schedule: daily
1313
table_type: client_level
1414
scheduling:
15-
dag_name: bqetl_usage_reporting
15+
dag_name: {{ dag_name }}
1616
task_group: {{ app_name }}
17-
{# bigquery:
18-
time_partitioning:
19-
type: day
20-
field: ??
21-
require_partition_filter: false -- TODO: this table will never expire because we keep overwriting it.
22-
expiration_days: null
17+
depends_on_past: true
18+
bigquery:
2319
clustering:
2420
fields:
25-
- ?? #}
21+
- sample_id
2622
monitoring:
2723
enabled: true
Lines changed: 57 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
-- {{ header }}
22
WITH _previous AS (
33
SELECT
4-
submission_timestamp,
4+
submission_date,
55
client_id,
6+
sample_id,
67
app_version_major,
78
app_version_minor,
89
app_version_patch,
@@ -20,57 +21,75 @@ WITH _previous AS (
2021
FROM
2122
`{{ project_id }}.{{ app_name }}_derived.{{ table_name }}`
2223
WHERE
23-
DATE(submission_timestamp) <> @submission_date
24+
{# DATE(submission_timestamp) <> @submission_date #}
25+
submission_date <> @submission_date
2426
),
2527
_current AS (
2628
SELECT
27-
submission_timestamp,
29+
{# LAST_VALUE(submission_timestamp) OVER _window AS submission_timestamp, #}
30+
DATE(submission_timestamp) AS submission_date,
2831
client_info.client_id,
29-
app_version_major,
30-
app_version_minor,
31-
app_version_patch,
32-
normalized_channel,
33-
normalized_country_code,
34-
normalized_os,
35-
normalized_os_version,
36-
is_bot_generated,
37-
metadata.isp.name AS isp_name,
32+
sample_id,
33+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(app_version_major) OVER _window) AS app_version_major,
34+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(app_version_minor) OVER _window) AS app_version_minor,
35+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(app_version_patch) OVER _window) AS app_version_patch,
36+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(normalized_channel) OVER _window) AS normalized_channel,
37+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(normalized_country_code) OVER _window) AS normalized_country_code,
38+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(normalized_os) OVER _window) AS normalized_os,
39+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(normalized_os_version) OVER _window) AS normalized_os_version,
40+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(is_bot_generated) OVER _window) AS is_bot_generated,
41+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(metadata.isp.name) OVER _window) AS isp_name,
3842
{% if app_name == "firefox_ios" %}
39-
metrics.quantity.user_terms_of_use_version_accepted AS terms_of_use_version_accepted,
40-
metrics.datetime.user_terms_of_use_date_accepted AS terms_of_use_date_accepted,
43+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(metrics.quantity.user_terms_of_use_version_accepted) OVER _window) AS terms_of_use_version_accepted,
44+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(metrics.datetime.user_terms_of_use_date_accepted) OVER _window) AS terms_of_use_date_accepted,
4145
{% elif app_name == "fenix" %}
42-
metrics.quantity.terms_of_use_version AS terms_of_use_version_accepted,
43-
metrics.datetime.terms_of_use_date AS terms_of_use_date_accepted,
46+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(metrics.quantity.terms_of_use_version) OVER _window) AS terms_of_use_version_accepted,
47+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(metrics.datetime.terms_of_use_date) OVER _window) AS terms_of_use_date_accepted,
4448
{% elif app_name == "firefox_desktop" %}
45-
metrics.quantity.termsofuse_version AS terms_of_use_version_accepted,
46-
metrics.datetime.termsofuse_date AS terms_of_use_date_accepted,
49+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(metrics.quantity.termsofuse_version) OVER _window) AS terms_of_use_version_accepted,
50+
`moz-fx-data-shared-prod`.udf.mode_last(ARRAY_AGG(metrics.datetime.termsofuse_date) OVER _window) AS terms_of_use_date_accepted,
4751
metrics.uuid.legacy_telemetry_client_id, -- firefox_desktop exclusive field.
4852
{% endif %}
49-
FROM
50-
`{{ project_id }}.{{ app_name }}.metrics`
51-
WHERE
52-
DATE(submission_timestamp) = @submission_date
53-
AND DATE(submission_timestamp) >= {% if app_name == "firefox_desktop" %}"2025-06-24"{% else %}"2025-09-15"{% endif %}
54-
AND app_version_major >= 142
55-
{% if app_name == "firefox_ios" %}
56-
AND metrics.datetime.user_terms_of_use_date_accepted IS NOT NULL
57-
{% elif app_name == "fenix" %}
58-
AND metrics.datetime.terms_of_use_date IS NOT NULL
59-
{% elif app_name == "firefox_desktop" %}
60-
AND metrics.datetime.termsofuse_date IS NOT NULL
61-
{% endif %}
53+
FROM
54+
`{{ project_id }}.{{ app_name }}.metrics`
55+
WHERE
56+
DATE(submission_timestamp) = @submission_date
57+
-- Hard lower bound: ignore submissions from before the date we start recording this data:
58+
AND DATE(submission_timestamp) >= {% if app_name == "firefox_desktop" %}"2025-06-24"{% else %}"2025-09-15"{% endif %}
59+
AND app_version_major >= 142
60+
{% if app_name == "firefox_ios" %}
61+
AND metrics.datetime.user_terms_of_use_date_accepted IS NOT NULL
62+
{% elif app_name == "fenix" %}
63+
AND metrics.datetime.terms_of_use_date IS NOT NULL
64+
{% elif app_name == "firefox_desktop" %}
65+
AND metrics.datetime.termsofuse_date IS NOT NULL
66+
{% endif %}
67+
QUALIFY
68+
ROW_NUMBER() OVER(PARTITION BY client_id ORDER BY submission_timestamp DESC) = 1
69+
WINDOW
70+
_window AS (
71+
PARTITION BY
72+
sample_id,
73+
client_info.client_id
74+
ORDER BY
75+
submission_timestamp
76+
ROWS BETWEEN
77+
UNBOUNDED PRECEDING
78+
AND UNBOUNDED FOLLOWING
79+
)
6280
)
63-
6481
SELECT
6582
-- Use the current entry when the client has no previous entry, or when `terms_of_use_version_accepted` or `terms_of_use_date_accepted` has changed; otherwise keep the previous entry:
6683
IF(
67-
(_current.terms_of_use_version_accepted <> _previous.terms_of_use_version_accepted)
68-
OR (_current.terms_of_use_date_accepted <> _previous.terms_of_use_date_accepted),
84+
_previous.client_id IS NULL
85+
OR (
86+
(_current.terms_of_use_version_accepted <> _previous.terms_of_use_version_accepted)
87+
OR (_current.terms_of_use_date_accepted <> _previous.terms_of_use_date_accepted)
88+
),
6989
_current,
7090
_previous
7191
).*
7292
FROM
73-
_previous
74-
FULL OUTER JOIN _current
75-
USING (client_id, normalized_channel)
76-
93+
_current
94+
FULL OUTER JOIN _previous
95+
USING (client_id, sample_id)

sql_generators/terms_of_use/templates/terms_of_use_status_v1/schema.yaml.jinja

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,13 @@ fields:
1313
description: |
1414
A UUID uniquely identifying the client.
1515

16+
- mode: NULLABLE
17+
name: sample_id
18+
type: INTEGER
19+
description: |
20+
A number, 0-99, that samples by client_id and allows filtering data
21+
for analysis. It is a pipeline-generated artifact that should match between pings.
22+
1623
- mode: NULLABLE
1724
name: app_version_major
1825
type: NUMERIC

0 commit comments

Comments
 (0)