41 changes: 41 additions & 0 deletions src/sentry/replays/query.py
@@ -32,6 +32,7 @@
Paginators,
execute_query,
make_full_aggregation_query,
make_full_aggregation_query_with_short_id,
query_using_optimized_search,
)
from sentry.search.events.types import SnubaParams
@@ -111,6 +112,46 @@ def query_replay_instance(
)["data"]


def query_replay_instance_with_short_id(
project_ids: list[int],
replay_id_prefix: str,
start: datetime,
end: datetime,
organization: Organization | None = None,
request_user_id: int | None = None,
) -> list[dict[str, Any]] | None:
"""
Query an aggregated replay instance using a replay ID prefix filter.
The date range is chunked into 14-day windows, searched newest to oldest, to avoid timeouts,
since this query can perform large scans over the time range and project list.
"""
window_size = timedelta(days=14)
window_end = end
while window_end > start:
window_start = max(window_end - window_size, start)

snuba_response = execute_query(
query=make_full_aggregation_query_with_short_id(
fields=["replay_id"],
replay_id_prefix=replay_id_prefix,
project_ids=project_ids,
period_start=window_start,
period_end=window_end,
request_user_id=request_user_id,
limit=1,
),
tenant_id={"organization_id": organization.id} if organization else {},
referrer="replays.query.short_id_details_query",
)["data"]

if snuba_response:
return snuba_response

window_end = window_start

return None
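
A minimal usage sketch of the new helper, assuming an Organization instance and active project IDs are already loaded; the organization slug, project IDs, prefix, and date range below are illustrative and not part of this change:

from datetime import UTC, datetime, timedelta

from sentry.models.organization import Organization
from sentry.replays.query import query_replay_instance_with_short_id

organization = Organization.objects.get(slug="my-org")  # hypothetical slug
end = datetime.now(UTC)
rows = query_replay_instance_with_short_id(
    project_ids=[11276, 11277],  # hypothetical project IDs
    replay_id_prefix="a1b2c3d4",  # first 8 hex characters of the replay UUID
    start=end - timedelta(days=90),
    end=end,
    organization=organization,
    request_user_id=None,
)
if rows is None:
    # No replay matched the prefix in any 14-day window between start and end.
    ...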


def query_replay_viewed_by_ids(
project_id: int | list[int],
replay_id: str,
57 changes: 57 additions & 0 deletions src/sentry/replays/usecases/query/__init__.py
@@ -29,6 +29,7 @@
Entity,
Function,
Granularity,
Limit,
Op,
Or,
OrderBy,
@@ -440,6 +441,62 @@ def make_full_aggregation_query(
)


def make_full_aggregation_query_with_short_id(
fields: list[str],
replay_id_prefix: str,
project_ids: list[int],
period_start: datetime,
period_end: datetime,
request_user_id: int | None,
limit: int,
) -> Query:
"""Return a query to fetch a replay with a short ID - an 8-character replay ID prefix.
This query does not make use of the replay_id index and can potentially scan all rows in the time range and project list.

Arguments:
fields -- if non-empty, used to query a subset of fields. Corresponds to the keys in QUERY_ALIAS_COLUMN_MAP.
"""

if len(replay_id_prefix) != 8 or any(c not in "0123456789abcdef" for c in replay_id_prefix.lower()):
raise ValueError("Invalid short ID. Must be 8 hexadecimal characters.")

from sentry.replays.query import select_from_fields

select = select_from_fields(fields, user_id=request_user_id)

return Query(
Member: could we potentially do a pre-query to only look at the replay id? but no worries if you want to leave it as is

Member (Author): yeah makes sense we can! in fact I could refactor this so the util is just a short ID lookup, don't want people calling the inefficient aggregate query

(A rough sketch of this pre-query idea appears after this function.)

match=Entity("replays"),
select=select,
where=[
Condition(Column("project_id"), Op.IN, project_ids),
# Range queries on the UUID column don't work as expected because they compare the binary
# representation, which is affected by ClickHouse endianness. A string prefix filter is the
# only way to match a short ID, although it is slower because the replay_id index is no longer used.
Condition(
Function(
"startsWith",
parameters=[
Function("toString", parameters=[Column("replay_id")]),
replay_id_prefix,
],
),
Op.EQ,
1,
),
# We can scan an extended time range to account for replays which span either end of
# the range.
Condition(Column("timestamp"), Op.GTE, period_start - timedelta(hours=1)),
Condition(Column("timestamp"), Op.LT, period_end + timedelta(hours=1)),
],
# NOTE: Refer to this note: "make_scalar_search_conditions_query".
#
# This condition ensures that every replay shown to the user is valid.
having=[Condition(Function("min", parameters=[Column("segment_id")]), Op.EQ, 0)],
groupby=[Column("replay_id")],
granularity=Granularity(3600),
limit=Limit(limit),
)
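
The review thread above suggests a pre-query that resolves only the replay_id. A rough sketch of that idea, which is not part of this change and reuses the snuba_sdk primitives imported at the top of this module, might look like the following; the resolved full ID could then be passed to the existing make_full_aggregation_query:

def make_short_id_lookup_query(
    replay_id_prefix: str,
    project_ids: list[int],
    period_start: datetime,
    period_end: datetime,
) -> Query:
    # Hypothetical helper: select only replay_id so the prefix scan stays as narrow as possible.
    return Query(
        match=Entity("replays"),
        select=[Column("replay_id")],
        where=[
            Condition(Column("project_id"), Op.IN, project_ids),
            Condition(
                Function(
                    "startsWith",
                    parameters=[
                        Function("toString", parameters=[Column("replay_id")]),
                        replay_id_prefix,
                    ],
                ),
                Op.EQ,
                1,
            ),
            Condition(Column("timestamp"), Op.GTE, period_start),
            Condition(Column("timestamp"), Op.LT, period_end),
        ],
        groupby=[Column("replay_id")],
        granularity=Granularity(3600),
        limit=Limit(1),
    )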


def execute_query(query: Query, tenant_id: dict[str, int], referrer: str) -> Mapping[str, Any]:
try:
return raw_snql_query(
78 changes: 53 additions & 25 deletions src/sentry/seer/explorer/tools.py
@@ -1,10 +1,9 @@
import logging
import uuid
from datetime import UTC, datetime, timedelta, timezone
from typing import Any, Literal
from typing import Any, Literal, cast

from django.urls import reverse

from sentry import eventstore
from sentry import eventstore, features
from sentry.api import client
from sentry.api.serializers.base import serialize
from sentry.api.serializers.models.event import EventSerializer, IssueEventSerializerResponse
@@ -16,6 +15,8 @@
from sentry.models.organization import Organization
from sentry.models.project import Project
from sentry.models.repository import Repository
from sentry.replays.post_process import process_raw_response
from sentry.replays.query import query_replay_instance, query_replay_instance_with_short_id
from sentry.search.eap.types import SearchResolverConfig
from sentry.search.events.types import SnubaParams
from sentry.seer.autofix.autofix import get_all_tags_overview
@@ -521,7 +522,7 @@ def get_replay_metadata(
Get the metadata for a replay through an aggregate replay event query.

Args:
replay_id: The ID of the replay.
replay_id: The ID of the replay. Either a valid UUID or an 8-character hex string prefix. If known, the full ID is recommended for performance.
organization_id: The ID of the organization the replay belongs to.
project_id: The projects to query. If not provided, all projects in the organization will be queried.

@@ -538,38 +539,65 @@
)
return None

path = reverse(
"sentry-api-0-organization-replay-details",
args=(organization.slug, replay_id),
if not features.has("organizations:session-replay", organization):
return None

# Validate the replay ID.
if len(replay_id) >= 32:
try:
replay_id = str(uuid.UUID(replay_id)) # UUID with dashes is recommended for the query.
except ValueError:
return None

elif len(replay_id) != 8 or any(c not in "0123456789abcdef" for c in replay_id.lower()):
return None

p_ids_and_slugs = list(
Project.objects.filter(
organization_id=organization.id,
status=ObjectStatus.ACTIVE,
**({"id": project_id} if project_id else {}),
).values_list("id", "slug")
)
path = path.strip("/")[len("api/0") :] + "/"

params = {}
if project_id:
params["project"] = project_id
start, end = default_start_end_dates()

resp = client.get(
auth=ApiKey(organization_id=organization.id, scope_list=["org:read", "project:read"]),
user=None,
path=path,
params=params,
if len(replay_id) >= 32:
snuba_response = query_replay_instance(
project_id=[id for id, _ in p_ids_and_slugs],
replay_id=replay_id,
start=start,
end=end,
organization=organization,
request_user_id=None,
)
else:
snuba_response = query_replay_instance_with_short_id(
project_ids=[id for id, _ in p_ids_and_slugs],
replay_id_prefix=replay_id,
start=start,
end=end,
organization=organization,
request_user_id=None,
)

response = process_raw_response(
snuba_response,
fields=[],
)

if resp.status_code != 200 or not (resp.data or {}).get("data"):
if not response:
logger.warning(
"Failed to get replay metadata",
"Replay instance not found - no data returned from query",
extra={
"replay_id": replay_id,
"organization_id": organization_id,
"project_id": project_id,
"status_code": resp.status_code,
},
)
return None

# Add project_slug field.
result = resp.data["data"]
project = Project.objects.get(id=result["project_id"])
result["project_slug"] = project.slug

result = cast(dict[str, Any], response[0])
_, project_slug = next(filter(lambda x: x[0] == int(result["project_id"]), p_ids_and_slugs))
result["project_slug"] = project_slug
return result
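
A short usage sketch of the updated helper; the organization, project, and replay IDs below are hypothetical, and the second call assumes project_id is optional as described in the docstring:

# Full UUID: resolved through the indexed replay_id lookup (preferred when the ID is known).
metadata = get_replay_metadata(
    replay_id="4a93f1b20c6e4d589a8b2f1e3c4d5e6f",
    organization_id=123,
    project_id=456,
)

# 8-character hex prefix: falls back to the slower prefix scan across all active projects.
metadata_by_prefix = get_replay_metadata(
    replay_id="4a93f1b2",
    organization_id=123,
)

if metadata is not None:
    print(metadata["project_slug"])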