Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/clusterfuzz/_internal/metrics/monitoring_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@
'Count of failure in booting up cuttlefish with tip-of-the-tree build ',
field_spec=[
monitor.StringField('build_id'),
monitor.StringField('instance_id'),
monitor.BooleanField('is_candidate'),
monitor.BooleanField('is_succeeded'),
])

Expand Down
14 changes: 13 additions & 1 deletion src/clusterfuzz/_internal/platforms/android/flash.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@

from clusterfuzz._internal.base import dates
from clusterfuzz._internal.base import persistent_cache
from clusterfuzz._internal.base import utils
from clusterfuzz._internal.datastore import locks
from clusterfuzz._internal.google_cloud_utils import compute_metadata
from clusterfuzz._internal.metrics import logs
from clusterfuzz._internal.metrics import monitoring_metrics
from clusterfuzz._internal.system import archive
Expand Down Expand Up @@ -171,10 +173,17 @@ def flash_to_latest_build_if_needed():
'branch %s and target %s.' % (branch, target))
return

instance_id = None
is_candidate = None
if environment.is_android_cuttlefish():
download_latest_build(build_info, FLASH_CUTTLEFISH_REGEXES, image_directory)
adb.recreate_cuttlefish_device()
adb.connect_to_cuttlefish_device()
if compute_metadata.is_gce():
# Get the GCE-assigned VM instance ID for accurate instance tracking.
instance_id = compute_metadata.get('instance/id')
# Determine if the current instance is a candidate.
is_candidate = utils.get_clusterfuzz_release() == 'candidate'
else:
download_latest_build(build_info, FLASH_IMAGE_REGEXES, image_directory)
# We do one device flash at a time on one host, otherwise we run into
Expand Down Expand Up @@ -220,6 +229,8 @@ def flash_to_latest_build_if_needed():
logs.info('Trying to boot cuttlefish instance using stable build.')
monitoring_metrics.CF_TIP_BOOT_FAILED_COUNT.increment({
'build_id': build_info['bid'],
'instance_id': instance_id,
'is_candidate': is_candidate,
'is_succeeded': False
})
boot_stable_build_cuttlefish(branch, target, image_directory)
Expand All @@ -229,9 +240,10 @@ def flash_to_latest_build_if_needed():
else:
logs.error('Unable to find device. Reimaging failed.')
adb.bad_state_reached()

monitoring_metrics.CF_TIP_BOOT_FAILED_COUNT.increment({
'build_id': build_info['bid'],
'instance_id': instance_id,
'is_candidate': is_candidate,
'is_succeeded': True
})
logs.info('Reimaging finished.')
Expand Down
58 changes: 53 additions & 5 deletions src/clusterfuzz/_internal/tests/core/metrics/monitor_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ def setUp(self):
'clusterfuzz._internal.base.persistent_cache.get_value',
'clusterfuzz._internal.base.persistent_cache.set_value',
'clusterfuzz._internal.base.persistent_cache.delete_value',
'clusterfuzz._internal.google_cloud_utils.compute_metadata.get',
'clusterfuzz._internal.platforms.android.settings.is_google_device',
'clusterfuzz._internal.platforms.android.fetch_artifact.get_latest_artifact_info',
'clusterfuzz._internal.system.environment.is_android_cuttlefish',
Expand Down Expand Up @@ -116,37 +117,84 @@ def _setup_monitoring_daemon(self, mock_client):
monitor._monitoring_daemon.start()
return call_queue

def _assert_cuttlefish_boot_metric(self, time_series, is_succeeded):
def _assert_cuttlefish_boot_metric(self, time_series, instance_id,
                                   is_candidate, is_succeeded):
  """Checks the labels of matching tip-boot-failure entries in time_series.

  Only entries whose metric type is "custom.googleapis.com/tip_boot_failure"
  are inspected. An entry is skipped when any expected value that is not None
  differs from the entry's label; every remaining entry must carry the
  stringified expected values and the build id "test-bid".

  Args:
    time_series: Iterable of objects exposing `metric.type` and
      `metric.labels`.
    instance_id: Expected 'instance_id' label value, or None to not filter.
    is_candidate: Expected 'is_candidate' label value, or None to not filter.
    is_succeeded: Expected 'is_succeeded' label value, or None to not filter.
  """
  # NOTE(review): nothing here fails when no entry survives the filters, so
  # the helper passes on an empty or fully filtered series - confirm that is
  # intended.
  expected_labels = (
      ('instance_id', instance_id),
      ('is_candidate', is_candidate),
      ('is_succeeded', is_succeeded),
  )
  for series in time_series:
    if series.metric.type != "custom.googleapis.com/tip_boot_failure":
      continue
    labels = series.metric.labels
    # Labels are compared as strings; a None expectation never filters.
    differs = any(
        wanted is not None and labels[key] != str(wanted)
        for key, wanted in expected_labels)
    if differs:
      continue
    for key, wanted in expected_labels:
      self.assertEqual(labels[key], str(wanted))
    self.assertEqual(labels['build_id'], "test-bid")

def _fake_get(self, path):
  """Returns a canned value for a metadata path.

  Used as the side effect of the mocked `compute_metadata.get`.

  Args:
    path: Metadata path being requested.

  Returns:
    The fixed zone string for "instance/zone", the fixed id string for
    "instance/id", and an empty string for any other path.
  """
  canned_responses = (
      ("instance/zone", "projects/1234567890/zones/us-central1-b"),
      ("instance/id", "1234567890"),
  )
  for known_path, value in canned_responses:
    if path == known_path:
      return value
  return ""

@patch(
    'clusterfuzz._internal.metrics.monitor.monitoring_v3.MetricServiceClient')
def test_cuttlefish_boot_success_metric_for_candidate_fleet(
    self, mock_client):
  """Tests the metric emitted for a successful boot on a candidate instance.

  The mocked metadata lookup is routed through `_fake_get` and the mocked
  device state is 'device'. After the flash routine runs, the time series
  taken from the daemon's call queue is checked for instance id
  '1234567890', is_candidate=True and is_succeeded=True.
  """
  # NOTE(review): nothing in this method selects the candidate release;
  # presumably the fixture does - confirm, since the assertion helper skips
  # entries whose labels do not match.
  self.mock.get.side_effect = self._fake_get
  call_queue = self._setup_monitoring_daemon(mock_client)
  try:
    self.mock.get_device_state.return_value = 'device'
    flash.flash_to_latest_build_if_needed()
    args = call_queue.get(timeout=20)
    self._assert_cuttlefish_boot_metric(args['time_series'], '1234567890',
                                        True, True)
  finally:
    # Stop the daemon even when the flash call, the queue wait or an
    # assertion raises, so the daemon started by _setup_monitoring_daemon is
    # not left running.
    monitor._monitoring_daemon.stop()

@patch(
    'clusterfuzz._internal.metrics.monitor.monitoring_v3.MetricServiceClient')
def test_cuttlefish_boot_failure_metric_for_candidate_fleet(
    self, mock_client):
  """Tests the metric emitted for a failed boot on a candidate instance.

  The mocked metadata lookup is routed through `_fake_get`; the device-state
  mock is left unconfigured. After the flash routine runs, the time series
  taken from the daemon's call queue is checked for instance id
  '1234567890', is_candidate=True and is_succeeded=False.
  """
  # NOTE(review): nothing in this method selects the candidate release;
  # presumably the fixture does - confirm, since the assertion helper skips
  # entries whose labels do not match.
  self.mock.get.side_effect = self._fake_get
  call_queue = self._setup_monitoring_daemon(mock_client)
  try:
    flash.flash_to_latest_build_if_needed()
    args = call_queue.get(timeout=20)
    self._assert_cuttlefish_boot_metric(args['time_series'], '1234567890',
                                        True, False)
  finally:
    # Stop the daemon even when the flash call, the queue wait or an
    # assertion raises, so the daemon started by _setup_monitoring_daemon is
    # not left running.
    monitor._monitoring_daemon.stop()

@patch(
    'clusterfuzz._internal.metrics.monitor.monitoring_v3.MetricServiceClient')
def test_cuttlefish_boot_success_metric_for_production_fleet(
    self, mock_client):
  """Tests the metric emitted for a successful boot on a production instance.

  The mocked metadata lookup is routed through `_fake_get` and the mocked
  device state is 'device'. After the flash routine runs, the time series
  taken from the daemon's call queue is checked for instance id
  '1234567890', is_candidate=False and is_succeeded=True.
  """
  # NOTE(review): nothing in this method selects a non-candidate release;
  # presumably the fixture does - confirm, since the assertion helper skips
  # entries whose labels do not match.
  self.mock.get.side_effect = self._fake_get
  call_queue = self._setup_monitoring_daemon(mock_client)
  try:
    self.mock.get_device_state.return_value = 'device'
    flash.flash_to_latest_build_if_needed()
    args = call_queue.get(timeout=20)
    self._assert_cuttlefish_boot_metric(args['time_series'], '1234567890',
                                        False, True)
  finally:
    # Stop the daemon even when the flash call, the queue wait or an
    # assertion raises, so the daemon started by _setup_monitoring_daemon is
    # not left running.
    monitor._monitoring_daemon.stop()

@patch(
    'clusterfuzz._internal.metrics.monitor.monitoring_v3.MetricServiceClient')
def test_cuttlefish_boot_failure_metric_for_production_fleet(
    self, mock_client):
  """Tests the metric emitted for a failed boot on a production instance.

  The mocked metadata lookup is routed through `_fake_get`; the device-state
  mock is left unconfigured. After the flash routine runs, the time series
  taken from the daemon's call queue is checked for instance id
  '1234567890', is_candidate=False and is_succeeded=False.
  """
  # NOTE(review): nothing in this method selects a non-candidate release;
  # presumably the fixture does - confirm, since the assertion helper skips
  # entries whose labels do not match.
  self.mock.get.side_effect = self._fake_get
  call_queue = self._setup_monitoring_daemon(mock_client)
  try:
    flash.flash_to_latest_build_if_needed()
    args = call_queue.get(timeout=20)
    self._assert_cuttlefish_boot_metric(args['time_series'], '1234567890',
                                        False, False)
  finally:
    # Stop the daemon even when the flash call, the queue wait or an
    # assertion raises, so the daemon started by _setup_monitoring_daemon is
    # not left running.
    monitor._monitoring_daemon.stop()

def test_counter_metric_success(self):
Expand Down
Loading