Skip to content

Commit a542cd5

Browse files
committed
Enhance stream telemetry
Signed-off-by: Ze Gan <ganze718@gmail.com>
1 parent 1f3abf9 commit a542cd5

5 files changed

Lines changed: 196 additions & 5 deletions

File tree

doc/TAM/SAI-Proposal-TAM-stream-telemetry.md

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,12 @@ The existing telemetry solution relies on a process to proactively query stats a
7777
- The vendor SDK should support querying the minimal polling interval for each counter.
7878
- When reconfiguring any stream settings, whether it is the polling interval or the stats list, the existing stream will be interrupted and regenerated.
7979
- If any of the monitored objects is deleted, the existing stream will be interrupted and regenerated.
80+
- The collector is designed to handle single-cycle counter rollovers; however, vendors must ensure that a counter does not roll over more than once between two consecutive collections.
8081

8182
### Phase 2
8283

8384
- Supports updating configuration without interrupting the telemetry stream
85+
- Supports TAM telemetry stats for debugging purposes
8486

8587
## Architecture Design
8688

@@ -971,6 +973,13 @@ typedef struct _sai_stat_st_capability_t
971973
*/
972974
uint64_t minimal_polling_interval;
973975
976+
/**
977+
* @brief Maximal polling interval in nanoseconds
978+
*
979+
* A polling interval greater than this value is not acceptable.
980+
*/
981+
uint64_t maximal_polling_interval;
982+
974983
} sai_stat_st_capability_t;
975984
976985
typedef struct _sai_stat_st_capability_list_t
@@ -1007,3 +1016,60 @@ sai_s32_list_t tel_type_mode[2] = {-1, -1};
10071016
sai_query_attribute_enum_values_capability(switch_id, SAI_OBJECT_TYPE_TAM_TEL_TYPE, SAI_TAM_TEL_TYPE_ATTR_MODE, tel_type_mode)
10081017

10091018
```
1019+
1020+
#### Stats for TAM telemetry
1021+
1022+
```c++
1023+
1024+
/**
1025+
* @brief TAM telemetry counter IDs in sai_get_tam_telemetry_stats_ext() call
1026+
*/
1027+
typedef enum _sai_tam_telemetry_stat_t
1028+
{
1029+
/** Tam telemetry stat range start */
1030+
SAI_TAM_TELEMETRY_STAT_START,
1031+
1032+
/**
1033+
* @brief Total number of telemetry records successfully ingested
1034+
*
1035+
* Indicates the cumulative count of telemetry messages received and accepted
1036+
* into the telemetry system.
1037+
* Unit: Count [uint64_t]
1038+
*/
1039+
SAI_TAM_TELEMETRY_STAT_INGESTED_RECORDS = SAI_TAM_TELEMETRY_STAT_START,
1040+
1041+
/**
1042+
* @brief Number of telemetry records pending read or processing
1043+
*
1044+
* Represents current backlog or pending messages awaiting processing.
1045+
* This is a gauge-type value rather than a monotonically increasing counter.
1046+
* Unit: Count [uint64_t]
1047+
*/
1048+
SAI_TAM_TELEMETRY_STAT_PENDING_READ_RECORDS,
1049+
1050+
/**
1051+
* @brief Total number of telemetry records successfully consumed
1052+
*
1053+
* Indicates the cumulative count of telemetry records that have been processed
1054+
* by the consumer.
1055+
* Unit: Count [uint64_t]
1056+
*/
1057+
SAI_TAM_TELEMETRY_STAT_CONSUMED_RECORDS,
1058+
1059+
/**
1060+
* @brief Total number of telemetry records dropped
1061+
*
1062+
* Represents the cumulative number of telemetry messages discarded due to
1063+
* buffer overflow, timeout, or internal error.
1064+
* Unit: Count [uint64_t]
1065+
*/
1066+
SAI_TAM_TELEMETRY_STAT_DROPPED_RECORDS,
1067+
1068+
/** Tam telemetry stat range end */
1069+
SAI_TAM_TELEMETRY_STAT_END,
1070+
1071+
SAI_TAM_TELEMETRY_STAT_CUSTOM_RANGE_BASE = 0x10000000,
1072+
1073+
} sai_tam_telemetry_stat_t;
1074+
1075+
```

inc/saitam.h

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1716,6 +1716,57 @@ typedef enum _sai_tam_telemetry_attr_t
17161716

17171717
} sai_tam_telemetry_attr_t;
17181718

1719+
/**
1720+
* @brief TAM telemetry counter IDs in sai_get_tam_telemetry_stats_ext() call
1721+
*/
1722+
typedef enum _sai_tam_telemetry_stat_t
1723+
{
1724+
/** Tam telemetry stat range start */
1725+
SAI_TAM_TELEMETRY_STAT_START,
1726+
1727+
/**
1728+
* @brief Total number of telemetry records successfully ingested
1729+
*
1730+
* Indicates the cumulative count of telemetry messages received and accepted
1731+
* into the telemetry system.
1732+
* Unit: Count [uint64_t]
1733+
*/
1734+
SAI_TAM_TELEMETRY_STAT_INGESTED_RECORDS = SAI_TAM_TELEMETRY_STAT_START,
1735+
1736+
/**
1737+
* @brief Number of telemetry records pending read or processing
1738+
*
1739+
* Represents current backlog or pending messages awaiting processing.
1740+
* This is a gauge-type value rather than a monotonically increasing counter.
1741+
* Unit: Count [uint64_t]
1742+
*/
1743+
SAI_TAM_TELEMETRY_STAT_PENDING_READ_RECORDS,
1744+
1745+
/**
1746+
* @brief Total number of telemetry records successfully consumed
1747+
*
1748+
* Indicates the cumulative count of telemetry records that have been processed
1749+
* by the consumer.
1750+
* Unit: Count [uint64_t]
1751+
*/
1752+
SAI_TAM_TELEMETRY_STAT_CONSUMED_RECORDS,
1753+
1754+
/**
1755+
* @brief Total number of telemetry records dropped
1756+
*
1757+
* Represents the cumulative number of telemetry messages discarded due to
1758+
* buffer overflow, timeout, or internal error.
1759+
* Unit: Count [uint64_t]
1760+
*/
1761+
SAI_TAM_TELEMETRY_STAT_DROPPED_RECORDS,
1762+
1763+
/** Tam telemetry stat range end */
1764+
SAI_TAM_TELEMETRY_STAT_END,
1765+
1766+
SAI_TAM_TELEMETRY_STAT_CUSTOM_RANGE_BASE = 0x10000000,
1767+
1768+
} sai_tam_telemetry_stat_t;
1769+
17191770
/**
17201771
* @brief Create and return a telemetry object
17211772
*
@@ -2593,6 +2644,54 @@ sai_status_t sai_tam_telemetry_get_data(
25932644
_Inout_ sai_size_t *buffer_size,
25942645
_Out_ void *buffer);
25952646

2647+
/**
2648+
* @brief Get TAM telemetry statistics counters. Deprecated for backward compatibility.
2649+
*
2650+
* @param[in] tam_telemetry_id TAM telemetry id
2651+
* @param[in] number_of_counters Number of counters in the array
2652+
* @param[in] counter_ids Specifies the array of counter ids
2653+
* @param[out] counters Array of resulting counter values.
2654+
*
2655+
* @return #SAI_STATUS_SUCCESS on success, failure status code on error
2656+
*/
2657+
typedef sai_status_t (*sai_get_tam_telemetry_stats_fn)(
2658+
_In_ sai_object_id_t tam_telemetry_id,
2659+
_In_ uint32_t number_of_counters,
2660+
_In_ const sai_stat_id_t *counter_ids,
2661+
_Out_ uint64_t *counters);
2662+
2663+
/**
2664+
* @brief Get TAM telemetry statistics counters extended.
2665+
*
2666+
* @param[in] tam_telemetry_id TAM telemetry id
2667+
* @param[in] number_of_counters Number of counters in the array
2668+
* @param[in] counter_ids Specifies the array of counter ids
2669+
* @param[in] mode Statistics mode
2670+
* @param[out] counters Array of resulting counter values.
2671+
*
2672+
* @return #SAI_STATUS_SUCCESS on success, failure status code on error
2673+
*/
2674+
typedef sai_status_t (*sai_get_tam_telemetry_stats_ext_fn)(
2675+
_In_ sai_object_id_t tam_telemetry_id,
2676+
_In_ uint32_t number_of_counters,
2677+
_In_ const sai_stat_id_t *counter_ids,
2678+
_In_ sai_stats_mode_t mode,
2679+
_Out_ uint64_t *counters);
2680+
2681+
/**
2682+
* @brief Clear TAM telemetry statistics counters.
2683+
*
2684+
* @param[in] tam_telemetry_id TAM telemetry id
2685+
* @param[in] number_of_counters Number of counters in the array
2686+
* @param[in] counter_ids Specifies the array of counter ids
2687+
*
2688+
* @return #SAI_STATUS_SUCCESS on success, failure status code on error
2689+
*/
2690+
typedef sai_status_t (*sai_clear_tam_telemetry_stats_fn)(
2691+
_In_ sai_object_id_t tam_telemetry_id,
2692+
_In_ uint32_t number_of_counters,
2693+
_In_ const sai_stat_id_t *counter_ids);
2694+
25962695
/**
25972696
* @brief SAI TAM API set
25982697
*/
@@ -2662,6 +2761,10 @@ typedef struct _sai_tam_api_t
26622761
sai_get_tam_counter_subscription_attribute_fn get_tam_counter_subscription_attribute;
26632762
sai_bulk_object_create_fn create_tam_counter_subscriptions;
26642763
sai_bulk_object_remove_fn remove_tam_counter_subscriptions;
2764+
2765+
sai_get_tam_telemetry_stats_fn get_tam_telemetry_stats;
2766+
sai_get_tam_telemetry_stats_ext_fn get_tam_telemetry_stats_ext;
2767+
sai_clear_tam_telemetry_stats_fn clear_tam_telemetry_stats;
26652768
} sai_tam_api_t;
26662769

26672770
/**

inc/saitypes.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1992,6 +1992,12 @@ typedef struct _sai_stat_st_capability_t
19921992
*/
19931993
uint64_t minimal_polling_interval;
19941994

1995+
/**
1996+
* @brief Maximal polling interval in nanoseconds
1997+
*
1998+
* A polling interval greater than this value is not acceptable.
1999+
*/
2000+
uint64_t maximal_polling_interval;
19952001
} sai_stat_st_capability_t;
19962002

19972003
typedef struct _sai_stat_st_capability_list_t

meta/structs.pl

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -164,18 +164,27 @@ sub BuildCommitHistory
164164
# of union may not increase by adding members, and actual union size
165165
# check is performed by sai sanity check
166166

167+
# Structs that are allowed to append new members at the end (ABI extension).
168+
# api_t structs are always allowed. Add other structs here as needed.
169+
my @extensible_structs = (
170+
"sai_switch_health_data_t",
171+
"sai_port_oper_status_notification_t",
172+
"sai_stat_st_capability_t",
173+
);
174+
175+
my %extensible = map { $_ => 1 } @extensible_structs;
176+
167177
if ($currCount != $histCount and not $structTypeName =~ /^sai_\w+_api_t$/
168-
and $structTypeName ne "sai_switch_health_data_t"
169-
and $structTypeName ne "sai_port_oper_status_notification_t")
178+
and not $extensible{$structTypeName})
170179
{
171180
LogError "FATAL: struct $structTypeName member count differs, was $histCount but is $currCount on commit $commit" if $type eq "struct";
172181
}
173182

174183
if ($histCount > $currCount)
175184
{
176-
if ($structTypeName eq "sai_port_oper_status_notification_t")
185+
if ($extensible{$structTypeName})
177186
{
178-
# we allow this to change back backward compatibility
187+
# we allow extensible structs to change for backward compatibility
179188
}
180189
else
181190
{

meta/test.pm

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,14 @@ sub CreateStructUnionSizeCheckTest
646646
$STRUCTS{$name} = $name;
647647

648648
next if $name =~ /^sai_\w+_api_t$/; # skip api structs
649-
next if $name eq "sai_switch_health_data_t";
649+
650+
# Skip extensible structs that are allowed to grow (see also structs.pl)
651+
my %extensible_structs = map { $_ => 1 } (
652+
"sai_switch_health_data_t",
653+
"sai_port_oper_status_notification_t",
654+
"sai_stat_st_capability_t",
655+
);
656+
next if $extensible_structs{$name};
650657

651658
my $upname = uc($name);
652659

0 commit comments

Comments
 (0)