From 490b17aaa0ca454808355215274aebd60eb7ac66 Mon Sep 17 00:00:00 2001 From: Jackson Weber Date: Fri, 13 Feb 2026 13:51:49 -0800 Subject: [PATCH 1/6] Enable customer sdk stats. --- AISKU/src/AISku.ts | 30 +- .../src/Interfaces.ts | 5 + .../src/Sender.ts | 58 +- customer_facing_sdk_stats_spec.md | 434 +++++++++ docs/customer_sdk_stats_implementation.md | 870 ++++++++++++++++++ .../src/constants/InternalConstants.ts | 1 + .../src/core/NotificationManager.ts | 22 +- .../src/core/SdkStatsNotificationCbk.ts | 249 +++++ shared/AppInsightsCore/src/index.ts | 1 + .../interfaces/ai/INotificationListener.ts | 8 + .../src/interfaces/ai/INotificationManager.ts | 8 + 11 files changed, 1681 insertions(+), 5 deletions(-) create mode 100644 customer_facing_sdk_stats_spec.md create mode 100644 docs/customer_sdk_stats_implementation.md create mode 100644 shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts diff --git a/AISKU/src/AISku.ts b/AISKU/src/AISku.ts index 51a55e209..304904504 100644 --- a/AISKU/src/AISku.ts +++ b/AISKU/src/AISku.ts @@ -11,11 +11,11 @@ import { IAutoExceptionTelemetry, IChannelControls, IConfig, IConfigDefaults, IConfiguration, ICookieMgr, ICustomProperties, IDependencyTelemetry, IDiagnosticLogger, IDistributedTraceContext, IDynamicConfigHandler, IEventTelemetry, IExceptionTelemetry, ILoadedPlugin, IMetricTelemetry, INotificationManager, IOTelApi, IOTelSpanOptions, IPageViewPerformanceTelemetry, IPageViewTelemetry, IPlugin, - IReadableSpan, IRequestHeaders, ISpanScope, ITelemetryContext as Common_ITelemetryContext, ITelemetryInitializerHandler, ITelemetryItem, + IReadableSpan, IRequestHeaders, ISdkStatsNotifCbk, ISpanScope, ITelemetryContext as Common_ITelemetryContext, ITelemetryInitializerHandler, ITelemetryItem, ITelemetryPlugin, ITelemetryUnloadState, IThrottleInterval, IThrottleLimit, IThrottleMgrConfig, ITraceApi, ITraceProvider, ITraceTelemetry, IUnloadHook, OTelTimeInput, PropertiesPluginIdentifier, ThrottleMgr, UnloadHandler, WatcherFunction, 
_eInternalMessageId, _throwInternal, addPageHideEventListener, addPageUnloadEventListener, cfgDfMerge, cfgDfValidate, - createDynamicConfig, createOTelApi, createProcessTelemetryContext, createTraceProvider, createUniqueNamespace, doPerf, eLoggingSeverity, + createDynamicConfig, createOTelApi, createProcessTelemetryContext, createSdkStatsNotifCbk, createTraceProvider, createUniqueNamespace, doPerf, eLoggingSeverity, hasDocument, hasWindow, isArray, isFeatureEnabled, isFunction, isNullOrUndefined, isReactNative, isString, mergeEvtNamespace, onConfigChange, parseConnectionString, proxyAssign, proxyFunctions, removePageHideEventListener, removePageUnloadEventListener, useSpan } from "@microsoft/applicationinsights-core-js"; @@ -64,6 +64,9 @@ const IKEY_USAGE = "iKeyUsage"; const CDN_USAGE = "CdnUsage"; const SDK_LOADER_VER = "SdkLoaderVer"; const ZIP_PAYLOAD = "zipPayload"; +const SDK_STATS = "SdkStats"; +const SDK_STATS_VERSION = "#version#"; +const SDK_STATS_FLUSH_INTERVAL = 900000; // 15 minutes in ms const default_limit = { samplingRate: 100, @@ -93,7 +96,8 @@ const defaultConfigValues: IConfigDefaults = { [IKEY_USAGE]: {mode: FeatureOptInMode.enable}, //for versions after 3.1.2 (>= 3.2.0) [CDN_USAGE]: {mode: FeatureOptInMode.disable}, [SDK_LOADER_VER]: {mode: FeatureOptInMode.disable}, - [ZIP_PAYLOAD]: {mode: FeatureOptInMode.none} + [ZIP_PAYLOAD]: {mode: FeatureOptInMode.none}, + [SDK_STATS]: {mode: FeatureOptInMode.enable} }, throttleMgrCfg: cfgDfMerge<{[key:number]: IThrottleMgrConfig}>( { @@ -196,6 +200,7 @@ export class AppInsightsSku implements IApplicationInsights; + let _sdkStatsListener: ISdkStatsNotifCbk; dynamicProto(AppInsightsSku, this, (_self) => { _initDefaults(); @@ -390,6 +395,17 @@ export class AppInsightsSku implements IApplicationInsights { + if (p) { + items.push({ name: "", baseType: p.bT || "EventData" } as ITelemetryItem); + } + }); + return items.length ? items : null; + } + return null; + } + + /** + * Notify listeners of retry events. 
+ */ + function _notifyRetry(payload: IInternalStorageItem[], statusCode: number) { + let mgr = _getNotifyMgr(); + if (mgr && mgr.eventsRetry) { + let items = _extractTelemetryItems(payload); + if (items) { + mgr.eventsRetry(items, statusCode); + } + } + } + /** diff --git a/customer_facing_sdk_stats_spec.md b/customer_facing_sdk_stats_spec.md new file mode 100644 index 000000000..e5585311b --- /dev/null +++ b/customer_facing_sdk_stats_spec.md @@ -0,0 +1,434 @@ +# Customer-Facing SDKStats + +## Owner + +* [Leighton Chen](mailto:lechen@microsoft.com) + +## Approvers + +* [Hector Hernandez Guzman](mailto:hectorh@microsoft.com) +* [Jackson Weber](mailto:jacksonweber@microsoft.com) +* [Jeremy Voss](mailto:jeremyvoss@microsoft.com) +* [Rajkumar Rangaraj](mailto:rajkumar.rangaraj@microsoft.com) +* [Ram Thiru](mailto:Ram.Thiru@microsoft.com) + +## Status + +Stable + +
+Table of Contents + + + +- [Customer-Facing SDKStats](#customer-facing-sdkstats) + - [Owner](#owner) + - [Approvers](#approvers) + - [Status](#status) + - [Overview](#overview) + - [Specifications](#specifications) + - [Key metrics](#key-metrics) + - [Top-level fields](#top-level-fields) + - [iKey](#ikey) + - [SDKVersion](#sdkversion) + - [Item success count](#item-success-count) + - [Item dropped count](#item-dropped-count) + - [Item retry count](#item-retry-count) + - [Getting started](#getting-started) + - [Environment Variable configurations](#environment-variable-configurations) + - [Future considerations](#future-considerations) + - [Include `cloud.*` fields as part of `customDimensions`](#include-cloud-fields-as-part-of-customdimensions) + - [TBD](#tbd) + - [Reference](#reference) + +- + +
+ +## Overview + +SDKStats has proven to be valuable by providing insights into RP integration growth, tracking feature/instrumentation adoption, and monitoring success/failure counts for Application Insights SDKs across languages. +Recognizing the importance of sharing these metrics with customers, our leadership team aims to provide customers with access to specific network SDKStats metrics in their Application Insights resources, +enhancing their self-service experience. + +To enable this functionality, we have decided to emit SDKStats as custom metrics to customers' resources. + +Customers have the option to access these metrics either through the metric explorer on the portal or creating alerts based on Kusto query. + +Customer-facing SDKStats metrics are uniquely identified by metrics' names as shown below. +Ingestion service can determine the type of SDKStats being sent based on metric names. These metrics are ingested into the customer's Application Insights resources. + +## Specifications + +### Key metrics + +Metrics names should follow the OpenTelemetry Specification, more info in the [OpenTelemetry metrics API specification](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/api.md#instrument). + + +| Metrics | Description | Frequency | Required | +| ----------------------------------------- | ----------------------------------------------------------------- | --------- | -------- | +| [Item Success Count](#item-success-count) | Count of successful telemetry items sent to Application Insights. | *Short | Yes | +| [Item Dropped Count](#item-dropped-count) | Count of dropped telemetry items sent to Application Insights. | *Short | Yes | +| [Item Retry Count](#item-retry-count) | Count of retried telemetry items. | *Short | Yes | + + +*Short interval is once every 15 minutes. + +**Note:** These metrics were chosen as the minimal set to send to customers. 
This is to reduce confusion while maintaining the necessary information to maximize the likelihood of customers being able to troubleshoot telemetry +problems themselves (and reduce ICM cases opened). + +**Note:** `Item Success Count` and `Item Dropped Count` should theoretically add up to the total number of telemetry items that are actually sent to the backend. + +### Top-level fields + +Almost all top-level fields of customer SDKStats metrics are automatically populated by ingestion and are the same as those of [Application Insights custom metric](https://msazure.visualstudio.com/One/_git/CommonSchema?path=/v4.0/Mappings/AzureMonitor-AI.md&_a=preview&anchor=microsoft.applicationinsights.metric). +The only exceptions are `sdkVersion`, `iKey`, `cloudRoleInstance` and `cloudRole`, which need to be populated by the SDK itself. + +**Note** that since we are trying to keep the payload as minimal as possible, we do not include any `operation.*` fields, as that information is not as relevant in terms of SDKStats analysis. +We can include `cloud.*` fields since there will be no PII violations when sending metrics to customers' own resource. These are also fields that enable customers to identify issues with specific apps or VMs, helping with targeted problem-solving. + +#### iKey + +This represents the instrumentation key of the customers' Application Insights resource this SDKStats telemetry is being sent to. + +#### SDKVersion + +[sdkVersion](../sdk_version_name.md) is crucial to include as part of SDKStats, as it allows us to identify +RP, Attach type, operating system, language, language version, SDK and SDK version from an encoded string. It +is currently used to filter out sdk name and sdk version for BI reports. `sdkVersion` has an inconsistent format and specifications have only been properly created for OpenTelemetry-based SDKs. 
+ +Therefore, it is crucial to have other indicators for language and version (such as the `customDimensions` defined for each Metric Type below). + +### Item success count + +This metric represents the cumulative item success count during the collection interval. A high and persistent item success count will help increase customers' confidence in using our products and services. + +We send telemetry items in batches. Each batch can contain an array of metrics, logs, and traces. When Breeze returns a 200 status code, the SDK counts the number of telemetry items in the batch and accumulates it using the `Item Success Count` metric. + + +| Telemetry Name | Metric Name | Unit | customDimensions | +| -------------------- | -------------------- | ----- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Item Success Count` | `Item_Success_Count` | Count | **compute.type**: The type of compute (aks, appsvc, functions, springcloud, vm, unknown) that the customer's application is running in
**language**: application insights SDK/Agent name
**version**: version of the application insights SDK/Agent
**telemetry_type**: Type of telemetry that this metric was counting. | + + +\* **telemetry_type** + +The `telemetry_type` field provides clarity on what kinds of data were dropped or ingested, aiding troubleshooting and system insights. The possible values correspond with table names in Application Insights. Below is a list of known values for `telemetry_type`: + +`AVAILABILITY` +`CUSTOM_EVENT` +`CUSTOM_METRIC` +`DEPENDENCY` +`EXCEPTION` +`PAGE_VIEW` +`PERFORMANCE_COUNTER` +`REQUEST` +`TRACE` + +This example shows 3000 item success count from a customer who uses the Java Distro 3.5.1 running Java 17. + +```json +{ + "ver": 1, + "name": "Item Success Count", + "time": "2024-05-14T22:51:46.406Z", + "iKey": "", + "tags": { + "ai.internal.sdkVersion": "java:3.5.1", + "ai.cloud.roleInstance": "", + "ai.cloud.role": "" + }, + "data": { + "baseType": "MetricData", + "baseData": { + "ver": 2, + "metrics": [ + { + "name": "Item_Success_Count", + "value": 3000.0 + } + ], + "properties": { + "language": "java", + "version": "3.5.1", + "computeType": "unknown", + "telemetry_type": "DEPENDENCY" + } + } + } +} +``` + +### Item dropped count + +This metric provides insights into the reasons for data loss, enabling customers, SDK teams, and the Ingestion service team to investigate the returned status codes and identify opportunities for reducing data loss. + +We send telemetry items in batches. Each batch can contain an array of metrics, logs, and traces. When Breeze returns a [non-retryable](./sdkstats.md#retry-counts) status code or an exception is thrown while sending the telemetry, the SDK counts the number of telemetry items in the batch and accumulates it using the `Item Dropped Count` metric. 
+ + +| Telemetry Name | Metric Name | Unit | customDimensions | +| -------------------- | -------------------- | ----- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Item Dropped Count` | `Item_Dropped_Count` | Count | **compute.type**: The type of compute (aks, appsvc, functions, springcloud, vm, unknown) that the customer's application is running in
**language**: application insights SDK/Agent name
**version**: version of the application insights SDK/Agent
**drop.code**
**drop.reason**
**telemetry_type**: Type of telemetry that this metric was counting.
**telemetry_success**: Boolean value indicating whether the tracked customer DEPENDENCY or REQUEST telemetry succeeded (true) or failed (false). Only applicable for DEPENDENCY and REQUEST telemetry types. | + + +\* **drop.code** table below lists the drop codes for different situations that result in dropped items. + + +| drop.code | Description | +| --------- | ----------- | +| CLIENT_EXCEPTION | items dropped due to exceptions thrown or when a response is not returned from Breeze | +| CLIENT_READONLY | items dropped due to READONLY filesystem | +| CLIENT_PERSISTENCE_CAPACITY | items dropped due to disk persistence capacity being exceeded | +| CLIENT_STORAGE_DISABLED | items that would have been retried but are dropped since client has local storage disabled | +| `*NON_RETRYABLE_STATUS_CODE` | items dropped when breeze returns a [non-retryable](./sdkstats.md#retry-counts) status code | + + +*NON_RETRYABLE_STATUS_CODE will be the actual value of the non-retryable status code that was returned (i.e. 401, 403, etc.). + +\* **drop.reason** can be populated if `CLIENT_EXCEPTION` or a status code is the `drop.code`. Provides an informative, low-cardinality description of the exception or reason why the status code was returned. For `CLIENT_EXCEPTION`, the drop reason uses well known exception categories rather than raw exception messages. Categories include specific types (storage, timeout, etc.) with a generic fallback for unknown exceptions. The table below lists current well known exception categories: + +| drop.reason | +| ----------------- | +| Timeout exception | +| Network exception | +| Storage exception | +| Client exception | + +\* **telemetry_type** + +The `telemetry_type` field provides clarity on what kinds of data were dropped or ingested, aiding troubleshooting and system insights. The possible values correspond with table names in Application Insights. 
Below is a list of known values for `telemetry_type`: + +`AVAILABILITY` +`CUSTOM_EVENT` +`CUSTOM_METRIC` +`DEPENDENCY` +`EXCEPTION` +`PAGE_VIEW` +`PERFORMANCE_COUNTER` +`REQUEST` +`TRACE` + +The example below shows a 6 item drop count when a customer reaches their daily quota (status code 402). + +```json + +{ + "ver": 1, + "name": "Item Dropped Count", + "time": "", + "iKey": "", + "tags": { + "ai.internal.sdkVersion": "java:3.5.1", + "ai.cloud.roleInstance": "", + "ai.cloud.role": "" + }, + "data": { + "baseType": "MetricData", + "baseData": { + "ver": 2, + "metrics": [ + { + "name": "Item_Dropped_Count", + "value": 6.0 + } + ], + "properties": { + "language": "java", + "version": "3.5.1", + "computeType": "", + "drop.code": "402", + "drop.reason": "Exceeded daily quota", + "telemetry_type": "DEPENDENCY", + "telemetry_success": false + } + } + } +} +``` + +Here is another example that shows a 12 item drop count when a customer encounters a storage exception. + +```json + +{ + "ver": 1, + "name": "Item Dropped Count", + "time": "", + "iKey": "", + "tags": { + "ai.internal.sdkVersion": "python:3.11.9", + "ai.cloud.roleInstance": "", + "ai.cloud.role": "" + }, + "data": { + "baseType": "MetricData", + "baseData": { + "ver": 2, + "metrics": [ + { + "name": "Item_Dropped_Count", + "value": 12.0 + } + ], + "properties": { + "language": "python", + "version": "3.11.9", + "computeType": "", + "drop.code": "CLIENT_EXCEPTION", + "drop.reason": "Storage exception", + "telemetry_type": "DEPENDENCY" + } + } + } +} +``` + +### Item retry count + +This metric represents the cumulative item retry count during the collection interval. +Customers can benefit from this metric by gaining a better understanding of why the retry item count is high. For example, if they experience a high retry count of unauthorized (401) or forbidden (403) status codes, they might need to double-check their access permissions. 
Similarly, if they encounter a high retry count of too many requests (429), it could be an opportunity for them to evaluate their system for an upgrade to handle more requests. + + +| Telemetry Name | Metric Name | Unit | customDimensions | +| ------------------ | ------------------ | ----- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Item Retry Count` | `Item_Retry_Count` | Count | **compute.type** (aks, appsvc, functions, springcloud, vm, unknown), language, version
**retry.code**
**retry.reason**
**telemetry_type**: Type of telemetry that this metric was counting. | + + +\* **retry.code** table below lists the retry codes for different situations that result in retried items. + + +| retry.code | Description | +| ---------- | ----------- | +| CLIENT_EXCEPTION | items to be retried when there is a runtime exception, like network failure, DNS name lookup failure excluding timeout exceptions that result in retryable scenarios. | +| CLIENT_TIMEOUT | items to be retried when there is a timeout exception | +| `*RETRYABLE_STATUS_CODE` | items to be retried when breeze returns a [retryable](./sdkstats.md#retry-counts) status code | + + +*RETRYABLE_STATUS_CODE will be the actual value of the retryable status code that was returned. + +\* **retry.reason** can be populated if `CLIENT_EXCEPTION` or a status code is the `retry.code`. Provides an informative, low-cardinality description of the exception. For `CLIENT_EXCEPTION`, the exception categorization outlined in the [Item Dropped Count](#item-dropped-count) section is applied here as well to ensure uniformity across metrics. + +\* **telemetry_type** + +The `telemetry_type` field provides clarity on what kinds of data were dropped or ingested, aiding troubleshooting and system insights. The possible values correspond with table names in Application Insights. Below is a list of known values for `telemetry_type`: + +`AVAILABILITY` +`CUSTOM_EVENT` +`CUSTOM_METRIC` +`DEPENDENCY` +`EXCEPTION` +`PAGE_VIEW` +`PERFORMANCE_COUNTER` +`REQUEST` +`TRACE` + +Here is an example that shows 20 retry counts when Breeze returns a status code 429 (too many requests) for a customer using the Java distro 3.5.1 on AKS running Java 21. 
+ +```json +{ + "ver": 1, + "name": "Item Retry Count", + "time": "2024-05-14T22:51:46.406Z", + "iKey": "", + "tags": { + "ai.internal.sdkVersion": "java:3.5.1", + "ai.cloud.roleInstance": "", + "ai.cloud.role": "" + }, + "data": { + "baseType": "MetricData", + "baseData": { + "ver": 2, + "metrics": [ + { + "name": "Item_Retry_Count", + "value": 20.0 + } + ], + "properties": { + "language": "java", + "version": "3.5.1", + "computeType": "aks", + "retry.code": "429", + "retry.reason": "Too many requests" + } + } + } +} +``` + +Here is another example that shows 20 retry counts when a TimeoutException happens on the client side using the Java distro 3.5.1 on AKS running Java 21. + +```json +{ + "ver": 1, + "name": "Item Retry Count", + "time": "2024-05-14T22:51:46.406Z", + "iKey": "", + "tags": { + "ai.internal.sdkVersion": "java:3.5.1", + "ai.cloud.roleInstance": "", + "ai.cloud.role": "" + }, + "data": { + "baseType": "MetricData", + "baseData": { + "ver": 2, + "metrics": [ + { + "name": "Item_Retry_Count", + "value": 20.0 + } + ], + "properties": { + "language": "java", + "version": "3.5.1", + "computeType": "aks", + "retry.code": "CLIENT_TIMEOUT", + "exception.message": "TimeoutException: timeout while sending telemetry" + } + } + } +} +``` + +## Getting started + +Customers can access the success count, item drop count and item retry count metrics through the Application Insights portal by navigating to their Application Insights resource, which contains a dedicated dashboard for easier access and analysis. + +![Where to find the customer sdk stats dashboard on Application Insights resource](../../Media/customer-sdk-stats-dashboard.png) + +## Environment Variable configurations + +`Enabled` + +Currently, all customer sdk stats [metrics](#key-metrics) are on-by-default. In order to stop sending these metrics, users can disable this feature by setting environment variable `APPLICATIONINSIGHTS_SDKSTATS_DISABLED` to `true`. 
+ +`shortInterval` + +* SDKs MAY provide users an optional configuration for changing the short export interval, which currently defaults to 15 minutes/900 seconds. +* Configured through `APPLICATIONINSIGHTS_SDKSTATS_EXPORT_INTERVAL` in seconds + +## Migrations for existing implementations + +- For implementations that supported these metrics as opt-in and are deciding to move to on-by-default, backward compatibility does NOT have to be maintained for the old environment variable `APPLICATIONINSIGHTS_SDKSTATS_ENABLED_PREVIEW`. The new environment variable will also ALWAYS take priority over the opt-in environment variable. Newer versions of the SDK that support on-by-default can also ignore the `APPLICATIONINSIGHTS_SDKSTATS_ENABLED_PREVIEW` environment variable moving forward. + +- For SDK implementations that supported the old `preview.*` naming conventions, SDKs MUST start sending the new metric names if deciding to move this feature to on-by-default. The old naming conventions will be aggregated together with the new names in the dashboards. + +## Future considerations + +### Include `cloud.*` fields as part of `customDimensions` + +The fields `cloud_RoleName` and `cloud_RoleInstance` are already queryable in logs. In the future, we may consider adding these as custom dimensions, which will be mapped to metrics dimensions in MDM for enhanced querying and alerts. This is not as high of a priority since customers are already able to do this today with log querying. + +## TBD + +1. 
To enable or disable this feature, we will provide customers with configuration options such as: + +- Allow configuration of which SDKStats metrics to collect by default +- Kill switch configuration in Control Plane + +## Reference + +[Status codes from Breeze](../sdk_behavior_breeze.md) +[Internal SDKStats spec](./sdkstats.md) \ No newline at end of file diff --git a/docs/customer_sdk_stats_implementation.md b/docs/customer_sdk_stats_implementation.md new file mode 100644 index 000000000..d383506f8 --- /dev/null +++ b/docs/customer_sdk_stats_implementation.md @@ -0,0 +1,870 @@ +# Customer-Facing SDK Stats — Implementation Plan + +## Table of Contents + +- [Overview](#overview) +- [Goals & Constraints](#goals--constraints) +- [Architecture](#architecture) + - [High-Level Data Flow](#high-level-data-flow) + - [Key Design Decisions](#key-design-decisions) +- [Refactoring the Existing StatsBeat Code](#refactoring-the-existing-statsbeat-code) + - [What Changes](#what-changes) + - [What Stays](#what-stays) +- [Implementation Details](#implementation-details) + - [Phase 1: Sender Notification Gaps](#phase-1-sender-notification-gaps) + - [Phase 2: Refactor StatsBeat → Customer SDK Stats Listener](#phase-2-refactor-statsbeat--customer-sdk-stats-listener) + - [Phase 3: AISKU Integration](#phase-3-aisku-integration) + - [Phase 4: Configuration & Feature Gating](#phase-4-configuration--feature-gating) +- [File-by-File Changes](#file-by-file-changes) +- [Interfaces & Types](#interfaces--types) +- [Metric Payload Format](#metric-payload-format) +- [Bundle Size Strategy](#bundle-size-strategy) +- [Testing Plan](#testing-plan) +- [Rollout & Migration](#rollout--migration) + +--- + +## Overview + +This document describes the implementation plan for **Customer-Facing SDK Stats** in the Application Insights JavaScript SDK. 
The feature emits three custom metrics — `Item_Success_Count`, `Item_Dropped_Count`, and `Item_Retry_Count` — to the **customer's own Application Insights resource** every 15 minutes, enabling self-service troubleshooting of telemetry delivery issues. + +The implementation **refactors** the existing (commented-out) internal StatsBeat code to: + +1. **Use the Notification Manager** instead of custom sender logic — the Sender channel fires `eventsSent` / `eventsDiscarded` / retry notifications, and a listener accumulates counts. +2. **Send metrics via `core.track()`** to the customer's own instrumentation key (not a stats-specific iKey). +3. **Minimize bundle size impact** using the project's established patterns (`dynamicProto`, `const enum`, `__DynamicConstants`, no ES6+ operators, etc.). + +Reference spec: [Customer-Facing SDKStats Spec](../customer_facing_sdk_stats_spec.md) + +--- + +## Goals & Constraints + +| Goal | Detail | +|------|--------| +| **Three metrics** | `Item_Success_Count`, `Item_Dropped_Count`, `Item_Retry_Count` — spec-required names and dimensions | +| **Customer's iKey** | Metrics are sent to the customer's own AI resource, NOT a separate stats endpoint | +| **15-minute interval** | Counters accumulate and flush on a configurable short interval (default 900s) | +| **On by default** | Feature is enabled by default; kill switch via `featureOptIn` config | +| **Minimal size impact** | Target < 2KB minified gzip addition to the AISKU bundle | +| **ES5 compatible** | No `?.`, `??`, `...`, `async/await` | +| **No separate sender** | Reuse `core.track()` → existing Sender pipeline → customer's endpoint | +| **Notification-driven** | Counters are fed from `INotificationListener` callbacks, not by modifying Sender internals | + +--- + +## Architecture + +### High-Level Data Flow + +``` +┌──────────────────────────────────────────────────────────────────────────┐ +│ Application Code │ +│ appInsights.trackEvent(...) 
│ +└──────────────┬───────────────────────────────────────────────────────────┘ + │ + ▼ +┌──────────────────────────────────────────────────────────────────────────┐ +│ AppInsightsCore │ +│ core.track(item) │ +│ │ │ +│ Plugin Pipeline │ +│ │ │ +│ ┌────────────┼────────────┐ │ +│ ▼ ▼ ▼ │ +│ Analytics Properties Sender │ +│ │ │ +│ ┌────────────┴────────────┐ │ +│ │ HTTP Send (fetch/xhr) │ │ +│ │ │ │ +│ ▼ ▼ │ +│ On Success On Error/Retry │ +│ │ │ │ +│ ▼ ▼ │ +│ ┌─────────────────────────────────────────────┐ │ +│ │ NotificationManager dispatches: │ │ +│ │ • eventsSent(items) │ │ +│ │ • eventsDiscarded(items, reason) [NEW] │ │ +│ │ • eventsRetry(items, reason) [NEW] │ │ +│ └────────────────┬────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌─────────────────────────────────────────────┐ │ +│ │ SdkStatsNotificationListener │ │ +│ │ (INotificationListener implementation) │ │ +│ │ │ │ +│ │ Accumulates per 15-min window: │ │ +│ │ • success counts (by telemetry_type) │ │ +│ │ • dropped counts (by code + type) │ │ +│ │ • retry counts (by code + type) │ │ +│ │ │ │ +│ │ On timer flush: │ │ +│ │ → core.track(Item_Success_Count metric) │ │ +│ │ → core.track(Item_Dropped_Count metric) │ │ +│ │ → core.track(Item_Retry_Count metric) │ │ +│ └─────────────────────────────────────────────┘ │ +│ │ +│ core.track(metric) → same pipeline → Sender → customer endpoint │ +└──────────────────────────────────────────────────────────────────────────┘ +``` + +### Key Design Decisions + +1. **Notification-driven accumulation**: Rather than hooking into Sender internals, the SDK Stats listener subscribes to `INotificationManager` events. This keeps the Sender code clean and makes SDK Stats a loosely-coupled consumer. + +2. **`core.track()` for emission**: SDK Stats metrics flow through the same telemetry pipeline as customer telemetry. They use the customer's own iKey (already set on the core) and go through sampling, batching, and sending like any other metric. 
Because these are low-volume (3 metrics per 15 min per dimension), the overhead is negligible. + +3. **No `IStatsBeat` interface reuse for customer stats**: The existing `IStatsBeat` / `IStatsMgr` / `INetworkStatsbeat` interfaces were designed for internal stats with endpoint-to-iKey mapping and glob-based routing. Customer SDK Stats has a simpler model (everything goes to the customer's iKey), so we create a new, lighter-weight listener. The old interfaces can be retained for future internal stats if needed. + +4. **Listener registered in AISKU**: The `SdkStatsNotificationListener` is created and registered during AISKU initialization. This keeps it out of the core package and makes it tree-shakeable for consumers who don't use the full SKU. + +--- + +## Refactoring the Existing StatsBeat Code + +### What Changes + +| File | Change | +|------|--------| +| `shared/AppInsightsCore/src/core/StatsBeat.ts` | **Retain** the file but decouple from customer stats. The internal stats manager (`createStatsMgr`) remains available for future internal stats use if needed. No immediate code changes needed since it's already commented out of exports. | +| `channels/applicationinsights-channel-js/src/Sender.ts` | **Add** notification dispatch calls for `eventsSent`, `eventsDiscarded`, and a new `eventsRetry` notification. Existing commented-out statsbeat code is left in place for reference. | +| `shared/AppInsightsCore/src/index.ts` | **Keep** statsbeat exports commented out. They are not needed for customer SDK stats. | + +### What Stays + +- All existing commented-out statsbeat code in `Sender.ts`, `AppInsightsCore.ts`, and `IAppInsightsCore.ts` remains untouched for potential future internal stats use. +- `StatsBeat.ts`, its interfaces (`IStatsBeat`, `IStatsMgr`, `INetworkStatsbeat`, etc.), and enums (`eStatsType`) remain in the codebase for potential future internal stats use. They are not exported and add zero bundle cost. 
+- The `INotificationListener` / `INotificationManager` infrastructure — this is the foundation for the new approach. + +--- + +## Implementation Details + +### Phase 1: Sender Notification Gaps + +**Problem**: The current `applicationinsights-channel-js` Sender only fires `eventsSendRequest`. It does NOT fire `eventsSent` or `eventsDiscarded`. The customer SDK Stats listener needs these events to count successes, drops, and retries. + +**Changes to `Sender.ts`**: + +#### 1a. Fire `eventsSent` on success + +In `_onSuccess()`, after clearing the buffer, dispatch `eventsSent` through the notification manager: + +```typescript +function _onSuccess(payload: IInternalStorageItem[], countOfItemsInPayload: number) { + _self._buffer && _self._buffer.clearSent(payload); + // Notify listeners of successful send + let mgr = _getNotifyMgr(); + if (mgr) { + // Extract original ITelemetryItem[] from the payload + let items = _extractTelemetryItems(payload); + items && mgr.eventsSent(items); + } +} +``` + +#### 1b. Fire `eventsDiscarded` on non-retryable failure + +In `_onError()`, dispatch `eventsDiscarded` with the appropriate reason: + +```typescript +function _onError(payload: IInternalStorageItem[], message: string, event?: ErrorEvent) { + _throwInternal(_self.diagLog(), + eLoggingSeverity.WARNING, + _eInternalMessageId.OnError, + "Failed to send telemetry.", + { message }); + _self._buffer && _self._buffer.clearSent(payload); + // Notify listeners of discarded events + let mgr = _getNotifyMgr(); + if (mgr) { + let items = _extractTelemetryItems(payload); + items && mgr.eventsDiscarded(items, eEventsDiscardedReason.NonRetryableStatus); + } +} +``` + +#### 1c. Add retry notification + +The `INotificationListener` currently has no retry-specific callback. We have two options: + +**Option A (Recommended): Reuse `eventsSendRequest` with retry reason** +The existing `eventsSendRequest(sendReason, isAsync)` already has `SendRequestReason.Retry = 5`. 
We can extend this to also pass the events being retried, or add a lightweight new callback. + +**Option B: Add a new `eventsRetry` callback to `INotificationListener`** +Add `eventsRetry?(events: ITelemetryItem[], statusCode: number): void` to the listener interface. + +**Recommendation**: Option B — a dedicated `eventsRetry` callback — because it provides the events and status code needed for SDK Stats dimensions. This follows the existing pattern of `eventsSent` and `eventsDiscarded`. + +In `_checkResponsStatus()` and `_resendPayload()`, fire the retry notification: + +```typescript +// In the retry path of _checkResponsStatus: +if (!_isRetryDisabled && _isRetriable(status)) { + _resendPayload(payload); + // Notify listeners of retry + let mgr = _getNotifyMgr(); + if (mgr && mgr.eventsRetry) { + let items = _extractTelemetryItems(payload); + items && mgr.eventsRetry(items, status); + } + // ... existing logging +} +``` + +#### 1d. Helper: `_extractTelemetryItems` + +A helper that extracts the original `ITelemetryItem[]` from the Sender's `IInternalStorageItem[]` payload. Each `IInternalStorageItem` wraps an `ITelemetryItem` in its `.item` property. + +```typescript +function _extractTelemetryItems(payload: IInternalStorageItem[]): ITelemetryItem[] { + if (payload && payload.length) { + let items: ITelemetryItem[] = []; + arrForEach(payload, (p) => { + p && p.item && items.push(p.item); + }); + return items.length ? items : null; + } + return null; +} +``` + +### Phase 2: Refactor StatsBeat → Customer SDK Stats Listener + +Create a new file for the customer SDK stats listener. This is the core of the feature. + +#### New file: `shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts` + +This file exports a factory function `createSdkStatsNotifCbk()` that returns an `INotificationListener`. The listener: + +1. Accumulates success/dropped/retry counts in a lightweight counter object +2. 
Groups counts by `telemetry_type` (and by `drop.code`/`retry.code` for dropped/retried items) +3. On a 15-minute timer, flushes accumulated counts as `Item_Success_Count`, `Item_Dropped_Count`, `Item_Retry_Count` metrics via a provided `track` callback +4. Resets counters after flush + +```typescript +// shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts + +import { ITimerHandler, scheduleTimeout } from "@nevware21/ts-utils"; +import { INotificationListener } from "../interfaces/ai/INotificationListener"; +import { ITelemetryItem } from "../interfaces/ai/ITelemetryItem"; + +const FLUSH_INTERVAL = 900000; // 15 min default +const MET_SUCCESS = "Item_Success_Count"; +const MET_DROPPED = "Item_Dropped_Count"; +const MET_RETRY = "Item_Retry_Count"; + +// Map baseType to spec telemetry_type values +const _typeMap: { [key: string]: string } = { + "EventData": "CUSTOM_EVENT", + "MetricData": "CUSTOM_METRIC", + "RemoteDependencyData": "DEPENDENCY", + "ExceptionData": "EXCEPTION", + "PageviewData": "PAGE_VIEW", + "PageviewPerformanceData": "PAGE_VIEW", + "MessageData": "TRACE", + "RequestData": "REQUEST", + "AvailabilityData": "AVAILABILITY" +}; + +export interface ISdkStatsConfig { + /** The track function to call when flushing metrics. Typically core.track(). */ + trk: (item: ITelemetryItem) => void; + /** SDK language identifier, e.g. "JavaScript" */ + lang: string; + /** SDK version string */ + ver: string; + /** Flush interval override in ms (default 900000 = 15 min) */ + int?: number; +} + +export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): INotificationListener & { flush: () => void } { + // ... 
accumulator state, timer, flush logic + // See "Detailed Implementation" section below +} +``` + +**Detailed Implementation Sketch** (follows project patterns): + +```typescript +export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): INotificationListener & { flush: () => void } { + let _successCounts: Record<string, number> = {}; // telemetry_type → count + let _droppedCounts: Record<string, Record<string, number>> = {}; // dropCode → { telemetry_type → count } + let _retryCounts: Record<string, Record<string, number>> = {}; // retryCode → { telemetry_type → count } + let _timer: ITimerHandler; + let _interval = cfg.int || FLUSH_INTERVAL; + + function _ensureTimer() { + if (!_timer) { + _timer = scheduleTimeout(_flush, _interval); + } + } + + function _getTelType(item: ITelemetryItem): string { + return _typeMap[item.baseType] || "CUSTOM_EVENT"; + } + + function _incSuccess(items: ITelemetryItem[]) { + for (let i = 0; i < items.length; i++) { + let t = _getTelType(items[i]); + _successCounts[t] = (_successCounts[t] || 0) + 1; + } + _ensureTimer(); + } + + function _incDropped(items: ITelemetryItem[], code: string) { + if (!_droppedCounts[code]) { + _droppedCounts[code] = {}; + } + let bucket = _droppedCounts[code]; + for (let i = 0; i < items.length; i++) { + let t = _getTelType(items[i]); + bucket[t] = (bucket[t] || 0) + 1; + } + _ensureTimer(); + } + + function _incRetry(items: ITelemetryItem[], code: string) { + if (!_retryCounts[code]) { + _retryCounts[code] = {}; + } + let bucket = _retryCounts[code]; + for (let i = 0; i < items.length; i++) { + let t = _getTelType(items[i]); + bucket[t] = (bucket[t] || 0) + 1; + } + _ensureTimer(); + } + + function _createMetric(name: string, value: number, props: { [key: string]: any }): ITelemetryItem { + // Merge standard dimensions + props["language"] = cfg.lang; + props["version"] = cfg.ver; + props["computeType"] = "unknown"; // Browser SDK cannot reliably detect compute type + + return { + name: name, + baseType: "MetricData", + baseData: { + ver: 2, + metrics: [{ name: name, value:
value }], + properties: props + } + }; + } + + function _flush() { + _timer && _timer.cancel(); + _timer = null; + + // Flush success counts + for (var telType in _successCounts) { + if (_successCounts.hasOwnProperty(telType)) { + var cnt = _successCounts[telType]; + if (cnt > 0) { + cfg.trk(_createMetric(MET_SUCCESS, cnt, { + "telemetry_type": telType + })); + } + } + } + + // Flush dropped counts + for (var code in _droppedCounts) { + if (_droppedCounts.hasOwnProperty(code)) { + var bucket = _droppedCounts[code]; + for (var telType in bucket) { + if (bucket.hasOwnProperty(telType)) { + var cnt = bucket[telType]; + if (cnt > 0) { + cfg.trk(_createMetric(MET_DROPPED, cnt, { + "telemetry_type": telType, + "drop.code": code + })); + } + } + } + } + } + + // Flush retry counts + for (var code in _retryCounts) { + if (_retryCounts.hasOwnProperty(code)) { + var bucket = _retryCounts[code]; + for (var telType in bucket) { + if (bucket.hasOwnProperty(telType)) { + var cnt = bucket[telType]; + if (cnt > 0) { + cfg.trk(_createMetric(MET_RETRY, cnt, { + "telemetry_type": telType, + "retry.code": code + })); + } + } + } + } + } + + // Reset accumulators + _successCounts = {}; + _droppedCounts = {}; + _retryCounts = {}; + } + + return { + eventsSent: _incSuccess, + eventsDiscarded: (events: ITelemetryItem[], reason: number) => { + // Map EventsDiscardedReason to spec drop codes + var code = _mapDropCode(reason); + _incDropped(events, code); + }, + eventsRetry: (events: ITelemetryItem[], statusCode: number) => { + var code = "" + statusCode; // numeric status code as string per spec + _incRetry(events, code); + }, + flush: _flush, + unload: () => { + // Flush remaining counts before unload + _flush(); + _timer && _timer.cancel(); + _timer = null; + } + }; +} + +function _mapDropCode(reason: number): string { + // Maps eEventsDiscardedReason to spec drop.code values + // 0=Unknown → "CLIENT_EXCEPTION" + // 1=NonRetryableStatus → will be overridden by actual status code in 
enhanced notification + // 2=InvalidEvent → "CLIENT_EXCEPTION" + // 5=QueueFull → "CLIENT_EXCEPTION" + // 6=BeaconSendFailure → "CLIENT_EXCEPTION" + switch (reason) { + case 1: return "NonRetryableStatus"; // Overridden with actual code when available + default: return "CLIENT_EXCEPTION"; + } +} +``` + +> **NOTE on `eventsDiscarded` enhancement**: To populate `drop.code` with the actual HTTP status code (e.g., `"402"`, `"403"`), we need the Sender to pass the status code when calling `eventsDiscarded`. We propose adding an optional 4th parameter or using a convention where the `sendType` parameter carries the status code for `NonRetryableStatus` scenarios. See the [Interfaces & Types](#interfaces--types) section. + +### Phase 3: AISKU Integration + +The listener is created and registered during AISKU initialization in `AISKU/src/AISku.ts`. + +```typescript +// Inside the dynamicProto constructor of AppInsightsSku + +// After core.initialize() and channel setup: +if (isFeatureEnabled("SdkStats", cfg, true)) { // on by default + let statsListener = createSdkStatsNotifCbk({ + trk: (item) => { core.track(item); }, + lang: "JavaScript", + ver: EnvelopeCreator.Version, + int: (cfg.sdkStatsExportInterval || 900) * 1000 // config value is in seconds; int expects milliseconds + }); + core.addNotificationListener(statsListener); + // Store reference for unload + _sdkStatsListener = statsListener; +} +``` + +On unload: +```typescript +if (_sdkStatsListener) { + _sdkStatsListener.flush(); + core.removeNotificationListener(_sdkStatsListener); + _sdkStatsListener = null; +} +``` + +### Phase 4: Configuration & Feature Gating + +#### Configuration surface + +| Config Property | Type | Default | Description | +|----------------|------|---------|-------------| +| `featureOptIn.SdkStats.mode` | `FeatureOptInMode` | `enable` (3) | Enables/disables the feature. Set to `disable` (1) to opt out. | +| `sdkStatsExportInterval` | `number` | `900` (seconds) | Export interval in seconds (default 15 min, min 60s). Converted to milliseconds before being passed to the listener as `int`.
| + +#### Feature gating + +```typescript +// The feature is ON by default using isFeatureEnabled with default=true +if (isFeatureEnabled("SdkStats", cfg, true)) { + // Initialize listener +} +``` + +This means: +- **No config** → enabled (default) +- `featureOptIn: { "SdkStats": { mode: FeatureOptInMode.disable } }` → disabled +- `featureOptIn: { "SdkStats": { mode: FeatureOptInMode.enable } }` → enabled (explicit) + +--- + +## File-by-File Changes + +### New Files + +| File | Description | +|------|-------------| +| `shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts` | Factory function `createSdkStatsNotifCbk()` → `INotificationListener` that accumulates and flushes SDK Stats metrics | + +### Modified Files + +| File | Changes | +|------|---------| +| **`shared/AppInsightsCore/src/interfaces/ai/INotificationListener.ts`** | Add optional `eventsRetry?(events: ITelemetryItem[], statusCode: number): void` callback | +| **`shared/AppInsightsCore/src/interfaces/ai/INotificationManager.ts`** | Add `eventsRetry?(events: ITelemetryItem[], statusCode: number): void` dispatch method | +| **`shared/AppInsightsCore/src/core/NotificationManager.ts`** | Implement `eventsRetry` dispatch using `_runListeners` (same pattern as `eventsSent`) | +| **`shared/AppInsightsCore/src/constants/InternalConstants.ts`** | Add `STR_EVENTS_RETRY = "eventsRetry"` constant | +| **`shared/AppInsightsCore/src/index.ts`** | Export `createSdkStatsNotifCbk` and `ISdkStatsConfig` | +| **`channels/applicationinsights-channel-js/src/Sender.ts`** | (1) Fire `eventsSent` in `_onSuccess`, (2) Fire `eventsDiscarded` in `_onError`, (3) Fire `eventsRetry` in retry paths of `_checkResponsStatus`, (4) Add `_extractTelemetryItems` helper | +| **`channels/applicationinsights-channel-js/src/Interfaces.ts`** | Ensure `IInternalStorageItem.item` is typed as `ITelemetryItem` (verify this already exists) | +| **`AISKU/src/AISku.ts`** | Register `SdkStatsNotificationListener` on initialization, unregister on 
teardown | + +### Files to Clean Up (Remove Dead Code) + +| File | Action | +|------|--------| +| `shared/AppInsightsCore/Tests/Unit/src/StatsBeat.Tests.ts` | Remove or update for new approach | +| `channels/applicationinsights-channel-js/Tests/Unit/src/StatsBeat.tests.ts` | Remove or update | + +--- + +## Interfaces & Types + +### Updated `INotificationListener` + +```typescript +export interface INotificationListener { + // ... existing callbacks ... + + /** + * [Optional] A function called when events are being retried. + * @param events - The array of events that are being retried. + * @param statusCode - The HTTP status code that triggered the retry. + * @since 3.x.x + */ + eventsRetry?(events: ITelemetryItem[], statusCode: number): void; +} +``` + +### Updated `INotificationManager` + +```typescript +export interface INotificationManager { + // ... existing methods ... + + /** + * Notification for events being retried. + * @param events - The array of events that are being retried. + * @param statusCode - The HTTP status code that triggered the retry. 
+ */ + eventsRetry?(events: ITelemetryItem[], statusCode: number): void; +} +``` + +### Enhanced `eventsDiscarded` Convention + +To carry the HTTP status code for `NonRetryableStatus` drops, we use the existing `sendType` parameter as the status code when `reason === eEventsDiscardedReason.NonRetryableStatus`: + +```typescript +// In Sender, when a non-retryable status code triggers a drop: +mgr.eventsDiscarded(items, eEventsDiscardedReason.NonRetryableStatus, statusCode); +// ^^^^^^^^^^ +// sendType parameter repurposed as HTTP status code +``` + +The listener checks `reason === 1 (NonRetryableStatus)` and reads `sendType` as the actual drop code: + +```typescript +eventsDiscarded: (events, reason, sendType) => { + var code; + if (reason === 1 && sendType) { // NonRetryableStatus + code = "" + sendType; // HTTP status code as string + } else { + code = "CLIENT_EXCEPTION"; + } + _incDropped(events, code); +} +``` + +### `ISdkStatsConfig` + +```typescript +export interface ISdkStatsConfig { + /** Track function (typically core.track bound) */ + trk: (item: ITelemetryItem) => void; + /** SDK language name */ + lang: string; + /** SDK version */ + ver: string; + /** Flush interval in ms (default 900000) */ + int?: number; +} +``` + +--- + +## Metric Payload Format + +All three metrics follow the Application Insights custom metric envelope format. The metrics are sent as `ITelemetryItem` objects via `core.track()`, which means the Sender will wrap them in standard envelopes with the customer's iKey and `ai.internal.sdkVersion` tag automatically. 
+ +### Item_Success_Count + +```json +{ + "name": "Item Success Count", + "baseType": "MetricData", + "baseData": { + "ver": 2, + "metrics": [{ "name": "Item_Success_Count", "value": 150 }], + "properties": { + "language": "JavaScript", + "version": "3.x.x", + "computeType": "unknown", + "telemetry_type": "DEPENDENCY" + } + } +} +``` + +### Item_Dropped_Count + +```json +{ + "name": "Item Dropped Count", + "baseType": "MetricData", + "baseData": { + "ver": 2, + "metrics": [{ "name": "Item_Dropped_Count", "value": 5 }], + "properties": { + "language": "JavaScript", + "version": "3.x.x", + "computeType": "unknown", + "telemetry_type": "TRACE", + "drop.code": "402", + "drop.reason": "Exceeded daily quota" + } + } +} +``` + +### Item_Retry_Count + +```json +{ + "name": "Item Retry Count", + "baseType": "MetricData", + "baseData": { + "ver": 2, + "metrics": [{ "name": "Item_Retry_Count", "value": 20 }], + "properties": { + "language": "JavaScript", + "version": "3.x.x", + "computeType": "unknown", + "telemetry_type": "DEPENDENCY", + "retry.code": "429" + } + } +} +``` + +--- + +## Bundle Size Strategy + +The following techniques keep the SDK Stats feature's bundle size impact minimal: + +### 1. Factory Function (No Class) + +`createSdkStatsNotifCbk` returns a plain object literal implementing `INotificationListener`. No `dynamicProto` needed because there is no class — just a closure returning an object. This is the smallest possible pattern for a self-contained module. + +### 2. String Constant Reuse + +Metric names and property keys are assigned to short local `var` names: +```typescript +var MET_S = "Item_Success_Count"; +var MET_D = "Item_Dropped_Count"; +var MET_R = "Item_Retry_Count"; +var P_LANG = "language"; +var P_VER = "version"; +``` +The minifier compresses these to single characters, saving bytes across all usages. + +### 3. Shared `_createMetric` Helper + +One function creates all three metric types, differing only by name and properties. 
Avoids code duplication for envelope construction. + +### 4. `for..in` Loops Instead of `Object.keys().forEach()` + +Uses `for..in` with `hasOwnProperty` checks — ES5 compatible, no helper imports. + +### 5. No ES6+ Operators + +- No `?.` (optional chaining) — use explicit null checks +- No `??` (nullish coalescing) — use `||` +- No `...` (spread) — use manual property assignment +- No `async/await` — use `scheduleTimeout` for timer + +### 6. `const enum` for Internal Constants + +Any new enum values (e.g., for drop reason mapping) use `const enum` to inline as integers. + +### 7. `__DynamicConstants` Integration + +After implementation, run `npm run lint-fix` and the Grunt build task to auto-generate `__DynamicConstants.ts` entries for frequently-used strings. + +### 8. Tree-Shakeable Export + +The factory function is marked with `/*#__NO_SIDE_EFFECTS__*/` so bundlers can tree-shake it if unused: + +```typescript +/*#__NO_SIDE_EFFECTS__*/ +export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): INotificationListener & { flush: () => void } { + // ... +} +``` + +### 9. Place in Core, Not a New Package + +Placing the listener in `shared/AppInsightsCore` avoids a new package/build artifact. The function is small enough (~1KB minified) to not warrant its own package. 
+ +--- + +## Testing Plan + +### Unit Tests + +#### `shared/AppInsightsCore/Tests/Unit/src/SdkStatsNotificationCbk.Tests.ts` + +| Test | Description | +|------|-------------| +| **Counts success by telemetry_type** | Send `eventsSent` with items of different `baseType`, verify accumulated counts per `telemetry_type` | +| **Counts drops with status code** | Send `eventsDiscarded` with `NonRetryableStatus` reason and status code, verify `drop.code` is correct | +| **Counts drops with CLIENT_EXCEPTION** | Send `eventsDiscarded` with non-status reasons, verify `drop.code` = `CLIENT_EXCEPTION` | +| **Counts retries by status code** | Send `eventsRetry` with 429/503 status codes, verify `retry.code` values | +| **Flushes on 15-min timer** | Use `useFakeTimers`, advance by 15 min, verify metrics are emitted via `trk` callback | +| **Resets counters after flush** | After flush, verify all counters are zero | +| **Multiple flushes accumulate independently** | Two timer periods, verify each period's metrics are independent | +| **No metrics emitted when counts are zero** | Zero activity → no `trk` calls on flush | +| **Unload flushes remaining** | Call `unload()`, verify final flush occurs | +| **telemetry_type mapping** | Verify all `baseType` → `telemetry_type` mappings per spec | +| **SDK Stats metrics don't count themselves** | Verify that Item_Success_Count/Item_Dropped_Count/Item_Retry_Count metrics emitted by the listener are tagged to be excluded from counting (or verify the system doesn't double-count) | + +#### `channels/applicationinsights-channel-js/Tests/Unit/src/Sender.Tests.ts` + +| Test | Description | +|------|-------------| +| **eventsSent fired on 200** | Mock fetch → 200, verify `eventsSent` notification is dispatched with correct items | +| **eventsDiscarded fired on non-retryable status** | Mock fetch → 403, verify `eventsDiscarded` is dispatched with reason `NonRetryableStatus` and status code | +| **eventsRetry fired on retryable status** | Mock fetch 
→ 429, verify `eventsRetry` is dispatched with status code | +| **eventsDiscarded on beacon failure** | Beacon API failure → verify `eventsDiscarded` with `BeaconSendFailure` reason | +| **Partial success (206)** | Mock 206 with partial response, verify `eventsSent` for accepted, `eventsRetry`/`eventsDiscarded` for failed | + +#### `AISKU/Tests/Unit/src/SdkStatsIntegration.Tests.ts` + +| Test | Description | +|------|-------------| +| **Listener registered on init** | Initialize AISKU, verify notification listener is registered | +| **Feature disabled via config** | Set `featureOptIn.SdkStats.mode = disable`, verify no listener registered | +| **End-to-end: track → send → count** | Track events, mock successful send, advance timer, verify SDK Stats metrics appear in pipeline | +| **Unload removes listener** | Call `appInsights.unload()`, verify listener is removed | +| **Dynamic config change** | Change `featureOptIn.SdkStats` after init, verify listener responds | + +### Test Patterns + +All tests follow project conventions: +- Extend `AITestClass` +- Use `this.testCase()` with `IPromise` return for async +- Use `this.useFakeTimers()` for timer control +- Call `core.unload(false)` in cleanup +- Test both static and dynamic configuration changes + +--- + +## Rollout & Migration + +### Phase 1: Internal Validation +1. Implement Sender notifications (`eventsSent` / `eventsDiscarded` / `eventsRetry`) +2. Implement `SdkStatsNotificationCbk` +3. Unit test thoroughly +4. Integration test in AISKU + +### Phase 2: Opt-In Preview +1. Ship with `featureOptIn` default set to `disable` +2. Document opt-in via `featureOptIn: { "SdkStats": { mode: 3 } }` +3. Validate with internal dogfood customers + +### Phase 3: On-By-Default +1. Flip `featureOptIn` default to `enable` (the `isFeatureEnabled("SdkStats", cfg, true)` call uses `true` as default) +2. Document kill switch in README +3. Per spec: ignore legacy `APPLICATIONINSIGHTS_SDKSTATS_ENABLED_PREVIEW` env var +4. 
Use new metric names (`Item_Success_Count` etc.), not legacy `preview.*` names + +### Migration from Internal StatsBeat + +- The old `IStatsBeat` / `IStatsMgr` / `createStatsMgr()` code was never shipped active (entirely commented out) +- No backward compatibility concerns — there is no public API surface to maintain +- The old code remains in the codebase, commented out or unexported, for potential future internal stats use +- Existing commented-out statsbeat integration points in `Sender.ts`, `AppInsightsCore.ts`, and `IAppInsightsCore.ts` are left in place for reference + +--- + +## Appendix: Mapping `baseType` → `telemetry_type` + +| SDK `baseType` | Spec `telemetry_type` | +|---|---| +| `EventData` | `CUSTOM_EVENT` | +| `MetricData` | `CUSTOM_METRIC` | +| `RemoteDependencyData` | `DEPENDENCY` | +| `ExceptionData` | `EXCEPTION` | +| `PageviewData` | `PAGE_VIEW` | +| `PageviewPerformanceData` | `PAGE_VIEW` | +| `MessageData` | `TRACE` | +| `RequestData` | `REQUEST` | +| `AvailabilityData` | `AVAILABILITY` | + +## Appendix: Drop Code Mapping + +| `eEventsDiscardedReason` | Spec `drop.code` | +|---|---| +| `NonRetryableStatus` (1) | Actual HTTP status code (e.g., `"402"`, `"403"`) | +| `InvalidEvent` (2) | `"CLIENT_EXCEPTION"` | +| `SizeLimitExceeded` (3) | `"CLIENT_EXCEPTION"` | +| `KillSwitch` (4) | `"CLIENT_EXCEPTION"` | +| `QueueFull` (5) | `"CLIENT_EXCEPTION"` | +| `BeaconSendFailure` (6) | `"CLIENT_EXCEPTION"` | +| `Unknown` (0) | `"CLIENT_EXCEPTION"` | + +## Appendix: Retry Code Mapping + +| Scenario | Spec `retry.code` | +|---|---| +| HTTP 401, 403, 408, 429, 500, 502, 503, 504 | Actual status code as string (e.g., `"429"`) | +| Network exception during send | `"CLIENT_EXCEPTION"` | +| Timeout during send | `"CLIENT_TIMEOUT"` | + +## Appendix: Self-Counting Prevention + +SDK Stats metrics themselves are telemetry items that flow through the pipeline.
To prevent them from being counted (creating an infinite feedback loop), the listener should check the `name` property of items in `eventsSent`/`eventsDiscarded`/`eventsRetry` and skip any items where `name` matches `"Item Success Count"`, `"Item Dropped Count"`, or `"Item Retry Count"`. + +```typescript +function _isSdkStatsMetric(item: ITelemetryItem): boolean { + var n = item.name; + return n === "Item Success Count" || n === "Item Dropped Count" || n === "Item Retry Count"; +} + +// In eventsSent handler: +function _incSuccess(items: ITelemetryItem[]) { + for (var i = 0; i < items.length; i++) { + if (!_isSdkStatsMetric(items[i])) { + var t = _getTelType(items[i]); + _successCounts[t] = (_successCounts[t] || 0) + 1; + } + } + _ensureTimer(); +} +``` diff --git a/shared/AppInsightsCore/src/constants/InternalConstants.ts b/shared/AppInsightsCore/src/constants/InternalConstants.ts index 12da0a457..f2c3258ff 100644 --- a/shared/AppInsightsCore/src/constants/InternalConstants.ts +++ b/shared/AppInsightsCore/src/constants/InternalConstants.ts @@ -19,6 +19,7 @@ export const STR_PRIORITY = "priority"; export const STR_EVENTS_SENT = "eventsSent"; export const STR_EVENTS_DISCARDED = "eventsDiscarded"; export const STR_EVENTS_SEND_REQUEST = "eventsSendRequest"; +export const STR_EVENTS_RETRY = "eventsRetry"; export const STR_PERF_EVENT = "perfEvent"; export const STR_OFFLINE_STORE = "offlineEventsStored"; export const STR_OFFLINE_SENT = "offlineBatchSent"; diff --git a/shared/AppInsightsCore/src/core/NotificationManager.ts b/shared/AppInsightsCore/src/core/NotificationManager.ts index d2a2834a5..a03e0a9c2 100644 --- a/shared/AppInsightsCore/src/core/NotificationManager.ts +++ b/shared/AppInsightsCore/src/core/NotificationManager.ts @@ -5,7 +5,7 @@ import { IPromise, createAllPromise, createPromise, doAwaitResponse } from "@nev import { ITimerHandler, arrForEach, arrIndexOf, objDefine, safe, scheduleTimeout } from "@nevware21/ts-utils"; import { createDynamicConfig } from 
"../config/DynamicConfig"; import { - STR_EVENTS_DISCARDED, STR_EVENTS_SEND_REQUEST, STR_EVENTS_SENT, STR_OFFLINE_DROP, STR_OFFLINE_SENT, STR_OFFLINE_STORE, STR_PERF_EVENT + STR_EVENTS_DISCARDED, STR_EVENTS_RETRY, STR_EVENTS_SEND_REQUEST, STR_EVENTS_SENT, STR_OFFLINE_DROP, STR_OFFLINE_SENT, STR_OFFLINE_STORE, STR_PERF_EVENT } from "../constants/InternalConstants"; import { IConfiguration } from "../interfaces/ai/IConfiguration"; import { INotificationListener } from "../interfaces/ai/INotificationListener"; @@ -147,6 +147,17 @@ export class NotificationManager implements INotificationManager { } }; + /** + * Notification for events being retried. + * @param events - The array of events that are being retried. + * @param statusCode - The HTTP status code that triggered the retry. + */ + _self.eventsRetry = (events: ITelemetryItem[], statusCode: number): void => { + _runListeners(_listeners, STR_EVENTS_RETRY, _asyncNotifications, (listener) => { + listener.eventsRetry(events, statusCode); + }); + }; + _self.offlineEventsStored = (events: ITelemetryItem[]): void => { if (events && events.length) { _runListeners(_listeners, STR_OFFLINE_STORE, _asyncNotifications, (listener) => { @@ -254,6 +265,15 @@ export class NotificationManager implements INotificationManager { // @DynamicProtoStub -- DO NOT add any code as this will be removed during packaging } + /** + * Notification for events being retried. + * @param events - The array of events that are being retried. + * @param statusCode - The HTTP status code that triggered the retry. + */ + eventsRetry?(events: ITelemetryItem[], statusCode: number): void { + // @DynamicProtoStub -- DO NOT add any code as this will be removed during packaging + } + /** * [Optional] This event is sent if you have enabled perf events, they are primarily used to track internal performance testing and debugging * the event can be displayed via the debug plugin extension. 
diff --git a/shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts b/shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts
new file mode 100644
index 000000000..d09d5672e
--- /dev/null
+++ b/shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts
@@ -0,0 +1,263 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+"use strict";
+
+import { ITimerHandler, scheduleTimeout } from "@nevware21/ts-utils";
+import { INotificationListener } from "../interfaces/ai/INotificationListener";
+import { ITelemetryItem } from "../interfaces/ai/ITelemetryItem";
+
+var FLUSH_INTERVAL = 900000; // 15 min default
+var MET_SUCCESS = "Item_Success_Count";
+var MET_DROPPED = "Item_Dropped_Count";
+var MET_RETRY = "Item_Retry_Count";
+var P_LANG = "language";
+var P_VER = "version";
+var P_COMPUTE = "computeType";
+var P_TEL_TYPE = "telemetry_type";
+var P_DROP_CODE = "drop.code";
+var P_RETRY_CODE = "retry.code";
+var DROP_CLIENT_EXCEPTION = "CLIENT_EXCEPTION";
+
+// Map baseType to spec telemetry_type values
+var _typeMap: { [key: string]: string } = {
+    "EventData": "CUSTOM_EVENT",
+    "MetricData": "CUSTOM_METRIC",
+    "RemoteDependencyData": "DEPENDENCY",
+    "ExceptionData": "EXCEPTION",
+    "PageviewData": "PAGE_VIEW",
+    "PageviewPerformanceData": "PAGE_VIEW",
+    "MessageData": "TRACE",
+    "RequestData": "REQUEST",
+    "AvailabilityData": "AVAILABILITY"
+};
+
+/**
+ * Configuration interface for the SDK Stats notification callback.
+ */
+export interface ISdkStatsConfig {
+    /**
+     * The track function to call when flushing metrics. Typically core.track().
+     */
+    trk: (item: ITelemetryItem) => void;
+    /**
+     * SDK language identifier, e.g. "JavaScript"
+     */
+    lang: string;
+    /**
+     * SDK version string.
+     */
+    ver: string;
+    /**
+     * Flush interval override in ms (default 900000 = 15 min).
+     */
+    int?: number;
+}
+
+/**
+ * Extended INotificationListener interface for SDK Stats that includes flush and unload operations.
+ */
+export interface ISdkStatsNotifCbk extends INotificationListener {
+    /**
+     * Flush accumulated counts and emit metrics via the configured track function.
+     */
+    flush: () => void;
+    /**
+     * Flush remaining counts and cancel the timer.
+     */
+    unload: () => void;
+}
+
+/**
+ * Creates an INotificationListener that accumulates success/dropped/retry counts and periodically
+ * flushes them as Item_Success_Count, Item_Dropped_Count, and Item_Retry_Count metrics via core.track().
+ * @param cfg - The SDK stats configuration
+ * @returns An INotificationListener with flush and unload methods
+ */
+/*#__NO_SIDE_EFFECTS__*/
+export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk {
+    var _successCounts: { [telType: string]: number } = {};
+    var _droppedCounts: { [code: string]: { [telType: string]: number } } = {};
+    var _retryCounts: { [code: string]: { [telType: string]: number } } = {};
+    var _timer: ITimerHandler;
+    var _interval = cfg.int || FLUSH_INTERVAL;
+
+    // Lazily arm a single flush timer; _flush() clears it so the next notification re-arms it
+    function _ensureTimer() {
+        if (!_timer) {
+            _timer = scheduleTimeout(_flush, _interval);
+        }
+    }
+
+    // Map an item's baseType to the spec telemetry_type, defaulting to CUSTOM_EVENT
+    function _getTelType(item: ITelemetryItem): string {
+        var bt = item.baseType;
+        return (bt && _typeMap[bt]) || "CUSTOM_EVENT";
+    }
+
+    // Identify the metrics produced by _createMetric() so that they are never counted themselves.
+    // The comparison has to use the names _createMetric() assigns to item.name (the MET_* values).
+    function _isSdkStatsMetric(item: ITelemetryItem): boolean {
+        var n = item.name;
+        return n === MET_SUCCESS || n === MET_DROPPED || n === MET_RETRY;
+    }
+
+    // Count successfully sent items per telemetry_type
+    function _incSuccess(items: ITelemetryItem[]) {
+        for (var i = 0; i < items.length; i++) {
+            if (!_isSdkStatsMetric(items[i])) {
+                var t = _getTelType(items[i]);
+                _successCounts[t] = (_successCounts[t] || 0) + 1;
+            }
+        }
+        _ensureTimer();
+    }
+
+    // Count dropped items per drop code and telemetry_type
+    function _incDropped(items: ITelemetryItem[], code: string) {
+        if (!_droppedCounts[code]) {
+            _droppedCounts[code] = {};
+        }
+        var bucket = _droppedCounts[code];
+        for (var i = 0; i < items.length; i++) {
+            if (!_isSdkStatsMetric(items[i])) {
+                var t = _getTelType(items[i]);
+                bucket[t] = (bucket[t] || 0) + 1;
+            }
+        }
+        _ensureTimer();
+    }
+
+    // Count retried items per retry code and telemetry_type
+    function _incRetry(items: ITelemetryItem[], code: string) {
+        if (!_retryCounts[code]) {
+            _retryCounts[code] = {};
+        }
+        var bucket = _retryCounts[code];
+        for (var i = 0; i < items.length; i++) {
+            if (!_isSdkStatsMetric(items[i])) {
+                var t = _getTelType(items[i]);
+                bucket[t] = (bucket[t] || 0) + 1;
+            }
+        }
+        _ensureTimer();
+    }
+
+    // Build a MetricData item; note that props is mutated to add the standard dimensions
+    function _createMetric(name: string, value: number, props: { [key: string]: any }): ITelemetryItem {
+        // Merge standard dimensions
+        props[P_LANG] = cfg.lang;
+        props[P_VER] = cfg.ver;
+        props[P_COMPUTE] = "unknown"; // Browser SDK cannot reliably detect compute type
+
+        return {
+            name: name,
+            baseType: "MetricData",
+            baseData: {
+                ver: 2,
+                metrics: [{ name: name, value: value }],
+                properties: props
+            }
+        } as ITelemetryItem;
+    }
+
+    function _mapDropCode(reason: number, sendType?: number): string {
+        // Maps eEventsDiscardedReason to spec drop.code values
+        // 1 = NonRetryableStatus → actual HTTP status code when available
+        if (reason === 1 && sendType) {
+            return "" + sendType;
+        }
+        return DROP_CLIENT_EXCEPTION;
+    }
+
+    // Emit one metric per non-zero counter via cfg.trk
+    function _flush() {
+        if (_timer) {
+            _timer.cancel();
+            _timer = null;
+        }
+
+        var telType: string;
+        var code: string;
+        var cnt: number;
+        var bucket: { [telType: string]: number };
+
+        // Snapshot and reset the accumulators before emitting, so that anything counted while
+        // cfg.trk() is running is kept for the next flush instead of being wiped by a late reset
+        var successCounts = _successCounts;
+        var droppedCounts = _droppedCounts;
+        var retryCounts = _retryCounts;
+        _successCounts = {};
+        _droppedCounts = {};
+        _retryCounts = {};
+
+        // Flush success counts
+        for (telType in successCounts) {
+            if (successCounts.hasOwnProperty(telType)) {
+                cnt = successCounts[telType];
+                if (cnt > 0) {
+                    var successProps: { [key: string]: any } = {};
+                    successProps[P_TEL_TYPE] = telType;
+                    cfg.trk(_createMetric(MET_SUCCESS, cnt, successProps));
+                }
+            }
+        }
+
+        // Flush dropped counts
+        for (code in droppedCounts) {
+            if (droppedCounts.hasOwnProperty(code)) {
+                bucket = droppedCounts[code];
+                for (telType in bucket) {
+                    if (bucket.hasOwnProperty(telType)) {
+                        cnt = bucket[telType];
+                        if (cnt > 0) {
+                            var dropProps: { [key: string]: any } = {};
+                            dropProps[P_TEL_TYPE] = telType;
+                            dropProps[P_DROP_CODE] = code;
+                            cfg.trk(_createMetric(MET_DROPPED, cnt, dropProps));
+                        }
+                    }
+                }
+            }
+        }
+
+        // Flush retry counts
+        for (code in retryCounts) {
+            if (retryCounts.hasOwnProperty(code)) {
+                bucket = retryCounts[code];
+                for (telType in bucket) {
+                    if (bucket.hasOwnProperty(telType)) {
+                        cnt = bucket[telType];
+                        if (cnt > 0) {
+                            var retryProps: { [key: string]: any } = {};
+                            retryProps[P_TEL_TYPE] = telType;
+                            retryProps[P_RETRY_CODE] = code;
+                            cfg.trk(_createMetric(MET_RETRY, cnt, retryProps));
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    return {
+        eventsSent: _incSuccess,
+        eventsDiscarded: function (events: ITelemetryItem[], reason: number, sendType?: number) {
+            var code = _mapDropCode(reason, sendType);
+            _incDropped(events, code);
+        },
+        eventsRetry: function (events: ITelemetryItem[], statusCode: number) {
+            var code = "" + statusCode; // numeric status code as string per spec
+            _incRetry(events, code);
+        },
+        flush: _flush,
+        unload: function () {
+            // Flush remaining counts before unload
+            _flush();
+            // A notification received while flushing may have re-armed the timer
+            if (_timer) {
+                _timer.cancel();
+                _timer = null;
+            }
+        }
+    };
+}
diff --git a/shared/AppInsightsCore/src/index.ts b/shared/AppInsightsCore/src/index.ts
index cc0749698..276e82807 100644
--- a/shared/AppInsightsCore/src/index.ts
+++ b/shared/AppInsightsCore/src/index.ts
@@ -38,6 +38,7 @@ export { parseResponse } from "./core/ResponseHelpers";
 export { IXDomainRequest, IBackendResponse } from "./interfaces/ai/IXDomainRequest";
 export { _ISenderOnComplete, _ISendPostMgrConfig, _ITimeoutOverrideWrapper, _IInternalXhrOverride } from "./interfaces/ai/ISenderPostManager";
 export { SenderPostManager } from "./core/SenderPostManager";
+export { createSdkStatsNotifCbk, ISdkStatsConfig, ISdkStatsNotifCbk } from "./core/SdkStatsNotificationCbk";
 //export { IStatsBeat, IStatsBeatConfig, IStatsBeatKeyMap as IStatsBeatEndpoints, IStatsBeatState} from "./interfaces/ai/IStatsBeat";
 //export { IStatsEventData }
from "./interfaces/ai/IStatsEventData"; //export { IStatsMgr, IStatsMgrConfig } from "./interfaces/ai/IStatsMgr"; diff --git a/shared/AppInsightsCore/src/interfaces/ai/INotificationListener.ts b/shared/AppInsightsCore/src/interfaces/ai/INotificationListener.ts index ef78b4b36..d14428e81 100644 --- a/shared/AppInsightsCore/src/interfaces/ai/INotificationListener.ts +++ b/shared/AppInsightsCore/src/interfaces/ai/INotificationListener.ts @@ -50,6 +50,14 @@ export interface INotificationListener { */ unload?(isAsync?: boolean): void | IPromise; + /** + * [Optional] A function called when events are being retried. + * @param events - The array of events that are being retried. + * @param statusCode - The HTTP status code that triggered the retry. + * @since 3.3.6 + */ + eventsRetry?(events: ITelemetryItem[], statusCode: number): void; + /** * [Optional] A function called when the offline events have been stored to the persistent storage * @param events - items that are stored in the persistent storage diff --git a/shared/AppInsightsCore/src/interfaces/ai/INotificationManager.ts b/shared/AppInsightsCore/src/interfaces/ai/INotificationManager.ts index 4457d643a..7a1d67af9 100644 --- a/shared/AppInsightsCore/src/interfaces/ai/INotificationManager.ts +++ b/shared/AppInsightsCore/src/interfaces/ai/INotificationManager.ts @@ -63,6 +63,14 @@ export interface INotificationManager { */ unload?(isAsync?: boolean): void | IPromise; + /** + * Notification for events being retried. + * @param events - The array of events that are being retried. + * @param statusCode - The HTTP status code that triggered the retry. 
+ * @since 3.3.6 + */ + eventsRetry?(events: ITelemetryItem[], statusCode: number): void; + /** * [Optional] A function called when the offline events have been stored to the persistent storage * @param events - items that are stored in the persistent storage From 70138b5910033cec22e2abe7fccd1b358841c032 Mon Sep 17 00:00:00 2001 From: Jackson Weber Date: Wed, 18 Feb 2026 13:51:38 -0800 Subject: [PATCH 2/6] Delete customer_facing_sdk_stats_spec.md --- customer_facing_sdk_stats_spec.md | 434 ------------------------------ 1 file changed, 434 deletions(-) delete mode 100644 customer_facing_sdk_stats_spec.md diff --git a/customer_facing_sdk_stats_spec.md b/customer_facing_sdk_stats_spec.md deleted file mode 100644 index e5585311b..000000000 --- a/customer_facing_sdk_stats_spec.md +++ /dev/null @@ -1,434 +0,0 @@ -# Customer-Facing SDKStats - -## Owner - -* [Leighton Chen](mailto:lechen@microsoft.com) - -## Approvers - -* [Hector Hernandez Guzman](mailto:hectorh@microsoft.com) -* [Jackson Weber](mailto:jacksonweber@microsoft.com) -* [Jeremy Voss](mailto:jeremyvoss@microsoft.com) -* [Rajkumar Rangaraj](mailto:rajkumar.rangaraj@microsoft.com) -* [Ram Thiru](mailto:Ram.Thiru@microsoft.com) - -## Status - -Stable - -
-Table of Contents - - - -- [Customer-Facing SDKStats](#customer-facing-sdkstats) - - [Owner](#owner) - - [Approvers](#approvers) - - [Status](#status) - - [Overview](#overview) - - [Specifications](#specifications) - - [Key metrics](#key-metrics) - - [Top-level fields](#top-level-fields) - - [iKey](#ikey) - - [SDKVersion](#sdkversion) - - [Item success count](#item-success-count) - - [Item dropped count](#item-dropped-count) - - [Item retry count](#item-retry-count) - - [Getting started](#getting-started) - - [Environment Variable configurations](#environment-variable-configurations) - - [Future considerations](#future-considerations) - - [Include `cloud.*` fields as part of `customDimensions`](#include-cloud-fields-as-part-of-customdimensions) - - [TBD](#tbd) - - [Reference](#reference) - -- - -
- -## Overview - -SDKStats has proven to be valuable by providing insights into RP integration growth, tracking feature/instrumentation adoption, and monitoring success/failure counts for Application Insights SDKs across languages. -Recognizing the importance of sharing these metrics with customers, our leadership team aims to provide customers with access to specific network SDKStats metrics in their Application Insights resources, -enhancing their self-service experience. - -To enable this functionality, we have decided to emit SDKStats as custom metrics to customers' resources. - -Customers have the option to access these metrics either through the metric explorer on the portal or creating alerts based on Kusto query. - -Customer-facing SDKStats metrics are uniquely identified by metrics' names as shown below. -Ingestion service can determine the type of SDKStats being sent based on metric names. These metrics are ingested into the customer's Application Insights resources. - -## Specifications - -### Key metrics - -Metrics names should follow the OpenTelemetry Specification, more info in the [OpenTelemetry metrics API specification](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/metrics/api.md#instrument). - - -| Metrics | Description | Frequency | Required | -| ----------------------------------------- | ----------------------------------------------------------------- | --------- | -------- | -| [Item Success Count](#item-success-count) | Count of successful telemetry items sent to Application Insights. | *Short | Yes | -| [Item Dropped Count](#item-dropped-count) | Count of dropped telemetry items sent to Application Insights. | *Short | Yes | -| [Item Retry Count](#item-retry-count) | Count of retried telemetry items. | *Short | Yes | - - -*Short interval is once every 15 minutes. - -**Note:** These metrics were chosen as the minimal set to send to customers. 
This is to reduce confusion while maintaining the necessary information to maximize the likelihood of customers' being able to troubleshoot telemetry -problems themselves (and reduce icm cases opened). - -**Note:** `Item Success Count` and `Item Dropped Count` should theoretically add up to the total amount of telemetry items that are actually sent to the backend. - -### Top-level fields - -Almost all top-level fields of customer SDKStats metrics are automatically populated by ingestion and are the same as that of [Application Insights custom metric](https://msazure.visualstudio.com/One/_git/CommonSchema?path=/v4.0/Mappings/AzureMonitor-AI.md&_a=preview&anchor=microsoft.applicationinsights.metric). -The only exception is that of `sdkVersion`, `iKey`, `cloudRoleInstance` and `cloudRole` which need to be populated by the SDK itself. - -**Note** that since we are trying to keep the payload as minimal as possible, we do not include any `operation.*` fields, as that information is not as relevant in terms of SDKStats analysis. -We can include `cloud.*` fields since there will be no PII violations when sending metrics to customers' own resource. These are also fields that enable customers to identify issues with specific apps or VMs, helping with targeted problem-solving. - -#### iKey - -This represents the instrumentation key of the customers' Application Insights resource this SDKStats telemetry is being sent to. - -#### SDKVersion - -[sdkVersion](../sdk_version_name.md) is crucial to include as part of SDKStats, as it allows us to identify -RP, Attach type, operating system, language, language version, SDK and SDK version from an encoded string. It -is currently used to filter out sdk name and sdk version for BI reports. `sdkVersion` has an inconsistent format and specifications have only been properly created for OpenTelemetry-based SDKs. 
- -Therefore, it is crucial to have other indicators for language and version (such as the `customDimensions` defined for each Metric Type below). - -### Item success count - -This metric represents the cumulative item success count during the collection interval. A high and persistent item success count will help increase customers' confidence in using our products and services. - -We send telemetry items in batches. Each batch can contain an array of metrics, logs, and traces. When Breeze returns a 200 status code, the SDK counts the number of telemetry items in the batch and accumulates it using the `Item Success Count` metric. - - -| Telemetry Name | Metric Name | Unit | customDimensions | -| -------------------- | -------------------- | ----- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `Item Success Count` | `Item_Success_Count` | Count | **compute.type**: The type of compute (aks, appsvc, functions, springcloud, vm, unknown) that the customer's application is running in
**language**: application insights SDK/Agent name
**version**: version of the application insights SDK/Agent
**telemetry_type**: Type of telemetry that this metric was counting. | - - -\* **telemetry_type** - -The `telemetry_type` field provides clarity on what kinds of data were dropped or ingested, aiding troubleshooting and system insights. The possible values correspond with table names in Application Insights. Below is a list of known values for `telemetry_type`: - -`AVAILABILITY` -`CUSTOM_EVENT` -`CUSTOM_METRIC` -`DEPENDENCY` -`EXCEPTION` -`PAGE_VIEW` -`PERFORMANCE_COUNTER` -`REQUEST` -`TRACE` - -This example shows 3000 item success count from a customer who uses the Java Distro 3.5.1 running Java 17. - -```json -{ - "ver": 1, - "name": "Item Success Count", - "time": "2024-05-14T22:51:46.406Z", - "iKey": "", - "tags": { - "ai.internal.sdkVersion": "java:3.5.1", - "ai.cloud.roleInstance": "", - "ai.cloud.role": "" - }, - "data": { - "baseType": "MetricData", - "baseData": { - "ver": 2, - "metrics": [ - { - "name": "Item_Success_Count", - "value": 3000.0 - } - ], - "properties": { - "language": "java", - "version": "3.5.1", - "computeType": "unknown", - "telemetry_type": "DEPENDENCY" - } - } - } -} -``` - -### Item dropped count - -This metric provides insights into the reasons for data loss, enabling customers, SDK teams, and the Ingestion service team to investigate the returned status codes and identify opportunities for reducing data loss. - -We send telemetry items in batches. Each batch can contain an array of metrics, logs, and traces. When Breeze returns a [non-retryable](./sdkstats.md#retry-counts) status code or an exception is thrown while sending the telemetry, the SDK counts the number of telemetry items in the batch and accumulates it using the `Item Dropped Count` metric. 
- - -| Telemetry Name | Metric Name | Unit | customDimensions | -| -------------------- | -------------------- | ----- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `Item Dropped Count` | `Item_Dropped_Count` | Count | **compute.type**: The type of compute (aks, appsvc, functions, springcloud, vm, unknown) that the customer's application is running in
**language**: application insights SDK/Agent name
**version**: version of the application insights SDK/Agent
**drop.code**
**drop.reason**
**telemetry_type**: Type of telemetry that this metric was counting.
**telemetry_success**: Boolean value indicating whether the tracked customer DEPENDENCY or REQUEST telemetry succeeded (true) or failed (false). Only applicable for DEPENDENCY and REQUEST telemetry types. | - - -\* **drop.code** table below lists the drop codes for different situations that result in dropped items. - - -| drop.code | Description | -| --------- | ----------- | -| CLIENT_EXCEPTION | items dropped due to exceptions thrown or when a response is not returned from Breeze | -| CLIENT_READONLY | items dropped due to READONLY filesystem | -| CLIENT_PERSISTENCE_CAPACITY | items dropped because the disk persistence capacity was exceeded | -| CLIENT_STORAGE_DISABLED | items that would have been retried but are dropped since client has local storage disabled | -| `*NON_RETRYABLE_STATUS_CODE` | items dropped when breeze returns a [non-retryable](./sdkstats.md#retry-counts) status code | - - -*NON_RETRYABLE_STATUS_CODE will be the actual value of the non-retryable status code that was returned (e.g. 401, 403). - -\* **drop.reason** can be populated if `CLIENT_EXCEPTION` or a status code is the `drop.code`. Provides an informative, low-cardinality description of the exception or reason why the status code was returned. For `CLIENT_EXCEPTION`, the drop reason uses well known exception categories rather than raw exception messages. Categories include specific types (storage, timeout, etc.) with a generic fallback for unknown exceptions. The table below lists current well known exception categories: - -| drop.reason | -| ----------------- | -| Timeout exception | -| Network exception | -| Storage exception | -| Client exception | - -\* **telemetry_type** - -The `telemetry_type` field provides clarity on what kinds of data were dropped or ingested, aiding troubleshooting and system insights. The possible values correspond with table names in Application Insights. 
Below is a list of known values for `telemetry_type`: - -`AVAILABILITY` -`CUSTOM_EVENT` -`CUSTOM_METRIC` -`DEPENDENCY` -`EXCEPTION` -`PAGE_VIEW` -`PERFORMANCE_COUNTER` -`REQUEST` -`TRACE` - -The below example shows 6 item drop count when a customer reaches their daily quota (status code 402). - -```json - -{ - "ver": 1, - "name": "Item Dropped Count", - "time": "", - "iKey": "", - "tags": { - "ai.internal.sdkVersion": "java:3.5.1", - "ai.cloud.roleInstance": "", - "ai.cloud.role": "" - }, - "data": { - "baseType": "MetricData", - "baseData": { - "ver": 2, - "metrics": [ - { - "name": "Item_Dropped_Count", - "value": 6.0 - } - ], - "properties": { - "language": "java", - "version": "3.5.1", - "computeType": "", - "drop.code": "402", - "drop.reason": "Exceeded daily quota", - "telemetry_type": "DEPENDENCY", - "telemetry_success": false - } - } - } -} -``` - -Here is another example that shows a 12 item drop count when a customer encounters a storage exception. - -```json - -{ - "ver": 1, - "name": "Item Dropped Count", - "time": "", - "iKey": "", - "tags": { - "ai.internal.sdkVersion": "python:3.11.9", - "ai.cloud.roleInstance": "", - "ai.cloud.role": "" - }, - "data": { - "baseType": "MetricData", - "baseData": { - "ver": 2, - "metrics": [ - { - "name": "Item_Dropped_Count", - "value": 12.0 - } - ], - "properties": { - "language": "python", - "version": "3.11.9", - "computeType": "", - "drop.code": "CLIENT_EXCEPTION", - "drop.reason": "Storage exception", - "telemetry_type": "DEPENDENCY", - } - } - } -} -``` - -### Item retry count - -This metric represents the cumulative item retry count during the collection interval. -Customers can benefit from this metric by gaining a better understanding of why the retry item count is high. For example, if they experience a high retry count of unauthorized (401) or forbidden (403) status codes, they might need to double-check their access permissions. 
Similarly, if they encounter a high retry count of too many requests (429), it could be an opportunity for them to evaluate their system for an upgrade to handle more requests. - - -| Telemetry Name | Metric Name | Unit | customDimensions | -| ------------------ | ------------------ | ----- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `Item Retry Count` | `Item_Retry_Count` | Count | **compute.type** (aks, appsvc, functions, springcloud, vm, unknown), language, version
**retry.code**
**retry.reason**
**telemetry_type**: Type of telemetry that this metric was counting. | - - -\* **retry.code** table below lists the retry codes for different situations that result in retried items. - - -| retry.code | Description | -| ---------- | ----------- | -| CLIENT_EXCEPTION | items to be retried when there is a runtime exception, like network failure, DNS name lookup failure excluding timeout exceptions that result in retryable scenarios. | -| CLIENT_TIMEOUT | items to be retried when there is a timeout exception | -| `*RETRYABLE_STATUS_CODE` | items to be retried when breeze returns a [retryable](./sdkstats.md#retry-counts) status code | - - -*RETRYABLE_STATUS_CODE will be the actual value of the retryable status code that was returned. - -\* **retry.reason** can be populated if `CLIENT_EXCEPTION` or a status code is the `retry.code`. Provides an informative, low-cardinality description of the exception. For `CLIENT_EXCEPTION`, the exception categorization outlined in the [Item Dropped Count](#item-dropped-count) section is applied here as well to ensure uniformity across metrics. - -\* **telemetry_type** - -The `telemetry_type` field provides clarity on what kinds of data were dropped or ingested, aiding troubleshooting and system insights. The possible values correspond with table names in Application Insights. Below is a list of known values for `telemetry_type`: - -`AVAILABILITY` -`CUSTOM_EVENT` -`CUSTOM_METRIC` -`DEPENDENCY` -`EXCEPTION` -`PAGE_VIEW` -`PERFORMANCE_COUNTER` -`REQUEST` -`TRACE` - -Here is an example that shows 20 retry counts when Breeze returns a status code 429 (too many requests) for a customer using the Java distro 3.5.1 on AKS running Java 21. 
- -```json -{ - "ver": 1, - "name": "Item Retry Count", - "time": "2024-05-14T22:51:46.406Z", - "iKey": "", - "tags": { - "ai.internal.sdkVersion": "java:3.5.1", - "ai.cloud.roleInstance": "", - "ai.cloud.role": "" - }, - "data": { - "baseType": "MetricData", - "baseData": { - "ver": 2, - "metrics": [ - { - "name": "Item_Retry_Count", - "value": 20.0 - } - ], - "properties": { - "language": "java", - "version": "3.5.1", - "computeType": "aks", - "retry.code": "429", - "retry.reason": "Too many requests" - } - } - } -} -``` - -Here is another example that shows 20 retry counts when a TimeoutException happens on the client side using the Java distro 3.5.1 on AKS running Java 21. - -```json -{ - "ver": 1, - "name": "Item Retry Count", - "time": "2024-05-14T22:51:46.406Z", - "iKey": "", - "tags": { - "ai.internal.sdkVersion": "java:3.5.1", - "ai.cloud.roleInstance": "", - "ai.cloud.role": "" - }, - "data": { - "baseType": "MetricData", - "baseData": { - "ver": 2, - "metrics": [ - { - "name": "Item_Retry_Count", - "value": 20.0 - } - ], - "properties": { - "language": "java", - "version": "3.5.1", - "computeType": "aks", - "retry.code": "CLIENT_TIMEOUT", - "exception.message": "TimeoutException: timeout while sending telemetry" - } - } - } -} -``` - -## Getting started - -Customers can access the success count, item drop count and item retry count metrics through the Application Insights portal by navigating to their Application Insights resource, which contains a dedicated dashboard for easier access and analysis. - -![Where to find the customer sdk stats dashboard on Application Insights resource](../../Media/customer-sdk-stats-dashboard.png) - -## Environment Variable configurations - -`Enabled` - -Currently, all customer sdk stats [metrics](#key-metrics) are on-by-default. In order to stop sending these metrics, users can disable this feature by setting environment variable `APPLICATIONINSIGHTS_SDKSTATS_DISABLED` to `true`. 
- -`shortInterval` - -* SDKs MAY provide users an optional configuration for changing the short export interval, which currently defaults to 15 minutes/900 seconds. -* Configured through `APPLICATIONINSIGHTS_SDKSTATS_EXPORT_INTERVAL` in seconds - -## Migrations for existing implementations - -- For implementations that supported these metrics as opt-in and are deciding to move to on-by-default, backward compatibility does NOT have to be maintained for the old environment variable `APPLICATIONINSIGHTS_SDKSTATS_ENABLED_PREVIEW`. The new environment variable will also ALWAYS take priority over the opt-in environment variable. Newer versions of the SDK that support on-by-default can also ignore the `APPLICATIONINSIGHTS_SDKSTATS_ENABLED_PREVIEW` environment variable moving forward. - -- For SDK implementations that supported the old `preview.*` naming conventions, SDKs MUST start sending the new metric names if deciding to move this feature to on-by-default. The old naming conventions will be aggregated together with the new names in the dashboards. - -## Future considerations - -### Include `cloud.*` fields as part of `customDimensions` - -The fields `cloud_RoleName` and `cloud_RoleInstance` are already queryable in logs. In the future, we may consider adding these as custom dimensions, which will be mapped to metrics dimensions in MDM for enhanced querying and alerts. This is not as high of a priority since customers are already able to do this today with log querying. - -## TBD - -1. 
To enable or disable this feature, we will provide customers with configuration options such as: - -- Allow configuration of which SDKStats metrics to collect by default -- Kill switch configuration in Control Plane - -## Reference - -[Status codes from Breeze](../sdk_behavior_breeze.md) -[Internal SDKStats spec](./sdkstats.md) \ No newline at end of file From 377013a623fc65d20483688b494a2c1e27c51b52 Mon Sep 17 00:00:00 2001 From: Jackson Weber Date: Wed, 18 Feb 2026 13:52:27 -0800 Subject: [PATCH 3/6] Delete customer_sdk_stats_implementation.md --- docs/customer_sdk_stats_implementation.md | 870 ---------------------- 1 file changed, 870 deletions(-) delete mode 100644 docs/customer_sdk_stats_implementation.md diff --git a/docs/customer_sdk_stats_implementation.md b/docs/customer_sdk_stats_implementation.md deleted file mode 100644 index d383506f8..000000000 --- a/docs/customer_sdk_stats_implementation.md +++ /dev/null @@ -1,870 +0,0 @@ -# Customer-Facing SDK Stats — Implementation Plan - -## Table of Contents - -- [Overview](#overview) -- [Goals & Constraints](#goals--constraints) -- [Architecture](#architecture) - - [High-Level Data Flow](#high-level-data-flow) - - [Key Design Decisions](#key-design-decisions) -- [Refactoring the Existing StatsBeat Code](#refactoring-the-existing-statsbeat-code) - - [What Changes](#what-changes) - - [What Stays](#what-stays) -- [Implementation Details](#implementation-details) - - [Phase 1: Sender Notification Gaps](#phase-1-sender-notification-gaps) - - [Phase 2: Refactor StatsBeat → Customer SDK Stats Listener](#phase-2-refactor-statsbeat--customer-sdk-stats-listener) - - [Phase 3: AISKU Integration](#phase-3-aisku-integration) - - [Phase 4: Configuration & Feature Gating](#phase-4-configuration--feature-gating) -- [File-by-File Changes](#file-by-file-changes) -- [Interfaces & Types](#interfaces--types) -- [Metric Payload Format](#metric-payload-format) -- [Bundle Size Strategy](#bundle-size-strategy) -- [Testing 
Plan](#testing-plan) -- [Rollout & Migration](#rollout--migration) - ---- - -## Overview - -This document describes the implementation plan for **Customer-Facing SDK Stats** in the Application Insights JavaScript SDK. The feature emits three custom metrics — `Item_Success_Count`, `Item_Dropped_Count`, and `Item_Retry_Count` — to the **customer's own Application Insights resource** every 15 minutes, enabling self-service troubleshooting of telemetry delivery issues. - -The implementation **refactors** the existing (commented-out) internal StatsBeat code to: - -1. **Use the Notification Manager** instead of custom sender logic — the Sender channel fires `eventsSent` / `eventsDiscarded` / retry notifications, and a listener accumulates counts. -2. **Send metrics via `core.track()`** to the customer's own instrumentation key (not a stats-specific iKey). -3. **Minimize bundle size impact** using the project's established patterns (`dynamicProto`, `const enum`, `__DynamicConstants`, no ES6+ operators, etc.). 
- -Reference spec: [Customer-Facing SDKStats Spec](../customer_facing_sdk_stats_spec.md) - ---- - -## Goals & Constraints - -| Goal | Detail | -|------|--------| -| **Three metrics** | `Item_Success_Count`, `Item_Dropped_Count`, `Item_Retry_Count` — spec-required names and dimensions | -| **Customer's iKey** | Metrics are sent to the customer's own AI resource, NOT a separate stats endpoint | -| **15-minute interval** | Counters accumulate and flush on a configurable short interval (default 900s) | -| **On by default** | Feature is enabled by default; kill switch via `featureOptIn` config | -| **Minimal size impact** | Target < 2KB minified gzip addition to the AISKU bundle | -| **ES5 compatible** | No `?.`, `??`, `...`, `async/await` | -| **No separate sender** | Reuse `core.track()` → existing Sender pipeline → customer's endpoint | -| **Notification-driven** | Counters are fed from `INotificationListener` callbacks, not by modifying Sender internals | - ---- - -## Architecture - -### High-Level Data Flow - -``` -┌──────────────────────────────────────────────────────────────────────────┐ -│ Application Code │ -│ appInsights.trackEvent(...) 
│ -└──────────────┬───────────────────────────────────────────────────────────┘ - │ - ▼ -┌──────────────────────────────────────────────────────────────────────────┐ -│ AppInsightsCore │ -│ core.track(item) │ -│ │ │ -│ Plugin Pipeline │ -│ │ │ -│ ┌────────────┼────────────┐ │ -│ ▼ ▼ ▼ │ -│ Analytics Properties Sender │ -│ │ │ -│ ┌────────────┴────────────┐ │ -│ │ HTTP Send (fetch/xhr) │ │ -│ │ │ │ -│ ▼ ▼ │ -│ On Success On Error/Retry │ -│ │ │ │ -│ ▼ ▼ │ -│ ┌─────────────────────────────────────────────┐ │ -│ │ NotificationManager dispatches: │ │ -│ │ • eventsSent(items) │ │ -│ │ • eventsDiscarded(items, reason) [NEW] │ │ -│ │ • eventsRetry(items, reason) [NEW] │ │ -│ └────────────────┬────────────────────────────┘ │ -│ │ │ -│ ▼ │ -│ ┌─────────────────────────────────────────────┐ │ -│ │ SdkStatsNotificationListener │ │ -│ │ (INotificationListener implementation) │ │ -│ │ │ │ -│ │ Accumulates per 15-min window: │ │ -│ │ • success counts (by telemetry_type) │ │ -│ │ • dropped counts (by code + type) │ │ -│ │ • retry counts (by code + type) │ │ -│ │ │ │ -│ │ On timer flush: │ │ -│ │ → core.track(Item_Success_Count metric) │ │ -│ │ → core.track(Item_Dropped_Count metric) │ │ -│ │ → core.track(Item_Retry_Count metric) │ │ -│ └─────────────────────────────────────────────┘ │ -│ │ -│ core.track(metric) → same pipeline → Sender → customer endpoint │ -└──────────────────────────────────────────────────────────────────────────┘ -``` - -### Key Design Decisions - -1. **Notification-driven accumulation**: Rather than hooking into Sender internals, the SDK Stats listener subscribes to `INotificationManager` events. This keeps the Sender code clean and makes SDK Stats a loosely-coupled consumer. - -2. **`core.track()` for emission**: SDK Stats metrics flow through the same telemetry pipeline as customer telemetry. They use the customer's own iKey (already set on the core) and go through sampling, batching, and sending like any other metric. 
Because these are low-volume (3 metrics per 15 min per dimension), the overhead is negligible. - -3. **No `IStatsBeat` interface reuse for customer stats**: The existing `IStatsBeat` / `IStatsMgr` / `INetworkStatsbeat` interfaces were designed for internal stats with endpoint-to-iKey mapping and glob-based routing. Customer SDK Stats has a simpler model (everything goes to the customer's iKey), so we create a new, lighter-weight listener. The old interfaces can be retained for future internal stats if needed. - -4. **Listener registered in AISKU**: The `SdkStatsNotificationListener` is created and registered during AISKU initialization. This keeps it out of the core package and makes it tree-shakeable for consumers who don't use the full SKU. - ---- - -## Refactoring the Existing StatsBeat Code - -### What Changes - -| File | Change | -|------|--------| -| `shared/AppInsightsCore/src/core/StatsBeat.ts` | **Retain** the file but decouple from customer stats. The internal stats manager (`createStatsMgr`) remains available for future internal stats use if needed. No immediate code changes needed since it's already commented out of exports. | -| `channels/applicationinsights-channel-js/src/Sender.ts` | **Add** notification dispatch calls for `eventsSent`, `eventsDiscarded`, and a new `eventsRetry` notification. Existing commented-out statsbeat code is left in place for reference. | -| `shared/AppInsightsCore/src/index.ts` | **Keep** statsbeat exports commented out. They are not needed for customer SDK stats. | - -### What Stays - -- All existing commented-out statsbeat code in `Sender.ts`, `AppInsightsCore.ts`, and `IAppInsightsCore.ts` remains untouched for potential future internal stats use. -- `StatsBeat.ts`, its interfaces (`IStatsBeat`, `IStatsMgr`, `INetworkStatsbeat`, etc.), and enums (`eStatsType`) remain in the codebase for potential future internal stats use. They are not exported and add zero bundle cost. 
-- The `INotificationListener` / `INotificationManager` infrastructure — this is the foundation for the new approach. - ---- - -## Implementation Details - -### Phase 1: Sender Notification Gaps - -**Problem**: The current `applicationinsights-channel-js` Sender only fires `eventsSendRequest`. It does NOT fire `eventsSent` or `eventsDiscarded`. The customer SDK Stats listener needs these events to count successes, drops, and retries. - -**Changes to `Sender.ts`**: - -#### 1a. Fire `eventsSent` on success - -In `_onSuccess()`, after clearing the buffer, dispatch `eventsSent` through the notification manager: - -```typescript -function _onSuccess(payload: IInternalStorageItem[], countOfItemsInPayload: number) { - _self._buffer && _self._buffer.clearSent(payload); - // Notify listeners of successful send - let mgr = _getNotifyMgr(); - if (mgr) { - // Extract original ITelemetryItem[] from the payload - let items = _extractTelemetryItems(payload); - items && mgr.eventsSent(items); - } -} -``` - -#### 1b. Fire `eventsDiscarded` on non-retryable failure - -In `_onError()`, dispatch `eventsDiscarded` with the appropriate reason: - -```typescript -function _onError(payload: IInternalStorageItem[], message: string, event?: ErrorEvent) { - _throwInternal(_self.diagLog(), - eLoggingSeverity.WARNING, - _eInternalMessageId.OnError, - "Failed to send telemetry.", - { message }); - _self._buffer && _self._buffer.clearSent(payload); - // Notify listeners of discarded events - let mgr = _getNotifyMgr(); - if (mgr) { - let items = _extractTelemetryItems(payload); - items && mgr.eventsDiscarded(items, eEventsDiscardedReason.NonRetryableStatus); - } -} -``` - -#### 1c. Add retry notification - -The `INotificationListener` currently has no retry-specific callback. We have two options: - -**Option A (Recommended): Reuse `eventsSendRequest` with retry reason** -The existing `eventsSendRequest(sendReason, isAsync)` already has `SendRequestReason.Retry = 5`. 
We can extend this to also pass the events being retried, or add a lightweight new callback. - -**Option B: Add a new `eventsRetry` callback to `INotificationListener`** -Add `eventsRetry?(events: ITelemetryItem[], statusCode: number): void` to the listener interface. - -**Recommendation**: Option B — a dedicated `eventsRetry` callback — because it provides the events and status code needed for SDK Stats dimensions. This follows the existing pattern of `eventsSent` and `eventsDiscarded`. - -In `_checkResponsStatus()` and `_resendPayload()`, fire the retry notification: - -```typescript -// In the retry path of _checkResponsStatus: -if (!_isRetryDisabled && _isRetriable(status)) { - _resendPayload(payload); - // Notify listeners of retry - let mgr = _getNotifyMgr(); - if (mgr && mgr.eventsRetry) { - let items = _extractTelemetryItems(payload); - items && mgr.eventsRetry(items, status); - } - // ... existing logging -} -``` - -#### 1d. Helper: `_extractTelemetryItems` - -A helper that extracts the original `ITelemetryItem[]` from the Sender's `IInternalStorageItem[]` payload. Each `IInternalStorageItem` wraps an `ITelemetryItem` in its `.item` property. - -```typescript -function _extractTelemetryItems(payload: IInternalStorageItem[]): ITelemetryItem[] { - if (payload && payload.length) { - let items: ITelemetryItem[] = []; - arrForEach(payload, (p) => { - p && p.item && items.push(p.item); - }); - return items.length ? items : null; - } - return null; -} -``` - -### Phase 2: Refactor StatsBeat → Customer SDK Stats Listener - -Create a new file for the customer SDK stats listener. This is the core of the feature. - -#### New file: `shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts` - -This file exports a factory function `createSdkStatsNotifCbk()` that returns an `INotificationListener`. The listener: - -1. Accumulates success/dropped/retry counts in a lightweight counter object -2. 
Groups counts by `telemetry_type` (and by `drop.code`/`retry.code` for dropped/retried items) -3. On a 15-minute timer, flushes accumulated counts as `Item_Success_Count`, `Item_Dropped_Count`, `Item_Retry_Count` metrics via a provided `track` callback -4. Resets counters after flush - -```typescript -// shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts - -import { ITimerHandler, scheduleTimeout } from "@nevware21/ts-utils"; -import { INotificationListener } from "../interfaces/ai/INotificationListener"; -import { ITelemetryItem } from "../interfaces/ai/ITelemetryItem"; - -const FLUSH_INTERVAL = 900000; // 15 min default -const MET_SUCCESS = "Item_Success_Count"; -const MET_DROPPED = "Item_Dropped_Count"; -const MET_RETRY = "Item_Retry_Count"; - -// Map baseType to spec telemetry_type values -const _typeMap: { [key: string]: string } = { - "EventData": "CUSTOM_EVENT", - "MetricData": "CUSTOM_METRIC", - "RemoteDependencyData": "DEPENDENCY", - "ExceptionData": "EXCEPTION", - "PageviewData": "PAGE_VIEW", - "PageviewPerformanceData": "PAGE_VIEW", - "MessageData": "TRACE", - "RequestData": "REQUEST", - "AvailabilityData": "AVAILABILITY" -}; - -export interface ISdkStatsConfig { - /** The track function to call when flushing metrics. Typically core.track(). */ - trk: (item: ITelemetryItem) => void; - /** SDK language identifier, e.g. "JavaScript" */ - lang: string; - /** SDK version string */ - ver: string; - /** Flush interval override in ms (default 900000 = 15 min) */ - int?: number; -} - -export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): INotificationListener & { flush: () => void } { - // ... 
accumulator state, timer, flush logic - // See "Detailed Implementation" section below -} -``` - -**Detailed Implementation Sketch** (follows project patterns): - -```typescript -export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): INotificationListener & { flush: () => void } { - let _successCounts: Record = {}; // telemetry_type → count - let _droppedCounts: Record> = {}; // dropCode → { telemetry_type → count } - let _retryCounts: Record> = {}; // retryCode → { telemetry_type → count } - let _timer: ITimerHandler; - let _interval = cfg.int || FLUSH_INTERVAL; - - function _ensureTimer() { - if (!_timer) { - _timer = scheduleTimeout(_flush, _interval); - } - } - - function _getTelType(item: ITelemetryItem): string { - return _typeMap[item.baseType] || "CUSTOM_EVENT"; - } - - function _incSuccess(items: ITelemetryItem[]) { - for (let i = 0; i < items.length; i++) { - let t = _getTelType(items[i]); - _successCounts[t] = (_successCounts[t] || 0) + 1; - } - _ensureTimer(); - } - - function _incDropped(items: ITelemetryItem[], code: string) { - if (!_droppedCounts[code]) { - _droppedCounts[code] = {}; - } - let bucket = _droppedCounts[code]; - for (let i = 0; i < items.length; i++) { - let t = _getTelType(items[i]); - bucket[t] = (bucket[t] || 0) + 1; - } - _ensureTimer(); - } - - function _incRetry(items: ITelemetryItem[], code: string) { - if (!_retryCounts[code]) { - _retryCounts[code] = {}; - } - let bucket = _retryCounts[code]; - for (let i = 0; i < items.length; i++) { - let t = _getTelType(items[i]); - bucket[t] = (bucket[t] || 0) + 1; - } - _ensureTimer(); - } - - function _createMetric(name: string, value: number, props: { [key: string]: any }): ITelemetryItem { - // Merge standard dimensions - props["language"] = cfg.lang; - props["version"] = cfg.ver; - props["computeType"] = "unknown"; // Browser SDK cannot reliably detect compute type - - return { - name: name, - baseType: "MetricData", - baseData: { - ver: 2, - metrics: [{ name: name, value: 
value }], - properties: props - } - }; - } - - function _flush() { - _timer && _timer.cancel(); - _timer = null; - - // Flush success counts - for (var telType in _successCounts) { - if (_successCounts.hasOwnProperty(telType)) { - var cnt = _successCounts[telType]; - if (cnt > 0) { - cfg.trk(_createMetric(MET_SUCCESS, cnt, { - "telemetry_type": telType - })); - } - } - } - - // Flush dropped counts - for (var code in _droppedCounts) { - if (_droppedCounts.hasOwnProperty(code)) { - var bucket = _droppedCounts[code]; - for (var telType in bucket) { - if (bucket.hasOwnProperty(telType)) { - var cnt = bucket[telType]; - if (cnt > 0) { - cfg.trk(_createMetric(MET_DROPPED, cnt, { - "telemetry_type": telType, - "drop.code": code - })); - } - } - } - } - } - - // Flush retry counts - for (var code in _retryCounts) { - if (_retryCounts.hasOwnProperty(code)) { - var bucket = _retryCounts[code]; - for (var telType in bucket) { - if (bucket.hasOwnProperty(telType)) { - var cnt = bucket[telType]; - if (cnt > 0) { - cfg.trk(_createMetric(MET_RETRY, cnt, { - "telemetry_type": telType, - "retry.code": code - })); - } - } - } - } - } - - // Reset accumulators - _successCounts = {}; - _droppedCounts = {}; - _retryCounts = {}; - } - - return { - eventsSent: _incSuccess, - eventsDiscarded: (events: ITelemetryItem[], reason: number) => { - // Map EventsDiscardedReason to spec drop codes - var code = _mapDropCode(reason); - _incDropped(events, code); - }, - eventsRetry: (events: ITelemetryItem[], statusCode: number) => { - var code = "" + statusCode; // numeric status code as string per spec - _incRetry(events, code); - }, - flush: _flush, - unload: () => { - // Flush remaining counts before unload - _flush(); - _timer && _timer.cancel(); - _timer = null; - } - }; -} - -function _mapDropCode(reason: number): string { - // Maps eEventsDiscardedReason to spec drop.code values - // 0=Unknown → "CLIENT_EXCEPTION" - // 1=NonRetryableStatus → will be overridden by actual status code in 
enhanced notification - // 2=InvalidEvent → "CLIENT_EXCEPTION" - // 5=QueueFull → "CLIENT_EXCEPTION" - // 6=BeaconSendFailure → "CLIENT_EXCEPTION" - switch (reason) { - case 1: return "NonRetryableStatus"; // Overridden with actual code when available - default: return "CLIENT_EXCEPTION"; - } -} -``` - -> **NOTE on `eventsDiscarded` enhancement**: To populate `drop.code` with the actual HTTP status code (e.g., `"402"`, `"403"`), we need the Sender to pass the status code when calling `eventsDiscarded`. We propose adding an optional 4th parameter or using a convention where the `sendType` parameter carries the status code for `NonRetryableStatus` scenarios. See the [Interfaces & Types](#interfaces--types) section. - -### Phase 3: AISKU Integration - -The listener is created and registered during AISKU initialization in `AISKU/src/AISku.ts`. - -```typescript -// Inside the dynamicProto constructor of AppInsightsSku - -// After core.initialize() and channel setup: -if (isFeatureEnabled("SdkStats", cfg, true)) { // on by default - let statsListener = createSdkStatsNotifCbk({ - trk: (item) => { core.track(item); }, - lang: "JavaScript", - ver: EnvelopeCreator.Version, - int: cfg.sdkStatsExportInterval || FLUSH_INTERVAL - }); - core.addNotificationListener(statsListener); - // Store reference for unload - _sdkStatsListener = statsListener; -} -``` - -On unload: -```typescript -if (_sdkStatsListener) { - _sdkStatsListener.flush(); - core.removeNotificationListener(_sdkStatsListener); - _sdkStatsListener = null; -} -``` - -### Phase 4: Configuration & Feature Gating - -#### Configuration surface - -| Config Property | Type | Default | Description | -|----------------|------|---------|-------------| -| `featureOptIn.SdkStats.mode` | `FeatureOptInMode` | `enable` (3) | Enables/disables the feature. Set to `disable` (1) to opt out. | -| `sdkStatsExportInterval` | `number` | `900` (seconds) | Short export interval. Min 60s. 
| - -#### Feature gating - -```typescript -// The feature is ON by default using isFeatureEnabled with default=true -if (isFeatureEnabled("SdkStats", cfg, true)) { - // Initialize listener -} -``` - -This means: -- **No config** → enabled (default) -- `featureOptIn: { "SdkStats": { mode: FeatureOptInMode.disable } }` → disabled -- `featureOptIn: { "SdkStats": { mode: FeatureOptInMode.enable } }` → enabled (explicit) - ---- - -## File-by-File Changes - -### New Files - -| File | Description | -|------|-------------| -| `shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts` | Factory function `createSdkStatsNotifCbk()` → `INotificationListener` that accumulates and flushes SDK Stats metrics | - -### Modified Files - -| File | Changes | -|------|---------| -| **`shared/AppInsightsCore/src/interfaces/ai/INotificationListener.ts`** | Add optional `eventsRetry?(events: ITelemetryItem[], statusCode: number): void` callback | -| **`shared/AppInsightsCore/src/interfaces/ai/INotificationManager.ts`** | Add `eventsRetry?(events: ITelemetryItem[], statusCode: number): void` dispatch method | -| **`shared/AppInsightsCore/src/core/NotificationManager.ts`** | Implement `eventsRetry` dispatch using `_runListeners` (same pattern as `eventsSent`) | -| **`shared/AppInsightsCore/src/constants/InternalConstants.ts`** | Add `STR_EVENTS_RETRY = "eventsRetry"` constant | -| **`shared/AppInsightsCore/src/index.ts`** | Export `createSdkStatsNotifCbk` and `ISdkStatsConfig` | -| **`channels/applicationinsights-channel-js/src/Sender.ts`** | (1) Fire `eventsSent` in `_onSuccess`, (2) Fire `eventsDiscarded` in `_onError`, (3) Fire `eventsRetry` in retry paths of `_checkResponsStatus`, (4) Add `_extractTelemetryItems` helper | -| **`channels/applicationinsights-channel-js/src/Interfaces.ts`** | Ensure `IInternalStorageItem.item` is typed as `ITelemetryItem` (verify this already exists) | -| **`AISKU/src/AISku.ts`** | Register `SdkStatsNotificationListener` on initialization, unregister on 
teardown | - -### Files to Clean Up (Remove Dead Code) - -| File | Action | -|------|--------| -| `shared/AppInsightsCore/Tests/Unit/src/StatsBeat.Tests.ts` | Remove or update for new approach | -| `channels/applicationinsights-channel-js/Tests/Unit/src/StatsBeat.tests.ts` | Remove or update | - ---- - -## Interfaces & Types - -### Updated `INotificationListener` - -```typescript -export interface INotificationListener { - // ... existing callbacks ... - - /** - * [Optional] A function called when events are being retried. - * @param events - The array of events that are being retried. - * @param statusCode - The HTTP status code that triggered the retry. - * @since 3.x.x - */ - eventsRetry?(events: ITelemetryItem[], statusCode: number): void; -} -``` - -### Updated `INotificationManager` - -```typescript -export interface INotificationManager { - // ... existing methods ... - - /** - * Notification for events being retried. - * @param events - The array of events that are being retried. - * @param statusCode - The HTTP status code that triggered the retry. 
- */ - eventsRetry?(events: ITelemetryItem[], statusCode: number): void; -} -``` - -### Enhanced `eventsDiscarded` Convention - -To carry the HTTP status code for `NonRetryableStatus` drops, we use the existing `sendType` parameter as the status code when `reason === eEventsDiscardedReason.NonRetryableStatus`: - -```typescript -// In Sender, when a non-retryable status code triggers a drop: -mgr.eventsDiscarded(items, eEventsDiscardedReason.NonRetryableStatus, statusCode); -// ^^^^^^^^^^ -// sendType parameter repurposed as HTTP status code -``` - -The listener checks `reason === 1 (NonRetryableStatus)` and reads `sendType` as the actual drop code: - -```typescript -eventsDiscarded: (events, reason, sendType) => { - var code; - if (reason === 1 && sendType) { // NonRetryableStatus - code = "" + sendType; // HTTP status code as string - } else { - code = "CLIENT_EXCEPTION"; - } - _incDropped(events, code); -} -``` - -### `ISdkStatsConfig` - -```typescript -export interface ISdkStatsConfig { - /** Track function (typically core.track bound) */ - trk: (item: ITelemetryItem) => void; - /** SDK language name */ - lang: string; - /** SDK version */ - ver: string; - /** Flush interval in ms (default 900000) */ - int?: number; -} -``` - ---- - -## Metric Payload Format - -All three metrics follow the Application Insights custom metric envelope format. The metrics are sent as `ITelemetryItem` objects via `core.track()`, which means the Sender will wrap them in standard envelopes with the customer's iKey and `ai.internal.sdkVersion` tag automatically. 
- -### Item_Success_Count - -```json -{ - "name": "Item Success Count", - "baseType": "MetricData", - "baseData": { - "ver": 2, - "metrics": [{ "name": "Item_Success_Count", "value": 150 }], - "properties": { - "language": "JavaScript", - "version": "3.x.x", - "computeType": "unknown", - "telemetry_type": "DEPENDENCY" - } - } -} -``` - -### Item_Dropped_Count - -```json -{ - "name": "Item Dropped Count", - "baseType": "MetricData", - "baseData": { - "ver": 2, - "metrics": [{ "name": "Item_Dropped_Count", "value": 5 }], - "properties": { - "language": "JavaScript", - "version": "3.x.x", - "computeType": "unknown", - "telemetry_type": "TRACE", - "drop.code": "402", - "drop.reason": "Exceeded daily quota" - } - } -} -``` - -### Item_Retry_Count - -```json -{ - "name": "Item Retry Count", - "baseType": "MetricData", - "baseData": { - "ver": 2, - "metrics": [{ "name": "Item_Retry_Count", "value": 20 }], - "properties": { - "language": "JavaScript", - "version": "3.x.x", - "computeType": "unknown", - "telemetry_type": "DEPENDENCY", - "retry.code": "429" - } - } -} -``` - ---- - -## Bundle Size Strategy - -The following techniques keep the SDK Stats feature's bundle size impact minimal: - -### 1. Factory Function (No Class) - -`createSdkStatsNotifCbk` returns a plain object literal implementing `INotificationListener`. No `dynamicProto` needed because there is no class — just a closure returning an object. This is the smallest possible pattern for a self-contained module. - -### 2. String Constant Reuse - -Metric names and property keys are assigned to short local `var` names: -```typescript -var MET_S = "Item_Success_Count"; -var MET_D = "Item_Dropped_Count"; -var MET_R = "Item_Retry_Count"; -var P_LANG = "language"; -var P_VER = "version"; -``` -The minifier compresses these to single characters, saving bytes across all usages. - -### 3. Shared `_createMetric` Helper - -One function creates all three metric types, differing only by name and properties. 
Avoids code duplication for envelope construction. - -### 4. `for..in` Loops Instead of `Object.keys().forEach()` - -Uses `for..in` with `hasOwnProperty` checks — ES5 compatible, no helper imports. - -### 5. No ES6+ Operators - -- No `?.` (optional chaining) — use explicit null checks -- No `??` (nullish coalescing) — use `||` -- No `...` (spread) — use manual property assignment -- No `async/await` — use `scheduleTimeout` for timer - -### 6. `const enum` for Internal Constants - -Any new enum values (e.g., for drop reason mapping) use `const enum` to inline as integers. - -### 7. `__DynamicConstants` Integration - -After implementation, run `npm run lint-fix` and the Grunt build task to auto-generate `__DynamicConstants.ts` entries for frequently-used strings. - -### 8. Tree-Shakeable Export - -The factory function is marked with `/*#__NO_SIDE_EFFECTS__*/` so bundlers can tree-shake it if unused: - -```typescript -/*#__NO_SIDE_EFFECTS__*/ -export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): INotificationListener & { flush: () => void } { - // ... -} -``` - -### 9. Place in Core, Not a New Package - -Placing the listener in `shared/AppInsightsCore` avoids a new package/build artifact. The function is small enough (~1KB minified) to not warrant its own package. 
- ---- - -## Testing Plan - -### Unit Tests - -#### `shared/AppInsightsCore/Tests/Unit/src/SdkStatsNotificationCbk.Tests.ts` - -| Test | Description | -|------|-------------| -| **Counts success by telemetry_type** | Send `eventsSent` with items of different `baseType`, verify accumulated counts per `telemetry_type` | -| **Counts drops with status code** | Send `eventsDiscarded` with `NonRetryableStatus` reason and status code, verify `drop.code` is correct | -| **Counts drops with CLIENT_EXCEPTION** | Send `eventsDiscarded` with non-status reasons, verify `drop.code` = `CLIENT_EXCEPTION` | -| **Counts retries by status code** | Send `eventsRetry` with 429/503 status codes, verify `retry.code` values | -| **Flushes on 15-min timer** | Use `useFakeTimers`, advance by 15 min, verify metrics are emitted via `trk` callback | -| **Resets counters after flush** | After flush, verify all counters are zero | -| **Multiple flushes accumulate independently** | Two timer periods, verify each period's metrics are independent | -| **No metrics emitted when counts are zero** | Zero activity → no `trk` calls on flush | -| **Unload flushes remaining** | Call `unload()`, verify final flush occurs | -| **telemetry_type mapping** | Verify all `baseType` → `telemetry_type` mappings per spec | -| **SDK Stats metrics don't count themselves** | Verify that Item_Success_Count/Item_Dropped_Count/Item_Retry_Count metrics emitted by the listener are tagged to be excluded from counting (or verify the system doesn't double-count) | - -#### `channels/applicationinsights-channel-js/Tests/Unit/src/Sender.Tests.ts` - -| Test | Description | -|------|-------------| -| **eventsSent fired on 200** | Mock fetch → 200, verify `eventsSent` notification is dispatched with correct items | -| **eventsDiscarded fired on non-retryable status** | Mock fetch → 403, verify `eventsDiscarded` is dispatched with reason `NonRetryableStatus` and status code | -| **eventsRetry fired on retryable status** | Mock fetch 
→ 429, verify `eventsRetry` is dispatched with status code | -| **eventsDiscarded on beacon failure** | Beacon API failure → verify `eventsDiscarded` with `BeaconSendFailure` reason | -| **Partial success (206)** | Mock 206 with partial response, verify `eventsSent` for accepted, `eventsRetry`/`eventsDiscarded` for failed | - -#### `AISKU/Tests/Unit/src/SdkStatsIntegration.Tests.ts` - -| Test | Description | -|------|-------------| -| **Listener registered on init** | Initialize AISKU, verify notification listener is registered | -| **Feature disabled via config** | Set `featureOptIn.SdkStats.mode = disable`, verify no listener registered | -| **End-to-end: track → send → count** | Track events, mock successful send, advance timer, verify SDK Stats metrics appear in pipeline | -| **Unload removes listener** | Call `appInsights.unload()`, verify listener is removed | -| **Dynamic config change** | Change `featureOptIn.SdkStats` after init, verify listener responds | - -### Test Patterns - -All tests follow project conventions: -- Extend `AITestClass` -- Use `this.testCase()` with `IPromise` return for async -- Use `this.useFakeTimers()` for timer control -- Call `core.unload(false)` in cleanup -- Test both static and dynamic configuration changes - ---- - -## Rollout & Migration - -### Phase 1: Internal Validation -1. Implement Sender notifications (`eventsSent` / `eventsDiscarded` / `eventsRetry`) -2. Implement `SdkStatsNotificationCbk` -3. Unit test thoroughly -4. Integration test in AISKU - -### Phase 2: Opt-In Preview -1. Ship with `featureOptIn` default set to `disable` -2. Document opt-in via `featureOptIn: { "SdkStats": { mode: 3 } }` -3. Validate with internal dogfood customers - -### Phase 3: On-By-Default -1. Flip `featureOptIn` default to `enable` (the `isFeatureEnabled("SdkStats", cfg, true)` call uses `true` as default) -2. Document kill switch in README -3. Per spec: ignore legacy `APPLICATIONINSIGHTS_SDKSTATS_ENABLED_PREVIEW` env var -4. 
Use new metric names (`Item_Success_Count` etc.), not legacy `preview.*` names - -### Migration from Internal StatsBeat - -- The old `IStatsBeat` / `IStatsMgr` / `createStatsMgr()` code was never shipped active (entirely commented out) -- No backward compatibility concerns — there is no public API surface to maintain -- The old code remains in the codebase uncommitted/unexported for potential future internal stats use -- Existing commented-out statsbeat integration points in `Sender.ts`, `AppInsightsCore.ts`, and `IAppInsightsCore.ts` are left in place for reference - ---- - -## Appendix: Mapping `baseType` → `telemetry_type` - -| SDK `baseType` | Spec `telemetry_type` | -|---|---| -| `EventData` | `CUSTOM_EVENT` | -| `MetricData` | `CUSTOM_METRIC` | -| `RemoteDependencyData` | `DEPENDENCY` | -| `ExceptionData` | `EXCEPTION` | -| `PageviewData` | `PAGE_VIEW` | -| `PageviewPerformanceData` | `PAGE_VIEW` | -| `MessageData` | `TRACE` | -| `RequestData` | `REQUEST` | -| `AvailabilityData` | `AVAILABILITY` | - -## Appendix: Drop Code Mapping - -| `eEventsDiscardedReason` | Spec `drop.code` | -|---|---| -| `NonRetryableStatus` (1) | Actual HTTP status code (e.g., `"402"`, `"403"`) | -| `InvalidEvent` (2) | `"CLIENT_EXCEPTION"` | -| `SizeLimitExceeded` (3) | `"CLIENT_EXCEPTION"` | -| `KillSwitch` (4) | `"CLIENT_EXCEPTION"` | -| `QueueFull` (5) | `"CLIENT_EXCEPTION"` | -| `BeaconSendFailure` (6) | `"CLIENT_EXCEPTION"` | -| `Unknown` (0) | `"CLIENT_EXCEPTION"` | - -## Appendix: Retry Code Mapping - -| Scenario | Spec `retry.code` | -|---|---| -| HTTP 401, 403, 408, 429, 500, 502, 503, 504 | Actual status code as string (e.g., `"429"`) | -| Network exception during send | `"CLIENT_EXCEPTION"` | -| Timeout during send | `"CLIENT_TIMEOUT"` | - -## Appendix: Self-Counting Prevention - -SDK Stats metrics themselves are telemetry items that flow through the pipeline. 
To prevent them from being counted (creating an infinite feedback loop), the listener should check the `name` property of items in `eventsSent`/`eventsDiscarded`/`eventsRetry` and skip any items where `name` matches `"Item Success Count"`, `"Item Dropped Count"`, or `"Item Retry Count"`. - -```typescript -function _isSdkStatsMetric(item: ITelemetryItem): boolean { - var n = item.name; - return n === "Item Success Count" || n === "Item Dropped Count" || n === "Item Retry Count"; -} - -// In eventsSent handler: -function _incSuccess(items: ITelemetryItem[]) { - for (var i = 0; i < items.length; i++) { - if (!_isSdkStatsMetric(items[i])) { - var t = _getTelType(items[i]); - _successCounts[t] = (_successCounts[t] || 0) + 1; - } - } - _ensureTimer(); -} -``` From 6274032e43d13554f8ba2c73bf0548a0b5367a7b Mon Sep 17 00:00:00 2001 From: Jackson Weber Date: Wed, 18 Feb 2026 15:53:15 -0800 Subject: [PATCH 4/6] Update flush of sdk stats. --- AISKU/src/AISku.ts | 40 +++++++++++-------- .../src/core/NotificationManager.ts | 3 +- .../src/core/SdkStatsNotificationCbk.ts | 26 +++++++----- 3 files changed, 43 insertions(+), 26 deletions(-) diff --git a/AISKU/src/AISku.ts b/AISKU/src/AISku.ts index 304904504..9e3d43a0f 100644 --- a/AISKU/src/AISku.ts +++ b/AISKU/src/AISku.ts @@ -11,13 +11,14 @@ import { IAutoExceptionTelemetry, IChannelControls, IConfig, IConfigDefaults, IConfiguration, ICookieMgr, ICustomProperties, IDependencyTelemetry, IDiagnosticLogger, IDistributedTraceContext, IDynamicConfigHandler, IEventTelemetry, IExceptionTelemetry, ILoadedPlugin, IMetricTelemetry, INotificationManager, IOTelApi, IOTelSpanOptions, IPageViewPerformanceTelemetry, IPageViewTelemetry, IPlugin, - IReadableSpan, IRequestHeaders, ISdkStatsNotifCbk, ISpanScope, ITelemetryContext as Common_ITelemetryContext, ITelemetryInitializerHandler, ITelemetryItem, - ITelemetryPlugin, ITelemetryUnloadState, IThrottleInterval, IThrottleLimit, IThrottleMgrConfig, ITraceApi, ITraceProvider, - ITraceTelemetry, 
IUnloadHook, OTelTimeInput, PropertiesPluginIdentifier, ThrottleMgr, UnloadHandler, WatcherFunction, - _eInternalMessageId, _throwInternal, addPageHideEventListener, addPageUnloadEventListener, cfgDfMerge, cfgDfValidate, - createDynamicConfig, createOTelApi, createProcessTelemetryContext, createSdkStatsNotifCbk, createTraceProvider, createUniqueNamespace, doPerf, eLoggingSeverity, - hasDocument, hasWindow, isArray, isFeatureEnabled, isFunction, isNullOrUndefined, isReactNative, isString, mergeEvtNamespace, - onConfigChange, parseConnectionString, proxyAssign, proxyFunctions, removePageHideEventListener, removePageUnloadEventListener, useSpan + IReadableSpan, IRequestHeaders, ISdkStatsNotifCbk, ISpanScope, ITelemetryContext as Common_ITelemetryContext, + ITelemetryInitializerHandler, ITelemetryItem, ITelemetryPlugin, ITelemetryUnloadState, IThrottleInterval, IThrottleLimit, + IThrottleMgrConfig, ITraceApi, ITraceProvider, ITraceTelemetry, IUnloadHook, OTelTimeInput, PropertiesPluginIdentifier, ThrottleMgr, + UnloadHandler, WatcherFunction, _eInternalMessageId, _throwInternal, addPageHideEventListener, addPageUnloadEventListener, cfgDfMerge, + cfgDfValidate, createDynamicConfig, createOTelApi, createProcessTelemetryContext, createSdkStatsNotifCbk, createTraceProvider, + createUniqueNamespace, doPerf, eLoggingSeverity, hasDocument, hasWindow, isArray, isFeatureEnabled, isFunction, isNullOrUndefined, + isReactNative, isString, mergeEvtNamespace, onConfigChange, parseConnectionString, proxyAssign, proxyFunctions, + removePageHideEventListener, removePageUnloadEventListener, useSpan } from "@microsoft/applicationinsights-core-js"; import { AjaxPlugin as DependenciesPlugin, DependencyInitializerFunction, DependencyListenerFunction, IDependencyInitializerHandler, @@ -398,10 +399,15 @@ export class AppInsightsSku implements IApplicationInsights void; } /** @@ -94,7 +99,7 @@ export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk function 
_isSdkStatsMetric(item: ITelemetryItem): boolean { var n = item.name; - return n === "Item Success Count" || n === "Item Dropped Count" || n === "Item Retry Count"; + return n === MET_SUCCESS || n === MET_DROPPED || n === MET_RETRY; } function _incSuccess(items: ITelemetryItem[]) { @@ -145,8 +150,9 @@ export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk name: name, baseType: "MetricData", baseData: { - ver: 2, - metrics: [{ name: name, value: value }], + name: name, + average: value, + sampleCount: 1, properties: props } } as ITelemetryItem; @@ -174,7 +180,7 @@ export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk // Flush success counts for (telType in _successCounts) { - if (_successCounts.hasOwnProperty(telType)) { + if (objHasOwn(_successCounts, telType)) { cnt = _successCounts[telType]; if (cnt > 0) { var successProps: { [key: string]: any } = {}; @@ -186,10 +192,10 @@ export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk // Flush dropped counts for (code in _droppedCounts) { - if (_droppedCounts.hasOwnProperty(code)) { + if (objHasOwn(_droppedCounts, code)) { bucket = _droppedCounts[code]; for (telType in bucket) { - if (bucket.hasOwnProperty(telType)) { + if (objHasOwn(bucket, telType)) { cnt = bucket[telType]; if (cnt > 0) { var dropProps: { [key: string]: any } = {}; @@ -204,10 +210,10 @@ export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk // Flush retry counts for (code in _retryCounts) { - if (_retryCounts.hasOwnProperty(code)) { + if (objHasOwn(_retryCounts, code)) { bucket = _retryCounts[code]; for (telType in bucket) { - if (bucket.hasOwnProperty(telType)) { + if (objHasOwn(bucket, telType)) { cnt = bucket[telType]; if (cnt > 0) { var retryProps: { [key: string]: any } = {}; @@ -240,6 +246,8 @@ export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk unload: function () { // Flush remaining counts before unload _flush(); + 
// Flush the channel so the metrics just enqueued actually get sent + cfg.fnFlush && cfg.fnFlush(); if (_timer) { _timer.cancel(); _timer = null; From 789a454429c336545b06d363deab33987cd0bb3e Mon Sep 17 00:00:00 2001 From: Jackson Weber Date: Thu, 19 Feb 2026 21:51:44 -0800 Subject: [PATCH 5/6] Fix possible prototype pollution bug. --- .../src/core/SdkStatsNotificationCbk.ts | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts b/shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts index 1ca4b02eb..f8f2b34d3 100644 --- a/shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts +++ b/shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts @@ -94,7 +94,7 @@ export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk function _getTelType(item: ITelemetryItem): string { var bt = item.baseType; - return (bt && _typeMap[bt]) || "CUSTOM_EVENT"; + return (bt && objHasOwn(_typeMap, bt) && _typeMap[bt]) || "CUSTOM_EVENT"; } function _isSdkStatsMetric(item: ITelemetryItem): boolean { @@ -113,10 +113,13 @@ export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk } function _incDropped(items: ITelemetryItem[], code: string) { - if (!_droppedCounts[code]) { - _droppedCounts[code] = {}; + var bucket: { [telType: string]: number }; + if (objHasOwn(_droppedCounts, code)) { + bucket = _droppedCounts[code]; + } else { + bucket = {}; + _droppedCounts[code] = bucket; } - var bucket = _droppedCounts[code]; for (var i = 0; i < items.length; i++) { if (!_isSdkStatsMetric(items[i])) { var t = _getTelType(items[i]); @@ -127,10 +130,13 @@ export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk } function _incRetry(items: ITelemetryItem[], code: string) { - if (!_retryCounts[code]) { - _retryCounts[code] = {}; + var bucket: { [telType: string]: number }; + if (objHasOwn(_retryCounts, code)) { + bucket = _retryCounts[code]; + 
} else { + bucket = {}; + _retryCounts[code] = bucket; } - var bucket = _retryCounts[code]; for (var i = 0; i < items.length; i++) { if (!_isSdkStatsMetric(items[i])) { var t = _getTelType(items[i]); From 3d3323887d3c7e70b8c04868a8af2cbc4a5e9a3b Mon Sep 17 00:00:00 2001 From: Jackson Weber Date: Fri, 20 Feb 2026 13:52:24 -0800 Subject: [PATCH 6/6] Update to resolve prototype pollution issue. --- .../src/core/SdkStatsNotificationCbk.ts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts b/shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts index f8f2b34d3..74da84fad 100644 --- a/shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts +++ b/shared/AppInsightsCore/src/core/SdkStatsNotificationCbk.ts @@ -2,7 +2,7 @@ // Licensed under the MIT License. "use strict"; -import { ITimerHandler, objHasOwn, scheduleTimeout } from "@nevware21/ts-utils"; +import { ITimerHandler, objCreate, objHasOwn, scheduleTimeout } from "@nevware21/ts-utils"; import { INotificationListener } from "../interfaces/ai/INotificationListener"; import { ITelemetryItem } from "../interfaces/ai/ITelemetryItem"; @@ -80,9 +80,9 @@ export interface ISdkStatsNotifCbk extends INotificationListener { */ /*#__NO_SIDE_EFFECTS__*/ export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk { - var _successCounts: { [telType: string]: number } = {}; - var _droppedCounts: { [code: string]: { [telType: string]: number } } = {}; - var _retryCounts: { [code: string]: { [telType: string]: number } } = {}; + var _successCounts: { [telType: string]: number } = objCreate(null); + var _droppedCounts: { [code: string]: { [telType: string]: number } } = objCreate(null); + var _retryCounts: { [code: string]: { [telType: string]: number } } = objCreate(null); var _timer: ITimerHandler; var _interval = cfg.int || FLUSH_INTERVAL; @@ -117,7 +117,7 @@ export function createSdkStatsNotifCbk(cfg: 
ISdkStatsConfig): ISdkStatsNotifCbk if (objHasOwn(_droppedCounts, code)) { bucket = _droppedCounts[code]; } else { - bucket = {}; + bucket = objCreate(null); _droppedCounts[code] = bucket; } for (var i = 0; i < items.length; i++) { @@ -134,7 +134,7 @@ export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk if (objHasOwn(_retryCounts, code)) { bucket = _retryCounts[code]; } else { - bucket = {}; + bucket = objCreate(null); _retryCounts[code] = bucket; } for (var i = 0; i < items.length; i++) { @@ -233,9 +233,9 @@ export function createSdkStatsNotifCbk(cfg: ISdkStatsConfig): ISdkStatsNotifCbk } // Reset accumulators - _successCounts = {}; - _droppedCounts = {}; - _retryCounts = {}; + _successCounts = objCreate(null); + _droppedCounts = objCreate(null); + _retryCounts = objCreate(null); } return {