Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions src/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,14 @@ constexpr char kInitialStateFolder[] = "initial_state";
// Internal metric names.
constexpr char kPendingRequestMetric[] = "inf_pending_request_count";
constexpr char kModelLoadTimeMetric[] = "model_load_time";

// Internal keys identifying the latency histograms. These are the short names
// used to look up histogram families and bucket options; the fully qualified
// "nv_inference_*" names exposed by metrics reporting are mapped onto them
// separately.
constexpr char kFirstResponseHistogram[] = "first_response_histogram";
constexpr char kRequestDurationHistogram[] = "request_duration_histogram";
constexpr char kQueueDurationHistogram[] = "queue_duration_histogram";
constexpr char kComputeInputDurationHistogram[] =
    "compute_input_duration_histogram";
constexpr char kComputeInferDurationHistogram[] =
    "compute_infer_duration_histogram";
constexpr char kComputeOutputDurationHistogram[] =
    "compute_output_duration_histogram";

// Time unit conversion factors, expressed in nanoseconds.
constexpr uint64_t NANOS_PER_SECOND = 1'000'000'000;
constexpr uint64_t NANOS_PER_MILLIS = 1'000'000;
Expand Down
14 changes: 14 additions & 0 deletions src/infer_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,20 @@ InferenceStatsAggregator::UpdateSuccessWithDuration(
"compute_infer_duration", compute_infer_duration_ns / 1000);
metric_reporter->ObserveSummary(
"compute_output_duration", compute_output_duration_ns / 1000);
// Histogram Latencies
// FIXME [DLIS-4762]: request histogram is disabled when cache is enabled.
if (!reporter_config.cache_enabled_) {
metric_reporter->ObserveHistogram(
kRequestDurationHistogram, request_duration_ns / 1000);
}
metric_reporter->ObserveHistogram(
kQueueDurationHistogram, queue_duration_ns / 1000);
metric_reporter->ObserveHistogram(
kComputeInputDurationHistogram, compute_input_duration_ns / 1000);
metric_reporter->ObserveHistogram(
kComputeInferDurationHistogram, compute_infer_duration_ns / 1000);
metric_reporter->ObserveHistogram(
kComputeOutputDurationHistogram, compute_output_duration_ns / 1000);
}
#endif // TRITON_ENABLE_METRICS
}
Expand Down
10 changes: 10 additions & 0 deletions src/metric_model_reporter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,16 @@ MetricModelReporter::InitializeHistograms(
if (config_.latency_histograms_enabled_) {
histogram_families_[kFirstResponseHistogram] =
&Metrics::FamilyFirstResponseDuration();
histogram_families_[kRequestDurationHistogram] =
&Metrics::FamilyRequestDurationHistogram();
histogram_families_[kQueueDurationHistogram] =
&Metrics::FamilyQueueDurationHistogram();
histogram_families_[kComputeInputDurationHistogram] =
&Metrics::FamilyComputeInputDurationHistogram();
histogram_families_[kComputeInferDurationHistogram] =
&Metrics::FamilyComputeInferDurationHistogram();
histogram_families_[kComputeOutputDurationHistogram] =
&Metrics::FamilyComputeOutputDurationHistogram();
}

for (auto& iter : histogram_families_) {
Expand Down
23 changes: 19 additions & 4 deletions src/metric_model_reporter.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,17 @@ struct MetricReporterConfig {
// Create and use Summaries for per-model latency related metrics
bool latency_summaries_enabled_ = false;
// Default bucket boundaries used for each histogram metric. Each value
// represents a boundary. For example, {100, 500, 2000, 5000} are latencies.
// in milliseconds in first_response_histogram.
// represents a boundary. For example, {100, 500, 2000, 5000} are latencies
// in milliseconds for first_response_histogram. Other duration histograms
// use microseconds as their unit.
std::unordered_map<std::string, prometheus::Histogram::BucketBoundaries>
histogram_options_ = {{kFirstResponseHistogram, {100, 500, 2000, 5000}}};
histogram_options_ = {
{kFirstResponseHistogram, {100, 500, 2000, 5000}},
{kRequestDurationHistogram, {1000, 5000, 25000, 50000, 100000}},
{kQueueDurationHistogram, {100, 1000, 5000, 10000, 50000}},
{kComputeInputDurationHistogram, {100, 500, 1000, 5000, 10000}},
{kComputeInferDurationHistogram, {1000, 5000, 25000, 50000, 100000}},
{kComputeOutputDurationHistogram, {100, 500, 1000, 5000, 10000}}};

// Quantiles used for any summary metrics. Each pair of values represents
// { quantile, error }. For example, {0.90, 0.01} means to compute the
Expand All @@ -82,7 +89,15 @@ struct MetricReporterConfig {
// "ModelMetrics" with the full name displayed from metrics reporting while a
// different name is used internally. All new histograms must update the map.
const std::unordered_map<std::string, std::string> metric_map_ = {
{"nv_inference_first_response_histogram_ms", kFirstResponseHistogram}};
{"nv_inference_first_response_histogram_ms", kFirstResponseHistogram},
{"nv_inference_request_duration_histogram_us", kRequestDurationHistogram},
{"nv_inference_queue_duration_histogram_us", kQueueDurationHistogram},
{"nv_inference_compute_input_duration_histogram_us",
kComputeInputDurationHistogram},
{"nv_inference_compute_infer_duration_histogram_us",
kComputeInferDurationHistogram},
{"nv_inference_compute_output_duration_histogram_us",
kComputeOutputDurationHistogram}};
#endif // TRITON_ENABLE_METRICS
};

Expand Down
26 changes: 26 additions & 0 deletions src/metrics.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,32 @@ Metrics::Metrics()
.Name("nv_inference_first_response_histogram_ms")
.Help("Duration from request to first response in milliseconds")
.Register(*registry_)),
inf_request_duration_histogram_us_family_(
prometheus::BuildHistogram()
.Name("nv_inference_request_duration_histogram_us")
.Help("Histogram of end-to-end inference request duration, "
"in microseconds")
.Register(*registry_)),
inf_queue_duration_histogram_us_family_(
prometheus::BuildHistogram()
.Name("nv_inference_queue_duration_histogram_us")
.Help("Histogram of inference queuing duration, in microseconds")
.Register(*registry_)),
inf_compute_input_duration_histogram_us_family_(
prometheus::BuildHistogram()
.Name("nv_inference_compute_input_duration_histogram_us")
.Help("Histogram of compute input duration, in microseconds")
.Register(*registry_)),
inf_compute_infer_duration_histogram_us_family_(
prometheus::BuildHistogram()
.Name("nv_inference_compute_infer_duration_histogram_us")
.Help("Histogram of compute inference duration, in microseconds")
.Register(*registry_)),
inf_compute_output_duration_histogram_us_family_(
prometheus::BuildHistogram()
.Name("nv_inference_compute_output_duration_histogram_us")
.Help("Histogram of compute output duration, in microseconds")
.Register(*registry_)),

// Summaries
inf_request_summary_us_family_(
Expand Down
36 changes: 36 additions & 0 deletions src/metrics.h
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,32 @@ class Metrics {
return GetSingleton()->inf_first_response_histogram_ms_family_;
}

// Metric family of the end-to-end inference request duration histogram, in
// microseconds
static prometheus::Family<prometheus::Histogram>&
FamilyRequestDurationHistogram()
{
return GetSingleton()->inf_request_duration_histogram_us_family_;
}
// Metric family of the inference queuing duration histogram, in microseconds
static prometheus::Family<prometheus::Histogram>&
FamilyQueueDurationHistogram()
{
return GetSingleton()->inf_queue_duration_histogram_us_family_;
}
// Metric family of the compute input duration histogram, in microseconds
static prometheus::Family<prometheus::Histogram>&
FamilyComputeInputDurationHistogram()
{
return GetSingleton()->inf_compute_input_duration_histogram_us_family_;
}
// Metric family of the compute inference duration histogram, in microseconds
static prometheus::Family<prometheus::Histogram>&
FamilyComputeInferDurationHistogram()
{
return GetSingleton()->inf_compute_infer_duration_histogram_us_family_;
}
// Metric family of the compute output duration histogram, in microseconds
static prometheus::Family<prometheus::Histogram>&
FamilyComputeOutputDurationHistogram()
{
return GetSingleton()->inf_compute_output_duration_histogram_us_family_;
}

// Metric family of load time per model
static prometheus::Family<prometheus::Gauge>& FamilyModelLoadTime()
{
Expand Down Expand Up @@ -331,6 +357,16 @@ class Metrics {
// Histograms
// Duration from request to first response; the `_ms` suffix marks
// milliseconds.
prometheus::Family<prometheus::Histogram>&
inf_first_response_histogram_ms_family_;
// Latency histogram families for the whole request and for its queue,
// compute-input, compute-infer and compute-output stages; the `_us` suffix
// marks microseconds.
prometheus::Family<prometheus::Histogram>&
inf_request_duration_histogram_us_family_;
prometheus::Family<prometheus::Histogram>&
inf_queue_duration_histogram_us_family_;
prometheus::Family<prometheus::Histogram>&
inf_compute_input_duration_histogram_us_family_;
prometheus::Family<prometheus::Histogram>&
inf_compute_infer_duration_histogram_us_family_;
prometheus::Family<prometheus::Histogram>&
inf_compute_output_duration_histogram_us_family_;

// Summaries
prometheus::Family<prometheus::Summary>& inf_request_summary_us_family_;
Expand Down
Loading