diff --git a/src/metric_family.cc b/src/metric_family.cc index 13fa23ca3..1b10c5d62 100644 --- a/src/metric_family.cc +++ b/src/metric_family.cc @@ -39,6 +39,7 @@ namespace triton { namespace core { // MetricFamily::MetricFamily( TRITONSERVER_MetricKind kind, const char* name, const char* description) + : kind_(kind), storage_(Metrics::GetMetricsStorage()) { auto registry = Metrics::GetRegistry(); @@ -65,8 +66,6 @@ MetricFamily::MetricFamily( throw std::invalid_argument( "Unsupported kind passed to MetricFamily constructor."); } - - kind_ = kind; } void* @@ -74,53 +73,11 @@ MetricFamily::Add( std::map label_map, Metric* metric, const TritonServerMetricArgs* args) { - void* prom_metric = nullptr; - switch (kind_) { - case TRITONSERVER_METRIC_KIND_COUNTER: { - if (args != nullptr) { - throw std::invalid_argument( - "Unexpected args found in counter Metric constructor."); - } - auto counter_family_ptr = - reinterpret_cast*>(family_); - auto counter_ptr = &counter_family_ptr->Add(label_map); - prom_metric = reinterpret_cast(counter_ptr); - break; - } - case TRITONSERVER_METRIC_KIND_GAUGE: { - if (args != nullptr) { - throw std::invalid_argument( - "Unexpected args found in gauge Metric constructor."); - } - auto gauge_family_ptr = - reinterpret_cast*>(family_); - auto gauge_ptr = &gauge_family_ptr->Add(label_map); - prom_metric = reinterpret_cast(gauge_ptr); - break; - } - case TRITONSERVER_METRIC_KIND_HISTOGRAM: { - if (args == nullptr) { - throw std::invalid_argument( - "Bucket boundaries not found in Metric args."); - } - if (args->kind() != TRITONSERVER_METRIC_KIND_HISTOGRAM) { - throw std::invalid_argument("Metric args not set to histogram kind."); - } - auto histogram_family_ptr = - reinterpret_cast*>(family_); - auto histogram_ptr = - &histogram_family_ptr->Add(label_map, args->buckets()); - prom_metric = reinterpret_cast(histogram_ptr); - break; - } - default: - throw std::invalid_argument( - "Unsupported family kind passed to Metric constructor."); - } + void* prom_metric = storage_->Add(kind_, family_, std::move(label_map), args); std::lock_guard lk(metric_mtx_); - ++prom_metric_ref_cnt_[prom_metric]; child_metrics_.insert(metric); + return prom_metric; } @@ -137,48 +94,7 @@ MetricFamily::Remove(void* prom_metric, Metric* metric) return; } - { - std::lock_guard lk(metric_mtx_); - const auto it = prom_metric_ref_cnt_.find(prom_metric); - if (it != prom_metric_ref_cnt_.end()) { - --it->second; - if (it->second == 0) { - prom_metric_ref_cnt_.erase(it); - } else { - // Done as it is not the last reference - return; - } - } - } - - switch (kind_) { - case TRITONSERVER_METRIC_KIND_COUNTER: { - auto counter_family_ptr = - reinterpret_cast*>(family_); - auto counter_ptr = reinterpret_cast(prom_metric); - counter_family_ptr->Remove(counter_ptr); - break; - } - case TRITONSERVER_METRIC_KIND_GAUGE: { - auto gauge_family_ptr = - reinterpret_cast*>(family_); - auto gauge_ptr = reinterpret_cast(prom_metric); - gauge_family_ptr->Remove(gauge_ptr); - break; - } - case TRITONSERVER_METRIC_KIND_HISTOGRAM: { - auto histogram_family_ptr = - reinterpret_cast*>(family_); - auto histogram_ptr = - reinterpret_cast(prom_metric); - histogram_family_ptr->Remove(histogram_ptr); - break; - } - default: - // Invalid kind should be caught in constructor - LOG_ERROR << "Unsupported kind in Metric destructor."; - break; - } + storage_->Remove(kind_, family_, prom_metric); } void diff --git a/src/metric_family.h b/src/metric_family.h index 2ea13eeda..aedc86176 100644 --- a/src/metric_family.h +++ b/src/metric_family.h @@ -34,34 +34,12 @@ #include #include "infer_parameter.h" +#include "metrics.h" #include "prometheus/registry.h" #include "tritonserver_apis.h" namespace triton { namespace core { -// -// TritonServerMetricArgs -// -// Implementation for TRITONSERVER_MetricArgs. -// -class TritonServerMetricArgs { - public: - TritonServerMetricArgs() = default; - - void* SetHistogramArgs(const double* buckets, uint64_t bucket_count) - { - kind_ = TRITONSERVER_METRIC_KIND_HISTOGRAM; - buckets_ = std::vector(buckets, buckets + bucket_count); - return nullptr; - } - TRITONSERVER_MetricKind kind() const { return kind_; } - const std::vector& buckets() const { return buckets_; } - - private: - TRITONSERVER_MetricKind kind_; - std::vector buckets_; -}; - // // Implementation for TRITONSERVER_MetricFamily. // @@ -93,14 +71,9 @@ class MetricFamily { void* family_; TRITONSERVER_MetricKind kind_; + std::shared_ptr storage_; // Synchronize access of related metric objects std::mutex metric_mtx_; - // Prometheus returns the existing metric pointer if the metric with the same - // set of labels are requested, as a result, different Metric objects may - // refer to the same prometheus metric. So we must track the reference count - // of the metric and request prometheus to remove it only when all references - // are released. - std::unordered_map prom_metric_ref_cnt_; // Maintain references to metrics created from this metric family to // invalidate their references if a family is deleted before its metric std::set child_metrics_; diff --git a/src/metrics.cc b/src/metrics.cc index 21894bb6e..a29fe21cd 100644 --- a/src/metrics.cc +++ b/src/metrics.cc @@ -46,9 +46,114 @@ namespace triton { namespace core { +void* +MetricsStorage::Add( + TRITONSERVER_MetricKind kind, void* family, + std::map label_map, + const TritonServerMetricArgs* args) +{ + void* prom_metric = nullptr; + switch (kind) { + case TRITONSERVER_METRIC_KIND_COUNTER: { + if (args != nullptr) { + throw std::invalid_argument( + "Unexpected args found in counter Metric constructor."); + } + auto counter_family_ptr = + reinterpret_cast*>(family); + auto counter_ptr = &counter_family_ptr->Add(label_map); + prom_metric = reinterpret_cast(counter_ptr); + break; + } + case TRITONSERVER_METRIC_KIND_GAUGE: { + if (args != nullptr) { + throw std::invalid_argument( + "Unexpected args found in gauge Metric constructor."); + } + auto gauge_family_ptr = + reinterpret_cast*>(family); + auto gauge_ptr = &gauge_family_ptr->Add(label_map); + prom_metric = reinterpret_cast(gauge_ptr); + break; + } + case TRITONSERVER_METRIC_KIND_HISTOGRAM: { + if (args == nullptr) { + throw std::invalid_argument( + "Bucket boundaries not found in Metric args."); + } + if (args->kind() != TRITONSERVER_METRIC_KIND_HISTOGRAM) { + throw std::invalid_argument("Metric args not set to histogram kind."); + } + auto histogram_family_ptr = + reinterpret_cast*>(family); + auto histogram_ptr = + &histogram_family_ptr->Add(label_map, args->buckets()); + prom_metric = reinterpret_cast(histogram_ptr); + break; + } + default: + throw std::invalid_argument( + "Unsupported family kind passed to Metric constructor."); + } + + std::lock_guard lk(metric_mtx_); + ++prom_metric_ref_cnt_[prom_metric]; + + return prom_metric; +} + +void +MetricsStorage::Remove( + TRITONSERVER_MetricKind kind, void* family, void* prom_metric) +{ + { + std::lock_guard lk(metric_mtx_); + const auto it = prom_metric_ref_cnt_.find(prom_metric); + if (it != prom_metric_ref_cnt_.end()) { + --it->second; + if (it->second == 0) { + prom_metric_ref_cnt_.erase(it); + } else { + // Done as it is not the last reference + return; + } + } + } + + switch (kind) { + case TRITONSERVER_METRIC_KIND_COUNTER: { + auto counter_family_ptr = + reinterpret_cast*>(family); + auto counter_ptr = reinterpret_cast(prom_metric); + counter_family_ptr->Remove(counter_ptr); + break; + } + case TRITONSERVER_METRIC_KIND_GAUGE: { + auto gauge_family_ptr = + reinterpret_cast*>(family); + auto gauge_ptr = reinterpret_cast(prom_metric); + gauge_family_ptr->Remove(gauge_ptr); + break; + } + case TRITONSERVER_METRIC_KIND_HISTOGRAM: { + auto histogram_family_ptr = + reinterpret_cast*>(family); + auto histogram_ptr = + reinterpret_cast(prom_metric); + histogram_family_ptr->Remove(histogram_ptr); + break; + } + default: + // Invalid kind should be caught in constructor + LOG_ERROR << "Unsupported kind in Metric destructor."; + break; + } +} + Metrics::Metrics() : registry_(std::make_shared()), serializer_(new prometheus::TextSerializer()), + metrics_storage_(std::make_shared()), inf_success_family_( prometheus::BuildCounter() .Name("nv_inference_request_success") @@ -1040,6 +1145,13 @@ Metrics::GetRegistry() return singleton->registry_; } +std::shared_ptr +Metrics::GetMetricsStorage() +{ + auto singleton = Metrics::GetSingleton(); + return singleton->metrics_storage_; +} + const std::string Metrics::SerializedMetrics() { diff --git a/src/metrics.h b/src/metrics.h index ac04ebebc..f130a4770 100644 --- a/src/metrics.h +++ b/src/metrics.h @@ -107,6 +107,49 @@ struct DcgmMetadata { }; #endif // TRITON_ENABLE_METRICS_GPU +// +// TritonServerMetricArgs +// +// Implementation for TRITONSERVER_MetricArgs. +// +class TritonServerMetricArgs { + public: + TritonServerMetricArgs() = default; + + void* SetHistogramArgs(const double* buckets, uint64_t bucket_count) + { + kind_ = TRITONSERVER_METRIC_KIND_HISTOGRAM; + buckets_ = std::vector(buckets, buckets + bucket_count); + return nullptr; + } + TRITONSERVER_MetricKind kind() const { return kind_; } + const std::vector& buckets() const { return buckets_; } + + private: + TRITONSERVER_MetricKind kind_; + std::vector buckets_; +}; + +class MetricsStorage { + public: + void* Add( + TRITONSERVER_MetricKind kind, void* family, + std::map label_map, + const TritonServerMetricArgs* args); + + void Remove(TRITONSERVER_MetricKind kind, void* family, void* prom_metric); + + private: + // Synchronize access of related metric objects + std::mutex metric_mtx_; + // Prometheus returns the existing metric pointer if the metric with the same + // set of labels are requested, as a result, different Metric objects may + // refer to the same prometheus metric. So we must track the reference count + // of the metric and request prometheus to remove it only when all references + // are released. + std::unordered_map prom_metric_ref_cnt_; +}; + class Metrics { public: // Return the hash value of the labels @@ -142,6 +185,9 @@ class Metrics { // Get the prometheus registry static std::shared_ptr GetRegistry(); + // Get the storage that holds prometheus metrics with reference count + static std::shared_ptr GetMetricsStorage(); + // Get serialized metrics static const std::string SerializedMetrics(); @@ -297,6 +343,7 @@ class Metrics { std::shared_ptr registry_; std::unique_ptr serializer_; + std::shared_ptr metrics_storage_; // DLIS-4761: Refactor into groups of families prometheus::Family& inf_success_family_;