diff --git a/.rubocop.yml b/.rubocop.yml index b33c766d..e6e525d4 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -3,7 +3,7 @@ plugins: - rubocop-performance AllCops: - TargetRubyVersion: '3.0' + TargetRubyVersion: 3.0 NewCops: enable Exclude: - 'sample_start.rb' diff --git a/lib/solarwinds_apm/api/transaction_name.rb b/lib/solarwinds_apm/api/transaction_name.rb index 77061e7d..8860a231 100644 --- a/lib/solarwinds_apm/api/transaction_name.rb +++ b/lib/solarwinds_apm/api/transaction_name.rb @@ -21,15 +21,16 @@ module TransactionName # # === Example: # - # class DogfoodsController < ApplicationController + # class OrdersController < ApplicationController # # def create - # @dogfood = Dogfood.new(params.permit(:brand, :name)) - # @dogfood.save + # @order = Order.new(params.permit(:item, :quantity)) + # if @order.save + # custom_name = "orderscontroller.create_for_#{params[:item]}" + # SolarWindsAPM::API.set_transaction_name(custom_name) + # end # - # SolarWindsAPM::API.set_transaction_name("dogfoodscontroller.create_for_#{params[:brand]}") - # - # redirect_to @dogfood + # redirect_to @order # end # # end diff --git a/lib/solarwinds_apm/config.rb b/lib/solarwinds_apm/config.rb index a357497b..0597a5b0 100644 --- a/lib/solarwinds_apm/config.rb +++ b/lib/solarwinds_apm/config.rb @@ -87,21 +87,32 @@ def self.set_log_level SolarWindsAPM.logger.level = SW_LOG_LEVEL_MAPPING.dig(log_level, :stdlib) || ::Logger::INFO # default log level info end + # e.g. enable_disable_config('STRING', :key, value, false, bool: true) def self.enable_disable_config(env_var, key, value, default, bool: false) - env_value = ENV[env_var.to_s]&.downcase + raw_env_value = ENV.fetch(env_var, '') + env_value = raw_env_value.downcase valid_env_values = bool ? %w[true false] : %w[enabled disabled] - if env_var && valid_env_values.include?(env_value) + if !env_var.empty? && valid_env_values.include?(env_value) value = bool ? true?(env_value) : env_value.to_sym - elsif env_var && !env_value.to_s.empty? - SolarWindsAPM.logger.warn("[#{name}/#{__method__}] #{env_var} must be #{valid_env_values.join('/')} (current setting is #{ENV.fetch(env_var, nil)}). Using default value: #{default}.") - return @@config[key.to_sym] = default + elsif !env_var.empty? && !raw_env_value.empty? + SolarWindsAPM.logger.warn do + "[#{name}/#{__method__}] #{env_var} must be #{valid_env_values.join('/')} (current setting is #{raw_env_value}). Using default value: #{default}." + end + return @@config[key] = default end - return @@config[key.to_sym] = value unless (bool && !boolean?(value)) || (!bool && !symbol?(value)) + # Validate final value efficiently + valid = bool ? boolean?(value) : symbol?(value) + + unless valid + SolarWindsAPM.logger.warn do + "[#{name}/#{__method__}] :#{key} must be #{valid_env_values.join('/')}. Using default value: #{default}." + end + return @@config[key] = default + end - SolarWindsAPM.logger.warn("[#{name}/#{__method__}] :#{key} must be a #{valid_env_values.join('/')}. Using default value: #{default}.") - @@config[key.to_sym] = default + @@config[key] = value end def self.true?(obj) @@ -211,7 +222,7 @@ def self.[]=(key, value) enable_disable_config('SW_APM_TRIGGER_TRACING_MODE', key, value, :enabled) when :tracing_mode - enable_disable_config(nil, key, value, :enabled) + enable_disable_config('', key, value, :enabled) when :tag_sql enable_disable_config('SW_APM_TAG_SQL', key, value, false, bool: true) @@ -242,9 +253,17 @@ def self.compile_settings(settings) return end - # `tracing: disabled` is the default - disabled = settings.select { |v| !v.key?(:tracing) || v[:tracing] == :disabled } - enabled = settings.select { |v| v[:tracing] == :enabled } + # `tracing: disabled` is the default; below only separate the enabled and disabled settings + result = settings.each_with_object({ enabled: [], disabled: [] }) do |setting, acc| + if setting[:tracing] == :enabled + acc[:enabled] << setting + elsif !setting.key?(:tracing) || setting[:tracing] == :disabled + acc[:disabled] << setting + end + end + + enabled = result[:enabled] + disabled = result[:disabled] SolarWindsAPM::Config[:enabled_regexps] = compile_regexp(enabled) SolarWindsAPM::Config[:disabled_regexps] = compile_regexp(disabled) diff --git a/lib/solarwinds_apm/opentelemetry/otlp_processor.rb b/lib/solarwinds_apm/opentelemetry/otlp_processor.rb index f776e340..c447925f 100644 --- a/lib/solarwinds_apm/opentelemetry/otlp_processor.rb +++ b/lib/solarwinds_apm/opentelemetry/otlp_processor.rb @@ -25,8 +25,8 @@ class OTLPProcessor def initialize(txn_manager) @txn_manager = txn_manager - @meters = { 'sw.apm.request.metrics' => ::OpenTelemetry.meter_provider.meter('sw.apm.request.metrics') } @metrics = init_response_time_metrics + @is_lambda = SolarWindsAPM::Utils.determine_lambda @transaction_name = nil end @@ -40,21 +40,24 @@ def on_start(span, parent_context) trace_flags = span.context.trace_flags.sampled? ? '01' : '00' @txn_manager&.set_root_context_h(span.context.hex_trace_id, "#{span.context.hex_span_id}-#{trace_flags}") span.add_attributes({ SW_IS_ENTRY_SPAN => true }) + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] processor on_start end" } rescue StandardError => e SolarWindsAPM.logger.info { "[#{self.class}/#{__method__}] processor on_start error: #{e.message}" } end def on_finishing(span) + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] processor on_finishing span attributes: #{span.attributes}" } return if non_entry_span(span: span) @transaction_name = calculate_transaction_names(span) span.set_attribute(SW_TRANSACTION_NAME, @transaction_name) @txn_manager.delete_root_context_h(span.context.hex_trace_id) + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] processor on_finishing end" } end # @param [Span] span the (immutable) {Span} that just ended. def on_finish(span) - SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] processor on_finish span: #{span.to_span_data.inspect}" } + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] processor on_finish span attributes: #{span.attributes}" } return if non_entry_span(span: span) record_request_metrics(span) @@ -63,10 +66,9 @@ def on_finish(span) ::OpenTelemetry.meter_provider.metric_readers.each do |reader| reader.pull if reader.respond_to? :pull end - - SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] processor on_finish succeed" } + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] processor on_finish end" } rescue StandardError => e - SolarWindsAPM.logger.info { "[#{self.class}/#{__method__}] error processing span on_finish: #{e.message}" } + SolarWindsAPM.logger.info { "[#{self.class}/#{__method__}] processor on_finish error: #{e.message}" } end # @param [optional Numeric] timeout An optional timeout in seconds. @@ -94,7 +96,9 @@ def init_response_time_metrics unit: 'ms') end - instrument = @meters['sw.apm.request.metrics'].create_histogram('trace.service.response_time', unit: 'ms', description: 'Duration of each entry span for the service, typically meaning the time taken to process an inbound request.') + meter = ::OpenTelemetry.meter_provider.meter('sw.apm.request.metrics') + instrument = meter.create_histogram('trace.service.response_time', unit: 'ms', description: 'Duration of each entry span for the service, typically meaning the time taken to process an inbound request.') + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Adding ExponentialBucketHistogram for response time metrics: #{instrument.inspect}" } { response_time: instrument } end @@ -104,37 +108,42 @@ def meter_attributes(span) SW_TRANSACTION_NAME => @transaction_name } - if span_http?(span) + is_http_span = span_http?(span) + + if is_http_span http_status_code = get_http_status_code(span) meter_attrs['http.status_code'] = http_status_code if http_status_code != 0 meter_attrs['http.method'] = span.attributes[HTTP_METHOD] if span.attributes[HTTP_METHOD] end - SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] meter_attrs: #{meter_attrs.inspect}" } + + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] is_http_span: #{is_http_span}; meter_attrs: #{meter_attrs.inspect}" } meter_attrs.compact! meter_attrs end def calculate_lambda_transaction_name(span_name) - (ENV['SW_APM_TRANSACTION_NAME'] || ENV['AWS_LAMBDA_FUNCTION_NAME'] || span_name || 'unknown').slice(0, SolarWindsAPM::Constants::MAX_TXN_NAME_LENGTH) + txn_name = (ENV['SW_APM_TRANSACTION_NAME'] || ENV['AWS_LAMBDA_FUNCTION_NAME'] || span_name || 'unknown').slice(0, SolarWindsAPM::Constants::MAX_TXN_NAME_LENGTH) + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Lambda transaction name: #{txn_name} (from env_txn=#{ENV.fetch('SW_APM_TRANSACTION_NAME', nil)}, lambda_func=#{ENV.fetch('AWS_LAMBDA_FUNCTION_NAME', nil)}, span_name=#{span_name})" } + txn_name end # Get trans_name and url_tran of this span instance. # Predecessor order: custom SDK > env var SW_APM_TRANSACTION_NAME > automatic naming def calculate_transaction_names(span) - return calculate_lambda_transaction_name(span.name) if SolarWindsAPM::Utils.determine_lambda + return calculate_lambda_transaction_name(span.name) if @is_lambda trace_span_id = "#{span.context.hex_trace_id}-#{span.context.hex_span_id}" trans_name = @txn_manager.get(trace_span_id) if trans_name - SolarWindsAPM.logger.debug do - "[#{self.class}/#{__method__}] found trans name from txn_manager: #{trans_name} by #{trace_span_id}" - end + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Using transaction name from txn_manager: #{trans_name} (#{trace_span_id})" } @txn_manager.del(trace_span_id) elsif !ENV['SW_APM_TRANSACTION_NAME'].to_s.empty? trans_name = ENV.fetch('SW_APM_TRANSACTION_NAME', nil) + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Using transaction name from env var: #{trans_name}" } else - trans_name = span.attributes[HTTP_ROUTE] || nil - trans_name = span.name if trans_name.to_s.empty? && span.name + trans_name = span.attributes[HTTP_ROUTE] + trans_name = span.name if trans_name.to_s.empty? + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Using transaction name from span.attributes: #{span.attributes[HTTP_ROUTE]} or span.name: #{span.name}" } end trans_name.to_s.slice(0, SolarWindsAPM::Constants::MAX_TXN_NAME_LENGTH) end diff --git a/lib/solarwinds_apm/opentelemetry/solarwinds_propagator.rb b/lib/solarwinds_apm/opentelemetry/solarwinds_propagator.rb index 26a66f94..4dd4ab42 100644 --- a/lib/solarwinds_apm/opentelemetry/solarwinds_propagator.rb +++ b/lib/solarwinds_apm/opentelemetry/solarwinds_propagator.rb @@ -40,6 +40,9 @@ def extract(carrier, context: ::OpenTelemetry::Context.current, context = ::OpenTelemetry::Context.new({}) if context.nil? context = inject_extracted_header(carrier, context, getter, XTRACEOPTIONS_HEADER_NAME, INTL_SWO_X_OPTIONS_KEY) inject_extracted_header(carrier, context, getter, XTRACEOPTIONS_SIGNATURE_HEADER_NAME, INTL_SWO_SIGNATURE_KEY) + rescue StandardError => e + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Extraction failed: #{e.message}" } + context || ::OpenTelemetry::Context.current end # Inject trace context into the supplied carrier. @@ -80,10 +83,13 @@ def inject(carrier, context: ::OpenTelemetry::Context.current, end setter.set(carrier, TRACESTATE_HEADER_NAME, Utils.trace_state_header(trace_state)) end + rescue StandardError => e + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Injection failed: #{e.message}" } end - # Returns the predefined propagation fields. If your carrier is reused, you - # should delete the fields returned by this method before calling +inject+. + # Returns the predefined propagation fields, required by upstream. + # If your carrier is reused, you should delete the fields returned by + # this method before calling +inject+. # # @return [Array] a list of fields that will be used by this propagator. def fields diff --git a/lib/solarwinds_apm/opentelemetry/solarwinds_response_propagator.rb b/lib/solarwinds_apm/opentelemetry/solarwinds_response_propagator.rb index d618c1f8..e9ab90a5 100644 --- a/lib/solarwinds_apm/opentelemetry/solarwinds_response_propagator.rb +++ b/lib/solarwinds_apm/opentelemetry/solarwinds_response_propagator.rb @@ -56,31 +56,20 @@ def inject(carrier, context: ::OpenTelemetry::Context.current, xtraceoptions_response) end setter.set(carrier, HTTP_HEADER_ACCESS_CONTROL_EXPOSE_HEADERS, exposed_headers.join(',')) - end - - # Returns the predefined propagation fields. If your carrier is reused, you - # should delete the fields returned by this method before calling +inject+. - # - # @return [Array] a list of fields that will be used by this propagator. - def fields - TRACESTATE_HEADER_NAME + rescue StandardError => e + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Injection failed: #{e.message}" } end private - - # sw_xtraceoptions_response_key -> xtrace_options_response + # SW_XTRACEOPTIONS_RESPONSE_KEY -> xtrace_options_response def recover_response_from_tracestate(span_context) sanitized = span_context.tracestate.value(SW_XTRACEOPTIONS_RESPONSE_KEY) sanitized = '' if sanitized.nil? sanitized = sanitized.gsub(SolarWindsAPM::Constants::INTL_SWO_EQUALS_W3C_SANITIZED, SolarWindsAPM::Constants::INTL_SWO_EQUALS) sanitized = sanitized.gsub(':', SolarWindsAPM::Constants::INTL_SWO_EQUALS) - sanitized = sanitized.gsub(SolarWindsAPM::Constants::INTL_SWO_COMMA_W3C_SANITIZED, - SolarWindsAPM::Constants::INTL_SWO_COMMA) - SolarWindsAPM.logger.debug do - "[#{self.class}/#{__method__}] recover_response_from_tracestate sanitized: #{sanitized.inspect}" - end - sanitized + sanitized.gsub(SolarWindsAPM::Constants::INTL_SWO_COMMA_W3C_SANITIZED, + SolarWindsAPM::Constants::INTL_SWO_COMMA) end end end diff --git a/lib/solarwinds_apm/sampling.rb b/lib/solarwinds_apm/sampling.rb index c18f0ce7..a2acc653 100644 --- a/lib/solarwinds_apm/sampling.rb +++ b/lib/solarwinds_apm/sampling.rb @@ -10,7 +10,6 @@ require 'fileutils' require 'tempfile' require 'uri' -require 'opentelemetry-sdk' require_relative 'sampling/sampling_constants' require_relative 'sampling/dice' diff --git a/lib/solarwinds_apm/sampling/dice.rb b/lib/solarwinds_apm/sampling/dice.rb index 8ccf0a35..79dde5bf 100644 --- a/lib/solarwinds_apm/sampling/dice.rb +++ b/lib/solarwinds_apm/sampling/dice.rb @@ -12,7 +12,7 @@ class Dice attr_reader :rate, :scale def initialize(settings) - @scale = settings[:scale] + @scale = settings[:scale] || 1_000_000 @rate = settings[:rate] || 0 @random_generator = Random.new end diff --git a/lib/solarwinds_apm/sampling/http_sampler.rb b/lib/solarwinds_apm/sampling/http_sampler.rb index 5dcace5a..fe0488d6 100644 --- a/lib/solarwinds_apm/sampling/http_sampler.rb +++ b/lib/solarwinds_apm/sampling/http_sampler.rb @@ -25,6 +25,7 @@ def initialize(config) @pid = nil @thread = nil + @logger.debug { "[#{self.class}/#{__method__}] HttpSampler initialized: url=#{@url}, service=#{@service}, hostname=#{@hostname}, setting_url=#{@setting_url}" } reset_on_fork end @@ -57,7 +58,6 @@ def hostname def fetch_with_timeout(url, timeout_seconds = nil) uri = url timeout = timeout_seconds || REQUEST_TIMEOUT - deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout remaining = lambda { @@ -94,26 +94,33 @@ def fetch_with_timeout(url, timeout_seconds = nil) # a endless loop within a thread (non-blocking) def settings_request + @logger.debug { "[#{self.class}/#{__method__}] Starting settings request loop" } + sleep_duration = GET_SETTING_DURATION loop do - @logger.debug { "Retrieving sampling settings from #{@setting_url}" } - response = fetch_with_timeout(@setting_url) - parsed = response.nil? ? nil : JSON.parse(response.body) - @logger.debug { "parsed settings in json: #{parsed.inspect}" } + # Check for nil response from timeout + unless response.is_a?(Net::HTTPSuccess) + @logger.warn { "[#{self.class}/#{__method__}] Failed to retrieve settings due to timeout." } + next + end + + parsed = JSON.parse(response.body) if update_settings(parsed) # update the settings before the previous ones expire with some time to spare expiry = (parsed['timestamp'].to_i + parsed['ttl'].to_i) expiry_timeout = expiry - REQUEST_TIMEOUT - Time.now.to_i - sleep([0, expiry_timeout].max) + sleep_duration = [0, expiry_timeout].max else - @logger.warn { 'Retrieved sampling settings are invalid. Ensure proper configuration.' } - sleep(GET_SETTING_DURATION) + @logger.warn { "[#{self.class}/#{__method__}] Retrieved sampling settings are invalid. Ensure proper configuration." } end + rescue JSON::ParserError => e + @logger.warn { "[#{self.class}/#{__method__}] JSON parsing error: #{e.message}" } rescue StandardError => e - @logger.warn { "Failed to retrieve sampling settings (#{e.message}), tracing will be disabled until valid ones are available." } - sleep(GET_SETTING_DURATION) + @logger.warn { "[#{self.class}/#{__method__}] Failed to retrieve sampling settings (#{e.message}), tracing will be disabled until valid ones are available." } + ensure + sleep(sleep_duration) end end end diff --git a/lib/solarwinds_apm/sampling/json_sampler.rb b/lib/solarwinds_apm/sampling/json_sampler.rb index 958bef9d..8f13ea21 100644 --- a/lib/solarwinds_apm/sampling/json_sampler.rb +++ b/lib/solarwinds_apm/sampling/json_sampler.rb @@ -15,11 +15,12 @@ def initialize(config, path = nil) @path = path || DEFAULT_PATH @expiry = Time.now.to_i + @last_mtime = nil + @logger.debug { "[#{self.class}/#{__method__}] JsonSampler initialized: path=#{@path}, initial_expiry=#{@expiry}" } loop_check end # only json sampler will need to check if the settings.json file - # updated or not from collector extention def should_sample?(params) loop_check super @@ -27,26 +28,40 @@ def should_sample?(params) private + # multi-thread is rare in lambda environment, + # here we don't use mutex to guard the execution def loop_check - # Update if we're within 10s of expiry - return if Time.now.to_i + 10 < @expiry + return if Time.now.to_i < @expiry - 10 + # 1. Read and parse settings from the file. begin - contents = File.read(@path) - unparsed = JSON.parse(contents) - rescue StandardError => e - @logger.debug { "missing or invalid settings file; Error: #{e.message}" } + current_mtime = File.mtime(@path) + return if @last_mtime && current_mtime == @last_mtime + + settings_data = JSON.parse(File.read(@path)) + @last_mtime = current_mtime + rescue Errno::ENOENT + # File doesn't exist due to timing, missing collector, etc + @logger.error { "[#{self.class}##{__method__}] Settings file not found at #{@path}." } + return + rescue JSON::ParserError => e + @logger.error { "[#{self.class}##{__method__}] JSON parsing error in #{@path}: #{e.message}" } return end - unless unparsed.is_a?(Array) && unparsed.length == 1 - @logger.debug { "invalid settings file : #{unparsed}" } + # 2. Validate the structure of the parsed settings. + unless settings_data.is_a?(Array) && settings_data.length == 1 + @logger.error { "[#{self.class}##{__method__}] Invalid settings file content: #{settings_data.inspect}" } return end - parsed = update_settings(unparsed.first) - @logger.debug { "update_settings: #{parsed}" } - @expiry = parsed[:timestamp].to_i + parsed[:ttl].to_i if parsed + # 3. Attempt to update the settings. + if (new_settings = update_settings(settings_data.first)) + @expiry = new_settings[:timestamp].to_i + new_settings[:ttl].to_i + @logger.debug { "[#{self.class}##{__method__}] Settings #{new_settings} updated successfully. New expiry: #{@expiry}" } + else + @logger.debug { "[#{self.class}##{__method__}] Settings update failed, keeping current expiry: #{@expiry}" } + end end end end diff --git a/lib/solarwinds_apm/sampling/oboe_sampler.rb b/lib/solarwinds_apm/sampling/oboe_sampler.rb index 93ce26b4..73e17b9b 100644 --- a/lib/solarwinds_apm/sampling/oboe_sampler.rb +++ b/lib/solarwinds_apm/sampling/oboe_sampler.rb @@ -30,13 +30,14 @@ def initialize(logger) @counters = SolarWindsAPM::Metrics::Counter.new @buckets = { SolarWindsAPM::BucketType::DEFAULT => - SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(nil, nil, BUCKET_INTERVAL)), + SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(nil, nil, BUCKET_INTERVAL, 'DEFUALT')), SolarWindsAPM::BucketType::TRIGGER_RELAXED => - SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(nil, nil, BUCKET_INTERVAL)), + SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(nil, nil, BUCKET_INTERVAL, 'TRIGGER_RELAXED')), SolarWindsAPM::BucketType::TRIGGER_STRICT => - SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(nil, nil, BUCKET_INTERVAL)) + SolarWindsAPM::TokenBucket.new(SolarWindsAPM::TokenBucketSettings.new(nil, nil, BUCKET_INTERVAL, 'TRIGGER_STRICT')) } @settings = {} # parsed setting from swo backend + @settings_mutex = ::Mutex.new @buckets.each_value(&:start) end @@ -45,19 +46,22 @@ def initialize(logger) # params: {:trace_id=>, :parent_context=>, :links=>, :name=>, :kind=>, :attributes=>} # propagator -> processor -> sampler def should_sample?(params) - @logger.debug { "should_sample? params: #{params.inspect}" } _, parent_context, _, _, _, attributes = params.values parent_span = ::OpenTelemetry::Trace.current_span(parent_context) type = SolarWindsAPM::SpanType.span_type(parent_span) - @logger.debug { "[#{self.class}/#{__method__}] span type is #{type}" } + @logger.debug { "[#{self.class}/#{__method__}] should_sample? params: #{params.inspect}; span type is #{type}" } # For local spans, we always trust the parent if type == SolarWindsAPM::SpanType::LOCAL - return OTEL_SAMPLING_RESULT.new(decision: OTEL_SAMPLING_DECISION::RECORD_AND_SAMPLE, tracestate: DEFAULT_TRACESTATE) if parent_span.context.trace_flags.sampled? - - return OTEL_SAMPLING_RESULT.new(decision: OTEL_SAMPLING_DECISION::DROP, tracestate: DEFAULT_TRACESTATE) + if parent_span.context.trace_flags.sampled? + return OTEL_SAMPLING_RESULT.new(decision: OTEL_SAMPLING_DECISION::RECORD_AND_SAMPLE, + tracestate: DEFAULT_TRACESTATE) + else + return OTEL_SAMPLING_RESULT.new(decision: OTEL_SAMPLING_DECISION::DROP, + tracestate: DEFAULT_TRACESTATE) + end end sample_state = SampleState.new(OTEL_SAMPLING_DECISION::DROP, @@ -78,10 +82,9 @@ def should_sample?(params) # TraceOptions.parse_trace_options return TriggerTraceOptions sample_state.trace_options = ::SolarWindsAPM::TraceOptions.parse_trace_options(sample_state.headers['X-Trace-Options'], @logger) - @logger.debug { "X-Trace-Options present: #{sample_state.trace_options}" } + @logger.debug { "[#{self.class}/#{__method__}] sample_state.trace_options: #{sample_state.trace_options.inspect}" } if sample_state.headers['X-Trace-Options-Signature'] - @logger.debug { 'X-Trace-Options-Signature present; validating' } # this validate_signature is the function from trace_options file sample_state.trace_options.response.auth = TraceOptions.validate_signature( @@ -92,36 +95,27 @@ def should_sample?(params) ) # If the request has an invalid signature, drop the trace - if sample_state.trace_options.response.auth != Auth::OK # Auth::OK is a string from trace_options.rb: 'ok' - @logger.debug { 'X-Trace-Options-Signature invalid; tracing disabled' } + if sample_state.trace_options.response.auth != Auth::OK + @logger.debug { "[#{self.class}/#{__method__}] signature invalid; tracing disabled (auth=#{sample_state.trace_options.response.auth})" } xtracestate = generate_new_tracestate(parent_span, sample_state) - return OTEL_SAMPLING_RESULT.new(decision: OTEL_SAMPLING_DECISION::DROP, tracestate: xtracestate, attributes: sample_state.attributes) + return OTEL_SAMPLING_RESULT.new(decision: OTEL_SAMPLING_DECISION::DROP, + tracestate: xtracestate, + attributes: sample_state.attributes) end end - unless sample_state.trace_options.trigger_trace - sample_state.trace_options.response.trigger_trace = TriggerTrace::NOT_REQUESTED # 'not-requested' - end - - # Apply trace options to span attributes + # Apply trace options to span attributes and list ignored keys in response + sample_state.trace_options.response.trigger_trace = TriggerTrace::NOT_REQUESTED unless sample_state.trace_options.trigger_trace sample_state.attributes[SW_KEYS_ATTRIBUTE] = sample_state.trace_options[:sw_keys] if sample_state.trace_options[:sw_keys] - - sample_state.trace_options.custom.each do |k, v| - sample_state.attributes[k] = v - end - - # List ignored keys in response + sample_state.trace_options.custom.each { |k, v| sample_state.attributes[k] = v } sample_state.trace_options.response.ignored = sample_state.trace_options[:ignored].map { |k, _| k } if sample_state.trace_options[:ignored].any? end unless sample_state.settings - @logger.debug { 'settings unavailable; sampling disabled' } + @logger.debug { "[#{self.class}/#{__method__}] settings unavailable; sampling disabled" } - if sample_state.trace_options&.trigger_trace - @logger.debug { 'trigger trace requested but unavailable' } - sample_state.trace_options.response.trigger_trace = TriggerTrace::SETTINGS_NOT_AVAILABLE # 'settings-not-available' - end + sample_state.trace_options.response.trigger_trace = TriggerTrace::SETTINGS_NOT_AVAILABLE if sample_state.trace_options&.trigger_trace xtracestate = generate_new_tracestate(parent_span, sample_state) @@ -131,31 +125,23 @@ def should_sample?(params) end @logger.debug { "[#{self.class}/#{__method__}] sample_state before deciding sampling algo: #{sample_state.inspect}" } + # Decide which sampling algo to use and add sampling attribute to decision attributes # https://swicloud.atlassian.net/wiki/spaces/NIT/pages/3815473156/Tracing+Decision+Tree if sample_state.trace_state && TRACESTATE_REGEXP.match?(sample_state.trace_state) - @logger.debug { 'context is valid for parent-based sampling' } parent_based_algo(sample_state) - elsif sample_state.settings[:flags].anybits?(Flags::SAMPLE_START) if sample_state.trace_options&.trigger_trace - @logger.debug { 'trigger trace requested' } trigger_trace_algo(sample_state) else - @logger.debug { 'defaulting to dice roll' } dice_roll_algo(sample_state) end else - @logger.debug { 'SAMPLE_START is unset; sampling disabled' } disabled_algo(sample_state) end - @logger.debug { "final sampling state: #{sample_state.inspect}" } - xtracestate = generate_new_tracestate(parent_span, sample_state) - - # if need to set 'sw.w3c.tracestate' to attributes - # sample_state.attributes['sw.w3c.tracestate'] = ::SolarWindsAPM::Utils.trace_state_header(xtracestate) + @logger.debug { "[#{self.class}/#{__method__}] final sampling state: #{sample_state.inspect}" } OTEL_SAMPLING_RESULT.new(decision: sample_state.decision, tracestate: xtracestate, @@ -163,13 +149,10 @@ def should_sample?(params) end def parent_based_algo(sample_state) - # original js code: const [context] = s.params - # the context is used for metrics e.g. this.#counters.throughTraceCount.add(1, {}, context) + @logger.debug { "[#{self.class}/#{__method__}] parent_based_algo start" } - # compare the parent_id sample_state.attributes[PARENT_ID_ATTRIBUTE] = sample_state.trace_state[0, 16] - - if sample_state.trace_options&.trigger_trace # need to implement trace_options + if sample_state.trace_options&.trigger_trace @logger.debug { 'trigger trace requested but ignored' } sample_state.trace_options.response.trigger_trace = TriggerTrace::IGNORED # 'ignored' end @@ -198,18 +181,22 @@ def parent_based_algo(sample_state) @logger.debug { 'parent is sampled; record and sample' } @counters[:trace_count].add(1) - @counters[:through_trace_count].add(1) # ruby metrics only add incremented value and attributes + @counters[:through_trace_count].add(1) sample_state.decision = OTEL_SAMPLING_DECISION::RECORD_AND_SAMPLE end end + + @logger.debug { "[#{self.class}/#{__method__}] parent_based_algo end" } end def trigger_trace_algo(sample_state) + @logger.debug { "[#{self.class}/#{__method__}] trigger_trace_algo start" } + if sample_state.settings[:flags].nobits?(Flags::TRIGGERED_TRACE) @logger.debug { 'TRIGGERED_TRACE unset; record only' } - sample_state.trace_options.response.trigger_trace = TriggerTrace::TRIGGER_TRACING_DISABLED # 'trigger-tracing-disabled' + sample_state.trace_options.response.trigger_trace = TriggerTrace::TRIGGER_TRACING_DISABLED sample_state.decision = OTEL_SAMPLING_DECISION::RECORD_ONLY else @logger.debug { 'TRIGGERED_TRACE set; trigger tracing' } @@ -244,9 +231,12 @@ def trigger_trace_algo(sample_state) sample_state.decision = OTEL_SAMPLING_DECISION::RECORD_ONLY end end + @logger.debug { "[#{self.class}/#{__method__}] trigger_trace_algo end" } end def dice_roll_algo(sample_state) + @logger.debug { "[#{self.class}/#{__method__}] dice_roll_algo start" } + dice = SolarWindsAPM::Dice.new(rate: sample_state.settings[:sample_rate], scale: DICE_SCALE) sample_state.attributes[SAMPLE_RATE_ATTRIBUTE] = dice.rate sample_state.attributes[SAMPLE_SOURCE_ATTRIBUTE] = sample_state.settings[:sample_source] @@ -278,9 +268,11 @@ def dice_roll_algo(sample_state) @logger.debug { 'dice roll failure; record only' } sample_state.decision = OTEL_SAMPLING_DECISION::RECORD_ONLY end + @logger.debug { "[#{self.class}/#{__method__}] dice_roll_algo end" } end def disabled_algo(sample_state) + @logger.debug { "[#{self.class}/#{__method__}] disabled_algo start" } if sample_state.trace_options&.trigger_trace @logger.debug { 'trigger trace requested but tracing disabled' } sample_state.trace_options.response.trigger_trace = TriggerTrace::TRACING_DISABLED @@ -293,14 +285,17 @@ def disabled_algo(sample_state) @logger.debug { 'SAMPLE_THROUGH_ALWAYS is set; record' } sample_state.decision = OTEL_SAMPLING_DECISION::RECORD_ONLY end + @logger.debug { "[#{self.class}/#{__method__}] disabled_algo end" } end def update_settings(settings) return unless settings[:timestamp] > (@settings[:timestamp] || 0) - @settings = settings - @buckets.each do |type, bucket| - bucket.update(@settings[:buckets][type]) if @settings[:buckets][type] + @settings_mutex.synchronize do + @settings = settings + @buckets.each do |type, bucket| + bucket.update(@settings[:buckets][type]) if @settings[:buckets][type] + end end end @@ -308,26 +303,24 @@ def update_settings(settings) # handle_response_headers functionality is replace by generate_new_tracestate def generate_new_tracestate(parent_span, sample_state) if !parent_span.context.valid? || parent_span.context.tracestate.nil? - @logger.debug { 'create new tracestate' } + action = 'create' decision = sw_from_span_and_decision(parent_span, sample_state.decision) trace_state = ::OpenTelemetry::Trace::Tracestate.from_hash({ 'sw' => decision }) else - @logger.debug { 'update tracestate' } + action = 'update' decision = sw_from_span_and_decision(parent_span, sample_state.decision) trace_state = parent_span.context.tracestate.set_value('sw', decision) end stringified_trace_options = SolarWindsAPM::TraceOptions.stringify_trace_options_response(sample_state.trace_options&.response) - @logger.debug { "[#{self.class}/#{__method__}] stringified_trace_options: #{stringified_trace_options}" } - trace_state = trace_state.set_value('xtrace_options_response', stringified_trace_options) - @logger.debug { "[#{self.class}/#{__method__}] new trace_state: #{trace_state.inspect}" } + @logger.debug { "[#{self.class}/#{__method__}] Tracestate #{action}: decision=#{decision[-2, 2]}, xtrace_resp=#{stringified_trace_options}, trace_state=#{trace_state.inspect}" } trace_state end def sw_from_span_and_decision(parent_span, otel_decision) trace_flag = otel_decision == OTEL_SAMPLING_DECISION::RECORD_AND_SAMPLE ? '01' : '00' - [parent_span.context.hex_span_id, trace_flag].join('-') + "#{parent_span.context.hex_span_id}-#{trace_flag}" end def get_settings(params) @@ -336,12 +329,12 @@ def get_settings(params) expiry = (@settings[:timestamp] + @settings[:ttl]) * 1000 time_now = Time.now.to_i * 1000 if time_now > expiry - @logger.debug { 'settings expired, removing' } + @logger.debug { "[#{self.class}/#{__method__}] settings expired, removing" } @settings = {} return end sampling_setting = SolarWindsAPM::SamplingSettings.merge(@settings, local_settings(params)) - @logger.debug { "sampling_setting: #{sampling_setting.inspect}" } + @logger.debug { "[#{self.class}/#{__method__}] sampling_setting: #{sampling_setting.inspect}" } sampling_setting end end diff --git a/lib/solarwinds_apm/sampling/sampler.rb b/lib/solarwinds_apm/sampling/sampler.rb index ad7017e2..95ddb069 100644 --- a/lib/solarwinds_apm/sampling/sampler.rb +++ b/lib/solarwinds_apm/sampling/sampler.rb @@ -21,6 +21,9 @@ class Sampler < OboeSampler ATTR_URL_PATH = 'url.path' ATTR_HTTP_TARGET = RUBY_SEM_CON::HTTP_TARGET + SW_XTRACEOPTIONS_KEY = 'sw_xtraceoptions' + SW_SIGNATURE_KEY = 'sw_signature' + # tracing_mode is getting from SolarWindsAPM::Config def initialize(config, logger) super(logger) @@ -28,25 +31,22 @@ def initialize(config, logger) @trigger_mode = config[:trigger_trace_enabled] @transaction_settings = config[:transaction_settings] @ready = false + @logger.debug { "[#{self.class}/#{__method__}] Sampler initialized: tracing_mode=#{@tracing_mode}, trigger_mode=#{@trigger_mode}, transaction_settings_count=#{@transaction_settings.inspect}" } end - # wait for getting the first settings def wait_until_ready(timeout = 10) - thread = Thread.new { settings_ready } - thread.join(timeout) || (thread.kill - false) - @ready - end - - def settings_ready(timeout = 10) - deadline = Time.now - loop do - break unless @settings.empty? - + deadline = Time.now + timeout + while Time.now < deadline + # The @settings hash is populated by another thread (e.g., HttpSampler) + unless @settings.empty? + @ready = !@settings[:signature_key].nil? + return @ready + end sleep 0.1 - break if (Time.now - deadline).round(0) >= timeout end - @ready = true unless @settings[:signature_key].nil? + + @logger.warn { "[#{self.class}/#{__method__}] Timed out waiting for settings after #{timeout} seconds." } + @ready # Will be false if timeout is reached end def resolve_tracing_mode(config) @@ -58,59 +58,45 @@ def resolve_tracing_mode(config) def local_settings(params) _trace_id, _parent_context, _links, span_name, span_kind, attributes = params.values settings = { tracing_mode: @tracing_mode, trigger_mode: @trigger_mode } - return settings if @transaction_settings.nil? || @transaction_settings.empty? - @logger.debug { "Current @transaction_settings: #{@transaction_settings.inspect}" } - http_metadata = http_span_metadata(span_kind, attributes) - @logger.debug { "http_metadata: #{http_metadata.inspect}" } + if @transaction_settings.nil? || @transaction_settings.empty? + @logger.debug { "[#{self.class}/#{__method__}] No transaction settings, using defaults settings: #{settings.inspect}" } + else + http_metadata = http_span_metadata(span_kind, attributes) + # below is for filter out unwanted transaction + trans_settings = ::SolarWindsAPM::TransactionSettings.new(url_path: http_metadata[:url], name: span_name, kind: span_kind) + tracing_mode = trans_settings.calculate_trace_mode == 1 ? TracingMode::ALWAYS : TracingMode::NEVER - # below is for filter out unwanted transaction - trans_settings = ::SolarWindsAPM::TransactionSettings.new(url_path: http_metadata[:url], name: span_name, kind: span_kind) - tracing_mode = trans_settings.calculate_trace_mode == 1 ? TracingMode::ALWAYS : TracingMode::NEVER + settings[:tracing_mode] = tracing_mode + end - settings[:tracing_mode] = tracing_mode + @logger.debug { "[#{self.class}/#{__method__}] Transaction settings after calculation #{settings.inspect}" } settings end # if context have sw-related value, it should be stored in context - # named sw_xtraceoptions in header propagator - # original x_trace_options will parse headers in the class, apm-js separate the task - # apm-js will make headers as hash + # named sw_xtraceoptions and sw_signature in header from propagator def request_headers(params) - parent_context = params[:parent_context] - header = obtain_sw_value(parent_context, 'sw_xtraceoptions') - signature = obtain_sw_value(parent_context, 'sw_signature') - @logger.debug { "[#{self.class}/#{__method__}] trace_options option_header: #{header}; trace_options sw_signature: #{signature}" } - - { - 'X-Trace-Options' => header, - 'X-Trace-Options-Signature' => signature - } + header, signature = obtain_traceoptions_headers_signature(params[:parent_context]) + @logger.debug { "[#{self.class}/#{__method__}] trace_options header: #{header.inspect}, signature: #{signature.inspect} from parent_context: #{params[:parent_context].inspect}" } + { 'X-Trace-Options' => header, 'X-Trace-Options-Signature' => signature } end - def obtain_sw_value(context, type) - sw_value = nil - instance_variable = context&.instance_variable_get('@entries') - instance_variable&.each do |key, value| - next unless key.instance_of?(::String) - - sw_value = value if key == type - end - sw_value + def obtain_traceoptions_headers_signature(context) + header = context.value(SW_XTRACEOPTIONS_KEY) + signature = context.value(SW_SIGNATURE_KEY) + [header, signature] end def update_settings(settings) parsed = parse_settings(settings) if parsed - @logger.debug { "valid settings #{parsed.inspect} from setting #{settings.inspect}" } - - super(parsed) # call oboe_sampler update_settings function to update the buckets - + @logger.debug { "[#{self.class}/#{__method__}] Valid settings #{parsed.inspect} from setting #{settings.inspect}" } @logger.warn { "Warning from parsed settings: #{parsed[:warning]}" } if parsed[:warning] - + super(parsed) # call oboe_sampler update_settings function to update the buckets parsed else - @logger.debug { "invalid settings: #{settings.inspect}" } + @logger.debug { "[#{self.class}/#{__method__}] Invalid settings: #{settings.inspect}" } nil end end @@ -136,24 +122,25 @@ def http_span_metadata(kind, attributes) url: url } - @logger.debug { "Retrieved http metadata: #{http_metadata.inspect}" } + @logger.debug { "[#{self.class}/#{__method__}] Retrieved http metadata: #{http_metadata.inspect}" } http_metadata end def parse_settings(unparsed) return unless unparsed.is_a?(Hash) - return unless unparsed['value'].is_a?(Numeric) && - unparsed['timestamp'].is_a?(Numeric) && - unparsed['ttl'].is_a?(Numeric) - sample_rate = unparsed['value'] - timestamp = unparsed['timestamp'] - ttl = unparsed['ttl'] + timestamp = unparsed['timestamp'] + ttl = unparsed['ttl'] + flags = unparsed['flags'] + + return unless sample_rate.is_a?(Numeric) && + timestamp.is_a?(Numeric) && + ttl.is_a?(Numeric) - return unless unparsed['flags'].is_a?(String) + return unless flags.is_a?(String) - flags = unparsed['flags'].split(',').reduce(Flags::OK) do |final_flag, f| + flags = flags.split(',').reduce(Flags::OK) do |final_flag, f| flag = { 'OVERRIDE' => Flags::OVERRIDE, 'SAMPLE_START' => Flags::SAMPLE_START, @@ -167,6 +154,7 @@ def parse_settings(unparsed) buckets = {} signature_key = nil + warning = nil if unparsed['arguments'].is_a?(Hash) args = unparsed['arguments'] diff --git a/lib/solarwinds_apm/sampling/sampling_constants.rb b/lib/solarwinds_apm/sampling/sampling_constants.rb index 55a678de..ffa84bf0 100644 --- a/lib/solarwinds_apm/sampling/sampling_constants.rb +++ b/lib/solarwinds_apm/sampling/sampling_constants.rb @@ -55,7 +55,8 @@ module TriggerTrace TokenBucketSettings = Struct.new(:capacity, # Number :rate, # Number - :interval) # Number + :interval, # Number + :type) # String module SampleSource LOCAL_DEFAULT = 2 @@ -105,11 +106,11 @@ def self.span_type(parent_span) end def self.valid_trace_id?(trace_id) - !!(trace_id =~ VALID_TRACEID_REGEX) && trace_id != INVALID_TRACEID + VALID_TRACEID_REGEX.match?(trace_id) && trace_id != INVALID_TRACEID end def self.valid_span_id?(span_id) - !!(span_id =~ VALID_SPANID_REGEX) && span_id != INVALID_SPANID + VALID_SPANID_REGEX.match?(span_id) && span_id != INVALID_SPANID end def self.span_context_valid?(span_context) diff --git a/lib/solarwinds_apm/sampling/settings.rb b/lib/solarwinds_apm/sampling/settings.rb index 7140930e..5b54bcc6 100644 --- a/lib/solarwinds_apm/sampling/settings.rb +++ b/lib/solarwinds_apm/sampling/settings.rb @@ -9,6 +9,7 @@ module SolarWindsAPM module SamplingSettings def self.merge(remote, local) + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] SamplingSettings merge with remote: #{remote.inspect}; local: #{local.inspect}" } flags = local[:tracing_mode] || remote[:flags] if local[:trigger_mode] == :enabled @@ -22,6 +23,7 @@ def self.merge(remote, local) flags |= SolarWindsAPM::Flags::OVERRIDE end + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] final flags: #{flags}" } remote.merge(flags: flags) end end diff --git a/lib/solarwinds_apm/sampling/token_bucket.rb b/lib/solarwinds_apm/sampling/token_bucket.rb index 820e02c9..859ad673 100644 --- a/lib/solarwinds_apm/sampling/token_bucket.rb +++ b/lib/solarwinds_apm/sampling/token_bucket.rb @@ -13,17 +13,19 @@ class TokenBucket # Maximum value of a signed 32-bit integer MAX_INTERVAL = (2**31) - 1 - attr_reader :capacity, :rate, :interval, :tokens + attr_reader :capacity, :rate, :interval, :tokens, :type def initialize(token_bucket_settings) self.capacity = token_bucket_settings.capacity || 0 self.rate = token_bucket_settings.rate || 0 self.interval = token_bucket_settings.interval || MAX_INTERVAL self.tokens = @capacity + @type = token_bucket_settings.type @timer = nil end - # used call from update_settings e.g. bucket.update(bucket_settings) + # oboe sampler update_settings will update the token + # (thread safe as update_settings is guarded by mutex from oboe sampler) def update(settings) settings.instance_of?(Hash) ? update_from_hash(settings) : update_from_token_bucket_settings(settings) end @@ -72,8 +74,11 @@ def rate=(rate) @rate = [0, rate].max end + # self.interval= sets the @interval and @sleep_interval + # @sleep_interval is used in the timer thread to sleep between replenishing the bucket def interval=(interval) @interval = interval.clamp(0, MAX_INTERVAL) + @sleep_interval = @interval / 1000.0 end def tokens=(tokens) @@ -83,11 +88,15 @@ def tokens=(tokens) # Attempts to consume tokens from the bucket # @param n [Integer] Number of tokens to consume # @return [Boolean] Whether there were enough tokens + # TODO: we need to include thread-safety here since sampler is shared across threads + # and we may have multiple threads trying to consume tokens at the same time def consume(token = 1) if @tokens >= token self.tokens = @tokens - token + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] #{@type} Consumed #{token} from total #{@tokens} (#{(@tokens.to_f / @capacity * 100).round(1)}% remaining)" } true else + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] #{@type} Token consumption failed: requested=#{token}, available=#{@tokens}, capacity=#{@capacity}" } false end end @@ -99,7 +108,7 @@ def start @timer = Thread.new do loop do task - sleep(@interval / 1000.0) + sleep(@sleep_interval) end end end diff --git a/lib/solarwinds_apm/sampling/trace_options.rb b/lib/solarwinds_apm/sampling/trace_options.rb index f4a23515..9bc6cf17 100644 --- a/lib/solarwinds_apm/sampling/trace_options.rb +++ b/lib/solarwinds_apm/sampling/trace_options.rb @@ -9,63 +9,66 @@ module SolarWindsAPM class TraceOptions TRIGGER_TRACE_KEY = 'trigger-trace' - TIMESTAMP_KEY = 'ts' - SW_KEYS_KEY = 'sw-keys' - - CUSTOM_KEY_REGEX = /^custom-[^\s]*$/ + TIMESTAMP_KEY = 'ts' + SW_KEYS_KEY = 'sw-keys' + CUSTOM_KEY_REGEX = /^custom-[^\s]*$/ def self.parse_trace_options(header, logger) + logger.debug { "[#{self.class}/#{__method__}] Parsing trace options header: #{header&.slice(0, 100)}..." } trace_options = TriggerTraceOptions.new(nil, nil, nil, {}, [], TraceOptionsResponse.new(nil, nil, [])) - kvs = header.split(';').map do |kv| + kvs = header.split(';').filter_map do |kv| key, *values = kv.split('=').map(&:strip) + next if key.nil? || key.empty? + value = values.any? ? values.join('=') : nil [key, value] end - kvs.reject! { |key, _| key.nil? || key.empty? } + logger.debug { "[#{self.class}/#{__method__}] Parsed kvs #{kvs.inspect}" } kvs.each do |k, v| case k when TRIGGER_TRACE_KEY if v || trace_options.trigger_trace - logger.debug { 'invalid trace option for trigger trace' } + logger.debug { "[#{self.class}/#{__method__}] invalid trace option for trigger trace: value=#{v}, already_set=#{trace_options.trigger_trace}" } trace_options.ignored << [k, v] next end trace_options.trigger_trace = true when TIMESTAMP_KEY if v.nil? || trace_options.timestamp - logger.debug { 'invalid trace option for timestamp' } + logger.debug { "[#{self.class}/#{__method__}] invalid trace option for timestamp: value=#{v}, already_set=#{trace_options.timestamp}" } trace_options.ignored << [k, v] next end unless numeric_integer?(v) - logger.debug { 'invalid trace option for timestamp, should be an integer' } + logger.debug { "[#{self.class}/#{__method__}] invalid trace option for timestamp, should be an integer: #{v}" } trace_options.ignored << [k, v] next end trace_options.timestamp = v.to_i when SW_KEYS_KEY if v.nil? || trace_options.sw_keys - logger.debug { 'invalid trace option for sw keys' } + logger.debug { "[#{self.class}/#{__method__}] invalid trace option for sw keys: value=#{v}, already_set=#{trace_options.sw_keys}" } trace_options.ignored << [k, v] next end trace_options.sw_keys = v when CUSTOM_KEY_REGEX if v.nil? || trace_options.custom[k] - logger.debug { "invalid trace option for custom key #{k}" } + logger.debug { "[#{self.class}/#{__method__}] invalid trace option for custom key #{k}: value=#{v}, already_set=#{trace_options.custom[k]}" } trace_options.ignored << [k, v] next end trace_options.custom[k] = v else + logger.debug { "[#{self.class}/#{__method__}] Unknown key ignored: #{k}=#{v}" } trace_options.ignored << [k, v] end end - + logger.debug { "[#{self.class}/#{__method__}] Parsing complete: trigger_trace=#{trace_options.trigger_trace}, timestamp=#{trace_options.timestamp}, sw_keys=#{trace_options.sw_keys}, custom_keys=#{trace_options.custom}, ignored=#{trace_options.ignored}" } trace_options end @@ -86,15 +89,29 @@ def self.stringify_trace_options_response(trace_options_response) 'trigger-trace': trace_options_response.trigger_trace, ignored: trace_options_response.ignored.empty? ? nil : trace_options_response.ignored.join(',') } - kvs.compact.map { |k, v| "#{k}:#{v}" }.join(';') + + kvs.compact! + result = kvs.map { |k, v| "#{k}:#{v}" }.join(';') + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Stringified trace options response: #{result}" } + result end def self.validate_signature(header, signature, key, timestamp) - return Auth::NO_SIGNATURE_KEY unless key - return Auth::BAD_TIMESTAMP unless timestamp && (Time.now.to_i - timestamp).abs <= 5 * 60 + unless key + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Signature validation failed: no signature key available" } + return Auth::NO_SIGNATURE_KEY + end + + unless timestamp && (Time.now.to_i - timestamp).abs <= 5 * 60 + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Signature validation failed: bad timestamp (diff more than 300s)" } + return Auth::BAD_TIMESTAMP + end digest = OpenSSL::HMAC.hexdigest('SHA1', key, header) - signature == digest ? Auth::OK : Auth::BAD_SIGNATURE + is_valid = signature == digest + + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Signature validation result: #{is_valid ? 'valid' : 'invalid'}" } + is_valid ? Auth::OK : Auth::BAD_SIGNATURE end end end diff --git a/lib/solarwinds_apm/support/resource_detector.rb b/lib/solarwinds_apm/support/resource_detector.rb index 93c63e0a..dc1f30a4 100644 --- a/lib/solarwinds_apm/support/resource_detector.rb +++ b/lib/solarwinds_apm/support/resource_detector.rb @@ -64,9 +64,7 @@ def self.detect_uams_client_id response = nil ::OpenTelemetry::Common::Utilities.untraced do - http = Net::HTTP.new(url.host, url.port) - request = Net::HTTP::Get.new(url) - response = http.request(request) + response = Net::HTTP.get_response(url) end raise 'Response returned non-200 status code' unless response&.code.to_i == 200 @@ -152,26 +150,23 @@ def self.detect_k8s_attributes end def self.detect_ec2 - attribute = ::OpenTelemetry::Resource::Detector::AWS::EC2.detect - SolarWindsAPM.logger.debug { "#{self.class}/#{__method__}] retrieved resource_attributes: #{attribute.instance_variable_get(:@attributes)}" } - attribute - rescue StandardError - ::OpenTelemetry::SDK::Resources::Resource.create({}) + run_opentelemetry_detector(::OpenTelemetry::Resource::Detector::AWS::EC2) end def self.detect_azure - attribute = ::OpenTelemetry::Resource::Detector::Azure.detect - SolarWindsAPM.logger.debug { "#{self.class}/#{__method__}] retrieved resource_attributes: #{attribute.instance_variable_get(:@attributes)}" } - attribute - rescue StandardError - ::OpenTelemetry::SDK::Resources::Resource.create({}) + run_opentelemetry_detector(::OpenTelemetry::Resource::Detector::Azure) end def self.detect_container - attribute = ::OpenTelemetry::Resource::Detector::Container.detect - SolarWindsAPM.logger.debug { "#{self.class}/#{__method__}] retrieved resource_attributes: #{attribute.instance_variable_get(:@attributes)}" } + run_opentelemetry_detector(::OpenTelemetry::Resource::Detector::Container) + end + + def self.run_opentelemetry_detector(detector_class) + attribute = detector_class.detect + SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] Detector #{detector_class} retrieved: #{attribute.instance_variable_get(:@attributes)}" } attribute - rescue StandardError + rescue StandardError => e + SolarWindsAPM.logger.error { "[#{self.class}/#{__method__}] Detector #{detector_class} failed. Error: #{e.message}." } ::OpenTelemetry::SDK::Resources::Resource.create({}) end diff --git a/lib/solarwinds_apm/support/transaction_settings.rb b/lib/solarwinds_apm/support/transaction_settings.rb index bd0cf245..b0129b00 100644 --- a/lib/solarwinds_apm/support/transaction_settings.rb +++ b/lib/solarwinds_apm/support/transaction_settings.rb @@ -37,17 +37,24 @@ def tracing_enabled? SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] enabled_regexps: #{enabled_regexps&.inspect}" } SolarWindsAPM.logger.debug { "[#{self.class}/#{__method__}] disabled_regexps: #{disabled_regexps&.inspect}" } - return false if disabled_regexps.is_a?(Array) && disabled_regexps.any? { |regex| regex.match?(@url_path) } - return true if enabled_regexps.is_a?(Array) && enabled_regexps.any? { |regex| regex.match?(@url_path) } - return false if disabled_regexps.is_a?(Array) && disabled_regexps.any? { |regex| regex.match?(span_layer) } - return true if enabled_regexps.is_a?(Array) && enabled_regexps.any? { |regex| regex.match?(span_layer) } + return false if matches_any?(disabled_regexps, @url_path) + return true if matches_any?(enabled_regexps, @url_path) + return false if matches_any?(disabled_regexps, span_layer) + return true if matches_any?(enabled_regexps, span_layer) true rescue StandardError => e SolarWindsAPM.logger.warn do - "[#{self.class}/#{__method__}] Could not determine tracing status for #{kind}. #{e.inspect}. transaction_settings regexps/extensions igonred/unfiltered." + "[#{self.class}/#{__method__}] Could not determine tracing status for #{@kind}. #{e.inspect}. transaction_settings regexps/extensions igonred/unfiltered." end true end + + # Checks if a given string matches any regex in a list. + def matches_any?(regex_list, string_to_match) + return false unless regex_list.is_a?(Array) + + regex_list.any? { |regex| regex.match?(string_to_match) } + end end end diff --git a/lib/solarwinds_apm/version.rb b/lib/solarwinds_apm/version.rb index 54f4f657..a6f7f6d7 100644 --- a/lib/solarwinds_apm/version.rb +++ b/lib/solarwinds_apm/version.rb @@ -13,8 +13,8 @@ module SolarWindsAPM module Version MAJOR = 7 # breaking, MINOR = 0 # feature, - PATCH = 1 # fix => BFF - PRE = nil + PATCH = 2 # fix => BFF + PRE = 'prev1' STRING = [MAJOR, MINOR, PATCH, PRE].compact.join('.') end diff --git a/test/minitest_helper.rb b/test/minitest_helper.rb index 5648b489..9d4b46da 100644 --- a/test/minitest_helper.rb +++ b/test/minitest_helper.rb @@ -288,7 +288,8 @@ def initialize module DisableAddView def init_response_time_metrics - instrument = @meters['sw.apm.request.metrics'].create_histogram('trace.service.response_time', unit: 'ms', description: 'Duration of each entry span for the service, typically meaning the time taken to process an inbound request.') + meter = ::OpenTelemetry.meter_provider.meter('sw.apm.request.metrics') + instrument = meter.create_histogram('trace.service.response_time', unit: 'ms', description: 'Duration of each entry span for the service, typically meaning the time taken to process an inbound request.') { response_time: instrument } end end diff --git a/test/opentelemetry/otlp_processor_sampled_test.rb b/test/opentelemetry/otlp_processor_sampled_test.rb index 2a45fc17..5d962cf2 100644 --- a/test/opentelemetry/otlp_processor_sampled_test.rb +++ b/test/opentelemetry/otlp_processor_sampled_test.rb @@ -19,14 +19,8 @@ @processor = SolarWindsAPM::OpenTelemetry::OTLPProcessor.new(txn_manager) end - it 'initializes_meters_and_metrics' do - request_metrics = @processor.instance_variable_get(:@meters)['sw.apm.request.metrics'] - request_metrics_registry = request_metrics.instance_variable_get(:@instrument_registry) - - _(@processor.instance_variable_get(:@meters).size).must_equal 1 + it 'initializes_metrics' do _(@processor.instance_variable_get(:@metrics).size).must_equal 1 - - refute_nil(request_metrics_registry['trace.service.response_time']) end it 'ensure_both_span_and_metrics_have_transaction_name' do diff --git a/test/opentelemetry/otlp_processor_test.rb b/test/opentelemetry/otlp_processor_test.rb index e39ca9af..21e045e3 100644 --- a/test/opentelemetry/otlp_processor_test.rb +++ b/test/opentelemetry/otlp_processor_test.rb @@ -17,18 +17,8 @@ @processor = SolarWindsAPM::OpenTelemetry::OTLPProcessor.new(@txn_manager) end - it 'initializes_meters_and_metrics' do - request_metrics = @processor.instance_variable_get(:@meters)['sw.apm.request.metrics'] - sampling_metrics = @processor.instance_variable_get(:@meters)['sw.apm.sampling.metrics'] - request_metrics_registry = request_metrics.instance_variable_get(:@instrument_registry) - sampling_metrics.instance_variable_get(:@instrument_registry) - - _(@processor.instance_variable_get(:@meters).size).must_equal 1 + it 'initializes_metrics' do _(@processor.instance_variable_get(:@metrics).size).must_equal 1 - - refute_nil(request_metrics_registry['trace.service.response_time']) - - @processor.instance_variable_get(:@meters)['sw.apm.request.metrics'].instance_variable_set(:@instrument_registry, {}) end it 'does_not_have_transaction_manager' do diff --git a/test/solarwinds_apm/config_test.rb b/test/solarwinds_apm/config_test.rb index edf2a289..6d9e1806 100644 --- a/test/solarwinds_apm/config_test.rb +++ b/test/solarwinds_apm/config_test.rb @@ -85,13 +85,13 @@ _(SolarWindsAPM::Config[:dummy_key]).must_equal :enabled end - it 'with nil env key, use config file configuration (disabled)' do - SolarWindsAPM::Config.enable_disable_config(nil, :dummy_key, :disabled, :enabled) + it 'with empty env key, use config file configuration (disabled)' do + SolarWindsAPM::Config.enable_disable_config('', :dummy_key, :disabled, :enabled) _(SolarWindsAPM::Config[:dummy_key]).must_equal :disabled end - it 'with nil env key, use config file configuration (enabled)' do - SolarWindsAPM::Config.enable_disable_config(nil, :dummy_key, :enabled, :enabled) + it 'with empty env key, use config file configuration (enabled)' do + SolarWindsAPM::Config.enable_disable_config('', :dummy_key, :enabled, :enabled) _(SolarWindsAPM::Config[:dummy_key]).must_equal :enabled end @@ -168,20 +168,20 @@ _(SolarWindsAPM::Config[:dummy_key]).must_equal true end - it 'with nil env, config env true, use default false, should be true' do + it 'with empty env, config env true, use default false, should be true' do ENV['DUMMY_KEY'] = 'true' - SolarWindsAPM::Config.enable_disable_config(nil, :dummy_key, true, false, bool: true) + SolarWindsAPM::Config.enable_disable_config('', :dummy_key, true, false, bool: true) _(SolarWindsAPM::Config[:dummy_key]).must_equal true end - it 'with nil env, empty config env, use default false, should be false' do + it 'with empty env, empty config env, use default false, should be false' do ENV['DUMMY_KEY'] = '' - SolarWindsAPM::Config.enable_disable_config(nil, :dummy_key, '', false, bool: true) + SolarWindsAPM::Config.enable_disable_config('', :dummy_key, '', false, bool: true) _(SolarWindsAPM::Config[:dummy_key]).must_equal false end - it 'with nil env, empty config env, use default true, should be true' do - SolarWindsAPM::Config.enable_disable_config(nil, :dummy_key, '', true, bool: true) + it 'with empty env, empty config env, use default true, should be true' do + SolarWindsAPM::Config.enable_disable_config('', :dummy_key, '', true, bool: true) _(SolarWindsAPM::Config[:dummy_key]).must_equal true end end