From c484c095a984729a9d3e594504cae25b8041eb83 Mon Sep 17 00:00:00 2001 From: David Wein Date: Tue, 9 Dec 2025 09:18:06 -0800 Subject: [PATCH] pgbench: Add latency percentile reporting with histogram-based tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add --percentile[=LIST] option to report latency percentiles during pgbench runs. By default reports p50, p90, p99, p99.9, and p99.99. Users can specify a custom comma-separated list of percentiles (0-100) to report. Implementation details: - Multi-tier histogram with bounded memory (~100KB per histogram): * Fine buckets: 0-100ms at 100μs resolution (1000 buckets) * Coarse buckets: 100ms-10s at 1ms resolution (9900 buckets) * Medium buckets: 10s-120s at 100ms resolution (1100 buckets) * Large buckets: 120s-1200s at 1s resolution (1080 buckets) - Dynamic overflow tracking for latencies exceeding 20 minutes, storing individual values in a growable array for accurate tail percentiles - Per-command percentile reporting when combined with -r flag, with percentiles displayed on a single line per command - Per-script percentile aggregation across all threads New options: - --percentile[=LIST]: Enable percentile reporting with optional custom list - --debug-latency-multiplier=N: Multiply latencies for testing overflow handling Memory considerations documented for high-concurrency scenarios with per-command reporting enabled. Includes comprehensive TAP test coverage for custom percentile lists, overflow handling, and per-command reporting. --- src/bin/pgbench/pgbench.c | 504 ++++++++++++++++++- src/bin/pgbench/t/001_pgbench_with_server.pl | 154 ++++++ 2 files changed, 656 insertions(+), 2 deletions(-) diff --git a/src/bin/pgbench/pgbench.c b/src/bin/pgbench/pgbench.c index afe3ce65fb7..54d65716aca 100644 --- a/src/bin/pgbench/pgbench.c +++ b/src/bin/pgbench/pgbench.c @@ -262,6 +262,17 @@ static int agg_interval; /* log aggregates instead of individual static bool per_script_stats = false; /* whether to collect stats per script */ static int progress = 0; /* thread progress report every this seconds */ static bool progress_timestamp = false; /* progress report with Unix time */ +static bool report_percentiles = false; /* report latency percentiles */ + +/* Default percentiles to report */ +#define MAX_PERCENTILES 20 +static double default_percentiles[] = {50.0, 90.0, 99.0, 99.9, 99.99}; +static double percentile_list[MAX_PERCENTILES]; +static int num_percentiles = 0; /* 0 means use defaults */ + +/* Debug option: multiply latencies for testing histogram overflow */ +static double debug_latency_multiplier = 1.0; + static int nclients = 1; /* number of clients */ static int nthreads = 1; /* number of threads */ static bool is_connect; /* establish connection for each transaction */ @@ -364,6 +375,50 @@ typedef struct SimpleStats double sum2; /* sum of squared values */ } SimpleStats; +/* + * Histogram for computing latency percentiles with bounded memory. + * Uses finer buckets for low latencies (where precision matters) and + * progressively coarser buckets for high latencies. + * + * Bucket tiers: + * Fine: 0-100ms at 100μs resolution (1000 buckets) + * Coarse: 100ms-10s at 1ms resolution (9900 buckets) + * Medium: 10s-120s at 100ms resolution (1100 buckets) + * Large: 120s-1200s at 1s resolution (1080 buckets) + * + * Latencies exceeding the maximum trackable (20 minutes) are stored + * individually in a dynamic array for accurate percentile calculation. 
+ *
+ * This consumes roughly 105KB of memory per histogram.  A histogram is
+ * allocated for each StatsData instance: one per thread, plus one per
+ * script when per-script stats are collected.
+ *
+ * WARNING: When using --percentile with -r (per-command stats), each
+ * command in each script also gets its own histogram (~105KB), shared by
+ * all threads and updated without locking, like the per-command
+ * SimpleStats.  For example: 2 scripts x 50 commands = 100 histograms
+ * = ~10.5MB.  Consider this memory overhead when running scripts with
+ * many commands.
+ */
+#define LATENCY_HIST_FINE_BUCKETS	1000	/* 0-100ms at 100μs resolution */
+#define LATENCY_HIST_COARSE_BUCKETS	9900	/* 100ms-10s at 1ms resolution */
+#define LATENCY_HIST_MEDIUM_BUCKETS	1100	/* 10s-120s at 100ms resolution */
+#define LATENCY_HIST_LARGE_BUCKETS	1080	/* 120s-1200s at 1s resolution */
+#define LATENCY_HIST_BUCKETS	(LATENCY_HIST_FINE_BUCKETS + LATENCY_HIST_COARSE_BUCKETS + \
+								 LATENCY_HIST_MEDIUM_BUCKETS + LATENCY_HIST_LARGE_BUCKETS)
+#define LATENCY_HIST_FINE_LIMIT		100000	/* 100ms in microseconds */
+#define LATENCY_HIST_COARSE_LIMIT	10000000	/* 10s in microseconds */
+#define LATENCY_HIST_MEDIUM_LIMIT	120000000	/* 120s in microseconds */
+#define LATENCY_HIST_FINE_WIDTH		100		/* 100μs per fine bucket */
+#define LATENCY_HIST_COARSE_WIDTH	1000	/* 1ms per coarse bucket */
+#define LATENCY_HIST_MEDIUM_WIDTH	100000	/* 100ms per medium bucket */
+#define LATENCY_HIST_LARGE_WIDTH	1000000	/* 1s per large bucket */
+
+typedef struct LatencyHistogram
+{
+	int64		buckets[LATENCY_HIST_BUCKETS];
+	int64		overflow_count;	/* count of latencies exceeding max trackable */
+	double		overflow_sum;	/* sum of overflow latencies */
+	double	   *overflow_values;	/* dynamic array of individual overflow latencies */
+	int			overflow_capacity;	/* allocated size of overflow_values array */
+} LatencyHistogram;
+
 /*
  * The instr_time type is expensive when dealing with time arithmetic. Define
  * a type to hold microseconds instead. Type int64 is good enough for about
@@ -442,6 +497,7 @@ typedef struct StatsData
 	 *				 error
 	 */
 	SimpleStats latency;
 	SimpleStats lag;
+	LatencyHistogram *latency_hist;	/* histogram for percentile calculation
+									 * (allocated only if report_percentiles) */
 } StatsData;
 
 /*
@@ -735,6 +791,7 @@ static const char *const QUERYMODE[] = {"simple", "extended", "prepared"};
  * aset		do gset on all possible queries of a combined query (\;).
  * expr		Parsed expression, if needed.
  * stats		Time spent in this command.
+ * latency_hist	Histogram for percentile calculation (allocated if needed).
  * retries	Number of retries after a serialization or deadlock error in the
  *			current command.
  * failures	Number of errors in the current command that were not retried.
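To make the tiering concrete, here is a standalone sketch (not part of the patch; the names are ad hoc) of how a microsecond latency maps to a bucket index under the constants above. It mirrors the tier checks that addToLatencyHistogram performs:

	#include <stdio.h>

	/* Tier constants as in the patch (all values in microseconds). */
	#define FINE_BUCKETS	1000
	#define COARSE_BUCKETS	9900
	#define MEDIUM_BUCKETS	1100
	#define FINE_LIMIT		100000		/* 100ms */
	#define COARSE_LIMIT	10000000	/* 10s */
	#define MEDIUM_LIMIT	120000000	/* 120s */

	static int
	bucket_for(double us)
	{
		if (us < FINE_LIMIT)			/* 100us-wide fine buckets */
			return (int) (us / 100);
		if (us < COARSE_LIMIT)			/* 1ms-wide coarse buckets */
			return FINE_BUCKETS + (int) ((us - FINE_LIMIT) / 1000);
		if (us < MEDIUM_LIMIT)			/* 100ms-wide medium buckets */
			return FINE_BUCKETS + COARSE_BUCKETS +
				(int) ((us - COARSE_LIMIT) / 100000);
		return FINE_BUCKETS + COARSE_BUCKETS + MEDIUM_BUCKETS +
			(int) ((us - MEDIUM_LIMIT) / 1000000);	/* 1s-wide large buckets */
	}

	int
	main(void)
	{
		/* 2.5ms -> 25, 250ms -> 1150, 15s -> 10950, 300s -> 12180 */
		printf("%d %d %d %d\n",
			   bucket_for(2500), bucket_for(250000),
			   bucket_for(15000000.0), bucket_for(300000000.0));
		return 0;
	}

The bucket array itself is 13,080 int64 counters, which is where the roughly 105KB figure comes from (13,080 x 8 bytes = 104,640 bytes).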
@@ -751,6 +808,7 @@ typedef struct Command char *varprefix; PgBenchExpr *expr; SimpleStats stats; + LatencyHistogram *latency_hist; /* for percentile calculation (may be NULL) */ int64 retries; int64 failures; } Command; @@ -954,6 +1012,8 @@ usage(void) " --log-prefix=PREFIX prefix for transaction time log file\n" " (default: \"pgbench_log\")\n" " --max-tries=NUM max number of tries to run transaction (default: 1)\n" + " --percentile[=LIST] report latency percentiles (default: 50,90,99,99.9,99.99)\n" + " LIST is comma-separated values between 0 and 100\n" " --progress-timestamp use Unix epoch timestamps for progress\n" " --random-seed=SEED set random seed (\"time\", \"rand\", integer)\n" " --sampling-rate=NUM fraction of transactions to log (e.g., 0.01 for 1%%)\n" @@ -999,6 +1059,96 @@ is_an_int(const char *str) return *ptr == '\0'; } +/* + * parse_percentile_list -- parse a comma-separated list of percentile values + * + * Input: comma-separated list of floats (e.g., "50,90,99,99.9,99.99") + * Each value must be between 0 and 100 (exclusive of 0, inclusive of 100). + * Returns true on success, false on error. + */ +static bool +parse_percentile_list(const char *str) +{ + char *copy; + char *token; + char *saveptr; + int count = 0; + double values[MAX_PERCENTILES]; + + if (str == NULL || *str == '\0') + { + pg_log_error("empty percentile list"); + return false; + } + + /* Make a copy since strtok_r modifies the string */ + copy = pg_strdup(str); + + for (token = strtok_r(copy, ",", &saveptr); + token != NULL; + token = strtok_r(NULL, ",", &saveptr)) + { + char *endptr; + double val; + + /* Skip leading whitespace */ + while (*token && isspace((unsigned char) *token)) + token++; + + if (*token == '\0') + { + pg_log_error("empty value in percentile list"); + pg_free(copy); + return false; + } + + errno = 0; + val = strtod(token, &endptr); + + /* Skip trailing whitespace */ + while (*endptr && isspace((unsigned char) *endptr)) + endptr++; + + if (*endptr != '\0' || errno != 0) + { + pg_log_error("invalid percentile value: \"%s\"", token); + pg_free(copy); + return false; + } + + if (val <= 0.0 || val > 100.0) + { + pg_log_error("percentile must be between 0 and 100 (exclusive of 0): %g", val); + pg_free(copy); + return false; + } + + if (count >= MAX_PERCENTILES) + { + pg_log_error("too many percentiles (maximum %d)", MAX_PERCENTILES); + pg_free(copy); + return false; + } + + values[count++] = val; + } + + pg_free(copy); + + if (count == 0) + { + pg_log_error("no valid percentiles found in list"); + return false; + } + + /* Copy parsed values to global array */ + for (int i = 0; i < count; i++) + percentile_list[i] = values[i]; + num_percentiles = count; + + return true; +} + /* * strtoint64 -- convert a string to 64-bit integer @@ -1453,6 +1603,242 @@ mergeSimpleStats(SimpleStats *acc, SimpleStats *ss) acc->sum2 += ss->sum2; } +/* + * Initialize the given LatencyHistogram struct to all zeroes. + */ +static void +initLatencyHistogram(LatencyHistogram *hist) +{ + memset(hist->buckets, 0, sizeof(hist->buckets)); + hist->overflow_count = 0; + hist->overflow_sum = 0.0; + hist->overflow_values = NULL; + hist->overflow_capacity = 0; +} + +/* + * Free resources associated with a LatencyHistogram. + */ +static void +freeLatencyHistogram(LatencyHistogram *hist) +{ + if (hist->overflow_values != NULL) + { + pg_free(hist->overflow_values); + hist->overflow_values = NULL; + hist->overflow_capacity = 0; + } +} + +/* + * Add an overflow latency value to the dynamic array. 
+ * Grows the array capacity as needed.
+ */
+static void
+addOverflowValue(LatencyHistogram *hist, double latency_us)
+{
+	/* Grow array if needed */
+	if (hist->overflow_count >= hist->overflow_capacity)
+	{
+		int			new_capacity = hist->overflow_capacity == 0 ? 16 : hist->overflow_capacity * 2;
+
+		hist->overflow_values = (double *) pg_realloc(hist->overflow_values,
+													  new_capacity * sizeof(double));
+		hist->overflow_capacity = new_capacity;
+	}
+
+	hist->overflow_values[hist->overflow_count] = latency_us;
+	hist->overflow_count++;
+	hist->overflow_sum += latency_us;
+}
+
+/*
+ * Record a latency value (in microseconds) in the histogram.
+ */
+static void
+addToLatencyHistogram(LatencyHistogram *hist, double latency_us)
+{
+	int			bucket;
+
+	/* Apply debug multiplier for testing overflow behavior */
+	latency_us *= debug_latency_multiplier;
+
+	if (latency_us < 0)
+		latency_us = 0;
+
+	if (latency_us < LATENCY_HIST_FINE_LIMIT)
+	{
+		/* Fine-grained bucket: 0-100ms at 100μs resolution */
+		bucket = (int) (latency_us / LATENCY_HIST_FINE_WIDTH);
+		if (bucket >= LATENCY_HIST_FINE_BUCKETS)
+			bucket = LATENCY_HIST_FINE_BUCKETS - 1;
+	}
+	else if (latency_us < LATENCY_HIST_COARSE_LIMIT)
+	{
+		/* Coarse bucket: 100ms-10s at 1ms resolution */
+		bucket = LATENCY_HIST_FINE_BUCKETS +
+			(int) ((latency_us - LATENCY_HIST_FINE_LIMIT) / LATENCY_HIST_COARSE_WIDTH);
+	}
+	else if (latency_us < LATENCY_HIST_MEDIUM_LIMIT)
+	{
+		/* Medium bucket: 10s-120s at 100ms resolution */
+		bucket = LATENCY_HIST_FINE_BUCKETS + LATENCY_HIST_COARSE_BUCKETS +
+			(int) ((latency_us - LATENCY_HIST_COARSE_LIMIT) / LATENCY_HIST_MEDIUM_WIDTH);
+	}
+	else
+	{
+		/*
+		 * Detect overflow on the double before converting to int: for very
+		 * large values (e.g., under --debug-latency-multiplier) the cast
+		 * below could otherwise overflow the int range.
+		 */
+		if (latency_us >= (double) LATENCY_HIST_MEDIUM_LIMIT +
+			(double) LATENCY_HIST_LARGE_BUCKETS * LATENCY_HIST_LARGE_WIDTH)
+		{
+			/* Overflow: latency exceeds max trackable (20 minutes) */
+			addOverflowValue(hist, latency_us);
+			return;
+		}
+
+		/* Large bucket: 120s-1200s at 1s resolution */
+		bucket = LATENCY_HIST_FINE_BUCKETS + LATENCY_HIST_COARSE_BUCKETS +
+			LATENCY_HIST_MEDIUM_BUCKETS +
+			(int) ((latency_us - LATENCY_HIST_MEDIUM_LIMIT) / LATENCY_HIST_LARGE_WIDTH);
+		if (bucket >= LATENCY_HIST_BUCKETS)
+			bucket = LATENCY_HIST_BUCKETS - 1;	/* guard against FP rounding */
+	}
+
+	hist->buckets[bucket]++;
+}
+
+/*
+ * Merge one LatencyHistogram into another.
+ * Concatenates overflow value arrays from both histograms.
+ */
+static void
+mergeLatencyHistogram(LatencyHistogram *acc, LatencyHistogram *hist)
+{
+	int			i;
+
+	for (i = 0; i < LATENCY_HIST_BUCKETS; i++)
+		acc->buckets[i] += hist->buckets[i];
+
+	/* Merge overflow values by concatenating arrays */
+	if (hist->overflow_count > 0)
+	{
+		int64		new_count = acc->overflow_count + hist->overflow_count;
+
+		/* Grow acc's array if needed */
+		if (new_count > acc->overflow_capacity)
+		{
+			int			new_capacity = acc->overflow_capacity == 0 ? 16 : acc->overflow_capacity;
+
+			while (new_capacity < new_count)
+				new_capacity *= 2;
+			acc->overflow_values = (double *) pg_realloc(acc->overflow_values,
+														 new_capacity * sizeof(double));
+			acc->overflow_capacity = new_capacity;
+		}
+
+		/* Copy hist's overflow values to acc */
+		for (i = 0; i < hist->overflow_count; i++)
+			acc->overflow_values[acc->overflow_count + i] = hist->overflow_values[i];
+
+		acc->overflow_count = new_count;
+		acc->overflow_sum += hist->overflow_sum;
+	}
+}
+
+/*
+ * Comparison function for sorting overflow values (for qsort).
+ */
+static int
+compareDoubles(const void *a, const void *b)
+{
+	double		da = *(const double *) a;
+	double		db = *(const double *) b;
+
+	if (da < db)
+		return -1;
+	if (da > db)
+		return 1;
+	return 0;
+}
+
+/*
+ * Get the latency value (in microseconds) at a given percentile (0-100).
+ * Returns the upper bound of the bucket containing the percentile.
+ */
+static double
+getPercentileLatency(LatencyHistogram *hist, double percentile)
+{
+	int64		total_count = 0;
+	int64		target_count;
+	int64		cumulative = 0;
+	int			i;
+
+	/* Count total entries */
+	for (i = 0; i < LATENCY_HIST_BUCKETS; i++)
+		total_count += hist->buckets[i];
+	total_count += hist->overflow_count;
+
+	if (total_count == 0)
+		return 0.0;
+
+	/* Find the bucket containing the target percentile */
+	target_count = (int64) ((percentile / 100.0) * total_count);
+	if (target_count >= total_count)
+		target_count = total_count - 1;
+
+	for (i = 0; i < LATENCY_HIST_BUCKETS; i++)
+	{
+		cumulative += hist->buckets[i];
+		if (cumulative > target_count)
+		{
+			/* Return upper bound of this bucket */
+			if (i < LATENCY_HIST_FINE_BUCKETS)
+			{
+				/* Fine bucket: 0-100ms */
+				return (double) ((i + 1) * LATENCY_HIST_FINE_WIDTH);
+			}
+			else if (i < LATENCY_HIST_FINE_BUCKETS + LATENCY_HIST_COARSE_BUCKETS)
+			{
+				/* Coarse bucket: 100ms-10s */
+				return (double) (LATENCY_HIST_FINE_LIMIT +
+								 (i - LATENCY_HIST_FINE_BUCKETS + 1) * LATENCY_HIST_COARSE_WIDTH);
+			}
+			else if (i < LATENCY_HIST_FINE_BUCKETS + LATENCY_HIST_COARSE_BUCKETS +
+					 LATENCY_HIST_MEDIUM_BUCKETS)
+			{
+				/* Medium bucket: 10s-120s */
+				return (double) (LATENCY_HIST_COARSE_LIMIT +
+								 (i - LATENCY_HIST_FINE_BUCKETS - LATENCY_HIST_COARSE_BUCKETS + 1) *
+								 LATENCY_HIST_MEDIUM_WIDTH);
+			}
+			else
+			{
+				/* Large bucket: 120s-1200s */
+				return (double) (LATENCY_HIST_MEDIUM_LIMIT +
+								 (i - LATENCY_HIST_FINE_BUCKETS - LATENCY_HIST_COARSE_BUCKETS -
+								  LATENCY_HIST_MEDIUM_BUCKETS + 1) * LATENCY_HIST_LARGE_WIDTH);
+			}
+		}
+	}
+
+	/*
+	 * Percentile is in overflow region.  Sort the overflow values and find
+	 * the exact value at the target position.
+	 */
+	if (hist->overflow_count > 0)
+	{
+		int64		overflow_target = target_count - cumulative;
+
+		/* Sort overflow values for accurate percentile calculation */
+		qsort(hist->overflow_values, hist->overflow_count, sizeof(double), compareDoubles);
+
+		/* Clamp to valid range */
+		if (overflow_target < 0)
+			overflow_target = 0;
+		if (overflow_target >= hist->overflow_count)
+			overflow_target = hist->overflow_count - 1;
+
+		return hist->overflow_values[overflow_target];
+	}
+
+	return 0.0;
+}
+
 /*
  * Initialize a StatsData struct to mostly zeroes, with its start time set to
  * the given value.
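For intuition about the lookup: with 1,000 recorded transactions, p99 targets rank floor(0.99 x 1000) = 990, and the walk accumulates bucket counts until the running total exceeds that rank, reporting the bucket's upper bound. The following standalone sketch (not the patch's code; it omits the multi-tier boundary arithmetic and uses one uniform bucket width) demonstrates the same cumulative walk:

	#include <stdio.h>

	/*
	 * Toy version of the cumulative walk in getPercentileLatency():
	 * buckets[i] counts samples in [i*width, (i+1)*width) microseconds.
	 */
	static double
	percentile_upper_bound(const long *buckets, int nbuckets, long width,
						   double percentile)
	{
		long		total = 0;
		long		target;
		long		cumulative = 0;

		for (int i = 0; i < nbuckets; i++)
			total += buckets[i];
		if (total == 0)
			return 0.0;

		target = (long) ((percentile / 100.0) * total);
		if (target >= total)
			target = total - 1;		/* clamp p100 to the last sample */

		for (int i = 0; i < nbuckets; i++)
		{
			cumulative += buckets[i];
			if (cumulative > target)
				return (double) ((i + 1) * width);	/* bucket upper bound */
		}
		return (double) (nbuckets * width);
	}

	int
	main(void)
	{
		/* 900 samples in 0-100us, 90 in 100-200us, 10 in 900-1000us */
		long		buckets[10] = {900, 90, 0, 0, 0, 0, 0, 0, 0, 10};

		/* prints "100 200 1000": p50, p95, p99.5 upper bounds in us */
		printf("%.0f %.0f %.0f\n",
			   percentile_upper_bound(buckets, 10, 100, 50),
			   percentile_upper_bound(buckets, 10, 100, 95),
			   percentile_upper_bound(buckets, 10, 100, 99.5));
		return 0;
	}

Reporting the bucket's upper bound makes the estimate conservative: the true percentile can be lower by at most one bucket width, which is why the fine tier keeps 100μs resolution where most OLTP latencies land.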
@@ -1469,6 +1855,15 @@ initStats(StatsData *sd, pg_time_usec_t start) sd->deadlock_failures = 0; initSimpleStats(&sd->latency); initSimpleStats(&sd->lag); + if (report_percentiles) + { + sd->latency_hist = (LatencyHistogram *) pg_malloc(sizeof(LatencyHistogram)); + initLatencyHistogram(sd->latency_hist); + } + else + { + sd->latency_hist = NULL; + } } /* @@ -1504,6 +1899,10 @@ accumStats(StatsData *stats, bool skipped, double lat, double lag, addToSimpleStats(&stats->latency, lat); + /* record in histogram for percentile calculation */ + if (report_percentiles) + addToLatencyHistogram(stats->latency_hist, lat); + /* and possibly the same for schedule lag */ if (throttle_delay) addToSimpleStats(&stats->lag, lag); @@ -4079,6 +4478,11 @@ advanceConnectionState(TState *thread, CState *st, StatsData *agg) /* XXX could use a mutex here, but we choose not to */ addToSimpleStats(&command->stats, PG_TIME_GET_DOUBLE(now - st->stmt_begin)); + + /* Also record in histogram for percentile calculation */ + if (report_percentiles && command->latency_hist != NULL) + addToLatencyHistogram(command->latency_hist, + (double) (now - st->stmt_begin)); } /* Go ahead with next command, to be executed or skipped */ @@ -4727,7 +5131,7 @@ processXactStats(TState *thread, CState *st, pg_time_usec_t *now, double latency = 0.0, lag = 0.0; bool detailed = progress || throttle_delay || latency_limit || - use_log || per_script_stats; + use_log || per_script_stats || report_percentiles; if (detailed && !skipped && st->estatus == ESTATUS_NO_ERROR) { @@ -5642,6 +6046,7 @@ create_sql_command(PQExpBuffer buf, const char *source) my_command->varprefix = NULL; /* allocated later, if needed */ my_command->expr = NULL; initSimpleStats(&my_command->stats); + my_command->latency_hist = NULL; /* allocated later if --percentile -r */ my_command->prepname = NULL; /* set later, if needed */ return my_command; @@ -5657,6 +6062,13 @@ free_command(Command *command) pg_free(command->argv[i]); pg_free(command->varprefix); + /* Free latency histogram if allocated */ + if (command->latency_hist != NULL) + { + freeLatencyHistogram(command->latency_hist); + pg_free(command->latency_hist); + } + /* * It should also free expr recursively, but this is currently not needed * as only gset commands (which do not have an expression) are freed. @@ -6312,6 +6724,7 @@ printProgressReport(TState *threads, int64 test_start, pg_time_usec_t now, { mergeSimpleStats(&cur.latency, &threads[i].stats.latency); mergeSimpleStats(&cur.lag, &threads[i].stats.lag); + /* Note: histogram merge skipped here - percentiles not shown in progress */ cur.cnt += threads[i].stats.cnt; cur.skipped += threads[i].stats.skipped; cur.retries += threads[i].stats.retries; @@ -6496,7 +6909,7 @@ printResults(StatsData *total, latency_limit / 1000.0, latency_late, total->cnt, (total->cnt > 0) ? 100.0 * latency_late / total->cnt : 0.0); - if (throttle_delay || progress || latency_limit) + if (throttle_delay || progress || latency_limit || report_percentiles) printSimpleStats("latency", &total->latency); else { @@ -6506,6 +6919,20 @@ printResults(StatsData *total, failures > 0 ? " (including failures)" : ""); } + /* Report latency percentiles if requested */ + if (report_percentiles) + { + double *plist = num_percentiles > 0 ? percentile_list : default_percentiles; + int pcount = num_percentiles > 0 ? 
num_percentiles : (int) (sizeof(default_percentiles) / sizeof(default_percentiles[0])); + + for (int i = 0; i < pcount; i++) + { + printf("latency percentile %g = %.3f ms\n", + plist[i], + 0.001 * getPercentileLatency(total->latency_hist, plist[i])); + } + } + if (throttle_delay) { /* @@ -6602,6 +7029,20 @@ printResults(StatsData *total, } printSimpleStats(" - latency", &sstats->latency); + + /* Report per-script latency percentiles if requested */ + if (report_percentiles) + { + double *plist = num_percentiles > 0 ? percentile_list : default_percentiles; + int pcount = num_percentiles > 0 ? num_percentiles : (int) (sizeof(default_percentiles) / sizeof(default_percentiles[0])); + + for (int j = 0; j < pcount; j++) + { + printf(" - latency percentile %g = %.3f ms\n", + plist[j], + 0.001 * getPercentileLatency(sstats->latency_hist, plist[j])); + } + } } /* @@ -6637,6 +7078,22 @@ printResults(StatsData *total, (*commands)->failures, (*commands)->retries, (*commands)->first_line); + + /* Print per-command percentiles on a single line */ + if (report_percentiles && (*commands)->latency_hist != NULL) + { + double *plist = num_percentiles > 0 ? percentile_list : default_percentiles; + int pcount = num_percentiles > 0 ? num_percentiles : (int) (sizeof(default_percentiles) / sizeof(default_percentiles[0])); + + printf(" "); + for (int k = 0; k < pcount; k++) + { + printf("p%g: %.3f%s", + plist[k], + 0.001 * getPercentileLatency((*commands)->latency_hist, plist[k]), + (k < pcount - 1) ? ", " : "\n"); + } + } } } } @@ -6739,6 +7196,8 @@ main(int argc, char **argv) {"verbose-errors", no_argument, NULL, 15}, {"exit-on-abort", no_argument, NULL, 16}, {"debug", no_argument, NULL, 17}, + {"percentile", optional_argument, NULL, 18}, + {"debug-latency-multiplier", required_argument, NULL, 19}, {NULL, 0, NULL, 0} }; @@ -7092,6 +7551,24 @@ main(int argc, char **argv) case 17: /* debug */ pg_logging_increase_verbosity(); break; + case 18: /* percentile */ + benchmarking_option_set = true; + report_percentiles = true; + if (optarg != NULL) + { + if (!parse_percentile_list(optarg)) + exit(1); + } + break; + case 19: /* debug-latency-multiplier */ + { + double multiplier = atof(optarg); + + if (multiplier <= 0.0) + pg_fatal("invalid latency multiplier: \"%s\"", optarg); + debug_latency_multiplier = multiplier; + } + break; default: /* getopt_long already emitted a complaint */ pg_log_error_hint("Try \"%s --help\" for more information.", progname); @@ -7120,6 +7597,27 @@ main(int argc, char **argv) total_weight += sql_script[i].weight; } + /* + * Allocate per-command histograms if both --percentile and -r are enabled. + * Note: this can consume significant memory with many commands/threads. 
+	 */
+	if (report_percentiles && report_per_command)
+	{
+		for (i = 0; i < num_scripts; i++)
+		{
+			Command   **commands = sql_script[i].commands;
+
+			for (int j = 0; commands[j] != NULL; j++)
+			{
+				commands[j]->latency_hist = (LatencyHistogram *) pg_malloc(sizeof(LatencyHistogram));
+				initLatencyHistogram(commands[j]->latency_hist);
+			}
+		}
+	}
+
+	/*
+	 * Scripts registered before --percentile was parsed were initialized
+	 * while report_percentiles was still false, so their per-script stats
+	 * have no histogram yet; allocate any missing ones now so that option
+	 * order on the command line does not matter.
+	 */
+	if (report_percentiles)
+	{
+		for (i = 0; i < num_scripts; i++)
+		{
+			if (sql_script[i].stats.latency_hist == NULL)
+			{
+				sql_script[i].stats.latency_hist = (LatencyHistogram *) pg_malloc(sizeof(LatencyHistogram));
+				initLatencyHistogram(sql_script[i].stats.latency_hist);
+			}
+		}
+	}
+
 	if (total_weight == 0 && !is_init_mode)
 		pg_fatal("total script weight must not be zero");
 
@@ -7441,6 +7939,8 @@ main(int argc, char **argv)
 		/* aggregate thread level stats */
 		mergeSimpleStats(&stats.latency, &thread->stats.latency);
 		mergeSimpleStats(&stats.lag, &thread->stats.lag);
+		if (report_percentiles)
+			mergeLatencyHistogram(stats.latency_hist, thread->stats.latency_hist);
 		stats.cnt += thread->stats.cnt;
 		stats.skipped += thread->stats.skipped;
 		stats.retries += thread->stats.retries;
diff --git a/src/bin/pgbench/t/001_pgbench_with_server.pl b/src/bin/pgbench/t/001_pgbench_with_server.pl
index f61dcf68234..9a49aa54cf5 100644
--- a/src/bin/pgbench/t/001_pgbench_with_server.pl
+++ b/src/bin/pgbench/t/001_pgbench_with_server.pl
@@ -1824,6 +1824,160 @@ BEGIN
 # Clean up
 $node->safe_psql('postgres', 'DROP TABLE counter;');
 
+# Test --percentile output
+$node->pgbench(
+	'-n -t 100 -c 2 --percentile -b select-only',
+	0,
+	[
+		qr{processed: 200/200},
+		qr{latency percentile 50 = \d+\.\d+ ms},
+		qr{latency percentile 90 = \d+\.\d+ ms},
+		qr{latency percentile 99 = \d+\.\d+ ms},
+		qr{latency percentile 99\.9 = \d+\.\d+ ms}
+	],
+	[qr{^$}],
+	'pgbench percentile output');
+
+# Test --percentile with multiple threads
+$node->pgbench(
+	'-n -t 50 -c 4 -j 2 --percentile -b select-only',
+	0,
+	[
+		qr{processed: 200/200},
+		qr{latency percentile 50 = \d+\.\d+ ms},
+		qr{latency percentile 90 = \d+\.\d+ ms}
+	],
+	[qr{^$}],
+	'pgbench percentile with threads');
+
+# Test --percentile with per-script stats (multiple scripts)
+$node->pgbench(
+	'-n -t 50 -c 2 --percentile -b select-only@2 -b simple-update@1',
+	0,
+	[
+		qr{processed: 100/100},
+		qr{latency percentile 50 = \d+\.\d+ ms},
+		qr{SQL script 1:.*\n.*- latency percentile 50 = \d+\.\d+ ms}s,
+		qr{SQL script 2:.*\n.*- latency percentile 50 = \d+\.\d+ ms}s
+	],
+	[qr{^$}],
+	'pgbench percentile with per-script stats');
+
+# Test --percentile with custom percentile list
+$node->pgbench(
+	'-n -t 100 -c 2 --percentile=25,75,95 -b select-only',
+	0,
+	[
+		qr{processed: 200/200},
+		qr{latency percentile 25 = \d+\.\d+ ms},
+		qr{latency percentile 75 = \d+\.\d+ ms},
+		qr{latency percentile 95 = \d+\.\d+ ms}
+	],
+	[qr{^$}],
+	'pgbench custom percentile list');
+
+# Test --percentile with fractional custom percentiles
+$node->pgbench(
+	'-n -t 100 -c 2 --percentile=50,99.5,99.95 -b select-only',
+	0,
+	[
+		qr{processed: 200/200},
+		qr{latency percentile 50 = \d+\.\d+ ms},
+		qr{latency percentile 99\.5 = \d+\.\d+ ms},
+		qr{latency percentile 99\.95 = \d+\.\d+ ms}
+	],
+	[qr{^$}],
+	'pgbench custom fractional percentiles');
+
+# Test --percentile with invalid percentile value (out of range)
+$node->pgbench(
+	'-n -t 10 -c 1 --percentile=50,101 -b select-only',
+	1,
+	[qr{^$}],
+	[qr{percentile must be between 0 and 100}],
+	'pgbench invalid percentile value out of range');
+
+# Test --percentile with invalid percentile value (zero)
+$node->pgbench(
+	'-n -t 10 -c 1 --percentile=0,50 -b select-only',
+	1,
+	[qr{^$}],
+	[qr{percentile must be between 0 and 100}],
+	'pgbench invalid percentile value zero');
+
+# Test --percentile with invalid percentile value (non-numeric)
+$node->pgbench( + '-n -t 10 -c 1 --percentile=50,abc -b select-only', + 1, + [qr{^$}], + [qr{invalid percentile value}], + 'pgbench invalid percentile value non-numeric'); + +# Test --percentile with -r (per-command percentiles) +$node->pgbench( + '-n -t 50 -c 2 --percentile -r -b select-only', + 0, + [ + qr{processed: 100/100}, + qr{latency percentile 50 = \d+\.\d+ ms}, + qr{statement latencies in milliseconds}, + qr{p50: \d+\.\d+, p90: \d+\.\d+, p99: \d+\.\d+} + ], + [qr{^$}], + 'pgbench per-command percentiles'); + +# Test --percentile with custom list and -r +$node->pgbench( + '-n -t 50 -c 2 --percentile=25,50,75 -r -b select-only', + 0, + [ + qr{processed: 100/100}, + qr{latency percentile 25 = \d+\.\d+ ms}, + qr{p25: \d+\.\d+, p50: \d+\.\d+, p75: \d+\.\d+} + ], + [qr{^$}], + 'pgbench per-command custom percentiles'); + +# Test --debug-latency-multiplier to simulate overflow (latencies > 20 min) +# Using a multiplier of 1e9 will push even microsecond latencies into overflow +$node->pgbench( + '-n -t 10 -c 1 --percentile=50,99 --debug-latency-multiplier=1e9 -b select-only', + 0, + [ + qr{processed: 10/10}, + qr{latency percentile 50 = \d+\.\d+ ms}, + qr{latency percentile 99 = \d+\.\d+ ms} + ], + [qr{^$}], + 'pgbench latency multiplier for overflow testing'); + +# Test high percentiles (99.99, 99.999) with overflow to exercise tail retrieval +# With 100 transactions all in overflow, p99.99 should retrieve from sorted overflow array +$node->pgbench( + '-n -t 100 -c 1 --percentile=50,90,99,99.9,99.99,99.999 --debug-latency-multiplier=1e9 -b select-only', + 0, + [ + qr{processed: 100/100}, + qr{latency percentile 50 = \d+\.\d+ ms}, + qr{latency percentile 99\.9 = \d+\.\d+ ms}, + qr{latency percentile 99\.99 = \d+\.\d+ ms}, + qr{latency percentile 99\.999 = \d+\.\d+ ms} + ], + [qr{^$}], + 'pgbench overflow high percentile retrieval'); + +# Test overflow with per-command stats to exercise command histogram overflow +$node->pgbench( + '-n -t 50 -c 1 --percentile=50,99.9 -r --debug-latency-multiplier=1e9 -b select-only', + 0, + [ + qr{processed: 50/50}, + qr{latency percentile 50 = \d+\.\d+ ms}, + qr{p50: \d+\.\d+.*p99\.9: \d+\.\d+} + ], + [qr{^$}], + 'pgbench overflow with per-command percentiles'); + # done $node->safe_psql('postgres', 'DROP TABLESPACE regress_pgbench_tap_1_ts'); $node->stop;
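For reviewers trying the patch, an illustrative session (abridged; the latency figures are invented, but the line formats follow the printf calls added above):

	$ pgbench -n -T 10 -c 8 -j 2 -r --percentile=50,95,99.9 postgres
	...
	latency percentile 50 = 0.412 ms
	latency percentile 95 = 0.903 ms
	latency percentile 99.9 = 4.217 ms
	...
	statement latencies in milliseconds, failures and retries:
	...
	    p50: 0.098, p95: 0.187, p99.9: 1.042

With the default --percentile (no list), the summary instead reports p50, p90, p99, p99.9, and p99.99.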