From 29ceac99ae06c5ade71e1f185faaa4de02e97677 Mon Sep 17 00:00:00 2001 From: Ryan Ji Date: Thu, 8 Jan 2026 18:58:20 -0500 Subject: [PATCH 1/7] Improved benchmarking suite --- Makefile | 4 +- benchmarks/baseline.json | 155 ++++++++++++++ profile_ddsketch.py | 450 ++++++++++++++++++++++++++++++++++++--- 3 files changed, 582 insertions(+), 27 deletions(-) create mode 100644 benchmarks/baseline.json diff --git a/Makefile b/Makefile index 9cdc91f..d9a173e 100644 --- a/Makefile +++ b/Makefile @@ -30,7 +30,7 @@ install: install-env-run install-env-docs install-env-test install-env-run: @echo "👷‍♂️ $(BLUE)creating virtual environment $(PROJECT)-run$(NC)" pyenv local --unset - -pyenv virtualenv $(PROJECT)-run > /dev/null + -pyenv virtualenv $(word 1,$(PYTHON_VERSIONS)) $(PROJECT)-run > /dev/null pyenv local $(PROJECT)-run pip install --no-user -U pip > /dev/null pip install --no-user -r requirements.txt > /dev/null @@ -39,7 +39,7 @@ install-env-run: install-env-docs: @echo "👷‍♂️ $(BLUE)creating virtual environment $(PROJECT)-docs$(NC)" pyenv local --unset - -pyenv virtualenv $(PROJECT)-docs > /dev/null + -pyenv virtualenv $(word 1,$(PYTHON_VERSIONS)) $(PROJECT)-docs > /dev/null pyenv local $(PROJECT)-docs pip install --no-user -U pip > /dev/null pip install --no-user -r requirements.docs.txt > /dev/null diff --git a/benchmarks/baseline.json b/benchmarks/baseline.json new file mode 100644 index 0000000..58c48d8 --- /dev/null +++ b/benchmarks/baseline.json @@ -0,0 +1,155 @@ +{ + "timestamp": "2026-01-08T18:57:53.373734", + "name": "baseline", + "metadata": { + "num_values": 10000000, + "num_trials": 5, + "timestamp": "2026-01-08T18:57:53.373709" + }, + "stats": [ + { + "function": "profile_ddsketch.py:18(run_sketch_operations)", + "ncalls": 1, + "tottime": 1.565406003, + "cumtime": 16.1225542324, + "percall_tot": 1.565406003, + "percall_cum": 16.1225542324 + }, + { + "function": "core.py:77(insert)", + "ncalls": 10000000, + "tottime": 3.4180910396, + "cumtime": 14.5549722284, + "percall_tot": 3.418091039600001e-07, + "percall_cum": 1.45549722284e-06 + }, + { + "function": "contiguous.py:51(add)", + "ncalls": 10000000, + "tottime": 4.7022346094, + "cumtime": 7.0226845072, + "percall_tot": 4.7022346094000004e-07, + "percall_cum": 7.022684507200001e-07 + }, + { + "function": "logarithmic.py:12(compute_bucket_index)", + "ncalls": 10000000, + "tottime": 2.7030884928000005, + "cumtime": 4.114196681600001, + "percall_tot": 2.7030884928e-07, + "percall_cum": 4.1141966816000005e-07 + }, + { + "function": "contiguous.py:37(_get_position)", + "ncalls": 10003320, + "tottime": 1.7105854762, + "cumtime": 2.3211455057999997, + "percall_tot": 1.7100174287063457e-07, + "percall_cum": 2.3203747022605352e-07 + }, + { + "function": "~:0()", + "ncalls": 10000001, + "tottime": 0.7740575776, + "cumtime": 0.7740575776, + "percall_tot": 7.7405750019425e-08, + "percall_cum": 7.7405750019425e-08 + }, + { + "function": "~:0()", + "ncalls": 10000000, + "tottime": 0.6370525874000001, + "cumtime": 0.6370525874000001, + "percall_tot": 6.370525874000001e-08, + "percall_cum": 6.370525874000001e-08 + }, + { + "function": "~:0()", + "ncalls": 10003337, + "tottime": 0.6105629916, + "cumtime": 0.6105629916, + "percall_tot": 6.103591742666684e-08, + "percall_cum": 6.103591742666684e-08 + }, + { + "function": "core.py:128(quantile)", + "ncalls": 4, + "tottime": 0.000567737, + "cumtime": 0.0021306240000000002, + "percall_tot": 0.00014193425, + "percall_cum": 0.0005326560000000001 + }, + { + "function": "contiguous.py:181(get_count)", + "ncalls": 3321, + "tottime": 0.0008458188000000002, + "cumtime": 0.0015443888, + "percall_tot": 2.549099319598771e-07, + "percall_cum": 4.6540709090191636e-07 + }, + { + "function": "core.py:24(__init__)", + "ncalls": 1, + "tottime": 8.0284e-06, + "cumtime": 4.5377e-05, + "percall_tot": 8.0284e-06, + "percall_cum": 4.5377e-05 + }, + { + "function": "~:0()", + "ncalls": 1, + "tottime": 3.40906e-05, + "cumtime": 3.40906e-05, + "percall_tot": 3.40906e-05, + "percall_cum": 3.40906e-05 + }, + { + "function": "contiguous.py:19(__init__)", + "ncalls": 2, + "tottime": 9.3374e-06, + "cumtime": 3.0868200000000004e-05, + "percall_tot": 4.6687e-06, + "percall_cum": 1.5434100000000002e-05 + }, + { + "function": "~:0()", + "ncalls": 2, + "tottime": 1.95952e-05, + "cumtime": 1.95952e-05, + "percall_tot": 9.7976e-06, + "percall_cum": 9.7976e-06 + }, + { + "function": "logarithmic.py:18(compute_value_from_index)", + "ncalls": 4, + "tottime": 1.54358e-05, + "cumtime": 1.8498200000000002e-05, + "percall_tot": 3.85895e-06, + "percall_cum": 4.6245500000000005e-06 + }, + { + "function": "logarithmic.py:7(__init__)", + "ncalls": 1, + "tottime": 4.5042e-06, + "cumtime": 6.4804000000000015e-06, + "percall_tot": 4.5042e-06, + "percall_cum": 6.4804000000000015e-06 + }, + { + "function": "~:0()", + "ncalls": 4, + "tottime": 3.0624000000000005e-06, + "cumtime": 3.0624000000000005e-06, + "percall_tot": 7.656000000000001e-07, + "percall_cum": 7.656000000000001e-07 + }, + { + "function": "base.py:17(__init__)", + "ncalls": 2, + "tottime": 1.9355999999999997e-06, + "cumtime": 1.9355999999999997e-06, + "percall_tot": 9.677999999999999e-07, + "percall_cum": 9.677999999999999e-07 + } + ] +} \ No newline at end of file diff --git a/profile_ddsketch.py b/profile_ddsketch.py index a086501..f4a769a 100644 --- a/profile_ddsketch.py +++ b/profile_ddsketch.py @@ -1,50 +1,450 @@ import cProfile import pstats import io -import numpy as np # Using numpy for faster random data generation +import json +import argparse +from pathlib import Path +from datetime import datetime +from typing import Dict, List, Tuple +import numpy as np from QuantileFlow.ddsketch.core import DDSketch -def run_sketch_operations(): - """Runs typical DDSketch operations for profiling.""" - print("Initializing DDSketch...") + +BENCHMARK_DIR = Path("benchmarks") +BENCHMARK_DIR.mkdir(exist_ok=True) + + +def run_sketch_operations(num_values: int = 10_000_000) -> Dict: + """Runs typical DDSketch operations for profiling. + + Returns: + Dict containing operation results and metrics + """ sketch = DDSketch(relative_accuracy=0.01) - num_values = 1_000_000 - print(f"Inserting {num_values} random values...") - # Generate random data more efficiently with numpy + # Generate random data data = np.random.rand(num_values) * 1000 + + # Track insertion time for value in data: sketch.insert(value) + # Compute quantiles quantiles_to_compute = [0.5, 0.9, 0.99, 0.999] - print(f"Computing quantiles: {quantiles_to_compute}...") + quantile_results = {} for q in quantiles_to_compute: try: quantile_value = sketch.quantile(q) - print(f"Quantile({q}): {quantile_value}") + quantile_results[q] = quantile_value except ValueError as e: - print(f"Error computing quantile {q}: {e}") + quantile_results[q] = f"Error: {e}" + + return { + 'num_values': num_values, + 'quantiles': quantile_results + } + + +def extract_profile_stats(profiler: cProfile.Profile, top_n: int = 20) -> List[Dict]: + """Extract key statistics from profiler.""" + stats = pstats.Stats(profiler) + stats_data = [] + + for func, (cc, nc, tt, ct, callers) in stats.stats.items(): + filename, line, func_name = func + stats_data.append({ + 'function': f"{Path(filename).name}:{line}({func_name})", + 'ncalls': nc, + 'tottime': tt, + 'cumtime': ct, + 'percall_tot': tt / nc if nc > 0 else 0, + 'percall_cum': ct / nc if nc > 0 else 0, + }) + + # Sort by cumulative time + stats_data.sort(key=lambda x: x['cumtime'], reverse=True) + return stats_data[:top_n] + + +def print_comparison_table(current: List[Dict], baseline: List[Dict]): + """Print side-by-side comparison of current run vs baseline.""" + print(f"\n{'=' * 130}") + print(f"{'Performance Comparison (Current vs Baseline)':^130}") + print('=' * 130) + + header = (f"{'Function':<45} " + f"{'Curr Time':>12} {'Base Time':>12} {'Diff':>10} {'Change %':>12} {'Calls Δ':>10}") + print(header) + print('-' * 130) + + # Create lookup dict for baseline + baseline_dict = {stat['function']: stat for stat in baseline} + + for curr_stat in current: + func_name = curr_stat['function'] + if len(func_name) > 42: + func_name = "..." + func_name[-42:] + + baseline_stat = baseline_dict.get(curr_stat['function']) + + if baseline_stat: + time_diff = curr_stat['cumtime'] - baseline_stat['cumtime'] + time_change = (time_diff / baseline_stat['cumtime'] * 100) if baseline_stat['cumtime'] > 0 else 0 + calls_diff = curr_stat['ncalls'] - baseline_stat['ncalls'] + + # Color coding for terminal (green for improvement, red for regression) + change_str = f"{time_change:+.1f}%" + if time_change < -5: # Significant improvement + change_str = f"\033[92m{change_str}\033[0m" + elif time_change > 5: # Significant regression + change_str = f"\033[91m{change_str}\033[0m" + + print(f"{func_name:<45} " + f"{curr_stat['cumtime']:>12.4f} " + f"{baseline_stat['cumtime']:>12.4f} " + f"{time_diff:>+10.4f} " + f"{change_str:>12} " + f"{calls_diff:>+10}") + else: + print(f"{func_name:<45} " + f"{curr_stat['cumtime']:>12.4f} " + f"{'N/A':>12} " + f"{'N/A':>10} " + f"{'NEW':>12} " + f"{'N/A':>10}") + + print('=' * 130) - print("Profiling complete.") -def profile(): - """Profiles the run_sketch_operations function.""" - profiler = cProfile.Profile() - profiler.enable() +def save_benchmark(stats_data: List[Dict], name: str, metadata: Dict): + """Save benchmark results to file.""" + benchmark = { + 'timestamp': datetime.now().isoformat(), + 'name': name, + 'metadata': metadata, + 'stats': stats_data + } - run_sketch_operations() + filepath = BENCHMARK_DIR / f"{name}.json" + with open(filepath, 'w') as f: + json.dump(benchmark, f, indent=2) - profiler.disable() + print(f"\n✓ Benchmark saved to: {filepath}") + + +def load_benchmark(name: str) -> Tuple[List[Dict], Dict]: + """Load benchmark from file.""" + filepath = BENCHMARK_DIR / f"{name}.json" + + if not filepath.exists(): + raise FileNotFoundError(f"Benchmark '{name}' not found at {filepath}") + + with open(filepath, 'r') as f: + data = json.load(f) + + return data['stats'], data['metadata'] + + +def list_benchmarks(): + """List all available benchmarks.""" + benchmarks = sorted(BENCHMARK_DIR.glob("*.json")) + + if not benchmarks: + print("\nNo benchmarks found.") + return + + print(f"\n{'Available Benchmarks':^90}") + print('=' * 90) + print(f"{'Name':<25} {'Date':<22} {'Values':<15} {'Trials':<10}") + print('-' * 90) - s = io.StringIO() - # Sort stats by cumulative time spent in the function and its callees - sortby = pstats.SortKey.CUMULATIVE - ps = pstats.Stats(profiler, stream=s).sort_stats(sortby) - ps.print_stats() + for bm_file in benchmarks: + with open(bm_file, 'r') as f: + data = json.load(f) + name = bm_file.stem + timestamp = datetime.fromisoformat(data['timestamp']).strftime('%Y-%m-%d %H:%M:%S') + num_values = data.get('metadata', {}).get('num_values', 'N/A') + num_trials = data.get('metadata', {}).get('num_trials', 1) + + values_str = f"{num_values:,}" if isinstance(num_values, int) else str(num_values) + print(f"{name:<25} {timestamp:<22} {values_str:<15} {num_trials:<10}") - print("\n--- cProfile Results (Sorted by Cumulative Time) ---") - print(s.getvalue()) + print('=' * 90) + + +def merge_trial_stats(all_trial_stats: List[List[Dict]]) -> Tuple[List[Dict], List[Dict]]: + """Merge statistics from multiple trials, computing mean and std dev. + + Args: + all_trial_stats: List of stats from each trial + + Returns: + Tuple of (mean_stats, std_stats) + """ + # Build a dict mapping function -> list of stats from each trial + func_stats = {} + + for trial_stats in all_trial_stats: + for stat in trial_stats: + func = stat['function'] + if func not in func_stats: + func_stats[func] = [] + func_stats[func].append(stat) + + # Compute mean and std dev for each function + mean_stats = [] + std_stats = [] + + for func, stats_list in func_stats.items(): + if not stats_list: + continue + + n = len(stats_list) + + # Compute means + mean_stat = { + 'function': func, + 'ncalls': int(np.mean([s['ncalls'] for s in stats_list])), + 'tottime': np.mean([s['tottime'] for s in stats_list]), + 'cumtime': np.mean([s['cumtime'] for s in stats_list]), + 'percall_tot': np.mean([s['percall_tot'] for s in stats_list]), + 'percall_cum': np.mean([s['percall_cum'] for s in stats_list]), + } + + # Compute standard deviations + std_stat = { + 'function': func, + 'tottime_std': np.std([s['tottime'] for s in stats_list]), + 'cumtime_std': np.std([s['cumtime'] for s in stats_list]), + } + + mean_stats.append(mean_stat) + std_stats.append(std_stat) + + # Sort by cumulative time + mean_stats.sort(key=lambda x: x['cumtime'], reverse=True) + + # Reorder std_stats to match mean_stats + func_to_std = {s['function']: s for s in std_stats} + std_stats = [func_to_std[s['function']] for s in mean_stats] + + return mean_stats, std_stats + + +def print_profile_table_with_std(stats_data: List[Dict], std_data: List[Dict], + title: str = "Profile Results", num_trials: int = 1): + """Print profile statistics with standard deviation.""" + print(f"\n{'=' * 120}") + print(f"{title:^120}") + if num_trials > 1: + print(f"{'(Averaged over ' + str(num_trials) + ' trials)':^120}") + print('=' * 120) + + if num_trials > 1: + header = f"{'Function':<45} {'Calls':>10} {'TotTime':>12} {'CumTime':>12} {'±StdDev':>10} {'Per Call':>12}" + else: + header = f"{'Function':<45} {'Calls':>10} {'TotTime':>12} {'CumTime':>12} {'Per Call':>12}" + print(header) + print('-' * 120) + + std_dict = {s['function']: s for s in std_data} if std_data else {} + + for stat in stats_data: + func_name = stat['function'] + if len(func_name) > 42: + func_name = "..." + func_name[-42:] + + if num_trials > 1 and stat['function'] in std_dict: + std = std_dict[stat['function']] + print(f"{func_name:<45} " + f"{stat['ncalls']:>10} " + f"{stat['tottime']:>12.4f} " + f"{stat['cumtime']:>12.4f} " + f"±{std['cumtime_std']:>9.4f} " + f"{stat['percall_cum']:>12.6f}") + else: + print(f"{func_name:<45} " + f"{stat['ncalls']:>10} " + f"{stat['tottime']:>12.4f} " + f"{stat['cumtime']:>12.4f} " + f"{stat['percall_cum']:>12.6f}") + + print('=' * 120) + + +def profile(num_values: int = 10_000_000, + save_as: str = None, + compare_to: str = None, + top_n: int = 20, + num_trials: int = 1): + """Profiles the run_sketch_operations function. + + Args: + num_values: Number of values to insert per trial + save_as: If provided, save results as a benchmark with this name + compare_to: If provided, compare results to this benchmark + top_n: Number of top functions to display + num_trials: Number of trials to run and average + """ + print(f"\n{'Starting Profile Run':^60}") + print(f"{'=' * 60}") + print(f"Values per trial: {num_values:,}") + print(f"Number of trials: {num_trials}") + print(f"Total operations: {num_values * num_trials:,}") + print(f"Top functions to show: {top_n}") + print('=' * 60) + + all_trial_stats = [] + + for trial in range(num_trials): + if num_trials > 1: + print(f"\n▶ Running trial {trial + 1}/{num_trials}...") + + profiler = cProfile.Profile() + profiler.enable() + + results = run_sketch_operations(num_values) + + profiler.disable() + + # Extract statistics for this trial + trial_stats = extract_profile_stats(profiler, top_n=top_n * 2) # Get more to ensure we have enough after averaging + all_trial_stats.append(trial_stats) + + # Merge stats from all trials + if num_trials > 1: + print(f"\n📊 Computing averages across {num_trials} trials...") + stats_data, std_data = merge_trial_stats(all_trial_stats) + stats_data = stats_data[:top_n] # Trim to requested top_n + std_data = std_data[:top_n] + else: + stats_data = all_trial_stats[0][:top_n] + std_data = [] + + # Print results + print("\n📊 Operation Results:") + print(f" • Values inserted per trial: {results['num_values']:,}") + if num_trials > 1: + print(f" • Total values processed: {results['num_values'] * num_trials:,}") + print(f" • Sample quantiles from last trial:") + for q, val in results['quantiles'].items(): + print(f" - Q({q}): {val}") + + # Show profile table + print_profile_table_with_std(stats_data, std_data, + "Current Run - Top Functions by Cumulative Time", + num_trials=num_trials) + + # Total time summary + total_time = sum(stat['cumtime'] for stat in stats_data[:5]) + if num_trials > 1 and std_data: + total_std = np.sqrt(sum(std['cumtime_std']**2 for std in std_data[:5])) + print(f"\n⏱️ Top 5 functions avg time: {total_time:.4f}s (±{total_std:.4f}s)") + cv = (total_std / total_time * 100) if total_time > 0 else 0 + print(f" Coefficient of variation: {cv:.2f}%", end="") + if cv < 5: + print(" ✓ (Very stable)") + elif cv < 10: + print(" (Stable)") + elif cv < 20: + print(" ⚠ (Moderate variance)") + else: + print(" ⚠ (High variance - consider more trials)") + else: + print(f"\n⏱️ Top 5 functions total time: {total_time:.4f}s") + + # Compare to baseline if requested + if compare_to: + try: + baseline_stats, baseline_metadata = load_benchmark(compare_to) + baseline_trials = baseline_metadata.get('num_trials', 1) + print(f"\n📈 Comparing to baseline: '{compare_to}'") + print(f" Baseline date: {baseline_metadata.get('timestamp', 'N/A')}") + print(f" Baseline values per trial: {baseline_metadata.get('num_values', 'N/A'):,}") + print(f" Baseline trials: {baseline_trials}") + print_comparison_table(stats_data, baseline_stats) + + # Summary statistics + curr_total = sum(s['cumtime'] for s in stats_data[:5]) + base_total = sum(s['cumtime'] for s in baseline_stats[:5]) + change = ((curr_total - base_total) / base_total * 100) if base_total > 0 else 0 + + print(f"\n📊 Overall Performance Change: {change:+.2f}%") + if change < -5: + print(" ✓ \033[92mSignificant improvement!\033[0m") + elif change > 5: + print(" ⚠ \033[91mPerformance regression detected\033[0m") + else: + print(" ≈ Similar performance") + + except FileNotFoundError as e: + print(f"\n⚠️ {e}") + + # Save as benchmark if requested + if save_as: + metadata = { + 'num_values': num_values, + 'num_trials': num_trials, + 'timestamp': datetime.now().isoformat(), + } + save_benchmark(stats_data, save_as, metadata) + + print("\n✅ Profiling complete.\n") + + +def main(): + parser = argparse.ArgumentParser( + description="Profile DDSketch operations with benchmarking capabilities", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Run basic profile with multiple trials + python profile_ddsketch.py --num-trials 5 + + # Save averaged baseline (recommended: use 3-5 trials) + python profile_ddsketch.py --num-trials 5 --save-as baseline + + # Compare against baseline + python profile_ddsketch.py --num-trials 5 --compare-to baseline + + # Save and compare in one run + python profile_ddsketch.py --num-trials 5 --save-as optimized --compare-to baseline + + # List all saved benchmarks + python profile_ddsketch.py --list + + # Quick test with fewer values + python profile_ddsketch.py --num-values 1000000 --num-trials 3 + """ + ) + + parser.add_argument('--num-values', type=int, default=10_000_000, + help='Number of values to insert per trial (default: 10,000,000)') + parser.add_argument('--num-trials', type=int, default=1, + help='Number of trials to run and average (default: 1, recommended: 3-5)') + parser.add_argument('--save-as', type=str, + help='Save results as a benchmark with this name') + parser.add_argument('--compare-to', type=str, + help='Compare results to this benchmark') + parser.add_argument('--list', action='store_true', + help='List all available benchmarks') + parser.add_argument('--top-n', type=int, default=20, + help='Number of top functions to display (default: 20)') + + args = parser.parse_args() + + if args.list: + list_benchmarks() + else: + profile( + num_values=args.num_values, + save_as=args.save_as, + compare_to=args.compare_to, + top_n=args.top_n, + num_trials=args.num_trials + ) + if __name__ == "__main__": - profile() \ No newline at end of file + main() \ No newline at end of file From ec234988479b35802191347fcc84511df0930a27 Mon Sep 17 00:00:00 2001 From: Ryan Ji Date: Thu, 8 Jan 2026 19:17:38 -0500 Subject: [PATCH 2/7] Replace calls to _get_position with a one liner to reduce overhead; ~10% speedup --- QuantileFlow/ddsketch/storage/contiguous.py | 28 +--- benchmarks/get_pos_oneliner.json | 147 ++++++++++++++++++++ profile_ddsketch.py | 5 +- 3 files changed, 155 insertions(+), 25 deletions(-) create mode 100644 benchmarks/get_pos_oneliner.json diff --git a/QuantileFlow/ddsketch/storage/contiguous.py b/QuantileFlow/ddsketch/storage/contiguous.py index 366053a..55176d8 100644 --- a/QuantileFlow/ddsketch/storage/contiguous.py +++ b/QuantileFlow/ddsketch/storage/contiguous.py @@ -34,20 +34,6 @@ def __init__(self, max_buckets: int = 2048): self.arr_index_of_min_bucket = 0 # Array index where min bucket is stored self.collapse_count = 0 # Number of times buckets have been collapsed - def _get_position(self, bucket_index: int) -> int: - """ - Get array position for bucket index using new mapping formula. - - Args: - bucket_index: The bucket index to map to array position. - - Returns: - The array position in the circular buffer. - """ - if self.min_index is None: - return 0 - return (bucket_index - self.min_index + self.arr_index_of_min_bucket) % len(self.counts) - def add(self, bucket_index: int, count: int = 1): """ Add count to bucket_index using new collapsing strategy. @@ -72,7 +58,7 @@ def add(self, bucket_index: int, count: int = 1): # Handle insertion below current minimum if new_range > len(self.counts): # Range too large, collapse into min bucket - pos = self._get_position(self.min_index) + pos = (self.arr_index_of_min_bucket) % len(self.counts) self.counts[pos] += count self.collapse_count += 1 else: @@ -80,7 +66,7 @@ def add(self, bucket_index: int, count: int = 1): shift = self.min_index - bucket_index self.min_index = bucket_index self.arr_index_of_min_bucket = self.arr_index_of_min_bucket - shift - pos = self._get_position(bucket_index) + pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % len(self.counts) self.counts[pos] = count self.num_buckets += 1 @@ -103,7 +89,7 @@ def add(self, bucket_index: int, count: int = 1): # Add collapsed values to new min bucket new_min = self.min_index + buckets_to_collapse - new_min_pos = self._get_position(new_min) + new_min_pos = (buckets_to_collapse + self.arr_index_of_min_bucket) % len(self.counts) self.counts[new_min_pos] += collapse_sum # Update tracking variables @@ -113,14 +99,14 @@ def add(self, bucket_index: int, count: int = 1): # Place new value self.max_index = bucket_index - pos = self._get_position(bucket_index) + pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % len(self.counts) was_zero = self.counts[pos] == 0 self.counts[pos] += count if was_zero: self.num_buckets += 1 else: # Normal insertion within current range - pos = self._get_position(bucket_index) + pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % len(self.counts) was_zero = self.counts[pos] == 0 self.counts[pos] += count if was_zero: @@ -143,7 +129,7 @@ def remove(self, bucket_index: int, count: int = 1) -> bool: return False if self.min_index <= bucket_index <= self.max_index: - pos = self._get_position(bucket_index) + pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % len(self.counts) old_count = self.counts[pos] if old_count == 0: @@ -191,7 +177,7 @@ def get_count(self, bucket_index: int) -> int: if self.min_index is None or bucket_index < self.min_index or bucket_index > self.max_index: warnings.warn("Bucket index is out of range. Returning 0.", UserWarning) return 0 - pos = self._get_position(bucket_index) + pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % len(self.counts) return int(self.counts[pos]) def merge(self, other: 'ContiguousStorage'): diff --git a/benchmarks/get_pos_oneliner.json b/benchmarks/get_pos_oneliner.json new file mode 100644 index 0000000..83befe0 --- /dev/null +++ b/benchmarks/get_pos_oneliner.json @@ -0,0 +1,147 @@ +{ + "timestamp": "2026-01-08T19:16:06.640239", + "name": "get_pos_oneliner", + "metadata": { + "num_values": 10000000, + "num_trials": 5, + "timestamp": "2026-01-08T19:16:06.640223" + }, + "stats": [ + { + "function": "profile_ddsketch.py:17(run_sketch_operations)", + "ncalls": 1, + "tottime": 1.6643150042, + "cumtime": 15.3238002182, + "percall_tot": 1.6643150042, + "percall_cum": 15.3238002182 + }, + { + "function": "core.py:77(insert)", + "ncalls": 10000000, + "tottime": 3.5052579114, + "cumtime": 13.657695052200001, + "percall_tot": 3.5052579114e-07, + "percall_cum": 1.36576950522e-06 + }, + { + "function": "contiguous.py:37(add)", + "ncalls": 10000000, + "tottime": 5.303862841599999, + "cumtime": 5.9952392386, + "percall_tot": 5.3038628416e-07, + "percall_cum": 5.9952392386e-07 + }, + { + "function": "logarithmic.py:12(compute_bucket_index)", + "ncalls": 10000000, + "tottime": 2.7194350750000007, + "cumtime": 4.1571979022, + "percall_tot": 2.7194350750000006e-07, + "percall_cum": 4.157197902200001e-07 + }, + { + "function": "~:0()", + "ncalls": 10000001, + "tottime": 0.7966471508, + "cumtime": 0.7966471508, + "percall_tot": 7.966470711352928e-08, + "percall_cum": 7.966470711352928e-08 + }, + { + "function": "~:0()", + "ncalls": 10003315, + "tottime": 0.6915629914, + "cumtime": 0.6915629914, + "percall_tot": 6.913336317554129e-08, + "percall_cum": 6.913336317554129e-08 + }, + { + "function": "~:0()", + "ncalls": 10000000, + "tottime": 0.6411181274000001, + "cumtime": 0.6411181274000001, + "percall_tot": 6.411181274e-08, + "percall_cum": 6.411181274e-08 + }, + { + "function": "core.py:128(quantile)", + "ncalls": 4, + "tottime": 0.0005669172000000001, + "cumtime": 0.0017470114000000003, + "percall_tot": 0.00014172930000000002, + "percall_cum": 0.00043675285000000007 + }, + { + "function": "contiguous.py:167(get_count)", + "ncalls": 3297, + "tottime": 0.0009659642000000001, + "cumtime": 0.0011525586, + "percall_tot": 2.927350312418232e-07, + "percall_cum": 3.491643699871712e-07 + }, + { + "function": "~:0()", + "ncalls": 1, + "tottime": 4.4151600000000006e-05, + "cumtime": 4.4151600000000006e-05, + "percall_tot": 4.4151600000000006e-05, + "percall_cum": 4.4151600000000006e-05 + }, + { + "function": "core.py:24(__init__)", + "ncalls": 1, + "tottime": 7.417200000000001e-06, + "cumtime": 4.3150400000000006e-05, + "percall_tot": 7.417200000000001e-06, + "percall_cum": 4.3150400000000006e-05 + }, + { + "function": "contiguous.py:19(__init__)", + "ncalls": 2, + "tottime": 9.9896e-06, + "cumtime": 2.8892999999999996e-05, + "percall_tot": 4.9948e-06, + "percall_cum": 1.4446499999999998e-05 + }, + { + "function": "logarithmic.py:18(compute_value_from_index)", + "ncalls": 4, + "tottime": 2.4520599999999997e-05, + "cumtime": 2.7535599999999998e-05, + "percall_tot": 6.130149999999999e-06, + "percall_cum": 6.8838999999999994e-06 + }, + { + "function": "~:0()", + "ncalls": 2, + "tottime": 1.6609e-05, + "cumtime": 1.6609e-05, + "percall_tot": 8.3045e-06, + "percall_cum": 8.3045e-06 + }, + { + "function": "logarithmic.py:7(__init__)", + "ncalls": 1, + "tottime": 4.3892e-06, + "cumtime": 6.8402e-06, + "percall_tot": 4.3892e-06, + "percall_cum": 6.8402e-06 + }, + { + "function": "~:0()", + "ncalls": 4, + "tottime": 3.0150000000000004e-06, + "cumtime": 3.0150000000000004e-06, + "percall_tot": 7.537500000000001e-07, + "percall_cum": 7.537500000000001e-07 + }, + { + "function": "base.py:17(__init__)", + "ncalls": 2, + "tottime": 2.2944000000000004e-06, + "cumtime": 2.2944000000000004e-06, + "percall_tot": 1.1472000000000002e-06, + "percall_cum": 1.1472000000000002e-06 + } + ] +} \ No newline at end of file diff --git a/profile_ddsketch.py b/profile_ddsketch.py index f4a769a..e30aa51 100644 --- a/profile_ddsketch.py +++ b/profile_ddsketch.py @@ -1,6 +1,5 @@ import cProfile import pstats -import io import json import argparse from pathlib import Path @@ -199,8 +198,6 @@ def merge_trial_stats(all_trial_stats: List[List[Dict]]) -> Tuple[List[Dict], Li for func, stats_list in func_stats.items(): if not stats_list: continue - - n = len(stats_list) # Compute means mean_stat = { @@ -327,7 +324,7 @@ def profile(num_values: int = 10_000_000, print(f" • Values inserted per trial: {results['num_values']:,}") if num_trials > 1: print(f" • Total values processed: {results['num_values'] * num_trials:,}") - print(f" • Sample quantiles from last trial:") + print(" • Sample quantiles from last trial:") for q, val in results['quantiles'].items(): print(f" - Q({q}): {val}") From a015364d180dda4c30792d5ae35a4ebede4c8bc5 Mon Sep 17 00:00:00 2001 From: Ryan Ji Date: Sat, 10 Jan 2026 16:19:02 -0500 Subject: [PATCH 3/7] Remove redundant int casting in mapping; remove redundant positive value check --- .../ddsketch/mapping/cubic_interpolation.py | 7 +- .../ddsketch/mapping/linear_interpolation.py | 7 +- QuantileFlow/ddsketch/mapping/logarithmic.py | 4 +- .../compute_bucket_index_redundancy.json | 147 ++++++++++++++++++ benchmarks/numba_mapping.json | 147 ++++++++++++++++++ 5 files changed, 299 insertions(+), 13 deletions(-) create mode 100644 benchmarks/compute_bucket_index_redundancy.json create mode 100644 benchmarks/numba_mapping.json diff --git a/QuantileFlow/ddsketch/mapping/cubic_interpolation.py b/QuantileFlow/ddsketch/mapping/cubic_interpolation.py index b1f37f6..c8db29a 100644 --- a/QuantileFlow/ddsketch/mapping/cubic_interpolation.py +++ b/QuantileFlow/ddsketch/mapping/cubic_interpolation.py @@ -59,9 +59,6 @@ def _cubic_interpolation(self, s: float) -> float: return s * (self.C + s * (self.B + s * self.A)) def compute_bucket_index(self, value: float) -> int: - if value <= 0: - raise ValueError("Value must be positive") - # Get binary exponent and normalized significand exponent, significand = self._extract_exponent_and_significand(value) @@ -73,7 +70,7 @@ def compute_bucket_index(self, value: float) -> int: # where m is the optimal multiplier, e is the exponent, # P(s) is the cubic interpolation, and γ is (1+α)/(1-α) index = self.m * (exponent + interpolated) / self.log2_gamma - return int(math.ceil(index)) + return math.ceil(index) def compute_value_from_index(self, index: float) -> float: """ @@ -84,7 +81,7 @@ def compute_value_from_index(self, index: float) -> float: target = (index * self.log2_gamma) / self.m # Extract integer and fractional parts - e = int(math.floor(target)) + e = math.floor(target) f = target - e # If f is close to 0 or 1, return power of 2 directly diff --git a/QuantileFlow/ddsketch/mapping/linear_interpolation.py b/QuantileFlow/ddsketch/mapping/linear_interpolation.py index 97506a4..8660453 100644 --- a/QuantileFlow/ddsketch/mapping/linear_interpolation.py +++ b/QuantileFlow/ddsketch/mapping/linear_interpolation.py @@ -30,9 +30,6 @@ def _extract_exponent(self, value: float) -> tuple[int, float]: return exponent, normalized_fraction def compute_bucket_index(self, value: float) -> int: - if value <= 0: - raise ValueError("Value must be positive") - # Get binary exponent and normalized fraction exponent, normalized_fraction = self._extract_exponent(value) @@ -42,7 +39,7 @@ def compute_bucket_index(self, value: float) -> int: # Compute final index log2_value = exponent + log2_fraction - return int(math.ceil(log2_value / self.log_gamma)) + return math.ceil(log2_value / self.log_gamma) def compute_value_from_index(self, index: int) -> float: """ @@ -58,7 +55,7 @@ def compute_value_from_index(self, index: int) -> float: log2_value = index * self.log_gamma # Extract the integer and fractional parts of log2_value - exponent = int(math.floor(log2_value) + 1) + exponent = math.floor(log2_value) + 1 mantissa = (log2_value - exponent + 2) / 2.0 # Use ldexp to efficiently compute 2^exponent * mantissa diff --git a/QuantileFlow/ddsketch/mapping/logarithmic.py b/QuantileFlow/ddsketch/mapping/logarithmic.py index 3fea7b1..fd25332 100644 --- a/QuantileFlow/ddsketch/mapping/logarithmic.py +++ b/QuantileFlow/ddsketch/mapping/logarithmic.py @@ -10,10 +10,8 @@ def __init__(self, relative_accuracy: float): self.multiplier = 1 / math.log(self.gamma) def compute_bucket_index(self, value: float) -> int: - if value <= 0: - raise ValueError(f"Value must be positive, got {value}") # ceil(log_gamma(value) = ceil(log(value) / log(gamma)) - return int(math.ceil(math.log(value) * self.multiplier)) + return math.ceil(math.log(value) * self.multiplier) def compute_value_from_index(self, index: int) -> float: # Return geometric mean of bucket boundaries diff --git a/benchmarks/compute_bucket_index_redundancy.json b/benchmarks/compute_bucket_index_redundancy.json new file mode 100644 index 0000000..cb79230 --- /dev/null +++ b/benchmarks/compute_bucket_index_redundancy.json @@ -0,0 +1,147 @@ +{ + "timestamp": "2026-01-10T16:17:23.669749", + "name": "compute_bucket_index_redundancy", + "metadata": { + "num_values": 10000000, + "num_trials": 5, + "timestamp": "2026-01-10T16:17:23.669737" + }, + "stats": [ + { + "function": "profile_ddsketch.py:17(run_sketch_operations)", + "ncalls": 1, + "tottime": 1.3887002275999998, + "cumtime": 13.0266837678, + "percall_tot": 1.3887002275999998, + "percall_cum": 13.0266837678 + }, + { + "function": "core.py:77(insert)", + "ncalls": 10000000, + "tottime": 3.2283954910000006, + "cumtime": 11.6364473224, + "percall_tot": 3.228395491e-07, + "percall_cum": 1.1636447322399998e-06 + }, + { + "function": "contiguous.py:37(add)", + "ncalls": 10000000, + "tottime": 4.568145479, + "cumtime": 5.2062685638, + "percall_tot": 4.568145479000001e-07, + "percall_cum": 5.206268563800001e-07 + }, + { + "function": "logarithmic.py:12(compute_bucket_index)", + "ncalls": 10000000, + "tottime": 1.903147694, + "cumtime": 3.2017832675999998, + "percall_tot": 1.9031476940000004e-07, + "percall_cum": 3.2017832676e-07 + }, + { + "function": "~:0()", + "ncalls": 10000001, + "tottime": 0.7143321826000001, + "cumtime": 0.7143321826000001, + "percall_tot": 7.14332111166789e-08, + "percall_cum": 7.14332111166789e-08 + }, + { + "function": "~:0()", + "ncalls": 10003244, + "tottime": 0.6382800820000002, + "cumtime": 0.6382800820000002, + "percall_tot": 6.380730510039856e-08, + "percall_cum": 6.380730510039856e-08 + }, + { + "function": "~:0()", + "ncalls": 10000000, + "tottime": 0.584304876, + "cumtime": 0.584304876, + "percall_tot": 5.8430487600000004e-08, + "percall_cum": 5.8430487600000004e-08 + }, + { + "function": "core.py:128(quantile)", + "ncalls": 4, + "tottime": 0.0004936750000000001, + "cumtime": 0.0014924204000000003, + "percall_tot": 0.00012341875000000001, + "percall_cum": 0.0003731051000000001 + }, + { + "function": "contiguous.py:167(get_count)", + "ncalls": 3223, + "tottime": 0.0008286914000000001, + "cumtime": 0.0009856886, + "percall_tot": 2.571817063873498e-07, + "percall_cum": 3.0584562750259306e-07 + }, + { + "function": "core.py:24(__init__)", + "ncalls": 1, + "tottime": 1.12378e-05, + "cumtime": 4.37974e-05, + "percall_tot": 1.12378e-05, + "percall_cum": 4.37974e-05 + }, + { + "function": "~:0()", + "ncalls": 1, + "tottime": 3.56454e-05, + "cumtime": 3.56454e-05, + "percall_tot": 3.56454e-05, + "percall_cum": 3.56454e-05 + }, + { + "function": "contiguous.py:19(__init__)", + "ncalls": 2, + "tottime": 1.10118e-05, + "cumtime": 2.7115199999999997e-05, + "percall_tot": 5.5059e-06, + "percall_cum": 1.3557599999999999e-05 + }, + { + "function": "~:0()", + "ncalls": 2, + "tottime": 1.4002600000000002e-05, + "cumtime": 1.4002600000000002e-05, + "percall_tot": 7.001300000000001e-06, + "percall_cum": 7.001300000000001e-06 + }, + { + "function": "logarithmic.py:16(compute_value_from_index)", + "ncalls": 4, + "tottime": 1.12992e-05, + "cumtime": 1.3056800000000002e-05, + "percall_tot": 2.8248e-06, + "percall_cum": 3.2642000000000004e-06 + }, + { + "function": "logarithmic.py:7(__init__)", + "ncalls": 1, + "tottime": 3.959400000000001e-06, + "cumtime": 5.4444e-06, + "percall_tot": 3.959400000000001e-06, + "percall_cum": 5.4444e-06 + }, + { + "function": "base.py:17(__init__)", + "ncalls": 2, + "tottime": 2.1008e-06, + "cumtime": 2.1008e-06, + "percall_tot": 1.0504e-06, + "percall_cum": 1.0504e-06 + }, + { + "function": "~:0()", + "ncalls": 4, + "tottime": 1.7576000000000003e-06, + "cumtime": 1.7576000000000003e-06, + "percall_tot": 4.394000000000001e-07, + "percall_cum": 4.394000000000001e-07 + } + ] +} \ No newline at end of file diff --git a/benchmarks/numba_mapping.json b/benchmarks/numba_mapping.json new file mode 100644 index 0000000..7982e63 --- /dev/null +++ b/benchmarks/numba_mapping.json @@ -0,0 +1,147 @@ +{ + "timestamp": "2026-01-10T16:02:37.248118", + "name": "numba_mapping", + "metadata": { + "num_values": 10000000, + "num_trials": 5, + "timestamp": "2026-01-10T16:02:37.248088" + }, + "stats": [ + { + "function": "profile_ddsketch.py:17(run_sketch_operations)", + "ncalls": 1, + "tottime": 1.5803080648, + "cumtime": 15.3215635052, + "percall_tot": 1.5803080648, + "percall_cum": 15.3215635052 + }, + { + "function": "core.py:80(insert)", + "ncalls": 10000000, + "tottime": 3.561579956, + "cumtime": 13.739444904199999, + "percall_tot": 3.561579956e-07, + "percall_cum": 1.3739444904200001e-06 + }, + { + "function": "contiguous.py:37(add)", + "ncalls": 10000000, + "tottime": 4.967856963600001, + "cumtime": 5.6552094486, + "percall_tot": 4.9678569636e-07, + "percall_cum": 5.6552094486e-07 + }, + { + "function": "logarithmic.py:35(compute_bucket_index)", + "ncalls": 10000000, + "tottime": 3.040597505, + "cumtime": 4.522655499600001, + "percall_tot": 3.0405975050000003e-07, + "percall_cum": 4.5226554996000005e-07 + }, + { + "function": "~:0()", + "ncalls": 10000001, + "tottime": 0.775530691, + "cumtime": 0.775530691, + "percall_tot": 7.755306134469388e-08, + "percall_cum": 7.755306134469388e-08 + }, + { + "function": "~:0()", + "ncalls": 10000000, + "tottime": 0.7065290746, + "cumtime": 0.7065290746, + "percall_tot": 7.065290746000001e-08, + "percall_cum": 7.065290746000001e-08 + }, + { + "function": "~:0()", + "ncalls": 10003501, + "tottime": 0.6875303102, + "cumtime": 0.6875303102, + "percall_tot": 6.87289631095811e-08, + "percall_cum": 6.87289631095811e-08 + }, + { + "function": "core.py:131(quantile)", + "ncalls": 4, + "tottime": 0.0005834166000000001, + "cumtime": 0.0017633896, + "percall_tot": 0.00014585415000000003, + "percall_cum": 0.0004408474 + }, + { + "function": "contiguous.py:167(get_count)", + "ncalls": 3482, + "tottime": 0.0009676952, + "cumtime": 0.0011455204, + "percall_tot": 2.784679111662184e-07, + "percall_cum": 3.2965915502525064e-07 + }, + { + "function": "~:0()", + "ncalls": 1, + "tottime": 4.857280000000001e-05, + "cumtime": 4.857280000000001e-05, + "percall_tot": 4.857280000000001e-05, + "percall_cum": 4.857280000000001e-05 + }, + { + "function": "core.py:24(__init__)", + "ncalls": 1, + "tottime": 1.14384e-05, + "cumtime": 4.71466e-05, + "percall_tot": 1.14384e-05, + "percall_cum": 4.71466e-05 + }, + { + "function": "logarithmic.py:47(compute_value_from_index)", + "ncalls": 4, + "tottime": 3.120040000000001e-05, + "cumtime": 3.44526e-05, + "percall_tot": 7.800100000000002e-06, + "percall_cum": 8.61315e-06 + }, + { + "function": "contiguous.py:19(__init__)", + "ncalls": 2, + "tottime": 1.0658400000000001e-05, + "cumtime": 2.9616400000000003e-05, + "percall_tot": 5.329200000000001e-06, + "percall_cum": 1.4808200000000002e-05 + }, + { + "function": "~:0()", + "ncalls": 2, + "tottime": 1.6537600000000002e-05, + "cumtime": 1.6537600000000002e-05, + "percall_tot": 8.268800000000001e-06, + "percall_cum": 8.268800000000001e-06 + }, + { + "function": "logarithmic.py:29(__init__)", + "ncalls": 1, + "tottime": 4.3208e-06, + "cumtime": 6.0918e-06, + "percall_tot": 4.3208e-06, + "percall_cum": 6.0918e-06 + }, + { + "function": "~:0()", + "ncalls": 4, + "tottime": 3.2522000000000004e-06, + "cumtime": 3.2522000000000004e-06, + "percall_tot": 8.130500000000001e-07, + "percall_cum": 8.130500000000001e-07 + }, + { + "function": "base.py:17(__init__)", + "ncalls": 2, + "tottime": 2.4204e-06, + "cumtime": 2.4204e-06, + "percall_tot": 1.2102e-06, + "percall_cum": 1.2102e-06 + } + ] +} \ No newline at end of file From cc016c1f1107da79421cbf1ab44a23f89a53c9c0 Mon Sep 17 00:00:00 2001 From: Ryan Ji Date: Sat, 10 Jan 2026 16:28:46 -0500 Subject: [PATCH 4/7] Cached len(self.counts) in contiguous storage --- QuantileFlow/ddsketch/storage/contiguous.py | 25 ++-- benchmarks/cache_len_self_counts.json | 139 ++++++++++++++++++++ 2 files changed, 152 insertions(+), 12 deletions(-) create mode 100644 benchmarks/cache_len_self_counts.json diff --git a/QuantileFlow/ddsketch/storage/contiguous.py b/QuantileFlow/ddsketch/storage/contiguous.py index 55176d8..b2bd949 100644 --- a/QuantileFlow/ddsketch/storage/contiguous.py +++ b/QuantileFlow/ddsketch/storage/contiguous.py @@ -28,6 +28,7 @@ def __init__(self, max_buckets: int = 2048): super().__init__(max_buckets, BucketManagementStrategy.FIXED) self.total_count = 0 self.counts = np.zeros(max_buckets, dtype=np.int64) + self.max_buckets = max_buckets self.min_index = None # Minimum bucket index seen self.max_index = None # Maximum bucket index seen self.num_buckets = 0 # Number of non-zero buckets @@ -56,9 +57,9 @@ def add(self, bucket_index: int, count: int = 1): if bucket_index < self.min_index: new_range = self.max_index - bucket_index + 1 # Handle insertion below current minimum - if new_range > len(self.counts): + if new_range > self.max_buckets: # Range too large, collapse into min bucket - pos = (self.arr_index_of_min_bucket) % len(self.counts) + pos = (self.arr_index_of_min_bucket) % self.max_buckets self.counts[pos] += count self.collapse_count += 1 else: @@ -66,13 +67,13 @@ def add(self, bucket_index: int, count: int = 1): shift = self.min_index - bucket_index self.min_index = bucket_index self.arr_index_of_min_bucket = self.arr_index_of_min_bucket - shift - pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % len(self.counts) + pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % self.max_buckets self.counts[pos] = count self.num_buckets += 1 elif bucket_index > self.max_index: new_range = bucket_index - self.min_index + 1 - if new_range > len(self.counts): + if new_range > self.max_buckets: # Handle insertion above current maximum buckets_to_collapse = bucket_index - self.max_index # Collapse lowest buckets @@ -89,7 +90,7 @@ def add(self, bucket_index: int, count: int = 1): # Add collapsed values to new min bucket new_min = self.min_index + buckets_to_collapse - new_min_pos = (buckets_to_collapse + self.arr_index_of_min_bucket) % len(self.counts) + new_min_pos = (buckets_to_collapse + self.arr_index_of_min_bucket) % self.max_buckets self.counts[new_min_pos] += collapse_sum # Update tracking variables @@ -99,14 +100,14 @@ def add(self, bucket_index: int, count: int = 1): # Place new value self.max_index = bucket_index - pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % len(self.counts) + pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % self.max_buckets was_zero = self.counts[pos] == 0 self.counts[pos] += count if was_zero: self.num_buckets += 1 else: # Normal insertion within current range - pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % len(self.counts) + pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % self.max_buckets was_zero = self.counts[pos] == 0 self.counts[pos] += count if was_zero: @@ -129,7 +130,7 @@ def remove(self, bucket_index: int, count: int = 1) -> bool: return False if self.min_index <= bucket_index <= self.max_index: - pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % len(self.counts) + pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % self.max_buckets old_count = self.counts[pos] if old_count == 0: @@ -146,7 +147,7 @@ def remove(self, bucket_index: int, count: int = 1) -> bool: elif bucket_index == self.min_index: # Find new minimum index for i in range(self.max_index - self.min_index + 1): - pos = (self.arr_index_of_min_bucket + i) % len(self.counts) + pos = (self.arr_index_of_min_bucket + i) % self.max_buckets if self.counts[pos] > 0: self.min_index += i self.arr_index_of_min_bucket = pos @@ -154,7 +155,7 @@ def remove(self, bucket_index: int, count: int = 1) -> bool: elif bucket_index == self.max_index: # Find new maximum index for i in range(self.max_index - self.min_index + 1): - pos = (self.arr_index_of_min_bucket + (self.max_index - self.min_index - i)) % len(self.counts) + pos = (self.arr_index_of_min_bucket + (self.max_index - self.min_index - i)) % self.max_buckets if self.counts[pos] > 0: self.max_index -= i break @@ -177,7 +178,7 @@ def get_count(self, bucket_index: int) -> int: if self.min_index is None or bucket_index < self.min_index or bucket_index > self.max_index: warnings.warn("Bucket index is out of range. Returning 0.", UserWarning) return 0 - pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % len(self.counts) + pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % self.max_buckets return int(self.counts[pos]) def merge(self, other: 'ContiguousStorage'): @@ -192,7 +193,7 @@ def merge(self, other: 'ContiguousStorage'): # Add each non-zero bucket for i in range(other.max_index - other.min_index + 1): - pos = (other.arr_index_of_min_bucket + i) % len(other.counts) + pos = (other.arr_index_of_min_bucket + i) % other.max_buckets if other.counts[pos] > 0: bucket_index = other.min_index + i self.add(bucket_index, int(other.counts[pos])) diff --git a/benchmarks/cache_len_self_counts.json b/benchmarks/cache_len_self_counts.json new file mode 100644 index 0000000..dfcc096 --- /dev/null +++ b/benchmarks/cache_len_self_counts.json @@ -0,0 +1,139 @@ +{ + "timestamp": "2026-01-10T16:26:33.986951", + "name": "cache_len_self_counts", + "metadata": { + "num_values": 10000000, + "num_trials": 5, + "timestamp": "2026-01-10T16:26:33.986892" + }, + "stats": [ + { + "function": "profile_ddsketch.py:17(run_sketch_operations)", + "ncalls": 1, + "tottime": 1.3366227698000002, + "cumtime": 11.3580648152, + "percall_tot": 1.3366227698000002, + "percall_cum": 11.3580648152 + }, + { + "function": "core.py:77(insert)", + "ncalls": 10000000, + "tottime": 3.1263041402000002, + "cumtime": 10.0199071374, + "percall_tot": 3.1263041402e-07, + "percall_cum": 1.0019907137400002e-06 + }, + { + "function": "contiguous.py:38(add)", + "ncalls": 10000000, + "tottime": 3.8569736876, + "cumtime": 3.8569736876, + "percall_tot": 3.8569736875999994e-07, + "percall_cum": 3.8569736875999994e-07 + }, + { + "function": "logarithmic.py:12(compute_bucket_index)", + "ncalls": 10000000, + "tottime": 1.8100902910000003, + "cumtime": 3.0366293096, + "percall_tot": 1.810090291e-07, + "percall_cum": 3.0366293096000003e-07 + }, + { + "function": "~:0()", + "ncalls": 10000001, + "tottime": 0.671522876, + "cumtime": 0.671522876, + "percall_tot": 6.71522808847719e-08, + "percall_cum": 6.71522808847719e-08 + }, + { + "function": "~:0()", + "ncalls": 10000000, + "tottime": 0.555017911, + "cumtime": 0.555017911, + "percall_tot": 5.550179110000001e-08, + "percall_cum": 5.550179110000001e-08 + }, + { + "function": "core.py:128(quantile)", + "ncalls": 4, + "tottime": 0.0006492008, + "cumtime": 0.0014758362000000002, + "percall_tot": 0.0001623002, + "percall_cum": 0.00036895905000000005 + }, + { + "function": "contiguous.py:168(get_count)", + "ncalls": 3175, + "tottime": 0.0007850656000000001, + "cumtime": 0.0007850656000000001, + "percall_tot": 2.450894805204026e-07, + "percall_cum": 2.450894805204026e-07 + }, + { + "function": "core.py:24(__init__)", + "ncalls": 1, + "tottime": 8.736200000000001e-06, + "cumtime": 5.907179999999999e-05, + "percall_tot": 8.736200000000001e-06, + "percall_cum": 5.907179999999999e-05 + }, + { + "function": "~:0()", + "ncalls": 1, + "tottime": 4.9611800000000005e-05, + "cumtime": 4.9611800000000005e-05, + "percall_tot": 4.9611800000000005e-05, + "percall_cum": 4.9611800000000005e-05 + }, + { + "function": "contiguous.py:19(__init__)", + "ncalls": 2, + "tottime": 1.16142e-05, + "cumtime": 4.36792e-05, + "percall_tot": 5.8071e-06, + "percall_cum": 2.18396e-05 + }, + { + "function": "logarithmic.py:16(compute_value_from_index)", + "ncalls": 4, + "tottime": 3.6364999999999995e-05, + "cumtime": 4.15698e-05, + "percall_tot": 9.091249999999999e-06, + "percall_cum": 1.039245e-05 + }, + { + "function": "~:0()", + "ncalls": 2, + "tottime": 2.748e-05, + "cumtime": 2.748e-05, + "percall_tot": 1.374e-05, + "percall_cum": 1.374e-05 + }, + { + "function": "logarithmic.py:7(__init__)", + "ncalls": 1, + "tottime": 4.8880000000000005e-06, + "cumtime": 6.656400000000001e-06, + "percall_tot": 4.8880000000000005e-06, + "percall_cum": 6.656400000000001e-06 + }, + { + "function": "~:0()", + "ncalls": 4, + "tottime": 5.204800000000001e-06, + "cumtime": 5.204800000000001e-06, + "percall_tot": 1.3012000000000003e-06, + "percall_cum": 1.3012000000000003e-06 + }, + { + "function": "base.py:17(__init__)", + "ncalls": 2, + "tottime": 4.584999999999999e-06, + "cumtime": 4.584999999999999e-06, + "percall_tot": 2.2924999999999997e-06, + "percall_cum": 2.2924999999999997e-06 + } + ] +} \ No newline at end of file From 91c3c865d17d741d1ca823d2ab7fd11b4edec24d Mon Sep 17 00:00:00 2001 From: Ryan Ji Date: Sat, 10 Jan 2026 16:38:25 -0500 Subject: [PATCH 5/7] Cleaned up insert comparisons --- QuantileFlow/ddsketch/core.py | 4 +- benchmarks/5streamline_insert_method.json | 139 ++++++++++++++++++++++ 2 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 benchmarks/5streamline_insert_method.json diff --git a/QuantileFlow/ddsketch/core.py b/QuantileFlow/ddsketch/core.py index f3d9b3c..1a623a9 100644 --- a/QuantileFlow/ddsketch/core.py +++ b/QuantileFlow/ddsketch/core.py @@ -89,10 +89,10 @@ def insert(self, value: Union[int, float]) -> None: elif value > 0: bucket_idx = self.mapping.compute_bucket_index(value) self.positive_store.add(bucket_idx) - elif value < 0 and self.cont_neg: + elif self.cont_neg: bucket_idx = self.mapping.compute_bucket_index(-value) self.negative_store.add(bucket_idx) - elif value < 0: + else: raise ValueError("Negative values not supported when cont_neg is False") self.count += 1 diff --git a/benchmarks/5streamline_insert_method.json b/benchmarks/5streamline_insert_method.json new file mode 100644 index 0000000..0b4645b --- /dev/null +++ b/benchmarks/5streamline_insert_method.json @@ -0,0 +1,139 @@ +{ + "timestamp": "2026-01-10T16:37:06.396876", + "name": "5streamline_insert_method", + "metadata": { + "num_values": 10000000, + "num_trials": 5, + "timestamp": "2026-01-10T16:37:06.396868" + }, + "stats": [ + { + "function": "profile_ddsketch.py:17(run_sketch_operations)", + "ncalls": 1, + "tottime": 1.3612975308000004, + "cumtime": 11.6981027108, + "percall_tot": 1.3612975308000004, + "percall_cum": 11.6981027108 + }, + { + "function": "core.py:77(insert)", + "ncalls": 10000000, + "tottime": 3.2169409506, + "cumtime": 10.3354853876, + "percall_tot": 3.2169409506e-07, + "percall_cum": 1.0335485387600001e-06 + }, + { + "function": "contiguous.py:38(add)", + "ncalls": 10000000, + "tottime": 3.9631574034000003, + "cumtime": 3.9631574034000003, + "percall_tot": 3.9631574034000003e-07, + "percall_cum": 3.9631574034000003e-07 + }, + { + "function": "logarithmic.py:12(compute_bucket_index)", + "ncalls": 10000000, + "tottime": 1.8843829416000002, + "cumtime": 3.1553870336000003, + "percall_tot": 1.8843829416000001e-07, + "percall_cum": 3.1553870336e-07 + }, + { + "function": "~:0()", + "ncalls": 10000001, + "tottime": 0.6960204540000001, + "cumtime": 0.6960204540000001, + "percall_tot": 6.960203843979616e-08, + "percall_cum": 6.960203843979616e-08 + }, + { + "function": "~:0()", + "ncalls": 10000000, + "tottime": 0.5749850052000001, + "cumtime": 0.5749850052000001, + "percall_tot": 5.749850052000001e-08, + "percall_cum": 5.749850052000001e-08 + }, + { + "function": "core.py:128(quantile)", + "ncalls": 4, + "tottime": 0.0005329822000000001, + "cumtime": 0.0012824054, + "percall_tot": 0.00013324555000000001, + "percall_cum": 0.00032060135 + }, + { + "function": "contiguous.py:168(get_count)", + "ncalls": 3494, + "tottime": 0.0007340835999999999, + "cumtime": 0.0007340835999999999, + "percall_tot": 2.0989430127963593e-07, + "percall_cum": 2.0989430127963593e-07 + }, + { + "function": "core.py:24(__init__)", + "ncalls": 1, + "tottime": 5.505400000000001e-06, + "cumtime": 3.7387e-05, + "percall_tot": 5.505400000000001e-06, + "percall_cum": 3.7387e-05 + }, + { + "function": "~:0()", + "ncalls": 1, + "tottime": 3.31294e-05, + "cumtime": 3.31294e-05, + "percall_tot": 3.31294e-05, + "percall_cum": 3.31294e-05 + }, + { + "function": "contiguous.py:19(__init__)", + "ncalls": 2, + "tottime": 8.434000000000001e-06, + "cumtime": 2.6972e-05, + "percall_tot": 4.2170000000000005e-06, + "percall_cum": 1.3486e-05 + }, + { + "function": "~:0()", + "ncalls": 2, + "tottime": 1.6823e-05, + "cumtime": 1.6823e-05, + "percall_tot": 8.4115e-06, + "percall_cum": 8.4115e-06 + }, + { + "function": "logarithmic.py:16(compute_value_from_index)", + "ncalls": 4, + "tottime": 1.3271000000000002e-05, + "cumtime": 1.53396e-05, + "percall_tot": 3.3177500000000005e-06, + "percall_cum": 3.8349e-06 + }, + { + "function": "logarithmic.py:7(__init__)", + "ncalls": 1, + "tottime": 3.5424000000000006e-06, + "cumtime": 4.9096e-06, + "percall_tot": 3.5424000000000006e-06, + "percall_cum": 4.9096e-06 + }, + { + "function": "~:0()", + "ncalls": 4, + "tottime": 2.0686e-06, + "cumtime": 2.0686e-06, + "percall_tot": 5.1715e-07, + "percall_cum": 5.1715e-07 + }, + { + "function": "base.py:17(__init__)", + "ncalls": 2, + "tottime": 1.7150000000000003e-06, + "cumtime": 1.7150000000000003e-06, + "percall_tot": 8.575000000000002e-07, + "percall_cum": 8.575000000000002e-07 + } + ] +} \ No newline at end of file From b3e245f84dc6190739b841a6ca822b02db50dac0 Mon Sep 17 00:00:00 2001 From: Ryan Ji Date: Sat, 10 Jan 2026 17:32:03 -0500 Subject: [PATCH 6/7] Added slots --- QuantileFlow/ddsketch/mapping/logarithmic.py | 4 +- QuantileFlow/ddsketch/storage/contiguous.py | 4 + benchmarks/6add_slots.json | 139 ++++++++++++ benchmarks/7reword_value_comparisons.json | 139 ++++++++++++ profile_ddsketch.py | 215 ++++++++++++++++++- 5 files changed, 491 insertions(+), 10 deletions(-) create mode 100644 benchmarks/6add_slots.json create mode 100644 benchmarks/7reword_value_comparisons.json diff --git a/QuantileFlow/ddsketch/mapping/logarithmic.py b/QuantileFlow/ddsketch/mapping/logarithmic.py index fd25332..a495921 100644 --- a/QuantileFlow/ddsketch/mapping/logarithmic.py +++ b/QuantileFlow/ddsketch/mapping/logarithmic.py @@ -4,7 +4,9 @@ from .base import MappingScheme class LogarithmicMapping(MappingScheme): - def __init__(self, relative_accuracy: float): + __slots__ = ('relative_accuracy', 'gamma', 'multiplier') + + def __init__(self, relative_accuracy: float): self.relative_accuracy = relative_accuracy self.gamma = (1 + relative_accuracy) / (1 - relative_accuracy) self.multiplier = 1 / math.log(self.gamma) diff --git a/QuantileFlow/ddsketch/storage/contiguous.py b/QuantileFlow/ddsketch/storage/contiguous.py index b2bd949..92ead6f 100644 --- a/QuantileFlow/ddsketch/storage/contiguous.py +++ b/QuantileFlow/ddsketch/storage/contiguous.py @@ -15,6 +15,10 @@ class ContiguousStorage(Storage): - If inserting below min: collapse if range too large, otherwise adjust min - If inserting above max: collapse lowest buckets to make room """ + + __slots__ = ('total_count', 'counts', 'min_index', 'max_index', + 'num_buckets', 'arr_index_of_min_bucket', 'collapse_count', + 'max_buckets', 'bucket_mask', 'strategy') def __init__(self, max_buckets: int = 2048): """ diff --git a/benchmarks/6add_slots.json b/benchmarks/6add_slots.json new file mode 100644 index 0000000..243e776 --- /dev/null +++ b/benchmarks/6add_slots.json @@ -0,0 +1,139 @@ +{ + "timestamp": "2026-01-10T16:42:43.578971", + "name": "6add_slots", + "metadata": { + "num_values": 10000000, + "num_trials": 5, + "timestamp": "2026-01-10T16:42:43.578962" + }, + "stats": [ + { + "function": "profile_ddsketch.py:17(run_sketch_operations)", + "ncalls": 1, + "tottime": 1.33988225, + "cumtime": 11.5264426936, + "percall_tot": 1.33988225, + "percall_cum": 11.5264426936 + }, + { + "function": "core.py:77(insert)", + "ncalls": 10000000, + "tottime": 3.3348853683999997, + "cumtime": 10.185277087, + "percall_tot": 3.3348853684e-07, + "percall_cum": 1.0185277087e-06 + }, + { + "function": "contiguous.py:42(add)", + "ncalls": 10000000, + "tottime": 3.8266771040000003, + "cumtime": 3.8266771040000003, + "percall_tot": 3.826677104e-07, + "percall_cum": 3.826677104e-07 + }, + { + "function": "logarithmic.py:14(compute_bucket_index)", + "ncalls": 10000000, + "tottime": 1.7708338908, + "cumtime": 3.0237146146000002, + "percall_tot": 1.7708338908000002e-07, + "percall_cum": 3.0237146146e-07 + }, + { + "function": "~:0()", + "ncalls": 10000001, + "tottime": 0.6956715138, + "cumtime": 0.6956715138, + "percall_tot": 6.956714442328556e-08, + "percall_cum": 6.956714442328556e-08 + }, + { + "function": "~:0()", + "ncalls": 10000000, + "tottime": 0.5572106932, + "cumtime": 0.5572106932, + "percall_tot": 5.572106932e-08, + "percall_cum": 5.572106932e-08 + }, + { + "function": "core.py:128(quantile)", + "ncalls": 4, + "tottime": 0.000548316, + "cumtime": 0.0012531198, + "percall_tot": 0.000137079, + "percall_cum": 0.00031327995 + }, + { + "function": "contiguous.py:172(get_count)", + "ncalls": 3324, + "tottime": 0.0006952066000000001, + "cumtime": 0.0006952066000000001, + "percall_tot": 2.092879594892521e-07, + "percall_cum": 2.092879594892521e-07 + }, + { + "function": "core.py:24(__init__)", + "ncalls": 1, + "tottime": 5.0892e-06, + "cumtime": 3.0236799999999995e-05, + "percall_tot": 5.0892e-06, + "percall_cum": 3.0236799999999995e-05 + }, + { + "function": "~:0()", + "ncalls": 1, + "tottime": 2.64168e-05, + "cumtime": 2.64168e-05, + "percall_tot": 2.64168e-05, + "percall_cum": 2.64168e-05 + }, + { + "function": "contiguous.py:23(__init__)", + "ncalls": 2, + "tottime": 7.3314e-06, + "cumtime": 2.0227400000000002e-05, + "percall_tot": 3.6657e-06, + "percall_cum": 1.0113700000000001e-05 + }, + { + "function": "~:0()", + "ncalls": 2, + "tottime": 1.0474e-05, + "cumtime": 1.0474e-05, + "percall_tot": 5.237e-06, + "percall_cum": 5.237e-06 + }, + { + "function": "logarithmic.py:18(compute_value_from_index)", + "ncalls": 4, + "tottime": 7.9294e-06, + "cumtime": 9.5972e-06, + "percall_tot": 1.98235e-06, + "percall_cum": 2.3993e-06 + }, + { + "function": "logarithmic.py:9(__init__)", + "ncalls": 1, + "tottime": 3.4370000000000003e-06, + "cumtime": 4.9202e-06, + "percall_tot": 3.4370000000000003e-06, + "percall_cum": 4.9202e-06 + }, + { + "function": "base.py:17(__init__)", + "ncalls": 2, + "tottime": 2.422e-06, + "cumtime": 2.422e-06, + "percall_tot": 1.211e-06, + "percall_cum": 1.211e-06 + }, + { + "function": "~:0()", + "ncalls": 4, + "tottime": 1.6678000000000002e-06, + "cumtime": 1.6678000000000002e-06, + "percall_tot": 4.1695000000000004e-07, + "percall_cum": 4.1695000000000004e-07 + } + ] +} \ No newline at end of file diff --git a/benchmarks/7reword_value_comparisons.json b/benchmarks/7reword_value_comparisons.json new file mode 100644 index 0000000..2efe686 --- /dev/null +++ b/benchmarks/7reword_value_comparisons.json @@ -0,0 +1,139 @@ +{ + "timestamp": "2026-01-10T17:13:13.833726", + "name": "7reword_value_comparisons", + "metadata": { + "num_values": 10000000, + "num_trials": 5, + "timestamp": "2026-01-10T17:13:13.833712" + }, + "stats": [ + { + "function": "profile_ddsketch.py:26(run_sketch_operations)", + "ncalls": 1, + "tottime": 1.8573772013999998, + "cumtime": 15.3256251814, + "percall_tot": 1.8573772013999998, + "percall_cum": 15.3256251814 + }, + { + "function": "core.py:77(insert)", + "ncalls": 10000000, + "tottime": 4.248560426, + "cumtime": 13.4664414144, + "percall_tot": 4.2485604260000007e-07, + "percall_cum": 1.34664414144e-06 + }, + { + "function": "contiguous.py:42(add)", + "ncalls": 10000000, + "tottime": 5.1584048128, + "cumtime": 5.1584048128, + "percall_tot": 5.158404812800001e-07, + "percall_cum": 5.158404812800001e-07 + }, + { + "function": "logarithmic.py:14(compute_bucket_index)", + "ncalls": 10000000, + "tottime": 2.3456515052, + "cumtime": 4.0594761756, + "percall_tot": 2.3456515052000006e-07, + "percall_cum": 4.0594761756000005e-07 + }, + { + "function": "~:0()", + "ncalls": 10000001, + "tottime": 0.9696837526, + "cumtime": 0.9696837526, + "percall_tot": 9.696836556316345e-08, + "percall_cum": 9.696836556316345e-08 + }, + { + "function": "~:0()", + "ncalls": 10000000, + "tottime": 0.7441432736, + "cumtime": 0.7441432736, + "percall_tot": 7.441432736e-08, + "percall_cum": 7.441432736e-08 + }, + { + "function": "core.py:129(quantile)", + "ncalls": 4, + "tottime": 0.0007785592000000001, + "cumtime": 0.0017527554, + "percall_tot": 0.00019463980000000003, + "percall_cum": 0.00043818885 + }, + { + "function": "contiguous.py:172(get_count)", + "ncalls": 3338, + "tottime": 0.0009408718, + "cumtime": 0.0009408718, + "percall_tot": 2.7929728771085877e-07, + "percall_cum": 2.7929728771085877e-07 + }, + { + "function": "core.py:24(__init__)", + "ncalls": 1, + "tottime": 1.0402600000000001e-05, + "cumtime": 5.38102e-05, + "percall_tot": 1.0402600000000001e-05, + "percall_cum": 5.38102e-05 + }, + { + "function": "~:0()", + "ncalls": 1, + "tottime": 4.279240000000001e-05, + "cumtime": 4.279240000000001e-05, + "percall_tot": 4.279240000000001e-05, + "percall_cum": 4.279240000000001e-05 + }, + { + "function": "contiguous.py:23(__init__)", + "ncalls": 2, + "tottime": 1.21986e-05, + "cumtime": 3.51634e-05, + "percall_tot": 6.0993e-06, + "percall_cum": 1.75817e-05 + }, + { + "function": "logarithmic.py:18(compute_value_from_index)", + "ncalls": 4, + "tottime": 2.9578800000000002e-05, + "cumtime": 3.332440000000001e-05, + "percall_tot": 7.3947000000000005e-06, + "percall_cum": 8.331100000000002e-06 + }, + { + "function": "~:0()", + "ncalls": 2, + "tottime": 1.8467600000000003e-05, + "cumtime": 1.8467600000000003e-05, + "percall_tot": 9.233800000000001e-06, + "percall_cum": 9.233800000000001e-06 + }, + { + "function": "logarithmic.py:9(__init__)", + "ncalls": 1, + "tottime": 5.8884e-06, + "cumtime": 8.244200000000001e-06, + "percall_tot": 5.8884e-06, + "percall_cum": 8.244200000000001e-06 + }, + { + "function": "base.py:17(__init__)", + "ncalls": 2, + "tottime": 4.4972e-06, + "cumtime": 4.4972e-06, + "percall_tot": 2.2486e-06, + "percall_cum": 2.2486e-06 + }, + { + "function": "~:0()", + "ncalls": 4, + "tottime": 3.7456e-06, + "cumtime": 3.7456e-06, + "percall_tot": 9.364e-07, + "percall_cum": 9.364e-07 + } + ] +} \ No newline at end of file diff --git a/profile_ddsketch.py b/profile_ddsketch.py index e30aa51..68c2e7f 100644 --- a/profile_ddsketch.py +++ b/profile_ddsketch.py @@ -6,26 +6,37 @@ from datetime import datetime from typing import Dict, List, Tuple import numpy as np +import sys +from io import StringIO from QuantileFlow.ddsketch.core import DDSketch +# Optional line profiler import +try: + from line_profiler import LineProfiler + LINE_PROFILER_AVAILABLE = True +except ImportError: + LINE_PROFILER_AVAILABLE = False + BENCHMARK_DIR = Path("benchmarks") BENCHMARK_DIR.mkdir(exist_ok=True) -def run_sketch_operations(num_values: int = 10_000_000) -> Dict: +def run_sketch_operations(sketch: DDSketch, data: np.ndarray) -> Dict: """Runs typical DDSketch operations for profiling. + This function should be called ONLY when profiler is enabled, + so it doesn't include initialization or data generation overhead. + + Args: + sketch: Pre-initialized DDSketch instance + data: Pre-generated data array + Returns: Dict containing operation results and metrics """ - sketch = DDSketch(relative_accuracy=0.01) - - # Generate random data - data = np.random.rand(num_values) * 1000 - - # Track insertion time + # Core operations - ONLY these are profiled for value in data: sketch.insert(value) @@ -40,7 +51,7 @@ def run_sketch_operations(num_values: int = 10_000_000) -> Dict: quantile_results[q] = f"Error: {e}" return { - 'num_values': num_values, + 'num_values': len(data), 'quantiles': quantile_results } @@ -298,10 +309,15 @@ def profile(num_values: int = 10_000_000, if num_trials > 1: print(f"\n▶ Running trial {trial + 1}/{num_trials}...") + # Initialize sketch and data BEFORE profiling (exclude overhead) + sketch = DDSketch(relative_accuracy=0.01) + data = np.random.rand(num_values) * 1000 + + # Profile ONLY the core operations profiler = cProfile.Profile() profiler.enable() - results = run_sketch_operations(num_values) + results = run_sketch_operations(sketch, data) profiler.disable() @@ -390,6 +406,155 @@ def profile(num_values: int = 10_000_000, print("\n✅ Profiling complete.\n") +def line_profile(num_values: int = 1_000_000, + functions_to_profile: List[str] = None, + num_trials: int = 1): + """Perform line-by-line profiling of specified functions. + + Args: + num_values: Number of values to insert per trial + functions_to_profile: List of function paths to profile (e.g., 'insert', 'add', 'compute_bucket_index') + num_trials: Number of trials to run and average + """ + if not LINE_PROFILER_AVAILABLE: + print("\n❌ Error: line_profiler is not installed!") + print(" Install with: pip install line_profiler") + return + + # Import the modules we want to profile + from QuantileFlow.ddsketch.core import DDSketch + from QuantileFlow.ddsketch.storage.contiguous import ContiguousStorage + from QuantileFlow.ddsketch.mapping.logarithmic import LogarithmicMapping + + # Default functions to profile + if functions_to_profile is None: + functions_to_profile = ['insert', 'add', 'compute_bucket_index'] + + # Map function names to actual function objects + function_map = { + 'insert': DDSketch.insert, + 'add': ContiguousStorage.add, + 'remove': ContiguousStorage.remove, + 'get_count': ContiguousStorage.get_count, + 'compute_bucket_index': LogarithmicMapping.compute_bucket_index, + 'compute_value_from_index': LogarithmicMapping.compute_value_from_index, + 'quantile': DDSketch.quantile, + } + + # Validate and get function objects + functions_to_profile_obj = [] + for func_name in functions_to_profile: + if func_name in function_map: + functions_to_profile_obj.append(function_map[func_name]) + else: + print(f"⚠️ Warning: Unknown function '{func_name}', skipping") + + if not functions_to_profile_obj: + print("\n❌ Error: No valid functions to profile!") + return + + print(f"\n{'Starting Line-Level Profile':^60}") + print('=' * 60) + print(f"Values per trial: {num_values:,}") + print(f"Number of trials: {num_trials}") + print("Functions to profile:") + for func in functions_to_profile: + if func in function_map: + print(f" • {func}") + print('=' * 60) + + # Run profiling across multiple trials + all_results = [] + + for trial in range(num_trials): + if num_trials > 1: + print(f"\n▶ Running trial {trial + 1}/{num_trials}...") + + # Initialize sketch and data BEFORE profiling (exclude overhead) + sketch = DDSketch(relative_accuracy=0.01) + data = np.random.rand(num_values) * 1000 + + # Create line profiler + lp = LineProfiler() + + # Add functions to profile + for func in functions_to_profile_obj: + lp.add_function(func) + + # Profile ONLY the core operations + lp.enable() + + # Run insertions (no lambda wrapper overhead) + for value in data: + sketch.insert(value) + + # Also compute some quantiles to profile quantile function + if 'quantile' in functions_to_profile: + for q in [0.5, 0.9, 0.99, 0.999]: + sketch.quantile(q) + + lp.disable() + + # Capture output + string_buffer = StringIO() + lp.print_stats(stream=string_buffer) + all_results.append(string_buffer.getvalue()) + + # Print results from last trial (most representative) + print("\n" + "=" * 120) + print(f"{'Line-by-Line Profiling Results':^120}") + if num_trials > 1: + print(f"{'(Showing results from trial ' + str(num_trials) + ')':^120}") + print("=" * 120) + print(all_results[-1]) + + # Optionally save detailed results + if num_trials > 1: + print(f"\n💡 Tip: Results shown are from the last trial. All {num_trials} trials were run for consistency.") + + # Print optimization suggestions based on results + print("\n" + "=" * 120) + print("🔍 Analysis Tips:") + print(" • Look for lines with high '% Time' - these are the bottlenecks") + print(" • High '# Hits' with moderate time per hit suggests vectorization opportunities") + print(" • Compare 'Time' vs '% Time' to understand relative impact") + print(" • Lines with 0 hits but in hot functions may be branches you can optimize") + print("=" * 120) + + print("\n✅ Line profiling complete.\n") + + +def line_profile_to_file(num_values: int = 1_000_000, + functions_to_profile: List[str] = None, + output_file: str = "line_profile_output.txt"): + """Perform line profiling and save results to file. + + Args: + num_values: Number of values to insert + functions_to_profile: List of function paths to profile + output_file: Output file path + """ + if not LINE_PROFILER_AVAILABLE: + print("\n❌ Error: line_profiler is not installed!") + print(" Install with: pip install line_profiler") + return + + # Capture output + old_stdout = sys.stdout + sys.stdout = StringIO() + + try: + line_profile(num_values, functions_to_profile, num_trials=1) + output = sys.stdout.getvalue() + finally: + sys.stdout = old_stdout + + # Write to file + output_path = Path(output_file) + output_path.write_text(output) + print(f"\n✅ Line profile saved to: {output_path}") + + def main(): parser = argparse.ArgumentParser( description="Profile DDSketch operations with benchmarking capabilities", @@ -413,6 +578,16 @@ def main(): # Quick test with fewer values python profile_ddsketch.py --num-values 1000000 --num-trials 3 + + # LINE PROFILING (requires line_profiler): + # Profile specific functions line-by-line + python profile_ddsketch.py --line-profile --num-values 1000000 + + # Profile specific functions + python profile_ddsketch.py --line-profile --functions insert add compute_bucket_index + + # Profile with multiple trials for stability + python profile_ddsketch.py --line-profile --num-values 500000 --num-trials 3 """ ) @@ -429,10 +604,32 @@ def main(): parser.add_argument('--top-n', type=int, default=20, help='Number of top functions to display (default: 20)') + # Line profiling arguments + parser.add_argument('--line-profile', action='store_true', + help='Enable line-by-line profiling (requires line_profiler)') + parser.add_argument('--functions', nargs='+', + default=['insert', 'add', 'compute_bucket_index'], + help='Functions to line-profile: insert, add, remove, get_count, ' + 'compute_bucket_index, compute_value_from_index, quantile ' + '(default: insert add compute_bucket_index)') + parser.add_argument('--line-output', type=str, + help='Save line profile output to file') + args = parser.parse_args() if args.list: list_benchmarks() + elif args.line_profile: + # Use fewer values by default for line profiling if not specified + num_vals = args.num_values + if args.num_values == 10_000_000: # Default value + num_vals = 1_000_000 + print(f"ℹ️ Using {num_vals:,} values for line profiling (override with --num-values)") + + if args.line_output: + line_profile_to_file(num_vals, args.functions, args.line_output) + else: + line_profile(num_vals, args.functions, args.num_trials) else: profile( num_values=args.num_values, From fe9f8228b4461b1dc9dee1739962d6dfd22d9603 Mon Sep 17 00:00:00 2001 From: Ryan Ji Date: Sat, 10 Jan 2026 18:28:10 -0500 Subject: [PATCH 7/7] Refactor line profiling default value handling and improve DDSketch value insertion logic --- QuantileFlow/ddsketch/core.py | 15 ++++++++------- QuantileFlow/ddsketch/storage/contiguous.py | 3 --- profile_ddsketch.py | 4 ---- 3 files changed, 8 insertions(+), 14 deletions(-) diff --git a/QuantileFlow/ddsketch/core.py b/QuantileFlow/ddsketch/core.py index 1a623a9..d512790 100644 --- a/QuantileFlow/ddsketch/core.py +++ b/QuantileFlow/ddsketch/core.py @@ -84,16 +84,17 @@ def insert(self, value: Union[int, float]) -> None: Raises: ValueError: If value is negative and cont_neg is False. """ - if value == 0: - self.zero_count += 1 - elif value > 0: + if value > 0: bucket_idx = self.mapping.compute_bucket_index(value) self.positive_store.add(bucket_idx) - elif self.cont_neg: - bucket_idx = self.mapping.compute_bucket_index(-value) - self.negative_store.add(bucket_idx) + elif value < 0: + if self.cont_neg: + bucket_idx = self.mapping.compute_bucket_index(-value) + self.negative_store.add(bucket_idx) + else: + raise ValueError("Negative values not supported when cont_neg is False") else: - raise ValueError("Negative values not supported when cont_neg is False") + self.zero_count += 1 self.count += 1 def delete(self, value: Union[int, float]) -> None: diff --git a/QuantileFlow/ddsketch/storage/contiguous.py b/QuantileFlow/ddsketch/storage/contiguous.py index 92ead6f..7ed33dd 100644 --- a/QuantileFlow/ddsketch/storage/contiguous.py +++ b/QuantileFlow/ddsketch/storage/contiguous.py @@ -47,8 +47,6 @@ def add(self, bucket_index: int, count: int = 1): bucket_index: The bucket index to add to. count: The count to add (default 1). """ - if count <= 0: - return if self.min_index is None: # First insertion @@ -180,7 +178,6 @@ def get_count(self, bucket_index: int) -> int: The count at the specified bucket index. """ if self.min_index is None or bucket_index < self.min_index or bucket_index > self.max_index: - warnings.warn("Bucket index is out of range. Returning 0.", UserWarning) return 0 pos = (bucket_index - self.min_index + self.arr_index_of_min_bucket) % self.max_buckets return int(self.counts[pos]) diff --git a/profile_ddsketch.py b/profile_ddsketch.py index 68c2e7f..b696e4c 100644 --- a/profile_ddsketch.py +++ b/profile_ddsketch.py @@ -622,10 +622,6 @@ def main(): elif args.line_profile: # Use fewer values by default for line profiling if not specified num_vals = args.num_values - if args.num_values == 10_000_000: # Default value - num_vals = 1_000_000 - print(f"ℹ️ Using {num_vals:,} values for line profiling (override with --num-values)") - if args.line_output: line_profile_to_file(num_vals, args.functions, args.line_output) else: