Skip to content

📊 Feature: Performance Metrics with Profiling #12

@maycon

Description

@maycon

📊 Feature: Performance Metrics with Profiling

📋 Summary

Implement a comprehensive performance metrics system to identify bottlenecks in TRECO execution. The system should track timing, resource usage, and provide detailed profiling information, all controlled by a feature flag.

🎯 Motivation

As TRECO grows in complexity with multiple states, extractors, and HTTP operations, it's crucial to:

  • Identify performance bottlenecks
  • Optimize slow operations
  • Monitor resource consumption
  • Debug performance issues in production
  • Provide visibility into execution flow

✨ Proposed Solution

Architecture

┌──────────────────────────────────────────┐
│  Performance Metrics System              │
├──────────────────────────────────────────┤
│                                          │
│  ┌─────────────┐      ┌──────────────┐   │
│  │   Timer     │      │   Counter    │   │
│  │   Context   │      │   Collector  │   │
│  └─────────────┘      └──────────────┘   │
│         │                     │          │
│         └──────────┬──────────┘          │
│                    ↓                     │
│         ┌────────────────────┐           │
│         │  Metrics Registry  │           │
│         └────────────────────┘           │
│                    │                     │
│         ┌──────────┴──────────┐          │
│         ↓                     ↓          │
│  ┌─────────────┐      ┌──────────────┐   │
│  │   Logger    │      │   Exporter   │   │
│  │   Backend   │      │   (JSON/CSV) │   │
│  └─────────────┘      └──────────────┘   │
│                                          │
└──────────────────────────────────────────┘

Feature Flag

# Enable via environment variable
TRECO_ENABLE_METRICS=true

# Or via CLI flag
treco run --enable-metrics attack.yaml

# Or programmatically
executor = StateExecutor(enable_metrics=True)

📐 Design

1. Core Metrics Module

# treco/metrics/__init__.py

from .timer import MetricsTimer, timed
from .counter import MetricsCounter, count_calls
from .registry import MetricsRegistry
from .reporter import MetricsReporter

__all__ = [
    'MetricsTimer',
    'MetricsCounter', 
    'MetricsRegistry',
    'MetricsReporter',
    'timed',
    'count_calls',
]

2. Timer Context Manager & Decorator

# treco/metrics/timer.py

from contextlib import contextmanager
from functools import wraps
import time
from typing import Optional, Callable

class MetricsTimer:
    """Track execution time of operations."""
    
    def __init__(self, enabled: bool = True):
        self.enabled = enabled
        self.timings = {}
    
    @contextmanager
    def measure(self, name: str, metadata: dict = None):
        """Context manager for timing operations."""
        if not self.enabled:
            yield
            return
        
        start = time.perf_counter()
        error = None
        
        try:
            yield
        except Exception as e:
            error = str(e)
            raise
        finally:
            duration = time.perf_counter() - start
            self._record(name, duration, metadata, error)
    
    def _record(self, name: str, duration: float, metadata: dict, error: str):
        """Record timing data."""
        if name not in self.timings:
            self.timings[name] = []
        
        self.timings[name].append({
            'duration_ms': duration * 1000,
            'timestamp': time.time(),
            'metadata': metadata or {},
            'error': error,
        })


def timed(name: str = None, metadata: Callable = None):
    """Decorator to automatically time function execution."""
    def decorator(func):
        operation_name = name or f"{func.__module__}.{func.__qualname__}"
        
        @wraps(func)
        def wrapper(*args, **kwargs):
            from .registry import MetricsRegistry
            timer = MetricsRegistry.get_timer()
            
            if not timer.enabled:
                return func(*args, **kwargs)
            
            # Extract metadata if callable provided
            meta = metadata(*args, **kwargs) if metadata else None
            
            with timer.measure(operation_name, meta):
                return func(*args, **kwargs)
        
        return wrapper
    return decorator

3. Counter for Call Tracking

# treco/metrics/counter.py

from functools import wraps
from typing import Callable

class MetricsCounter:
    """Track call counts and frequencies."""
    
    def __init__(self, enabled: bool = True):
        self.enabled = enabled
        self.counts = {}
    
    def increment(self, name: str, value: int = 1, labels: dict = None):
        """Increment a counter."""
        if not self.enabled:
            return
        
        key = (name, frozenset(labels.items()) if labels else frozenset())
        self.counts[key] = self.counts.get(key, 0) + value


def count_calls(name: str = None, labels: Callable = None):
    """Decorator to count function calls."""
    def decorator(func):
        counter_name = name or f"{func.__module__}.{func.__qualname__}"
        
        @wraps(func)
        def wrapper(*args, **kwargs):
            from .registry import MetricsRegistry
            counter = MetricsRegistry.get_counter()
            
            if counter.enabled:
                label_dict = labels(*args, **kwargs) if labels else None
                counter.increment(counter_name, labels=label_dict)
            
            return func(*args, **kwargs)
        
        return wrapper
    return decorator

4. Metrics Registry (Singleton)

# treco/metrics/registry.py

from typing import Optional

class MetricsRegistry:
    """Central registry for metrics components."""
    
    _timer: Optional['MetricsTimer'] = None
    _counter: Optional['MetricsCounter'] = None
    _enabled: bool = False
    
    @classmethod
    def initialize(cls, enabled: bool = True):
        """Initialize metrics system."""
        from .timer import MetricsTimer
        from .counter import MetricsCounter
        
        cls._enabled = enabled
        cls._timer = MetricsTimer(enabled=enabled)
        cls._counter = MetricsCounter(enabled=enabled)
    
    @classmethod
    def get_timer(cls) -> 'MetricsTimer':
        """Get timer instance."""
        if cls._timer is None:
            cls.initialize(enabled=False)
        return cls._timer
    
    @classmethod
    def get_counter(cls) -> 'MetricsCounter':
        """Get counter instance."""
        if cls._counter is None:
            cls.initialize(enabled=False)
        return cls._counter
    
    @classmethod
    def is_enabled(cls) -> bool:
        """Check if metrics are enabled."""
        return cls._enabled

5. Reporter for Output

# treco/metrics/reporter.py

import json
from typing import Dict, Any, Optional
from pathlib import Path

class MetricsReporter:
    """Generate metrics reports."""
    
    @staticmethod
    def generate_report(timer: 'MetricsTimer', counter: 'MetricsCounter') -> Dict[str, Any]:
        """Generate comprehensive metrics report."""
        return {
            'timings': MetricsReporter._analyze_timings(timer.timings),
            'counters': dict(counter.counts),
            'summary': MetricsReporter._generate_summary(timer, counter),
        }
    
    @staticmethod
    def _analyze_timings(timings: Dict) -> Dict[str, Any]:
        """Analyze timing data."""
        analysis = {}
        
        for name, measurements in timings.items():
            if not measurements:
                continue
            
            durations = [m['duration_ms'] for m in measurements if m['error'] is None]
            errors = sum(1 for m in measurements if m['error'] is not None)
            
            if durations:
                analysis[name] = {
                    'count': len(measurements),
                    'errors': errors,
                    'total_ms': sum(durations),
                    'mean_ms': sum(durations) / len(durations),
                    'min_ms': min(durations),
                    'max_ms': max(durations),
                    'p50_ms': sorted(durations)[len(durations) // 2],
                    'p95_ms': sorted(durations)[int(len(durations) * 0.95)],
                    'p99_ms': sorted(durations)[int(len(durations) * 0.99)],
                }
        
        return analysis
    
    @staticmethod
    def _generate_summary(timer: 'MetricsTimer', counter: 'MetricsCounter') -> Dict[str, Any]:
        """Generate summary statistics."""
        total_operations = sum(len(v) for v in timer.timings.values())
        total_time = sum(
            sum(m['duration_ms'] for m in measurements)
            for measurements in timer.timings.values()
        )
        
        return {
            'total_operations': total_operations,
            'total_time_ms': total_time,
            'total_counters': sum(counter.counts.values()),
        }
    
    @staticmethod
    def save_report(report: Dict[str, Any], filepath: Path):
        """Save report to file."""
        with open(filepath, 'w') as f:
            json.dump(report, f, indent=2)
    
    @staticmethod
    def print_report(report: Dict[str, Any]):
        """Print formatted report to console."""
        print("\n" + "="*80)
        print("TRECO PERFORMANCE METRICS REPORT")
        print("="*80 + "\n")
        
        # Summary
        summary = report['summary']
        print(f"Total Operations: {summary['total_operations']}")
        print(f"Total Time: {summary['total_time_ms']:.2f}ms")
        print(f"Total Counters: {summary['total_counters']}\n")
        
        # Top slowest operations
        print("Top 10 Slowest Operations:")
        print("-" * 80)
        
        timings = report['timings']
        sorted_ops = sorted(
            timings.items(),
            key=lambda x: x[1]['mean_ms'],
            reverse=True
        )[:10]
        
        for name, stats in sorted_ops:
            print(f"{name:60} {stats['mean_ms']:>8.2f}ms (±{stats['max_ms']-stats['min_ms']:.2f}ms)")
        
        print()

🎯 Instrumentation Points

Areas to Instrument

  1. HTTP Operations (High Priority)

    @timed("http.request", metadata=lambda req: {'method': req.method, 'url': req.url})
    def request(self, request_config):
        ...
  2. State Execution (High Priority)

    @timed("state.execute", metadata=lambda state: {'state_name': state.name})
    def _execute_single(self, state):
        ...
  3. Extractors (High Priority)

    @timed("extractor.extract", metadata=lambda self, *args: {'type': self.__class__.__name__})
    def extract(self, response, context):
        ...
  4. YAML Parsing (Medium Priority)

    @timed("parser.parse_yaml")
    def parse_attack_file(filepath):
        ...
  5. Validation (Low Priority)

    @timed("validator.validate_state")
    def validate_state(state):
        ...

📊 Metrics to Track

Timing Metrics

Metric Description Priority
http.request HTTP request latency High
http.dns_lookup DNS resolution time Medium
http.connect Connection establishment Medium
http.tls_handshake TLS handshake time Medium
state.execute Full state execution time High
state.transition State transition logic Low
extractor.extract Extractor execution time High
extractor.create Extractor instantiation Low
parser.parse_yaml YAML parsing time Medium
validator.validate Validation time Low

Counter Metrics

Metric Description Priority
http.requests.total Total HTTP requests High
http.requests.by_status Requests by status code High
http.errors.total Total HTTP errors High
extractor.calls.total Total extractor calls Medium
extractor.errors.total Total extraction errors Medium
states.executed.total Total states executed Medium

📝 Usage Examples

Example 1: Enable via Environment Variable

export TRECO_ENABLE_METRICS=true
treco run attack.yaml

Output at end:

================================================================================
TRECO PERFORMANCE METRICS REPORT
================================================================================

Total Operations: 1,234
Total Time: 5,432.10ms
Total Counters: 567

Top 10 Slowest Operations:
--------------------------------------------------------------------------------
http.request                                                      123.45ms (±45.20ms)
extractor.extract                                                  12.34ms (±5.10ms)
state.execute                                                     145.67ms (±67.89ms)
...

Example 2: Enable via CLI Flag

treco run --enable-metrics --metrics-output=metrics.json attack.yaml

Example 3: Programmatic Usage

from treco.metrics import MetricsRegistry, MetricsReporter
from treco.state.executor import StateExecutor

# Enable metrics
MetricsRegistry.initialize(enabled=True)

# Execute attack
executor = StateExecutor()
executor.execute(attack_config)

# Get report
timer = MetricsRegistry.get_timer()
counter = MetricsRegistry.get_counter()
report = MetricsReporter.generate_report(timer, counter)

# Print to console
MetricsReporter.print_report(report)

# Save to file
MetricsReporter.save_report(report, Path('metrics.json'))

Example 4: Custom Metrics

from treco.metrics import MetricsRegistry

timer = MetricsRegistry.get_timer()

# Manual timing
with timer.measure('custom.operation', metadata={'user': 'admin'}):
    # Your code here
    expensive_operation()

📖 Documentation

User Documentation

  • Add section to README
  • Create docs/metrics.md guide
  • Add examples to examples/
  • Update CLI help text

Developer Documentation

  • Document instrumentation API
  • Create instrumentation guide for new features
  • Document metrics data format

🎯 Success Criteria

  • Metrics system adds < 5% overhead when enabled
  • Zero overhead when disabled
  • All critical paths instrumented (HTTP, extractors, states)
  • Clear, actionable reports generated
  • Feature flag works correctly
  • Comprehensive test coverage (>90%)
  • Documentation complete

Note: This is a comprehensive feature that will provide deep visibility into TRECO's performance characteristics and help identify optimization opportunities.

Metadata

Metadata

Assignees

No one assigned

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions