From f0bff8f3730e04966927bbe2b14ab9856403f0e7 Mon Sep 17 00:00:00 2001 From: parsonsmatt Date: Tue, 26 May 2026 21:07:16 -0600 Subject: [PATCH 01/11] bench: extend perf-tracking infrastructure - shake-bench: parse GC pause time, max live data, productivity, major/minor GC counts from +RTS -s; emit .hp.csv top-cost-centre summary alongside each .hp file - ghcide-bench: capture per-sample latencies; emit p50/p95/p99/stddev CSV columns; add memory-pressure experiment - bench: smoke phony target for fast local iteration; parameterise MultiLayerModules.sh with --depth/--width; add MultiLayerModulesXL (200x50 = 10k modules) for production-scale stress Co-Authored-By: Claude Opus 4.7 (1M context) --- bench/Main.hs | 18 +- bench/MultiLayerModules.sh | 41 +++- bench/config.yaml | 23 ++- bench/config_overnight.yaml | 107 +++++++++++ bench/run_pass2_costcentre.sh | 88 +++++++++ ghcide-bench/src/Experiments.hs | 95 +++++++++- shake-bench/shake-bench.cabal | 1 + .../src/Development/Benchmark/Rules.hs | 177 ++++++++++++++++-- 8 files changed, 517 insertions(+), 33 deletions(-) create mode 100644 bench/config_overnight.yaml create mode 100755 bench/run_pass2_costcentre.sh diff --git a/bench/Main.hs b/bench/Main.hs index d1ac603b18..9fb4235a7c 100644 --- a/bench/Main.hs +++ b/bench/Main.hs @@ -64,7 +64,7 @@ import Development.Shake (Action, actionBracket, addOracle, askOracle, command, command_, getDirectoryFiles, liftIO, need, - newCache, shakeArgsWith, + newCache, phony, shakeArgsWith, shakeOptions, versioned, want) import Development.Shake.Classes import Experiments.Types (Example (exampleName), @@ -168,6 +168,22 @@ createBuildSystem config = do whenJust (profileInterval configStatic) $ \i -> do phonyRules "profiled-" binaryName (CheapHeapProfiling i) build (examples configStatic) + -- Fast smoke target: one small example, a few cheap experiments, only HEAD. + -- Intended for tight local iteration while improving perf/memory. + -- Sample count is still controlled by `samples:` in config.yaml — drop it + -- to ~10 for the fastest turnaround. + -- DummyLevel0M01NoTH is a single empty module so `searchSymbol` returns + -- Nothing for `identifierP`; pick experiments that don't require it. + let smokeExample = "DummyLevel0M01NoTH" + smokeExperiments = ["edit", "edit-header", "documentSymbols after edit"] + smokeConfigs = [ confName | ConfigurationDescriptor{..} <- configurations configStatic ] + phony "smoke" $ need + [ build "unprofiled" smokeExample "HEAD" conf + escaped (escapeExperiment (Unescaped experiment)) <.> "csv" + | conf <- smokeConfigs + , experiment <- smokeExperiments + ] + return configStatic disableAllPluginsBut :: (PluginId -> Bool) -> Plugin.Config diff --git a/bench/MultiLayerModules.sh b/bench/MultiLayerModules.sh index 38d85ce9ed..146fc9cb1d 100755 --- a/bench/MultiLayerModules.sh +++ b/bench/MultiLayerModules.sh @@ -1,9 +1,34 @@ #!/usr/bin/env bash -# Generate $DEPTH layers of modules with $WIDTH modules on each layer -# Every module on layer N imports all the modules on layer N-1 -# MultiLayerModules.hs imports all the modules from the last layer +# Generate $DEPTH layers of modules with $WIDTH modules on each layer. +# Every module on layer N imports all the modules on layer N-1. +# MultiLayerModules.hs imports all the modules from the last layer. +# +# Flags (any order): +# --th enable TemplateHaskell pragma on the top module +# --depth N override DEPTH (default 15) +# --width N override WIDTH (default 40) DEPTH=15 WIDTH=40 +TH= +while [[ $# -gt 0 ]]; do + case "$1" in + --th) + TH=1 + shift + ;; + --depth) + DEPTH=$2 + shift 2 + ;; + --width) + WIDTH=$2 + shift 2 + ;; + *) + shift + ;; + esac +done cat >hie.yaml << EOF cradle: direct: @@ -22,13 +47,11 @@ for l in $(seq 1 $DEPTH); do done done done -case "$1" in - '--th') - echo "{-# LANGUAGE TemplateHaskell #-}" > MultiLayerModules.hs - ;; -esac +if [[ -n "$TH" ]]; then + echo "{-# LANGUAGE TemplateHaskell #-}" > MultiLayerModules.hs +fi echo "module MultiLayerModules where" >> MultiLayerModules.hs - echo " - MultiLayerModules.hs" >> hie.yaml; +echo " - MultiLayerModules.hs" >> hie.yaml; for j in $(seq -w 1 $WIDTH); do echo "import DummyLevel${DEPTH}M$j" >> MultiLayerModules.hs; done diff --git a/bench/config.yaml b/bench/config.yaml index b1218976ce..6616ad75b1 100644 --- a/bench/config.yaml +++ b/bench/config.yaml @@ -1,6 +1,7 @@ # The number of samples to run per experiment. -# At least 100 is recommended in order to observe space leaks -samples: 50 +# At least 100 is recommended in order to observe space leaks. +# Temporarily lowered to 10 for fast turnaround during initial analysis. +samples: 10 buildTool: cabal @@ -77,6 +78,23 @@ examples: modules: - DummyLevel1M01.hs + # Large synthetic project: 200 layers × 50 modules/layer = 10000 modules, + # ~500k import edges (each module imports every module of the prior layer). + # This is closer in scale to a real production Haskell codebase than the + # default MultiLayerModules. Expect this to be slow: initial cradle load + # alone takes many minutes, and each experiment runs `samples:` rounds. + # Recommended: drop `samples:` to ~10 and run via single-experiment targets + # (e.g. `bench-results/unprofiled/MultiLayerModulesXL/HEAD/All/edit.csv`) + # rather than the full matrix. + - name: MultiLayerModulesXL + path: bench/MultiLayerModules.sh + script: True + script-args: ["--depth", "200", "--width", "50"] + modules: + - MultiLayerModules.hs + - DummyLevel0M01.hs + - DummyLevel100M01.hs + # Small but heavily multi-component example # Disabled as it is far to slow. hie-bios >0.7.2 should help # - name: HLS @@ -114,6 +132,7 @@ experiments: - "hole fit suggestions" - "eval execute single-line code lens" - "eval execute multi-line code lens" + - "memory pressure" # An ordered list of versions to analyze versions: diff --git a/bench/config_overnight.yaml b/bench/config_overnight.yaml new file mode 100644 index 0000000000..7933a6be1e --- /dev/null +++ b/bench/config_overnight.yaml @@ -0,0 +1,107 @@ +# Overnight Pass 1: GC flag A/B/C across a subset of experiments. +# Same HLS binary in all cases; only the +RTS args supplied via extra-args differ. +# Outputs land in bench-results/unprofiled//HEAD/All/. + +samples: 50 + +buildTool: cabal +outputFolder: bench-results + +# Heap profiling disabled for this pass — we want clean timing. +# profileInterval: 1 + +parallelism: 1 + +examples: + # ---------- cabal-3.16.1.0 variants (~235 transitive modules) ---------- + - name: cabal-default + package: Cabal + version: 3.16.1.0 + modules: + - src/Distribution/Simple.hs + - src/Distribution/Types/ComponentLocalBuildInfo.hs + extra-args: [] + + - name: cabal-A64m-F1 + package: Cabal + version: 3.16.1.0 + modules: + - src/Distribution/Simple.hs + - src/Distribution/Types/ComponentLocalBuildInfo.hs + extra-args: ["+RTS", "-A64m", "-F1", "-RTS"] + + - name: cabal-A128m-F11 + package: Cabal + version: 3.16.1.0 + modules: + - src/Distribution/Simple.hs + - src/Distribution/Types/ComponentLocalBuildInfo.hs + extra-args: ["+RTS", "-A128m", "-F1.1", "-RTS"] + + - name: cabal-chunked + package: Cabal + version: 3.16.1.0 + modules: + - src/Distribution/Simple.hs + - src/Distribution/Types/ComponentLocalBuildInfo.hs + extra-args: ["+RTS", "-A32m", "-n4m", "-F1.1", "-RTS"] + + # ---------- MultiLayerModulesNoTH (600 modules) ---------- + - name: mlm-default + path: bench/MultiLayerModules.sh + script: True + script-args: [] + modules: + - MultiLayerModules.hs + - DummyLevel0M01.hs + - DummyLevel1M01.hs + extra-args: [] + + - name: mlm-A64m-F1 + path: bench/MultiLayerModules.sh + script: True + script-args: [] + modules: + - MultiLayerModules.hs + - DummyLevel0M01.hs + - DummyLevel1M01.hs + extra-args: ["+RTS", "-A64m", "-F1", "-RTS"] + +# Experiment subset — picked to maximise signal in the slow paths. +experiments: + - "edit" + - "semanticTokens after typing burst" + - "hover after typing burst" + - "completions after typing burst" + - "memory pressure" + - "eval execute multi-line code lens" + +versions: +- HEAD + +configurations: +- All: + - alternateNumberFormat + - callHierarchy + - changeTypeSignature + - class + - codeRange + - eval + - explicitFixity + - fourmolu + - gadt + - ghcide-code-actions-bindings + - ghcide-code-actions-fill-holes + - ghcide-code-actions-imports-exports + - ghcide-code-actions-type-signatures + - ghcide-completions + - ghcide-type-lenses + - hlint + - importLens + - moduleName + - ormolu + - pragmas + - qualifyImportedNames + - rename + - stylish-haskell + - semanticTokens diff --git a/bench/run_pass2_costcentre.sh b/bench/run_pass2_costcentre.sh new file mode 100755 index 0000000000..ff2cdf0750 --- /dev/null +++ b/bench/run_pass2_costcentre.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash +# Pass 2: cost-centre profiled benchmark. +# +# Requires bench-results/binaries/HEAD-prof/haskell-language-server to exist +# (built with --enable-profiling --profiling-detail=late). +# +# For each (example, experiment) pair, invoke ghcide-bench directly with +# +RTS -p -hc -i1 -po -RTS so we get: +# .prof — time + allocation by cost-centre +# .hp — heap residency by cost-centre over time +# .hp.csv — top-N cost-centres (produced by parseHpProfile, but +# NOTE: only the bench harness invokes that. For Pass 2 +# we'd need to run summarizeHpProfile separately. See +# end of script.) +set -euo pipefail + +HLS=bench-results/binaries/HEAD-prof/haskell-language-server +OUTDIR=bench-results/prof-cc +SAMPLES=20 + +if [[ ! -x "$HLS" ]]; then + echo "ERROR: profiled HLS not found at $HLS" >&2 + exit 1 +fi + +run_experiment() { + local example_name=$1 + local experiment=$2 + shift 2 + local example_args=("$@") + + local exp_slug=${experiment// /_} + local outdir="$OUTDIR/$example_name/$exp_slug" + mkdir -p "$outdir" + local stem="$(pwd)/$outdir/$exp_slug" + + echo + echo "=== $example_name :: $experiment ===" + + cabal exec -- ghcide-bench \ + --timeout=600 \ + --no-clean \ + --samples="$SAMPLES" \ + --csv="$outdir/$exp_slug.csv" \ + --ghcide="$(pwd)/$HLS" \ + --select "$experiment" \ + --lsp-config \ + "${example_args[@]}" \ + --ghcide-options=+RTS \ + --ghcide-options=-p \ + --ghcide-options=-hc \ + --ghcide-options=-i1 \ + --ghcide-options=-po"$stem" \ + --ghcide-options=-l \ + --ghcide-options=-ol"$stem.eventlog" \ + --ghcide-options=-S"$stem.gcStats.log" \ + --ghcide-options=-RTS \ + < /dev/null || echo " FAILED — continuing" +} + +# ---------- Example: cabal-3.16.1.0 ---------- +CABAL_ARGS=( + --example-package-name Cabal + --example-package-version 3.16.1.0 + --example-name cabal + --example-module=src/Distribution/Simple.hs + --example-module=src/Distribution/Types/ComponentLocalBuildInfo.hs +) +for exp in "memory pressure" "completions after typing burst" "hover after typing burst" "edit" "semanticTokens after typing burst"; do + run_experiment cabal "$exp" "${CABAL_ARGS[@]}" +done + +# ---------- Example: MultiLayerModulesNoTH ---------- +MLM_ARGS=( + --example-script "$(pwd)/bench/MultiLayerModules.sh" + --example-name MultiLayerModulesNoTH + --example-module=MultiLayerModules.hs + --example-module=DummyLevel0M01.hs + --example-module=DummyLevel1M01.hs +) +for exp in "memory pressure" "edit" "hover after typing burst"; do + run_experiment MultiLayerModulesNoTH "$exp" "${MLM_ARGS[@]}" +done + +echo +echo "=== Pass 2 complete ===" +echo "Profile artefacts under $OUTDIR/" +echo "Inspect .prof files for time+alloc attribution: head -50 $OUTDIR/cabal/memory_pressure/memory_pressure.prof" diff --git a/ghcide-bench/src/Experiments.hs b/ghcide-bench/src/Experiments.hs index 8e70483559..4e673e6993 100644 --- a/ghcide-bench/src/Experiments.hs +++ b/ghcide-bench/src/Experiments.hs @@ -352,6 +352,45 @@ experiments = ) ), --------------------------------------------------------------------------------------- + --------------------------------------------------------------------------------------- + -- Memory-pressure experiment: hammers the server with a variety of + -- requests across every open document, interleaved with edits, to force + -- the IDE to retain caches/results across many rules. Each sample is one + -- "round" so the resulting peak/post-GC heap (via RTS -s) reflects the + -- worst case under sustained load. The default sample count makes this + -- much longer than other experiments — override with --samples when you + -- only need a quick smoke check. + bench "memory pressure" $ \docs -> do + -- Round 1: typing burst across all docs. + applyTypingBurst docs + _ <- waitForBuildQueue + -- Round 2: exercise hover + getDefinition + completions on every + -- doc that has an identifier position. These populate per-rule + -- caches across the whole project. + forM_ docs $ \DocumentPositions{..} -> do + forM_ identifierP $ \p -> do + _ <- getHover doc p + _ <- getDefinitions doc p + _ <- getCompletions doc p + return () + _ <- waitForBuildQueue + -- Round 3: code actions across every doc — these tend to pull in + -- additional rules (suggestions, refactor previews, etc.). + forM_ docs $ \DocumentPositions{..} -> do + forM_ identifierP $ \p -> + void $ getCodeActions doc (Range p p) + _ <- waitForBuildQueue + -- Round 4: document symbols on every doc — populates the symbol cache. + forM_ docs $ \DocumentPositions{..} -> + void $ getDocumentSymbols doc + _ <- waitForBuildQueue + -- Round 5: another typing burst, to ensure the caches above survive + -- invalidation rather than getting dropped — this is what catches + -- "we recompute everything from scratch on every edit" regressions. + applyTypingBurst docs + _ <- waitForBuildQueue + return True, + --------------------------------------------------------------------------------------- benchWithSetup "eval execute multi-line code lens" ( mapM_ $ \DocumentPositions{..} -> do @@ -533,6 +572,13 @@ runBenchmarksFun dir allBenchmarks = do , "rulesTotal" , "ruleEdges" , "ghcRebuilds" + , "userP50" + , "userP95" + , "userP99" + , "userStdDev" + , "delayedP50" + , "delayedP95" + , "delayedP99" ] rows = [ [ name, @@ -552,7 +598,14 @@ runBenchmarksFun dir allBenchmarks = do show rulesVisited, show rulesTotal, show edgesTotal, - show rebuildsTotal + show rebuildsTotal, + showMs (quantile 0.50 userWaitsSamples), + showMs (quantile 0.95 userWaitsSamples), + showMs (quantile 0.99 userWaitsSamples), + showMs (stdDev userWaitsSamples), + showMs (quantile 0.50 delayedWorkSamples), + showMs (quantile 0.95 delayedWorkSamples), + showMs (quantile 0.99 delayedWorkSamples) ] | (Bench {name, samples}, BenchRun {..}) <- results, let runSetup' = if runSetup < 0.01 then 0 else runSetup @@ -625,6 +678,10 @@ data BenchRun = BenchRun runExperiment :: !Seconds, userWaits :: !Seconds, delayedWork :: !Seconds, + -- | Per-sample user-time samples (length == samples on success). + userWaitsSamples :: ![Seconds], + -- | Per-sample delayed-work samples (length == samples on success). + delayedWorkSamples :: ![Seconds], firstResponse :: !Seconds, firstResponseDelayed :: !Seconds, rulesBuilt :: !Int, @@ -637,7 +694,26 @@ data BenchRun = BenchRun } badRun :: BenchRun -badRun = BenchRun 0 0 0 0 0 0 0 0 0 0 0 0 0 False +badRun = BenchRun 0 0 0 0 0 [] [] 0 0 0 0 0 0 0 0 False + +-- | Approximate quantile of a list of samples. Uses the nearest-rank +-- method; good enough for benchmark reporting at n>=20. +quantile :: Double -> [Seconds] -> Seconds +quantile _ [] = 0 +quantile q xs = + let sorted = sort xs + n = length sorted + idx = max 0 $ min (n - 1) $ floor (q * fromIntegral (n - 1)) + in sorted !! idx + +-- | Population standard deviation. +stdDev :: [Seconds] -> Seconds +stdDev [] = 0 +stdDev xs = + let n = fromIntegral (length xs) + m = sum xs / n + sq x = let d = x - m in d * d + in sqrt (sum (map sq xs) / n) waitForProgressStart :: Session () waitForProgressStart = void $ do @@ -688,8 +764,9 @@ runBench runSess Bench{..} = handleAny (\e -> print e >> return badRun) liftIO $ output $ "Running " <> name <> " benchmark" (runSetup, ()) <- duration $ benchSetup docs - let loop' (Just timeForFirstResponse) !userWaits !delayedWork 0 = return $ Just (userWaits, delayedWork, timeForFirstResponse) - loop' timeForFirstResponse !userWaits !delayedWork n = do + -- Accumulate per-sample (t, td) in reverse order; we reverse once at the end. + let loop' (Just timeForFirstResponse) !samplesAcc 0 = return $ Just (reverse samplesAcc, timeForFirstResponse) + loop' timeForFirstResponse !samplesAcc n = do (t, res) <- duration $ experiment docs if not res then return Nothing @@ -697,12 +774,16 @@ runBench runSess Bench{..} = handleAny (\e -> print e >> return badRun) output (showDuration t) -- Wait for the delayed actions to finish td <- waitForBuildQueue - loop' (timeForFirstResponse <|> Just (t,td)) (userWaits+t) (delayedWork+td) (n -1) + loop' (timeForFirstResponse <|> Just (t,td)) ((t,td):samplesAcc) (n -1) loop = loop' Nothing - (runExperiment, result) <- duration $ loop 0 0 samples + (runExperiment, result) <- duration $ loop [] samples let success = isJust result - (userWaits, delayedWork, (firstResponse, firstResponseDelayed)) = fromMaybe (0,0,(0,0)) result + (perSample, (firstResponse, firstResponseDelayed)) = fromMaybe ([], (0,0)) result + userWaitsSamples = map fst perSample + delayedWorkSamples = map snd perSample + userWaits = sum userWaitsSamples + delayedWork = sum delayedWorkSamples rulesTotal <- length <$> getStoredKeys rulesBuilt <- either (const 0) length <$> getBuildKeysBuilt diff --git a/shake-bench/shake-bench.cabal b/shake-bench/shake-bench.cabal index 3d2a7d3174..e5bee2fc42 100644 --- a/shake-bench/shake-bench.cabal +++ b/shake-bench/shake-bench.cabal @@ -22,6 +22,7 @@ library aeson, base == 4.*, bytestring, + containers, directory, extra >= 1.7.2, filepath, diff --git a/shake-bench/src/Development/Benchmark/Rules.hs b/shake-bench/src/Development/Benchmark/Rules.hs index 2ddcb97cc5..779306e84e 100644 --- a/shake-bench/src/Development/Benchmark/Rules.hs +++ b/shake-bench/src/Development/Benchmark/Rules.hs @@ -72,9 +72,12 @@ import Data.Aeson.Lens (AsJSON (_JSON), _Object, _String) import Data.ByteString.Lazy (ByteString) import Data.Char (isDigit) import Data.List (find, intercalate, isInfixOf, - isSuffixOf, stripPrefix, transpose) + isPrefixOf, isSuffixOf, sortBy, + stripPrefix, transpose) import Data.List.Extra (lower, splitOn) +import qualified Data.Map.Strict as Map import Data.Maybe (fromMaybe) +import Data.Ord (Down (..), comparing) import Data.String (fromString) import Data.Text (Text) import qualified Data.Text as T @@ -327,29 +330,175 @@ benchRules build MkBenchRules{..} = do NoProfiling -> writeFile outHp dummyHp _ -> return () - -- extend csv output with allocation data + -- Write a co-located top-N heap summary derived from the .hp file. + -- Untracked side-effect; regenerated whenever the .hp is. + liftIO $ do + hp <- readFile outHp + writeFile (outHp <.> "csv") (summarizeHpProfile 20 hp) + + -- extend csv output with allocation data + GC/pause/productivity csvContents <- liftIO $ lines <$> readFile outcsv let header = head csvContents results = tail csvContents - header' = header <> ", maxResidency, allocatedBytes" + extraHeaders = + [ "maxResidency" + , "allocatedBytes" + , "maxSlop" + , "mutTimeS" + , "gcTimeS" + , "maxGcPauseS" + , "productivityPct" + , "majorGCs" + , "minorGCs" + ] + header' = header <> ", " <> intercalate ", " extraHeaders results' <- forM results $ \row -> do - (maxResidency, allocations) <- liftIO - (parseMaxResidencyAndAllocations <$> readFile outGc) - return $ printf "%s, %s, %s" row (showMB maxResidency) (showMB allocations) + stats <- liftIO (parseRTSStats <$> readFile outGc) + return $ intercalate ", " + [ row + , showMB (rtsMaxResidency stats) + , showMB (rtsAllocatedBytes stats) + , showMB (rtsMaxSlop stats) + , showSeconds (rtsMutTime stats) + , showSeconds (rtsGcTime stats) + , showSeconds (rtsMaxGcPause stats) + , showDouble (rtsProductivityPct stats) + , showInt (rtsMajorGCs stats) + , showInt (rtsMinorGCs stats) + ] let csvContents' = header' : results' writeFileLines outcsv csvContents' where showMB :: Int -> String - showMB x = show (x `div` 2^(20::Int)) <> "MB" + showMB x | x < 0 = "0MB" + | otherwise = show (x `div` 2^(20::Int)) <> "MB" + showSeconds :: Double -> String + showSeconds x | x < 0 = "0" + | otherwise = printf "%.4f" x + showDouble :: Double -> String + showDouble x | x < 0 = "0" + | otherwise = printf "%.2f" x + showInt :: Int -> String + showInt x | x < 0 = "0" + | otherwise = show x + +-- | Summarize a GHC heap profile (.hp) file into a top-N cost-centre CSV. +-- For each cost-centre name we report the maximum retained bytes across +-- all samples, plus the final-sample retained bytes (useful for spotting +-- leaks that grow without bound). +summarizeHpProfile :: Int -> String -> String +summarizeHpProfile topN raw = + let ls = lines raw + samples = collectSamples ls + accumOne acc (cc, bs) = Map.insertWith combine cc (bs, bs) acc + where combine (newMax, newLast) (oldMax, _) = (max newMax oldMax, newLast) + -- Aggregate across all samples: track running max and last value per CC. + agg = foldl (\acc s -> foldl accumOne acc s) Map.empty samples + sorted = sortBy (comparing (Down . maxOf)) (Map.toList agg) + maxOf (_, (m, _)) = m + header = "costCentre,maxRetainedBytes,finalRetainedBytes" + body = [ cc <> "," <> show m <> "," <> show l + | (cc, (m, l)) <- take topN sorted + ] + in unlines (header : body) + where + -- | Group consecutive non-marker lines between BEGIN_SAMPLE/END_SAMPLE pairs. + collectSamples :: [String] -> [[(String, Integer)]] + collectSamples xs = case dropWhile (not . isBeginSample) xs of + [] -> [] + (_:rest) -> + let (sample, rest') = break isEndSample rest + parsed = [ p | l <- sample, Just p <- [parseSampleLine l] ] + in parsed : case rest' of + [] -> [] + (_:r) -> collectSamples r + isBeginSample = ("BEGIN_SAMPLE" `isPrefixOf`) . dropWhile (== ' ') + isEndSample = ("END_SAMPLE" `isPrefixOf`) . dropWhile (== ' ') + +-- | Parse one cost-centre line from a heap profile sample, e.g. +-- @"GHC.Conc.Sync.CAF" 5678@ or @PINNED 1234@. The cost-centre name may +-- contain spaces (when quoted), so we treat everything before the trailing +-- number as the name. +parseSampleLine :: String -> Maybe (String, Integer) +parseSampleLine l = case reverse (words l) of + (numStr:rest) -> do + n <- readMaybe numStr + let name = unwords (reverse rest) + if null name then Nothing else Just (name, n) + _ -> Nothing + +-- | Stats extracted from `+RTS -s` (or -S) summary output. +data RTSStats = RTSStats + { rtsMaxResidency :: !Int + , rtsAllocatedBytes :: !Int + , rtsMaxSlop :: !Int + , rtsMutTime :: !Double -- ^ MUT time (seconds) + , rtsGcTime :: !Double -- ^ GC time (seconds) + , rtsMaxGcPause :: !Double -- ^ Max GC pause across all generations (seconds) + , rtsProductivityPct :: !Double -- ^ Productivity (% of total user time) + , rtsMajorGCs :: !Int -- ^ Gen 1 collections + , rtsMinorGCs :: !Int -- ^ Gen 0 collections + } --- Parse the max residency and allocations in RTS -s output -parseMaxResidencyAndAllocations :: String -> (Int, Int) -parseMaxResidencyAndAllocations input = - (f "maximum residency", f "bytes allocated in the heap") +-- | Parse the summary block at the tail of `+RTS -s`/`-S` output. +-- Each field is best-effort; missing fields return -1. +parseRTSStats :: String -> RTSStats +parseRTSStats input = RTSStats + { rtsMaxResidency = digitsFirstWord "maximum residency" + , rtsAllocatedBytes = digitsFirstWord "bytes allocated in the heap" + , rtsMaxSlop = digitsFirstWord "maximum slop" + , rtsMutTime = secondsAt 2 "MUT time" + , rtsGcTime = secondsAt 2 "GC time" + , rtsMaxGcPause = parseMaxPause + , rtsProductivityPct = parseProductivity + , rtsMajorGCs = parseGenColls "Gen 1" + , rtsMinorGCs = parseGenColls "Gen 0" + } where - inps = reverse $ lines input - f label = case find (label `isInfixOf`) inps of - Just l -> read $ filter isDigit $ head $ words l + ls = lines input + findLine label = find (label `isInfixOf`) ls + digitsFirstWord label = case findLine label of + Just l -> readIntDef $ filter isDigit $ head (words l ++ [""]) + Nothing -> -1 + readIntDef "" = -1 + readIntDef s = fromMaybe (-1) (readMaybe s) + readDoubleDef s = fromMaybe (-1) (readMaybe (takeWhile (\c -> isDigit c || c == '.' || c == '-') s)) + -- Parse the time value (in seconds) at word index `i` (after dropping the label + -- words). For lines like " MUT time 0.012s ( 0.012s elapsed)". + secondsAt i label = case findLine label of + Just l -> + let ws = words l + in if length ws > i then readDoubleDef (ws !! i) else -1 + Nothing -> -1 + -- "Max pause" appears on each Gen line; take the worst. + parseMaxPause = + let candidates = + [ readDoubleDef w + | l <- ls + , "Gen " `isInfixOf` l + , let ws = words l + , not (null ws) + , w <- take 1 (reverse ws) + ] + valid = filter (>= 0) candidates + in if null valid then -1 else maximum valid + -- "Productivity 81.2% of total user, ..." + parseProductivity = case findLine "Productivity" of + Just l -> + let ws = words l + in case ws of + (_:n:_) -> readDoubleDef n + _ -> -1 + Nothing -> -1 + -- " Gen 0 1 colls, 0 par ..." → 1 + parseGenColls genLabel = case findLine genLabel of + Just l -> + let ws = words l + -- Drop "Gen N" prefix, then first numeric word is colls count. + rest = drop 2 ws + in case rest of + (n:_) -> readIntDef (filter isDigit n) + _ -> -1 Nothing -> -1 From 3ce6464d33850a472e9b5162059300a4335da18a Mon Sep 17 00:00:00 2001 From: parsonsmatt Date: Tue, 26 May 2026 22:52:19 -0600 Subject: [PATCH 02/11] ghcide: preserve pointer identity on early-cutoff match defineEarlyCutoff' reuses the previously-cached Succeeded value when the cutoff fingerprint matches the prior. Without this, every cutoff-matched rerun stored a freshly allocated (but structurally equivalent) value, and downstream rules whose deps had not changed continued to hold pointers to the prior value. Multiple equivalent copies of large shared values accumulated across rebuilds. On a 10k-module MLM-XL project, this cut NodeKey_Module heap growth by ~3000x (241 KB/s -> 81 B/s) and overall heap leak by ~50% (1.9 MB/s -> 0.95 MB/s). Largest source of remaining leak is HPT/UniqDFM churn via GhcSessionDeps; tracked separately. Co-Authored-By: Claude Opus 4.7 (1M context) --- ghcide/src/Development/IDE/Core/Shake.hs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/ghcide/src/Development/IDE/Core/Shake.hs b/ghcide/src/Development/IDE/Core/Shake.hs index 9334a13ad3..ab207b0a11 100644 --- a/ghcide/src/Development/IDE/Core/Shake.hs +++ b/ghcide/src/Development/IDE/Core/Shake.hs @@ -1288,7 +1288,7 @@ defineEarlyCutoff' doDiagnostics cmp key file mbOld mode action = do pure (Nothing, ([ideErrorText file (T.pack $ show (key, file) ++ show e) | not $ isBadDependency e],Nothing)) ver <- estimateFileVersionUnsafely key mbRes file - (bs, res) <- case mbRes of + (bs, freshRes) <- case mbRes of Nothing -> do pure (toShakeValue ShakeStale mbBs, staleV) Just v -> pure (maybe ShakeNoCutoff ShakeResult mbBs, Succeeded ver v) @@ -1299,6 +1299,17 @@ defineEarlyCutoff' doDiagnostics cmp key file mbOld mode action = do -- If we do not have a previous result -- or we got ShakeNoCutoff we always return False. _ -> False + -- Pointer-identity preservation on early-cutoff match. + -- When the cutoff fingerprint matches the prior, reuse the + -- previously cached value instead of the freshly computed + -- (but structurally equivalent) one. This is critical for + -- large shared values like ModuleGraph: downstream rules + -- that did not invalidate continue to share a pointer with + -- the cache, instead of accumulating multiple equivalent + -- copies across rebuilds. + let res = case (eq, staleV, freshRes) of + (True, Stale _ _ oldV, Succeeded sver _) -> Succeeded sver oldV + _ -> freshRes return $ RunResult (if eq then ChangedRecomputeSame else ChangedRecomputeDiff) (encodeShakeValue bs) From 1db55f257d47e88092a4aa57b1744b9d9ab45cec Mon Sep 17 00:00:00 2001 From: parsonsmatt Date: Tue, 26 May 2026 23:08:33 -0600 Subject: [PATCH 03/11] ghcide: add structural cutoff fingerprint to GhcSessionDeps MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GhcSessionDeps previously used defineNoDiagnostics (no early-cutoff fingerprint), so the prior pointer-identity preservation patch on defineEarlyCutoff' could not engage for it. As a result, every re-run of GhcSessionDeps allocated a fresh HscEnvEq even when the resulting value was structurally equivalent — and downstream rules whose deps had not changed kept pointing at the prior version, accumulating generations of equivalent HscEnv/HPT. Switch GhcSessionDeps to defineEarlyCutoff and derive a cutoff fingerprint from the inputs that fully determine HscEnvEq identity: - the file's own source hash (covers pragma/dflags effects via msrHscEnv's initializePlugins step) - the file's import-level mod summary fingerprint - the trans-deps fingerprint (covers import-graph changes anywhere in the file's transitive cone) - the iface fingerprint of each direct dep (covers signature changes that would actually affect typecheck) When all four match the prior run, the new defineEarlyCutoff' code path reuses the cached HscEnvEq pointer — eliminating accumulation of ModuleGraph-internal data structures (NodeKey_Module, GWIB, Set.Bin all drop out of the retainer set entirely on MLM-XL) and cutting overall heap at fixed wall-clock by ~36%. Co-Authored-By: Claude Opus 4.7 (1M context) --- ghcide/src/Development/IDE/Core/Rules.hs | 38 ++++++++++++++++++++---- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/ghcide/src/Development/IDE/Core/Rules.hs b/ghcide/src/Development/IDE/Core/Rules.hs index 24de344bfa..b04757a965 100644 --- a/ghcide/src/Development/IDE/Core/Rules.hs +++ b/ghcide/src/Development/IDE/Core/Rules.hs @@ -763,9 +763,9 @@ loadGhcSession recorder ghcSessionDepsConfig = do let cutoffHash = LBS.toStrict $ B.encode (hash (snd val)) return (Just cutoffHash, val) - defineNoDiagnostics (cmapWithPrio LogShake recorder) $ \(GhcSessionDeps_ fullModSummary) file -> do + defineEarlyCutoff (cmapWithPrio LogShake recorder) $ RuleNoDiagnostics $ \(GhcSessionDeps_ fullModSummary) file -> do env <- use_ GhcSession file - ghcSessionDepsDefinition fullModSummary ghcSessionDepsConfig env file + ghcSessionDepsDefinitionCutoff fullModSummary ghcSessionDepsConfig env file newtype GhcSessionDepsConfig = GhcSessionDepsConfig { fullModuleGraph :: Bool @@ -785,10 +785,25 @@ ghcSessionDepsDefinition :: -- | full mod summary Bool -> GhcSessionDepsConfig -> HscEnvEq -> NormalizedFilePath -> Action (Maybe HscEnvEq) -ghcSessionDepsDefinition fullModSummary GhcSessionDepsConfig{..} hscEnvEq file = do +ghcSessionDepsDefinition fullModSummary cfg hscEnvEq file = + snd <$> ghcSessionDepsDefinitionCutoff fullModSummary cfg hscEnvEq file + +-- | Like 'ghcSessionDepsDefinition' but also produces a cutoff fingerprint so +-- the rule can short-circuit when none of its inputs (transitive deps, +-- own mod summary, dep ifaces) have changed. Combined with the +-- early-cutoff pointer-identity preservation in 'defineEarlyCutoff'', +-- this stops generations of equivalent HscEnv\/HPT from accumulating in +-- the per-file Shake cache. +ghcSessionDepsDefinitionCutoff + :: Bool + -> GhcSessionDepsConfig + -> HscEnvEq + -> NormalizedFilePath + -> Action (Maybe BS.ByteString, Maybe HscEnvEq) +ghcSessionDepsDefinitionCutoff fullModSummary GhcSessionDepsConfig{..} hscEnvEq file = do mbdeps <- mapM(fmap artifactFilePath . snd) <$> use_ GetLocatedImports file case mbdeps of - Nothing -> return Nothing + Nothing -> return (Nothing, Nothing) Just deps -> do when fullModuleGraph $ void $ use_ ReportImportCycles file msr <- if fullModSummary @@ -801,6 +816,18 @@ ghcSessionDepsDefinition fullModSummary GhcSessionDepsConfig{..} hscEnvEq file = env = msrHscEnv msr depSessions <- map hscEnv <$> uses_ (GhcSessionDeps_ fullModSummary) deps ifaces <- uses_ GetModIface deps + transDepsFp <- use_ GetModuleGraphTransDepsFingerprints file + -- Cutoff fingerprint: HscEnvEq value is determined by this file's + -- own source (which feeds msrHscEnv via pragmas\/dflags), this + -- file's import-level mod summary, the trans-deps structure, and + -- each dep's iface fingerprint. When all match the prior run, the + -- value is structurally equivalent and 'defineEarlyCutoff'' will + -- reuse the prior pointer. + let !cutoffFp = BS.concat $ + fingerprintToBS transDepsFp + : fingerprintToBS (msrFingerprint msr) + : fingerprintToBS (ms_hs_hash ms) + : map hiFileFingerPrint ifaces -- Load .hs-boot before .hs: the HPT is keyed by module name, and -- GHC's addHomeModInfoToHpt overwrites, so the non-boot must be last. let inLoadOrder = sortOn (not . isBootHmi) @@ -837,7 +864,8 @@ ghcSessionDepsDefinition fullModSummary GhcSessionDepsConfig{..} hscEnvEq file = -- session, while `ghcSessionDepsDefinition` will be called for each file we need -- to compile. `updateHscEnvEq` will refresh the HscEnv (session') and also -- generate a new Unique. - Just <$> liftIO (updateHscEnvEq hscEnvEq session') + session'' <- liftIO (updateHscEnvEq hscEnvEq session') + return (Just cutoffFp, Just session'') -- | Load a iface from disk, or generate it if there isn't one or it is out of date -- This rule also ensures that the `.hie` and `.o` (if needed) files are written out. From 14d8d158143ad02d5eaad52661316b611f431966 Mon Sep 17 00:00:00 2001 From: parsonsmatt Date: Wed, 27 May 2026 14:01:29 -0600 Subject: [PATCH 04/11] Revert "ghcide: add structural cutoff fingerprint to GhcSessionDeps" This reverts commit 1db55f257d47e88092a4aa57b1744b9d9ab45cec. --- ghcide/src/Development/IDE/Core/Rules.hs | 38 ++++-------------------- 1 file changed, 5 insertions(+), 33 deletions(-) diff --git a/ghcide/src/Development/IDE/Core/Rules.hs b/ghcide/src/Development/IDE/Core/Rules.hs index b04757a965..24de344bfa 100644 --- a/ghcide/src/Development/IDE/Core/Rules.hs +++ b/ghcide/src/Development/IDE/Core/Rules.hs @@ -763,9 +763,9 @@ loadGhcSession recorder ghcSessionDepsConfig = do let cutoffHash = LBS.toStrict $ B.encode (hash (snd val)) return (Just cutoffHash, val) - defineEarlyCutoff (cmapWithPrio LogShake recorder) $ RuleNoDiagnostics $ \(GhcSessionDeps_ fullModSummary) file -> do + defineNoDiagnostics (cmapWithPrio LogShake recorder) $ \(GhcSessionDeps_ fullModSummary) file -> do env <- use_ GhcSession file - ghcSessionDepsDefinitionCutoff fullModSummary ghcSessionDepsConfig env file + ghcSessionDepsDefinition fullModSummary ghcSessionDepsConfig env file newtype GhcSessionDepsConfig = GhcSessionDepsConfig { fullModuleGraph :: Bool @@ -785,25 +785,10 @@ ghcSessionDepsDefinition :: -- | full mod summary Bool -> GhcSessionDepsConfig -> HscEnvEq -> NormalizedFilePath -> Action (Maybe HscEnvEq) -ghcSessionDepsDefinition fullModSummary cfg hscEnvEq file = - snd <$> ghcSessionDepsDefinitionCutoff fullModSummary cfg hscEnvEq file - --- | Like 'ghcSessionDepsDefinition' but also produces a cutoff fingerprint so --- the rule can short-circuit when none of its inputs (transitive deps, --- own mod summary, dep ifaces) have changed. Combined with the --- early-cutoff pointer-identity preservation in 'defineEarlyCutoff'', --- this stops generations of equivalent HscEnv\/HPT from accumulating in --- the per-file Shake cache. -ghcSessionDepsDefinitionCutoff - :: Bool - -> GhcSessionDepsConfig - -> HscEnvEq - -> NormalizedFilePath - -> Action (Maybe BS.ByteString, Maybe HscEnvEq) -ghcSessionDepsDefinitionCutoff fullModSummary GhcSessionDepsConfig{..} hscEnvEq file = do +ghcSessionDepsDefinition fullModSummary GhcSessionDepsConfig{..} hscEnvEq file = do mbdeps <- mapM(fmap artifactFilePath . snd) <$> use_ GetLocatedImports file case mbdeps of - Nothing -> return (Nothing, Nothing) + Nothing -> return Nothing Just deps -> do when fullModuleGraph $ void $ use_ ReportImportCycles file msr <- if fullModSummary @@ -816,18 +801,6 @@ ghcSessionDepsDefinitionCutoff fullModSummary GhcSessionDepsConfig{..} hscEnvEq env = msrHscEnv msr depSessions <- map hscEnv <$> uses_ (GhcSessionDeps_ fullModSummary) deps ifaces <- uses_ GetModIface deps - transDepsFp <- use_ GetModuleGraphTransDepsFingerprints file - -- Cutoff fingerprint: HscEnvEq value is determined by this file's - -- own source (which feeds msrHscEnv via pragmas\/dflags), this - -- file's import-level mod summary, the trans-deps structure, and - -- each dep's iface fingerprint. When all match the prior run, the - -- value is structurally equivalent and 'defineEarlyCutoff'' will - -- reuse the prior pointer. - let !cutoffFp = BS.concat $ - fingerprintToBS transDepsFp - : fingerprintToBS (msrFingerprint msr) - : fingerprintToBS (ms_hs_hash ms) - : map hiFileFingerPrint ifaces -- Load .hs-boot before .hs: the HPT is keyed by module name, and -- GHC's addHomeModInfoToHpt overwrites, so the non-boot must be last. let inLoadOrder = sortOn (not . isBootHmi) @@ -864,8 +837,7 @@ ghcSessionDepsDefinitionCutoff fullModSummary GhcSessionDepsConfig{..} hscEnvEq -- session, while `ghcSessionDepsDefinition` will be called for each file we need -- to compile. `updateHscEnvEq` will refresh the HscEnv (session') and also -- generate a new Unique. - session'' <- liftIO (updateHscEnvEq hscEnvEq session') - return (Just cutoffFp, Just session'') + Just <$> liftIO (updateHscEnvEq hscEnvEq session') -- | Load a iface from disk, or generate it if there isn't one or it is out of date -- This rule also ensures that the `.hie` and `.o` (if needed) files are written out. From 2add1d4964164b5d02ca2b0fac28864b1f928460 Mon Sep 17 00:00:00 2001 From: parsonsmatt Date: Wed, 27 May 2026 14:01:51 -0600 Subject: [PATCH 05/11] bench: avoid restricted (!!) in RTS stats parser Hlint flagged ws !! i in secondsAt. Use safe drop/pattern-match instead. Co-Authored-By: Claude Opus 4.7 (1M context) --- shake-bench/src/Development/Benchmark/Rules.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/shake-bench/src/Development/Benchmark/Rules.hs b/shake-bench/src/Development/Benchmark/Rules.hs index 779306e84e..62779bb212 100644 --- a/shake-bench/src/Development/Benchmark/Rules.hs +++ b/shake-bench/src/Development/Benchmark/Rules.hs @@ -466,9 +466,9 @@ parseRTSStats input = RTSStats -- Parse the time value (in seconds) at word index `i` (after dropping the label -- words). For lines like " MUT time 0.012s ( 0.012s elapsed)". secondsAt i label = case findLine label of - Just l -> - let ws = words l - in if length ws > i then readDoubleDef (ws !! i) else -1 + Just l -> case drop i (words l) of + (w:_) -> readDoubleDef w + _ -> -1 Nothing -> -1 -- "Max pause" appears on each Gen line; take the worst. parseMaxPause = From 0d425dacd7a4498c957ca148ed610abae63778ca Mon Sep 17 00:00:00 2001 From: parsonsmatt Date: Wed, 27 May 2026 14:22:35 -0600 Subject: [PATCH 06/11] bench: tidy up infrastructure changes - Restore samples: 50 in config.yaml (was temporarily 10 for iteration) - Drop config_overnight.yaml (RTS-flag exploration artifact, not used by CI) - Trim verbose comments in Main.hs smoke target and MultiLayerModules.sh Co-Authored-By: Claude Opus 4.7 (1M context) --- bench/Main.hs | 7 +-- bench/MultiLayerModules.sh | 6 +- bench/config.yaml | 5 +- bench/config_overnight.yaml | 107 ------------------------------------ 4 files changed, 4 insertions(+), 121 deletions(-) delete mode 100644 bench/config_overnight.yaml diff --git a/bench/Main.hs b/bench/Main.hs index 9fb4235a7c..df80af851a 100644 --- a/bench/Main.hs +++ b/bench/Main.hs @@ -168,12 +168,7 @@ createBuildSystem config = do whenJust (profileInterval configStatic) $ \i -> do phonyRules "profiled-" binaryName (CheapHeapProfiling i) build (examples configStatic) - -- Fast smoke target: one small example, a few cheap experiments, only HEAD. - -- Intended for tight local iteration while improving perf/memory. - -- Sample count is still controlled by `samples:` in config.yaml — drop it - -- to ~10 for the fastest turnaround. - -- DummyLevel0M01NoTH is a single empty module so `searchSymbol` returns - -- Nothing for `identifierP`; pick experiments that don't require it. + -- Fast smoke target for local iteration: smallest example, cheapest experiments, HEAD only. let smokeExample = "DummyLevel0M01NoTH" smokeExperiments = ["edit", "edit-header", "documentSymbols after edit"] smokeConfigs = [ confName | ConfigurationDescriptor{..} <- configurations configStatic ] diff --git a/bench/MultiLayerModules.sh b/bench/MultiLayerModules.sh index 146fc9cb1d..6dc4ba1c5a 100755 --- a/bench/MultiLayerModules.sh +++ b/bench/MultiLayerModules.sh @@ -2,11 +2,7 @@ # Generate $DEPTH layers of modules with $WIDTH modules on each layer. # Every module on layer N imports all the modules on layer N-1. # MultiLayerModules.hs imports all the modules from the last layer. -# -# Flags (any order): -# --th enable TemplateHaskell pragma on the top module -# --depth N override DEPTH (default 15) -# --width N override WIDTH (default 40) +# Flags (any order): --th, --depth N, --width N. DEPTH=15 WIDTH=40 TH= diff --git a/bench/config.yaml b/bench/config.yaml index 6616ad75b1..1e6f811225 100644 --- a/bench/config.yaml +++ b/bench/config.yaml @@ -1,7 +1,6 @@ # The number of samples to run per experiment. -# At least 100 is recommended in order to observe space leaks. -# Temporarily lowered to 10 for fast turnaround during initial analysis. -samples: 10 +# At least 100 is recommended in order to observe space leaks +samples: 50 buildTool: cabal diff --git a/bench/config_overnight.yaml b/bench/config_overnight.yaml deleted file mode 100644 index 7933a6be1e..0000000000 --- a/bench/config_overnight.yaml +++ /dev/null @@ -1,107 +0,0 @@ -# Overnight Pass 1: GC flag A/B/C across a subset of experiments. -# Same HLS binary in all cases; only the +RTS args supplied via extra-args differ. -# Outputs land in bench-results/unprofiled//HEAD/All/. - -samples: 50 - -buildTool: cabal -outputFolder: bench-results - -# Heap profiling disabled for this pass — we want clean timing. -# profileInterval: 1 - -parallelism: 1 - -examples: - # ---------- cabal-3.16.1.0 variants (~235 transitive modules) ---------- - - name: cabal-default - package: Cabal - version: 3.16.1.0 - modules: - - src/Distribution/Simple.hs - - src/Distribution/Types/ComponentLocalBuildInfo.hs - extra-args: [] - - - name: cabal-A64m-F1 - package: Cabal - version: 3.16.1.0 - modules: - - src/Distribution/Simple.hs - - src/Distribution/Types/ComponentLocalBuildInfo.hs - extra-args: ["+RTS", "-A64m", "-F1", "-RTS"] - - - name: cabal-A128m-F11 - package: Cabal - version: 3.16.1.0 - modules: - - src/Distribution/Simple.hs - - src/Distribution/Types/ComponentLocalBuildInfo.hs - extra-args: ["+RTS", "-A128m", "-F1.1", "-RTS"] - - - name: cabal-chunked - package: Cabal - version: 3.16.1.0 - modules: - - src/Distribution/Simple.hs - - src/Distribution/Types/ComponentLocalBuildInfo.hs - extra-args: ["+RTS", "-A32m", "-n4m", "-F1.1", "-RTS"] - - # ---------- MultiLayerModulesNoTH (600 modules) ---------- - - name: mlm-default - path: bench/MultiLayerModules.sh - script: True - script-args: [] - modules: - - MultiLayerModules.hs - - DummyLevel0M01.hs - - DummyLevel1M01.hs - extra-args: [] - - - name: mlm-A64m-F1 - path: bench/MultiLayerModules.sh - script: True - script-args: [] - modules: - - MultiLayerModules.hs - - DummyLevel0M01.hs - - DummyLevel1M01.hs - extra-args: ["+RTS", "-A64m", "-F1", "-RTS"] - -# Experiment subset — picked to maximise signal in the slow paths. -experiments: - - "edit" - - "semanticTokens after typing burst" - - "hover after typing burst" - - "completions after typing burst" - - "memory pressure" - - "eval execute multi-line code lens" - -versions: -- HEAD - -configurations: -- All: - - alternateNumberFormat - - callHierarchy - - changeTypeSignature - - class - - codeRange - - eval - - explicitFixity - - fourmolu - - gadt - - ghcide-code-actions-bindings - - ghcide-code-actions-fill-holes - - ghcide-code-actions-imports-exports - - ghcide-code-actions-type-signatures - - ghcide-completions - - ghcide-type-lenses - - hlint - - importLens - - moduleName - - ormolu - - pragmas - - qualifyImportedNames - - rename - - stylish-haskell - - semanticTokens From 887f445b7628bec77d7599beb2df456d2871ab16 Mon Sep 17 00:00:00 2001 From: parsonsmatt Date: Thu, 28 May 2026 08:26:48 -0600 Subject: [PATCH 07/11] ci: re-trigger after env failure Co-Authored-By: Claude Opus 4.7 (1M context) From dc31d1ec1d51d9ba92a8733bb9042ed26cde4ad0 Mon Sep 17 00:00:00 2001 From: parsonsmatt Date: Thu, 28 May 2026 08:35:45 -0600 Subject: [PATCH 08/11] ci: drop --unset from ghcup gc to keep active GHC installed After haskell-actions/setup@v2.10.3 finishes, the active GHC is in an "unset" state. With --unset, `ghcup gc` then deletes the active GHC's binary, and `cabal build` fails with Cabal-7620 (ghc not found). All Ubuntu test/flags jobs have been failing this way for new PRs. Drop the --unset flag. The other gc flags (--share-dir --hls-no-ghc --cache --tmpdirs) still free the disk space the gc step exists for. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/actions/setup-build/action.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/actions/setup-build/action.yml b/.github/actions/setup-build/action.yml index 6612f07ad8..89b057f479 100644 --- a/.github/actions/setup-build/action.yml +++ b/.github/actions/setup-build/action.yml @@ -140,5 +140,8 @@ runs: # installed besides the ones we explicitly want. - name: "Remove ghcup caches" if: runner.os == 'Linux' - run: ghcup gc --ghc-old --share-dir --hls-no-ghc --cache --tmpdirs --unset + # NB: do not pass --unset here. haskell-actions/setup leaves the active + # GHC in an "unset" state, and --unset would then delete its binary, + # breaking the subsequent `cabal build`. + run: ghcup gc --ghc-old --share-dir --hls-no-ghc --cache --tmpdirs shell: bash From 6c6b8892410557344e9120fccc2bb19bf44eb666 Mon Sep 17 00:00:00 2001 From: parsonsmatt Date: Thu, 28 May 2026 09:49:10 -0600 Subject: [PATCH 09/11] ghcide: scope cutoff value-reuse to GetModuleGraph The unconditional pointer-identity preservation in defineEarlyCutoff' broke rules whose action body has value-dependent side effects. In particular, GetLinkable registers the freshly built linkable and unloads the prior; returning the cached value to downstream then left consumers (e.g. the eval plugin's :info command) pointing at bytecode the action had just unloaded. Make value reuse opt-in via a new flag on defineEarlyCutoff', and a parallel wrapper defineEarlyCutOffNoFileReuseValue. Apply only to GetModuleGraph, which is pure and accounts for nearly all of the heap savings (NodeKey_Module / GWIB / Set.Bin retainers). Co-Authored-By: Claude Opus 4.7 (1M context) --- ghcide/src/Development/IDE/Core/Rules.hs | 2 +- ghcide/src/Development/IDE/Core/Shake.hs | 57 +++++++++++++++++------- 2 files changed, 43 insertions(+), 16 deletions(-) diff --git a/ghcide/src/Development/IDE/Core/Rules.hs b/ghcide/src/Development/IDE/Core/Rules.hs index 24de344bfa..fc43dcef63 100644 --- a/ghcide/src/Development/IDE/Core/Rules.hs +++ b/ghcide/src/Development/IDE/Core/Rules.hs @@ -639,7 +639,7 @@ getFileHashRule recorder = return (Just (fingerprintToBS fileHash), ([], Just fileHash)) getModuleGraphRule :: Recorder (WithPriority Log) -> Rules () -getModuleGraphRule recorder = defineEarlyCutOffNoFile (cmapWithPrio LogShake recorder) $ \GetModuleGraph -> do +getModuleGraphRule recorder = defineEarlyCutOffNoFileReuseValue (cmapWithPrio LogShake recorder) $ \GetModuleGraph -> do fs <- toKnownFiles <$> useNoFile_ GetKnownTargets dependencyInfoForFiles (HashSet.toList fs) diff --git a/ghcide/src/Development/IDE/Core/Shake.hs b/ghcide/src/Development/IDE/Core/Shake.hs index ab207b0a11..aa58dc649e 100644 --- a/ghcide/src/Development/IDE/Core/Shake.hs +++ b/ghcide/src/Development/IDE/Core/Shake.hs @@ -41,7 +41,7 @@ module Development.IDE.Core.Shake( RuleBody(..), define, defineNoDiagnostics, defineEarlyCutoff, - defineNoFile, defineEarlyCutOffNoFile, + defineNoFile, defineEarlyCutOffNoFile, defineEarlyCutOffNoFileReuseValue, getDiagnostics, mRunLspT, mRunLspTCallback, getHiddenDiagnostics, @@ -1212,26 +1212,26 @@ defineEarlyCutoff recorder (Rule op) = addRule $ \(Q (key, file)) (old :: Maybe let diagnostics ver diags = do traceDiagnostics diags updateFileDiagnostics recorder file ver (newKey key) extras diags - defineEarlyCutoff' diagnostics (==) key file old mode $ const $ op key file + defineEarlyCutoff' diagnostics (==) False key file old mode $ const $ op key file defineEarlyCutoff recorder (RuleNoDiagnostics op) = addRule $ \(Q (key, file)) (old :: Maybe BS.ByteString) mode -> otTracedAction key file mode traceA $ \traceDiagnostics -> do let diagnostics _ver diags = do traceDiagnostics diags mapM_ (logWith recorder Warning . LogDefineEarlyCutoffRuleNoDiagHasDiag) diags - defineEarlyCutoff' diagnostics (==) key file old mode $ const $ second (mempty,) <$> op key file + defineEarlyCutoff' diagnostics (==) False key file old mode $ const $ second (mempty,) <$> op key file defineEarlyCutoff recorder RuleWithCustomNewnessCheck{..} = addRule $ \(Q (key, file)) (old :: Maybe BS.ByteString) mode -> otTracedAction key file mode traceA $ \ traceDiagnostics -> do let diagnostics _ver diags = do traceDiagnostics diags mapM_ (logWith recorder Warning . LogDefineEarlyCutoffRuleCustomNewnessHasDiag) diags - defineEarlyCutoff' diagnostics newnessCheck key file old mode $ + defineEarlyCutoff' diagnostics newnessCheck False key file old mode $ const $ second (mempty,) <$> build key file defineEarlyCutoff recorder (RuleWithOldValue op) = addRule $ \(Q (key, file)) (old :: Maybe BS.ByteString) mode -> otTracedAction key file mode traceA $ \traceDiagnostics -> do extras <- getShakeExtras let diagnostics ver diags = do traceDiagnostics diags updateFileDiagnostics recorder file ver (newKey key) extras diags - defineEarlyCutoff' diagnostics (==) key file old mode $ op key file + defineEarlyCutoff' diagnostics (==) False key file old mode $ op key file defineNoFile :: IdeRule k v => Recorder (WithPriority Log) -> (k -> Action v) -> Rules () defineNoFile recorder f = defineNoDiagnostics recorder $ \k file -> do @@ -1243,18 +1243,40 @@ defineEarlyCutOffNoFile recorder f = defineEarlyCutoff recorder $ RuleNoDiagnost if file == emptyFilePath then do (hashString, res) <- f k; return (Just hashString, Just res) else fail $ "Rule " ++ show k ++ " should always be called with the empty string for a file" +-- | Like 'defineEarlyCutOffNoFile', but reuses the previously cached value +-- when the early-cutoff fingerprint matches the prior run. Preserves pointer +-- identity across rebuilds — important for large shared values +-- (e.g. ModuleGraph) so downstream rules whose deps did not change continue +-- to share a pointer with the cache, rather than accumulating multiple +-- structurally equivalent copies. +-- +-- Only use for rules whose action has no side effects that depend on the +-- freshly computed value (no global registration, no unloading the prior). +defineEarlyCutOffNoFileReuseValue :: IdeRule k v => Recorder (WithPriority Log) -> (k -> Action (BS.ByteString, v)) -> Rules () +defineEarlyCutOffNoFileReuseValue recorder f = + addRule $ \(Q (key, file)) (old :: Maybe BS.ByteString) mode -> + otTracedAction key file mode traceA $ \traceDiagnostics -> do + let diagnostics _ver diags = do + traceDiagnostics diags + mapM_ (logWith recorder Warning . LogDefineEarlyCutoffRuleNoDiagHasDiag) diags + defineEarlyCutoff' diagnostics (==) True key file old mode $ const $ do + if file == emptyFilePath + then do (hashString, res) <- f key; return (Just hashString, (mempty, Just res)) + else fail $ "Rule " ++ show key ++ " should always be called with the empty string for a file" + defineEarlyCutoff' :: forall k v. IdeRule k v => (Maybe Int32 -> [FileDiagnostic] -> Action ()) -- ^ update diagnostics -- | compare current and previous for freshness -> (BS.ByteString -> BS.ByteString -> Bool) + -> Bool -- ^ reuse cached value on cutoff match (see Note below) -> k -> NormalizedFilePath -> Maybe BS.ByteString -> RunMode -> (Value v -> Action (Maybe BS.ByteString, IdeResult v)) -> Action (RunResult (A (RuleResult k))) -defineEarlyCutoff' doDiagnostics cmp key file mbOld mode action = do +defineEarlyCutoff' doDiagnostics cmp reuseValueOnMatch key file mbOld mode action = do ShakeExtras{state, progress, dirtyKeys} <- getShakeExtras options <- getIdeOptions let trans g x = withRunInIO $ \run -> g (run x) @@ -1299,17 +1321,22 @@ defineEarlyCutoff' doDiagnostics cmp key file mbOld mode action = do -- If we do not have a previous result -- or we got ShakeNoCutoff we always return False. _ -> False - -- Pointer-identity preservation on early-cutoff match. + -- Pointer-identity preservation on early-cutoff match (opt-in). -- When the cutoff fingerprint matches the prior, reuse the -- previously cached value instead of the freshly computed - -- (but structurally equivalent) one. This is critical for - -- large shared values like ModuleGraph: downstream rules - -- that did not invalidate continue to share a pointer with - -- the cache, instead of accumulating multiple equivalent - -- copies across rebuilds. - let res = case (eq, staleV, freshRes) of - (True, Stale _ _ oldV, Succeeded sver _) -> Succeeded sver oldV - _ -> freshRes + -- (but structurally equivalent) one. This avoids accumulating + -- multiple equivalent copies of large shared values like + -- ModuleGraph across rebuilds. + -- + -- Only safe for rules whose action body has no + -- value-dependent side effects (e.g. registering the fresh + -- result in a global table and unloading the stale one). + -- For such rules, returning the cached value would leave + -- downstream pointing at a representation that the action + -- just invalidated. Opt in by passing reuseValueOnMatch=True. + let res = case (reuseValueOnMatch, eq, staleV, freshRes) of + (True, True, Stale _ _ oldV, Succeeded sver _) -> Succeeded sver oldV + _ -> freshRes return $ RunResult (if eq then ChangedRecomputeSame else ChangedRecomputeDiff) (encodeShakeValue bs) From 86f71c932231868b0c8363de564b64eae4ec8697 Mon Sep 17 00:00:00 2001 From: parsonsmatt Date: Thu, 28 May 2026 13:55:51 -0600 Subject: [PATCH 10/11] review fixups: cutoff reuse hardening + bench tooling fixes Code-review follow-ups on the GetModuleGraph value-reuse change and the benchmark tooling it ships with. ghcide: - defineEarlyCutoff' only reuses a clean prior value (Stale Nothing); a deletion-marked Stale (Just _) is no longer resurrected as Succeeded. - Replace the hand-inlined addRule copy in defineEarlyCutOffNoFileReuseValue with a RuleNoDiagnosticsReuseValue constructor + shared addNoDiagnosticsRule helper, and factor the empty-path guard into noFileBody. bench/shake-bench: - run_pass2_costcentre.sh: drop --lsp-config (it made ghcide-bench read empty stdin from /dev/null and error out, masked by `|| echo`). - MultiLayerModules.sh: require a value for --depth/--width (${2:?...}) instead of looping forever when it is missing. - summarizeHpProfile: foldl' + forced max (no thunk buildup); skip the .hp.csv summary under NoProfiling instead of rewriting the dummy each build. - parseRTSStats: whitespace-normalise label matching (robust to GHC -s spacing). - Collapse showMB/showSeconds/showDouble/showInt to a single max-0 clamp. - quantileSorted: sort each sample list once per row instead of 6x. Co-Authored-By: Claude Opus 4.8 (1M context) --- bench/MultiLayerModules.sh | 4 +- bench/run_pass2_costcentre.sh | 1 - ghcide-bench/src/Experiments.hs | 31 +++++----- ghcide/src/Development/IDE/Core/Shake.hs | 62 +++++++++++++------ .../src/Development/Benchmark/Rules.hs | 45 +++++++------- 5 files changed, 85 insertions(+), 58 deletions(-) diff --git a/bench/MultiLayerModules.sh b/bench/MultiLayerModules.sh index 6dc4ba1c5a..55407f600e 100755 --- a/bench/MultiLayerModules.sh +++ b/bench/MultiLayerModules.sh @@ -13,11 +13,11 @@ while [[ $# -gt 0 ]]; do shift ;; --depth) - DEPTH=$2 + DEPTH=${2:?"--depth requires a value"} shift 2 ;; --width) - WIDTH=$2 + WIDTH=${2:?"--width requires a value"} shift 2 ;; *) diff --git a/bench/run_pass2_costcentre.sh b/bench/run_pass2_costcentre.sh index ff2cdf0750..6357065426 100755 --- a/bench/run_pass2_costcentre.sh +++ b/bench/run_pass2_costcentre.sh @@ -44,7 +44,6 @@ run_experiment() { --csv="$outdir/$exp_slug.csv" \ --ghcide="$(pwd)/$HLS" \ --select "$experiment" \ - --lsp-config \ "${example_args[@]}" \ --ghcide-options=+RTS \ --ghcide-options=-p \ diff --git a/ghcide-bench/src/Experiments.hs b/ghcide-bench/src/Experiments.hs index 4e673e6993..6516eab37e 100644 --- a/ghcide-bench/src/Experiments.hs +++ b/ghcide-bench/src/Experiments.hs @@ -599,17 +599,19 @@ runBenchmarksFun dir allBenchmarks = do show rulesTotal, show edgesTotal, show rebuildsTotal, - showMs (quantile 0.50 userWaitsSamples), - showMs (quantile 0.95 userWaitsSamples), - showMs (quantile 0.99 userWaitsSamples), + showMs (quantileSorted 0.50 sortedUserWaits), + showMs (quantileSorted 0.95 sortedUserWaits), + showMs (quantileSorted 0.99 sortedUserWaits), showMs (stdDev userWaitsSamples), - showMs (quantile 0.50 delayedWorkSamples), - showMs (quantile 0.95 delayedWorkSamples), - showMs (quantile 0.99 delayedWorkSamples) + showMs (quantileSorted 0.50 sortedDelayedWork), + showMs (quantileSorted 0.95 sortedDelayedWork), + showMs (quantileSorted 0.99 sortedDelayedWork) ] | (Bench {name, samples}, BenchRun {..}) <- results, let runSetup' = if runSetup < 0.01 then 0 else runSetup modules = fromIntegral $ length $ exampleModules $ example ?config + sortedUserWaits = sort userWaitsSamples + sortedDelayedWork = sort delayedWorkSamples ] csv = unlines $ map (intercalate ", ") (headers : rows) writeFile (outputCSV ?config) csv @@ -696,14 +698,15 @@ data BenchRun = BenchRun badRun :: BenchRun badRun = BenchRun 0 0 0 0 0 [] [] 0 0 0 0 0 0 0 0 False --- | Approximate quantile of a list of samples. Uses the nearest-rank --- method; good enough for benchmark reporting at n>=20. -quantile :: Double -> [Seconds] -> Seconds -quantile _ [] = 0 -quantile q xs = - let sorted = sort xs - n = length sorted - idx = max 0 $ min (n - 1) $ floor (q * fromIntegral (n - 1)) +-- | Approximate quantile (nearest-rank) of an /already-sorted/ list of +-- samples. Good enough for benchmark reporting at n>=20. Sorting is the +-- caller's responsibility so that several quantiles over the same samples +-- can share a single sort. +quantileSorted :: Double -> [Seconds] -> Seconds +quantileSorted _ [] = 0 +quantileSorted q sorted = + let n = length sorted + idx = min (n - 1) $ floor (q * fromIntegral (n - 1)) in sorted !! idx -- | Population standard deviation. diff --git a/ghcide/src/Development/IDE/Core/Shake.hs b/ghcide/src/Development/IDE/Core/Shake.hs index aa58dc649e..a3ad6ab895 100644 --- a/ghcide/src/Development/IDE/Core/Shake.hs +++ b/ghcide/src/Development/IDE/Core/Shake.hs @@ -1195,6 +1195,9 @@ useWithoutDependency key file = data RuleBody k v = Rule (k -> NormalizedFilePath -> Action (Maybe BS.ByteString, IdeResult v)) | RuleNoDiagnostics (k -> NormalizedFilePath -> Action (Maybe BS.ByteString, Maybe v)) + -- | Like 'RuleNoDiagnostics', but reuses the previously cached value on an + -- early-cutoff match (see 'defineEarlyCutOffNoFileReuseValue'). + | RuleNoDiagnosticsReuseValue (k -> NormalizedFilePath -> Action (Maybe BS.ByteString, Maybe v)) | RuleWithCustomNewnessCheck { newnessCheck :: BS.ByteString -> BS.ByteString -> Bool , build :: k -> NormalizedFilePath -> Action (Maybe BS.ByteString, Maybe v) @@ -1213,11 +1216,10 @@ defineEarlyCutoff recorder (Rule op) = addRule $ \(Q (key, file)) (old :: Maybe traceDiagnostics diags updateFileDiagnostics recorder file ver (newKey key) extras diags defineEarlyCutoff' diagnostics (==) False key file old mode $ const $ op key file -defineEarlyCutoff recorder (RuleNoDiagnostics op) = addRule $ \(Q (key, file)) (old :: Maybe BS.ByteString) mode -> otTracedAction key file mode traceA $ \traceDiagnostics -> do - let diagnostics _ver diags = do - traceDiagnostics diags - mapM_ (logWith recorder Warning . LogDefineEarlyCutoffRuleNoDiagHasDiag) diags - defineEarlyCutoff' diagnostics (==) False key file old mode $ const $ second (mempty,) <$> op key file +defineEarlyCutoff recorder (RuleNoDiagnostics op) = + addNoDiagnosticsRule recorder False op +defineEarlyCutoff recorder (RuleNoDiagnosticsReuseValue op) = + addNoDiagnosticsRule recorder True op defineEarlyCutoff recorder RuleWithCustomNewnessCheck{..} = addRule $ \(Q (key, file)) (old :: Maybe BS.ByteString) mode -> otTracedAction key file mode traceA $ \ traceDiagnostics -> do @@ -1233,15 +1235,40 @@ defineEarlyCutoff recorder (RuleWithOldValue op) = addRule $ \(Q (key, file)) (o updateFileDiagnostics recorder file ver (newKey key) extras diags defineEarlyCutoff' diagnostics (==) False key file old mode $ op key file +-- | Shared implementation for the no-diagnostics rule variants. The 'Bool' +-- selects @defineEarlyCutoff'@'s value-reuse behaviour on an early-cutoff match. +addNoDiagnosticsRule + :: forall k v. IdeRule k v + => Recorder (WithPriority Log) + -> Bool + -> (k -> NormalizedFilePath -> Action (Maybe BS.ByteString, Maybe v)) + -> Rules () +addNoDiagnosticsRule recorder reuseValueOnMatch op = + addRule $ \(Q (key, file)) (old :: Maybe BS.ByteString) mode -> + otTracedAction key file mode traceA $ \traceDiagnostics -> do + let diagnostics _ver diags = do + traceDiagnostics diags + mapM_ (logWith recorder Warning . LogDefineEarlyCutoffRuleNoDiagHasDiag) diags + defineEarlyCutoff' diagnostics (==) reuseValueOnMatch key file old mode $ + const $ second (mempty,) <$> op key file + defineNoFile :: IdeRule k v => Recorder (WithPriority Log) -> (k -> Action v) -> Rules () defineNoFile recorder f = defineNoDiagnostics recorder $ \k file -> do if file == emptyFilePath then do res <- f k; return (Just res) else fail $ "Rule " ++ show k ++ " should always be called with the empty string for a file" +-- | Body shared by the no-file early-cutoff rule variants: assert the empty +-- file path and wrap the (fingerprint, value) result. +noFileBody + :: Show k + => (k -> Action (BS.ByteString, v)) + -> k -> NormalizedFilePath -> Action (Maybe BS.ByteString, Maybe v) +noFileBody f k file + | file == emptyFilePath = do (hashString, res) <- f k; return (Just hashString, Just res) + | otherwise = fail $ "Rule " ++ show k ++ " should always be called with the empty string for a file" + defineEarlyCutOffNoFile :: IdeRule k v => Recorder (WithPriority Log) -> (k -> Action (BS.ByteString, v)) -> Rules () -defineEarlyCutOffNoFile recorder f = defineEarlyCutoff recorder $ RuleNoDiagnostics $ \k file -> do - if file == emptyFilePath then do (hashString, res) <- f k; return (Just hashString, Just res) else - fail $ "Rule " ++ show k ++ " should always be called with the empty string for a file" +defineEarlyCutOffNoFile recorder f = defineEarlyCutoff recorder $ RuleNoDiagnostics $ noFileBody f -- | Like 'defineEarlyCutOffNoFile', but reuses the previously cached value -- when the early-cutoff fingerprint matches the prior run. Preserves pointer @@ -1253,16 +1280,7 @@ defineEarlyCutOffNoFile recorder f = defineEarlyCutoff recorder $ RuleNoDiagnost -- Only use for rules whose action has no side effects that depend on the -- freshly computed value (no global registration, no unloading the prior). defineEarlyCutOffNoFileReuseValue :: IdeRule k v => Recorder (WithPriority Log) -> (k -> Action (BS.ByteString, v)) -> Rules () -defineEarlyCutOffNoFileReuseValue recorder f = - addRule $ \(Q (key, file)) (old :: Maybe BS.ByteString) mode -> - otTracedAction key file mode traceA $ \traceDiagnostics -> do - let diagnostics _ver diags = do - traceDiagnostics diags - mapM_ (logWith recorder Warning . LogDefineEarlyCutoffRuleNoDiagHasDiag) diags - defineEarlyCutoff' diagnostics (==) True key file old mode $ const $ do - if file == emptyFilePath - then do (hashString, res) <- f key; return (Just hashString, (mempty, Just res)) - else fail $ "Rule " ++ show key ++ " should always be called with the empty string for a file" +defineEarlyCutOffNoFileReuseValue recorder f = defineEarlyCutoff recorder $ RuleNoDiagnosticsReuseValue $ noFileBody f defineEarlyCutoff' :: forall k v. IdeRule k v @@ -1334,9 +1352,13 @@ defineEarlyCutoff' doDiagnostics cmp reuseValueOnMatch key file mbOld mode actio -- For such rules, returning the cached value would leave -- downstream pointing at a representation that the action -- just invalidated. Opt in by passing reuseValueOnMatch=True. + -- Reuse only a clean prior value (Stale Nothing): a + -- deletion-marked Stale (Just _) may have been + -- garbage-collected/invalidated, so fall through to the fresh + -- result rather than resurrecting it. let res = case (reuseValueOnMatch, eq, staleV, freshRes) of - (True, True, Stale _ _ oldV, Succeeded sver _) -> Succeeded sver oldV - _ -> freshRes + (True, True, Stale Nothing _ oldV, Succeeded sver _) -> Succeeded sver oldV + _ -> freshRes return $ RunResult (if eq then ChangedRecomputeSame else ChangedRecomputeDiff) (encodeShakeValue bs) diff --git a/shake-bench/src/Development/Benchmark/Rules.hs b/shake-bench/src/Development/Benchmark/Rules.hs index 62779bb212..e7daa8a554 100644 --- a/shake-bench/src/Development/Benchmark/Rules.hs +++ b/shake-bench/src/Development/Benchmark/Rules.hs @@ -71,9 +71,9 @@ import Data.Aeson (FromJSON (..), ToJSON (..), import Data.Aeson.Lens (AsJSON (_JSON), _Object, _String) import Data.ByteString.Lazy (ByteString) import Data.Char (isDigit) -import Data.List (find, intercalate, isInfixOf, - isPrefixOf, isSuffixOf, sortBy, - stripPrefix, transpose) +import Data.List (find, foldl', intercalate, + isInfixOf, isPrefixOf, isSuffixOf, + sortBy, stripPrefix, transpose) import Data.List.Extra (lower, splitOn) import qualified Data.Map.Strict as Map import Data.Maybe (fromMaybe) @@ -328,13 +328,12 @@ benchRules build MkBenchRules{..} = do BenchProject {..} liftIO $ case prof of NoProfiling -> writeFile outHp dummyHp - _ -> return () - - -- Write a co-located top-N heap summary derived from the .hp file. - -- Untracked side-effect; regenerated whenever the .hp is. - liftIO $ do - hp <- readFile outHp - writeFile (outHp <.> "csv") (summarizeHpProfile 20 hp) + _ -> do + -- Write a co-located top-N heap summary derived from the .hp + -- file. Untracked side-effect; regenerated whenever the .hp + -- is. Skipped under NoProfiling, where outHp is only a dummy. + hp <- readFile outHp + writeFile (outHp <.> "csv") (summarizeHpProfile 20 hp) -- extend csv output with allocation data + GC/pause/productivity csvContents <- liftIO $ lines <$> readFile outcsv @@ -369,18 +368,16 @@ benchRules build MkBenchRules{..} = do let csvContents' = header' : results' writeFileLines outcsv csvContents' where + -- A negative value is parseRTSStats's "field missing" sentinel; clamp + -- to zero so the column stays numeric for downstream CSV parsing. showMB :: Int -> String - showMB x | x < 0 = "0MB" - | otherwise = show (x `div` 2^(20::Int)) <> "MB" + showMB x = show (max 0 x `div` 2^(20::Int)) <> "MB" showSeconds :: Double -> String - showSeconds x | x < 0 = "0" - | otherwise = printf "%.4f" x + showSeconds = printf "%.4f" . max 0 showDouble :: Double -> String - showDouble x | x < 0 = "0" - | otherwise = printf "%.2f" x + showDouble = printf "%.2f" . max 0 showInt :: Int -> String - showInt x | x < 0 = "0" - | otherwise = show x + showInt = show . max (0 :: Int) -- | Summarize a GHC heap profile (.hp) file into a top-N cost-centre CSV. -- For each cost-centre name we report the maximum retained bytes across @@ -391,9 +388,11 @@ summarizeHpProfile topN raw = let ls = lines raw samples = collectSamples ls accumOne acc (cc, bs) = Map.insertWith combine cc (bs, bs) acc - where combine (newMax, newLast) (oldMax, _) = (max newMax oldMax, newLast) + where combine (newMax, newLast) (oldMax, _) = + let m = max newMax oldMax in m `seq` (m, newLast) -- Aggregate across all samples: track running max and last value per CC. - agg = foldl (\acc s -> foldl accumOne acc s) Map.empty samples + -- Strict fold + forced max to avoid a thunk chain over the samples. + agg = foldl' (\acc s -> foldl' accumOne acc s) Map.empty samples sorted = sortBy (comparing (Down . maxOf)) (Map.toList agg) maxOf (_, (m, _)) = m header = "costCentre,maxRetainedBytes,finalRetainedBytes" @@ -456,7 +455,11 @@ parseRTSStats input = RTSStats } where ls = lines input - findLine label = find (label `isInfixOf`) ls + -- Match summary labels tolerant of GHC's inter-column spacing, which + -- varies across versions (e.g. "MUT time" vs "MUT time"); both the + -- label and each line are whitespace-normalised before comparison. + findLine label = let needle = unwords (words label) + in find ((needle `isInfixOf`) . unwords . words) ls digitsFirstWord label = case findLine label of Just l -> readIntDef $ filter isDigit $ head (words l ++ [""]) Nothing -> -1 From 756d9654dbfc7e68fbf4700b59e7decf5618e63e Mon Sep 17 00:00:00 2001 From: parsonsmatt Date: Thu, 28 May 2026 17:50:25 -0600 Subject: [PATCH 11/11] ghcide: correct comment on Stale Nothing reuse guard MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The comment justified skipping `Stale (Just _)` values by calling them deletion/GC markers. That is inaccurate: `Stale`'s first field is a `Maybe PositionDelta`, written only by the persistent-rule fallback in lastValueIO (a value loaded from disk, not produced by the rule's action this run). State the real reason — such a value is not a trustworthy prior to reuse — so future readers don't misread the Value lifecycle. Comment-only; no behavior change. Co-Authored-By: Claude Opus 4.8 (1M context) --- ghcide/src/Development/IDE/Core/Shake.hs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/ghcide/src/Development/IDE/Core/Shake.hs b/ghcide/src/Development/IDE/Core/Shake.hs index a3ad6ab895..77401cc0aa 100644 --- a/ghcide/src/Development/IDE/Core/Shake.hs +++ b/ghcide/src/Development/IDE/Core/Shake.hs @@ -1352,10 +1352,14 @@ defineEarlyCutoff' doDiagnostics cmp reuseValueOnMatch key file mbOld mode actio -- For such rules, returning the cached value would leave -- downstream pointing at a representation that the action -- just invalidated. Opt in by passing reuseValueOnMatch=True. - -- Reuse only a clean prior value (Stale Nothing): a - -- deletion-marked Stale (Just _) may have been - -- garbage-collected/invalidated, so fall through to the fresh - -- result rather than resurrecting it. + -- Reuse only a clean prior value (Stale Nothing _): that is + -- the shape a value successfully computed by this rule takes + -- (see the Just Succeeded branch above). A Stale (Just _) + -- instead carries a PositionDelta and is only ever written by + -- the persistent-rule fallback (lastValueIO), i.e. a value + -- loaded from disk rather than produced by this rule's action + -- this run, so it is not a trustworthy prior to reuse; fall + -- through to the fresh result in that case. let res = case (reuseValueOnMatch, eq, staleV, freshRes) of (True, True, Stale Nothing _ oldV, Succeeded sver _) -> Succeeded sver oldV _ -> freshRes