diff --git a/.github/actions/setup-build/action.yml b/.github/actions/setup-build/action.yml index 6612f07ad8..89b057f479 100644 --- a/.github/actions/setup-build/action.yml +++ b/.github/actions/setup-build/action.yml @@ -140,5 +140,8 @@ runs: # installed besides the ones we explicitly want. - name: "Remove ghcup caches" if: runner.os == 'Linux' - run: ghcup gc --ghc-old --share-dir --hls-no-ghc --cache --tmpdirs --unset + # NB: do not pass --unset here. haskell-actions/setup leaves the active + # GHC in an "unset" state, and --unset would then delete its binary, + # breaking the subsequent `cabal build`. + run: ghcup gc --ghc-old --share-dir --hls-no-ghc --cache --tmpdirs shell: bash diff --git a/bench/Main.hs b/bench/Main.hs index d1ac603b18..df80af851a 100644 --- a/bench/Main.hs +++ b/bench/Main.hs @@ -64,7 +64,7 @@ import Development.Shake (Action, actionBracket, addOracle, askOracle, command, command_, getDirectoryFiles, liftIO, need, - newCache, shakeArgsWith, + newCache, phony, shakeArgsWith, shakeOptions, versioned, want) import Development.Shake.Classes import Experiments.Types (Example (exampleName), @@ -168,6 +168,17 @@ createBuildSystem config = do whenJust (profileInterval configStatic) $ \i -> do phonyRules "profiled-" binaryName (CheapHeapProfiling i) build (examples configStatic) + -- Fast smoke target for local iteration: smallest example, cheapest experiments, HEAD only. + let smokeExample = "DummyLevel0M01NoTH" + smokeExperiments = ["edit", "edit-header", "documentSymbols after edit"] + smokeConfigs = [ confName | ConfigurationDescriptor{..} <- configurations configStatic ] + phony "smoke" $ need + [ build "unprofiled" smokeExample "HEAD" conf + escaped (escapeExperiment (Unescaped experiment)) <.> "csv" + | conf <- smokeConfigs + , experiment <- smokeExperiments + ] + return configStatic disableAllPluginsBut :: (PluginId -> Bool) -> Plugin.Config diff --git a/bench/MultiLayerModules.sh b/bench/MultiLayerModules.sh index 38d85ce9ed..55407f600e 100755 --- a/bench/MultiLayerModules.sh +++ b/bench/MultiLayerModules.sh @@ -1,9 +1,30 @@ #!/usr/bin/env bash -# Generate $DEPTH layers of modules with $WIDTH modules on each layer -# Every module on layer N imports all the modules on layer N-1 -# MultiLayerModules.hs imports all the modules from the last layer +# Generate $DEPTH layers of modules with $WIDTH modules on each layer. +# Every module on layer N imports all the modules on layer N-1. +# MultiLayerModules.hs imports all the modules from the last layer. +# Flags (any order): --th, --depth N, --width N. DEPTH=15 WIDTH=40 +TH= +while [[ $# -gt 0 ]]; do + case "$1" in + --th) + TH=1 + shift + ;; + --depth) + DEPTH=${2:?"--depth requires a value"} + shift 2 + ;; + --width) + WIDTH=${2:?"--width requires a value"} + shift 2 + ;; + *) + shift + ;; + esac +done cat >hie.yaml << EOF cradle: direct: @@ -22,13 +43,11 @@ for l in $(seq 1 $DEPTH); do done done done -case "$1" in - '--th') - echo "{-# LANGUAGE TemplateHaskell #-}" > MultiLayerModules.hs - ;; -esac +if [[ -n "$TH" ]]; then + echo "{-# LANGUAGE TemplateHaskell #-}" > MultiLayerModules.hs +fi echo "module MultiLayerModules where" >> MultiLayerModules.hs - echo " - MultiLayerModules.hs" >> hie.yaml; +echo " - MultiLayerModules.hs" >> hie.yaml; for j in $(seq -w 1 $WIDTH); do echo "import DummyLevel${DEPTH}M$j" >> MultiLayerModules.hs; done diff --git a/bench/config.yaml b/bench/config.yaml index b1218976ce..1e6f811225 100644 --- a/bench/config.yaml +++ b/bench/config.yaml @@ -77,6 +77,23 @@ examples: modules: - DummyLevel1M01.hs + # Large synthetic project: 200 layers × 50 modules/layer = 10000 modules, + # ~500k import edges (each module imports every module of the prior layer). + # This is closer in scale to a real production Haskell codebase than the + # default MultiLayerModules. Expect this to be slow: initial cradle load + # alone takes many minutes, and each experiment runs `samples:` rounds. + # Recommended: drop `samples:` to ~10 and run via single-experiment targets + # (e.g. `bench-results/unprofiled/MultiLayerModulesXL/HEAD/All/edit.csv`) + # rather than the full matrix. + - name: MultiLayerModulesXL + path: bench/MultiLayerModules.sh + script: True + script-args: ["--depth", "200", "--width", "50"] + modules: + - MultiLayerModules.hs + - DummyLevel0M01.hs + - DummyLevel100M01.hs + # Small but heavily multi-component example # Disabled as it is far to slow. hie-bios >0.7.2 should help # - name: HLS @@ -114,6 +131,7 @@ experiments: - "hole fit suggestions" - "eval execute single-line code lens" - "eval execute multi-line code lens" + - "memory pressure" # An ordered list of versions to analyze versions: diff --git a/bench/run_pass2_costcentre.sh b/bench/run_pass2_costcentre.sh new file mode 100755 index 0000000000..6357065426 --- /dev/null +++ b/bench/run_pass2_costcentre.sh @@ -0,0 +1,87 @@ +#!/usr/bin/env bash +# Pass 2: cost-centre profiled benchmark. +# +# Requires bench-results/binaries/HEAD-prof/haskell-language-server to exist +# (built with --enable-profiling --profiling-detail=late). +# +# For each (example, experiment) pair, invoke ghcide-bench directly with +# +RTS -p -hc -i1 -po -RTS so we get: +# .prof — time + allocation by cost-centre +# .hp — heap residency by cost-centre over time +# .hp.csv — top-N cost-centres (produced by parseHpProfile, but +# NOTE: only the bench harness invokes that. For Pass 2 +# we'd need to run summarizeHpProfile separately. See +# end of script.) +set -euo pipefail + +HLS=bench-results/binaries/HEAD-prof/haskell-language-server +OUTDIR=bench-results/prof-cc +SAMPLES=20 + +if [[ ! -x "$HLS" ]]; then + echo "ERROR: profiled HLS not found at $HLS" >&2 + exit 1 +fi + +run_experiment() { + local example_name=$1 + local experiment=$2 + shift 2 + local example_args=("$@") + + local exp_slug=${experiment// /_} + local outdir="$OUTDIR/$example_name/$exp_slug" + mkdir -p "$outdir" + local stem="$(pwd)/$outdir/$exp_slug" + + echo + echo "=== $example_name :: $experiment ===" + + cabal exec -- ghcide-bench \ + --timeout=600 \ + --no-clean \ + --samples="$SAMPLES" \ + --csv="$outdir/$exp_slug.csv" \ + --ghcide="$(pwd)/$HLS" \ + --select "$experiment" \ + "${example_args[@]}" \ + --ghcide-options=+RTS \ + --ghcide-options=-p \ + --ghcide-options=-hc \ + --ghcide-options=-i1 \ + --ghcide-options=-po"$stem" \ + --ghcide-options=-l \ + --ghcide-options=-ol"$stem.eventlog" \ + --ghcide-options=-S"$stem.gcStats.log" \ + --ghcide-options=-RTS \ + < /dev/null || echo " FAILED — continuing" +} + +# ---------- Example: cabal-3.16.1.0 ---------- +CABAL_ARGS=( + --example-package-name Cabal + --example-package-version 3.16.1.0 + --example-name cabal + --example-module=src/Distribution/Simple.hs + --example-module=src/Distribution/Types/ComponentLocalBuildInfo.hs +) +for exp in "memory pressure" "completions after typing burst" "hover after typing burst" "edit" "semanticTokens after typing burst"; do + run_experiment cabal "$exp" "${CABAL_ARGS[@]}" +done + +# ---------- Example: MultiLayerModulesNoTH ---------- +MLM_ARGS=( + --example-script "$(pwd)/bench/MultiLayerModules.sh" + --example-name MultiLayerModulesNoTH + --example-module=MultiLayerModules.hs + --example-module=DummyLevel0M01.hs + --example-module=DummyLevel1M01.hs +) +for exp in "memory pressure" "edit" "hover after typing burst"; do + run_experiment MultiLayerModulesNoTH "$exp" "${MLM_ARGS[@]}" +done + +echo +echo "=== Pass 2 complete ===" +echo "Profile artefacts under $OUTDIR/" +echo "Inspect .prof files for time+alloc attribution: head -50 $OUTDIR/cabal/memory_pressure/memory_pressure.prof" diff --git a/ghcide-bench/src/Experiments.hs b/ghcide-bench/src/Experiments.hs index 8e70483559..6516eab37e 100644 --- a/ghcide-bench/src/Experiments.hs +++ b/ghcide-bench/src/Experiments.hs @@ -352,6 +352,45 @@ experiments = ) ), --------------------------------------------------------------------------------------- + --------------------------------------------------------------------------------------- + -- Memory-pressure experiment: hammers the server with a variety of + -- requests across every open document, interleaved with edits, to force + -- the IDE to retain caches/results across many rules. Each sample is one + -- "round" so the resulting peak/post-GC heap (via RTS -s) reflects the + -- worst case under sustained load. The default sample count makes this + -- much longer than other experiments — override with --samples when you + -- only need a quick smoke check. + bench "memory pressure" $ \docs -> do + -- Round 1: typing burst across all docs. + applyTypingBurst docs + _ <- waitForBuildQueue + -- Round 2: exercise hover + getDefinition + completions on every + -- doc that has an identifier position. These populate per-rule + -- caches across the whole project. + forM_ docs $ \DocumentPositions{..} -> do + forM_ identifierP $ \p -> do + _ <- getHover doc p + _ <- getDefinitions doc p + _ <- getCompletions doc p + return () + _ <- waitForBuildQueue + -- Round 3: code actions across every doc — these tend to pull in + -- additional rules (suggestions, refactor previews, etc.). + forM_ docs $ \DocumentPositions{..} -> do + forM_ identifierP $ \p -> + void $ getCodeActions doc (Range p p) + _ <- waitForBuildQueue + -- Round 4: document symbols on every doc — populates the symbol cache. + forM_ docs $ \DocumentPositions{..} -> + void $ getDocumentSymbols doc + _ <- waitForBuildQueue + -- Round 5: another typing burst, to ensure the caches above survive + -- invalidation rather than getting dropped — this is what catches + -- "we recompute everything from scratch on every edit" regressions. + applyTypingBurst docs + _ <- waitForBuildQueue + return True, + --------------------------------------------------------------------------------------- benchWithSetup "eval execute multi-line code lens" ( mapM_ $ \DocumentPositions{..} -> do @@ -533,6 +572,13 @@ runBenchmarksFun dir allBenchmarks = do , "rulesTotal" , "ruleEdges" , "ghcRebuilds" + , "userP50" + , "userP95" + , "userP99" + , "userStdDev" + , "delayedP50" + , "delayedP95" + , "delayedP99" ] rows = [ [ name, @@ -552,11 +598,20 @@ runBenchmarksFun dir allBenchmarks = do show rulesVisited, show rulesTotal, show edgesTotal, - show rebuildsTotal + show rebuildsTotal, + showMs (quantileSorted 0.50 sortedUserWaits), + showMs (quantileSorted 0.95 sortedUserWaits), + showMs (quantileSorted 0.99 sortedUserWaits), + showMs (stdDev userWaitsSamples), + showMs (quantileSorted 0.50 sortedDelayedWork), + showMs (quantileSorted 0.95 sortedDelayedWork), + showMs (quantileSorted 0.99 sortedDelayedWork) ] | (Bench {name, samples}, BenchRun {..}) <- results, let runSetup' = if runSetup < 0.01 then 0 else runSetup modules = fromIntegral $ length $ exampleModules $ example ?config + sortedUserWaits = sort userWaitsSamples + sortedDelayedWork = sort delayedWorkSamples ] csv = unlines $ map (intercalate ", ") (headers : rows) writeFile (outputCSV ?config) csv @@ -625,6 +680,10 @@ data BenchRun = BenchRun runExperiment :: !Seconds, userWaits :: !Seconds, delayedWork :: !Seconds, + -- | Per-sample user-time samples (length == samples on success). + userWaitsSamples :: ![Seconds], + -- | Per-sample delayed-work samples (length == samples on success). + delayedWorkSamples :: ![Seconds], firstResponse :: !Seconds, firstResponseDelayed :: !Seconds, rulesBuilt :: !Int, @@ -637,7 +696,27 @@ data BenchRun = BenchRun } badRun :: BenchRun -badRun = BenchRun 0 0 0 0 0 0 0 0 0 0 0 0 0 False +badRun = BenchRun 0 0 0 0 0 [] [] 0 0 0 0 0 0 0 0 False + +-- | Approximate quantile (nearest-rank) of an /already-sorted/ list of +-- samples. Good enough for benchmark reporting at n>=20. Sorting is the +-- caller's responsibility so that several quantiles over the same samples +-- can share a single sort. +quantileSorted :: Double -> [Seconds] -> Seconds +quantileSorted _ [] = 0 +quantileSorted q sorted = + let n = length sorted + idx = min (n - 1) $ floor (q * fromIntegral (n - 1)) + in sorted !! idx + +-- | Population standard deviation. +stdDev :: [Seconds] -> Seconds +stdDev [] = 0 +stdDev xs = + let n = fromIntegral (length xs) + m = sum xs / n + sq x = let d = x - m in d * d + in sqrt (sum (map sq xs) / n) waitForProgressStart :: Session () waitForProgressStart = void $ do @@ -688,8 +767,9 @@ runBench runSess Bench{..} = handleAny (\e -> print e >> return badRun) liftIO $ output $ "Running " <> name <> " benchmark" (runSetup, ()) <- duration $ benchSetup docs - let loop' (Just timeForFirstResponse) !userWaits !delayedWork 0 = return $ Just (userWaits, delayedWork, timeForFirstResponse) - loop' timeForFirstResponse !userWaits !delayedWork n = do + -- Accumulate per-sample (t, td) in reverse order; we reverse once at the end. + let loop' (Just timeForFirstResponse) !samplesAcc 0 = return $ Just (reverse samplesAcc, timeForFirstResponse) + loop' timeForFirstResponse !samplesAcc n = do (t, res) <- duration $ experiment docs if not res then return Nothing @@ -697,12 +777,16 @@ runBench runSess Bench{..} = handleAny (\e -> print e >> return badRun) output (showDuration t) -- Wait for the delayed actions to finish td <- waitForBuildQueue - loop' (timeForFirstResponse <|> Just (t,td)) (userWaits+t) (delayedWork+td) (n -1) + loop' (timeForFirstResponse <|> Just (t,td)) ((t,td):samplesAcc) (n -1) loop = loop' Nothing - (runExperiment, result) <- duration $ loop 0 0 samples + (runExperiment, result) <- duration $ loop [] samples let success = isJust result - (userWaits, delayedWork, (firstResponse, firstResponseDelayed)) = fromMaybe (0,0,(0,0)) result + (perSample, (firstResponse, firstResponseDelayed)) = fromMaybe ([], (0,0)) result + userWaitsSamples = map fst perSample + delayedWorkSamples = map snd perSample + userWaits = sum userWaitsSamples + delayedWork = sum delayedWorkSamples rulesTotal <- length <$> getStoredKeys rulesBuilt <- either (const 0) length <$> getBuildKeysBuilt diff --git a/ghcide/src/Development/IDE/Core/Rules.hs b/ghcide/src/Development/IDE/Core/Rules.hs index 24de344bfa..fc43dcef63 100644 --- a/ghcide/src/Development/IDE/Core/Rules.hs +++ b/ghcide/src/Development/IDE/Core/Rules.hs @@ -639,7 +639,7 @@ getFileHashRule recorder = return (Just (fingerprintToBS fileHash), ([], Just fileHash)) getModuleGraphRule :: Recorder (WithPriority Log) -> Rules () -getModuleGraphRule recorder = defineEarlyCutOffNoFile (cmapWithPrio LogShake recorder) $ \GetModuleGraph -> do +getModuleGraphRule recorder = defineEarlyCutOffNoFileReuseValue (cmapWithPrio LogShake recorder) $ \GetModuleGraph -> do fs <- toKnownFiles <$> useNoFile_ GetKnownTargets dependencyInfoForFiles (HashSet.toList fs) diff --git a/ghcide/src/Development/IDE/Core/Shake.hs b/ghcide/src/Development/IDE/Core/Shake.hs index 9334a13ad3..77401cc0aa 100644 --- a/ghcide/src/Development/IDE/Core/Shake.hs +++ b/ghcide/src/Development/IDE/Core/Shake.hs @@ -41,7 +41,7 @@ module Development.IDE.Core.Shake( RuleBody(..), define, defineNoDiagnostics, defineEarlyCutoff, - defineNoFile, defineEarlyCutOffNoFile, + defineNoFile, defineEarlyCutOffNoFile, defineEarlyCutOffNoFileReuseValue, getDiagnostics, mRunLspT, mRunLspTCallback, getHiddenDiagnostics, @@ -1195,6 +1195,9 @@ useWithoutDependency key file = data RuleBody k v = Rule (k -> NormalizedFilePath -> Action (Maybe BS.ByteString, IdeResult v)) | RuleNoDiagnostics (k -> NormalizedFilePath -> Action (Maybe BS.ByteString, Maybe v)) + -- | Like 'RuleNoDiagnostics', but reuses the previously cached value on an + -- early-cutoff match (see 'defineEarlyCutOffNoFileReuseValue'). + | RuleNoDiagnosticsReuseValue (k -> NormalizedFilePath -> Action (Maybe BS.ByteString, Maybe v)) | RuleWithCustomNewnessCheck { newnessCheck :: BS.ByteString -> BS.ByteString -> Bool , build :: k -> NormalizedFilePath -> Action (Maybe BS.ByteString, Maybe v) @@ -1212,49 +1215,86 @@ defineEarlyCutoff recorder (Rule op) = addRule $ \(Q (key, file)) (old :: Maybe let diagnostics ver diags = do traceDiagnostics diags updateFileDiagnostics recorder file ver (newKey key) extras diags - defineEarlyCutoff' diagnostics (==) key file old mode $ const $ op key file -defineEarlyCutoff recorder (RuleNoDiagnostics op) = addRule $ \(Q (key, file)) (old :: Maybe BS.ByteString) mode -> otTracedAction key file mode traceA $ \traceDiagnostics -> do - let diagnostics _ver diags = do - traceDiagnostics diags - mapM_ (logWith recorder Warning . LogDefineEarlyCutoffRuleNoDiagHasDiag) diags - defineEarlyCutoff' diagnostics (==) key file old mode $ const $ second (mempty,) <$> op key file + defineEarlyCutoff' diagnostics (==) False key file old mode $ const $ op key file +defineEarlyCutoff recorder (RuleNoDiagnostics op) = + addNoDiagnosticsRule recorder False op +defineEarlyCutoff recorder (RuleNoDiagnosticsReuseValue op) = + addNoDiagnosticsRule recorder True op defineEarlyCutoff recorder RuleWithCustomNewnessCheck{..} = addRule $ \(Q (key, file)) (old :: Maybe BS.ByteString) mode -> otTracedAction key file mode traceA $ \ traceDiagnostics -> do let diagnostics _ver diags = do traceDiagnostics diags mapM_ (logWith recorder Warning . LogDefineEarlyCutoffRuleCustomNewnessHasDiag) diags - defineEarlyCutoff' diagnostics newnessCheck key file old mode $ + defineEarlyCutoff' diagnostics newnessCheck False key file old mode $ const $ second (mempty,) <$> build key file defineEarlyCutoff recorder (RuleWithOldValue op) = addRule $ \(Q (key, file)) (old :: Maybe BS.ByteString) mode -> otTracedAction key file mode traceA $ \traceDiagnostics -> do extras <- getShakeExtras let diagnostics ver diags = do traceDiagnostics diags updateFileDiagnostics recorder file ver (newKey key) extras diags - defineEarlyCutoff' diagnostics (==) key file old mode $ op key file + defineEarlyCutoff' diagnostics (==) False key file old mode $ op key file + +-- | Shared implementation for the no-diagnostics rule variants. The 'Bool' +-- selects @defineEarlyCutoff'@'s value-reuse behaviour on an early-cutoff match. +addNoDiagnosticsRule + :: forall k v. IdeRule k v + => Recorder (WithPriority Log) + -> Bool + -> (k -> NormalizedFilePath -> Action (Maybe BS.ByteString, Maybe v)) + -> Rules () +addNoDiagnosticsRule recorder reuseValueOnMatch op = + addRule $ \(Q (key, file)) (old :: Maybe BS.ByteString) mode -> + otTracedAction key file mode traceA $ \traceDiagnostics -> do + let diagnostics _ver diags = do + traceDiagnostics diags + mapM_ (logWith recorder Warning . LogDefineEarlyCutoffRuleNoDiagHasDiag) diags + defineEarlyCutoff' diagnostics (==) reuseValueOnMatch key file old mode $ + const $ second (mempty,) <$> op key file defineNoFile :: IdeRule k v => Recorder (WithPriority Log) -> (k -> Action v) -> Rules () defineNoFile recorder f = defineNoDiagnostics recorder $ \k file -> do if file == emptyFilePath then do res <- f k; return (Just res) else fail $ "Rule " ++ show k ++ " should always be called with the empty string for a file" +-- | Body shared by the no-file early-cutoff rule variants: assert the empty +-- file path and wrap the (fingerprint, value) result. +noFileBody + :: Show k + => (k -> Action (BS.ByteString, v)) + -> k -> NormalizedFilePath -> Action (Maybe BS.ByteString, Maybe v) +noFileBody f k file + | file == emptyFilePath = do (hashString, res) <- f k; return (Just hashString, Just res) + | otherwise = fail $ "Rule " ++ show k ++ " should always be called with the empty string for a file" + defineEarlyCutOffNoFile :: IdeRule k v => Recorder (WithPriority Log) -> (k -> Action (BS.ByteString, v)) -> Rules () -defineEarlyCutOffNoFile recorder f = defineEarlyCutoff recorder $ RuleNoDiagnostics $ \k file -> do - if file == emptyFilePath then do (hashString, res) <- f k; return (Just hashString, Just res) else - fail $ "Rule " ++ show k ++ " should always be called with the empty string for a file" +defineEarlyCutOffNoFile recorder f = defineEarlyCutoff recorder $ RuleNoDiagnostics $ noFileBody f + +-- | Like 'defineEarlyCutOffNoFile', but reuses the previously cached value +-- when the early-cutoff fingerprint matches the prior run. Preserves pointer +-- identity across rebuilds — important for large shared values +-- (e.g. ModuleGraph) so downstream rules whose deps did not change continue +-- to share a pointer with the cache, rather than accumulating multiple +-- structurally equivalent copies. +-- +-- Only use for rules whose action has no side effects that depend on the +-- freshly computed value (no global registration, no unloading the prior). +defineEarlyCutOffNoFileReuseValue :: IdeRule k v => Recorder (WithPriority Log) -> (k -> Action (BS.ByteString, v)) -> Rules () +defineEarlyCutOffNoFileReuseValue recorder f = defineEarlyCutoff recorder $ RuleNoDiagnosticsReuseValue $ noFileBody f defineEarlyCutoff' :: forall k v. IdeRule k v => (Maybe Int32 -> [FileDiagnostic] -> Action ()) -- ^ update diagnostics -- | compare current and previous for freshness -> (BS.ByteString -> BS.ByteString -> Bool) + -> Bool -- ^ reuse cached value on cutoff match (see Note below) -> k -> NormalizedFilePath -> Maybe BS.ByteString -> RunMode -> (Value v -> Action (Maybe BS.ByteString, IdeResult v)) -> Action (RunResult (A (RuleResult k))) -defineEarlyCutoff' doDiagnostics cmp key file mbOld mode action = do +defineEarlyCutoff' doDiagnostics cmp reuseValueOnMatch key file mbOld mode action = do ShakeExtras{state, progress, dirtyKeys} <- getShakeExtras options <- getIdeOptions let trans g x = withRunInIO $ \run -> g (run x) @@ -1288,7 +1328,7 @@ defineEarlyCutoff' doDiagnostics cmp key file mbOld mode action = do pure (Nothing, ([ideErrorText file (T.pack $ show (key, file) ++ show e) | not $ isBadDependency e],Nothing)) ver <- estimateFileVersionUnsafely key mbRes file - (bs, res) <- case mbRes of + (bs, freshRes) <- case mbRes of Nothing -> do pure (toShakeValue ShakeStale mbBs, staleV) Just v -> pure (maybe ShakeNoCutoff ShakeResult mbBs, Succeeded ver v) @@ -1299,6 +1339,30 @@ defineEarlyCutoff' doDiagnostics cmp key file mbOld mode action = do -- If we do not have a previous result -- or we got ShakeNoCutoff we always return False. _ -> False + -- Pointer-identity preservation on early-cutoff match (opt-in). + -- When the cutoff fingerprint matches the prior, reuse the + -- previously cached value instead of the freshly computed + -- (but structurally equivalent) one. This avoids accumulating + -- multiple equivalent copies of large shared values like + -- ModuleGraph across rebuilds. + -- + -- Only safe for rules whose action body has no + -- value-dependent side effects (e.g. registering the fresh + -- result in a global table and unloading the stale one). + -- For such rules, returning the cached value would leave + -- downstream pointing at a representation that the action + -- just invalidated. Opt in by passing reuseValueOnMatch=True. + -- Reuse only a clean prior value (Stale Nothing _): that is + -- the shape a value successfully computed by this rule takes + -- (see the Just Succeeded branch above). A Stale (Just _) + -- instead carries a PositionDelta and is only ever written by + -- the persistent-rule fallback (lastValueIO), i.e. a value + -- loaded from disk rather than produced by this rule's action + -- this run, so it is not a trustworthy prior to reuse; fall + -- through to the fresh result in that case. + let res = case (reuseValueOnMatch, eq, staleV, freshRes) of + (True, True, Stale Nothing _ oldV, Succeeded sver _) -> Succeeded sver oldV + _ -> freshRes return $ RunResult (if eq then ChangedRecomputeSame else ChangedRecomputeDiff) (encodeShakeValue bs) diff --git a/shake-bench/shake-bench.cabal b/shake-bench/shake-bench.cabal index 3d2a7d3174..e5bee2fc42 100644 --- a/shake-bench/shake-bench.cabal +++ b/shake-bench/shake-bench.cabal @@ -22,6 +22,7 @@ library aeson, base == 4.*, bytestring, + containers, directory, extra >= 1.7.2, filepath, diff --git a/shake-bench/src/Development/Benchmark/Rules.hs b/shake-bench/src/Development/Benchmark/Rules.hs index 2ddcb97cc5..e7daa8a554 100644 --- a/shake-bench/src/Development/Benchmark/Rules.hs +++ b/shake-bench/src/Development/Benchmark/Rules.hs @@ -71,10 +71,13 @@ import Data.Aeson (FromJSON (..), ToJSON (..), import Data.Aeson.Lens (AsJSON (_JSON), _Object, _String) import Data.ByteString.Lazy (ByteString) import Data.Char (isDigit) -import Data.List (find, intercalate, isInfixOf, - isSuffixOf, stripPrefix, transpose) +import Data.List (find, foldl', intercalate, + isInfixOf, isPrefixOf, isSuffixOf, + sortBy, stripPrefix, transpose) import Data.List.Extra (lower, splitOn) +import qualified Data.Map.Strict as Map import Data.Maybe (fromMaybe) +import Data.Ord (Down (..), comparing) import Data.String (fromString) import Data.Text (Text) import qualified Data.Text as T @@ -325,31 +328,180 @@ benchRules build MkBenchRules{..} = do BenchProject {..} liftIO $ case prof of NoProfiling -> writeFile outHp dummyHp - _ -> return () - - -- extend csv output with allocation data + _ -> do + -- Write a co-located top-N heap summary derived from the .hp + -- file. Untracked side-effect; regenerated whenever the .hp + -- is. Skipped under NoProfiling, where outHp is only a dummy. + hp <- readFile outHp + writeFile (outHp <.> "csv") (summarizeHpProfile 20 hp) + + -- extend csv output with allocation data + GC/pause/productivity csvContents <- liftIO $ lines <$> readFile outcsv let header = head csvContents results = tail csvContents - header' = header <> ", maxResidency, allocatedBytes" + extraHeaders = + [ "maxResidency" + , "allocatedBytes" + , "maxSlop" + , "mutTimeS" + , "gcTimeS" + , "maxGcPauseS" + , "productivityPct" + , "majorGCs" + , "minorGCs" + ] + header' = header <> ", " <> intercalate ", " extraHeaders results' <- forM results $ \row -> do - (maxResidency, allocations) <- liftIO - (parseMaxResidencyAndAllocations <$> readFile outGc) - return $ printf "%s, %s, %s" row (showMB maxResidency) (showMB allocations) + stats <- liftIO (parseRTSStats <$> readFile outGc) + return $ intercalate ", " + [ row + , showMB (rtsMaxResidency stats) + , showMB (rtsAllocatedBytes stats) + , showMB (rtsMaxSlop stats) + , showSeconds (rtsMutTime stats) + , showSeconds (rtsGcTime stats) + , showSeconds (rtsMaxGcPause stats) + , showDouble (rtsProductivityPct stats) + , showInt (rtsMajorGCs stats) + , showInt (rtsMinorGCs stats) + ] let csvContents' = header' : results' writeFileLines outcsv csvContents' where + -- A negative value is parseRTSStats's "field missing" sentinel; clamp + -- to zero so the column stays numeric for downstream CSV parsing. showMB :: Int -> String - showMB x = show (x `div` 2^(20::Int)) <> "MB" + showMB x = show (max 0 x `div` 2^(20::Int)) <> "MB" + showSeconds :: Double -> String + showSeconds = printf "%.4f" . max 0 + showDouble :: Double -> String + showDouble = printf "%.2f" . max 0 + showInt :: Int -> String + showInt = show . max (0 :: Int) + +-- | Summarize a GHC heap profile (.hp) file into a top-N cost-centre CSV. +-- For each cost-centre name we report the maximum retained bytes across +-- all samples, plus the final-sample retained bytes (useful for spotting +-- leaks that grow without bound). +summarizeHpProfile :: Int -> String -> String +summarizeHpProfile topN raw = + let ls = lines raw + samples = collectSamples ls + accumOne acc (cc, bs) = Map.insertWith combine cc (bs, bs) acc + where combine (newMax, newLast) (oldMax, _) = + let m = max newMax oldMax in m `seq` (m, newLast) + -- Aggregate across all samples: track running max and last value per CC. + -- Strict fold + forced max to avoid a thunk chain over the samples. + agg = foldl' (\acc s -> foldl' accumOne acc s) Map.empty samples + sorted = sortBy (comparing (Down . maxOf)) (Map.toList agg) + maxOf (_, (m, _)) = m + header = "costCentre,maxRetainedBytes,finalRetainedBytes" + body = [ cc <> "," <> show m <> "," <> show l + | (cc, (m, l)) <- take topN sorted + ] + in unlines (header : body) + where + -- | Group consecutive non-marker lines between BEGIN_SAMPLE/END_SAMPLE pairs. + collectSamples :: [String] -> [[(String, Integer)]] + collectSamples xs = case dropWhile (not . isBeginSample) xs of + [] -> [] + (_:rest) -> + let (sample, rest') = break isEndSample rest + parsed = [ p | l <- sample, Just p <- [parseSampleLine l] ] + in parsed : case rest' of + [] -> [] + (_:r) -> collectSamples r + isBeginSample = ("BEGIN_SAMPLE" `isPrefixOf`) . dropWhile (== ' ') + isEndSample = ("END_SAMPLE" `isPrefixOf`) . dropWhile (== ' ') + +-- | Parse one cost-centre line from a heap profile sample, e.g. +-- @"GHC.Conc.Sync.CAF" 5678@ or @PINNED 1234@. The cost-centre name may +-- contain spaces (when quoted), so we treat everything before the trailing +-- number as the name. +parseSampleLine :: String -> Maybe (String, Integer) +parseSampleLine l = case reverse (words l) of + (numStr:rest) -> do + n <- readMaybe numStr + let name = unwords (reverse rest) + if null name then Nothing else Just (name, n) + _ -> Nothing + +-- | Stats extracted from `+RTS -s` (or -S) summary output. +data RTSStats = RTSStats + { rtsMaxResidency :: !Int + , rtsAllocatedBytes :: !Int + , rtsMaxSlop :: !Int + , rtsMutTime :: !Double -- ^ MUT time (seconds) + , rtsGcTime :: !Double -- ^ GC time (seconds) + , rtsMaxGcPause :: !Double -- ^ Max GC pause across all generations (seconds) + , rtsProductivityPct :: !Double -- ^ Productivity (% of total user time) + , rtsMajorGCs :: !Int -- ^ Gen 1 collections + , rtsMinorGCs :: !Int -- ^ Gen 0 collections + } --- Parse the max residency and allocations in RTS -s output -parseMaxResidencyAndAllocations :: String -> (Int, Int) -parseMaxResidencyAndAllocations input = - (f "maximum residency", f "bytes allocated in the heap") +-- | Parse the summary block at the tail of `+RTS -s`/`-S` output. +-- Each field is best-effort; missing fields return -1. +parseRTSStats :: String -> RTSStats +parseRTSStats input = RTSStats + { rtsMaxResidency = digitsFirstWord "maximum residency" + , rtsAllocatedBytes = digitsFirstWord "bytes allocated in the heap" + , rtsMaxSlop = digitsFirstWord "maximum slop" + , rtsMutTime = secondsAt 2 "MUT time" + , rtsGcTime = secondsAt 2 "GC time" + , rtsMaxGcPause = parseMaxPause + , rtsProductivityPct = parseProductivity + , rtsMajorGCs = parseGenColls "Gen 1" + , rtsMinorGCs = parseGenColls "Gen 0" + } where - inps = reverse $ lines input - f label = case find (label `isInfixOf`) inps of - Just l -> read $ filter isDigit $ head $ words l + ls = lines input + -- Match summary labels tolerant of GHC's inter-column spacing, which + -- varies across versions (e.g. "MUT time" vs "MUT time"); both the + -- label and each line are whitespace-normalised before comparison. + findLine label = let needle = unwords (words label) + in find ((needle `isInfixOf`) . unwords . words) ls + digitsFirstWord label = case findLine label of + Just l -> readIntDef $ filter isDigit $ head (words l ++ [""]) + Nothing -> -1 + readIntDef "" = -1 + readIntDef s = fromMaybe (-1) (readMaybe s) + readDoubleDef s = fromMaybe (-1) (readMaybe (takeWhile (\c -> isDigit c || c == '.' || c == '-') s)) + -- Parse the time value (in seconds) at word index `i` (after dropping the label + -- words). For lines like " MUT time 0.012s ( 0.012s elapsed)". + secondsAt i label = case findLine label of + Just l -> case drop i (words l) of + (w:_) -> readDoubleDef w + _ -> -1 + Nothing -> -1 + -- "Max pause" appears on each Gen line; take the worst. + parseMaxPause = + let candidates = + [ readDoubleDef w + | l <- ls + , "Gen " `isInfixOf` l + , let ws = words l + , not (null ws) + , w <- take 1 (reverse ws) + ] + valid = filter (>= 0) candidates + in if null valid then -1 else maximum valid + -- "Productivity 81.2% of total user, ..." + parseProductivity = case findLine "Productivity" of + Just l -> + let ws = words l + in case ws of + (_:n:_) -> readDoubleDef n + _ -> -1 + Nothing -> -1 + -- " Gen 0 1 colls, 0 par ..." → 1 + parseGenColls genLabel = case findLine genLabel of + Just l -> + let ws = words l + -- Drop "Gen N" prefix, then first numeric word is colls count. + rest = drop 2 ws + in case rest of + (n:_) -> readIntDef (filter isDigit n) + _ -> -1 Nothing -> -1