haskell · parsonsmatt · May 27, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
@@ -140,5 +140,8 @@ runs:
     # installed besides the ones we explicitly want.
     - name: "Remove ghcup caches"
       if: runner.os == 'Linux'
-      run: ghcup gc --ghc-old --share-dir --hls-no-ghc --cache --tmpdirs --unset
+      # NB: do not pass --unset here. haskell-actions/setup leaves the active
+      # GHC in an "unset" state, and --unset would then delete its binary,
+      # breaking the subsequent `cabal build`.
+      run: ghcup gc --ghc-old --share-dir --hls-no-ghc --cache --tmpdirs
       shell: bash
diff --git a/bench/Main.hs b/bench/Main.hs
@@ -64,7 +64,7 @@ import           Development.Shake           (Action,
                                               actionBracket, addOracle,
                                               askOracle, command, command_,
                                               getDirectoryFiles, liftIO, need,
-                                              newCache, shakeArgsWith,
+                                              newCache, phony, shakeArgsWith,
                                               shakeOptions, versioned, want)
 import           Development.Shake.Classes
 import           Experiments.Types           (Example (exampleName),
@@ -168,6 +168,17 @@ createBuildSystem config = do
   whenJust (profileInterval configStatic) $ \i -> do
     phonyRules "profiled-" binaryName (CheapHeapProfiling i) build (examples configStatic)
 
+  -- Fast smoke target for local iteration: smallest example, cheapest experiments, HEAD only.
+  let smokeExample = "DummyLevel0M01NoTH"
+      smokeExperiments = ["edit", "edit-header", "documentSymbols after edit"]
+      smokeConfigs = [ confName | ConfigurationDescriptor{..} <- configurations configStatic ]
+  phony "smoke" $ need
+    [ build </> "unprofiled" </> smokeExample </> "HEAD" </> conf
+            </> escaped (escapeExperiment (Unescaped experiment)) <.> "csv"
+    | conf <- smokeConfigs
+    , experiment <- smokeExperiments
+    ]
+
   return configStatic
 
 disableAllPluginsBut :: (PluginId -> Bool) -> Plugin.Config

diff --git a/bench/MultiLayerModules.sh b/bench/MultiLayerModules.sh
@@ -1,9 +1,30 @@
 #!/usr/bin/env bash
-# Generate $DEPTH layers of modules with $WIDTH modules on each layer
-# Every module on layer N imports all the modules on layer N-1
-# MultiLayerModules.hs imports all the modules from the last layer
+# Generate $DEPTH layers of modules with $WIDTH modules on each layer.
+# Every module on layer N imports all the modules on layer N-1.
+# MultiLayerModules.hs imports all the modules from the last layer.
+# Flags (any order): --th, --depth N, --width N.
 DEPTH=15
 WIDTH=40
+TH=
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --th)
+      TH=1
+      shift
+      ;;
+    --depth)
+      DEPTH=${2:?"--depth requires a value"}
+      shift 2
+      ;;
+    --width)
+      WIDTH=${2:?"--width requires a value"}
+      shift 2
+      ;;
+    *)
+      shift
+      ;;
+  esac
+done
 cat >hie.yaml << EOF
 cradle:
   direct:
@@ -22,13 +43,11 @@ for l in $(seq 1 $DEPTH); do
     done
   done
 done
-case "$1" in
-  '--th')
-    echo "{-# LANGUAGE TemplateHaskell #-}" > MultiLayerModules.hs
-    ;;
-esac
+if [[ -n "$TH" ]]; then
+  echo "{-# LANGUAGE TemplateHaskell #-}" > MultiLayerModules.hs
+fi
 echo "module MultiLayerModules where" >> MultiLayerModules.hs
-  echo "          - MultiLayerModules.hs" >> hie.yaml;
+echo "          - MultiLayerModules.hs" >> hie.yaml;
 for j in $(seq -w 1 $WIDTH); do
   echo "import DummyLevel${DEPTH}M$j" >> MultiLayerModules.hs;
 done
diff --git a/bench/config.yaml b/bench/config.yaml
@@ -77,6 +77,23 @@ examples:
     modules:
         - DummyLevel1M01.hs
 
+  # Large synthetic project: 200 layers × 50 modules/layer = 10000 modules,
+  # ~500k import edges (each module imports every module of the prior layer).
+  # This is closer in scale to a real production Haskell codebase than the
+  # default MultiLayerModules. Expect this to be slow: initial cradle load
+  # alone takes many minutes, and each experiment runs `samples:` rounds.
+  # Recommended: drop `samples:` to ~10 and run via single-experiment targets
+  # (e.g. `bench-results/unprofiled/MultiLayerModulesXL/HEAD/All/edit.csv`)
+  # rather than the full matrix.
+  - name: MultiLayerModulesXL
+    path: bench/MultiLayerModules.sh
+    script: True
+    script-args: ["--depth", "200", "--width", "50"]
+    modules:
+        - MultiLayerModules.hs
+        - DummyLevel0M01.hs
+        - DummyLevel100M01.hs
+
   # Small but heavily multi-component example
   # Disabled as it is far to slow. hie-bios >0.7.2 should help
   # - name: HLS
@@ -114,6 +131,7 @@ experiments:
     - "hole fit suggestions"
     - "eval execute single-line code lens"
     - "eval execute multi-line code lens"
+    - "memory pressure"
 
 # An ordered list of versions to analyze
 versions:

diff --git a/bench/run_pass2_costcentre.sh b/bench/run_pass2_costcentre.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+# Pass 2: cost-centre profiled benchmark.
+#
+# Requires bench-results/binaries/HEAD-prof/haskell-language-server to exist
+# (built with --enable-profiling --profiling-detail=late).
+#
+# For each (example, experiment) pair, invoke ghcide-bench directly with
+# +RTS -p -hc -i1 -po<stem> -RTS so we get:
+#   <stem>.prof       — time + allocation by cost-centre
+#   <stem>.hp         — heap residency by cost-centre over time
+#   <stem>.hp.csv     — top-N cost-centres (produced by parseHpProfile, but
+#                       NOTE: only the bench harness invokes that. For Pass 2
+#                       we'd need to run summarizeHpProfile separately. See
+#                       end of script.)
+set -euo pipefail
+
+HLS=bench-results/binaries/HEAD-prof/haskell-language-server
+OUTDIR=bench-results/prof-cc
+SAMPLES=20
+
+if [[ ! -x "$HLS" ]]; then
+    echo "ERROR: profiled HLS not found at $HLS" >&2
+    exit 1
+fi
+
+run_experiment() {
+    local example_name=$1
+    local experiment=$2
+    shift 2
+    local example_args=("$@")
+
+    local exp_slug=${experiment// /_}
+    local outdir="$OUTDIR/$example_name/$exp_slug"
+    mkdir -p "$outdir"
+    local stem="$(pwd)/$outdir/$exp_slug"
+
+    echo
+    echo "=== $example_name :: $experiment ==="
+
+    cabal exec -- ghcide-bench \
+        --timeout=600 \
+        --no-clean \
+        --samples="$SAMPLES" \
+        --csv="$outdir/$exp_slug.csv" \
+        --ghcide="$(pwd)/$HLS" \
+        --select "$experiment" \
+        "${example_args[@]}" \
+        --ghcide-options=+RTS \
+        --ghcide-options=-p \
+        --ghcide-options=-hc \
+        --ghcide-options=-i1 \
+        --ghcide-options=-po"$stem" \
+        --ghcide-options=-l \
+        --ghcide-options=-ol"$stem.eventlog" \
+        --ghcide-options=-S"$stem.gcStats.log" \
+        --ghcide-options=-RTS \
+        < /dev/null || echo "  FAILED — continuing"
+}
+
+# ---------- Example: cabal-3.16.1.0 ----------
+CABAL_ARGS=(
+    --example-package-name Cabal
+    --example-package-version 3.16.1.0
+    --example-name cabal
+    --example-module=src/Distribution/Simple.hs
+    --example-module=src/Distribution/Types/ComponentLocalBuildInfo.hs
+)
+for exp in "memory pressure" "completions after typing burst" "hover after typing burst" "edit" "semanticTokens after typing burst"; do
+    run_experiment cabal "$exp" "${CABAL_ARGS[@]}"
+done
+
+# ---------- Example: MultiLayerModulesNoTH ----------
+MLM_ARGS=(
+    --example-script "$(pwd)/bench/MultiLayerModules.sh"
+    --example-name MultiLayerModulesNoTH
+    --example-module=MultiLayerModules.hs
+    --example-module=DummyLevel0M01.hs
+    --example-module=DummyLevel1M01.hs
+)
+for exp in "memory pressure" "edit" "hover after typing burst"; do
+    run_experiment MultiLayerModulesNoTH "$exp" "${MLM_ARGS[@]}"
+done
+
+echo
+echo "=== Pass 2 complete ==="
+echo "Profile artefacts under $OUTDIR/"
+echo "Inspect .prof files for time+alloc attribution: head -50 $OUTDIR/cabal/memory_pressure/memory_pressure.prof"
diff --git a/ghcide-bench/src/Experiments.hs b/ghcide-bench/src/Experiments.hs
@@ -352,6 +352,45 @@ experiments =
               )
         ),
       ---------------------------------------------------------------------------------------
+      ---------------------------------------------------------------------------------------
+      -- Memory-pressure experiment: hammers the server with a variety of
+      -- requests across every open document, interleaved with edits, to force
+      -- the IDE to retain caches/results across many rules. Each sample is one
+      -- "round" so the resulting peak/post-GC heap (via RTS -s) reflects the
+      -- worst case under sustained load. The default sample count makes this
+      -- much longer than other experiments — override with --samples when you
+      -- only need a quick smoke check.
+      bench "memory pressure" $ \docs -> do
+        -- Round 1: typing burst across all docs.
+        applyTypingBurst docs
+        _ <- waitForBuildQueue
+        -- Round 2: exercise hover + getDefinition + completions on every
+        -- doc that has an identifier position. These populate per-rule
+        -- caches across the whole project.
+        forM_ docs $ \DocumentPositions{..} -> do
+          forM_ identifierP $ \p -> do
+            _ <- getHover doc p
+            _ <- getDefinitions doc p
+            _ <- getCompletions doc p
+            return ()
+        _ <- waitForBuildQueue
+        -- Round 3: code actions across every doc — these tend to pull in
+        -- additional rules (suggestions, refactor previews, etc.).
+        forM_ docs $ \DocumentPositions{..} -> do
+          forM_ identifierP $ \p ->
+            void $ getCodeActions doc (Range p p)
+        _ <- waitForBuildQueue
+        -- Round 4: document symbols on every doc — populates the symbol cache.
+        forM_ docs $ \DocumentPositions{..} ->
+          void $ getDocumentSymbols doc
+        _ <- waitForBuildQueue
+        -- Round 5: another typing burst, to ensure the caches above survive
+        -- invalidation rather than getting dropped — this is what catches
+        -- "we recompute everything from scratch on every edit" regressions.
+        applyTypingBurst docs
+        _ <- waitForBuildQueue
+        return True,
+      ---------------------------------------------------------------------------------------
       benchWithSetup
         "eval execute multi-line code lens"
         ( mapM_ $ \DocumentPositions{..} -> do
@@ -533,6 +572,13 @@ runBenchmarksFun dir allBenchmarks = do
         , "rulesTotal"
         , "ruleEdges"
         , "ghcRebuilds"
+        , "userP50"
+        , "userP95"
+        , "userP99"
+        , "userStdDev"
+        , "delayedP50"
+        , "delayedP95"
+        , "delayedP99"
         ]
       rows =
         [ [ name,
@@ -552,11 +598,20 @@ runBenchmarksFun dir allBenchmarks = do
             show rulesVisited,
             show rulesTotal,
             show edgesTotal,
-            show rebuildsTotal
+            show rebuildsTotal,
+            showMs (quantileSorted 0.50 sortedUserWaits),
+            showMs (quantileSorted 0.95 sortedUserWaits),
+            showMs (quantileSorted 0.99 sortedUserWaits),
+            showMs (stdDev   userWaitsSamples),
+            showMs (quantileSorted 0.50 sortedDelayedWork),
+            showMs (quantileSorted 0.95 sortedDelayedWork),
+            showMs (quantileSorted 0.99 sortedDelayedWork)
           ]
           | (Bench {name, samples}, BenchRun {..}) <- results,
             let runSetup' = if runSetup < 0.01 then 0 else runSetup
                 modules = fromIntegral $ length $ exampleModules $ example ?config
+                sortedUserWaits   = sort userWaitsSamples
+                sortedDelayedWork = sort delayedWorkSamples
         ]
       csv = unlines $ map (intercalate ", ") (headers : rows)
   writeFile (outputCSV ?config) csv
@@ -625,6 +680,10 @@ data BenchRun = BenchRun
     runExperiment        :: !Seconds,
     userWaits            :: !Seconds,
     delayedWork          :: !Seconds,
+    -- | Per-sample user-time samples (length == samples on success).
+    userWaitsSamples     :: ![Seconds],
+    -- | Per-sample delayed-work samples (length == samples on success).
+    delayedWorkSamples   :: ![Seconds],
     firstResponse        :: !Seconds,
     firstResponseDelayed :: !Seconds,
     rulesBuilt           :: !Int,
@@ -637,7 +696,27 @@ data BenchRun = BenchRun
   }
 
 badRun :: BenchRun
-badRun = BenchRun 0 0 0 0 0 0 0 0 0 0 0 0 0 False
+badRun = BenchRun 0 0 0 0 0 [] [] 0 0 0 0 0 0 0 0 False
+
+-- | Approximate quantile (nearest-rank) of an /already-sorted/ list of
+-- samples. Good enough for benchmark reporting at n>=20. Sorting is the
+-- caller's responsibility so that several quantiles over the same samples
+-- can share a single sort.
+quantileSorted :: Double -> [Seconds] -> Seconds
+quantileSorted _ [] = 0
+quantileSorted q sorted =
+    let n   = length sorted
+        idx = min (n - 1) $ floor (q * fromIntegral (n - 1))
+    in sorted !! idx
+
+-- | Population standard deviation.
+stdDev :: [Seconds] -> Seconds
+stdDev [] = 0
+stdDev xs =
+    let n  = fromIntegral (length xs)
+        m  = sum xs / n
+        sq x = let d = x - m in d * d
+    in sqrt (sum (map sq xs) / n)
 
 waitForProgressStart :: Session ()
 waitForProgressStart = void $ do
@@ -688,21 +767,26 @@ runBench runSess Bench{..} = handleAny (\e -> print e >> return badRun)
 
       liftIO $ output $ "Running " <> name <> " benchmark"
       (runSetup, ()) <- duration $ benchSetup docs
-      let loop' (Just timeForFirstResponse) !userWaits !delayedWork 0 = return $ Just (userWaits, delayedWork, timeForFirstResponse)
-          loop' timeForFirstResponse !userWaits !delayedWork n = do
+      -- Accumulate per-sample (t, td) in reverse order; we reverse once at the end.
+      let loop' (Just timeForFirstResponse) !samplesAcc 0 = return $ Just (reverse samplesAcc, timeForFirstResponse)
+          loop' timeForFirstResponse !samplesAcc n = do
             (t, res) <- duration $ experiment docs
             if not res
               then return Nothing
             else do
                 output (showDuration t)
                 -- Wait for the delayed actions to finish
                 td <- waitForBuildQueue
-                loop' (timeForFirstResponse <|> Just (t,td)) (userWaits+t) (delayedWork+td) (n -1)
+                loop' (timeForFirstResponse <|> Just (t,td)) ((t,td):samplesAcc) (n -1)
           loop = loop' Nothing
 
-      (runExperiment, result) <- duration $ loop 0 0 samples
+      (runExperiment, result) <- duration $ loop [] samples
       let success = isJust result
-          (userWaits, delayedWork, (firstResponse, firstResponseDelayed)) = fromMaybe (0,0,(0,0)) result
+          (perSample, (firstResponse, firstResponseDelayed)) = fromMaybe ([], (0,0)) result
+          userWaitsSamples = map fst perSample
+          delayedWorkSamples = map snd perSample
+          userWaits = sum userWaitsSamples
+          delayedWork = sum delayedWorkSamples
 
       rulesTotal <- length <$> getStoredKeys
       rulesBuilt <- either (const 0) length <$> getBuildKeysBuilt

@@ -639,7 +639,7 @@ getFileHashRule recorder =
         return (Just (fingerprintToBS fileHash), ([], Just fileHash))
 
 getModuleGraphRule :: Recorder (WithPriority Log) -> Rules ()
-getModuleGraphRule recorder = defineEarlyCutOffNoFile (cmapWithPrio LogShake recorder) $ \GetModuleGraph -> do
+getModuleGraphRule recorder = defineEarlyCutOffNoFileReuseValue (cmapWithPrio LogShake recorder) $ \GetModuleGraph -> do
   fs <- toKnownFiles <$> useNoFile_ GetKnownTargets
   dependencyInfoForFiles (HashSet.toList fs)