From 49baa96e29c6f1a3357c067b832e0aa0040b12fd Mon Sep 17 00:00:00 2001 From: Sebastian Webber Date: Wed, 4 Feb 2026 22:50:39 -0300 Subject: [PATCH 1/4] docs: clarify total_cpu expects logical cores Add documentation to clarify that total_cpu parameter should represent logical CPU cores (including hyperthreading), not just physical cores. This resolves confusion about whether an 8-core CPU with hyperthreading should report 8 or 16 cores. The API is designed to work with logical cores (16 in this example) as PostgreSQL benefits from hyperthreading with up to 15% performance improvement. Added documentation in: - Input struct with inline comments and examples - CLI help text for --cpus flag - README with explanation and rationale Closes #34 Signed-off-by: Sebastian Webber --- README.md | 11 +++++++++++ cmd/pgconfigctl/cmd/tune.go | 2 +- pkg/input/input.go | 3 +++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 553520f..37c402b 100644 --- a/README.md +++ b/README.md @@ -51,6 +51,17 @@ PGConfig.org API v2. ## License [![FOSSA Status](https://app.fossa.com/api/projects/git%2Bgithub.com%2Fpgconfig%2Fapi.svg?type=large)](https://app.fossa.com/projects/git%2Bgithub.com%2Fpgconfig%2Fapi?ref=badge_large) +## CPU Core Counting + +The API expects the `total_cpu` parameter to represent the total number of **logical CPU cores**, which includes hyperthreading. This is the standard output from: +- Linux/Unix: `nproc` command +- Go: `runtime.NumCPU()` +- Windows: Total processor count in Task Manager + +**Example**: A system with 8 physical cores and hyperthreading enabled has 16 logical cores. Use `total_cpu=16`. + +**Why logical cores?** Modern PostgreSQL (2017-2025) benefits from hyperthreading with [up to 15% performance improvement](https://www.cybertec-postgresql.com/en/experimenting-scaling-full-parallelism-postgresql/). 
The tuning formulas for `max_worker_processes`, `max_parallel_workers`, and `io_workers` are designed to work with logical core counts. + ## Rules Engine The configuration is adjusted by a rules engine based on the environment. diff --git a/cmd/pgconfigctl/cmd/tune.go b/cmd/pgconfigctl/cmd/tune.go index 20194fc..36c7ae2 100644 --- a/cmd/pgconfigctl/cmd/tune.go +++ b/cmd/pgconfigctl/cmd/tune.go @@ -97,7 +97,7 @@ func init() { tuneCmd.PersistentFlags().StringVarP(&arch, "arch", "", runtime.GOARCH, "PostgreSQL Version") tuneCmd.PersistentFlags().StringVarP(&diskType, "disk-type", "D", "SSD", "Disk type (possible values are SSD, HDD and SAN)") tuneCmd.PersistentFlags().Float32VarP(&pgVersion, "version", "", defaults.PGVersionF, "PostgreSQL Version") - tuneCmd.PersistentFlags().IntVarP(&totalCPU, "cpus", "c", runtime.NumCPU(), "Total CPU cores") + tuneCmd.PersistentFlags().IntVarP(&totalCPU, "cpus", "c", runtime.NumCPU(), "Total logical CPU cores (includes hyperthreading)") tuneCmd.PersistentFlags().MarkDeprecated("env-name", "please use --profile instead") tuneCmd.PersistentFlags().IntVarP(&maxConnections, "max-connections", "M", 100, "Max expected connections") tuneCmd.PersistentFlags().BoolVarP(&includePgbadger, "include-pgbadger", "B", false, "Include pgbadger params?") diff --git a/pkg/input/input.go b/pkg/input/input.go index a7d0a8b..ab307f7 100644 --- a/pkg/input/input.go +++ b/pkg/input/input.go @@ -13,6 +13,9 @@ type Input struct { Profile profile.Profile `json:"profile"` DiskType string `json:"disk_type"` MaxConnections int `json:"max_connections"` + // TotalCPU represents the total number of logical CPU cores (including hyperthreading). + // Use runtime.NumCPU() or the output of `nproc` command to get this value. + // For CPUs with hyperthreading: 8 physical cores × 2 threads = 16 logical cores. 
TotalCPU int `json:"total_cpu"` PostgresVersion float32 `json:"postgres_version"` } From af895895d55fa7da7c48081797bee5094f911185 Mon Sep 17 00:00:00 2001 From: Sebastian Webber Date: Wed, 4 Feb 2026 23:16:11 -0300 Subject: [PATCH 2/4] fix: profile constants case sensitivity Change Mixed and Desktop profile constants from mixed case to uppercase to match Set() method behavior which converts input to uppercase. Before this fix, pgconfigctl tune rejected --profile=Mixed and --profile=Desktop even though they appeared in valid options list. Changes: - Update Mixed from "Mixed" to "MIXED" constant - Update Desktop from "Desktop" to "DESKTOP" constant - Add comprehensive unit tests for profile.Set() method - Add CLI integration tests validating all profile parsing scenarios - Export RootCmd in root.go to enable testing Closes #22 Signed-off-by: Sebastian Webber --- cmd/pgconfigctl/cmd/root.go | 7 +- cmd/pgconfigctl/main_test.go | 117 ++++++++++++++++++++++++++++++ pkg/input/profile/profile.go | 4 +- pkg/input/profile/profile_test.go | 109 ++++++++++++++++++++++++++++ 4 files changed, 233 insertions(+), 4 deletions(-) create mode 100644 cmd/pgconfigctl/main_test.go create mode 100644 pkg/input/profile/profile_test.go diff --git a/cmd/pgconfigctl/cmd/root.go b/cmd/pgconfigctl/cmd/root.go index e489e49..c20ada8 100644 --- a/cmd/pgconfigctl/cmd/root.go +++ b/cmd/pgconfigctl/cmd/root.go @@ -34,8 +34,8 @@ import ( var cfgFile string -// rootCmd represents the base command when called without any subcommands -var rootCmd = &cobra.Command{ +// RootCmd represents the base command when called without any subcommands +var RootCmd = &cobra.Command{ Use: "pgconfigctl", Short: "A tool to handle and benchmark your PostgreSQL", // Uncomment the following line if your bare application @@ -43,6 +43,9 @@ var rootCmd = &cobra.Command{ // Run: func(cmd *cobra.Command, args []string) { }, } +// rootCmd is an alias for backwards compatibility +var rootCmd = RootCmd + // Execute adds all child 
commands to the root command and sets flags appropriately. // This is called by main.main(). It only needs to happen once to the rootCmd. func Execute() { diff --git a/cmd/pgconfigctl/main_test.go b/cmd/pgconfigctl/main_test.go new file mode 100644 index 0000000..f5e5b15 --- /dev/null +++ b/cmd/pgconfigctl/main_test.go @@ -0,0 +1,117 @@ +package main + +import ( + "bytes" + "os" + "strings" + "testing" + + "github.com/pgconfig/api/cmd/pgconfigctl/cmd" +) + +// TestTuneProfileParsing validates that all profile types parse correctly +// This addresses issue #22 where Mixed and Desktop profiles were rejected +func TestTuneProfileParsing(t *testing.T) { + tests := []struct { + name string + args []string + wantError bool + errorMsg string + }{ + { + name: "Mixed profile - mixed case (issue #22)", + args: []string{"tune", "--profile=Mixed"}, + wantError: false, + }, + { + name: "Mixed profile - uppercase", + args: []string{"tune", "--profile=MIXED"}, + wantError: false, + }, + { + name: "Mixed profile - lowercase", + args: []string{"tune", "--profile=mixed"}, + wantError: false, + }, + { + name: "Desktop profile - mixed case (issue #22)", + args: []string{"tune", "--profile=Desktop"}, + wantError: false, + }, + { + name: "Desktop profile - uppercase", + args: []string{"tune", "--profile=DESKTOP"}, + wantError: false, + }, + { + name: "Desktop profile - lowercase", + args: []string{"tune", "--profile=desktop"}, + wantError: false, + }, + { + name: "Web profile - uppercase", + args: []string{"tune", "--profile=WEB"}, + wantError: false, + }, + { + name: "Web profile - lowercase", + args: []string{"tune", "--profile=web"}, + wantError: false, + }, + { + name: "OLTP profile", + args: []string{"tune", "--profile=OLTP"}, + wantError: false, + }, + { + name: "DW profile", + args: []string{"tune", "--profile=DW"}, + wantError: false, + }, + { + name: "Invalid profile", + args: []string{"tune", "--profile=invalid"}, + wantError: true, + errorMsg: "must be one of", + }, + } + + 
for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Capture output + outBuf := new(bytes.Buffer) + errBuf := new(bytes.Buffer) + + // Save and restore original output + originalOut := os.Stdout + originalErr := os.Stderr + t.Cleanup(func() { + os.Stdout = originalOut + os.Stderr = originalErr + }) + + // Set command output + cmd.RootCmd.SetOut(outBuf) + cmd.RootCmd.SetErr(errBuf) + cmd.RootCmd.SetArgs(tt.args) + + // Execute command + err := cmd.RootCmd.Execute() + + if tt.wantError { + if err == nil { + t.Errorf("Expected error but got none") + return + } + if tt.errorMsg != "" && !strings.Contains(err.Error(), tt.errorMsg) { + t.Errorf("Error message = %v, want to contain %v", err.Error(), tt.errorMsg) + } + } else { + if err != nil { + t.Errorf("Unexpected error: %v\nOutput: %s\nError output: %s", + err, outBuf.String(), errBuf.String()) + } + } + }) + } +} diff --git a/pkg/input/profile/profile.go b/pkg/input/profile/profile.go index 6f05127..9d3b433 100644 --- a/pkg/input/profile/profile.go +++ b/pkg/input/profile/profile.go @@ -20,11 +20,11 @@ const ( DW Profile = "DW" // Mixed profile - Mixed Profile = "Mixed" + Mixed Profile = "MIXED" // Desktop is the development machine on any non-production server // that needs to consume less resources than a regular server. 
- Desktop Profile = "Desktop" + Desktop Profile = "DESKTOP" ) // AllProfiles Lists all profiles currently available diff --git a/pkg/input/profile/profile_test.go b/pkg/input/profile/profile_test.go new file mode 100644 index 0000000..1c2c82f --- /dev/null +++ b/pkg/input/profile/profile_test.go @@ -0,0 +1,109 @@ +package profile + +import ( + "testing" +) + +func TestProfile_Set(t *testing.T) { + tests := []struct { + name string + input string + want Profile + wantErr bool + }{ + { + name: "Web uppercase", + input: "WEB", + want: Web, + wantErr: false, + }, + { + name: "Web lowercase", + input: "web", + want: Web, + wantErr: false, + }, + { + name: "OLTP uppercase", + input: "OLTP", + want: OLTP, + wantErr: false, + }, + { + name: "OLTP lowercase", + input: "oltp", + want: OLTP, + wantErr: false, + }, + { + name: "DW uppercase", + input: "DW", + want: DW, + wantErr: false, + }, + { + name: "DW lowercase", + input: "dw", + want: DW, + wantErr: false, + }, + { + name: "Mixed uppercase", + input: "MIXED", + want: Mixed, + wantErr: false, + }, + { + name: "Mixed mixed case", + input: "Mixed", + want: Mixed, + wantErr: false, + }, + { + name: "Mixed lowercase", + input: "mixed", + want: Mixed, + wantErr: false, + }, + { + name: "Desktop uppercase", + input: "DESKTOP", + want: Desktop, + wantErr: false, + }, + { + name: "Desktop mixed case", + input: "Desktop", + want: Desktop, + wantErr: false, + }, + { + name: "Desktop lowercase", + input: "desktop", + want: Desktop, + wantErr: false, + }, + { + name: "Invalid profile", + input: "invalid", + want: "", + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var p Profile + err := p.Set(tt.input) + + if (err != nil) != tt.wantErr { + t.Errorf("Profile.Set() error = %v, wantErr %v", err, tt.wantErr) + return + } + + if !tt.wantErr && p != tt.want { + t.Errorf("Profile.Set() = %v, want %v", p, tt.want) + } + }) + } +} From 39a8e4384e84bbda6d8f5d1a08b40ae6bdf2d01a Mon Sep 17 
00:00:00 2001 From: Sebastian Webber Date: Thu, 5 Feb 2026 00:53:02 -0300 Subject: [PATCH 3/4] =?UTF-8?q?fix:=20limit=20work=5Fmem=20and=20maintenan?= =?UTF-8?q?ce=5Fwork=5Fmem=20to=202gb=20on=20windows=20pg=20=E2=89=A4=2017?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Windows PostgreSQL ≤ 17 cannot exceed 2097151 kB (~2GB) for work_mem and maintenance_work_mem due to Windows LLP64 model where sizeof(long) remains 4 even on 64-bit systems. PostgreSQL used MAX_KILOBYTES = INT_MAX/1024 when SIZEOF_LONG <= 4, resulting in the 2GB limit. This was fixed in PostgreSQL 18 by removing the SIZEOF_LONG check, increasing the limit to 2TB. Changes: - Add WindowsMaxWorkMem constant (2097151 KB) - Apply limit in computeOS() for Windows + PostgreSQL < 18.0 - Add comprehensive tests including PostgreSQL 18+ cases - Update rules.yml with version-specific limitation and fix reference Tests verify: - work_mem/maintenance_work_mem capped at ~2GB on Windows PG ≤ 17 - No limitations on Windows PG 18+ - No limitations on Linux/Unix platforms - Correct behavior with high RAM (256GB, 1TB) scenarios References: - PostgreSQL 18 fix: https://www.postgresql.org/message-id/flat/1a01f0-66ec2d80-3b-68487680@27595217 - pgvector issue: https://github.com/pgvector/pgvector/issues/667 - CommitFest patch: https://commitfest.postgresql.org/patch/5343/ Closes #5 Signed-off-by: Sebastian Webber --- pkg/rules/os.go | 20 ++++++++++++ pkg/rules/os_test.go | 77 ++++++++++++++++++++++++++++++++++++++++++++ rules.yml | 4 +++ 3 files changed, 101 insertions(+) diff --git a/pkg/rules/os.go b/pkg/rules/os.go index 4cef9cc..aca0fb3 100644 --- a/pkg/rules/os.go +++ b/pkg/rules/os.go @@ -14,6 +14,15 @@ const ( Linux = "linux" Unix = "unix" Darwin = "darwin" + + // WindowsMaxWorkMem is the maximum work_mem/maintenance_work_mem on Windows for PostgreSQL <= 17 + // PostgreSQL used MAX_KILOBYTES = INT_MAX/1024 when SIZEOF_LONG <= 4 + // Windows LLP64 model has 
sizeof(long)==4 even on 64-bit systems + // This resulted in max value of 2097151 kB (~2GB) on Windows + // Fixed in PostgreSQL 18 by removing SIZEOF_LONG check from MAX_KILOBYTES + // Mailing list: https://www.postgresql.org/message-id/flat/1a01f0-66ec2d80-3b-68487680@27595217 + // Related: https://github.com/pgvector/pgvector/issues/667 + WindowsMaxWorkMem = 2097151 * bytes.KB ) // ValidOS validates the Operating System @@ -43,6 +52,17 @@ func computeOS(in *input.Input, cfg *category.ExportCfg) (*category.ExportCfg, e if in.OS == "windows" { cfg.Storage.EffectiveIOConcurrency = 0 + + // Windows had 2GB limitation for work_mem and maintenance_work_mem on PG <= 17 + // Fixed in PostgreSQL 18: https://www.postgresql.org/message-id/flat/1a01f0-66ec2d80-3b-68487680@27595217 + if in.PostgresVersion < 18.0 { + if cfg.Memory.WorkMem > WindowsMaxWorkMem { + cfg.Memory.WorkMem = WindowsMaxWorkMem + } + if cfg.Memory.MaintenanceWorkMem > WindowsMaxWorkMem { + cfg.Memory.MaintenanceWorkMem = WindowsMaxWorkMem + } + } } return cfg, nil diff --git a/pkg/rules/os_test.go b/pkg/rules/os_test.go index 7c2f6fe..36c9095 100644 --- a/pkg/rules/os_test.go +++ b/pkg/rules/os_test.go @@ -57,5 +57,82 @@ func Test_computeOS(t *testing.T) { So(err, ShouldBeNil) So(out.Memory.SharedBuffers, ShouldBeGreaterThan, 25*bytes.GB) }) + + Convey("should limit work_mem to ~2GB on Windows (issue #5)", func() { + in := fakeInput() + in.OS = Windows + in.TotalRAM = 256 * bytes.GB + in.MaxConnections = 10 + in.PostgresVersion = 16.0 + + cfg := category.NewExportCfg(*in) + // Force work_mem to be higher than the limit + cfg.Memory.WorkMem = 5 * bytes.GB + + out, err := computeOS(in, cfg) + So(err, ShouldBeNil) + So(out.Memory.WorkMem, ShouldEqual, WindowsMaxWorkMem) + So(out.Memory.WorkMem, ShouldBeLessThan, 2*bytes.GB) + }) + + Convey("should limit maintenance_work_mem to ~2GB on Windows (issue #5)", func() { + in := fakeInput() + in.OS = Windows + in.TotalRAM = 256 * bytes.GB + in.PostgresVersion = 
16.0 + + cfg := category.NewExportCfg(*in) + // Force maintenance_work_mem to be higher than the limit + cfg.Memory.MaintenanceWorkMem = 10 * bytes.GB + + out, err := computeOS(in, cfg) + So(err, ShouldBeNil) + So(out.Memory.MaintenanceWorkMem, ShouldEqual, WindowsMaxWorkMem) + So(out.Memory.MaintenanceWorkMem, ShouldBeLessThan, 2*bytes.GB) + }) + + Convey("should not limit work_mem on non-Windows platforms", func() { + in := fakeInput() + in.OS = Linux + in.TotalRAM = 256 * bytes.GB + in.MaxConnections = 10 + in.PostgresVersion = 16.0 + + cfg := category.NewExportCfg(*in) + cfg.Memory.WorkMem = 5 * bytes.GB + + out, err := computeOS(in, cfg) + So(err, ShouldBeNil) + So(out.Memory.WorkMem, ShouldEqual, 5*bytes.GB) + }) + + Convey("should not limit work_mem on Windows with PostgreSQL 18+", func() { + in := fakeInput() + in.OS = Windows + in.TotalRAM = 256 * bytes.GB + in.MaxConnections = 10 + in.PostgresVersion = 18.0 + + cfg := category.NewExportCfg(*in) + cfg.Memory.WorkMem = 5 * bytes.GB + + out, err := computeOS(in, cfg) + So(err, ShouldBeNil) + So(out.Memory.WorkMem, ShouldEqual, 5*bytes.GB) + }) + + Convey("should not limit maintenance_work_mem on Windows with PostgreSQL 18+", func() { + in := fakeInput() + in.OS = Windows + in.TotalRAM = 256 * bytes.GB + in.PostgresVersion = 18.0 + + cfg := category.NewExportCfg(*in) + cfg.Memory.MaintenanceWorkMem = 10 * bytes.GB + + out, err := computeOS(in, cfg) + So(err, ShouldBeNil) + So(out.Memory.MaintenanceWorkMem, ShouldEqual, 10*bytes.GB) + }) }) } diff --git a/rules.yml b/rules.yml index e792062..1bc3eb2 100644 --- a/rules.yml +++ b/rules.yml @@ -35,6 +35,8 @@ categories: Example worst-case: 128MB × 3 operations × 2 workers × 100 connections = **102GB** + **Windows ≤ PostgreSQL 17**: Maximum value is ~2GB (2097151 kB) due to Windows LLP64 model where `sizeof(long)==4` even on 64-bit systems. 
Fixed in [PostgreSQL 18](https://www.postgresql.org/message-id/flat/1a01f0-66ec2d80-3b-68487680@27595217) which increased the limit to 2TB. See also [pgvector issue #667](https://github.com/pgvector/pgvector/issues/667). + Monitor temp file usage with `log_temp_files`. Consider **per-session** tuning (`SET work_mem`) for heavy queries instead of global settings. details: - Specifies the amount of memory to be used by internal sort operations and hash tables before writing to temporary disk files. The value defaults to four megabytes (4MB). Note that for a complex query, several sort or hash operations might be running in parallel; each operation will be allowed to use as much memory as this value specifies before it starts to write data into temporary files. Also, several running sessions could be doing such operations concurrently. Therefore, the total memory used could be many times the value of work_mem; it is necessary to keep this fact in mind when choosing the value. Sort operations are used for ORDER BY, DISTINCT, and merge joins. Hash tables are used in hash joins, hash-based aggregation, and hash-based processing of IN subqueries. @@ -54,6 +56,8 @@ categories: **Important**: Total usage = `maintenance_work_mem × autovacuum_max_workers`. Consider using `autovacuum_work_mem` separately. **PostgreSQL ≤16**: 1GB limit (~179M dead tuples per pass). **PostgreSQL 17+**: No limit (uses radix trees). + + **Windows ≤ PostgreSQL 17**: Maximum value is ~2GB (2097151 kB) due to Windows LLP64 model where `sizeof(long)==4` even on 64-bit systems. Fixed in [PostgreSQL 18](https://www.postgresql.org/message-id/flat/1a01f0-66ec2d80-3b-68487680@27595217) which increased the limit to 2TB. See also [pgvector issue #667](https://github.com/pgvector/pgvector/issues/667). 
recomendations: Adjusting maintenance_work_mem: https://www.cybertec-postgresql.com/en/adjusting-maintenance_work_mem/ How Much maintenance_work_mem Do I Need?: http://rhaas.blogspot.com/2019/01/how-much-maintenanceworkmem-do-i-need.html From 289b917fe36105e6beb64223bc1ff17c86c740af Mon Sep 17 00:00:00 2001 From: Sebastian Webber Date: Thu, 5 Feb 2026 01:58:19 -0300 Subject: [PATCH 4/4] feat: enhance parameter docs with structured alerts Improve readability and highlight critical information using GitHub-style alert blocks (NOTE, TIP, WARNING, IMPORTANT). Convert formulas to code blocks, add backticks to parameters/commands, and reorganize recommendations for better discoverability. Makes warnings about OOM risks, Windows limits, and security practices more prominent and scannable. Signed-off-by: Sebastian Webber --- rules.yml | 165 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 108 insertions(+), 57 deletions(-) diff --git a/rules.yml b/rules.yml index 1bc3eb2..1ea6635 100644 --- a/rules.yml +++ b/rules.yml @@ -4,7 +4,7 @@ categories: abstract: | Allocates shared memory for caching data pages. Acts as PostgreSQL's main disk cache, similar to Oracle's SGA buffer. - Start with **25% of RAM** as a baseline. For optimal tuning, use the **pg_buffercache extension** to analyze cache hit ratios for your specific workload. + Start with **25% of RAM** as a baseline. For optimal tuning, use the `pg_buffercache` extension to analyze cache hit ratios for your specific workload. 
recomendations: Tuning Your PostgreSQL Server: https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server#shared_buffers Determining optimal shared_buffers using pg_buffercache: https://aws.amazon.com/blogs/database/determining-the-optimal-value-for-shared_buffers-using-the-pg_buffercache-extension-in-postgresql/ @@ -27,17 +27,28 @@ categories: Optimize PostgreSQL Server Performance Through Configuration: https://blog.crunchydata.com/blog/optimize-postgresql-server-performance work_mem: abstract: | - Memory per operation for sorts, hash joins, and aggregates. Each query can use **multiple work_mem buffers** simultaneously. - - **⚠️ Warning**: With high concurrency and large datasets, you can easily trigger **OOM kills** in Kubernetes pods or cloud instances. - - Maximum potential memory = `work_mem × operations × parallel_workers × connections` - - Example worst-case: 128MB × 3 operations × 2 workers × 100 connections = **102GB** - - **Windows ≤ PostgreSQL 17**: Maximum value is ~2GB (2097151 kB) due to Windows LLP64 model where `sizeof(long)==4` even on 64-bit systems. Fixed in [PostgreSQL 18](https://www.postgresql.org/message-id/flat/1a01f0-66ec2d80-3b-68487680@27595217) which increased the limit to 2TB. See also [pgvector issue #667](https://github.com/pgvector/pgvector/issues/667). - - Monitor temp file usage with `log_temp_files`. Consider **per-session** tuning (`SET work_mem`) for heavy queries instead of global settings. + Memory per operation for sorts, hash joins, and aggregates. Each query can use multiple `work_mem` buffers simultaneously. + + > [!WARNING] + > With high concurrency and large datasets, you can easily trigger **OOM kills** in Kubernetes pods or cloud instances. 
+ > + > Maximum potential memory: + > ``` + > max = work_mem × operations × parallel_workers × connections + > ``` + > + > Example worst-case: + > ``` + > 128MB × 3 operations × 2 workers × 100 connections = 102GB + > ``` + + > [!NOTE] + > **Windows ≤ PostgreSQL 17**: Maximum value is ~2GB (2097151 kB) due to Windows LLP64 model where `sizeof(long)==4` even on 64-bit systems. + > + > Fixed in [PostgreSQL 18](https://www.postgresql.org/message-id/flat/1a01f0-66ec2d80-3b-68487680@27595217) which increased the limit to 2TB. See also [pgvector issue #667](https://github.com/pgvector/pgvector/issues/667). + + > [!TIP] + > Monitor temp file usage with `log_temp_files`. Consider **per-session** tuning (`SET work_mem`) for heavy queries instead of global settings. details: - Specifies the amount of memory to be used by internal sort operations and hash tables before writing to temporary disk files. The value defaults to four megabytes (4MB). Note that for a complex query, several sort or hash operations might be running in parallel; each operation will be allowed to use as much memory as this value specifies before it starts to write data into temporary files. Also, several running sessions could be doing such operations concurrently. Therefore, the total memory used could be many times the value of work_mem; it is necessary to keep this fact in mind when choosing the value. Sort operations are used for ORDER BY, DISTINCT, and merge joins. Hash tables are used in hash joins, hash-based aggregation, and hash-based processing of IN subqueries. recomendations: @@ -49,15 +60,22 @@ categories: Let's get back to basics - PostgreSQL Memory Components: https://www.postgresql.fastware.com/blog/back-to-basics-with-postgresql-memory-components maintenance_work_mem: abstract: | - Memory for maintenance operations: **VACUUM**, **CREATE INDEX**, **ALTER TABLE**, and autovacuum workers. - - Can be set higher than work_mem since fewer concurrent maintenance operations run. 
- - **Important**: Total usage = `maintenance_work_mem × autovacuum_max_workers`. Consider using `autovacuum_work_mem` separately. - - **PostgreSQL ≤16**: 1GB limit (~179M dead tuples per pass). **PostgreSQL 17+**: No limit (uses radix trees). - - **Windows ≤ PostgreSQL 17**: Maximum value is ~2GB (2097151 kB) due to Windows LLP64 model where `sizeof(long)==4` even on 64-bit systems. Fixed in [PostgreSQL 18](https://www.postgresql.org/message-id/flat/1a01f0-66ec2d80-3b-68487680@27595217) which increased the limit to 2TB. See also [pgvector issue #667](https://github.com/pgvector/pgvector/issues/667). + Memory for maintenance operations: `VACUUM`, `CREATE INDEX`, `ALTER TABLE`, and autovacuum workers. + + Can be set higher than `work_mem` since fewer concurrent maintenance operations run. + + > [!IMPORTANT] + > Total usage: + > ``` + > total = maintenance_work_mem × autovacuum_max_workers + > ``` + > + > Consider using `autovacuum_work_mem` separately. + + > [!NOTE] + > **PostgreSQL ≤16**: 1GB limit (~179M dead tuples per pass). **PostgreSQL 17+**: No limit (uses radix trees). + > + > **Windows ≤ PostgreSQL 17**: Maximum value is ~2GB (2097151 kB) due to Windows LLP64 model where `sizeof(long)==4` even on 64-bit systems. Fixed in [PostgreSQL 18](https://www.postgresql.org/message-id/flat/1a01f0-66ec2d80-3b-68487680@27595217) which increased the limit to 2TB. See also [pgvector issue #667](https://github.com/pgvector/pgvector/issues/667). recomendations: Adjusting maintenance_work_mem: https://www.cybertec-postgresql.com/en/adjusting-maintenance_work_mem/ How Much maintenance_work_mem Do I Need?: http://rhaas.blogspot.com/2019/01/how-much-maintenanceworkmem-do-i-need.html @@ -69,7 +87,7 @@ categories: checkpoint_related: min_wal_size: abstract: | - Minimum size of pg_wal directory (pg_xlog in versions <10). WAL files are **recycled** rather than removed when below this threshold. + Minimum size of `pg_wal` directory (`pg_xlog` in versions <10). 
WAL files are **recycled** rather than removed when below this threshold. Useful to handle **WAL spikes** during batch jobs or high write periods. recomendations: @@ -79,11 +97,12 @@ categories: "Tuning Your Postgres Database for High Write Loads": https://www.crunchydata.com/blog/tuning-your-postgres-database-for-high-write-loads max_wal_size: abstract: | - Triggers checkpoint when pg_wal exceeds this size. Larger values reduce checkpoint frequency but increase crash recovery time. - - **Recommendation**: Set to hold **1 hour of WAL**. Write-heavy systems may need significantly more. + Triggers checkpoint when `pg_wal` exceeds this size. Larger values reduce checkpoint frequency but increase crash recovery time. - Monitor `pg_stat_bgwriter` to ensure most checkpoints are **timed** (not requested). + > [!TIP] + > Set to hold **1 hour of WAL**. Write-heavy systems may need significantly more. + > + > Monitor `pg_stat_bgwriter` to ensure most checkpoints are **timed** (not requested). recomendations: "Basics of Tuning Checkpoints": https://www.enterprisedb.com/blog/basics-tuning-checkpoints "Tuning max_wal_size in PostgreSQL": https://www.enterprisedb.com/blog/tuning-maxwalsize-postgresql @@ -95,8 +114,14 @@ categories: abstract: | Spreads checkpoint writes over this fraction of `checkpoint_timeout` to reduce I/O spikes. - **Example**: `checkpoint_timeout = 5min` and `checkpoint_completion_target = 0.9` - → Checkpoint spreads writes over **270 seconds (4min 30s)**, leaving 30s buffer for sync overhead. + > [!TIP] + > Example: + > ``` + > checkpoint_timeout = 5min + > checkpoint_completion_target = 0.9 + > ``` + > + > Checkpoint spreads writes over **270 seconds (4min 30s)**, leaving 30s buffer for sync overhead. Values higher than 0.9 risk checkpoint delays. Monitor via `pg_stat_bgwriter`. recomendations: @@ -127,22 +152,35 @@ categories: abstract: | Network interfaces PostgreSQL listens on for connections. - **Security**: Default is `localhost` (local-only). 
Never use `*` or `0.0.0.0` exposed to internet. - - Use specific IPs with `pg_hba.conf` rules, or SSH tunnels/VPN for remote access. + > [!WARNING] + > **Security**: Default is `localhost` (local-only). Avoid `*` or `0.0.0.0` exposed to internet. + > + > Use specific IPs with `pg_hba.conf` rules, or SSH tunnels/VPN for remote access. + > + > If exposing PostgreSQL over network, **always enable SSL/TLS** (`ssl = on` + certificates) and enforce `hostssl` in `pg_hba.conf`. recomendations: "PostgreSQL Connections and Authentication": https://www.postgresql.org/docs/current/runtime-config-connection.html "PostgreSQL Security: 12 rules for database hardening": https://www.cybertec-postgresql.com/en/postgresql-security-things-to-avoid-in-real-life/ "Postgres security best practices": https://www.bytebase.com/reference/postgres/how-to/postgres-security-best-practices/ max_connections: abstract: | - Maximum concurrent database connections. Each connection consumes memory (~10MB + work_mem per operation). - - **Best practice**: Use **connection pooling** (PgBouncer, pgpool) instead of high max_connections. - - With pooling: 20-50 connections. Without pooling: 100-200 (but review memory impact). - - Formula: `(RAM - shared_buffers) / (work_mem × avg_operations_per_query)` for rough estimate. + Maximum concurrent database connections. Each connection consumes memory (~10MB + `work_mem` per operation). 
+ + > [!TIP] + > Use **connection pooling** instead of high `max_connections`: + > - [PgBouncer](https://www.pgbouncer.org/) - Lightweight, battle-tested + > - [PgCat](https://github.com/postgresml/pgcat) - Modern, written in Rust + > - [Pgpool-II](https://www.pgpool.net/) - Feature-rich with query caching + > + > | Scenario | Recommended Connections | + > |----------|------------------------| + > | With pooling | 20-50 | + > | Without pooling | 100-200 (review memory impact) | + > + > Memory estimation formula: + > ``` + > max_connections_limit = (RAM - shared_buffers) / (work_mem × avg_operations_per_query) + > ``` recomendations: "Tuning max_connections in PostgreSQL": https://www.cybertec-postgresql.com/en/tuning-max_connections-in-postgresql/ "Why you should use Connection Pooling": https://www.enterprisedb.com/postgres-tutorials/why-you-should-use-connection-pooling-when-setting-maxconnections-postgres @@ -156,13 +194,16 @@ categories: Lower values favor index scans, higher values favor sequential scans. Sequential scans become more efficient when queries return ~5-10% or more of table rows, common in analytical/DW workloads. - **Debate (2025)**: Some experts advocate keeping higher values (4.0) for **plan stability** across cache states, while others recommend lower values (1.1-2.0) for SSD to favor index scans. + > [!NOTE] + > **Ongoing debate (2025)**: Some experts advocate keeping higher values (4.0) for **plan stability** across cache states, while others recommend lower values (1.1-2.0) for SSD to favor index scans. + > + > Check suggested readings #1 and #2 for detailed analysis. Test with `EXPLAIN ANALYZE` to verify query plan choices for your workload. 
recomendations: - "How a single PostgreSQL config change improved slow query performance by 50x": https://amplitude.engineering/how-a-single-postgresql-config-change-improved-slow-query-performance-by-50x-85593b8991b0 - "Better PostgreSQL performance on SSDs": https://www.cybertec-postgresql.com/en/better-postgresql-performance-on-ssds/ "PostgreSQL with modern storage: what about a lower random_page_cost?": https://dev.to/aws-heroes/postgresql-with-modern-storage-what-about-a-lower-randompagecost-5b7f + "Better PostgreSQL performance on SSDs": https://www.cybertec-postgresql.com/en/better-postgresql-performance-on-ssds/ + "How a single PostgreSQL config change improved slow query performance by 50x": https://amplitude.engineering/how-a-single-postgresql-config-change-improved-slow-query-performance-by-50x-85593b8991b0 "Postgres Scan Types in EXPLAIN Plans": https://www.crunchydata.com/blog/postgres-scan-types-in-explain-plans "Tuning Your PostgreSQL Server": https://wiki.postgresql.org/wiki/Tuning_Your_PostgreSQL_Server effective_io_concurrency: @@ -171,22 +212,22 @@ categories: Bitmap scans are used when queries need to fetch moderate result sets (too many rows for index scans, too few for sequential scans) or when combining multiple indexes. They're more common in analytical workloads. - PostgreSQL 18 changes the default from 1 to 16. Values above 200 show diminishing returns in benchmarks. + > [!NOTE] + > **PostgreSQL 18** changes the default from `1` to `16`. Values above `200` show diminishing returns in benchmarks. recomendations: "PostgreSQL: effective_io_concurrency benchmarked": https://portavita.github.io/2019-07-19-PostgreSQL_effective_io_concurrency_benchmarked/ "Bitmap Heap Scan - pganalyze": https://pganalyze.com/docs/explain/scan-nodes/bitmap-heap-scan "PostgreSQL indexing: Index scan vs. Bitmap scan vs. 
Sequential scan (basics)": https://www.cybertec-postgresql.com/en/postgresql-indexing-index-scan-vs-bitmap-scan-vs-sequential-scan-basics/ io_method: abstract: | - Selects the async I/O implementation for read operations (PostgreSQL 18+). - - **worker** (default): Uses dedicated background processes. Best for most workloads, especially high-bandwidth sequential scans. Recommended as default. - - **io_uring** (Linux only): Kernel-level async I/O. Only switch after extensive testing proves benefit for your specific low-latency random-read patterns. Can hit file descriptor limits with high max_connections. + Selects the async I/O implementation for read operations (PostgreSQL 18+): - **sync**: Traditional synchronous I/O. Slower than async methods - avoid unless debugging or testing. + - **`worker`** (default): Uses dedicated background processes. Best for most workloads, especially high-bandwidth sequential scans. Recommended as default. + - **`io_uring`** (Linux only): Kernel-level async I/O. Only switch after extensive testing proves benefit for your specific low-latency random-read patterns. Can hit file descriptor limits with high `max_connections`. + - **`sync`**: Traditional synchronous I/O. Slower than async methods - avoid unless debugging or testing. - Note: Only affects reads. Writes, checkpoints, and WAL still use sync I/O. + > [!NOTE] + > Only affects reads. Writes, checkpoints, and WAL still use sync I/O. recomendations: "Tuning AIO in PostgreSQL 18 - Tomas Vondra": https://vondra.me/posts/tuning-aio-in-postgresql-18/ "Waiting for Postgres 18: Accelerating Disk Reads with Asynchronous I/O - pganalyze": https://pganalyze.com/blog/postgres-18-async-io @@ -197,7 +238,10 @@ categories: abstract: | Background worker processes for async I/O when `io_method = worker`. - Default of 3 is too low for modern multi-core systems. Recommendation: **10-40% of CPU cores** depending on workload. + > [!TIP] + > Default of `3` is too low for modern multi-core systems. 
+ > + > **Recommendation**: 10-40% of CPU cores depending on workload. Higher values benefit workloads with: - Sequential scans (DW/analytical queries) @@ -247,7 +291,10 @@ categories: abstract: | Hard limit on concurrent I/O operations per backend process (PostgreSQL 18+). - Controls read-ahead with async I/O. Formula: `max read-ahead = effective_io_concurrency × io_combine_limit` + Controls read-ahead with async I/O: + ``` + max_read_ahead = effective_io_concurrency × io_combine_limit + ``` Higher values benefit high-latency storage (cloud/EBS) with high IOPS. Watch memory usage - high concurrency increases memory pressure. recomendations: @@ -258,9 +305,10 @@ categories: "PostgreSQL 18 Asynchronous I/O - Neon": https://neon.com/postgresql/postgresql-18/asynchronous-io file_copy_method: abstract: | - Method for copying files during **CREATE DATABASE** and **ALTER DATABASE SET TABLESPACE** (PostgreSQL 18+). + Method for copying files during `CREATE DATABASE` and `ALTER DATABASE SET TABLESPACE` (PostgreSQL 18+). - Recommendation: Use **clone** if your filesystem supports it - dramatically faster (200-600ms for 100s of GB) and initially consumes zero extra disk space. + > [!TIP] + > Use `clone` if your filesystem supports it - dramatically faster (200-600ms for 100s of GB) and initially consumes zero extra disk space. recomendations: "Instant database clones with PostgreSQL 18": https://boringsql.com/posts/instant-database-clones/ "Instant Per-Branch Databases with PostgreSQL 18's clone": https://medium.com/axial-engineering/instant-per-branch-databases-with-postgresql-18s-clone-file-copy-and-copy-on-write-filesystems-1b1930bddbaa @@ -273,9 +321,10 @@ categories: Pool from which all background workers are drawn. Must accommodate: - Parallel query workers (`max_parallel_workers`) - Logical replication workers - - Extensions (pg_stat_statements, etc.) + - Extensions (`pg_stat_statements`, etc.) - Recommendation: Set to **CPU core count** or at least **25% of vCPUs**. 
Requires restart. + > [!TIP] + > Set to **CPU core count** or at least **25% of vCPUs**. Requires restart. recomendations: "PostgreSQL Performance Tuning Best Practices 2025": https://www.mydbops.com/blog/postgresql-parameter-tuning-best-practices "PostgreSQL Performance Tuning: Key Parameters": https://www.tigerdata.com/learn/postgresql-performance-tuning-key-parameters @@ -284,7 +333,8 @@ categories: abstract: | Maximum parallel workers per query executor node. - Each worker consumes resources individually (work_mem, CPU, I/O). A query with 4 workers uses 5x resources (1 leader + 4 workers). + > [!IMPORTANT] + > Each worker consumes resources individually (`work_mem`, CPU, I/O). A query with 4 workers uses 5x resources (1 leader + 4 workers). recomendations: "Increasing max parallel workers per gather in Postgres": https://www.pgmustard.com/blog/max-parallel-workers-per-gather "Postgres Tuning & Performance for Analytics Data": https://www.crunchydata.com/blog/postgres-tuning-and-performance-for-analytics-data @@ -296,7 +346,8 @@ categories: Limits total parallel workers from the `max_worker_processes` pool. Cannot exceed `max_worker_processes`. - Recommendation: Set equal to **CPU core count** or `max_worker_processes`. + > [!TIP] + > Set equal to **CPU core count** or `max_worker_processes`. recomendations: "Parallel Queries in Postgres": https://www.crunchydata.com/blog/parallel-queries-in-postgres "PostgreSQL Performance Tuning Best Practices 2025": https://www.mydbops.com/blog/postgresql-parameter-tuning-best-practices