From 8a6c101e60b7f25506ec31583c248e65878c7357 Mon Sep 17 00:00:00 2001 From: Lars T Hansen Date: Tue, 28 Apr 2026 13:39:54 +0200 Subject: [PATCH] Fix #904 - quote all fields for sql access --- code/sonalyze/db/timescaledb.go | 105 +++++++++++++++++--------------- 1 file changed, 57 insertions(+), 48 deletions(-) diff --git a/code/sonalyze/db/timescaledb.go b/code/sonalyze/db/timescaledb.go index 3744eff5..d1c8a580 100644 --- a/code/sonalyze/db/timescaledb.go +++ b/code/sonalyze/db/timescaledb.go @@ -56,6 +56,15 @@ // // Computed fields can sometimes become NULL, notably sum() over an empty set of rows, and must be // handled the same way. +// +// Note that PSQL turns *some* field names into syntax, "user" is known to be a problem (issue 904). +// As a consequence, all field names should either be quoted with double quotes (so, `"user" in +// tbl`, not `user in tbl`) or should be qualified with a table name (`t1.user`, not `user`). The +// responsibility for this lies with whoever introduces a field name into a string that is used +// literally in a select, so it's shared between querySlice() and the callers of that function. +// +// Also, field names seem to be folded to lower case, so anything with upper case characters must +// similarly be quoted ("AllocTRES" not AllocTRES). package db @@ -189,19 +198,19 @@ func (cdb *connectedDB) ReadProcessSamples( ) // Alpha order and KEEP THE FIELD AND BOX LISTS COMPLETELY IN SYNC OR YOU WILL BE SORRY! - t1Fields := "t1.cmd, t1.cpu_avg, t1.cpu_time, t1.cpu_util, t1.data_cancelled, t1.data_read, " + - "t1.data_written, t1.epoch, t1.job, t1.node, t1.num_threads, t1.pid, t1.ppid, " + - "t1.resident_memory, t1.rolledup, t1.time, t1.user, t1.virtual_memory" + t1Fields := `t1.cmd, t1.cpu_avg, t1.cpu_time, t1.cpu_util, t1.data_cancelled, t1.data_read, ` + + `t1.data_written, t1.epoch, t1.job, t1.node, t1.num_threads, t1.pid, t1.ppid, ` + + `t1.resident_memory, t1.rolledup, t1.time, t1.user, t1.virtual_memory` t1Boxes := []any{ &cmd, &cpuAvg, &cpuTime, &cpuUtil, &dataCancelled, &dataRead, &dataWritten, &epoch, &job, &node, &numThreads, &pid, &ppid, &residentMemory, &rolledup, ×tamp, &user, &virtualMemory, } - t2Fields := "sum(t2.gpu_memory), sum(t2.gpu_util), sum(t2.gpu_memory_util), count(t2.uuid)" + t2Fields := `sum(t2.gpu_memory), sum(t2.gpu_util), sum(t2.gpu_memory_util), count(t2.uuid)` t2Boxes := []any{&gpuMemoryp, &gpuUtilp, &gpuMemoryUtilp, &gpuCount} - joinBy := "left join sample_process_gpu as t2 on t1.cluster = t2.cluster and t1.node = t2.node " + - "and t1.time = t2.time and t1.pid = t2.pid and t1.job = t2.job and t1.epoch = t2.epoch " + - "group by " + t1Fields + joinBy := `left join sample_process_gpu as t2 on t1.cluster = t2.cluster and t1.node = t2.node ` + + `and t1.time = t2.time and t1.pid = t2.pid and t1.job = t2.job and t1.epoch = t2.epoch ` + + `group by ` + t1Fields q := query{ DataProviderFilter: filter, table: "sample_process", @@ -288,8 +297,8 @@ func (cdb *connectedDB) ReadNodeSamples( DataProviderFilter: filter, table: "sample_system", // Alpha order and KEEP THESE TWO LISTS COMPLETELY IN SYNC OR YOU WILL BE SORRY! - fields: "boot, existing_entities, load1, load15, load5, node, " + - "runnable_entities, time, used_memory", + fields: `"boot", "existing_entities", "load1", "load15", "load5", "node", ` + + `"runnable_entities", "time", "used_memory"`, boxes: []any{ &bootp, &existingEntities, &load1, &load15, &load5, &node, &runnableEntities, ×tamp, &usedMemory, @@ -334,11 +343,11 @@ func (cdb *connectedDB) ReadDiskSamples( DataProviderFilter: filter, table: "sample_disk", // Alpha order and KEEP THESE TWO LISTS COMPLETELY IN SYNC OR YOU WILL BE SORRY! - fields: "discards_completed, discards_merged, flush_requests_completed, " + - "ios_currently_in_progress, major, minor, ms_spent_discarding, " + - "ms_spent_doing_ios, ms_spent_flushing, ms_spent_reading, ms_spent_writing, " + - "name, node, reads_completed, reads_merged, sectors_discarded, sectors_read, " + - "sectors_written, time, weighted_ms_spent_doing_ios, writes_completed, writes_merged", + fields: `"discards_completed", "discards_merged", "flush_requests_completed", ` + + `"ios_currently_in_progress", "major", "minor", "ms_spent_discarding", ` + + `"ms_spent_doing_ios", "ms_spent_flushing", "ms_spent_reading", "ms_spent_writing", ` + + `"name", "node", "reads_completed", "reads_merged", "sectors_discarded", "sectors_read", ` + + `"sectors_written", "time", "weighted_ms_spent_doing_ios", "writes_completed", "writes_merged"`, boxes: []any{&discards_completed, &discards_merged, &flush_requests_completed, &ios_currently_in_progress, &major, &minor, &ms_spent_discarding, &ms_spent_doing_ios, &ms_spent_flushing, &ms_spent_reading, &ms_spent_writing, @@ -390,7 +399,7 @@ func (cdb *connectedDB) ReadCpuSamples( DataProviderFilter: filter, table: "sample_system", // Alpha order and KEEP THESE TWO LISTS COMPLETELY IN SYNC OR YOU WILL BE SORRY! - fields: "cpus, node, time", + fields: `"cpus", "node", "time"`, boxes: []any{&cpus, &node, ×tamp}, } @@ -440,12 +449,12 @@ func (cdb *connectedDB) ReadGpuSamples( q := query{ DataProviderFilter: filter, table: "sysinfo_gpu_card_config", - join: "join sample_gpu as t2 on t1.uuid = t2.uuid and " + extra + - "age(t1.time, t2.time) < interval '15 minutes'", + join: `join sample_gpu as t2 on t1.uuid = t2.uuid and ` + extra + + `age(t1.time, t2.time) < interval '15 minutes'`, // Alpha order and KEEP THESE TWO LISTS COMPLETELY IN SYNC OR YOU WILL BE SORRY! - fields: "t2.ce_clock, t2.ce_util, t2.compute_mode, t2.failing, t2.fan, t2.index, " + - "t2.memory, t2.memory_clock, t2.memory_util, t1.node, t2.performance_state, " + - "t2.power, t2.power_limit, t2.temperature, t2.time, t2.uuid", + fields: `t2.ce_clock, t2.ce_util, t2.compute_mode, t2.failing, t2.fan, t2.index, ` + + `t2.memory, t2.memory_clock, t2.memory_util, t1.node, t2.performance_state, ` + + `t2.power, t2.power_limit, t2.temperature, t2.time, t2.uuid`, boxes: []any{ &ce_clock, &ce_util, &compute_mode, &failing, &fan, &indexp, &memory, &memory_clock, &memory_util, &node, &performance_state, @@ -505,8 +514,8 @@ func (cdb *connectedDB) ReadSysinfoNodeData( DataProviderFilter: filter, table: "sysinfo_attributes", // Alpha order and KEEP THESE TWO LISTS COMPLETELY IN SYNC OR YOU WILL BE SORRY! - fields: "architecture, cards, cluster, cores_per_socket, cpu_model, distances, memory, " + - "node, numa_nodes, os_name, os_release, sockets, threads_per_core, time, topo_svg, topo_text", + fields: `"architecture", "cards", "cluster", "cores_per_socket", "cpu_model", "distances", "memory", ` + + `"node", "numa_nodes", "os_name", "os_release", "sockets", "threads_per_core", "time", "topo_svg", "topo_text"`, boxes: []any{ &architecture, &cards, &cluster, &coresPerSocket, &cpuModel, &distances, &memory, &node, &numaNodesBox, &osName, &osRelease, &sockets, &threadsPerCore, ×tamp, &topoSvgp, &topoTextp, @@ -599,10 +608,10 @@ func (cdb *connectedDB) ReadSysinfoCardData( // sysinfo_gpu_card. That needs to be joined to sysinfo_gpu_card_config here (by UUID) to // get the full story. table: "sysinfo_gpu_card_config", - join: "join sysinfo_gpu_card t2 on t1.uuid = t2.uuid", + join: `join sysinfo_gpu_card t2 on t1.uuid = t2.uuid`, // Alpha field name order and KEEP THESE TWO LISTS COMPLETELY IN SYNC OR YOU WILL BE SORRY! - fields: "address, architecture, driver, firmware, index, manufacturer, max_ce_clock, max_memory_clock, " + - "max_power_limit, memory, min_power_limit, model, node, power_limit, time, t1.uuid", + fields: `"address", "architecture", "driver", "firmware", "index", "manufacturer", "max_ce_clock", "max_memory_clock", ` + + `"max_power_limit", "memory", "min_power_limit", "model", "node", "power_limit", "time", t1.uuid`, boxes: []any{ &address, &architecture, &driver, &firmware, &index, &manufacturer, &maxCeClock, &maxMemoryClock, &maxPowerLimit, &memory, &minPowerLimit, &model, &node, &powerLimit, ×tamp, &uuid, @@ -661,17 +670,17 @@ func (cdb *connectedDB) ReadSacctData( DataProviderFilter: filter, fieldMap: map[string]string{"time": "time", "user": "user_name", "job": "job_name", "node": "nodes"}, table: "sample_slurm_job", - join: "join sample_slurm_job_acc as t2 on " + - "t1.cluster = t2.cluster and t1.job_id = t2.job_id and t1.job_step = t2.job_step and " + - "t1.time = t2.time", + join: `join sample_slurm_job_acc as t2 on ` + + `t1.cluster = t2.cluster and t1.job_id = t2.job_id and t1.job_step = t2.job_step and ` + + `t1.time = t2.time`, // Alpha order and KEEP THESE TWO LISTS COMPLETELY IN SYNC OR YOU WILL BE SORRY! - fields: "account, allocated_resources, \"AllocTRES\", array_job_id, array_task_id, \"AveCPU\", " + - "\"AveDiskRead\", \"AveDiskWrite\", \"AveRSS\", \"AveVMSize\", t1.cluster, distribution, \"ElapsedRaw\", " + - "end_time, exit_code, het_job_id, het_job_offset, t1.job_id, job_name, " + - "job_state, t1.job_step, \"MaxRSS\", \"MaxVMSize\", \"MinCPU\", nodes, " + - "partition, priority, requested_cpus, requested_memory_per_node, requested_node_count, " + - "requested_resources, reservation, start_time, submit_time, suspend_time, \"SystemCPU\", " + - "t1.time, time_limit, \"UserCPU\", user_name", + fields: `"account", "allocated_resources", "AllocTRES", "array_job_id", "array_task_id", "AveCPU", ` + + `"AveDiskRead", "AveDiskWrite", "AveRSS", "AveVMSize", t1.cluster, "distribution", "ElapsedRaw", ` + + `"end_time", "exit_code", "het_job_id", "het_job_offset", t1.job_id, "job_name", ` + + `"job_state", t1.job_step, "MaxRSS", "MaxVMSize", "MinCPU", "nodes", ` + + `"partition", "priority", "requested_cpus", "requested_memory_per_node", "requested_node_count", ` + + `"requested_resources", "reservation", "start_time", "submit_time", "suspend_time", "SystemCPU", ` + + `t1.time, "time_limit", "UserCPU", "user_name"`, boxes: []any{ &account, &allocatedResourcesp, &allocTRES, &arrayJobId, &arrayTaskIdp, &aveCPU, &aveDiskRead, &aveDiskWrite, &aveRSS, &aveVMSize, &cluster, &distribution, &elapsedRaw, @@ -809,7 +818,7 @@ func (cdb *connectedDB) ReadCluzterAttributeData( DataProviderFilter: filter, table: "cluster_attributes", // Alpha order and KEEP THESE TWO LISTS COMPLETELY IN SYNC OR YOU WILL BE SORRY! - fields: "cluster, slurm, time", + fields: `"cluster", "slurm", "time"`, boxes: []any{&cluster, &slurm, ×tamp}, } @@ -839,7 +848,7 @@ func (cdb *connectedDB) ReadCluzterPartitionData( DataProviderFilter: filter, table: "partition", // Alpha order and KEEP THESE TWO LISTS COMPLETELY IN SYNC OR YOU WILL BE SORRY! - fields: "cluster, nodes_compact, partition, time", + fields: `"cluster", "nodes_compact", "partition", "time"`, boxes: []any{&cluster, &nodeNamesCompact, &partName, ×tamp}, } @@ -883,7 +892,7 @@ func (cdb *connectedDB) ReadCluzterNodeData( DataProviderFilter: filter, table: "node_state", // Alpha order and KEEP THESE TWO LISTS COMPLETELY IN SYNC OR YOU WILL BE SORRY! - fields: "cluster, node, states, time", + fields: `"cluster", "node", "states", "time"`, boxes: []any{&cluster, &nodeName, &states, ×tamp}, } @@ -922,7 +931,7 @@ func makeRefillTimeCache( return func() (low, high time.Time, err error) { err = dbc.QueryRowAndScan( context.Background(), - "SELECT MIN(time) FROM sysinfo_attributes WHERE cluster = $1", + `SELECT MIN(time) FROM sysinfo_attributes WHERE "cluster" = $1`, []any{clusterName}, []any{&low}, ) @@ -931,7 +940,7 @@ func makeRefillTimeCache( } err = dbc.QueryRowAndScan( context.Background(), - "SELECT MAX(time) FROM sample_process WHERE cluster = $1", + `SELECT MAX(time) FROM sample_process WHERE "cluster" = $1`, []any{clusterName}, []any{&high}, ) @@ -956,7 +965,7 @@ func querySlice[T any]( q *query, unbox func() *T, ) (finalRows [][]*T, softErrors int, err error) { - primary := "SELECT * FROM " + q.table + " WHERE " + "cluster=$1" + primary := "SELECT * FROM " + q.table + " WHERE \"cluster\"=$1" qarg := []any{cdb.cx.ClusterName()} // A note about adding field filters against values. For values we don't control we have to use @@ -971,11 +980,11 @@ func querySlice[T any]( timeField := mapField("time", q.fieldMap) if !q.FromDate.IsZero() { - primary += fmt.Sprintf(" AND %s >= $%d", timeField, len(qarg)+1) + primary += fmt.Sprintf(" AND \"%s\" >= $%d", timeField, len(qarg)+1) qarg = append(qarg, q.FromDate.Format(time.DateOnly)) } if !q.ToDate.IsZero() { - primary += fmt.Sprintf(" AND %s < $%d", timeField, len(qarg)+1) + primary += fmt.Sprintf(" AND \"%s\" < $%d", timeField, len(qarg)+1) qarg = append(qarg, q.ToDate.Add(time.Hour*24).Format(time.DateOnly)) } @@ -1009,7 +1018,7 @@ func querySlice[T any]( elements = append(elements, fmt.Sprintf("$%d::character varying", len(qarg)+1)) qarg = append(qarg, e) } - primary += " AND " + fieldname + " && ARRAY[" + strings.Join(elements, ",") + "]" + primary += " AND \"" + fieldname + "\" && ARRAY[" + strings.Join(elements, ",") + "]" } } else { conds := make([]string, 0) @@ -1018,7 +1027,7 @@ func querySlice[T any]( for _, p := range q.Node.Patterns() { loc := strings.IndexAny(p, "[*") if !q.Node.IsPrefix() && loc == -1 { - conds = append(conds, fmt.Sprintf("%s = $%d", fieldname, nextIx)) + conds = append(conds, fmt.Sprintf("\"%s\" = $%d", fieldname, nextIx)) } else { // TODO: We can and should do more here: // @@ -1035,7 +1044,7 @@ func querySlice[T any]( // // We should get the hostglobber involved since it has an exact parse of the pattern // set and is already in q.hosts. - conds = append(conds, fmt.Sprintf("%s like $%d", fieldname, nextIx)) + conds = append(conds, fmt.Sprintf("\"%s\" like $%d", fieldname, nextIx)) if loc != -1 { p = p[:loc] } @@ -1057,7 +1066,7 @@ func querySlice[T any]( fieldname := mapField("job", q.fieldMap) var jobs []string for j := range q.Jobs { - jobs = append(jobs, fmt.Sprintf("%s = %d", fieldname, j)) + jobs = append(jobs, fmt.Sprintf("\"%s\" = %d", fieldname, j)) } primary += " AND (" + strings.Join(jobs, " OR ") + ")" } @@ -1068,7 +1077,7 @@ func querySlice[T any]( fieldname := mapField("user", q.fieldMap) var conds []string for u := range q.Users { - conds = append(conds, fmt.Sprintf("%s = $%d", fieldname, len(qarg)+1)) + conds = append(conds, fmt.Sprintf("\"%s\" = $%d", fieldname, len(qarg)+1)) qarg = append(qarg, u.String()) } primary += " AND (" + strings.Join(conds, " OR ") + ")"