-
Notifications
You must be signed in to change notification settings - Fork 76
Expand file tree
/
Copy pathSelectSQLRewriter.php
More file actions
400 lines (326 loc) · 15.5 KB
/
SelectSQLRewriter.php
File metadata and controls
400 lines (326 loc) · 15.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
<?php
class SelectSQLRewriter extends AbstractSQLRewriter
{
public function rewrite(): string
{
global $wpdb;
$sql = $this->original();
// SQL_CALC_FOUND_ROWS doesn't exist in PostgreSQL but it's needed for correct paging
if(false !== strpos($sql, 'SQL_CALC_FOUND_ROWS')) {
$sql = str_replace('SQL_CALC_FOUND_ROWS', '', $sql);
$GLOBALS['pg4wp_numrows_query'] = $sql;
if(PG4WP_DEBUG) {
error_log('[' . microtime(true) . "] Number of rows required for :\n$sql\n---------------------\n", 3, PG4WP_LOG . 'pg4wp_NUMROWS.log');
}
}
if(false !== strpos($sql, 'FOUND_ROWS()')) {
// Here we convert the latest query into a COUNT query
$sql = $GLOBALS['pg4wp_numrows_query'];
// Remove the LIMIT clause if it exists
$sql = preg_replace('/\s+LIMIT\s+\d+(\s*,\s*\d+)?/i', '', $sql);
// Remove the ORDER BY containing case / end clause if it exists
$sql = preg_replace('/\s+ORDER\s+BY\s+.+END\),[^)]+/is', '', $sql);
// Remove the ORDER BY clause if it exists
$sql = preg_replace('/\s+ORDER\s+BY\s+[^)]+/i', '', $sql);
// Replace the fields in the SELECT clause with COUNT(*)
$sql = preg_replace('/SELECT\s+.*?\s+FROM\s+/is', 'SELECT COUNT(*) FROM ', $sql, 1);
}
if(false !== strpos($sql, 'information_schema')) {
// WP Site Health rewrites
if (false !== strpos($sql, "SELECT TABLE_NAME AS 'table', TABLE_ROWS AS 'rows', SUM(data_length + index_length)")) {
$sql = $this->postgresTableSizeRewrite(DB_SCHEMA);
return $sql;
}
throw new Exception("Unsupported call to information_schema, this probably won't work correctly and needs to be specifically handled, open a github issue with the SQL");
}
$sql = $this->ensureOrderByInSelect($sql);
// Handle +0 casting in order by
// Regular expression to match the "ORDER BY" pattern
$pattern = '/ORDER BY\s+([a-zA-Z0-9_]+)\.meta_value\s*\+\s*0/i';
$replacement = 'ORDER BY CAST($1.meta_value AS SIGNED)';
$sql = preg_replace($pattern, $replacement, $sql);
// Convert CONVERT to CAST
$pattern = '/CONVERT\(([^()]*(\(((?>[^()]+)|(?-2))*\))?[^()]*),\s*([^\s]+)\)/x';
$sql = preg_replace($pattern, 'CAST($1 AS $4)', $sql);
// Handle CAST( ... AS CHAR)
$sql = preg_replace('/CAST\((.+) AS CHAR\)/', 'CAST($1 AS TEXT)', $sql);
// Handle CAST( ... AS SIGNED)
$sql = preg_replace('/CAST\((.+) AS SIGNED\)/', 'CAST($1 AS INTEGER)', $sql);
// Handle COUNT(*)...ORDER BY...
$sql = preg_replace('/COUNT(.+)ORDER BY.+/s', 'COUNT$1', $sql);
// HANDLE REGEXP
$sql = preg_replace('/REGEXP/', '~', $sql);
$sql = str_replace("utc_timestamp()", "CURRENT_TIMESTAMP AT TIME ZONE 'UTC'", $sql);
// In order for users counting to work...
$matches = array();
if(preg_match_all('/COUNT[^C]+\),/', $sql, $matches)) {
foreach($matches[0] as $num => $one) {
$sub = substr($one, 0, -1);
$sql = str_replace($sub, $sub . ' AS count' . $num, $sql);
}
}
$sql = $this->convertToPostgresLimitSyntax($sql);
$sql = $this->ensureGroupByOrAggregate($sql);
$pattern = '/DATE_ADD[ ]*\(([^,]+),([^\)]+)\)/';
$sql = preg_replace($pattern, '($1 + $2)', $sql);
// Convert MySQL FIELD function to CASE statement
$pattern = '/FIELD[ ]*\(([^\),]+),([^\)]+)\)/';
// https://dev.mysql.com/doc/refman/5.7/en/string-functions.html#function_field
// Other implementations: https://stackoverflow.com/q/1309624
$sql = preg_replace_callback($pattern, function ($matches) {
$case = 'CASE ' . trim($matches[1]);
$comparands = explode(',', $matches[2]);
foreach($comparands as $i => $comparand) {
$case .= ' WHEN ' . trim($comparand) . ' THEN ' . ($i + 1);
}
$case .= ' ELSE 0 END';
return $case;
}, $sql);
$pattern = '/GROUP_CONCAT\(([^()]*(\(((?>[^()]+)|(?-2))*\))?[^()]*)\)/x';
$sql = preg_replace($pattern, "string_agg($1, ',')", $sql);
// Convert MySQL RAND function to PostgreSQL RANDOM function
$pattern = '/RAND[ ]*\([ ]*\)/';
$sql = preg_replace($pattern, 'RANDOM()', $sql);
// UNIX_TIMESTAMP in MYSQL returns an integer
$pattern = '/UNIX_TIMESTAMP\(([^\)]+)\)/';
$sql = preg_replace($pattern, 'ROUND(DATE_PART(\'epoch\',$1))', $sql);
$date_funcs = array(
'DAYOFMONTH(' => 'EXTRACT(DAY FROM ',
'YEAR(' => 'EXTRACT(YEAR FROM ',
'MONTH(' => 'EXTRACT(MONTH FROM ',
'DAY(' => 'EXTRACT(DAY FROM ',
);
$sql = str_replace('ORDER BY post_date DESC', 'ORDER BY YEAR(post_date) DESC, MONTH(post_date) DESC', $sql);
$sql = str_replace('ORDER BY post_date ASC', 'ORDER BY YEAR(post_date) ASC, MONTH(post_date) ASC', $sql);
$sql = str_replace(array_keys($date_funcs), array_values($date_funcs), $sql);
$curryear = date('Y');
$sql = str_replace('FROM \'' . $curryear, 'FROM TIMESTAMP \'' . $curryear, $sql);
// MySQL 'IF' conversion - Note : NULLIF doesn't need to be corrected
$pattern = '/ (?<!NULL)IF[ ]*\(([^,]+),([^,]+),([^\)]+)\)/';
$sql = preg_replace($pattern, ' CASE WHEN $1 THEN $2 ELSE $3 END', $sql);
// Act like MySQL default configuration, where sql_mode is ""
$pattern = '/@@SESSION.sql_mode/';
$sql = preg_replace($pattern, "''", $sql);
// TODO: this seems wrong but if we remove it we get failures with XYZ is not part of the group By
if(isset($wpdb)) {
$sql = str_replace('GROUP BY ' . $wpdb->prefix . 'posts.ID', '', $sql);
}
// MySQL 'LIKE' is case insensitive by default, whereas PostgreSQL 'LIKE' is
$sql = str_replace(' LIKE ', ' ILIKE ', $sql);
// INDEXES are not yet supported
if(false !== strpos($sql, 'USE INDEX (comment_date_gmt)')) {
$sql = str_replace('USE INDEX (comment_date_gmt)', '', $sql);
}
// HB : timestamp fix for permalinks
$sql = str_replace('post_date_gmt > 1970', 'post_date_gmt > to_timestamp (\'1970\')', $sql);
// Akismet sometimes doesn't write 'comment_ID' with 'ID' in capitals where needed ...
if(isset($wpdb) && $wpdb->comments && false !== strpos($sql, $wpdb->comments)) {
$sql = str_replace(' comment_id ', ' comment_ID ', $sql);
}
// MySQL treats a HAVING clause without GROUP BY like WHERE
if(false !== strpos($sql, 'HAVING') && false === strpos($sql, 'GROUP BY')) {
if(false === strpos($sql, 'WHERE')) {
$sql = str_replace('HAVING', 'WHERE', $sql);
} else {
$pattern = '/WHERE\s+(.*?)\s+HAVING\s+(.*?)(\s*(?:ORDER|LIMIT|PROCEDURE|INTO|FOR|LOCK|$))/';
$sql = preg_replace($pattern, 'WHERE ($1) AND ($2) $3', $sql);
}
}
// MySQL allows integers to be used as boolean expressions
// where 0 is false and all other values are true.
//
// Although this could occur anywhere with any number, so far it
// has only been observed as top-level expressions in the WHERE
// clause and only with 0. For performance, limit current
// replacements to that.
$pattern_after_where = '(?:\s*$|\s+(GROUP|HAVING|ORDER|LIMIT|PROCEDURE|INTO|FOR|LOCK))';
$pattern = '/(WHERE\s+)0(\s+AND|\s+OR|' . $pattern_after_where . ')/';
$sql = preg_replace($pattern, '$1false$2', $sql);
$pattern = '/(AND\s+|OR\s+)0(' . $pattern_after_where . ')/';
$sql = preg_replace($pattern, '$1false$2', $sql);
// MySQL supports strings as names, PostgreSQL needs identifiers.
// Limit to after closing parenthesis to reduce false-positives
// Currently only an issue for nextgen-gallery plugin
$pattern = '/\) AS \'([^\']+)\'/';
$sql = preg_replace($pattern, ') AS "$1"', $sql);
return $sql;
}
/**
* Ensure the columns used in the ORDER BY clause are also present in the SELECT clause.
*
* @param string $sql Original SQL query string.
* @return string Modified SQL query string.
*/
protected function ensureOrderByInSelect(string $sql): string
{
// Extract the SELECT and ORDER BY clauses
preg_match('/SELECT\s+(.*?)\s+FROM/si', $sql, $selectMatches);
preg_match('/ORDER BY(.*?)(ASC|DESC|$)/si', $sql, $orderMatches);
preg_match('/GROUP BY(.*?)(ASC|DESC|$)/si', $sql, $groupMatches);
// If the SELECT clause is missing, return the original query
if (!$selectMatches) {
return $sql;
}
// If both ORDER BY and GROUP BY clauses are missing, return the original query
if (!$orderMatches && !$groupMatches) {
return $sql;
}
$selectClause = trim($selectMatches[1]);
$orderByClause = $orderMatches ? trim($orderMatches[1]) : null;
$groupClause = $groupMatches ? trim($groupMatches[1]) : null;
$modified = false;
// Check for wildcard in SELECT
if (strpos($selectClause, '*') !== false) {
return $sql; // Cannot handle wildcards, return original query
}
// Handle ORDER BY columns
if ($orderByClause) {
$orderByColumns = explode(',', $orderByClause);
foreach ($orderByColumns as $col) {
$col = trim($col);
if (strpos($selectClause, $col) === false) {
$selectClause .= ', ' . $col;
$modified = true;
}
}
}
// Handle GROUP BY columns
if ($groupClause && !$modified) {
$groupColumns = explode(',', $groupClause);
foreach ($groupColumns as $col) {
$col = trim($col);
if (strpos($selectClause, $col) === false) {
$selectClause .= ', ' . $col;
$modified = true;
}
}
}
if (!$modified) {
return $sql;
}
// Find the exact position for the replacement
$selectStartPos = strpos($sql, $selectMatches[1]);
if ($selectStartPos === false) {
return $sql; // If for some reason the exact match is not found, return the original query
}
$postgresSql = substr_replace($sql, $selectClause, $selectStartPos, strlen($selectMatches[1]));
return $postgresSql;
}
/**
* Transforms a given SQL query to include a GROUP BY clause if the SELECT statement has both aggregate
* and non-aggregate columns. This function is specifically designed to work with PostgreSQL.
*
* In PostgreSQL, a query that uses aggregate functions must group by all columns in the SELECT list that
* are not part of the aggregate functions. Failing to do so results in a syntax error. This function
* automatically adds a GROUP BY clause to meet this PostgreSQL requirement when both aggregate (COUNT, SUM,
* AVG, MIN, MAX) and non-aggregate columns are present.
*
* @param string $sql The SQL query string to be transformed.
*
* @return string The transformed SQL query string with appropriate GROUP BY clause if required.
*
* @throws Exception If the SQL query cannot be parsed or modified.
*
* @example
* Input: SELECT COUNT(id), username FROM users;
* Output: SELECT COUNT(id), username FROM users GROUP BY username;
*
*/
protected function ensureGroupByOrAggregate(string $sql): string
{
// Check for system or session variables
if (preg_match('/@@[a-zA-Z0-9_]+/', $sql)) {
return $sql;
}
// Regular expression to capture main SQL components.
$regex = '/(SELECT\s+)(.*?)(\s+FROM\s+)([^ ]+)(\s+WHERE\s+.*?(?= ORDER BY | GROUP BY | LIMIT |$))?(ORDER BY.*?(?= LIMIT |$))?(LIMIT.*?$)?/is';
// Capture main SQL components using regex
if (!preg_match($regex, $sql, $matches)) {
return $sql;
}
$selectClause = trim($matches[2] ?? '');
$fromClause = trim($matches[4] ?? '');
$whereClause = trim($matches[5] ?? '');
$orderClause = trim($matches[6] ?? '');
$limitClause = trim($matches[7] ?? '');
if (empty($selectClause) || empty($fromClause)) {
return $sql;
}
// Regular expression to match commas not within parentheses
$pattern = '/,(?![^\(]*\))/';
// Split columns using a comma, and then trim each element
$columns = array_map('trim', preg_split($pattern, $selectClause));
$aggregateColumns = [];
$nonAggregateColumns = [];
foreach ($columns as $col) {
// Check for aggregate functions in the column
if (preg_match('/(COUNT|SUM|AVG|MIN|MAX)\s*?\(/i', $col)) {
$aggregateColumns[] = $col;
} else {
$nonAggregateColumns[] = $col;
}
}
// Only add a GROUP BY clause if there are both aggregate and non-aggregate columns in SELECT
if (empty($aggregateColumns) || empty($nonAggregateColumns)) {
return $sql;
}
// Assemble new SQL query
$postgresSql = "SELECT $selectClause FROM $fromClause";
if (!empty($whereClause)) {
$postgresSql .= ' ' . $whereClause;
}
$groupByClause = "GROUP BY " . implode(", ", $nonAggregateColumns);
if (!empty($groupByClause)) {
$postgresSql .= ' ' . $groupByClause;
}
if (!empty($orderClause)) {
$postgresSql .= ' ' . $orderClause;
}
if (!empty($limitClause)) {
$postgresSql .= ' ' . $limitClause;
}
return $postgresSql;
}
/**
* Convert MySQL LIMIT syntax to PostgreSQL LIMIT syntax
*
* @param string $sql MySQL query string
* @return string PostgreSQL query string
*/
protected function convertToPostgresLimitSyntax($sql)
{
// Use regex to find "LIMIT m, n" syntax in query
if (preg_match('/LIMIT\s+(\d+),\s*(\d+)/i', $sql, $matches)) {
$offset = $matches[1];
$limit = $matches[2];
// Replace MySQL LIMIT syntax with PostgreSQL LIMIT syntax
$postgresLimitSyntax = "LIMIT $limit OFFSET $offset";
$postgresSql = preg_replace('/LIMIT\s+\d+,\s*\d+/i', $postgresLimitSyntax, $sql);
return $postgresSql;
}
// Return original query if no MySQL LIMIT syntax is found
return $sql;
}
// This method is specifically to handle should_suggest_persistent_object_cache in wp site health
protected function postgresTableSizeRewrite($schema)
{
$sql = <<<SQL
SELECT
C.relname AS "table",
S.n_live_tup AS "rows",
pg_total_relation_size(C.oid) AS "bytes"
FROM
pg_class C
LEFT JOIN
pg_namespace N ON (N.oid = C.relnamespace)
INNER JOIN
pg_stat_user_tables S ON (S.relid = C.oid)
WHERE
N.nspname = '$schema' AND
C.relname IN ('wp_comments','wp_options','wp_posts','wp_terms','wp_users')
GROUP BY
C.relname, pg_total_relation_size(C.oid), S.n_live_tup;
SQL;
return $sql;
}
}