MediaWiki master
QueryBuilderFromRawSql.php
Go to the documentation of this file.
1<?php
20namespace Wikimedia\Rdbms;
21
23
37 private const QUERY_CHANGE_MASK = (
38 SQLPlatform::QUERY_CHANGE_NONE |
39 SQLPlatform::QUERY_CHANGE_TRX |
40 SQLPlatform::QUERY_CHANGE_ROWS |
41 SQLPlatform::QUERY_CHANGE_SCHEMA |
42 SQLPlatform::QUERY_CHANGE_LOCKS
43 ) & ~SQLPlatform::QUERY_IGNORE_DBO_TRX;
44
45 private const SCHEMA_CHANGE_VERBS = [
46 'CREATE',
47 'CREATE TEMPORARY',
48 'CREATE INDEX',
49 'CREATE DATABASE',
50 'ALTER',
51 'ALTER DATABASE',
52 'DROP',
53 'DROP INDEX',
54 'DROP DATABASE',
55 ];
56
57 private const TRX_VERBS = [
58 'BEGIN',
59 'COMMIT',
60 'ROLLBACK',
61 'SAVEPOINT',
62 'RELEASE SAVEPOINT',
63 'ROLLBACK TO SAVEPOINT',
64 ];
65
66 private static string $queryVerbRegex;
67
74 public static function buildQuery( string $sql, $flags, string $tablePrefix = '' ) {
75 $verb = self::getQueryVerb( $sql );
76
77 if ( ( $flags & self::QUERY_CHANGE_MASK ) == 0 ) {
78 $isWriteQuery = self::isWriteQuery( $sql );
79 if ( $isWriteQuery ) {
80 if ( in_array( $verb, self::SCHEMA_CHANGE_VERBS, true ) ) {
81 $flags |= SQLPlatform::QUERY_CHANGE_SCHEMA;
82 } else {
83 $flags |= SQLPlatform::QUERY_CHANGE_ROWS;
84 }
85 } else {
86 if ( in_array( $verb, self::TRX_VERBS, true ) ) {
87 $flags |= SQLPlatform::QUERY_CHANGE_TRX;
88 } else {
89 $flags |= SQLPlatform::QUERY_CHANGE_NONE;
90 }
91 }
92 }
93
94 return new Query(
95 $sql,
96 $flags,
97 $verb,
98 self::getWriteTable( $sql, $tablePrefix )
99 );
100 }
101
102 private static function isWriteQuery( $rawSql ) {
103 // Treat SELECT queries without FOR UPDATE queries as non-writes. This matches
104 // how MySQL enforces read_only (FOR SHARE and LOCK IN SHADE MODE are allowed).
105 // Handle (SELECT ...) UNION (SELECT ...) queries in a similar fashion.
106 if ( preg_match( '/^\s*\‍(?SELECT\b/i', $rawSql ) ) {
107 return (bool)preg_match( '/\bFOR\s+UPDATE\‍)?\s*$/i', $rawSql );
108 }
109 // BEGIN and COMMIT queries are considered non-write queries here.
110 // Database backends and drivers (MySQL, MariaDB, php-mysqli) generally
111 // treat these as write queries, in that their results have "affected rows"
112 // as meta data as from writes, instead of "num rows" as from reads.
113 // But, we treat them as non-write queries because when reading data (from
114 // either replica or primary DB) we use transactions to enable repeatable-read
115 // snapshots, which ensures we get consistent results from the same snapshot
116 // for all queries within a request. Use cases:
117 // - Treating these as writes would trigger ChronologyProtector (see method doc).
118 // - We use this method to reject writes to replicas, but we need to allow
119 // use of transactions on replicas for read snapshots. This is fine given
120 // that transactions by themselves don't make changes, only actual writes
121 // within the transaction matter, which we still detect.
122 return !preg_match(
123 '/^\s*(BEGIN|ROLLBACK|COMMIT|SAVEPOINT|RELEASE|SET|SHOW|EXPLAIN|USE)\b/i',
124 $rawSql
125 );
126 }
127
132 private static function getQueryVerb( $sql ) {
133 // @phan-suppress-next-line PhanRedundantCondition https://github.com/phan/phan/issues/4720
134 if ( !isset( self::$queryVerbRegex ) ) {
135 $multiwordVerbsRegex = implode( '|', array_map(
136 fn ( $words ) => str_replace( ' ', '\s+', $words ),
138 ) );
139 self::$queryVerbRegex = "/^\s*($multiwordVerbsRegex|[a-z]+)/i";
140 }
141 return preg_match( self::$queryVerbRegex, $sql, $m ) ? strtoupper( $m[1] ) : '';
142 }
143
149 private static function getWriteTable( $sql, $tablePrefix ) {
150 // Regex for basic queries that can create/change/drop temporary tables.
151 // For simplicity, this only looks for tables with sensible alphanumeric names.
152 // Temporary tables only need simple programming names anyway.
153 $regex = <<<REGEX
154 /^
155 (?:
156 (?:INSERT|REPLACE)\s+(?:\w+\s+)*?INTO
157 | UPDATE(?:\s+OR\s+\w+|\s+IGNORE|\s+ONLY)?
158 | DELETE\s+(?:\w+\s+)*?FROM(?:\s+ONLY)?
159 | CREATE\s+(?:TEMPORARY\s+)?TABLE(?:\s+IF\s+NOT\s+EXISTS)?
160 | DROP\s+(?:TEMPORARY\s+)?TABLE(?:\s+IF\s+EXISTS)?
161 | TRUNCATE\s+(?:TEMPORARY\s+)?TABLE
162 | ALTER\s+TABLE
163 ) \s+
164 (\w+|`\w+`|'\w+'|"\w+")
165 /ix
166 REGEX;
167 if ( preg_match( $regex, $sql, $m ) ) {
168 $tableName = trim( $m[1], "\"'`" );
169 if ( str_starts_with( $tableName, $tablePrefix ) ) {
170 $tableName = substr( $tableName, strlen( $tablePrefix ) );
171 }
172 return $tableName;
173 }
174 return null;
175 }
176
185 public static function generalizeSQL( $sql ) {
186 # This does the same as the regexp below would do, but in such a way
187 # as to avoid crashing php on some large strings.
188 # $sql = preg_replace( "/'([^\\\\']|\\\\.)*'|\"([^\\\\\"]|\\\\.)*\"/", "'X'", $sql );
189
190 $sql = str_replace( "\\\\", '', $sql );
191 $sql = str_replace( "\\'", '', $sql );
192 $sql = str_replace( "\\\"", '', $sql );
193 $sql = preg_replace( "/'.*'/s", "'X'", $sql );
194 $sql = preg_replace( '/".*"/s', "'X'", $sql );
195
196 # All newlines, tabs, etc replaced by single space
197 $sql = preg_replace( '/\s+/', ' ', $sql );
198
199 # All numbers => N,
200 # except the ones surrounded by characters, e.g. l10n
201 $sql = preg_replace( '/-?\d+(,-?\d+)+/', 'N,...,N', $sql );
202 $sql = preg_replace( '/(?<![a-zA-Z])-?\d+(?![a-zA-Z])/', 'N', $sql );
203
204 return $sql;
205 }
206}
This is to contain any regex on SQL work and get rid of them eventually.
static buildQuery(string $sql, $flags, string $tablePrefix='')
Holds information on Query to be executed.
Definition Query.php:31
const MULTIWORD_VERBS
The possible multi-word values for getVerb().
Definition Query.php:37