MediaWiki master
QueryBuilderFromRawSql.php
Go to the documentation of this file.
1<?php
6namespace Wikimedia\Rdbms;
7
9
23 private const QUERY_CHANGE_MASK = (
24 SQLPlatform::QUERY_CHANGE_NONE |
25 SQLPlatform::QUERY_CHANGE_TRX |
26 SQLPlatform::QUERY_CHANGE_ROWS |
27 SQLPlatform::QUERY_CHANGE_SCHEMA |
28 SQLPlatform::QUERY_CHANGE_LOCKS
29 );
30
31 private const SCHEMA_CHANGE_VERBS = [
32 'CREATE',
33 'CREATE TEMPORARY',
34 'CREATE INDEX',
35 'CREATE DATABASE',
36 'ALTER',
37 'ALTER DATABASE',
38 'DROP',
39 'DROP INDEX',
40 'DROP DATABASE',
41 ];
42
43 private const TRX_VERBS = [
44 'BEGIN',
45 'COMMIT',
46 'ROLLBACK',
47 'SAVEPOINT',
48 'RELEASE SAVEPOINT',
49 'ROLLBACK TO SAVEPOINT',
50 ];
51
52 private static string $queryVerbRegex;
53
60 public static function buildQuery( string $sql, $flags, string $tablePrefix = '' ) {
61 $verb = self::getQueryVerb( $sql );
62
63 if ( ( $flags & self::QUERY_CHANGE_MASK ) == 0 ) {
64 $isWriteQuery = self::isWriteQuery( $sql );
65 if ( $isWriteQuery ) {
66 if ( in_array( $verb, self::SCHEMA_CHANGE_VERBS, true ) ) {
67 $flags |= SQLPlatform::QUERY_CHANGE_SCHEMA;
68 } else {
69 $flags |= SQLPlatform::QUERY_CHANGE_ROWS;
70 }
71 } else {
72 if ( in_array( $verb, self::TRX_VERBS, true ) ) {
73 $flags |= SQLPlatform::QUERY_CHANGE_TRX;
74 } else {
75 $flags |= SQLPlatform::QUERY_CHANGE_NONE;
76 }
77 }
78 }
79
80 return new Query(
81 $sql,
82 $flags,
83 $verb,
84 self::getWriteTable( $sql, $tablePrefix )
85 );
86 }
87
88 private static function isWriteQuery( string $rawSql ): bool {
89 // Treat SELECT queries without FOR UPDATE queries as non-writes. This matches
90 // how MySQL enforces read_only (FOR SHARE and LOCK IN SHADE MODE are allowed).
91 // Handle (SELECT ...) UNION (SELECT ...) queries in a similar fashion.
92 if ( preg_match( '/^\s*\‍(?SELECT\b/i', $rawSql ) ) {
93 return (bool)preg_match( '/\bFOR\s+UPDATE\‍)?\s*$/i', $rawSql );
94 }
95 // BEGIN and COMMIT queries are considered non-write queries here.
96 // Database backends and drivers (MySQL, MariaDB, php-mysqli) generally
97 // treat these as write queries, in that their results have "affected rows"
98 // as meta data as from writes, instead of "num rows" as from reads.
99 // But, we treat them as non-write queries because when reading data (from
100 // either replica or primary DB) we use transactions to enable repeatable-read
101 // snapshots, which ensures we get consistent results from the same snapshot
102 // for all queries within a request. Use cases:
103 // - Treating these as writes would trigger ChronologyProtector (see method doc).
104 // - We use this method to reject writes to replicas, but we need to allow
105 // use of transactions on replicas for read snapshots. This is fine given
106 // that transactions by themselves don't make changes, only actual writes
107 // within the transaction matter, which we still detect.
108 return !preg_match(
109 '/^\s*(BEGIN|ROLLBACK|COMMIT|SAVEPOINT|RELEASE|SET|SHOW|EXPLAIN|USE)\b/i',
110 $rawSql
111 );
112 }
113
118 private static function getQueryVerb( $sql ) {
119 // @phan-suppress-next-line PhanRedundantCondition https://github.com/phan/phan/issues/4720
120 if ( !isset( self::$queryVerbRegex ) ) {
121 $multiwordVerbsRegex = implode( '|', array_map(
122 static fn ( $words ) => str_replace( ' ', '\s+', $words ),
124 ) );
125 self::$queryVerbRegex = "/^\s*($multiwordVerbsRegex|[a-z]+)/i";
126 }
127 return preg_match( self::$queryVerbRegex, $sql, $m ) ? strtoupper( $m[1] ) : '';
128 }
129
135 private static function getWriteTable( $sql, $tablePrefix ) {
136 // Regex for basic queries that can create/change/drop temporary tables.
137 // For simplicity, this only looks for tables with sensible alphanumeric names.
138 // Temporary tables only need simple programming names anyway.
139 $regex = <<<REGEX
140 /^
141 (?:
142 (?:INSERT|REPLACE)\s+(?:\w+\s+)*?INTO
143 | UPDATE(?:\s+OR\s+\w+|\s+IGNORE|\s+ONLY)?
144 | DELETE\s+(?:\w+\s+)*?FROM(?:\s+ONLY)?
145 | CREATE\s+(?:TEMPORARY\s+)?TABLE(?:\s+IF\s+NOT\s+EXISTS)?
146 | DROP\s+(?:TEMPORARY\s+)?TABLE(?:\s+IF\s+EXISTS)?
147 | TRUNCATE\s+(?:TEMPORARY\s+)?TABLE
148 | ALTER\s+TABLE
149 ) \s+
150 (\w+|`\w+`|'\w+'|"\w+")
151 /ix
152 REGEX;
153 if ( preg_match( $regex, $sql, $m ) ) {
154 $tableName = trim( $m[1], "\"'`" );
155 if ( str_starts_with( $tableName, $tablePrefix ) ) {
156 $tableName = substr( $tableName, strlen( $tablePrefix ) );
157 }
158 return $tableName;
159 }
160 return null;
161 }
162
171 public static function generalizeSQL( $sql ) {
172 # This does the same as the regexp below would do, but in such a way
173 # as to avoid crashing php on some large strings.
174 # $sql = preg_replace( "/'([^\\\\']|\\\\.)*'|\"([^\\\\\"]|\\\\.)*\"/", "'X'", $sql );
175
176 $sql = str_replace( "\\\\", '', $sql );
177 $sql = str_replace( "\\'", '', $sql );
178 $sql = str_replace( "\\\"", '', $sql );
179 $sql = preg_replace( "/'.*'/s", "'X'", $sql );
180 $sql = preg_replace( '/".*"/s', "'X'", $sql );
181
182 # All newlines, tabs, etc replaced by single space
183 $sql = preg_replace( '/\s+/', ' ', $sql );
184
185 # All numbers => N,
186 # except the ones surrounded by characters, e.g. l10n
187 $sql = preg_replace( '/-?\d++(,-?\d++)++/', 'N,...,N', $sql );
188 $sql = preg_replace( '/(?<![a-zA-Z])-?\d+(?![a-zA-Z])/', 'N', $sql );
189
190 return $sql;
191 }
192}
if(!defined('MW_SETUP_CALLBACK'))
Definition WebStart.php:68
This is to contain any regex on SQL work and get rid of them eventually.
static buildQuery(string $sql, $flags, string $tablePrefix='')
Holds information on Query to be executed.
Definition Query.php:17
const MULTIWORD_VERBS
The possible multi-word values for getVerb().
Definition Query.php:23