MediaWiki  master
QueryBuilderFromRawSql.php
Go to the documentation of this file.
1 <?php
20 namespace Wikimedia\Rdbms;
21 
23 
36 
43  public static function buildQuery( string $sql, $flags, string $tablePrefix = '' ) {
44  if ( !$flags && !self::isWriteQuery( $sql ) ) {
45  $flags = SQLPlatform::QUERY_CHANGE_NONE;
46  } elseif ( !$flags ) {
47  $flags = SQLPlatform::QUERY_CHANGE_ROWS;
48  }
49  $queryVerb = self::getQueryVerb( $sql );
50  $queryTables = self::getQueryTables( $sql, $tablePrefix );
51  return new Query( $sql, $flags, $queryVerb, $queryTables );
52  }
53 
54  private static function isWriteQuery( $rawSql ) {
55  // Treat SELECT queries without FOR UPDATE queries as non-writes. This matches
56  // how MySQL enforces read_only (FOR SHARE and LOCK IN SHADE MODE are allowed).
57  // Handle (SELECT ...) UNION (SELECT ...) queries in a similar fashion.
58  if ( preg_match( '/^\s*\‍(?SELECT\b/i', $rawSql ) ) {
59  return (bool)preg_match( '/\bFOR\s+UPDATE\‍)?\s*$/i', $rawSql );
60  }
61  // BEGIN and COMMIT queries are considered non-write queries here.
62  // Database backends and drivers (MySQL, MariaDB, php-mysqli) generally
63  // treat these as write queries, in that their results have "affected rows"
64  // as meta data as from writes, instead of "num rows" as from reads.
65  // But, we treat them as non-write queries because when reading data (from
66  // either replica or primary DB) we use transactions to enable repeatable-read
67  // snapshots, which ensures we get consistent results from the same snapshot
68  // for all queries within a request. Use cases:
69  // - Treating these as writes would trigger ChronologyProtector (see method doc).
70  // - We use this method to reject writes to replicas, but we need to allow
71  // use of transactions on replicas for read snapshots. This is fine given
72  // that transactions by themselves don't make changes, only actual writes
73  // within the transaction matter, which we still detect.
74  return !preg_match(
75  '/^\s*(BEGIN|ROLLBACK|COMMIT|SAVEPOINT|RELEASE|SET|SHOW|EXPLAIN|USE)\b/i',
76  $rawSql
77  );
78  }
79 
84  private static function getQueryVerb( $sql ) {
85  // Distinguish ROLLBACK from ROLLBACK TO SAVEPOINT
86  return preg_match(
87  '/^\s*(rollback\s+to\s+savepoint|[a-z]+)/i',
88  $sql,
89  $m
90  ) ? strtoupper( $m[1] ) : '';
91  }
92 
93  private static function getQueryTables( $sql, $tablePrefix ) {
94  // Regexes for basic queries that can create/change/drop temporary tables.
95  // For simplicity, this only looks for tables with sensible, alphanumeric, names;
96  // temporary tables only need simple programming names anyway.
97  static $regexes = null;
98  if ( $regexes === null ) {
99  // Regex with a group for quoted table 0 and a group for quoted tables 1..N
100  $qts = '((?:\w+|`\w+`|\'\w+\'|"\w+")(?:\s*,\s*(?:\w+|`\w+`|\'\w+\'|"\w+"))*)';
101  // Regex to get query verb, table 0, and tables 1..N
102  $regexes = [
103  // DML write queries
104  "/^(INSERT|REPLACE)\s+(?:\w+\s+)*?INTO\s+$qts/i",
105  "/^(UPDATE)(?:\s+OR\s+\w+|\s+IGNORE|\s+ONLY)?\s+$qts/i",
106  "/^(DELETE)\s+(?:\w+\s+)*?FROM(?:\s+ONLY)?\s+$qts/i",
107  // DDL write queries
108  "/^(CREATE)\s+TEMPORARY\s+TABLE(?:\s+IF\s+NOT\s+EXISTS)?\s+$qts/i",
109  "/^(DROP)\s+(?:TEMPORARY\s+)?TABLE(?:\s+IF\s+EXISTS)?\s+$qts/i",
110  "/^(TRUNCATE)\s+(?:TEMPORARY\s+)?TABLE\s+$qts/i",
111  "/^(ALTER)\s+TABLE\s+$qts/i"
112  ];
113  }
114 
115  $queryTables = [];
116  foreach ( $regexes as $regex ) {
117  if ( preg_match( $regex, $sql, $m, PREG_UNMATCHED_AS_NULL ) ) {
118  $allTables = preg_split( '/\s*,\s*/', $m[2] );
119  foreach ( $allTables as $quotedTable ) {
120  $tableName = trim( $quotedTable, "\"'`" );
121  $tableName = preg_replace( '/^' . $tablePrefix . '/', '', $tableName );
122  $queryTables[] = $tableName;
123  }
124  break;
125  }
126  }
127 
128  return $queryTables;
129  }
130 
139  public static function generalizeSQL( $sql ) {
140  # This does the same as the regexp below would do, but in such a way
141  # as to avoid crashing php on some large strings.
142  # $sql = preg_replace( "/'([^\\\\']|\\\\.)*'|\"([^\\\\\"]|\\\\.)*\"/", "'X'", $sql );
143 
144  $sql = str_replace( "\\\\", '', $sql );
145  $sql = str_replace( "\\'", '', $sql );
146  $sql = str_replace( "\\\"", '', $sql );
147  $sql = preg_replace( "/'.*'/s", "'X'", $sql );
148  $sql = preg_replace( '/".*"/s', "'X'", $sql );
149 
150  # All newlines, tabs, etc replaced by single space
151  $sql = preg_replace( '/\s+/', ' ', $sql );
152 
153  # All numbers => N,
154  # except the ones surrounded by characters, e.g. l10n
155  $sql = preg_replace( '/-?\d+(,-?\d+)+/', 'N,...,N', $sql );
156  $sql = preg_replace( '/(?<![a-zA-Z])-?\d+(?![a-zA-Z])/', 'N', $sql );
157 
158  return $sql;
159  }
160 }
This is to contain any regex on SQL work and get rid of them eventually.
static generalizeSQL( $sql)
Removes most variables from an SQL query and replaces them with X or N for numbers.
static buildQuery(string $sql, $flags, string $tablePrefix='')
Holds information on Query to be executed.
Definition: Query.php:31