Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 31 |
|
0.00% |
0 / 6 |
CRAP | |
0.00% |
0 / 1 |
Sanitizer | |
0.00% |
0 / 31 |
|
0.00% |
0 / 6 |
380 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
sanitizeText | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
sanitizeHtml | |
0.00% |
0 / 1 |
|
0.00% |
0 / 1 |
2 | |||
sanitizeAttributes | |
0.00% |
0 / 6 |
|
0.00% |
0 / 1 |
6 | |||
sanitizeUrl | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
42 | |||
unparseUrl | |
0.00% |
0 / 10 |
|
0.00% |
0 / 1 |
72 |
1 | <?php |
2 | /** |
3 | * Sanitizer.php |
4 | * |
5 | * This file is part of the Codex design system, the official design system |
6 | * for Wikimedia projects. It provides the `Sanitizer` class, which is responsible |
7 | * for sanitizing data before rendering. The Sanitizer ensures that all output is safe |
8 | * and helps prevent XSS and other security vulnerabilities. |
9 | * |
10 | * The Sanitizer class includes methods for sanitizing text, HTML content, and HTML attributes. |
11 | * By centralizing the sanitization logic, it adheres to the Single Responsibility Principle |
12 | * and enhances the maintainability and security of the codebase. |
13 | * |
14 | * @category Utility |
15 | * @package Codex\Utility |
16 | * @since 0.1.0 |
17 | * @author Doğu Abaris <abaris@null.net> |
18 | * @license https://www.gnu.org/copyleft/gpl.html GPL-2.0-or-later |
19 | * @link https://doc.wikimedia.org/codex/main/ Codex Documentation |
20 | */ |
21 | |
22 | namespace Wikimedia\Codex\Utility; |
23 | |
24 | use HTMLPurifier; |
25 | |
26 | /** |
27 | * Sanitizer is a class responsible for sanitizing data before rendering. |
28 | * |
29 | * This class provides methods to sanitize text, HTML content, and attributes. |
30 | * It ensures that all data outputted to the user is properly sanitized, preventing XSS |
31 | * and other injection attacks. |
32 | * |
33 | * @category Utility |
34 | * @package Codex\Utility |
35 | * @since 0.1.0 |
36 | * @author Doğu Abaris <abaris@null.net> |
37 | * @license https://www.gnu.org/copyleft/gpl.html GPL-2.0-or-later |
38 | * @link https://doc.wikimedia.org/codex/main/ Codex Documentation |
39 | */ |
class Sanitizer {

    /**
     * The HTMLPurifier instance used by sanitizeHtml().
     */
    protected HTMLPurifier $htmlPurifier;

    /**
     * Constructor for the Sanitizer class.
     *
     * @param HTMLPurifier $htmlPurifier The HTMLPurifier instance.
     */
    public function __construct( HTMLPurifier $htmlPurifier ) {
        $this->htmlPurifier = $htmlPurifier;
    }

    /**
     * Sanitize a plain text string.
     *
     * Escapes the HTML special characters (including both quote styles) so the
     * string can be emitted as text. A null input is treated as an empty string.
     *
     * @since 0.1.0
     * @param string|null $text The plain text to sanitize.
     * @return string The escaped text.
     */
    public function sanitizeText( ?string $text ): string {
        return htmlspecialchars( $text ?? '', ENT_QUOTES, 'UTF-8' );
    }

    /**
     * Sanitize HTML content.
     *
     * Delegates to the injected HTMLPurifier instance and returns its result.
     *
     * @since 0.1.0
     * @param string $html The HTML content to sanitize.
     * @return string The purified HTML content.
     */
    public function sanitizeHtml( string $html ): string {
        return $this->htmlPurifier->purify( $html );
    }

    /**
     * Sanitize an array of HTML attributes.
     *
     * Escapes the HTML special characters in both the keys and the values of
     * the given array. A null value is emitted as an empty string.
     *
     * @since 0.1.0
     * @param array $attributes The associative array of attributes to sanitize.
     * @return array The attributes with escaped keys and values.
     */
    public function sanitizeAttributes( array $attributes ): array {
        $sanitized = [];
        foreach ( $attributes as $key => $value ) {
            // Array keys may be integers; cast explicitly so the call does not
            // depend on implicit scalar coercion.
            $sanitizedKey = htmlspecialchars( (string)$key, ENT_QUOTES, 'UTF-8' );
            // Passing null to htmlspecialchars() is deprecated since PHP 8.1;
            // coalescing keeps the previous result (an empty string).
            $sanitizedValue = htmlspecialchars( $value ?? '', ENT_QUOTES, 'UTF-8' );
            $sanitized[$sanitizedKey] = $sanitizedValue;
        }

        return $sanitized;
    }

    /**
     * Sanitize a URL.
     *
     * Strips characters that FILTER_SANITIZE_URL removes, validates the result
     * with FILTER_VALIDATE_URL, accepts only the http and https schemes, rebuilds
     * the URL from its parsed components and escapes it for HTML output.
     *
     * @since 0.1.0
     * @param string|null $url The URL to sanitize.
     * @return string The escaped URL, or an empty string when the input is
     *  null, empty, invalid, or uses a scheme other than http/https.
     */
    public function sanitizeUrl( ?string $url ): string {
        if ( $url === null || $url === '' ) {
            return '';
        }

        $sanitizedUrl = filter_var( $url, FILTER_SANITIZE_URL );

        if ( !filter_var( $sanitizedUrl, FILTER_VALIDATE_URL ) ) {
            return '';
        }

        $parsedUrl = parse_url( $sanitizedUrl );
        // parse_url() returns false for seriously malformed input.
        if ( !is_array( $parsedUrl ) ) {
            return '';
        }

        $allowedSchemes = [ 'http', 'https' ];
        if (
            !isset( $parsedUrl['scheme'] ) ||
            !in_array( strtolower( $parsedUrl['scheme'] ), $allowedSchemes, true )
        ) {
            return '';
        }

        $reconstructedUrl = $this->unparseUrl( $parsedUrl );

        return htmlspecialchars( $reconstructedUrl, ENT_QUOTES, 'UTF-8' );
    }

    /**
     * Helper function to rebuild a URL from its parsed components.
     *
     * Components that are absent from the array contribute nothing to the
     * result.
     *
     * @since 0.1.0
     * @param array $parsedUrl The parsed URL components, keyed as in the
     *  array returned by parse_url().
     * @return string The reconstructed URL.
     */
    private function unparseUrl( array $parsedUrl ): string {
        $scheme = isset( $parsedUrl['scheme'] ) ? $parsedUrl['scheme'] . '://' : '';
        $host = $parsedUrl['host'] ?? '';
        $port = isset( $parsedUrl['port'] ) ? ':' . $parsedUrl['port'] : '';
        $user = $parsedUrl['user'] ?? '';
        $pass = isset( $parsedUrl['pass'] ) ? ':' . $parsedUrl['pass'] : '';
        // Compare against '' instead of relying on truthiness: the string "0"
        // is falsy in PHP, so a user named "0" would otherwise lose its "@"
        // separator and be concatenated directly onto the host.
        $userInfo = ( $user !== '' || $pass !== '' ) ? "$user$pass@" : '';
        $path = $parsedUrl['path'] ?? '';
        $query = isset( $parsedUrl['query'] ) ? '?' . $parsedUrl['query'] : '';
        $fragment = isset( $parsedUrl['fragment'] ) ? '#' . $parsedUrl['fragment'] : '';

        return "$scheme$userInfo$host$port$path$query$fragment";
    }
}