Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
86.30% |
63 / 73 |
|
64.29% |
9 / 14 |
CRAP | |
0.00% |
0 / 1 |
DomNavigator | |
86.30% |
63 / 73 |
|
64.29% |
9 / 14 |
30.02 | |
0.00% |
0 / 1 |
__construct | |
80.00% |
8 / 10 |
|
0.00% |
0 / 1 |
3.07 | |||
findElementsWithClass | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
findElementsWithClassPrefix | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
findElementsWithClassAndLang | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
1 | |||
findElementsWithId | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
findElementsWithIdPrefix | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
findElementsWithAttribute | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
hasClass | |
80.00% |
4 / 5 |
|
0.00% |
0 / 1 |
2.03 | |||
getFirstClassWithPrefix | |
87.50% |
7 / 8 |
|
0.00% |
0 / 1 |
4.03 | |||
closest | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
4 | |||
findByXpath | |
37.50% |
3 / 8 |
|
0.00% |
0 / 1 |
2.98 | |||
getByXpath | |
75.00% |
3 / 4 |
|
0.00% |
0 / 1 |
2.06 | |||
nextElementSibling | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
3 | |||
handleElementOrList | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 |
1 | <?php |
2 | |
3 | namespace CommonsMetadata; |
4 | |
5 | use DOMDocument; |
6 | use DOMElement; |
7 | use DOMNode; |
8 | use DOMNodeList; |
9 | use DOMXPath; |
10 | |
/**
 * A very simple wrapper to DOMDocument to make it easy to traverse nodes which match
 * simple CSS selectors.
 *
 * The HTML fragment passed to the constructor is wrapped into a minimal UTF-8 HTML document,
 * parsed once, and queried through DOMXPath afterwards.
 */
class DomNavigator {
	/**
	 * XPath evaluator bound to the parsed document; every find*() method goes through it.
	 * @var DOMXPath
	 */
	protected $domx;

	/**
	 * @param string $html HTML fragment; it is inserted into the <body> of a wrapper document
	 *   which declares UTF-8 as its charset.
	 */
	public function __construct( $html ) {
		$oldLoaderState = false;
		if ( LIBXML_VERSION < 20900 ) {
			// Only older libxml2 builds need the entity loader switched off explicitly.
			$oldLoaderState = libxml_disable_entity_loader( true );
		}
		// Collect parser complaints internally instead of raising PHP warnings for sloppy HTML.
		$oldHandlerState = libxml_use_internal_errors( true );
		$dom = new DOMDocument();
		$dom->loadHTML( '<!doctype html><html><head><meta charset="UTF-8"/></head><body>' . $html . '</body></html>' );
		$this->domx = new DOMXPath( $dom );
		// Put the global libxml settings back the way the caller had them.
		if ( LIBXML_VERSION < 20900 ) {
			libxml_disable_entity_loader( $oldLoaderState );
		}
		libxml_use_internal_errors( $oldHandlerState );
	}

	/**
	 * Returns a list of elements of the given type which have the given class.
	 * (In other words, this is equivalent to the CSS selector 'element.class'.)
	 * @param string|array $element HTML tag name (* to accept all) or array of tag names
	 * @param string $class
	 * @param DOMNode|null $context if present, the method will only search inside this element
	 * @return DOMNodeList|DOMElement[]
	 */
	public function findElementsWithClass( $element, $class, DOMNode $context = null ) {
		$element = $this->handleElementOrList( $element );
		// Padding both sides with a space turns the substring test into a whole-token test.
		$needle = $this->quoteXpathLiteral( " $class " );
		$xpath = "./descendant-or-self::{$element}" .
			"[contains(concat(' ', normalize-space(@class), ' '), $needle)]";
		return $this->findByXpath( $xpath, $context );
	}

	/**
	 * Returns a list of elements of the given type which have a class starting with the given
	 * string.
	 * @param string|array $element HTML tag name (* to accept all) or array of tag names
	 * @param string $classPrefix
	 * @param DOMNode|null $context if present, the method will only search inside this element
	 * @return DOMNodeList|DOMElement[]
	 */
	public function findElementsWithClassPrefix( $element, $classPrefix, DOMNode $context = null ) {
		$element = $this->handleElementOrList( $element );
		// Only the leading space is added, so any class token beginning with the prefix matches.
		$needle = $this->quoteXpathLiteral( " $classPrefix" );
		$xpath = "./descendant-or-self::{$element}" .
			"[contains(concat(' ', normalize-space(@class)), $needle)]";
		return $this->findByXpath( $xpath, $context );
	}

	/**
	 * Returns a list of elements of the given type which have the given class and any lang
	 * attribute. (In other words, this is equivalent to the CSS selector 'element.class[lang]'.)
	 * @param string|array $element HTML tag name (* to accept all) or array of tag names
	 * @param string $class
	 * @param DOMNode|null $context if present, the method will only search inside this element
	 * @return DOMNodeList|DOMElement[]
	 */
	public function findElementsWithClassAndLang( $element, $class, DOMNode $context = null ) {
		$element = $this->handleElementOrList( $element );
		$needle = $this->quoteXpathLiteral( " $class " );
		$xpath = "./descendant-or-self::{$element}" .
			"[@lang and contains(concat(' ', normalize-space(@class), ' '), $needle)]";
		return $this->findByXpath( $xpath, $context );
	}

	/**
	 * Returns a list of elements of the given type which have the given id.
	 * (In other words, this is equivalent to the CSS selector 'element#id'.)
	 * When there are multiple elements with this ID, all are returned.
	 * @param string|array $element HTML tag name (* to accept all) or array of tag names
	 * @param string $id
	 * @param DOMNode|null $context if present, the method will only search inside this element
	 * @return DOMNodeList|DOMElement[]
	 */
	public function findElementsWithId( $element, $id, DOMNode $context = null ) {
		$element = $this->handleElementOrList( $element );
		$idLiteral = $this->quoteXpathLiteral( $id );
		$xpath = "./descendant-or-self::{$element}[@id=$idLiteral]";
		return $this->findByXpath( $xpath, $context );
	}

	/**
	 * Returns a list of elements of the given type which have an id starting with the given prefix.
	 * (In other words, this is equivalent to the CSS selector 'element[id^=prefix]'.)
	 * @param string|array $element HTML tag name (* to accept all) or array of tag names
	 * @param string $idPrefix
	 * @param DOMNode|null $context if present, the method will only search inside this element
	 * @return DOMNodeList|DOMElement[]
	 */
	public function findElementsWithIdPrefix( $element, $idPrefix, DOMNode $context = null ) {
		$element = $this->handleElementOrList( $element );
		$prefixLiteral = $this->quoteXpathLiteral( $idPrefix );
		$xpath = "./descendant-or-self::{$element}[starts-with(@id, $prefixLiteral)]";
		return $this->findByXpath( $xpath, $context );
	}

	/**
	 * Returns a list of elements of the given type which have the given attribute with any value.
	 * (In other words, this is equivalent to the CSS selector 'element[attribute]'.)
	 * When there are multiple elements with this attribute, all are returned.
	 * @param string|array $element HTML tag name (* to accept all) or array of tag names
	 * @param string $attribute attribute name; it is inserted into the XPath expression as-is
	 * @param DOMNode|null $context if present, the method will only search inside this element
	 * @return DOMNodeList|DOMElement[]
	 */
	public function findElementsWithAttribute( $element, $attribute, DOMNode $context = null ) {
		$element = $this->handleElementOrList( $element );
		$xpath = "./descendant-or-self::{$element}[@{$attribute}]";
		return $this->findByXpath( $xpath, $context );
	}

	/**
	 * Returns true if the node has all the specified classes.
	 * Class names are separated by any run of HTML whitespace, both in the node's class
	 * attribute and in $classes, which matches what the XPath-based finders accept.
	 * @param DOMNode $node
	 * @param string $classes one or more class names (separated with whitespace)
	 * @return bool false when $node is not an element
	 */
	public function hasClass( DOMNode $node, $classes ) {
		if ( !$node instanceof \DOMElement ) {
			return false;
		}
		$nodeClasses = $this->splitClassNames( $node->getAttribute( 'class' ) );
		$testClasses = $this->splitClassNames( $classes );
		// An empty difference means every requested class is present on the node.
		return !array_diff( $testClasses, $nodeClasses );
	}

	/**
	 * Returns the first class matching a prefix.
	 * @param DOMNode $node
	 * @param string $classPrefix
	 * @return string|null the full class name, or null when $node is not an element or none
	 *   of its classes starts with the prefix
	 */
	public function getFirstClassWithPrefix( DOMNode $node, $classPrefix ) {
		if ( !$node instanceof \DOMElement ) {
			return null;
		}
		// The prefix length does not change between iterations, so compute it once.
		$length = strlen( $classPrefix );
		foreach ( $this->splitClassNames( $node->getAttribute( 'class' ) ) as $class ) {
			if ( strncmp( $class, $classPrefix, $length ) === 0 ) {
				return $class;
			}
		}
		return null;
	}

	/**
	 * Returns the closest ancestor of the given node, which is of the given type
	 * (like jQuery.closest()). The node itself is returned when it already matches.
	 * @param DOMNode $node
	 * @param string $element HTML tag name
	 * @return DOMElement|null
	 */
	public function closest( DOMNode $node, $element ) {
		while ( !$node instanceof DOMElement || $node->nodeName !== $element ) {
			if ( $node->parentNode instanceof DOMNode ) {
				$node = $node->parentNode;
			} else {
				// Ran out of ancestors without finding a match.
				return null;
			}
		}
		// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
		return $node;
	}

	/**
	 * Returns the nodes matching an XPath expression.
	 * When the query fails, the failure is written to the 'CommonsMetadata' debug log and an
	 * empty list is returned instead.
	 * @param string $xpath
	 * @param DOMNode|null $context
	 * @return DOMNodeList|DOMNode[]
	 */
	public function findByXpath( $xpath, DOMNode $context = null ) {
		$results = $this->domx->query( $xpath, $context );
		if ( $results === false ) {
			$error = libxml_get_last_error();
			if ( $error ) {
				$logMessage = sprintf( 'HTML parsing error: %s (%s) at line %s, column %s',
					$error->message, $error->code, $error->line, $error->column );
			} else {
				// libxml has nothing recorded for this failure; log the expression instead of
				// dereferencing a non-object.
				$logMessage = sprintf( 'XPath query failed: %s', $xpath );
			}
			wfDebugLog( 'CommonsMetadata', $logMessage );
			return new DOMNodeList();
		}
		return $results;
	}

	/**
	 * Returns the first node matching an XPath expression, or null.
	 * @param string $xpath
	 * @param DOMNode|null $context
	 * @return DOMNode|null
	 */
	public function getByXpath( $xpath, DOMNode $context = null ) {
		$results = $this->findByXpath( $xpath, $context );
		// Returning from inside the loop yields the first item without indexing into the list.
		foreach ( $results as $result ) {
			return $result;
		}
		return null;
	}

	/**
	 * Return next sibling element (or null)
	 * Siblings which are not elements (such as text nodes) are skipped.
	 * @param DOMElement $node
	 * @return DOMElement|null
	 */
	public function nextElementSibling( DOMElement $node ) {
		$nextSibling = $node->nextSibling;
		while ( $nextSibling && !$nextSibling instanceof DOMElement ) {
			$nextSibling = $nextSibling->nextSibling;
		}
		return $nextSibling;
	}

	/**
	 * Takes an element name or array of element names and returns an XPath expression which can
	 * be used as an element name, but matches all of the provided elements.
	 * For example, [ 'a', 'b' ] becomes '*[self::a or self::b]'.
	 * @param string|array $elmementOrList
	 * @return string
	 */
	protected function handleElementOrList( $elmementOrList ) {
		if ( is_array( $elmementOrList ) ) {
			return '*[' . implode( ' or ', array_map(
				static function ( $el ) {
					return 'self::' . $el;
				},
				$elmementOrList
			) ) . ']';
		} else {
			return $elmementOrList;
		}
	}

	/**
	 * Splits a class attribute value (or a caller-supplied class list) into class names.
	 * Any run of HTML whitespace (space, tab, LF, FF, CR) counts as one separator and empty
	 * tokens are dropped.
	 * @param string $classNames
	 * @return string[]
	 */
	private function splitClassNames( $classNames ) {
		$tokens = preg_split( '/[ \t\n\f\r]+/', (string)$classNames, -1, PREG_SPLIT_NO_EMPTY );
		return $tokens === false ? [] : $tokens;
	}

	/**
	 * Turns an arbitrary string into an XPath 1.0 string literal.
	 * XPath 1.0 literals have no escape syntax, so the delimiter is chosen to avoid whichever
	 * quote character the value contains; a value containing both kinds is assembled with
	 * concat().
	 * @param string $value
	 * @return string XPath expression evaluating to $value
	 */
	private function quoteXpathLiteral( $value ) {
		$value = (string)$value;
		if ( strpos( $value, "'" ) === false ) {
			return "'" . $value . "'";
		}
		if ( strpos( $value, '"' ) === false ) {
			return '"' . $value . '"';
		}
		// Single-quoted pieces joined by a double-quoted apostrophe: concat('a', "'", 'b').
		return "concat('" . implode( "', \"'\", '", explode( "'", $value ) ) . "')";
	}

}