Code Coverage for /workspace/src/extensions/Collection/includes/BookRenderer.php

	Code Coverage
	Lines			Functions and Methods				Classes and Traits
Total	83.16% covered (warning)	83.16%	163 / 196	77.78% covered (warning)	77.78%	7 / 9	CRAP	0.00% covered (danger)	0.00%	0 / 1
BookRenderer	83.16% covered (warning)	83.16%	163 / 196	77.78% covered (warning)	77.78%	7 / 9	40.85	0.00% covered (danger)	0.00%	0 / 1
__construct	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
getBookTemplateData	83.33% covered (warning)	83.33%	95 / 114	0.00% covered (danger)	0.00%	0 / 1	17.19
renderBook	100.00% covered (success)	100.00%	2 / 2	100.00% covered (success)	100.00%	1 / 1	1
getBookChapterData	100.00% covered (success)	100.00%	7 / 7	100.00% covered (success)	100.00%	1 / 1	1
getArticleChaptersData	100.00% covered (success)	100.00%	18 / 18	100.00% covered (success)	100.00%	1 / 1	3
getAdditionalBookChapters	46.15% covered (danger)	46.15%	12 / 26	0.00% covered (danger)	0.00%	0 / 1	4.41
getBodyContents	100.00% covered (success)	100.00%	1 / 1	100.00% covered (success)	100.00%	1 / 1	1
getNestedOutline	100.00% covered (success)	100.00%	15 / 15	100.00% covered (success)	100.00%	1 / 1	3
fixTemplateData	100.00% covered (success)	100.00%	12 / 12	100.00% covered (success)	100.00%	1 / 1	6

1	<?php
2
3	namespace MediaWiki\Extension\Collection;
4
5	use LogicException;
6	use MediaWiki\Html\Html;
7	use MediaWiki\Html\TemplateParser;
8	use MediaWiki\Parser\Sanitizer;
9	use MediaWiki\Title\Title;
10
/**
 * Renders the HTML view of a book by concatenating and transforming the HTML of its
 * articles and generating some leading/trailing pages (table of contents, contributors,
 * images, license).
 */
class BookRenderer {

	/** @var TemplateParser */
	private $templateParser;

	/**
	 * @param TemplateParser $templateParser Parser used to process the 'book' template.
	 */
	public function __construct( TemplateParser $templateParser ) {
		$this->templateParser = $templateParser;
	}

	/**
	 * Build the data needed to render the concatenated book page.
	 *
	 * @param array[] $collection Collection, as returned by CollectionSession::getCollection().
	 *  The 'title' and 'items' keys are read; 'subtitle' is optional.
	 * @param string[] $pages Map of prefixed DB key => Parsoid HTML.
	 * @param array[] $metadata Metadata, as returned by fetchMetadata(). The 'displaytitle'
	 *  and 'sections' maps (keyed by prefixed DB key) are read for every article;
	 *  'contributors', 'images' and 'license' are optional.
	 * @return array Template data with the keys 'toc' (title, subtitle, toctitle, tocitems),
	 *  'html' (the concatenated book body), and 'contributors', 'images' and 'license'
	 *  (each either an array of template data or false when the section is not rendered).
	 * @throws LogicException When an item is neither a chapter nor an article.
	 */
	public function getBookTemplateData( $collection, $pages, $metadata ) {
		$items = $collection['items'];
		'@phan-var array[] $items';

		// A single pass is enough to learn which kinds of items the book contains.
		$hasChapters = false;
		$articleCount = 0;
		foreach ( $items as $item ) {
			if ( $item['type'] === 'chapter' ) {
				$hasChapters = true;
			} elseif ( $item['type'] === 'article' ) {
				$articleCount++;
			}
		}
		$hasArticles = $articleCount > 0;

		$headingCounter = new HeadingCounter();
		$tocHeadingCounter = new HeadingCounter();
		$bookBodyHtml = '';
		$outline = [];

		// First we need to render the articles as we can't know the TOC anchors for sure
		// until we have resolved id conflicts.
		// FastFormatter chokes on Parsoid HTML. HtmlFormatter is still plenty fast anyway.
		$formatter = new \Wikimedia\RemexHtml\Serializer\HtmlFormatter();
		$serializer = new \Wikimedia\RemexHtml\Serializer\Serializer( $formatter );
		$munger = new RemexCollectionMunger( $serializer, [
			'topHeadingLevel' => $hasChapters ? 3 : 2,
		] );

		foreach ( $items as $item ) {
			$titleText = $item['title'];
			if ( $item['type'] === 'chapter' ) {
				$outline[] = $this->getBookChapterData( $titleText, $tocHeadingCounter );
				$bookBodyHtml .= Html::element( 'h1', [
					'id' => 'mw-book-chapter-' . Sanitizer::escapeIdForAttribute( $titleText ),
					'class' => 'mw-book-chapter',
					'data-mw-sectionnumber' => $headingCounter->incrementAndGet( -2 ),
				], $titleText ) . "\n";
			} elseif ( $item['type'] === 'article' ) {
				// Fall back to the raw title text when it cannot be parsed into a Title.
				$articleTitle = Title::newFromText( $titleText );
				$dbkey = $articleTitle ? $articleTitle->getPrefixedDBkey() : $titleText;
				$html = $this->getBodyContents( $pages[$dbkey] );

				$headingAttribs = [
					'id' => 'mw-book-article-' . $dbkey,
					'class' => 'mw-book-article',
				];
				// Article headings are only numbered when there is more than one article.
				if ( $articleCount > 1 ) {
					$headingAttribs['data-mw-sectionnumber'] = $headingCounter->incrementAndGet( -1 );
				}
				$bookBodyHtml .= Html::rawElement( 'h2', $headingAttribs,
					$metadata['displaytitle'][$dbkey] ) . "\n";

				$munger->startCollectionSection( './' . $dbkey, $metadata['sections'][$dbkey],
					$headingCounter );
				$treeBuilder = new \Wikimedia\RemexHtml\TreeBuilder\TreeBuilder( $munger, [] );
				$dispatcher = new \Wikimedia\RemexHtml\TreeBuilder\Dispatcher( $treeBuilder );
				$tokenizer = new \Wikimedia\RemexHtml\Tokenizer\Tokenizer( $dispatcher, $html, [
					// HTML comes from Parsoid so we can skip validation
					'ignoreErrors' => true,
					'ignoreCharRefs' => true,
					'ignoreNulls' => true,
					'skipPreprocess' => true,
				] );
				$tokenizer->execute( [
					'fragmentNamespace' => \Wikimedia\RemexHtml\HTMLData::NS_HTML,
					'fragmentName' => 'body',
				] );

				// The TOC entries are collected only after the tokenizer has run, so that they
				// see whatever the munger did to the section data (presumably updated ids and
				// levels - verify against RemexCollectionMunger).
				$outline = array_merge( $outline,
					$this->getArticleChaptersData( $dbkey, $tocHeadingCounter,
						$metadata['displaytitle'], $metadata['sections'], $articleCount )
				);
				$bookBodyHtml .= Html::openElement( 'article' )
					. substr( $serializer->getResult(), 15 ) // strip "<!DOCTYPE html>"
					. Html::closeElement( 'article' );
			} else {
				throw new LogicException( 'Unknown collection item type: ' . $item['type'] );
			}
		}

		// The trailing sections sit on the topmost heading level used by the book.
		if ( $hasChapters ) {
			$metadataLevel = -2;
		} elseif ( $articleCount > 1 ) {
			$metadataLevel = -1;
		} else {
			$metadataLevel = 0;
		}
		$hasImages = !empty( $metadata['images'] );
		$hasLicense = !empty( $metadata['license'] );

		if ( $hasArticles ) {
			$outline = array_merge( $outline,
				$this->getAdditionalBookChapters( $tocHeadingCounter, $metadataLevel,
					$hasImages, $hasLicense )
			);
		}

		$templateData = [
			'toc' => [
				'title' => $collection['title'],
				'subtitle' => $collection['subtitle'] ?? false,
				'toctitle' => wfMessage( 'coll-toc-title' )->text(),
				'tocitems' => $this->getNestedOutline( $outline ),
			],
			'html' => $bookBodyHtml,
			'contributors' => false,
			'images' => false,
			'license' => false,
		];

		// The heading counter must be advanced in document order:
		// contributors, then images, then license.
		if ( $hasArticles ) {
			$templateData['contributors'] = [
				// Missing contributor metadata yields an empty list instead of a TypeError.
				'names' => array_keys( $metadata['contributors'] ?? [] ),
				'headingMsg' => wfMessage( 'coll-contributors-title' )->text(),
				'level' => $headingCounter->incrementAndGetTopLevel(),
			];
		}
		if ( $hasImages ) {
			$messages = [
				'sourceMsg' => wfMessage( 'coll-images-source' )->text(),
				'licenseMsg' => wfMessage( 'coll-images-license' )->text(),
				'artistMsg' => wfMessage( 'coll-images-original-artist' )->text()
			];
			// Mustache templates in Lightncandy are not able to access template data in parent
			// object; to circumvent that we have to repeat the common messages across all the
			// items.
			$images = [];
			foreach ( $metadata['images'] as $image ) {
				$images[] = array_merge( $image, $messages );
			}

			$templateData['images'] = [
				'images' => $images,
				'headingMsg' => wfMessage( 'coll-images-title' )->text(),
				'level' => $headingCounter->incrementAndGetTopLevel(),
			];
		}
		if ( $hasLicense ) {
			$templateData['license'] = [
				'license' => $metadata['license'],
				'headingMsg' => wfMessage( 'coll-license-title' )->text(),
				'level' => $headingCounter->incrementAndGetTopLevel(),
			];
		}
		return $templateData;
	}

	/**
	 * Generate the concatenated page.
	 *
	 * @param array[] $collection Collection, as returned by CollectionSession::getCollection().
	 * @param string[] $pages Map of prefixed DB key => Parsoid HTML.
	 * @param array[] &$metadata Metadata, as returned by fetchMetadata(). Still accepted by
	 *  reference so existing callers keep working, but it is handed on to
	 *  getBookTemplateData() by value, so this method does not modify the caller's array.
	 * @return string HTML of the rendered book (without body/head).
	 */
	public function renderBook( $collection, $pages, &$metadata ) {
		$book = $this->getBookTemplateData( $collection, $pages, $metadata );
		return $this->templateParser->processTemplate( 'book', $this->fixTemplateData( $book ) );
	}

	/**
	 * Generate the outline (table of contents) entry for a book chapter.
	 *
	 * @param string $titleText Title of the chapter.
	 * @param HeadingCounter $tocHeadingCounter Counter that numbers the TOC entries.
	 * @return array Outline item with the keys text, type, level, anchor and number.
	 */
	private function getBookChapterData( $titleText, $tocHeadingCounter ) {
		return [
			'text' => htmlspecialchars( $titleText, ENT_QUOTES ),
			'type' => 'chapter',
			'level' => -2,
			'anchor' => 'mw-book-chapter-' . Sanitizer::escapeIdForAttribute( $titleText ),
			'number' => $tocHeadingCounter->incrementAndGet( -2 ),
		];
	}

	/**
	 * Generate the outline entries for one article: an entry for the article itself (only
	 * when the book has more than one article) followed by one entry per section.
	 *
	 * @param string $dbkey Prefixed DB key of the article to extract sections for.
	 * @param HeadingCounter $tocHeadingCounter Counter that numbers the TOC entries.
	 * @param array[] $displayTitles Map of DB key => display title for the book.
	 * @param array[] $sections Map of DB key => section list; each section is a triple
	 *  [ title => ..., id => ..., level => ... ]. RemexCollectionMunger will update the id/level
	 *  to keep in sync with document changes.
	 * @param int $articleCount Number of articles in the book.
	 * @return array[] Flat list of outline items.
	 */
	private function getArticleChaptersData(
		$dbkey, $tocHeadingCounter, $displayTitles, $sections, $articleCount
	) {
		$chapters = [];

		if ( $articleCount > 1 ) {
			$chapters[] = [
				'text' => $displayTitles[$dbkey],
				'type' => 'article',
				'level' => -1,
				'anchor' => 'mw-book-article-' . $dbkey,
				'number' => $tocHeadingCounter->incrementAndGet( -1 ),
			];
		}
		foreach ( $sections[$dbkey] as $section ) {
			'@phan-var array $section';
			$chapters[] = [
				'text' => $section['title'],
				'type' => 'section',
				'level' => $section['level'],
				'anchor' => $section['id'],
				'number' => $tocHeadingCounter->incrementAndGet( $section['level'] ),
			];
		}
		return $chapters;
	}

	/**
	 * Generate the outline entries for the sections that trail the book body: contributors
	 * (always), images and license (only when requested).
	 *
	 * @param HeadingCounter $tocHeadingCounter Counter that numbers the TOC entries.
	 * @param int $metadataLevel Table of contents level to put the entries on.
	 * @param bool $hasImages Whether the book contains an images section.
	 * @param bool $hasLicense Whether the book contains a license section.
	 * @return array[] Flat list of outline items.
	 */
	private function getAdditionalBookChapters(
		$tocHeadingCounter, $metadataLevel, $hasImages = false, $hasLicense = false
	) {
		// type => [ heading message key, anchor ], in document order
		$sections = [
			'contributors' => [ 'coll-contributors-title', 'mw-book-contributors' ],
		];
		if ( $hasImages ) {
			$sections['images'] = [ 'coll-images-title', 'mw-book-images' ];
		}
		if ( $hasLicense ) {
			$sections['license'] = [ 'coll-license-title', 'mw-book-license' ];
		}

		$outline = [];
		foreach ( $sections as $type => $section ) {
			$outline[] = [
				'text' => wfMessage( $section[0] )->text(),
				'type' => $type,
				'level' => $metadataLevel,
				'anchor' => $section[1],
				'number' => $tocHeadingCounter->incrementAndGetTopLevel(),
			];
		}
		return $outline;
	}

	/**
	 * Get the part inside the <body> from an HTML file.
	 * Not very robust (a <body> tag in a comment or CDATA section could confuse it) but the
	 * <head> section has no user-controlled part so using it with Parsoid HTML should be fine.
	 *
	 * @param string $html Full HTML document; input without <body> tags is returned as is.
	 * @return string
	 */
	private function getBodyContents( $html ) {
		// Strip everything up to and including the opening <body ...> tag, and the trailing
		// </body></html>. The two parts are alternatives, so each is removed independently.
		$body = preg_replace( '/(^.*?<body\b[^>]*>)|(<\/body>\s*<\/html>\s*$)/si', '', $html );
		// preg_replace() returns null on a PCRE error; keep the document rather than lose it.
		return $body ?? $html;
	}

	/**
	 * Turns a flat outline into a nested outline. Each outline item will contain
	 * a field called 'children' which is an array of child outline items.
	 *
	 * @param array[] $outline An outline, as constructed by getBookTemplateData().
	 *  Every item needs a 'level' key; a lower level means closer to the top.
	 * @return array[]
	 */
	public function getNestedOutline( array $outline ) {
		$nestedOutline = [];
		// level => last (currently open) item on that level; keys stay in ascending order
		// because every level that is not lower than the current one is dropped before
		// the current one is appended.
		$lastItems = [];
		foreach ( $outline as &$item ) {
			$item['children'] = [];
			$level = $item['level'];

			// Only items on a strictly higher-up level can be a parent of this one.
			$lastItems = array_filter( $lastItems, static function ( $key ) use ( $level ) {
				return $key < $level;
			}, ARRAY_FILTER_USE_KEY );

			if ( $lastItems ) {
				// The last remaining open item is the closest ancestor. Items are linked by
				// reference so that children added later still show up in the result.
				end( $lastItems );
				$key = key( $lastItems );
				$lastItems[$key]['children'][] = &$item;
			} else {
				$nestedOutline[] = &$item;
			}
			$lastItems[$level] = &$item;
		}
		// Break the loop reference so a later write to $item cannot clobber the last entry.
		unset( $item );
		return $nestedOutline;
	}

	/**
	 * Fix a data array for Mustache.
	 * Mustache is too stupid to be able to handle conditional pre/postfixes for
	 * arrays (e.g. do not wrap into <ul> when the array of list items is empty).
	 * (The lightncandy implementation is too stupid to even do that for non-arrays.)
	 * Add a 'foo?' field for every non-boolean 'foo', which casts it to boolean;
	 * only false, the empty array and the empty string count as falsy.
	 *
	 * @param array $data Template data; non-array input is returned unchanged.
	 * @return array
	 */
	public function fixTemplateData( $data ) {
		if ( !is_array( $data ) ) {
			return $data;
		}

		$fixedData = [];
		foreach ( $data as $field => $value ) {
			// treat 0/'0' as truthy
			if ( !is_bool( $value ) ) {
				$fixedData[$field . '?'] = !in_array( $value, [ false, [], '' ], true );
			}

			if ( !is_array( $value ) ) {
				$fixedData[$field] = $value;
			} elseif ( array_keys( $value ) === array_keys( array_values( $value ) ) ) {
				// consecutive numeric keys - treat as an array
				$fixedData[$field] = array_map( [ $this, 'fixTemplateData' ], $value );
			} else {
				// treat as a hash
				$fixedData[$field] = $this->fixTemplateData( $value );
			}
		}
		return $fixedData;
	}

}