Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 72 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
ParsoidClient | |
0.00% |
0 / 72 |
|
0.00% |
0 / 5 |
110 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 3 |
|
0.00% |
0 / 1 |
6 | |||
parse | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
12 | |||
cacheGet | |
0.00% |
0 / 12 |
|
0.00% |
0 / 1 |
6 | |||
cachePut | |
0.00% |
0 / 8 |
|
0.00% |
0 / 1 |
2 | |||
fetchParse | |
0.00% |
0 / 37 |
|
0.00% |
0 / 1 |
6 |
1 | <?php |
2 | /** |
3 | * @section LICENSE |
4 | * This file is part of Wikimedia Slim application library |
5 | * |
6 | * Wikimedia Slim application library is free software: you can |
7 | * redistribute it and/or modify it under the terms of the GNU General Public |
8 | * License as published by the Free Software Foundation, either version 3 of |
9 | * the License, or (at your option) any later version. |
10 | * |
11 | * Wikimedia Slim application library is distributed in the hope that it |
12 | * will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty |
13 | * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU General Public License along |
17 | * with Wikimedia Slim application library. If not, see |
18 | * <http://www.gnu.org/licenses/>. |
19 | * |
20 | * @file |
21 | * @copyright © 2016 Bryan Davis, Wikimedia Foundation and contributors. |
22 | */ |
23 | |
24 | namespace Wikimedia\Slimapp; |
25 | |
26 | use Psr\Log\LoggerInterface; |
27 | use Psr\Log\NullLogger; |
28 | |
/**
 * Simple client for sending wikitext to RESTBase to be converted into html.
 *
 * The class name predates the switch from Parsoid to RESTBase as the backing
 * API provider. RESTBase still talks to Parsoid under the covers.
 *
 * Successful conversions are cached on disk as one file per distinct
 * wikitext input, named after the SHA-1 hash of that input.
 *
 * @author Bryan Davis <bd808@wikimedia.org>
 * @copyright © 2016 Bryan Davis, Wikimedia Foundation and contributors.
 * @see https://www.mediawiki.org/wiki/RESTBase
 * @see https://en.wikipedia.org/api/rest_v1/
 */
class ParsoidClient {

	/**
	 * @var string URL to RESTBase /transform/wikitext/to/html API
	 */
	protected $url;

	/**
	 * @var string Cache directory
	 */
	protected $cache;

	/**
	 * @var LoggerInterface
	 */
	protected $logger;

	/**
	 * @param string $url URL to RESTBase /transform/wikitext/to/html API
	 * @param string $cache Cache directory
	 * @param LoggerInterface|null $logger Log channel; a NullLogger is used
	 *   when omitted
	 */
	public function __construct( $url, $cache, $logger = null ) {
		$this->logger = $logger ?: new NullLogger();
		$this->url = $url;
		$this->cache = $cache;
	}

	/**
	 * Convert wikitext to html, consulting the on-disk cache first.
	 *
	 * If the remote conversion fails the input is returned html-escaped
	 * and nothing is cached, so a later call will retry the conversion.
	 *
	 * @param string $text Wikitext
	 * @return string Parsed text
	 */
	public function parse( $text ) {
		$this->logger->debug( 'Parsing [{text}]', [
			'method' => __METHOD__,
			'text' => $text,
		] );
		$key = sha1( $text );
		$parsed = $this->cacheGet( $key );
		if ( $parsed !== null ) {
			return $parsed;
		}

		$parsed = $this->fetchParse( $text );
		if ( $parsed === false ) {
			// return raw text if fetch fails
			return htmlspecialchars( $text, ENT_QUOTES, 'UTF-8' );
		}
		$this->cachePut( $key, $parsed );
		return $parsed;
	}

	/**
	 * Build the path of the cache file used for a key.
	 *
	 * @param string $key Cache key
	 * @return string File path inside the cache directory
	 */
	protected function cacheFile( $key ) {
		return "{$this->cache}/{$key}.restbase";
	}

	/**
	 * @param string $key Cache key
	 * @return string|null Cached parse result, or null on a cache miss
	 */
	protected function cacheGet( $key ) {
		$file = $this->cacheFile( $key );
		// An entry that exists but cannot be read is treated as a miss so
		// that a boolean false never leaks out to the caller of parse().
		$cached = ( is_file( $file ) && is_readable( $file ) )
			? file_get_contents( $file )
			: false;
		if ( $cached === false ) {
			$this->logger->info( 'Cache miss for [{key}]', [
				'method' => __METHOD__,
				'key' => $key,
			] );
			return null;
		}
		$this->logger->debug( 'Cache hit for [{key}]', [
			'method' => __METHOD__,
			'key' => $key,
		] );
		return $cached;
	}

	/**
	 * @param string $key Cache key
	 * @param string $value Parse result
	 */
	protected function cachePut( $key, $value ) {
		$file = $this->cacheFile( $key );
		if ( file_put_contents( $file, $value ) === false ) {
			// Caching is best effort: report the failure, but do not
			// claim that the value was stored.
			$this->logger->error( 'Cache put failed for [{key}]', [
				'method' => __METHOD__,
				'key' => $key,
				'file' => $file,
			] );
			return;
		}
		$this->logger->info( 'Cache put for [{key}]', [
			'method' => __METHOD__,
			'key' => $key,
			'file' => $file,
			'value' => $value,
		] );
	}

	/**
	 * POST wikitext to the configured URL and return the html response.
	 *
	 * A transport error, an HTTP status of 400 or above, or a failure to
	 * process the response body are all reported as failure so that
	 * parse() does not cache an error document.
	 *
	 * @param string $text
	 * @return string|bool False on failure, html otherwise
	 */
	protected function fetchParse( $text ) {
		$parms = [
			'wikitext' => $text,
			'body_only' => 'true',
		];
		$ch = curl_init();
		if ( $ch === false ) {
			$this->logger->error( 'Unable to initialize curl', [
				'method' => __METHOD__,
				'url' => $this->url,
			] );
			return false;
		}
		// Capture curl's verbose trace in memory so it can be logged.
		$stderr = fopen( 'php://temp', 'rw+' );
		curl_setopt_array( $ch, [
			CURLOPT_URL => $this->url,
			CURLOPT_POST => true,
			CURLOPT_POSTFIELDS => $parms,
			// Empty string: accept every encoding curl supports
			CURLOPT_ENCODING => '',
			CURLOPT_RETURNTRANSFER => true,
			CURLOPT_USERAGENT => 'Wikimedia Slimapp',
			CURLOPT_HTTPHEADER => [
				'Accept: text/html; charset=utf-8; profile="https://www.mediawiki.org/wiki/Specs/HTML/1.2.1"',
			],
			CURLOPT_VERBOSE => true,
			CURLOPT_STDERR => $stderr,
		] );

		$body = curl_exec( $ch );
		// Collect everything needed from the handle before closing it.
		$errno = curl_errno( $ch );
		$error = curl_error( $ch );
		$status = (int)curl_getinfo( $ch, CURLINFO_HTTP_CODE );
		curl_close( $ch );

		rewind( $stderr );
		$trace = stream_get_contents( $stderr );
		fclose( $stderr );

		$this->logger->debug( 'RESTBase curl request', [
			'method' => __METHOD__,
			'url' => $this->url,
			'parms' => $parms,
			'status' => $status,
			'stderr' => $trace,
		] );

		if ( $body === false ) {
			$this->logger->error( 'Curl error #{errno}: {error}', [
				'method' => __METHOD__,
				'errno' => $errno,
				'error' => $error,
				'url' => $this->url,
				'parms' => $parms,
			] );
			return false;
		}

		if ( $status >= 400 ) {
			// curl_exec() hands back the body of an error response too;
			// treat it as a failure instead of as parsed html.
			$this->logger->error( 'RESTBase returned HTTP status {status}', [
				'method' => __METHOD__,
				'status' => $status,
				'url' => $this->url,
				'parms' => $parms,
			] );
			return false;
		}

		return $this->unwrapBody( $body );
	}

	/**
	 * Strip everything outside of the <body>...</body> element.
	 *
	 * Input that contains no body element is returned unchanged.
	 *
	 * @param string $html Response document
	 * @return string|bool False if the pattern match fails, html otherwise
	 */
	protected function unwrapBody( $html ) {
		if ( strpos( $html, '<body' ) === false ) {
			// The pattern below cannot match; skip the regex engine.
			return $html;
		}
		// Using a regex to parse html is generally not a sane thing to do,
		// but in this case we are trusting RESTBase to be returning clean
		// HTML and all we want to do is unwrap our payload from the
		// <body>...</body> tag.
		$inner = preg_replace(
			'@^.*<body\b[^>]*>(.*)</body>.*$@s', '$1', $html
		);
		if ( $inner === null ) {
			$this->logger->error( 'Regex error #{errno} unwrapping body', [
				'method' => __METHOD__,
				'errno' => preg_last_error(),
			] );
			return false;
		}
		return $inner;
	}
}