Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
0.00% |
0 / 95 |
|
0.00% |
0 / 5 |
CRAP | |
0.00% |
0 / 1 |
ListenMetricsEntryFileJournal | |
0.00% |
0 / 95 |
|
0.00% |
0 / 5 |
420 | |
0.00% |
0 / 1 |
__construct | |
0.00% |
0 / 2 |
|
0.00% |
0 / 1 |
2 | |||
appendEntry | |
0.00% |
0 / 14 |
|
0.00% |
0 / 1 |
6 | |||
archiveCurrentMetricsJournal | |
0.00% |
0 / 59 |
|
0.00% |
0 / 1 |
90 | |||
getCurrentMetricsJournalFile | |
0.00% |
0 / 4 |
|
0.00% |
0 / 1 |
6 | |||
flockWithTimeout | |
0.00% |
0 / 16 |
|
0.00% |
0 / 1 |
42 |
1 | <?php |
2 | |
3 | namespace MediaWiki\Wikispeech\Api; |
4 | |
5 | /** |
6 | * @file |
7 | * @ingroup API |
8 | * @ingroup Extensions |
9 | * @license GPL-2.0-or-later |
10 | */ |
11 | |
12 | use Config; |
13 | use InvalidArgumentException; |
14 | use MediaWiki\Logger\LoggerFactory; |
15 | use Psr\Log\LoggerInterface; |
16 | |
/**
 * Adds journal entries as single lines of JSON in a file on the filesystem.
 *
 * Access to the journal file is coordinated with advisory flock() locks taken
 * on the journal file itself, both when appending and when archiving.
 *
 * @since 0.1.10
 */
class ListenMetricsEntryFileJournal implements ListenMetricsEntryJournal {

	/** @var LoggerInterface */
	private $logger;

	/** @var Config */
	private $config;

	/**
	 * @since 0.1.10
	 * @param Config $config Used to resolve the journal file location.
	 */
	public function __construct( Config $config ) {
		$this->logger = LoggerFactory::getInstance( 'Wikispeech' );
		$this->config = $config;
	}

	/**
	 * Serializes the entry to JSON and appends it as one line to the current journal file.
	 *
	 * Failures (JSON encoding, opening the file, acquiring the lock) are logged
	 * as warnings and the entry is dropped; nothing is thrown to the caller.
	 *
	 * @since 0.1.10
	 * @param ListenMetricsEntry $entry
	 */
	public function appendEntry( ListenMetricsEntry $entry ): void {
		// One second sounds like much, but there is no queuing here, it's optimistic locking.
		// We really need to give it a bit of time in case of really heavy user load.
		$lockTimeoutSeconds = 1;
		$metricsJournalFile = $this->getCurrentMetricsJournalFile();
		$metricsSerializer = new ListenMetricsEntrySerializer();
		$json = json_encode( $metricsSerializer->serialize( $entry ) );
		if ( $json === false ) {
			// Writing the failed result would only add an empty line to the journal.
			$this->logger->warning( __METHOD__ .
				': Unable to encode metrics entry as JSON: {error}',
				[ 'error' => json_last_error_msg() ]
			);
			return;
		}
		$fh = fopen( $metricsJournalFile, 'a' );
		if ( !$fh ) {
			$this->logger->warning( __METHOD__ .
				': Unable to open {file} for appending',
				[ 'file' => $metricsJournalFile ]
			);
			return;
		}
		try {
			if ( !$this->flockWithTimeout( $fh, LOCK_EX, $lockTimeoutSeconds * 1000000 ) ) {
				$this->logger->warning( __METHOD__ .
					': Unable to get write lock on {file}',
					[ 'file' => $metricsJournalFile ]
				);
				return;
			}
			try {
				fwrite( $fh, $json . "\n" );
				fflush( $fh );
			} finally {
				flock( $fh, LOCK_UN );
			}
		} finally {
			// Runs on every path above, including the early return on lock timeout.
			fclose( $fh );
		}

		// @todo switch to database?
	}

	/**
	 * In case of an empty or missing journal file, the function returns false.
	 * Attempts to rename current metrics journal file with an appended ISO8601 date/timestamp.
	 * If this is successful the function returns true.
	 * It then attempts to gzip-compress and delete the uncompressed file.
	 * Even if any of these actions fails the method will return true
	 * but it will produce warnings in the log. The uncompressed archive is
	 * only deleted once compression has completed successfully.
	 *
	 * @since 0.1.10
	 * @return bool Whether or not the current journal was archived. If false, see log.
	 */
	public function archiveCurrentMetricsJournal(): bool {
		$currentMetricsJournalFile = $this->getCurrentMetricsJournalFile();
		if ( !file_exists( $currentMetricsJournalFile ) ) {
			$this->logger->info( __METHOD__ .
				': Attempted to archive non existing journal file {file}',
				[ 'file' => $currentMetricsJournalFile ]
			);
			return false;
		}
		if ( !filesize( $currentMetricsJournalFile ) ) {
			$this->logger->info( __METHOD__ .
				': Attempted to archive an empty journal file {file}',
				[ 'file' => $currentMetricsJournalFile ]
			);
			return false;
		}

		$archivedMetricsJournalFile = $currentMetricsJournalFile . '.' . date( 'c' );
		// Here we can give it quite a bit of time to lock. No worries.
		$lockTimeoutSeconds = 10;
		$lockHandler = fopen( $currentMetricsJournalFile, 'r' );
		if ( !$lockHandler ) {
			$this->logger->warning( __METHOD__ .
				': Unable to open {file} for locking',
				[ 'file' => $currentMetricsJournalFile ]
			);
			return false;
		}
		try {
			if ( !$this->flockWithTimeout( $lockHandler, LOCK_EX, 1000000 * $lockTimeoutSeconds ) ) {
				$this->logger->warning( __METHOD__ .
					': Unable to achieve file lock on {file}',
					[ 'file' => $currentMetricsJournalFile ]
				);
				return false;
			}
			try {
				if ( !rename( $currentMetricsJournalFile, $archivedMetricsJournalFile ) ) {
					$this->logger->error( __METHOD__ .
						': Unable to rename existing file {from} to {to}',
						[
							'from' => $currentMetricsJournalFile,
							'to' => $archivedMetricsJournalFile
						]
					);
					return false;
				}
			} finally {
				flock( $lockHandler, LOCK_UN );
			}
		} finally {
			// Close the handle on every path, including the lock-timeout return.
			fclose( $lockHandler );
		}

		$gzippedArchivedMetricsJournalFile = $archivedMetricsJournalFile . '.gz';
		if ( !$this->compressArchivedJournal(
			$archivedMetricsJournalFile,
			$gzippedArchivedMetricsJournalFile
		) ) {
			// The rename succeeded, so the journal counts as archived. Keep the
			// uncompressed file since it is the only complete copy.
			return true;
		}
		if ( !unlink( $archivedMetricsJournalFile ) ) {
			$this->logger->warning( __METHOD__ .
				': Unable to delete uncompressed archived journal file {file}.',
				[ 'file' => $archivedMetricsJournalFile ]
			);
		}
		return true;
	}

	/**
	 * Gzip-compresses $archivedFile into $gzippedFile, logging a warning on failure.
	 *
	 * An incomplete output file is removed again so that a partial .gz is not
	 * left next to the uncompressed archive.
	 *
	 * @param string $archivedFile Existing uncompressed archive to read.
	 * @param string $gzippedFile Compressed file to create.
	 * @return bool True only if all data was read, written and the output closed.
	 */
	private function compressArchivedJournal( string $archivedFile, string $gzippedFile ): bool {
		// wb9 = write binary, compression level 9
		$out = gzopen( $gzippedFile, 'wb9' );
		if ( !$out ) {
			$this->logger->warning( __METHOD__ .
				': Unable to open new file {file} for compression output. ' .
				'Archived journal was not compressed.',
				[ 'file' => $gzippedFile ]
			);
			return false;
		}

		$in = fopen( $archivedFile, 'rb' );
		if ( !$in ) {
			gzclose( $out );
			if ( file_exists( $gzippedFile ) ) {
				unlink( $gzippedFile );
			}
			$this->logger->warning( __METHOD__ .
				': Unable to read from file {from}. Archived journal was not compressed.',
				[ 'from' => $archivedFile ]
			);
			return false;
		}

		$complete = true;
		$bufferLength = 1024 * 512;
		while ( !feof( $in ) ) {
			$chunk = fread( $in, $bufferLength );
			if ( $chunk === false ) {
				$complete = false;
				break;
			}
			if ( $chunk === '' ) {
				continue;
			}
			// gzwrite() reports failure as false or 0 depending on the PHP version.
			if ( !gzwrite( $out, $chunk ) ) {
				$complete = false;
				break;
			}
		}
		fclose( $in );
		if ( !gzclose( $out ) ) {
			$complete = false;
		}

		if ( !$complete ) {
			if ( file_exists( $gzippedFile ) ) {
				unlink( $gzippedFile );
			}
			$this->logger->warning( __METHOD__ .
				': Failed compressing {from} to {to}. Archived journal was not compressed.',
				[
					'from' => $archivedFile,
					'to' => $gzippedFile
				]
			);
		}
		return $complete;
	}

	/**
	 * Resolves the path of the journal file currently being written to.
	 *
	 * Uses the WikispeechListenMetricsJournalFile setting when it is set to a
	 * non-empty value, otherwise falls back to wikispeechListenMetrics.log in
	 * the configured UploadDirectory.
	 *
	 * @since 0.1.10
	 * @return string
	 */
	private function getCurrentMetricsJournalFile(): string {
		$metricsJournalFile = $this->config->get( 'WikispeechListenMetricsJournalFile' );
		if ( !$metricsJournalFile ) {
			$metricsJournalFile = "{$this->config->get( 'UploadDirectory' )}/wikispeechListenMetrics.log";
		}
		return $metricsJournalFile;
	}

	/**
	 * https://gist.github.com/CMCDragonkai/a7b446f15094f59083a2
	 *
	 * Acquires a lock using flock, provide it a file stream, the
	 * lock type, a timeout in microseconds, and a sleep_by in microseconds.
	 * PHP's flock does not currently have a timeout or queuing mechanism.
	 * So we have to hack an optimistic method of continuously sleeping
	 * and retrying to acquire the lock until we reach a timeout.
	 * Doing this in microseconds is a good idea, as seconds are too
	 * granular and can allow a new thread to cheat the queue.
	 * There's no actual queue of locks being implemented here, so
	 * it is fundamentally non-deterministic when multiple threads
	 * try to acquire a lock with a timeout.
	 * This means a possible failure is resource starvation.
	 * For example, if there are too many concurrent threads competing for
	 * a lock, then this implementation may allow the second thread to be
	 * starved and allow the third thread to acquire the lock.
	 * The trick here is in the combination of LOCK_NB and $blocking.
	 * The $blocking variable is assigned by reference, it is set to 1
	 * when the flock is blocked from acquiring a lock. With LOCK_NB
	 * the flock returns immediately instead of waiting indefinitely.
	 *
	 * The caller is responsible for passing a successfully opened handle.
	 *
	 * @param resource $lockfile Lock file resource that is opened.
	 * @param int $lockType LOCK_EX or LOCK_SH
	 * @param int $timeout_micro In microseconds, where 1 second = 1,000,000 microseconds.
	 *  Values below 1 make a single non-blocking attempt.
	 * @param int $sleep_by_micro Microsecond sleep period, by default 0.01 of a second
	 * @return bool True if the lock was acquired; false on timeout or if flock
	 *  failed for a reason other than the lock being held elsewhere.
	 * @throws InvalidArgumentException If $sleep_by_micro is less than 1.
	 */
	private function flockWithTimeout(
		$lockfile,
		int $lockType,
		int $timeout_micro,
		int $sleep_by_micro = 10000
	): bool {
		if ( $sleep_by_micro < 1 ) {
			throw new InvalidArgumentException(
				'The $sleep_by_micro cannot be less than 1, or else an infinite loop.'
			);
		}
		if ( $timeout_micro < 1 ) {
			return flock( $lockfile, $lockType | LOCK_NB );
		}

		$count_micro = 0;
		while ( !flock( $lockfile, $lockType | LOCK_NB, $blocking ) ) {
			if ( !$blocking ) {
				// Not a contention failure, so retrying will not help.
				return false;
			}
			$count_micro += $sleep_by_micro;
			if ( $count_micro > $timeout_micro ) {
				// Timeout budget spent: give up rather than spin without sleeping.
				return false;
			}
			usleep( $sleep_by_micro );
		}
		return true;
	}
}