mwtitle/lib.rs
1/*
2Copyright (C) Tim Starling
3Copyright (C) Daniel Kinzler
4Copyright (C) 2021 Kunal Mehta <legoktm@debian.org>
5Copyright (C) 2021 Erutuon
6
7This program is free software: you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
11
12This program is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20//! `mwtitle` is a library for parsing, normalizing and formatting MediaWiki
21//! page titles. It is primarily a port of the MediaWikiTitleCodec class
22//! from MediaWiki, and passes the MediaWiki test suite.
23//!
//! The easiest way to get started is to create a [`TitleCodec`] from a [siteinfo](https://www.mediawiki.org/wiki/API:Siteinfo)
25//! API request.
26//! ```
27//! # #[tokio::main]
28//! # async fn main() -> anyhow::Result<()> {
29//! # #[cfg(feature = "parsing")]
30//! # {
31//! # use mwtitle::{SiteInfoResponse, TitleCodec};
32//! let url = "https://en.wikipedia.org/w/api.php\
33//! ?action=query&meta=siteinfo\
34//! &siprop=general|namespaces|namespacealiases|interwikimap\
35//! &formatversion=2&format=json";
36//! let resp: SiteInfoResponse = reqwest::get(url).await?.json().await?;
37//! let codec = TitleCodec::from_site_info(resp.query)?;
38//! let title = codec.new_title("Talk:Main Page#Section 1")?;
39//! assert_eq!(title.namespace(), 1);
40//! assert_eq!(title.dbkey(), "Main_Page");
41//! assert_eq!(title.fragment(), Some("Section 1"));
42//! assert_eq!(codec.to_pretty(&title), "Talk:Main Page".to_string());
43//! assert_eq!(
44//! codec.to_pretty_with_fragment(&title),
45//! "Talk:Main Page#Section 1".to_string()
46//! );
47//! # }
48//! # Ok(())
49//! # }
50//! ```
51//!
//! It's also possible to create a `TitleCodec` from a JSON
53//! `siteinfo-namespaces.json` or compressed `siteinfo-namespaces.json.gz`
54//! that comes from Wikimedia dumps. This requires the extra `utils` feature
55//! to be enabled.
56//!
57//! ## Contributing
58//! `mwtitle` is a part of the [`mwbot-rs` project](https://www.mediawiki.org/wiki/Mwbot-rs).
59//! We're always looking for new contributors, please [reach out](https://www.mediawiki.org/wiki/Mwbot-rs#Contributing)
60//! if you're interested!
61#![deny(clippy::all)]
62#![deny(rustdoc::all)]
63#![cfg_attr(docsrs, feature(doc_cfg))]
64
65#[cfg(feature = "parsing")]
66#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
67mod codec;
68mod display;
69mod error;
70mod interwiki_set;
71#[cfg(feature = "parsing")]
72#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
73mod ip;
74#[cfg(feature = "parsing")]
75#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
76mod ipv6;
77pub mod namespace;
78mod namespace_map;
79#[cfg(feature = "parsing")]
80#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
81mod php;
82mod site_info;
83
84#[cfg(feature = "parsing")]
85pub use codec::TitleCodec;
86pub use display::TitleWhitespace;
87pub use error::Error;
88pub use interwiki_set::InterwikiSet;
89pub use namespace_map::{Namespace, NamespaceMap};
90pub use site_info::{
91 Interwiki, NamespaceAlias, NamespaceInfo, Response as SiteInfoResponse,
92 SiteInfo,
93};
/// Convenience alias for [`std::result::Result`] whose error type defaults
/// to this crate's [`Error`].
pub type Result<T, E = Error> = std::result::Result<T, E>;
95
96use namespace::{NS_CATEGORY, NS_FILE, NS_MAIN};
97
/// Represents a MediaWiki title. A title can be broken down into the following
/// attributes: `[[interwiki:ns:db_key#fragment]]`.
/// * `interwiki`: Optional prefix pointing to another site
/// * `namespace`: Numerical ID corresponding to a MediaWiki namespace
/// * `dbkey`: Page name, with underscores instead of spaces
/// * `fragment`: Optional anchor for a specific section
///
/// `Title` implements `Hash` in addition to `Eq`, so it can be used as a key
/// in `HashMap`/`HashSet`. Both traits are derived over the same fields, so
/// equal titles always hash identically.
///
/// ```
/// # use mwtitle::Title;
/// // ns1 is Talk, so this is [[Talk:Main Page]]
/// let title = unsafe { Title::new_unchecked(1, "Main_Page".into()) };
/// assert_eq!(title.namespace(), 1);
/// assert_eq!(title.dbkey(), "Main_Page");
/// assert!(title.interwiki().is_none());
/// assert!(title.fragment().is_none());
/// let title = title.with_fragment("Section 1".into());
/// assert_eq!(title.fragment(), Some("Section 1"));
/// ```
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Title {
    /// Numerical namespace ID.
    namespace: i32,
    /// Page name, with underscores instead of spaces.
    dbkey: String,
    /// Optional anchor for a specific section.
    fragment: Option<String>,
    /// Optional prefix pointing to another site.
    interwiki: Option<String>,
    /// Whether this title was created via a local interwiki link.
    local_interwiki: bool,
}
124
125impl Title {
126 #[inline]
127 /// Reorders fields into a reasonable order for `PartialOrd` and `Ord` implementations.
128 /// Negates `local_interwiki` to make local interwikis sort first.
129 /// The desired order with regard to interwikis:
130 /// titles without interwikis, titles with local interwikis, titles with other interwikis
131 fn to_sortable(&self) -> impl Ord + '_ {
132 let Title {
133 namespace,
134 dbkey,
135 fragment,
136 interwiki,
137 local_interwiki,
138 } = self;
139 (
140 interwiki.is_some(),
141 !local_interwiki,
142 interwiki.as_deref(),
143 *namespace,
144 dbkey,
145 fragment.as_deref(),
146 )
147 }
148}
149
150impl PartialOrd for Title {
151 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
152 Some(self.cmp(other))
153 }
154}
155
156impl Ord for Title {
157 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
158 self.to_sortable().cmp(&other.to_sortable())
159 }
160}
161
/// Test-only shorthand for building a `Title` from colon-separated literals.
///
/// Accepted forms:
/// * `title!(local_interwiki : "interwiki" : namespace : "dbkey")`
/// * `title!("interwiki" : namespace : "dbkey")` (`local_interwiki` is `false`)
/// * `title!(namespace : "dbkey")` (no interwiki, `local_interwiki` is `false`)
///
/// The fragment is always `None`.
#[cfg(test)]
macro_rules! title {
    // Interwiki title with an explicit local-interwiki flag.
    ($local_interwiki:literal : $interwiki:literal : $namespace:literal : $dbkey:literal) => {
        Title {
            namespace: $namespace,
            dbkey: String::from($dbkey),
            fragment: None,
            interwiki: Some(String::from($interwiki)),
            local_interwiki: $local_interwiki,
        }
    };
    // Interwiki title that is not a local interwiki.
    ($interwiki:literal : $namespace:literal : $dbkey:literal) => {
        Title {
            namespace: $namespace,
            dbkey: String::from($dbkey),
            fragment: None,
            interwiki: Some(String::from($interwiki)),
            local_interwiki: false,
        }
    };
    // Plain title without any interwiki prefix.
    ($namespace:literal : $dbkey:literal) => {
        Title {
            namespace: $namespace,
            dbkey: String::from($dbkey),
            fragment: None,
            interwiki: None,
            local_interwiki: false,
        }
    };
}
198
/// Sorting must yield: titles without interwikis (by namespace), then local
/// interwikis, then other interwikis, each group ordered by prefix.
#[test]
fn title_ord() {
    let expected = [
        title!(0:"Title"),
        title!(4:"Title"),
        title!(true:"localinterwiki1":4:"Title"),
        title!(true:"localinterwiki2":4:"Title"),
        title!("interwiki1":4:"Title"),
        title!("interwiki2":4:"Title"),
    ];

    // Deliberately out of order so that `sort` has work to do.
    let mut shuffled = vec![
        title!(true:"localinterwiki2":4:"Title"),
        title!(true:"localinterwiki1":4:"Title"),
        title!("interwiki2":4:"Title"),
        title!("interwiki1":4:"Title"),
        title!(4:"Title"),
        title!(0:"Title"),
    ];
    shuffled.sort();

    assert_eq!(shuffled.as_slice(), expected.as_slice());
}
222
223impl Title {
224 /// Create a new `Title` from a namespace ID
225 /// and database key (title without the namespace prefix),
226 /// with no validation on the namespace or text parts.
227 ///
228 /// Good if you're getting the title from a
229 /// trusted place like the API.
230 ///
231 /// The `dbkey` should have underscores
232 /// and be normalized and sanitized
233 /// as if it has been processed by [`TitleCodec::new_title`].
234 /// The namespace must exist in the [`TitleCodec`] or [`NamespaceMap`]
235 /// that will format this title.
236 ///
237 /// # Safety
238 /// If the namespace doesn't exist in the `TitleCodec` or `NamespaceMap`,
239 /// some methods, like [`TitleCodec::to_pretty`], will panic.
240 ///
241 /// If the `dbkey` hasn't been normalized and sanitized,
242 /// the ordering implementations ( `Eq`, `PartialEq`, `Ord`, `PartialOrd`)
243 /// for the `Title` aren't guaranteed to give the correct results.
244 pub unsafe fn new_unchecked(namespace: i32, dbkey: String) -> Self {
245 Self {
246 namespace,
247 dbkey,
248 fragment: None,
249 interwiki: None,
250 local_interwiki: false,
251 }
252 }
253
254 /// Set a fragment.
255 pub fn with_fragment(mut self, fragment: String) -> Self {
256 self.fragment = Some(fragment);
257 self
258 }
259
260 /// Remove the fragment.
261 pub fn remove_fragment(mut self) -> Self {
262 self.fragment = None;
263 self
264 }
265
266 /// Get the namespace ID.
267 pub fn namespace(&self) -> i32 {
268 self.namespace
269 }
270
271 /// Get the dbkey.
272 pub fn dbkey(&self) -> &str {
273 &self.dbkey
274 }
275
276 /// Get the fragment, if there is one.
277 pub fn fragment(&self) -> Option<&str> {
278 self.fragment.as_deref()
279 }
280
281 /// Get the interwiki, if there is one.
282 pub fn interwiki(&self) -> Option<&str> {
283 self.interwiki.as_deref()
284 }
285
286 /// Whether this title was created via a local interwiki link.
287 pub fn is_local_interwiki(&self) -> bool {
288 self.local_interwiki
289 }
290
291 /// If the title is a local page that could exist, basically not an
292 /// interwiki link, nor a fragment-only link, nor a special page.
293 pub fn is_local_page(&self) -> bool {
294 self.interwiki.is_none()
295 && !self.dbkey.is_empty()
296 && self.namespace >= 0
297 }
298
299 /// Whether this title refers to a file.
300 pub fn is_file(&self) -> bool {
301 self.namespace == NS_FILE
302 }
303
304 /// Whether this title refers to a category.
305 pub fn is_category(&self) -> bool {
306 self.namespace == NS_CATEGORY
307 }
308}