mwtitle/
lib.rs

1/*
2Copyright (C) Tim Starling
3Copyright (C) Daniel Kinzler
4Copyright (C) 2021 Kunal Mehta <legoktm@debian.org>
5Copyright (C) 2021 Erutuon
6
7This program is free software: you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
11
12This program is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20//! `mwtitle` is a library for parsing, normalizing and formatting MediaWiki
21//! page titles. It is primarily a port of the MediaWikiTitleCodec class
22//! from MediaWiki, and passes the MediaWiki test suite.
23//!
24//! The easiest way to get started is to create a [`TitleCodec`] from a [siteinfo](https://www.mediawiki.org/wiki/API:Siteinfo)
25//! API request.
26//! ```
27//! # #[tokio::main]
28//! # async fn main() -> anyhow::Result<()> {
29//! # #[cfg(feature = "parsing")]
30//! # {
31//! # use mwtitle::{SiteInfoResponse, TitleCodec};
32//! # let client = reqwest::Client::builder().user_agent("mwtitle-testing").build()?;
33//! let url = "https://en.wikipedia.org/w/api.php\
34//!            ?action=query&meta=siteinfo\
35//!            &siprop=general|namespaces|namespacealiases|interwikimap\
36//!            &formatversion=2&format=json";
37//! let resp: SiteInfoResponse = client.get(url).send().await?.json().await?;
38//! let codec = TitleCodec::from_site_info(resp.query)?;
39//! let title = codec.new_title("Talk:Main Page#Section 1")?;
40//! assert_eq!(title.namespace(), 1);
41//! assert_eq!(title.dbkey(), "Main_Page");
42//! assert_eq!(title.fragment(), Some("Section 1"));
43//! assert_eq!(codec.to_pretty(&title), "Talk:Main Page".to_string());
44//! assert_eq!(
45//!     codec.to_pretty_with_fragment(&title),
46//!     "Talk:Main Page#Section 1".to_string()
47//! );
48//! # }
49//! # Ok(())
50//! # }
51//! ```
52//!
53//! It's also possible to create a `TitleCodec` from a JSON
54//! `siteinfo-namespaces.json` or compressed `siteinfo-namespaces.json.gz`
55//! that comes from Wikimedia dumps. This requires the extra `utils` feature
56//! to be enabled.
57//!
58//! ## Contributing
59//! `mwtitle` is a part of the [`mwbot-rs` project](https://www.mediawiki.org/wiki/Mwbot-rs).
60//! We're always looking for new contributors, please [reach out](https://www.mediawiki.org/wiki/Mwbot-rs#Contributing)
61//! if you're interested!
62#![deny(clippy::all)]
63#![deny(rustdoc::all)]
64#![cfg_attr(docsrs, feature(doc_cfg))]
65
66#[cfg(feature = "parsing")]
67#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
68mod codec;
69mod display;
70mod error;
71mod interwiki_set;
72#[cfg(feature = "parsing")]
73#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
74mod ip;
75#[cfg(feature = "parsing")]
76#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
77mod ipv6;
78pub mod namespace;
79mod namespace_map;
80#[cfg(feature = "parsing")]
81#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
82mod php;
83mod site_info;
84
85#[cfg(feature = "parsing")]
86pub use codec::TitleCodec;
87pub use display::TitleWhitespace;
88pub use error::Error;
89pub use interwiki_set::InterwikiSet;
90pub use namespace_map::{Namespace, NamespaceMap};
91pub use site_info::{
92    Interwiki, NamespaceAlias, NamespaceInfo, Response as SiteInfoResponse,
93    SiteInfo,
94};
95pub type Result<T, E = Error> = std::result::Result<T, E>;
96
97use namespace::{NS_CATEGORY, NS_FILE, NS_MAIN};
98
/// Represents a MediaWiki title. A title can be broken down into the following
/// attributes: `[[interwiki:ns:db_key#fragment]]`.
/// * `interwiki`: Optional prefix pointing to another site
/// * `namespace`: Numerical ID corresponding to a MediaWiki namespace
/// * `dbkey`: Page name, with underscores instead of spaces
/// * `fragment`: Optional anchor for a specific section
///
/// Equality and hashing are both derived over every field, so two titles
/// that compare equal also hash identically. This makes `Title` usable as a
/// key in hashed collections such as `HashSet` and `HashMap`.
///
/// ```
/// # use mwtitle::Title;
/// // ns1 is Talk, so this is [[Talk:Main Page]]
/// let title = unsafe { Title::new_unchecked(1, "Main_Page".into()) };
/// assert_eq!(title.namespace(), 1);
/// assert_eq!(title.dbkey(), "Main_Page");
/// assert!(title.interwiki().is_none());
/// assert!(title.fragment().is_none());
/// let title = title.with_fragment("Section 1".into());
/// assert_eq!(title.fragment(), Some("Section 1"));
/// ```
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Title {
    /// Numerical ID of the namespace the page lives in.
    namespace: i32,
    /// Page name without the namespace prefix, with underscores instead of spaces.
    dbkey: String,
    /// Anchor of a specific section, if any.
    fragment: Option<String>,
    /// Prefix pointing to another site, if any.
    interwiki: Option<String>,
    /// Whether this title was created via a local interwiki link.
    local_interwiki: bool,
}
125
126impl Title {
127    #[inline]
128    /// Reorders fields into a reasonable order for `PartialOrd` and `Ord` implementations.
129    /// Negates `local_interwiki` to make local interwikis sort first.
130    /// The desired order with regard to interwikis:
131    /// titles without interwikis, titles with local interwikis, titles with other interwikis
132    fn to_sortable(&self) -> impl Ord + '_ {
133        let Title {
134            namespace,
135            dbkey,
136            fragment,
137            interwiki,
138            local_interwiki,
139        } = self;
140        (
141            interwiki.is_some(),
142            !local_interwiki,
143            interwiki.as_deref(),
144            *namespace,
145            dbkey,
146            fragment.as_deref(),
147        )
148    }
149}
150
151impl PartialOrd for Title {
152    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
153        Some(self.cmp(other))
154    }
155}
156
157impl Ord for Title {
158    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
159        self.to_sortable().cmp(&other.to_sortable())
160    }
161}
162
// Test-only shorthand for building a `Title` without a fragment.
//
// Accepted forms (tried in this order):
// * `title!(local : "interwiki" : ns : "dbkey")` - interwiki title with an
//   explicit `local_interwiki` flag
// * `title!("interwiki" : ns : "dbkey")` - interwiki title, not local
// * `title!(ns : "dbkey")` - title without an interwiki prefix
#[cfg(test)]
macro_rules! title {
    (
        $is_local:literal : $prefix:literal : $ns:literal : $key:literal
    ) => {{
        Title {
            namespace: $ns,
            dbkey: $key.into(),
            fragment: None,
            interwiki: Some($prefix.into()),
            local_interwiki: $is_local,
        }
    }};
    (
        $prefix:literal : $ns:literal : $key:literal
    ) => {{
        Title {
            namespace: $ns,
            dbkey: $key.into(),
            fragment: None,
            interwiki: Some($prefix.into()),
            local_interwiki: false,
        }
    }};
    (
        $ns:literal : $key:literal
    ) => {{
        Title {
            namespace: $ns,
            dbkey: $key.into(),
            fragment: None,
            interwiki: None,
            local_interwiki: false,
        }
    }};
}
199
/// Sorting must put titles without an interwiki first, then local
/// interwikis, then other interwikis, each group ordered by its remaining
/// fields.
#[test]
fn title_ord() {
    let expected = [
        title!(0:"Title"),
        title!(4:"Title"),
        title!(true:"localinterwiki1":4:"Title"),
        title!(true:"localinterwiki2":4:"Title"),
        title!("interwiki1":4:"Title"),
        title!("interwiki2":4:"Title"),
    ];

    // Deliberately out of order so that sorting has to move every group.
    let mut shuffled = vec![
        title!(true:"localinterwiki2":4:"Title"),
        title!(true:"localinterwiki1":4:"Title"),
        title!("interwiki2":4:"Title"),
        title!("interwiki1":4:"Title"),
        title!(4:"Title"),
        title!(0:"Title"),
    ];
    shuffled.sort();

    assert_eq!(shuffled, expected);
}
223
224impl Title {
225    /// Create a new `Title` from a namespace ID
226    /// and database key (title without the namespace prefix),
227    /// with no validation on the namespace or text parts.
228    ///
229    /// Good if you're getting the title from a
230    /// trusted place like the API.
231    ///
232    /// The `dbkey` should have underscores
233    /// and be normalized and sanitized
234    /// as if it has been processed by [`TitleCodec::new_title`].
235    /// The namespace must exist in the [`TitleCodec`] or [`NamespaceMap`]
236    /// that will format this title.
237    ///
238    /// # Safety
239    /// If the namespace doesn't exist in the `TitleCodec` or `NamespaceMap`,
240    /// some methods, like [`TitleCodec::to_pretty`], will panic.
241    ///
242    /// If the `dbkey` hasn't been normalized and sanitized,
243    /// the ordering implementations ( `Eq`, `PartialEq`, `Ord`, `PartialOrd`)
244    /// for the `Title` aren't guaranteed to give the correct results.
245    pub unsafe fn new_unchecked(namespace: i32, dbkey: String) -> Self {
246        Self {
247            namespace,
248            dbkey,
249            fragment: None,
250            interwiki: None,
251            local_interwiki: false,
252        }
253    }
254
255    /// Set a fragment.
256    pub fn with_fragment(mut self, fragment: String) -> Self {
257        self.fragment = Some(fragment);
258        self
259    }
260
261    /// Remove the fragment.
262    pub fn remove_fragment(mut self) -> Self {
263        self.fragment = None;
264        self
265    }
266
267    /// Get the namespace ID.
268    pub fn namespace(&self) -> i32 {
269        self.namespace
270    }
271
272    /// Get the dbkey.
273    pub fn dbkey(&self) -> &str {
274        &self.dbkey
275    }
276
277    /// Get the fragment, if there is one.
278    pub fn fragment(&self) -> Option<&str> {
279        self.fragment.as_deref()
280    }
281
282    /// Get the interwiki, if there is one.
283    pub fn interwiki(&self) -> Option<&str> {
284        self.interwiki.as_deref()
285    }
286
287    /// Whether this title was created via a local interwiki link.
288    pub fn is_local_interwiki(&self) -> bool {
289        self.local_interwiki
290    }
291
292    /// If the title is a local page that could exist, basically not an
293    /// interwiki link, nor a fragment-only link, nor a special page.
294    pub fn is_local_page(&self) -> bool {
295        self.interwiki.is_none()
296            && !self.dbkey.is_empty()
297            && self.namespace >= 0
298    }
299
300    /// Whether this title refers to a file.
301    pub fn is_file(&self) -> bool {
302        self.namespace == NS_FILE
303    }
304
305    /// Whether this title refers to a category.
306    pub fn is_category(&self) -> bool {
307        self.namespace == NS_CATEGORY
308    }
309}