mwtitle/
lib.rs

1/*
2Copyright (C) Tim Starling
3Copyright (C) Daniel Kinzler
4Copyright (C) 2021 Kunal Mehta <legoktm@debian.org>
5Copyright (C) 2021 Erutuon
6
7This program is free software: you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
11
12This program is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with this program.  If not, see <http://www.gnu.org/licenses/>.
19 */
20//! `mwtitle` is a library for parsing, normalizing and formatting MediaWiki
21//! page titles. It is primarily a port of the MediaWikiTitleCodec class
22//! from MediaWiki, and passes the MediaWiki test suite.
23//!
//! The easiest way to get started is to create a [`TitleCodec`] from a [siteinfo](https://www.mediawiki.org/wiki/API:Siteinfo)
//! API request.
26//! ```
27//! # #[tokio::main]
28//! # async fn main() -> anyhow::Result<()> {
29//! # #[cfg(feature = "parsing")]
30//! # {
31//! # use mwtitle::{SiteInfoResponse, TitleCodec};
32//! let url = "https://en.wikipedia.org/w/api.php\
33//!            ?action=query&meta=siteinfo\
34//!            &siprop=general|namespaces|namespacealiases|interwikimap\
35//!            &formatversion=2&format=json";
36//! let resp: SiteInfoResponse = reqwest::get(url).await?.json().await?;
37//! let codec = TitleCodec::from_site_info(resp.query)?;
38//! let title = codec.new_title("Talk:Main Page#Section 1")?;
39//! assert_eq!(title.namespace(), 1);
40//! assert_eq!(title.dbkey(), "Main_Page");
41//! assert_eq!(title.fragment(), Some("Section 1"));
42//! assert_eq!(codec.to_pretty(&title), "Talk:Main Page".to_string());
43//! assert_eq!(
44//!     codec.to_pretty_with_fragment(&title),
45//!     "Talk:Main Page#Section 1".to_string()
46//! );
47//! # }
48//! # Ok(())
49//! # }
50//! ```
51//!
//! It's also possible to create a `TitleCodec` from a JSON
53//! `siteinfo-namespaces.json` or compressed `siteinfo-namespaces.json.gz`
54//! that comes from Wikimedia dumps. This requires the extra `utils` feature
55//! to be enabled.
56//!
57//! ## Contributing
58//! `mwtitle` is a part of the [`mwbot-rs` project](https://www.mediawiki.org/wiki/Mwbot-rs).
59//! We're always looking for new contributors, please [reach out](https://www.mediawiki.org/wiki/Mwbot-rs#Contributing)
60//! if you're interested!
61#![deny(clippy::all)]
62#![deny(rustdoc::all)]
63#![cfg_attr(docsrs, feature(doc_cfg))]
64
65#[cfg(feature = "parsing")]
66#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
67mod codec;
68mod display;
69mod error;
70mod interwiki_set;
71#[cfg(feature = "parsing")]
72#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
73mod ip;
74#[cfg(feature = "parsing")]
75#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
76mod ipv6;
77pub mod namespace;
78mod namespace_map;
79#[cfg(feature = "parsing")]
80#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
81mod php;
82mod site_info;
83
84#[cfg(feature = "parsing")]
85pub use codec::TitleCodec;
86pub use display::TitleWhitespace;
87pub use error::Error;
88pub use interwiki_set::InterwikiSet;
89pub use namespace_map::{Namespace, NamespaceMap};
90pub use site_info::{
91    Interwiki, NamespaceAlias, NamespaceInfo, Response as SiteInfoResponse,
92    SiteInfo,
93};
94pub type Result<T, E = Error> = std::result::Result<T, E>;
95
96use namespace::{NS_CATEGORY, NS_FILE, NS_MAIN};
97
/// Represents a MediaWiki title. A title can be broken down into the following
/// attributes: `[[interwiki:ns:db_key#fragment]]`.
/// * `interwiki`: Optional prefix pointing to another site
/// * `namespace`: Numerical ID corresponding to a MediaWiki namespace
/// * `dbkey`: Page name, with underscores instead of spaces
/// * `fragment`: Optional anchor for a specific section
///
/// `Title` derives `Hash` alongside `Eq`, so titles can be stored in a
/// `HashSet` or used as `HashMap` keys.
///
/// ```
/// # use mwtitle::Title;
/// // ns1 is Talk, so this is [[Talk:Main Page]]
/// let title = unsafe { Title::new_unchecked(1, "Main_Page".into()) };
/// assert_eq!(title.namespace(), 1);
/// assert_eq!(title.dbkey(), "Main_Page");
/// assert!(title.interwiki().is_none());
/// assert!(title.fragment().is_none());
/// let title = title.with_fragment("Section 1".into());
/// assert_eq!(title.fragment(), Some("Section 1"));
/// ```
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Title {
    /// Numerical ID of the MediaWiki namespace.
    namespace: i32,
    /// Page name, with underscores instead of spaces.
    dbkey: String,
    /// Optional anchor for a specific section.
    fragment: Option<String>,
    /// Optional prefix pointing to another site.
    interwiki: Option<String>,
    /// Whether this title was created via a local interwiki link.
    local_interwiki: bool,
}
124
125impl Title {
126    #[inline]
127    /// Reorders fields into a reasonable order for `PartialOrd` and `Ord` implementations.
128    /// Negates `local_interwiki` to make local interwikis sort first.
129    /// The desired order with regard to interwikis:
130    /// titles without interwikis, titles with local interwikis, titles with other interwikis
131    fn to_sortable(&self) -> impl Ord + '_ {
132        let Title {
133            namespace,
134            dbkey,
135            fragment,
136            interwiki,
137            local_interwiki,
138        } = self;
139        (
140            interwiki.is_some(),
141            !local_interwiki,
142            interwiki.as_deref(),
143            *namespace,
144            dbkey,
145            fragment.as_deref(),
146        )
147    }
148}
149
150impl PartialOrd for Title {
151    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
152        Some(self.cmp(other))
153    }
154}
155
156impl Ord for Title {
157    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
158        self.to_sortable().cmp(&other.to_sortable())
159    }
160}
161
/// Test-only shorthand for building a [`Title`] without a fragment.
///
/// Every part must be a literal. Three forms are accepted:
/// * `title!(local_interwiki : "interwiki" : namespace : "dbkey")`
/// * `title!("interwiki" : namespace : "dbkey")`, where `local_interwiki`
///   is `false`
/// * `title!(namespace : "dbkey")`, with no interwiki at all
#[cfg(test)]
macro_rules! title {
    // Interwiki title with an explicit `local_interwiki` flag.
    (
        $local_interwiki:literal : $interwiki:literal : $namespace:literal : $dbkey:literal
    ) => {{
        Title {
            namespace: $namespace,
            dbkey: $dbkey.into(),
            fragment: None,
            interwiki: Some($interwiki.into()),
            local_interwiki: $local_interwiki,
        }
    }};
    // Interwiki title that is not a local interwiki.
    (
        $interwiki:literal : $namespace:literal : $dbkey:literal
    ) => {{
        Title {
            namespace: $namespace,
            dbkey: $dbkey.into(),
            fragment: None,
            interwiki: Some($interwiki.into()),
            local_interwiki: false,
        }
    }};
    // Plain title without any interwiki prefix.
    (
        $namespace:literal : $dbkey:literal
    ) => {{
        Title {
            namespace: $namespace,
            dbkey: $dbkey.into(),
            fragment: None,
            interwiki: None,
            local_interwiki: false,
        }
    }};
}
198
/// Sorting must place titles without an interwiki first (ordered by
/// namespace), then local interwikis, then all other interwikis, with each
/// interwiki group ordered by its prefix.
#[test]
fn title_ord() {
    let expected = [
        title!(0:"Title"),
        title!(4:"Title"),
        title!(true:"localinterwiki1":4:"Title"),
        title!(true:"localinterwiki2":4:"Title"),
        title!("interwiki1":4:"Title"),
        title!("interwiki2":4:"Title"),
    ];

    // Deliberately shuffled input so that sorting has real work to do.
    let mut titles = vec![
        title!(true:"localinterwiki2":4:"Title"),
        title!(true:"localinterwiki1":4:"Title"),
        title!("interwiki2":4:"Title"),
        title!("interwiki1":4:"Title"),
        title!(4:"Title"),
        title!(0:"Title"),
    ];
    titles.sort();

    assert_eq!(titles, expected);
}
222
223impl Title {
224    /// Create a new `Title` from a namespace ID
225    /// and database key (title without the namespace prefix),
226    /// with no validation on the namespace or text parts.
227    ///
228    /// Good if you're getting the title from a
229    /// trusted place like the API.
230    ///
231    /// The `dbkey` should have underscores
232    /// and be normalized and sanitized
233    /// as if it has been processed by [`TitleCodec::new_title`].
234    /// The namespace must exist in the [`TitleCodec`] or [`NamespaceMap`]
235    /// that will format this title.
236    ///
237    /// # Safety
238    /// If the namespace doesn't exist in the `TitleCodec` or `NamespaceMap`,
239    /// some methods, like [`TitleCodec::to_pretty`], will panic.
240    ///
241    /// If the `dbkey` hasn't been normalized and sanitized,
242    /// the ordering implementations ( `Eq`, `PartialEq`, `Ord`, `PartialOrd`)
243    /// for the `Title` aren't guaranteed to give the correct results.
244    pub unsafe fn new_unchecked(namespace: i32, dbkey: String) -> Self {
245        Self {
246            namespace,
247            dbkey,
248            fragment: None,
249            interwiki: None,
250            local_interwiki: false,
251        }
252    }
253
254    /// Set a fragment.
255    pub fn with_fragment(mut self, fragment: String) -> Self {
256        self.fragment = Some(fragment);
257        self
258    }
259
260    /// Remove the fragment.
261    pub fn remove_fragment(mut self) -> Self {
262        self.fragment = None;
263        self
264    }
265
266    /// Get the namespace ID.
267    pub fn namespace(&self) -> i32 {
268        self.namespace
269    }
270
271    /// Get the dbkey.
272    pub fn dbkey(&self) -> &str {
273        &self.dbkey
274    }
275
276    /// Get the fragment, if there is one.
277    pub fn fragment(&self) -> Option<&str> {
278        self.fragment.as_deref()
279    }
280
281    /// Get the interwiki, if there is one.
282    pub fn interwiki(&self) -> Option<&str> {
283        self.interwiki.as_deref()
284    }
285
286    /// Whether this title was created via a local interwiki link.
287    pub fn is_local_interwiki(&self) -> bool {
288        self.local_interwiki
289    }
290
291    /// If the title is a local page that could exist, basically not an
292    /// interwiki link, nor a fragment-only link, nor a special page.
293    pub fn is_local_page(&self) -> bool {
294        self.interwiki.is_none()
295            && !self.dbkey.is_empty()
296            && self.namespace >= 0
297    }
298
299    /// Whether this title refers to a file.
300    pub fn is_file(&self) -> bool {
301        self.namespace == NS_FILE
302    }
303
304    /// Whether this title refers to a category.
305    pub fn is_category(&self) -> bool {
306        self.namespace == NS_CATEGORY
307    }
308}