mwtitle/lib.rs
1/*
2Copyright (C) Tim Starling
3Copyright (C) Daniel Kinzler
4Copyright (C) 2021 Kunal Mehta <legoktm@debian.org>
5Copyright (C) 2021 Erutuon
6
7This program is free software: you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
9the Free Software Foundation, either version 3 of the License, or
10(at your option) any later version.
11
12This program is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with this program. If not, see <http://www.gnu.org/licenses/>.
19 */
20//! `mwtitle` is a library for parsing, normalizing and formatting MediaWiki
21//! page titles. It is primarily a port of the MediaWikiTitleCodec class
22//! from MediaWiki, and passes the MediaWiki test suite.
23//!
//! The easiest way to get started is to create a [`TitleCodec`] from a [siteinfo](https://www.mediawiki.org/wiki/API:Siteinfo)
25//! API request.
26//! ```
27//! # #[tokio::main]
28//! # async fn main() -> anyhow::Result<()> {
29//! # #[cfg(feature = "parsing")]
30//! # {
31//! # use mwtitle::{SiteInfoResponse, TitleCodec};
32//! # let client = reqwest::Client::builder().user_agent("mwtitle-testing").build()?;
33//! let url = "https://en.wikipedia.org/w/api.php\
34//! ?action=query&meta=siteinfo\
35//! &siprop=general|namespaces|namespacealiases|interwikimap\
36//! &formatversion=2&format=json";
37//! let resp: SiteInfoResponse = client.get(url).send().await?.json().await?;
38//! let codec = TitleCodec::from_site_info(resp.query)?;
39//! let title = codec.new_title("Talk:Main Page#Section 1")?;
40//! assert_eq!(title.namespace(), 1);
41//! assert_eq!(title.dbkey(), "Main_Page");
42//! assert_eq!(title.fragment(), Some("Section 1"));
43//! assert_eq!(codec.to_pretty(&title), "Talk:Main Page".to_string());
44//! assert_eq!(
45//! codec.to_pretty_with_fragment(&title),
46//! "Talk:Main Page#Section 1".to_string()
47//! );
48//! # }
49//! # Ok(())
50//! # }
51//! ```
52//!
//! It's also possible to create a `TitleCodec` from a JSON
54//! `siteinfo-namespaces.json` or compressed `siteinfo-namespaces.json.gz`
55//! that comes from Wikimedia dumps. This requires the extra `utils` feature
56//! to be enabled.
57//!
58//! ## Contributing
59//! `mwtitle` is a part of the [`mwbot-rs` project](https://www.mediawiki.org/wiki/Mwbot-rs).
60//! We're always looking for new contributors, please [reach out](https://www.mediawiki.org/wiki/Mwbot-rs#Contributing)
61//! if you're interested!
62#![deny(clippy::all)]
63#![deny(rustdoc::all)]
64#![cfg_attr(docsrs, feature(doc_cfg))]
65
66#[cfg(feature = "parsing")]
67#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
68mod codec;
69mod display;
70mod error;
71mod interwiki_set;
72#[cfg(feature = "parsing")]
73#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
74mod ip;
75#[cfg(feature = "parsing")]
76#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
77mod ipv6;
78pub mod namespace;
79mod namespace_map;
80#[cfg(feature = "parsing")]
81#[cfg_attr(docsrs, doc(cfg(feature = "parsing")))]
82mod php;
83mod site_info;
84
85#[cfg(feature = "parsing")]
86pub use codec::TitleCodec;
87pub use display::TitleWhitespace;
88pub use error::Error;
89pub use interwiki_set::InterwikiSet;
90pub use namespace_map::{Namespace, NamespaceMap};
91pub use site_info::{
92 Interwiki, NamespaceAlias, NamespaceInfo, Response as SiteInfoResponse,
93 SiteInfo,
94};
95pub type Result<T, E = Error> = std::result::Result<T, E>;
96
97use namespace::{NS_CATEGORY, NS_FILE, NS_MAIN};
98
/// Represents a MediaWiki title. A title can be broken down into the following
/// attributes: `[[interwiki:ns:db_key#fragment]]`.
/// * `interwiki`: Optional prefix pointing to another site
/// * `namespace`: Numerical ID corresponding to a MediaWiki namespace
/// * `dbkey`: Page name, with underscores instead of spaces
/// * `fragment`: Optional anchor for a specific section
///
/// `Eq` and `Hash` are both derived over every field, so they agree with each
/// other and a `Title` can be stored in a `HashSet` or used as a `HashMap` key.
///
/// ```
/// # use mwtitle::Title;
/// // ns1 is Talk, so this is [[Talk:Main Page]]
/// let title = unsafe { Title::new_unchecked(1, "Main_Page".into()) };
/// assert_eq!(title.namespace(), 1);
/// assert_eq!(title.dbkey(), "Main_Page");
/// assert!(title.interwiki().is_none());
/// assert!(title.fragment().is_none());
/// let title = title.with_fragment("Section 1".into());
/// assert_eq!(title.fragment(), Some("Section 1"));
/// ```
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct Title {
    /// Numerical namespace ID.
    namespace: i32,
    /// Page name, with underscores instead of spaces.
    dbkey: String,
    /// Anchor for a specific section, if any.
    fragment: Option<String>,
    /// Prefix pointing to another site, if any.
    interwiki: Option<String>,
    /// Whether the title was created via a local interwiki link.
    local_interwiki: bool,
}
125
126impl Title {
127 #[inline]
128 /// Reorders fields into a reasonable order for `PartialOrd` and `Ord` implementations.
129 /// Negates `local_interwiki` to make local interwikis sort first.
130 /// The desired order with regard to interwikis:
131 /// titles without interwikis, titles with local interwikis, titles with other interwikis
132 fn to_sortable(&self) -> impl Ord + '_ {
133 let Title {
134 namespace,
135 dbkey,
136 fragment,
137 interwiki,
138 local_interwiki,
139 } = self;
140 (
141 interwiki.is_some(),
142 !local_interwiki,
143 interwiki.as_deref(),
144 *namespace,
145 dbkey,
146 fragment.as_deref(),
147 )
148 }
149}
150
151impl PartialOrd for Title {
152 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
153 Some(self.cmp(other))
154 }
155}
156
157impl Ord for Title {
158 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
159 self.to_sortable().cmp(&other.to_sortable())
160 }
161}
162
/// Test-only shorthand for writing a `Title` value inline.
///
/// Accepted forms, all made of literals separated by `:`:
/// * `local_interwiki : interwiki : namespace : dbkey`
/// * `interwiki : namespace : dbkey` (not a local interwiki)
/// * `namespace : dbkey` (no interwiki at all)
///
/// The fragment is always left unset.
#[cfg(test)]
macro_rules! title {
    // Interwiki title with an explicit `local_interwiki` flag.
    ($is_local:literal : $prefix:literal : $ns:literal : $key:literal) => {
        Title {
            namespace: $ns,
            dbkey: String::from($key),
            fragment: None,
            interwiki: Some(String::from($prefix)),
            local_interwiki: $is_local,
        }
    };
    // Interwiki title that is not a local interwiki.
    ($prefix:literal : $ns:literal : $key:literal) => {
        Title {
            namespace: $ns,
            dbkey: String::from($key),
            fragment: None,
            interwiki: Some(String::from($prefix)),
            local_interwiki: false,
        }
    };
    // Plain title without any interwiki prefix.
    ($ns:literal : $key:literal) => {
        Title {
            namespace: $ns,
            dbkey: String::from($key),
            fragment: None,
            interwiki: None,
            local_interwiki: false,
        }
    };
}
199
/// Sorting must yield: titles without interwikis (by namespace), then titles
/// with local interwikis, then titles with other interwikis.
#[test]
fn title_ord() {
    let expected = [
        title!(0:"Title"),
        title!(4:"Title"),
        title!(true:"localinterwiki1":4:"Title"),
        title!(true:"localinterwiki2":4:"Title"),
        title!("interwiki1":4:"Title"),
        title!("interwiki2":4:"Title"),
    ];

    // Start from the exact reverse of the expected order.
    let mut titles = vec![
        title!(true:"localinterwiki2":4:"Title"),
        title!(true:"localinterwiki1":4:"Title"),
        title!("interwiki2":4:"Title"),
        title!("interwiki1":4:"Title"),
        title!(4:"Title"),
        title!(0:"Title"),
    ];
    titles.sort();

    assert_eq!(&titles[..], &expected[..]);
}
223
224impl Title {
225 /// Create a new `Title` from a namespace ID
226 /// and database key (title without the namespace prefix),
227 /// with no validation on the namespace or text parts.
228 ///
229 /// Good if you're getting the title from a
230 /// trusted place like the API.
231 ///
232 /// The `dbkey` should have underscores
233 /// and be normalized and sanitized
234 /// as if it has been processed by [`TitleCodec::new_title`].
235 /// The namespace must exist in the [`TitleCodec`] or [`NamespaceMap`]
236 /// that will format this title.
237 ///
238 /// # Safety
239 /// If the namespace doesn't exist in the `TitleCodec` or `NamespaceMap`,
240 /// some methods, like [`TitleCodec::to_pretty`], will panic.
241 ///
242 /// If the `dbkey` hasn't been normalized and sanitized,
243 /// the ordering implementations ( `Eq`, `PartialEq`, `Ord`, `PartialOrd`)
244 /// for the `Title` aren't guaranteed to give the correct results.
245 pub unsafe fn new_unchecked(namespace: i32, dbkey: String) -> Self {
246 Self {
247 namespace,
248 dbkey,
249 fragment: None,
250 interwiki: None,
251 local_interwiki: false,
252 }
253 }
254
255 /// Set a fragment.
256 pub fn with_fragment(mut self, fragment: String) -> Self {
257 self.fragment = Some(fragment);
258 self
259 }
260
261 /// Remove the fragment.
262 pub fn remove_fragment(mut self) -> Self {
263 self.fragment = None;
264 self
265 }
266
267 /// Get the namespace ID.
268 pub fn namespace(&self) -> i32 {
269 self.namespace
270 }
271
272 /// Get the dbkey.
273 pub fn dbkey(&self) -> &str {
274 &self.dbkey
275 }
276
277 /// Get the fragment, if there is one.
278 pub fn fragment(&self) -> Option<&str> {
279 self.fragment.as_deref()
280 }
281
282 /// Get the interwiki, if there is one.
283 pub fn interwiki(&self) -> Option<&str> {
284 self.interwiki.as_deref()
285 }
286
287 /// Whether this title was created via a local interwiki link.
288 pub fn is_local_interwiki(&self) -> bool {
289 self.local_interwiki
290 }
291
292 /// If the title is a local page that could exist, basically not an
293 /// interwiki link, nor a fragment-only link, nor a special page.
294 pub fn is_local_page(&self) -> bool {
295 self.interwiki.is_none()
296 && !self.dbkey.is_empty()
297 && self.namespace >= 0
298 }
299
300 /// Whether this title refers to a file.
301 pub fn is_file(&self) -> bool {
302 self.namespace == NS_FILE
303 }
304
305 /// Whether this title refers to a category.
306 pub fn is_category(&self) -> bool {
307 self.namespace == NS_CATEGORY
308 }
309}