1use crate::display::{TitleDisplay, TitleWhitespace};
21use crate::site_info::{NamespaceAlias, NamespaceInfo, SiteInfo};
22use crate::{Error, Result, Title, NS_MAIN};
23use bytemuck::TransparentWrapper;
24#[cfg(feature = "utils")]
25#[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
26use flate2::read::GzDecoder;
27use std::fmt::Display;
28use std::{collections::HashMap, iter::FusedIterator, sync::Arc};
29#[cfg(feature = "utils")]
30#[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
31use std::{io::Read, path::Path};
32
33#[cfg(feature = "utils")]
34#[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
35use crate::SiteInfoResponse;
36
/// Identifies a namespace either by its numeric ID or by one of its names.
///
/// Values are usually produced through the `From<i32>` and `From<&str>`
/// conversions, so lookup functions can accept either form.
#[derive(Clone, Copy, Debug)]
pub enum Namespace<'a> {
    /// A numeric namespace ID.
    Id(i32),
    /// A namespace name or alias, borrowed from the caller.
    NameOrAlias(&'a str),
}
41
42impl<'a> From<&'a str> for Namespace<'a> {
43 fn from(name_or_alias: &'a str) -> Self {
44 Namespace::NameOrAlias(name_or_alias)
45 }
46}
47
48impl From<i32> for Namespace<'_> {
49 fn from(id: i32) -> Self {
50 Self::Id(id)
51 }
52}
53
/// Owned namespace name or alias, used as the key type of the name lookup
/// table in `NamespaceMap`.
///
/// Equality, ordering and hashing are all delegated to
/// `NamespaceStringBorrowed`, i.e. they operate on the normalized characters
/// of the wrapped string rather than on its raw bytes.
#[derive(Clone, Debug)]
#[repr(transparent)]
pub(crate) struct NamespaceString(pub(crate) String);

// SAFETY: `NamespaceString` is `#[repr(transparent)]` and its only field is a
// `String`.
unsafe impl TransparentWrapper<String> for NamespaceString {}
71
72impl NamespaceString {
73 fn as_namespace_str(&self) -> &NamespaceStringBorrowed {
74 NamespaceStringBorrowed::from_str(self.0.as_str())
75 }
76}
77
78impl PartialEq for NamespaceString {
79 fn eq(&self, other: &Self) -> bool {
80 self.as_namespace_str().eq(other.as_namespace_str())
81 }
82}
83
84impl Eq for NamespaceString {}
85
/// Test fixture: each inner slice lists spellings of one namespace name that
/// differ only in letter case and in space-versus-underscore, and that the
/// tests below expect to compare and hash as equal.
#[cfg(test)]
const NAMESPACE_STRING_TESTS: [&[&str]; 5] = [
    &[
        "User talk",
        "User_talk",
        "user talk",
        "user_talk",
        "User Talk",
        "User_Talk",
        "USER TALK",
        "USER_TALK",
    ],
    &["Catégorie", "CATÉGORIE"],
    &["Συζήτηση χρήστη", "συζήτηση χρήστη", "ΣΥΖΉΤΗΣΗ ΧΡΉΣΤΗ"],
    &[
        "Обсуждение Викисловаря",
        "обсуждение викисловаря",
        "ОБСУЖДЕНИЕ ВИКИСЛОВАРЯ",
    ],
    &[
        "Մասնակցի քննարկում",
        "մասնակցի քննարկում",
        "ՄԱՍՆԱԿՑԻ ՔՆՆԱՐԿՈՒՄ",
    ],
];
112
/// Calls `f` with every ordered pair of spellings (including a spelling
/// paired with itself) taken from the same group of
/// `NAMESPACE_STRING_TESTS`.
#[cfg(test)]
fn for_each_namespace_string_combination(f: impl Fn(&str, &str)) {
    NAMESPACE_STRING_TESTS.iter().for_each(|group| {
        for &left in group.iter() {
            for &right in group.iter() {
                f(left, right);
            }
        }
    });
}
123
/// Every pair of equivalent spellings must be equal and hash identically
/// when wrapped in the owned `NamespaceString`.
#[test]
fn hash_and_eq_for_namespace_string_are_case_and_whitespace_insensitive() {
    for_each_namespace_string_combination(|left, right| {
        let left = NamespaceString(String::from(left));
        let right = NamespaceString(String::from(right));
        assert_eq!(left, right);
        assert_eq!(hash(left), hash(right))
    });
}
135
136impl std::borrow::Borrow<NamespaceStringBorrowed> for NamespaceString {
137 fn borrow(&self) -> &NamespaceStringBorrowed {
138 self.as_namespace_str()
139 }
140}
141
142#[allow(clippy::non_canonical_partial_ord_impl)]
143impl PartialOrd for NamespaceString {
144 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
145 Some(self.as_namespace_str().cmp(other.as_namespace_str()))
146 }
147}
148
149impl Ord for NamespaceString {
150 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
151 self.as_namespace_str()
153 .partial_cmp(other.as_namespace_str())
154 .unwrap()
155 }
156}
157
158impl std::hash::Hash for NamespaceString {
159 fn hash<H>(&self, hasher: &mut H)
160 where
161 H: std::hash::Hasher,
162 {
163 self.as_namespace_str().hash(hasher);
164 }
165}
166
/// Test helper: feeds `v` into a fresh `DefaultHasher` and returns the
/// resulting 64-bit hash.
#[cfg(test)]
fn hash(v: impl std::hash::Hash) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::Hasher;

    let mut state = DefaultHasher::new();
    v.hash(&mut state);
    state.finish()
}
174
175impl std::convert::From<&str> for NamespaceString {
176 fn from(s: &str) -> Self {
177 NamespaceString(s.into())
178 }
179}
180
/// Borrowed (unsized) counterpart of `NamespaceString`.
///
/// Its equality, ordering and hashing are implemented over
/// `chars_normalized`, which lowercases characters and treats `' '` and
/// `'_'` as the same character.
#[derive(Debug)]
#[repr(transparent)]
pub(crate) struct NamespaceStringBorrowed(str);

// SAFETY: `NamespaceStringBorrowed` is `#[repr(transparent)]` and its only
// field is a `str`.
unsafe impl TransparentWrapper<str> for NamespaceStringBorrowed {}
189
190impl NamespaceStringBorrowed {
191 pub fn from_str(s: &str) -> &Self {
192 Self::wrap_ref(s)
193 }
194
195 fn chars_normalized(&self) -> impl FusedIterator<Item = char> + '_ {
196 enum Iter {
197 One(Option<char>),
198 Many(std::char::ToLowercase),
199 }
200 impl Iterator for Iter {
201 type Item = char;
202
203 fn next(&mut self) -> Option<Self::Item> {
204 match self {
205 Iter::One(char) => char.take(),
206 Iter::Many(chars) => chars.next(),
207 }
208 }
209 }
210 impl FusedIterator for Iter {}
211 self.0.chars().flat_map(|c| {
212 if c == '_' || c == ' ' {
213 Iter::One(Some('_'))
214 } else {
215 Iter::Many(c.to_lowercase())
216 }
217 })
218 }
219}
220
221impl PartialEq for NamespaceStringBorrowed {
222 fn eq(&self, other: &Self) -> bool {
223 self.chars_normalized().eq(other.chars_normalized())
224 }
225}
226
227impl Eq for NamespaceStringBorrowed {}
228
/// Every pair of equivalent spellings must be equal and hash identically
/// when viewed through the borrowed `NamespaceStringBorrowed`.
#[test]
fn hash_and_eq_for_namespace_string_borrowed_are_case_and_whitespace_insensitive(
) {
    for_each_namespace_string_combination(|left, right| {
        let left = NamespaceStringBorrowed::from_str(left);
        let right = NamespaceStringBorrowed::from_str(right);
        assert_eq!(left, right);
        assert_eq!(hash(left), hash(right));
    });
}
241
242impl PartialOrd for NamespaceStringBorrowed {
243 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
244 Some(self.cmp(other))
245 }
246}
247
248impl Ord for NamespaceStringBorrowed {
249 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
250 self.chars_normalized().cmp(other.chars_normalized())
251 }
252}
253
254impl std::hash::Hash for NamespaceStringBorrowed {
255 fn hash<H>(&self, hasher: &mut H)
256 where
257 H: std::hash::Hasher,
258 {
259 for c in self.chars_normalized() {
260 c.hash(hasher);
261 }
262 }
263}
264
265impl<'a> std::convert::From<&'a str> for &'a NamespaceStringBorrowed {
266 fn from(s: &'a str) -> Self {
267 NamespaceStringBorrowed::from_str(s)
268 }
269}
270
/// Lookup tables from namespace IDs and from namespace names, canonical
/// names and aliases to the corresponding `NamespaceInfo`.
///
/// Both tables share the same `Arc<NamespaceInfo>` values, so each namespace
/// is stored once however many names refer to it.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct NamespaceMap {
    // Namespace ID -> info.
    namespaces_by_id: HashMap<i32, Arc<NamespaceInfo>>,
    // Name, canonical name or alias -> info; key comparison follows
    // `NamespaceString`'s normalized equality.
    namespaces_by_name_or_alias: HashMap<NamespaceString, Arc<NamespaceInfo>>,
}
276
277impl NamespaceMap {
278 pub fn from_site_info(site_info: SiteInfo) -> Result<Self> {
280 Self::from_namespaces_and_namespace_aliases(
281 site_info.namespaces.into_values(),
282 site_info.namespace_aliases,
283 )
284 }
285
286 #[cfg(feature = "utils")]
290 #[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
291 pub fn from_path(path: &Path) -> Result<Self> {
292 use std::fs::File;
293
294 let json = if path.extension() == Some("gz".as_ref()) {
295 let gz = File::open(path)
296 .map_err(|source| Error::from_io("open file", source, path))?;
297 let mut decoder = GzDecoder::new(gz);
298 let mut decoded = String::new();
299 decoder
300 .read_to_string(&mut decoded)
301 .map_err(|source| Error::from_io("parse GZip", source, path))?;
302 decoded
303 } else {
304 std::fs::read_to_string(path).map_err(|source| {
305 Error::from_io("read file to string", source, path)
306 })?
307 };
308 Self::from_json_with_path(&json, Some(path))
309 }
310
311 pub fn from_namespaces_and_namespace_aliases<
319 NS: IntoIterator<Item = NamespaceInfo>,
320 AL: IntoIterator<Item = NamespaceAlias>,
321 >(
322 namespaces: NS,
323 namespace_aliases: AL,
324 ) -> Result<Self> {
325 let mut namespaces_by_id = HashMap::new();
326 let mut namespaces_by_name_or_alias = HashMap::new();
327 for namespace in namespaces {
328 let namespace = Arc::new(namespace);
329 namespaces_by_id.insert(namespace.id, namespace.clone());
330 namespaces_by_name_or_alias.insert(
331 NamespaceString(namespace.name.clone()),
332 namespace.clone(),
333 );
334 if let Some(canonical) = namespace.canonical.as_deref() {
335 namespaces_by_name_or_alias.insert(
336 NamespaceString(canonical.to_string()),
337 namespace.clone(),
338 );
339 }
340 }
341 let mut aliases_not_found = Vec::new();
342 for alias in namespace_aliases {
343 if let Some(namespace_info) = namespaces_by_id.get(&alias.id) {
344 namespaces_by_name_or_alias.insert(
345 NamespaceString(alias.alias),
346 namespace_info.clone(),
347 );
348 } else {
349 aliases_not_found.push(alias);
350 }
351 }
352 if aliases_not_found.is_empty() {
353 Ok(Self {
354 namespaces_by_id,
355 namespaces_by_name_or_alias,
356 })
357 } else {
358 Err(Error::UnknownAliases(aliases_not_found))
359 }
360 }
361
362 pub fn from_iters<
367 NS: IntoIterator<Item = NI>,
368 NI: IntoIterator<Item = (String, String)>,
369 AL: IntoIterator<Item = (String, i32)>,
370 >(
371 namespaces: NS,
372 namespace_aliases: AL,
373 ) -> Result<Self> {
374 let namespaces = namespaces
377 .into_iter()
378 .map(|hash_map| NamespaceInfo::try_from_iter(hash_map))
379 .collect::<Result<Vec<_>>>()?;
380 Self::from_namespaces_and_namespace_aliases(
381 namespaces,
382 namespace_aliases
383 .into_iter()
384 .map(|(alias, id)| NamespaceAlias { id, alias }),
385 )
386 }
387
388 #[cfg(feature = "utils")]
391 #[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
392 pub fn from_reader<R: Read>(reader: R) -> Result<Self> {
393 let site_info = serde_json::from_reader::<R, SiteInfoResponse>(reader)
394 .map_err(|source| Error::Json {
395 source: Arc::new(source),
396 })?
397 .query;
398 Self::from_site_info(site_info)
399 }
400
401 #[cfg(feature = "utils")]
403 #[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
404 pub fn from_json<S: AsRef<str>>(json: S) -> Result<Self> {
405 Self::from_json_with_path(json.as_ref(), None)
406 }
407
408 #[cfg(feature = "utils")]
412 #[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
413 fn from_json_with_path(json: &str, path: Option<&Path>) -> Result<Self> {
414 Self::from_site_info(
415 serde_json::from_str::<SiteInfoResponse>(json)
416 .map_err(|source| {
417 let source = Arc::new(source);
418 if let Some(path) = path {
419 Error::JsonFile {
420 source,
421 path: path.into(),
422 }
423 } else {
424 Error::Json { source }
425 }
426 })?
427 .query,
428 )
429 }
430
431 pub fn get_by_id(&self, id: i32) -> Option<&NamespaceInfo> {
433 self.namespaces_by_id.get(&id).map(|arc| &**arc)
434 }
435
436 fn get_by_name_or_alias<S>(
439 &self,
440 name_or_alias: &S,
441 ) -> Option<&NamespaceInfo>
442 where
443 S: ?Sized,
444 NamespaceString: std::borrow::Borrow<S>,
445 S: std::hash::Hash + Eq,
446 {
447 self.namespaces_by_name_or_alias
448 .get(name_or_alias)
449 .map(|arc| &**arc)
450 }
451
452 pub fn get_info<'a, 'b, N: Into<Namespace<'b>>>(
454 &'a self,
455 namespace: N,
456 ) -> Option<&'a NamespaceInfo> {
457 match namespace.into() {
458 Namespace::Id(id) => self.get_by_id(id),
459 Namespace::NameOrAlias(name_or_alias) => self.get_by_name_or_alias(
460 NamespaceStringBorrowed::from_str(name_or_alias),
461 ),
462 }
463 }
464
465 pub fn get_id<'a, 'b, N: Into<Namespace<'b>>>(
470 &'a self,
471 namespace: N,
472 ) -> Option<i32> {
473 self.get_info(namespace).map(|info| info.id)
474 }
475
476 pub fn get_name<'a, 'b, N: Into<Namespace<'b>>>(
478 &'a self,
479 namespace: N,
480 ) -> Option<&'a str> {
481 self.get_info(namespace).map(|info| &*info.name)
482 }
483
484 pub fn all_namespaces(&self) -> Vec<i32> {
486 self.namespaces_by_id.keys().copied().collect::<Vec<_>>()
487 }
488
489 pub fn get_case<'a, 'b, N: Into<Namespace<'b>>>(
491 &'a self,
492 namespace: N,
493 ) -> Option<&'a str> {
494 self.get_info(namespace).map(|info| &*info.case)
495 }
496
497 pub fn get_canonical_name<'a, 'b, N: Into<Namespace<'b>>>(
500 &'a self,
501 namespace: N,
502 ) -> Option<&'a str> {
503 self.get_info(namespace)
504 .and_then(|info| info.canonical.as_deref())
505 }
506
507 pub fn is_capitalized<'a, 'b, N: Into<Namespace<'b>>>(
512 &'a self,
513 namespace: N,
514 ) -> Option<bool> {
515 self.get_info(namespace)
516 .map(|info| &*info.case)
517 .map(|case| case == "first-letter")
518 }
519
520 pub fn to_pretty(&self, title: &Title) -> Option<String> {
526 self.title_string(title, TitleWhitespace::Spaces, false)
527 }
528
529 pub fn to_underscores(&self, title: &Title) -> Option<String> {
536 self.title_string(title, TitleWhitespace::Underscores, false)
537 }
538
539 pub fn to_pretty_with_fragment(&self, title: &Title) -> Option<String> {
544 self.title_string(title, TitleWhitespace::Spaces, true)
545 }
546
547 pub fn display_title<'map: 'title, 'title>(
553 &'map self,
554 title: &'title Title,
555 whitespace: TitleWhitespace,
556 include_fragment: bool,
557 ) -> Option<impl Display + 'title> {
558 Some(TitleDisplay {
560 interwiki: title.interwiki(),
561 namespace: if title.namespace() == NS_MAIN {
562 None
563 } else {
564 Some(self.get_name(title.namespace())?)
565 },
566 dbkey: title.dbkey(),
567 fragment: if include_fragment {
568 title.fragment.as_deref()
569 } else {
570 None
571 },
572 whitespace,
573 })
574 }
575
576 fn title_string<'map: 'title, 'title>(
577 &'map self,
578 title: &'title Title,
579 whitespace: TitleWhitespace,
580 include_fragment: bool,
581 ) -> Option<String> {
582 self.display_title(title, whitespace, include_fragment)
583 .map(|display| format!("{display}"))
584 }
585}
586
/// Builds a `NamespaceMap` from a small fixture, checks that both lookup
/// tables match the expected contents, and then checks that lookups by name,
/// canonical name and alias ignore letter case and space/underscore.
#[test]
fn siteinfo_can_be_converted_to_namespace_map_and_lookup_is_case_insensitive() {
    // Builds one fixture `NamespaceInfo` from its plain-data description.
    let info = |id: i32, name: &str, canonical: Option<&str>, case: &str| {
        NamespaceInfo {
            id,
            name: name.into(),
            canonical: canonical.map(String::from),
            case: case.into(),
        }
    };

    // Input namespaces as (id, name, canonical name, case).
    let namespace_rows = [
        (0, "", None, "first-letter"),
        (1, "Talk", Some("Talk"), "first-letter"),
        (4, "Wikipedia", Some("Project"), "first-letter"),
        (10, "Template", Some("Template"), "first-letter"),
        (14, "Category", Some("Category"), "first-letter"),
        (15, "Category talk", Some("Category talk"), "first-letter"),
    ];
    // Input aliases as (alias, id).
    let alias_rows = [("WP", 4)];

    // Expected contents of the ID table.
    let expected_by_id = [
        (0, ("", None, "first-letter")),
        (1, ("Talk", Some("Talk"), "first-letter")),
        (4, ("Wikipedia", Some("Project"), "first-letter")),
        (10, ("Template", Some("Template"), "first-letter")),
        (14, ("Category", Some("Category"), "first-letter")),
        (15, ("Category talk", Some("Category talk"), "first-letter")),
    ];
    // Expected contents of the name/alias table.
    let expected_by_name = [
        ("", (0, "", None, "first-letter")),
        ("Talk", (1, "Talk", Some("Talk"), "first-letter")),
        ("Wikipedia", (4, "Wikipedia", Some("Project"), "first-letter")),
        ("Project", (4, "Wikipedia", Some("Project"), "first-letter")),
        ("WP", (4, "Wikipedia", Some("Project"), "first-letter")),
        ("Template", (10, "Template", Some("Template"), "first-letter")),
        ("Category", (14, "Category", Some("Category"), "first-letter")),
        (
            "Category talk",
            (15, "Category talk", Some("Category talk"), "first-letter"),
        ),
    ];

    let namespaces: Vec<NamespaceInfo> = namespace_rows
        .map(|(id, name, canonical, case)| info(id, name, canonical, case))
        .into();
    let namespacealiases: Vec<NamespaceAlias> = alias_rows
        .map(|(alias, id)| NamespaceAlias {
            alias: alias.into(),
            id,
        })
        .into();

    let expected = Ok(NamespaceMap {
        namespaces_by_id: HashMap::from(expected_by_id.map(
            |(id, (name, canonical, case))| {
                (id, Arc::new(info(id, name, canonical, case)))
            },
        )),
        namespaces_by_name_or_alias: HashMap::from(expected_by_name.map(
            |(name_or_alias, (id, name, canonical, case))| {
                (
                    NamespaceString::from(name_or_alias),
                    Arc::new(info(id, name, canonical, case)),
                )
            },
        )),
    });

    // Reduce the error to something comparable: only the unknown-alias
    // list is kept, any other error becomes `None`.
    let namespace_map = NamespaceMap::from_namespaces_and_namespace_aliases(
        namespaces.clone(),
        namespacealiases.clone(),
    )
    .map_err(|error| match error {
        Error::UnknownAliases(unknown) => Some(unknown),
        _ => None,
    });
    assert_eq!(
        namespace_map, expected,
        "\nconverting {:?}\n{:?}",
        &namespaces, &namespacealiases
    );

    let namespace_map = namespace_map.unwrap();
    assert_eq!(namespace_map.get_name(1), Some("Talk"));
    assert_eq!(namespace_map.get_name(4), Some("Wikipedia"));
    assert_eq!(namespace_map.get_name(14), Some("Category"));
    assert_eq!(namespace_map.get_canonical_name(4), Some("Project"));
    assert_eq!(namespace_map.get_name("Project"), Some("Wikipedia"));
    assert_eq!(
        namespace_map.get_canonical_name("Wikipedia"),
        Some("Project")
    );
    assert_eq!(namespace_map.get_case("Project"), Some("first-letter"));

    // Every spelling in a group must resolve to the same namespace ID.
    let lookups = [
        (&["Talk", "talk", "TALK"][..], 1),
        (
            &[
                "Wikipedia",
                "wikipedia",
                "WIKIPEDIA",
                "Project",
                "project",
                "PROJECT",
                "WP",
            ][..],
            4,
        ),
        (
            &[
                "Category talk",
                "Category_talk",
                "CATEGORY TALK",
                "CATEGORY_TALK",
            ][..],
            15,
        ),
    ];
    for (names, expected_id) in lookups {
        for name in names {
            assert_eq!(
                namespace_map.get_id(*name),
                Some(expected_id),
                "\n{}",
                name
            );
        }
    }
}