1use crate::display::{TitleDisplay, TitleWhitespace};
21use crate::site_info::{NamespaceAlias, NamespaceInfo, SiteInfo};
22use crate::{Error, Result, Title, NS_MAIN};
23use bytemuck::TransparentWrapper;
24#[cfg(feature = "utils")]
25#[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
26use flate2::read::GzDecoder;
27use std::fmt::Display;
28use std::{collections::HashMap, iter::FusedIterator, sync::Arc};
29#[cfg(feature = "utils")]
30#[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
31use std::{io::Read, path::Path};
32
33#[cfg(feature = "utils")]
34#[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
35use crate::SiteInfoResponse;
36
/// Identifies a namespace either by its numeric ID or by one of its names.
///
/// Values are normally produced through the `From<i32>` and `From<&str>`
/// conversions, so lookup functions can accept `impl Into<Namespace>`.
#[derive(Clone, Copy, Debug)]
pub enum Namespace<'a> {
    /// A numeric namespace ID.
    Id(i32),
    /// A namespace name or alias, borrowed from the caller.
    NameOrAlias(&'a str),
}
41
42impl<'a> From<&'a str> for Namespace<'a> {
43 fn from(name_or_alias: &'a str) -> Self {
44 Namespace::NameOrAlias(name_or_alias)
45 }
46}
47
48impl From<i32> for Namespace<'_> {
49 fn from(id: i32) -> Self {
50 Self::Id(id)
51 }
52}
53
/// An owned namespace name or alias.
///
/// Comparison, ordering and hashing are all delegated to
/// [`NamespaceStringBorrowed`], so they ignore letter case and treat spaces
/// and underscores as the same character.
#[derive(Clone, Debug)]
#[repr(transparent)]
pub(crate) struct NamespaceString(pub(crate) String);
67
// SAFETY: `NamespaceString` is `#[repr(transparent)]` and its only field is a
// `String`, so it has the same layout as the `String` it wraps.
unsafe impl TransparentWrapper<String> for NamespaceString {}
71
72impl NamespaceString {
73 fn as_namespace_str(&self) -> &NamespaceStringBorrowed {
74 NamespaceStringBorrowed::from_str(self.0.as_str())
75 }
76}
77
78impl PartialEq for NamespaceString {
79 fn eq(&self, other: &Self) -> bool {
80 self.as_namespace_str().eq(other.as_namespace_str())
81 }
82}
83
// Marker impl: `PartialEq` above compares the normalized character sequences
// of both strings.
impl Eq for NamespaceString {}
85
// Test fixture: each inner slice is a group of spellings that the tests in
// this file expect to compare equal and hash identically.
#[cfg(test)]
const NAMESPACE_STRING_TESTS: [&[&str]; 5] = [
    // ASCII, varying in case and in space vs. underscore.
    &[
        "User talk",
        "User_talk",
        "user talk",
        "user_talk",
        "User Talk",
        "User_Talk",
        "USER TALK",
        "USER_TALK",
    ],
    // Latin script with a non-ASCII letter.
    &["Catégorie", "CATÉGORIE"],
    // Greek script.
    &["Συζήτηση χρήστη", "συζήτηση χρήστη", "ΣΥΖΉΤΗΣΗ ΧΡΉΣΤΗ"],
    // Cyrillic script.
    &[
        "Обсуждение Викисловаря",
        "обсуждение викисловаря",
        "ОБСУЖДЕНИЕ ВИКИСЛОВАРЯ",
    ],
    // Armenian script.
    &[
        "Մասնակցի քննարկում",
        "մասնակցի քննարկում",
        "ՄԱՍՆԱԿՑԻ ՔՆՆԱՐԿՈՒՄ",
    ],
];
112
/// Calls `f` with every ordered pair of spellings (including a spelling
/// paired with itself) taken from the same group of
/// `NAMESPACE_STRING_TESTS`.
#[cfg(test)]
fn for_each_namespace_string_combination(f: impl Fn(&str, &str)) {
    for variants in NAMESPACE_STRING_TESTS.iter() {
        for &left in variants.iter() {
            for &right in variants.iter() {
                f(left, right);
            }
        }
    }
}
123
// Every pair of spellings within a fixture group must be equal and must hash
// to the same value when wrapped in `NamespaceString`.
#[test]
fn hash_and_eq_for_namespace_string_are_case_and_whitespace_insensitive() {
    for_each_namespace_string_combination(|left, right| {
        let lhs = NamespaceString(String::from(left));
        let rhs = NamespaceString(String::from(right));
        assert_eq!(lhs, rhs);
        assert_eq!(hash(lhs), hash(rhs));
    });
}
135
136impl std::borrow::Borrow<NamespaceStringBorrowed> for NamespaceString {
137 fn borrow(&self) -> &NamespaceStringBorrowed {
138 self.as_namespace_str()
139 }
140}
141
142impl PartialOrd for NamespaceString {
143 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
144 Some(self.as_namespace_str().cmp(other.as_namespace_str()))
145 }
146}
147
148impl Ord for NamespaceString {
149 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
150 self.as_namespace_str()
152 .partial_cmp(other.as_namespace_str())
153 .unwrap()
154 }
155}
156
157impl std::hash::Hash for NamespaceString {
158 fn hash<H>(&self, hasher: &mut H)
159 where
160 H: std::hash::Hasher,
161 {
162 self.as_namespace_str().hash(hasher);
163 }
164}
165
/// Test helper: feeds `v` into a fresh `DefaultHasher` and returns the
/// resulting hash.
#[cfg(test)]
fn hash(v: impl std::hash::Hash) -> u64 {
    use std::collections::hash_map::DefaultHasher;
    use std::hash::Hasher;

    let mut state = DefaultHasher::new();
    v.hash(&mut state);
    state.finish()
}
173
174impl std::convert::From<&str> for NamespaceString {
175 fn from(s: &str) -> Self {
176 NamespaceString(s.into())
177 }
178}
179
/// The borrowed (unsized) counterpart of [`NamespaceString`].
///
/// Its `PartialEq`, `Ord` and `Hash` impls operate on a normalized character
/// sequence in which every character is lowercased and both `' '` and `'_'`
/// become `'_'`.
#[derive(Debug)]
#[repr(transparent)]
pub(crate) struct NamespaceStringBorrowed(str);
184
// SAFETY: `NamespaceStringBorrowed` is `#[repr(transparent)]` and its only
// field is a `str`, so it has the same layout as the `str` it wraps.
unsafe impl TransparentWrapper<str> for NamespaceStringBorrowed {}
188
189impl NamespaceStringBorrowed {
190 pub fn from_str(s: &str) -> &Self {
191 Self::wrap_ref(s)
192 }
193
194 fn chars_normalized(&self) -> impl FusedIterator<Item = char> + '_ {
195 enum Iter {
196 One(Option<char>),
197 Many(std::char::ToLowercase),
198 }
199 impl Iterator for Iter {
200 type Item = char;
201
202 fn next(&mut self) -> Option<Self::Item> {
203 match self {
204 Iter::One(char) => char.take(),
205 Iter::Many(chars) => chars.next(),
206 }
207 }
208 }
209 impl FusedIterator for Iter {}
210 self.0.chars().flat_map(|c| {
211 if c == '_' || c == ' ' {
212 Iter::One(Some('_'))
213 } else {
214 Iter::Many(c.to_lowercase())
215 }
216 })
217 }
218}
219
220impl PartialEq for NamespaceStringBorrowed {
221 fn eq(&self, other: &Self) -> bool {
222 self.chars_normalized().eq(other.chars_normalized())
223 }
224}
225
// Marker impl: `PartialEq` above compares the normalized character sequences
// of both strings.
impl Eq for NamespaceStringBorrowed {}
227
// Every pair of spellings within a fixture group must be equal and must hash
// to the same value when viewed as `NamespaceStringBorrowed`.
#[test]
fn hash_and_eq_for_namespace_string_borrowed_are_case_and_whitespace_insensitive()
{
    for_each_namespace_string_combination(|left, right| {
        let lhs = NamespaceStringBorrowed::from_str(left);
        let rhs = NamespaceStringBorrowed::from_str(right);
        assert_eq!(lhs, rhs);
        assert_eq!(hash(lhs), hash(rhs));
    });
}
240
241impl PartialOrd for NamespaceStringBorrowed {
242 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
243 Some(self.cmp(other))
244 }
245}
246
247impl Ord for NamespaceStringBorrowed {
248 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
249 self.chars_normalized().cmp(other.chars_normalized())
250 }
251}
252
253impl std::hash::Hash for NamespaceStringBorrowed {
254 fn hash<H>(&self, hasher: &mut H)
255 where
256 H: std::hash::Hasher,
257 {
258 for c in self.chars_normalized() {
259 c.hash(hasher);
260 }
261 }
262}
263
264impl<'a> std::convert::From<&'a str> for &'a NamespaceStringBorrowed {
265 fn from(s: &'a str) -> Self {
266 NamespaceStringBorrowed::from_str(s)
267 }
268}
269
/// A lookup table of namespaces, indexed both by numeric ID and by
/// name, canonical name, or alias.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct NamespaceMap {
    // Namespace information keyed by numeric namespace ID.
    namespaces_by_id: HashMap<i32, Arc<NamespaceInfo>>,
    // Namespace information keyed by name, canonical name, or alias; the key
    // type makes lookups ignore case and the space/underscore distinction.
    namespaces_by_name_or_alias: HashMap<NamespaceString, Arc<NamespaceInfo>>,
}
275
276impl NamespaceMap {
277 pub fn from_site_info(site_info: SiteInfo) -> Result<Self> {
279 Self::from_namespaces_and_namespace_aliases(
280 site_info.namespaces.into_values(),
281 site_info.namespace_aliases,
282 )
283 }
284
285 #[cfg(feature = "utils")]
289 #[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
290 pub fn from_path(path: &Path) -> Result<Self> {
291 use std::fs::File;
292
293 let json = if path.extension() == Some("gz".as_ref()) {
294 let gz = File::open(path)
295 .map_err(|source| Error::from_io("open file", source, path))?;
296 let mut decoder = GzDecoder::new(gz);
297 let mut decoded = String::new();
298 decoder
299 .read_to_string(&mut decoded)
300 .map_err(|source| Error::from_io("parse GZip", source, path))?;
301 decoded
302 } else {
303 std::fs::read_to_string(path).map_err(|source| {
304 Error::from_io("read file to string", source, path)
305 })?
306 };
307 Self::from_json_with_path(&json, Some(path))
308 }
309
310 pub fn from_namespaces_and_namespace_aliases<
318 NS: IntoIterator<Item = NamespaceInfo>,
319 AL: IntoIterator<Item = NamespaceAlias>,
320 >(
321 namespaces: NS,
322 namespace_aliases: AL,
323 ) -> Result<Self> {
324 let mut namespaces_by_id = HashMap::new();
325 let mut namespaces_by_name_or_alias = HashMap::new();
326 for namespace in namespaces {
327 let namespace = Arc::new(namespace);
328 namespaces_by_id.insert(namespace.id, namespace.clone());
329 namespaces_by_name_or_alias.insert(
330 NamespaceString(namespace.name.clone()),
331 namespace.clone(),
332 );
333 if let Some(canonical) = namespace.canonical.as_deref() {
334 namespaces_by_name_or_alias.insert(
335 NamespaceString(canonical.to_string()),
336 namespace.clone(),
337 );
338 }
339 }
340 let mut aliases_not_found = Vec::new();
341 for alias in namespace_aliases {
342 if let Some(namespace_info) = namespaces_by_id.get(&alias.id) {
343 namespaces_by_name_or_alias.insert(
344 NamespaceString(alias.alias),
345 namespace_info.clone(),
346 );
347 } else {
348 aliases_not_found.push(alias);
349 }
350 }
351 if aliases_not_found.is_empty() {
352 Ok(Self {
353 namespaces_by_id,
354 namespaces_by_name_or_alias,
355 })
356 } else {
357 Err(Error::UnknownAliases(aliases_not_found))
358 }
359 }
360
361 pub fn from_iters<
366 NS: IntoIterator<Item = NI>,
367 NI: IntoIterator<Item = (String, String)>,
368 AL: IntoIterator<Item = (String, i32)>,
369 >(
370 namespaces: NS,
371 namespace_aliases: AL,
372 ) -> Result<Self> {
373 let namespaces = namespaces
376 .into_iter()
377 .map(|hash_map| NamespaceInfo::try_from_iter(hash_map))
378 .collect::<Result<Vec<_>>>()?;
379 Self::from_namespaces_and_namespace_aliases(
380 namespaces,
381 namespace_aliases
382 .into_iter()
383 .map(|(alias, id)| NamespaceAlias { id, alias }),
384 )
385 }
386
387 #[cfg(feature = "utils")]
390 #[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
391 pub fn from_reader<R: Read>(reader: R) -> Result<Self> {
392 let site_info = serde_json::from_reader::<R, SiteInfoResponse>(reader)
393 .map_err(|source| Error::Json {
394 source: Arc::new(source),
395 })?
396 .query;
397 Self::from_site_info(site_info)
398 }
399
400 #[cfg(feature = "utils")]
402 #[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
403 pub fn from_json<S: AsRef<str>>(json: S) -> Result<Self> {
404 Self::from_json_with_path(json.as_ref(), None)
405 }
406
407 #[cfg(feature = "utils")]
411 #[cfg_attr(docsrs, doc(cfg(feature = "utils")))]
412 fn from_json_with_path(json: &str, path: Option<&Path>) -> Result<Self> {
413 Self::from_site_info(
414 serde_json::from_str::<SiteInfoResponse>(json)
415 .map_err(|source| {
416 let source = Arc::new(source);
417 if let Some(path) = path {
418 Error::JsonFile {
419 source,
420 path: path.into(),
421 }
422 } else {
423 Error::Json { source }
424 }
425 })?
426 .query,
427 )
428 }
429
430 pub fn get_by_id(&self, id: i32) -> Option<&NamespaceInfo> {
432 self.namespaces_by_id.get(&id).map(|arc| &**arc)
433 }
434
435 fn get_by_name_or_alias<S>(
438 &self,
439 name_or_alias: &S,
440 ) -> Option<&NamespaceInfo>
441 where
442 S: ?Sized,
443 NamespaceString: std::borrow::Borrow<S>,
444 S: std::hash::Hash + Eq,
445 {
446 self.namespaces_by_name_or_alias
447 .get(name_or_alias)
448 .map(|arc| &**arc)
449 }
450
451 pub fn get_info<'a, 'b, N: Into<Namespace<'b>>>(
453 &'a self,
454 namespace: N,
455 ) -> Option<&'a NamespaceInfo> {
456 match namespace.into() {
457 Namespace::Id(id) => self.get_by_id(id),
458 Namespace::NameOrAlias(name_or_alias) => self.get_by_name_or_alias(
459 NamespaceStringBorrowed::from_str(name_or_alias),
460 ),
461 }
462 }
463
464 pub fn get_id<'a, 'b, N: Into<Namespace<'b>>>(
469 &'a self,
470 namespace: N,
471 ) -> Option<i32> {
472 self.get_info(namespace).map(|info| info.id)
473 }
474
475 pub fn get_name<'a, 'b, N: Into<Namespace<'b>>>(
477 &'a self,
478 namespace: N,
479 ) -> Option<&'a str> {
480 self.get_info(namespace).map(|info| &*info.name)
481 }
482
483 pub fn all_namespaces(&self) -> Vec<i32> {
485 self.namespaces_by_id.keys().copied().collect::<Vec<_>>()
486 }
487
488 pub fn get_case<'a, 'b, N: Into<Namespace<'b>>>(
490 &'a self,
491 namespace: N,
492 ) -> Option<&'a str> {
493 self.get_info(namespace).map(|info| &*info.case)
494 }
495
496 pub fn get_canonical_name<'a, 'b, N: Into<Namespace<'b>>>(
499 &'a self,
500 namespace: N,
501 ) -> Option<&'a str> {
502 self.get_info(namespace)
503 .and_then(|info| info.canonical.as_deref())
504 }
505
506 pub fn is_capitalized<'a, 'b, N: Into<Namespace<'b>>>(
511 &'a self,
512 namespace: N,
513 ) -> Option<bool> {
514 self.get_info(namespace)
515 .map(|info| &*info.case)
516 .map(|case| case == "first-letter")
517 }
518
519 pub fn to_pretty(&self, title: &Title) -> Option<String> {
525 self.title_string(title, TitleWhitespace::Spaces, false)
526 }
527
528 pub fn to_underscores(&self, title: &Title) -> Option<String> {
535 self.title_string(title, TitleWhitespace::Underscores, false)
536 }
537
538 pub fn to_pretty_with_fragment(&self, title: &Title) -> Option<String> {
543 self.title_string(title, TitleWhitespace::Spaces, true)
544 }
545
546 pub fn display_title<'map: 'title, 'title>(
552 &'map self,
553 title: &'title Title,
554 whitespace: TitleWhitespace,
555 include_fragment: bool,
556 ) -> Option<impl Display + 'title> {
557 Some(TitleDisplay {
559 interwiki: title.interwiki(),
560 namespace: if title.namespace() == NS_MAIN {
561 None
562 } else {
563 Some(self.get_name(title.namespace())?)
564 },
565 dbkey: title.dbkey(),
566 fragment: if include_fragment {
567 title.fragment.as_deref()
568 } else {
569 None
570 },
571 whitespace,
572 })
573 }
574
575 fn title_string<'map: 'title, 'title>(
576 &'map self,
577 title: &'title Title,
578 whitespace: TitleWhitespace,
579 include_fragment: bool,
580 ) -> Option<String> {
581 self.display_title(title, whitespace, include_fragment)
582 .map(|display| format!("{display}"))
583 }
584}
585
// Builds a `NamespaceMap` from a small fixture, checks that both internal
// indexes match the expected contents, and checks that lookups by name ignore
// case and the space/underscore distinction.
#[test]
fn siteinfo_can_be_converted_to_namespace_map_and_lookup_is_case_insensitive() {
    // Builds the `NamespaceInfo` described by one fixture row.
    fn info(
        id: i32,
        name: &str,
        canonical: Option<&str>,
        case: &str,
    ) -> NamespaceInfo {
        NamespaceInfo {
            id,
            name: name.into(),
            canonical: canonical.map(String::from),
            case: case.into(),
        }
    }

    // Fixture rows: (id, name, canonical name, case).
    let rows = [
        (0, "", None, "first-letter"),
        (1, "Talk", Some("Talk"), "first-letter"),
        (4, "Wikipedia", Some("Project"), "first-letter"),
        (10, "Template", Some("Template"), "first-letter"),
        (14, "Category", Some("Category"), "first-letter"),
        (15, "Category talk", Some("Category talk"), "first-letter"),
    ];
    // Every key expected in the name index, with the ID it resolves to.
    let keys = [
        ("", 0),
        ("Talk", 1),
        ("Wikipedia", 4),
        ("Project", 4),
        ("WP", 4),
        ("Template", 10),
        ("Category", 14),
        ("Category talk", 15),
    ];
    let info_for = |wanted: i32| {
        let (id, name, canonical, case) = rows
            .iter()
            .copied()
            .find(|row| row.0 == wanted)
            .expect("every expected key refers to a fixture row");
        info(id, name, canonical, case)
    };

    let namespaces: Vec<NamespaceInfo> = rows
        .iter()
        .map(|&(id, name, canonical, case)| info(id, name, canonical, case))
        .collect();
    let namespacealiases = vec![NamespaceAlias {
        alias: "WP".into(),
        id: 4,
    }];

    let expected = Ok(NamespaceMap {
        namespaces_by_id: rows
            .iter()
            .map(|row| (row.0, Arc::new(info_for(row.0))))
            .collect(),
        namespaces_by_name_or_alias: keys
            .iter()
            .map(|&(key, id)| {
                (NamespaceString::from(key), Arc::new(info_for(id)))
            })
            .collect(),
    });

    // `Error` is reduced to the unknown aliases it carries (if any) so the
    // result can be compared with `assert_eq!`.
    let namespace_map = NamespaceMap::from_namespaces_and_namespace_aliases(
        namespaces.clone(),
        namespacealiases.clone(),
    )
    .map_err(|e| match e {
        Error::UnknownAliases(aliases) => Some(aliases),
        _ => None,
    });
    assert_eq!(
        namespace_map, expected,
        "\nconverting {:?}\n{:?}",
        &namespaces, &namespacealiases
    );

    let namespace_map = namespace_map.unwrap();
    assert_eq!(namespace_map.get_name(1), Some("Talk"));
    assert_eq!(namespace_map.get_name(4), Some("Wikipedia"));
    assert_eq!(namespace_map.get_name(14), Some("Category"));
    assert_eq!(namespace_map.get_canonical_name(4), Some("Project"));
    assert_eq!(namespace_map.get_name("Project"), Some("Wikipedia"));
    assert_eq!(
        namespace_map.get_canonical_name("Wikipedia"),
        Some("Project")
    );
    assert_eq!(namespace_map.get_case("Project"), Some("first-letter"));

    for (spellings, expected_id) in [
        (&["Talk", "talk", "TALK"][..], 1),
        (
            &[
                "Wikipedia",
                "wikipedia",
                "WIKIPEDIA",
                "Project",
                "project",
                "PROJECT",
                "WP",
            ],
            4,
        ),
        (
            &[
                "Category talk",
                "Category_talk",
                "CATEGORY TALK",
                "CATEGORY_TALK",
            ],
            15,
        ),
    ] {
        for name in spellings {
            assert_eq!(
                namespace_map.get_id(*name),
                Some(expected_id),
                "\n{}",
                name
            );
        }
    }
}
777}