Source code for pywikibot.page._filepage

"""Objects representing MediaWiki File pages.

This module includes objects:

* FilePage: A subclass of Page representing a file description page
* FileInfo: a structure holding imageinfo of latest revision of FilePage
"""
#
# (C) Pywikibot team, 2008-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations

from http import HTTPStatus
from os import PathLike
from pathlib import Path
from urllib.parse import urlparse

import pywikibot
from pywikibot.backports import Iterable
from pywikibot.comms import http
from pywikibot.exceptions import NoPageError
from pywikibot.page._page import Page
from pywikibot.tools import compute_file_hash, deprecated


__all__ = (
    'FileInfo',
    'FilePage',
)


class FilePage(Page):

    """A subclass of Page representing a file description page.

    Supports the same interface as Page except *ns*; some added methods.

    File revisions are cached per instance in ``_file_revisions``, a
    dictionary mapping the revision timestamp to a :class:`FileInfo`.
    """

    def __init__(self, source, title: str = '') -> None:
        """Initializer.

        .. versionchanged:: 8.4
           check for valid extensions.

        :param source: the source of the page
        :type source: pywikibot.page.BaseLink (or subclass),
            pywikibot.page.Page (or subclass), or pywikibot.page.Site
        :param title: normalized title of the page; required if source
            is a Site, ignored otherwise
        :raises ValueError: Either the title is not in the file
            namespace or does not have a valid extension.
        """
        self._file_revisions = {}  # dictionary to cache File history.
        super().__init__(source, title, 6)
        if self.namespace() != 6:
            raise ValueError(f"'{self.title()}' is not in the file namespace!")

        title = self.title(with_ns=False, with_section=False)
        _, sep, extension = title.rpartition('.')
        # no separator at all means there is no extension to validate
        if not sep or extension.lower() not in self.site.file_extensions:
            raise ValueError(
                f'{title!r} does not have a valid extension '
                f'({", ".join(self.site.file_extensions)}).'
            )

    def _load_file_revisions(self, imageinfo) -> None:
        """Store Image revisions of FilePage in the local cache.

        Each usable entry of *imageinfo* is stored as a :class:`FileInfo`
        keyed by its timestamp. An entry whose timestamp is already
        cached is updated in place, so attributes requested later (e.g.
        metadata) are added to the revision already present in cache.

        :param imageinfo: iterable of imageinfo dictionaries; every
            entry without ``filemissing`` must have a ``timestamp`` key
        """
        for file_rev in imageinfo:
            # filemissing in API response indicates most fields are missing
            # see https://gerrit.wikimedia.org/r/c/mediawiki/core/+/533482/
            if 'filemissing' in file_rev:
                pywikibot.warning(
                    f"File '{self.title()}' contains missing revisions")
                continue

            ts_key = pywikibot.Timestamp.fromISOformat(file_rev['timestamp'])
            cached = self._file_revisions.get(ts_key)
            if cached is None:
                # first time this revision is seen; FileInfo's initializer
                # already applies file_rev, no second update is needed
                self._file_revisions[ts_key] = FileInfo(file_rev, self)
            else:
                # add new imageinfo attributes since last request.
                cached.update(file_rev)

    @property
    def latest_file_info(self):
        """Retrieve and store information of latest Image rev. of FilePage.

        At the same time, the whole history of Image is fetched and
        cached in self._file_revisions

        :return: instance of FileInfo()
        """
        history = self.get_file_history()
        return history[max(history)]

    @property
    def oldest_file_info(self):
        """Retrieve and store information of oldest Image rev. of FilePage.

        At the same time, the whole history of Image is fetched and
        cached in self._file_revisions

        :return: instance of FileInfo()
        """
        history = self.get_file_history()
        return history[min(history)]

    def get_file_info(self, ts) -> FileInfo:
        """Retrieve and store information of a specific Image rev. of FilePage.

        This function will load also metadata.
        It is also used as a helper in FileInfo to load metadata lazily.

        .. versionadded:: 8.6

        :param ts: timestamp of the Image rev. to retrieve
        :return: instance of FileInfo()
        :raises KeyError: no revision with timestamp *ts* is in the
            cache after loading
        """
        self.site.loadimageinfo(self, history=False, timestamp=ts)
        return self._file_revisions[ts]

    def get_file_history(self) -> dict:
        """Return the file's version history.

        The history is requested from the site only while the cache is
        empty.

        :return: dictionary with:
            key: timestamp of the entry
            value: instance of FileInfo()
        """
        if not self._file_revisions:
            self.site.loadimageinfo(self, history=True)
        return self._file_revisions

    def getImagePageHtml(self) -> str:  # noqa: N802
        """Download the file page, and return the HTML, as a string.

        Caches the HTML code, so that if you run this method twice on the
        same FilePage object, the page will only be downloaded once.
        """
        if not hasattr(self, '_imagePageHtml'):
            path = '{}/index.php?title={}'.format(self.site.scriptpath(),
                                                  self.title(as_url=True))
            self._imagePageHtml = http.request(self.site, path).text
        return self._imagePageHtml

    def get_file_url(self,
                     url_width: int | None = None,
                     url_height: int | None = None,
                     url_param: str | None = None) -> str:
        """Return the url or the thumburl of the file described on this page.

        Fetch the information if not available.

        Once retrieved, file information will also be accessible as
        :attr:`latest_file_info` attributes, named as in
        :api:`Imageinfo`. If *url_width*, *url_height* or *url_param* is
        given, additional properties ``thumbwidth``, ``thumbheight``,
        ``thumburl`` and ``responsiveUrls`` are provided.

        .. note:: Parameters validation and error handling left to the
           API call.
        .. seealso::

           * :meth:`APISite.loadimageinfo()
             <pywikibot.site._apisite.APISite.loadimageinfo>`
           * :api:`Imageinfo`

        :param url_width: get info for a thumbnail with given width
        :param url_height: get info for a thumbnail with given height
        :param url_param: get info for a thumbnail with given param
        :return: latest file url or thumburl
        """
        # Plain url is requested.
        if url_width is None and url_height is None and url_param is None:
            return self.latest_file_info.url

        # Thumburl is requested; the full history is only requested
        # together with it while nothing is cached yet.
        self.site.loadimageinfo(self, history=not self._file_revisions,
                                url_width=url_width, url_height=url_height,
                                url_param=url_param)
        return self.latest_file_info.thumburl

    def file_is_shared(self) -> bool:
        """Check if the file is stored on any known shared repository.

        .. versionchanged:: 7.0
           return False if file does not exist on shared image repository
           instead raising NoPageError.
        """
        # as of now, the only known repositories are commons and wikitravel
        # TODO: put the URLs to family file
        if not self.site.has_image_repository:
            return False

        try:
            info = self.latest_file_info
        except NoPageError:
            return False

        if 'wikitravel_shared' in self.site.shared_image_repository():
            return info.url.startswith('https://wikitravel.org/upload/shared/')

        # default to commons
        return info.url.startswith(
            'https://upload.wikimedia.org/wikipedia/commons/')

    def getFileVersionHistoryTable(self) -> str:  # noqa: N802
        """Return the version history in the form of a wiki table.

        One table row is created per cached file revision, showing its
        timestamp, user, dimensions with size, and comment.
        """
        lines = []
        for info in self.get_file_history().values():
            dimension = '{width}×{height} px ({size} bytes)'.format(
                **info.__dict__)
            lines.append('| {timestamp} || {user} || {dimension} |'
                         '| <nowiki>{comment}</nowiki>'
                         .format(dimension=dimension, **info.__dict__))
        # %-formatting is used here because the table markup itself is
        # full of literal braces
        return ('{| class="wikitable"\n'
                '! {{int:filehist-datetime}} || {{int:filehist-user}} |'
                '| {{int:filehist-dimensions}} || {{int:filehist-comment}}\n'
                '|-\n%s\n|}\n' % '\n|-\n'.join(lines))

    def using_pages(self, **kwargs):
        """Yield Pages on which the file is displayed.

        For parameters refer
        :meth:`APISite.imageusage()
        <pywikibot.site._generators.GeneratorsMixin.imageusage>`

        Usage example:

        >>> site = pywikibot.Site('wikipedia:test')
        >>> file = pywikibot.FilePage(site, 'Pywikibot MW gear icon.svg')
        >>> used = list(file.using_pages(total=10))
        >>> len(used)
        2
        >>> used[0].title()
        'Pywikibot'

        .. seealso:: :meth:`globalusage`
        .. versionchanged:: 7.2
           all parameters from :meth:`APISite.imageusage()
           <pywikibot.site._generators.GeneratorsMixin.imageusage>`
           are available.
        .. versionchanged:: 7.4
           renamed from :meth:`usingPages`.
        """
        return self.site.imageusage(self, **kwargs)

    @deprecated('using_pages', since='7.4.0')
    def usingPages(self, **kwargs):  # noqa: N802
        """Yield Pages on which the file is displayed.

        .. deprecated:: 7.4
           Use :meth:`using_pages` instead.
        """
        return self.using_pages(**kwargs)

    @property
    def file_is_used(self) -> bool:
        """Check whether the file is used at this site.

        .. versionadded:: 7.1
        """
        # a single hit is enough to know that the file is in use
        return bool(list(self.using_pages(total=1)))

    def upload(self, source: str | PathLike, **kwargs) -> bool:
        """Upload this file to the wiki.

        keyword arguments are from site.upload() method.

        :param source: Path or URL to the file to be uploaded. A string
            containing ``://`` is treated as URL, any other string and
            any path-like object as local file name.

        :keyword comment: Edit summary; if this is not provided, then
            filepage.text will be used. An empty summary is not permitted.
            This may also serve as the initial page text (see below).
        :keyword text: Initial page text; if this is not set, then
            filepage.text will be used, or comment.
        :keyword watch: If true, add filepage to the bot user's watchlist
        :keyword ignore_warnings: It may be a static boolean, a callable
            returning a boolean or an iterable. The callable gets a list of
            UploadError instances and the iterable should contain the warning
            codes for which an equivalent callable would return True if all
            UploadError codes are in that list. If the result is False it'll
            not continue uploading the file and otherwise disable any warning
            and reattempt to upload the file.

            .. note:: If report_success is True or None it'll raise an
               UploadError exception if the static boolean is False.
        :type ignore_warnings: bool or callable or iterable of str
        :keyword chunk_size: The chunk size in bytes for chunked
            uploading (see :api:`Upload#Chunked_uploading`). It will
            only upload in chunks, if the chunk size is positive but
            lower than the file size.
        :type chunk_size: int
        :keyword report_success: If the upload was successful it'll print a
            success message and if ignore_warnings is set to False it'll
            raise an UploadError if a warning occurred. If it's
            None (default) it'll be True if ignore_warnings is a bool and
            False otherwise. If it's True or None ignore_warnings must be
            a bool.
        :return: It returns True if the upload was successful and False
            otherwise.
        """
        filename = url = None
        if isinstance(source, PathLike):
            # a path-like object always denotes a local file; it is passed
            # on as string like a plain file name
            filename = str(Path(source))
        elif '://' in source:
            url = source
        else:
            filename = source
        return self.site.upload(self, source_filename=filename, source_url=url,
                                **kwargs)

    def download(self,
                 filename: str | PathLike | Iterable[str] | None = None,
                 chunk_size: int = 100 * 1024,
                 revision: FileInfo | None = None, *,
                 url_width: int | None = None,
                 url_height: int | None = None,
                 url_param: str | None = None) -> bool:
        """Download to filename file of FilePage.

        **Usage examples:**

        Download an image:

        >>> site = pywikibot.Site('wikipedia:test')
        >>> file = pywikibot.FilePage(site, 'Pywikibot MW gear icon.svg')
        >>> file.download()
        True

        Pywikibot_MW_gear_icon.svg was downloaded.

        Download a thumbnail:

        >>> file.download(url_param='120px')
        True

        The suffix has changed and Pywikibot_MW_gear_icon.png was
        downloaded.

        .. versionadded:: 8.2
           *url_width*, *url_height* and *url_param* parameters.
        .. versionchanged:: 8.2
           *filename* argument may be also a path-like object or an
           iterable of path segments.
        .. note:: filename suffix is adjusted if target url's suffix is
           different which may be the case if a thumbnail is loaded.
        .. warning:: If a file already exists, it will be overwritten
           without further notes.
        .. seealso:: :api:`Imageinfo` for new parameters

        :param filename: filename where to save file. If ``None``,
            ``self.title(as_filename=True, with_ns=False)`` will be used.
            If an Iterable is specified the items will be used as path
            segments. To specify the user directory path you have to use
            either ``~`` or ``~user`` as first path segment e.g. ``~/foo``
            or ``('~', 'foo')`` as filename. If only the user directory
            specifier is given, the title is used as filename like for
            None. If the suffix is missing or different from url (which
            can happen if a *url_width*, *url_height* or *url_param*
            argument is given), the file suffix is adjusted.
        :param chunk_size: the size of each chunk to be received and
            written to file.
        :param revision: file revision to download. If None
            :attr:`latest_file_info` will be used; otherwise provided
            revision will be used. It is ignored if a thumbnail is
            requested.
        :param url_width: download thumbnail with given width
        :param url_height: download thumbnail with given height
        :param url_param: download thumbnail with given param
        :return: True if download is successful, False otherwise. For a
            thumbnail a completed download counts as success; otherwise
            the hash of the written file must match the revision's sha1.
        :raise IOError: if filename cannot be written for any reason.
        """
        if not filename:
            path = Path()
        elif isinstance(filename, (str, PathLike)):
            path = Path(filename)
        else:
            path = Path(*filename)

        # NOTE(review): '~user' is compared literally here, other user
        # directory specifiers like '~alice' are taken as file name.
        if path.stem in ('', '~', '~user'):
            path = path / self.title(as_filename=True, with_ns=False)

        thumb = bool(url_width or url_height or url_param)
        if thumb or revision is None:
            url = self.get_file_url(url_width, url_height, url_param)
            revision = self.latest_file_info
        else:
            url = revision.url

        # adjust suffix
        path = path.with_suffix(Path(urlparse(url).path).suffix)
        # adjust user path
        path = path.expanduser()

        req = http.fetch(url, stream=True)
        try:
            if req.status_code != HTTPStatus.OK:
                pywikibot.warning(
                    f'Unsuccessful request ({req.status_code}): {req.url}')
                return False

            with open(path, 'wb') as f:
                for chunk in req.iter_content(chunk_size):
                    f.write(chunk)
        finally:
            # a streamed response holds its connection until it is closed,
            # in particular if the body was never read
            req.close()

        # a thumbnail has no sha1 to compare with
        return thumb or compute_file_hash(path) == revision.sha1

    def globalusage(self, total=None):
        """Iterate all global usage for this page.

        .. seealso:: :meth:`using_pages`

        :param total: iterate no more than this number of pages in total
        :return: a generator that yields Pages also on sites different from
            self.site.
        :rtype: generator
        """
        return self.site.globalusage(self, total=total)

    def data_item(self):
        """Convenience function to get the associated Wikibase item of the file.

        If WikibaseMediaInfo extension is available (e.g. on Commons),
        the method returns the associated mediainfo entity. Otherwise,
        it falls back to behavior of BasePage.data_item.

        .. versionadded:: 6.5

        :rtype: pywikibot.page.WikibaseEntity
        """
        if self.site.has_extension('WikibaseMediaInfo'):
            if not hasattr(self, '_item'):
                # create the entity once and link it back to this page
                self._item = pywikibot.MediaInfo(self.site)
                self._item._file = self
            return self._item

        return super().data_item()
class FileInfo:

    """A structure holding imageinfo of latest rev. of FilePage.

    All keys of API imageinfo dictionary are mapped to FileInfo attributes.
    Attributes can be retrieved both as self['key'] or self.key.

    Following attributes will be returned:
        - timestamp, user, comment, url, size, sha1, mime, metadata (lazily)
        - archivename (not for latest revision)

    see :meth:`Site.loadimageinfo()
    <pywikibot.site._apisite.APISite.loadimageinfo>` for details.

    .. note:: timestamp will be casted to :func:`pywikibot.Timestamp`.

    .. versionchanged:: 7.7
       raises KeyError instead of AttributeError if FileInfo is used as
       Mapping.
    .. versionchanged:: 8.6
       Metadata are loaded lazily.
       Added *filepage* parameter.
    """

    def __init__(self, file_revision, filepage) -> None:
        """Initiate the class using the dict from ``APISite.loadimageinfo``.

        :param file_revision: imageinfo dictionary of one file revision
        :param filepage: the file page this revision belongs to; it is
            used to load the metadata on demand
        """
        self.filepage = filepage
        self._metadata = None
        self.update(file_revision)

    def update(self, file_revision) -> None:
        """Update FileInfo with new values.

        Every key of *file_revision* becomes an attribute; the value of
        ``timestamp`` is converted to :class:`pywikibot.Timestamp`.

        .. versionadded:: 8.6

        :param file_revision: imageinfo dictionary of one file revision
        """
        for k, v in file_revision.items():
            if k == 'timestamp':
                v = pywikibot.Timestamp.fromISOformat(v)
            # a 'metadata' key is routed through the property setter
            setattr(self, k, v)

    def __getitem__(self, key):
        """Give access to class values by key.

        :raises KeyError: there is no attribute named *key*
        """
        try:
            return getattr(self, key)
        except AttributeError as e:
            # behave like a mapping: report a missing key, not an attribute
            raise KeyError(str(e).replace('attribute', 'key')) from None

    def __repr__(self) -> str:
        """Return a more complete string representation."""
        return repr(self.__dict__)

    def __eq__(self, other) -> bool:
        """Test if two FileInfo objects are equal.

        Two FileInfo objects are equal if all their attributes are
        equal. For any other type ``NotImplemented`` is returned so that
        the comparison is left to the other operand.
        """
        if not isinstance(other, FileInfo):
            return NotImplemented
        return self.__dict__ == other.__dict__

    @property
    def metadata(self):
        """Return metadata.

        While no metadata is stored yet, this revision is requested
        again through ``filepage.get_file_info()``.

        .. versionadded:: 8.6
        """
        if self._metadata is None:
            self.filepage.get_file_info(self.timestamp)
        return self._metadata

    @metadata.setter
    def metadata(self, value):
        """Set metadata.

        .. versionadded:: 8.6
        """
        self._metadata = value