"""Manage updates to automated git repositories."""
import os
from collections.abc import Generator
from contextlib import contextmanager
from logging import getLogger
from pathlib import Path
from shutil import copytree
from tempfile import TemporaryDirectory
from typing import Optional
from git import Actor, Repo
from git.exc import GitError
from git.remote import PushInfo
from wmflib.interactive import ask_confirmation
from spicerack.constants import KEYHOLDER_SOCK
from spicerack.exceptions import SpicerackError
from spicerack.remote import RemoteHosts
# Module-level logger named after this module; RepoSync uses it for its info and debug messages.
logger = getLogger(__name__)
class RepoSyncError(SpicerackError):
    """Base exception raised for errors while syncing a git repository."""
class RepoSyncPushError(RepoSyncError):
    """Exception raised when pushing to a git remote fails."""
class RepoSyncNoChangeError(RepoSyncError):
    """Exception raised when there are no changes to commit."""
class RepoSync:
    """Class for syncing git repos.

    New data is staged in a temporary clone of the repository, committed there, shown to the operator and then
    pushed: first from the temporary clone to its remotes and then from the repository itself to its remotes.
    """

    _data_subdir: str = "data"
    """Sub directory to create inside the temporary directory where to create the data."""
    _email: str = "noc@wikimedia.org"
    """Email address to use for the git commit."""

    def __init__(self, repo: Repo, username: str, remote_hosts: RemoteHosts, *, dry_run: bool) -> None:
        """Initialise the object.

        Arguments:
            repo: the git repository to keep in sync.
            username: the username making the changes, used as the name of the commit author and committer.
            remote_hosts: a remote hosts object pointing to all git remote servers. The servers are expected to
                have a valid ``KEYHOLDER_SOCK`` configured.
            dry_run: don't perform any write actions.

        """
        self._hexsha: Optional[str] = None
        self._username = username
        self._remote_hosts = remote_hosts
        self._dry_run = dry_run
        self._repo = repo
        self._author = Actor(self._username, self._email)

    @property
    def hexsha(self) -> Optional[str]:
        """Returns the hexsha of the last commit made by this instance, :py:data:`None` if there was none yet."""
        return self._hexsha

    def _commit(self, working_repo: Repo, message: str) -> None:
        """Stage every change in the working repo and commit it.

        Arguments:
            working_repo: the working git repository.
            message: the commit message.

        Raises:
            spicerack.reposync.RepoSyncNoChangeError: if no changes are detected.
            spicerack.reposync.RepoSyncError: if no hexsha was returned for the commit.

        """
        # Don't use working_repo.index.add(".") as it adds the .git folder
        # https://github.com/gitpython-developers/GitPython/issues/292
        working_repo.git.add(A=True)
        # A repository without a valid HEAD has nothing to diff against, so any staged content is a change.
        if working_repo.head.is_valid() and not working_repo.index.diff(working_repo.head.commit):
            raise RepoSyncNoChangeError("Nothing to commit")

        commit = working_repo.index.commit(message, author=self._author, committer=self._author)
        if not isinstance(commit.hexsha, str):
            raise RepoSyncError("No valid commit hexsha from commit")

        logger.info("Committed changes: %s", commit.hexsha)
        self._hexsha = commit.hexsha

    def _push(self, working_repo: Optional[Repo] = None) -> None:
        """Push the committed changes to every remote of the repository.

        Arguments:
            working_repo: the repository with the commit to push. When omitted the repository passed to the
                constructor is pushed.

        Raises:
            spicerack.reposync.RepoSyncPushError: if there was an error pushing.

        """
        if working_repo is None:
            working_repo = self._repo

        if self._dry_run:
            logger.info("Would have pushed commit")
            return

        # Any of these bits in the flags of a push result means that the push of that reference failed.
        error_flags = PushInfo.REJECTED | PushInfo.REMOTE_REJECTED | PushInfo.REMOTE_FAILURE | PushInfo.ERROR

        # Point the SSH agent socket to KEYHOLDER_SOCK for the duration of the push only; the previous value
        # is restored in the finally clause below.
        old_ssh_auth_sock = os.getenv("SSH_AUTH_SOCK")
        os.environ["SSH_AUTH_SOCK"] = KEYHOLDER_SOCK
        try:
            for remote in working_repo.remotes:
                logger.debug("Attempt push to: %s", remote)
                try:
                    # TODO: later versions of git python have raise_if_error
                    push_info_list = remote.push()
                except GitError as error:
                    raise RepoSyncPushError(f"Error pushing to {remote}: {error}") from error

                # remote.push returns an empty list when the operation fails completely. Iterating over an
                # empty list is a silent no-op, so it must be checked explicitly.
                if not push_info_list:
                    raise RepoSyncPushError(f"Error pushing to {remote}: no push information returned")

                for push_info in push_info_list:
                    if push_info.flags & error_flags:
                        msg = f"bitflags {push_info.flags}: {push_info.summary.strip()}"
                        raise RepoSyncPushError(f"Error pushing to {remote}: {msg}")

                logger.info("Pushed to %s", remote)
        finally:
            if old_ssh_auth_sock is None:
                # pop() instead of del: don't mask a push error with a KeyError if the variable is already gone.
                os.environ.pop("SSH_AUTH_SOCK", None)
            else:
                os.environ["SSH_AUTH_SOCK"] = old_ssh_auth_sock

    def _update_local(self, working_dir: Path, message: str) -> None:
        """Replace the content of a temporary clone of the repo with the new data, commit it and push it.

        Arguments:
            working_dir: the temporary directory used to build diffs. It must contain the data sub directory
                with the new content; the clone is created in its ``repo`` sub directory.
            message: the commit message.

        """
        repo_dir = working_dir / "repo"
        data_dir = working_dir / self._data_subdir
        working_repo = self._repo.clone(repo_dir)
        # Delete all existing files to ensure removal of stale data
        working_repo.git.rm("./", r=True, ignore_unmatch=True)
        copytree(data_dir, repo_dir, dirs_exist_ok=True, symlinks=True)
        # The commit is created in the temporary clone also in dry-run mode, so that the diff can be shown.
        self._commit(working_repo, message)
        print(working_repo.git.show(["--color=always", "HEAD"]))
        if not self._dry_run:
            ask_confirmation(f"Ok to push changes to {self._repo.common_dir}")

        self._push(working_repo)

    def force_sync(self) -> None:
        """Force a sync of the repo on the current host to all remotes."""
        self._push()

    @contextmanager
    def update(self, message: str) -> Generator:
        """Context manager for updating a temporary directory with new data.

        The context manager will create and yield a temporary directory. Users should populate
        this directory with fresh data to be committed to the main repository.

        Arguments:
            message: the commit message.

        Yields:
            pathlib.Path: temporary directory to populate with data intended for the git repo.

        Raises:
            spicerack.reposync.RepoSyncError: if unable to update local repo.

        """
        with TemporaryDirectory() as tmp_dir:
            working_dir = Path(tmp_dir)
            data_dir = working_dir / self._data_subdir
            data_dir.mkdir()
            yield data_dir

            # An empty data directory would wipe the whole repository content, refuse to continue.
            if not any(data_dir.iterdir()):
                raise RepoSyncError("No data written to data directory")

            self._update_local(working_dir, message)
            logger.debug("Push to remotes: %s", self._repo.remotes)
            self._push()