1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
/*
Small library for common tasks on Wikimedia Toolforge
Copyright (C) 2022 Kunal Mehta <legoktm@debian.org>

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
use clap::Parser;
use std::{io, process::ExitStatus, time::Duration};
use tokio::{process::Command, signal, time};

/// Command-line arguments, parsed by `clap`'s derive API.
///
/// The `about` text shown in `--help` is set explicitly in the `command`
/// attribute below, so this doc comment is for maintainers only.
#[derive(Parser)]
#[command(version, about = "Opens SSH tunnels for database replicas", long_about = None)]
struct Arguments {
    /// Database host to tunnel to on local port 3306 (for example toolsdb or s1).
    /// When omitted, one tunnel is opened per replica slice on ports 3307 and up.
    hostname: Option<String>,
}

/// Error returned by `tunnel`. There are two possible failure points.
#[derive(Debug)]
enum TunnelError {
    /// The `ssh` child process could not be started.
    SpawnError(io::Error),
    /// Waiting on the already-started `ssh` child process failed.
    ChildError(io::Error),
}

impl std::fmt::Display for TunnelError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::SpawnError(e) => write!(f, "cannot spawn: {e}"),
            Self::ChildError(e) => write!(f, "subprocess failed: {e}"),
        }
    }
}

// The underlying `io::Error` is already included in the `Display` output,
// so `source()` is left at its default to avoid reporting it twice.
impl std::error::Error for TunnelError {}

/// Highest replica slice number that gets a tunnel. When no hostname is given,
/// `main` opens tunnels for `s1` through `s{HIGHEST_SLICE}`, each on local
/// port `3306 + slice`.
///
/// s8 is currently the highest numbered slice. s11 is just labtestwiki,
/// which we don't really care about. There is currently no s9 or s10.
const HIGHEST_SLICE: usize = 8;

/// Runs a single `ssh -N` port forward through `dev.toolforge.org` and waits
/// for the `ssh` process to exit.
///
/// Local `port` is forwarded to port 3306 of
/// `{hostname}.analytics.db.svc.wikimedia.cloud`, except for `toolsdb`, which
/// is addressed without the `.analytics` part.
///
/// # Errors
///
/// * `TunnelError::SpawnError` if the `ssh` process cannot be started.
/// * `TunnelError::ChildError` if waiting on the started process fails.
///
/// A non-zero exit of `ssh` itself is not an error here; it is handed back as
/// `Ok(status)` for the caller to interpret.
async fn tunnel(
    hostname: &str,
    port: usize,
) -> Result<ExitStatus, TunnelError> {
    println!("opening tunnel to {hostname} on port {port}");

    // toolsdb is the only host that lives outside the ".analytics" namespace.
    let suffix = match hostname {
        "toolsdb" => "",
        _ => ".analytics",
    };
    // `-L` specification: local port -> remote database host, MySQL port.
    let forward = format!("{port}:{hostname}{suffix}.db.svc.wikimedia.cloud:3306");

    let mut ssh = Command::new("ssh")
        .arg("-N")
        .arg("dev.toolforge.org")
        .arg("-L")
        .arg(&forward)
        // do not leave an orphaned ssh behind if this future is dropped
        .kill_on_drop(true)
        .spawn()
        .map_err(TunnelError::SpawnError)?;

    ssh.wait().await.map_err(TunnelError::ChildError)
}

/// Keeps a tunnel to `hostname` open on local `port` forever, restarting it
/// with exponential backoff whenever it goes away.
///
/// The delay starts at 1 second, doubles after every attempt and is capped at
/// 15 minutes. It is reset to 1 second only after a tunnel has stayed up for a
/// while: `ssh` reports connection failures through its exit status, which
/// reaches this function as `Ok(status)`, so resetting on every `Ok` would
/// retry an unreachable host once per second indefinitely.
///
/// This function never returns; cancel it by dropping the future.
async fn tunnel_with_retry(hostname: &str, port: usize) {
    /// Upper bound for the retry delay, in seconds (15 minutes).
    const MAX_BACKOFF_SECS: u64 = 900;
    /// A tunnel that lived at least this long counts as having been established.
    const STABLE_UPTIME: Duration = Duration::from_secs(60);

    let mut backoff: u64 = 1;
    loop {
        let started = std::time::Instant::now();
        match tunnel(hostname, port).await {
            Ok(status) => {
                eprintln!("{hostname} exited unexpectedly: {status}");
                // Only a tunnel that actually worked for a while earns a fresh
                // backoff; an ssh that dies right away keeps backing off.
                if started.elapsed() >= STABLE_UPTIME {
                    backoff = 1;
                }
            }
            Err(TunnelError::SpawnError(e)) => {
                eprintln!("{hostname} cannot spawn: {e}")
            }
            Err(TunnelError::ChildError(e)) => {
                eprintln!("{hostname} subprocess failed: {e}")
            }
        }

        println!("{hostname} will retry after {backoff} sec");
        time::sleep(Duration::from_secs(backoff)).await;
        backoff = (backoff * 2).min(MAX_BACKOFF_SECS);
    }
}

/// Entry point: spawns one retrying tunnel task per target and runs until all
/// tasks finish or ctrl-c is received.
///
/// With a `hostname` argument a single tunnel is opened on local port 3306.
/// Without it, tunnels for `s1` through `s{HIGHEST_SLICE}` are opened on
/// local ports `3306 + slice`.
#[tokio::main]
async fn main() {
    let args = Arguments::parse();
    println!("Opening SSH tunnels for database replicas");

    let tasks: Vec<_> = match args.hostname {
        Some(hostname) => vec![tokio::spawn(async move {
            tunnel_with_retry(&hostname, 3306).await;
        })],
        None => (1..=HIGHEST_SLICE)
            .map(|slice| {
                tokio::spawn(async move {
                    let name = format!("s{slice}");
                    tunnel_with_retry(&name, 3306 + slice).await;
                })
            })
            .collect(),
    };

    tokio::select! {
        _ = futures::future::join_all(tasks) => {}
        outcome = signal::ctrl_c() => {
            // If ctrl-c cannot even be listened for, the OS is in serious
            // trouble and panicking is the only sensible reaction.
            outcome.expect("cannot listen for ctrl-c signal");
            println!("ctrl-c received, abort all");
            // Task cleanup is left to the runtime drop.
        }
    }
    // The tokio runtime is dropped here, and any remaining spawned task is
    // dropped along with it.
}