Rsync backup.pl

From Nuclear Physics Group Documentation Pages
Revision as of 14:21, 25 June 2007 by Steve (talk | contribs)
Jump to navigation | Jump to search

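Perl script for doing modern, rsync-based backups that share unchanged files between daily archives via hard links. It is called from a shell script (shown at the end of this page) that cron runs every day.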

#!/usr/bin/perl -w

use strict;
use warnings;

our $VERSION = "0.2";

# same functionality as unix df command
use Filesys::Df;
use Date::Manip;
use Unix::Netgroup 'get_netgroup';
use Net::Ping;
use Getopt::Long;

# --dry-run: backup_host() only prints each host's module list and returns
# before creating directories or transferring anything.
our $dry_run = 0;
GetOptions("dry-run" => \$dry_run);

# Whatever GetOptions leaves in @ARGV is the host list; resolve() expands
# "@name" arguments into the hosts of that netgroup.
our @backup_hosts = map resolve($_), @ARGV;
# Symlink pointing at the archive disk currently in use; the dated trees
# (YYYY-MM-DD) are created beneath it.
our $archive_disk = "/mnt/npg-daily-current";
# Fallback location that rotate_disk() switches to when the current disk has
# no space available and the next disk is not present.
our $reserve_disk = "/data";
# sprintf format for the target of the $archive_disk symlink, given a disk
# number.  A relative format is taken relative to the directory that holds
# the $archive_disk link (see disk_dir()).
our $disk_link_fmt = "npg-daily/%02d";
# rotate_disk() moves to the next disk unless the estimated number of dumps
# that still fit on the current disk is greater than this value.
our $bump_threshold = 1.4;
# Used by rotate_disk() to estimate the size of one more dump from the space
# already used: used / (1/$incremental_growth + trees_on_disk - 1).
our $incremental_growth = 0.10;
# Installed rsync version encoded as a number, e.g. 2.6.4 => 2.006004.
our $rsync_version = rsync_version();
# Base rsync command line (program name plus transport option), shared by the
# module listing and the actual transfers.
our @rsync = (
    "rsync", "--rsh" => "ssh -T -x -i /root/.ssh/rsync_id_rsa -e none"
);
# Command line used for the transfers: the base command plus the copy options
# and the paths that are never archived.
our @rsync_pull = (@rsync, qw( 
    --archive --sparse --hard-links
    --exclude=/var/log/lastlog
    --exclude=/var/spool/up2date
    --exclude=/var/cache/yum
));

# Number of the disk in use: the trailing digits of the symlink target
# (an optional trailing slash is ignored).  Left undefined when the link
# cannot be read or its target does not end in digits.
our ($current_disk_no) = ((readlink $archive_disk || '') =~ m/(\d+)\/?$/);
# Name of today's archive tree, formatted as YYYY-MM-DD.
our $today = UnixDate('today', '%Y-%m-%d');
# ICMP pinger used by backup_all() to skip hosts that do not answer.
our $ping = Net::Ping->new("icmp");

# Sanity checks before anything is written.
if( ! -l "$archive_disk" ) {
    die "archive disk '$archive_disk' does not exist or is not a symlink";
}

if( ! @backup_hosts ) {
    die "no backup hosts provided, aborting\n";
}

# Older rsync versions are still handled: backup_host() falls back to
# pre-populating the new tree with "cp -al" instead of using --link-dest.
if( $rsync_version < 2.005_006 ) {
    warn "rsync 2.5.6 or later recommended for --link-dest support\n";
}

# Main sequence: pick the disk to write to, then back up every host.
rotate_disk();
backup_all();

exit 0;


#
# Map the rsync version string to a number in the style of perl's traditional
# version numbers.   For example:
# 2.6.4 => 2.006_004
#
# Arguments: optionally, the text to parse.  When omitted (the normal case)
#            the output of `rsync --version` is used.
# Returns:   major + minor/1000 + patchlevel/1000000 as a number.
# Dies:      when rsync cannot be run or no version number is found.
#
# The result is built from a decimal string rather than by floating point
# arithmetic, so it converts to exactly the same value as a literal such as
# 2.006_004 and comparisons with >= are reliable for an exact version match.
#

sub rsync_version
{
    my ($banner) = @_;
    $banner = `rsync --version` unless defined $banner;
    defined $banner
        or die "unable to run rsync to detect its version";

    # Some releases print the number with a leading "v" and pre-releases
    # append a suffix (e.g. 3.1.0pre1); accept both.
    my ($major, $minor, $patchlevel) =
        ($banner =~ m/ rsync \s+ version \s+ v? (\d+)\.(\d+)\.(\d+) /x)
            or die "unable to detect rsync version";

    return 0 + sprintf("%d.%03d%03d", $major, $minor, $patchlevel);
}


#
# Expand one command-line host argument.
#
# A name of the form "@group" is looked up as a netgroup and replaced by the
# host names of its members (members without a host, or whose host is "-",
# are left out).  Any other name is returned unchanged.
#

sub resolve
{
    my ($name) = @_;

    # Plain host name: nothing to expand.
    return $name unless $name =~ /^@(.*)/;
    my $netgroup = $1;

    my @members = grep { $_->host && $_->host ne '-' } get_netgroup($netgroup);
    return map { $_->host } @members;
}


#
# Return the mount point for the given disk number.
#
# The disk number is formatted with $disk_link_fmt.  An absolute format is
# used as is; a relative one is appended to the directory part of
# $archive_disk (everything up to and including its last slash).
#

sub disk_dir
{
    my ($disk_no) = @_;
    my $leaf = sprintf $disk_link_fmt, $disk_no;

    return $leaf if $disk_link_fmt =~ m#^/#;

    # Strip the final path component, keeping the trailing slash.
    (my $parent = $archive_disk) =~ s#[^/]+$##;
    return $parent . $leaf;
}


#
# Decide whether to store the archive on the current disk or the next.
#
# The current disk is kept when it holds fewer than two dated trees and has
# space available, or when the estimated number of further dumps that fit
#     bavail / ( used / (1/$incremental_growth + trees_on_disk - 1) )
# is greater than $bump_threshold.
#
# Otherwise the $archive_disk symlink is re-pointed at the next disk and
# $current_disk_no is incremented.  If the next disk's directory does not
# exist a warning is issued and the current disk is kept, unless it has no
# space available at all, in which case $archive_disk is changed (in memory
# only) to $reserve_disk.
#
# With --dry-run the decision is reported but the symlink is left alone.
#
# Dies when the filesystem cannot be queried or the symlink cannot be
# replaced.
#

sub rotate_disk
{
    my $statfs = df("$archive_disk")
        or die "unable to statfs\n";

    # Dated trees (YYYY-MM-DD) already present on the current disk.
    my @trees = glob "$archive_disk/????-??-??";
    my $trees_on_disk = @trees;
    if( $trees_on_disk < 2 && $statfs->{bavail} ) {
        return;
    }

    my $est_dump_size =
        $statfs->{used} / (1/$incremental_growth + $trees_on_disk - 1);
    if( $statfs->{bavail} ) {
        # A zero estimate (nothing used yet) means any number of dumps still
        # fits; it must not be used as a divisor.
        return if $est_dump_size <= 0;
        return if $statfs->{bavail} / $est_dump_size > $bump_threshold;
    }

    # $current_disk_no is undefined when the link target carries no number.
    my $next_disk_no   = ($current_disk_no || 0) + 1;
    my $next_disk_link = sprintf $disk_link_fmt, $next_disk_no;
    my $next_disk_dir  = disk_dir($next_disk_no);

    if( ! -d $next_disk_dir ) {
        warn "unable to change to next archive disk '$next_disk_dir'\n";
        if (!$statfs->{bavail}) {
            # There's no point in dumping to this disk, switch to emergency
            # backup
            $archive_disk = $reserve_disk;
            $current_disk_no = $next_disk_no;
            warn "switching to reserve storage '$reserve_disk'";
        }
        return;
    }

    if( $dry_run ) {
        print "dry run: would switch to disk $next_disk_dir\n";
        return;
    }

    print "switching to disk $next_disk_dir\n";

    # Move to the next disk.  The new link is created under a temporary name
    # and renamed over the old one, so a failure at any step never leaves the
    # system without an archive link.
    my $tmp_link = "$archive_disk.new";
    unlink $tmp_link;    # remove a stale leftover; fine if there is none
    symlink $next_disk_link, $tmp_link
        or die "unable to create link to new archive disk: $!\n";
    if( ! rename $tmp_link, $archive_disk ) {
        my $err = $!;
        unlink $tmp_link;
        die "unable to replace link to old archive disk: $err\n";
    }

    $current_disk_no = $next_disk_no;
}


#
# Create today's archive directory (mode 0700) under $archive_disk if it is
# missing, print its path, then back up every host in @backup_hosts that
# answers a ping.  Unreachable hosts are skipped with a warning.
#
# With --dry-run nothing is created: an empty dated directory left behind by
# a dry run would otherwise be counted as an archive tree by rotate_disk().
#
# Dies when the directory cannot be created.
#

sub backup_all
{
    my $today_dir = "$archive_disk/$today";

    if( ! $dry_run && ! -d $today_dir ) {
        mkdir $today_dir, 0700
            or die "unable to create $today_dir: $!";
    }

    print "$today_dir\n";

    for my $host (@backup_hosts) {
        if ($ping->ping($host)) {
            print "\n-----------------------------------\nbacking up $host\n ";
            backup_host($host);
        }
        else {
            warn "\n-----------------------------------\n$host unreachable, skipped\n";
        }
    }
}

# ########################################################################
#
# Backup the given host, sharing unchanged files with earlier archives via
# hardlinks.
#
# The host's rsync modules are listed and each one is pulled into
#     $archive_disk/$today/$host/$module
# A module that has no archive for today yet is synced into a sibling
# "<module>.incomplete" directory, which is renamed to the final name once
# rsync exits successfully.
#
# Earlier archives of the same module are looked for on the current and the
# previous disk.  The most recent complete one is used as hardlink source;
# a more recent incomplete one is used as well when the rsync version allows
# it (2.6.4 accepts several --link-dest options; older versions get the tree
# pre-populated with "cp -al").  The directory being synced into is never a
# candidate itself, so resuming today's interrupted run still links against
# the previous archive.
#
# With --dry-run only the module names are printed.
#
# Argument: host name.  Returns nothing useful; problems with a single
# module are reported with warn and the remaining modules are still tried.
# Dies only when the host's directory for today cannot be created.
#
# ########################################################################

sub backup_host
{
    my $host = shift;
    my $last_disk = disk_dir( $current_disk_no - 1 );

    # List form of open: no shell is involved in running rsync.
    my $mod_fh;
    if( ! open $mod_fh, "-|", @rsync, "${host}::" ) {
        warn "unable to list modules on $host: $!";
        return;
    }
    my @mod_list = <$mod_fh>;
    if( ! close $mod_fh ) {
        # close() on a pipe reports a non-zero exit status of the command.
        warn "unable to list modules on $host (rsync status $?)\n";
        return;
    }

    if( $dry_run ) {
        print "\t", join(" ", map m/(\S+)/, @mod_list), "\n";
        return;
    }

    unless( -d "$archive_disk/$today/$host" ) {
        mkdir "$archive_disk/$today/$host", 0700
            or die "unable to create $archive_disk/$today/$host: $!";
    }

    foreach my $line ( @mod_list ) {

        # The module name is the first token of the line; blank lines carry
        # no module and are skipped.
        my ($module) = ($line =~ m/^\s*(\S+)/)
            or next;

        # The name comes from the remote host and ends up in a local path.
        if( $module =~ m#/# || $module eq ".." ) {
            warn "skipping suspicious module name '$module' on $host\n";
            next;
        }

        my $host_mod = $host;
        if( $module ne "." ) {
            print "$module ";
            $host_mod .= "/$module";
        }
        my $new_archive = "$archive_disk/$today/$host_mod";
        my $sync_dir = $new_archive;
        if( ! -d $new_archive ) {
            $sync_dir .= ".incomplete";
        }

        # Map run date => archive path for every earlier archive of this
        # module, complete or not.  Path components are matched with [^/]+
        # so host names containing "-" or "." are handled.
        my %archives;
        my @candidates = glob( "{$archive_disk,$last_disk}/????-??-??/$host_mod"
                             . "{,.incomplete}" );
        for my $path ( @candidates ) {
            next if $path eq $sync_dir;
            my ($date) = ($path =~ m#/(\d{4}-\d{2}-\d{2})/[^/]+(?:/[^/]+)?$#)
                or next;
            $archives{$date} = $path;
        }

        my @runs = reverse sort keys %archives;
        my $last_run = $runs[0];
        my ($last_complete_run) =
            grep { $archives{$_} !~ m/\.incomplete$/ } @runs;

        my @link_dest;
        if( $last_complete_run ) {
            if( $last_run ne $last_complete_run ) {
                # 2.6.4 allows multiple --link-dest options
                if( $rsync_version >= 2.006_004 ) {
                    push @link_dest, "--link-dest=$archives{$last_run}";
                }
                elsif( $rsync_version >= 2.005_006 ) {
                    # List form: the paths are never seen by a shell.
                    system("cp", "-al", "--", $archives{$last_run}, $sync_dir) == 0
                        or warn "pre-run link failed";
                }
            }
            if( $rsync_version >= 2.005_006 ) {
                push @link_dest, "--link-dest=$archives{$last_complete_run}";
            }
            else {
                system("cp", "-al", "--", $archives{$last_complete_run}, $sync_dir) == 0
                    or warn "pre-run link failed";
            }
        }

        system(@rsync_pull, @link_dest, "${host}::$module/", $sync_dir);
        if( $? == 0 ) {
            if( ! -d $new_archive ) {
                rename $sync_dir, $new_archive
                    or warn "unable to rename $sync_dir to $new_archive: $!";
            }
        }
        else {
            warn "backup of ${host}::$module failed";
        }
    }
    # will fail if non-empty
    rmdir "$archive_disk/$today/$host";
    print "\n";
}

Cron shell script

#!/bin/sh

# Run the daily backup and send a report email when it is completed.
#
# Everything printed by the backup and by the disk usage summary (stdout and
# stderr) is collected in a log file, which is mailed to REPORT_TO and
# removed once the mail command has succeeded.

REPORT_TO="Daily Backups <backups@physics.unh.edu>"
ARCHIVE=/mnt/npg-daily-current

# Private log file with an unpredictable name rather than a fixed path in
# /tmp, since this runs from cron with elevated privileges.
LOG=$(mktemp /tmp/rsync_backup.XXXXXX) || exit 1

(
    /usr/local/bin/rsync_backup.pl @servers @clients @laptops

    df -h "$( readlink --canonicalize "$ARCHIVE" )"
    (cd "$ARCHIVE" && du --max-depth=1 )

    # This is temporary, at best.  Send a detailed listing of the ntbackup
    # files, reverse sorted by time (oldest at the top!).  find runs ls only
    # when there is at least one match and passes the names unsplit.
    find "$ARCHIVE/ntbackup/" -iname "*.bkf" -exec ls -roth {} +

# "&>" is a bash extension; under a POSIX /bin/sh it would background the
# subshell and leave the log empty, so the portable form is used.
) > "$LOG" 2>&1
mail -s "npg-daily" -c "" "$REPORT_TO" < "$LOG" && rm "$LOG"