Rsync backup.pl


Perl script for doing rsync snapshot backups, called from a shell script run by cron every day. Each run creates a dated tree on the backup disk and hardlinks unchanged files against the previous run.

It depends on the modules Parse::RecDescent and Inline, in addition to those listed in the script itself.
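
A quick way to check the dependencies before the first run is to try loading each module from the command line. This is only a sketch; the module list combines the two modules named above with the use lines in the script below.

# check that every required module can be loaded
for mod in Parse::RecDescent Inline Filesys::Df Date::Manip Unix::Netgroup Net::Ping Getopt::Long; do
    perl -e "require $mod" 2>/dev/null || echo "missing Perl module: $mod"
done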

Before running the script for the very first time, there needs to be a symbolic link /mnt/npg-daily-current pointing to /mnt/npg-daily/n, where n is the number of the current backup drive. A separate automount configuration is used for mounting the appropriate backup drive when needed.
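
For example, with drive number 1 as the current drive, the link can be created as follows. This is just a sketch; the relative, zero-padded target form npg-daily/NN is the same one the script itself uses when it rotates disks.

# create the initial symlink for backup drive 1
cd /mnt && ln -s npg-daily/01 npg-daily-current
ls -l npg-daily-current    # expect: npg-daily-current -> npg-daily/01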

Make sure that every host to be backed up has the current ssh public key of the machine doing the backup, or that host will not be backed up.
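
The script runs rsync over ssh as root using the identity /root/.ssh/rsync_id_rsa (see the --rsh option in the script), so each host needs the matching public key, presumably in root's authorized_keys. A minimal sketch, assuming ssh-copy-id is available and that somehost stands for a host to be backed up:

# on the backup machine, generate the key once if it does not already exist
test -f /root/.ssh/rsync_id_rsa || ssh-keygen -t rsa -N "" -f /root/.ssh/rsync_id_rsa

# install the public key on a host that should be backed up (somehost is a placeholder)
ssh-copy-id -i /root/.ssh/rsync_id_rsa.pub root@somehost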

#!/usr/bin/perl -w

use strict;
use warnings;

our $VERSION = "0.2";

# same functionality as unix df command
use Filesys::Df;
use Date::Manip;
use Unix::Netgroup 'get_netgroup';
use Net::Ping;
use Getopt::Long;

our $dry_run = 0;
GetOptions("dry-run" => \$dry_run);

our @backup_hosts = map resolve($_), @ARGV;
our $archive_disk = "/mnt/npg-daily-current";
our $reserve_disk = "/data";
our $disk_link_fmt = "npg-daily/%02d";
our $bump_threshold = 1.4;
our $incremental_growth = 0.10;
our $rsync_version = rsync_version();
our @rsync = (
    "rsync", "--rsh" => "ssh -T -x -i /root/.ssh/rsync_id_rsa -e none"
);
our @rsync_pull = (@rsync, qw( 
    --archive --sparse --hard-links
    --exclude=/var/log/lastlog
    --exclude=/var/spool/up2date
    --exclude=/var/cache/yum
));

our ($current_disk_no) = ((readlink $archive_disk || '') =~ m/(\d+)\/?$/);
our $today = UnixDate('today', '%Y-%m-%d');
our $ping = Net::Ping->new("icmp");

if( ! -l "$archive_disk" ) {
    die "archive disk '$archive_disk' does not exist or is not a symlink";
}

if( ! @backup_hosts ) {
    die "no backup hosts provided, aborting\n";
}

if( $rsync_version < 2.005_006 ) {
    warn "rsync 2.5.6 or later recommended for --link-dest support\n";
}

rotate_disk();
backup_all();

exit 0;


#
# Map the rsync version string to a float of the style of perl's traditional
# version numbers.   For example:
# 2.6.4 => 2.006_004
#

sub rsync_version
{
    my $ver = `rsync --version`;
    my ($major, $minor, $patchlevel) = 
        ($ver =~ m/ rsync \s+ version \s+ (\d+)\.(\d+)\.(\d+) \b /x)
            or die "unable to detect rsync version";
    return $major + ($minor/1000) + ($patchlevel/1000_000);
}


#
# resolve netgroup host group references
#

sub resolve
{
    my $name = shift;
    if( my ($netgroup) = ($name =~ /^@(.*)/) ) {
        return map $_->host,
            grep $_->host && $_->host ne '-', get_netgroup($netgroup);
    }
    else {
        return $name;
    }
}


#
# Return the mount point for the given disk number
#

sub disk_dir
{
    my $disk_no = shift;
    my $disk_dir = '';
    if( $disk_link_fmt !~ m#^/# ) {
        ($disk_dir = $archive_disk) =~ s#[^/]+$##;
    }
    $disk_dir .= sprintf $disk_link_fmt, $disk_no;
}


#
# Decide whether to store the archive on the current disk or the next
#

sub rotate_disk
{
    my $statfs = df("$archive_disk")
        or die "unable to statfs\n";
    my $trees_on_disk = @{[ glob "$archive_disk/????-??-??" ]};
    if( $trees_on_disk < 2 && $statfs->{bavail} ) {
        return;
    }
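    # Estimate the size of one incremental tree: treat the first tree on the
    # disk as a full copy and each later tree as $incremental_growth of it,
    # then see how many more such trees would fit in the free space.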
    my $est_dump_size =
        $statfs->{used} / (1/$incremental_growth + $trees_on_disk - 1);
    my $dumps_remaining = $statfs->{bavail} / $est_dump_size;
    if( $dumps_remaining > $bump_threshold ) {
        return;
    }

    my $next_disk_link = sprintf $disk_link_fmt, ($current_disk_no+1);
    my $next_disk_dir = disk_dir($current_disk_no+1);

    if( ! -d $next_disk_dir ) {
        warn "unable to change to next archive disk '$next_disk_dir'\n";
        if (!$statfs->{bavail}) {
            # There's no point in dumping to this disk, switch to emergency
            # backup
            $archive_disk = $reserve_disk;
            $current_disk_no++;
            warn "switching to reserve storage '$reserve_disk'";
        }
        return;
    }

    print "switching to disk $next_disk_dir\n";

    $current_disk_no++;

    # move to the next disk
    unlink $archive_disk
        or die "unable to remove link to old archive disk";
    symlink $next_disk_link, $archive_disk;
}


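#
# Create today's archive directory, then back up every host that answers a ping
#
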
sub backup_all
{
    unless (-d "$archive_disk/$today") {
        mkdir "$archive_disk/$today", 0700
            or die "unable to create $archive_disk/$today: $!";
    }

    print "$archive_disk/$today\n";

    for my $host (@backup_hosts) {
        if ($ping->ping($host)) {
            print "\n-----------------------------------\nbacking up $host\n ";
            backup_host($host);
        }
        else {
            warn "\n-----------------------------------\n$host unreachable, skipped\n";
        }
    }
}

# ########################################################################
#
# Backup the given host, sharing unchanged files with the last complete 
# archive via hardlinks.  Considering incomplete archives would only be
# feasible if rsync supported multiple --link-dest targets.
#
# ########################################################################

sub backup_host
{
    my $host = shift;
    my $last_disk = disk_dir( $current_disk_no - 1 );

    if( ! open MOD_LIST, "-|", @rsync, "${host}::" ) {
        warn "unable to list modules on $host: $!";
        return;
    }
    my @mod_list = <MOD_LIST>;
    close MOD_LIST;

    if( $dry_run ) {
        print "\t", join(" ", map m/(\S+)/, @mod_list), "\n";
        return;
    }

    unless( -d "$archive_disk/$today/$host" ) {
        mkdir "$archive_disk/$today/$host", 0700
            or die "unable to create $archive_disk/$today/$host: $!";
    }

    foreach( @mod_list ) {

        my ($module, $description) = m/(\w+)\s+(.*)/;
        my $host_mod = $host;
        if( $module ne "." ) {
            print "$module ";
            $host_mod .= "/$module";
        }
        my $new_archive = "$archive_disk/$today/$host_mod";
        my $sync_dir = $new_archive;
        if( ! -d $new_archive ) {
            $sync_dir .= ".incomplete";
        }

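        # Index the existing archive trees for this host/module by date,
        # looking on both the current disk and the previous one and
        # including incomplete trees.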
        my %archives =
            map { (m#/(\d{4}-\d{2}-\d{2})/\w+(?:/[\w\.]+)?$#) => $_ }
                glob "{$archive_disk,$last_disk}/????-??-??/$host_mod"
                    . "{,.incomplete}";
        my ($last_run) = my @run = reverse sort keys %archives;
        my ($last_complete_run) = grep $archives{$_} !~ m/incomplete$/, @run;
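        # If the newest tree found is the one we are about to sync into
        # (a re-run of today's backup), do not use previous runs as link targets.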
        if( $last_run && $archives{$last_run} eq $sync_dir ) {
            undef $last_run;
            undef $last_complete_run;
        }
        my @link_dest;
        if( $last_complete_run ) {
            if( $last_run ne $last_complete_run ) {
                # 2.6.4 allows multiple --link-dest options
                if( $rsync_version >= 2.006_004 ) {
                    push @link_dest, "--link-dest=$archives{$last_run}";
                }
                elsif( $rsync_version >= 2.005_006 ) {
                    system("cp -al $archives{$last_run} $sync_dir") == 0
                        or warn "pre-run link failed";
                }
            }
            if( $rsync_version >= 2.005_006 ) {
                push @link_dest, "--link-dest=$archives{$last_complete_run}";
            }
            else {
                system("cp -al $archives{$last_complete_run} $sync_dir") == 0
                    or warn "pre-run link failed";
            }
        }

        system(@rsync_pull, @link_dest, "${host}::$module/", $sync_dir);
        if( $? == 0 ) {
            rename $sync_dir, $new_archive unless -d $new_archive;
        }
        else {
            warn "backup of ${host}::$module failed";
        }
    }
    # will fail if non-empty
    rmdir "$archive_disk/$today/$host";
    print "\n";
}
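
The script can be exercised by hand before it is wired into cron. With --dry-run each host is only asked for its list of rsync modules and nothing is copied, which is a quick check of ssh access and netgroup expansion; note that the dated directory is still created and a disk rotation can still happen. The netgroup names are the ones used in the cron script below.

/usr/local/bin/rsync_backup.pl --dry-run @servers @clients @laptops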

Cron shell script

#!/bin/sh

# Send a report email when backups are completed

REPORT_TO="Daily Backups <backups@physics.unh.edu>"

# All output (including errors) from the following commands goes into /tmp/rsync_backup.log
(
    /usr/local/bin/rsync_backup.pl @servers @clients @laptops

    df -h $( readlink --canonicalize /mnt/npg-daily-current )
    (cd /mnt/npg-daily-current && du --max-depth=1 )

    # This is temporary, at best.  Send a detailed listing of the ntbackup
    # files, reverse sorted by time (oldest at the top!)
    nt_archives=$( find /mnt/npg-daily-current/ntbackup/ -iname "*.bkf" )
    if [ -n "$nt_archives" ]; then
        ls -roth $nt_archives
    fi

) > /tmp/rsync_backup.log 2>&1

# Mail log, and delete log only if mail was successful
mail -s "npg-daily" -c "" "$REPORT_TO" < /tmp/rsync_backup.log && rm /tmp/rsync_backup.log
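
To have this run every day, the wrapper can be dropped into /etc/cron.daily or listed in root's crontab. A sketch of a crontab entry; the time and the wrapper's install path are placeholders, since only the Perl script's path appears above.

# m h dom mon dow  command
30 3 * * *  /usr/local/bin/rsync_backup_cron.sh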