Rsync backup.pl
From Nuclear Physics Group Documentation Pages
Jump to navigation · Jump to search

Perl script for doing modern backups, called from a shell script run by cron every day.
It depends on the modules Parse::RecDescent and Inline, in addition to those listed in the script itself.
Before running the script for the very first time, there needs to be a symbolic link /mnt/npg-daily-current pointing to /mnt/npg-daily/n, where n is the number of the current backup drive (the script builds these names with the format "npg-daily/%02d", so the number is zero-padded to two digits, e.g. /mnt/npg-daily/01). There is also a separate automount configuration, used for mounting the appropriate backup drive when needed.
#!/usr/bin/perl -w
#
# rsync_backup.pl - pull daily rsync snapshots of the given hosts onto the
# current archive disk, hard-linking unchanged files against the most recent
# earlier snapshot.
#
# Usage: rsync_backup.pl [--dry-run] HOST|@NETGROUP ...
#
#   --dry-run   only list the rsync modules offered by each reachable host;
#               nothing is created, rotated or transferred.
#
use strict;
use warnings;

our $VERSION = "0.2";

# same functionality as unix df command
use Filesys::Df;
use Date::Manip;
use Unix::Netgroup 'get_netgroup';
use Net::Ping;
use Getopt::Long;

our $dry_run = 0;
GetOptions("dry-run" => \$dry_run);

# Remaining arguments are host names or @netgroup references.
our @backup_hosts = map resolve($_), @ARGV;

our $archive_disk       = "/mnt/npg-daily-current";  # symlink to the disk in use
our $reserve_disk       = "/data";                   # emergency target
our $disk_link_fmt      = "npg-daily/%02d";          # link target for disk N
our $bump_threshold     = 1.4;    # rotate when fewer estimated dumps than this fit
our $incremental_growth = 0.10;   # used by rotate_disk() to estimate dump size

# Directory that relative $disk_link_fmt names are resolved against.  It is
# captured once, here, because rotate_disk() may later redirect
# $archive_disk to $reserve_disk; deriving it from $archive_disk at call time
# would then look for the numbered disks under the wrong parent.
our $disk_link_base;
($disk_link_base = $archive_disk) =~ s#[^/]+$##;

our $rsync_version = rsync_version();
our @rsync = (
    "rsync",
    "--rsh" => "ssh -T -x -i /root/.ssh/rsync_id_rsa -e none"
);
our @rsync_pull = (@rsync, qw(
    --archive
    --sparse
    --hard-links
    --exclude=/var/log/lastlog
    --exclude=/var/spool/up2date
    --exclude=/var/cache/yum
));

# Trailing number of the symlink target, e.g. ".../npg-daily/03" => "03".
our ($current_disk_no) = ((readlink $archive_disk || '') =~ m/(\d+)\/?$/);
our $today = UnixDate('today', '%Y-%m-%d');
our $ping  = Net::Ping->new("icmp");

if( ! -l "$archive_disk" ) {
    die "archive disk '$archive_disk' does not exist or is not a symlink";
}
if( ! defined $current_disk_no ) {
    # Arithmetic on undef would treat it as 0 anyway (with warnings); make
    # that explicit so the rest of the run is warning-free.
    warn "unable to determine disk number from '$archive_disk', assuming 0\n";
    $current_disk_no = 0;
}
if( ! @backup_hosts ) {
    die "no backup hosts provided, aborting\n";
}
if( $rsync_version < 2.005_006 ) {
    warn "rsync 2.5.6 or later recommended for --link-dest support\n";
}

rotate_disk();
backup_all();

exit 0;

#
# Map the rsync version string to a float of the style of perl's traditional
# version numbers.  For example:
#   2.6.4 => 2.006_004
#
# Dies if the version cannot be detected.
#
sub rsync_version {
    my $ver = `rsync --version`;
    $ver = '' unless defined $ver;
    my ($major, $minor, $patchlevel) =
        ($ver =~ m/ rsync \s+ version \s+ (\d+)\.(\d+)\.(\d+) \b /x)
        or die "unable to detect rsync version";

    # Build the number from its decimal text rather than by summing
    # fractions: the callers compare against literals such as 2.006_004, and
    # a sum of separately rounded terms is not guaranteed to equal the
    # literal's value.
    return sprintf("%d.%03d%03d", $major, $minor, $patchlevel) + 0;
}

#
# resolve netgroup host group references
#
# "@group" expands to the hosts of that netgroup (entries whose host is
# empty or '-' are dropped); any other name is returned unchanged.
#
sub resolve {
    my $name = shift;
    if( my ($netgroup) = ($name =~ /^@(.*)/) ) {
        return map  $_->host,
               grep $_->host && $_->host ne '-',
               get_netgroup($netgroup);
    }
    return $name;
}

#
# Return the mount point for the given disk number
#
# A $disk_link_fmt that is not absolute is taken relative to the directory
# that held the archive symlink at startup ($disk_link_base).
#
sub disk_dir {
    my $disk_no = shift;
    my $prefix  = ($disk_link_fmt =~ m#^/#) ? '' : $disk_link_base;
    return $prefix . sprintf($disk_link_fmt, $disk_no);
}

#
# Decide whether to store the archive on the current disk or the next
#
# Estimates the size of one more dump from the space used so far and moves
# the $archive_disk symlink to the next numbered disk when fewer than
# $bump_threshold such dumps would still fit.  If the next disk is not
# available and the current one is full, falls back to $reserve_disk.
# In --dry-run mode the decision is reported but the symlink is untouched.
#
sub rotate_disk {
    my $statfs = df("$archive_disk") or die "unable to statfs\n";
    my $trees_on_disk = @{[ glob "$archive_disk/????-??-??" ]};

    # With fewer than two dated trees there is no basis for an estimate;
    # stay put as long as any space is left.
    if( $trees_on_disk < 2 && $statfs->{bavail} ) {
        return;
    }

    my $est_dump_size =
        $statfs->{used} / (1/$incremental_growth + $trees_on_disk - 1);

    # Nothing used yet: avoid dividing by zero.  Free space means there is
    # no reason to rotate; no free space means no further dump fits.
    if( $est_dump_size <= 0 && $statfs->{bavail} ) {
        return;
    }
    my $dumps_remaining =
        $est_dump_size > 0 ? $statfs->{bavail} / $est_dump_size : 0;

    if( $dumps_remaining > $bump_threshold ) {
        return;
    }

    my $next_disk_link = sprintf $disk_link_fmt, ($current_disk_no+1);
    my $next_disk_dir  = disk_dir($current_disk_no+1);

    if( ! -d $next_disk_dir ) {
        warn "unable to change to next archive disk '$next_disk_dir'\n";
        if( !$statfs->{bavail} ) {
            # There's no point in dumping to this disk, switch to emergency
            # backup
            $archive_disk = $reserve_disk;
            $current_disk_no++;
            warn "switching to reserve storage '$reserve_disk'";
        }
        return;
    }

    if( $dry_run ) {
        print "would switch to disk $next_disk_dir\n";
        return;
    }

    print "switching to disk $next_disk_dir\n";
    $current_disk_no++;

    # move to the next disk
    unlink $archive_disk
        or die "unable to remove link to old archive disk";
    symlink $next_disk_link, $archive_disk
        or die "unable to link '$archive_disk' to '$next_disk_link': $!";
}

#
# Create today's archive directory (unless in --dry-run mode) and back up
# every host that answers a ping; unreachable hosts are skipped with a
# warning.
#
sub backup_all {
    if( !$dry_run && ! -d "$archive_disk/$today" ) {
        mkdir "$archive_disk/$today", 0700
            or die "unable to create $archive_disk/$today: $!";
    }
    print "$archive_disk/$today\n";

    for my $host (@backup_hosts) {
        if( $ping->ping($host) ) {
            print "\n-----------------------------------\nbacking up $host\n ";
            backup_host($host);
        }
        else {
            warn "\n-----------------------------------\n$host unreachable, skipped\n";
        }
    }
}

# ########################################################################
#
# Backup the given host, sharing unchanged files with the last complete
# archive via hardlinks.  Considering incomplete archives would only be
# feasible if rsync supported multiple --link-dest targets.
#
# Each rsync module offered by the host is pulled into
#   $archive_disk/$today/$host/$module
# via a ".incomplete" directory that is renamed once rsync succeeds.
#
# ########################################################################
sub backup_host {
    my $host = shift;
    my $last_disk = disk_dir( $current_disk_no - 1 );

    # List form of open: the command is run without a shell.
    my $mod_fh;
    if( ! open $mod_fh, "-|", @rsync, "${host}::" ) {
        warn "unable to list modules on $host: $!";
        return;
    }
    my @mod_list = <$mod_fh>;
    # The exit status of the listing command is only known after close.
    close $mod_fh
        or warn "listing modules on $host failed (exit status " . ($? >> 8) . ")\n";

    if( $dry_run ) {
        print "\t", join(" ", map m/(\S+)/, @mod_list), "\n";
        return;
    }

    unless( -d "$archive_disk/$today/$host" ) {
        mkdir "$archive_disk/$today/$host", 0700
            or die "unable to create $archive_disk/$today/$host: $!";
    }

    foreach my $line ( @mod_list ) {
        # The module name is the first token of the line.  Anchoring keeps a
        # name such as "my-mod" from being picked up as "mod"; lines that do
        # not start with a name are skipped.
        my ($module, $description) = ($line =~ m/^(\w[\w.-]*)\s+(.*)/);
        next unless defined $module;

        my $host_mod = $host;
        # A module called "." would map onto the host directory itself; the
        # pattern above never produces it, the check is kept as a safeguard.
        if( $module ne "." ) {
            print "$module ";
            $host_mod .= "/$module";
        }

        my $new_archive = "$archive_disk/$today/$host_mod";
        my $sync_dir    = $new_archive;
        if( ! -d $new_archive ) {
            $sync_dir .= ".incomplete";
        }

        # Earlier archives of this module on the current and previous disk,
        # keyed by their date directory.  Paths that do not have the
        # expected shape are dropped instead of being inserted as a lone
        # list element, which would misalign the hash.
        my $archive_re =
            qr{/(\d{4}-\d{2}-\d{2})/\Q$host_mod\E(?:\.incomplete)?$};
        my $archive_glob =
            "{$archive_disk,$last_disk}/????-??-??/$host_mod" . "{,.incomplete}";
        my %archives =
            map { m/$archive_re/ ? ($1 => $_) : () } glob $archive_glob;

        my @run = reverse sort keys %archives;
        my ($last_run) = @run;
        my ($last_complete_run) =
            grep { $archives{$_} !~ m/incomplete$/ } @run;

        # The newest entry is the directory about to be synced into: there
        # is nothing to link against.
        if( $last_run && $archives{$last_run} eq $sync_dir ) {
            undef $last_run;
            undef $last_complete_run;
        }

        my @link_dest;
        if( $last_complete_run ) {
            if( $last_run ne $last_complete_run ) {
                # 2.6.4 allows multiple --link-dest options
                if( $rsync_version >= 2.006_004 ) {
                    push @link_dest, "--link-dest=$archives{$last_run}";
                }
                elsif( $rsync_version >= 2.005_006 ) {
                    system("cp", "-al", $archives{$last_run}, $sync_dir) == 0
                        or warn "pre-run link failed";
                }
            }
            if( $rsync_version >= 2.005_006 ) {
                push @link_dest, "--link-dest=$archives{$last_complete_run}";
            }
            else {
                system("cp", "-al", $archives{$last_complete_run}, $sync_dir) == 0
                    or warn "pre-run link failed";
            }
        }

        system(@rsync_pull, @link_dest, "${host}::$module/", $sync_dir);
        if( $? == 0 ) {
            if( ! -d $new_archive ) {
                rename $sync_dir, $new_archive
                    or warn "unable to rename $sync_dir to $new_archive: $!";
            }
        }
        else {
            warn "backup of ${host}::$module failed";
        }
    }

    # will fail if non-empty
    rmdir "$archive_disk/$today/$host";
    print "\n";
}
Cron shell script
#!/bin/sh
# Run the daily rsync backup and send a report email when it has completed.
# The report contains the backup script's output, the usage of the current
# archive disk and a per-directory size summary.

REPORT_TO="Daily Backups <backups@physics.unh.edu>"
LOG=/tmp/rsync_backup.log

(
    /usr/local/bin/rsync_backup.pl @servers @clients @laptops
    df -h "$( readlink --canonicalize /mnt/npg-daily-current )"
    (cd /mnt/npg-daily-current && du --max-depth=1 )

    # This is temporary, at best. Send a detailed listing of the ntbackup
    # files, reverse sorted by time (oldest at the top!)
    # find runs ls itself, so file names containing whitespace are passed
    # intact and ls is not run at all when nothing matches.
    find /mnt/npg-daily-current/ntbackup/ -iname "*.bkf" -exec ls -roth {} +
) > "$LOG" 2>&1
# "> file 2>&1" rather than "&> file": the latter is a bash extension; a
# POSIX sh parses it as "run in background" followed by "> file", which
# would mail an empty log before the backup has finished.

mail -s "npg-daily" -c "" "$REPORT_TO" < "$LOG" && rm "$LOG"