Difference between revisions of "Rsync backup.pl"
From Nuclear Physics Group Documentation Pages
Jump to navigation · Jump to search
| Line 1: | Line 1: | ||
Perl script for doing modern backups, called from [[Rsync_backup.pl#Cron shell script|a shell script run by cron]] every day. | Perl script for doing modern backups, called from [[Rsync_backup.pl#Cron shell script|a shell script run by cron]] every day. | ||
| + | |||
| + | It depends on the modules ''Parse::RecDescent'' and ''Inline'', in addition to those listed in the script itself. | ||
<pre>#!/usr/bin/perl -w | <pre>#!/usr/bin/perl -w | ||
Revision as of 18:30, 29 June 2007
Perl script for doing modern backups, called from a shell script run by cron every day.
It depends on the modules Parse::RecDescent and Inline, in addition to those listed in the script itself.
#!/usr/bin/perl -w
use strict;
use warnings;
our $VERSION = "0.2";
# same functionality as unix df command
use Filesys::Df;
use Date::Manip;
use Unix::Netgroup 'get_netgroup';
use Net::Ping;
use Getopt::Long;
# ------------------------------------------------------------------------
# Command line: rsync_backup.pl [--dry-run] HOST|@NETGROUP ...
# ------------------------------------------------------------------------
our $dry_run = 0;
GetOptions('dry-run' => \$dry_run);

# "@name" arguments are expanded by resolve(); anything else is kept as is.
our @backup_hosts = map { resolve($_) } @ARGV;

# ------------------------------------------------------------------------
# Configuration
# ------------------------------------------------------------------------
# Symlink naming the disk that currently receives the daily archives.
our $archive_disk = '/mnt/npg-daily-current';
# Used by rotate_disk() when the current disk is full and no next disk exists.
our $reserve_disk = '/data';
# sprintf format producing the per-disk path from a disk number.
our $disk_link_fmt = 'npg-daily/%02d';
# rotate_disk() stays on the current disk only while more than this many
# estimated dumps still fit on it.
our $bump_threshold = 1.4;
# Enters rotate_disk()'s size estimate; presumably the expected size of an
# incremental dump relative to a full one -- TODO confirm.
our $incremental_growth = 0.10;

our $rsync_version = rsync_version();

# Base rsync command line shared by module listing and the actual transfer.
our @rsync = (
    'rsync',
    '--rsh', 'ssh -T -x -i /root/.ssh/rsync_id_rsa -e none',
);
our @rsync_pull = (
    @rsync,
    '--archive',
    '--sparse',
    '--hard-links',
    '--exclude=/var/log/lastlog',
    '--exclude=/var/spool/up2date',
    '--exclude=/var/cache/yum',
);

# The current disk number is the trailing run of digits in the symlink target.
our ($current_disk_no) = do {
    my $target = readlink $archive_disk;
    ($target || '') =~ m/(\d+)\/?$/;
};
our $today = UnixDate('today', '%Y-%m-%d');
our $ping  = Net::Ping->new('icmp');

# ------------------------------------------------------------------------
# Sanity checks, then the actual run
# ------------------------------------------------------------------------
-l $archive_disk
    or die "archive disk '$archive_disk' does not exist or is not a symlink";

@backup_hosts
    or die "no backup hosts provided, aborting\n";

warn "rsync 2.5.6 or later recommended for --link-dest support\n"
    if $rsync_version < 2.005_006;

rotate_disk();
backup_all();
exit 0;
#
# Run `rsync --version` and convert the reported X.Y.Z version into a float
# in the style of perl's traditional version numbers. For example:
#     2.6.4 => 2.006_004
# Dies if no "rsync version X.Y.Z" string can be found in the output.
#
sub rsync_version
{
    my $banner = `rsync --version`;
    my @fields =
        $banner =~ m/ rsync \s+ version \s+ (\d+)\.(\d+)\.(\d+) \b /x;
    die "unable to detect rsync version" unless @fields;

    my ($maj, $min, $patch) = @fields;
    return $maj + ($min/1000) + ($patch/1000_000);
}
#
# Expand a command-line host argument.
#
# An argument of the form "@name" is looked up as a netgroup and replaced
# by the host field of each member, skipping members whose host is empty
# or '-'.  Any other argument is returned unchanged.
#
sub resolve
{
    my ($name) = @_;

    # plain host name: nothing to expand
    return $name unless $name =~ /^@(.*)/;

    my $netgroup = $1;
    my @hosts;
    for my $member (get_netgroup($netgroup)) {
        next unless $member->host && $member->host ne '-';
        push @hosts, $member->host;
    }
    return @hosts;
}
#
# Return the path for the given disk number.
#
# The number is formatted with $disk_link_fmt.  When that format is not an
# absolute path, the result is prefixed with $archive_disk minus its final
# path component (i.e. the directory that holds the symlink).
#
sub disk_dir
{
    my ($disk_no) = @_;

    my $prefix = '';
    unless( $disk_link_fmt =~ m#^/# ) {
        $prefix = $archive_disk;
        $prefix =~ s#[^/]+$##;
    }
    return $prefix . sprintf($disk_link_fmt, $disk_no);
}
#
# Decide whether to store the archive on the current disk or the next.
#
# The size of one more dump is estimated from the space used on the current
# disk and the number of dated trees (YYYY-MM-DD directories) already on it.
# If no more than $bump_threshold such dumps would still fit, the
# $archive_disk symlink is repointed at the next numbered disk.  If that
# disk does not exist and the current one has no space left at all, the
# run falls back to $reserve_disk.
#
# Takes no arguments.  Updates the globals $current_disk_no and, in the
# fallback case, $archive_disk.  Dies if the disk cannot be inspected or
# the symlink cannot be switched.
#
sub rotate_disk
{
    my $statfs = df("$archive_disk")
        or die "unable to statfs\n";
    my $trees_on_disk = @{[ glob "$archive_disk/????-??-??" ]};

    # With fewer than two trees there is nothing to base an estimate on;
    # stay put as long as there is any space at all.
    if( $trees_on_disk < 2 && $statfs->{bavail} ) {
        return;
    }

    my $est_dump_size =
        $statfs->{used} / (1/$incremental_growth + $trees_on_disk - 1);

    if( $est_dump_size > 0 ) {
        my $dumps_remaining = $statfs->{bavail} / $est_dump_size;
        if( $dumps_remaining > $bump_threshold ) {
            return;
        }
    }
    elsif( $statfs->{bavail} ) {
        # Nothing used yet, so the estimate is zero: avoid dividing by it
        # and keep the current disk while it has free space.
        return;
    }

    my $next_disk_link = sprintf $disk_link_fmt, ($current_disk_no+1);
    my $next_disk_dir = disk_dir($current_disk_no+1);
    if( ! -d $next_disk_dir ) {
        warn "unable to change to next archive disk '$next_disk_dir'\n";
        if (!$statfs->{bavail}) {
            # There's no point in dumping to this disk, switch to emergency
            # backup
            # NOTE(review): disk_dir() derives its prefix from $archive_disk,
            # so after this assignment relative disk paths are resolved
            # against the parent of $reserve_disk -- confirm that is intended.
            $archive_disk = $reserve_disk;
            $current_disk_no++;
            warn "switching to reserve storage '$reserve_disk'";
        }
        return;
    }

    print "switching to disk $next_disk_dir\n";
    $current_disk_no++;

    # move to the next disk
    unlink $archive_disk
        or die "unable to remove link to old archive disk: $!";
    # The old link is already gone at this point, so a failure here must
    # not go unnoticed: every later step relies on $archive_disk existing.
    symlink $next_disk_link, $archive_disk
        or die "unable to link '$archive_disk' to '$next_disk_link': $!";
}
#
# Create today's dated directory on the archive disk if it is missing,
# print its path, then back up every host in @backup_hosts that answers a
# ping.  Unreachable hosts are skipped with a warning.
#
sub backup_all
{
    my $today_dir = "$archive_disk/$today";
    if( ! -d $today_dir ) {
        mkdir $today_dir, 0700
            or die "unable to create $archive_disk/$today: $!";
    }
    print "$today_dir\n";

    foreach my $host (@backup_hosts) {
        unless( $ping->ping($host) ) {
            warn "\n-----------------------------------\n$host unreachable, skipped\n";
            next;
        }
        print "\n-----------------------------------\nbacking up $host\n ";
        backup_host($host);
    }
}
# ########################################################################
#
# Backup the given host, sharing unchanged files with earlier archives via
# hardlinks.
#
# The host's rsync daemon is asked for its module list; each module is
# pulled into $archive_disk/$today/$host/$module.  While a transfer is in
# progress the target carries a ".incomplete" suffix, which is removed
# once rsync exits successfully.  Earlier archives of the same module are
# searched for on the current and the previous disk:
#   - rsync >= 2.6.4: the most recent run (even if incomplete) and the
#     most recent complete run are both passed as --link-dest;
#   - rsync >= 2.5.6: only the complete run is passed as --link-dest, an
#     incomplete newer run is pre-linked with "cp -al";
#   - older rsync: the complete run is pre-linked with "cp -al".
#
# With --dry-run only the module names are printed.
#
# Parameters: $host - name of the host to back up.
# Returns nothing useful; problems are reported with warn, except a
# failure to create the host directory, which is fatal.
#
# ########################################################################
sub backup_host
{
    my $host = shift;
    my $last_disk = disk_dir( $current_disk_no - 1 );

    # List form of pipe open: no shell is involved.
    my $mod_fh;
    if( ! open($mod_fh, "-|", @rsync, "${host}::") ) {
        warn "unable to list modules on $host: $!";
        return;
    }
    my @mod_list = <$mod_fh>;
    # close() on a pipe reports the child's exit status; without this check
    # a failed listing would look like a host that exports no modules.
    if( ! close($mod_fh) ) {
        warn "unable to list modules on $host: "
            . ($! ? "$!" : "rsync exited with status " . ($? >> 8)) . "\n";
        return;
    }

    # The module name is the first token of a listing line.  Names may
    # contain '-' and '.', but must start with a word character and must
    # not contain '/', so a listing line can never point outside the
    # archive tree.  Lines that do not start with such a token (blank
    # lines, banner text) are ignored.
    my @modules = map { m/^\s*(\w[\w.-]*)(?!\S)/ ? $1 : () } @mod_list;

    if( $dry_run ) {
        print "\t", join(" ", @modules), "\n";
        return;
    }

    unless( -d "$archive_disk/$today/$host" ) {
        mkdir "$archive_disk/$today/$host", 0700
            or die "unable to create $archive_disk/$today/$host: $!";
    }

    foreach my $module ( @modules ) {
        print "$module ";
        my $host_mod = "$host/$module";

        my $new_archive = "$archive_disk/$today/$host_mod";
        my $sync_dir = $new_archive;
        if( ! -d $new_archive ) {
            $sync_dir .= ".incomplete";
        }

        # Map run date => archive path for every earlier (or current) run
        # of this module.  Host and module are matched as "anything but a
        # slash" so that dotted or hyphenated names are recognised; paths
        # whose date component is not YYYY-MM-DD are skipped rather than
        # allowed to misalign the key/value list.
        my %archives =
            map { m{/(\d{4}-\d{2}-\d{2})/[^/]+/[^/]+$} ? ($1 => $_) : () }
            glob "{$archive_disk,$last_disk}/????-??-??/$host_mod"
                . "{,.incomplete}";

        my ($last_run) = my @run = reverse sort keys %archives;
        my ($last_complete_run) = grep $archives{$_} !~ m/incomplete$/, @run;

        # If the newest entry is the directory we are about to sync into,
        # there is nothing to link against.
        if( $last_run && $archives{$last_run} eq $sync_dir ) {
            undef $last_run;
            undef $last_complete_run;
        }

        my @link_dest;
        if( $last_complete_run ) {
            if( $last_run ne $last_complete_run ) {
                # 2.6.4 allows multiple --link-dest options
                if( $rsync_version >= 2.006_004 ) {
                    push @link_dest, "--link-dest=$archives{$last_run}";
                }
                elsif( $rsync_version >= 2.005_006 ) {
                    # list form: paths are never interpreted by a shell
                    system("cp", "-al", $archives{$last_run}, $sync_dir) == 0
                        or warn "pre-run link failed";
                }
            }
            if( $rsync_version >= 2.005_006 ) {
                push @link_dest, "--link-dest=$archives{$last_complete_run}";
            }
            else {
                system("cp", "-al", $archives{$last_complete_run}, $sync_dir) == 0
                    or warn "pre-run link failed";
            }
        }

        system(@rsync_pull, @link_dest, "${host}::$module/", $sync_dir);
        if( $? == 0 ) {
            if( ! -d $new_archive ) {
                rename $sync_dir, $new_archive
                    or warn "unable to rename $sync_dir to $new_archive: $!";
            }
        }
        else {
            warn "backup of ${host}::$module failed";
        }
    }

    # will fail if non-empty
    rmdir "$archive_disk/$today/$host";
    print "\n";
}
Cron shell script
#!/bin/sh
# Run the daily rsync backup and send a report email when it is completed.
#
# All output of the backup run and of the disk-usage summary is collected
# in a temporary log file, which is mailed to REPORT_TO and removed only
# if the mail command succeeded.
REPORT_TO="Daily Backups <backups@physics.unh.edu>"

# Use an unpredictable file name: a fixed name in /tmp can be pre-created
# or symlinked by another user before this script writes to it.
LOG=$(mktemp /tmp/rsync_backup.log.XXXXXX) || exit 1

(
    /usr/local/bin/rsync_backup.pl @servers @clients @laptops
    df -h "$( readlink --canonicalize /mnt/npg-daily-current )"
    (cd /mnt/npg-daily-current && du --max-depth=1 )
    # This is temporary, at best. Send a detailed listing of the ntbackup
    # files, reverse sorted by time (oldest at the top!)
    # Letting find run ls keeps file names with whitespace intact and
    # runs nothing at all when there are no matching files.
    find /mnt/npg-daily-current/ntbackup/ -iname "*.bkf" -exec ls -roth {} +
# "&>" is a bash extension; a POSIX /bin/sh reads it as "run in the
# background, then truncate the file", so the redirection is spelled out.
) > "$LOG" 2>&1

mail -s "npg-daily" -c "" "$REPORT_TO" < "$LOG" && rm "$LOG"