# NOTE(review): This is a non-contiguous fragment of a larger zfs-replicate
# script.  The leading number on each line below is the line number in the
# original file; the lines between those numbers are not visible here, so
# only annotations have been added — no code tokens were changed.
3 # Usage: replicate <hostname> <zfs filesystem>
# Directory on the remote host used as a mutual-exclusion lock between
# zfs admin tools (created atomically with mkdir, below).
7 remote_lockdir="/tmp/zfs-admin-lock"
10 # Set the name of the local pool used to store the backup of the remote
13 # Set the email address to send notification to
14 mailto=root@pippins.net
17 # When this variable is set, local filesystems will be destroyed
18 # before receiving full streams into them from the remote source.
19 destroy_local_filesystem_on_full_replicate=0
21 # The ssh connection doesn't find zfs without this.
24 # Setup our cleanup and exit trap
# NOTE(review): the lines below appear to be the body of the cleanup /
# fatal_and_exit handler: remove the temp snapshot-list files, remove the
# remote lock directory if we created it, and mail a failure notice when a
# second argument (an address) is given.  The enclosing function definition
# lies outside this fragment — confirm against the full file.
26 if [[ -e "$local_list" ]]; then
29 if [[ -e "$remote_list" ]]; then
32 if [[ -n "$remote" ]]; then
33 ssh $remote ls -d "$remote_lockdir" > /dev/null 2>&1
# Only remove the remote lockdir when the ls above shows it exists.
34 if [[ $? == 0 ]]; then
35 ssh $remote rm -rf "$remote_lockdir"
41 if [[ -n "$2" ]]; then
# $1 is the failure message (may contain \n escapes, hence echo -e);
# $2 is the notification address.
42 echo -e "$1" | $mailx -s "zfs replicate on $hostname failed" "$2"
# Clean up (and notify) if the user interrupts the replicate.
46 trap fatal_and_exit INT
49 # Make sure we have valid arguments
50 if [[ -z "$remote" ]] || [[ -z "$remote_fs" ]]; then
51 fatal_and_exit "Usage: $0 <hostname> <zfs filesystem>"
54 # Make sure the local pool and local receiving filesystem exist, or print some errors
55 zpool list -H "$local_pool" >/dev/null 2>&1
57 fatal_and_exit "-E- The local pool, '$local_pool' doesn't seem to exist." $mailto
59 zfs list "$local_pool/$remote_pool" >/dev/null 2>&1
61 echo >&2 "-I- The local filesystem for the remote pool, '$local_pool/$remote_pool' doesn't seem to exist."
62 echo >&2 " Creating the local filesystem to receive the remote pool into: $local_pool/$remote_pool"
63 $zfs create $local_pool/$remote_pool
# NOTE(review): this create runs locally, so the "-E- remote" wording in
# the message below is misleading — should probably say "local".
65 fatal_and_exit "-E- remote $zfs create command failed" $mailto
69 # Obtain the zpool guid for the local pool
# The guid uniquely names this destination pool inside the backup-marker
# snapshot names, so several backup destinations can coexist per source.
70 local_pool_guid=`zpool get guid $local_pool 2>&1 | grep $local_pool | awk '{ print $3 }'`
# Re-run the command purely for a clean exit status: $? after the pipeline
# above would reflect awk, not zpool.
71 zpool get guid $local_pool > /dev/null 2>&1
73 fatal_and_exit "-E- Unable to extract the guid for the local pool: $local_pool" $mailto
76 # Turn on shell verbosity
79 # Create the remote lockdir before continuing with the replicate
80 # Spinlock on creating the lock
# mkdir is atomic, so it doubles as test-and-set: success means we now hold
# the lock; failure means another zfs admin tool holds it.
85 ssh $remote mkdir "$remote_lockdir" >/dev/null 2>&1
87 # Another zfs admin tool is running.
88 # Wait a random amount of time and try again
89 ransleep=$(($RANDOM % $maxsleeptime))
91 ((attempts=attempts+1))
93 # No other zfs admin tool is running; we can proceed now.
96 if [[ $attempts -gt $maxattempts ]]; then
97 # We've exceeded our maximum while loop count
98 echo "-E- The zfs filesystem has been locked down. Skipping replicate operation."
# Capture who holds the lock (owner/mtime of the lockdir) for the mail.
99 fail_msg=`ssh $remote ls -ld $remote_lockdir 2>&1`
100 fatal_and_exit "zfs-replicate-all unable to obtain zfs admin lock:\n$fail_msg" $mailto
104 # Setup our backup marker names
105 current_backup_marker=${remote_fs}@current-backup-${local_pool_guid}
106 previous_backup_marker=${remote_fs}@previous-backup-${local_pool_guid}
108 # List the snapshots on the remote machine.
109 remote_list=$(mktemp /tmp/replicate.XXXXXX)
# NOTE(review): the comment says "remote" but no ssh wrapper is visible on
# this pipeline — presumably it is on the missing line 110; verify.  Also
# note $? after a pipeline reflects only the last stage (the awk).
111 $zfs list -H -t snapshot |
112 grep ^${remote_fs}@ |
113 awk '{print$1}' > $remote_list
115 fatal_and_exit "-E- remote $zfs list command failed" $mailto
118 # List the snapshots on the local machine.
119 # Don't list the current backup marker if it exists on the local side.
120 # If you do, it can mess up the common finding algorithm below.
121 local_list=$(mktemp /tmp/replicate.XXXXXX)
122 $zfs list -H -t snapshot |
123 grep ^${local_pool}/${remote_fs}@ |
124 grep -v ^${local_pool}/${current_backup_marker} |
# Strip the leading "<local_pool>/" so local snapshot names are directly
# comparable with the remote names in the diff below.
125 awk "{gsub(/^$local_pool./,\"\",\$1); print\$1}" > $local_list
127 fatal_and_exit "-E- local $zfs list command failed" $mailto
130 # Destroy the current backup marker snapshot on the remote system if it exists
131 grep -q ${current_backup_marker} $remote_list
133 ssh $remote $zfs destroy ${current_backup_marker}
135 fatal_and_exit "-E- remote $zfs destroy command failed" $mailto
139 # Create the current backup marker snapshot on the remote system
140 ssh $remote $zfs snapshot ${current_backup_marker}
142 fatal_and_exit "-E- remote $zfs snapshot command failed" $mailto
145 # Check to see if the previous backup marker exists in the remote snapshot list.
146 # Check to see if the previous backup marker exists in the local snapshot list.
147 # If the previous backup markers exists, perform an incremental replicate. Else:
148 # 1) check to see if a common snapshot exists, and perform an incremental replicate.
149 # 2) if no common snapshot exists, destroy the local filesystem, and perform a full replicate.
150 grep -q ${previous_backup_marker} $remote_list
# no_markers ends up 0 only when the marker is present on BOTH sides
# (arithmetic || of the two grep exit statuses).
152 grep -q ${previous_backup_marker} $local_list
153 no_markers=$(($no_markers || $?))
# NOTE(review): '==' inside single-bracket '[' is a bashism; '[[ ... ]]'
# (used elsewhere in this script) would be more consistent and portable.
155 if [ $no_markers == 0 ]; then
156 # We found backup markers, incrementally send the new snaps
158 # First, rollback the local pool to the previous backup marker in case the previous
159 # backup was interrupted for some reason. If we don't do this, the zfs send -R command
160 # below may complain about snaps already existing as it tries to resend from the
161 # previous backup marker again from a previously interrupted replicate.
162 $zfs rollback -r ${local_pool}/${previous_backup_marker}
# NOTE(review): the rollback above runs locally — "remote" in the message
# below is misleading.
164 fatal_and_exit "-E- remote incremental $zfs rollback command failed" $mailto
166 # Now it should be safe to send the snaps
167 ssh $remote $zfs send -R -I${previous_backup_marker} ${current_backup_marker} |
168 $zfs receive -vF -d ${local_pool}/${remote_pool}
170 fatal_and_exit "-E- remote incremental $zfs send command failed" $mailto
173 # We didn't find any backup markers, next check to see if we have a common snapshot.
175 # See what the most recent snapshot on the remote end is.
176 latest=$(tail -n 1 $remote_list)
178 # I did this to make sure that diff would always display the most recent common
179 # Since we're keying off the context of the diff, we need to ensure we will get context
180 # by injecting a known difference in case no others exist in the lists.
181 echo bogus.remote >> $remote_list
182 echo bogus.local >> $local_list
# Context lines (leading space) in a unified diff are exactly the entries
# common to both lists; the last such line is the most recent common snap.
183 common=$(diff -u $remote_list $local_list | grep '^ ' | tail -n 1)
185 if [[ -n "$common" ]]; then
186 # We found a common snapshot, incrementally send the new snaps
# ${common/*@/@} keeps only the "@snapname" part of the common snapshot,
# as required by zfs send -I.
187 ssh $remote $zfs send -R -I${common/*@/@} ${current_backup_marker} |
188 $zfs receive -vF -d ${local_pool}/${remote_pool}
190 fatal_and_exit "-E- remote incremental $zfs send command failed" $mailto
193 # We did not find any markers or a common snapshot
194 # At this point, we'll have to send the entire filesystem
195 # Destroy the local filesystem if it exists before receiving the full replicate
196 zfs list ${local_pool}/${remote_fs} > /dev/null 2>&1
198 if [[ $destroy_local_filesystem_on_full_replicate == 1 ]]; then
199 $zfs destroy -r ${local_pool}/${remote_fs}
# NOTE(review): this destroy runs locally — "remote" in the message below
# is misleading.
201 fatal_and_exit "-E- remote full $zfs destroy command failed" $mailto
204 echo "-W- We need to destroy a local filesystem before receiving a full stream."
205 echo " However, since the option is set to prevent this, skipping replicate operation."
206 fatal_and_exit "unable to destroy local filesystem:\n$zfs destroy -r ${local_pool}/${remote_fs} not able to run" $mailto
209 # Send the full filesystem
210 ssh $remote $zfs send -R ${current_backup_marker} |
211 $zfs receive -vF -d ${local_pool}/${remote_pool}
213 fatal_and_exit "-E- remote full $zfs send command failed" $mailto
218 # destroy the previous backup markers now that we've replicated past them
219 # don't check the return codes here because these may not exist, and that is ok
220 $zfs destroy ${local_pool}/${previous_backup_marker} > /dev/null 2>&1
221 ssh $remote $zfs destroy ${previous_backup_marker} > /dev/null 2>&1
223 # Rename the current backup marker to be the previous backup marker
# This rotation makes the snapshot just replicated the base for the next
# incremental run, on both the local and the remote side.
224 $zfs rename ${local_pool}/${current_backup_marker} ${local_pool}/${previous_backup_marker}
226 fatal_and_exit "-E- local $zfs rename command failed" $mailto
228 ssh $remote $zfs rename ${current_backup_marker} ${previous_backup_marker}
230 fatal_and_exit "-E- remote $zfs rename command failed" $mailto