#!/bin/sh
### thinbackup.sh  -*- Sh -*-
## Create incremental LVM thinly-provisioned backups.

### Ivan Shmakov, 2017, 2019, 2020

## To the extent possible under law, the author(s) have dedicated
## all copyright and related and neighboring rights to this software
## to the public domain worldwide.  This software is distributed
## without any warranty.

## You should have received a copy of the CC0 Public Domain Dedication
## along with this software.  If not, see
## <http://creativecommons.org/publicdomain/zero/1.0/>.

### History:

## 0.7  2020-10-24 18:17Z
##      Fixed: also clear prv when handling a thin pool argument,
##      so that a size mismatch is not erroneously detected.

## 0.6  2019-06-14 17:55:10Z
##      (sfn.W7O3NtH5684t17D1RO7HyAtoCULthZVprrGTPDxQnDk.sh)
##      Fixed an informational message (pool name was missing) and
##      the check for already available backups when handling thin
##      pools as origins.

## 0.5  2019-06-13 19:10:48Z
##      (sfn.Ti3GmlR3gDMjsj0BXi2dwbcJBa_dvblFIvq1Iv8L2Qg.sh)
##      Allow for a thin pool to be used in place of an origin.
##      (usage): Updated.
##      (layout_pool, check_prv): New helper functions.

## 0.4  2019-06-13 15:15:30Z
##      (sfn.0qX25g87FqqFkxNHm2OsFUXEbyuS5ZlR-8CNnT2humg.sh)
##      Fixed: reset prv_shots to 0 as appropriate; report the filename
##      and sizes correctly when mismatch is detected.  New --force-size
##      option and new --force-time alias to --force.

## 0.3  2017-11-19 18:15:19Z
##      (sfn.XJDQnQjpei24tbSi2DLtj2uKp5sCZV4aRvYMpi54Ji4.sh)
##      New --thin-copy option to allow copying thin LVs between thin
##      pools.  (A thin LV will still be deemed a new origin if it
##      belongs to the same pool as the current origin.)  Discern thin
##      LVs by non-empty pool_lv (was: thin in lv_layout), also allowing
##      for thin volumes with newer -xTTTTTTTT prefixes to reside on the
##      same VG (but different pools) without requiring --force.

## 0.2  2017-11-11 19:30:10Z
##      (sfn.MunJx3qJyTZf5IZmpOSWDL6AULRaBoI9tKtuWYxcBEw1.sh)
##      Fixed the detection of non-LV arguments.  Obtain the relevant
##      physical extent size from lvs(8).  Only check thin LVs for being
##      newer than origin; look for the prefix at the beginning of LV
##      name, not anywhere inside it.  Fixed -xTTTTTTTT suffix handling
##      in the file or LV name to copy from.  Support --help (-h).
##      Fixed: fail when test(1) fails.  Initialize prv_shots to 0.
##      Actually use ${new_thin_suffix}.
##      (nth, qre): New helper functions.

## 0.1  2017-10-29 18:05:04Z
##      (sfn.dgWlfTWr4P2VQbC3jeiM7TL1X8JZf3OFL-CuKtG9l8k.sh)
##      Initial revision.

### Code:

## Abort on the first command that fails unchecked.
set -o errexit

## Script name without the leading directories (used by usage ()).
progname="${0##*/}"

## FIXME: hardcoded
## Suffix appended to the names of the thin volumes created.
new_thin_suffix='.thin'

## Physical extent size; a placeholder until an origin is processed.
phye_z='-INVALID-'

## Usage: usage STATUS
## Print the synopsis and an example, then exit with STATUS.  The
## text goes to stdout when STATUS is 0, and to stderr otherwise.
usage () {
    case "$1" in
        (0) ;;
        (*) exec >&2 ;;
    esac

    cat <<EOF
Usage:
  # sh ${progname} [--force-size] [--force-time] [--thin-copy] \\
        [--] THIN-1 FILES-1... \\
        [THIN-2 FILES-2...]...

Example:
  # sh ${progname} -- \\
        /dev/vgi/lvpool-z5d0275 \\
        /dev/disk/by-id/usb-Flash_802184f0eda647209c3514a3-0:0 \\
        /dev/vgi/lvfoo-x59f49f06.thin \\
        /dev/vgi/lvfoo-x59f4a2a5 \\
        /dev/vgi/lvfoo-x59f4b4e0 \\
        /dev/disk/by-uuid/85c68500-515b-4f2b-ace6-449682c5b242
EOF

    ## .
    exit "$1"
}

## GNU error function look-alike (for possible future i18n)
## Usage: gerr STATUS FORMAT [ARGUMENT]...
## Write the printf(1)-formatted message to stderr.  Then exit with
## STATUS, unless STATUS is 0, in which case return to the caller.
gerr () {
    gerr_exit=${1}
    shift
    printf "$@" >&2

    if [ "$gerr_exit" != 0 ] ; then
        ## .
        exit "$gerr_exit"
    fi
}

## Usage: nth INDEX [ITEM]...
## Print the ITEM at the zero-based INDEX, followed by a newline.
## Return non-zero, printing nothing, when INDEX is not a non-negative
## decimal integer or when there are fewer than INDEX + 1 items.
nth () {
    case "$1" in
        ("" | *[!0-9]*) return 1 ;;
    esac

    ## Check the count before shifting: shifting by more than "$#" is
    ## an error that either aborts the shell or leaves the parameters
    ## untouched (so that a wrong item gets printed), depending on the
    ## shell used.
    if ! [ "$(($# - 2))" -ge "$1" ] ; then
        return 1
    fi

    shift "$1"
    ## .
    printf %s\\n "$2"
}

## Usage: qre STRING
## Print STRING with a backslash put before every character other
## than an ASCII letter, a digit or an underscore, so that the result
## can be embedded into a regular expression as a literal.
## (See also: Perl quotemeta () function.)
qre () {
    ## NB: the here-document passes the expanded "$1" on verbatim
    ## .
    sed -e 's/[^0-9A-Z_a-z]/\\&/g' <<EOF
$1
EOF
}

## Usage: layout_pool LAYOUT
## Succeed if the comma-separated LAYOUT list (as in the lv_layout
## field of lvs(8)) has "pool" as one of its items; fail otherwise.
layout_pool () {
    ## Wrapping the list in commas makes every item, including the
    ## first and the last one, appear as ",item,".
    case ",${1}," in
        (*,pool,*) return 0 ;;
    esac
    return 1
}

## Usage: check_prv
## Check whether the pool of the current origin holds a thin volume
## with the same name prefix that sorts ahead of the current one.
## Reads: prv, prv_lv, prv_pool, prv_vg, f, force_time_p.
## Sets: prv_pfx1, prv_pfx, prv_new1, prv_newr.
## When such a volume is found, either warn (--force-time given) or
## exit with an error.
check_prv () {
    ## Strip the last .SUFFIX and the -xTTTTTTTT part, if any
    prv_pfx1=${prv_lv%.*}
    prv_pfx=${prv_pfx1%-x????????}-x

    ## NOTE(review): prv_pfx always ends in -x while prv is a path or
    ## empty, so this looks unlikely to ever match -- confirm intent
    if [ "$prv_pfx" = "$prv" ] ; then
        return 0
    fi

    ## List the matching volumes of the pool, sorted by -lv_name;
    ## nothing to compare against if lvs(8) fails
    ## FIXME: we assume that pool_lv does not contain apostrophes
    if ! prv_new1=$(lvs --noheading -o lv_name -O -lv_name \
                        --unbuffered \
                        --select="pool_lv = '${prv_pool}'
 && lv_name =~ '^$(qre "$prv_pfx")'" \
                        -- "$prv_vg") ; then
        return 0
    fi

    ## Take the first name listed; none means nothing to compare
    if ! prv_newr=$(nth 0 ${prv_new1}) ; then
        return 0
    fi

    ## The current volume comes first itself: all is well
    if [ "$prv_newr" = "$prv_lv" ] ; then
        return 0
    fi

    if [ -n "$force_time_p" ] ; then
        gerr 0 "Warning: %s (%s): Potentially older than %s;\
 proceeding anyway (--force-time in effect)\\n" \
             "$f" "${prv:-"${prv_vg}/${prv_pool}"}" "$prv_newr"
    else
        ## .
        gerr 1 "Error: %s (%s): Potentially older than %s;\
 use --force-time?\\n" \
             "$f" "${prv:-"${prv_vg}/${prv_pool}"}" "$prv_newr"
    fi
}

## FIXME: no proper command-line arguments parsing

## With no arguments at all, just show the usage and exit successfully
if ! [ "$#" -ge 1 ] ; then
    ## .
    usage 0
fi

## State carried from one argument to the next by the main loop.
## All of it is initialized explicitly, so that same-named variables
## inherited from the environment cannot be mistaken for an origin.
##   prv        current origin thin volume, as given (empty for a pool)
##   prv_lv     LV name of the current origin (empty for a pool)
##   prv_pool   thin pool of the current origin
##   prv_vg     volume group of the current origin
##   prv_shots  number of snapshots made for the current origin
prv=
prv_lv=
prv_pool=
prv_vg=
prv_shots=0

## Option flags: empty when off, "yes" when given
copy_thin_p=
force_size_p=
force_time_p=

## Consume the leading options; stop at -- or at the first argument
## that does not start with a dash
while [ "$#" -ge 1 ] ; do
    case "$1" in
        (-h | --help)   usage 0 ;;
        (--force-size)  force_size_p=yes ;;
        (--force | --force-time)
            force_time_p=yes ;;
        (--thin-copy)   copy_thin_p=yes ;;
        (--)    shift ; break ;;
        (-*)    usage 1 ;;
        (*)     break ;;
    esac
    shift
done

## At least one non-option argument is required
if ! [ "$#" -ge 1 ] ; then
    ## .
    usage 1
fi

## Main loop: process the remaining arguments from left to right.
## An argument that lvs(8) reports as a thin pool or a thin volume
## becomes the current origin; any other argument is a source, whose
## contents are copied incrementally into a new thin volume made from
## the current origin.  Each finished copy in turn becomes the origin
## for the next source.
while [ "$#" -ge 1 ] ; do
    f=${1}
    shift

    ## Check if the argument designates a thin pool or thin volume
    name=
    if  ! lv_info=$(lvs --noheading \
                        -o lv_full_name,vg_extent_size,lv_layout,pool_lv \
                        --unbuffered --unit=b --nosuffix -- "$f") ; then
        gerr 0 "D: %s: Not recognized as a logical volume\\n" "$f"
        gerr 0 "D: %s: Try it with file(1)\\n" "$f"
        file -- "$f"
    elif  ! name=$(nth 0 ${lv_info}) \
              || ! layout=$(nth 2 ${lv_info}) \
              || ! pool_lv=$(nth 3 ${lv_info} \
                             || layout_pool "$layout") ; then
        ## An LV with neither a pool_lv field nor a pool layout:
        ## fall through and handle it as a source
        : do nothing
    elif  [ -n "$copy_thin_p" ] && [ -n "$pool_lv" ] && [ -n "$prv_pool" ] \
              && [ "$pool_lv" != "$prv_pool" ] ; then
        ## A thin volume of some other pool while --thin-copy is
        ## given: fall through and handle it as a source
        gerr 0 \
  "Warning: %s (%s, %s): Found a thin volume, but --thin-copy is in effect\\n" \
             "$f" "$name" "$pool_lv"
    else
        if layout_pool "$layout" ; then
            ## For a pool, the pool name is the LV name itself
            pool_lv=${name##*/}
            gerr 0 "N: %s (%s): Found a thin pool to use\\n" \
                "$f" "$name"
        else
            gerr 0 "N: %s (%s): Found a thin volume to use as an origin\\n" \
                "$f" "$name"
        fi

        if ! phye_z=$(nth 1 ${lv_info}) ; then
            ## .
            gerr 1 "Error: %s (%s): No physical extent size known?\\n" \
                 "$f" "$name"
        fi

        ## Report on the origin being replaced, if there was one
        if [ -z "$prv" ] ; then
            : do nothing
        elif ! [ "$prv_shots" -ge 1 ] ; then
            ## .
            gerr 1 "Error: %s: No snapshots were made for the previous origin\\n" \
                 "${prv:-"${prv_vg}/${prv_pool}"}"
        else
            gerr 0 "I: %s: %d snapshots made\\n" \
                 "${prv:-"${prv_vg}/${prv_pool}"}" "$prv_shots"
            prv_shots=0
        fi

        ## Check for possible newer origins
        prv_pool=${pool_lv}
        prv_vg=${name%/*}
        if [ "$pool_lv" = "${name##*/}" ] ; then
            ## A pool was given: there is no origin volume as yet;
            ## check_prv is deferred until the first source is seen
            prv=
            prv_lv=
            prv_pfx=
            continue
        fi
        prv_lv=${name##*/}
        prv=${f}
        check_prv

        ## NB: the handling of the origin thin volume given ends here
        continue
    fi

    ## From here on, "$f" is a source to copy from

    if ! [ -n "${prv}${prv_pool}" ] ; then
        ## .
        gerr 1 "Error: %s: No origin given for incremental backup\\n" \
             "$f"
    fi

    ## Sizes in bytes; prv_z is left empty when the origin is a pool
    prv_z=$(test -z "$prv" || blockdev --getsize64  "$prv")
    z=$(blockdev --getsize64  "$f" || wc -c < "$f")

    ## Compare the sizes in physical extents, rounding that of the
    ## source up
    newz=
    if [ -z "$prv" ] \
           || [ $((1 + (-1 + z) / phye_z)) = $((prv_z / phye_z)) ] ; then
        : do nothing
    elif [ -n "$force_size_p" ] ; then
        ## .
        gerr 0 "Warning: %s: Size does not match that of origin (%s != %s);\
 proceeding anyway (--force-size in effect)\\n" \
             "$f" "$z" "$prv_z"
        newz=${z}
    else
        ## .
        gerr 1 "Error: %s: Size does not match that of origin (%s != %s);\
 use --force-size?\\n" \
             "$f" "$z" "$prv_z"
    fi

    if [ -b "$f" ] && ro=$(blockdev --getro  "$f") && [ "$ro" != 1 ] ; then
        gerr 0 "Warning: %s: Source block device is currently writable\\n" \
             "$f"
    fi

    ## Use LVM logical volume name, if any, or just basename otherwise
    b=
    case "$name" in
        (?*)    b=${name##*/} ;;
        (*)     b=${f##*/} ;;
    esac

    ## Choose the (temporary, .new) name for the volume to create
    new=
    case "$b" in
        (*-x???????? | *-x????????.*)
            ## Preserve the timestamp if given (and also basename)
            new=/dev/${prv_vg}/${b%.thin*}${new_thin_suffix}.new ;;
        (*)
            ## Otherwise, use the current time, in hexadecimal
            if [ -n "$prv" ] ; then
                ## NB: not using "$b" here, so f=/dev/sdX does not go into "$new"
                new=${prv%-x????????*}-x$(printf %x\\n "$(date +%s)")\
${new_thin_suffix}.new
            else
                new=/dev/${prv_vg}/${b}-x$(printf %x\\n "$(date +%s)")\
${new_thin_suffix}.new
            fi
            ;;
    esac

    ## The deferred check for when the origin was given as a pool
    if [ -z "$prv_lv" ] ; then
        prv_lv=${b}
        check_prv
    fi

    ## NB: Alternatively, use --setactivationskip=n
    gerr 0 "I: %s: Creating a snapshot using %s as origin\\n" \
         "$new" "${prv:-"${prv_vg}/${prv_pool}"}"
    if [ -n "$prv" ] ; then
        lvcreate --snapshot -n "${new##*/}" -- "$prv"
    else
        ## No origin volume: create a fresh thin volume in the pool
        lvcreate --thinpool="$prv_pool" -V "$z"B -n "${new##*/}" -- "$prv_vg"
    fi
    if [ -n "$newz" ] ; then
        gerr 0 "I: %s: Extending thin volume to %d bytes\
 (--force-size in effect)\\n" \
            "$new" "$newz"
        lvextend -L "$newz"B -- "$new"
    fi
    lvchange -Ka y -- "$new"

    ## Perform incremental copy
    ## (Compare the source and the new volume chunk by chunk and only
    ## write those chunks that differ.)
    gerr 0 "I: %s: Copying data incrementally from %s\\n" \
         "$new" "$f"
    perl -e'use common::sense;
            require IO::File;
            my ($in, $out)
                = (($ARGV[0] eq "-" ? \*STDIN : IO::File->new ($ARGV[0], "r")),
                   IO::File->new ($ARGV[1], "r+"));
            foreach my $h ($in, $out) {
                die ("Cannot open, seek or set binary mode: ", $!)
                    unless (defined ($h) && $h->seek (0, 0)
                            && $h->binmode ());
            }
            my ($count, $written) = (0, 0);
            ## FIXME: hardcoded; matches default --chunksize, see lvmthin(7)
            local $/ = \65536;
            while (defined (my $a = $in->getline ())
                   && defined (my $b = $out->getline ())) {
                ++$count;
                next
                    if ($a eq $b);
                ## Rewind by the size of the chunk actually read from
                ## the target, which may be less than the record size
                ## at the very end of the volume
                my $back = 0 - length ($b);
                $out->seek ($back, 1)
                    or die ("Cannot seek back (by ", $back, "): ", $!);
                $out->write ($a)
                    or die ("Cannot write: ", $!);
                ++$written;
                if ($written % 1024 == 0) {
                    warn ("I: Currently ", $written, " blocks written (of ", $count, ")\n");
                    kill (19, 0) if (0);
                }
            }
            warn ("I: ", $written, " blocks written (of ", $count, ")\n");' \
        - "$new" < "$f"

    ## Finalize transaction
    lvchange -p r -- "$new"
    lvrename -- "$new" "${new%.new}"
    ## NB: "$prv" is empty when the origin is a pool; name the pool then
    gerr 0 "N: %s: Done copying data from %s (as an update to %s)\\n" \
         "${new%.new}" "$f" "${prv:-"${prv_vg}/${prv_pool}"}"
    ## The volume just made is the origin for the next source
    prv=${new%.new}
    prv_shots=$((1 + prv_shots))
done

### Emacs trailer
## Local variables:
## coding: us-ascii
## fill-column: 72
## indent-tabs-mode: nil
## ispell-local-dictionary: "american"
## End:
### thinbackup.sh ends here
