#!/usr/bin/perl
### ckpacklist.perl  -*- Perl -*-
## Check list of installed packages against user-specified categories.

### Ivan Shmakov, 2018, 2020

## To the extent possible under law, the author(s) have dedicated
## all copyright and related and neighboring rights to this software
## to the public domain worldwide.  This software is distributed
## without any warranty.

## You should have received a copy of the CC0 Public Domain Dedication
## along with this software.  If not, see
## <http://creativecommons.org/publicdomain/zero/1.0/>.

### History:

## 0.3  2020-07-15 16:15Z
##      Fixed: ignore trailing whitespace in package list
##      (i. e. install, recommend, suggest) lines.
##      ($example): Fixed: use recommend (was: recommends.)

## 0.2  2020-07-08 21:17:23Z
##      (sfn.FzmFOgpQGai856pxcMOyGUfudU0-1HUBdOtIqTTg_S0.perl)
##      New --[no-]debug, --[no-]empty and -u, --unknown[=NAME] options.
##      Fixed: use safer (but newer) <<>> (was: <>.)  Use Getopt::Long.

## 0.1  2018-07-04 16:10:17Z
##      (sfn.LxRflngGh5jBP-af4VVV-MbnLA3XDy7IW5YlPFopHJY.perl)
##      Initial revision.

### Code:

use common::sense;
use English qw (-no_match_vars);

# require Data::Dump;
require Getopt::Long;

our ($usage, $example) = (qq {Usage:
    \$ ${0} [--[no-]debug] [--[no-]empty] [-u|--unknown[=NAME]]
          [-v|--verbose] [--] CATEGORIES.LIST [PACKAGES.LIST]...
    \$ ${0} -h|--help
}, qq {Example:

bash\$ ${0} example.com-cf/categories.list \\
          <(apt-mark showmanual) | sort -n | less -F 
-4	11	network	dns-root-data idn netbase sic
-10	74	base	apg ca-cacert file libencode-locale-perl
 libtasn1-bin libterm-readline-gnu-perl lynx lzop minilzip pdlzip
bash\$ 

CATEGORIES.LIST should be formatted as per the following example.

;;; cats.list
[base]
recommend = apg lynx pdlzip
[network]
recommend = dns-root-data socat
;;; cats.list ends here

Note that the apt-mark(1) command can also be executed via SSH
(like: ssh -- REMOTE apt-mark), or for a system installed under
a specific directory (such as a chroot environment), like:

    (d=/other/rootdir ; apt-mark -o Dir="\$d" \\
         -o Dir::State::status="\$d"/var/lib/dpkg/status)

});

Getopt::Long::Configure (qw (gnu_getopt));

our ($debug_p, $empty_p, $verbose_p)
    = (0, 1, 0);
our ($unknown)
    = ();

my %cmdline_options =  (q (debug!)      => \$debug_p,
                        q (empty!)      => \$empty_p,
                        q (v|verbose!)  => \$verbose_p,
                        q (u|unknown:s) => \$unknown,
                        q (h|help)
                        => sub { print ($usage, "\n", $example); exit (0); });

my $parsable_p
    = Getopt::Long::GetOptions (%cmdline_options)
    or die ("Cannot parse command line arguments");

if (@ARGV < 1) {
    print STDERR ($usage);
    ## .
    exit (1);
}

warn ("D: --verbose in effect") if ($debug_p && $verbose_p);
warn ("D: --no-empty in effect") if ($debug_p && ! $empty_p);

our @ucats   = (! defined ($unknown) ? ()
                : "" eq $unknown ? ("(unknown)")
                : ($unknown));
warn ("D: --unknown=", $ucats[0], " in effect")
    if ($debug_p && @ucats);

my %cat;

my $cf
    = shift (@ARGV);
open (my $ch, "<", $cf)
    or die ($cf, ": Cannot open categories file: ", $!);

## Read the categories.
my $cc;
while (<$ch>) {
    ## .
    next
        if (m { ^ \s* (?: $ | ;) }x);

    ## Parse [CATEGORY] headers.
    if (my ($newc) = m { ^ \s* \[ (\S*) \] $ }x) {
        $cc = $newc;
        ## .
        next;
    }

    ## .
    my ($k, $v) = m {
        ^ \s* (install | recommend | suggest)
        \s* = \s* (.* \S)? \s* $
    }x  or next;
    $cat{$cc} = { }
        unless (exists ($cat{$cc}));
    @{$cat{$cc}}{split (/\s+/, $v)}
        = ();
}

## Map packages back to categories.
my %pkg;
foreach my $c (keys (%cat)) {
    foreach my $p (keys (%{$cat{$c}})) {
        $pkg{$p} = [ ]
            unless (exists ($pkg{$p}));
        push (@{$pkg{$p}}, $c);
    }
}

## Compute the lists of missing packages.
my %miss;
while (<<>>) {
    ## .
    next
        if (m { ^ \s* (?: $ | ;) }x);

    foreach my $p (split ()) {
        unless (exists ($pkg{$p})) {
            if (@ucats > 0) {
                my ($c)
                    = @ucats;
                warn ("D: ", $p, ": Unknown package; put under ", $c)
                    if ($debug_p);
                @{$cat{$c}}{($p)}
                    = ();
            }
            ## .
            next;
        }

        my $pcats
            = $pkg{$p};
        foreach my $c (@$pcats) {
            @{$miss{$c}}{keys (%{$cat{$c}})} = ()
                unless (exists ($miss{$c}));
            delete ($miss{$c}->{$p});
        }
    }
}

## Output the results.
foreach my $c (keys (%cat)) {
    my @all
        = keys (%{$cat{$c}});
    my ($n, @pa)
        = ((@ucats > 0 && $c eq $ucats[0])
           ? (0,
              $verbose_p ? map { $_ . "+"; } (@all) : @all)
           : ! exists ($miss{$c})
           ? (scalar (@all),
              $verbose_p ? map { $_ . "-"; } (@all) : @all)
           : (scalar (keys (%{$miss{$c}})),
              $verbose_p
              ? map { exists ($miss{$c}->{$_}) ? $_ . "-" : $_ } (@all)
              : keys (%{$miss{$c}})));
    ## .
    next
        unless (@pa > 0);
    next
        unless ($empty_p || $n != @all);
    print (- $n, "\t", scalar (@all), "\t", $c,
           "\t", join (" ", sort { $a cmp $b } (@pa)), "\n");
}

### Emacs trailer
## Local variables:
## coding: us-ascii
## indent-tabs-mode: nil
## ispell-local-dictionary: "american"
## End:
### ckpacklist.perl ends here
