#!/usr/bin/perl -w

#
# apt-file - APT package searching utility -- command-line interface
#
# (c) 2001 Sebastien J. Gross <seb@debian.org>
#
# This package is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 dated June, 1991.
#
# This package is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this package; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA.

use strict;
use warnings;
use Getopt::Long qw/:config gnu_getopt no_ignore_case/;
use File::Basename;
use File::Path;
use File::Temp;
use AptPkg::Config '$_config';
use List::MoreUtils qw/any uniq/;

my $Conf;

sub error($) {
    print STDERR "E: ", shift, $! ? ": $!" : "", "\n";
    undef $!;
    exit 1;
}

sub errorx($) {
    print STDERR "E: ", shift, "\n";
    exit 1;
}

sub debug($;$) {
    return if !defined $Conf->{verbose};
    my ( $msg, $use_errstr ) = @_;
    print STDERR "D: ", $msg;
    print STDERR $! ? ": $!" : "" if $use_errstr;
    print STDERR "\n";
    undef $!;
}

sub debug_line($) {
    return if !defined $Conf->{verbose};
    print STDERR shift;
}

sub reverse_hash($) {
    my $hash = shift;
    my $ret;
    foreach my $key ( keys %$hash ) {
        foreach ( @{ $hash->{$key} } ) {
            push @{ $ret->{$_} }, $key;
        }
    }
    return $ret;
}

sub fetch_files ($) {
    exec('apt', 'update');
}

sub print_winners ($$) {
    my ( $db, $matchfname ) = @_;
    my $filtered_db;

    if ($matchfname) {
        # Everything is a match
        $filtered_db = $db;
    } else {
        # $db is a hash from package name to array of file names.  It
        # is a superset of the matching cases, so first we filter this
        # by the real pattern.
        $filtered_db = {};
        foreach my $key ( keys %$db ) {
            if ( $key =~ /$Conf->{pattern}/ ) {
                $filtered_db->{$key} = $db->{$key};
            }
        }
    }

    if (not $filtered_db or not %{$filtered_db}) {
        debug('No matches');
        exit 1;
    }

    # Now print the winners
    if ( !defined $Conf->{package_only} ) {
        foreach my $key ( sort keys %$filtered_db ) {
            foreach ( uniq sort @{ $filtered_db->{$key} } ) {
                print "$key: $_\n";
            }
        }
    }
    else {
        print map {"$_\n"} ( sort keys %$filtered_db );
    }
    exit 0;
}

sub do_grep($$) {
    my ( $data, $pattern ) = @_;
    my ( $pkgs, $fname, %seen, @cmd, $ret);
    debug "regexp: ${pattern}";
    $| = 1;
    if ($Conf->{is_regexp}) {
        @cmd = ('zcat', '--');
    }
    else {
        delete $ENV{$_} foreach qw{GREP_OPTIONS GREP_COLOR POSIXLY_CORRECT
                                   GREP_COLORS};
        @cmd = ('zfgrep');
        push(@cmd, '-i') if $Conf->{ignore_case};
        if ($Conf->{from_file}) {
            push(@cmd, '-f', $Conf->{zgrep_tmpfile});
        }
        else {
            my $zgrep_pattern = $Conf->{grep_pattern} // '.';
            $zgrep_pattern =~ s{^/}{};
            push(@cmd, '--', $zgrep_pattern);
            @cmd = ('zcat', '--') if $zgrep_pattern eq '.';
        }
    }
    my $regexp = eval { $Conf->{ignore_case} ? qr/${pattern}/i : qr/${pattern}/ };
    error($@) if $@;
    my $quick_regexp = escape_parens($regexp);
    foreach my $file (@{$data}) {
        # Skip already searched files:
        next if $seen{$file}++;
        my $fd;
        if ($cmd[0] eq 'zcat' and $file !~ m/\.gz$/) {
            debug "Opening $file directly";
            open($fd, '<', $file) or
              error("open $file failed");
        } else {
            debug "Search in $file using @cmd";
            open($fd, '-|', @cmd, $file) or
              error("Cannot run \"@cmd\" on $file");
        }
        while (<$fd>) {

            # faster, non-capturing search first
            next if !/$quick_regexp/o;

            next if !( ( $fname, $pkgs ) = /$regexp/o );

            # skip header lines
            # we can safely assume that the name of the top level directory
            # does not contain spaces
            next if !m{^[^\s/]*/};

            debug_line ".";
            foreach ( split /,/, $pkgs ) {

                # Put leading slash on file name
                push @{ $ret->{"/$fname"} }, basename $_;
            }
        }
        close($fd);
        debug_line "\n";
    }
    return reverse_hash($ret);
}

sub escape_parens {
    my $pattern = shift;

    # turn any capturing ( ... ) into non capturing (?: ... )
    $pattern =~ s{ (?<! \\ )    # not preceded by a \ 
                        \(      # (
                   (?!  \? )    # not followed by a ?
                 }{(?:}gx;
    return $pattern;
}

sub fix_regexp {
    my $pattern = shift;
    
    # If pattern starts with /, we need to match both ^pattern-without-slash
    # (which is put in $pattern) and ^.*pattern (put in $pattern2).
    # Later, they will be or'ed together.
    my $pattern2;
    if ( $pattern !~ s{(\$|\\[zZ])$}{} ) {
        # Not anchored at the end:
        $pattern = $pattern . '\S*';
    }

    if ( $pattern =~ s{^(\^|\\A)/?}{} ) {
        # If pattern is anchored at the start, we're just not prefixing it
        # with .* after having removed ^ and /
    }
    else {
        if ( $pattern =~ m{^/} ) {

            # same logic as below, but the "/" is not escaped here
            $pattern2 = '.*?' . $pattern;
            $pattern  = substr( $pattern, 1 );
        }
        else {
            $pattern = '.*?' . $pattern;
        }
    }

    $pattern  = escape_parens($pattern);
    $pattern2 = escape_parens($pattern2) if defined $pattern2;

    return ($pattern, $pattern2);
}

sub grep_file($) {
    my $data    = shift;
    my $pattern = $Conf->{pattern};

    # If pattern starts with /, we need to match both ^pattern-without-slash
    # (which is put in $pattern) and ^.*pattern (put in $pattern2).
    # Later, they will be or'ed together.
    my $pattern2;

    if ( $Conf->{is_regexp} ) {
        if (!$Conf->{from_file}) {
            ($pattern, $pattern2) = fix_regexp($pattern);
        }
    }
    elsif ( substr( $pattern, 0, 2 ) eq '\/' ) {
        if ( $Conf->{fixed_strings} ) {

            # remove leading /
            $pattern = substr( $pattern, 2 );
        }
        else {

            # If pattern starts with /, match both ^pattern-without-slash
            # and ^.*pattern.
            $pattern2 = '.*?' . $pattern;
            $pattern  = substr( $pattern, 2 );
        }
    }
    else {
        $pattern = '.*?' . $pattern unless $Conf->{fixed_strings};
    }

    if ( ! defined $Conf->{fixed_strings} && ! defined $Conf->{is_regexp} ) {
        $pattern  .= '[^\s]*';
        $pattern2 .= '[^\s]*' if defined $pattern2;
    }

    $pattern = "$pattern|$pattern2" if defined $pattern2;
    $pattern = '^(' . $pattern . ')\s+(\S+)\s*$';

    my $ret = do_grep $data, $pattern;
    print_winners $ret, 1;
}

sub grep_package($) {
    my $data = shift;

    my $pkgpat = $Conf->{pattern};
    if ( $Conf->{is_regexp} ) {
        if ( $pkgpat !~ s{^(\^|\\A)}{} ) {
            $pkgpat = '\S*' . $pkgpat;
        }

        if ( $pkgpat !~ s{(\$|\\[zZ])$}{} ) {
            $pkgpat = $pkgpat . '\S*';
        }
        $pkgpat = escape_parens($pkgpat);
    }
    elsif ($Conf->{fixed_strings}) {
        $pkgpat = $Conf->{pattern};
    }
    else {
        $pkgpat = '\S*' . $Conf->{pattern};
    }

    # File name may contain spaces, so match template is
    # ($fname, $pkgs) = (line =~ '^\s*(.*?)\s+(\S+)\s*$')
    my $pattern = join "",
        (
        '^\s*(.*?)\s+', '(\S*/', $pkgpat,
        defined $Conf->{fixed_strings} || defined $Conf->{regexp} ?
            '(?:,\S*|)' : '\S*', ')\s*$',
        );
    my $ret = do_grep $data, $pattern;
    print_winners $ret, 0;
}

sub print_help {
    my $err_code = shift || 0;

    print <<"EOF";

apt-file [options] action [pattern]
apt-file [options] -f action <file>
apt-file [options] -D action <debfile>

Configuration options:
    --architecture     -a  <arch>       Use specific architecture [L]
    --fixed-string     -F               Do not expand pattern
    --from-deb         -D               Use file list of .deb package(s) as
                                        patterns; implies -F
    --from-file        -f               Read patterns from file(s), one per line
                                        (use '-' for stdin)
    --ignore-case      -i               Ignore case distinctions
    --index-names          <names>      Only search indices listed in <names> [L]
    --package-only     -l               Only display packages name
    --substring-match                   pattern is a substring (no glob/regex)
    --regexp           -x               pattern is a regular expression
    --verbose          -v               run in verbose mode
    --help             -h               Show this help.
                       --               End of options (neccessary if pattern
                                        starts with a '-')

[L]: Takes a comma-separated list of values.

Action:
    update                              Fetch Contents files from apt-sources.
    search|find        <pattern>        Search files in packages
    list|show          <pattern>        List files in packages
EOF
    exit $err_code;
}

sub get_options() {
    my %options = (
        "architecture|a=s"  => \$Conf->{arch},
        "index-names=s"     => \$Conf->{index_names},
        "verbose|v"         => \$Conf->{verbose},
        "ignore-case|i"     => \$Conf->{ignore_case},
        "regexp|x"          => \$Conf->{is_regexp},
        "substring-match"   => \$Conf->{substring_match},
        "package-only|l"    => \$Conf->{package_only},
        "fixed-string|F"    => \$Conf->{fixed_strings},
        "from-file|f"       => \$Conf->{from_file},
        "from-deb|D"        => \$Conf->{from_deb},
        "help|h"            => \$Conf->{help},
    );
    Getopt::Long::Configure("bundling");
    GetOptions(%options) || print_help(1);
}

sub main {
    my ($action, $is_search, @arch_res, @index_names);
    get_options();
    $_config->init;
    @arch_res = split(m/\s*,\s*/,
                      $Conf->{arch} || $_config->{'APT::Architecture'});
    @index_names = split(m/\s*,\s*/, $Conf->{index_names} || 'deb');

    $action = shift(@ARGV) // 'none';

    if ($Conf->{fixed_strings} and $Conf->{is_regexp}) {
        errorx('Cannot use -F together with -x');
    }
    if ($Conf->{fixed_strings} and $Conf->{substring_match}) {
        errorx('Cannot use -F together with --substring-match');
    }
    if ($Conf->{is_regexp} and $Conf->{substring_match}) {
        errorx('Cannot use -x together with --substring-match');
    }

    if ($action =~ m/^(?:search|find|list|show)$/) {
        $is_search = 1;
        if (not defined($Conf->{substring_match}) and
            not $Conf->{fixed_strings} and not $Conf->{is_regexp}) {
            debug('substring match (default)');
            $Conf->{fixed_strings} = 1
              if $action eq 'list' or $action eq 'show';
        }
    }

    if ($Conf->{from_file} || $Conf->{from_deb}) {
        use Regexp::Assemble;
        my $ra = Regexp::Assemble->new;
        my @list;
        if ($Conf->{from_deb}) {
            $Conf->{from_file} = 1;
            $Conf->{fixed_strings} = 1;
            $Conf->{is_regexp} = 0;

            debug("this is a .deb file, calling dpkg-deb to get contents");
            my @content;
            foreach my $deb (@ARGV) {
                push @content, qx{dpkg-deb -c \Q$deb};
                if ($? != 0) {
                    errorx("Couldn't get contents from $deb");
                }
            }
            foreach my $line (@content) {
                next if $line =~ m{/$};  # skip dirs
                my @fields = split(/\s+/, $line);
                my $filename = $fields[5];
                $filename =~ s{^\.}{};
                push @list, "$filename\n";
            }
        }
        else {
            # normal text files
            # - assume "STDIN" if no arguments are given.
            push @ARGV, '-' unless @ARGV;
            foreach my $file (@ARGV) {
                if ($file eq '-') {
                    push @list, <STDIN>;
                    next;
                }
                open(my $fh, '<', $file)
                    or error("Can't open $file");
                push @list, <$fh>;
                close($fh);
            }
        }
        if ($Conf->{is_regexp}) {
            foreach my $line (@list) {
                chomp $line;
                my ($p1, $p2) = fix_regexp($line);
                $ra->add($p1);
                $ra->add($p2) if defined $p2;
            }
        }
        else {
            # create tmpfile for zgrep with patterns that have leading slash removed
            my @zgrep_list = @list;
            map { s{^/}{} } @zgrep_list;
            my $tmpfile = File::Temp->new();
            print $tmpfile @zgrep_list;
            $tmpfile->flush();
            $Conf->{zgrep_tmpfile} = $tmpfile;

            # create actual search pattern
            @list = map {quotemeta} @list;
            $ra->add(@list);
        }
        $Conf->{pattern} = $ra->as_string(indent => 0);
    }
    else {
        $Conf->{pattern} = shift @ARGV;
        if (defined($Conf->{pattern}) and not $Conf->{is_regexp}) {
            my $pattern = $Conf->{pattern};
            $Conf->{grep_pattern} = $pattern;
            $pattern = quotemeta($pattern);
            $Conf->{pattern} = $pattern;
        }
    }
    undef $!;

    my $actions = {
        update => \&fetch_files,
        search => \&grep_file,
        find   => \&grep_file,
        list   => \&grep_package,
        show   => \&grep_package,
    };

    $Conf->{help} = 2
        if $is_search && !defined $Conf->{pattern};
    $Conf->{help} = 2
        if !defined $actions->{$action}
            && !defined $Conf->{help};
    print_help( $Conf->{help} - 1 ) if defined $Conf->{help};

    my $action_handler = $actions->{$action};
    if ($is_search) {
        my @contents = contents_file_paths(\@index_names, \@arch_res);
        $action_handler->(\@contents);
    } else {
        $action_handler->();
    }

}

sub contents_file_paths {
    my ($index_names, $arch_res) = @_;
    my (@paths, $any_entries);
    my @cmd = (qw(apt-get indextargets --format),
               '$(CREATED_BY) $(ARCHITECTURE) $(FILENAME)');
    debug("Running @cmd");
    open(my $fd, '-|', @cmd)
      or errorx('Cannot execute apt-get indextargets');
    while (my $line = <$fd>) {
        my ($index_name, $arch, $filename);
        chomp($line);
        next if $line !~ s/^Contents-//;
        $any_entries = 1;
        ($index_name, $arch, $filename) = split(' ', $line, 3);
        if ($arch ne '$(ARCHITECTURE)') {
            next if not any { $_ eq $arch } @{$arch_res};
        }
        next if not any { $_ eq $index_name } @{$index_names};
        push(@paths, $filename);
    }
    if (not @paths) {
        if (not $any_entries) {
            errorx('The cache is empty. You need to run "apt update" first.');
        }
        errorx('No contents with the given restrictions');
    }

    close($fd);
    return @paths;
}

main();

__END__

# our style is roughly "perltidy -pbp"
# vim:sts=4:sw=4:expandtab
