#!/usr/bin/perl -w

# duck - the Debian Url Checker
# Copyright (C) 2014 Simon Kainz <simon@familiekainz.at>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# he Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# On Debian GNU/Linux systems, the complete text of the GNU General
# Public License can be found in `/usr/share/common-licenses/GPL-2'.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, you can find it on the World Wide
# Web at http://www.gnu.org/copyleft/gpl.html, or write to the Free
# Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston,
# MA 02110-1301, USA.

use strict;

use lib '/usr/share/duck/lib';

use DUCK;
use Getopt::Std;
use YAML::XS qw(Load);
use Parse::Debian::Packages;
use Parse::DebControl qw(parse_file);

sub proc($;$;$;$;$);
sub guess_type($);

our $VERSION="0.4";
our $copyright_year="2014";

$Getopt::Std::STANDARD_HELP_VERSION=1;

my $cf_parser = new Parse::DebControl;

#my $debug=0;
my $exitcode=0;

my @yaml_urls;

my @extract=("Homepage","Repository","Repository-Browse","Screenshots","Bug-Submit","Bug-Database","Changelog","Donation","FAQ","Gallery","Other-References","Webservice","Reference","URL","Eprint");

my @upstream_filenames=("debian/upstream","debian/upstream-metadata.yaml","debian/upstream/metadata");

my $schema_to_regex={"Vcs-Git" => '^\s*git:\/\/',
		     "Vcs-Svn" => '^\s*svn:\/\/',
#		     "Email" => 'mailto:\s*.+@.+'
			 
};

my $extract_hash;
my $upstream_filename;


my %opt;
getopts('qvf:u:FU', \%opt);

if ( $opt{v} && $opt{q} ) 
{
    print STDERR " Please specify either -q or -v\n";
    exit(1);
}

foreach my $a (@extract)
{
    $extract_hash->{$a}=1;
}

my $DUCK= DUCK->new();
my $funcref= $DUCK->cb();

my @entries;

#Processing debian/control file

if (!$opt{F})
{
    my $opts= {stripComments => 'true'};
    my $cf=($opt{f} or "debian/control");
    my @data_file = $cf_parser->parse_file($cf, $opts);
    my @cfdata=$data_file[0][0];
 
# create list of urls from debian/control
    
    foreach my $cfline1 (@data_file)
    {
	foreach my $cfline2 (@$cfline1)
	{
	    foreach my $k (keys %$cfline2)
	    {
		push (@entries, ["debian/control",$k,$cfline2->{$k} ]);
	    }
	}
    }
}

#Processing upstream metadata file
if (!$opt{U})
{
# extend list of urls by urls from upstream metadata
    foreach (@upstream_filenames)
    {
	@yaml_urls=();
	
	if ( -f $_)
	{
	    $upstream_filename=$_;
	    open my $fh,"<",$_;
	    
	    my @raw=<$fh>;
	    my $raw_string=join("",@raw);
	    
	    close($fh);
	my $hashref;
	    
	    eval { Load($raw_string);}; if (!$@)
	    {
		$hashref=Load($raw_string);
		
		foreach my $k (keys $hashref)
		{
		    if ($extract_hash->{$k})
		    { 
			proc("",\@yaml_urls,$k,$hashref->{$k});
		    }
		}
		
	    }
	    
	}
	
	
# iterate over all urls, run checks.
	
	foreach my $yaml_url(@yaml_urls)
	{
	    # try to be smart: git:// and svn:// based urls must not be handled 
	    # by curl.
	    
	    my $keyname=guess_type(@$yaml_url[1]); 
	    
    if (!$keyname) {$keyname="URL";}
	    
	    push (@entries, [$upstream_filename.": ".@$yaml_url[2],$keyname,@$yaml_url[1] ]);
	}
    }
    
}

foreach my $entry (@entries)
{
    my $type=@$entry[0];
    my $k=@$entry[1];
    my $url=@$entry[2];
    if ($funcref->{$k})
    {
	my $res=&{$funcref->{$k}}($url);
	
	if (!defined $res)
	{
	    if (!$opt{q})
	    {
		print STDERR " Skipping field ".$k." (Reason: Missing helper!)\n";
	    }
	}
	else
	{
	if ($res->{retval}>0)
	{
	    if (!$opt{q})
	    {
		print STDERR  $type.": ".$k.": ".$url.": ERROR\n";
		print STDERR $res->{response};
		print STDERR "\n\n";
	    }
	   $exitcode=1;
	}
	else
	{
	 if ($opt{v})
	 {
	     print STDOUT $type.": ".$k.": ".$url.": OK\n";
	 }
	 
	}
	}
    }



}

exit($exitcode);

##############################################################################
# Helper functions

sub guess_type($)
{
    my ($url)=@_;


 foreach my $schema (keys $schema_to_regex)
    {
#	print "Schema: $schema, Url: ".$url."\n";

	if ( $url =~/$schema_to_regex->{$schema}/)
	{
#	    print "ist $schema\n";
	    return $schema;
   
	}
    }
    return undef;

}


sub HELP_MESSAGE()
{
print STDOUT <<EOF;

Usage: duck [options] 
  -h\t--help\tdisplay this usage information and exit
  -f file\tspecify path to control file
  -F\t\tskip processing of debian/control file
  -u file\tspecify path to upstream metadata file
  -U\t\tskip processing of upstream metadata file
  -q\t\tquiet mode, suppress all output
  -v\t\tverbose mode

EOF
    exit(0);
}

sub VERSION_MESSAGE()
{
    print "duck $VERSION\n";

print <<EOF;
This code is copyright $copyright_year by Simon Kainz <simon\@familiekainz.at>
all rights reserved.
This program comes with ABSOLUTELY NO WARRANTY.
You are free to redistribute this code under the terms of the
GNU General Public License, version 2 or later.

EOF
}


sub proc($;$;$;$;$)
{
    my ($sp,$ref,$key,$r,$p)=@_;
    my $t=ref($r);
    
    if ($t eq "HASH")
    {
	my %a=%{$r};
	foreach my $e (keys %a)
	{
	    return proc($sp,$ref,$e,$a{$e},$key);
	}
	
    }
    
    
    if ($t eq "ARRAY")
    {
	
	my @a=@{$r};
	foreach my $e (@a)
	{
	    return proc($sp,$ref,$key,$e,$key);
	}
    }
    
    if ($t eq "")
    {
	if ($extract_hash->{$key})
	{
	    my @data=($sp,$r,$key);
	    push(@{$ref},\@data);
	}
    }
}
