#! /usr/bin/env perl

# Use:
# $ perldoc texdirflatten
# to see embedded documentation. Alternatively, you can create manual
# or HTML pages using pod2man and pod2html.

=pod

=head1 NAME

B<texdirflatten> - Collects all components of a (La)TeX file in a
single output directory -- i.e., flattens its hierarchy.

=head1 SYNOPSIS

texdirflatten [-1|--onetex] [-f|--file input.tex] [-o outputdir] [-?|--help]

=head1 DESCRIPTION

It parses the source file, following its included children (La)TeX
files recursively, to collect together its components, such as
graphics and BiBTeX bibliography files in different directories.

=for comment
Run without parameters to see usage.

=head1 OPTIONS

=over

=item B<--file>, B<-f> I<input.tex>

Specifies input (La)TeX file.

=item B<--onetex>, B<-1>

If specified, produces a single TeX file by expanding all \input and
\include commands in place.

=item B<--output>, B<-o> I<outputdir>

Directory to collect all files. B<texdirflatten> will copy each source
file, graphics and bibliography file to this directory. It will be
created if it is unexistent. If unspecified, it defaults to C<flat/>.

=item B<--help>, B<-?>

Show this manual page.

=back

=head1 EXAMPLES

The following example scans C<manuscript.tex> in the current directory
and gathers it and all its components in the C<submit_01/> directory:

 $ texdirflatten -f manuscript.tex -o submit_01

=head1 CAVEATS

Please take backups before running this command. No warranties
whatsoever provided.

You may need to run C<epstopdf> on EPS files if you are using C<pdflatex>:

 $ for i in *.eps; do epstopdf $i; done

=head1 BUGS

Bug reports and patches are welcome.

=head1 AUTHOR

Cengiz Gunay <cengique<AT>users.sf.net>

=head1 COPYRIGHT AND LICENSE

Copyleft 2003-2017, Cengiz Gunay

This library is free software; you may redistribute it and/or modify
it under the same terms as Perl itself.

=cut

# TODO:
#
# - parse BIBINPUTS environment variable and search figures and such
# there as well.

package texdirflatten;

use Getopt::Long qw(GetOptions);
use Pod::Usage;
use strict;
use warnings;
use re 'eval';

# Global
my $outputdir   = "flat";	# Output directory
my $file = "";			# File to read
my $onetex = 0;			# Indicates all input and include commands be expanded
my $help;

# parse helpers
my %texfiles;

# GetOpt::Long::
my $result = GetOptions ("output|o=s" => \$outputdir,
			 "onetex|1" => \$onetex,
			 "file|f=s" => \$file,
			 "help|?" => \$help );

# regexp for none or an even number of backslashes (\):
my $unescaped =
      '(?<! \\\\)(?(?= \\\\) (?: \\\\\\\\)* )?';

# counter for files translated (like LyX does)
$::filecount = 0;
my $DEBUG = 1;

pod2usage( -verbose => 2 ) if ($help);

pod2usage( -section => "SYNOPSIS") if ($file eq "");

system "mkdir $outputdir" if (not -d $outputdir);

# start recursing
parseTeX($file, "", popfile($file), 0);

sub parseTeX {
  my $file = shift;
  my $inputdir = shift;
  my $outfile = "$outputdir/" . shift;
  my $level = shift;

  # Add a proper TeX suffix if it's missing
  $outfile .= '.tex' if ! ($outfile =~ /\.tex\s*$/i );

  my @flats;
  my @longs;
  my $popped;

  open my $FILE, $file or die "Cannot find file to read $file\n";

  # Read to whole file first, and then scan for regexps
  my $contents = "";
  while(<$FILE>) {
    $contents = $contents . $_;
  }
  close $FILE;

  ## First, remove comments so that input commands inside are ignored.
  # Also remove trailing newline and all whitespace at the beginning
  # of the following line like TeX does it. Only for %'s following
  # the proper backslash pattern.
  $contents =~ s/( $unescaped )\%.*?\n[ \t]*/$1/gx;

  # Look for graphics include statement
  if ($contents =~ /\\input\@path\{\{([^}]*)\}\}/) {
    $inputdir = $1;
    print "Found input directory: $inputdir\n";
  }

  print "Parsing '$file' in directory '$inputdir'\n";

  # Default value
  #$inputdir = './' if (-z $inputdir);

  # three cases: graphics, inputs and bibs

  # an \includegraphics statement
  @::flats = ();
  @::longs = ();
  #$::popped = "";

  $contents =~
    s/\\includegraphics(\[[^\]]*\])?\{([^}]*)\}
      (?{ $::popped = flattenfilename($2);
	push @::longs, $2; push @::flats, $::popped })
     /\\includegraphics$1\{$::popped\}/gmx;

  if ($#::flats > -1) {
    my ($long, $flat);

    foreach $long (@::longs) {
      $flat = shift @::flats;

      # convert LyX directory dots
      $long =~ s/\\lyxdot /./g;
      $flat =~ s/\\lyxdot /./g;

      print "Looking for graphics: '$long'\n";

      my @exts = ("", ".eps", ".pdf", ".pstex");
      my @dirs = ("", "$inputdir", $inputdir . '../figures/');

      seekfile($long, $flat, ["", ".eps", ".pdf", ".pstex"], 
	       ["", "$inputdir", $inputdir . '../figures/']);

    }
  }

  # Must do bibs before input commands because, when expanded, they
  # add new bibs inside.
  replacebibs(\$contents, $inputdir);

  # an \input or \include statement
  @flats = ();
  @longs = ();
  $contents =~
    s/\\(input|include)\{([^}]*)\}/"\\${1}\{" .
      ($popped = flattenfilename($2) and push @longs, $2 and push @flats,$popped and $popped)
	. "\}"/egm;

  if ($#flats > -1) {
    #print "Found " . scalar @longs . " items on line: '@flats'.\n";

    my ($long, $flat);

    foreach $long (@longs) {
      $flat = shift @flats;
      print "Found Input/Include: '$long'\n";
      # recurse to parse that tex file unless it's done already

      if (not exists $texfiles{$flat}) {
	if (! -r $long ) {
	  if (-r $inputdir . $long) {
	    $long = $inputdir . $long;
	  } elsif (-r "$long.tex") {
	    $long .= '.tex';
	  }
	}
	$texfiles{$flat} = $long;
	my $newcontents = parseTeX($long, basedir($long), $flat, $level + 1); #later
	if ( $onetex ) {
	  # If specified, expand inclusion directives
	  die "Failed to find input/include to expand for '$flat'!\n"
	    if (! ( $contents =~
		s/\\(input|include) \{ \Q$flat\E \} 
                  (?{ print "===Found \\$1\{$flat\} to expand.\n" if $DEBUG; })
		  / ($1 eq 'include' ? "\\clearpage\n" : '') . "$newcontents"/gxem ) );
	}
      }
    }
  }

  #print "Writing to \"$outfile\"\n" .
  #  "----------------------------------------\n" .
  #  "$contents\n" .
  #  "----------------------------------------\n";

  if (! $onetex || $level == 0) {  # do output at zero recurse level
    # open flat output file
    open my $COPY, ">$outfile" or die "Cannot write file $outfile\n";

    # write to flat copy
    print $COPY $contents;

    close $COPY;
  } else {
    return $contents;
  }
}

# Look for file to copy
sub seekfile {
    my ($file, $dest, $exts, $dirs) = @_;
    my @exts = @$exts;
    my @dirs = @$dirs;
    
  OUT: foreach my $dir (@dirs) {
      foreach my $ext (@exts) {
	  my $tryfile = "$dir$file$ext";
	  if (-r $tryfile) {
	      # add the extension to $dest only if it hasn't any dots
	      my $dext = '';
	      $dext = $ext if (! $dest =~ /\./); 
	      
	      if (system("cp $tryfile $outputdir/$dest$dext") != 0) {
		  die "Cannot copy $tryfile!\n";
	      }
	      # If we found it, then get out of the loop
	      print "Found: '$tryfile'\n";
	      last OUT;
	  }
      }
  }
}

# Return everything after the last /
sub popfile {
  my $file = shift (@_);

  my @a = split /\//, $file;

  #print "Popping $file... split: @a ($#a), pop: " . pop(@a) . "\n";
  pop @a;
}

# Make a new file by flattening directory names
sub flattenfilename {
  local $_ = shift;

  # convert slashes to pluses, dots to Xs
  tr|/|+|;
  tr|\.|X|;

  # add a unique number at the beginning so files don't start with a
  # dot
  $_ = sprintf("%03d_", ++$::filecount) . "$_";

  print "===Flattened file to '$_'\n" if $DEBUG;

  return $_;
}

sub basedir {
  my $file = shift (@_);

  my @a = split /\//, $file;

  #print "Popping $file... split: @a ($#a), pop: " . pop(@a) . "\n";
  pop @a;			# remove file part
  my $basedir = join('/', @a);		# combine the rest with /'s
  $basedir .= '/' if (! $basedir =~ /^$/);		# add a trailing / if not empty
  $basedir;
}

# get the bibliography files
sub replacebibs {
  my ($contentsref, $inputdir) = @_;
  #my $contents = $$contentsref;
  my $popped;

  #print "bib contents: $contents\n";

  local @::flats = ();
  local @::longs = ();
  local @::refs;

  $" = ',';			# set list item separator
  # search for \bibliography statements
  $$contentsref =~
    s/\\bibliography(\[[^\]]*\])? {
      (?{ @::refs = (); #print "Found bibliography!\n"; #initialize ref list
        }) (?: \s* ([^}, ]+)(?=[,}]),?
      (?{ # for each word do:
	  #local (@_flats, @_longs, @_refs);
          #print "Word: \"$+\", "; 
          $popped = flattenfilename($+); push @::longs, $+;
          push @::flats,$popped; push @::refs, $popped;
        }) )+ \s* }
      (?{ #print "\n+++ $#::flats; $#::refs; $#::longs\n";
          #push @::flats, @_flats; push @longs, @_longs; push @refs, @_refs;
          #print "Refs: @::refs\n";
    })/"\\bibliography$1" . "{@::refs}"/egmx;# &&
      #print "bib contents: $$contentsref\n";
      #print "\n*** $#::flats; $#::refs; $#::longs\n";
  $" = ' ';			# restore list item separator

  if ($#::flats > -1) {
    #print "Found " . scalar @::longs . " items on line: '@::flats'.\n";

    my ($long, $flat);

    foreach $long (@::longs) {
      $flat = shift @::flats;
      print "Found bib. file: '$long'\n";
      # recurse to parse that tex file unless it's done already

      # convert LyX directory dots
      $long =~ s/\\lyxdot /./g;

      seekfile($long, "$flat.bib", [".bib"], 
	       ["", "$inputdir"]);
    }
  }
}
