#!/usr/bin/perl
use strict;
use warnings;

# Converts the usage output of the Fastaq Python scripts into man pages.
# The man pages are written to the debian/man directory, relative to the
# directory this script is run from (expected to be the main Fastaq directory).

# Script entry point: generate all man pages as soon as the file is run.
createManPages();

# Builds a section-1 man page for every file matching scripts/fastaq_* and
# writes it to debian/man/<script name>.1.
#
# For each script:
#   * help2man is run on it (with --no-discard-stderr) and every
#     "usage:" string, in any letter case, is removed from the output;
#   * the .TH header line is rewritten to carry the upper-cased script name
#     and the application name;
#   * the script name in the NAME line is extended with the short description
#     returned by shortDescription();
#   * a ".PP" line directly followed by a line starting with "<script>:" is
#     blanked out together with that following line;
#   * output stops at the "SEE ALSO" section, and the AUTHOR and COPYING
#     sections are appended by writeAuthorAndCopyright().
#
# Takes no arguments. Paths are relative to the current working directory.
# Dies if the destination directory or a man page cannot be written, or if
# help2man cannot be started; warns (and carries on) if help2man exits with
# a non-zero status or a script has no short description.
sub createManPages {

  my $source = 'scripts';
  my $destination = 'debian/man';
  my $app_name = 'Fastaq';
  my $descriptions = shortDescription();

  unless ( -d $destination ) {
    mkdir $destination
      or die "Cannot create directory $destination: $!\n";
  }

  # glob() returns an empty list when nothing matches, so no external "ls"
  # process (and no newline stripping) is needed.
  my @files = glob("$source/fastaq_*");
  return unless @files;

  print "Creating manpages\n";
  for my $file ( @files ) {

    my $filename = $file;
    $filename =~ s/^\Q$source\E\///;

    my $uc_filename = uc($filename);
    my $man_file = $destination . '/' . $filename . '.1';

    my $description = $descriptions->{$filename};
    unless ( defined $description ) {
      warn "No short description available for $filename\n";
      $description = '';
    }

    # List-form pipe open: the arguments go straight to help2man without
    # passing through a shell, so nothing in a file name is interpreted
    # as shell syntax.
    my @help2man_cmd = (
      'help2man', '-m', $filename, '-n', $filename,
      '--no-discard-stderr', $file,
    );
    open( my $help_fh, '-|', @help2man_cmd )
      or die "Cannot run help2man for $file: $!\n";
    my @output = <$help_fh>;
    close($help_fh)
      or warn "help2man did not exit cleanly for $file (status $?)\n";

    chomp @output;
    for my $line (@output) {
      $line =~ s/usage://gi;
    }

    open( my $man_fh, '>', $man_file )
      or die "Cannot open $man_file for writing: $!\n";

    for my $i ( 0 .. $#output ) {
      my $output_line = $output[$i];

      if ( $output_line =~ m/^\.TH/ ) {
        # Collapse runs of whitespace so the single-space pattern below
        # can match the header fields.
        $output_line =~ s/\s+/ /g;
        $output_line =~ s/(\.TH) ("\d+") ("[a-zA-Z0-9_ ]*") ("[a-zA-Z0-9_<>\[\]\/\.\(\), ]*") ("[a-zA-Z0-9_]*")/$1 $uc_filename $2 $3 "$app_name" "Fastaq executables"/;
      }

      $output_line =~ s/ \\- \Q$filename\E/$filename \- $description/;

      # Only look ahead when a following line exists; the look-ahead line
      # is blanked too, so it is printed as an empty line on its own turn.
      if ( $i < $#output
        && $output_line =~ m/^\.PP/
        && $output[ $i + 1 ] =~ m/^\Q$filename\E\:/ )
      {
        $output_line = $output[ $i + 1 ] = '';
      }

      # Everything from "SEE ALSO" onwards is dropped.
      last if $output_line =~ m/^\.SH "SEE ALSO"/;

      print $man_fh "$output_line\n";
    }

    writeAuthorAndCopyright( $man_fh, $filename );

    # Buffered write errors only surface at close time.
    close($man_fh)
      or die "Cannot close $man_file: $!\n";
  }
  print "Manpage creation complete\n";
}

# Appends the AUTHOR and COPYING sections to a man page.
#
#   $man_fh   - filehandle the sections are printed to
#   $filename - script name, interpolated into the AUTHOR text
#
# Each section is followed by one extra newline.
sub writeAuthorAndCopyright {

  my $man_fh   = shift;
  my $filename = shift;

  # Interpolating heredoc: $filename is expanded here.
  my $author_section = <<AUTHOR_SECTION;
.SH "AUTHOR"
.sp
$filename was originally written by Martin Hunt (mh12\@sanger.ac.uk)
AUTHOR_SECTION

  # Single-quoted heredoc: the roff escapes are kept exactly as written.
  my $copying_section = <<'COPYING_SECTION';
.SH "COPYING"
.sp
Wellcome Trust Sanger Institute Copyright \(co 2013 Wellcome Trust Sanger Institute This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version\&.
COPYING_SECTION

  print {$man_fh} $author_section . "\n";
  print {$man_fh} $copying_section . "\n";

}


# Returns a reference to a hash mapping each Fastaq script name to the
# one-line description used for it in the generated man page.
#
# A fresh hash is built on every call, so callers may modify the result.
# Takes no arguments.
sub shortDescription {

    my %descriptions = (
        fastaq_capillary_to_pairs => 'makes an interleaved file of read pairs',
        fastaq_chunker => 'splits a multi fasta/q file into separate files',
        fastaq_count_sequences => 'counts the number of sequences in a fasta/q file',
        fastaq_deinterleave => 'deinterleaves fasta/q file',
        fastaq_enumerate_names => 'renames sequences in a file, calling them 1,2,3...',
        fastaq_expand_nucleotides => 'makes all combinations of sequences in input file',
        fastaq_extend_gaps => 'extends the length of all gaps in a fasta/q file',
        fastaq_fasta_to_fastq => 'given a fasta and qual file, makes a fastq file',
        fastaq_filter => 'filters a fasta/q file by sequence length and/or by name',
        fastaq_get_ids => 'gets ids from each sequence in a fasta or fastq file',
        fastaq_get_seq_flanking_gaps => 'gets the sequences either side of gaps in a fasta/q file',
        fastaq_insert_or_delete_bases => 'deletes or inserts bases at given position(s)',
        fastaq_interleave => 'interleaves two fasta/q files',
        fastaq_long_read_simulate => 'simulates long reads from a fasta/q file',
        fastaq_make_random_contigs => 'makes a multi-fasta file of random sequences',
        fastaq_merge => 'converts multi fasta/q file to single sequence file',
        # Spelling corrected: "occurences" -> "occurrences".
        fastaq_replace_bases => 'replaces all occurrences of one letter with another',
        fastaq_reverse_complement => 'reverse complements all sequences',
        fastaq_scaffolds_to_contigs => 'creates a file of contigs from a file of scaffolds',
        # Spelling corrected: "reverese" -> "reverse".
        fastaq_search_for_seq => 'searches for an exact match on a given string and its reverse complement. guaranteed to find all hits',
        fastaq_sequence_trim => 'trims sequences off the start of all sequences in a pair of fasta/q files',
        fastaq_split_by_base_count => 'splits a multi fasta/q file into separate files',
        fastaq_strip_illumina_suffix => 'strips /1 or /2 off the end of every read name',
        fastaq_to_fake_qual => 'makes fake quality scores file',
        fastaq_to_fasta => 'converts sequence file to fasta format',
        fastaq_to_mira_xml => 'creates an xml file from a fasta/q file of reads, for use with mira assembler',
        fastaq_to_orfs_gff => 'writes a gff file of open reading frames',
        fastaq_to_perfect_reads => 'makes perfect paired end fastq reads',
        fastaq_to_random_subset => 'takes a random subset of reads',
        fastaq_to_tiling_bam => 'makes a bam file containing perfect (unpaired) reads tiling the whole genome',
        fastaq_to_unique_by_id => 'removes duplicate sequences',
        fastaq_translate => 'translates all sequences',
        fastaq_trim_ends => 'trims set number of bases off each sequence',
        fastaq_trim_Ns_at_end => 'trims any ns off each sequence',
    );

    return \%descriptions;
}
