#!/usr/bin/perl
#vim: set expandtab sts=2 ts=2 sw=2 ft=80:
# This script requires the following:
# Dependencies: libxml, libxslt, and /usr/share/xml/iso-codes/iso_639.xml
# from iso-codecs

use v5.10;
use warnings;
use strict; use open ':locale'; use Getopt::Long;
use XML::LibXML;
use XML::LibXSLT;
use XML::LibXML::XPathContext;
# an alternative could be: https://metacpan.org/pod/C::DynaLib
use IPC::Open2;

my $verbose = 0;
my $help = 0;
my $infile;
my $outfile;
my $inlang;
my $supports_lang;
my $dryrun = 0;
my $espeak_path;

sub iso639_2T_to_1
{
  my $code_2T = shift;
  my $isofile = '/usr/share/xml/iso-codes/iso_639.xml';
  die "Missing $isofile (In Debian install package iso-codes)!" unless -r $isofile;
  my $isodoc = XML::LibXML->load_xml(location => $isofile);
  my $xpc = XML::LibXML::XPathContext->new($isodoc);
  my $expr = '/iso_639_entries/iso_639_entry/@iso_639_1_code[ ../@iso_639_2T_code=\'' . $code_2T . '\' ]';
  say "Evaluating $expr..." if $verbose>1;
  my $code_1 = $xpc->findvalue($expr);
  say "Translated '$code_2T' to '$code_1'" if $verbose;
  return $code_1;
}

sub read_espeak_voices {
  my $F;
  open($F, "$espeak_path --voices|");
  die "unable to execute espeak $!" unless defined $F;
  my %voices;
  while (<$F>) {
    my $line = $_;
    $line =~ s/^\s*[0-9]*\s*([a-z]+)\s.*$/$1/g;
    $line =~ s/\s+$//;
    # add if a match occured
    $voices{$line} = 1 if length($line) <= 3;
  }
  return %voices;
}

sub language_exists {
  my $language = shift;
  my %espeak_langs = read_espeak_voices();
  if(exists($espeak_langs{$language})) {
    return 1;
  }
  $inlang = iso639_2T_to_1($language);
  if(!defined($inlang) || $inlang eq '' || !exists($espeak_langs{$inlang})) {
    return 0;
  }
  return 1;
}

GetOptions("verbose" => \$verbose,
"infile=s" => \$infile,
"outfile=s" => \$outfile,
"inlang=s" => \$inlang,
"help" => \$help,
"dry-run" => \$dryrun,
"supports-lang=s" => \$supports_lang,
"espeak-path=s" => \$espeak_path,
) or die("Error in command line arguments\n");

if($help)
{
    say <<"EOT";
$0 [OPTIONS]

This script adds pronunciation information to the headwords of a TEI file.
It goes through all <orth> elements and adds a <pron> element after it,
unless <pron> already exists.

This script uses 'espeak(-ng) -v <LANG> --ipa -q ok'.  A previous version used the
espeak API by means of a modified version of Speech::eSpeak, but the
maintainer of that module is unresponsive. Experiments with the espeak API
via C::DynaLib (or Python c_types)  were successful, but due to
cross-platform issues and espeak to espeak-ng API issues not further
pursuit.

Espeak is started as a subprocess and connected using stdin/stdout. In this
line-based mode, espeak reads a line from STDIN and outputs a line on
STDOUT.  Since espeak doesn't flush stdout, we never get a reply.  To prevent
this deadlock, we use the 'stdbuf' command, which uses LD_PRELOAD to
change libc behaviour.  stdbuf is even available on MSYS2.

Options:

	--dry-run
		Don't write OUTFILE.

	--espeak-path <FULL-PATH-TO-ESPEAK-BINARY>
		Give full path to espeak(-ng) in case autodetection fails.

	--help
		This help.

	--infile, -i <INFILE>
		Use INFILE as input.

		Example: -i eng-ita.tei

	--inlang, -l <CODE>
		Language of the headwords in INFILE.  The CODE is used to
		select the speaker language of espeak.  If not given,
		the basename of INFILE is assumed to have the form la1-la2.tei,
		as used in FreeDict and is translated to the corresponding
		espeak code.

		Example: -l de

	--outfile, -o <OUTFILE>
		Use OUTFILE for output.  If not given, "INFILE.withpron" is used.

		Example: -o output.tei

	--supports-lang <LANG>
		Exit with 0 (success) if the language is supported and with 2
		otherwise. This supports both 639-2 and 639-3 codes, but is
		meant to be used with 639-3 codes to check whether a
		dictionary's source language is supported.

	--verbose
		Say what is going on.
EOT
  exit 0;
}

$espeak_path ||= `which espeak-ng`;
$espeak_path ||= `which espeak`;
chomp $espeak_path;

if(!defined $infile && !defined $supports_lang) {
  say "Error: need to specify input file with --infile"; exit 1
};
unless(-x $espeak_path) {
    if (not length $espeak_path) {
        say "Espeak(-ng) executable missing"
    } else {
        say "$espeak_path not executable";
    }
    exit 1
};


if(defined $supports_lang) {
  exit(language_exists($supports_lang) ? 0 : 2);
}
unless(-r $infile) { say "$infile not readable"; exit 1 };
$outfile ||= "$infile.withpron";
say "Using espeak from '$espeak_path'" if $verbose;

unless(defined $inlang)
{
  unless($infile =~ /(\w{3})-\w{3}\.tei$/)
  {
    say "Could not guess input language from $infile.  Please provide --inlang.";
    exit 3;
  }
  my $in3 = $1;
  if (!language_exists($in3)) {
    say "No voice found for input language code $in3";
    exit 4
  }
}

say "Using language: ", $inlang if $verbose;
say "espeak version: ", `"$espeak_path" --version` if $verbose;

$SIG{PIPE} = sub
{
   die "Got SIGPIPE: $!";
};

my($chld_out, $chld_in);
# https://github.com/espeak-ng/espeak-ng/pull/536
#, '--fflush-stdout'
my @cmd = ('stdbuf', '-o0', $espeak_path, '--ipa', '-v', $inlang, '-q');
my $pid = open2($chld_out, $chld_in, @cmd);
die "open2 failed: $!" unless defined $pid;
say "espeak has PID $pid" if $verbose;
binmode $chld_in, ':utf8';
binmode $chld_out, ':utf8';

XML::LibXSLT->register_function("urn:espeak", "ipa",
  sub {
    my $inword = shift;
    my $count = kill 0, $pid;
    die "espeak exited" if $count != 1;
    say "Writing to espeak-ng: ", $inword if $verbose;
    say $chld_in $inword;
    say "Will read..." if $verbose;
    my $ipa = <$chld_out>;
    $ipa =~ s/^\s+|\s+$//g;# trim
    chomp($ipa);
    say "Got from espeak-ng: ", $ipa if $verbose;
    return $ipa;
    });
my $sdoc = XML::LibXML->load_xml(string => <<'EOT');
<xsl:stylesheet version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:espeak="urn:espeak"
  xmlns:tei="http://www.tei-c.org/ns/1.0"
  exclude-result-prefixes="tei espeak">

  <xsl:template match="tei:orth[not(boolean(following-sibling::tei:pron))]">
    <xsl:copy>
      <xsl:apply-templates match="@*|node()"/>
    </xsl:copy>
    <pron xmlns="http://www.tei-c.org/ns/1.0"><xsl:value-of select="espeak:ipa(.)"/></pron>
  </xsl:template>

  <xsl:template match="@*|node()">
    <xsl:copy>
      <xsl:apply-templates select="@*|node()"/>
    </xsl:copy>
  </xsl:template>

</xsl:stylesheet>
EOT
my $xslt = XML::LibXSLT->new();
my $stylesheet = $xslt->parse_stylesheet($sdoc);
print "Transforming $infile to $outfile... " if $verbose;
my $results = $stylesheet->transform_file($infile);
$stylesheet->output_file($results, $outfile) unless $dryrun;
close($chld_in);
say "done." if $verbose;

