#!/usr/bin/perl

use utf8;
use Encode;
use Unicode::Normalize;

# Convert every *.xml transliteration rule file in the current directory
# into a plain-text rule file written to the parent directory.
#
# For each input "<source>-<target>[-<variant>].xml" the script writes
# "..\<source>_<target>[_<variant>].txt", starting with a BOM, containing
# one converted, NFD-normalised line per non-blank input line.
use strict;
use warnings;

opendir(my $dir, ".") or die "Cannot open current directory: $!\n";
my @xlits = grep { /\.xml$/ } readdir($dir);
closedir($dir);

foreach my $xml_name (@xlits) {
    # Derive the output name: ".xml" -> ".txt", hyphens -> underscores.
    (my $txt_name = $xml_name) =~ s/\.xml$/.txt/;
    $txt_name =~ s/-/_/g;

    my $in;
    if (!open($in, "<:encoding(UTF-8)", $xml_name)) {
        # Skip unreadable inputs instead of emitting a BOM-only output file.
        warn "Cannot read $xml_name: $!\n";
        next;
    }

    # The backslash is a Windows-style separator for the parent directory;
    # kept as in the original script.
    my $out_path = "..\\$txt_name";
    my $out;
    if (!open($out, ">:encoding(UTF-8)", $out_path)) {
        warn "Cannot write $out_path: $!\n";
        close($in);
        next;
    }

    # Split "<source>_<target>[_<variant>].txt" into its parts. Names that
    # do not follow the pattern leave all three parts empty.
    my ($source, $target, $variant) = ("", "", "");
    if ($txt_name =~ /^(.+?)_(.+?)(?:_(.+?))?\./) {
        $source  = $1;
        $target  = $2;
        $variant = defined $3 ? $3 : "";
    }

    my $label = "Creating $source-$target";
    $label .= "/$variant" if $variant ne "";
    print "$label\n";

    # Every output file starts with a byte order mark.
    print {$out} chr(0xFEFF);

    while (my $raw = <$in>) {
        my $line = convert_line($raw);
        next unless defined $line;    # nothing left after stripping markup
        print {$out} "$line\n";
    }

    close($out) or warn "Error closing $out_path: $!\n";
    close($in);
}

# Convert one raw line of the XML file into one line of rule text.
#
# Strips a leading BOM, surrounding whitespace and all XML tags. Returns
# undef if nothing but whitespace remains. Otherwise replaces the arrow
# characters with their ASCII forms, decodes the XML entities &lt;, &gt;
# and &amp;, expands \uXXXX escapes, and returns the NFD-normalised result
# (without a trailing newline).
sub convert_line {
    my ($line) = @_;

    $line =~ s/^\x{FEFF}//;
    $line =~ s/[\t\r\n ]*$//;
    $line =~ s/^[\t\r\n ]*//;

    # Drop the XML markup. This must run before the arrows and entities are
    # turned into literal "<" / ">", or those would be stripped as tags.
    $line =~ s/<[^<>]+?>//g;

    return undef if $line =~ /^[\t\r\n ]*$/;

    $line =~ s/↔/<>/g;
    $line =~ s/→/>/g;
    $line =~ s/←/</g;

    # Decode entities; "&amp;" goes last so "&amp;lt;" yields "&lt;" and is
    # not decoded twice.
    $line =~ s/&lt;/</g;
    $line =~ s/&gt;/>/g;
    $line =~ s/&amp;/&/g;

    # NOTE(review): 1-6 hex digits are accepted, so hex letters directly
    # after a 4-digit escape are consumed as part of it -- confirm against
    # the escape syntax the input files actually use.
    $line =~ s/\\u([0-9A-Fa-f]{1,6})/chr(hex($1))/eg;

    return NFD($line);
}
