intltool-merge.in

#!@INTLTOOL_PERL@ -w
# -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-

#
#  The Intltool Message Merger
#
#  Copyright (C) 2000, 2003 Free Software Foundation.
#  Copyright (C) 2000, 2001 Eazel, Inc
#
#  Intltool is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License 
#  version 2 published by the Free Software Foundation.
#
#  Intltool is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#  General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
#  As a special exception to the GNU General Public License, if you
#  distribute this file as part of a program that contains a
#  configuration script generated by Autoconf, you may include it under
#  the same distribution terms that you use for the rest of that program.
#
#  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
#            Kenneth Christiansen <kenneth@gnu.org>
#            Darin Adler <darin@bentspoon.com>
#
#  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
#

## Release information
# Program name, package name and version string used in the --version
# banner and the usage text.
my ($PROGRAM, $PACKAGE, $VERSION) = ("intltool-merge", "intltool", "0.35.0");

## Loaded modules
use strict; 
use Getopt::Long;
use Text::Wrap;
use File::Basename;

# File-scoped merger state.  Of these, only $leading_space is referenced by
# the code visible in this file (traverse() uses it to re-indent the
# per-language copies of an element).
my ($must_end_tag, $last_depth, $translation_depth) = (-1, -1, -1);
my (@tag_stack, @entered_tag, @translation_strings);
my $leading_space = "";

## Scalars used by the option stuff
# Every flag starts out false; GetOptions fills them in from the command line.
my ($HELP_ARG, $VERSION_ARG, $QUIET_ARG)                          = (0, 0, 0);
my ($BA_STYLE_ARG, $XML_STYLE_ARG, $KEYS_STYLE_ARG)               = (0, 0, 0);
my ($DESKTOP_STYLE_ARG, $SCHEMAS_STYLE_ARG, $RFC822DEB_STYLE_ARG) = (0, 0, 0);
my ($PASS_THROUGH_ARG, $UTF8_ARG, $MULTIPLE_OUTPUT)               = (0, 0, 0);

# Path given with --cache; stays undefined when caching was not requested.
my $cache_file;

## Handle options
# Parse the command line into the option scalars.  When GetOptions rejects
# the command line it has already printed its own diagnostic; add the usual
# "--help" hint and stop with a failure status.  (The previous failure path
# called a subroutine named "error", which is not defined in this file.)
GetOptions 
(
 "help" => \$HELP_ARG,
 "version" => \$VERSION_ARG,
 "quiet|q" => \$QUIET_ARG,
 "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
 "ba-style|b" => \$BA_STYLE_ARG,
 "xml-style|x" => \$XML_STYLE_ARG,
 "keys-style|k" => \$KEYS_STYLE_ARG,
 "desktop-style|d" => \$DESKTOP_STYLE_ARG,
 "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
 "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
 "pass-through|p" => \$PASS_THROUGH_ARG,
 "utf8|u" => \$UTF8_ARG,
 "multiple-output|m" => \$MULTIPLE_OUTPUT,
 "cache|c=s" => \$cache_file
 ) or do {
     print STDERR "Try `${PROGRAM} --help' for more information.\n";
     exit 1;
 };

# Positional command-line arguments; assigned by preparation().
my ($PO_DIR, $FILE, $OUTFILE);

# %po_files_by_lang maps a language code to its .po file;
# %translations maps the pair (language, msgid) to the msgstr.
my (%po_files_by_lang, %translations);

# iconv program: the environment overrides the configured default.
my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "@INTLTOOL_ICONV@";

# Null device of the current platform.
my $devnull = '/dev/null';
$devnull = 'NUL:' if $^O eq 'MSWin32';

# Use this instead of \w for XML files to handle more possible characters.
my $w = '[-A-Za-z0-9._:]';

# XML quoted string contents
my $q = '[^\\"]*';

## Check for options. 
# --version and --help take priority.  Otherwise the first style flag that is
# set (tested in the order listed) selects the merger, provided the three
# positional arguments are present; in every other case the usage text is
# shown.

if ($VERSION_ARG) {
    print_version();
}
elsif ($HELP_ARG) {
    print_help();
}
else {
    # [ style flag, merge routine, run utf8_sanity_check first? ]
    my @styles = (
        [ $BA_STYLE_ARG,        \&ba_merge_translations,        1 ],
        [ $XML_STYLE_ARG,       \&xml_merge_output,             1 ],
        [ $KEYS_STYLE_ARG,      \&keys_merge_translations,      1 ],
        [ $DESKTOP_STYLE_ARG,   \&desktop_merge_translations,   1 ],
        [ $SCHEMAS_STYLE_ARG,   \&schemas_merge_translations,   1 ],
        [ $RFC822DEB_STYLE_ARG, \&rfc822deb_merge_translations, 0 ],
    );

    my ($selected) = grep { $_->[0] && @ARGV > 2 } @styles;

    if ($selected) {
        my (undef, $merge, $force_utf8) = @{ $selected };

        utf8_sanity_check() if $force_utf8;
        preparation();
        print_message();
        $merge->();
        finalize();
    }
    else {
        print_help();
    }
}

exit;

## Sub for printing release information
# Writes the version banner to standard output and terminates the program.
sub print_version
{
    my @banner = (
        "${PROGRAM} (${PACKAGE}) ${VERSION}",
        "Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.",
        "",
        "Copyright (C) 2000-2003 Free Software Foundation, Inc.",
        "Copyright (C) 2000-2001 Eazel, Inc.",
        "This is free software; see the source for copying conditions.  There is NO",
        "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.",
    );

    print map { "$_\n" } @banner;
    exit;
}

## Sub for printing usage information
# Writes the usage text to standard output and terminates the program.
# Each output line is spelled out as its own string so that the trailing
# blanks and the literal tab contained in the text stay visible.
sub print_help
{
    my @usage = (
        "Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE",
        "Generates an output file that includes some localized attributes from an",
        "untranslated source file.",
        "",
        "Mandatory options: (exactly one must be specified)",
        "  -b, --ba-style         includes translations in the bonobo-activation style",
        "  -d, --desktop-style    includes translations in the desktop style",
        "  -k, --keys-style       includes translations in the keys style",
        "  -s, --schemas-style    includes translations in the schemas style",
        "  -r, --rfc822deb-style  includes translations in the RFC822 style",
        "  -x, --xml-style        includes translations in the standard xml style",
        "",
        "Other options:",
        "  -u, --utf8             convert all strings to UTF-8 before merging ",
        "                         (default for everything except RFC822 style)",
        "  -p, --pass-through     deprecated, does nothing and issues a warning",
        "  -m, --multiple-output  output one localized file per locale, instead of ",
        "\t                 a single file containing all localized elements",
        "  -c, --cache=FILE       specify cache file name",
        '                         (usually $top_builddir/po/.intltool-merge-cache)',
        "  -q, --quiet            suppress most messages",
        "      --help             display this help and exit",
        "      --version          output version information and exit",
        "",
        "Report bugs to http://bugzilla.gnome.org/ (product name \"$PACKAGE\")",
        'or send email to <xml-i18n-tools@gnome.org>.',
    );

    print map { "$_\n" } @usage;
    exit;
}


## Sub for printing error messages
# Prints the "try --help" hint on standard error and terminates the program.
# The exit status is non-zero so that a calling shell or build system can
# tell the invocation failed (it used to exit with status 0).
sub print_error
{
    print STDERR "Try `${PROGRAM} --help' for more information.\n";
    exit 1;
}


# Announces the output file on standard output unless --quiet was given.
sub print_message 
{
    if (!$QUIET_ARG) {
        print "Merging translations into $OUTFILE.\n";
    }
}


# Takes the three positional arguments (po directory, input file, output
# file) from @ARGV, then collects the .po files and loads the translations.
sub preparation 
{
    ($PO_DIR, $FILE, $OUTFILE) = @ARGV[0 .. 2];

    gather_po_files();
    get_translation_database();
}

# General-purpose code for looking up translations in .po files

# Derives the language code from a .po path: "po/fr.po" yields "fr".
# A path without a "/" or without the ".po" suffix is returned unchanged.
sub po_file2lang
{
    my $lang = $_[0];
    $lang =~ s{^.*/(.*)\.po$}{$1};
    return $lang;
}

# Records every *.po file found directly in $PO_DIR in %po_files_by_lang,
# keyed by the language code derived from the file name.
sub gather_po_files
{
    my @po_paths = glob "$PO_DIR/*.po";

    $po_files_by_lang{ po_file2lang($_) } = $_ for @po_paths;
}

# Looks an encoding name up in a charset.alias file (as shipped with glib).
#
# The file is named by $G_CHARSET_ALIAS or, failing that, lives in the
# configured library directory.  The first non-comment line whose second
# column equals ENCODING (case-insensitively) supplies the result.
#
# Returns the first column of that line, or ENCODING itself when the file
# cannot be opened or holds no matching line.
sub get_local_charset
{
    my ($encoding) = @_;
    my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "@INTLTOOL_LIBDIR@/charset.alias";

    # seek character encoding aliases in charset.alias (glib)

    # Three-argument open: the path may come from the environment and must
    # never be interpreted as an open mode.  A missing file is not an error.
    open my $alias_fh, '<', $alias_file or return $encoding;

    my $found;
    while (my $line = <$alias_fh>)
    {
        next if $line =~ /^\#/;

        # \Q..\E: match the name literally, not as a regular expression.
        if ($line =~ /^\s*([-._a-zA-Z0-9]+)\s+\Q$encoding\E\b/i)
        {
            $found = $1;
            last;
        }
    }

    # Closed on every path, including after a successful lookup.
    close $alias_fh;

    # if not found, return input string

    return defined $found ? $found : $encoding;
}

# Determines the character encoding of a .po file.
#
# Reads the file until the first Content-Type header line carrying a
# charset and takes that name; when none is found, ISO-8859-1 is assumed
# (with a warning unless --quiet).  The name is then probed with iconv; if
# iconv rejects it, get_local_charset() is asked for an alias.
#
# Dies when the file cannot be opened.  Returns the encoding name.
sub get_po_encoding
{
    my ($in_po_file) = @_;
    my $encoding = "";

    open my $po_fh, '<', $in_po_file
        or die "can't open $in_po_file: $!";
    while (my $line = <$po_fh>)
    {
        ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
        if ($line =~ /Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
        {
            $encoding = $1;
            last;
        }
    }
    close $po_fh;

    if (!$encoding)
    {
        print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
        $encoding = "ISO-8859-1";
    }

    # Probe only: convert empty input and look at iconv's exit status.
    # $encoding is limited to [-a-zA-Z0-9] by the pattern above (or is the
    # fixed fallback), so it is safe inside the shell command.
    system ("$iconv -f $encoding -t UTF-8 <$devnull 2>$devnull");
    if ($?) {
        $encoding = get_local_charset($encoding);
    }

    return $encoding;
}

# Forces UTF-8 conversion on, and warns on standard error when the removed
# --pass-through option was given.
sub utf8_sanity_check 
{
    if ($PASS_THROUGH_ARG) {
        print STDERR "Warning: option --pass-through has been removed.\n";
    }

    $UTF8_ARG = 1;
}

# Fills %translations: through the cache when --cache was given, otherwise
# directly from the .po files.
sub get_translation_database
{
    return get_cached_translation_database() if $cache_file;

    create_translation_database();
}

# Returns the age (in days, as reported by -M) of the most recently
# modified .po file, or 0 when there are no .po files.
sub get_newest_po_age
{
    my $newest_age;

    foreach my $file (values %po_files_by_lang) 
    {
        my $file_age = -M $file;

        # -M yields undef when the file cannot be examined; skip it.
        next unless defined $file_age;

        # Test definedness, not truth: an age of exactly 0 is a valid value
        # and must not be mistaken for "nothing seen yet".
        $newest_age = $file_age
            if !defined $newest_age || $file_age < $newest_age;
    }

    return defined $newest_age ? $newest_age : 0;
}

# Builds the translation database from the .po files and stores it in
# $cache_file as the keys and values of %translations joined by \x01.
# Dies when the cache file cannot be written.
sub create_cache
{
    print "Generating and caching the translation database\n" unless $QUIET_ARG;

    create_translation_database();

    # "or", not "||": with "||" the die was attached to the file name string
    # and a failed open went unnoticed.
    open my $cache_fh, '>', $cache_file
        or die "can't write cache file $cache_file: $!";
    print {$cache_fh} join("\x01", %translations);
    close $cache_fh
        or die "can't write cache file $cache_file: $!";
}

# Replaces %translations with the contents of $cache_file, which holds the
# hash's keys and values separated by \x01 (see create_cache).
# Dies when the cache file cannot be read.
sub load_cache 
{
    print "Found cached translation database\n" unless $QUIET_ARG;

    # "or", not "||": with "||" the die was attached to the file name string
    # and a failed open went unnoticed.
    open my $cache_fh, '<', $cache_file
        or die "can't read cache file $cache_file: $!";
    my $contents = do { local $/; <$cache_fh> };
    close $cache_fh;

    $contents = "" unless defined $contents;
    %translations = split "\x01", $contents;
}

# Loads the cache when it exists and is at least as new as the newest .po
# file; otherwise (re)creates it.
sub get_cached_translation_database
{
    my $cache_age  = -M $cache_file;
    my $have_cache = defined $cache_age;

    if ($have_cache && $cache_age <= get_newest_po_age())
    {
        load_cache();
        return;
    }

    print "Found too-old cached translation database\n"
        if $have_cache && !$QUIET_ARG;

    create_cache();
}

# Parses every .po file in %po_files_by_lang and stores each non-fuzzy,
# non-empty translation in %translations under the key (language, msgid).
#
# With $UTF8_ARG set, files whose declared encoding is not UTF-8 are read
# through iconv.  Dies when a .po file (or the iconv pipe) cannot be opened.
sub create_translation_database
{
    for my $lang (keys %po_files_by_lang) 
    {
        my $po_file = $po_files_by_lang{$lang};
        my $po_fh;

        my $encoding;
        my $needs_conversion = 0;
        if ($UTF8_ARG) 
        {
            $encoding = get_po_encoding ($po_file);
            $needs_conversion = (lc($encoding) ne "utf-8");
        }

        if ($needs_conversion) 
        {
            print "NOTICE: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;

            # Shell pipe on purpose: $iconv may be a command with arguments.
            open $po_fh, "$iconv -f $encoding -t UTF-8 $po_file|"
                or die "can't run $iconv on $po_file: $!";
        } 
        else 
        {
            open $po_fh, '<', $po_file
                or die "can't open $po_file: $!";
        }

        my $nextfuzzy = 0;   # the next msgid is flagged "#, fuzzy"
        my $inmsgid = 0;     # continuation lines extend $msgid
        my $inmsgstr = 0;    # continuation lines extend $msgstr
        my $msgid = "";
        my $msgstr = "";

        while (my $line = <$po_fh>) 
        {
            $nextfuzzy = 1 if $line =~ /^#, fuzzy/;

            if ($line =~ /^msgid "((\\.|[^\\])*)"/ ) 
            {
                # A new entry starts: store the one collected so far.
                $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
                $msgid = "";
                $msgstr = "";

                if ($nextfuzzy) {
                    # Fuzzy entries are skipped: leave $msgid empty.
                    $inmsgid = 0;
                } else {
                    $msgid = unescape_po_string($1);
                    $inmsgid = 1;
                }
                $inmsgstr = 0;
                $nextfuzzy = 0;
            }

            if ($line =~ /^msgstr "((\\.|[^\\])*)"/) 
            {
                $msgstr = unescape_po_string($1);
                $inmsgstr = 1;
                $inmsgid = 0;
            }

            if ($line =~ /^"((\\.|[^\\])*)"/) 
            {
                $msgid .= unescape_po_string($1) if $inmsgid;
                $msgstr .= unescape_po_string($1) if $inmsgstr;
            }
        }

        # Store the last entry of the file.
        $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;

        close $po_fh;
    }
}

# Called after every merge; it currently does nothing.
sub finalize
{
    return;
}

# Translates one backslash escape as found in a .po string into the
# character it stands for.  Handles the single-letter escapes, \xHH and
# three-digit octal; anything else is returned unchanged.
sub unescape_one_sequence
{
    my ($sequence) = @_;

    my %char_for = (
        "\\\\" => "\\",
        "\\\"" => "\"",
        "\\n"  => "\n",
        "\\r"  => "\r",
        "\\t"  => "\t",
        "\\b"  => "\b",
        "\\f"  => "\f",
        "\\a"  => "\a",
        "\\v"  => chr(11),    # vertical tab, see ascii(7)
    );
    return $char_for{$sequence} if exists $char_for{$sequence};

    if ($sequence =~ /\\x([0-9a-fA-F]{2})/) {
        return chr(hex($1));
    }
    if ($sequence =~ /\\([0-7]{3})/) {
        return chr(oct($1));
    }

    # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489

    return $sequence;
}

# Returns a copy of a .po string with every backslash escape replaced by
# the character it denotes (see unescape_one_sequence).
sub unescape_po_string
{
    my $unescaped = shift;

    $unescaped =~ s!(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)!unescape_one_sequence($1)!eg;

    return $unescaped;
}

## NOTE: deal with < - &lt; but not > - &gt;  because it seems its ok to have 
## > in the entity. For further info please look at #84738.
#
# Returns a copy of the string with &apos; &quot; &amp; and &lt; replaced by
# the characters they stand for.  All four are handled in a single pass, so
# the "&" produced from "&amp;" is never looked at again: "&amp;lt;" decodes
# to "&lt;" rather than being decoded twice into "<".
sub entity_decode
{
    my ($text) = @_;

    my %char_for = (
        apos => "'",
        quot => '"',
        amp  => '&',
        lt   => '<',
    );

    $text =~ s/&(apos|quot|amp|lt);/$char_for{$1}/g;

    return $text;
}
 
# entity_encode: (string)
#
# Encode the given string to XML format (encode '<' etc).  The string is
# processed one unsigned char value at a time.

sub entity_encode
{
    my ($pre_encoded) = @_;

    # with UTF-8 we only encode minimalistic
    my $encoded = '';
    for (unpack ('C*', $pre_encoded)) {
        # the helper reads the current value from $_
        $encoded .= entity_encode_int_minimalist();
    }

    return $encoded;
}

# Maps the character code held in $_ to its XML entity for the quote,
# ampersand, apostrophe and less-than characters; any other code is
# returned as the character itself.
sub entity_encode_int_minimalist
{
    my $code = $_;

    return $code == 34 ? "&quot;"
         : $code == 38 ? "&amp;"
         : $code == 39 ? "&apos;"
         : $code == 60 ? "&lt;"
         :               chr $code;
}

# Returns the entity-encoded translation of $string for $lang, or $string
# itself (unmodified) when no translation is recorded.
sub entity_encoded_translation
{
    my ($lang, $string) = @_;

    my $translation = $translations{$lang, $string};

    return $translation ? entity_encode ($translation) : $string;
}

## XML (bonobo-activation specific) merge code

# Merges translations into a bonobo-activation style XML file.
#
# Reads $FILE in one piece and writes $OUTFILE.  Every start tag that
# carries at least one attribute is written once with the "_" marker removed
# from its attribute names, followed by one copy per language that has a
# translation for at least one of the marked attribute values.  In such a
# copy the name attribute gets "-<lang>" appended and the marked values are
# replaced by their entity-encoded translations.  Everything else is copied
# through unchanged.  Dies when either file cannot be opened.
sub ba_merge_translations
{
    my $source;

    {
       local $/; # slurp mode
       open INPUT, "<$FILE" or die "can't open $FILE: $!";
       $source = <INPUT>;
       close INPUT;
    }

    open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
    # Binmode so that selftest works ok if using a native Win32 Perl...
    binmode (OUTPUT) if $^O eq 'MSWin32';

    # Consume the input from the front: $1 is the text before the next tag
    # with attributes, $2 the tag itself including its leading blanks.
    while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s) 
    {
        print OUTPUT $1;

        my $node = $2 . "\n";

        my @strings = ();
        $_ = $node;
        # Unmark the attributes one at a time, remembering each decoded value.
	while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
             push @strings, entity_decode($3);
        }
        # $_ now holds the tag without markers: the untranslated copy.
	print OUTPUT;

        # Languages that translate at least one of the marked strings.
	my %langs;
	for my $string (@strings) 
        {
	    for my $lang (keys %po_files_by_lang) 
            {
                $langs{$lang} = 1 if $translations{$lang, $string};
	    }
	}
	
	for my $lang (sort keys %langs) 
        {
	    $_ = $node;
            # Only the first name attribute receives the language suffix.
	    s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
	    s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
	    print OUTPUT;
        }
    }

    # Whatever is left contains no further tag with attributes.
    print OUTPUT $source;

    close OUTPUT;
}


## XML (non-bonobo-activation) merge code


# Process tag attributes
#
# Arguments:
#   1. hash reference mapping attribute names to their (quoted) values
#   2. true when attributes whose name starts with "_" are to be unmarked
#      and translated
#   3. language to translate into ("" for none)
#   4. reference to the caller's translate flag; set to 2 when a marked
#      attribute is found
#
# Returns the attributes as a string of ' name="value"' pieces, in reverse
# sorted order of the original names, keeping each value's quote character.
sub getAttributeString
{
    my ($sub, $do_translate, $language, $translate) = @_;
    $do_translate ||= 0;
    $language     ||= "";

    my $result = "";

    foreach my $name (reverse sort keys %{ $sub }) {
        my $key    = $name;
        my $string = $sub->{$name};

        # trim surrounding whitespace, then note and strip the quotes
        $string =~ s/^[\s]+//;
        $string =~ s/[\s]+$//;

        my $quote = ($string =~ /^'.*'$/) ? "'" : '"';

        $string =~ s/^['"]//g;
        $string =~ s/['"]$//g;

        if ($do_translate && $key =~ /^_/) {
            $key =~ s|^_||g;

            if (!$language) {
                # watch not to "overwrite" $translate
                $$translate = 2 if ($translate && (!$$translate));
            }
            else {
                # Handle translation
                my $translation = $translations{$language, entity_decode($string)};
                $string = entity_encode($translation) if $translation;
                $$translate = 2;
            }
        }

        $result .= " $key=$quote$string$quote";
    }

    return $result;
}

# Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
#
# $ref is a content list: the attribute hash first, then (type, content)
# pairs.  Text is appended as is (runs of whitespace collapsed to a single
# blank unless space is preserved), child elements are serialised
# recursively, and pairs whose type is "1" are skipped.
sub getXMLstring
{
    my ($ref, $spacepreserve) = @_;
    $spacepreserve ||= 0;

    my @list   = @{ $ref };
    my $attrs  = $list[0];
    my $result = "";

    # an explicit xml:space attribute overrides the inherited setting
    $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
    $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));

    for (my $index = 1; $index < @list; $index += 2) {
        my ($type, $content) = @list[$index, $index + 1];

        if (!$type) {
            # We've got CDATA
            next unless $content;

            # lets strip the whitespace here, and *ONLY* here
            $content =~ s/\s+/ /gs if (!$spacepreserve);
            $result .= $content;
        }
        elsif ("$type" ne "1") {
            # We've got another element
            $result .= "<$type";
            $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements

            my $subresult = $content ? getXMLstring($content, $spacepreserve) : '';
            $result .= $subresult ? ">" . $subresult . "</$type>" : "/>";
        }
    }

    return $result;
}

# Translate list of nodes if necessary
#
# Runs traverse() over every (type, content) pair of the given list.  With
# $singlelang set, $MULTIPLE_OUTPUT is forced to 1 for the duration of the
# calls and restored afterwards.
sub translate_subnodes
{
    my ($fh, $content, $language, $singlelang, $spacepreserve) = @_;
    $language      ||= "";
    $singlelang    ||= 0;
    $spacepreserve ||= 0;

    my @nodes = @{ $content };

    my $saved_mode = $MULTIPLE_OUTPUT;
    $MULTIPLE_OUTPUT = 1 if $singlelang;

    for (my $index = 0; $index < @nodes; $index += 2) {
        traverse($fh, @nodes[$index, $index + 1], $language, $spacepreserve);
    }

    $MULTIPLE_OUTPUT = $saved_mode;
}

# Checks whether a string is a well-formed XML fragment by parsing it
# wrapped in a <root> element.  Returns the parse tree (a true value) on
# success, 0 for an empty fragment or when parsing fails.  Dies when
# XML::Parser is not available.
sub isWellFormedXmlFragment
{
    eval 'require XML::Parser'
        or die "You must have XML::Parser installed to run $0\n\n";

    my ($fragment) = @_;
    return 0 unless $fragment;

    my $parser = XML::Parser->new(Style => 'Tree');
    my $tree   = 0;

    # a parse error leaves $tree at 0
    eval { $tree = $parser->parse("<root>$fragment</root>"); };

    return $tree;
}

# Writes one node of the parsed document to $fh, adding translations.
#
# Arguments: output handle, node name (false for text), node content (the
# text itself, or the element's content list), language ("" when writing the
# combined output) and the inherited xml:space setting.
#
# Text nodes are copied through; whitespace-only text is also collected in
# $leading_space so translated copies can be indented like the original.
# Elements marked with a leading "_" (or carrying marked attributes) are
# written untranslated and followed by one copy per language, or written
# directly in $language when $MULTIPLE_OUTPUT is set.  Unmarked elements
# are written as is and their children traversed recursively.
sub traverse
{
    my $fh = shift; 
    my $nodename = shift;
    my $content = shift;
    my $language = shift || "";
    my $spacepreserve = shift || 0;

    if (!$nodename) {
        if ($content =~ /^[\s]*$/) {
            $leading_space .= $content;
        }
        print $fh $content;
    } else {
        # element
        my @all = @{ $content };
        my $attrs = shift @all;
        my $translate = 0;
        my $outattr = getAttributeString($attrs, 1, $language, \$translate);

        if ($nodename =~ /^_/) {
            $translate = 1;
            $nodename =~ s/^_//;
        }
        my $lookup = '';

        $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
        $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));

        print $fh "<$nodename", $outattr;
        if ($translate) {
            $lookup = getXMLstring($content, $spacepreserve);
            if (!$spacepreserve) {
                $lookup =~ s/^\s+//s;
                $lookup =~ s/\s+$//s;
            }

            if ($lookup || $translate == 2) {
                # Declared separately from the conditional assignment:
                # "my ... if ..." has undefined behaviour and could leave a
                # value from an earlier call in the variable.
                my $translation;
                $translation = $translations{$language, $lookup}
                    if isWellFormedXmlFragment($translations{$language, $lookup});
                if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
                    $translation = $lookup if (!$translation);
                    print $fh " xml:lang=\"", $language, "\"" if $language;
                    print $fh ">";
                    if ($translate == 2) {
                        translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
                    } else {
                        print $fh $translation;
                    }
                    print $fh "</$nodename>";

                    return; # this means there will be no same translation with xml:lang="$language"...
                            # if we want them both, just remove this "return"
                } else {
                    print $fh ">";
                    if ($translate == 2) {
                        translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
                    } else {
                        print $fh $lookup;
                    }
                    print $fh "</$nodename>";
                }
            } else {
                print $fh "/>";
            }

            for my $lang (sort keys %po_files_by_lang) {
                    if ($MULTIPLE_OUTPUT && $lang ne "$language") {
                        next;
                    }
                    if ($lang) {
                        # Handle translation
                        #
                        my $translate = 0;
                        my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);

                        # Same separation of declaration and conditional
                        # assignment as above, so every language starts
                        # from an undefined value.
                        my $translation;
                        $translation = $translations{$lang, $lookup}
                            if isWellFormedXmlFragment($translations{$lang, $lookup});
                        if ($translate && !$translation) {
                            $translation = $lookup;
                        }

                        if ($translation || $translate) {
                            print $fh "\n";
                            # keep only the indentation of the last line
                            $leading_space =~ s/.*\n//g;
                            print $fh $leading_space;
                            print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
                            if ($translate == 2) {
                               translate_subnodes($fh, \@all, $lang, 1, $spacepreserve);
                            } else {
                                print $fh $translation;
                            }
                            print $fh "</$nodename>";
                        }
                    }
            }

        } else {
            my $count = scalar(@all);
            if ($count > 0) {
                print $fh ">";
                my $index = 0;
                while ($index < $count) {
                    my $type = $all[$index];
                    my $rest = $all[$index+1];
                    traverse($fh, $type, $rest, $language, $spacepreserve);
                    $index += 2;
                }
                print $fh "</$nodename>";
            } else {
                print $fh "/>";
            }
        }
    }
}

# Tree-building handler for comments: appends the comment text to the
# current content list under the pseudo tag 1.
sub intltool_tree_comment
{
    my ($expat, $data) = @_;

    push @{ $expat->{Curlist} }, 1, $data;
}

# Tree-building handler for the start of a CDATA section: opens a new text
# entry (tag 0) holding the original markup of the section start.
sub intltool_tree_cdatastart
{
    my ($expat) = @_;

    push @{ $expat->{Curlist} }, 0, $expat->original_string();
}

# Tree-building handler for the end of a CDATA section: appends the
# original markup of the section end to the last entry of the current list.
sub intltool_tree_cdataend
{
    my ($expat) = @_;
    my $clist   = $expat->{Curlist};

    $clist->[-1] .= $expat->original_string();
}

# Tree-building handler for character data.  The decoded text passed as
# second argument is ignored in favour of the original markup; it is
# appended to a directly preceding text entry or starts a new one (tag 0).
sub intltool_tree_char
{
    my ($expat) = @_;
    my $clist   = $expat->{Curlist};
    my $last    = $#$clist;

    # Use original_string so that we retain escaped entities
    # in CDATA sections.
    #
    my $raw = $expat->original_string();

    if ($last > 0 and $clist->[$last - 1] eq '0') {
        $clist->[$last] .= $raw;
    }
    else {
        push @$clist, 0 => $raw;
    }
}

# Tree-building handler for a start tag.
#
# Builds the element's content list with an attribute hash parsed from the
# original markup, so that escaped entities in attribute values survive and
# each value keeps its surrounding quote characters.  The new list is
# appended to the current one under $tag and becomes the current list; the
# previous current list is pushed on $expat->{Lists}.
sub intltool_tree_start
{
    my ($expat, $tag) = @_;
    my @origlist = ();

    # Use original_string so that we retain escaped entities
    # in attribute values.  We must convert the string to an
    # @origlist array to conform to the structure of the Tree
    # Style.
    #
    my $source = $expat->original_string();

    # Remove leading tag.
    #
    $source =~ s|^\s*<\s*(\S+)||s;

    # Grab attribute key/value pairs and push onto @origlist array.
    # Each substitution both tests for and removes the next attribute, so
    # the loop always makes progress or stops.
    #
    while ($source)
    {
       if ($source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s)
       {
           push @origlist, $1, '"' . $2 . '"';
       }
       elsif ($source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s)
       {
           push @origlist, $1, "'" . $2 . "'";
       }
       else
       {
           last;
       }
    }

    my $ol = [ { @origlist } ];

    push @{ $expat->{Lists} }, $expat->{Curlist};
    push @{ $expat->{Curlist} }, $tag => $ol;
    $expat->{Curlist} = $ol;
}

# Parses an XML file into an XML::Parser "Tree" style structure, using the
# intltool_tree_* handlers for character data, start tags and CDATA
# sections.  Returns nothing when no file name is given; dies when the file
# does not exist or XML::Parser is not available.
sub readXml
{
    my $filename = shift || return;

    die "ERROR Cannot find filename: $filename\n" unless -f $filename;

    eval 'require XML::Parser'
        or die "You must have XML::Parser installed to run $0\n\n";

    my $xp = XML::Parser->new(Style => 'Tree');
    $xp->setHandlers(
        Char       => \&intltool_tree_char,
        Start      => \&intltool_tree_start,
        CdataStart => \&intltool_tree_cdatastart,
        CdataEnd   => \&intltool_tree_cdataend,
    );

# <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
# would be:
# [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
# 0, "Howdy",  ref, [{}]], 0, "do" ] ]

    return $xp->parsefile($filename);
}

# Writes the XML declaration to $fh, followed by the DOCTYPE declaration of
# $infile when it has one (with its internal subset, if present).
#
# Arguments: path of the source document, output file handle.
# Dies when $infile does not exist or cannot be opened.
sub print_header
{
    my $infile = shift;
    my $fh = shift;
    my $source;

    if(!-f $infile) {
        die "ERROR Cannot find filename: $infile\n";
    }

    print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
    {
        local $/;
        # Read the file that was just checked, i.e. the one passed in,
        # instead of the global input file name.
        open my $doc_fh, '<', $infile or die "can't open $infile: $!";
        $source = <$doc_fh>;
        close $doc_fh;
    }

    # Prefer the form with an internal subset ("[...]"), which may itself
    # contain ">" characters.
    if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
    {
        print $fh "$1\n";
    }
    elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
    {
        print $fh "$1\n";
    }
}

# Writes a parsed document to $fh.  Leading (name, content) pairs whose
# name is false or "1" are skipped; the first remaining pair is taken as
# the root element and handed to traverse().  Note that the pairs are
# removed from the array referenced by $ref.
sub parseTree
{
    my ($fh, $ref, $language) = @_;
    $language ||= "";

    my ($name, $cont);
    do {
        ($name, $cont) = splice @{ $ref }, 0, 2;
    } while (!$name || "$name" eq "1");

    my $attrs = @{$cont}[0];
    my $spacepreserve =
        ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)) ? 1 : 0;

    traverse($fh, $name, $cont, $language, $spacepreserve);
}

# Produces the merged XML output.  With $MULTIPLE_OUTPUT set, one file per
# language is written to <lang>/$OUTFILE (creating the directory when
# needed); the combined $OUTFILE is written afterwards in every case.
sub xml_merge_output
{
    # Parse $FILE afresh and write it to $path; an undefined $lang selects
    # the combined output.
    my $write_merged = sub {
        my ($path, $lang) = @_;

        open OUTPUT, ">$path" or die "Cannot open $path: $!\n";
        binmode (OUTPUT) if $^O eq 'MSWin32';
        my $tree = readXml($FILE);
        print_header($FILE, \*OUTPUT);
        parseTree(\*OUTPUT, $tree, $lang);
        close OUTPUT;
        print "CREATED $path\n" unless $QUIET_ARG;
    };

    if ($MULTIPLE_OUTPUT) {
        for my $lang (sort keys %po_files_by_lang) {
            if ( ! -e $lang ) {
                mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
            }
            $write_merged->("$lang/$OUTFILE", $lang);
        }
    }

    $write_merged->($OUTFILE);
}

# Merges translations into a "keys" style file.  A line of the form
# "_key=value" is written without the marker, followed by one
# "[lang]key=translation" line per language that translates the value.
# All other lines are copied unchanged.
sub keys_merge_translations
{
    open INPUT, "<${FILE}" or die;
    open OUTPUT, ">${OUTFILE}" or die;
    binmode (OUTPUT) if $^O eq 'MSWin32';

    while (my $line = <INPUT>) 
    {
        my $marked = ($line =~ s/^(\s*)_(\w+=(.*))/$1$2/);
        my $string = $marked ? $3 : undef;

        # the line itself, with the marker (if any) removed
        print OUTPUT $line;
        next unless $marked;

        for my $lang (sort keys %po_files_by_lang) 
        {
            my $translation = $translations{$lang, $string} or next;

            (my $localized = $line) =~ s/(\w+)=.*/[$lang]$1=$translation/;
            print OUTPUT $localized;
        }
    }

    close OUTPUT;
    close INPUT;
}

# Merges translations into a "desktop" style file.  A line of the form
# "_key=value" is written without the marker, followed by one
# "key[lang]=translation" line per language that translates the value.
# All other lines are copied unchanged.
sub desktop_merge_translations
{
    open INPUT, "<${FILE}" or die;
    open OUTPUT, ">${OUTFILE}" or die;
    binmode (OUTPUT) if $^O eq 'MSWin32';

    while (my $line = <INPUT>) 
    {
        my $marked = ($line =~ s/^(\s*)_(\w+=(.*))/$1$2/);
        my $string = $marked ? $3 : undef;

        # the line itself, with the marker (if any) removed
        print OUTPUT $line;
        next unless $marked;

        for my $lang (sort keys %po_files_by_lang) 
        {
            my $translation = $translations{$lang, $string} or next;

            (my $localized = $line) =~ s/(\w+)=.*/${1}[$lang]=$translation/;
            print OUTPUT $localized;
        }
    }

    close OUTPUT;
    close INPUT;
}

# Merges translations into a schemas style XML file.
#
# Reads $FILE in one piece and writes $OUTFILE.  Every <locale name="C">
# block is copied and followed by one <locale name="LANG"> block for each
# language that translates at least one of its <default>, <short> or <long>
# strings.  Only the translated elements appear in such a block; the
# whitespace between elements is taken over from the C block.  Dies when
# either file cannot be opened.
sub schemas_merge_translations
{
    my $source;

    {
       local $/; # slurp mode
       open INPUT, "<$FILE" or die "can't open $FILE: $!";
       $source = <INPUT>;
       close INPUT;
    }

    open OUTPUT, ">$OUTFILE" or die;
    binmode (OUTPUT) if $^O eq 'MSWin32';

    # FIXME: support attribute translations

    # Empty nodes never need translation, so unmark all of them.
    # For example, <_foo/> is just replaced by <foo/>.
    $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;

    # Consume the input from the front, one C locale block at a time.
    # Capture groups: 1 text before the block, 2 whitespace in front of it,
    # 3 the whole block, 4/7/10/13 the whitespace between its parts,
    # 5/8/11 the default/short/long elements, 6/9/12 their text.
    while ($source =~ s/
                        (.*?)
                        (\s+)(<locale\ name="C">(\s*)
                            (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
                            (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
                            (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
                        <\/locale>)
                       //sx) 
    {
        print OUTPUT $1;

	my $locale_start_spaces = $2 ? $2 : '';
	my $default_spaces = $4 ? $4 : '';
	my $short_spaces = $7 ? $7 : '';
	my $long_spaces = $10 ? $10 : '';
	my $locale_end_spaces = $13 ? $13 : '';
	my $c_default_block = $3 ? $3 : '';
	my $default_string = $6 ? $6 : '';
	my $short_string = $9 ? $9 : '';
	my $long_string = $12 ? $12 : '';

	print OUTPUT "$locale_start_spaces$c_default_block";

        # Normalise the strings (collapse whitespace, decode entities)
        # before using them as lookup keys.
        $default_string =~ s/\s+/ /g;
        $default_string = entity_decode($default_string);
	$short_string =~ s/\s+/ /g;
	$short_string = entity_decode($short_string);
	$long_string =~ s/\s+/ /g;
	$long_string = entity_decode($long_string);

	for my $lang (sort keys %po_files_by_lang) 
        {
	    my $default_translation = $translations{$lang, $default_string};
	    my $short_translation = $translations{$lang, $short_string};
	    my $long_translation  = $translations{$lang, $long_string};

            # Languages without any of the three translations get no block.
	    next if (!$default_translation && !$short_translation && 
                     !$long_translation);

	    print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";

        print OUTPUT "$default_spaces";    

        if ($default_translation)
        {
            $default_translation = entity_encode($default_translation);
            print OUTPUT "<default>$default_translation</default>";
        }

	    print OUTPUT "$short_spaces";

	    if ($short_translation)
	    {
			$short_translation = entity_encode($short_translation);
			print OUTPUT "<short>$short_translation</short>";
	    }

	    print OUTPUT "$long_spaces";

	    if ($long_translation)
	    {
			$long_translation = entity_encode($long_translation);
			print OUTPUT "<long>$long_translation</long>";
	    }	    

	    print OUTPUT "$locale_end_spaces</locale>";
        }
    }

    # Whatever is left contains no further C locale block.
    print OUTPUT $source;

    close OUTPUT;
}

# Merges translations into an RFC822 (Debian control/templates) style file.
#
# Reads $FILE in one piece and writes $OUTFILE.  Each "Field: value" entry
# is written without its leading underscores; for a field marked with one or
# two underscores, a "Field-LANG.ENCODING: translation" entry follows for
# every language that translates all parts of the value.  Two underscores
# mark a comma separated list, one underscore a short description followed
# by paragraphs (see rfc822deb_split).  Dies when either file cannot be
# opened.
sub rfc822deb_merge_translations
{
    # Encoding name written after each language code: UTF-8 when conversion
    # is on, otherwise whatever the .po file declares.
    my %encodings = ();
    for my $lang (keys %po_files_by_lang) {
        $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
    }

    my $source;

    $Text::Wrap::huge = 'overflow';
    $Text::Wrap::break = qr/\n|\s(?=\S)/;

    {
       local $/; # slurp mode
       open INPUT, "<$FILE" or die "can't open $FILE: $!";
       $source = <INPUT>;
       close INPUT;
    }

    open OUTPUT, ">${OUTFILE}" or die;
    binmode (OUTPUT) if $^O eq 'MSWin32';

    # $1 separator in front of the field, $2 underscore markers, $3 field
    # name, $4 colon and following blanks, $5 value up to the next line that
    # does not start with whitespace.
    while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
    {
	    my $sep = $1;
	    my $non_translated_line = $3.$4;
	    my $string = $5;
	    my $underscore = length($2);
            # Unmarked fields whose name starts with "#" are dropped.
	    next if $underscore eq 0 && $non_translated_line =~ /^#/;
	    #  Remove [] dummy strings
	    my $stripped = $string;
	    $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
	    $stripped =~ s/\[\s[^\[\]]*\]$//;
	    $non_translated_line .= $stripped;

	    print OUTPUT $sep.$non_translated_line;
    
	    if ($underscore) 
	    {
	        my @str_list = rfc822deb_split($underscore, $string);

	        for my $lang (sort keys %po_files_by_lang) 
                {
                    my $is_translated = 1;
                    my $str_translated = '';
                    my $first = 1;
                
                    for my $str (@str_list) 
                    {
                        my $translation = $translations{$lang, $str};
                    
                        # One missing part disqualifies the whole language.
                        if (!$translation) 
                        {
                            $is_translated = 0;
                            last;
                        }

	                #  $translation may also contain [] dummy
                        #  strings, mostly to indicate an empty string
	                $translation =~ s/\[\s[^\[\]]*\]$//;
                        
                        if ($first) 
                        {
                            if ($underscore eq 2)
                            {
                                $str_translated .= $translation;
                            }
                            else
                            {
                                $str_translated .=
                                    Text::Tabs::expand($translation) .
                                    "\n";
                            }
                        } 
                        else 
                        {
                            if ($underscore eq 2)
                            {
                                $str_translated .= ', ' . $translation;
                            }
                            else
                            {
                                $str_translated .= Text::Tabs::expand(
                                    Text::Wrap::wrap(' ', ' ', $translation)) .
                                    "\n .\n";
                            }
                        }
                        $first = 0;

                        #  To fix some problems with Text::Wrap::wrap
                        $str_translated =~ s/(\n )+\n/\n .\n/g;
                    }
                    next unless $is_translated;

                    # Drop the paragraph separator after the last paragraph.
                    $str_translated =~ s/\n \.\n$//;
                    $str_translated =~ s/\s+$//;

                    $_ = $non_translated_line;
                    s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
                    print OUTPUT;
                }
	    }
    }
    print OUTPUT "\n";

    close OUTPUT;
    close INPUT;
}

# Splits the value of an rfc822-style field into its translatable parts.
#
# Debian defines a special way to deal with rfc822-style files:
# when a value contain newlines, it consists of
#   1.  a short form (first line)
#   2.  a long description, all lines begin with a space,
#       and paragraphs are separated by a single dot on a line
# For type 1 the short form and the reformatted paragraphs are returned;
# for any other type the value is a comma separated list and its items
# are returned.
sub rfc822deb_split 
{
    my ($type, $text) = @_;

    # drop the single leading blank or tab of every line
    $text =~ s/^[ \t]//mg;
    return (split(/, */, $text, 0)) if $type ne 1;
    return ($text) if $text !~ /\n/;

    # the first line is the short form, the rest the long description
    my ($short, $body) = split /\n/, $text, 2;
    my @paragraphs = ($short);
    my $current = '';

    for my $line (split (/\n/, $body)) 
    {
        chomp $line;

        # true when $current ends in the middle of a line
        my $mid_line = length ($current) && $current !~ /\n$/;

        if ($line =~ /^\.\s*$/)
        {
            #  New paragraph
            $current =~ s/\s*$//;
            push @paragraphs, $current;
            $current = '';
        } 
        elsif ($line =~ /^\s/) 
        {
            #  Line which must not be reformatted
            $line =~ s/\s+$//;
            $current .= ($mid_line ? "\n" : '') . $line . "\n";
        } 
        else 
        {
            #  Continuation line, remove newline
            $current .= ($mid_line ? " " : '') . $line;
        }
    }

    $current =~ s/\s*$//;
    push @paragraphs, $current if length ($current);

    return @paragraphs;
}