#!/usr/bin/perl -w

# Shell re-exec guard (the idiom described in perlrun): if this file is ever
# interpreted by a shell instead of perl, the shell executes the eval/exec
# and restarts the script under perl.  Perl itself parses each statement as
# "eval '...' if 0;", whose condition is always false, so both are no-ops
# when perl runs the file directly.
eval 'exec /usr/bin/perl -w -S $0 ${1+"$@"}'
    if 0; # not running under some shell
	# Second copy of the same guard, invoking plain "perl" instead of
	# /usr/bin/perl.
	eval 'exec perl -S $0 "$@"'
		if 0;


# Declares the package variable $running_under_some_shell; nothing in the
# code below reads or assigns it.
use vars qw( $running_under_some_shell );

=head1 NAME

vnsort -- Vietnamese sort

=head1 FORMAT

vnsort [ C<-c>B<list> | C<-f>B<list> [C<-d>B<regexp>]] [C<-i>] [files ...]

=head1 SYNOPSIS

	vnsort -c10-15,50-,25-45 < file 
	vnsort -f3,5-6 < file 
	vnsort -f3,5-6 -d: < file

=head1 DESCRIPTION

C<vnsort> is a utility that sorts input lines according to rules used in
Vietnamese language with TCVN 5712:1993 encoding (VSCII-1). You can run it
without any options, then it just uses whole lines for sorting. With the
options, it's possible to specify parts of the lines to be used for comparison. 

=over 4

=item B<list>

A comma-separated list of integer field numbers or field ranges. The
fields are indexed from 1 and if a range is open (eg. C<5->), it means
all remaining fields from the starting number.

=item B<-c>

Stands for columns and the list that follows specifies byte ranges on
the line. You will probably use this option to sort data with fixed
width fields.

=item B<-f>

Fields that will be used for sort.

=item B<-d>

Delimiter that separates fields in the B<-f> option. It is a Perl
regular expression, the default is C<[ \t]+>, which means any number
of spaces or tabs in a row.

=item B<-i>

Treat lowercase letters as uppercase.

=back

=head1 SEE ALSO

cssort(1), vntovn(1).

=head1 AUTHOR

Jan Pazdziora <adelton@fi.muni.cz> -- the original author of cssort.
Han The Thanh <thanh@fi.muni.cz> -- support for Vietnamese.

=cut

use strict;
use Getopt::Std;
use Cz::Vnsort;

# Command-line option defaults: fields are separated by runs of spaces or
# tabs, and -i is off unless given on the command line.
my %opts = (
	'd' => '[ \t]+',
	'i' => 0,
	);

# -d, -f and -c take an argument; -i and -h are plain flags.  'h' has to be
# declared here, otherwise getopts rejects -h as an unknown option and the
# usage branch below can never run.
getopts('d:f:c:ih', \%opts);

# -h: print the usage summary to STDERR and stop before any input is read.
if (defined $opts{'h'})
	{
	print STDERR <<"EOF";
This is vnsort version $Cz::Vnsort::VERSION.
Usage info: vnsort [ -clist | -flist [-dregexp]] [-i] [files ...]
	-c	Columns
	-f	Field numbers
	-d	Delimiter, field separator
	-i	Treat lowercase letters as uppercase
Lists are comma separated lists of field (column) numbers or ranges.
Example:	vnsort -c10-15,50-,25-45	vnsort -f3,5-6 -d:
EOF
	exit(1);
	}

# Hand the -i flag to Cz::Vnsort before any comparison takes place.
init_sort_tab($opts{'i'});

# Pick the key-selection mode: -c (columns) takes precedence over -f
# (fields).  $switch ends up holding the winning option letter and $option
# its list argument; both are undef when neither option was given.
my ($switch) = grep { defined $opts{$_} } ('c', 'f');
my $option = defined $switch ? $opts{$switch} : undef;

if (defined $switch)
	{
	# Translate the list argument (e.g. "3,5-6,10-") into parallel arrays
	# of zero-based start positions and lengths.  An undef length marks an
	# open range that runs to the end of the line or to the last field.
	my (@starts, @lengths, @array);
	for my $spec (split /,/, $option)
		{
		my ($first, $last);
		if ($spec =~ /^(\d+)$/)
			{ ($first, $last) = ($1, $1); }
		elsif ($spec =~ /^(\d+)-(\d+)$/)
			{ ($first, $last) = ($1, $2); }
		elsif ($spec =~ /^(\d+)-$/)
			{ $first = $1; }

		# Positions are 1-based, so 0 and reversed ranges such as "5-3"
		# are rejected here; left alone they would turn into negative
		# offsets or lengths further down.
		if (not defined $first or $first < 1
			or (defined $last and $last < $first))
			{ die "vnsort: wrong option '$spec' for switch -$switch\n"; }

		push @starts, $first - 1;
		push @lengths, (defined $last ? $last - $first + 1 : undef);
		}

	# Every element of @array is a record [ whole line, key 1, key 2, ... ].
	if ($switch eq 'c')
		{
		# Column mode: each key is a substring of the line.
		while (my $text = <>)
			{
			chomp $text;
			my @record = ($text);
			for my $i (0 .. $#starts)
				{
				if ($starts[$i] >= length($text))
					{ push @record, undef; }	# line too short for this key
				elsif (defined $lengths[$i])
					{ push @record, substr($text, $starts[$i], $lengths[$i]); }
				else
					{ push @record, substr($text, $starts[$i]); }
				}
			push @array, \@record;
			}
		}
	else
		{
		# Field mode: split the line on the -d pattern and take the
		# requested slices.  A closed range reaching past the last field
		# yields undef keys; an open range simply stops at the last field.
		my $regexp = $opts{'d'};
		while (my $text = <>)
			{
			chomp $text;
			my @items = split /$regexp/s, $text;
			my @record = ($text);
			for my $i (0 .. $#starts)
				{
				my $last = defined $lengths[$i]
					? $starts[$i] + $lengths[$i] - 1
					: $#items;
				push @record, @items[$starts[$i] .. $last];
				}
			push @array, \@record;
			}
		}

	# Print the original lines in key order.
	print map { $_->[0] . "\n" } sort _by_keys @array;
	}
else
	# No -c/-f: sort whole input lines.
	{ print vnsort <>; }

# Sort comparator for the records built above; reads the two records from
# the package variables $a and $b as set by sort.  Keys (index 1 and up) are
# compared pairwise until one pair differs.  A missing (undef) key sorts
# before any present key; when a key is missing on both sides the next key
# decides instead of declaring the records equal.
# NOTE(review): czcmp must be exported by Cz::Vnsort for this to run; that
# is not visible from this file -- confirm.
sub _by_keys
	{
	my $len = ( @$a >= @$b ? @$a : @$b);
	for my $i (1 .. $len - 1)
		{
		my ($left, $right) = ($a->[$i], $b->[$i]);
		if (not defined $left)
			{
			next if not defined $right;
			return -1;
			}
		return 1 if not defined $right;
		my $result = czcmp($left, $right);
		return $result if $result != 0;
		}
	return 0;
	}


