#!/usr/bin/perl

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell
=pod

=head1 NAME

tv_grab_uk_rt - Grab TV listings for the United Kingdom

=head1 SYNOPSIS

tv_grab_uk_rt --help

tv_grab_uk_rt [--config-file FILE] --configure [--gui OPTION]

tv_grab_uk_rt [--config-file FILE] [--output FILE] [--quiet]
              [--days N] [--offset N]
              [--list-channels] 

=head1 DESCRIPTION

Output TV and radio listings in XMLTV format for many stations available in
Britain.  The data comes from a machine-readable file produced by the Radio
Times website.

=head1 USAGE

First you must run B<tv_grab_uk_rt --configure> to choose which stations you
want to receive.  Then running B<tv_grab_uk_rt> with no arguments will get
about a fortnightE<39>s listings for the stations you chose.

B<--configure> Prompt for which stations to download and write the
configuration file.

B<--gui OPTION> Use this option to enable a graphical interface to be used.
OPTION may be 'Tk', or left blank for the best available choice.
Additional allowed values of OPTION are 'Term' for normal terminal output
(default) and 'TermNoProgressBar' to disable the use of Term::ProgressBar.

B<--config-file FILE> Set the name of the configuration file, the default is
B<~/.xmltv/tv_grab_uk_rt.conf>.  This is the file written by B<--configure> and
read when grabbing.

B<--output FILE> When grabbing, write output to FILE rather than standard
output.

B<--quiet> suppress the progress messages normally written to standard error.

B<--days N> When grabbing, grab N days rather than as many as possible.

B<--offset N> Start grabbing at today + N.  N may be negative.

B<--list-channels> List channels.

B<--help> print a help message and exit.

=head1 SEE ALSO

L<xmltv(5)>, L<http://www.radiotimes.beeb.com/>

=head1 AUTHOR

Ed Avis, ed@membled.com

=cut

use warnings;
use strict;
use Getopt::Long;
use HTML::Entities;
use Date::Manip; Date_Init('TZ=+0000');
use XMLTV::Version '$Id: tv_grab_uk_rt.in,v 1.77 2005/03/01 08:56:45 axis3x3 Exp $ ';
use XMLTV::Config_file;
use XMLTV::Get_nice;
use XMLTV::Ask;
use XMLTV::ProgressBar;
use XMLTV::Memoize; XMLTV::Memoize::check_argv 'get_nice';
use XMLTV::DST;
use XMLTV::Usage <<END
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--quiet]
List channels only: $0 --list-channels [--output FILE] [--quiet]
END
  ;
$XMLTV::Get_nice::Delay = 0; # since this is intended for grabbing
my $channel_list_uri = 'http://xmltv.radiotimes.com/xmltv/channels.dat';

sub configure();

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
    *t = sub {};
    *d = sub { '' };
    }
    else {
    *t = \&Log::TraceMessages::t;
    *d = \&Log::TraceMessages::d;
    Log::TraceMessages::check_argv();
    }
}

GetOptions('help'          => \ my $opt_help,
       'configure'     => \ my $opt_configure,
       'config-file=s' => \ my $opt_config_file,
       'gui:s'         => \ my $opt_gui,
       'output=s'      => \ my $opt_output,
       'share=s'       => \ my $opt_share, # also undocumented
       'quiet'         => \ my $opt_quiet,
       'list-channels' => \ my $opt_list_channels,
       'days'          => undef, # ignored
       'offset'        => undef, # ignored
      )
  or usage(0);
if ($opt_help) {
    usage(1);
}

# share/ directory for storing channel mapping files.  This next line
# is altered by processing through tv_grab_uk_rt.PL.  But we can use
# the current directory instead of share/tv_grab_uk for development.
#
# The 'source' file tv_grab_uk_rt.in has $SHARE_DIR undef, which means
# use the current directory.  In any case the directory can be
# overridden with the --share option (useful for testing).
#
my $SHARE_DIR='/usr/share/xmltv'; # by grab/uk_rt/tv_grab_uk_rt.PL
$SHARE_DIR = $opt_share if defined $opt_share;
my $OUR_SHARE_DIR = (defined $SHARE_DIR) ? "$SHARE_DIR/tv_grab_uk_rt" : '.';
(my $CHANNEL_NAMES_FILE = "$OUR_SHARE_DIR/channel_ids") =~ tr!/!/!s;

XMLTV::Ask::init($opt_gui);

# Stuff for the root <tv> element.
my %tv_credits = ( # 'source-info-url'     => "todo",
           'source-info-name'    => 'Radio Times',
           'generator-info-name' => 'XMLTV',
           'generator-info-url'  =>
           'http://membled.com/work/apps/xmltv/',
         );

# Tables to convert between Radio Times and XMLTV ids of channels.
# The way to access these is through the routines rt_to_xmltv() and
# xmltv_to_rt(), not directly.  Those will deal sensibly with a new RT
# channel that isn't mentioned in the file.
#
my (%rt_to_xmltv, %xmltv_to_rt, %extra_dn);
my $line_num = 0;
foreach (XMLTV::Config_file::read_lines($CHANNEL_NAMES_FILE, 1)) {
    ++ $line_num;
    next unless defined;
    my $where = "$CHANNEL_NAMES_FILE:$line_num";
    my @fields = split /:/;
    die "$where: wrong number of fields"
      if @fields < 2 or @fields > 3;

    my ($xmltv_id, $rt_id, $extra_dn) = @fields;
    warn "$where: RT id $rt_id seen already\n"
      if defined $rt_to_xmltv{$rt_id};
    $rt_to_xmltv{$rt_id} = $xmltv_id;
    warn "$where: XMLTV id $xmltv_id seen already\n"
      if defined $xmltv_to_rt{$xmltv_id};
    $xmltv_to_rt{$xmltv_id} = $rt_id;

    $extra_dn{$xmltv_id} = $extra_dn if defined $extra_dn;
}

say <<END
All data is the copyright of the Radio Times website
<http://www.radiotimes.com> and the use of this data is restricted to
personal use only.
END
;

# Whatever we're doing, we need the channel list.
my $bar = new XMLTV::ProgressBar({name => 'finding channels', count => 1})
  if not $opt_quiet;
my $channel_list = get_nice $channel_list_uri;
my (%channels, %seen_rt_id, %seen_name);
foreach (split /\n/, $channel_list) {
    chomp;
    /^(\d+)\|(.+)/ or die "bad line in channel list: $_";
    my ($rt_id, $name) = ($1, $2);
    $seen_rt_id{$rt_id}++ && die "channel with RT id $rt_id seen twice";
    $seen_name{$name}++ && die "channel named '$name' seen twice";
    my $xmltv_id = $rt_to_xmltv{$rt_id};
    if (not defined $xmltv_id) {
    warn "RT id $rt_id ($name) not known in channel ids file\n";
    $xmltv_id = "C$rt_id.radiotimes.com";
    }
    my @names = ([ $name ]);
    for ($extra_dn{$xmltv_id}) { push @names, [ $_ ] if defined }
    $channels{$xmltv_id} = { id => $xmltv_id,
                 rt_id => $rt_id,
                 'display-name' => \@names };
}
update $bar if $bar;
$bar->finish() if $bar;
foreach (keys %xmltv_to_rt) {
    warn "channel $_ ($xmltv_to_rt{$_}) not seen on site"
      if not exists $channels{$_};
}

my %g_args = ();
if (defined $opt_output) {
    my $fh = new IO::File ">$opt_output";
    die "cannot write to $opt_output\n" if not $fh;
#    binmode $fh or die "cannot set binmode for output: $!";
    %g_args = (OUTPUT => $fh);
}

if ($opt_list_channels) {
    die "--list-channels can't be given with --configure\n"
      if $opt_configure;
    my $writer = new XMLTV::Writer(%g_args, encoding => 'ISO-8859-1');
    $writer->start(\%tv_credits);
    foreach (sort keys %channels) {
    delete $channels{$_}{rt_id};
    $writer->write_channel($channels{$_});
    }
    $writer->end;
    exit;
}

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_uk_rt', $opt_quiet);

if ($opt_configure) {
    configure();
    exit;
}

# Ask the user which channels to download, and write $config_file.
#
# Uses global %channels hash.
#
# FIXME commonize with other grabbers.
#
sub configure() {
#    local $Log::TraceMessages::On = 1;

    XMLTV::Config_file::check_no_overwrite($config_file);

    # FIXME need to make directory
    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
    my %chose_ch;
    t 'channels: ' . d \%channels;

    # For now we just let the user pick among the 'standard' channels.
    for (;;) {
    my $in = ask(<<END
Enter the name of a channel, or '.' to finish selecting channels:
END
    );
    # treat EOF as same as '.' -- finish
    $in = '.' if not defined $in;
    $in =~ s/^\s+//;
    $in =~ s/\s+$//;
    # handle backspace
    $in =~ s/.\x08//g;

    if ($in eq '.') {
        if (not keys %chose_ch) {
        say('You must choose at least one channel.');
        next;
        }
        last;
    }

    # FIXME commonize this matching by display name.
    my @poss;
      CH: foreach my $k (sort keys %channels) {
        my $ch = $channels{$k};
        my $dns = $ch->{'display-name'};
        unless ($dns and @$dns) {
        warn "channel with id $ch->{id} has no display name, so cannot be configured\n";
        next CH;
        }
        foreach (map { $_->[0] } @$dns) {
        # use substring match
        if (index(lc, lc $in) != -1) {
            push @poss, $ch;
            next CH;
        }
        }
    }

    # We only matched based on display names, so we can assume
    # that each possible channel has at least one.
    #
    if (@poss == 0) {
        say("There is no channel called '$in'.");
    }
    elsif (@poss == 1) {
        my $ch = $poss[0];
        if (ask_boolean('Add channel ' .
                $ch->{'display-name'}->[0]->[0] . '?', 1)) {
        my $xmltv_id = $ch->{id};
        unless ($chose_ch{$xmltv_id}++) {
            print CONF "channel $xmltv_id\n";
        }
        }
    }
    elsif (1 <= @poss and @poss < 25) {
        my %dn_to_ch;
        foreach (@poss) {
        my $dn = $_->{'display-name'}->[0]->[0];
        warn "more than one channel called $dn"
          if exists $dn_to_ch{$dn};
        $dn_to_ch{$dn} = $_;
        }
        my $none_option = 'None of the above are what I wanted';
        die 'silly channel name' if exists $dn_to_ch{$none_option};
        my $r = ask_choice('Which channel to add?',
                   $poss[0]->{'display-name'}->[0]->[0],
                   (sort keys %dn_to_ch), $none_option);
        # treat EOF as same as none-option
        next if not defined $r;
        next if $r eq $none_option;
        my $ch = $dn_to_ch{$r}; die if not defined $ch;
        my $xmltv_id = $ch->{id};
        unless ($chose_ch{$xmltv_id}++) {
        print CONF "channel $xmltv_id\n";
        }
    }
    elsif (25 <= @poss) {
        say("'$in' matches lots of channels, be more specific.");
    }
    else { die }
    }

    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");
    exit();
}

# Grabbing.  Start by reading config file.
my @wanted_chs;
my $n = 0;
foreach (XMLTV::Config_file::read_lines $config_file) {
    ++$n;
    next if not defined;
    /^\s*channel\s+(\S+)\s*$/ or die "$config_file: $n: bad line $_\n";
    my $id = $1;
    if (not exists $channels{$id}) {
    warn "channel $id mentioned in $config_file but not on site\n";
    next;
    }
    push @wanted_chs, $id;
}
#@wanted_chs = sort keys %channels;

my $writer = new XMLTV::Writer(%g_args, encoding => 'ISO-8859-1');
$writer->start(\%tv_credits);
foreach (@wanted_chs) {
    my %h = %{$channels{$_}};
    delete $h{rt_id};
    $writer->write_channel(\%h);
}

$bar = new XMLTV::ProgressBar({name => 'grabbing', count => scalar @wanted_chs})
  if not $opt_quiet;
my %warned_wrong_num_fields; # give that warning once per channel file
foreach my $ch (@wanted_chs) {
    my $c = $channels{$ch};
    my $rt_id = $channels{$ch}->{rt_id}; die if not defined $rt_id;

    # Try to get the base timezone for this channel from its name.
    my $base_tz;
    if ($c->{'display-name'}->[0]->[0] =~ /\((UTC|GMT|CET)\)\s*$/) {
    $base_tz = $1;
    }
    for ($base_tz) { $_ = 'UTC' if not defined }

    my $uri = "http://xmltv.radiotimes.com/xmltv/$rt_id.dat";
    local $SIG{__DIE__} = sub { die "$uri: $_[0]" };
    local $SIG{__WARN__} = sub { warn "$uri: $_[0]" };
    my $page = get_nice $uri;

    # Tidy up HTML entities and bad characters.  The site seems to use
    # a mixture of Latin-1 and UTF-8, I'm not sure exactly.  We want
    # our output to be in Latin-1 but HTML::Entities decides to use
    # Unicode so we have to fiddle a few entities manually first.
    #
    for ($page) {
    s/&#8212;/--/g;
    s/&#8230;/.../g;
    decode_entities $_;
    tr/\207\211\200\224/\347\311\055\055/; # bad characters
    }
    foreach (split /\n/, $page) {
    my @fields = split /\~/;
    if (@fields != 23) {
        warn "wrong number of fields in line:\n$_\n"
          unless $warned_wrong_num_fields{$ch}++;
        next;
    }
    foreach (@fields) { s/^\s+//; s/\s+$//; undef $_ if not length }
    my ($title, $sub_title, $episode, $year, $director, $cast,
        $premiere, $film, $repeat, $subtitles, $widescreen,
        $new_series, $deaf_signed, $black_and_white, $star_rating,
        $certificate, $genre, $desc, $choice, $date, $start, $stop,
        $duration_mins) = @fields;
    foreach ($premiere, $film, $repeat, $subtitles, $widescreen,
         $new_series, $deaf_signed, $black_and_white, $choice) {
        die "true/false value not defined" if not defined;
        if ($_ eq 'true') { $_ = 1 }
        elsif ($_ eq 'false') { $_ = 0 }
        else { die "bad true/false value $_" }
    }

    warn "ignoring sub-title $sub_title since episode also given\n"
      if defined $sub_title and defined $episode;
    $sub_title = $episode if defined $episode;

    warn("missing title in: $_"), next if not defined $title;
    my %p = (channel => $ch, title => [ [ $title ] ]);
    for ($sub_title) { $p{'sub-title'} = [ [ $_ ] ] if defined }
    for ($year) { $p{date} = $_ if defined }
    for ($director) { $p{credits}{director} = [ $_ ] if defined }
    if (defined $cast) {
        my @cast;
        if ($cast =~ tr/|//) {
        @cast = split /\|/, $cast;
        # Each bit is in the format 'part*actor' and it seems that
        # even when part is 'director' that is the name of a
        # character.
        #
        foreach (@cast) {
            s/^.*[*]// or warn "bad bit of cast list: $_";
        }
        }
        else {
        @cast = split /,/, $cast;
        }
        foreach (@cast) { s/^\s+//; s/\s+$// }
        $p{credits}{actor} = \@cast;
    }
    $p{premiere} = [ '' ] if $premiere;
    push @{$p{category}}, [ 'film', 'en' ] if $film;
    $p{'previously-shown'} = {} if $repeat;
    $p{subtitles} = [ { type => 'teletext' } ] if $subtitles;
    $p{video}{aspect} = '16:9' if $widescreen;
    $p{new} = 1 if $new_series;
    # $deaf_signed ignored for now
    $p{video}{colour} = 0 if $black_and_white;
    $p{'star-rating'} = [ "$star_rating/5" ] if defined $star_rating;
    $p{rating} = [ [ $certificate, 'BBFC' ] ] if defined $certificate;
    push @{$p{category}}, [ $genre, 'en' ] if defined $genre;
    for ($desc) {
        if (defined) {
        s!</?[A-Za-z]+>!!g;
        $p{desc} = [ [ $_, 'en' ] ];
        }
    }
    # $choice ignored for now

    # Date, start and stop time.
    my ($yyyy, $mm, $dd);
    for ($date) {
        die "missing date in $_" if not defined;
        m!(\d\d)/(\d\d)/(\d{4})$! or die "bad date $_";
        ($dd, $mm, $yyyy) = ($1, $2, $3);
    }
    $p{start} = utc_offset "$yyyy$mm$dd$start", $base_tz;
    $p{stop} = utc_offset "$yyyy$mm$dd$stop", $base_tz;
    if (Date_Cmp($p{start}, $p{stop}) > 0) {
        $p{stop} = utc_offset(DateCalc("$yyyy$mm$dd$stop", '+ 1 day'),
                  $base_tz);
    }
    # Ignore $duration_mins since it may not be reliable.

    $writer->write_programme(\%p);
    }
    update $bar if $bar;
}
$bar->finish() if $bar;
$writer->end;
