#!/usr/bin/perl -w

=pod

=head1 NAME

tv_grab_pt_meo - Grab TV listings for MEO from SAPO in Portugal.

=head1 SYNOPSIS

tv_grab_pt_meo --help

tv_grab_pt_meo --configure [--config-file FILE]

tv_grab_pt_meo [--config-file FILE]
                 [--days N] [--offset N] [--channel xmltvid,xmltvid,...]
                 [--output FILE] [--quiet] [--debug]

tv_grab_pt_meo --list-channels [--config-file FILE]
                 [--output FILE] [--quiet] [--debug]


=head1 DESCRIPTION

Output TV and listings in XMLTV format for many stations
available in Portugal. This program consumes the EPG service offering
from SAPO at L<http://services.sapo.pt/Metadata/Service/EPG?culture=EN>. Or
their new API documentation at
L<https://store.services.sapo.pt/en/cat/catalog/other/meo-epg/technical-description>.
See L<http://seguranca.sapo.pt/termosdeutilizacao/apis_rss_webservices.html>
for their terms of service. (automatic translation suggests it's free
for personal use, verification appreciated)

First you must run B<tv_grab_pt_meo --configure> to choose which stations
you want to receive.

Then running B<tv_grab_pt_meo> with no arguments will get a listings for
the stations you chose for all available days including today.

=head1 OPTIONS

B<--configure> Prompt for which stations to download and write the
configuration file.

B<--config-file FILE> Set the name of the configuration file, the
default is B<~/.xmltv/tv_grab_pt_meo.conf>.  This is the file written by
B<--configure> and read when grabbing.

B<--output FILE> When grabbing, write output to FILE rather than
standard output.

B<--days N> When grabbing, grab N days rather than everything available.

B<--offset N> Start grabbing at today + N days.

B<--quiet> Only print error-messages on STDERR.

B<--debug> Provide more information on progress to stderr to help in
debugging.

B<--list-channels>    Output a list of all channels that data is available
                      for. The list is in xmltv-format.

B<--capabilities> Show which capabilities the grabber supports. For more
information, see L<http://wiki.xmltv.org/index.php/XmltvCapabilities>

B<--version> Show the version of the grabber.

B<--help> Print a help message and exit.

=head1 ERROR HANDLING

If the grabber fails to download data from webstep, it will print an
errormessage to STDERR and then exit with a status code of 1 to indicate
that the data is missing.

=head1 ENVIRONMENT VARIABLES

The environment variable HOME can be set to change where configuration
files are stored. All configuration is stored in $HOME/.xmltv/. On Windows,
it might be necessary to set HOME to a path without spaces in it.

=head1 CREDITS

Grabber written by Karl Dietz, dekarl -at- users -dot- sourceforge -dot- net
as a test of the documentation on grabber writing.
This documentation copied from tv_grab_cz by Mattias Holmlund,
This documentation copied from tv_grab_uk by Ed Avis,
ed -at- membled -dot- com. Original grabber by Jiri Kaderavek,
jiri -dot- kaderavek -at- webstep -dot- net with modifications by
Petr Stehlik, pstehlik -at- sophics -dot- cz.

Data provided via web service from SAPO accompanying their MEO TV service.
Check their terms of usage!

=head1 BUGS

None known.

=cut

use strict;
use DateTime;
use Encode; # used to convert 'perl strings' into 'utf-8 strings'
use XML::LibXML;
use XMLTV;
use XMLTV::Configure::Writer;
use XMLTV::Get_nice qw/get_nice/;
use XMLTV::Options qw/ParseOptions/;

my $maxdays = 1+7; # data source is limited to n days (including today)

my( $opt, $conf ) = ParseOptions( {
    grabber_name => "tv_grab_pt_meo",
    capabilities => [qw/apiconfig baseline manualconfig preferredmethod/],
    listchannels_sub => \&list_channels,
    stage_sub => \&config_stage,
    version => "$XMLTV::VERSION",
    description => "Portugal (MEO)",
    preferredmethod => 'allatonce',
    defaults => { days => $maxdays, offset => 0, quiet => 0, debug => 0 },
} );

# limit to maxdays in the future
if ($opt->{offset} + $opt->{days} > $maxdays) {
    $opt->{days} = $maxdays - $opt->{offset};
}

if ($opt->{days} < 1) {
    $opt->{days} = 0;
}

# Get the actual data and print it to stdout.
my $is_success=1;

my $startDate = DateTime->from_epoch( epoch => time () );
$startDate->set_time_zone( 'Europe/Lisbon' );
$startDate->truncate( to => 'day' );
$startDate->add( days => $opt->{offset} );
my $endDate=$startDate->clone()->add( days => $opt->{days} );
$endDate->add( seconds => -1 );

my $xpc = XML::LibXML::XPathContext->new;
$xpc->registerNs('EPG', 'http://services.sapo.pt/Metadata/EPG');

my $epgsource = '';
if ($opt->{days} > 0) {
    if( !$opt->{quiet} ) {
        print( STDERR "fetching data\n" );
    }
    get_epg( \$epgsource, $startDate, $endDate );
} else {
    if( !$opt->{quiet} ) {
        print( STDERR "no data available for the requested time period\n" );
    }
    $epgsource = '<GetChannelListByDateIntervalResponse xmlns="http://services.sapo.pt/Metadata/EPG" />';
    $is_success = 0;
}
my $parser=XML::LibXML->new();
$parser->load_ext_dtd( 0 );
my $epg=$parser->parse_string( $epgsource )->getDocumentElement();

my %w_args = (
    cutoff => '000000',
    days => $opt->{days},
    encoding => 'UTF-8',
    offset => $opt->{offset}
);

my $writer = new XMLTV::Writer( %w_args );
$writer->start({
    'generator-info-name' => "XMLTV/$XMLTV::VERSION",
    'generator-info-url' => 'http://www.xmltv.org/',
    'source-info-name' => 'SAPO EPG Service for MEO',
    'source-info-url' => 'https://store.services.sapo.pt/en/cat/catalog/other/meo-epg/technical-description',
});

my $channels = $xpc->findnodes( '//EPG:Channel', $epg );
foreach my $channel ($channels->get_nodelist()) {
    my %ch = (
        'display-name' => [ [ encode( 'UTF-8', $xpc->findvalue( 'EPG:Name', $channel ) ), 'pt' ] ],
        'id' => make_channelid( $xpc->findvalue( 'EPG:Sigla', $channel ) )
    );

    $writer->write_channel(\%ch);
}

my $lastchanid;
my $lastend;
my $inprogs = $xpc->findnodes( '//EPG:Program', $epg );
foreach my $inprog ($inprogs->get_nodelist()) {
    my %prog;

    my $chanid = $xpc->findvalue( '../../EPG:Sigla', $inprog );
    $prog{channel} = make_channelid( $chanid );

    my $title = $xpc->findvalue( 'EPG:Title', $inprog );
    $title = parse_title (\%prog, $title);
    $prog{title} = [ [ encode( 'UTF-8', $title ), 'pt' ] ];

    my $desc = $xpc->findvalue( 'EPG:Description', $inprog );
    $prog{desc} = [ [ encode( 'UTF-8', $desc ), 'pt' ] ];

    my $dtstart;
    if( !defined( $lastchanid )||( $lastchanid ne $chanid ) ){
      my $starttime = $xpc->findvalue( 'EPG:StartTime', $inprog );
      $dtstart = dt_from_string( $starttime );
    }else{
      $dtstart = $lastend;
    }
    $prog{start} = $dtstart->strftime( '%Y%m%d%H%M%S %z' );

#    my $endtime = $xpc->findvalue( 'EPG:EndTime', $inprog );
#    my $dtstop = dt_from_string( $endtime );
    my $duration = $xpc->findvalue( 'EPG:Duration', $inprog );
    my $dtstop = $dtstart->clone()->add( seconds => $duration );
    $lastchanid = $chanid;
    $lastend = $dtstop;
    $prog{stop} = $dtstop->strftime( '%Y%m%d%H%M%S %z' );

    $writer->write_programme(\%prog);
}

$writer->end();

if( $is_success ) {
     exit 0;
} else {
     exit 1;
}

sub config_stage
{
     my( $stage, $conf ) = @_;
     die "Unknown stage $stage" if $stage ne "start";

     my $result;
     my $writer = new XMLTV::Configure::Writer( OUTPUT => \$result, encoding => 'utf-8' );
     $writer->start( { grabber => 'tv_grab_pt_meo' } );

	 $writer->write_string( {
				id => 'maxchannels',
				title => [ [ 'Maximum channels to grab at once', 'en' ] ],
				description => [
				 [ 'The source can return an error if too many channels are grabbed at once, so the fetch is split into multiple fetches and joined together.',
					 'en' ] ],
				default => '5',
		 } );

     $writer->end( 'select-channels' );

     return $result;
}

sub list_channels
{
     my( $conf, $opt ) = @_;

     # Return a string containing an xmltv-document with <channel>-elements
     # for all available channels.

     my $channellist=get_nice( 'http://services.sapo.pt/EPG/GetChannelList' );
     my $parser=XML::LibXML->new();
     my $input=$parser->parse_string( $channellist )->getDocumentElement();

     my $output=XML::LibXML::Document->new( '1.0', 'utf-8' );
     my $root=XML::LibXML::Element->new( 'tv' );
     $output->setDocumentElement( $root );

     foreach my $channel( $input->getElementsByTagName( 'Channel') ) {
         my @node=$channel->getElementsByTagName( 'Name' );
         my $name=$node[0]->getFirstChild()->getData();
         @node=$channel->getElementsByTagName( 'Sigla' );
         my $sigla=$node[0]->getFirstChild()->getData();
         my $tmp=XML::LibXML::Element->new( 'channel' );
         $tmp->setAttribute( 'id', encode( 'UTF-8', $sigla ) );
         $tmp->appendTextChild( 'display-name', encode( 'UTF-8', $name ) );
         $root->appendChild( $tmp );
     }

     return $output->toString();
}

# Fetch the epg in chunks for a max of x channels per request
#  x is 'maxchannels' in config file, or default is 5
#  (c.f. Bug #486 Error obtaining data for more than 20 channels )
#
sub get_epg
{
    my( $epgref, $startDate, $endDate ) = @_;

    my $channelsListSize = scalar @{$conf->{channel}};
    my $maxChannelsPerRequest = $conf->{maxchannels}[0] || 5;
    my $baseRequest = 'http://services.sapo.pt/EPG/GetChannelListByDateInterval';
    my $i = 0;
    while ($i < $channelsListSize) {
        my $j = ($i + $maxChannelsPerRequest); $j = ($j > $channelsListSize) ? $channelsListSize : $j; $j--;
        my @partialChannelList = @{$conf->{channel}}[$i ... $j];

        my $partialRequest = make_url(\@partialChannelList, $startDate, $endDate);

        print( STDERR "requesting $partialRequest\n" )  if( !$opt->{quiet} );
        my $partialEpgsource = get_nice( $partialRequest );

        if (length($$epgref) == 0) {
            $$epgref = $partialEpgsource;
        } else {
            # merge new data into existing
            my ($newdata) = $partialEpgsource =~ m/(<Channel>.*<\/Channel>)/is;
            my ($x, $y) = $$epgref =~ m/^(.*<\/Channel>)(.*)$/is;
            $$epgref = ($x || '') . ($newdata || '') . ($y || '');
        }

        $i += $maxChannelsPerRequest;
    }

}

# Format a request url using data supplied
sub make_url
{
    my ( $channels, $startDate, $endDate ) = @_;

    my $requestCallsigns = join( ',', @$channels );
    $requestCallsigns =~ s/([^A-Za-z0-9,])/sprintf("%%%02X", ord($1))/seg;

    return 'http://services.sapo.pt/EPG/GetChannelListByDateInterval'
                . '?channelSiglas=' . $requestCallsigns
                . '&startDate=' . $startDate->ymd('-') . ' ' . $startDate->hms(':')
                . '&endDate=' . $endDate->ymd('-') . ' ' . $endDate->hms(':') ;
}

sub dt_from_string
{
    my( $string ) = @_;
    my($year, $month, $day, $hour, $minute, $second) =
        ($string =~ m|(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})|);
    my $dt = DateTime->new( year   => $year,
                            month  => $month,
                            day    => $day,
                            hour   => $hour,
                            minute => $minute,
                            second => $second,
                            time_zone => 'Europe/Lisbon',
    );
    return $dt;
}

sub make_channelid
{
    my( $id ) = @_;
    $id = lc( $id );      # turn into lowercase
    $id =~ s|\s+||g;      # remove whitespace
    $id =~ s|&||g;        # remove ampersand
    $id =~ s|\x{e7}|c|g;  # turn c-cecille into plain c
    $id =~ s|\+|-plus|g;  # turn + into -plus
    $id .= '.tv.sapo.pt'; # append domain part
    return( $id );
}

sub parse_title
{
    my $prog = shift;
    my $title = shift;

    if (!defined ($title)) {
        return undef;
    }

    if ($title =~ m|\s+T\d+\s+-\s+Ep\.\s+\d+$|) {
        # found season and episode in title
        my ($season, $episode) = ($title =~ m|\s+T(\d+)\s+-\s+Ep\.\s+(\d+)$|);
        $title =~ s|\s+T\d+\s+-\s+Ep\.\s+\d+$||;
        $prog->{'episode-num'} =  [ [ ($season - 1).' . '.($episode-1).' .', 'xmltv_ns' ] ];
    } elsif ($title =~ m|\s+-\s+Ep\.\s+\d+$|) {
        # found episode in title
        my ($episode) = ($title =~ m|\s+-\s+Ep\.\s+(\d+)$|);
        $title =~ s|\s+-\s+Ep\.\s+\d+$||;
        $prog->{'episode-num'} = [ [ '. '.($episode-1).' .', 'xmltv_ns' ] ];
    }

    return $title;
}
