#!/usr/bin/perl -w
#package GUPE::Util::fetchmail_like;
use strict;
require 5.005;

=head1 NAME

fetchmail_like.pl - a perl program that can act a little like fetchmail

=head1 SYNOPSIS

  fetchmail_like.pl [--verbose] [--silent] [--lockfile lockfile] [--lockpolicy lockpolicy] \
                     [--keep] [--all] [--lastmech mechanism] [--lastfile filename] \    
                     [--timeout timeout] [--no_messages_ok] [--daemon nseconds] \
                     [--eval "perl expression"] [--evalmda "perl expression"] [--mda "shell command"] \
                     --url url

=head1 DESCRIPTION

See http://www.fetchmail.org and http://www.tuxedo.org/~esr/fetchmail/fetchmail-man.html

I gave up trying to divine exactly what each of the many fetchmail options does
and how its baroque config file parser works,
and found it faster to re-implement fetchmail in perl than to work that out.

=head1 UNSUPPORTED

We don't need no stinking config files or one-letter options.

We do not muck with headers the way fetchmail does (Received header,
address qualification).

We do not forward to an SMTP server.

We do not ever send email (with errors or otherwise).

We do not examine headers in the message that were generated from the envelope.

We do not do logging to a file or syslog.

=head1 OPTIONS

The --url option is mandatory.

You can specify any subset of --eval, --evalmda and --mda, including none,
but you cannot specify more than one of any of them.
The order is --eval, then --evalmda, then --mda; this is sometimes useful if you want
to have --eval change the message.

=head2 --verbose

Produce debug output (both from ourselves and from modules we use).

=head2 --silent

Do not produce any output to STDOUT (not even the summary line).

=head2 --no_messages_ok

Treat the case of there being 0 messages as a success condition (zero exit code), not error (non-zero exit code).

=head2 --url url

The url argument is intended to encompass the fetchmail options 
of --folder and --username and the hostname command line argument.
An example is:

   pop://username:password@hostname.company.com/INBOX/foldername

Note that this is not a compliant url. 
There is no provision in the pop url standard (rfc2384) for a password 
(as there is in ftp and in basic urls as per rfc1738).
According to http://www.faqs.org/rfcs/rfc2384.html the pop url is:

   pop://<user>;auth=<auth>@<host>:<port>

Where <auth> is an authentication mechanism or '*'. An example is '+APOP'.
Without leading + or * it is a sasl mechanism as per rfc2222.
An example might 'KERBEROS_V4' or 'GSSAPI' or 'ANONYMOUS'.
See http://www.iana.org/numbers.htm and http://www.isi.edu/in-notes/iana/assignments/sasl-mechanisms

Note that fetchmail has no --password command line option like we do;
in fetchmail passwords must be typed into a prompt, in .fetchmailrc,
or .netrc.

Note that the perl URI module does not support imap urls (http://www.faqs.org/rfcs/rfc2192.html),
so in our implementation we've had to do some hacks.

=head2 --username user --password password

Sometimes the password has goofy characters.
You can either user --url anyway, and uri_escape the characters,
or you can just specify them here.

UNIMPLEMENTED.

=head2 --keep

If specified, does no message deletion.
If unspecified, deletes all messages it successfully processes, as well as all previously processed messages
(in case a previous pass neglected to do a deletion).

(In fetchmail I don't know if it deletes all messages or just the ones read, if nokeep.)

=head2 --all

If specified, retrieves all messages, not just ones already read.

To determine what has been read, many mechanisms are potentially possible:

  pop LAST command (highest numbered message *any* client has done RETR with. Note this was in POP2 but is not in POP3)
  pop TOP command
  parsing 'Status:' headers
  client-side saved value of greatest 'Date:' field of a message successfully processed
  client-side database of all uid's of ones read
  client-side saved value of last uid successfully processed

Note that you generally have to use client-side state, because we might
retrieve a message, and then on our client side fail to deliver it somewhere.
So the server has no idea whether it was really "read" or not.

=head2 --lockfile lockfile

The name of a file I can create to hold my pid, so as to ensure that no one
else is doing this too. Choose the name of the file to indicate scoping.
Note this only works within a host.

If unspecified, not client side locking is done.

=head2 --lockpolicy lockpolicy

Either 'steal' (kill the other guy) or 'error' (kill yourself) if 
someone currently has the lock named by lockfile.

Only relevant if --lockfile is specified.

=head2 --lastmech mechanism

One of:

  all            # same as --all
  ask_last_msn   # use LAST comand
  save_last_uid  # save last message in --lastfile filename

If unspecified, defaults to 'all' if --all, to 'save_last_uid' if --lastfile,
and otherwise is 'ask_last_msn'.

If you specify save_last_uid, then the file lastfile must exist and it must
already have a saved uid. To initialize it, run with --lastfile but some other
mechanism.

Since we abandon as soon as we get to any message we cannot process, relying
on the last uid is reasonably safe.

=head2 --lastfile filename

filename to save the last uid of a message successfully processed.

Note we do not call this option --idfile, because fetchmail's option is
different (I think it holds all message uids retrieved in each session).

=head2 --timeout timeout

The timeout for each network request, in seconds.

=head2 --daemon nseconds

Like fetchmail, except that:

 - we do not background ourselves (like fetchmail's --nodetach)
 - we do not set a client-side lock to prevent other polling.
 - we do not handle a SIGHUP or SIGUSR1 specially (to interrupt polling or re-read config file)
 - we do not support a --quit to kill another polling fetchmail.
 - we do not support --logfile or --syslog

=head2 --mda "shell command"

If specified, the string version of each message is piped into this shell command.
The command is restarted for each message.

The command must return exit code 0 for message processing to be considered successful.
(I believe this is the same as fetchmail, though I can't find it
documented anywhere. See the rc checking code in fetchmail's sink.c)

We do support the fetchmail-style substitutions in the string:

   %F -> From
   %T -> To

Like fetchmail, we attempt to filter out shell meta characters (otherwise
someone could get an arbitrary shell command run by sending email).
We also support %S -> Subject.

We run the command as whatever user we are.

=head2 --eval "perl expression"

Similar to --mda, except that the given perl expression is run.

The $_ variable is a Mail::Internet object.

The expression must not die for processing to be considered successful.

When this is being used as a module, the value of this option might
be a code ref rather than a string.

=head2 --evalmda "perl expression"

Like --eval, except that the return value is a string, which is then treated
like an --mda.

=head1 Exit Codes

We roughly follow fetchmail:

  0 one or more messages successfully processed
  1 no messages to receive
  2 could not connect, or other fatal network error
  3 authentication failure
  4 protocol error
  5 bad arguments
  6 bad config file
  7 timeout
  8 client-side lock exists
  9 server-side lock exists
 10 failed talking to smtp port or to mda
 23 internal error

=head1 CAVEATS

=head1 DEPENDENCIES

Relies on:

  Net::POP3
  Mail::Internet


=head1 TODO

Allow multiple --url arguments, so it can rotate through them (would require
some sort of system for naming of --lastfile).

Allow substitution of any header via "%Header" in command line.

Parse out $url->auth() and use it to choose apop() for pop,
or in the case of imap, use AUTHENTICATE instead of LOGIN.
Also, for imap allow username=ANONYMOUS, password=email_address.

Do something like Net::POP3 which uses Net::Netrc to lookup password
if not given.

Also like Net::POP3 which uses Net::Config if not given the host.

Support port in url.

Offer retrieving headers only.

=head1 AUTHOR

Copyright 2000, Mark D. Anderson, mda@discerning.com.

This is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut

####
# CPAN modules
####
use Data::Dumper;

####
# GUPE modules
####
use FindBin;
use lib "$FindBin::Bin/../..";
# see lock_url() below
# use GUPE::Util::Lock;

####
# constants/config
####
# standalone or not
use vars qw($FETCHMAIL_AS_MODULE);
use vars qw($FETCHMAIL_DEFAULT_TIMEOUT);
$FETCHMAIL_DEFAULT_TIMEOUT = 30;

####
# output utils
####
use vars qw($DEBUG $INFORM);
$DEBUG = 0;
$INFORM = 1;

sub local_debug {
    $| = 1 if $DEBUG;
    print "DEBUG: ", @_, "\n" if $DEBUG;
}

sub local_inform {
    print @_, "\n" if $INFORM;
}

BEGIN {
if (!$FETCHMAIL_AS_MODULE) {
   #undef &debug;
   #undef &inform;
    no strict;
    *debug = \*local_debug;
    *inform = \*local_inform;
}
}

	# note that if we wanted to roll back our previous deletes, 
	# we could now do a $pop->reset() (POP3Client $pop->Reset) which does an RSET
sub fatal_error {
    my ($exit_code, $message, $opts, $pop) = @_;
    $pop->quit(); # ->close() just closes without a QUIT; no way to do that with POP3Client
    $! = $exit_code; # set exit code
    #POP3Client $pop->Message();
    die $message, " last response code was '", $pop->code(), "' and last response text was: '", join("\n", $pop->message()), "'";
}

####
# locking utils
####
sub lock_url {
    my ($url, $opts) = @_;
    my $lockfile = $opts->{lockfile};
    return unless $lockfile;

    require GUPE::Util::Lock;
    return new GUPE::Util::Lock('DotLockLink', $url, {lockfile => $lockfile, if_locked => $opts->{lockpolicy}});
}


####
# app-specific utils
####
sub read_command_line {
    my ($argv) = @_;
    $argv ||= \@ARGV;

    my %opt_values = ();

    my %no_arg_options = map {$_ => 1} qw(verbose silent no_messages_ok keep all);
    my %one_arg_options = map {$_ => 1} qw(lockfile lockpolicy lastmech lastfile timeout daemon eval evalmda mda url username password);
    debug "argv is: ", join('|',@$argv);
    while (defined($_ = shift @$argv)) {
	debug "processing option '$_'";
	next unless $_;
	last if $_ eq '--';
	m/--verbose/ || m/-v/ and $DEBUG = 1 and $opt_values{verbose} = 1, next;
	m/--silent/ and defined($INFORM = 0) and $opt_values{silent} = 1, next;
	my ($optname) = m/--(.*)/;
	die "we only take long options, not '$_'" unless $optname;
	if ($no_arg_options{$optname}) {$opt_values{$optname} = 1; next }
	if ($one_arg_options{$optname}) {$opt_values{$optname} = (shift @$argv || die "undefined value for '$_' option"); next}
	die "undefined option '$_'";
    }
    return \%opt_values, $argv;
}

sub update_lastfile {
    my ($lastfile, $uid) = @_;
    open(LASTFILE, ">$lastfile") || die "can't open lastfile '$lastfile' for write: $!";
    print LASTFILE $uid;
    close LASTFILE || die "some problem closing lastfile $lastfile: $!";
}

sub _evaluate {
    my ($expr, $what) = @_;
    my $warnings = '';
    my $result;
    {
	local $SIG{__WARN__} = sub {$warnings .= $_[0]};
	# if we got passed a sub rather than a string, just call it
	# this might happen if we are being used as a module
	$result = ref($expr) eq 'CODE' ? &$expr() : eval $expr;
    }
    die "evaluation of $what '$expr' died: $@", ($warnings ? "(with warnings '$warnings')" : '') if $@;
    die "evaluation of $what '$expr' had warnings: '$warnings'" if $warnings;
    return $result;
}

sub _pipeto {
    my ($message_obj, $cmd, $what) = @_;
    open (PIPECMD, "| $cmd") || die "could not open pipe to $what '$cmd': $!";
    $message_obj->print(\*PIPECMD);
    # perl returns false if close of a pipe to a program which exits non-zero
    close(PIPECMD) || ($! ? die "error closing pipe to '$cmd': $!" : die "$what '$cmd' return non-zero status: $?");
}    

####
# main
####

sub fetchmail_main {
    my ($opts) = @_;

    if (!$opts) {
	my $rest;
	($opts, $rest) = read_command_line();
	die "unexpected non-options '@$rest'" if @$rest;
    }
    debug "options are: ", Dumper($opts);
    my $urlstr = $opts->{url} || die "you must specify the --url option";
    # no URI::imap module; sigh
    my $is_imap = 0;
    if ($urlstr =~ m/^imap(.*)/i) {
	$urlstr = 'pop' . $1;
	$is_imap = 1;
    }
    use URI;
    my $url = URI->new($urlstr);
    
    # user() is a URI::pop method; it is uri-unescaped and just the part before the auth if any.
    # userinfo() is a URI::_server method; it is still uri-escaped and is the whole thing up to the first '@'
    # in the authority.
    # but if there is a '@' in the password, things are broken
    my $user_and_password;
    if (1) {
	$user_and_password = $url->user(); 
    }
    else {
	# sigh this might work, but then the hostname pattern fails on "pop://user:p@assword@company.com"
	$user_and_password = $url->authority(); # in URI::_generic
	die "url '$urlstr' has no authority" unless $user_and_password;
	$user_and_password =~ s/\@[^\@]*//;
	$user_and_password =~ s/;auth=.*//;
    }
    die "username missing in url '$urlstr'" unless $user_and_password;
    my ($username, $password) = ($user_and_password =~ m/(.+):([^:]+)/);
    
    # TODO:
    # $username ||= $opts->{username}; $password ||= $opts->{password};

    die "password missing in url '$urlstr'" unless defined($password);
    my $scheme = $is_imap ? 'imap' : $url->scheme();
    my $host = $url->host();
    my $folder = $url->path();
    die "no folder allowed except in imap" if $folder && $folder ne '/' && !$is_imap;
    debug "scheme=$scheme, username=$username, password=$password, host=$host, folder=$folder";
    
    my $lastmech = $opts->{lastmech} || ($opts->{all} ? 'all' : ($opts->{lastfile} ? 'save_last_uid' : 'ask_last_msn'));
    $opts->{lastmech} = $lastmech;
    $opts->{timeout} ||= $FETCHMAIL_DEFAULT_TIMEOUT;

    my $nseconds = $opts->{daemon};
    # will be released when goes out of scope;
    
    my $lock = lock_url($url, $opts);

    while (1) {
	if ($scheme eq 'pop') {
	    fetch_pop($opts, $host, $username, $password);
	}
	else {
	    die "sorry scheme '$scheme' from url '$urlstr' not supported";
	}
	# daemon/loop mode
	if ($nseconds) {
	    sleep($nseconds);
	} 
	# one-shot mode
	else {
	    return(0);
	}
    }
}

# pop can be done with Mail::POP3Client or Net::POP3
=begin ignore

	# versus Net::POP3, which is bundled
        # Mail::POP3Client also seems not to have as good means of identifying where things went wrong
	use Mail::POP3Client;
        # will do a Connect() if USER and PASSWORD are supplied
	my $pop = new Mail::POP3Client
	    (
	     DEBUG => $DEBUG,
	     TIMEOUT => $opts->{timeout},
	     USER => $username,
	     PASSWORD => $password,
	     HOST => $host,
	     ) || die "could not open pop url '$url_str'";
=cut

sub fetch_pop {
    my ($opts, $host, $username, $password) = @_;

    my $perl_expr = $opts->{'eval'};
    my $expr_for_pipe = $opts->{evalmda};
    my $shell_command = $opts->{mda};

    use Net::POP3;
    # see IO::Socket::INET; for some reason error conditions are put in $@
    # by default Net::POP3 requires the ability to look up a port by name 'pop3' which doesn't always exist in services
    my $pop = Net::POP3->new($host, Timeout => $opts->{timeout}, Debug => $DEBUG, Port => 110) || die "could not create Net::POP3 to host '$host': $@";

    # put check in to maybe catch case where login fails with no error
    die "got back new Net::POP3 to host '$host' with a closed socket" unless fileno($pop);

    # login() returns '0E0' if successful but not messages
    # POP3Client is $pop->Count() (it does a POPStat() as part of login which is done at connect time)
    my $num_messages = $pop->login($username, $password);
    if (!defined($num_messages) || !$num_messages) {
	fatal_error(3, "login failure", $opts, $pop);
    }

    if ($num_messages == 0) {
	if ($opts->{no_messages_ok}) {
	    inform "no messages to process";
	    $pop->quit();
	    return;
	}
	else {
	    fatal_error(1, "no messages to process", $opts, $pop);
	}
    }

    my $lastfile = $opts->{lastfile};
    my $last_msn;
    my $last_uid;
    my $lastmech = $opts->{lastmech};
    if ($lastmech eq 'ask_last_msn') {
	# $pop->Last() if POP3Client
	$last_msn = $pop->last();
	fatal_error(4, "LAST command failed", $opts, $pop) unless defined($last_msn);
    }
    elsif ($lastmech eq 'save_last_uid') {
	die "--lastmech save_last_uid, but no --lastfile" unless $lastfile;
	die "lastfile $lastfile does not exist" unless -f $lastfile;
	open(LASTFILE,"<$lastfile") || die "could not open $lastfile: $!";
	my $lastcontents;
	{$/ = undef; $lastcontents = <LASTFILE>;}
	close(LASTFILE);
	die "no contents in lastfile $lastfile" unless $lastcontents;
	die "saved last_uid '$lastcontents' has spaces" if $lastcontents =~ /\s/;
	$last_uid = $lastcontents;
    }
    elsif ($lastmech eq 'all') {
    }
    else {
	die "unknown --lastmech $lastmech";
    }

    # get a list of available uids.
    # note we do this even if lastmech eq 'all', so we can track what we've done.
    # POP3Client returns nothing if an error, but we are in array context, so we
    # can't distinguish from no messages unless we expected some.
    # also, it returns an array with nothing in the 0th cell.
    # my @uids = $pop->Uidl(); die "no entries in uidl response" unless @uids;
    # hash from message number to uid
    my $uid_hash = $pop->uidl() || fatal_error(4, "uidl failed", $opts, $pop);
    my @uids; while(my($k,$v) = each %$uid_hash) {die "message number $k > 1000" if $k > 1000; $uids[$k] = $v}
    my $num_uids = scalar(keys %$uid_hash);
    debug "length of uid array is ", scalar(@uids), " and number of uids is $num_uids";

    # retrieve messages, and process them
    my $seen_last = 1 if $lastmech eq 'all';
    my $msn = -1;
    my $delete_already_done = !$opts->{keep};
    my $delete_done_now = !$opts->{keep};
    my $last_ok_uid;
    while (@uids) {
	my $uid = shift @uids;
	$msn++;
	debug "considering msn $msn, uid ", ($uid||'(undef)');
	next unless defined($uid);
	if (!$seen_last) {
	    if ($delete_already_done) {
		debug "deleting already done msn $msn uid '$uid'";
		$pop->delete($msn); #POP3Client $pop->Delete()
	    }
	    else {
		debug "skipping uid '$uid' because seen before";
	    }
	    # we could read the message and look at X-UIDL, but that would be redundant
	    $seen_last = 1 if $lastmech eq 'save_last_uid' && $last_uid eq $uid;
	    $seen_last = 1 if $lastmech eq 'ask_last_msn' && $last_msn eq $msn;
	    next;
	}

	# read message
	# after the get we have to be real careful to catch all dies, so we can update lastfile
	# on what we have done
	use Mail::Internet;
	#POP3Client: my $mess_lines = [$pop->Retrieve($msn)]; 
	my $mess_lines = $pop->get($msn);

	fatal_error(4, "could not retrieve message $msn", $opts, $pop) unless @$mess_lines;
	my $message_obj;
	eval {$message_obj = new Mail::Internet($mess_lines, Modify => 0)};
	fatal_error(4, "bad message $msn: $@", $opts, $pop) if $@;

	eval {
	    if ($perl_expr) {
		debug "msn $msn, uid $uid, evaluating '$perl_expr'";
		$_ = $message_obj;
		_evaluate($perl_expr, "--eval expression");
	    }
	    if ($expr_for_pipe) {
		debug "msn $msn, uid $uid, evalmda '$expr_for_pipe'";
		$_ = $message_obj;
		my $pipe_cmd = _evaluate($expr_for_pipe, "--evalmda expression");
		die "nothing returned from '$expr_for_pipe' to pipe to" unless $pipe_cmd;
		debug "about to pipe to returned '$pipe_cmd'";
                _pipeto($message_obj, $pipe_cmd, "generated mda");
	    }
	    if ($shell_command) {
		my $replaced = $shell_command;
		my $f = $message_obj->head()->get('From');
		my $t = $message_obj->head()->get('To');
		my $s = $message_obj->head()->get('Subject');
		my $UNSAFE = '`<>;\|';
		$f =~ s/$UNSAFE/_/g; $s =~ s/$UNSAFE/_/g; $t =~ s/$UNSAFE/_/g;
                $replaced =~ s/\%F/$f/g; $replaced =~ s/\%T/$t/g; $replaced =~ s/\%S/$s/g;
		debug "msn $msn, uid $uid, running '$replaced'";
                _pipeto($message_obj, $replaced, "mda");
	    }
	};
	fatal_error(10, $@, $opts, $pop) if $@;
	$last_ok_uid = $uid;
	# for simplicity, always update file as we go
	update_lastfile($lastfile, $last_ok_uid) if $lastfile || $lastmech eq 'save_last_uid';
	$pop->delete($msn) if $delete_done_now;

    }
    inform "processed $num_uids messages successfully";

    # quit
    $pop->quit();		# $pop->Close() for POP3Client

    # update lastfile
    # update_lastfile($lastfile, $last_ok_uid) if $lastfile || $lastmech eq 'save_last_uid';
}

exit(fetchmail_main()) unless $FETCHMAIL_AS_MODULE;

####
# testing
####
sub testme {
    # remember '@;:/' is %40%3B%3A%2F
    system("perl -w fetchmail_like.pl --url pop://myname:password\@hostname.company.com --mda cat");
}

1;
