#!/usr/bin/perl -w
#
# m-to-n.pl
#
# Filter mail so it should be accepted by news (unless it is
# really spam).
#
# Originally by Toivo Pedaste, University of Western Australia UCS
#               <toivo@ucs.uwa.edu.au>
# Re-written by David Luyer, University of Western Australia UCS
#               <david@luyer.net>
#
# How to use this;
# - set "myname" in the line below to something appropriate to put
#   in the path line
####
$myname = "myname";
####
# - make a moderated news group, set the moderator address to
#   the mailing list.
# - subscribe a special alias to the mailing list
# - run as " | m-to-n.pl opts | rnews | true "
# - options to use; all of these:
#        -n news.group
#        -o 'List Description'
#        -a mailing@address.here 
# - optionally set -d to a local distribution
#
# This tries to prevent rejects for things like missing from line,
# corrupt date, bad line count, duplicate message-id, followup
# without references, missing subject, etc, which some news peers
# enforce.  The message-id massaging is neccesary for when people
# post to both a newsgroup and a mailing list being gatewayed to
# news.
####
# Usage: m-to-n.pl [opts]
#    -n newsgroup name - REQUIRED
#    -o default organization
#    -a moderator address to fake
#    -d distribution override setting
####
require "getopts.pl";
Getopts('n:o:a:d:');
die "Newsgroup not given!\n" if (!defined $opt_n);
####
# Read useful headers.  Note that some such as "Date" and "Lines"
# are note useful as they may cause news filters to reject the
# article (eg, if the line count is wrong or the date is in the
# wrong format so it can't be parsed to check if it is too old,
# or if the date _is_ too old due to errors somewhere).
####
sub get {
  my $var = shift();
  $lastline = $var;
  $$var = $_;
}
while (<>)  {
    if (/^\s*$/o) {
	last;
    } elsif (/^path:/io) {
	get (\$path);
    } elsif (/^from:/io) {
	get (\$from);
    } elsif (/^subject:/io) {
	get (\$subject);
    } elsif (/^message-id:/io) {
	get (\$message_id);
    } elsif (/^sender:/io) {
	get (\$sender);
    } elsif (/^mime-version:/io) {
	get (\$mime_version);
    } elsif (/^content-type:/io) {
	get (\$content_type);
    } elsif (/^content-transfer-encoding:/io) {
	get (\$content_transfer_encoding);
    } elsif (/^content-length:/io) {
	get (\$content_length);
    } elsif (/^distribution:/io) {
	get (\$distribution);
    } elsif (/^organization:/io) {
	get (\$organization);
    } elsif (/^references:/io) {
	get (\$references);
    } elsif (/^in-reply-to:/io) {
	get (\$in_reply_to);
    } elsif (/^\s/o) {
	$$lastline .= $_;
    } else {
	undef $lastline;
    }
}
undef $lastline;
###
# End of headers found.  Send out new headers.
###
if (defined $path) {
    $path =~ s/^path: //i;
    print "Path: $myname!m-to-n!".$path;
    undef $path;
} else {
    print "Path: $myname!m-to-n\n";
}
sub email_fix {
    # Try and make a rfc974 e-mail address out of anything
    my ($line, $hdr, $com, $em);
    $line = $_[0];
    chomp $line;
    if($line =~ /^(.*):(.*)<([^ (<]*@[^ >)]*)>(.*)$/) {
      # rfc822
      $hdr = $1;
      $com = $2." ".$4;
      $em = $3;
    } elsif($line =~ /^(.*): *([^ (<]*@[^ >)]*) *\((.*)\)(.*)$/) {
      # rfc822 also
      $hdr = $1;
      $com = $3." ".$4;
      $em = $2;
    } elsif($line =~ /^(.*):(.*)\(([^ (<]*@[^ >)]*)\)(.*)$/) {
      # bad - email and name swapped, rfc822
      $hdr = $1;
      $com = $2." ".$4;
      $em = $3;
    } elsif($line =~ /^(.*): *([^ (<]*@[^ >)]*) *<(.*)>(.*)$/) {
      # bad - email and name swapped, rfc822
      $hdr = $1;
      $com = $3." ".$4;
      $em = $2;
    } elsif($line =~ /^(.*): *([^ <()>]+@[^ <()>]+) *$/) {
      # e-mail on its own
      $hdr = $1;
      $com = "";
      $em = $2;
    } elsif($line =~ /^(.*):([^@]*[ <(]|)([^ <()>]+@[^ <()>]+)([ >)].*|)$/) {
      # can't find anything valid, try and salvage an e-mail address
      $hdr = $1;
      $com = "Malformed From Line - best guess";
      $em = $3;
    } elsif($line =~ /^(.*): *([^ <()>]+) *$/) {
      # unqualified e-mail on its own
      $hdr = $1;
      $com = "";
      $em = "$2\@unknown.loopback.edu";
    } elsif($line =~ /^(.*):.*<([^ <()>]+)>.*$/) {
      # unqualified e-mail in <>
      $hdr = $1;
      $com = "";
      $em = "$2\@unknown.loopback.edu";
    } elsif($line =~ /^(.*): *(.*) *\(.*\) *$/) {
      # unqualified e-mail plus comment
      $hdr = $1;
      $em = "$2\@unknown.loopback.edu";
    } else {
      # just salvage the header name
      $line =~ /^(.*):/;
      $hdr = $1;
      $com = "Malformed From Line";
      $em = "nobody\@nowhere.loopback.edu";
    }
    $com =~ s/  +/ /g;
    $com =~ s/^\s+//;
    $com =~ s/\s+$//;
    $line = "$hdr: $em ($com)\n";
    $line =~ s/ \(\)$//;
    $line;
}
if (defined $from) {
    print email_fix($from);
    undef $from;
} else {
    print "From: nobody\@nowhere.loopback.edu (Missing From Line)\n";
}
if (defined $sender) {
    print email_fix($sender);
    undef $sender;
}
$tt = time();
if (defined $message_id) {
    $message_id =~ s/<$myname\+/</g;
    $message_id =~ s/</<$myname\+/g;
    $message_id =~ s/@([^<>@]*)@.*>/\@$1>/g;
    $message_id =~ s/[@.]*>/>/g;
    $message_id =~ s/<([^@]*)>/<$1.$tt.$$\@$myname.m-to-n.gw>/g;
    print $message_id;
    undef $message_id;
} else {
    print "Message-ID: <$myname+$tt.$$\@$myname.m-to-n.gw>\n";
}
@mon = ( "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep",
         "Oct", "Nov", "Dec" );
@day = ( "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat");
@time = gmtime($tt);
printf "Date: $day[$time[6]], %2d $mon[$time[4]] %d %02d:%02d:%02d GMT\n",
       $time[3], $time[5] + 1900, $time[2], $time[1], $time[0];
undef $tt;
undef @mon;
undef @day;
undef @time;
if (defined $mime_version) {
    print $mime_version;
    undef $mime_version;
}
if (defined $content_type) {
    print $content_type;
    undef $content_type;
}
if (defined $content_transfer_encoding) {
    print $content_transfer_encoding;
    undef $content_transfer_encoding;
}
if (defined $content_length) {
    print $content_length;
    undef $content_length;
}
if (defined $distribution) {
    print $distribution;
    undef $distribution;
} elsif (defined $opt_d) {
    print "Distribution: $opt_d\n";
}
undef $opt_d;
print "Newsgroups: $opt_n\n";
undef $opt_n;
if (defined $organization) {
    print $organization;
    undef $organization;
} elsif (defined $opt_o) {
    print "Organization: $opt_o\n";
} else {
    print "Organization: Unknown\n";
}
undef $opt_o;
if (defined $opt_a) {
    print "Approved: $opt_a (Automated Gateway)\n";
}
undef $opt_a;
$references = "References: $1\n"
    if (defined $in_reply_to && !defined $references &&
        $in_reply_to =~ /(<[^>]*>)/);
undef $in_reply_to;
if (defined $subject && $subject =~ /( |\[)re:/i && !defined $references) {
    # Prevent followup-without-references reject (note $myname+ added below)
    $tt = time();
    $references = "References: <missing-refs.$tt.$$\@$myname.m-to-n.gw>\n";
    undef $tt;
}
if (defined $subject && $subject !~ /^subject:\s*$/m) {
    print $subject;
} else {
    print "Subject: No subject given\n";
}
undef $subject;
if (defined $references) {
    # Ensure all message-id's are the news version
    $references =~ s/<$myname\+/</g;
    $references =~ s/</<$myname\+/g;
    # Ensure no 'bad message id' reject on inn peers
    $references =~ s/@([^<>@]*)@.*>/@\$1>/g;
    $references =~ s/[@.]*>/>/g;
    $tt = time();
    $references =~ s/<([^@]*)>/<$1.$tt.$$\@$myname.m-to-n.gw>/g;
    undef $tt;
    print $references;
    undef $references;
}
###
# Now copy the body
###
print $_;
print $_ while (<>);
