#!/usr/bin/perl

############
# killtnef #
############

use strict;
use Convert::TNEF;
use MIME::Parser;
use MIME::Types;

my $VERSION = 1.0;
my $mimetypes = MIME::Types->new;

my $message;
while (defined(my $line = <STDIN>)) {
  my $msg_bound;
  if ($line =~ /^(From\s+.+)\r?\n/ and length($message) or eof) {
    parse_message(\$message, $msg_bound); # All the action happens here...
    $message = "";
    $msg_bound=$line;
  } elsif ($line =~ /^(From\s+.+)\r?\n/) {
    $msg_bound=$line;  # The first 
  } 
  $message.=$line;
}
exit;


# Subroutines ###############################################################

sub parse_message {
  my $msg_body = shift @_;
  my $msg_bound = shift @_;
  my $mime_parser = new MIME::Parser;

  # This module likes to use tmp files, but I try to stop it here.
  $mime_parser->use_inner_files(1);
  $mime_parser->output_to_core(1);
  my $ent = $mime_parser->parse_data($$msg_body);
  my $num_parts=$ent->parts;

  # Determine if we have a MIME w/ms-tnef and act accordingly.
  if ( ($num_parts < 1) || ($$msg_body !~ /ms-tnef/i) ) {
    print "$$msg_body";
  } else {
    # Get the head info
    my $head = $ent->head;
    my $ReturnPath = $head->get('Return-Path');
    my @all_received = $head->get('Received');
    my $Date = $head->get('Date');
    my $From = $head->get('From');
    my $XSender = $head->get('X-Sender');
    my $To = $head->get('To');
    my $Subject = $head->get('Subject');
    my $MessageID = $head->get('Message-ID');
    my $boundary = $head->multipart_boundary;

    # Build a new MIME message based on the one we are examining
    # - LHH: it would probably be better to build this $new_ent
    #        using $ent->head as the basis, thus getting *all* of
    #        the headers, instead of just these few.  We only needed
    #        these few headers for the project this script was
    #        originally written for, but if someone wants to change
    #        this and submit a patch, that would be great.
    my $new_ent = MIME::Entity->build(
				'Type'        => "multipart/mixed",
				'Boundary'    => $boundary,
				'X-Mailer'    => undef
				      );
    my $new_head=$new_ent->head;
    # Try to preserve the order of headers in the original message by
    # extracting it from the original formatted header.
    my(%did_tag);
    foreach my $tag (@{$head->header}, $head->tags) {
	$tag =~ s/:.*//s;
	next if ($did_tag{lc $tag}++);
	next if ($new_head->count($tag));
	foreach my $value ($head->get_all($tag)) {
	    $new_head->add($tag, $value);
	}
    }

    # Loop over each MIME part adding each to the new message
    foreach my $mime_part_i (0 .. ($num_parts - 1)) {
      my $ent_part=$ent->parts($mime_part_i);
      if ($ent_part->mime_type =~ /ms-tnef/i )  {
        add_tnef_parts($ent_part, $new_ent);
      } else {
        $new_ent->add_part($ent_part);
      } 
    }

    # Set the preamble and epilogue equal to the original
    $new_ent->preamble($ent->preamble);
    $new_ent->epilogue($ent->epilogue);

    # Print the newly constructed MIME message
    print "$msg_bound"; 
    print STDOUT $new_ent->stringify;
  }
}

sub add_tnef_parts {
  my $ent = shift;
  my $new = shift;

  ## Create a tnef object
  my %TnefOpts=('output_to_core' => '4194304', 'output_dir' => '/tmp');
  my $tnef = Convert::TNEF->read_ent($ent, \%TnefOpts);
  my $head=$new->head;	# Get the header object from the new message
  if (! $tnef) {
    warn "TNEF CONVERT DID NOT WORK: " . $Convert::TNEF::errstr . "\n";
    warn "  - Failed on msg w/subj: " . $head->get('Subject') . "\n";
    return '';
  }

  #############################################################################
  # This section of code smokes lots of crack, and tries to dig the From:
  # header out of the $tnef->message if the new message we are appending
  # this attachment to does not already have a "From" header.  This is
  # required on most of the Outlook emails that never touch SMTP, only
  # Exchange servers, and never had valid SMTP From headers placed!
  #############################################################################
  my $msg=$tnef->message;
  my $mapi_props=$msg->data('MAPIProps');
  #warn join(", ", keys %{$msg->{MAPIProps}}) . "\n";
  #warn $msg->{MAPIProps}->{MBS_Data} . "\n\n----------------------------\n\n";
  #warn "$mapi_props\n\n---------------------------------\n\n";
  my $test=0x0024;
  #if ($mapi_props =~ m/(\0\0\0\xf8.{20})/) { warn "MATCHED a prop $1\n"; }
  #if (0) {
  if (! length($head->get('From')) ) {
    my $from='';
    my $cntrl_chars='[\c' . join('\c', ('A' .. 'Z')) . ']';
    if ($mapi_props =~ m/From:\s+([^\s\@]+\@[^\s]+)/) {
      $from=$1;
    } elsif ($mapi_props =~ m/\xf8\?\cA\0\0\0$cntrl_chars\0\0\0([^\0]+)\0+\cB\cA/) {
      $from=$1;
    } else {
      if ($mapi_props =~ m/(\xf8\?\cA.{30})/) { warn "MATCH: $1\n"; }
      #$from="Unknown Sender";
    }
    if( length($from)) { $head->replace('from', $from); }
  }
  #############################################################################

  for ($tnef->attachments) {
    $_->longname=~/^[\w\W]+\.([A-Za-z]{2,4})$/;
    my $ext = $1;
    my $type = $mimetypes->mimeTypeOf($ext);
    if (! $type) {
      warn "No MIME type for (" . $_->longname . "/" . $_->name . ")\n";
    }
    my $encoding;
    if ($type) {
	if ($type =~ m,^text/,) {
	    if ($_->data =~ /[^\001-\177]/) {
		$encoding = '8bit';
	    }
	    else {
		$encoding = '7bit';
	    }
	}
	else {
	    $encoding = 'base64';
	}
    }
    elsif ($_->data =~ /[^\t\n\r\f\040-\177]/) {
	$encoding = 'base64';
    }
    else {
	$encoding = '7bit';
    }

    $new->attach( 
                   Type => $type,
                   Encoding => $encoding,
                   Data => $_->data, 
                   Disposition => 'attachment',
                   Filename => $_->longname
                 );
  }
  # If you want to delete the working files
  $tnef->purge;
}


# POD documentation

=head1 SYNOPSIS


cat mbox_msg_w_tnef | killtnef > mbox_msg_mime_compliant

=head1 README

killtnef - Converts emails with MS-TNEF, Microsoft's proprietary Transport Neutral Encapsulated Format, attachments into standard MIME-compliant emails.

This script reads an mbox, or a single email message, from STDIN,
extracts data from any ms-tnef attachments that may exist, and writes
a new mbox (or a single email message) to STDOUT which has each of the
files that were encoded in any ms-tnef attachments attached separately,
as RFC-822/RFC-1521 compliant MIME emails/attachments.

Any email(s) containing no ms-tnef MIME attachments are passed through
this script untouched.

=head1 DESCRIPTION

This script was originally written to convert about 35,000 emails from
some Microsoft Outlook *.pst (post office) files, almost all of which
had ms-tnef encoded attachments, into MIME-compliant emails so that
they could be imported into an email-archiving system that 10East
supplies to some of its customers.  If anyone is curious, an imapd
was used to move the emails from the *.pst files to mbox format using
Outlook 2000 as an IMAP client.

This script can also be used as an incoming mail filter which will
automatically convert ms-tnef attachments into MIME-compliant
attachments.

=head1 AUTHORSHIP

Andrew Orr <aorr@10east.com> (no longer a maintainer)

Lester Hightower <hightowe@10east.com> (maintainer)

=head1 LICENSE

This software is licensed under the terms of the GNU Public License,
which is available for review at http://www.gnu.org/copyleft/gpl.html

=head1 CHANGE LOG

Feb-22-2002: Originally created by Andy Orr

Feb-26-2002: A few enhancements and bug-fixes by Lester Hightower.

Mar-06-2002: Documentation and a few comments added by Lester Hightower
in preparation for submitting this script to CPAN.

Mar-07-2002: After realizing that a POD README section is needed for the
HTML pages generated for the script index in CPAN, LHH added one and
submitted this as killtnef-1.0.1.pl.

Sep-20-2005: Applied a patch provided by Jonathan Kamens and released that
as  killtnef-1.0.2.pl.  The patch did:
  * Use /usr/bin/perl instead of /usr/local/bin/perl.
  * Use MIME::Types instead of hard-coded list of
    extensions and MIME types.
  * Preserve MIME boundary and headers from original
    message.
  * Try to use 7bit or 8bit encoding instead of base64
    whenever possible.  This makes resulting messages
    smaller and easier to full-text index.

=head1 PREREQUISITES

This script requires the C<strict>, C<Convert::TNEF 0.16>,
C<MIME::Parser 5.406>, and C<MIME::Types 1.15> modules.

=head1 COREQUISITES

None.

=pod OSNAMES

Any Unix-like.

=pod SCRIPT CATEGORIES

Mail
Mail/Converters
Mail/Filters

=cut

